From 33a84f4966c2e3dfe53b0add3f927d3df5f7b5b2 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 5 Nov 2020 12:25:14 -0500 Subject: [PATCH 001/343] API declaration for copy_v_transform_reduce_key_aggregated_out_nbr and transform_reduce_by_key_e --- .../copy_v_transform_reduce_in_out_nbr.cuh | 4 +- ...ransform_reduce_key_aggregated_out_nbr.cuh | 97 +++++++++++++++++++ .../patterns/transform_reduce_by_key_e.cuh | 92 ++++++++++++++++++ 3 files changed, 191 insertions(+), 2 deletions(-) create mode 100644 cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh create mode 100644 cpp/include/patterns/transform_reduce_by_key_e.cuh diff --git a/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh b/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh index f3c36897dd6..688255fac04 100644 --- a/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh +++ b/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh @@ -627,7 +627,7 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, * weight), *(@p adj_matrix_row_value_input_first + i), and *(@p adj_matrix_col_value_input_first + * j) (where i is in [0, graph_view.get_number_of_local_adj_matrix_partition_rows()) and j is in [0, * get_number_of_local_adj_matrix_partition_cols())) and returns a value to be reduced. - * @param init Initial value to be added to the reduced @e_op return values for each vertex. + * @param init Initial value to be added to the reduced @p e_op return values for each vertex. * @param vertex_value_output_first Iterator pointing to the vertex property variables for the first * (inclusive) vertex (assigned to tihs process in multi-GPU). `vertex_value_output_last` * (exclusive) is deduced as @p vertex_value_output_first + @p @@ -689,7 +689,7 @@ void copy_v_transform_reduce_in_nbr(raft::handle_t const& handle, * adj_matrix_col_value_input_first + j) (where i is in [0, * graph_view.get_number_of_local_adj_matrix_partition_rows()) and j is in [0, * get_number_of_local_adj_matrix_partition_cols())) and returns a value to be reduced. - * @param init Initial value to be added to the reduced @e_op return values for each vertex. + * @param init Initial value to be added to the reduced @p e_op return values for each vertex. * @param vertex_value_output_first Iterator pointing to the vertex property variables for the * first (inclusive) vertex (assigned to tihs process in multi-GPU). `vertex_value_output_last` * (exclusive) is deduced as @p vertex_value_output_first + @p diff --git a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh new file mode 100644 index 00000000000..8b04230ca3c --- /dev/null +++ b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include +#include + +#include + +#include + +#include + +namespace cugraph { +namespace experimental { + +/** + * @brief Iterate over the key-aggregated outgoing edges to update vertex properties. + * + * This function is inspired by thrust::transfrom_reduce() (iteration over the outgoing edges + * part) and thrust::copy() (update vertex properties part, take transform_reduce output as copy + * input). + * Unlike coy_v_transform_reduce_out_nbr, this function first aggregates outgoing edges by key to + * support two level reduction for each vertex. + * + * @tparam GraphViewType Type of the passed non-owning graph object. + * @tparam AdjMatrixRowValueInputIterator Type of the iterator for graph adjacency matrix row + * input properties. + * @tparam KeyIterator Type of the iterator for graph adjacency matrix column key values for + * aggregation. + * @tparam ValueType Type of the value in (key, value) pairs stored in @p kv_map. + * @tparam KeyAggregatedEdgeOp Type of the quaternary (or quinary) key-aggregated edge operator. + * @tparam ReduceOp Type of the binary reduction operator. + * @tparam T Type of the initial value for reduction over the key-aggregated outgoing edges. + * @tparam VertexValueOutputIterator Type of the iterator for vertex output property variables. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Non-owning graph object. + * @param adj_matrix_row_value_input_first Iterator pointing to the adjacency matrix row input + * properties for the first (inclusive) row (assigned to this process in multi-GPU). + * `adj_matrix_row_value_input_last` (exclusive) is deduced as @p adj_matrix_row_value_input_first + * + + * @p graph_view.get_number_of_local_adj_matrix_partition_rows(). + * @param out_nbr_key_first Iterator pointing to the adjacency matrix column key (for aggregation) + * for the first (inclusive) column (assigned to this process in multi-GPU). `out_nbr_key_last` + * (exclusive) is deduced as @p out_nbr_key_first + @p + * graph_view.get_number_of_local_adj_matrix_partition_cols(). + * @param e_op Quinary operator takes edge source, key, aggregated edge weight, *(@p + * adj_matrix_row_value_input_first + i), and value stored in @p kv_map for the key (where i is in + * [0, graph_view.get_number_of_local_adj_matrix_partition_rows())) and returns a value to be + * reduced. + * @param init Initial value to be added to the reduced @p key_aggregated_e_op return values for + * each vertex. + * @param vertex_value_output_first Iterator pointing to the vertex property variables for the + * first (inclusive) vertex (assigned to tihs process in multi-GPU). `vertex_value_output_last` + * (exclusive) is deduced as @p vertex_value_output_first + @p + * graph_view.get_number_of_local_vertices(). 
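+ *
+ * A rough usage sketch (the names `handle`, `mg_graph_view`, `d_row_values`, `d_col_keys`,
+ * `key_value_map`, and `d_outputs` are illustrative placeholders, not part of this API):
+ * @code
+ * copy_v_transform_reduce_key_aggregated_out_nbr(
+ *   handle, mg_graph_view, d_row_values.begin(), d_col_keys.begin(), key_value_map,
+ *   [] __device__(auto src, auto key, auto aggregated_w, auto row_val, auto mapped_val) {
+ *     // combine the key-aggregated edge weight with the value mapped to the key
+ *     return aggregated_w * mapped_val;
+ *   },
+ *   thrust::plus<weight_t>{}, weight_t{0.0}, d_outputs.begin());
+ * @endcode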
+ */ +template +void copy_v_transform_reduce_key_aggregated_out_nbr( + raft::handle_t const& handle, + GraphViewType const& graph_view, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + KeyIterator out_nbr_key_first, + cuco::static_map::value_type, ValueType> kv_map, + KeyAggregatedEdgeOp key_aggregated_e_op, + ReduceOp reduce_op, + T init, + VertexValueOutputIterator vertex_value_output_first) +{ + static_assert(std::is_integral::value_type>::value); + + CUGRAPH_FAIL("unimplemented."); +} + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/include/patterns/transform_reduce_by_key_e.cuh b/cpp/include/patterns/transform_reduce_by_key_e.cuh new file mode 100644 index 00000000000..2c077c1ed94 --- /dev/null +++ b/cpp/include/patterns/transform_reduce_by_key_e.cuh @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +#include + +#include + +#include + +namespace cugraph { +namespace experimental { + +/** + * @brief Iterate over the entire set of edges and reduce @p edge_op outputs to (key, value) pairs. + * + * This function is inspired by thrust::transform_reduce() and thrust::reduce_by_key(). + * + * @tparam GraphViewType Type of the passed non-owning graph object. + * @tparam AdjMatrixRowValueInputIterator Type of the iterator for graph adjacency matrix row + * input properties. + * @tparam AdjMatrixColValueInputIterator Type of the iterator for graph adjacency matrix column + * input properties. + * @tparam EdgeOp Type of the quaternary (or quinary) edge operator. + * @tparam T Type of the initial value of the value in each (key, value) pair. + * @tparam KeyIterator Type of the iterator for keys in (key, value) pairs. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Non-owning graph object. + * @param adj_matrix_row_value_input_first Iterator pointing to the adjacency matrix row input + * properties for the first (inclusive) row (assigned to this process in multi-GPU). + * `adj_matrix_row_value_input_last` (exclusive) is deduced as @p adj_matrix_row_value_input_first + + * @p graph_view.get_number_of_local_adj_matrix_partition_rows(). + * @param adj_matrix_col_value_input_first Iterator pointing to the adjacency matrix column input + * properties for the first (inclusive) column (assigned to this process in multi-GPU). + * `adj_matrix_col_value_output_last` (exclusive) is deduced as @p adj_matrix_col_value_output_first + * + @p graph_view.get_number_of_local_adj_matrix_partition_cols(). 
+ * @param e_op Quaternary (or quinary) operator takes edge source, edge destination, (optional edge + * weight), *(@p adj_matrix_row_value_input_first + i), and *(@p adj_matrix_col_value_input_first + + * j) (where i is in [0, graph_view.get_number_of_local_adj_matrix_partition_rows()) and j is in [0, + * get_number_of_local_adj_matrix_partition_cols())) and returns a pair of a key and a transformed + * value to be reduced. + * @param init Initial value to be added to the value in each transform-reduced (key, value) pair. + * @param map_key_first Iterator pointing to the first (inclusive) key to be stored in the returned + * cuco::static_map (which is local to this process in mulit-GPU). + * @param map_key_last Iterator pointing to the last (exclusive) key to be stored in the returned + * cuco::static_map (which is local to this process in multi-GPU). + * @return cuco::static_map A hash-based map of (key, value) pairs for the keys pointed by + * [map_key_first, map_key_last). + */ +template +cuco::static_map::value_type, T> +transform_reduce_by_key_e(raft::handle_t const& handle, + GraphViewType const& graph_view, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, + EdgeOp e_op, + T init, + KeyIterator map_key_first, + KeyIterator map_key_last) +{ + static_assert(is_arithmetic_or_thrust_tuple_of_arithmetic::value); + static_assert(std::is_integral::value_type>::value); + + CUGRAPH_FAIL("unimplemented."); + + return cuco::static_map::value_type, T>(); +} + +} // namespace experimental +} // namespace cugraph From de01b6ae9f3556c58b13ed6202b0bc2d13853ecf Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 5 Nov 2020 12:42:16 -0500 Subject: [PATCH 002/343] add coarsening based graph constructor --- cpp/include/experimental/graph.hpp | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/cpp/include/experimental/graph.hpp b/cpp/include/experimental/graph.hpp index 88c84414cd0..e280153257f 100644 --- a/cpp/include/experimental/graph.hpp +++ b/cpp/include/experimental/graph.hpp @@ -70,6 +70,16 @@ class graph_t const &graph_view, + vertex_t *labels); + graph_view_t view() { std::vector offsets(adj_matrix_partition_offsets_.size(), nullptr); @@ -147,6 +157,16 @@ class graph_t const &graph_view, + vertex_t *labels); + private: rmm::device_uvector offsets_; rmm::device_uvector indices_; From 5b53c8a4a7bdd814b18f86d521ee6ef0862d29b5 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 5 Nov 2020 13:47:20 -0500 Subject: [PATCH 003/343] minor documentation fix --- ...copy_v_transform_reduce_key_aggregated_out_nbr.cuh | 11 +++++++---- cpp/include/patterns/transform_reduce_by_key_e.cuh | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh index 8b04230ca3c..25038dea971 100644 --- a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh +++ b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh @@ -33,7 +33,7 @@ namespace experimental { * This function is inspired by thrust::transfrom_reduce() (iteration over the outgoing edges * part) and thrust::copy() (update vertex properties part, take transform_reduce output as copy * input). 
- * Unlike coy_v_transform_reduce_out_nbr, this function first aggregates outgoing edges by key to + * Unlike copy_v_transform_reduce_out_nbr, this function first aggregates outgoing edges by key to * support two level reduction for each vertex. * * @tparam GraphViewType Type of the passed non-owning graph object. @@ -52,16 +52,19 @@ namespace experimental { * @param adj_matrix_row_value_input_first Iterator pointing to the adjacency matrix row input * properties for the first (inclusive) row (assigned to this process in multi-GPU). * `adj_matrix_row_value_input_last` (exclusive) is deduced as @p adj_matrix_row_value_input_first - * + - * @p graph_view.get_number_of_local_adj_matrix_partition_rows(). + * + @p graph_view.get_number_of_local_adj_matrix_partition_rows(). * @param out_nbr_key_first Iterator pointing to the adjacency matrix column key (for aggregation) * for the first (inclusive) column (assigned to this process in multi-GPU). `out_nbr_key_last` * (exclusive) is deduced as @p out_nbr_key_first + @p * graph_view.get_number_of_local_adj_matrix_partition_cols(). - * @param e_op Quinary operator takes edge source, key, aggregated edge weight, *(@p + * @param kv_map cuco::static_map object holding (key, value) pairs for the keys pointed by @p + * out_nbr_key_first + i (where i is in [0, + * graph_view.get_number_of_local_adj_matrix_partition_rows())) + * @param key_aggregated_e_op Quinary operator takes edge source, key, aggregated edge weight, *(@p * adj_matrix_row_value_input_first + i), and value stored in @p kv_map for the key (where i is in * [0, graph_view.get_number_of_local_adj_matrix_partition_rows())) and returns a value to be * reduced. + * @param reduce_op Binary operator takes two input arguments and reduce the two variables to one. * @param init Initial value to be added to the reduced @p key_aggregated_e_op return values for * each vertex. * @param vertex_value_output_first Iterator pointing to the vertex property variables for the diff --git a/cpp/include/patterns/transform_reduce_by_key_e.cuh b/cpp/include/patterns/transform_reduce_by_key_e.cuh index 2c077c1ed94..6b6ae135ba1 100644 --- a/cpp/include/patterns/transform_reduce_by_key_e.cuh +++ b/cpp/include/patterns/transform_reduce_by_key_e.cuh @@ -61,7 +61,7 @@ namespace experimental { * cuco::static_map (which is local to this process in mulit-GPU). * @param map_key_last Iterator pointing to the last (exclusive) key to be stored in the returned * cuco::static_map (which is local to this process in multi-GPU). - * @return cuco::static_map A hash-based map of (key, value) pairs for the keys pointed by + * @return cuco::static_map Hash-based map of (key, value) pairs for the keys pointed by * [map_key_first, map_key_last). 
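+ *
+ * A rough usage sketch (the names `handle`, `mg_graph_view`, `d_row_values`, `d_col_labels`, and
+ * `d_map_keys` are illustrative placeholders, not part of this API):
+ * @code
+ * auto label_to_weight_map = transform_reduce_by_key_e(
+ *   handle, mg_graph_view, d_row_values.begin(), d_col_labels.begin(),
+ *   [] __device__(auto src, auto dst, auto w, auto row_val, auto col_label) {
+ *     // use the destination's label as the key and accumulate edge weights per label
+ *     return thrust::make_tuple(col_label, w);
+ *   },
+ *   weight_t{0.0}, d_map_keys.begin(), d_map_keys.end());
+ * @endcode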
*/ template Date: Fri, 6 Nov 2020 16:24:49 -0500 Subject: [PATCH 004/343] remove the coarsen function --- cpp/include/experimental/graph.hpp | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/cpp/include/experimental/graph.hpp b/cpp/include/experimental/graph.hpp index e280153257f..88c84414cd0 100644 --- a/cpp/include/experimental/graph.hpp +++ b/cpp/include/experimental/graph.hpp @@ -70,16 +70,6 @@ class graph_t const &graph_view, - vertex_t *labels); - graph_view_t view() { std::vector offsets(adj_matrix_partition_offsets_.size(), nullptr); @@ -157,16 +147,6 @@ class graph_t const &graph_view, - vertex_t *labels); - private: rmm::device_uvector offsets_; rmm::device_uvector indices_; From 5dd3fe7da8835c316abca68dbed6c635e223bc8a Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Sun, 8 Nov 2020 00:02:12 -0500 Subject: [PATCH 005/343] several cleanups (graph.cu) --- cpp/src/experimental/graph.cu | 97 +++++++++++------------------------ 1 file changed, 29 insertions(+), 68 deletions(-) diff --git a/cpp/src/experimental/graph.cu b/cpp/src/experimental/graph.cu index b6124bff94e..54be03a550c 100644 --- a/cpp/src/experimental/graph.cu +++ b/cpp/src/experimental/graph.cu @@ -62,25 +62,19 @@ struct out_of_range_t { template std:: tuple, rmm::device_uvector, rmm::device_uvector> - edge_list_to_compressed_sparse(raft::handle_t const &handle, - edgelist_t const &edgelist, - vertex_t major_first, - vertex_t major_last, - vertex_t minor_first, - vertex_t minor_last) + edgelist_to_compressed_sparse(edgelist_t const &edgelist, + vertex_t major_first, + vertex_t major_last, + vertex_t minor_first, + vertex_t minor_last, + cudaStream_t stream) { - rmm::device_uvector offsets((major_last - major_first) + 1, handle.get_stream()); - rmm::device_uvector indices(edgelist.number_of_edges, handle.get_stream()); + rmm::device_uvector offsets((major_last - major_first) + 1, stream); + rmm::device_uvector indices(edgelist.number_of_edges, stream); rmm::device_uvector weights( - edgelist.p_edge_weights != nullptr ? edgelist.number_of_edges : 0, handle.get_stream()); - thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - offsets.begin(), - offsets.end(), - edge_t{0}); - thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - indices.begin(), - indices.end(), - vertex_t{0}); + edgelist.p_edge_weights != nullptr ? edgelist.number_of_edges : 0, stream); + thrust::fill(rmm::exec_policy(stream)->on(stream), offsets.begin(), offsets.end(), edge_t{0}); + thrust::fill(rmm::exec_policy(stream)->on(stream), indices.begin(), indices.end(), vertex_t{0}); // FIXME: need to performance test this code with R-mat graphs having highly-skewed degree // distribution. If there is a small number of vertices with very large degrees, atomicAdd can @@ -98,7 +92,7 @@ std:: auto p_weights = edgelist.p_edge_weights != nullptr ? weights.data() : static_cast(nullptr); - thrust::for_each(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::for_each(rmm::exec_policy(stream)->on(stream), store_transposed ? edgelist.p_dst_vertices : edgelist.p_src_vertices, store_transposed ? 
edgelist.p_dst_vertices + edgelist.number_of_edges : edgelist.p_src_vertices + edgelist.number_of_edges, @@ -106,15 +100,13 @@ std:: atomicAdd(p_offsets + (v - major_first), edge_t{1}); }); - thrust::exclusive_scan(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - offsets.begin(), - offsets.end(), - offsets.begin()); + thrust::exclusive_scan( + rmm::exec_policy(stream)->on(stream), offsets.begin(), offsets.end(), offsets.begin()); if (edgelist.p_edge_weights != nullptr) { auto edge_first = thrust::make_zip_iterator(thrust::make_tuple( edgelist.p_src_vertices, edgelist.p_dst_vertices, edgelist.p_edge_weights)); - thrust::for_each(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::for_each(rmm::exec_policy(stream)->on(stream), edge_first, edge_first + edgelist.number_of_edges, [p_offsets, p_indices, p_weights, major_first] __device__(auto e) { @@ -137,7 +129,7 @@ std:: } else { auto edge_first = thrust::make_zip_iterator( thrust::make_tuple(edgelist.p_src_vertices, edgelist.p_dst_vertices)); - thrust::for_each(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::for_each(rmm::exec_policy(stream)->on(stream), edge_first, edge_first + edgelist.number_of_edges, [p_offsets, p_indices, p_weights, major_first] __device__(auto e) { @@ -162,42 +154,6 @@ std:: return std::make_tuple(std::move(offsets), std::move(indices), std::move(weights)); } -template -std::vector segment_degree_sorted_vertex_partition(raft::handle_t const &handle, - DegreeIterator degree_first, - DegreeIterator degree_last, - ThresholdIterator threshold_first, - ThresholdIterator threshold_last) -{ - auto num_elements = thrust::distance(degree_first, degree_last); - auto num_segments = thrust::distance(threshold_first, threshold_last) + 1; - - std::vector h_segment_offsets(num_segments + 1); - h_segment_offsets[0] = 0; - h_segment_offsets.back() = num_elements; - - rmm::device_uvector d_segment_offsets(num_segments - 1, handle.get_stream()); - - thrust::upper_bound(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - degree_first, - degree_last, - threshold_first, - threshold_last, - d_segment_offsets.begin()); - - raft::update_host(h_segment_offsets.begin() + 1, - d_segment_offsets.begin(), - d_segment_offsets.size(), - handle.get_stream()); - - CUDA_TRY(cudaStreamSynchronize( - handle.get_stream())); // this is necessary as d_segment_offsets will become out-of-scope once - // this function returns and this function returns a host variable which - // can be used right after return. 
- - return h_segment_offsets; -} - } // namespace template offsets(0, default_stream); rmm::device_uvector indices(0, default_stream); rmm::device_uvector weights(0, default_stream); - std::tie(offsets, indices, weights) = edge_list_to_compressed_sparse( - *(this->get_handle_ptr()), edgelists[i], major_first, major_last, minor_first, minor_last); + std::tie(offsets, indices, weights) = + edgelist_to_compressed_sparse(edgelists[i], + major_first, + major_last, + minor_first, + minor_last, + this->get_handle_ptr()->get_stream()); adj_matrix_partition_offsets_.push_back(std::move(offsets)); adj_matrix_partition_indices_.push_back(std::move(indices)); if (is_weighted) { adj_matrix_partition_weights_.push_back(std::move(weights)); } @@ -455,12 +416,12 @@ graph_t(*(this->get_handle_ptr()), - edgelist, - vertex_t{0}, - this->get_number_of_vertices(), - vertex_t{0}, - this->get_number_of_vertices()); + edgelist_to_compressed_sparse(edgelist, + vertex_t{0}, + this->get_number_of_vertices(), + vertex_t{0}, + this->get_number_of_vertices(), + this->get_handle_ptr()->get_stream()); // update degree-based segment offsets (to be used for graph analytics kernel optimization) From ac85973b78613a2d3bb36bac7e202559e9646926 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 10 Nov 2020 13:50:46 -0500 Subject: [PATCH 006/343] fixed typos in documentation --- cpp/include/experimental/graph_view.hpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/cpp/include/experimental/graph_view.hpp b/cpp/include/experimental/graph_view.hpp index ba327047b1d..eae06181e6e 100644 --- a/cpp/include/experimental/graph_view.hpp +++ b/cpp/include/experimental/graph_view.hpp @@ -332,6 +332,7 @@ class graph_view_t 0; } + // FIXME: this should be removed once MNMG Louvain is updated to use graph primitives partition_t get_partition() const { return partition_; } vertex_t get_number_of_local_vertices() const @@ -441,25 +442,25 @@ class graph_view_t Date: Tue, 10 Nov 2020 13:51:00 -0500 Subject: [PATCH 007/343] add graph_functions.cu --- cpp/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index ce1a1fae88a..18dd18e9682 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -388,6 +388,7 @@ add_library(cugraph SHARED src/centrality/betweenness_centrality.cu src/experimental/graph.cu src/experimental/graph_view.cu + src/experimental/graph_functions.cu src/experimental/bfs.cu src/experimental/sssp.cu src/experimental/pagerank.cu From bb8c66397d6f2e8d8ce0808cfd4944baa10771e0 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 12 Nov 2020 11:55:28 -0500 Subject: [PATCH 008/343] add functions to compute mappings between vertex/edge and GPU rank --- .../experimental/detail/graph_utils.cuh | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/cpp/include/experimental/detail/graph_utils.cuh b/cpp/include/experimental/detail/graph_utils.cuh index bf56b2e6f80..b0a9a5bad0a 100644 --- a/cpp/include/experimental/detail/graph_utils.cuh +++ b/cpp/include/experimental/detail/graph_utils.cuh @@ -24,6 +24,7 @@ #include #include +#include #include #include @@ -137,6 +138,38 @@ struct degree_from_offsets_t { __device__ edge_t operator()(vertex_t v) { return offsets[v + 1] - offsets[v]; } }; +template +struct compute_gpu_id_from_vertex_t { + int comm_size{0}; + + __device__ int operator()(vertex_t v) + { + cuco::detail::MurmurHash3_32 hash_func{}; + return hash_func(v) % comm_size; + } +}; + +template +struct compute_gpu_id_from_edge_t { + 
bool hypergraph_partitioned{false}; + int comm_size{0}; + int row_comm_size{0}; + int col_comm_size{0}; + + __device__ int operator()(vertex_t src, vertex_t dst) + { + cuco::detail::MurmurHash3_32 hash_func{}; + auto major_comm_rank = hash_func(store_transposed ? dst : src) % comm_size; + auto minor_comm_rank = hash_func(store_transposed ? src : dst) % comm_size; + if (hypergraph_partitioned) { + return (minor_comm_rank / col_comm_size) * row_comm_size + (major_comm_rank % row_comm_size); + } else { + return (major_comm_rank - (major_comm_rank % row_comm_size)) + + (minor_comm_rank / col_comm_size); + } + } +}; + } // namespace detail } // namespace experimental } // namespace cugraph From 44153a9e496dcc094d945f2735a5ff667e9003df Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 12 Nov 2020 11:56:06 -0500 Subject: [PATCH 009/343] temporary commit of WIP graph coarsening functions for communication --- cpp/src/experimental/graph_functions.cu | 437 ++++++++++++++++++++++++ 1 file changed, 437 insertions(+) create mode 100644 cpp/src/experimental/graph_functions.cu diff --git a/cpp/src/experimental/graph_functions.cu b/cpp/src/experimental/graph_functions.cu new file mode 100644 index 00000000000..8a85e2e7207 --- /dev/null +++ b/cpp/src/experimental/graph_functions.cu @@ -0,0 +1,437 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace cugraph { +namespace experimental { + +namespace { + +template +rmm::device_uvector find_unique_labels(vertex_t const *labels, + vertex_t num_labels, + cudaStream_t stream) +{ + rmm::device_uvector unique_labels(num_labels); + thrust::copy( + rmm::exec_policy(stream)->on(stream), labels, labels + num_labels, unique_labels.data()); + thrust::sort(rmm::exec_policy(stream)->on(stream), unique_labels.begin(), unique_labels.end()); + auto it = thrust::unique( + rmm::exec_policy(stream)->on(stream), unique_labels.begin(), unique_labels.end()); + unique_labels.resize(thrust::distance(unique_labels.begin(), it)); + unique_labels.shrink_to_fit(); + + return std::move(unique_labels); +} + +template +std:: + tuple, rmm::device_uvector, rmm::device_uvector> + compressed_sparse_to_edgelist(edge_t const *compressed_sparse_offsets, + vertex_t const *compressed_sparse_indices, + weight_t const *compressed_sparse_weights, + vertex_t major_first, + vertex_t major_last, + cudaStream_t stream) +{ + edge_t number_of_edges{0}; + raft::update_host( + &number_of_edges, compressed_sparse_offsets + (major_last - major_first), 1, stream); + CUDA_TRY(cudaStreamSynchronize(stream)); + rmm::device_uvector edgelist_src_vertices(number_of_edges, stream); + rmm::device_uvector edgelist_dst_vertices(number_of_edges, stream); + rmm::device_uvector edgelist_weights(number_of_edges, stream); + + auto p_majors = store_transposed ? 
edgelist_dst_vertices.data() : edgelist_src_vertices.data(); + auto p_minors = store_transposed ? edgelist_src_vertices.data() : edgelist_dst_vertices.data(); + + // FIXME: this is highly inefficient for very high-degree vertices, for better performance, we can + // fill high-degree vertices using one CUDA block per vertex, mid-degree vertices using one CUDA + // warp per vertex, and low-degree vertices using one CUDA thread per block + thrust::for_each(rmm::exec_policy(stream)->on(stream), + thrust::make_counting_iterator(major_first), + thrust::make_counting_iterator(major_last), + [compressed_sparse_offsets, major_first, p_majors] __device__(auto v) { + auto first = compressed_sparse_offsets[v - major_first]; + auto last = compressed_sparse_offsets[v - major_first + 1]; + thrust::fill(thrust::seq, p_majors + first, p_majors + last, v); + }); + thrust::copy(rmm::exec_policy(stream)->on(stream), + compressed_sparse_indices, + compressed_sparse_indices + number_of_edges, + p_minors); + thrust::copy(rmm::exec_policy(stream)->on(stream), + compressed_sparse_weights, + compressed_sparse_weights + number_of_edges, + edgelist_weights.data()); + + return std::make_tuple(std::move(edgelist_src_vertices), + std::move(edgelist_dst_vertices), + std::move(edgelist_weights)); +} + +template +std:: + tuple, rmm::device_uvector, rmm::device_uvector> + compute_coarsened_edgelist( + raft::handle_t const &handle, + graph_view_t const &graph_view, + vertex_t const *labels) +{ + // FIXME: we don't need adj_matrix_major_labels if we apply the same partitioning scheme + // regardless of hypergraph partitioning is applied or not + rmm::device_uvector adj_matrix_major_labels( + store_transposed ? graph_view.get_number_of_local_adj_matrix_partition_cols() + : graph_view.get_number_of_local_adj_matrix_partition_rows(), + handle.get_stream()); + rmm::device_uvector adj_matrix_minor_labels( + store_transposed ? graph_view.get_number_of_local_adj_matrix_partition_rows() + : graph_view.get_number_of_local_adj_matrix_partition_cols(), + handle.get_stream()); + if (store_transposed) { + copy_to_adj_matrix_col(handle, graph_view, labels, adj_matrix_major_labels.data()); + copy_to_adj_matrix_row(handle, graph_view, labels, adj_matrix_minor_labels.data()); + } else { + copy_to_adj_matrix_row(handle, graph_view, labels, adj_matrix_major_labels.data()); + copy_to_adj_matrix_col(handle, graph_view, labels, adj_matrix_minor_labels.data()); + } + + // FIXME: we may compare performance/memory footprint with the hash_based approach especially when + // cuco::dynamic_map becomes available (so we don't need to preallocate memory assuming the worst + // case). We may be able to limit the memory requirement close to the final coarsened edgelist + // with the hash based approach. 
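+  // For each local adjacency matrix partition below: expand the compressed sparse partition into
+  // an edge list, replace both endpoints with their labels, sort by (label, label) pair, merge
+  // with the coarsened edge list accumulated so far, and reduce duplicate pairs by summing their
+  // weights.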
+ rmm::device_uvector coarsened_edgelist_src_vertices(0, handle.get_stream()); + rmm::device_uvector coarsened_edgelist_dst_vertices(0, handle.get_stream()); + rmm::device_uvector coarsened_edgelist_weights(0, handle.get_stream()); + for (size_t i = 0; i < graph_view.adj_matrix_partition_offsets_.size(); ++i) { + rmm::device_uvector edgelist_src_vertices(0, handle.get_stream()); + rmm::device_uvector edgelist_dst_vertices(0, handle.get_stream()); + rmm::device_uvector edgelist_weights(0, handle.get_stream()); + std::tie(edgelist_src_vertices, edgelist_dst_vertices, edgelist_weights) = + compressed_sparse_to_edgelist( + handle, + graph_view.adj_matrix_partition_offsets[i], + graph_view.adj_matrix_partition_indices[i], + graph_view.adj_matrix_partition_weights[i], + store_transposed ? graph_view.get_local_adj_matrix_partition_col_first() + : graph_view.get_local_adj_matrix_partition_row_first(), + store_transposed ? graph_view.get_local_adj_matrix_partition_col_last() + : graph_view.get_local_adj_matrix_partition_row_last()); + auto src_dst_pair_first = + thrust::make_zip_iterator(edgelist_src_vertices.begin(), edgelist_dst_vertices.begin()); + thrust::transform( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + src_dst_pair_first, + src_dst_pair_first + edgelist_src_vertices.size(), + src_dst_pair_first, + [p_adj_matrix_major_labels = + adj_matrix_major_labels.data() + + (store_transposed ? graph_view.get_local_adj_matrix_partition_col_value_start_offset(i) + : graph_view.get_local_adj_matrix_partition_row_value_start_offset(i)), + p_adj_matrix_minor_labels = adj_matrix_minor_labels.data(), + src_first = graph_view.get_local_adj_matrix_partition_row_first(), + dst_first = graph_view.get_local_adj_matrix_partition_col_first()] __device__(auto val) { + auto src = thrust::get<0>(val); + auto dst = thrust::get<1>(val); + return store_transposed ? thrust::make_tuple(p_adj_matrix_minor_labels[src - src_first], + p_adj_matrix_major_labels[dst - dst_first]) + : thrust::make_tuple(p_adj_matrix_major_labels[src - src_first], + p_adj_matrix_minor_labels[dst - dst_first]); + }); + thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + src_dst_pair_first, + src_dst_pair_first + edgelist_src_vertices.size(), + edgelist_weights); + if (coarsened_edgelist_src_vertices.size() > 0) { + rmm::device_uvector tmp_src_vertices( + coarsened_edgelist_src_vertices.size() + edgelist_src_vertices.size(), handle.get_stream()); + rmm::device_uvector tmp_dst_vertices(tmp_src_vertices.size(), handle.get_stream()); + rmm::device_uvector tmp_weights(tmp_src_vertices.size(), handle.get_stream()); + auto coarsened_src_dst_pair_first = thrust::make_zip_iterator(thrust::make_tuple( + coarsened_edgelist_src_vertices.begin(), coarsened_edgelist_dst_vertices.begin())); + thrust::merge_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + coarsened_src_dst_pair_first, + coarsened_src_dst_pair_first + coarsened_edgelist_src_vertices.size(), + src_dst_pair_first, + src_dst_pair_first + edgelist_src_vertices.size(), + coarsened_edgelist_weights.begin(), + edgelist_weights.begin(), + thrust::make_zip_iterator( + thrust::make_tuple(tmp_src_vertices.begin(), tmp_dst_vertices.begin()), + tmp_weights.begin())); + CUDA_TRY(cudaStreamSynchronize( + handle.get_stream())); // this is necessary as memory blocks in edge_list_(src_vertices, + // dst_vertices, weights) will be freed after the following move + // assignments. 
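+      // the merged (but not yet reduced) edge list replaces edgelist_*; the reduce_by_key below
+      // collapses duplicate (src, dst) label pairs into the coarsened edge list, summing weights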
+ edgelist_src_vertices = std::move(tmp_src_vertices); + edgelist_dst_vertices = std::move(tmp_dst_vertices); + edgelist_weights = std::move(tmp_weights); + src_dst_pair_first = + thrust::make_zip_iterator(edgelist_src_vertices.begin(), edgelist_dst_vertices.begin()); + } + + coarsened_edgelist_src_vertices.resize(edgelist_src_vertices.size(), handle.get_stream()); + coarsened_edgelist_dst_vertices.resize(coarsened_edgelist_src_vertices.size(), + handle.get_stream()); + coarsened_edgelist_weights.resize(coarsened_edgelist_src_vertices.size(), handle.get_stream()); + auto it = thrust::reduce_by_key( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + src_dst_pair_first, + src_dst_pair_first + edgelist_src_vertices.size(), + edgelist_weights, + thrust::make_zip_iterator(thrust::make_tuple(coarsened_edgelist_src_vertices.begin(), + coarsened_edgelist_dst_vertices.begin())), + coarsened_edgelist_weights.begin()); + coarsened_edgelist_src_vertices.resize(thrust::distance(src_dst_pair_first, it), + handle.get_stream()); + coarsened_edgelist_dst_vertices.resize(coarsened_edgelist_src_vertices.size(), + handle.get_stream()); + coarsened_edgelist_weights.resize(coarsened_edgelist_src_vertices.size(), handle.get_stream()); + + CUDA_TRY(cudaStreamSynchronize( + handle.get_stream())); // this is necessary as edge_list_(src_vertices, dst_vertices, + // weights) will become out-of-scope. + } + + coarsened_edgelist_src_vertices.shrink_to_fit(handle.get_stream()); + coarsened_edgelist_dst_vertices.shrink_to_fit(handle.get_stream()); + coarsened_edgelist_weights.shrink_to_fit(handle.get_stream()); + return std::make_tuple(std::move(coarsened_edgelist_src_vertices), + std::move(coarsened_edgelist_dst_vertices), + std::move(coarsened_edgelist_weights)); +} + +template +auto shuffle_values(raft::handle_t const &handle, + TxValueIterator tx_value_first, + rmm::device_uvector const &tx_value_counts) +{ + auto &comm = handle.get_comms(); + + rmm::device_uvector rx_value_counts(comm.get_size(), handle.get_stream()); + + // FIXME: this needs to be replaced with AlltoAll once NCCL 2.8 is released. + std::vector tx_counts(comm.get_size(), size_t{1}); + std::vector tx_offsets(comm.get_size()); + std::iota(tx_offsets.begin(), tx_offsets.end(), size_t{0}); + std::vector tx_dst_ranks(comm.get_size()); + std::iota(tx_dst_ranks.begin(), tx_dst_ranks.end(), int{0}); + std::vector rx_counts(comm.get_size(), size_t{1}); + std::vector rx_offsets(comm.get_size()); + std::iota(rx_offsets.begin(), rx_offsets.end(), size_t{0}); + std::vector rx_src_ranks(comm.get_size()); + std::iota(rx_src_ranks.begin(), rx_src_ranks.end(), int{0}); + device_multicast_sendrecv(comm, + tx_value_counts.data(), + tx_counts, + tx_offsets, + tx_dst_ranks, + rx_value_counts.data(), + rx_counts, + rx_offsets, + rx_src_ranks, + handle.get_stream()); + + raft::update_host(tx_counts.data(), tx_value_counts.data(), comm.get_size(), handle.get_stream()); + std::partial_sum(tx_counts.begin(), tx_counts.end() - 1, tx_offsets.begin() + 1); + raft::update_host(rx_counts.data(), rx_value_counts.data(), comm.get_size(), handle.get_stream()); + std::partial_sum(rx_counts.begin(), rx_counts.end() - 1, rx_offsets.begin() + 1); + + auto rx_value_buffer = + allocate_comm_buffer::value_type>( + rx_offsets.back(), handle.get_stream()); + auto rx_value_first = + get_comm_buffer_begin::value_type>( + rx_value_buffer); + + // FIXME: this needs to be replaced with AlltoAll once NCCL 2.8 is released. 
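+  // the first sendrecv above exchanged only the per-rank element counts; this second round uses
+  // the counts/offsets derived from them to transfer the actual values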
+ device_multicast_sendrecv(comm, + tx_value_first, + tx_counts, + tx_offsets, + tx_dst_ranks, + rx_value_first, + rx_counts, + rx_offsets, + rx_src_ranks, + handle.get_stream()); + + return std::move(rx_value_buffer); +} + +} // namespace + +template +std::enable_if_t< + multi_gpu, + std::tuple>, + rmm::device_uvector>> +coarsen_graph( + raft::handle_t const &handle, + graph_view_t const &graph_view, + vertex_t const *labels) +{ + // 1. locally construct coarsened edge list + + rmm::device_uvector coarsened_edgelist_src_vertices(0, handle.get_stream()); + rmm::device_uvector coarsened_edgelist_dst_vertices(0, handle.get_stream()); + rmm::device_uvector coarsened_edgelist_weights(0, handle.get_stream()); + std::tie( + coarsened_edgelist_src_vertices, coarsened_edgelist_dst_vertices, coarsened_edgelist_weights) = + compute_coarsened_edgelist(handle, graph_view, labels); + + // 2. globally shuffle edge list + + { + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + + auto edge_first = + thrust::make_zip_iterator(thrust::make_tuple(coarsened_edgelist_src_vertices.begin(), + coarsened_edgelist_dst_vertices.begin(), + coarsened_edgelist_weights.begin())); + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edge_first, + edge_first + coarsened_edgelist_src_vertices.size(), + [key_func = detail::compute_gpu_id_from_edge_t{ + graph_view.is_hypergraph_partitioned(), + handle.get_comms().get_size(), + row_comm.get_size(), + col_comm.get_size()}] __device__(auto lhs, auto rhs) { + return key_func(thrust::get<0>(lhs), thrust::get<1>(lhs)) < + key_func(thrust::get<0>(rhs), thrust::get<1>(rhs)); + }); + + auto key_first = thrust::make_transform_iterator( + edge_first, + [key_func = detail::compute_gpu_id_from_edge_t{ + graph_view.is_hypergraph_partitioned(), + handle.get_comms().get_size(), + row_comm.get_size(), + col_comm.get_size()}] __device__(auto val) { return key_func(val); }); + rmm::device_uvector tx_value_counts(comm.get_size(), handle.get_stream()); + thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edge_first, + edge_first + coarsened_edgelist_src_vertices.size(), + thrust::make_constant_iterator(vertex_t{1}), + thrust::make_discard_iterator(), + tx_value_counts.begin()); + + auto = shuffle_values(edge_first, tx_value_counts); + + std::tie(coarsened_edgelist_src_vertices, + coarsened_edgelist_dst_vertices, + coarsened_edgelist_weights) = compute_coarsened_edgelist(handle, graph_view, labels); + } + + // 3. 
find unique labels assigned to each GPU + + rmm::device_uvector unique_labels(0, handle.get_stream()); + { + auto tx_unique_labels = + find_unique_labels(labels, graph_view.get_number_of_local_vertices(), handle.get_stream()); + + auto &comm = handle.get_comms(); + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + tx_unique_labels.begin(), + tx_unique_labels.end(), + [hash_func = cuco::detail::MurmurHash3_32(), + comm_size = comm.get_size()] __device__(auto lhs, auto rhs) { + return (hash_func(lhs) % comm_size) < (hash_func(rhs) % comm_size); + }); + auto key_first = thrust::make_transform_iterator( + tx_unique_label_keys.begin(), + [hash_func = cuco::detail::MurmurHash3_32(), + comm_size = comm.get_size()] __device__(auto label) { return hash(label) % comm_size; }); + rmm::device_uvector tx_num_unique_labels(comm.get_size(), handle.get_stream()); + thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + key_first, + key_first + tx_unique_labels.size(), + thrust::make_constant_iterator(vertex_t{1}), + thrust::make_discard_iterator(), + tx_num_unique_labels.begin()); + + auto rx_unique_labels = shuffle_values(tx_unique_labels, tx_num_unique_labels); + + unique_labels = + find_unique_labels(rx_unique_labels.data(), rx_unique_labels.size(), handle.get_stream()); + + // FIXME: should I cudaStreamSynchronize()? + } + + // 4. acquire unique labels for the major range + + // 5. locally compute (label, count) pairs and globally reduce + + // 6. sort (label, count) pairs and compute label to vertex ID map + + // 7. acquire (label, vertex ID) pairs for the major & minor ranges. + + // 8. renumber edgelists. + + // 9. create a coarsened graph. +} + +template +std::enable_if_t< + !multi_gpu, + std::tuple>, + rmm::device_uvector>> +coarsen_graph( + raft::handle_t const &handle, + graph_view_t const &graph_view, + vertex_t const *labels) +{ + CUGRAPH_FAIL("unimplemented."); +} + +// explicit instantiation + +} // namespace experimental +} // namespace cugraph From f75e461f8d7b580412059a2d9b483532e79cb3c2 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Fri, 13 Nov 2020 01:05:41 -0500 Subject: [PATCH 010/343] update headers for graph coarsening and relabeling --- cpp/include/experimental/graph_functions.hpp | 79 ++++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 cpp/include/experimental/graph_functions.hpp diff --git a/cpp/include/experimental/graph_functions.hpp b/cpp/include/experimental/graph_functions.hpp new file mode 100644 index 00000000000..e44dd694b21 --- /dev/null +++ b/cpp/include/experimental/graph_functions.hpp @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include + +#include +#include + +#include +#include + +namespace cugraph { +namespace experimental { + +/** + * @brief Compute the coarsened graph. 
+ * + * Aggregates the vertices with the same label to a new vertex in the output coarsened graph. + * Multi-edges in the coarsened graph are collapsed to a single edge with its weight equal to the + * sum of multi-edge weights. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam edge_t Type of edge identifiers. Needs to be an integral type. + * @tparam weight_t Type of edge weights. Needs to be a floating point type. + * @tparam store_transposed + * @tparam multi_gpu + * @param graph_view Graph view object of the input graph to be coarsened. + * @param labels Vertex labels (assigned to this process in multi-GPU) to be used in coarsening. + * @return std::tuple>, rmm::device_uvector> Tuple of the coarsened graph and labels mapped to the + * vertices (assigned to this process in multi-GPU) in the coarsened graph. + */ +template +std::tuple>, + rmm::device_uvector> +coarsen_graph( + raft::handel_t const& handle, + graph_view_t const& graph_view, + vertex_t const* labels); + +/** + * @brief Relabel old labels to new labels. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam multi_gpu + * @param old_labels Old labels to be relabeled. + * @param old_new_label_pairs Pairs of an old label and the corresponding new label (each process + * holds only part of the entire pairs; partitioning can be arbitrary). + * @return rmm::device_uvector New labels corresponding to the @p old_labels. + */ +template +rmm::device_uvector relabel( + raft::handle_t const& handle, + rmm::device_uvector const& old_labels, + std::tuple, rmm::device_uvector> const& + old_new_label_pairs); + +} // namespace experimental +} // namespace cugraph From 2137b3e8ed4d7ceacc0f08eb2c5bd11d18d25a31 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Wed, 18 Nov 2020 10:46:57 -0500 Subject: [PATCH 011/343] cosmetic updates --- cpp/include/experimental/graph.hpp | 4 ++-- cpp/include/patterns/copy_to_adj_matrix_row_col.cuh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/include/experimental/graph.hpp b/cpp/include/experimental/graph.hpp index 88c84414cd0..a3b1da35622 100644 --- a/cpp/include/experimental/graph.hpp +++ b/cpp/include/experimental/graph.hpp @@ -62,7 +62,7 @@ class graph_t> const &edge_lists, + std::vector> const &edgelists, partition_t const &partition, vertex_t number_of_vertices, edge_t number_of_edges, @@ -124,7 +124,7 @@ class graph_t const &edge_list, + edgelist_t const &edgelist, vertex_t number_of_vertices, graph_properties_t properties, bool sorted_by_degree, diff --git a/cpp/include/patterns/copy_to_adj_matrix_row_col.cuh b/cpp/include/patterns/copy_to_adj_matrix_row_col.cuh index 760775c03d4..d13066185da 100644 --- a/cpp/include/patterns/copy_to_adj_matrix_row_col.cuh +++ b/cpp/include/patterns/copy_to_adj_matrix_row_col.cuh @@ -219,7 +219,7 @@ void copy_to_matrix_minor(raft::handle_t const& handle, // partitioning auto comm_src_rank = row_comm_rank * col_comm_size + col_comm_rank; auto comm_dst_rank = (comm_rank % col_comm_size) * row_comm_size + comm_rank / col_comm_size; - // FIXME: this branch may no longer necessary with NCCL backend + // FIXME: this branch may be no longer necessary with NCCL backend if (comm_src_rank == comm_rank) { assert(comm_dst_rank == comm_rank); thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), From 24632bef8b367e6449e7df9601cf1e273a2bb999 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Wed, 18 Nov 2020 10:47:41 -0500 Subject: [PATCH 012/343] 
temporary commit of WIP graph coarsening functions for communication --- cpp/src/experimental/graph_functions.cu | 990 ++++++++++++++++++------ 1 file changed, 740 insertions(+), 250 deletions(-) diff --git a/cpp/src/experimental/graph_functions.cu b/cpp/src/experimental/graph_functions.cu index 8a85e2e7207..f34cb53cf81 100644 --- a/cpp/src/experimental/graph_functions.cu +++ b/cpp/src/experimental/graph_functions.cu @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -29,6 +30,7 @@ #include #include #include +#include #include #include @@ -41,24 +43,90 @@ namespace experimental { namespace { -template -rmm::device_uvector find_unique_labels(vertex_t const *labels, - vertex_t num_labels, - cudaStream_t stream) +// FIXME: better move this elsewhere for reusability +template +auto shuffle_values(raft::handle_t const &handle, + TxValueIterator tx_value_first, + rmm::device_uvector const &tx_value_counts) { - rmm::device_uvector unique_labels(num_labels); - thrust::copy( - rmm::exec_policy(stream)->on(stream), labels, labels + num_labels, unique_labels.data()); - thrust::sort(rmm::exec_policy(stream)->on(stream), unique_labels.begin(), unique_labels.end()); - auto it = thrust::unique( - rmm::exec_policy(stream)->on(stream), unique_labels.begin(), unique_labels.end()); - unique_labels.resize(thrust::distance(unique_labels.begin(), it)); - unique_labels.shrink_to_fit(); - - return std::move(unique_labels); + auto &comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + + rmm::device_uvector rx_value_counts(comm_size(), handle.get_stream()); + + // FIXME: this needs to be replaced with AlltoAll once NCCL 2.8 is released. + std::vector tx_counts(comm_size(), size_t{1}); + std::vector tx_offsets(comm_size()); + std::iota(tx_offsets.begin(), tx_offsets.end(), size_t{0}); + std::vector tx_dst_ranks(comm_size()); + std::iota(tx_dst_ranks.begin(), tx_dst_ranks.end(), int{0}); + std::vector rx_counts(comm_size(), size_t{1}); + std::vector rx_offsets(comm_size); + std::iota(rx_offsets.begin(), rx_offsets.end(), size_t{0}); + std::vector rx_src_ranks(comm_size()); + std::iota(rx_src_ranks.begin(), rx_src_ranks.end(), int{0}); + device_multicast_sendrecv(comm, + tx_value_counts.data(), + tx_counts, + tx_offsets, + tx_dst_ranks, + rx_value_counts.data(), + rx_counts, + rx_offsets, + rx_src_ranks, + handle.get_stream()); + + raft::update_host(tx_counts.data(), tx_value_counts.data(), comm_size(), handle.get_stream()); + std::partial_sum(tx_counts.begin(), tx_counts.end() - 1, tx_offsets.begin() + 1); + raft::update_host(rx_counts.data(), rx_value_counts.data(), comm_size(), handle.get_stream()); + std::partial_sum(rx_counts.begin(), rx_counts.end() - 1, rx_offsets.begin() + 1); + + auto rx_value_buffer = + allocate_comm_buffer::value_type>( + rx_offsets.back(), handle.get_stream()); + auto rx_value_first = + get_comm_buffer_begin::value_type>( + rx_value_buffer); + + int num_tx_dst_ranks{0}; + int num_rx_src_ranks{0}; + for (int i = 0; i < comm_size; ++i) { + if (tx_counts[i] != 0) { + tx_counts[num_tx_dst_ranks] = tx_counts[i]; + tx_offsets[num_tx_dst_ranks] = tx_offsets[i]; + tx_dst_ranks[num_tx_dst_ranks] = tx_dst_ranks[i]; + ++num_tx_dst_ranks; + } + if (rx_counts[i] != 0) { + rx_counts[num_rx_src_ranks] = rx_counts[i]; + rx_offsets[num_rx_src_ranks] = rx_offsets[i]; + rx_src_ranks[num_rx_src_ranks] = rx_src_ranks[i]; + } + } + tx_counts.resize(num_tx_dst_ranks); + tx_offsets.resize(num_tx_dst_ranks); + tx_dst_ranks.resize(num_tx_dst_ranks); + 
rx_counts.resize(num_rx_dst_ranks); + rx_offsets.resize(num_rx_dst_ranks); + rx_src_ranks.resize(num_rx_src_ranks); + + // FIXME: this needs to be replaced with AlltoAll once NCCL 2.8 is released + // (if num_tx_dst_ranks == num_rx_src_ranks == comm_size). + device_multicast_sendrecv(comm, + tx_value_first, + tx_counts, + tx_offsets, + tx_dst_ranks, + rx_value_first, + rx_counts, + rx_offsets, + rx_src_ranks, + handle.get_stream()); + + return std::move(rx_value_buffer); } -template +template std:: tuple, rmm::device_uvector, rmm::device_uvector> compressed_sparse_to_edgelist(edge_t const *compressed_sparse_offsets, @@ -72,12 +140,10 @@ std:: raft::update_host( &number_of_edges, compressed_sparse_offsets + (major_last - major_first), 1, stream); CUDA_TRY(cudaStreamSynchronize(stream)); - rmm::device_uvector edgelist_src_vertices(number_of_edges, stream); - rmm::device_uvector edgelist_dst_vertices(number_of_edges, stream); - rmm::device_uvector edgelist_weights(number_of_edges, stream); - - auto p_majors = store_transposed ? edgelist_dst_vertices.data() : edgelist_src_vertices.data(); - auto p_minors = store_transposed ? edgelist_src_vertices.data() : edgelist_dst_vertices.data(); + rmm::device_uvector edgelist_major_vertices(number_of_edges, stream); + rmm::device_uvector edgelist_minor_vertices(number_of_edges, stream); + rmm::device_uvector edgelist_weights( + compressed_sparse_weights != nullptr ? number_of_edges : 0, stream); // FIXME: this is highly inefficient for very high-degree vertices, for better performance, we can // fill high-degree vertices using one CUDA block per vertex, mid-degree vertices using one CUDA @@ -85,7 +151,9 @@ std:: thrust::for_each(rmm::exec_policy(stream)->on(stream), thrust::make_counting_iterator(major_first), thrust::make_counting_iterator(major_last), - [compressed_sparse_offsets, major_first, p_majors] __device__(auto v) { + [compressed_sparse_offsets, + major_first, + p_majors = edgelist_major_vertices.begin()] __device__(auto v) { auto first = compressed_sparse_offsets[v - major_first]; auto last = compressed_sparse_offsets[v - major_first + 1]; thrust::fill(thrust::seq, p_majors + first, p_majors + last, v); @@ -93,100 +161,164 @@ std:: thrust::copy(rmm::exec_policy(stream)->on(stream), compressed_sparse_indices, compressed_sparse_indices + number_of_edges, - p_minors); - thrust::copy(rmm::exec_policy(stream)->on(stream), - compressed_sparse_weights, - compressed_sparse_weights + number_of_edges, - edgelist_weights.data()); + edgelist_minor_vertices.begin()); + if (compressed_sparse_weights != nullptr) { + thrust::copy(rmm::exec_policy(stream)->on(stream), + compressed_sparse_weights, + compressed_sparse_weights + number_of_edges, + edgelist_weights.data()); + } return std::make_tuple(std::move(edgelist_src_vertices), std::move(edgelist_dst_vertices), std::move(edgelist_weights)); } +template +void sort_and_coarsen_edgelist(raft::handle_t const &handle, + rmm::device_uvector &edgelist_major_vertices /* [INOUT] */, + rmm::device_uvector &edgelist_minor_vertices /* [INOUT] */, + rmm::device_uvector &edgelist_weights /* [INOUT] */) +{ + auto pair_first = thrust::make_zip_iterator( + thrust::make_tuple(edgelist_major_vertices.begin(), edgelist_minor_vertices.begin())); + + size_t number_of_edges{0}; + if (edgelist_weights.size() > 0) { + thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + pair_first, + pair_first + edgelist_major_vertices.begin(), + edgelist_weights.begin()); + + rmm::device_uvector 
tmp_edgelist_major_vertices(edgelist_major_vertices.size(), handle.get_stream()); + rmm::device_uvector tmp_edgelist_minor_vertices(tmp_edgelist_major_vertices.size(), handle.get_stream()); + rmm::device_uvector tmp_edgelist_weights(tmp_edgelist_major_vertices.size(), handle.get_stream()); + auto it = thrust::reduce_by_key( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + pair_first, + pair_first + edgelist_major_vertices.begin(), + edgelist_weights.begin(), + thrust::make_zip_iterator(thrust::make_tuple(tmp_edgelist_major_vertices.begin(), + tmp_edgeilst_minor_vertices.begin())), + tmp_edgelist_weights.begin()); + number_of_edges = thrust::distance(tmp_edgelist_weights.begin(), thrust::get<1>(it)); + + CUDA_TRY(cudaStreamSynchronize( + handle + .get_stream())); // memory blocks owned by edgelist_(major_vertices,minor_vertices,weights) + // will be freed after the assignments below + + edgelist_major_vertices = std::move(tmp_edgelist_major_vertices); + edgelist_minor_vertices = std::move(tmp_edgelist_minor_vertices); + edgelist_weights = std::move(tmp_edgelist_weights); + } else { + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + pair_first, + pair_first + edgelist_major_vertices.begin()); + auto it = thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + pair_first, + pair_first + edgelist_major_vertices.size()); + number_of_edges = thrust::distance(pair_first, it); + } + + edgelist_major_vertices.resize(number_of_edges, handle.get_stream()); + edgelist_minor_vertices.resize(number_of_edges, handle.get_stream()); + edgelist_weights.resize(number_of_edges, handle.get_stream()); + edgelist_major_vertices.shrink_to_fit(handle.get_stream()); + edgelist_minor_vertices.shrink_to_fit(handle.get_stream()); + edgelist_weights.shrink_to_fit(handle.get_stream()); + + return; +} + +template +std:: + tuple, rmm::device_uvector, rmm::device_uvector> + compressed_sparse_to_relabeled_and_sorted_and_coarsened_edgelist( + edge_t const *compressed_sparse_offsets, + vertex_t const *compressed_sparse_indices, + weight_t const *compressed_sparse_weights, + vertex_t const *p_major_labels, + vertex_t const *p_minor_labels, + vertex_t major_first, + vertex_t minor_first, + cudaStream_t stream) +{ + // FIXME: it might be possible to directly create relabled & coarsened edgelist from the + // compressed sparse format to save memory + + rmm::device_uvector edgelist_major_vertices(0, handle.get_stream()); + rmm::device_uvector edgelist_minor_vertices(0, handle.get_stream()); + rmm::device_uvector edgelist_weights(0, handle.get_stream()); + std::tie(edgelist_major_vertices, edgelist_minor_vertices, edgelist_weights) = + compressed_sparse_to_edgelist(handle, + compressed_sparse_offsets, + compressed_sparse_indices, + compressed_sparse_weights, + major_first, + major_last, + stream); + + auto pair_first = thrust::make_zip_iterator( + thrust::make_tuple(edgelist_major_vertices.begin(), edgelist_minor_vertices.begin())); + thrust::transform( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + pair_first, + pair_first + edgelist_major_vertices.size(), + pair_first, + [p_major_labels, p_minor_labels, major_first, minor_first] __device__(auto val) { + return thrust::make_tuple(p_major_labels[thrust::get<0>(val) - major_first], + p_minor_labels[thrust::get<1>(val) - minor_first]); + }); + + sort_and_coarsen_edgelist( + handle, edgelist_major_vertices, edgelist_minor_vertices, edgelist_weights); + + return 
std::make_tuple(std::move(edgelist_major_vertices), + std::move(edgelist_minor_vertices), + std::move(edgelist_weights)); +} + +#if 0 template -std:: - tuple, rmm::device_uvector, rmm::device_uvector> - compute_coarsened_edgelist( - raft::handle_t const &handle, - graph_view_t const &graph_view, - vertex_t const *labels) +void compute_coarsened_edgelist(raft::handle_t const &handle, + rmm::device_uvector &coarsened_edgelist_major_vertices, + rmm::device_uvector &coarsened_edgelist_minor_vertices, + rmm::device_uvector &coarsened_edgelist_weights, + edge_t const *uncoarsened_edgelist_major_vertices, + vertex_t const *uncoarsened_edgelist_minor_vertices, + weight_t const *uncoarsened_edgelist_weights, + edge_t number_of_uncoarsened_edges, + vertex_t uncoarsened_major_first, + vertex_t uncoarsened_minor_first, + vertex_t major_labels, + vertex_t minor_labels) { - // FIXME: we don't need adj_matrix_major_labels if we apply the same partitioning scheme - // regardless of hypergraph partitioning is applied or not - rmm::device_uvector adj_matrix_major_labels( - store_transposed ? graph_view.get_number_of_local_adj_matrix_partition_cols() - : graph_view.get_number_of_local_adj_matrix_partition_rows(), - handle.get_stream()); - rmm::device_uvector adj_matrix_minor_labels( - store_transposed ? graph_view.get_number_of_local_adj_matrix_partition_rows() - : graph_view.get_number_of_local_adj_matrix_partition_cols(), - handle.get_stream()); - if (store_transposed) { - copy_to_adj_matrix_col(handle, graph_view, labels, adj_matrix_major_labels.data()); - copy_to_adj_matrix_row(handle, graph_view, labels, adj_matrix_minor_labels.data()); + auto pair_first = thrust::make_zip_iterator( + thrust::make_tuple(uncoarsened_edgelist_major_vertices, uncoarsened_edgelist_minor_vertices)); + if (uncoarsened_edgelist_eights != nullptr) { + thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + pair_first, + pair_first + number_of_uncoarsened_edges, + uncoarsened_edgelist_weights); } else { - copy_to_adj_matrix_row(handle, graph_view, labels, adj_matrix_major_labels.data()); - copy_to_adj_matrix_col(handle, graph_view, labels, adj_matrix_minor_labels.data()); + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + pair_first, + pair_first + number_of_uncoarsened_edges); } - - // FIXME: we may compare performance/memory footprint with the hash_based approach especially when - // cuco::dynamic_map becomes available (so we don't need to preallocate memory assuming the worst - // case). We may be able to limit the memory requirement close to the final coarsened edgelist - // with the hash based approach. - rmm::device_uvector coarsened_edgelist_src_vertices(0, handle.get_stream()); - rmm::device_uvector coarsened_edgelist_dst_vertices(0, handle.get_stream()); - rmm::device_uvector coarsened_edgelist_weights(0, handle.get_stream()); - for (size_t i = 0; i < graph_view.adj_matrix_partition_offsets_.size(); ++i) { - rmm::device_uvector edgelist_src_vertices(0, handle.get_stream()); - rmm::device_uvector edgelist_dst_vertices(0, handle.get_stream()); - rmm::device_uvector edgelist_weights(0, handle.get_stream()); - std::tie(edgelist_src_vertices, edgelist_dst_vertices, edgelist_weights) = - compressed_sparse_to_edgelist( - handle, - graph_view.adj_matrix_partition_offsets[i], - graph_view.adj_matrix_partition_indices[i], - graph_view.adj_matrix_partition_weights[i], - store_transposed ? 
graph_view.get_local_adj_matrix_partition_col_first() - : graph_view.get_local_adj_matrix_partition_row_first(), - store_transposed ? graph_view.get_local_adj_matrix_partition_col_last() - : graph_view.get_local_adj_matrix_partition_row_last()); - auto src_dst_pair_first = - thrust::make_zip_iterator(edgelist_src_vertices.begin(), edgelist_dst_vertices.begin()); - thrust::transform( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - src_dst_pair_first, - src_dst_pair_first + edgelist_src_vertices.size(), - src_dst_pair_first, - [p_adj_matrix_major_labels = - adj_matrix_major_labels.data() + - (store_transposed ? graph_view.get_local_adj_matrix_partition_col_value_start_offset(i) - : graph_view.get_local_adj_matrix_partition_row_value_start_offset(i)), - p_adj_matrix_minor_labels = adj_matrix_minor_labels.data(), - src_first = graph_view.get_local_adj_matrix_partition_row_first(), - dst_first = graph_view.get_local_adj_matrix_partition_col_first()] __device__(auto val) { - auto src = thrust::get<0>(val); - auto dst = thrust::get<1>(val); - return store_transposed ? thrust::make_tuple(p_adj_matrix_minor_labels[src - src_first], - p_adj_matrix_major_labels[dst - dst_first]) - : thrust::make_tuple(p_adj_matrix_major_labels[src - src_first], - p_adj_matrix_minor_labels[dst - dst_first]); - }); - thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - src_dst_pair_first, - src_dst_pair_first + edgelist_src_vertices.size(), - edgelist_weights); - if (coarsened_edgelist_src_vertices.size() > 0) { - rmm::device_uvector tmp_src_vertices( - coarsened_edgelist_src_vertices.size() + edgelist_src_vertices.size(), handle.get_stream()); - rmm::device_uvector tmp_dst_vertices(tmp_src_vertices.size(), handle.get_stream()); - rmm::device_uvector tmp_weights(tmp_src_vertices.size(), handle.get_stream()); - auto coarsened_src_dst_pair_first = thrust::make_zip_iterator(thrust::make_tuple( - coarsened_edgelist_src_vertices.begin(), coarsened_edgelist_dst_vertices.begin())); + if (coarsened_edgelist_src_vertices.size() > 0) { + rmm::device_uvector tmp_src_vertices( + coarsened_edgelist_src_vertices.size() + number_of_uncoarsened_edges, handle.get_stream()); + rmm::device_uvector tmp_dst_vertices(tmp_src_vertices.size(), handle.get_stream()); + rmm::device_uvector tmp_weights( + graph_view.is_weighted() ? tmp_src_vertices.size() : 0, handle.get_stream()); + auto coarsened_src_dst_pair_first = thrust::make_zip_iterator(thrust::make_tuple( + coarsened_edgelist_src_vertices.begin(), coarsened_edgelist_dst_vertices.begin())); + if (graph_view.is_weighted()) { thrust::merge_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), coarsened_src_dst_pair_first, coarsened_src_dst_pair_first + coarsened_edgelist_src_vertices.size(), @@ -194,42 +326,61 @@ std:: src_dst_pair_first + edgelist_src_vertices.size(), coarsened_edgelist_weights.begin(), edgelist_weights.begin(), - thrust::make_zip_iterator( - thrust::make_tuple(tmp_src_vertices.begin(), tmp_dst_vertices.begin()), - tmp_weights.begin())); - CUDA_TRY(cudaStreamSynchronize( - handle.get_stream())); // this is necessary as memory blocks in edge_list_(src_vertices, - // dst_vertices, weights) will be freed after the following move - // assignments. 
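      // For illustration: a minimal sketch of the synchronize-before-move pattern used
      // here, assuming `stream` is a valid cudaStream_t. The thrust calls above are
      // asynchronous, so the buffers they read must not be released (which the move
      // assignments below do) until the stream has been synchronized.
      rmm::device_uvector<int> old_buf(100, stream);
      rmm::device_uvector<int> new_buf(old_buf.size(), stream);
      thrust::copy(rmm::exec_policy(stream)->on(stream),
                   old_buf.begin(),
                   old_buf.end(),
                   new_buf.begin());            // asynchronous, still reads old_buf
      CUDA_TRY(cudaStreamSynchronize(stream));  // wait until old_buf is no longer in use
      old_buf = std::move(new_buf);             // safe: old_buf's allocation may now be freed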
- edgelist_src_vertices = std::move(tmp_src_vertices); - edgelist_dst_vertices = std::move(tmp_dst_vertices); - edgelist_weights = std::move(tmp_weights); - src_dst_pair_first = - thrust::make_zip_iterator(edgelist_src_vertices.begin(), edgelist_dst_vertices.begin()); + thrust::make_zip_iterator(thrust::make_tuple(tmp_src_vertices.begin(), + tmp_dst_vertices.begin())), + tmp_weights.begin()); + } else { + thrust::merge(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + coarsened_src_dst_pair_first, + coarsened_src_dst_pair_first + coarsened_edgelist_src_vertices.size(), + src_dst_pair_first, + src_dst_pair_first + edgelist_src_vertices.size(), + thrust::make_zip_iterator( + thrust::make_tuple(tmp_src_vertices.begin(), tmp_dst_vertices.begin()))); } + CUDA_TRY(cudaStreamSynchronize( + handle.get_stream())); // this is necessary as memory blocks in edgelist_(src_vertices, + // dst_vertices, weights) will be freed after the following move + // assignments. + edgelist_src_vertices = std::move(tmp_src_vertices); + edgelist_dst_vertices = std::move(tmp_dst_vertices); + edgelist_weights = std::move(tmp_weights); + src_dst_pair_first = thrust::make_zip_iterator( + thrust::make_tuple(edgelist_src_vertices.begin(), edgelist_dst_vertices.begin())); + } + edge_t num_edges{0}; + if (graph_view.is_weighted()) { coarsened_edgelist_src_vertices.resize(edgelist_src_vertices.size(), handle.get_stream()); coarsened_edgelist_dst_vertices.resize(coarsened_edgelist_src_vertices.size(), handle.get_stream()); - coarsened_edgelist_weights.resize(coarsened_edgelist_src_vertices.size(), handle.get_stream()); + coarsened_edgelist_weights.resize( + graph_view.is_weighted() ? coarsened_edgelist_src_vertices.size() : 0, handle.get_stream()); auto it = thrust::reduce_by_key( rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), src_dst_pair_first, src_dst_pair_first + edgelist_src_vertices.size(), - edgelist_weights, + edgelist_weights.begin(), thrust::make_zip_iterator(thrust::make_tuple(coarsened_edgelist_src_vertices.begin(), coarsened_edgelist_dst_vertices.begin())), coarsened_edgelist_weights.begin()); - coarsened_edgelist_src_vertices.resize(thrust::distance(src_dst_pair_first, it), - handle.get_stream()); - coarsened_edgelist_dst_vertices.resize(coarsened_edgelist_src_vertices.size(), - handle.get_stream()); - coarsened_edgelist_weights.resize(coarsened_edgelist_src_vertices.size(), handle.get_stream()); - - CUDA_TRY(cudaStreamSynchronize( - handle.get_stream())); // this is necessary as edge_list_(src_vertices, dst_vertices, - // weights) will become out-of-scope. + num_edges = static_cast(thrust::distance(src_dst_pair_first, it)); + } else { + auto it = thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + src_dst_pair_first, + src_dst_pair_first + edgelist_src_vertices.size()); + coarsened_edgelist_src_vertices = std::move(edgelist_src_vertices); + coarsened_edgelist_dst_vertices = std::move(edgelist_dst_vertices); + coarsened_edgelist_weights = std::move(edgelist_weights); + num_edges = static_cast(thrust::distance(src_dst_pair_first, it)); } + coarsened_edgelist_src_vertices.resize(num_edges, handle.get_stream()); + coarsened_edgelist_dst_vertices.resize(num_edges, handle.get_stream()); + coarsened_edgelist_weights.resize(graph_view.is_weighted() ? num_edges : 0, handle.get_stream()); + + CUDA_TRY(cudaStreamSynchronize( + handle.get_stream())); // this is necessary as edgelist_(src_vertices, dst_vertices, + // weights) will become out-of-scope. 
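    // For illustration: a minimal, self-contained sketch of the collapse step performed
    // above, assuming `stream` is a valid cudaStream_t and that `srcs`, `dsts` and
    // `weights` hold a (src, dst)-sorted edge list with possible duplicates. Duplicate
    // edges are reduced to one edge whose weight is the sum of the duplicates' weights.
    rmm::device_uvector<int32_t> out_srcs(srcs.size(), stream);
    rmm::device_uvector<int32_t> out_dsts(srcs.size(), stream);
    rmm::device_uvector<float> out_weights(srcs.size(), stream);
    auto in_key_first = thrust::make_zip_iterator(thrust::make_tuple(srcs.begin(), dsts.begin()));
    auto out_key_first =
      thrust::make_zip_iterator(thrust::make_tuple(out_srcs.begin(), out_dsts.begin()));
    auto it = thrust::reduce_by_key(rmm::exec_policy(stream)->on(stream),
                                    in_key_first,
                                    in_key_first + srcs.size(),
                                    weights.begin(),
                                    out_key_first,
                                    out_weights.begin());
    auto num_uniques = thrust::distance(out_weights.begin(), thrust::get<1>(it));
    out_srcs.resize(num_uniques, stream);  // trim to the number of distinct (src, dst) pairs
    out_dsts.resize(num_uniques, stream);
    out_weights.resize(num_uniques, stream);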
coarsened_edgelist_src_vertices.shrink_to_fit(handle.get_stream()); coarsened_edgelist_dst_vertices.shrink_to_fit(handle.get_stream()); @@ -238,67 +389,311 @@ std:: std::move(coarsened_edgelist_dst_vertices), std::move(coarsened_edgelist_weights)); } +#endif -template -auto shuffle_values(raft::handle_t const &handle, - TxValueIterator tx_value_first, - rmm::device_uvector const &tx_value_counts) +template +rmm::device_uvector compute_renumber_map( + raft::handle_t const &handle, + rmm::device_uvector const &edgelist_major_vertices, + rmm::device_uvector const &edgelist_minor_vertices) { - auto &comm = handle.get_comms(); + // FIXME: compare this sort based approach with hash based approach in both speed and memory + // footprint + + // 1. acquire (unique major label, count) pairs + + rmm::device_uvector tmp_labels = edgelist_major_vertices; + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + tmp_labels.begin(), + tmp_labels.end()); + rmm::device_uvector major_labels(tmp_labels.size(), handle.get_stream()); + rmm::device_uvector major_counts(major_labels.size(), handle.get_stream()); + auto major_pair_it = + thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + tmp_labels.begin(), + tmp_labels.end(), + thrust::make_constant_iterator(edge_t{1}), + major_labels.begin(), + major_counts.begin()); + tmp_labels.resize(0, handle.get_stream()); + tmp_labels.shrink_to_fit(handle.get_stream()); + major_labels.resize(thrust::distance(major_labels.begin(), thrust::get<0>(major_pair_it)), + handle.get_stream()); + major_counts.resize(major_labels.size(), handle.get_stream()); + major_labels.shrink_to_fit(handle.get_stream()); + major_counts.shrink_to_fit(handle.get_stream()); + + // 2. acquire unique minor labels + + rmm::device_uvector minor_labels = edgelist_minor_vertices; + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + minor_labels.begin(), + minor_labels.end()); + auto minor_label_it = + thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + minor_labels.begin(), + minor_labels.end()); + minor_labels.resize(thrust::distance(minor_labels.begin(), minor_label_it)); + minor_labels.shrink_to_fit(handle.get_stream()); + + // 3. 
merge major and minor labels + + rmm::device_uvector merged_labels(major_labels.size() + minor_labels.size(), + handle.get_stream()); + rmm::device_uvector merged_counts(merged_labels.size(), handle.get_stream()); + thrust::merge_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + major_labels.begin(), + major_labels.end(), + minor_labels.begin(), + minor_labels.end(), + major_counts.begin(), + thrust::make_constant_iterator(edge_t{0}), + merged_labels.begin(), + merged_counts.begin()); + major_labels.resize(0, handle.get_stream()); + major_counts.resize(0, handle.get_stream()); + minor_labels.resize(0, handle.get_stream()); + major_labels.shrink_to_fit(handle.get_stream()); + major_counts.shrink_to_fit(handle.get_stream()); + minor_labels.shrink_to_fit(handle.get_stream()); + rmm::device_uvector labels(merged_labels.size(), handle.get_stream()); + rmm::device_uvector counts(labels.size(), handle.get_stream()); + auto pair_it = + thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + merged_labels.begin(), + merged_labels.end(), + merged_counts.begin(), + labels.begin(), + counts.begin()); + merged_labels.resize(0, handle.get_stream()); + merged_counts.resize(0, handle.get_stream()); + merged_labels.shrink_to_fit(); + merged_counts.shrink_to_fit(); + labels.resize(thrust::distance(labels.begin(), thrust::get<0>(pair_it)), handle.get_stream()); + counts.resize(labels.size(), handle.get_stream()); + labels.shrink_to_fit(handle.get_stream()); + counts.shrink_to_fit(handle.get_stream()); + + // 4. if multi-GPU, shuffle and reduce (label, count) pairs + + if (multi_gpu) { + auto &comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + + auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(labels.begin(), counts.begin())); + auto key_func = detail::compute_gpu_id_from_vertex_t{comm_size}; + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + pair_first, + pair_first + labels.size(), + [key_func] __device__(auto lhs, auto rhs) { + return key_func(thrust::get<0>(lhs)) < key_func(thrust::get<0>(rhs)); + }); + auto key_first = thrust::make_transform_iterator( + labels.begin(), [key_func] __device__(auto val) { return key_func(val); }); + rmm::device_uvector tx_value_counts(comm_size, handle.get_stream()); + thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + key_first, + key_first + labels.size(), + thrust::make_constant_iterator(size_t{1}), + thrust::make_discard_iterator(), + tx_value_counts.begin()); - rmm::device_uvector rx_value_counts(comm.get_size(), handle.get_stream()); + rmm::device_uvector rx_labels(0, handle.get_stream()); + rmm::device_uvector rx_counts(0, handle.get_stream()); + std::tie(rx_labels, rx_counts) = shuffle_values(handle, pair_first, tx_value_counts); - // FIXME: this needs to be replaced with AlltoAll once NCCL 2.8 is released. 
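    // For illustration: the per-rank offsets used by the sendrecv based shuffle are an
    // exclusive prefix sum of the per-rank counts. A minimal host-side sketch, assuming
    // four ranks (std::partial_sum over the first N - 1 counts, written starting at
    // offsets[1]):
    //   counts  = {3, 1, 4, 2}  ->  offsets = {0, 3, 4, 8}
    std::vector<size_t> counts{3, 1, 4, 2};
    std::vector<size_t> offsets(counts.size(), size_t{0});
    std::partial_sum(counts.begin(), counts.end() - 1, offsets.begin() + 1);
    // offsets[i] is where rank i's segment starts in the packed send (or receive) buffer.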
- std::vector tx_counts(comm.get_size(), size_t{1}); - std::vector tx_offsets(comm.get_size()); - std::iota(tx_offsets.begin(), tx_offsets.end(), size_t{0}); - std::vector tx_dst_ranks(comm.get_size()); - std::iota(tx_dst_ranks.begin(), tx_dst_ranks.end(), int{0}); - std::vector rx_counts(comm.get_size(), size_t{1}); - std::vector rx_offsets(comm.get_size()); - std::iota(rx_offsets.begin(), rx_offsets.end(), size_t{0}); - std::vector rx_src_ranks(comm.get_size()); - std::iota(rx_src_ranks.begin(), rx_src_ranks.end(), int{0}); - device_multicast_sendrecv(comm, - tx_value_counts.data(), - tx_counts, - tx_offsets, - tx_dst_ranks, - rx_value_counts.data(), - rx_counts, - rx_offsets, - rx_src_ranks, - handle.get_stream()); + labels.resize(rx_labels.size(), handle.get_stream()); + counts.resize(labels.size(), handle.get_stream()); + thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rx_labels.begin(), + rx_labels.end(), + rx_counts.begin()); + pair_it = thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rx_labels.begin(), + rx_labels.end(), + rx_counts.begin(), + labels.begin(), + counts.begin()); + rx_labels.resize(0, handle.get_stream()); + rx_counts.resize(0, handle.get_stream()); + rx_labels.shrink_to_fit(handle.get_stream()); + rx_counts.shrink_to_fit(handle.get_stream()); + labels.resize(thrust::distance(labels.begin(), thrust::get<0>(pair_it)), handle.get_stream()); + counts.resize(labels.size(), handle.get_stream()); + labels.shrink_to_fit(handle.get_stream()); + labels.shrink_to_fit(handle.get_stream()); + } - raft::update_host(tx_counts.data(), tx_value_counts.data(), comm.get_size(), handle.get_stream()); - std::partial_sum(tx_counts.begin(), tx_counts.end() - 1, tx_offsets.begin() + 1); - raft::update_host(rx_counts.data(), rx_value_counts.data(), comm.get_size(), handle.get_stream()); - std::partial_sum(rx_counts.begin(), rx_counts.end() - 1, rx_offsets.begin() + 1); + // 5. sort by degree - auto rx_value_buffer = - allocate_comm_buffer::value_type>( - rx_offsets.back(), handle.get_stream()); - auto rx_value_first = - get_comm_buffer_begin::value_type>( - rx_value_buffer); + thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + counts.begin(), + counts.end(), + labels.begin(), + thrust::greater()); - // FIXME: this needs to be replaced with AlltoAll once NCCL 2.8 is released. - device_multicast_sendrecv(comm, - tx_value_first, - tx_counts, - tx_offsets, - tx_dst_ranks, - rx_value_first, - rx_counts, - rx_offsets, - rx_src_ranks, - handle.get_stream()); + CUDA_TRY( + cudaStreamSynchronize(handle.get_stream())); // temporary rmm::devicec_uvector objects become + // out-of-scope once this function returns. 
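  // For illustration: how the returned array is interpreted. Position i of `labels`
  // holds the old vertex ID that is renumbered to (this process's vertex partition
  // start + i); because of the descending sort above, frequently occurring (i.e.
  // high-degree) labels receive the smallest new IDs. For example, labels = {9, 2, 5}
  // with a partition starting at new ID 100 encodes 9 -> 100, 2 -> 101, 5 -> 102.
  // A sketch of building (old ID, new ID) pairs from the result, with a hypothetical
  // partition start for this process:
  vertex_t const vertex_partition_first{100};  // hypothetical, for illustration only
  auto old_new_pair_first = thrust::make_zip_iterator(thrust::make_tuple(
    labels.begin(), thrust::make_counting_iterator(vertex_partition_first)));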
- return std::move(rx_value_buffer); + return std::move(labels); } } // namespace +template +std::enable_if_t, partition_t, vertex_t, edge_t>> renumber_edgelist( + raft::handle_t const &handle, + rmm::device_uvector &edgelist_major_vertices /* [INOUT] */, + rmm::device_uvector &edgelist_minor_vertices /* [INOUT] */, + rmm::device_uvector &edgelist_weights /* [INOUT] */) +{ + auto &comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + auto &row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_size = row_comm.get_size(); + auto const row_comm_rank = row_comm.get_rank(); + auto &col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_size = col_comm.get_size(); + auto const col_comm_rank = col_comm.get_rank(); + + // 1. compute renumber map + + auto renumber_map_labels = compute_renumber_map( + handle, edgelist_major_vertices, edgelist_minor_vertices); + + // 2. initialize partition_t object, number_of_vertices, and number_of_edges for the coarsened + // graph + + auto vertex_partition_counts = host_scalar_allgather( + comm, static_cast(renumber_map_labels.size()), handle.get_stream()); + std::vector vertex_partition_offsets(comm_size + 1, 0); + std::partial_sum(vertex_partition_counts.begin(), + vertex_partition_counts.end(), + vertex_partition_offsets.begin() + 1); + + partition_t partition(vertex_partition_offsets, + graph_view.is_hypergraph_partitioned(), + row_comm_size, + col_comm_size, + row_comm_rank, + col_comm_rank); + + auto number_of_vertices = vertex_partition_offsets.back(); + auto number_of_edges = host_scalar_allreduce( + comm, static_cast(coarsened_edgelist_src_vertices.size()), handle.get_stream()); + + // 3. 
renumber edges + + if (graph_view.is_hypergraph_partitioned()) { + CUGRAPH_FAIL("unimplemented."); + } else { + double constexpr load_factor = 0.7; + + // FIXME: compare this hash based approach with a binary search based approach in both memory + // footprint and execution time + + { + vertex_t major_first{}; + vertex_t major_last{}; + std::tie(major_first, major_last) = partition.get_matrix_partition_major_range(0); + rmm::device_uvector renumber_map_major_labels(major_last - major_first, + handle.get_stream()); + std::vector recvcounts(row_comm_size); + for (int i = 0; i < row_comm_size; ++i) { + recvcounts[i] = partition.get_vertex_partition_size(row_comm_rank * row_comm_size + i); + } + std::vector displacements(row_comm_size, 0); + std::partial_sum(recvcounts.begin(), recvcounts.end() - 1, displacements.begin() + 1); + device_allgatherv(row_comm, + renumber_map_labels.begin(), + renumber_map_major_labels.begin(), + recvcounts, + displacements, + handle.get_stream()); + + CUDA_TRY(cudaStreamSynchronize( + handle.get_stream())); // cuco::static_map currently does not take stream + + cuco::static_map renumber_map{ + static_cast(static_cast(renumber_map_major_labels.size()) / load_factor), + invalid_vertex_id::value, + invalid_vertex_id::value}; + auto pair_first = thrust::make_zip_iterator(thrust::make_tuple( + renumber_map_major_labels.begin(), thrust::make_counting_iterator(major_first))); + renumber_map.insert(pair_first, pair_first + renumber_map_major_labels.size()); + renumber_map.find(edgelist_major_vertices.begin(), + edgelist_major_vertices.end(), + edgelist_major_vertices.begin()); + } + + { + vertex_t minor_first{}; + vertex_t minor_last{}; + std::tie(minor_first, minor_last) = partition.get_matrix_partition_minor_range(); + rmm::device_uvector renumber_map_minor_labels(minor_last - minor_first, + handle.get_stream()); + + // FIXME: this P2P is unnecessary if we apply the partitioning scheme used with hypergraph + // partitioning + auto comm_src_rank = row_comm_rank * col_comm_size + col_comm_rank; + auto comm_dst_rank = (comm_rank % col_comm_size) * row_comm_size + comm_rank / col_comm_size; + // FIXME: this branch may be no longer necessary with NCCL backend + if (comm_src_rank == comm_rank) { + assert(comm_dst_rank == comm_rank); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + renumber_map_labels.begin(), + renumber_map_labels.end(), + renumber_map_minor_labels.begin() + + (partition.get_vertex_partition_first(comm_src_rank) - + partition.get_vertex_partition_first(row_comm_rank * col_comm_size))); + } else { + device_sendrecv(comm, + renumber_map_labels.begin(), + renumber_map_labels.size(), + comm_dst_rank, + renumber_map_minor_labels.begin() + + (partition.get_vertex_partition_first(comm_src_rank) - + partition.get_vertex_partition_first(row_comm_rank * col_comm_size)), + static_cast(partition.get_vertex_partition_size(comm_src_rank)), + comm_src_rank, + handle.get_stream()); + } + + // FIXME: these broadcast operations can be placed between ncclGroupStart() and + // ncclGroupEnd() + for (int i = 0; i < col_comm_size; ++i) { + auto offset = partition.get_vertex_partition_first(row_comm_rank * col_comm_size + i) - + partition.get_vertex_partition_first(row_comm_rank * col_comm_size); + auto count = partition.get_vertex_partition_size(row_comm_rank * col_comm_size + i); + device_bcast(col_comm, + renumber_map_minor_labels.begin() + offset, + renumber_map_minor_labels.begin() + offset, + count, + i, + handle.get_stream()); + } + + 
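      // For illustration: a small worked example of the insert()/find() renumbering
      // pattern used for the major range above and for the minor range just below (the
      // names here are hypothetical). If the gathered old labels for a range are
      // {7, 3, 9} and the range starts at new vertex ID 10, the map is filled with
      // (7, 10), (3, 11), (9, 12); find() then overwrites each matching edge endpoint
      // with its new ID in place (an endpoint 9 becomes 12).
      rmm::device_uvector<vertex_t> example_old_labels(3, handle.get_stream());  // {7, 3, 9}, filled elsewhere
      cuco::static_map<vertex_t, vertex_t> example_map{
        size_t{8}, invalid_vertex_id<vertex_t>::value, invalid_vertex_id<vertex_t>::value};
      auto example_pair_first = thrust::make_transform_iterator(
        thrust::make_zip_iterator(thrust::make_tuple(example_old_labels.begin(),
                                                     thrust::make_counting_iterator(vertex_t{10}))),
        [] __device__(auto val) {
          return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val));
        });
      example_map.insert(example_pair_first, example_pair_first + example_old_labels.size());
      // example_map.find(first, last, output) would then translate old IDs to new IDs.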
CUDA_TRY(cudaStreamSynchronize( + handle.get_stream())); // cuco::static_map currently does not take stream + + cuco::static_map renumber_map{ + static_cast(static_cast(renumber_map_minor_labels.size()) / load_factor), + invalid_vertex_id::value, + invalid_vertex_id::value}; + auto pair_first = thrust::make_zip_iterator(thrust::make_tuple( + renumber_map_minor_labels.begin(), thrust::make_counting_iterator(minor_first))); + renumber_map.insert(pair_first, pair_first + renumber_map_minor_labels.size()); + renumber_map.find(coarsened_edgelist_minor_vertices.begin(), + coarsened_edgelist_minor_vertices.end(), + coarsened_edgelist_minor_vertices.begin()); + } + } + + return std::make_tuple(std::move(renumber_map_labels), partition, number_of_vertices, number_of_edges); +} + template const &graph_view, vertex_t const *labels) { + auto &comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + auto &row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_size = row_comm.get_size(); + auto const row_comm_rank = row_comm.get_rank(); + auto &col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_size = col_comm.get_size(); + auto const col_comm_rank = col_comm.get_rank(); + // 1. locally construct coarsened edge list - rmm::device_uvector coarsened_edgelist_src_vertices(0, handle.get_stream()); - rmm::device_uvector coarsened_edgelist_dst_vertices(0, handle.get_stream()); + // FIXME: we don't need adj_matrix_major_labels if we apply the same partitioning scheme + // regardless of hypergraph partitioning is applied or not + rmm::device_uvector adj_matrix_major_labels( + store_transposed ? graph_view.get_number_of_local_adj_matrix_partition_cols() + : graph_view.get_number_of_local_adj_matrix_partition_rows(), + handle.get_stream()); + rmm::device_uvector adj_matrix_minor_labels( + store_transposed ? graph_view.get_number_of_local_adj_matrix_partition_rows() + : graph_view.get_number_of_local_adj_matrix_partition_cols(), + handle.get_stream()); + if (store_transposed) { + copy_to_adj_matrix_col(handle, graph_view, labels, adj_matrix_major_labels.data()); + copy_to_adj_matrix_row(handle, graph_view, labels, adj_matrix_minor_labels.data()); + } else { + copy_to_adj_matrix_row(handle, graph_view, labels, adj_matrix_major_labels.data()); + copy_to_adj_matrix_col(handle, graph_view, labels, adj_matrix_minor_labels.data()); + } + + rmm::device_uvector coarsened_edgelist_major_vertices(0, handle.get_stream()); + rmm::device_uvector coarsened_edgelist_minor_vertices(0, handle.get_stream()); rmm::device_uvector coarsened_edgelist_weights(0, handle.get_stream()); - std::tie( - coarsened_edgelist_src_vertices, coarsened_edgelist_dst_vertices, coarsened_edgelist_weights) = - compute_coarsened_edgelist(handle, graph_view, labels); + // FIXME: we may compare performance/memory footprint with the hash_based approach especially when + // cuco::dynamic_map becomes available (so we don't need to preallocate memory assuming the worst + // case). We may be able to limit the memory requirement close to the final coarsened edgelist + // with the hash based approach. + for (size_t i = 0; i < graph_view.adj_matrix_partition_offsets_.size(); ++i) { + // get edge list - // 2. 
globally shuffle edge list + rmm::device_uvector edgelist_major_vertices(0, handle.get_stream()); + rmm::device_uvector edgelist_minor_vertices(0, handle.get_stream()); + rmm::device_uvector edgelist_weights(0, handle.get_stream()); + std::tie(edgelist_major_vertices, edgelist_minor_vertices, edgelist_weights) = + compressed_sparse_to_relabeled_and_sorted_and_coarsened_edgelist( + handle, + graph_view.adj_matrix_partition_offsets_[i], + graph_view.adj_matrix_partition_indices_[i], + graph_view.is_weighted() ? graph_view.adj_matrix_partition_weights_[i] : nullptr, + adj_matrix_major_labels.begin() + + (store_transposed ? graph_view.get_local_adj_matrix_partition_col_vaule_start_offset(i) + : graph_view.get_local_adj_matrix_partition_row_vaule_start_offset(i)), + adj_matrix_minor_labels.begin(), + store_transposed ? graph_view.get_local_adj_matrix_partition_col_first(i) + : graph_view.get_local_adj_matrix_partition_row_first(i), + store_transposed ? graph_view.get_local_adj_matrix_partition_row_first(i) + : graph_view.get_local_adj_matrix_partition_col_first(i)); + + auto cur_size = coarsened_edgelist_major_vertices.size(); + // FIXME: this can lead to frequent costly reallocation; we may be able to avoid this if we can + // reserve address space to avoid expensive reallocation. + // https://devblogs.nvidia.com/introducing-low-level-gpu-virtual-memory-management + coarsened_edgelist_major_vertices.resize(cur_size + edgelist_major_vertices.size(), + handle.get_stream()); + coarsened_edgelist_minor_vertices.resize(coarsened_edgelist_major_vertices.size(), + handle.get_stream()); + coarsened_edgelist_weights.resize( + graph_view.is_weighted() ? coarsened_edgelist_major_vertices.size() : 0, handle.get_stream()); + + if (graph_view.is_weighted()) { + auto src_edge_first = thrust::make_zip_iterator( + thrust::make_tuple(edgelist_major_vertices.begin(), edgelist_minor_vertices.begin(), edgelist_weights.begin())); + auto dst_edge_first = thrust::make_zip_iterator(thrust::make_tuple(coarsened_edgelist_major_vertices.begin(), + coarsened_edgelist_minor_vertices.begin(), + coarsened_edgelist_weights.begin())) + + cur_size; + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + src_edge_first, + src_edge_first + edgelist_major_vertices.size(), + dst_edge_first); + } else { + auto src_edge_first = + thrust::make_zip_iterator(thrust::make_tuple(edgelist_major_vertices.begin(), edgelist_minor_vertices.begin())); + auto dst_edge_first = thrust::make_zip_iterator(thrust::make_tuple(coarsened_edgelist_major_vertices.begin(), + coarsened_edgelist_minor_vertices.begin())) + + cur_size; + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + src_edge_first, + src_edge_first + edgelist_major_vertices.size(), + dst_edge_first); + } - { - auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); // edgelist_(major_vertices,minor_vertices,weights) + // will become out-of-scope + } + + sort_and_coarsen_edgelist(handle, + coarsened_edgelist_major_vertices, + coarsened_edgelist_minor_vertices, + coarsened_edgelist_weights); + + // 2. 
globally shuffle edge list and re-coarsen + { auto edge_first = - thrust::make_zip_iterator(thrust::make_tuple(coarsened_edgelist_src_vertices.begin(), - coarsened_edgelist_dst_vertices.begin(), + thrust::make_zip_iterator(thrust::make_tuple(coarsened_edgelist_major_vertices.begin(), + coarsened_edgelist_minor_vertices.begin(), coarsened_edgelist_weights.begin())); + auto key_func = detail::compute_gpu_id_from_edge_t{ + graph_view.is_hypergraph_partitioned(), + comm.get_size(), + row_comm.get_size(), + col_comm.get_size()}; thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), edge_first, - edge_first + coarsened_edgelist_src_vertices.size(), - [key_func = detail::compute_gpu_id_from_edge_t{ - graph_view.is_hypergraph_partitioned(), - handle.get_comms().get_size(), - row_comm.get_size(), - col_comm.get_size()}] __device__(auto lhs, auto rhs) { - return key_func(thrust::get<0>(lhs), thrust::get<1>(lhs)) < - key_func(thrust::get<0>(rhs), thrust::get<1>(rhs)); + edge_first + coarsened_edgelist_major_vertices.size(), + [key_func] __device__(auto lhs, auto rhs) { + return store_transposed ? (key_func(thrust::get<1>(lhs), thrust::get<0>(lhs)) < + key_func(thrust::get<1>(rhs), thrust::get<0>(rhs))) + : (key_func(thrust::get<0>(lhs), thrust::get<1>(lhs)) < + key_func(thrust::get<0>(rhs), thrust::get<1>(rhs))); }); - - auto key_first = thrust::make_transform_iterator( - edge_first, - [key_func = detail::compute_gpu_id_from_edge_t{ - graph_view.is_hypergraph_partitioned(), - handle.get_comms().get_size(), - row_comm.get_size(), - col_comm.get_size()}] __device__(auto val) { return key_func(val); }); + auto key_first = thrust::make_transform_iterator(edge_first, [key_func] __device__(auto val) { + return store_transposed ? key_func(thrust::get<1>(val), thrust::get<0>(val)) + : key_func(thrust::get<0>(val), thrust::get<1>(val)); + }); rmm::device_uvector tx_value_counts(comm.get_size(), handle.get_stream()); thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - edge_first, - edge_first + coarsened_edgelist_src_vertices.size(), + key_first, + key_first + coarsened_edgelist_major_vertices.size(), thrust::make_constant_iterator(vertex_t{1}), thrust::make_discard_iterator(), tx_value_counts.begin()); - auto = shuffle_values(edge_first, tx_value_counts); - - std::tie(coarsened_edgelist_src_vertices, - coarsened_edgelist_dst_vertices, - coarsened_edgelist_weights) = compute_coarsened_edgelist(handle, graph_view, labels); - } + rmm::device_uvector rx_edgelist_major_vertices(0, handle.get_stream()); + rmm::device_uvector rx_edgelist_minor_vertices(0, handle.get_stream()); + rmm::device_uvector rx_edgelist_weights(0, handle.get_stream()); + std::tie(rx_edgelist_major_vertices, rx_edgelist_minor_vertices, rx_edgelist_weights) = + shuffle_values(handle, edge_first, tx_value_counts); - // 3. 
find unique labels assigned to each GPU + sort_and_coarsen_edgelist( + handle, rx_edgelist_major_vertices, rx_edgelist_minor_vertices, rx_edgelist_weights); - rmm::device_uvector unique_labels(0, handle.get_stream()); - { - auto tx_unique_labels = - find_unique_labels(labels, graph_view.get_number_of_local_vertices(), handle.get_stream()); - - auto &comm = handle.get_comms(); - thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - tx_unique_labels.begin(), - tx_unique_labels.end(), - [hash_func = cuco::detail::MurmurHash3_32(), - comm_size = comm.get_size()] __device__(auto lhs, auto rhs) { - return (hash_func(lhs) % comm_size) < (hash_func(rhs) % comm_size); - }); - auto key_first = thrust::make_transform_iterator( - tx_unique_label_keys.begin(), - [hash_func = cuco::detail::MurmurHash3_32(), - comm_size = comm.get_size()] __device__(auto label) { return hash(label) % comm_size; }); - rmm::device_uvector tx_num_unique_labels(comm.get_size(), handle.get_stream()); - thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - key_first, - key_first + tx_unique_labels.size(), - thrust::make_constant_iterator(vertex_t{1}), - thrust::make_discard_iterator(), - tx_num_unique_labels.begin()); - - auto rx_unique_labels = shuffle_values(tx_unique_labels, tx_num_unique_labels); - - unique_labels = - find_unique_labels(rx_unique_labels.data(), rx_unique_labels.size(), handle.get_stream()); + CUDA_TRY(cudaStreamSynchronize( + handle.get_stream())); // memory blocks owned by + // coarsened_edgelist_(major_vertices,minor_vertices,weights) + // will be freed after the assignments below - // FIXME: should I cudaStreamSynchronize()? + coarsened_edgelist_major_vertices = std::move(rx_edgelist_major_vertices); + coarsened_edgelist_minor_vertices = std::move(rx_edgelist_minor_vertices); + coarsened_edgelist_weights = std::move(rx_edgelist_weights); } - // 4. acquire unique labels for the major range - - // 5. locally compute (label, count) pairs and globally reduce - - // 6. sort (label, count) pairs and compute label to vertex ID map - - // 7. acquire (label, vertex ID) pairs for the major & minor ranges. - - // 8. renumber edgelists. + rmm::device_uvector renumber_map_labels{}; + partition_t partition{}; + vertex_t number_of_vertices{}; + edge_t number_of_edges{}; + std::tie(renumber_map_labels, partition, number_of_vertices, number_of_edges) = + renumber_edgelist(handle, + coarsened_edgelist_major_vertices, + coarsened_edgelist_minor_vertices, + coarsened_edgelist_weights); + + // 4. build a graph + + std::vector> edgelists{}; + if (graph_view.is_hypergraph_partitioned()) { + CUGRAPH_FAIL("unimplemented."); + } else { + edgelists.resize(1); + edgelists[0].p_src_vertices = store_transposed ? coarsened_edgelist_minor_vertices.data() + : coarsened_edgelist_major_vertices.data(); + edgelists[0].p_dst_vertices = store_transposed ? coarsened_edgelist_major_vertices.data() + : coarsened_edgelist_minor_vertices.data(); + edgelists[0].p_edge_weights = coarsened_edgelist_weights.data(); + edgelists[0].number_of_edges = static_cast(coarsened_edgelist_major_vertices.size()); + } - // 9. create a coarsened graph. 
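  // For illustration: a possible call site for this primitive (the names below are
  // hypothetical). Assuming a multi-GPU graph_view_t named graph_view and a device
  // array `labels` holding one cluster label per local vertex (e.g. produced by a
  // clustering step):
  //
  //   auto result       = coarsen_graph(handle, graph_view, labels.data());
  //   auto coarse_graph = std::move(std::get<0>(result));  // std::unique_ptr to the coarsened graph_t
  //   auto renumber_map = std::move(std::get<1>(result));  // rmm::device_uvector of old labels
  //
  // renumber_map[i] holds the (old) cluster label that was renumbered to new vertex ID
  // (this process's vertex partition start + i) in the coarsened graph.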
+ return std::make_tuple( + std::make_unique>( + handle, + edgelists, + partition, + number_of_vertices, + number_of_edges, + graph_properties_t{graph_view.is_symmetric(), false}, + true), + std::move(renumber_map_labels)); } template >, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t const *labels); + } // namespace experimental } // namespace cugraph From 5fb532a3eb29d9893004d01b9df3dc96233a054d Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 19 Nov 2020 11:22:16 -0500 Subject: [PATCH 013/343] add const to (vertex,edge)_to_gpu_id functor operator --- cpp/include/experimental/detail/graph_utils.cuh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/include/experimental/detail/graph_utils.cuh b/cpp/include/experimental/detail/graph_utils.cuh index b0a9a5bad0a..0b37a26a7b8 100644 --- a/cpp/include/experimental/detail/graph_utils.cuh +++ b/cpp/include/experimental/detail/graph_utils.cuh @@ -142,7 +142,7 @@ template struct compute_gpu_id_from_vertex_t { int comm_size{0}; - __device__ int operator()(vertex_t v) + __device__ int operator()(vertex_t v) const { cuco::detail::MurmurHash3_32 hash_func{}; return hash_func(v) % comm_size; @@ -156,11 +156,11 @@ struct compute_gpu_id_from_edge_t { int row_comm_size{0}; int col_comm_size{0}; - __device__ int operator()(vertex_t src, vertex_t dst) + __device__ int operator()(vertex_t src, vertex_t dst) const { cuco::detail::MurmurHash3_32 hash_func{}; - auto major_comm_rank = hash_func(store_transposed ? dst : src) % comm_size; - auto minor_comm_rank = hash_func(store_transposed ? src : dst) % comm_size; + auto major_comm_rank = static_cast(hash_func(store_transposed ? dst : src) % comm_size); + auto minor_comm_rank = static_cast(hash_func(store_transposed ? src : dst) % comm_size); if (hypergraph_partitioned) { return (minor_comm_rank / col_comm_size) * row_comm_size + (major_comm_rank % row_comm_size); } else { From ae8a020301639e6846bf821f528d134f84df5bca Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 19 Nov 2020 11:23:31 -0500 Subject: [PATCH 014/343] allocate_comm_buffer return type: thrust::make_tuple to std::make_tuple --- cpp/include/utilities/comm_utils.cuh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cpp/include/utilities/comm_utils.cuh b/cpp/include/utilities/comm_utils.cuh index fb69fff49c9..0c82ed2f82f 100644 --- a/cpp/include/utilities/comm_utils.cuh +++ b/cpp/include/utilities/comm_utils.cuh @@ -633,7 +633,7 @@ auto allocate_comm_buffer_tuple_impl(std::index_sequence, size_t buffer_size, cudaStream_t stream) { - return thrust::make_tuple( + return std::make_tuple( allocate_comm_buffer_tuple_element_impl(buffer_size, stream)...); } @@ -641,12 +641,13 @@ template auto get_comm_buffer_begin_tuple_element_impl(BufferType& buffer) { using element_t = typename thrust::tuple_element::type; - return thrust::get(buffer).begin(); + return std::get(buffer).begin(); } template auto get_comm_buffer_begin_tuple_impl(std::index_sequence, BufferType& buffer) { + // thrust::make_tuple instead of std::make_tuple as this is fed to thrust::make_zip_iterator. 
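  // (For illustration: thrust::make_zip_iterator accepts a thrust::tuple of iterators
  // but not a std::tuple. A minimal sketch, assuming two device vectors a and b:
  //   auto zip_first = thrust::make_zip_iterator(thrust::make_tuple(a.begin(), b.begin()));
  // dereferencing zip_first yields a thrust::tuple of the two element values.)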
return thrust::make_tuple(get_comm_buffer_begin_tuple_element_impl(buffer)...); } From ef25e0d76c3f72e2207ebf23af9e1be0f76beb89 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 19 Nov 2020 11:23:49 -0500 Subject: [PATCH 015/343] fix compile errors --- cpp/src/experimental/graph_functions.cu | 333 +++++++++--------------- 1 file changed, 124 insertions(+), 209 deletions(-) diff --git a/cpp/src/experimental/graph_functions.cu b/cpp/src/experimental/graph_functions.cu index f34cb53cf81..aaf395d1a9b 100644 --- a/cpp/src/experimental/graph_functions.cu +++ b/cpp/src/experimental/graph_functions.cu @@ -52,18 +52,18 @@ auto shuffle_values(raft::handle_t const &handle, auto &comm = handle.get_comms(); auto const comm_size = comm.get_size(); - rmm::device_uvector rx_value_counts(comm_size(), handle.get_stream()); + rmm::device_uvector rx_value_counts(comm_size, handle.get_stream()); // FIXME: this needs to be replaced with AlltoAll once NCCL 2.8 is released. - std::vector tx_counts(comm_size(), size_t{1}); - std::vector tx_offsets(comm_size()); + std::vector tx_counts(comm_size, size_t{1}); + std::vector tx_offsets(comm_size); std::iota(tx_offsets.begin(), tx_offsets.end(), size_t{0}); - std::vector tx_dst_ranks(comm_size()); + std::vector tx_dst_ranks(comm_size); std::iota(tx_dst_ranks.begin(), tx_dst_ranks.end(), int{0}); - std::vector rx_counts(comm_size(), size_t{1}); + std::vector rx_counts(comm_size, size_t{1}); std::vector rx_offsets(comm_size); std::iota(rx_offsets.begin(), rx_offsets.end(), size_t{0}); - std::vector rx_src_ranks(comm_size()); + std::vector rx_src_ranks(comm_size); std::iota(rx_src_ranks.begin(), rx_src_ranks.end(), int{0}); device_multicast_sendrecv(comm, tx_value_counts.data(), @@ -76,9 +76,9 @@ auto shuffle_values(raft::handle_t const &handle, rx_src_ranks, handle.get_stream()); - raft::update_host(tx_counts.data(), tx_value_counts.data(), comm_size(), handle.get_stream()); + raft::update_host(tx_counts.data(), tx_value_counts.data(), comm_size, handle.get_stream()); std::partial_sum(tx_counts.begin(), tx_counts.end() - 1, tx_offsets.begin() + 1); - raft::update_host(rx_counts.data(), rx_value_counts.data(), comm_size(), handle.get_stream()); + raft::update_host(rx_counts.data(), rx_value_counts.data(), comm_size, handle.get_stream()); std::partial_sum(rx_counts.begin(), rx_counts.end() - 1, rx_offsets.begin() + 1); auto rx_value_buffer = @@ -106,8 +106,8 @@ auto shuffle_values(raft::handle_t const &handle, tx_counts.resize(num_tx_dst_ranks); tx_offsets.resize(num_tx_dst_ranks); tx_dst_ranks.resize(num_tx_dst_ranks); - rx_counts.resize(num_rx_dst_ranks); - rx_offsets.resize(num_rx_dst_ranks); + rx_counts.resize(num_rx_src_ranks); + rx_offsets.resize(num_rx_src_ranks); rx_src_ranks.resize(num_rx_src_ranks); // FIXME: this needs to be replaced with AlltoAll once NCCL 2.8 is released @@ -169,64 +169,65 @@ std:: edgelist_weights.data()); } - return std::make_tuple(std::move(edgelist_src_vertices), - std::move(edgelist_dst_vertices), + return std::make_tuple(std::move(edgelist_major_vertices), + std::move(edgelist_minor_vertices), std::move(edgelist_weights)); } template -void sort_and_coarsen_edgelist(raft::handle_t const &handle, - rmm::device_uvector &edgelist_major_vertices /* [INOUT] */, +void sort_and_coarsen_edgelist(rmm::device_uvector &edgelist_major_vertices /* [INOUT] */, rmm::device_uvector &edgelist_minor_vertices /* [INOUT] */, - rmm::device_uvector &edgelist_weights /* [INOUT] */) + rmm::device_uvector &edgelist_weights /* [INOUT] */, + cudaStream_t 
stream) { auto pair_first = thrust::make_zip_iterator( thrust::make_tuple(edgelist_major_vertices.begin(), edgelist_minor_vertices.begin())); size_t number_of_edges{0}; if (edgelist_weights.size() > 0) { - thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::sort_by_key(rmm::exec_policy(stream)->on(stream), pair_first, - pair_first + edgelist_major_vertices.begin(), + pair_first + edgelist_major_vertices.size(), edgelist_weights.begin()); - rmm::device_uvector tmp_edgelist_major_vertices(edgelist_major_vertices.size(), handle.get_stream()); - rmm::device_uvector tmp_edgelist_minor_vertices(tmp_edgelist_major_vertices.size(), handle.get_stream()); - rmm::device_uvector tmp_edgelist_weights(tmp_edgelist_major_vertices.size(), handle.get_stream()); + rmm::device_uvector tmp_edgelist_major_vertices(edgelist_major_vertices.size(), + stream); + rmm::device_uvector tmp_edgelist_minor_vertices(tmp_edgelist_major_vertices.size(), + stream); + rmm::device_uvector tmp_edgelist_weights(tmp_edgelist_major_vertices.size(), stream); auto it = thrust::reduce_by_key( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rmm::exec_policy(stream)->on(stream), pair_first, - pair_first + edgelist_major_vertices.begin(), + pair_first + edgelist_major_vertices.size(), edgelist_weights.begin(), thrust::make_zip_iterator(thrust::make_tuple(tmp_edgelist_major_vertices.begin(), - tmp_edgeilst_minor_vertices.begin())), + tmp_edgelist_minor_vertices.begin())), tmp_edgelist_weights.begin()); number_of_edges = thrust::distance(tmp_edgelist_weights.begin(), thrust::get<1>(it)); CUDA_TRY(cudaStreamSynchronize( - handle - .get_stream())); // memory blocks owned by edgelist_(major_vertices,minor_vertices,weights) - // will be freed after the assignments below + stream)); // memory blocks owned by edgelist_(major_vertices,minor_vertices,weights) will be + // freed after the assignments below edgelist_major_vertices = std::move(tmp_edgelist_major_vertices); edgelist_minor_vertices = std::move(tmp_edgelist_minor_vertices); edgelist_weights = std::move(tmp_edgelist_weights); } else { - thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::sort(rmm::exec_policy(stream)->on(stream), pair_first, - pair_first + edgelist_major_vertices.begin()); - auto it = thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + pair_first + edgelist_major_vertices.size()); + auto it = thrust::unique(rmm::exec_policy(stream)->on(stream), pair_first, pair_first + edgelist_major_vertices.size()); number_of_edges = thrust::distance(pair_first, it); } - edgelist_major_vertices.resize(number_of_edges, handle.get_stream()); - edgelist_minor_vertices.resize(number_of_edges, handle.get_stream()); - edgelist_weights.resize(number_of_edges, handle.get_stream()); - edgelist_major_vertices.shrink_to_fit(handle.get_stream()); - edgelist_minor_vertices.shrink_to_fit(handle.get_stream()); - edgelist_weights.shrink_to_fit(handle.get_stream()); + edgelist_major_vertices.resize(number_of_edges, stream); + edgelist_minor_vertices.resize(number_of_edges, stream); + edgelist_weights.resize(number_of_edges, stream); + edgelist_major_vertices.shrink_to_fit(stream); + edgelist_minor_vertices.shrink_to_fit(stream); + edgelist_weights.shrink_to_fit(stream); return; } @@ -241,18 +242,19 @@ std:: vertex_t const *p_major_labels, vertex_t const *p_minor_labels, vertex_t major_first, + vertex_t major_last, vertex_t minor_first, + vertex_t minor_last, cudaStream_t 
stream) { // FIXME: it might be possible to directly create relabled & coarsened edgelist from the // compressed sparse format to save memory - rmm::device_uvector edgelist_major_vertices(0, handle.get_stream()); - rmm::device_uvector edgelist_minor_vertices(0, handle.get_stream()); - rmm::device_uvector edgelist_weights(0, handle.get_stream()); + rmm::device_uvector edgelist_major_vertices(0, stream); + rmm::device_uvector edgelist_minor_vertices(0, stream); + rmm::device_uvector edgelist_weights(0, stream); std::tie(edgelist_major_vertices, edgelist_minor_vertices, edgelist_weights) = - compressed_sparse_to_edgelist(handle, - compressed_sparse_offsets, + compressed_sparse_to_edgelist(compressed_sparse_offsets, compressed_sparse_indices, compressed_sparse_weights, major_first, @@ -262,7 +264,7 @@ std:: auto pair_first = thrust::make_zip_iterator( thrust::make_tuple(edgelist_major_vertices.begin(), edgelist_minor_vertices.begin())); thrust::transform( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rmm::exec_policy(stream)->on(stream), pair_first, pair_first + edgelist_major_vertices.size(), pair_first, @@ -272,125 +274,13 @@ std:: }); sort_and_coarsen_edgelist( - handle, edgelist_major_vertices, edgelist_minor_vertices, edgelist_weights); + edgelist_major_vertices, edgelist_minor_vertices, edgelist_weights, stream); return std::make_tuple(std::move(edgelist_major_vertices), std::move(edgelist_minor_vertices), std::move(edgelist_weights)); } -#if 0 -template -void compute_coarsened_edgelist(raft::handle_t const &handle, - rmm::device_uvector &coarsened_edgelist_major_vertices, - rmm::device_uvector &coarsened_edgelist_minor_vertices, - rmm::device_uvector &coarsened_edgelist_weights, - edge_t const *uncoarsened_edgelist_major_vertices, - vertex_t const *uncoarsened_edgelist_minor_vertices, - weight_t const *uncoarsened_edgelist_weights, - edge_t number_of_uncoarsened_edges, - vertex_t uncoarsened_major_first, - vertex_t uncoarsened_minor_first, - vertex_t major_labels, - vertex_t minor_labels) -{ - auto pair_first = thrust::make_zip_iterator( - thrust::make_tuple(uncoarsened_edgelist_major_vertices, uncoarsened_edgelist_minor_vertices)); - if (uncoarsened_edgelist_eights != nullptr) { - thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - pair_first, - pair_first + number_of_uncoarsened_edges, - uncoarsened_edgelist_weights); - } else { - thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - pair_first, - pair_first + number_of_uncoarsened_edges); - } - if (coarsened_edgelist_src_vertices.size() > 0) { - rmm::device_uvector tmp_src_vertices( - coarsened_edgelist_src_vertices.size() + number_of_uncoarsened_edges, handle.get_stream()); - rmm::device_uvector tmp_dst_vertices(tmp_src_vertices.size(), handle.get_stream()); - rmm::device_uvector tmp_weights( - graph_view.is_weighted() ? 
tmp_src_vertices.size() : 0, handle.get_stream()); - auto coarsened_src_dst_pair_first = thrust::make_zip_iterator(thrust::make_tuple( - coarsened_edgelist_src_vertices.begin(), coarsened_edgelist_dst_vertices.begin())); - if (graph_view.is_weighted()) { - thrust::merge_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - coarsened_src_dst_pair_first, - coarsened_src_dst_pair_first + coarsened_edgelist_src_vertices.size(), - src_dst_pair_first, - src_dst_pair_first + edgelist_src_vertices.size(), - coarsened_edgelist_weights.begin(), - edgelist_weights.begin(), - thrust::make_zip_iterator(thrust::make_tuple(tmp_src_vertices.begin(), - tmp_dst_vertices.begin())), - tmp_weights.begin()); - } else { - thrust::merge(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - coarsened_src_dst_pair_first, - coarsened_src_dst_pair_first + coarsened_edgelist_src_vertices.size(), - src_dst_pair_first, - src_dst_pair_first + edgelist_src_vertices.size(), - thrust::make_zip_iterator( - thrust::make_tuple(tmp_src_vertices.begin(), tmp_dst_vertices.begin()))); - } - CUDA_TRY(cudaStreamSynchronize( - handle.get_stream())); // this is necessary as memory blocks in edgelist_(src_vertices, - // dst_vertices, weights) will be freed after the following move - // assignments. - edgelist_src_vertices = std::move(tmp_src_vertices); - edgelist_dst_vertices = std::move(tmp_dst_vertices); - edgelist_weights = std::move(tmp_weights); - src_dst_pair_first = thrust::make_zip_iterator( - thrust::make_tuple(edgelist_src_vertices.begin(), edgelist_dst_vertices.begin())); - } - - edge_t num_edges{0}; - if (graph_view.is_weighted()) { - coarsened_edgelist_src_vertices.resize(edgelist_src_vertices.size(), handle.get_stream()); - coarsened_edgelist_dst_vertices.resize(coarsened_edgelist_src_vertices.size(), - handle.get_stream()); - coarsened_edgelist_weights.resize( - graph_view.is_weighted() ? coarsened_edgelist_src_vertices.size() : 0, handle.get_stream()); - auto it = thrust::reduce_by_key( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - src_dst_pair_first, - src_dst_pair_first + edgelist_src_vertices.size(), - edgelist_weights.begin(), - thrust::make_zip_iterator(thrust::make_tuple(coarsened_edgelist_src_vertices.begin(), - coarsened_edgelist_dst_vertices.begin())), - coarsened_edgelist_weights.begin()); - num_edges = static_cast(thrust::distance(src_dst_pair_first, it)); - } else { - auto it = thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - src_dst_pair_first, - src_dst_pair_first + edgelist_src_vertices.size()); - coarsened_edgelist_src_vertices = std::move(edgelist_src_vertices); - coarsened_edgelist_dst_vertices = std::move(edgelist_dst_vertices); - coarsened_edgelist_weights = std::move(edgelist_weights); - num_edges = static_cast(thrust::distance(src_dst_pair_first, it)); - } - coarsened_edgelist_src_vertices.resize(num_edges, handle.get_stream()); - coarsened_edgelist_dst_vertices.resize(num_edges, handle.get_stream()); - coarsened_edgelist_weights.resize(graph_view.is_weighted() ? num_edges : 0, handle.get_stream()); - - CUDA_TRY(cudaStreamSynchronize( - handle.get_stream())); // this is necessary as edgelist_(src_vertices, dst_vertices, - // weights) will become out-of-scope. 
- - coarsened_edgelist_src_vertices.shrink_to_fit(handle.get_stream()); - coarsened_edgelist_dst_vertices.shrink_to_fit(handle.get_stream()); - coarsened_edgelist_weights.shrink_to_fit(handle.get_stream()); - return std::make_tuple(std::move(coarsened_edgelist_src_vertices), - std::move(coarsened_edgelist_dst_vertices), - std::move(coarsened_edgelist_weights)); -} -#endif - template rmm::device_uvector compute_renumber_map( raft::handle_t const &handle, @@ -402,7 +292,7 @@ rmm::device_uvector compute_renumber_map( // 1. acquire (unique major label, count) pairs - rmm::device_uvector tmp_labels = edgelist_major_vertices; + rmm::device_uvector tmp_labels(edgelist_major_vertices, handle.get_stream()); thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), tmp_labels.begin(), tmp_labels.end()); @@ -425,7 +315,7 @@ rmm::device_uvector compute_renumber_map( // 2. acquire unique minor labels - rmm::device_uvector minor_labels = edgelist_minor_vertices; + rmm::device_uvector minor_labels(edgelist_minor_vertices, handle.get_stream()); thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), minor_labels.begin(), minor_labels.end()); @@ -433,7 +323,7 @@ rmm::device_uvector compute_renumber_map( thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), minor_labels.begin(), minor_labels.end()); - minor_labels.resize(thrust::distance(minor_labels.begin(), minor_label_it)); + minor_labels.resize(thrust::distance(minor_labels.begin(), minor_label_it), handle.get_stream()); minor_labels.shrink_to_fit(handle.get_stream()); // 3. merge major and minor labels @@ -467,8 +357,8 @@ rmm::device_uvector compute_renumber_map( counts.begin()); merged_labels.resize(0, handle.get_stream()); merged_counts.resize(0, handle.get_stream()); - merged_labels.shrink_to_fit(); - merged_counts.shrink_to_fit(); + merged_labels.shrink_to_fit(handle.get_stream()); + merged_counts.shrink_to_fit(handle.get_stream()); labels.resize(thrust::distance(labels.begin(), thrust::get<0>(pair_it)), handle.get_stream()); counts.resize(labels.size(), handle.get_stream()); labels.shrink_to_fit(handle.get_stream()); @@ -542,11 +432,13 @@ rmm::device_uvector compute_renumber_map( } // namespace template -std::enable_if_t, partition_t, vertex_t, edge_t>> renumber_edgelist( - raft::handle_t const &handle, - rmm::device_uvector &edgelist_major_vertices /* [INOUT] */, - rmm::device_uvector &edgelist_minor_vertices /* [INOUT] */, - rmm::device_uvector &edgelist_weights /* [INOUT] */) +std::enable_if_t, partition_t, vertex_t, edge_t>> +renumber_edgelist(raft::handle_t const &handle, + rmm::device_uvector &edgelist_major_vertices /* [INOUT] */, + rmm::device_uvector &edgelist_minor_vertices /* [INOUT] */, + rmm::device_uvector &edgelist_weights /* [INOUT] */, + bool is_hypergraph_partitioned) { auto &comm = handle.get_comms(); auto const comm_size = comm.get_size(); @@ -574,7 +466,7 @@ std::enable_if_t, partition_ vertex_partition_offsets.begin() + 1); partition_t partition(vertex_partition_offsets, - graph_view.is_hypergraph_partitioned(), + is_hypergraph_partitioned, row_comm_size, col_comm_size, row_comm_rank, @@ -582,11 +474,11 @@ std::enable_if_t, partition_ auto number_of_vertices = vertex_partition_offsets.back(); auto number_of_edges = host_scalar_allreduce( - comm, static_cast(coarsened_edgelist_src_vertices.size()), handle.get_stream()); + comm, static_cast(edgelist_major_vertices.size()), handle.get_stream()); // 3. 
renumber edges - if (graph_view.is_hypergraph_partitioned()) { + if (is_hypergraph_partitioned) { CUGRAPH_FAIL("unimplemented."); } else { double constexpr load_factor = 0.7; @@ -620,8 +512,12 @@ std::enable_if_t, partition_ static_cast(static_cast(renumber_map_major_labels.size()) / load_factor), invalid_vertex_id::value, invalid_vertex_id::value}; - auto pair_first = thrust::make_zip_iterator(thrust::make_tuple( - renumber_map_major_labels.begin(), thrust::make_counting_iterator(major_first))); + auto pair_first = thrust::make_transform_iterator( + thrust::make_zip_iterator(thrust::make_tuple(renumber_map_major_labels.begin(), + thrust::make_counting_iterator(major_first))), + [] __device__(auto val) { + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + }); renumber_map.insert(pair_first, pair_first + renumber_map_major_labels.size()); renumber_map.find(edgelist_major_vertices.begin(), edgelist_major_vertices.end(), @@ -682,16 +578,21 @@ std::enable_if_t, partition_ static_cast(static_cast(renumber_map_minor_labels.size()) / load_factor), invalid_vertex_id::value, invalid_vertex_id::value}; - auto pair_first = thrust::make_zip_iterator(thrust::make_tuple( - renumber_map_minor_labels.begin(), thrust::make_counting_iterator(minor_first))); + auto pair_first = thrust::make_transform_iterator( + thrust::make_zip_iterator(thrust::make_tuple(renumber_map_minor_labels.begin(), + thrust::make_counting_iterator(minor_first))), + [] __device__(auto val) { + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + }); renumber_map.insert(pair_first, pair_first + renumber_map_minor_labels.size()); - renumber_map.find(coarsened_edgelist_minor_vertices.begin(), - coarsened_edgelist_minor_vertices.end(), - coarsened_edgelist_minor_vertices.begin()); + renumber_map.find(edgelist_minor_vertices.begin(), + edgelist_minor_vertices.end(), + edgelist_minor_vertices.begin()); } } - return std::make_tuple(std::move(renumber_map_labels), partition, number_of_vertices, number_of_edges); + return std::make_tuple( + std::move(renumber_map_labels), partition, number_of_vertices, number_of_edges); } template edgelist_major_vertices(0, handle.get_stream()); @@ -753,18 +654,22 @@ coarsen_graph( rmm::device_uvector edgelist_weights(0, handle.get_stream()); std::tie(edgelist_major_vertices, edgelist_minor_vertices, edgelist_weights) = compressed_sparse_to_relabeled_and_sorted_and_coarsened_edgelist( - handle, - graph_view.adj_matrix_partition_offsets_[i], - graph_view.adj_matrix_partition_indices_[i], - graph_view.is_weighted() ? graph_view.adj_matrix_partition_weights_[i] : nullptr, + graph_view.offsets(i), + graph_view.indices(i), + graph_view.weights(i), adj_matrix_major_labels.begin() + - (store_transposed ? graph_view.get_local_adj_matrix_partition_col_vaule_start_offset(i) - : graph_view.get_local_adj_matrix_partition_row_vaule_start_offset(i)), + (store_transposed ? graph_view.get_local_adj_matrix_partition_col_value_start_offset(i) + : graph_view.get_local_adj_matrix_partition_row_value_start_offset(i)), adj_matrix_minor_labels.begin(), store_transposed ? graph_view.get_local_adj_matrix_partition_col_first(i) : graph_view.get_local_adj_matrix_partition_row_first(i), + store_transposed ? graph_view.get_local_adj_matrix_partition_col_last(i) + : graph_view.get_local_adj_matrix_partition_row_last(i), store_transposed ? 
graph_view.get_local_adj_matrix_partition_row_first(i) - : graph_view.get_local_adj_matrix_partition_col_first(i)); + : graph_view.get_local_adj_matrix_partition_col_first(i), + store_transposed ? graph_view.get_local_adj_matrix_partition_row_last(i) + : graph_view.get_local_adj_matrix_partition_col_last(i), + handle.get_stream()); auto cur_size = coarsened_edgelist_major_vertices.size(); // FIXME: this can lead to frequent costly reallocation; we may be able to avoid this if we can @@ -778,36 +683,41 @@ coarsen_graph( graph_view.is_weighted() ? coarsened_edgelist_major_vertices.size() : 0, handle.get_stream()); if (graph_view.is_weighted()) { - auto src_edge_first = thrust::make_zip_iterator( - thrust::make_tuple(edgelist_major_vertices.begin(), edgelist_minor_vertices.begin(), edgelist_weights.begin())); - auto dst_edge_first = thrust::make_zip_iterator(thrust::make_tuple(coarsened_edgelist_major_vertices.begin(), - coarsened_edgelist_minor_vertices.begin(), - coarsened_edgelist_weights.begin())) + - cur_size; + auto src_edge_first = + thrust::make_zip_iterator(thrust::make_tuple(edgelist_major_vertices.begin(), + edgelist_minor_vertices.begin(), + edgelist_weights.begin())); + auto dst_edge_first = + thrust::make_zip_iterator(thrust::make_tuple(coarsened_edgelist_major_vertices.begin(), + coarsened_edgelist_minor_vertices.begin(), + coarsened_edgelist_weights.begin())) + + cur_size; thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), src_edge_first, src_edge_first + edgelist_major_vertices.size(), dst_edge_first); } else { - auto src_edge_first = - thrust::make_zip_iterator(thrust::make_tuple(edgelist_major_vertices.begin(), edgelist_minor_vertices.begin())); - auto dst_edge_first = thrust::make_zip_iterator(thrust::make_tuple(coarsened_edgelist_major_vertices.begin(), - coarsened_edgelist_minor_vertices.begin())) + - cur_size; + auto src_edge_first = thrust::make_zip_iterator( + thrust::make_tuple(edgelist_major_vertices.begin(), edgelist_minor_vertices.begin())); + auto dst_edge_first = + thrust::make_zip_iterator(thrust::make_tuple(coarsened_edgelist_major_vertices.begin(), + coarsened_edgelist_minor_vertices.begin())) + + cur_size; thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), src_edge_first, src_edge_first + edgelist_major_vertices.size(), dst_edge_first); } - CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); // edgelist_(major_vertices,minor_vertices,weights) - // will become out-of-scope + CUDA_TRY(cudaStreamSynchronize( + handle.get_stream())); // edgelist_(major_vertices,minor_vertices,weights) + // will become out-of-scope } - sort_and_coarsen_edgelist(handle, - coarsened_edgelist_major_vertices, + sort_and_coarsen_edgelist(coarsened_edgelist_major_vertices, coarsened_edgelist_minor_vertices, - coarsened_edgelist_weights); + coarsened_edgelist_weights, + handle.get_stream()); // 2. globally shuffle edge list and re-coarsen @@ -834,11 +744,11 @@ coarsen_graph( return store_transposed ? 
key_func(thrust::get<1>(val), thrust::get<0>(val)) : key_func(thrust::get<0>(val), thrust::get<1>(val)); }); - rmm::device_uvector tx_value_counts(comm.get_size(), handle.get_stream()); + rmm::device_uvector tx_value_counts(comm.get_size(), handle.get_stream()); thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), key_first, key_first + coarsened_edgelist_major_vertices.size(), - thrust::make_constant_iterator(vertex_t{1}), + thrust::make_constant_iterator(size_t{1}), thrust::make_discard_iterator(), tx_value_counts.begin()); @@ -848,8 +758,10 @@ coarsen_graph( std::tie(rx_edgelist_major_vertices, rx_edgelist_minor_vertices, rx_edgelist_weights) = shuffle_values(handle, edge_first, tx_value_counts); - sort_and_coarsen_edgelist( - handle, rx_edgelist_major_vertices, rx_edgelist_minor_vertices, rx_edgelist_weights); + sort_and_coarsen_edgelist(rx_edgelist_major_vertices, + rx_edgelist_minor_vertices, + rx_edgelist_weights, + handle.get_stream()); CUDA_TRY(cudaStreamSynchronize( handle.get_stream())); // memory blocks owned by @@ -861,15 +773,18 @@ coarsen_graph( coarsened_edgelist_weights = std::move(rx_edgelist_weights); } - rmm::device_uvector renumber_map_labels{}; - partition_t partition{}; + rmm::device_uvector renumber_map_labels(0, handle.get_stream()); + partition_t partition( + std::vector{}, graph_view.is_hypergraph_partitioned(), 0, 0, 0, 0); vertex_t number_of_vertices{}; edge_t number_of_edges{}; std::tie(renumber_map_labels, partition, number_of_vertices, number_of_edges) = - renumber_edgelist(handle, - coarsened_edgelist_major_vertices, - coarsened_edgelist_minor_vertices, - coarsened_edgelist_weights); + renumber_edgelist( + handle, + coarsened_edgelist_major_vertices, + coarsened_edgelist_minor_vertices, + coarsened_edgelist_weights, + graph_view.is_hypergraph_partitioned()); // 4. build a graph From d02674696421455eda35b271a2eb18a4f1de160c Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 19 Nov 2020 16:31:27 -0500 Subject: [PATCH 016/343] update graph_functions header --- cpp/include/experimental/graph_functions.hpp | 74 +++++++++++++++++++- 1 file changed, 71 insertions(+), 3 deletions(-) diff --git a/cpp/include/experimental/graph_functions.hpp b/cpp/include/experimental/graph_functions.hpp index e44dd694b21..653275d50a2 100644 --- a/cpp/include/experimental/graph_functions.hpp +++ b/cpp/include/experimental/graph_functions.hpp @@ -28,6 +28,67 @@ namespace cugraph { namespace experimental { +/** + * @brief renumber edgelist (multi-GPU) + * + * This function assumes that edges are pre-shuffled to their target processes using the + * compute_gpu_id_from_edge_t functor. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam edge_t Type of edge identifiers. Needs to be an integral type. + * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) + * or multi-GPU (true). + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param edgelist_major_vertices Edge source vertex IDs (if the graph adjacency matrix is stored as + * is) or edge destination vertex IDs (if the transposed graph adjacency matrix is stored). Vertex + * IDs are updated in-place ([INOUT] parameter). Applying the compute_gpu_id_from_edge_t functor to + * every (source, destination) pair should return the local GPU ID for this function to work (edges + * should be pre-shuffled). 
+ * @param edgelist_minor_vertices Edge destination vertex IDs (if the graph adjacency matrix is + * stored as is) or edge source vertex IDs (if the transposed graph adjacency matrix is stored). + * Vertex IDs are updated in-place ([INOUT] parameter). + * @param is_hypergraph_partitioned Flag indicating whether we are assuming hypergraph partitioning + * (this flag will be removed in the future). Applying the compute_gpu_id_from_edge_t functor to + * every (source, destination) pair should return the local GPU ID for this function to work (edges + * should be pre-shuffled). + * @return std::tuple, partition_t, vertex_t, edge_t> + * Quadruplet of labels (vertex IDs before renumbering) for the entire set of vertices (assigned to + * this process in multi-GPU), partition_t object storing graph partitioning information, total + * number of vertices, and total number of edges. + */ +template +std::enable_if_t, partition_t, vertex_t, edge_t>> +renumber_edgelist(raft::handle_t const& handle, + rmm::device_uvector& edgelist_major_vertices /* [INOUT] */, + rmm::device_uvector& edgelist_minor_vertices /* [INOUT] */, + bool is_hypergraph_partitioned); + +/** + * @brief renumber edgelist (single-GPU) + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam edge_t Type of edge identifiers. Needs to be an integral type. + * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) + * or multi-GPU (true). + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param edgelist_major_vertices Edge source vertex IDs (if the graph adjacency matrix is stored as + * is) or edge destination vertex IDs (if the transposed graph adjacency matrix is stored). Vertex + * IDs are updated in-place ([INOUT] parameter). + * @param edgelist_minor_vertices Edge destination vertex IDs (if the graph adjacency matrix is + * stored as is) or edge source vertex IDs (if the transposed graph adjacency matrix is stored). + * Vertex IDs are updated in-place ([INOUT] parameter). + * @return rmm::device_uvector Labels (vertex IDs before renumbering) for the entire set + * of vertices. + */ +template +std::enable_if_t> renumber_edgelist( + raft::handle_t const& handle, + rmm::device_uvector& edgelist_major_vertices /* [INOUT] */, + rmm::device_uvector& edgelist_minor_vertices /* [INOUT] */); + /** * @brief Compute the coarsened graph. * @@ -38,8 +99,12 @@ namespace experimental { * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. * @tparam edge_t Type of edge identifiers. Needs to be an integral type. * @tparam weight_t Type of edge weights. Needs to be a floating point type. - * @tparam store_transposed - * @tparam multi_gpu + * @tparam store_transposed Flag indicating whether to store the graph adjacency matrix as is or as + * transposed. + * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) + * or multi-GPU (true). + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. * @param graph_view Graph view object of the input graph to be coarsened. * @param labels Vertex labels (assigned to this process in multi-GPU) to be used in coarsening. 
* @return std::tuple Date: Thu, 19 Nov 2020 16:43:28 -0500 Subject: [PATCH 017/343] move shuffle_values to graph_utils.cuh for better reusability --- .../experimental/detail/graph_utils.cuh | 84 +++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/cpp/include/experimental/detail/graph_utils.cuh b/cpp/include/experimental/detail/graph_utils.cuh index 0b37a26a7b8..ea3dc749be6 100644 --- a/cpp/include/experimental/detail/graph_utils.cuh +++ b/cpp/include/experimental/detail/graph_utils.cuh @@ -17,6 +17,7 @@ #include #include +#include #include #include @@ -27,6 +28,7 @@ #include #include +#include #include namespace cugraph { @@ -131,6 +133,88 @@ rmm::device_uvector compute_major_degree( return compute_major_degree(handle, tmp_offsets, partition); } +template +auto shuffle_values(raft::handle_t const &handle, + TxValueIterator tx_value_first, + rmm::device_uvector const &tx_value_counts) +{ + auto &comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + + rmm::device_uvector rx_value_counts(comm_size, handle.get_stream()); + + // FIXME: this needs to be replaced with AlltoAll once NCCL 2.8 is released. + std::vector tx_counts(comm_size, size_t{1}); + std::vector tx_offsets(comm_size); + std::iota(tx_offsets.begin(), tx_offsets.end(), size_t{0}); + std::vector tx_dst_ranks(comm_size); + std::iota(tx_dst_ranks.begin(), tx_dst_ranks.end(), int{0}); + std::vector rx_counts(comm_size, size_t{1}); + std::vector rx_offsets(comm_size); + std::iota(rx_offsets.begin(), rx_offsets.end(), size_t{0}); + std::vector rx_src_ranks(comm_size); + std::iota(rx_src_ranks.begin(), rx_src_ranks.end(), int{0}); + device_multicast_sendrecv(comm, + tx_value_counts.data(), + tx_counts, + tx_offsets, + tx_dst_ranks, + rx_value_counts.data(), + rx_counts, + rx_offsets, + rx_src_ranks, + handle.get_stream()); + + raft::update_host(tx_counts.data(), tx_value_counts.data(), comm_size, handle.get_stream()); + std::partial_sum(tx_counts.begin(), tx_counts.end() - 1, tx_offsets.begin() + 1); + raft::update_host(rx_counts.data(), rx_value_counts.data(), comm_size, handle.get_stream()); + std::partial_sum(rx_counts.begin(), rx_counts.end() - 1, rx_offsets.begin() + 1); + + auto rx_value_buffer = + allocate_comm_buffer::value_type>( + rx_offsets.back(), handle.get_stream()); + auto rx_value_first = + get_comm_buffer_begin::value_type>( + rx_value_buffer); + + int num_tx_dst_ranks{0}; + int num_rx_src_ranks{0}; + for (int i = 0; i < comm_size; ++i) { + if (tx_counts[i] != 0) { + tx_counts[num_tx_dst_ranks] = tx_counts[i]; + tx_offsets[num_tx_dst_ranks] = tx_offsets[i]; + tx_dst_ranks[num_tx_dst_ranks] = tx_dst_ranks[i]; + ++num_tx_dst_ranks; + } + if (rx_counts[i] != 0) { + rx_counts[num_rx_src_ranks] = rx_counts[i]; + rx_offsets[num_rx_src_ranks] = rx_offsets[i]; + rx_src_ranks[num_rx_src_ranks] = rx_src_ranks[i]; + } + } + tx_counts.resize(num_tx_dst_ranks); + tx_offsets.resize(num_tx_dst_ranks); + tx_dst_ranks.resize(num_tx_dst_ranks); + rx_counts.resize(num_rx_src_ranks); + rx_offsets.resize(num_rx_src_ranks); + rx_src_ranks.resize(num_rx_src_ranks); + + // FIXME: this needs to be replaced with AlltoAll once NCCL 2.8 is released + // (if num_tx_dst_ranks == num_rx_src_ranks == comm_size). 
+ device_multicast_sendrecv(comm, + tx_value_first, + tx_counts, + tx_offsets, + tx_dst_ranks, + rx_value_first, + rx_counts, + rx_offsets, + rx_src_ranks, + handle.get_stream()); + + return std::move(rx_value_buffer); +} + template struct degree_from_offsets_t { edge_t const *offsets{nullptr}; From 2e5a555c1f28ef3b22ac4634df6562de4a515c74 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 19 Nov 2020 16:44:06 -0500 Subject: [PATCH 018/343] add SG implementation --- cpp/src/experimental/graph_functions.cu | 178 +++++++++++------------- 1 file changed, 84 insertions(+), 94 deletions(-) diff --git a/cpp/src/experimental/graph_functions.cu b/cpp/src/experimental/graph_functions.cu index aaf395d1a9b..3dc7d7d8b37 100644 --- a/cpp/src/experimental/graph_functions.cu +++ b/cpp/src/experimental/graph_functions.cu @@ -43,89 +43,6 @@ namespace experimental { namespace { -// FIXME: better move this elsewhere for reusability -template -auto shuffle_values(raft::handle_t const &handle, - TxValueIterator tx_value_first, - rmm::device_uvector const &tx_value_counts) -{ - auto &comm = handle.get_comms(); - auto const comm_size = comm.get_size(); - - rmm::device_uvector rx_value_counts(comm_size, handle.get_stream()); - - // FIXME: this needs to be replaced with AlltoAll once NCCL 2.8 is released. - std::vector tx_counts(comm_size, size_t{1}); - std::vector tx_offsets(comm_size); - std::iota(tx_offsets.begin(), tx_offsets.end(), size_t{0}); - std::vector tx_dst_ranks(comm_size); - std::iota(tx_dst_ranks.begin(), tx_dst_ranks.end(), int{0}); - std::vector rx_counts(comm_size, size_t{1}); - std::vector rx_offsets(comm_size); - std::iota(rx_offsets.begin(), rx_offsets.end(), size_t{0}); - std::vector rx_src_ranks(comm_size); - std::iota(rx_src_ranks.begin(), rx_src_ranks.end(), int{0}); - device_multicast_sendrecv(comm, - tx_value_counts.data(), - tx_counts, - tx_offsets, - tx_dst_ranks, - rx_value_counts.data(), - rx_counts, - rx_offsets, - rx_src_ranks, - handle.get_stream()); - - raft::update_host(tx_counts.data(), tx_value_counts.data(), comm_size, handle.get_stream()); - std::partial_sum(tx_counts.begin(), tx_counts.end() - 1, tx_offsets.begin() + 1); - raft::update_host(rx_counts.data(), rx_value_counts.data(), comm_size, handle.get_stream()); - std::partial_sum(rx_counts.begin(), rx_counts.end() - 1, rx_offsets.begin() + 1); - - auto rx_value_buffer = - allocate_comm_buffer::value_type>( - rx_offsets.back(), handle.get_stream()); - auto rx_value_first = - get_comm_buffer_begin::value_type>( - rx_value_buffer); - - int num_tx_dst_ranks{0}; - int num_rx_src_ranks{0}; - for (int i = 0; i < comm_size; ++i) { - if (tx_counts[i] != 0) { - tx_counts[num_tx_dst_ranks] = tx_counts[i]; - tx_offsets[num_tx_dst_ranks] = tx_offsets[i]; - tx_dst_ranks[num_tx_dst_ranks] = tx_dst_ranks[i]; - ++num_tx_dst_ranks; - } - if (rx_counts[i] != 0) { - rx_counts[num_rx_src_ranks] = rx_counts[i]; - rx_offsets[num_rx_src_ranks] = rx_offsets[i]; - rx_src_ranks[num_rx_src_ranks] = rx_src_ranks[i]; - } - } - tx_counts.resize(num_tx_dst_ranks); - tx_offsets.resize(num_tx_dst_ranks); - tx_dst_ranks.resize(num_tx_dst_ranks); - rx_counts.resize(num_rx_src_ranks); - rx_offsets.resize(num_rx_src_ranks); - rx_src_ranks.resize(num_rx_src_ranks); - - // FIXME: this needs to be replaced with AlltoAll once NCCL 2.8 is released - // (if num_tx_dst_ranks == num_rx_src_ranks == comm_size). 
- device_multicast_sendrecv(comm, - tx_value_first, - tx_counts, - tx_offsets, - tx_dst_ranks, - rx_value_first, - rx_counts, - rx_offsets, - rx_src_ranks, - handle.get_stream()); - - return std::move(rx_value_buffer); -} - template std:: tuple, rmm::device_uvector, rmm::device_uvector> @@ -390,7 +307,7 @@ rmm::device_uvector compute_renumber_map( rmm::device_uvector rx_labels(0, handle.get_stream()); rmm::device_uvector rx_counts(0, handle.get_stream()); - std::tie(rx_labels, rx_counts) = shuffle_values(handle, pair_first, tx_value_counts); + std::tie(rx_labels, rx_counts) = cugraph::experimental::detail::shuffle_values(handle, pair_first, tx_value_counts); labels.resize(rx_labels.size(), handle.get_stream()); counts.resize(labels.size(), handle.get_stream()); @@ -431,13 +348,12 @@ rmm::device_uvector compute_renumber_map( } // namespace -template +template std::enable_if_t, partition_t, vertex_t, edge_t>> renumber_edgelist(raft::handle_t const &handle, rmm::device_uvector &edgelist_major_vertices /* [INOUT] */, rmm::device_uvector &edgelist_minor_vertices /* [INOUT] */, - rmm::device_uvector &edgelist_weights /* [INOUT] */, bool is_hypergraph_partitioned) { auto &comm = handle.get_comms(); @@ -595,6 +511,41 @@ renumber_edgelist(raft::handle_t const &handle, std::move(renumber_map_labels), partition, number_of_vertices, number_of_edges); } +template +std::enable_if_t> renumber_edgelist( + raft::handle_t const &handle, + rmm::device_uvector &edgelist_major_vertices /* [INOUT] */, + rmm::device_uvector &edgelist_minor_vertices /* [INOUT] */) +{ + auto renumber_map_labels = compute_renumber_map( + handle, edgelist_major_vertices, edgelist_minor_vertices); + + double constexpr load_factor = 0.7; + + // FIXME: compare this hash based approach with a binary search based approach in both memory + // footprint and execution time + + cuco::static_map renumber_map{ + static_cast(static_cast(renumber_map_labels.size()) / load_factor), + invalid_vertex_id::value, + invalid_vertex_id::value}; + auto pair_first = thrust::make_transform_iterator( + thrust::make_zip_iterator( + thrust::make_tuple(renumber_map_labels.begin(), thrust::make_counting_iterator(vertex_t{0}))), + [] __device__(auto val) { + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + }); + renumber_map.insert(pair_first, pair_first + renumber_map_labels.size()); + renumber_map.find(edgelist_major_vertices.begin(), + edgelist_major_vertices.end(), + edgelist_major_vertices.begin()); + renumber_map.find(edgelist_minor_vertices.begin(), + edgelist_minor_vertices.end(), + edgelist_minor_vertices.begin()); + + return std::move(renumber_map_labels); +} + template rx_edgelist_minor_vertices(0, handle.get_stream()); rmm::device_uvector rx_edgelist_weights(0, handle.get_stream()); std::tie(rx_edgelist_major_vertices, rx_edgelist_minor_vertices, rx_edgelist_weights) = - shuffle_values(handle, edge_first, tx_value_counts); + detail::shuffle_values(handle, edge_first, tx_value_counts); sort_and_coarsen_edgelist(rx_edgelist_major_vertices, rx_edgelist_minor_vertices, @@ -779,12 +730,10 @@ coarsen_graph( vertex_t number_of_vertices{}; edge_t number_of_edges{}; std::tie(renumber_map_labels, partition, number_of_vertices, number_of_edges) = - renumber_edgelist( - handle, - coarsened_edgelist_major_vertices, - coarsened_edgelist_minor_vertices, - coarsened_edgelist_weights, - graph_view.is_hypergraph_partitioned()); + renumber_edgelist(handle, + coarsened_edgelist_major_vertices, + coarsened_edgelist_minor_vertices, + 
graph_view.is_hypergraph_partitioned()); // 4. build a graph @@ -827,7 +776,48 @@ coarsen_graph( graph_view_t const &graph_view, vertex_t const *labels) { - CUGRAPH_FAIL("unimplemented."); + rmm::device_uvector coarsened_edgelist_major_vertices(0, handle.get_stream()); + rmm::device_uvector coarsened_edgelist_minor_vertices(0, handle.get_stream()); + rmm::device_uvector coarsened_edgelist_weights(0, handle.get_stream()); + std::tie(coarsened_edgelist_major_vertices, + coarsened_edgelist_minor_vertices, + coarsened_edgelist_weights) = + compressed_sparse_to_relabeled_and_sorted_and_coarsened_edgelist( + graph_view.offsets(), + graph_view.indices(), + graph_view.weights(), + labels, + labels, + vertex_t{0}, + graph_view.get_number_of_vertices(), + vertex_t{0}, + graph_view.get_number_of_vertices(), + handle.get_stream()); + + sort_and_coarsen_edgelist(coarsened_edgelist_major_vertices, + coarsened_edgelist_minor_vertices, + coarsened_edgelist_weights, + handle.get_stream()); + + auto renumber_map_labels = renumber_edgelist( + handle, coarsened_edgelist_major_vertices, coarsened_edgelist_minor_vertices); + + edgelist_t edgelist{}; + edgelist.p_src_vertices = store_transposed ? coarsened_edgelist_minor_vertices.data() + : coarsened_edgelist_major_vertices.data(); + edgelist.p_dst_vertices = store_transposed ? coarsened_edgelist_major_vertices.data() + : coarsened_edgelist_minor_vertices.data(); + edgelist.p_edge_weights = coarsened_edgelist_weights.data(); + edgelist.number_of_edges = static_cast(coarsened_edgelist_major_vertices.size()); + + return std::make_tuple( + std::make_unique>( + handle, + edgelist, + static_cast(renumber_map_labels.size()), + graph_properties_t{graph_view.is_symmetric(), false}, + true), + std::move(renumber_map_labels)); } // explicit instantiation From de199d63e82ff31ae9df4bbdd7d11bb95f62b845 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Mon, 23 Nov 2020 10:43:16 -0500 Subject: [PATCH 019/343] fix a function API error --- .../patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh index 25038dea971..ad172c5437a 100644 --- a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh +++ b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh @@ -85,7 +85,7 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( GraphViewType const& graph_view, AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, KeyIterator out_nbr_key_first, - cuco::static_map::value_type, ValueType> kv_map, + cuco::static_map::value_type, ValueType> const& kv_map, KeyAggregatedEdgeOp key_aggregated_e_op, ReduceOp reduce_op, T init, From 1a6d105a2ec469aaa13a9e45ae7cf482e6c626c5 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Mon, 23 Nov 2020 10:43:59 -0500 Subject: [PATCH 020/343] update shuffle_values --- .../experimental/detail/graph_utils.cuh | 36 ++++++++++++++----- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/cpp/include/experimental/detail/graph_utils.cuh b/cpp/include/experimental/detail/graph_utils.cuh index ea3dc749be6..333d8202f06 100644 --- a/cpp/include/experimental/detail/graph_utils.cuh +++ b/cpp/include/experimental/detail/graph_utils.cuh @@ -116,6 +116,28 @@ rmm::device_uvector compute_major_degree( return degrees; } +// FIXME: better if I don't need to do this. 
Haven't found a better way to concatenate a variable +// which can be either a tuple or not with another variable to create an aggregated/flattened tuple. +#if 1 +template +struct is_std_tuple : std::false_type { +}; + +template +struct is_std_tuple> : std::true_type { +}; + +template +auto to_tuple(T&& val, std::enable_if_t::value, void>* = nullptr) { + return std::forward(val); +} + +template +auto to_tuple(T&& val, std::enable_if_t::value, void>* = nullptr) { + return std::make_tuple(std::forward(val)); +} +#endif + // compute the numbers of nonzeros in rows (of the graph adjacency matrix, if store_transposed = // false) or columns (of the graph adjacency matrix, if store_transposed = true) template @@ -166,8 +188,12 @@ auto shuffle_values(raft::handle_t const &handle, handle.get_stream()); raft::update_host(tx_counts.data(), tx_value_counts.data(), comm_size, handle.get_stream()); - std::partial_sum(tx_counts.begin(), tx_counts.end() - 1, tx_offsets.begin() + 1); raft::update_host(rx_counts.data(), rx_value_counts.data(), comm_size, handle.get_stream()); + + CUDA_TRY( + cudaStreamSynchronize(handle.get_stream())); // tx_counts & rx_counts should be up-to-date + + std::partial_sum(tx_counts.begin(), tx_counts.end() - 1, tx_offsets.begin() + 1); std::partial_sum(rx_counts.begin(), rx_counts.end() - 1, rx_offsets.begin() + 1); auto rx_value_buffer = @@ -206,13 +232,7 @@ auto shuffle_values(raft::handle_t const &handle, tx_counts, tx_offsets, tx_dst_ranks, - rx_value_first, - rx_counts, - rx_offsets, - rx_src_ranks, - handle.get_stream()); - - return std::move(rx_value_buffer); + return std::tuple_cat(to_tuple(std::move(rx_value_buffer)), std::make_tuple(std::move(rx_value_counts))); } template From 64954e9d0308fd746c7a6205a3cd62fff9593e36 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Mon, 23 Nov 2020 10:45:02 -0500 Subject: [PATCH 021/343] update graph functions --- cpp/include/experimental/graph_functions.hpp | 3 +- cpp/src/experimental/graph_functions.cu | 190 ++++++++++++++++++- 2 files changed, 190 insertions(+), 3 deletions(-) diff --git a/cpp/include/experimental/graph_functions.hpp b/cpp/include/experimental/graph_functions.hpp index 653275d50a2..cbd15b04d58 100644 --- a/cpp/include/experimental/graph_functions.hpp +++ b/cpp/include/experimental/graph_functions.hpp @@ -133,7 +133,8 @@ coarsen_graph( * handles to various CUDA libraries) to run graph algorithms. * @param old_labels Old labels to be relabeled. * @param old_new_label_pairs Pairs of an old label and the corresponding new label (each process - * holds only part of the entire pairs; partitioning can be arbitrary). + * holds only part of the entire old labels and the corresponding new labels; partitioning can be + * arbitrary). * @return rmm::device_uvector New labels corresponding to the @p old_labels. 
*/ template diff --git a/cpp/src/experimental/graph_functions.cu b/cpp/src/experimental/graph_functions.cu index 3dc7d7d8b37..ea8b2a26cf5 100644 --- a/cpp/src/experimental/graph_functions.cu +++ b/cpp/src/experimental/graph_functions.cu @@ -307,7 +307,11 @@ rmm::device_uvector compute_renumber_map( rmm::device_uvector rx_labels(0, handle.get_stream()); rmm::device_uvector rx_counts(0, handle.get_stream()); - std::tie(rx_labels, rx_counts) = cugraph::experimental::detail::shuffle_values(handle, pair_first, tx_value_counts); + + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); // tx_value_counts should be up-to-date + + std::tie(rx_labels, rx_counts, std::ignore) = + cugraph::experimental::detail::shuffle_values(handle, pair_first, tx_value_counts); labels.resize(rx_labels.size(), handle.get_stream()); counts.resize(labels.size(), handle.get_stream()); @@ -706,7 +710,11 @@ coarsen_graph( rmm::device_uvector rx_edgelist_major_vertices(0, handle.get_stream()); rmm::device_uvector rx_edgelist_minor_vertices(0, handle.get_stream()); rmm::device_uvector rx_edgelist_weights(0, handle.get_stream()); - std::tie(rx_edgelist_major_vertices, rx_edgelist_minor_vertices, rx_edgelist_weights) = + + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); // tx_value_counts should be up-to-date + + std::tie( + rx_edgelist_major_vertices, rx_edgelist_minor_vertices, rx_edgelist_weights, std::ignore) = detail::shuffle_values(handle, edge_first, tx_value_counts); sort_and_coarsen_edgelist(rx_edgelist_major_vertices, @@ -820,6 +828,178 @@ coarsen_graph( std::move(renumber_map_labels)); } +template +rmm::device_uvector relabel( + raft::handle_t const &handle, + rmm::device_uvector const &old_labels, + std::tuple, rmm::device_uvector> const + &old_new_label_pairs) +{ + double constexpr load_factor = 0.7; + + rmm::device_uvector new_labels(0, handle.get_stream()); + + if (multi_gpu) { + auto &comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + + auto key_func = detail::compute_gpu_id_from_vertex_t{comm_size}; + + // find unique old labels (to be relabeled) + + rmm::device_uvector unique_old_labels(old_labels, handle.get_stream()); + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + unique_old_labels.begin(), + unique_old_labels.end()); + auto it = thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + unique_old_labels.begin(), + unique_old_labels.end()); + unique_old_labels.resize(thrust::distance(unique_old_labels.begin(), it), handle.get_stream()); + unique_old_labels.shrink_to_fit(handle.get_stream()); + + // collect new labels for the unique old labels + + rmm::device_uvector new_labels_for_unique_old_labels(0, handle.get_stream()); + { + // shuffle the old_new_label_pairs based on applying the compute_gpu_id_from_vertex_t functor + // to the old labels + + rmm::device_uvector rx_label_pair_old_labels(0, handle.get_stream()); + rmm::device_uvector rx_label_pair_new_labels(0, handle.get_stream()); + { + rmm::device_uvector label_pair_old_labels(thrust::get<0>(old_new_label_pairs), + handle.get_stream()); + rmm::device_uvector label_pair_new_labels(thrust::get<1>(old_new_label_pairs), + handle.get_stream()); + auto pair_first = thrust::make_zip_iterator( + thrust::make_tuple(label_pair_old_labels.begin(), label_pair_new_labels.begin())); + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + pair_first, + pair_first + thrust::get<0>(old_new_label_pairs).size(), + [key_func] __device__(auto lhs, auto rhs) { + 
return key_func(thrust::get<0>(lhs)) < key_func(thrust::get<0>(rhs)); + }); + auto key_first = thrust::make_transform_iterator( + label_pair_old_labels.begin(), [key_func] __device__(auto val) { return key_func(val); }); + rmm::device_uvector tx_value_counts(comm_size, handle.get_stream()); + thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + key_first, + key_first + label_pair_old_labels.size(), + thrust::make_constant_iterator(size_t{1}), + thrust::make_discard_iterator(), + tx_value_counts.begin()); + + CUDA_TRY( + cudaStreamSynchronize(handle.get_stream())); // tx_value_counts should be up-to-date + + std::tie(rx_label_pair_old_labels, rx_label_pair_new_labels, std::ignore) = + cugraph::experimental::detail::shuffle_values(handle, pair_first, tx_value_counts); + + CUDA_TRY(cudaStreamSynchronize( + handle.get_stream())); // label_pair_old_labels and label_pair_new_labels will become + // out-of-scope + } + + // update intermediate relabel map + + cuco::static_map relabel_map{ + static_cast(static_cast(rx_label_pair_old_labels.size()) / load_factor), + invalid_vertex_id::value, + invalid_vertex_id::value}; + + auto pair_first = thrust::make_transform_iterator( + thrust::make_zip_iterator( + thrust::make_tuple(rx_label_pair_old_labels.begin(), rx_label_pair_new_labels.begin())), + [] __device__(auto val) { + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + }); + relabel_map.insert(pair_first, pair_first + rx_label_pair_old_labels.size()); + + rx_label_pair_old_labels.resize(0, handle.get_stream()); + rx_label_pair_new_labels.resize(0, handle.get_stream()); + rx_label_pair_old_labels.shrink_to_fit(handle.get_stream()); + rx_label_pair_new_labels.shrink_to_fit(handle.get_stream()); + + // shuffle unique_old_labels, relabel using the intermediate relabel map, and shuffle back + + { + thrust::sort( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + unique_old_labels.begin(), + unique_old_labels.end(), + [key_func] __device__(auto lhs, auto rhs) { return key_func(lhs) < key_func(rhs); }); + + auto key_first = thrust::make_transform_iterator( + unique_old_labels.begin(), [key_func] __device__(auto val) { return key_func(val); }); + rmm::device_uvector tx_value_counts(comm_size, handle.get_stream()); + thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + key_first, + key_first + unique_old_labels.size(), + thrust::make_constant_iterator(size_t{1}), + thrust::make_discard_iterator(), + tx_value_counts.begin()); + + rmm::device_uvector rx_unique_old_labels(0, handle.get_stream()); + rmm::device_uvector rx_value_counts(0, handle.get_stream()); + + std::tie(rx_unique_old_labels, rx_value_counts) = + cugraph::experimental::detail::shuffle_values( + handle, unique_old_labels.begin(), tx_value_counts); + + CUDA_TRY(cudaStreamSynchronize( + handle.get_stream())); // cuco::static_map currently does not take stream + + relabel_map.find( + rx_unique_old_labels.begin(), + rx_unique_old_labels.end(), + rx_unique_old_labels + .begin()); // now rx_unique_old_lables hold new labels for the corresponding old labels + + std::tie(new_labels_for_unique_old_labels, std::ignore) = + cugraph::experimental::detail::shuffle_values( + handle, rx_unique_old_labels.begin(), rx_value_counts); + + CUDA_TRY(cudaStreamSynchronize( + handle.get_stream())); // tx_value_counts & rx_value_counts will become out-of-scope + } + } + + cuco::static_map relabel_map( + static_cast(static_cast(unique_old_labels.size()) / 
load_factor), + invalid_vertex_id::value, + invalid_vertex_id::value); + + auto pair_first = thrust::make_transform_iterator( + thrust::make_zip_iterator( + thrust::make_tuple(unique_old_labels.begin(), new_labels_for_unique_old_labels.begin())), + [] __device__(auto val) { + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + }); + + relabel_map.insert(pair_first, pair_first + unique_old_labels.size()); + new_labels.resize(old_labels.size(), handle.get_stream()); + relabel_map.find(old_labels.begin(), old_labels.end(), new_labels.begin()); + } else { + cuco::static_map relabel_map( + static_cast(static_cast(old_new_label_pairs.size()) / load_factor), + invalid_vertex_id::value, + invalid_vertex_id::value); + + auto pair_first = thrust::make_transform_iterator( + thrust::make_zip_iterator(thrust::make_tuple(std::get<0>(old_new_label_pairs).begin(), + std::get<1>(old_new_label_pairs).begin())), + [] __device__(auto val) { + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + }); + + relabel_map.insert(pair_first, pair_first + old_new_label_pairs.size()); + new_labels.resize(old_labels.size(), handle.get_stream()); + relabel_map.find(old_labels.begin(), old_labels.end(), new_labels.begin()); + } + + return std::move(new_labels); +} + // explicit instantiation template std::tuple>, @@ -828,5 +1008,11 @@ coarsen_graph(raft::handle_t const &handle, graph_view_t const &graph_view, int32_t const *labels); +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t const *labels); + } // namespace experimental } // namespace cugraph From 231734919c4d1716dfb28aa1ed23d6b2220ee196 Mon Sep 17 00:00:00 2001 From: Ishika Roy Date: Mon, 23 Nov 2020 11:45:22 -0600 Subject: [PATCH 022/343] enable multigraph --- python/cugraph/__init__.py | 2 ++ python/cugraph/structure/__init__.py | 2 +- python/cugraph/structure/graph.py | 15 +++++---------- python/cugraph/structure/symmetrize.py | 12 +++++++----- python/cugraph/traversal/sssp.py | 4 ++-- python/cugraph/utilities/utils.py | 5 +++-- 6 files changed, 20 insertions(+), 20 deletions(-) diff --git a/python/cugraph/__init__.py b/python/cugraph/__init__.py index 18a50160f99..374973e9f1f 100644 --- a/python/cugraph/__init__.py +++ b/python/cugraph/__init__.py @@ -29,6 +29,8 @@ from cugraph.structure import ( Graph, DiGraph, + MultiGraph, + MultiDiGraph, from_cudf_edgelist, from_pandas_edgelist, to_pandas_edgelist, diff --git a/python/cugraph/structure/__init__.py b/python/cugraph/structure/__init__.py index b8b6fbe0435..9a697f32373 100644 --- a/python/cugraph/structure/__init__.py +++ b/python/cugraph/structure/__init__.py @@ -11,7 +11,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from cugraph.structure.graph import Graph, DiGraph +from cugraph.structure.graph import Graph, DiGraph, MultiGraph, MultiDiGraph from cugraph.structure.number_map import NumberMap from cugraph.structure.symmetrize import symmetrize, symmetrize_df , symmetrize_ddf from cugraph.structure.convert_matrix import (from_cudf_edgelist, diff --git a/python/cugraph/structure/graph.py b/python/cugraph/structure/graph.py index 9479960d8e6..ba7243868f4 100644 --- a/python/cugraph/structure/graph.py +++ b/python/cugraph/structure/graph.py @@ -406,24 +406,19 @@ def from_cudf_edgelist( source_col = elist[source] dest_col = elist[destination] - if self.multi: - if type(edge_attr) is not list: - raise Exception("edge_attr should be a list of column names") - value_col = {} - for col_name in edge_attr: - value_col[col_name] = elist[col_name] - elif edge_attr is not None: + if edge_attr is not None: value_col = elist[edge_attr] else: value_col = None - if not self.symmetrized and not self.multi: + if not self.symmetrized: if value_col is not None: source_col, dest_col, value_col = symmetrize( - source_col, dest_col, value_col + source_col, dest_col, value_col, multi=self.multi ) else: - source_col, dest_col = symmetrize(source_col, dest_col) + source_col, dest_col = symmetrize(source_col, dest_col, + multi=self.multi) self.edgelist = Graph.EdgeList(source_col, dest_col, value_col) diff --git a/python/cugraph/structure/symmetrize.py b/python/cugraph/structure/symmetrize.py index e7fd15144aa..61e2a8bdfa1 100644 --- a/python/cugraph/structure/symmetrize.py +++ b/python/cugraph/structure/symmetrize.py @@ -16,7 +16,7 @@ import dask_cudf -def symmetrize_df(df, src_name, dst_name): +def symmetrize_df(df, src_name, dst_name, multi): """ Take a COO stored in a DataFrame, along with the column names of the source and destination columns and create a new data frame @@ -72,8 +72,10 @@ def symmetrize_df(df, src_name, dst_name): ) else: gdf[name] = df[name].append(df[name], ignore_index=True) - - return gdf.groupby(by=[src_name, dst_name], as_index=False).min() + if multi: + return gdf + else: + return gdf.groupby(by=[src_name, dst_name], as_index=False).min() def symmetrize_ddf(df, src_name, dst_name, weight_name=None): @@ -129,7 +131,7 @@ def symmetrize_ddf(df, src_name, dst_name, weight_name=None): return result -def symmetrize(source_col, dest_col, value_col=None): +def symmetrize(source_col, dest_col, value_col=None, multi=False): """ Take a COO set of source destination pairs along with associated values stored in a single GPU or distributed @@ -190,7 +192,7 @@ def symmetrize(source_col, dest_col, value_col=None): input_df, "source", "destination", weight_name ).persist() else: - output_df = symmetrize_df(input_df, "source", "destination") + output_df = symmetrize_df(input_df, "source", "destination", multi) if value_col is not None: return ( diff --git a/python/cugraph/traversal/sssp.py b/python/cugraph/traversal/sssp.py index cb7ac4529f4..08cdaabcfc3 100644 --- a/python/cugraph/traversal/sssp.py +++ b/python/cugraph/traversal/sssp.py @@ -15,7 +15,7 @@ import cudf from cugraph.utilities import ensure_cugraph_obj -from cugraph.structure import Graph, DiGraph +from cugraph.structure import Graph, DiGraph, MultiGraph, MultiDiGraph from cugraph.traversal import sssp_wrapper # optional dependencies used for handling different input types @@ -37,7 +37,7 @@ def _convert_df_to_output_type(df, input_type): Given a cudf.DataFrame df, convert it to a new type appropriate for the graph algos in this module, based on 
input_type. """ - if input_type in [Graph, DiGraph]: + if input_type in [Graph, DiGraph, MultiGraph, MultiDiGraph]: return df elif (nx is not None) and (input_type in [nx.Graph, nx.DiGraph]): diff --git a/python/cugraph/utilities/utils.py b/python/cugraph/utilities/utils.py index 53351f001e2..8de6656fc77 100644 --- a/python/cugraph/utilities/utils.py +++ b/python/cugraph/utilities/utils.py @@ -178,11 +178,12 @@ def ensure_cugraph_obj(obj, nx_weight_attr=None, matrix_graph_type=None): cugraph Graph-type obj to create when converting from a matrix type. """ # FIXME: importing here to avoid circular import - from cugraph.structure import Graph, DiGraph + from cugraph.structure import Graph, DiGraph, MultiGraph, MultiDiGraph from cugraph.utilities.nx_factory import convert_from_nx input_type = type(obj) - if input_type in [Graph, DiGraph]: + print(input_type) + if input_type in [Graph, DiGraph, MultiGraph, MultiDiGraph]: return (obj, input_type) elif (nx is not None) and (input_type in [nx.Graph, nx.DiGraph]): From eaf73202c22483deb836829a1c0f0e09abb20131 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 24 Nov 2020 15:44:29 -0500 Subject: [PATCH 023/343] recover accidentally deleted code --- cpp/include/experimental/detail/graph_utils.cuh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/cpp/include/experimental/detail/graph_utils.cuh b/cpp/include/experimental/detail/graph_utils.cuh index 333d8202f06..f1f62addf40 100644 --- a/cpp/include/experimental/detail/graph_utils.cuh +++ b/cpp/include/experimental/detail/graph_utils.cuh @@ -232,6 +232,12 @@ auto shuffle_values(raft::handle_t const &handle, tx_counts, tx_offsets, tx_dst_ranks, + rx_value_first, + rx_counts, + rx_offsets, + rx_src_ranks, + handle.get_stream()); + return std::tuple_cat(to_tuple(std::move(rx_value_buffer)), std::make_tuple(std::move(rx_value_counts))); } From 75b96ec445cdfd8acd8d15ec140e117a22f24b4f Mon Sep 17 00:00:00 2001 From: AJ Schmidt Date: Tue, 24 Nov 2020 15:45:03 -0500 Subject: [PATCH 024/343] DOC v0.18 Updates --- CHANGELOG.md | 8 ++++++++ conda/environments/cugraph_dev_cuda10.1.yml | 14 +++++++------- conda/environments/cugraph_dev_cuda10.2.yml | 14 +++++++------- conda/environments/cugraph_dev_cuda11.0.yml | 14 +++++++------- cpp/CMakeLists.txt | 2 +- docs/source/conf.py | 4 ++-- 6 files changed, 32 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d756b7dcec8..3f970e25fb8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,11 @@ +# cuGraph 0.18.0 (Date TBD) + +## New Features + +## Improvements + +## Bug Fixes + # cuGraph 0.17.0 (Date TBD) ## New Features diff --git a/conda/environments/cugraph_dev_cuda10.1.yml b/conda/environments/cugraph_dev_cuda10.1.yml index 9b4274abef5..ed345fcafff 100644 --- a/conda/environments/cugraph_dev_cuda10.1.yml +++ b/conda/environments/cugraph_dev_cuda10.1.yml @@ -5,17 +5,17 @@ channels: - rapidsai-nightly - conda-forge dependencies: -- cudf=0.17.* -- libcudf=0.17.* -- rmm=0.17.* +- cudf=0.18.* +- libcudf=0.18.* +- rmm=0.18.* - cuxfilter=0.17.* -- librmm=0.17.* +- librmm=0.18.* - dask>=2.12.0 - distributed>=2.12.0 -- dask-cuda=0.17* -- dask-cudf=0.17* +- dask-cuda=0.18* +- dask-cudf=0.18* - nccl>=2.7 -- ucx-py=0.17* +- ucx-py=0.18* - ucx-proc=*=gpu - scipy - networkx diff --git a/conda/environments/cugraph_dev_cuda10.2.yml b/conda/environments/cugraph_dev_cuda10.2.yml index 6526dd73f98..325a89382b7 100644 --- a/conda/environments/cugraph_dev_cuda10.2.yml +++ b/conda/environments/cugraph_dev_cuda10.2.yml @@ -5,17 +5,17 @@ channels: - 
rapidsai-nightly - conda-forge dependencies: -- cudf=0.17.* -- libcudf=0.17.* -- rmm=0.17.* +- cudf=0.18.* +- libcudf=0.18.* +- rmm=0.18.* - cuxfilter=0.17.* -- librmm=0.17.* +- librmm=0.18.* - dask>=2.12.0 - distributed>=2.12.0 -- dask-cuda=0.17* -- dask-cudf=0.17* +- dask-cuda=0.18* +- dask-cudf=0.18* - nccl>=2.7 -- ucx-py=0.17* +- ucx-py=0.18* - ucx-proc=*=gpu - scipy - networkx diff --git a/conda/environments/cugraph_dev_cuda11.0.yml b/conda/environments/cugraph_dev_cuda11.0.yml index 5016eb9405c..386377e745d 100644 --- a/conda/environments/cugraph_dev_cuda11.0.yml +++ b/conda/environments/cugraph_dev_cuda11.0.yml @@ -5,17 +5,17 @@ channels: - rapidsai-nightly - conda-forge dependencies: -- cudf=0.17.* -- libcudf=0.17.* -- rmm=0.17.* +- cudf=0.18.* +- libcudf=0.18.* +- rmm=0.18.* - cuxfilter=0.17.* -- librmm=0.17.* +- librmm=0.18.* - dask>=2.12.0 - distributed>=2.12.0 -- dask-cuda=0.17* -- dask-cudf=0.17* +- dask-cuda=0.18* +- dask-cudf=0.18* - nccl>=2.7 -- ucx-py=0.17* +- ucx-py=0.18* - ucx-proc=*=gpu - scipy - networkx diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index e012b1d197b..6a6d5125034 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -16,7 +16,7 @@ cmake_minimum_required(VERSION 3.12..3.17 FATAL_ERROR) -project(CUGRAPH VERSION 0.17.0 LANGUAGES C CXX CUDA) +project(CUGRAPH VERSION 0.18.0 LANGUAGES C CXX CUDA) ################################################################################################### # - build type ------------------------------------------------------------------------------------ diff --git a/docs/source/conf.py b/docs/source/conf.py index adec59a2f6c..6b484a5f57b 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -80,9 +80,9 @@ # built documents. # # The short X.Y version. -version = '0.17' +version = '0.18' # The full version, including alpha/beta/rc tags. -release = '0.17.0' +release = '0.18.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
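The renumber_edgelist() and relabel() patches above all follow the same pattern: collect the unique old vertex labels, assign each one a dense new ID, build an old-to-new map (a cuco::static_map on the device), and translate the edge list through map.find(). A minimal host-side sketch of that pattern follows; the helper name renumber_edgelist_host is made up for the sketch, std::unordered_map stands in for the device hash map, and the multi-GPU shuffling and count-based ordering of labels are ignored, so this only illustrates the control flow, not the actual implementation.

// Host-side sketch of the renumbering pattern used by renumber_edgelist()
// in the patches above (simplified; the real code runs on the GPU with
// thrust + cuco::static_map and, in the multi-GPU path, shuffles labels
// across ranks first).
#include <algorithm>
#include <cstdint>
#include <unordered_map>
#include <vector>

std::vector<int32_t> renumber_edgelist_host(std::vector<int32_t>& srcs,
                                            std::vector<int32_t>& dsts)
{
  // 1. gather the unique labels appearing in the edge list
  std::vector<int32_t> labels(srcs);
  labels.insert(labels.end(), dsts.begin(), dsts.end());
  std::sort(labels.begin(), labels.end());
  labels.erase(std::unique(labels.begin(), labels.end()), labels.end());

  // 2. build the old-label -> new-ID map (new IDs are positions in `labels`)
  std::unordered_map<int32_t, int32_t> renumber_map;
  for (int32_t i = 0; i < static_cast<int32_t>(labels.size()); ++i) {
    renumber_map.emplace(labels[i], i);
  }

  // 3. translate the edge list in place, mirroring the renumber_map.find()
  //    calls over edgelist_major_vertices / edgelist_minor_vertices
  for (auto& v : srcs) { v = renumber_map.at(v); }
  for (auto& v : dsts) { v = renumber_map.at(v); }

  return labels;  // labels[i] is the old label of new vertex ID i
}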
From d264a55af1145875cc6ca0f24fee0615b1bfcc8f Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 24 Nov 2020 16:00:46 -0500 Subject: [PATCH 025/343] add functions to query number of edges per partition --- cpp/include/experimental/graph_view.hpp | 11 +++++++++++ cpp/include/matrix_partition_device.cuh | 18 +++++++++++++----- cpp/src/experimental/graph_view.cu | 23 +++++++++++++++++++++++ 3 files changed, 47 insertions(+), 5 deletions(-) diff --git a/cpp/include/experimental/graph_view.hpp b/cpp/include/experimental/graph_view.hpp index eae06181e6e..3312cb266fc 100644 --- a/cpp/include/experimental/graph_view.hpp +++ b/cpp/include/experimental/graph_view.hpp @@ -400,6 +400,10 @@ class graph_view_t adj_matrix_partition_offsets_{}; std::vector adj_matrix_partition_indices_{}; std::vector adj_matrix_partition_weights_{}; + std::vector adj_matrix_partition_number_of_edges_{}; partition_t partition_{}; @@ -568,6 +573,11 @@ class graph_view_tget_number_of_vertices(); } + edge_t get_number_of_local_adj_matrix_partition_edges(size_t adj_matrix_partition_idx) const { + assert(adj_matrix_partition_idx == 0); + return this->get_number_of_edges(); + } + vertex_t get_local_adj_matrix_partition_row_first(size_t adj_matrix_partition_idx) const { assert(adj_matrix_partition_idx == 0); @@ -630,6 +640,7 @@ class graph_view_t segment_offsets_{}; // segment offsets based on vertex degree, relevant // only if sorted_by_global_degree is true }; diff --git a/cpp/include/matrix_partition_device.cuh b/cpp/include/matrix_partition_device.cuh index 53796530f60..e8f4d6fe02e 100644 --- a/cpp/include/matrix_partition_device.cuh +++ b/cpp/include/matrix_partition_device.cuh @@ -30,11 +30,14 @@ class matrix_partition_device_base_t { public: matrix_partition_device_base_t(edge_t const* offsets, vertex_t const* indices, - weight_t const* weights) - : offsets_(offsets), indices_(indices), weights_(weights) + weight_t const* weights, + edge_t number_of_edges) + : offsets_(offsets), indices_(indices), weights_(weights), number_of_edges_(number_of_edges) { } + __host__ __device__ edge_t get_number_of_edges() const { return number_of_edges_; } + __device__ thrust::tuple get_local_edges( vertex_t major_offset) const noexcept { @@ -55,6 +58,7 @@ class matrix_partition_device_base_t { edge_t const* offsets_{nullptr}; vertex_t const* indices_{nullptr}; weight_t const* weights_{nullptr}; + edge_t number_of_edges_{0}; }; template @@ -73,7 +77,8 @@ class matrix_partition_device_t( graph_view.offsets(partition_idx), graph_view.indices(partition_idx), - graph_view.weights(partition_idx)), + graph_view.weights(partition_idx), + graph_view.get_number_of_local_adj_matrix_partition_edges(partition_idx)), major_first_(GraphViewType::is_adj_matrix_transposed ? 
graph_view.get_local_adj_matrix_partition_col_first(partition_idx) : graph_view.get_local_adj_matrix_partition_row_first(partition_idx)), @@ -93,7 +98,7 @@ class matrix_partition_device_t( - graph_view.offsets(), graph_view.indices(), graph_view.weights()), + graph_view.offsets(), + graph_view.indices(), + graph_view.weights(), + graph_view.get_number_of_edges()), number_of_vertices_(graph_view.get_number_of_vertices()) { assert(partition_idx == 0); diff --git a/cpp/src/experimental/graph_view.cu b/cpp/src/experimental/graph_view.cu index 04d2ea990df..ed43bad5a03 100644 --- a/cpp/src/experimental/graph_view.cu +++ b/cpp/src/experimental/graph_view.cu @@ -49,6 +49,27 @@ struct out_of_range_t { __device__ bool operator()(vertex_t v) { return (v < min) || (v >= max); } }; +template +std::vector update_adj_matrix_partition_edge_counts( + std::vector const& adj_matrix_partition_offsets, + partition_t const& partition, + cudaStream_t stream) +{ + std::vector adj_matrix_partition_edge_counts(partition.get_number_of_matrix_partitions(), + 0); + for (size_t i = 0; i < adj_matrix_partition_offsets.size(); ++i) { + vertex_t major_first{}; + vertex_t major_last{}; + std::tie(major_first, major_last) = partition.get_matrix_partition_major_range(i); + raft::update_host(&(adj_matrix_partition_edge_counts[i]), + adj_matrix_partition_offsets[i] + (major_last - major_first), + 1, + stream); + } + CUDA_TRY(cudaStreamSynchronize(stream)); + return adj_matrix_partition_edge_counts; +} + } // namespace template Date: Tue, 1 Dec 2020 12:39:41 -0600 Subject: [PATCH 026/343] add tests and fixes --- python/cugraph/structure/graph.py | 2 +- python/cugraph/tests/test_multigraph.py | 54 +++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 python/cugraph/tests/test_multigraph.py diff --git a/python/cugraph/structure/graph.py b/python/cugraph/structure/graph.py index ba7243868f4..af4b1cfa65f 100644 --- a/python/cugraph/structure/graph.py +++ b/python/cugraph/structure/graph.py @@ -1012,7 +1012,7 @@ def number_of_edges(self, directed_edges=False): return len(self.edgelist.edgelist_df) if self.edge_count is None: if self.edgelist is not None: - if type(self) is Graph: + if type(self) is Graph or MultiGraph: self.edge_count = len( self.edgelist.edgelist_df[ self.edgelist.edgelist_df["src"] diff --git a/python/cugraph/tests/test_multigraph.py b/python/cugraph/tests/test_multigraph.py new file mode 100644 index 00000000000..e57090cc90e --- /dev/null +++ b/python/cugraph/tests/test_multigraph.py @@ -0,0 +1,54 @@ +import cugraph +import networkx as nx +from cugraph.tests import utils +import pytest +import gc +import pandas as pd +import numpy as np + +@pytest.mark.parametrize("graph_file", utils.DATASETS) +def test_multigraph(graph_file): + gc.collect() + cuM = utils.read_csv_file(graph_file) + G = cugraph.MultiGraph() + G.from_cudf_edgelist(cuM, source="0", destination="1", edge_attr="2") + + nxM = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True) + Gnx = nx.from_pandas_edgelist( + nxM, + source="0", + target="1", + edge_attr="weight", + create_using=nx.MultiGraph(), + ) + + assert G.number_of_edges() == Gnx.number_of_edges() + assert G.number_of_nodes() == Gnx.number_of_nodes() + + +@pytest.mark.parametrize("graph_file", utils.DATASETS) +def test_multigraph_sssp(graph_file): + gc.collect() + cuM = utils.read_csv_file(graph_file) + G = cugraph.MultiDiGraph() + G.from_cudf_edgelist(cuM, source="0", destination="1", edge_attr="2") + cu_paths = cugraph.sssp(G, 0) + max_val = 
np.finfo(cu_paths["distance"].dtype).max + cu_paths = cu_paths[cu_paths["distance"] != max_val] + nxM = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True) + Gnx = nx.from_pandas_edgelist( + nxM, + source="0", + target="1", + edge_attr="weight", + create_using=nx.MultiDiGraph(), + ) + nx_paths = nx.single_source_dijkstra_path_length(Gnx, 0) + + print(cu_paths) + print(nx_paths) + + cu_dist = cu_paths.sort_values(by='vertex')['distance'].to_array() + nx_dist = [i[1] for i in sorted(nx_paths.items())] + + assert (cu_dist == nx_dist).all() From 3ae324dda1e38f6505a5acfafdcda8ec12610dc3 Mon Sep 17 00:00:00 2001 From: Ishika Roy Date: Wed, 2 Dec 2020 10:42:50 -0600 Subject: [PATCH 027/343] add graph from multigraph functionality --- python/cugraph/structure/graph.py | 63 ++++++++++++++++--------- python/cugraph/structure/symmetrize.py | 36 +++++++------- python/cugraph/tests/test_multigraph.py | 40 ++++++++++++++-- 3 files changed, 99 insertions(+), 40 deletions(-) diff --git a/python/cugraph/structure/graph.py b/python/cugraph/structure/graph.py index af4b1cfa65f..fde75576cba 100644 --- a/python/cugraph/structure/graph.py +++ b/python/cugraph/structure/graph.py @@ -73,7 +73,6 @@ def __init__(self, offsets, indices, value=None): def __init__( self, m_graph=None, - edge_attr=None, symmetrized=False, bipartite=False, multi=False, @@ -112,24 +111,41 @@ def __init__( self.batch_transposed_adjlists = None if m_graph is not None: - if (type(self) is Graph and type(m_graph) is MultiGraph) or ( + """if (type(self) is Graph and type(m_graph) is MultiGraph) or ( type(self) is DiGraph and type(m_graph) is MultiDiGraph ): - self.from_cudf_edgelist( - m_graph.edgelist.edgelist_df, - source="src", - destination="dst", - edge_attr=edge_attr, - ) - self.renumbered = m_graph.renumbered - self.renumber_map = m_graph.renumber_map + elist = m_graph.view_edge_list() + if m_graph.edgelist.weights: + weights = "weights" + else: + weights = None + self.from_cudf_edgelist(elist, + source = "src", + destination = "dst", + edge_attr = weights) else: msg = ( "Graph can be initialized using MultiGraph " "and DiGraph can be initialized using MultiDiGraph" ) + raise Exception(msg)""" + if type(m_graph) is MultiGraph or type(m_graph) is MultiDiGraph: + elist = m_graph.view_edge_list() + if m_graph.edgelist.weights: + weights = "weights" + else: + weights = None + self.from_cudf_edgelist(elist, + source = "src", + destination = "dst", + edge_attr = weights) + else: + msg = ( + "Graph can only be initialized using MultiGraph " + "or MultiDiGraph" + ) raise Exception(msg) - # self.number_of_vertices = None + def enable_batch(self): client = mg_utils.get_client() @@ -277,6 +293,12 @@ def is_multipartite(self): # TO DO: Call coloring algorithm return self.multipartite or self.bipartite + def is_multigraph(self): + """ + Returns True if the graph is a multigraph. Else returns False. + """ + return self.multi + def sets(self): """ Returns the bipartite set of nodes. 
This solely relies on the user's @@ -411,14 +433,13 @@ def from_cudf_edgelist( else: value_col = None - if not self.symmetrized: - if value_col is not None: - source_col, dest_col, value_col = symmetrize( - source_col, dest_col, value_col, multi=self.multi - ) - else: - source_col, dest_col = symmetrize(source_col, dest_col, - multi=self.multi) + if value_col is not None: + source_col, dest_col, value_col = symmetrize( + source_col, dest_col, value_col, multi=self.multi, + symmetrize = not self.symmetrized) + else: + source_col, dest_col = symmetrize(source_col, dest_col, + multi=self.multi, symmetrize = not self.symmetrized) self.edgelist = Graph.EdgeList(source_col, dest_col, value_col) @@ -1012,7 +1033,7 @@ def number_of_edges(self, directed_edges=False): return len(self.edgelist.edgelist_df) if self.edge_count is None: if self.edgelist is not None: - if type(self) is Graph or MultiGraph: + if type(self) is Graph or type(self) is MultiGraph: self.edge_count = len( self.edgelist.edgelist_df[ self.edgelist.edgelist_df["src"] @@ -1506,7 +1527,7 @@ def add_internal_vertex_id( class DiGraph(Graph): def __init__(self, m_graph=None, edge_attr=None): super().__init__( - m_graph=m_graph, edge_attr=edge_attr, symmetrized=True + m_graph=m_graph, symmetrized=True ) diff --git a/python/cugraph/structure/symmetrize.py b/python/cugraph/structure/symmetrize.py index 61e2a8bdfa1..b9499df6908 100644 --- a/python/cugraph/structure/symmetrize.py +++ b/python/cugraph/structure/symmetrize.py @@ -16,7 +16,7 @@ import dask_cudf -def symmetrize_df(df, src_name, dst_name, multi): +def symmetrize_df(df, src_name, dst_name, multi, symmetrize): """ Take a COO stored in a DataFrame, along with the column names of the source and destination columns and create a new data frame @@ -54,24 +54,26 @@ def symmetrize_df(df, src_name, dst_name, multi): >>> sym_ddf = cugraph.symmetrize_ddf(ddf, "src", "dst", "weight") >>> Comms.destroy() """ - gdf = cudf.DataFrame() - # # Now append the columns. We add sources to the end of destinations, # and destinations to the end of sources. Otherwise we append a # column onto itself. 
# - for idx, name in enumerate(df.columns): - if name == src_name: - gdf[src_name] = df[src_name].append( - df[dst_name], ignore_index=True - ) - elif name == dst_name: - gdf[dst_name] = df[dst_name].append( - df[src_name], ignore_index=True - ) - else: - gdf[name] = df[name].append(df[name], ignore_index=True) + if symmetrize: + gdf = cudf.DataFrame() + for idx, name in enumerate(df.columns): + if name == src_name: + gdf[src_name] = df[src_name].append( + df[dst_name], ignore_index=True + ) + elif name == dst_name: + gdf[dst_name] = df[dst_name].append( + df[src_name], ignore_index=True + ) + else: + gdf[name] = df[name].append(df[name], ignore_index=True) + else: + gdf = df if multi: return gdf else: @@ -131,7 +133,8 @@ def symmetrize_ddf(df, src_name, dst_name, weight_name=None): return result -def symmetrize(source_col, dest_col, value_col=None, multi=False): +def symmetrize(source_col, dest_col, value_col=None, multi=False, + symmetrize=True): """ Take a COO set of source destination pairs along with associated values stored in a single GPU or distributed @@ -192,7 +195,8 @@ def symmetrize(source_col, dest_col, value_col=None, multi=False): input_df, "source", "destination", weight_name ).persist() else: - output_df = symmetrize_df(input_df, "source", "destination", multi) + output_df = symmetrize_df(input_df, "source", "destination", multi, + symmetrize) if value_col is not None: return ( diff --git a/python/cugraph/tests/test_multigraph.py b/python/cugraph/tests/test_multigraph.py index e57090cc90e..c92ea516670 100644 --- a/python/cugraph/tests/test_multigraph.py +++ b/python/cugraph/tests/test_multigraph.py @@ -24,6 +24,42 @@ def test_multigraph(graph_file): assert G.number_of_edges() == Gnx.number_of_edges() assert G.number_of_nodes() == Gnx.number_of_nodes() + cuedges = G.view_edge_list() + nxedges = pd.DataFrame(Gnx.edges(data=True)) + #print(cuedges.sort_values(by=["src","dst"])) + #print(nxedges.sort_values(by=["0","1"])) + + +@pytest.mark.parametrize("graph_file", utils.DATASETS) +def test_Graph_from_MultiGraph(graph_file): + cuM = utils.read_csv_file(graph_file) + GM = cugraph.MultiGraph() + GM.from_cudf_edgelist(cuM, source="0", destination="1", edge_attr="2") + nxM = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True) + GnxM = nx.from_pandas_edgelist( + nxM, + source="0", + target="1", + edge_attr="weight", + create_using=nx.MultiGraph(), + ) + + G = cugraph.Graph(GM) + Gnx = nx.Graph(GnxM) + assert Gnx.number_of_edges() == G.number_of_edges() + + GdM = cugraph.MultiDiGraph() + GdM.from_cudf_edgelist(cuM, source="0", destination="1", edge_attr="2") + GnxdM = nx.from_pandas_edgelist( + nxM, + source="0", + target="1", + edge_attr="weight", + create_using=nx.MultiGraph(), + ) + Gd = cugraph.DiGraph(GdM) + Gnxd = nx.DiGraph(GnxdM) + assert Gnxd.number_of_edges() == Gd.number_of_edges() @pytest.mark.parametrize("graph_file", utils.DATASETS) @@ -45,10 +81,8 @@ def test_multigraph_sssp(graph_file): ) nx_paths = nx.single_source_dijkstra_path_length(Gnx, 0) - print(cu_paths) - print(nx_paths) - cu_dist = cu_paths.sort_values(by='vertex')['distance'].to_array() nx_dist = [i[1] for i in sorted(nx_paths.items())] assert (cu_dist == nx_dist).all() + From 13ec762f35cfbfa8ce18b0a328ebcec3d7f47ac5 Mon Sep 17 00:00:00 2001 From: Ishika Roy Date: Wed, 2 Dec 2020 15:21:28 -0600 Subject: [PATCH 028/343] update test, add changelog --- CHANGELOG.md | 1 + python/cugraph/tests/test_multigraph.py | 17 ++++++++++------- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git 
a/CHANGELOG.md b/CHANGELOG.md index 7d77c6cbc40..fc8d428bcfc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ - PR #1260 Add katz_centrality mnmg wrapper - PR #1264 CuPy sparse matrix input support for WCC, SCC, SSSP, and BFS - PR #1265 Implement Hungarian Algorithm +- PR #1280 Add Multi(Di)Graph support ## Improvements - PR #1227 Pin cmake policies to cmake 3.17 version diff --git a/python/cugraph/tests/test_multigraph.py b/python/cugraph/tests/test_multigraph.py index c92ea516670..853bf2687c3 100644 --- a/python/cugraph/tests/test_multigraph.py +++ b/python/cugraph/tests/test_multigraph.py @@ -10,7 +10,7 @@ def test_multigraph(graph_file): gc.collect() cuM = utils.read_csv_file(graph_file) - G = cugraph.MultiGraph() + G = cugraph.MultiDiGraph() G.from_cudf_edgelist(cuM, source="0", destination="1", edge_attr="2") nxM = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True) @@ -19,15 +19,19 @@ def test_multigraph(graph_file): source="0", target="1", edge_attr="weight", - create_using=nx.MultiGraph(), + create_using=nx.MultiDiGraph(), ) assert G.number_of_edges() == Gnx.number_of_edges() assert G.number_of_nodes() == Gnx.number_of_nodes() - cuedges = G.view_edge_list() - nxedges = pd.DataFrame(Gnx.edges(data=True)) - #print(cuedges.sort_values(by=["src","dst"])) - #print(nxedges.sort_values(by=["0","1"])) + cuedges = cugraph.to_pandas_edgelist(G) + cuedges.rename(columns = {"src":"source", "dst":"target","weights":"weight"}, inplace=True) + cuedges["weight"] = cuedges["weight"].round(decimals = 3) + nxedges = nx.to_pandas_edgelist(Gnx).astype(dtype={"source":"int32","target":"int32","weight":"float32"}) + cuedges = cuedges.sort_values(by=["source","target"]).reset_index(drop=True) + nxedges = nxedges.sort_values(by=["source","target"]).reset_index(drop=True) + nxedges["weight"] = nxedges["weight"].round(decimals = 3) + assert nxedges.equals(cuedges[["source", "target", "weight"]]) @pytest.mark.parametrize("graph_file", utils.DATASETS) @@ -85,4 +89,3 @@ def test_multigraph_sssp(graph_file): nx_dist = [i[1] for i in sorted(nx_paths.items())] assert (cu_dist == nx_dist).all() - From 396caae3026df1e6c56399697035873cf5637b14 Mon Sep 17 00:00:00 2001 From: Ishika Roy Date: Wed, 2 Dec 2020 16:09:14 -0600 Subject: [PATCH 029/343] style --- python/cugraph/structure/graph.py | 16 ++++++++-------- python/cugraph/tests/test_multigraph.py | 19 ++++++++++++------- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/python/cugraph/structure/graph.py b/python/cugraph/structure/graph.py index fde75576cba..d390993e9ed 100644 --- a/python/cugraph/structure/graph.py +++ b/python/cugraph/structure/graph.py @@ -136,9 +136,9 @@ def __init__( else: weights = None self.from_cudf_edgelist(elist, - source = "src", - destination = "dst", - edge_attr = weights) + source="src", + destination="dst", + edge_attr=weights) else: msg = ( "Graph can only be initialized using MultiGraph " @@ -146,7 +146,6 @@ def __init__( ) raise Exception(msg) - def enable_batch(self): client = mg_utils.get_client() comms = Comms.get_comms() @@ -436,10 +435,11 @@ def from_cudf_edgelist( if value_col is not None: source_col, dest_col, value_col = symmetrize( source_col, dest_col, value_col, multi=self.multi, - symmetrize = not self.symmetrized) + symmetrize=not self.symmetrized) else: - source_col, dest_col = symmetrize(source_col, dest_col, - multi=self.multi, symmetrize = not self.symmetrized) + source_col, dest_col = symmetrize( + source_col, dest_col, multi=self.multi, + symmetrize=not self.symmetrized) 
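        # Passing symmetrize=not self.symmetrized above means the edge list is only
        # mirrored for graphs that were not flagged as already symmetrized; DiGraph,
        # for example, is constructed with symmetrized=True, so its edges are not
        # mirrored again and only duplicate-edge coalescing is applied.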
self.edgelist = Graph.EdgeList(source_col, dest_col, value_col) @@ -741,7 +741,7 @@ def view_edge_list(self): edgelist_df = self.unrenumber(edgelist_df, "src") edgelist_df = self.unrenumber(edgelist_df, "dst") - if type(self) is Graph: + if type(self) is Graph or type(self) is MultiGraph: edgelist_df = edgelist_df[edgelist_df["src"] <= edgelist_df["dst"]] edgelist_df = edgelist_df.reset_index(drop=True) self.edge_count = len(edgelist_df) diff --git a/python/cugraph/tests/test_multigraph.py b/python/cugraph/tests/test_multigraph.py index 853bf2687c3..66fc95d7f84 100644 --- a/python/cugraph/tests/test_multigraph.py +++ b/python/cugraph/tests/test_multigraph.py @@ -3,9 +3,9 @@ from cugraph.tests import utils import pytest import gc -import pandas as pd import numpy as np + @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_multigraph(graph_file): gc.collect() @@ -25,12 +25,17 @@ def test_multigraph(graph_file): assert G.number_of_edges() == Gnx.number_of_edges() assert G.number_of_nodes() == Gnx.number_of_nodes() cuedges = cugraph.to_pandas_edgelist(G) - cuedges.rename(columns = {"src":"source", "dst":"target","weights":"weight"}, inplace=True) - cuedges["weight"] = cuedges["weight"].round(decimals = 3) - nxedges = nx.to_pandas_edgelist(Gnx).astype(dtype={"source":"int32","target":"int32","weight":"float32"}) - cuedges = cuedges.sort_values(by=["source","target"]).reset_index(drop=True) - nxedges = nxedges.sort_values(by=["source","target"]).reset_index(drop=True) - nxedges["weight"] = nxedges["weight"].round(decimals = 3) + cuedges.rename(columns={"src": "source", "dst": "target", + "weights": "weight"}, inplace=True) + cuedges["weight"] = cuedges["weight"].round(decimals=3) + nxedges = nx.to_pandas_edgelist(Gnx).astype(dtype={"source": "int32", + "target": "int32", + "weight": "float32"}) + cuedges = cuedges.sort_values(by=["source", "target"]).\ + reset_index(drop=True) + nxedges = nxedges.sort_values(by=["source", "target"]).\ + reset_index(drop=True) + nxedges["weight"] = nxedges["weight"].round(decimals=3) assert nxedges.equals(cuedges[["source", "target", "weight"]]) From 9aab318c8a32ab257dc91b012207af25cb629002 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 3 Dec 2020 17:22:48 -0500 Subject: [PATCH 030/343] update shuffle_values to take raft::comms::comms_t & cudaStream_t instead of raft::handle_t --- .../experimental/detail/graph_utils.cuh | 30 ++++++++++--------- cpp/src/experimental/graph_functions.cu | 15 +++++----- 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/cpp/include/experimental/detail/graph_utils.cuh b/cpp/include/experimental/detail/graph_utils.cuh index f1f62addf40..92614d00678 100644 --- a/cpp/include/experimental/detail/graph_utils.cuh +++ b/cpp/include/experimental/detail/graph_utils.cuh @@ -128,12 +128,14 @@ struct is_std_tuple> : std::true_type { }; template -auto to_tuple(T&& val, std::enable_if_t::value, void>* = nullptr) { +auto to_tuple(T &&val, std::enable_if_t::value, void> * = nullptr) +{ return std::forward(val); } template -auto to_tuple(T&& val, std::enable_if_t::value, void>* = nullptr) { +auto to_tuple(T &&val, std::enable_if_t::value, void> * = nullptr) +{ return std::make_tuple(std::forward(val)); } #endif @@ -156,14 +158,14 @@ rmm::device_uvector compute_major_degree( } template -auto shuffle_values(raft::handle_t const &handle, +auto shuffle_values(raft::comms::comms_t const &comm, TxValueIterator tx_value_first, - rmm::device_uvector const &tx_value_counts) + rmm::device_uvector const &tx_value_counts, + 
cudaStream_t stream) { - auto &comm = handle.get_comms(); auto const comm_size = comm.get_size(); - rmm::device_uvector rx_value_counts(comm_size, handle.get_stream()); + rmm::device_uvector rx_value_counts(comm_size, stream); // FIXME: this needs to be replaced with AlltoAll once NCCL 2.8 is released. std::vector tx_counts(comm_size, size_t{1}); @@ -185,20 +187,19 @@ auto shuffle_values(raft::handle_t const &handle, rx_counts, rx_offsets, rx_src_ranks, - handle.get_stream()); + stream); - raft::update_host(tx_counts.data(), tx_value_counts.data(), comm_size, handle.get_stream()); - raft::update_host(rx_counts.data(), rx_value_counts.data(), comm_size, handle.get_stream()); + raft::update_host(tx_counts.data(), tx_value_counts.data(), comm_size, stream); + raft::update_host(rx_counts.data(), rx_value_counts.data(), comm_size, stream); - CUDA_TRY( - cudaStreamSynchronize(handle.get_stream())); // tx_counts & rx_counts should be up-to-date + CUDA_TRY(cudaStreamSynchronize(stream)); // tx_counts & rx_counts should be up-to-date std::partial_sum(tx_counts.begin(), tx_counts.end() - 1, tx_offsets.begin() + 1); std::partial_sum(rx_counts.begin(), rx_counts.end() - 1, rx_offsets.begin() + 1); auto rx_value_buffer = allocate_comm_buffer::value_type>( - rx_offsets.back(), handle.get_stream()); + rx_offsets.back(), stream); auto rx_value_first = get_comm_buffer_begin::value_type>( rx_value_buffer); @@ -236,9 +237,10 @@ auto shuffle_values(raft::handle_t const &handle, rx_counts, rx_offsets, rx_src_ranks, - handle.get_stream()); + stream); - return std::tuple_cat(to_tuple(std::move(rx_value_buffer)), std::make_tuple(std::move(rx_value_counts))); + return std::tuple_cat(to_tuple(std::move(rx_value_buffer)), + std::make_tuple(std::move(rx_value_counts))); } template diff --git a/cpp/src/experimental/graph_functions.cu b/cpp/src/experimental/graph_functions.cu index ea8b2a26cf5..bab38d49154 100644 --- a/cpp/src/experimental/graph_functions.cu +++ b/cpp/src/experimental/graph_functions.cu @@ -310,8 +310,8 @@ rmm::device_uvector compute_renumber_map( CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); // tx_value_counts should be up-to-date - std::tie(rx_labels, rx_counts, std::ignore) = - cugraph::experimental::detail::shuffle_values(handle, pair_first, tx_value_counts); + std::tie(rx_labels, rx_counts, std::ignore) = cugraph::experimental::detail::shuffle_values( + handle.get_comms(), pair_first, tx_value_counts, handle.get_stream()); labels.resize(rx_labels.size(), handle.get_stream()); counts.resize(labels.size(), handle.get_stream()); @@ -715,7 +715,7 @@ coarsen_graph( std::tie( rx_edgelist_major_vertices, rx_edgelist_minor_vertices, rx_edgelist_weights, std::ignore) = - detail::shuffle_values(handle, edge_first, tx_value_counts); + detail::shuffle_values(handle.get_comms(), edge_first, tx_value_counts, handle.get_stream()); sort_and_coarsen_edgelist(rx_edgelist_major_vertices, rx_edgelist_minor_vertices, @@ -836,7 +836,7 @@ rmm::device_uvector relabel( &old_new_label_pairs) { double constexpr load_factor = 0.7; - + rmm::device_uvector new_labels(0, handle.get_stream()); if (multi_gpu) { @@ -893,7 +893,8 @@ rmm::device_uvector relabel( cudaStreamSynchronize(handle.get_stream())); // tx_value_counts should be up-to-date std::tie(rx_label_pair_old_labels, rx_label_pair_new_labels, std::ignore) = - cugraph::experimental::detail::shuffle_values(handle, pair_first, tx_value_counts); + cugraph::experimental::detail::shuffle_values( + handle.get_comms(), pair_first, tx_value_counts, handle.get_comms()); 
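  // A minimal sketch of the refactored calling convention (illustrative only; rx_values,
  // rx_counts, and tx_value_first are placeholder names, assuming a plain non-zip device
  // iterator): callers that only hold a raft::handle_t now unpack it explicitly, e.g.
  //
  //   rmm::device_uvector<vertex_t> rx_values(0, handle.get_stream());
  //   rmm::device_uvector<size_t> rx_counts(0, handle.get_stream());
  //   std::tie(rx_values, rx_counts) = cugraph::experimental::detail::shuffle_values(
  //     handle.get_comms(), tx_value_first, tx_value_counts, handle.get_stream());
  //
  // where the last tuple element holds the number of values received from each rank.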
CUDA_TRY(cudaStreamSynchronize( handle.get_stream())); // label_pair_old_labels and label_pair_new_labels will become @@ -944,7 +945,7 @@ rmm::device_uvector relabel( std::tie(rx_unique_old_labels, rx_value_counts) = cugraph::experimental::detail::shuffle_values( - handle, unique_old_labels.begin(), tx_value_counts); + handle.get_comms(), unique_old_labels.begin(), tx_value_counts, handle.get_stream()); CUDA_TRY(cudaStreamSynchronize( handle.get_stream())); // cuco::static_map currently does not take stream @@ -957,7 +958,7 @@ rmm::device_uvector relabel( std::tie(new_labels_for_unique_old_labels, std::ignore) = cugraph::experimental::detail::shuffle_values( - handle, rx_unique_old_labels.begin(), rx_value_counts); + handle.get_comms(), rx_unique_old_labels.begin(), rx_value_counts, handle.get_stream()); CUDA_TRY(cudaStreamSynchronize( handle.get_stream())); // tx_value_counts & rx_value_counts will become out-of-scope From 0f78b4bf63266e2706605fc79ed1426c92be6753 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 3 Dec 2020 17:23:24 -0500 Subject: [PATCH 031/343] add get_local_offset() to matrix_partition_device_base_t --- cpp/include/matrix_partition_device.cuh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cpp/include/matrix_partition_device.cuh b/cpp/include/matrix_partition_device.cuh index e8f4d6fe02e..b5564a47f30 100644 --- a/cpp/include/matrix_partition_device.cuh +++ b/cpp/include/matrix_partition_device.cuh @@ -53,6 +53,11 @@ class matrix_partition_device_base_t { return *(offsets_ + (major_offset + 1)) - *(offsets_ + major_offset); } + __device__ edge_t get_local_offset(vertex_t major_offset) const noexcept + { + return *(offsets_ + major_offset); + } + private: // should be trivially copyable to device edge_t const* offsets_{nullptr}; From b6fa1603b4ca016644eef842846f923fa53d112d Mon Sep 17 00:00:00 2001 From: Ishika Roy Date: Thu, 3 Dec 2020 22:18:29 -0600 Subject: [PATCH 032/343] update symmetrize --- python/cugraph/structure/symmetrize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cugraph/structure/symmetrize.py b/python/cugraph/structure/symmetrize.py index b9499df6908..2c70649ded0 100644 --- a/python/cugraph/structure/symmetrize.py +++ b/python/cugraph/structure/symmetrize.py @@ -16,7 +16,7 @@ import dask_cudf -def symmetrize_df(df, src_name, dst_name, multi, symmetrize): +def symmetrize_df(df, src_name, dst_name, multi=False, symmetrize=True): """ Take a COO stored in a DataFrame, along with the column names of the source and destination columns and create a new data frame From a815509be2fc1aa6ffb49a6976bb897fecc3aa1e Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 3 Dec 2020 23:49:44 -0500 Subject: [PATCH 033/343] remove unnecessary stream sync --- cpp/src/experimental/graph_functions.cu | 7 ------- 1 file changed, 7 deletions(-) diff --git a/cpp/src/experimental/graph_functions.cu b/cpp/src/experimental/graph_functions.cu index bab38d49154..7f88421252a 100644 --- a/cpp/src/experimental/graph_functions.cu +++ b/cpp/src/experimental/graph_functions.cu @@ -308,8 +308,6 @@ rmm::device_uvector compute_renumber_map( rmm::device_uvector rx_labels(0, handle.get_stream()); rmm::device_uvector rx_counts(0, handle.get_stream()); - CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); // tx_value_counts should be up-to-date - std::tie(rx_labels, rx_counts, std::ignore) = cugraph::experimental::detail::shuffle_values( handle.get_comms(), pair_first, tx_value_counts, handle.get_stream()); @@ -711,8 +709,6 @@ coarsen_graph( 
rmm::device_uvector rx_edgelist_minor_vertices(0, handle.get_stream()); rmm::device_uvector rx_edgelist_weights(0, handle.get_stream()); - CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); // tx_value_counts should be up-to-date - std::tie( rx_edgelist_major_vertices, rx_edgelist_minor_vertices, rx_edgelist_weights, std::ignore) = detail::shuffle_values(handle.get_comms(), edge_first, tx_value_counts, handle.get_stream()); @@ -889,9 +885,6 @@ rmm::device_uvector relabel( thrust::make_discard_iterator(), tx_value_counts.begin()); - CUDA_TRY( - cudaStreamSynchronize(handle.get_stream())); // tx_value_counts should be up-to-date - std::tie(rx_label_pair_old_labels, rx_label_pair_new_labels, std::ignore) = cugraph::experimental::detail::shuffle_values( handle.get_comms(), pair_first, tx_value_counts, handle.get_comms()); From 5a4119ee649d25f093f03b829aa28feea2d59434 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 8 Dec 2020 00:17:46 -0500 Subject: [PATCH 034/343] cosmetic update/adding comments --- cpp/include/patterns/reduce_op.cuh | 1 + cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/include/patterns/reduce_op.cuh b/cpp/include/patterns/reduce_op.cuh index e9011914292..f52fa86555a 100644 --- a/cpp/include/patterns/reduce_op.cuh +++ b/cpp/include/patterns/reduce_op.cuh @@ -29,6 +29,7 @@ struct any { __host__ __device__ T operator()(T const& lhs, T const& rhs) const { return lhs; } }; +// FIXME: thrust::minimum can replace this. // reducing N elements (operator < should be defined between any two elements), the minimum element // should be selected. template diff --git a/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh b/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh index a2250482c68..0a36e105aa0 100644 --- a/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh +++ b/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh @@ -400,7 +400,7 @@ void update_frontier_v_push_if_out_nbr( frontier_size = thrust::distance(vertex_first, vertex_last); } - edge_t max_pushes = + auto max_pushes = frontier_size > 0 ? frontier_rows.size() > 0 ? 
thrust::transform_reduce( From 53dd26b010925aaa6360ba29a4d039a23e284b65 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Wed, 9 Dec 2020 14:55:04 -0500 Subject: [PATCH 035/343] temp commit --- ...ransform_reduce_key_aggregated_out_nbr.cuh | 376 +++++++++++++++++- .../patterns/transform_reduce_by_key_e.cuh | 72 +++- 2 files changed, 429 insertions(+), 19 deletions(-) diff --git a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh index ad172c5437a..e0f116a9f01 100644 --- a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh +++ b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh @@ -27,6 +27,87 @@ namespace cugraph { namespace experimental { +namespace detail { + +// FIXME: block size requires tuning +int32_t constexpr copy_v_transform_reduce_key_aggregated_out_nbr_for_all_block_size = 128; + +template +__global__ void for_all_major_for_all_nbr_low_degree( + matrix_partition_device_t matrix_partition, + typename GraphViewType::vertex_type major_first, + typename GraphViewType::vertex_type major_last, + KeyIterator adj_matrix_minor_key_first, + typename GraphViewType::vertex_type const* major_vertices, + typename GraphViewType::vertex_type const* minor_keys, + typename GraphViewType::weight_type const* key_aggregated_edge_weights) +{ + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; + + auto const tid = threadIdx.x + blockIdx.x * blockDim.x; + auto major_start_offset = static_cast(major_first - matrix_partition.get_major_first()); + auto idx = static_cast(tid); + + while (idx < static_cast(major_last - major_first)) { + vertex_t const* indices{nullptr}; + weight_t const* weights{nullptr}; + edge_t local_degree{}; + auto major_offset = major_start_offset + idx; + thrust::tie(indices, weights, local_degree) = + matrix_partition.get_local_edges(static_cast(major_offset)); + if (local_degree > 0) { + auto local_offset = matrix_partition.get_local_offset(major_offset); + auto minor_key_first = thrust::make_transform_iterator(indices, [] __device__(auto minor) { + return *(adj_matrix_minor_key_first + + matrix_partition.get_minor_offset_from_minor_nocheck(minor)); + }); + thrust::copy( + thrust::seq, minor_key_first, minor_key_first + local_degree, minor_keys + local_offset); + if (weights == nullptr) { + thrust::sort( + thrust::seq, minor_keys + local_offset, minor_keys + local_offset + local_degree); + } else { + thrust::copy( + thrust::seq, weights, weights + local_degree, key_aggregated_edge_weights + local_offset); + thrust::sort_by_key(thrust::seq, + minor_keys + local_offset, + minor_keys + local_offset + local_degree, + key_aggregated_edge_weights + local_offset, + key_aggregated_edge_weights + local_offset + local_degree); + } + // in-place reduce_by_key + vertex_t key_idx{0}; + key_aggregated_edge_weights[local_offset + key_idx] = + weights != nullptr ? weights[0] : weight_t{1.0}; + for (edge_t i = 1; i < local_degree; ++i) { + if (minor_keys[local_offset + i] == minor_keys[local_offset + key_idx]) { + key_aggregated_edge_weights[local_offset + key_idx] += + weights != nullptr ? weights[i] : weight_t{1.0}; + } else { + ++key_idx; + minor_keys[local_offset + key_idx] = minor_keys[local_offset + i]; + key_aggregated_edge_weights[local_offset + key_idx] = + weights != nullptr ? 
weights[i] : weight_t{1.0}; + } + } + thrust::fill(thrust::seq, + major_vertices + local_offset, + major_vertices + local_offset + key_idx, + matrix_partition.get_major_from_major_offset_nocheck(major_offset)); + thrust::fill(thrust::seq, + major_vertices + local_offset + key_idx, + major_vertices + local_offset + local_degree, + cugraph::experimental::invalid_vertex_id::value); + } + + idx += gridDim.x * blockDim.x; + } +} + +} // namespace detail + /** * @brief Iterate over the key-aggregated outgoing edges to update vertex properties. * @@ -41,8 +122,8 @@ namespace experimental { * input properties. * @tparam KeyIterator Type of the iterator for graph adjacency matrix column key values for * aggregation. - * @tparam ValueType Type of the value in (key, value) pairs stored in @p kv_map. - * @tparam KeyAggregatedEdgeOp Type of the quaternary (or quinary) key-aggregated edge operator. + * @tparam ValueIterator Type of the iterator for values in (key, value) pairs. + * @tparam KeyAggregatedEdgeOp Type of the quinary key-aggregated edge operator. * @tparam ReduceOp Type of the binary reduction operator. * @tparam T Type of the initial value for reduction over the key-aggregated outgoing edges. * @tparam VertexValueOutputIterator Type of the iterator for vertex output property variables. @@ -53,17 +134,22 @@ namespace experimental { * properties for the first (inclusive) row (assigned to this process in multi-GPU). * `adj_matrix_row_value_input_last` (exclusive) is deduced as @p adj_matrix_row_value_input_first * + @p graph_view.get_number_of_local_adj_matrix_partition_rows(). - * @param out_nbr_key_first Iterator pointing to the adjacency matrix column key (for aggregation) - * for the first (inclusive) column (assigned to this process in multi-GPU). `out_nbr_key_last` - * (exclusive) is deduced as @p out_nbr_key_first + @p + * @param adj_matrix_col_key_first Iterator pointing to the adjacency matrix column key (for + * aggregation) for the first (inclusive) column (assigned to this process in multi-GPU). + * `adj_matrix_col_key_last` (exclusive) is deduced as @p adj_matrix_col_key_first + @p * graph_view.get_number_of_local_adj_matrix_partition_cols(). - * @param kv_map cuco::static_map object holding (key, value) pairs for the keys pointed by @p - * out_nbr_key_first + i (where i is in [0, - * graph_view.get_number_of_local_adj_matrix_partition_rows())) + * @param map_key_first Iterator pointing to the keys in (key, value) pairs (assigned to this + * process in multi-GPU, `cugraph::experimental::detail::compute_gpu_id_from_vertex_t` is used to + * assign keys to processes). (Key, value) pairs may be provided by transform_reduce_by_key_e(). + * @param map_key_last + * @param map_value_first Iterator pointing to the values in (key, value) pairs (assigned to this + * process in multi-GPU). * @param key_aggregated_e_op Quinary operator takes edge source, key, aggregated edge weight, *(@p - * adj_matrix_row_value_input_first + i), and value stored in @p kv_map for the key (where i is in + * adj_matrix_row_value_input_first + i), and value for the key stored in (@p map_key_first, @p + * map_value_first) + @p kv_map for the key (where i is in * [0, graph_view.get_number_of_local_adj_matrix_partition_rows())) and returns a value to be - * reduced. + * reduced. weight of 1.0 is assumed if unweighted. * @param reduce_op Binary operator takes two input arguments and reduce the two variables to one. 
* @param init Initial value to be added to the reduced @p key_aggregated_e_op return values for * each vertex. @@ -75,7 +161,7 @@ namespace experimental { template ::value_type, ValueType> const& kv_map, + KeyIterator adj_matrix_col_key_first, + KeyIterator map_key_first, + KeyIterator map_key_last, + ValueIterator map_value_first, KeyAggregatedEdgeOp key_aggregated_e_op, ReduceOp reduce_op, T init, VertexValueOutputIterator vertex_value_output_first) { + static_assert(!GraphViewType::is_adj_matrix_transposed, + "GraphViewType should support the push model."); static_assert(std::is_integral::value_type>::value); - CUGRAPH_FAIL("unimplemented."); + typename value_t = typname std::iterator_traits::value_type; + + double constexpr load_factor = 0.7; + + // 1. build a cuco::static_map object for the k, v pairs. + + auto kv_map_ptr = std::make_unique>( + static_cast(static_cast(thrust::distance(map_key_first, map_key_last)) / + load_factor), + invalid_vertex_id::value, + invalid_vertex_id::value); + auto pair_first = thrust::make_transform_iterator( + thrust::make_zip_iterator( + thrust::make_tuple(map_key_first, map_value_first), + [] __device__(auto val) { + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + }); + kv_map_ptr->insert(pair_first, pair_first + thrust::distance(map_key_first, map_key_last)); + + if (GraphViewType::is_multi_gpu) { + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + + rmm::device_uvector unique_keys( + graph_view.get_number_of_local_adj_matrix_partition_cols(), handle.get_stream()); + thrust::copy( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + adj_matrix_col_key_first, + adj_matrix_col_key_first + graph_view.get_number_of_local_adj_matrix_partition_cols(), + unique_keys.begin()); + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + unique_keys.begin(), + unique_keys.end()); + auto last = thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + unique_keys.begin(), + unique_keys.end()); + unique_keys.resize(thrust::distance(unique_keys.begin(), last), handle.get_stream()); + + auto key_func = detail::compute_gpu_id_from_vertex_t{comm_size}; + thrust::sort( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + unique_keys.begin(), + unique_keys.end(), + [key_func] __device__(auto lhs, auto rhs) { return key_func(lhs) < key_func(rhs); }); + + auto key_first = thrust::make_transform_iterator( + unique_keys.begin(), [key_func] __device__(auto val) { return key_func(val); }); + rmm::device_uvector tx_value_counts(comm_size, handle.get_stream()); + thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + key_first, + key_first + unique_keys.size(), + thrust::make_constant_iterator(size_t{1}), + thrust::make_discard_iterator(), + tx_value_counts.begin()); + + rmm::device_uvector rx_unique_keys(0, handle.get_stream()); + rmm::device_uvector rx_value_counts(0, handle.get_stream()); + + std::tie(rx_unique_keys, rx_value_counts) = + cugraph::experimental::detail::shuffle_values(handle, unique_keys.begin(), tx_value_counts); + + rmm::device_uvector values_for_unique_keys(rx_unique_keys.size(), handle.get_stream()); + + CUDA_TRY(cudaStreamSynchronize( + handle.get_stream())); // cuco::static_map currently does not take stream + + kv_map_ptr->find(rx_unique_keys.begin(), rx_unique_keys.end(), values_for_unique_keys.begin()); + + rmm::device_uvector rx_values_for_unique_keys(0, handle.get_stream()); + + 
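    // The values just looked up in the distributed map for the shuffled-in keys are sent back
    // to the ranks that requested them; each rank then rebuilds kv_map below so that it holds
    // (key, value) pairs for exactly the keys appearing in its local adjacency-matrix columns.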
std::tie(rx_values_for_unique_keys, std::ignore) = + cugraph::experimental::detail::shuffle_values( + handle, values_for_unique_keys.begin(), rx_value_counts); + + CUDA_TRY(cudaStreamSynchronize( + handle.get_stream())); // cuco::static_map currently does not take stream + + kv_map_ptr->reset(); + + kv_map_ptr = std::make_unique>( + static_cast(static_cast(unique_keys.size()) / load_factor), + invalid_vertex_id::value, + invalid_vertex_id::value); + + auto pair_first = thrust::make_transform_iterator( + thrust::make_zip_iterator( + thrust::make_tuple(unique_keys.begin(), rx_values_for_unique_keys.begin())), + [] __device__(auto val) { + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + }); + + kv_map_ptr->insert(pair_first, pair_first + unique_keys.size()); + } + + // 2. aggregate each vertex out-going edges based on keys and transform-reduce. + + auto loop_count = size_t{1}; + if (GraphViewType::is_multi_gpu) { + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_size = row_comm.get_size(); + loop_count = graph_view.is_hypergraph_partitioned() + ? graph_view.get_number_of_local_adj_matrix_partitions() + : static_cast(row_comm_size); + } + + rmm::device_uvector major_vertices(0, handle.get_stream()); + auto e_op_result_buffer = allocate_comm_buffer(0, handle.get_stream()); + for (size_t i = 0; i < loop_count; ++i) { + matrix_partition_device_t matrix_partition( + graph_view, (GraphViewType::is_multi_gpu && !graph_view.is_hypergraph_partitioned()) ? 0 : i); + + int comm_root_rank = 0; + if (GraphViewType::is_multi_gpu) { + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_rank = row_comm.get_rank(); + auto const row_comm_size = row_comm.get_size(); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_rank = col_comm.get_rank(); + comm_root_rank = graph_view.is_hypergraph_partitioned() ? i * row_comm_size + row_comm_rank + : col_comm_rank * row_comm_size + i; + } + + auto num_edges = thrust::transform_reduce( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + graph_view.get_vertex_partition_first(comm_root_rank), + graph_view.get_vertex_partition_last(comm_root_rank), + [matrix_partition] __device__(auto row) { + auto row_offset = matrix_partition.get_major_offset_from_major_nocheck(row); + return matrix_partition.get_local_degree(row_offset); + }, + edge_t{0}, + thrust::plus()); + + rmm::device_uvector tmp_major_vertices(num_edges, handle.get_stream()); + rmm::device_uvector tmp_minor_keys(tmp_major_vertices.size(), handle.get_stream()); + rmm::device_uvector tmp_key_aggregated_edge_weights(tmp_major_vertices.size(), + handle.get_stream()); + + if (graph_view.get_vertex_partition_size(comm_root_rank) > 0) { + raft::grid_1d_thread_t update_grid( + graph_view.get_vertex_partition_size(comm_root_rank), + detail::copy_v_transform_reduce_key_aggregated_out_nbr_for_all_block_size, + handle.get_device_properties().maxGridSize[0]); + + // FIXME: This is highly inefficient for graphs with high-degree vertices. If we renumber + // vertices to insure that rows within a partition are sorted by their out-degree in + // decreasing order, we will apply this kernel only to low out-degree vertices. 
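      // One thread is assigned per major (source) vertex: the kernel gathers the key of every
      // outgoing neighbor, sorts the keys (carrying edge weights along, or substituting 1.0 when
      // the graph is unweighted), and runs an in-place reduce_by_key so each row emits
      // (major vertex, minor key, aggregated weight) triplets; unused slots are tagged with the
      // invalid-vertex sentinel and compacted away right after the launch.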
+ detail::for_all_major_for_all_nbr_low_degree<<>>( + matrix_partition, + graph_view.get_vertex_partition_first(comm_root_rank), + graph_view.get_vertex_partition_last(comm_root_rank), + adj_matrix_col_key_first, + tmp_major_vertices.data(), + tmp_minor_keys.data(), + tmp_key_aggregated_edge_weights.data()); + } + + auto triplet_first = thrust::make_zip_iterator(thrust::make_tuple( + tmp_major_vertices.begin(), tmp_minor_keys.begin(), tmp_key_aggregated_edge_weights.begin())); + auto last = + thrust::remove_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + triplet_first, + triplet_first + tmp_major_vertices.size(), + [] __device__(auto val) { + return thrust::get<0>(val) == invalid_vertex_id::value; + }); + tmp_major_vertices.resize(thrust::distance(triplet_first, last), handle.get_stream()); + tmp_minor_keys.resize(tmp_major_vertices.size(), handle.get_stream()); + tmp_key_aggregated_edge_weights.resize(tmp_major_vertices.size(), handle.get_stream()); + + if (GraphViewType::is_multi_gpu) { + auto& sub_comm = handle.get_subcomm(graph_view.is_hypergraph_partitioned() + ? cugraph::partition_2d::key_naming_t().col_name() + : cugraph::partition_2d::key_naming_t().row_name()); + auto const sub_comm_size = sub_comm.get_size(); + + triplet_first = thrust::make_zip_iterator(thrust::make_tuple( + tmp_major_vertices.begin(), tmp_minor_keys.begin(), tmp_key_aggregated_edge_weights.begin())); + auto key_func = detail::compute_gpu_id_from_vertex_t{sub_comm_size}; + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + triplet_first, + triplet_first + tmp_major_vertices.size(), + [key_func] __device__(auto lhs, auto rhs) { + return key_func(thrust::get<1>(lhs) < key_func(thrust::get<1>(rhs)); + }); + auto key_first = thrust::make_transform_iterator( + triplet_first, [key_func] __device__(auto val) { return key_func(thrust::get<1>(val)); }); + rmm::device_uvector tx_value_counts(sub_comm.get_size(), handle.get_stream()); + thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + key_first, + key_first + tmp_major_vertices.size(), + thrust::make_constant_iterator(size_t{1}), + thrust::make_discard_iterator(), + tx_value_counts.begin()); + + rmm::device_uvector rx_major_vertices(0, handle.get_stream()); + rmm::device_uvector rx_minor_keys(0, handle.get_stream()); + rmm::device_uvector rx_key_aggregatd_edge_weights(0, handle.get_stream()); + + std::tie(rx_major_vertices, rx_minor_keys, rx_key_aggregated_edge_weights, std::ignore) = + detail::shuffle_values(sub_comm, triplet_first, tx_value_counts, handle.get_stream()); + + tmp_major_vertices = std::move(rx_major_vertices); + tmp_minor_keys = std::move(rx_minor_keys); + tmp_key_aggregated_edge_weights = std::move(rx_key_aggregatd_edge_weights); + + CUDA_TRY( + cudaStreamSynchronize(handle.get_stream())); // tx_value_counts will become out-of-scope + } + + auto e_op_result_tmp_buffer = + allocate_comm_buffer(major_vertices.size(), handle.get_stream()); + auto e_op_result_buffer_first = get_comm_buffer_begin(e_op_result_tmp_buffer); + + triplet_first = thrust::make_zip_iterator(thrust::make_tuple( + major_vertices.begin(), minor_keys.begin(), key_aggregated_edge_weights.begin())); + thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + triplet_first, + triplet_first + major_vertices.size(), + e_op_result_buffer_first, + [] __device__(auto val) { + auto major = thrust::get<0>(val); + auto key = thrust::get<1>(val); + auto w = thrust::get<2>(val); + return 
key_aggregated_e_op(); + }); + minor_keys.resize(0, handle.get_stream()); + key_aggregated_edge_weights.resize(0, handle.get_stream()); + minor_keys.shrink_to_fit(handle.get_stream()); + key_aggregated_edge_weights.shrink_to_fit(handle.get_stream()); + + if (GraphViewType::is_multi_gpu) { + // FIXME: additional optimization is possible if reduce_op is a pure function (and reduce_op + // can be mapped to ncclRedOp_t). + + device_gatherv(); + e_op_result_tmp_buffer = std::move(); + } + } + + { + // FIXME: this runs only on one GPU a subcomm. + thrust::sort_by_key(); + thrust::reduce_by_key(); + + thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertex_value_output_first, + vertex_value_output_first + graph_view.get_vertex_partition_size(comm_root_rank), + init); + + thrust::for_each([] __device__(auto val) { + auto major = ; + auto val = ; + *(vertex_value_output_first +) = reduce_op(val, *(vertex_value_output_first +)); + }); + } } } // namespace experimental diff --git a/cpp/include/patterns/transform_reduce_by_key_e.cuh b/cpp/include/patterns/transform_reduce_by_key_e.cuh index 6b6ae135ba1..771901f4a7f 100644 --- a/cpp/include/patterns/transform_reduce_by_key_e.cuh +++ b/cpp/include/patterns/transform_reduce_by_key_e.cuh @@ -70,20 +70,82 @@ template -cuco::static_map::value_type, T> +thrust::tuple, rmm::device_uvector> transform_reduce_by_key_e(raft::handle_t const& handle, GraphViewType const& graph_view, AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, EdgeOp e_op, - T init, - KeyIterator map_key_first, - KeyIterator map_key_last) + T init) { static_assert(is_arithmetic_or_thrust_tuple_of_arithmetic::value); static_assert(std::is_integral::value_type>::value); - CUGRAPH_FAIL("unimplemented."); + // If I change the function name to transform_reduce_by_adj_matrix_col_key_e + + // initialize static::cuco_map with *(adj_matrix_col_key_first + i), init + + // find(key), add e_op return value + + // iterate and get (key, value) pairs + + // shuffle and reduce again + + // collect value for map_key + + // return static::cuco_map + + + rmm::device_uvector keys(0, handle.get_stream()); + rmm::device_uvector values(0, handle.get_stream()); + for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { + matrix_partition_device_t matrix_partition(graph_view, i); + + edge_t max_pushes = matrix_partition.get_number_of_edges(); + + // FIXME: This is highly pessimistic as # unique keys is likely to be much smaller than the + // number of edges. If we use cuco::dynamic_map and can pause & resume execution if buffer needs + // to be increased, we can start with a smaller buffer size than the worst possible size. + rmm::device_uvector keys(max_pushes, handle.get_stream()); + rmm::device_uvector values(max_pushes, handle.get_stream()); + auto kv_buffer = + allocate_comm_buffer>(max_pushes, handle.get_stream()); + auto kv_buffer_first = get_comm_buffer_begin>(kv_buffer); + vertex_frontier.resize_buffer(vertex_frontier.get_buffer_idx_value() + max_pushes); + auto buffer_first = vertex_frontier.buffer_begin(); + auto buffer_key_first = std::get<0>(buffer_first); + auto buffer_payload_first = std::get<1>(buffer_first); + + auto row_value_input_offset = GraphViewType::is_adj_matrix_transposed + ? vertex_t{0} + : matrix_partition.get_major_value_start_offset(); + + // FIXME: This is highly inefficeint for graphs with high-degree vertices. 
If we renumber + // vertices to insure that rows within a partition are sorted by their out-degree in decreasing + // order, we will apply this kernel only to low out-degree vertices. + detail::for_all_major_for_all_nbr_low_degree<<>>( + matrix_partition, + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first, + buffer_key_first, + buffer_value_first, + vertex_frontier.get_buffer_idx_ptr(), + e_op); + + thrust::sort(); + thrust::reduce_by_key(); + } + + if (multi_gpu) { + thrust::sort(); + thrust::reduce_by_key(); + + tx_keys; + rx_values; + } return cuco::static_map::value_type, T>(); } From d21433c6e608ceb3a936ed958c058264825d52af Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 10 Dec 2020 13:58:43 -0500 Subject: [PATCH 036/343] add host_scalar_reduce() --- cpp/include/utilities/comm_utils.cuh | 74 ++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/cpp/include/utilities/comm_utils.cuh b/cpp/include/utilities/comm_utils.cuh index 0c82ed2f82f..08a0cf65626 100644 --- a/cpp/include/utilities/comm_utils.cuh +++ b/cpp/include/utilities/comm_utils.cuh @@ -27,6 +27,7 @@ #include #include +// FIXME: split this file to three: host_scalar_comm_utils.cuh, device_comm_utils.cuh, and buffer_utils.cuh namespace cugraph { namespace experimental { @@ -92,6 +93,32 @@ struct host_allreduce_tuple_scalar_element_impl { } }; +template +struct host_reduce_tuple_scalar_element_impl { + void run(raft::comms::comms_t const& comm, + rmm::device_uvector& tuple_scalar_elements, + int root, + cudaStream_t stream) const + { + using element_t = typename thrust::tuple_element::type; + static_assert(sizeof(element_t) <= sizeof(int64_t)); + auto ptr = reinterpret_cast(tuple_scalar_elements.data() + I); + comm.reduce(ptr, ptr, 1, raft::comms::op_t::SUM, root, stream); + host_reduce_tuple_scalar_element_impl().run( + comm, tuple_scalar_elements, root, stream); + } +}; + +template +struct host_reduce_tuple_scalar_element_impl { + void run(raft::comms::comms_t const& comm, + rmm::device_uvector& tuple_scalar_elements, + int root, + cudaStream_t stream) const + { + } +}; + template T* iter_to_raw_ptr(T* ptr) { @@ -692,6 +719,53 @@ host_scalar_allreduce(raft::comms::comms_t const& comm, T input, cudaStream_t st return ret; } +// Return value is valid only in root (return value may better be std::optional in C++17 or later) +template +std::enable_if_t::value, T> host_scalar_reduce( + raft::comms::comms_t const& comm, T input, int root, cudaStream_t stream) +{ + rmm::device_uvector d_input(1, stream); + raft::update_device(d_input.data(), &input, 1, stream); + comm.reduce(d_input.data(), d_input.data(), 1, raft::comms::op_t::SUM, stream); + T h_input{}; + if (comm.get_rank() == root) { + raft::update_host(&h_input, d_input.data(), 1, stream); + } + auto status = comm.sync_stream(stream); + CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); + return h_input; +} + +// Return value is valid only in root (return value may better be std::optional in C++17 or later) +template +std::enable_if_t::value, T> +host_scalar_reduce(raft::comms::comms_t const& comm, T input, int root, cudaStream_t stream) +{ + size_t constexpr tuple_size = thrust::tuple_size::value; + std::vector h_tuple_scalar_elements(tuple_size); + rmm::device_uvector d_tuple_scalar_elements(tuple_size, stream); + T ret{}; + + detail::update_vector_of_tuple_scalar_elements_from_tuple_impl().update( + h_tuple_scalar_elements, input); + raft::update_device( + 
d_tuple_scalar_elements.data(), h_tuple_scalar_elements.data(), tuple_size, stream); + detail::host_reduce_tuple_scalar_element_impl().run( + comm, d_tuple_scalar_elements, root, stream); + if (comm.get_rank() == root) { + raft::update_host( + h_tuple_scalar_elements.data(), d_tuple_scalar_elements.data(), tuple_size, stream); + } + auto status = comm.sync_stream(stream); + CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); + if (comm.get_rank() == root) { + detail::update_tuple_from_vector_of_tuple_scalar_elements_impl().update( + ret, h_tuple_scalar_elements); + } + + return ret; +} + template std::enable_if_t::value, T> host_scalar_bcast( raft::comms::comms_t const& comm, T input, int root, cudaStream_t stream) From 949461d9441f67001d0a776d22c364bdd1966289 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 10 Dec 2020 15:04:46 -0500 Subject: [PATCH 037/343] add host_scalar_gather & host_scalar_gatherv --- cpp/include/utilities/comm_utils.cuh | 90 +++++++++++++++++++++++++--- 1 file changed, 83 insertions(+), 7 deletions(-) diff --git a/cpp/include/utilities/comm_utils.cuh b/cpp/include/utilities/comm_utils.cuh index 08a0cf65626..b4e2709aa3f 100644 --- a/cpp/include/utilities/comm_utils.cuh +++ b/cpp/include/utilities/comm_utils.cuh @@ -27,7 +27,8 @@ #include #include -// FIXME: split this file to three: host_scalar_comm_utils.cuh, device_comm_utils.cuh, and buffer_utils.cuh +// FIXME: split this file to three: host_scalar_comm_utils.cuh, device_comm_utils.cuh, and +// buffer_utils.cuh namespace cugraph { namespace experimental { @@ -728,9 +729,7 @@ std::enable_if_t::value, T> host_scalar_reduce( raft::update_device(d_input.data(), &input, 1, stream); comm.reduce(d_input.data(), d_input.data(), 1, raft::comms::op_t::SUM, stream); T h_input{}; - if (comm.get_rank() == root) { - raft::update_host(&h_input, d_input.data(), 1, stream); - } + if (comm.get_rank() == root) { raft::update_host(&h_input, d_input.data(), 1, stream); } auto status = comm.sync_stream(stream); CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); return h_input; @@ -759,8 +758,8 @@ host_scalar_reduce(raft::comms::comms_t const& comm, T input, int root, cudaStre auto status = comm.sync_stream(stream); CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); if (comm.get_rank() == root) { - detail::update_tuple_from_vector_of_tuple_scalar_elements_impl().update( - ret, h_tuple_scalar_elements); + detail::update_tuple_from_vector_of_tuple_scalar_elements_impl() + .update(ret, h_tuple_scalar_elements); } return ret; @@ -819,12 +818,13 @@ std::enable_if_t::value, std::vector> host_scalar_allga std::iota(displacements.begin(), displacements.end(), size_t{0}); rmm::device_uvector d_outputs(rx_counts.size(), stream); raft::update_device(d_outputs.data() + comm.get_rank(), &input, 1, stream); + // FIXME: better use allgather comm.allgatherv(d_outputs.data() + comm.get_rank(), d_outputs.data(), rx_counts.data(), displacements.data(), stream); - std::vector h_outputs(rx_counts.size(), size_t{0}); + std::vector h_outputs(rx_counts.size()); raft::update_host(h_outputs.data(), d_outputs.data(), rx_counts.size(), stream); auto status = comm.sync_stream(stream); CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); @@ -849,6 +849,7 @@ host_scalar_allgather(raft::comms::comms_t const& comm, T input, cudaStream_t st h_tuple_scalar_elements.data(), tuple_size, stream); + // FIXME: better use allgather 
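  // Each rank contributes the same fixed number of elements here (tuple_size scalars in this
  // overload, a single scalar in the arithmetic one), so variable receive counts are not really
  // needed; the FIXME above notes that a plain allgather would be the better fit once available.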
comm.allgatherv(d_allgathered_tuple_scalar_elements.data() + comm.get_rank() * tuple_size, d_allgathered_tuple_scalar_elements.data(), rx_counts.data(), @@ -874,6 +875,81 @@ host_scalar_allgather(raft::comms::comms_t const& comm, T input, cudaStream_t st return ret; } +// Return value is valid only in root (return value may better be std::optional in C++17 or later) +template +std::enable_if_t::value, std::vector> host_scalar_gather( + raft::comms::comms_t const& comm, T input, int root, cudaStream_t stream) +{ + rmm::device_uvector d_outputs(comm.get_rank() == root ? comm.get_size() : int{1}, stream); + raft::update_device( + comm.get_rank() == root ? d_outputs.data() + comm.get_rank() : d_outputs.data(), + &input, + 1, + stream); + comm.gather(comm.get_rank() == root ? d_outputs.data() + comm.get_rank() : d_outputs.data(), + d_outputs.data(), + size_t{1}, + root, + stream); + std::vector h_outputs(comm.get_rank() == root ? comm.get_size() : 0); + if (comm.get_rank() == root ?) { + raft::update_host(h_outputs.data(), d_outputs.data(), rx_counts.size(), stream); + } + auto status = comm.sync_stream(stream); + CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); + return h_outputs; +} + +// Return value is valid only in root (return value may better be std::optional in C++17 or later) +template +std::enable_if_t::value, std::vector> +host_scalar_gather(raft::comms::comms_t const& comm, T input, int root, cudaStream_t stream) +{ + size_t constexpr tuple_size = thrust::tuple_size::value; + std::vector h_tuple_scalar_elements(tuple_size); + rmm::device_uvector d_gathered_tuple_scalar_elements( + comm.get_rank() == root ? comm.get_size() * tuple_size : tuple_size, stream); + + detail::update_vector_of_tuple_scalar_elements_from_tuple_impl().update( + h_tuple_scalar_elements, input); + raft::update_device(comm.get_rank() == root + ? d_gathered_tuple_scalar_elements.data() + comm.get_rank() * tuple_size + : d_gathered_tuple_scalar_elements.data(), + h_tuple_scalar_elements.data(), + tuple_size, + stream); + comm.gather(comm.get_rank() == root + ? d_gathered_tuple_scalar_elements.data() + comm.get_rank() * tuple_size + : d_gathered_tuple_scalar_elements.data(), + d_gathered_tuple_scalar_elements.data(), + tuple_size, + root, + stream); + std::vector h_gathered_tuple_scalar_elements( + comm.get_rank() == root ? 
comm.get_size() * tuple_size : size_t{0}); + if (comm.get_rank() == root) { + raft::update_host(h_gathered_tuple_scalar_elements.data(), + d_gathered_tuple_scalar_elements.data(), + comm.get_size() * tuple_size, + stream); + } + auto status = comm.sync_stream(stream); + CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); + + std::vector ret(comm.get_size()); + if (comm.get_rank() == root) { + for (size_t i = 0; i < ret.size(); ++i) { + std::vector h_tuple_scalar_elements( + h_gathered_tuple_scalar_elements.data() + i * tuple_size, + h_gathered_tuple_scalar_elements.data() + (i + 1) * tuple_size); + detail::update_tuple_from_vector_of_tuple_scalar_elements_impl() + .update(ret[i], h_tuple_scalar_elements); + } + } + + return ret; +} + template std::enable_if_t< std::is_arithmetic::value_type>::value, From d894c669f740f84269f2ce2c919578557ab4c1de Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 10 Dec 2020 15:24:06 -0500 Subject: [PATCH 038/343] add device_gatherv --- cpp/include/utilities/comm_utils.cuh | 121 +++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) diff --git a/cpp/include/utilities/comm_utils.cuh b/cpp/include/utilities/comm_utils.cuh index b4e2709aa3f..bf9566f01fc 100644 --- a/cpp/include/utilities/comm_utils.cuh +++ b/cpp/include/utilities/comm_utils.cuh @@ -649,6 +649,82 @@ struct device_allgatherv_tuple_iterator_element_impl +std::enable_if_t::value, void> +device_gatherv_impl(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + size_t sendcount, + std::vector const& recvcounts, + std::vector const& displacements, + int root, + cudaStream_t stream) +{ + // no-op +} + +template +std::enable_if_t< + std::is_arithmetic::value_type>::value, + void> +device_gatherv_impl(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + size_t sendcount, + std::vector const& recvcounts, + std::vector const& displacements, + int root, + cudaStream_t stream) +{ + static_assert(std::is_same::value_type, + typename std::iterator_traits::value_type>::value); + comm.gatherv(iter_to_raw_ptr(input_first), + iter_to_raw_ptr(output_first), + sendcount, + recvcounts.data(), + displacements.data(), + root, + stream); +} + +template +struct device_gatherv_tuple_iterator_element_impl { + void run(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + size_t sendcount, + std::vector const& recvcounts, + std::vector const& displacements, + int root, + cudaStream_t stream) const + { + device_gatherv_impl(comm, + thrust::get(input_first.get_iterator_tuple()), + thrust::get(output_first.get_iterator_tuple()), + sendcount, + recvcounts, + displacements, + root, + stream); + device_gatherv_tuple_iterator_element_impl().run( + comm, input_first, output_first, sendcount, recvcounts, displacements, root, stream); + } +}; + +template +struct device_gatherv_tuple_iterator_element_impl { + void run(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + size_t sendcount, + std::vector const& recvcounts, + std::vector const& displacements, + int root, + cudaStream_t stream) const + { + } +}; + template auto allocate_comm_buffer_tuple_element_impl(size_t buffer_size, cudaStream_t stream) { @@ -1265,6 +1341,51 @@ device_allgatherv(raft::comms::comms_t const& comm, .run(comm, input_first, output_first, recvcounts, displacements, stream); } +template +std::enable_if_t< + std::is_arithmetic::value_type>::value, + 
void> +device_gatherv(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + size_t sendcount, + std::vector const& recvcounts, + std::vector const& displacements, + int root, + cudaStream_t stream) +{ + detail::device_gatherv_impl( + comm, input_first, output_first, sendcount, recvcounts, displacements, root, stream); +} + +template +std::enable_if_t< + is_thrust_tuple_of_arithmetic::value_type>::value && + is_thrust_tuple::value_type>::value, + void> +device_gatherv(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + size_t sendcount, + std::vector const& recvcounts, + std::vector const& displacements, + int root, + cudaStream_t stream) +{ + static_assert( + thrust::tuple_size::value_type>::value == + thrust::tuple_size::value_type>::value); + + size_t constexpr tuple_size = + thrust::tuple_size::value_type>::value; + + detail::device_allgatherv_tuple_iterator_element_impl() + .run(comm, input_first, output_first, sendcount, recvcounts, displacements, root, stream); +} + template ::value>* = nullptr> auto allocate_comm_buffer(size_t buffer_size, cudaStream_t stream) { From 6cfafee66da6b62897922745e649a5ecebaa837f Mon Sep 17 00:00:00 2001 From: Charles Hastings Date: Thu, 10 Dec 2020 14:20:15 -0800 Subject: [PATCH 039/343] need the ifdefs --- cpp/src/community/louvain.cu | 10 ++++++++++ cpp/tests/experimental/louvain_test.cu | 9 +++++++++ 2 files changed, 19 insertions(+) diff --git a/cpp/src/community/louvain.cu b/cpp/src/community/louvain.cu index 5066bf494ce..8bb7a9c1661 100644 --- a/cpp/src/community/louvain.cu +++ b/cpp/src/community/louvain.cu @@ -15,8 +15,18 @@ */ #include + +// "FIXME": remove this check +// +// Disable louvain(experimental::graph_view_t,...) +// versions for GPU architectures < 700 +// (cuco/static_map.cuh depends on features not supported on or before Pascal) +// +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 700 #include +#else #include +#endif namespace cugraph { diff --git a/cpp/tests/experimental/louvain_test.cu b/cpp/tests/experimental/louvain_test.cu index 9ed636cd682..fd327b31cf3 100644 --- a/cpp/tests/experimental/louvain_test.cu +++ b/cpp/tests/experimental/louvain_test.cu @@ -21,8 +21,17 @@ #include #include +// "FIXME": remove this check +// +// Disable louvain(experimental::graph_view_t,...) 
+// versions for GPU architectures < 700 +// (cuco/static_map.cuh depends on features not supported on or before Pascal) +// +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 700 #include +#else #include +#endif #include From 7cbfdedc5c596c1daa5088fe4dd283d379af6835 Mon Sep 17 00:00:00 2001 From: Charles Hastings Date: Thu, 10 Dec 2020 14:44:24 -0800 Subject: [PATCH 040/343] missed a spot --- cpp/src/community/louvain.cu | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cpp/src/community/louvain.cu b/cpp/src/community/louvain.cu index 8bb7a9c1661..d7ceb1603b7 100644 --- a/cpp/src/community/louvain.cu +++ b/cpp/src/community/louvain.cu @@ -69,9 +69,13 @@ std::pair louvain( if (device_prop.major < 7) { CUGRAPH_FAIL("Louvain not supported on Pascal and older architectures"); } else { +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 700 + CUGRAPH_FAIL("Louvain not supported on Pascal and older architectures"); +#else experimental::Louvain> runner(handle, graph_view); return runner(clustering, max_level, resolution); +#endif } } From 3931f90b46df2ceb475dca55763b2edd817d7950 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Fri, 11 Dec 2020 01:27:17 -0500 Subject: [PATCH 041/343] fix compile error --- cpp/include/utilities/comm_utils.cuh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/include/utilities/comm_utils.cuh b/cpp/include/utilities/comm_utils.cuh index bf9566f01fc..f4e2a3921a1 100644 --- a/cpp/include/utilities/comm_utils.cuh +++ b/cpp/include/utilities/comm_utils.cuh @@ -968,8 +968,8 @@ std::enable_if_t::value, std::vector> host_scalar_gathe root, stream); std::vector h_outputs(comm.get_rank() == root ? comm.get_size() : 0); - if (comm.get_rank() == root ?) { - raft::update_host(h_outputs.data(), d_outputs.data(), rx_counts.size(), stream); + if (comm.get_rank() == root) { + raft::update_host(h_outputs.data(), d_outputs.data(), comm.get_size(), stream); } auto status = comm.sync_stream(stream); CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); From dc7d664af9159036cd015e912f62d59690f8648b Mon Sep 17 00:00:00 2001 From: Charles Hastings Date: Fri, 11 Dec 2020 08:10:30 -0800 Subject: [PATCH 042/343] re-enable EXPERIMENTAL_LOUVAIN_TEST --- cpp/tests/CMakeLists.txt | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 593c36359e2..9b57ad4557c 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -302,12 +302,11 @@ ConfigureTest(EXPERIMENTAL_PAGERANK_TEST "${EXPERIMENTAL_PAGERANK_TEST_SRCS}" "" ################################################################################################### # - Experimental LOUVAIN tests ------------------------------------------------------------------- -# FIXME: Re-enable once failures are fixed -#set(EXPERIMENTAL_LOUVAIN_TEST_SRCS -# "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" -# "${CMAKE_CURRENT_SOURCE_DIR}/experimental/louvain_test.cu") -# -#ConfigureTest(EXPERIMENTAL_LOUVAIN_TEST "${EXPERIMENTAL_LOUVAIN_TEST_SRCS}" "") +set(EXPERIMENTAL_LOUVAIN_TEST_SRCS + "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/experimental/louvain_test.cu") + +ConfigureTest(EXPERIMENTAL_LOUVAIN_TEST "${EXPERIMENTAL_LOUVAIN_TEST_SRCS}" "") ################################################################################################### # - Experimental KATZ_CENTRALITY tests ------------------------------------------------------------ From d1ee4be48b828ffc7e06e1f13456800f0ee05464 
Mon Sep 17 00:00:00 2001 From: Charles Hastings Date: Fri, 11 Dec 2020 08:40:34 -0800 Subject: [PATCH 043/343] add runtime check --- cpp/tests/experimental/louvain_test.cu | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/cpp/tests/experimental/louvain_test.cu b/cpp/tests/experimental/louvain_test.cu index 344d2757bd4..16543a4ad18 100644 --- a/cpp/tests/experimental/louvain_test.cu +++ b/cpp/tests/experimental/louvain_test.cu @@ -84,7 +84,20 @@ class Tests_Louvain : public ::testing::TestWithParam { auto graph_view = graph.view(); - louvain(graph_view); + // "FIXME": remove this check + // + // Disable louvain(experimental::graph_view_t,...) + // versions for GPU architectures < 700 + // (cuco/static_map.cuh depends on features not supported on or before Pascal) + // + cudaDeviceProp device_prop; + CUDA_CHECK(cudaGetDeviceProperties(&device_prop, 0)); + + if (device_prop.major < 7) { + EXPECT_THROW(louvain(graph_view), cugraph::logic_error); + } else { + louvain(graph_view); + } } template From ba5cda81119095de0dbae24c12060ebebe5f1b5c Mon Sep 17 00:00:00 2001 From: Charles Hastings Date: Fri, 11 Dec 2020 09:34:49 -0800 Subject: [PATCH 044/343] clean up some guards and text --- cpp/src/community/louvain.cu | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/cpp/src/community/louvain.cu b/cpp/src/community/louvain.cu index ed2323ac08f..cc3885af58a 100644 --- a/cpp/src/community/louvain.cu +++ b/cpp/src/community/louvain.cu @@ -16,7 +16,7 @@ #include -// "FIXME": remove this check +// "FIXME": remove the guards after support for Pascal is dropped // // Disable louvain(experimental::graph_view_t,...) // versions for GPU architectures < 700 @@ -57,7 +57,7 @@ std::pair louvain( { CUGRAPH_EXPECTS(clustering != nullptr, "Invalid input argument: clustering is null"); - // "FIXME": remove this check + // "FIXME": remove this check and the guards below // // Disable louvain(experimental::graph_view_t,...) // versions for GPU architectures < 700 @@ -70,7 +70,6 @@ std::pair louvain( CUGRAPH_FAIL("Louvain not supported on Pascal and older architectures"); } else { #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 700 - CUGRAPH_FAIL("Louvain not supported on Pascal and older architectures"); #else experimental::Louvain> runner(handle, graph_view); @@ -90,11 +89,7 @@ std::pair louvain(raft::handle_t const &h { CUGRAPH_EXPECTS(clustering != nullptr, "Invalid input argument: clustering is null"); -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 700 - CUGRAPH_FAIL("Louvain not supported on Pascal and older architectures"); -#else return detail::louvain(handle, graph, clustering, max_level, resolution); -#endif } // Explicit template instantations From e02989b57b3edc6eb00f47de35c95728c69de561 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Fri, 11 Dec 2020 14:25:52 -0500 Subject: [PATCH 045/343] remove return at the end in void functions --- cpp/src/experimental/bfs.cu | 2 -- cpp/src/experimental/graph_functions.cu | 2 -- cpp/src/experimental/katz_centrality.cu | 2 -- cpp/src/experimental/pagerank.cu | 2 -- 4 files changed, 8 deletions(-) diff --git a/cpp/src/experimental/bfs.cu b/cpp/src/experimental/bfs.cu index f297587a1d6..e99dbcaebc6 100644 --- a/cpp/src/experimental/bfs.cu +++ b/cpp/src/experimental/bfs.cu @@ -165,8 +165,6 @@ void bfs(raft::handle_t const &handle, handle.get_stream())); // this is as necessary vertex_frontier will become out-of-scope once // this function returns (FIXME: should I stream sync in VertexFrontier // destructor?) 
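For reference, the runtime guard added by the Louvain patches above (in louvain.cu and exercised by louvain_test.cu) reduces to a host-side compute-capability query: anything older than Volta (compute capability 7.0) is rejected because cuco::static_map depends on features Pascal does not support. A minimal sketch of that pattern, assuming only the CUDA runtime API plus this repository's CUDA_CHECK and CUGRAPH_FAIL macros (the function name here is illustrative, not from the patch):

// Sketch only: mirrors the guard used in louvain.cu / louvain_test.cu above.
#include <cuda_runtime.h>

inline void fail_if_pre_volta(int device_id = 0)
{
  cudaDeviceProp device_prop;
  CUDA_CHECK(cudaGetDeviceProperties(&device_prop, device_id));
  if (device_prop.major < 7) {  // Pascal is 6.x; cuco::static_map needs compute capability >= 7.0
    CUGRAPH_FAIL("Louvain not supported on Pascal and older architectures");
  }
}

The test side performs the same query but wraps the call in EXPECT_THROW(..., cugraph::logic_error) so that the failure path itself is exercised on older GPUs.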
- - return; } } // namespace detail diff --git a/cpp/src/experimental/graph_functions.cu b/cpp/src/experimental/graph_functions.cu index 7f88421252a..5ac3b7ae135 100644 --- a/cpp/src/experimental/graph_functions.cu +++ b/cpp/src/experimental/graph_functions.cu @@ -145,8 +145,6 @@ void sort_and_coarsen_edgelist(rmm::device_uvector &edgelist_major_ver edgelist_major_vertices.shrink_to_fit(stream); edgelist_minor_vertices.shrink_to_fit(stream); edgelist_weights.shrink_to_fit(stream); - - return; } template diff --git a/cpp/src/experimental/katz_centrality.cu b/cpp/src/experimental/katz_centrality.cu index 587011da817..e93b39efbe4 100644 --- a/cpp/src/experimental/katz_centrality.cu +++ b/cpp/src/experimental/katz_centrality.cu @@ -166,8 +166,6 @@ void katz_centrality(raft::handle_t const &handle, katz_centralities, [l2_norm] __device__(auto val) { return val / l2_norm; }); } - - return; } } // namespace detail diff --git a/cpp/src/experimental/pagerank.cu b/cpp/src/experimental/pagerank.cu index 1aa7f37fa6b..35533041b67 100644 --- a/cpp/src/experimental/pagerank.cu +++ b/cpp/src/experimental/pagerank.cu @@ -286,8 +286,6 @@ void pagerank(raft::handle_t const& handle, CUGRAPH_FAIL("PageRank failed to converge."); } } - - return; } } // namespace detail From 68e11d9ce35157bdc5283c1a8a7b29142859c06a Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Fri, 11 Dec 2020 15:51:06 -0500 Subject: [PATCH 046/343] copy_v_transform_reduce_key_aggregated_out_nbr --- ...ransform_reduce_key_aggregated_out_nbr.cuh | 214 +++++++++++++----- 1 file changed, 159 insertions(+), 55 deletions(-) diff --git a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh index e0f116a9f01..4bdd50d0be7 100644 --- a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh +++ b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh @@ -15,8 +15,11 @@ */ #pragma once +#include +#include #include #include +#include #include @@ -38,9 +41,10 @@ __global__ void for_all_major_for_all_nbr_low_degree( typename GraphViewType::vertex_type major_first, typename GraphViewType::vertex_type major_last, KeyIterator adj_matrix_minor_key_first, - typename GraphViewType::vertex_type const* major_vertices, - typename GraphViewType::vertex_type const* minor_keys, - typename GraphViewType::weight_type const* key_aggregated_edge_weights) + typename GraphViewType::vertex_type* major_vertices, + typename GraphViewType::vertex_type* minor_keys, + typename GraphViewType::weight_type* key_aggregated_edge_weights, + typename GraphViewType::vertex_type invalid_vertex) { using vertex_t = typename GraphViewType::vertex_type; using edge_t = typename GraphViewType::edge_type; @@ -59,10 +63,11 @@ __global__ void for_all_major_for_all_nbr_low_degree( matrix_partition.get_local_edges(static_cast(major_offset)); if (local_degree > 0) { auto local_offset = matrix_partition.get_local_offset(major_offset); - auto minor_key_first = thrust::make_transform_iterator(indices, [] __device__(auto minor) { - return *(adj_matrix_minor_key_first + - matrix_partition.get_minor_offset_from_minor_nocheck(minor)); - }); + auto minor_key_first = thrust::make_transform_iterator( + indices, [matrix_partition, adj_matrix_minor_key_first] __device__(auto minor) { + return *(adj_matrix_minor_key_first + + matrix_partition.get_minor_offset_from_minor_nocheck(minor)); + }); thrust::copy( thrust::seq, minor_key_first, minor_key_first + local_degree, minor_keys + 
local_offset); if (weights == nullptr) { @@ -74,8 +79,7 @@ __global__ void for_all_major_for_all_nbr_low_degree( thrust::sort_by_key(thrust::seq, minor_keys + local_offset, minor_keys + local_offset + local_degree, - key_aggregated_edge_weights + local_offset, - key_aggregated_edge_weights + local_offset + local_degree); + key_aggregated_edge_weights + local_offset); } // in-place reduce_by_key vertex_t key_idx{0}; @@ -99,7 +103,8 @@ __global__ void for_all_major_for_all_nbr_low_degree( thrust::fill(thrust::seq, major_vertices + local_offset + key_idx, major_vertices + local_offset + local_degree, - cugraph::experimental::invalid_vertex_id::value); + invalid_vertex); + // cugraph::experimental::invalid_vertex_id::value); } idx += gridDim.x * blockDim.x; @@ -183,7 +188,10 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( "GraphViewType should support the push model."); static_assert(std::is_integral::value_type>::value); - typename value_t = typname std::iterator_traits::value_type; + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; + using value_t = typename std::iterator_traits::value_type; double constexpr load_factor = 0.7; @@ -195,10 +203,9 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( invalid_vertex_id::value, invalid_vertex_id::value); auto pair_first = thrust::make_transform_iterator( - thrust::make_zip_iterator( - thrust::make_tuple(map_key_first, map_value_first), + thrust::make_zip_iterator(thrust::make_tuple(map_key_first, map_value_first)), [] __device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); }); kv_map_ptr->insert(pair_first, pair_first + thrust::distance(map_key_first, map_key_last)); @@ -241,8 +248,8 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( rmm::device_uvector rx_unique_keys(0, handle.get_stream()); rmm::device_uvector rx_value_counts(0, handle.get_stream()); - std::tie(rx_unique_keys, rx_value_counts) = - cugraph::experimental::detail::shuffle_values(handle, unique_keys.begin(), tx_value_counts); + std::tie(rx_unique_keys, rx_value_counts) = cugraph::experimental::detail::shuffle_values( + comm, unique_keys.begin(), tx_value_counts, handle.get_stream()); rmm::device_uvector values_for_unique_keys(rx_unique_keys.size(), handle.get_stream()); @@ -255,12 +262,12 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( std::tie(rx_values_for_unique_keys, std::ignore) = cugraph::experimental::detail::shuffle_values( - handle, values_for_unique_keys.begin(), rx_value_counts); + comm, values_for_unique_keys.begin(), rx_value_counts, handle.get_stream()); CUDA_TRY(cudaStreamSynchronize( handle.get_stream())); // cuco::static_map currently does not take stream - kv_map_ptr->reset(); + kv_map_ptr.reset(); kv_map_ptr = std::make_unique>( static_cast(static_cast(unique_keys.size()) / load_factor), @@ -307,8 +314,8 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( auto num_edges = thrust::transform_reduce( rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - graph_view.get_vertex_partition_first(comm_root_rank), - graph_view.get_vertex_partition_last(comm_root_rank), + thrust::make_counting_iterator(graph_view.get_vertex_partition_first(comm_root_rank)), + thrust::make_counting_iterator(graph_view.get_vertex_partition_last(comm_root_rank)), [matrix_partition] __device__(auto row) { auto row_offset = 
matrix_partition.get_major_offset_from_major_nocheck(row); return matrix_partition.get_local_degree(row_offset); @@ -316,7 +323,7 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( edge_t{0}, thrust::plus()); - rmm::device_uvector tmp_major_vertices(num_edges, handle.get_stream()); + rmm::device_uvector tmp_major_vertices(num_edges, handle.get_stream()); rmm::device_uvector tmp_minor_keys(tmp_major_vertices.size(), handle.get_stream()); rmm::device_uvector tmp_key_aggregated_edge_weights(tmp_major_vertices.size(), handle.get_stream()); @@ -327,6 +334,8 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( detail::copy_v_transform_reduce_key_aggregated_out_nbr_for_all_block_size, handle.get_device_properties().maxGridSize[0]); + auto constexpr invalid_vertex = invalid_vertex_id::value; + // FIXME: This is highly inefficient for graphs with high-degree vertices. If we renumber // vertices to insure that rows within a partition are sorted by their out-degree in // decreasing order, we will apply this kernel only to low out-degree vertices. @@ -340,7 +349,8 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( adj_matrix_col_key_first, tmp_major_vertices.data(), tmp_minor_keys.data(), - tmp_key_aggregated_edge_weights.data()); + tmp_key_aggregated_edge_weights.data(), + invalid_vertex); } auto triplet_first = thrust::make_zip_iterator(thrust::make_tuple( @@ -362,14 +372,16 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( : cugraph::partition_2d::key_naming_t().row_name()); auto const sub_comm_size = sub_comm.get_size(); - triplet_first = thrust::make_zip_iterator(thrust::make_tuple( - tmp_major_vertices.begin(), tmp_minor_keys.begin(), tmp_key_aggregated_edge_weights.begin())); + triplet_first = + thrust::make_zip_iterator(thrust::make_tuple(tmp_major_vertices.begin(), + tmp_minor_keys.begin(), + tmp_key_aggregated_edge_weights.begin())); auto key_func = detail::compute_gpu_id_from_vertex_t{sub_comm_size}; thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), triplet_first, triplet_first + tmp_major_vertices.size(), [key_func] __device__(auto lhs, auto rhs) { - return key_func(thrust::get<1>(lhs) < key_func(thrust::get<1>(rhs)); + return key_func(thrust::get<1>(lhs) < key_func(thrust::get<1>(rhs))); }); auto key_first = thrust::make_transform_iterator( triplet_first, [key_func] __device__(auto val) { return key_func(thrust::get<1>(val)); }); @@ -383,65 +395,157 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( rmm::device_uvector rx_major_vertices(0, handle.get_stream()); rmm::device_uvector rx_minor_keys(0, handle.get_stream()); - rmm::device_uvector rx_key_aggregatd_edge_weights(0, handle.get_stream()); + rmm::device_uvector rx_key_aggregated_edge_weights(0, handle.get_stream()); std::tie(rx_major_vertices, rx_minor_keys, rx_key_aggregated_edge_weights, std::ignore) = detail::shuffle_values(sub_comm, triplet_first, tx_value_counts, handle.get_stream()); tmp_major_vertices = std::move(rx_major_vertices); tmp_minor_keys = std::move(rx_minor_keys); - tmp_key_aggregated_edge_weights = std::move(rx_key_aggregatd_edge_weights); + tmp_key_aggregated_edge_weights = std::move(rx_key_aggregated_edge_weights); CUDA_TRY( cudaStreamSynchronize(handle.get_stream())); // tx_value_counts will become out-of-scope } - auto e_op_result_tmp_buffer = - allocate_comm_buffer(major_vertices.size(), handle.get_stream()); - auto e_op_result_buffer_first = get_comm_buffer_begin(e_op_result_tmp_buffer); + auto tmp_e_op_result_buffer = + 
allocate_comm_buffer(tmp_major_vertices.size(), handle.get_stream()); + auto tmp_e_op_result_buffer_first = get_comm_buffer_begin(tmp_e_op_result_buffer); triplet_first = thrust::make_zip_iterator(thrust::make_tuple( - major_vertices.begin(), minor_keys.begin(), key_aggregated_edge_weights.begin())); + tmp_major_vertices.begin(), tmp_minor_keys.begin(), tmp_key_aggregated_edge_weights.begin())); thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), triplet_first, triplet_first + major_vertices.size(), - e_op_result_buffer_first, - [] __device__(auto val) { + tmp_e_op_result_buffer_first, + [adj_matrix_row_value_input_first, + key_aggregated_e_op, + matrix_partition, + kv_map = kv_map_ptr->get_device_view()] __device__(auto val) { auto major = thrust::get<0>(val); auto key = thrust::get<1>(val); auto w = thrust::get<2>(val); - return key_aggregated_e_op(); + return key_aggregated_e_op( + major, + key, + w, + *(adj_matrix_row_value_input_first + + matrix_partition.get_major_offset_from_major_nocheck(major)), + kv_map.find(key)->second); }); - minor_keys.resize(0, handle.get_stream()); - key_aggregated_edge_weights.resize(0, handle.get_stream()); - minor_keys.shrink_to_fit(handle.get_stream()); - key_aggregated_edge_weights.shrink_to_fit(handle.get_stream()); + tmp_minor_keys.resize(0, handle.get_stream()); + tmp_key_aggregated_edge_weights.resize(0, handle.get_stream()); + tmp_minor_keys.shrink_to_fit(handle.get_stream()); + tmp_key_aggregated_edge_weights.shrink_to_fit(handle.get_stream()); if (GraphViewType::is_multi_gpu) { + auto& sub_comm = handle.get_subcomm(graph_view.is_hypergraph_partitioned() + ? cugraph::partition_2d::key_naming_t().col_name() + : cugraph::partition_2d::key_naming_t().row_name()); + auto const sub_comm_rank = sub_comm.get_rank(); + auto const sub_comm_size = sub_comm.get_size(); + // FIXME: additional optimization is possible if reduce_op is a pure function (and reduce_op // can be mapped to ncclRedOp_t). - device_gatherv(); - e_op_result_tmp_buffer = std::move(); + auto rx_sizes = + host_scalar_gather(sub_comm, tmp_major_vertices.size(), i, handle.get_stream()); + std::vector rx_displs(sub_comm_rank == i ? sub_comm_size : int{0}, size_t{0}); + if (sub_comm_rank == i) { + std::partial_sum(rx_sizes.begin(), rx_sizes.end() - 1, rx_displs.begin() + 1); + } + rmm::device_uvector rx_major_vertices( + sub_comm_rank == i ? 
std::accumulate(rx_sizes.begin(), rx_sizes.end(), size_t{0}) + : size_t{0}, + handle.get_stream()); + auto rx_tmp_e_op_result_buffer = + allocate_comm_buffer(rx_major_vertices.size(), handle.get_stream()); + + device_gatherv(sub_comm, + tmp_major_vertices.data(), + rx_major_vertices.data(), + tmp_major_vertices.size(), + rx_sizes, + rx_displs, + i, + handle.get_stream()); + device_gatherv(sub_comm, + tmp_e_op_result_buffer_first, + get_comm_buffer_begin(rx_tmp_e_op_result_buffer), + tmp_major_vertices.size(), + rx_sizes, + rx_displs, + i, + handle.get_stream()); + + if (sub_comm_rank == i) { + major_vertices = std::move(rx_major_vertices); + e_op_result_buffer = std::move(rx_tmp_e_op_result_buffer); + } + + CUDA_TRY(cudaStreamSynchronize( + handle + .get_stream())); // tmp_minor_keys, tmp_key_aggregated_edge_weights, rx_major_vertices, + // and rx_tmp_e_op_result_buffer will become out-of-scope + } else { + major_vertices = std::move(tmp_major_vertices); + e_op_result_buffer = std::move(tmp_e_op_result_buffer); + + CUDA_TRY(cudaStreamSynchronize( + handle.get_stream())); // tmp_minor_keys and tmp_key_aggregated_edge_weights will become + // out-of-scope } } - { - // FIXME: this runs only on one GPU a subcomm. - thrust::sort_by_key(); - thrust::reduce_by_key(); - - thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - vertex_value_output_first, - vertex_value_output_first + graph_view.get_vertex_partition_size(comm_root_rank), - init); - - thrust::for_each([] __device__(auto val) { - auto major = ; - auto val = ; - *(vertex_value_output_first +) = reduce_op(val, *(vertex_value_output_first +)); + thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertex_value_output_first, + vertex_value_output_first + graph_view.get_number_of_local_vertices(), + T{}); + thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + major_vertices.begin(), + major_vertices.end(), + get_comm_buffer_begin(e_op_result_buffer)); + + auto num_uniques = thrust::count_if( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(major_vertices.size()), + [major_vertices = major_vertices.data()] __device__(auto i) { + return ((i == 0) || (major_vertices[i] != major_vertices[i - 1])) ? true : false; }); - } + rmm::device_uvector unique_major_vertices(num_uniques, handle.get_stream()); + + auto major_vertex_first = thrust::make_transform_iterator( + thrust::make_counting_iterator(size_t{0}), + [major_vertices = major_vertices.data()] __device__(auto i) { + return ((i == 0) || (major_vertices[i] == major_vertices[i - 1])) + ? 
major_vertices[i] + : invalid_vertex_id::value; + }); + thrust::copy_if( + major_vertex_first, + major_vertex_first + major_vertices.size(), + unique_major_vertices.begin(), + [] __device__(auto major) { return major != invalid_vertex_id::value; }); + thrust::reduce_by_key( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + major_vertices.begin(), + major_vertices.end(), + get_comm_buffer_begin(e_op_result_buffer), + thrust::make_discard_iterator(), + thrust::make_permutation_iterator( + vertex_value_output_first, + thrust::make_transform_iterator( + major_vertices.begin(), + [vertex_partition = vertex_partition_device_t(graph_view)] __device__( + auto v) { return vertex_partition.get_local_vertex_offset_from_vertex_nocheck(v); })), + reduce_op); + + thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertex_value_output_first, + vertex_value_output_first + graph_view.get_number_of_local_vertices(), + vertex_value_output_first, + [reduce_op, init] __device__(auto val) { return reduce_op(val, init); }); } } // namespace experimental From f8624762cc6a1a69cc5227cc53907b1aa5278cc1 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Fri, 11 Dec 2020 15:51:53 -0500 Subject: [PATCH 047/343] remove return at the end of a void function --- cpp/src/experimental/sssp.cu | 2 -- 1 file changed, 2 deletions(-) diff --git a/cpp/src/experimental/sssp.cu b/cpp/src/experimental/sssp.cu index ebcde1b1444..7561f569227 100644 --- a/cpp/src/experimental/sssp.cu +++ b/cpp/src/experimental/sssp.cu @@ -249,8 +249,6 @@ void sssp(raft::handle_t const &handle, handle.get_stream())); // this is as necessary vertex_frontier will become out-of-scope once // this function returns (FIXME: should I stream sync in VertexFrontier // destructor?) 
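The final stage of copy_v_transform_reduce_key_aggregated_out_nbr above collapses the per-(major vertex, key) e_op results into one reduced value per local vertex: sort by major vertex, reduce each run of equal vertices, and write the result into the vertex-indexed output. A simplified, self-contained sketch of that step, assuming plain thrust containers, int/float stand-ins for vertex_t and T, sum as the reduction, and vertex id equal to output index (the primitive itself maps vertices to local offsets and fuses the write through a permutation iterator):

#include <thrust/device_vector.h>
#include <thrust/sort.h>
#include <thrust/reduce.h>
#include <thrust/scatter.h>

// majors: one major vertex id per partial result; values: the corresponding e_op outputs;
// vertex_out: one accumulator per local vertex, pre-filled with the reduction identity.
void reduce_into_vertex_output(thrust::device_vector<int>& majors,
                               thrust::device_vector<float>& values,
                               thrust::device_vector<float>& vertex_out)
{
  thrust::sort_by_key(majors.begin(), majors.end(), values.begin());

  thrust::device_vector<int> unique_majors(majors.size());
  thrust::device_vector<float> sums(values.size());
  auto ends = thrust::reduce_by_key(
    majors.begin(), majors.end(), values.begin(), unique_majors.begin(), sums.begin());

  // scatter each vertex's reduced value into its output slot
  thrust::scatter(sums.begin(), ends.second, unique_majors.begin(), vertex_out.begin());
}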
- - return; } } // namespace detail From 51342a654966226622888df327f37c046a9b916d Mon Sep 17 00:00:00 2001 From: BradReesWork Date: Tue, 15 Dec 2020 10:16:31 -0500 Subject: [PATCH 048/343] Merge branch-0.17 into branch-0.18 --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b345d9ff4df..42286c54df4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,6 @@ ## Bug Fixes # cuGraph 0.17.0 (10 Dec 2020) - ## New Features - PR #1276 MST - PR #1245 Add functions to add pandas and numpy compatibility From 0aee65f95bb55c61c85b696f7d12b77c51b79f3f Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 15 Dec 2020 17:00:45 -0500 Subject: [PATCH 049/343] add resize_comm_buffer --- ...rm_reduce_by_adj_matrix_row_col_key_e.cuh} | 0 cpp/include/utilities/comm_utils.cuh | 24 +++++++++++++++++++ 2 files changed, 24 insertions(+) rename cpp/include/patterns/{transform_reduce_by_key_e.cuh => transform_reduce_by_adj_matrix_row_col_key_e.cuh} (100%) diff --git a/cpp/include/patterns/transform_reduce_by_key_e.cuh b/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh similarity index 100% rename from cpp/include/patterns/transform_reduce_by_key_e.cuh rename to cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh diff --git a/cpp/include/utilities/comm_utils.cuh b/cpp/include/utilities/comm_utils.cuh index f4e2a3921a1..219c717e970 100644 --- a/cpp/include/utilities/comm_utils.cuh +++ b/cpp/include/utilities/comm_utils.cuh @@ -741,6 +741,16 @@ auto allocate_comm_buffer_tuple_impl(std::index_sequence, allocate_comm_buffer_tuple_element_impl(buffer_size, stream)...); } +template +void resize_comm_buffer_tuple_element_impl(BufferType& buffer, size_t new_buffer_size, cudaStream_t stream) +{ + std::get(buffer).resize(new_buffer_size, stream); + resize_comm_buffer_tuple_element_impl(buffer, new_buffer_size, stream); +} + +template +void resize_comm_buffer_tuple_impl(BufferType& buffer, size_t new_buffer_size, cudaStream_t stream) {} + template auto get_comm_buffer_begin_tuple_element_impl(BufferType& buffer) { @@ -1400,6 +1410,20 @@ auto allocate_comm_buffer(size_t buffer_size, cudaStream_t stream) std::make_index_sequence(), buffer_size, stream); } +template ::value>* = nullptr> +void resize_comm_buffer(BufferType& buffer, size_t new_buffer_size, cudaStream_t stream) +{ + buffer.resize(new_buffer_size, stream); +} + +template ::value>* = nullptr> +void resize_comm_buffer(BufferType& buffer, size_t new_buffer_size, cudaStream_t stream) +{ + size_t constexpr tuple_size = thrust::tuple_size::value; + detail::resize_comm_buffer_tuple_impl( + buffer, new_buffer_size, stream); +} + template ::value>* = nullptr> From 2369a78c5698046a70f134ac86f72ea81f3c6119 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 15 Dec 2020 17:02:18 -0500 Subject: [PATCH 050/343] initial implementation of transform_reduce_by_adj_matrix_row_col_key_e & copy_v_transform_reduce_key_aggregated_out_nbr --- .../experimental/detail/graph_utils.cuh | 1 + ...ransform_reduce_key_aggregated_out_nbr.cuh | 30 +- ...orm_reduce_by_adj_matrix_row_col_key_e.cuh | 403 ++++++++++++++---- 3 files changed, 342 insertions(+), 92 deletions(-) diff --git a/cpp/include/experimental/detail/graph_utils.cuh b/cpp/include/experimental/detail/graph_utils.cuh index 92614d00678..e68eaaa463a 100644 --- a/cpp/include/experimental/detail/graph_utils.cuh +++ b/cpp/include/experimental/detail/graph_utils.cuh @@ -157,6 +157,7 @@ rmm::device_uvector compute_major_degree( return 
compute_major_degree(handle, tmp_offsets, partition); } +// FIXME: better move this to elsewhere template auto shuffle_values(raft::comms::comms_t const &comm, TxValueIterator tx_value_first, diff --git a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh index 4bdd50d0be7..7fbf5ef9250 100644 --- a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh +++ b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh @@ -35,12 +35,12 @@ namespace detail { // FIXME: block size requires tuning int32_t constexpr copy_v_transform_reduce_key_aggregated_out_nbr_for_all_block_size = 128; -template +template __global__ void for_all_major_for_all_nbr_low_degree( matrix_partition_device_t matrix_partition, typename GraphViewType::vertex_type major_first, typename GraphViewType::vertex_type major_last, - KeyIterator adj_matrix_minor_key_first, + VertexIterator adj_matrix_minor_key_first, typename GraphViewType::vertex_type* major_vertices, typename GraphViewType::vertex_type* minor_keys, typename GraphViewType::weight_type* key_aggregated_edge_weights, @@ -104,7 +104,6 @@ __global__ void for_all_major_for_all_nbr_low_degree( major_vertices + local_offset + key_idx, major_vertices + local_offset + local_degree, invalid_vertex); - // cugraph::experimental::invalid_vertex_id::value); } idx += gridDim.x * blockDim.x; @@ -125,7 +124,7 @@ __global__ void for_all_major_for_all_nbr_low_degree( * @tparam GraphViewType Type of the passed non-owning graph object. * @tparam AdjMatrixRowValueInputIterator Type of the iterator for graph adjacency matrix row * input properties. - * @tparam KeyIterator Type of the iterator for graph adjacency matrix column key values for + * @tparam VertexIterator Type of the iterator for graph adjacency matrix column key values for * aggregation. * @tparam ValueIterator Type of the iterator for values in (key, value) pairs. * @tparam KeyAggregatedEdgeOp Type of the quinary key-aggregated edge operator. @@ -165,7 +164,7 @@ __global__ void for_all_major_for_all_nbr_low_degree( */ template ::value_type>::value); + static_assert(std::is_same::value_type, + typename GraphViewType::vertex_type>::value); using vertex_t = typename GraphViewType::vertex_type; using edge_t = typename GraphViewType::edge_type; @@ -450,13 +450,15 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( auto rx_sizes = host_scalar_gather(sub_comm, tmp_major_vertices.size(), i, handle.get_stream()); - std::vector rx_displs(sub_comm_rank == i ? sub_comm_size : int{0}, size_t{0}); - if (sub_comm_rank == i) { + std::vector rx_displs( + static_cast(sub_comm_rank) == i ? sub_comm_size : int{0}, size_t{0}); + if (static_cast(sub_comm_rank) == i) { std::partial_sum(rx_sizes.begin(), rx_sizes.end() - 1, rx_displs.begin() + 1); } rmm::device_uvector rx_major_vertices( - sub_comm_rank == i ? std::accumulate(rx_sizes.begin(), rx_sizes.end(), size_t{0}) - : size_t{0}, + static_cast(sub_comm_rank) == i + ? 
std::accumulate(rx_sizes.begin(), rx_sizes.end(), size_t{0}) + : size_t{0}, handle.get_stream()); auto rx_tmp_e_op_result_buffer = allocate_comm_buffer(rx_major_vertices.size(), handle.get_stream()); @@ -478,7 +480,7 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( i, handle.get_stream()); - if (sub_comm_rank == i) { + if (static_cast(sub_comm_rank) == i) { major_vertices = std::move(rx_major_vertices); e_op_result_buffer = std::move(rx_tmp_e_op_result_buffer); } diff --git a/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh b/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh index 771901f4a7f..a1797ad2729 100644 --- a/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh +++ b/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh @@ -15,6 +15,7 @@ */ #pragma once +#include #include #include @@ -27,6 +28,309 @@ namespace cugraph { namespace experimental { +namespace detail { + +// FIXME: block size requires tuning +int32_t constexpr transform_reduce_by_key_e_for_all_block_size = 128; + +template +__global__ void for_all_major_for_all_nbr_low_degree( + matrix_partition_device_t matrix_partition, + typename GraphViewType::vertex_type major_first, + typename GraphViewType::vertex_type major_last, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, + VertexIterator adj_matrix_row_col_key_first, + EdgeOp e_op, + typename GraphViewType::vertex_type* keys, + T* values) +{ + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; + + auto const tid = threadIdx.x + blockIdx.x * blockDim.x; + auto major_start_offset = static_cast(major_first - matrix_partition.get_major_first()); + auto idx = static_cast(tid); + + while (idx < static_cast(major_last - major_first)) { + vertex_t const* indices{nullptr}; + weight_t const* weights{nullptr}; + edge_t local_degree{}; + auto major_offset = major_start_offset + idx; + thrust::tie(indices, weights, local_degree) = + matrix_partition.get_local_edges(static_cast(major_offset)); + if (local_degree > 0) { + auto transform_op = [&matrix_partition, + &adj_matrix_row_value_input_first, + &adj_matrix_col_value_input_first, + &adj_matrix_row_col_key_first, + &e_op, + major_offset, + indices, + weights] __device__(auto i) { + auto minor = indices[i]; + auto weight = weights != nullptr ? weights[i] : weight_t{1.0}; + auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); + auto row = GraphViewType::is_adj_matrix_transposed + ? minor + : matrix_partition.get_major_from_major_offset_nocheck(major_offset); + auto col = GraphViewType::is_adj_matrix_transposed + ? matrix_partition.get_major_from_major_offset_nocheck(major_offset) + : minor; + auto row_offset = GraphViewType::is_adj_matrix_transposed + ? minor_offset + : static_cast(major_offset); + auto col_offset = GraphViewType::is_adj_matrix_transposed + ? static_cast(major_offset) + : minor_offset; + + auto key = + *(adj_matrix_row_col_key_first + + ((GraphViewType::is_adj_matrix_transposed != adj_matrix_row_key) ? 
major_offset + : minor_offset)); + auto e_op_result = evaluate_edge_op() + .compute(row, + col, + weight, + *(adj_matrix_row_value_input_first + row_offset), + *(adj_matrix_col_value_input_first + col_offset), + e_op); + + return thrust::make_tuple(key, e_op_result); + }; + + auto local_offset = matrix_partition.get_local_offset(major_offset); + thrust::transform( + thrust::seq, + thrust::make_counting_iterator(edge_t{0}), + thrust::make_counting_iterator(local_degree), + thrust::make_zip_iterator(thrust::make_tuple(keys + local_offset, values + local_offset)), + transform_op); + } + + idx += gridDim.x * blockDim.x; + } +} + +template +std::tuple, + decltype(allocate_comm_buffer(0, cudaStream_t{nullptr}))> +transform_reduce_by_adj_matrix_row_col_key_e( + raft::handle_t const& handle, + GraphViewType const& graph_view, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, + VertexIterator adj_matrix_row_col_key_first, + EdgeOp e_op, + T init) +{ + static_assert(is_arithmetic_or_thrust_tuple_of_arithmetic::value); + static_assert(std::is_same::value_type, + typename GraphViewType::vertex_type>::value); + + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; + + auto loop_count = size_t{1}; + if (GraphViewType::is_multi_gpu) { + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_size = row_comm.get_size(); + loop_count = graph_view.is_hypergraph_partitioned() + ? graph_view.get_number_of_local_adj_matrix_partitions() + : static_cast(row_comm_size); + } + + rmm::device_uvector keys(0, handle.get_stream()); + auto value_buffer = allocate_comm_buffer(0, handle.get_stream()); + for (size_t i = 0; i < loop_count; ++i) { + matrix_partition_device_t matrix_partition( + graph_view, (GraphViewType::is_multi_gpu && !graph_view.is_hypergraph_partitioned()) ? 0 : i); + + int comm_root_rank = 0; + if (GraphViewType::is_multi_gpu) { + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_rank = row_comm.get_rank(); + auto const row_comm_size = row_comm.get_size(); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_rank = col_comm.get_rank(); + comm_root_rank = graph_view.is_hypergraph_partitioned() ? 
i * row_comm_size + row_comm_rank + : col_comm_rank * row_comm_size + i; + } + + auto num_edges = thrust::transform_reduce( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::make_counting_iterator(graph_view.get_vertex_partition_first(comm_root_rank)), + thrust::make_counting_iterator(graph_view.get_vertex_partition_last(comm_root_rank)), + [matrix_partition] __device__(auto row) { + auto major_offset = matrix_partition.get_major_offset_from_major_nocheck(row); + return matrix_partition.get_local_degree(major_offset); + }, + edge_t{0}, + thrust::plus()); + + rmm::device_uvector tmp_keys(num_edges, handle.get_stream()); + auto tmp_value_buffer = allocate_comm_buffer(tmp_keys.size(), handle.get_stream()); + + if (graph_view.get_vertex_partition_size(comm_root_rank) > 0) { + raft::grid_1d_thread_t update_grid(graph_view.get_vertex_partition_size(comm_root_rank), + detail::transform_reduce_by_key_e_for_all_block_size, + handle.get_device_properties().maxGridSize[0]); + + // FIXME: This is highly inefficient for graphs with high-degree vertices. If we renumber + // vertices to insure that rows within a partition are sorted by their out-degree in + // decreasing order, we will apply this kernel only to low out-degree vertices. + detail::for_all_major_for_all_nbr_low_degree + <<>>( + matrix_partition, + graph_view.get_vertex_partition_first(comm_root_rank), + graph_view.get_vertex_partition_last(comm_root_rank), + adj_matrix_row_value_input_first, + adj_matrix_col_value_input_first, + adj_matrix_row_col_key_first, + e_op, + tmp_keys.data(), + get_comm_buffer_begin(tmp_value_buffer)); + } + + if (GraphViewType::is_multi_gpu) { + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + + thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + tmp_keys.begin(), + tmp_keys.end(), + get_comm_buffer_begin(tmp_value_buffer)); + + auto num_uniques = + thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(tmp_keys.size()), + [tmp_keys = tmp_keys.data()] __device__(auto i) { + return ((i == 0) || (tmp_keys[i] != tmp_keys[i - 1])) ? 
true : false; + }); + rmm::device_uvector unique_keys(num_uniques, handle.get_stream()); + auto value_for_unique_key_buffer = + allocate_comm_buffer(unique_keys.size(), handle.get_stream()); + + thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + tmp_keys.begin(), + tmp_keys.end(), + get_comm_buffer_begin(tmp_value_buffer), + unique_keys.begin(), + get_comm_buffer_begin(value_for_unique_key_buffer)); + + auto key_func = detail::compute_gpu_id_from_vertex_t{comm_size}; + thrust::sort_by_key( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + unique_keys.begin(), + unique_keys.end(), + get_comm_buffer_begin(value_for_unique_key_buffer), + [key_func] __device__(auto lhs, auto rhs) { return key_func(lhs) < key_func(rhs); }); + + auto key_first = thrust::make_transform_iterator( + unique_keys.begin(), [key_func] __device__(auto val) { return key_func(val); }); + rmm::device_uvector tx_value_counts(comm_size, handle.get_stream()); + thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + key_first, + key_first + unique_keys.size(), + thrust::make_constant_iterator(size_t{1}), + thrust::make_discard_iterator(), + tx_value_counts.begin()); + + rmm::device_uvector rx_unique_keys(0, handle.get_stream()); + auto rx_value_for_unique_key_buffer = allocate_comm_buffer(0, handle.get_stream()); + + std::tie(rx_unique_keys, std::ignore) = cugraph::experimental::detail::shuffle_values( + comm, unique_keys.begin(), tx_value_counts, handle.get_stream()); + std::tie(rx_value_for_unique_key_buffer, std::ignore) = + cugraph::experimental::detail::shuffle_values( + comm, + get_comm_buffer_begin(value_for_unique_key_buffer), + tx_value_counts, + handle.get_stream()); + + // FIXME: we can reduce after shuffle + + tmp_keys = std::move(rx_unique_keys); + tmp_value_buffer = std::move(rx_value_for_unique_key_buffer); + + CUDA_TRY(cudaStreamSynchronize( + handle + .get_stream())); // unique_keys & value_for_unique_key_buffer will become out-of-scope + } + + auto cur_size = keys.size(); + // FIXME: this can lead to frequent costly reallocation; we may be able to avoid this if we can + // reserve address space to avoid expensive reallocation. + // https://devblogs.nvidia.com/introducing-low-level-gpu-virtual-memory-management + keys.resize(cur_size + tmp_keys.size(), handle.get_stream()); + resize_comm_buffer(value_buffer, keys.size(), handle.get_stream()); + + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + tmp_keys.begin(), + tmp_keys.end(), + keys.begin() + cur_size); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + get_comm_buffer_begin(tmp_value_buffer), + get_comm_buffer_begin(tmp_value_buffer) + tmp_keys.size(), + get_comm_buffer_begin(value_buffer) + cur_size); + } + + if (GraphViewType::is_multi_gpu) { + thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + keys.begin(), + keys.end(), + get_comm_buffer_begin(value_buffer)); + + auto num_uniques = + thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(keys.size()), + [keys = keys.data()] __device__(auto i) { + return ((i == 0) || (keys[i] != keys[i - 1])) ? 
true : false; + }); + rmm::device_uvector unique_keys(num_uniques, handle.get_stream()); + auto value_for_unique_key_buffer = + allocate_comm_buffer(unique_keys.size(), handle.get_stream()); + + thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + keys.begin(), + keys.end(), + get_comm_buffer_begin(value_buffer), + unique_keys.begin(), + get_comm_buffer_begin(value_for_unique_key_buffer)); + + keys = std::move(unique_keys); + value_buffer = std::move(value_for_unique_key_buffer); + } + + // FIXME: add init + + return std::make_tuple(std::move(keys), std::move(value_buffer)); +} + +} // namespace detail + +// FIXME: EdgeOp & VertexOp in update_frontier_v_push_if_out_nbr concatenates push inidicator or +// bucket idx with the value while EdgeOp here does not. This is inconsistent. Better be fixed. +// FIXME: rmm::device_uvector does not work if T is a tuple. /** * @brief Iterate over the entire set of edges and reduce @p edge_op outputs to (key, value) pairs. * @@ -39,7 +343,7 @@ namespace experimental { * input properties. * @tparam EdgeOp Type of the quaternary (or quinary) edge operator. * @tparam T Type of the initial value of the value in each (key, value) pair. - * @tparam KeyIterator Type of the iterator for keys in (key, value) pairs. + * @tparam VertexIterator Type of the iterator for keys in (key, value) pairs. * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. * @param graph_view Non-owning graph object. @@ -67,87 +371,30 @@ namespace experimental { template -thrust::tuple, rmm::device_uvector> -transform_reduce_by_key_e(raft::handle_t const& handle, - GraphViewType const& graph_view, - AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, - AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, - EdgeOp e_op, - T init) + typename T> +auto transform_reduce_by_adj_matrix_col_key_e( + raft::handle_t const& handle, + GraphViewType const& graph_view, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, + VertexIterator adj_matrix_col_key_first, + EdgeOp e_op, + T init) { static_assert(is_arithmetic_or_thrust_tuple_of_arithmetic::value); - static_assert(std::is_integral::value_type>::value); - - // If I change the function name to transform_reduce_by_adj_matrix_col_key_e - - // initialize static::cuco_map with *(adj_matrix_col_key_first + i), init - - // find(key), add e_op return value - - // iterate and get (key, value) pairs - - // shuffle and reduce again - - // collect value for map_key - - // return static::cuco_map - - - rmm::device_uvector keys(0, handle.get_stream()); - rmm::device_uvector values(0, handle.get_stream()); - for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { - matrix_partition_device_t matrix_partition(graph_view, i); - - edge_t max_pushes = matrix_partition.get_number_of_edges(); - - // FIXME: This is highly pessimistic as # unique keys is likely to be much smaller than the - // number of edges. If we use cuco::dynamic_map and can pause & resume execution if buffer needs - // to be increased, we can start with a smaller buffer size than the worst possible size. 
- rmm::device_uvector keys(max_pushes, handle.get_stream()); - rmm::device_uvector values(max_pushes, handle.get_stream()); - auto kv_buffer = - allocate_comm_buffer>(max_pushes, handle.get_stream()); - auto kv_buffer_first = get_comm_buffer_begin>(kv_buffer); - vertex_frontier.resize_buffer(vertex_frontier.get_buffer_idx_value() + max_pushes); - auto buffer_first = vertex_frontier.buffer_begin(); - auto buffer_key_first = std::get<0>(buffer_first); - auto buffer_payload_first = std::get<1>(buffer_first); - - auto row_value_input_offset = GraphViewType::is_adj_matrix_transposed - ? vertex_t{0} - : matrix_partition.get_major_value_start_offset(); - - // FIXME: This is highly inefficeint for graphs with high-degree vertices. If we renumber - // vertices to insure that rows within a partition are sorted by their out-degree in decreasing - // order, we will apply this kernel only to low out-degree vertices. - detail::for_all_major_for_all_nbr_low_degree<<>>( - matrix_partition, - adj_matrix_row_value_input_first + row_value_input_offset, - adj_matrix_col_value_input_first, - buffer_key_first, - buffer_value_first, - vertex_frontier.get_buffer_idx_ptr(), - e_op); - - thrust::sort(); - thrust::reduce_by_key(); - } - - if (multi_gpu) { - thrust::sort(); - thrust::reduce_by_key(); - - tx_keys; - rx_values; - } + static_assert(std::is_same::value_type, + typename GraphViewType::vertex_type>::value); - return cuco::static_map::value_type, T>(); + return detail::transform_reduce_by_adj_matrix_row_col_key_e( + handle, + graph_view, + adj_matrix_row_value_input_first, + adj_matrix_col_value_input_first, + adj_matrix_col_key_first, + e_op, + init); } } // namespace experimental From f3f996d375a76b01177ddd5009db75e4895c30e5 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Wed, 16 Dec 2020 01:14:13 -0500 Subject: [PATCH 051/343] update documentation --- ...ransform_reduce_key_aggregated_out_nbr.cuh | 30 +++--- ...orm_reduce_by_adj_matrix_row_col_key_e.cuh | 96 ++++++++++++++++--- 2 files changed, 100 insertions(+), 26 deletions(-) diff --git a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh index 7fbf5ef9250..345cd52c10d 100644 --- a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh +++ b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh @@ -113,19 +113,19 @@ __global__ void for_all_major_for_all_nbr_low_degree( } // namespace detail /** - * @brief Iterate over the key-aggregated outgoing edges to update vertex properties. + * @brief Iterate over every vertex's key-aggregated outgoing edges to update vertex properties. * * This function is inspired by thrust::transfrom_reduce() (iteration over the outgoing edges * part) and thrust::copy() (update vertex properties part, take transform_reduce output as copy * input). * Unlike copy_v_transform_reduce_out_nbr, this function first aggregates outgoing edges by key to - * support two level reduction for each vertex. + * support two level reduction for every vertex. * * @tparam GraphViewType Type of the passed non-owning graph object. * @tparam AdjMatrixRowValueInputIterator Type of the iterator for graph adjacency matrix row * input properties. * @tparam VertexIterator Type of the iterator for graph adjacency matrix column key values for - * aggregation. + * aggregation (key type should coincide with vertex type). * @tparam ValueIterator Type of the iterator for values in (key, value) pairs. 
* @tparam KeyAggregatedEdgeOp Type of the quinary key-aggregated edge operator. * @tparam ReduceOp Type of the binary reduction operator. @@ -142,18 +142,20 @@ __global__ void for_all_major_for_all_nbr_low_degree( * aggregation) for the first (inclusive) column (assigned to this process in multi-GPU). * `adj_matrix_col_key_last` (exclusive) is deduced as @p adj_matrix_col_key_first + @p * graph_view.get_number_of_local_adj_matrix_partition_cols(). - * @param map_key_first Iterator pointing to the keys in (key, value) pairs (assigned to this - * process in multi-GPU, `cugraph::experimental::detail::compute_gpu_id_from_vertex_t` is used to - * assign keys to processes). (Key, value) pairs may be provided by transform_reduce_by_key_e(). - * @param map_key_last - * @param map_value_first Iterator pointing to the values in (key, value) pairs (assigned to this - * process in multi-GPU). + * @param map_key_first Iterator pointing to the first (inclusive) key in (key, value) pairs + * (assigned to this process in multi-GPU, + * `cugraph::experimental::detail::compute_gpu_id_from_vertex_t` is used to map keys to processes). + * (Key, value) pairs may be provided by transform_reduce_by_adj_matrix_row_key_e() or + * transform_reduce_by_adj_matrix_col_key_e(). + * @param map_key_last Iterator pointing to the last (exclusive) key in (key, value) pairs (assigned + * to this process in multi-GPU). + * @param map_value_first Iterator pointing to the first (inclusive) value in (key, value) pairs + * (assigned to this process in multi-GPU). `map_value_last` (exclusive) is deduced as @p + * map_value_first + thrust::distance(@p map_key_first, @p map_key_last). * @param key_aggregated_e_op Quinary operator takes edge source, key, aggregated edge weight, *(@p - * adj_matrix_row_value_input_first + i), and value for the key stored in (@p map_key_first, @p - * map_value_first) - @p kv_map for the key (where i is in - * [0, graph_view.get_number_of_local_adj_matrix_partition_rows())) and returns a value to be - * reduced. weight of 1.0 is assumed if unweighted. + * adj_matrix_row_value_input_first + i), and value for the key stored in the input (key, value) + * pairs provided by @p map_key_first, @p map_key_last, and @p map_value_first (aggregated over the + * entire set of processes in multi-GPU). * @param reduce_op Binary operator takes two input arguments and reduce the two variables to one. * @param init Initial value to be added to the reduced @p key_aggregated_e_op return values for * each vertex. diff --git a/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh b/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh index a1797ad2729..cffdea057ca 100644 --- a/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh +++ b/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh @@ -330,20 +330,21 @@ transform_reduce_by_adj_matrix_row_col_key_e( // FIXME: EdgeOp & VertexOp in update_frontier_v_push_if_out_nbr concatenates push inidicator or // bucket idx with the value while EdgeOp here does not. This is inconsistent. Better be fixed. -// FIXME: rmm::device_uvector does not work if T is a tuple. /** * @brief Iterate over the entire set of edges and reduce @p edge_op outputs to (key, value) pairs. * - * This function is inspired by thrust::transform_reduce() and thrust::reduce_by_key(). + * This function is inspired by thrust::transform_reduce() and thrust::reduce_by_key(). Keys for + * edges are determined by the graph adjacency matrix rows. 
* * @tparam GraphViewType Type of the passed non-owning graph object. * @tparam AdjMatrixRowValueInputIterator Type of the iterator for graph adjacency matrix row * input properties. * @tparam AdjMatrixColValueInputIterator Type of the iterator for graph adjacency matrix column * input properties. + * @tparam VertexIterator Type of the iterator for keys in (key, value) pairs (key type should + * coincide with vertex type). * @tparam EdgeOp Type of the quaternary (or quinary) edge operator. - * @tparam T Type of the initial value of the value in each (key, value) pair. - * @tparam VertexIterator Type of the iterator for keys in (key, value) pairs. + * @tparam T Type of the values in (key, value) pairs. * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. * @param graph_view Non-owning graph object. @@ -355,18 +356,89 @@ transform_reduce_by_adj_matrix_row_col_key_e( * properties for the first (inclusive) column (assigned to this process in multi-GPU). * `adj_matrix_col_value_output_last` (exclusive) is deduced as @p adj_matrix_col_value_output_first * + @p graph_view.get_number_of_local_adj_matrix_partition_cols(). + * @param adj_matrix_row_key_first Iterator pointing to the adjacency matrix row key for the first + * (inclusive) column (assigned to this process in multi-GPU). `adj_matrix_row_key_last` (exclusive) + * is deduced as @p adj_matrix_row_key_first + @p graph_view.get_number_of_local_adj_matrix_rows(). * @param e_op Quaternary (or quinary) operator takes edge source, edge destination, (optional edge * weight), *(@p adj_matrix_row_value_input_first + i), and *(@p adj_matrix_col_value_input_first + * j) (where i is in [0, graph_view.get_number_of_local_adj_matrix_partition_rows()) and j is in [0, - * get_number_of_local_adj_matrix_partition_cols())) and returns a pair of a key and a transformed - * value to be reduced. + * get_number_of_local_adj_matrix_partition_cols())) and returns a transformed value to be reduced. * @param init Initial value to be added to the value in each transform-reduced (key, value) pair. - * @param map_key_first Iterator pointing to the first (inclusive) key to be stored in the returned - * cuco::static_map (which is local to this process in mulit-GPU). - * @param map_key_last Iterator pointing to the last (exclusive) key to be stored in the returned - * cuco::static_map (which is local to this process in multi-GPU). - * @return cuco::static_map Hash-based map of (key, value) pairs for the keys pointed by - * [map_key_first, map_key_last). + * @return std::tuple Tuple of rmm::device_uvector and + * rmm::device_uvector (if T is arithmetic scalar) or a tuple of rmm::device_uvector objects (if + * T is a thrust::tuple type of arithmetic scalar types, one rmm::device_uvector object per scalar + * type). 
+ */ +template +auto transform_reduce_by_adj_matrix_row_key_e( + raft::handle_t const& handle, + GraphViewType const& graph_view, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, + VertexIterator adj_matrix_row_key_first, + EdgeOp e_op, + T init) +{ + static_assert(is_arithmetic_or_thrust_tuple_of_arithmetic::value); + static_assert(std::is_same::value_type, + typename GraphViewType::vertex_type>::value); + + return detail::transform_reduce_by_adj_matrix_row_col_key_e( + handle, + graph_view, + adj_matrix_row_value_input_first, + adj_matrix_col_value_input_first, + adj_matrix_row_key_first, + e_op, + init); +} + +// FIXME: EdgeOp & VertexOp in update_frontier_v_push_if_out_nbr concatenates push inidicator or +// bucket idx with the value while EdgeOp here does not. This is inconsistent. Better be fixed. +/** + * @brief Iterate over the entire set of edges and reduce @p edge_op outputs to (key, value) pairs. + * + * This function is inspired by thrust::transform_reduce() and thrust::reduce_by_key(). Keys for + * edges are determined by the graph adjacency matrix columns. + * + * @tparam GraphViewType Type of the passed non-owning graph object. + * @tparam AdjMatrixRowValueInputIterator Type of the iterator for graph adjacency matrix row + * input properties. + * @tparam AdjMatrixColValueInputIterator Type of the iterator for graph adjacency matrix column + * input properties. + * @tparam VertexIterator Type of the iterator for keys in (key, value) pairs (key type should + * coincide with vertex type). + * @tparam EdgeOp Type of the quaternary (or quinary) edge operator. + * @tparam T Type of the values in (key, value) pairs. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Non-owning graph object. + * @param adj_matrix_row_value_input_first Iterator pointing to the adjacency matrix row input + * properties for the first (inclusive) row (assigned to this process in multi-GPU). + * `adj_matrix_row_value_input_last` (exclusive) is deduced as @p adj_matrix_row_value_input_first + + * @p graph_view.get_number_of_local_adj_matrix_partition_rows(). + * @param adj_matrix_col_value_input_first Iterator pointing to the adjacency matrix column input + * properties for the first (inclusive) column (assigned to this process in multi-GPU). + * `adj_matrix_col_value_output_last` (exclusive) is deduced as @p adj_matrix_col_value_output_first + * + @p graph_view.get_number_of_local_adj_matrix_partition_cols(). + * @param adj_matrix_col_key_first Iterator pointing to the adjacency matrix column key for the + * first (inclusive) column (assigned to this process in multi-GPU). + * `adj_matrix_col_key_last` (exclusive) is deduced as @p adj_matrix_col_key_first + @p + * graph_view.get_number_of_local_adj_matrix_cols(). + * @param e_op Quaternary (or quinary) operator takes edge source, edge destination, (optional edge + * weight), *(@p adj_matrix_row_value_input_first + i), and *(@p adj_matrix_col_value_input_first + + * j) (where i is in [0, graph_view.get_number_of_local_adj_matrix_partition_rows()) and j is in [0, + * get_number_of_local_adj_matrix_partition_cols())) and returns a transformed value to be reduced. + * @param init Initial value to be added to the value in each transform-reduced (key, value) pair. 
+ * @return std::tuple Tuple of rmm::device_uvector and + * rmm::device_uvector (if T is arithmetic scalar) or a tuple of rmm::device_uvector objects (if + * T is a thrust::tuple type of arithmetic scalar types, one rmm::device_uvector object per scalar + * type). */ template Date: Wed, 16 Dec 2020 12:15:22 -0800 Subject: [PATCH 052/343] remove guard from host code... causing compilation issues --- cpp/src/community/louvain.cu | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/cpp/src/community/louvain.cu b/cpp/src/community/louvain.cu index cc3885af58a..81a68a31663 100644 --- a/cpp/src/community/louvain.cu +++ b/cpp/src/community/louvain.cu @@ -15,18 +15,8 @@ */ #include - -// "FIXME": remove the guards after support for Pascal is dropped -// -// Disable louvain(experimental::graph_view_t,...) -// versions for GPU architectures < 700 -// (cuco/static_map.cuh depends on features not supported on or before Pascal) -// -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 700 #include -#else #include -#endif namespace cugraph { @@ -69,12 +59,9 @@ std::pair louvain( if (device_prop.major < 7) { CUGRAPH_FAIL("Louvain not supported on Pascal and older architectures"); } else { -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 700 -#else experimental::Louvain> runner(handle, graph_view); return runner(clustering, max_level, resolution); -#endif } } From e75c74dad830c7bb31057209a91aa30bc3d57eca Mon Sep 17 00:00:00 2001 From: Charles Hastings Date: Thu, 17 Dec 2020 10:41:30 -0800 Subject: [PATCH 053/343] moved ifdefs into the experimental::louvain implementation --- cpp/src/experimental/louvain.cuh | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/cpp/src/experimental/louvain.cuh b/cpp/src/experimental/louvain.cuh index 1f6f8633bcd..2268eebd765 100644 --- a/cpp/src/experimental/louvain.cuh +++ b/cpp/src/experimental/louvain.cuh @@ -21,7 +21,6 @@ #include #include -#include #include #include #include @@ -33,6 +32,13 @@ #include #include +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 700 +#else +#define CUCO_STATIC_MAP_DEFINED +#include +#endif + + //#define TIMING #ifdef TIMING @@ -44,6 +50,7 @@ namespace experimental { namespace detail { +#ifdef CUCO_STATIC_MAP_DEFINED template struct create_cuco_pair_t { cuco::pair_type __device__ operator()(data_t data) @@ -54,6 +61,7 @@ struct create_cuco_pair_t { return tmp; } }; +#endif // // These classes should allow cuco::static_map to generate hash tables of @@ -443,7 +451,9 @@ class Louvain { weight_t resolution) { size_t num_level{0}; + weight_t best_modularity = weight_t{-1}; +#ifdef CUCO_STATIC_MAP_DEFINED weight_t total_edge_weight; total_edge_weight = experimental::transform_reduce_e( handle_, @@ -453,8 +463,6 @@ class Louvain { [] __device__(auto, auto, weight_t wt, auto, auto) { return wt; }, weight_t{0}); - weight_t best_modularity = weight_t{-1}; - // // Initialize every cluster to reference each vertex to itself // @@ -480,6 +488,7 @@ class Louvain { } timer_display(std::cout); +#endif return std::make_pair(num_level, best_modularity); } @@ -593,6 +602,7 @@ class Louvain { } } +#ifdef CUCO_STATIC_MAP_DEFINED virtual weight_t update_clustering(weight_t total_edge_weight, weight_t resolution) { timer_start("update_clustering"); @@ -1616,6 +1626,7 @@ class Louvain { cugraph::detail::offsets_to_indices( current_graph_view_.offsets(), local_num_rows_, src_indices_v_.data().get()); } +#endif std:: tuple, rmm::device_vector, rmm::device_vector> From 47b01e3e8d4bed55712e2ba42c4bd750a835599a Mon Sep 17 00:00:00 2001 
From: Chuck Hastings Date: Thu, 17 Dec 2020 13:55:46 -0500 Subject: [PATCH 054/343] fix clang format issue --- cpp/src/experimental/louvain.cuh | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/src/experimental/louvain.cuh b/cpp/src/experimental/louvain.cuh index 2268eebd765..7d893092a6b 100644 --- a/cpp/src/experimental/louvain.cuh +++ b/cpp/src/experimental/louvain.cuh @@ -38,7 +38,6 @@ #include #endif - //#define TIMING #ifdef TIMING From df082d8c208b820327595bbfafde0b7aaa4c6e40 Mon Sep 17 00:00:00 2001 From: Chuck Hastings Date: Thu, 17 Dec 2020 14:23:48 -0500 Subject: [PATCH 055/343] undo --allgpuarch change --- ci/gpu/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 9dd6e14181e..019d03e21da 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -91,7 +91,7 @@ conda list --show-channel-urls if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then gpuci_logger "Build from source" - $WORKSPACE/build.sh -v clean libcugraph cugraph --allgpuarch + $WORKSPACE/build.sh -v clean libcugraph cugraph fi ################################################################################ From 70d9abd3556c60af62b77a1397ce3406dc557af6 Mon Sep 17 00:00:00 2001 From: Rick Ratzel <3039903+rlratzel@users.noreply.github.com> Date: Thu, 17 Dec 2020 16:52:29 -0600 Subject: [PATCH 056/343] Updated cuxfilter to 0.18, removed datashader indirect dependency in conda dev .yml files(#1311) Updated cuxfilter to 0.18 and removed datashader indirect dependency in cugraph conda dev environment files. Authors: - Rick Ratzel Approvers: - Hugo Linsenmaier - Hugo Linsenmaier - Seunghwa Kang - AJ Schmidt - Alex Fender - Alex Fender URL: https://github.com/rapidsai/cugraph/pull/1311 --- ci/release/update-version.sh | 1 + conda/environments/cugraph_dev_cuda10.1.yml | 3 +-- conda/environments/cugraph_dev_cuda10.2.yml | 3 +-- conda/environments/cugraph_dev_cuda11.0.yml | 3 +-- python/cugraph/centrality/betweenness_centrality.py | 11 +++++++++-- python/cugraph/structure/graph_primtypes.pyx | 5 +++-- 6 files changed, 16 insertions(+), 10 deletions(-) diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index d853c3693c6..7cd0d9720fc 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -68,4 +68,5 @@ for FILE in conda/environments/*.yml; do sed_runner "s/dask-cuda=${CURRENT_SHORT_TAG}/dask-cuda=${NEXT_SHORT_TAG}/g" ${FILE}; sed_runner "s/dask-cudf=${CURRENT_SHORT_TAG}/dask-cudf=${NEXT_SHORT_TAG}/g" ${FILE}; sed_runner "s/ucx-py=${CURRENT_SHORT_TAG}/ucx-py=${NEXT_SHORT_TAG}/g" ${FILE}; + sed_runner "s/cuxfilter=${CURRENT_SHORT_TAG}/cuxfilter=${NEXT_SHORT_TAG}/g" ${FILE}; done diff --git a/conda/environments/cugraph_dev_cuda10.1.yml b/conda/environments/cugraph_dev_cuda10.1.yml index ed345fcafff..067fd0bc4ba 100644 --- a/conda/environments/cugraph_dev_cuda10.1.yml +++ b/conda/environments/cugraph_dev_cuda10.1.yml @@ -8,7 +8,7 @@ dependencies: - cudf=0.18.* - libcudf=0.18.* - rmm=0.18.* -- cuxfilter=0.17.* +- cuxfilter=0.18.* - librmm=0.18.* - dask>=2.12.0 - distributed>=2.12.0 @@ -32,7 +32,6 @@ dependencies: - scikit-learn>=0.23.1 - colorcet - holoviews -- datashader - sphinx - sphinx_rtd_theme - sphinxcontrib-websupport diff --git a/conda/environments/cugraph_dev_cuda10.2.yml b/conda/environments/cugraph_dev_cuda10.2.yml index 325a89382b7..3371340d8bd 100644 --- a/conda/environments/cugraph_dev_cuda10.2.yml +++ b/conda/environments/cugraph_dev_cuda10.2.yml @@ -8,7 +8,7 @@ dependencies: - cudf=0.18.* - 
libcudf=0.18.*
- rmm=0.18.*
-- cuxfilter=0.17.*
+- cuxfilter=0.18.*
- librmm=0.18.*
- dask>=2.12.0
- distributed>=2.12.0
@@ -32,7 +32,6 @@ dependencies:
- scikit-learn>=0.23.1
- colorcet
- holoviews
-- datashader
- sphinx
- sphinx_rtd_theme
- sphinxcontrib-websupport
diff --git a/conda/environments/cugraph_dev_cuda11.0.yml b/conda/environments/cugraph_dev_cuda11.0.yml
index 386377e745d..ee3b57632a1 100644
--- a/conda/environments/cugraph_dev_cuda11.0.yml
+++ b/conda/environments/cugraph_dev_cuda11.0.yml
@@ -8,7 +8,7 @@ dependencies:
- cudf=0.18.*
- libcudf=0.18.*
- rmm=0.18.*
-- cuxfilter=0.17.*
+- cuxfilter=0.18.*
- librmm=0.18.*
- dask>=2.12.0
- distributed>=2.12.0
@@ -31,7 +31,6 @@ dependencies:
- pytest
- scikit-learn>=0.23.1
- colorcet
-- datashader
- holoviews
- sphinx
- sphinx_rtd_theme
diff --git a/python/cugraph/centrality/betweenness_centrality.py b/python/cugraph/centrality/betweenness_centrality.py
index 634cc2aa7a2..93bdce7c515 100644
--- a/python/cugraph/centrality/betweenness_centrality.py
+++ b/python/cugraph/centrality/betweenness_centrality.py
@@ -233,7 +233,6 @@ def edge_betweenness_centrality(
    >>> G.from_cudf_edgelist(gdf, source='0', destination='1')
    >>> ebc = cugraph.edge_betweenness_centrality(G)
    """
-
    if weight is not None:
        raise NotImplementedError(
            "weighted implementation of betweenness "
@@ -254,8 +253,16 @@ def edge_betweenness_centrality(
        df = G.unrenumber(df, "dst")
    if type(G) is cugraph.Graph:
+        # select the lower triangle of the df based on src/dst vertex value
        lower_triangle = df['src'] >= df['dst']
-        df[["src", "dst"]][lower_triangle] = df[["dst", "src"]][lower_triangle]
+        # swap the src and dst vertices for the lower triangle only. Because
+        # this is a symmetrized graph, this operation results in a df with
+        # multiple src/dst entries.
+        df['src'][lower_triangle], df['dst'][lower_triangle] = \
+            df['dst'][lower_triangle], df['src'][lower_triangle]
+        # overwrite the df with the sum of the values for all matching src/dst
+        # vertex pairs, resulting in half the edges of the original df from the
+        # symmetrized graph.
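+        # Illustrative example (vertex ids chosen arbitrarily): an undirected
+        # edge appears twice after symmetrization, e.g. as (2, 5) and (5, 2).
+        # The swap above rewrites the second form as (2, 5), and the
+        # groupby().sum() below collapses the two rows into a single row whose
+        # betweenness value is the sum of both halves.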
df = df.groupby(by=["src", "dst"]).sum().reset_index()
    if isNx is True:
diff --git a/python/cugraph/structure/graph_primtypes.pyx b/python/cugraph/structure/graph_primtypes.pyx
index f3f0fd9b9a6..da16f8f4c8a 100644
--- a/python/cugraph/structure/graph_primtypes.pyx
+++ b/python/cugraph/structure/graph_primtypes.pyx
@@ -93,6 +93,9 @@ cdef GraphCOOViewType get_coo_graph_view(input_graph, bool weighted=True, GraphC
    if not input_graph.edgelist:
        input_graph.view_edge_list()
+    num_edges = input_graph.number_of_edges(directed_edges=True)
+    num_verts = input_graph.number_of_vertices()
+
    cdef uintptr_t c_src = input_graph.edgelist.edgelist_df['src'].__cuda_array_interface__['data'][0]
    cdef uintptr_t c_dst = input_graph.edgelist.edgelist_df['dst'].__cuda_array_interface__['data'][0]
    cdef uintptr_t c_weights = NULL
@@ -101,8 +104,6 @@ cdef GraphCOOViewType get_coo_graph_view(input_graph, bool weighted=True, GraphC
    if input_graph.edgelist.weights and weighted:
        c_weights = input_graph.edgelist.edgelist_df['weights'].__cuda_array_interface__['data'][0]
-    num_verts = input_graph.number_of_vertices()
-    num_edges = input_graph.number_of_edges(directed_edges=True)
    cdef GraphCOOViewType in_graph
    if GraphCOOViewType is GraphCOOViewFloat:
        in_graph = GraphCOOViewFloat(c_src, c_dst, c_weights, num_verts, num_edges)
From 2316f03f6e073efe780b2a3aa5fc0bf8d1e56cb1 Mon Sep 17 00:00:00 2001
From: Chuck Hastings
Date: Thu, 17 Dec 2020 22:33:06 -0500
Subject: [PATCH 057/343] add comments based on PR feedback
---
 cpp/src/experimental/louvain.cuh | 11 +++++++++++
 1 file changed, 11 insertions(+)
diff --git a/cpp/src/experimental/louvain.cuh b/cpp/src/experimental/louvain.cuh
index 7d893092a6b..08e52092362 100644
--- a/cpp/src/experimental/louvain.cuh
+++ b/cpp/src/experimental/louvain.cuh
@@ -32,6 +32,17 @@
 #include
 #include
+// "FIXME": remove the guards below and references to CUCO_STATIC_MAP_DEFINED
+//
+// cuco/static_map.cuh depends on features not supported on or before Pascal.
+//
+// If we build for sm_60 or before, the inclusion of cuco/static_map.cuh will
+// result in compilation errors.
+//
+// If we're Pascal or before we do nothing here and will suppress including
+// some code below. If we are later than Pascal we define CUCO_STATIC_MAP_DEFINED
+// which will result in the full implementation being pulled in.
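+//
+// Illustrative sketch only (capacity and invalid_vertex are placeholders, not
+// values defined in this file): the guarded code paths further down use the
+// map roughly as
+//
+//   #ifdef CUCO_STATIC_MAP_DEFINED
+//   cuco::static_map<vertex_t, vertex_t> map{capacity, invalid_vertex, invalid_vertex};
+//   // ... insert cluster assignments and look them up on the device ...
+//   #endif
+//
+// so on Pascal and older the cuco-dependent members are compiled out and the
+// host-side wrapper reports CUGRAPH_FAIL instead.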
+// #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 700 #else #define CUCO_STATIC_MAP_DEFINED From 3ede162aee5cc78e623637bd8d7adfa0ffc95d12 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 22 Dec 2020 00:18:43 -0500 Subject: [PATCH 058/343] split graph_functions.cu to coarsen_graph.cu, relabel.cu, and renumber_edgelist.cu --- cpp/CMakeLists.txt | 4 +- cpp/include/experimental/graph_functions.hpp | 2 +- cpp/src/experimental/coarsen_graph.cu | 504 +++++++++ cpp/src/experimental/graph_functions.cu | 1010 ------------------ cpp/src/experimental/relabel.cu | 230 ++++ cpp/src/experimental/renumber_edgelist.cu | 408 +++++++ 6 files changed, 1146 insertions(+), 1012 deletions(-) create mode 100644 cpp/src/experimental/coarsen_graph.cu delete mode 100644 cpp/src/experimental/graph_functions.cu create mode 100644 cpp/src/experimental/relabel.cu create mode 100644 cpp/src/experimental/renumber_edgelist.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 38fae0e04a8..59376e91083 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -367,7 +367,9 @@ add_library(cugraph SHARED src/centrality/betweenness_centrality.cu src/experimental/graph.cu src/experimental/graph_view.cu - src/experimental/graph_functions.cu + src/experimental/coarsen_graph.cu + src/experimental/renumber_edgelist.cu + src/experimental/relabel.cu src/experimental/bfs.cu src/experimental/sssp.cu src/experimental/pagerank.cu diff --git a/cpp/include/experimental/graph_functions.hpp b/cpp/include/experimental/graph_functions.hpp index cbd15b04d58..04037a3b5d7 100644 --- a/cpp/include/experimental/graph_functions.hpp +++ b/cpp/include/experimental/graph_functions.hpp @@ -119,7 +119,7 @@ template >, rmm::device_uvector> coarsen_graph( - raft::handel_t const& handle, + raft::handle_t const& handle, graph_view_t const& graph_view, vertex_t const* labels); diff --git a/cpp/src/experimental/coarsen_graph.cu b/cpp/src/experimental/coarsen_graph.cu new file mode 100644 index 00000000000..64d603a42d2 --- /dev/null +++ b/cpp/src/experimental/coarsen_graph.cu @@ -0,0 +1,504 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace cugraph { +namespace experimental { +namespace detail { + +template +std:: + tuple, rmm::device_uvector, rmm::device_uvector> + compressed_sparse_to_edgelist(edge_t const *compressed_sparse_offsets, + vertex_t const *compressed_sparse_indices, + weight_t const *compressed_sparse_weights, + vertex_t major_first, + vertex_t major_last, + cudaStream_t stream) +{ + edge_t number_of_edges{0}; + raft::update_host( + &number_of_edges, compressed_sparse_offsets + (major_last - major_first), 1, stream); + CUDA_TRY(cudaStreamSynchronize(stream)); + rmm::device_uvector edgelist_major_vertices(number_of_edges, stream); + rmm::device_uvector edgelist_minor_vertices(number_of_edges, stream); + rmm::device_uvector edgelist_weights( + compressed_sparse_weights != nullptr ? number_of_edges : 0, stream); + + // FIXME: this is highly inefficient for very high-degree vertices, for better performance, we can + // fill high-degree vertices using one CUDA block per vertex, mid-degree vertices using one CUDA + // warp per vertex, and low-degree vertices using one CUDA thread per block + thrust::for_each(rmm::exec_policy(stream)->on(stream), + thrust::make_counting_iterator(major_first), + thrust::make_counting_iterator(major_last), + [compressed_sparse_offsets, + major_first, + p_majors = edgelist_major_vertices.begin()] __device__(auto v) { + auto first = compressed_sparse_offsets[v - major_first]; + auto last = compressed_sparse_offsets[v - major_first + 1]; + thrust::fill(thrust::seq, p_majors + first, p_majors + last, v); + }); + thrust::copy(rmm::exec_policy(stream)->on(stream), + compressed_sparse_indices, + compressed_sparse_indices + number_of_edges, + edgelist_minor_vertices.begin()); + if (compressed_sparse_weights != nullptr) { + thrust::copy(rmm::exec_policy(stream)->on(stream), + compressed_sparse_weights, + compressed_sparse_weights + number_of_edges, + edgelist_weights.data()); + } + + return std::make_tuple(std::move(edgelist_major_vertices), + std::move(edgelist_minor_vertices), + std::move(edgelist_weights)); +} + +template +void sort_and_coarsen_edgelist(rmm::device_uvector &edgelist_major_vertices /* [INOUT] */, + rmm::device_uvector &edgelist_minor_vertices /* [INOUT] */, + rmm::device_uvector &edgelist_weights /* [INOUT] */, + cudaStream_t stream) +{ + auto pair_first = thrust::make_zip_iterator( + thrust::make_tuple(edgelist_major_vertices.begin(), edgelist_minor_vertices.begin())); + + size_t number_of_edges{0}; + if (edgelist_weights.size() > 0) { + thrust::sort_by_key(rmm::exec_policy(stream)->on(stream), + pair_first, + pair_first + edgelist_major_vertices.size(), + edgelist_weights.begin()); + + rmm::device_uvector tmp_edgelist_major_vertices(edgelist_major_vertices.size(), + stream); + rmm::device_uvector tmp_edgelist_minor_vertices(tmp_edgelist_major_vertices.size(), + stream); + rmm::device_uvector tmp_edgelist_weights(tmp_edgelist_major_vertices.size(), stream); + auto it = thrust::reduce_by_key( + rmm::exec_policy(stream)->on(stream), + pair_first, + pair_first + edgelist_major_vertices.size(), + edgelist_weights.begin(), + thrust::make_zip_iterator(thrust::make_tuple(tmp_edgelist_major_vertices.begin(), + tmp_edgelist_minor_vertices.begin())), + tmp_edgelist_weights.begin()); + number_of_edges = thrust::distance(tmp_edgelist_weights.begin(), 
thrust::get<1>(it)); + + CUDA_TRY(cudaStreamSynchronize( + stream)); // memory blocks owned by edgelist_(major_vertices,minor_vertices,weights) will be + // freed after the assignments below + + edgelist_major_vertices = std::move(tmp_edgelist_major_vertices); + edgelist_minor_vertices = std::move(tmp_edgelist_minor_vertices); + edgelist_weights = std::move(tmp_edgelist_weights); + } else { + thrust::sort(rmm::exec_policy(stream)->on(stream), + pair_first, + pair_first + edgelist_major_vertices.size()); + auto it = thrust::unique(rmm::exec_policy(stream)->on(stream), + pair_first, + pair_first + edgelist_major_vertices.size()); + number_of_edges = thrust::distance(pair_first, it); + } + + edgelist_major_vertices.resize(number_of_edges, stream); + edgelist_minor_vertices.resize(number_of_edges, stream); + edgelist_weights.resize(number_of_edges, stream); + edgelist_major_vertices.shrink_to_fit(stream); + edgelist_minor_vertices.shrink_to_fit(stream); + edgelist_weights.shrink_to_fit(stream); +} + +template +std:: + tuple, rmm::device_uvector, rmm::device_uvector> + compressed_sparse_to_relabeled_and_sorted_and_coarsened_edgelist( + edge_t const *compressed_sparse_offsets, + vertex_t const *compressed_sparse_indices, + weight_t const *compressed_sparse_weights, + vertex_t const *p_major_labels, + vertex_t const *p_minor_labels, + vertex_t major_first, + vertex_t major_last, + vertex_t minor_first, + vertex_t minor_last, + cudaStream_t stream) +{ + // FIXME: it might be possible to directly create relabled & coarsened edgelist from the + // compressed sparse format to save memory + + rmm::device_uvector edgelist_major_vertices(0, stream); + rmm::device_uvector edgelist_minor_vertices(0, stream); + rmm::device_uvector edgelist_weights(0, stream); + std::tie(edgelist_major_vertices, edgelist_minor_vertices, edgelist_weights) = + compressed_sparse_to_edgelist(compressed_sparse_offsets, + compressed_sparse_indices, + compressed_sparse_weights, + major_first, + major_last, + stream); + + auto pair_first = thrust::make_zip_iterator( + thrust::make_tuple(edgelist_major_vertices.begin(), edgelist_minor_vertices.begin())); + thrust::transform( + rmm::exec_policy(stream)->on(stream), + pair_first, + pair_first + edgelist_major_vertices.size(), + pair_first, + [p_major_labels, p_minor_labels, major_first, minor_first] __device__(auto val) { + return thrust::make_tuple(p_major_labels[thrust::get<0>(val) - major_first], + p_minor_labels[thrust::get<1>(val) - minor_first]); + }); + + sort_and_coarsen_edgelist( + edgelist_major_vertices, edgelist_minor_vertices, edgelist_weights, stream); + + return std::make_tuple(std::move(edgelist_major_vertices), + std::move(edgelist_minor_vertices), + std::move(edgelist_weights)); +} + +// multi-GPU version +template +std::enable_if_t< + multi_gpu, + std::tuple>, + rmm::device_uvector>> +coarsen_graph( + raft::handle_t const &handle, + graph_view_t const &graph_view, + vertex_t const *labels) +{ + auto &comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + auto &row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_size = row_comm.get_size(); + auto const row_comm_rank = row_comm.get_rank(); + auto &col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_size = col_comm.get_size(); + auto const col_comm_rank = col_comm.get_rank(); + + // 1. 
locally construct coarsened edge list + + // FIXME: we don't need adj_matrix_major_labels if we apply the same partitioning scheme + // regardless of hypergraph partitioning is applied or not + rmm::device_uvector adj_matrix_major_labels( + store_transposed ? graph_view.get_number_of_local_adj_matrix_partition_cols() + : graph_view.get_number_of_local_adj_matrix_partition_rows(), + handle.get_stream()); + rmm::device_uvector adj_matrix_minor_labels( + store_transposed ? graph_view.get_number_of_local_adj_matrix_partition_rows() + : graph_view.get_number_of_local_adj_matrix_partition_cols(), + handle.get_stream()); + if (store_transposed) { + copy_to_adj_matrix_col(handle, graph_view, labels, adj_matrix_major_labels.data()); + copy_to_adj_matrix_row(handle, graph_view, labels, adj_matrix_minor_labels.data()); + } else { + copy_to_adj_matrix_row(handle, graph_view, labels, adj_matrix_major_labels.data()); + copy_to_adj_matrix_col(handle, graph_view, labels, adj_matrix_minor_labels.data()); + } + + rmm::device_uvector coarsened_edgelist_major_vertices(0, handle.get_stream()); + rmm::device_uvector coarsened_edgelist_minor_vertices(0, handle.get_stream()); + rmm::device_uvector coarsened_edgelist_weights(0, handle.get_stream()); + // FIXME: we may compare performance/memory footprint with the hash_based approach especially when + // cuco::dynamic_map becomes available (so we don't need to preallocate memory assuming the worst + // case). We may be able to limit the memory requirement close to the final coarsened edgelist + // with the hash based approach. + for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { + // get edge list + + rmm::device_uvector edgelist_major_vertices(0, handle.get_stream()); + rmm::device_uvector edgelist_minor_vertices(0, handle.get_stream()); + rmm::device_uvector edgelist_weights(0, handle.get_stream()); + std::tie(edgelist_major_vertices, edgelist_minor_vertices, edgelist_weights) = + compressed_sparse_to_relabeled_and_sorted_and_coarsened_edgelist( + graph_view.offsets(i), + graph_view.indices(i), + graph_view.weights(i), + adj_matrix_major_labels.begin() + + (store_transposed ? graph_view.get_local_adj_matrix_partition_col_value_start_offset(i) + : graph_view.get_local_adj_matrix_partition_row_value_start_offset(i)), + adj_matrix_minor_labels.begin(), + store_transposed ? graph_view.get_local_adj_matrix_partition_col_first(i) + : graph_view.get_local_adj_matrix_partition_row_first(i), + store_transposed ? graph_view.get_local_adj_matrix_partition_col_last(i) + : graph_view.get_local_adj_matrix_partition_row_last(i), + store_transposed ? graph_view.get_local_adj_matrix_partition_row_first(i) + : graph_view.get_local_adj_matrix_partition_col_first(i), + store_transposed ? graph_view.get_local_adj_matrix_partition_row_last(i) + : graph_view.get_local_adj_matrix_partition_col_last(i), + handle.get_stream()); + + auto cur_size = coarsened_edgelist_major_vertices.size(); + // FIXME: this can lead to frequent costly reallocation; we may be able to avoid this if we can + // reserve address space to avoid expensive reallocation. + // https://devblogs.nvidia.com/introducing-low-level-gpu-virtual-memory-management + coarsened_edgelist_major_vertices.resize(cur_size + edgelist_major_vertices.size(), + handle.get_stream()); + coarsened_edgelist_minor_vertices.resize(coarsened_edgelist_major_vertices.size(), + handle.get_stream()); + coarsened_edgelist_weights.resize( + graph_view.is_weighted() ? 
coarsened_edgelist_major_vertices.size() : 0, handle.get_stream()); + + if (graph_view.is_weighted()) { + auto src_edge_first = + thrust::make_zip_iterator(thrust::make_tuple(edgelist_major_vertices.begin(), + edgelist_minor_vertices.begin(), + edgelist_weights.begin())); + auto dst_edge_first = + thrust::make_zip_iterator(thrust::make_tuple(coarsened_edgelist_major_vertices.begin(), + coarsened_edgelist_minor_vertices.begin(), + coarsened_edgelist_weights.begin())) + + cur_size; + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + src_edge_first, + src_edge_first + edgelist_major_vertices.size(), + dst_edge_first); + } else { + auto src_edge_first = thrust::make_zip_iterator( + thrust::make_tuple(edgelist_major_vertices.begin(), edgelist_minor_vertices.begin())); + auto dst_edge_first = + thrust::make_zip_iterator(thrust::make_tuple(coarsened_edgelist_major_vertices.begin(), + coarsened_edgelist_minor_vertices.begin())) + + cur_size; + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + src_edge_first, + src_edge_first + edgelist_major_vertices.size(), + dst_edge_first); + } + + CUDA_TRY(cudaStreamSynchronize( + handle.get_stream())); // edgelist_(major_vertices,minor_vertices,weights) + // will become out-of-scope + } + + sort_and_coarsen_edgelist(coarsened_edgelist_major_vertices, + coarsened_edgelist_minor_vertices, + coarsened_edgelist_weights, + handle.get_stream()); + + // 2. globally shuffle edge list and re-coarsen + + { + auto edge_first = + thrust::make_zip_iterator(thrust::make_tuple(coarsened_edgelist_major_vertices.begin(), + coarsened_edgelist_minor_vertices.begin(), + coarsened_edgelist_weights.begin())); + auto key_func = detail::compute_gpu_id_from_edge_t{ + graph_view.is_hypergraph_partitioned(), + comm.get_size(), + row_comm.get_size(), + col_comm.get_size()}; + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edge_first, + edge_first + coarsened_edgelist_major_vertices.size(), + [key_func] __device__(auto lhs, auto rhs) { + return store_transposed ? (key_func(thrust::get<1>(lhs), thrust::get<0>(lhs)) < + key_func(thrust::get<1>(rhs), thrust::get<0>(rhs))) + : (key_func(thrust::get<0>(lhs), thrust::get<1>(lhs)) < + key_func(thrust::get<0>(rhs), thrust::get<1>(rhs))); + }); + auto key_first = thrust::make_transform_iterator(edge_first, [key_func] __device__(auto val) { + return store_transposed ? 
key_func(thrust::get<1>(val), thrust::get<0>(val)) + : key_func(thrust::get<0>(val), thrust::get<1>(val)); + }); + rmm::device_uvector tx_value_counts(comm.get_size(), handle.get_stream()); + thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + key_first, + key_first + coarsened_edgelist_major_vertices.size(), + thrust::make_constant_iterator(size_t{1}), + thrust::make_discard_iterator(), + tx_value_counts.begin()); + + rmm::device_uvector rx_edgelist_major_vertices(0, handle.get_stream()); + rmm::device_uvector rx_edgelist_minor_vertices(0, handle.get_stream()); + rmm::device_uvector rx_edgelist_weights(0, handle.get_stream()); + + std::tie( + rx_edgelist_major_vertices, rx_edgelist_minor_vertices, rx_edgelist_weights, std::ignore) = + detail::shuffle_values(handle.get_comms(), edge_first, tx_value_counts, handle.get_stream()); + + sort_and_coarsen_edgelist(rx_edgelist_major_vertices, + rx_edgelist_minor_vertices, + rx_edgelist_weights, + handle.get_stream()); + + CUDA_TRY(cudaStreamSynchronize( + handle.get_stream())); // memory blocks owned by + // coarsened_edgelist_(major_vertices,minor_vertices,weights) + // will be freed after the assignments below + + coarsened_edgelist_major_vertices = std::move(rx_edgelist_major_vertices); + coarsened_edgelist_minor_vertices = std::move(rx_edgelist_minor_vertices); + coarsened_edgelist_weights = std::move(rx_edgelist_weights); + } + + rmm::device_uvector renumber_map_labels(0, handle.get_stream()); + partition_t partition( + std::vector{}, graph_view.is_hypergraph_partitioned(), 0, 0, 0, 0); + vertex_t number_of_vertices{}; + edge_t number_of_edges{}; + std::tie(renumber_map_labels, partition, number_of_vertices, number_of_edges) = + renumber_edgelist(handle, + coarsened_edgelist_major_vertices, + coarsened_edgelist_minor_vertices, + graph_view.is_hypergraph_partitioned()); + + // 4. build a graph + + std::vector> edgelists{}; + if (graph_view.is_hypergraph_partitioned()) { + CUGRAPH_FAIL("unimplemented."); + } else { + edgelists.resize(1); + edgelists[0].p_src_vertices = store_transposed ? coarsened_edgelist_minor_vertices.data() + : coarsened_edgelist_major_vertices.data(); + edgelists[0].p_dst_vertices = store_transposed ? 
coarsened_edgelist_major_vertices.data() + : coarsened_edgelist_minor_vertices.data(); + edgelists[0].p_edge_weights = coarsened_edgelist_weights.data(); + edgelists[0].number_of_edges = static_cast(coarsened_edgelist_major_vertices.size()); + } + + return std::make_tuple( + std::make_unique>( + handle, + edgelists, + partition, + number_of_vertices, + number_of_edges, + graph_properties_t{graph_view.is_symmetric(), false}, + true), + std::move(renumber_map_labels)); +} + +// single-GPU version +template +std::enable_if_t< + !multi_gpu, + std::tuple>, + rmm::device_uvector>> +coarsen_graph( + raft::handle_t const &handle, + graph_view_t const &graph_view, + vertex_t const *labels) +{ + rmm::device_uvector coarsened_edgelist_major_vertices(0, handle.get_stream()); + rmm::device_uvector coarsened_edgelist_minor_vertices(0, handle.get_stream()); + rmm::device_uvector coarsened_edgelist_weights(0, handle.get_stream()); + std::tie(coarsened_edgelist_major_vertices, + coarsened_edgelist_minor_vertices, + coarsened_edgelist_weights) = + compressed_sparse_to_relabeled_and_sorted_and_coarsened_edgelist( + graph_view.offsets(), + graph_view.indices(), + graph_view.weights(), + labels, + labels, + vertex_t{0}, + graph_view.get_number_of_vertices(), + vertex_t{0}, + graph_view.get_number_of_vertices(), + handle.get_stream()); + + auto renumber_map_labels = renumber_edgelist( + handle, coarsened_edgelist_major_vertices, coarsened_edgelist_minor_vertices); + + edgelist_t edgelist{}; + edgelist.p_src_vertices = store_transposed ? coarsened_edgelist_minor_vertices.data() + : coarsened_edgelist_major_vertices.data(); + edgelist.p_dst_vertices = store_transposed ? coarsened_edgelist_major_vertices.data() + : coarsened_edgelist_minor_vertices.data(); + edgelist.p_edge_weights = coarsened_edgelist_weights.data(); + edgelist.number_of_edges = static_cast(coarsened_edgelist_major_vertices.size()); + + return std::make_tuple( + std::make_unique>( + handle, + edgelist, + static_cast(renumber_map_labels.size()), + graph_properties_t{graph_view.is_symmetric(), false}, + true), + std::move(renumber_map_labels)); +} + +} // namespace detail + +template +std::tuple>, + rmm::device_uvector> +coarsen_graph( + raft::handle_t const &handle, + graph_view_t const &graph_view, + vertex_t const *labels) +{ + return detail::coarsen_graph(handle, graph_view, labels); +} + +// explicit instantiation + +template std::tuple>, + rmm::device_uvector> +coarsen_graph( + raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t const *labels); + +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t const *labels); + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/src/experimental/graph_functions.cu b/cpp/src/experimental/graph_functions.cu deleted file mode 100644 index 5ac3b7ae135..00000000000 --- a/cpp/src/experimental/graph_functions.cu +++ /dev/null @@ -1,1010 +0,0 @@ -/* - * Copyright (c) 2020, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -namespace cugraph { -namespace experimental { - -namespace { - -template -std:: - tuple, rmm::device_uvector, rmm::device_uvector> - compressed_sparse_to_edgelist(edge_t const *compressed_sparse_offsets, - vertex_t const *compressed_sparse_indices, - weight_t const *compressed_sparse_weights, - vertex_t major_first, - vertex_t major_last, - cudaStream_t stream) -{ - edge_t number_of_edges{0}; - raft::update_host( - &number_of_edges, compressed_sparse_offsets + (major_last - major_first), 1, stream); - CUDA_TRY(cudaStreamSynchronize(stream)); - rmm::device_uvector edgelist_major_vertices(number_of_edges, stream); - rmm::device_uvector edgelist_minor_vertices(number_of_edges, stream); - rmm::device_uvector edgelist_weights( - compressed_sparse_weights != nullptr ? number_of_edges : 0, stream); - - // FIXME: this is highly inefficient for very high-degree vertices, for better performance, we can - // fill high-degree vertices using one CUDA block per vertex, mid-degree vertices using one CUDA - // warp per vertex, and low-degree vertices using one CUDA thread per block - thrust::for_each(rmm::exec_policy(stream)->on(stream), - thrust::make_counting_iterator(major_first), - thrust::make_counting_iterator(major_last), - [compressed_sparse_offsets, - major_first, - p_majors = edgelist_major_vertices.begin()] __device__(auto v) { - auto first = compressed_sparse_offsets[v - major_first]; - auto last = compressed_sparse_offsets[v - major_first + 1]; - thrust::fill(thrust::seq, p_majors + first, p_majors + last, v); - }); - thrust::copy(rmm::exec_policy(stream)->on(stream), - compressed_sparse_indices, - compressed_sparse_indices + number_of_edges, - edgelist_minor_vertices.begin()); - if (compressed_sparse_weights != nullptr) { - thrust::copy(rmm::exec_policy(stream)->on(stream), - compressed_sparse_weights, - compressed_sparse_weights + number_of_edges, - edgelist_weights.data()); - } - - return std::make_tuple(std::move(edgelist_major_vertices), - std::move(edgelist_minor_vertices), - std::move(edgelist_weights)); -} - -template -void sort_and_coarsen_edgelist(rmm::device_uvector &edgelist_major_vertices /* [INOUT] */, - rmm::device_uvector &edgelist_minor_vertices /* [INOUT] */, - rmm::device_uvector &edgelist_weights /* [INOUT] */, - cudaStream_t stream) -{ - auto pair_first = thrust::make_zip_iterator( - thrust::make_tuple(edgelist_major_vertices.begin(), edgelist_minor_vertices.begin())); - - size_t number_of_edges{0}; - if (edgelist_weights.size() > 0) { - thrust::sort_by_key(rmm::exec_policy(stream)->on(stream), - pair_first, - pair_first + edgelist_major_vertices.size(), - edgelist_weights.begin()); - - rmm::device_uvector tmp_edgelist_major_vertices(edgelist_major_vertices.size(), - stream); - rmm::device_uvector tmp_edgelist_minor_vertices(tmp_edgelist_major_vertices.size(), - stream); - rmm::device_uvector tmp_edgelist_weights(tmp_edgelist_major_vertices.size(), stream); - auto it = thrust::reduce_by_key( - rmm::exec_policy(stream)->on(stream), - pair_first, - pair_first + edgelist_major_vertices.size(), - edgelist_weights.begin(), - thrust::make_zip_iterator(thrust::make_tuple(tmp_edgelist_major_vertices.begin(), - tmp_edgelist_minor_vertices.begin())), - 
tmp_edgelist_weights.begin()); - number_of_edges = thrust::distance(tmp_edgelist_weights.begin(), thrust::get<1>(it)); - - CUDA_TRY(cudaStreamSynchronize( - stream)); // memory blocks owned by edgelist_(major_vertices,minor_vertices,weights) will be - // freed after the assignments below - - edgelist_major_vertices = std::move(tmp_edgelist_major_vertices); - edgelist_minor_vertices = std::move(tmp_edgelist_minor_vertices); - edgelist_weights = std::move(tmp_edgelist_weights); - } else { - thrust::sort(rmm::exec_policy(stream)->on(stream), - pair_first, - pair_first + edgelist_major_vertices.size()); - auto it = thrust::unique(rmm::exec_policy(stream)->on(stream), - pair_first, - pair_first + edgelist_major_vertices.size()); - number_of_edges = thrust::distance(pair_first, it); - } - - edgelist_major_vertices.resize(number_of_edges, stream); - edgelist_minor_vertices.resize(number_of_edges, stream); - edgelist_weights.resize(number_of_edges, stream); - edgelist_major_vertices.shrink_to_fit(stream); - edgelist_minor_vertices.shrink_to_fit(stream); - edgelist_weights.shrink_to_fit(stream); -} - -template -std:: - tuple, rmm::device_uvector, rmm::device_uvector> - compressed_sparse_to_relabeled_and_sorted_and_coarsened_edgelist( - edge_t const *compressed_sparse_offsets, - vertex_t const *compressed_sparse_indices, - weight_t const *compressed_sparse_weights, - vertex_t const *p_major_labels, - vertex_t const *p_minor_labels, - vertex_t major_first, - vertex_t major_last, - vertex_t minor_first, - vertex_t minor_last, - cudaStream_t stream) -{ - // FIXME: it might be possible to directly create relabled & coarsened edgelist from the - // compressed sparse format to save memory - - rmm::device_uvector edgelist_major_vertices(0, stream); - rmm::device_uvector edgelist_minor_vertices(0, stream); - rmm::device_uvector edgelist_weights(0, stream); - std::tie(edgelist_major_vertices, edgelist_minor_vertices, edgelist_weights) = - compressed_sparse_to_edgelist(compressed_sparse_offsets, - compressed_sparse_indices, - compressed_sparse_weights, - major_first, - major_last, - stream); - - auto pair_first = thrust::make_zip_iterator( - thrust::make_tuple(edgelist_major_vertices.begin(), edgelist_minor_vertices.begin())); - thrust::transform( - rmm::exec_policy(stream)->on(stream), - pair_first, - pair_first + edgelist_major_vertices.size(), - pair_first, - [p_major_labels, p_minor_labels, major_first, minor_first] __device__(auto val) { - return thrust::make_tuple(p_major_labels[thrust::get<0>(val) - major_first], - p_minor_labels[thrust::get<1>(val) - minor_first]); - }); - - sort_and_coarsen_edgelist( - edgelist_major_vertices, edgelist_minor_vertices, edgelist_weights, stream); - - return std::make_tuple(std::move(edgelist_major_vertices), - std::move(edgelist_minor_vertices), - std::move(edgelist_weights)); -} - -template -rmm::device_uvector compute_renumber_map( - raft::handle_t const &handle, - rmm::device_uvector const &edgelist_major_vertices, - rmm::device_uvector const &edgelist_minor_vertices) -{ - // FIXME: compare this sort based approach with hash based approach in both speed and memory - // footprint - - // 1. 
acquire (unique major label, count) pairs - - rmm::device_uvector tmp_labels(edgelist_major_vertices, handle.get_stream()); - thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - tmp_labels.begin(), - tmp_labels.end()); - rmm::device_uvector major_labels(tmp_labels.size(), handle.get_stream()); - rmm::device_uvector major_counts(major_labels.size(), handle.get_stream()); - auto major_pair_it = - thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - tmp_labels.begin(), - tmp_labels.end(), - thrust::make_constant_iterator(edge_t{1}), - major_labels.begin(), - major_counts.begin()); - tmp_labels.resize(0, handle.get_stream()); - tmp_labels.shrink_to_fit(handle.get_stream()); - major_labels.resize(thrust::distance(major_labels.begin(), thrust::get<0>(major_pair_it)), - handle.get_stream()); - major_counts.resize(major_labels.size(), handle.get_stream()); - major_labels.shrink_to_fit(handle.get_stream()); - major_counts.shrink_to_fit(handle.get_stream()); - - // 2. acquire unique minor labels - - rmm::device_uvector minor_labels(edgelist_minor_vertices, handle.get_stream()); - thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - minor_labels.begin(), - minor_labels.end()); - auto minor_label_it = - thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - minor_labels.begin(), - minor_labels.end()); - minor_labels.resize(thrust::distance(minor_labels.begin(), minor_label_it), handle.get_stream()); - minor_labels.shrink_to_fit(handle.get_stream()); - - // 3. merge major and minor labels - - rmm::device_uvector merged_labels(major_labels.size() + minor_labels.size(), - handle.get_stream()); - rmm::device_uvector merged_counts(merged_labels.size(), handle.get_stream()); - thrust::merge_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - major_labels.begin(), - major_labels.end(), - minor_labels.begin(), - minor_labels.end(), - major_counts.begin(), - thrust::make_constant_iterator(edge_t{0}), - merged_labels.begin(), - merged_counts.begin()); - major_labels.resize(0, handle.get_stream()); - major_counts.resize(0, handle.get_stream()); - minor_labels.resize(0, handle.get_stream()); - major_labels.shrink_to_fit(handle.get_stream()); - major_counts.shrink_to_fit(handle.get_stream()); - minor_labels.shrink_to_fit(handle.get_stream()); - rmm::device_uvector labels(merged_labels.size(), handle.get_stream()); - rmm::device_uvector counts(labels.size(), handle.get_stream()); - auto pair_it = - thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - merged_labels.begin(), - merged_labels.end(), - merged_counts.begin(), - labels.begin(), - counts.begin()); - merged_labels.resize(0, handle.get_stream()); - merged_counts.resize(0, handle.get_stream()); - merged_labels.shrink_to_fit(handle.get_stream()); - merged_counts.shrink_to_fit(handle.get_stream()); - labels.resize(thrust::distance(labels.begin(), thrust::get<0>(pair_it)), handle.get_stream()); - counts.resize(labels.size(), handle.get_stream()); - labels.shrink_to_fit(handle.get_stream()); - counts.shrink_to_fit(handle.get_stream()); - - // 4. 
if multi-GPU, shuffle and reduce (label, count) pairs - - if (multi_gpu) { - auto &comm = handle.get_comms(); - auto const comm_size = comm.get_size(); - - auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(labels.begin(), counts.begin())); - auto key_func = detail::compute_gpu_id_from_vertex_t{comm_size}; - thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - pair_first, - pair_first + labels.size(), - [key_func] __device__(auto lhs, auto rhs) { - return key_func(thrust::get<0>(lhs)) < key_func(thrust::get<0>(rhs)); - }); - auto key_first = thrust::make_transform_iterator( - labels.begin(), [key_func] __device__(auto val) { return key_func(val); }); - rmm::device_uvector tx_value_counts(comm_size, handle.get_stream()); - thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - key_first, - key_first + labels.size(), - thrust::make_constant_iterator(size_t{1}), - thrust::make_discard_iterator(), - tx_value_counts.begin()); - - rmm::device_uvector rx_labels(0, handle.get_stream()); - rmm::device_uvector rx_counts(0, handle.get_stream()); - - std::tie(rx_labels, rx_counts, std::ignore) = cugraph::experimental::detail::shuffle_values( - handle.get_comms(), pair_first, tx_value_counts, handle.get_stream()); - - labels.resize(rx_labels.size(), handle.get_stream()); - counts.resize(labels.size(), handle.get_stream()); - thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - rx_labels.begin(), - rx_labels.end(), - rx_counts.begin()); - pair_it = thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - rx_labels.begin(), - rx_labels.end(), - rx_counts.begin(), - labels.begin(), - counts.begin()); - rx_labels.resize(0, handle.get_stream()); - rx_counts.resize(0, handle.get_stream()); - rx_labels.shrink_to_fit(handle.get_stream()); - rx_counts.shrink_to_fit(handle.get_stream()); - labels.resize(thrust::distance(labels.begin(), thrust::get<0>(pair_it)), handle.get_stream()); - counts.resize(labels.size(), handle.get_stream()); - labels.shrink_to_fit(handle.get_stream()); - labels.shrink_to_fit(handle.get_stream()); - } - - // 5. sort by degree - - thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - counts.begin(), - counts.end(), - labels.begin(), - thrust::greater()); - - CUDA_TRY( - cudaStreamSynchronize(handle.get_stream())); // temporary rmm::devicec_uvector objects become - // out-of-scope once this function returns. - - return std::move(labels); -} - -} // namespace - -template -std::enable_if_t, partition_t, vertex_t, edge_t>> -renumber_edgelist(raft::handle_t const &handle, - rmm::device_uvector &edgelist_major_vertices /* [INOUT] */, - rmm::device_uvector &edgelist_minor_vertices /* [INOUT] */, - bool is_hypergraph_partitioned) -{ - auto &comm = handle.get_comms(); - auto const comm_size = comm.get_size(); - auto const comm_rank = comm.get_rank(); - auto &row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto const row_comm_size = row_comm.get_size(); - auto const row_comm_rank = row_comm.get_rank(); - auto &col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - auto const col_comm_size = col_comm.get_size(); - auto const col_comm_rank = col_comm.get_rank(); - - // 1. compute renumber map - - auto renumber_map_labels = compute_renumber_map( - handle, edgelist_major_vertices, edgelist_minor_vertices); - - // 2. 
initialize partition_t object, number_of_vertices, and number_of_edges for the coarsened - // graph - - auto vertex_partition_counts = host_scalar_allgather( - comm, static_cast(renumber_map_labels.size()), handle.get_stream()); - std::vector vertex_partition_offsets(comm_size + 1, 0); - std::partial_sum(vertex_partition_counts.begin(), - vertex_partition_counts.end(), - vertex_partition_offsets.begin() + 1); - - partition_t partition(vertex_partition_offsets, - is_hypergraph_partitioned, - row_comm_size, - col_comm_size, - row_comm_rank, - col_comm_rank); - - auto number_of_vertices = vertex_partition_offsets.back(); - auto number_of_edges = host_scalar_allreduce( - comm, static_cast(edgelist_major_vertices.size()), handle.get_stream()); - - // 3. renumber edges - - if (is_hypergraph_partitioned) { - CUGRAPH_FAIL("unimplemented."); - } else { - double constexpr load_factor = 0.7; - - // FIXME: compare this hash based approach with a binary search based approach in both memory - // footprint and execution time - - { - vertex_t major_first{}; - vertex_t major_last{}; - std::tie(major_first, major_last) = partition.get_matrix_partition_major_range(0); - rmm::device_uvector renumber_map_major_labels(major_last - major_first, - handle.get_stream()); - std::vector recvcounts(row_comm_size); - for (int i = 0; i < row_comm_size; ++i) { - recvcounts[i] = partition.get_vertex_partition_size(row_comm_rank * row_comm_size + i); - } - std::vector displacements(row_comm_size, 0); - std::partial_sum(recvcounts.begin(), recvcounts.end() - 1, displacements.begin() + 1); - device_allgatherv(row_comm, - renumber_map_labels.begin(), - renumber_map_major_labels.begin(), - recvcounts, - displacements, - handle.get_stream()); - - CUDA_TRY(cudaStreamSynchronize( - handle.get_stream())); // cuco::static_map currently does not take stream - - cuco::static_map renumber_map{ - static_cast(static_cast(renumber_map_major_labels.size()) / load_factor), - invalid_vertex_id::value, - invalid_vertex_id::value}; - auto pair_first = thrust::make_transform_iterator( - thrust::make_zip_iterator(thrust::make_tuple(renumber_map_major_labels.begin(), - thrust::make_counting_iterator(major_first))), - [] __device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); - }); - renumber_map.insert(pair_first, pair_first + renumber_map_major_labels.size()); - renumber_map.find(edgelist_major_vertices.begin(), - edgelist_major_vertices.end(), - edgelist_major_vertices.begin()); - } - - { - vertex_t minor_first{}; - vertex_t minor_last{}; - std::tie(minor_first, minor_last) = partition.get_matrix_partition_minor_range(); - rmm::device_uvector renumber_map_minor_labels(minor_last - minor_first, - handle.get_stream()); - - // FIXME: this P2P is unnecessary if we apply the partitioning scheme used with hypergraph - // partitioning - auto comm_src_rank = row_comm_rank * col_comm_size + col_comm_rank; - auto comm_dst_rank = (comm_rank % col_comm_size) * row_comm_size + comm_rank / col_comm_size; - // FIXME: this branch may be no longer necessary with NCCL backend - if (comm_src_rank == comm_rank) { - assert(comm_dst_rank == comm_rank); - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - renumber_map_labels.begin(), - renumber_map_labels.end(), - renumber_map_minor_labels.begin() + - (partition.get_vertex_partition_first(comm_src_rank) - - partition.get_vertex_partition_first(row_comm_rank * col_comm_size))); - } else { - device_sendrecv(comm, - renumber_map_labels.begin(), - 
renumber_map_labels.size(), - comm_dst_rank, - renumber_map_minor_labels.begin() + - (partition.get_vertex_partition_first(comm_src_rank) - - partition.get_vertex_partition_first(row_comm_rank * col_comm_size)), - static_cast(partition.get_vertex_partition_size(comm_src_rank)), - comm_src_rank, - handle.get_stream()); - } - - // FIXME: these broadcast operations can be placed between ncclGroupStart() and - // ncclGroupEnd() - for (int i = 0; i < col_comm_size; ++i) { - auto offset = partition.get_vertex_partition_first(row_comm_rank * col_comm_size + i) - - partition.get_vertex_partition_first(row_comm_rank * col_comm_size); - auto count = partition.get_vertex_partition_size(row_comm_rank * col_comm_size + i); - device_bcast(col_comm, - renumber_map_minor_labels.begin() + offset, - renumber_map_minor_labels.begin() + offset, - count, - i, - handle.get_stream()); - } - - CUDA_TRY(cudaStreamSynchronize( - handle.get_stream())); // cuco::static_map currently does not take stream - - cuco::static_map renumber_map{ - static_cast(static_cast(renumber_map_minor_labels.size()) / load_factor), - invalid_vertex_id::value, - invalid_vertex_id::value}; - auto pair_first = thrust::make_transform_iterator( - thrust::make_zip_iterator(thrust::make_tuple(renumber_map_minor_labels.begin(), - thrust::make_counting_iterator(minor_first))), - [] __device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); - }); - renumber_map.insert(pair_first, pair_first + renumber_map_minor_labels.size()); - renumber_map.find(edgelist_minor_vertices.begin(), - edgelist_minor_vertices.end(), - edgelist_minor_vertices.begin()); - } - } - - return std::make_tuple( - std::move(renumber_map_labels), partition, number_of_vertices, number_of_edges); -} - -template -std::enable_if_t> renumber_edgelist( - raft::handle_t const &handle, - rmm::device_uvector &edgelist_major_vertices /* [INOUT] */, - rmm::device_uvector &edgelist_minor_vertices /* [INOUT] */) -{ - auto renumber_map_labels = compute_renumber_map( - handle, edgelist_major_vertices, edgelist_minor_vertices); - - double constexpr load_factor = 0.7; - - // FIXME: compare this hash based approach with a binary search based approach in both memory - // footprint and execution time - - cuco::static_map renumber_map{ - static_cast(static_cast(renumber_map_labels.size()) / load_factor), - invalid_vertex_id::value, - invalid_vertex_id::value}; - auto pair_first = thrust::make_transform_iterator( - thrust::make_zip_iterator( - thrust::make_tuple(renumber_map_labels.begin(), thrust::make_counting_iterator(vertex_t{0}))), - [] __device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); - }); - renumber_map.insert(pair_first, pair_first + renumber_map_labels.size()); - renumber_map.find(edgelist_major_vertices.begin(), - edgelist_major_vertices.end(), - edgelist_major_vertices.begin()); - renumber_map.find(edgelist_minor_vertices.begin(), - edgelist_minor_vertices.end(), - edgelist_minor_vertices.begin()); - - return std::move(renumber_map_labels); -} - -template -std::enable_if_t< - multi_gpu, - std::tuple>, - rmm::device_uvector>> -coarsen_graph( - raft::handle_t const &handle, - graph_view_t const &graph_view, - vertex_t const *labels) -{ - auto &comm = handle.get_comms(); - auto const comm_size = comm.get_size(); - auto const comm_rank = comm.get_rank(); - auto &row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto const row_comm_size = row_comm.get_size(); - auto const 
row_comm_rank = row_comm.get_rank(); - auto &col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - auto const col_comm_size = col_comm.get_size(); - auto const col_comm_rank = col_comm.get_rank(); - - // 1. locally construct coarsened edge list - - // FIXME: we don't need adj_matrix_major_labels if we apply the same partitioning scheme - // regardless of hypergraph partitioning is applied or not - rmm::device_uvector adj_matrix_major_labels( - store_transposed ? graph_view.get_number_of_local_adj_matrix_partition_cols() - : graph_view.get_number_of_local_adj_matrix_partition_rows(), - handle.get_stream()); - rmm::device_uvector adj_matrix_minor_labels( - store_transposed ? graph_view.get_number_of_local_adj_matrix_partition_rows() - : graph_view.get_number_of_local_adj_matrix_partition_cols(), - handle.get_stream()); - if (store_transposed) { - copy_to_adj_matrix_col(handle, graph_view, labels, adj_matrix_major_labels.data()); - copy_to_adj_matrix_row(handle, graph_view, labels, adj_matrix_minor_labels.data()); - } else { - copy_to_adj_matrix_row(handle, graph_view, labels, adj_matrix_major_labels.data()); - copy_to_adj_matrix_col(handle, graph_view, labels, adj_matrix_minor_labels.data()); - } - - rmm::device_uvector coarsened_edgelist_major_vertices(0, handle.get_stream()); - rmm::device_uvector coarsened_edgelist_minor_vertices(0, handle.get_stream()); - rmm::device_uvector coarsened_edgelist_weights(0, handle.get_stream()); - // FIXME: we may compare performance/memory footprint with the hash_based approach especially when - // cuco::dynamic_map becomes available (so we don't need to preallocate memory assuming the worst - // case). We may be able to limit the memory requirement close to the final coarsened edgelist - // with the hash based approach. - for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { - // get edge list - - rmm::device_uvector edgelist_major_vertices(0, handle.get_stream()); - rmm::device_uvector edgelist_minor_vertices(0, handle.get_stream()); - rmm::device_uvector edgelist_weights(0, handle.get_stream()); - std::tie(edgelist_major_vertices, edgelist_minor_vertices, edgelist_weights) = - compressed_sparse_to_relabeled_and_sorted_and_coarsened_edgelist( - graph_view.offsets(i), - graph_view.indices(i), - graph_view.weights(i), - adj_matrix_major_labels.begin() + - (store_transposed ? graph_view.get_local_adj_matrix_partition_col_value_start_offset(i) - : graph_view.get_local_adj_matrix_partition_row_value_start_offset(i)), - adj_matrix_minor_labels.begin(), - store_transposed ? graph_view.get_local_adj_matrix_partition_col_first(i) - : graph_view.get_local_adj_matrix_partition_row_first(i), - store_transposed ? graph_view.get_local_adj_matrix_partition_col_last(i) - : graph_view.get_local_adj_matrix_partition_row_last(i), - store_transposed ? graph_view.get_local_adj_matrix_partition_row_first(i) - : graph_view.get_local_adj_matrix_partition_col_first(i), - store_transposed ? graph_view.get_local_adj_matrix_partition_row_last(i) - : graph_view.get_local_adj_matrix_partition_col_last(i), - handle.get_stream()); - - auto cur_size = coarsened_edgelist_major_vertices.size(); - // FIXME: this can lead to frequent costly reallocation; we may be able to avoid this if we can - // reserve address space to avoid expensive reallocation. 
- // https://devblogs.nvidia.com/introducing-low-level-gpu-virtual-memory-management - coarsened_edgelist_major_vertices.resize(cur_size + edgelist_major_vertices.size(), - handle.get_stream()); - coarsened_edgelist_minor_vertices.resize(coarsened_edgelist_major_vertices.size(), - handle.get_stream()); - coarsened_edgelist_weights.resize( - graph_view.is_weighted() ? coarsened_edgelist_major_vertices.size() : 0, handle.get_stream()); - - if (graph_view.is_weighted()) { - auto src_edge_first = - thrust::make_zip_iterator(thrust::make_tuple(edgelist_major_vertices.begin(), - edgelist_minor_vertices.begin(), - edgelist_weights.begin())); - auto dst_edge_first = - thrust::make_zip_iterator(thrust::make_tuple(coarsened_edgelist_major_vertices.begin(), - coarsened_edgelist_minor_vertices.begin(), - coarsened_edgelist_weights.begin())) + - cur_size; - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - src_edge_first, - src_edge_first + edgelist_major_vertices.size(), - dst_edge_first); - } else { - auto src_edge_first = thrust::make_zip_iterator( - thrust::make_tuple(edgelist_major_vertices.begin(), edgelist_minor_vertices.begin())); - auto dst_edge_first = - thrust::make_zip_iterator(thrust::make_tuple(coarsened_edgelist_major_vertices.begin(), - coarsened_edgelist_minor_vertices.begin())) + - cur_size; - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - src_edge_first, - src_edge_first + edgelist_major_vertices.size(), - dst_edge_first); - } - - CUDA_TRY(cudaStreamSynchronize( - handle.get_stream())); // edgelist_(major_vertices,minor_vertices,weights) - // will become out-of-scope - } - - sort_and_coarsen_edgelist(coarsened_edgelist_major_vertices, - coarsened_edgelist_minor_vertices, - coarsened_edgelist_weights, - handle.get_stream()); - - // 2. globally shuffle edge list and re-coarsen - - { - auto edge_first = - thrust::make_zip_iterator(thrust::make_tuple(coarsened_edgelist_major_vertices.begin(), - coarsened_edgelist_minor_vertices.begin(), - coarsened_edgelist_weights.begin())); - auto key_func = detail::compute_gpu_id_from_edge_t{ - graph_view.is_hypergraph_partitioned(), - comm.get_size(), - row_comm.get_size(), - col_comm.get_size()}; - thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - edge_first, - edge_first + coarsened_edgelist_major_vertices.size(), - [key_func] __device__(auto lhs, auto rhs) { - return store_transposed ? (key_func(thrust::get<1>(lhs), thrust::get<0>(lhs)) < - key_func(thrust::get<1>(rhs), thrust::get<0>(rhs))) - : (key_func(thrust::get<0>(lhs), thrust::get<1>(lhs)) < - key_func(thrust::get<0>(rhs), thrust::get<1>(rhs))); - }); - auto key_first = thrust::make_transform_iterator(edge_first, [key_func] __device__(auto val) { - return store_transposed ? 
key_func(thrust::get<1>(val), thrust::get<0>(val)) - : key_func(thrust::get<0>(val), thrust::get<1>(val)); - }); - rmm::device_uvector tx_value_counts(comm.get_size(), handle.get_stream()); - thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - key_first, - key_first + coarsened_edgelist_major_vertices.size(), - thrust::make_constant_iterator(size_t{1}), - thrust::make_discard_iterator(), - tx_value_counts.begin()); - - rmm::device_uvector rx_edgelist_major_vertices(0, handle.get_stream()); - rmm::device_uvector rx_edgelist_minor_vertices(0, handle.get_stream()); - rmm::device_uvector rx_edgelist_weights(0, handle.get_stream()); - - std::tie( - rx_edgelist_major_vertices, rx_edgelist_minor_vertices, rx_edgelist_weights, std::ignore) = - detail::shuffle_values(handle.get_comms(), edge_first, tx_value_counts, handle.get_stream()); - - sort_and_coarsen_edgelist(rx_edgelist_major_vertices, - rx_edgelist_minor_vertices, - rx_edgelist_weights, - handle.get_stream()); - - CUDA_TRY(cudaStreamSynchronize( - handle.get_stream())); // memory blocks owned by - // coarsened_edgelist_(major_vertices,minor_vertices,weights) - // will be freed after the assignments below - - coarsened_edgelist_major_vertices = std::move(rx_edgelist_major_vertices); - coarsened_edgelist_minor_vertices = std::move(rx_edgelist_minor_vertices); - coarsened_edgelist_weights = std::move(rx_edgelist_weights); - } - - rmm::device_uvector renumber_map_labels(0, handle.get_stream()); - partition_t partition( - std::vector{}, graph_view.is_hypergraph_partitioned(), 0, 0, 0, 0); - vertex_t number_of_vertices{}; - edge_t number_of_edges{}; - std::tie(renumber_map_labels, partition, number_of_vertices, number_of_edges) = - renumber_edgelist(handle, - coarsened_edgelist_major_vertices, - coarsened_edgelist_minor_vertices, - graph_view.is_hypergraph_partitioned()); - - // 4. build a graph - - std::vector> edgelists{}; - if (graph_view.is_hypergraph_partitioned()) { - CUGRAPH_FAIL("unimplemented."); - } else { - edgelists.resize(1); - edgelists[0].p_src_vertices = store_transposed ? coarsened_edgelist_minor_vertices.data() - : coarsened_edgelist_major_vertices.data(); - edgelists[0].p_dst_vertices = store_transposed ? 
coarsened_edgelist_major_vertices.data() - : coarsened_edgelist_minor_vertices.data(); - edgelists[0].p_edge_weights = coarsened_edgelist_weights.data(); - edgelists[0].number_of_edges = static_cast(coarsened_edgelist_major_vertices.size()); - } - - return std::make_tuple( - std::make_unique>( - handle, - edgelists, - partition, - number_of_vertices, - number_of_edges, - graph_properties_t{graph_view.is_symmetric(), false}, - true), - std::move(renumber_map_labels)); -} - -template -std::enable_if_t< - !multi_gpu, - std::tuple>, - rmm::device_uvector>> -coarsen_graph( - raft::handle_t const &handle, - graph_view_t const &graph_view, - vertex_t const *labels) -{ - rmm::device_uvector coarsened_edgelist_major_vertices(0, handle.get_stream()); - rmm::device_uvector coarsened_edgelist_minor_vertices(0, handle.get_stream()); - rmm::device_uvector coarsened_edgelist_weights(0, handle.get_stream()); - std::tie(coarsened_edgelist_major_vertices, - coarsened_edgelist_minor_vertices, - coarsened_edgelist_weights) = - compressed_sparse_to_relabeled_and_sorted_and_coarsened_edgelist( - graph_view.offsets(), - graph_view.indices(), - graph_view.weights(), - labels, - labels, - vertex_t{0}, - graph_view.get_number_of_vertices(), - vertex_t{0}, - graph_view.get_number_of_vertices(), - handle.get_stream()); - - sort_and_coarsen_edgelist(coarsened_edgelist_major_vertices, - coarsened_edgelist_minor_vertices, - coarsened_edgelist_weights, - handle.get_stream()); - - auto renumber_map_labels = renumber_edgelist( - handle, coarsened_edgelist_major_vertices, coarsened_edgelist_minor_vertices); - - edgelist_t edgelist{}; - edgelist.p_src_vertices = store_transposed ? coarsened_edgelist_minor_vertices.data() - : coarsened_edgelist_major_vertices.data(); - edgelist.p_dst_vertices = store_transposed ? 
coarsened_edgelist_major_vertices.data() - : coarsened_edgelist_minor_vertices.data(); - edgelist.p_edge_weights = coarsened_edgelist_weights.data(); - edgelist.number_of_edges = static_cast(coarsened_edgelist_major_vertices.size()); - - return std::make_tuple( - std::make_unique>( - handle, - edgelist, - static_cast(renumber_map_labels.size()), - graph_properties_t{graph_view.is_symmetric(), false}, - true), - std::move(renumber_map_labels)); -} - -template -rmm::device_uvector relabel( - raft::handle_t const &handle, - rmm::device_uvector const &old_labels, - std::tuple, rmm::device_uvector> const - &old_new_label_pairs) -{ - double constexpr load_factor = 0.7; - - rmm::device_uvector new_labels(0, handle.get_stream()); - - if (multi_gpu) { - auto &comm = handle.get_comms(); - auto const comm_size = comm.get_size(); - - auto key_func = detail::compute_gpu_id_from_vertex_t{comm_size}; - - // find unique old labels (to be relabeled) - - rmm::device_uvector unique_old_labels(old_labels, handle.get_stream()); - thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - unique_old_labels.begin(), - unique_old_labels.end()); - auto it = thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - unique_old_labels.begin(), - unique_old_labels.end()); - unique_old_labels.resize(thrust::distance(unique_old_labels.begin(), it), handle.get_stream()); - unique_old_labels.shrink_to_fit(handle.get_stream()); - - // collect new labels for the unique old labels - - rmm::device_uvector new_labels_for_unique_old_labels(0, handle.get_stream()); - { - // shuffle the old_new_label_pairs based on applying the compute_gpu_id_from_vertex_t functor - // to the old labels - - rmm::device_uvector rx_label_pair_old_labels(0, handle.get_stream()); - rmm::device_uvector rx_label_pair_new_labels(0, handle.get_stream()); - { - rmm::device_uvector label_pair_old_labels(thrust::get<0>(old_new_label_pairs), - handle.get_stream()); - rmm::device_uvector label_pair_new_labels(thrust::get<1>(old_new_label_pairs), - handle.get_stream()); - auto pair_first = thrust::make_zip_iterator( - thrust::make_tuple(label_pair_old_labels.begin(), label_pair_new_labels.begin())); - thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - pair_first, - pair_first + thrust::get<0>(old_new_label_pairs).size(), - [key_func] __device__(auto lhs, auto rhs) { - return key_func(thrust::get<0>(lhs)) < key_func(thrust::get<0>(rhs)); - }); - auto key_first = thrust::make_transform_iterator( - label_pair_old_labels.begin(), [key_func] __device__(auto val) { return key_func(val); }); - rmm::device_uvector tx_value_counts(comm_size, handle.get_stream()); - thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - key_first, - key_first + label_pair_old_labels.size(), - thrust::make_constant_iterator(size_t{1}), - thrust::make_discard_iterator(), - tx_value_counts.begin()); - - std::tie(rx_label_pair_old_labels, rx_label_pair_new_labels, std::ignore) = - cugraph::experimental::detail::shuffle_values( - handle.get_comms(), pair_first, tx_value_counts, handle.get_comms()); - - CUDA_TRY(cudaStreamSynchronize( - handle.get_stream())); // label_pair_old_labels and label_pair_new_labels will become - // out-of-scope - } - - // update intermediate relabel map - - cuco::static_map relabel_map{ - static_cast(static_cast(rx_label_pair_old_labels.size()) / load_factor), - invalid_vertex_id::value, - invalid_vertex_id::value}; - - auto pair_first = 
thrust::make_transform_iterator( - thrust::make_zip_iterator( - thrust::make_tuple(rx_label_pair_old_labels.begin(), rx_label_pair_new_labels.begin())), - [] __device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); - }); - relabel_map.insert(pair_first, pair_first + rx_label_pair_old_labels.size()); - - rx_label_pair_old_labels.resize(0, handle.get_stream()); - rx_label_pair_new_labels.resize(0, handle.get_stream()); - rx_label_pair_old_labels.shrink_to_fit(handle.get_stream()); - rx_label_pair_new_labels.shrink_to_fit(handle.get_stream()); - - // shuffle unique_old_labels, relabel using the intermediate relabel map, and shuffle back - - { - thrust::sort( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - unique_old_labels.begin(), - unique_old_labels.end(), - [key_func] __device__(auto lhs, auto rhs) { return key_func(lhs) < key_func(rhs); }); - - auto key_first = thrust::make_transform_iterator( - unique_old_labels.begin(), [key_func] __device__(auto val) { return key_func(val); }); - rmm::device_uvector tx_value_counts(comm_size, handle.get_stream()); - thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - key_first, - key_first + unique_old_labels.size(), - thrust::make_constant_iterator(size_t{1}), - thrust::make_discard_iterator(), - tx_value_counts.begin()); - - rmm::device_uvector rx_unique_old_labels(0, handle.get_stream()); - rmm::device_uvector rx_value_counts(0, handle.get_stream()); - - std::tie(rx_unique_old_labels, rx_value_counts) = - cugraph::experimental::detail::shuffle_values( - handle.get_comms(), unique_old_labels.begin(), tx_value_counts, handle.get_stream()); - - CUDA_TRY(cudaStreamSynchronize( - handle.get_stream())); // cuco::static_map currently does not take stream - - relabel_map.find( - rx_unique_old_labels.begin(), - rx_unique_old_labels.end(), - rx_unique_old_labels - .begin()); // now rx_unique_old_lables hold new labels for the corresponding old labels - - std::tie(new_labels_for_unique_old_labels, std::ignore) = - cugraph::experimental::detail::shuffle_values( - handle.get_comms(), rx_unique_old_labels.begin(), rx_value_counts, handle.get_stream()); - - CUDA_TRY(cudaStreamSynchronize( - handle.get_stream())); // tx_value_counts & rx_value_counts will become out-of-scope - } - } - - cuco::static_map relabel_map( - static_cast(static_cast(unique_old_labels.size()) / load_factor), - invalid_vertex_id::value, - invalid_vertex_id::value); - - auto pair_first = thrust::make_transform_iterator( - thrust::make_zip_iterator( - thrust::make_tuple(unique_old_labels.begin(), new_labels_for_unique_old_labels.begin())), - [] __device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); - }); - - relabel_map.insert(pair_first, pair_first + unique_old_labels.size()); - new_labels.resize(old_labels.size(), handle.get_stream()); - relabel_map.find(old_labels.begin(), old_labels.end(), new_labels.begin()); - } else { - cuco::static_map relabel_map( - static_cast(static_cast(old_new_label_pairs.size()) / load_factor), - invalid_vertex_id::value, - invalid_vertex_id::value); - - auto pair_first = thrust::make_transform_iterator( - thrust::make_zip_iterator(thrust::make_tuple(std::get<0>(old_new_label_pairs).begin(), - std::get<1>(old_new_label_pairs).begin())), - [] __device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); - }); - - relabel_map.insert(pair_first, pair_first + old_new_label_pairs.size()); - 
new_labels.resize(old_labels.size(), handle.get_stream()); - relabel_map.find(old_labels.begin(), old_labels.end(), new_labels.begin()); - } - - return std::move(new_labels); -} - -// explicit instantiation - -template std::tuple>, - rmm::device_uvector> -coarsen_graph(raft::handle_t const &handle, - graph_view_t const &graph_view, - int32_t const *labels); - -template std::tuple>, - rmm::device_uvector> -coarsen_graph(raft::handle_t const &handle, - graph_view_t const &graph_view, - int32_t const *labels); - -} // namespace experimental -} // namespace cugraph diff --git a/cpp/src/experimental/relabel.cu b/cpp/src/experimental/relabel.cu new file mode 100644 index 00000000000..3a21c465db2 --- /dev/null +++ b/cpp/src/experimental/relabel.cu @@ -0,0 +1,230 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace cugraph { +namespace experimental { + +template +rmm::device_uvector relabel( + raft::handle_t const &handle, + rmm::device_uvector const &old_labels, + std::tuple, rmm::device_uvector> const + &old_new_label_pairs) +{ + double constexpr load_factor = 0.7; + + rmm::device_uvector new_labels(0, handle.get_stream()); + + if (multi_gpu) { + auto &comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + + auto key_func = detail::compute_gpu_id_from_vertex_t{comm_size}; + + // find unique old labels (to be relabeled) + + rmm::device_uvector unique_old_labels(old_labels, handle.get_stream()); + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + unique_old_labels.begin(), + unique_old_labels.end()); + auto it = thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + unique_old_labels.begin(), + unique_old_labels.end()); + unique_old_labels.resize(thrust::distance(unique_old_labels.begin(), it), handle.get_stream()); + unique_old_labels.shrink_to_fit(handle.get_stream()); + + // collect new labels for the unique old labels + + rmm::device_uvector new_labels_for_unique_old_labels(0, handle.get_stream()); + { + // shuffle the old_new_label_pairs based on applying the compute_gpu_id_from_vertex_t functor + // to the old labels + + rmm::device_uvector rx_label_pair_old_labels(0, handle.get_stream()); + rmm::device_uvector rx_label_pair_new_labels(0, handle.get_stream()); + { + rmm::device_uvector label_pair_old_labels(std::get<0>(old_new_label_pairs), + handle.get_stream()); + rmm::device_uvector label_pair_new_labels(std::get<1>(old_new_label_pairs), + handle.get_stream()); + auto pair_first = thrust::make_zip_iterator( + thrust::make_tuple(label_pair_old_labels.begin(), label_pair_new_labels.begin())); + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + pair_first, + pair_first + std::get<0>(old_new_label_pairs).size(), + 
[key_func] __device__(auto lhs, auto rhs) { + return key_func(thrust::get<0>(lhs)) < key_func(thrust::get<0>(rhs)); + }); + auto key_first = thrust::make_transform_iterator( + label_pair_old_labels.begin(), [key_func] __device__(auto val) { return key_func(val); }); + rmm::device_uvector tx_value_counts(comm_size, handle.get_stream()); + thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + key_first, + key_first + label_pair_old_labels.size(), + thrust::make_constant_iterator(size_t{1}), + thrust::make_discard_iterator(), + tx_value_counts.begin()); + + std::tie(rx_label_pair_old_labels, rx_label_pair_new_labels, std::ignore) = + cugraph::experimental::detail::shuffle_values( + handle.get_comms(), pair_first, tx_value_counts, handle.get_stream()); + + CUDA_TRY(cudaStreamSynchronize( + handle.get_stream())); // label_pair_old_labels and label_pair_new_labels will become + // out-of-scope + } + + // update intermediate relabel map + + cuco::static_map relabel_map{ + static_cast(static_cast(rx_label_pair_old_labels.size()) / load_factor), + invalid_vertex_id::value, + invalid_vertex_id::value}; + + auto pair_first = thrust::make_transform_iterator( + thrust::make_zip_iterator( + thrust::make_tuple(rx_label_pair_old_labels.begin(), rx_label_pair_new_labels.begin())), + [] __device__(auto val) { + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + }); + relabel_map.insert(pair_first, pair_first + rx_label_pair_old_labels.size()); + + rx_label_pair_old_labels.resize(0, handle.get_stream()); + rx_label_pair_new_labels.resize(0, handle.get_stream()); + rx_label_pair_old_labels.shrink_to_fit(handle.get_stream()); + rx_label_pair_new_labels.shrink_to_fit(handle.get_stream()); + + // shuffle unique_old_labels, relabel using the intermediate relabel map, and shuffle back + + { + thrust::sort( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + unique_old_labels.begin(), + unique_old_labels.end(), + [key_func] __device__(auto lhs, auto rhs) { return key_func(lhs) < key_func(rhs); }); + + auto key_first = thrust::make_transform_iterator( + unique_old_labels.begin(), [key_func] __device__(auto val) { return key_func(val); }); + rmm::device_uvector tx_value_counts(comm_size, handle.get_stream()); + thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + key_first, + key_first + unique_old_labels.size(), + thrust::make_constant_iterator(size_t{1}), + thrust::make_discard_iterator(), + tx_value_counts.begin()); + + rmm::device_uvector rx_unique_old_labels(0, handle.get_stream()); + rmm::device_uvector rx_value_counts(0, handle.get_stream()); + + std::tie(rx_unique_old_labels, rx_value_counts) = + cugraph::experimental::detail::shuffle_values( + handle.get_comms(), unique_old_labels.begin(), tx_value_counts, handle.get_stream()); + + CUDA_TRY(cudaStreamSynchronize( + handle.get_stream())); // cuco::static_map currently does not take stream + + relabel_map.find( + rx_unique_old_labels.begin(), + rx_unique_old_labels.end(), + rx_unique_old_labels + .begin()); // now rx_unique_old_lables hold new labels for the corresponding old labels + + std::tie(new_labels_for_unique_old_labels, std::ignore) = + cugraph::experimental::detail::shuffle_values( + handle.get_comms(), rx_unique_old_labels.begin(), rx_value_counts, handle.get_stream()); + + CUDA_TRY(cudaStreamSynchronize( + handle.get_stream())); // tx_value_counts & rx_value_counts will become out-of-scope + } + } + + cuco::static_map relabel_map( + 
static_cast(static_cast(unique_old_labels.size()) / load_factor), + invalid_vertex_id::value, + invalid_vertex_id::value); + + auto pair_first = thrust::make_transform_iterator( + thrust::make_zip_iterator( + thrust::make_tuple(unique_old_labels.begin(), new_labels_for_unique_old_labels.begin())), + [] __device__(auto val) { + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + }); + + relabel_map.insert(pair_first, pair_first + unique_old_labels.size()); + new_labels.resize(old_labels.size(), handle.get_stream()); + relabel_map.find(old_labels.begin(), old_labels.end(), new_labels.begin()); + } else { + cuco::static_map relabel_map( + static_cast(static_cast(std::get<0>(old_new_label_pairs).size()) / + load_factor), + invalid_vertex_id::value, + invalid_vertex_id::value); + + auto pair_first = thrust::make_transform_iterator( + thrust::make_zip_iterator(thrust::make_tuple(std::get<0>(old_new_label_pairs).begin(), + std::get<1>(old_new_label_pairs).begin())), + [] __device__(auto val) { + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + }); + + relabel_map.insert(pair_first, pair_first + std::get<0>(old_new_label_pairs).size()); + new_labels.resize(old_labels.size(), handle.get_stream()); + relabel_map.find(old_labels.begin(), old_labels.end(), new_labels.begin()); + } + + return std::move(new_labels); +} + +// explicit instantiation + +template rmm::device_uvector relabel( + raft::handle_t const &handle, + rmm::device_uvector const &old_labels, + std::tuple, rmm::device_uvector> const + &old_new_label_pairs); + +template rmm::device_uvector relabel( + raft::handle_t const &handle, + rmm::device_uvector const &old_labels, + std::tuple, rmm::device_uvector> const + &old_new_label_pairs); + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/src/experimental/renumber_edgelist.cu b/cpp/src/experimental/renumber_edgelist.cu new file mode 100644 index 00000000000..1c34d5b1cb7 --- /dev/null +++ b/cpp/src/experimental/renumber_edgelist.cu @@ -0,0 +1,408 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace cugraph { +namespace experimental { +namespace detail { + +template +rmm::device_uvector compute_renumber_map( + raft::handle_t const &handle, + rmm::device_uvector const &edgelist_major_vertices, + rmm::device_uvector const &edgelist_minor_vertices) +{ + // FIXME: compare this sort based approach with hash based approach in both speed and memory + // footprint + + // 1. 
acquire (unique major label, count) pairs + + rmm::device_uvector tmp_labels(edgelist_major_vertices, handle.get_stream()); + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + tmp_labels.begin(), + tmp_labels.end()); + rmm::device_uvector major_labels(tmp_labels.size(), handle.get_stream()); + rmm::device_uvector major_counts(major_labels.size(), handle.get_stream()); + auto major_pair_it = + thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + tmp_labels.begin(), + tmp_labels.end(), + thrust::make_constant_iterator(edge_t{1}), + major_labels.begin(), + major_counts.begin()); + tmp_labels.resize(0, handle.get_stream()); + tmp_labels.shrink_to_fit(handle.get_stream()); + major_labels.resize(thrust::distance(major_labels.begin(), thrust::get<0>(major_pair_it)), + handle.get_stream()); + major_counts.resize(major_labels.size(), handle.get_stream()); + major_labels.shrink_to_fit(handle.get_stream()); + major_counts.shrink_to_fit(handle.get_stream()); + + // 2. acquire unique minor labels + + rmm::device_uvector minor_labels(edgelist_minor_vertices, handle.get_stream()); + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + minor_labels.begin(), + minor_labels.end()); + auto minor_label_it = + thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + minor_labels.begin(), + minor_labels.end()); + minor_labels.resize(thrust::distance(minor_labels.begin(), minor_label_it), handle.get_stream()); + minor_labels.shrink_to_fit(handle.get_stream()); + + // 3. merge major and minor labels + + rmm::device_uvector merged_labels(major_labels.size() + minor_labels.size(), + handle.get_stream()); + rmm::device_uvector merged_counts(merged_labels.size(), handle.get_stream()); + thrust::merge_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + major_labels.begin(), + major_labels.end(), + minor_labels.begin(), + minor_labels.end(), + major_counts.begin(), + thrust::make_constant_iterator(edge_t{0}), + merged_labels.begin(), + merged_counts.begin()); + major_labels.resize(0, handle.get_stream()); + major_counts.resize(0, handle.get_stream()); + minor_labels.resize(0, handle.get_stream()); + major_labels.shrink_to_fit(handle.get_stream()); + major_counts.shrink_to_fit(handle.get_stream()); + minor_labels.shrink_to_fit(handle.get_stream()); + rmm::device_uvector labels(merged_labels.size(), handle.get_stream()); + rmm::device_uvector counts(labels.size(), handle.get_stream()); + auto pair_it = + thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + merged_labels.begin(), + merged_labels.end(), + merged_counts.begin(), + labels.begin(), + counts.begin()); + merged_labels.resize(0, handle.get_stream()); + merged_counts.resize(0, handle.get_stream()); + merged_labels.shrink_to_fit(handle.get_stream()); + merged_counts.shrink_to_fit(handle.get_stream()); + labels.resize(thrust::distance(labels.begin(), thrust::get<0>(pair_it)), handle.get_stream()); + counts.resize(labels.size(), handle.get_stream()); + labels.shrink_to_fit(handle.get_stream()); + counts.shrink_to_fit(handle.get_stream()); + + // 4. 
if multi-GPU, shuffle and reduce (label, count) pairs + + if (multi_gpu) { + auto &comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + + auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(labels.begin(), counts.begin())); + auto key_func = detail::compute_gpu_id_from_vertex_t{comm_size}; + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + pair_first, + pair_first + labels.size(), + [key_func] __device__(auto lhs, auto rhs) { + return key_func(thrust::get<0>(lhs)) < key_func(thrust::get<0>(rhs)); + }); + auto key_first = thrust::make_transform_iterator( + labels.begin(), [key_func] __device__(auto val) { return key_func(val); }); + rmm::device_uvector tx_value_counts(comm_size, handle.get_stream()); + thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + key_first, + key_first + labels.size(), + thrust::make_constant_iterator(size_t{1}), + thrust::make_discard_iterator(), + tx_value_counts.begin()); + + rmm::device_uvector rx_labels(0, handle.get_stream()); + rmm::device_uvector rx_counts(0, handle.get_stream()); + + std::tie(rx_labels, rx_counts, std::ignore) = cugraph::experimental::detail::shuffle_values( + handle.get_comms(), pair_first, tx_value_counts, handle.get_stream()); + + labels.resize(rx_labels.size(), handle.get_stream()); + counts.resize(labels.size(), handle.get_stream()); + thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rx_labels.begin(), + rx_labels.end(), + rx_counts.begin()); + pair_it = thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rx_labels.begin(), + rx_labels.end(), + rx_counts.begin(), + labels.begin(), + counts.begin()); + rx_labels.resize(0, handle.get_stream()); + rx_counts.resize(0, handle.get_stream()); + rx_labels.shrink_to_fit(handle.get_stream()); + rx_counts.shrink_to_fit(handle.get_stream()); + labels.resize(thrust::distance(labels.begin(), thrust::get<0>(pair_it)), handle.get_stream()); + counts.resize(labels.size(), handle.get_stream()); + labels.shrink_to_fit(handle.get_stream()); + labels.shrink_to_fit(handle.get_stream()); + } + + // 5. sort by degree + + thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + counts.begin(), + counts.end(), + labels.begin(), + thrust::greater()); + + CUDA_TRY( + cudaStreamSynchronize(handle.get_stream())); // temporary rmm::devicec_uvector objects become + // out-of-scope once this function returns. + + return std::move(labels); +} + +} // namespace detail + +template +std::enable_if_t, partition_t, vertex_t, edge_t>> +renumber_edgelist(raft::handle_t const &handle, + rmm::device_uvector &edgelist_major_vertices /* [INOUT] */, + rmm::device_uvector &edgelist_minor_vertices /* [INOUT] */, + bool is_hypergraph_partitioned) +{ + auto &comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + auto &row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_size = row_comm.get_size(); + auto const row_comm_rank = row_comm.get_rank(); + auto &col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_size = col_comm.get_size(); + auto const col_comm_rank = col_comm.get_rank(); + + // 1. compute renumber map + + auto renumber_map_labels = detail::compute_renumber_map( + handle, edgelist_major_vertices, edgelist_minor_vertices); + + // 2. 
initialize partition_t object, number_of_vertices, and number_of_edges for the coarsened + // graph + + auto vertex_partition_counts = host_scalar_allgather( + comm, static_cast(renumber_map_labels.size()), handle.get_stream()); + std::vector vertex_partition_offsets(comm_size + 1, 0); + std::partial_sum(vertex_partition_counts.begin(), + vertex_partition_counts.end(), + vertex_partition_offsets.begin() + 1); + + partition_t partition(vertex_partition_offsets, + is_hypergraph_partitioned, + row_comm_size, + col_comm_size, + row_comm_rank, + col_comm_rank); + + auto number_of_vertices = vertex_partition_offsets.back(); + auto number_of_edges = host_scalar_allreduce( + comm, static_cast(edgelist_major_vertices.size()), handle.get_stream()); + + // 3. renumber edges + + if (is_hypergraph_partitioned) { + CUGRAPH_FAIL("unimplemented."); + } else { + double constexpr load_factor = 0.7; + + // FIXME: compare this hash based approach with a binary search based approach in both memory + // footprint and execution time + + { + vertex_t major_first{}; + vertex_t major_last{}; + std::tie(major_first, major_last) = partition.get_matrix_partition_major_range(0); + rmm::device_uvector renumber_map_major_labels(major_last - major_first, + handle.get_stream()); + std::vector recvcounts(row_comm_size); + for (int i = 0; i < row_comm_size; ++i) { + recvcounts[i] = partition.get_vertex_partition_size(row_comm_rank * row_comm_size + i); + } + std::vector displacements(row_comm_size, 0); + std::partial_sum(recvcounts.begin(), recvcounts.end() - 1, displacements.begin() + 1); + device_allgatherv(row_comm, + renumber_map_labels.begin(), + renumber_map_major_labels.begin(), + recvcounts, + displacements, + handle.get_stream()); + + CUDA_TRY(cudaStreamSynchronize( + handle.get_stream())); // cuco::static_map currently does not take stream + + cuco::static_map renumber_map{ + static_cast(static_cast(renumber_map_major_labels.size()) / load_factor), + invalid_vertex_id::value, + invalid_vertex_id::value}; + auto pair_first = thrust::make_transform_iterator( + thrust::make_zip_iterator(thrust::make_tuple(renumber_map_major_labels.begin(), + thrust::make_counting_iterator(major_first))), + [] __device__(auto val) { + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + }); + renumber_map.insert(pair_first, pair_first + renumber_map_major_labels.size()); + renumber_map.find(edgelist_major_vertices.begin(), + edgelist_major_vertices.end(), + edgelist_major_vertices.begin()); + } + + { + vertex_t minor_first{}; + vertex_t minor_last{}; + std::tie(minor_first, minor_last) = partition.get_matrix_partition_minor_range(); + rmm::device_uvector renumber_map_minor_labels(minor_last - minor_first, + handle.get_stream()); + + // FIXME: this P2P is unnecessary if we apply the partitioning scheme used with hypergraph + // partitioning + auto comm_src_rank = row_comm_rank * col_comm_size + col_comm_rank; + auto comm_dst_rank = (comm_rank % col_comm_size) * row_comm_size + comm_rank / col_comm_size; + // FIXME: this branch may be no longer necessary with NCCL backend + if (comm_src_rank == comm_rank) { + assert(comm_dst_rank == comm_rank); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + renumber_map_labels.begin(), + renumber_map_labels.end(), + renumber_map_minor_labels.begin() + + (partition.get_vertex_partition_first(comm_src_rank) - + partition.get_vertex_partition_first(row_comm_rank * col_comm_size))); + } else { + device_sendrecv(comm, + renumber_map_labels.begin(), + 
renumber_map_labels.size(), + comm_dst_rank, + renumber_map_minor_labels.begin() + + (partition.get_vertex_partition_first(comm_src_rank) - + partition.get_vertex_partition_first(row_comm_rank * col_comm_size)), + static_cast(partition.get_vertex_partition_size(comm_src_rank)), + comm_src_rank, + handle.get_stream()); + } + + // FIXME: these broadcast operations can be placed between ncclGroupStart() and + // ncclGroupEnd() + for (int i = 0; i < col_comm_size; ++i) { + auto offset = partition.get_vertex_partition_first(row_comm_rank * col_comm_size + i) - + partition.get_vertex_partition_first(row_comm_rank * col_comm_size); + auto count = partition.get_vertex_partition_size(row_comm_rank * col_comm_size + i); + device_bcast(col_comm, + renumber_map_minor_labels.begin() + offset, + renumber_map_minor_labels.begin() + offset, + count, + i, + handle.get_stream()); + } + + CUDA_TRY(cudaStreamSynchronize( + handle.get_stream())); // cuco::static_map currently does not take stream + + cuco::static_map renumber_map{ + static_cast(static_cast(renumber_map_minor_labels.size()) / load_factor), + invalid_vertex_id::value, + invalid_vertex_id::value}; + auto pair_first = thrust::make_transform_iterator( + thrust::make_zip_iterator(thrust::make_tuple(renumber_map_minor_labels.begin(), + thrust::make_counting_iterator(minor_first))), + [] __device__(auto val) { + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + }); + renumber_map.insert(pair_first, pair_first + renumber_map_minor_labels.size()); + renumber_map.find(edgelist_minor_vertices.begin(), + edgelist_minor_vertices.end(), + edgelist_minor_vertices.begin()); + } + } + + return std::make_tuple( + std::move(renumber_map_labels), partition, number_of_vertices, number_of_edges); +} + +template +std::enable_if_t> renumber_edgelist( + raft::handle_t const &handle, + rmm::device_uvector &edgelist_major_vertices /* [INOUT] */, + rmm::device_uvector &edgelist_minor_vertices /* [INOUT] */) +{ + auto renumber_map_labels = detail::compute_renumber_map( + handle, edgelist_major_vertices, edgelist_minor_vertices); + + double constexpr load_factor = 0.7; + + // FIXME: compare this hash based approach with a binary search based approach in both memory + // footprint and execution time + + cuco::static_map renumber_map{ + static_cast(static_cast(renumber_map_labels.size()) / load_factor), + invalid_vertex_id::value, + invalid_vertex_id::value}; + auto pair_first = thrust::make_transform_iterator( + thrust::make_zip_iterator( + thrust::make_tuple(renumber_map_labels.begin(), thrust::make_counting_iterator(vertex_t{0}))), + [] __device__(auto val) { + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + }); + renumber_map.insert(pair_first, pair_first + renumber_map_labels.size()); + renumber_map.find(edgelist_major_vertices.begin(), + edgelist_major_vertices.end(), + edgelist_major_vertices.begin()); + renumber_map.find(edgelist_minor_vertices.begin(), + edgelist_minor_vertices.end(), + edgelist_minor_vertices.begin()); + + return std::move(renumber_map_labels); +} + +// explicit instantiation + +template std::tuple, partition_t, int32_t, int32_t> +renumber_edgelist( + raft::handle_t const &handle, + rmm::device_uvector &edgelist_major_vertices /* [INOUT] */, + rmm::device_uvector &edgelist_minor_vertices /* [INOUT] */, + bool is_hypergraph_partitioned); + +template rmm::device_uvector renumber_edgelist( + raft::handle_t const &handle, + rmm::device_uvector &edgelist_major_vertices /* [INOUT] */, + rmm::device_uvector 
&edgelist_minor_vertices /* [INOUT] */); + +} // namespace experimental +} // namespace cugraph From 923c6573a7fd75887ad9ba239139638d731d6fb9 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 22 Dec 2020 15:54:13 -0500 Subject: [PATCH 059/343] split comm_utils.cuh to host_scalar_comm.cuh, device_comm.cuh, and dataframe_buffer.cuh --- .../experimental/detail/graph_utils.cuh | 7 +- .../patterns/any_of_adj_matrix_row.cuh | 2 +- .../patterns/copy_to_adj_matrix_row_col.cuh | 39 +- .../copy_v_transform_reduce_in_out_nbr.cuh | 12 +- ...ransform_reduce_key_aggregated_out_nbr.cuh | 15 +- cpp/include/patterns/count_if_e.cuh | 2 +- cpp/include/patterns/count_if_v.cuh | 2 +- cpp/include/patterns/reduce_v.cuh | 2 +- ...orm_reduce_by_adj_matrix_row_col_key_e.cuh | 39 +- cpp/include/patterns/transform_reduce_e.cuh | 1 + cpp/include/patterns/transform_reduce_v.cuh | 2 +- ...transform_reduce_v_with_adj_matrix_row.cuh | 2 +- .../update_frontier_v_push_if_out_nbr.cuh | 3 +- cpp/include/patterns/vertex_frontier.cuh | 2 +- cpp/include/utilities/dataframe_buffer.cuh | 134 ++++++ .../{comm_utils.cuh => device_comm.cuh} | 444 ------------------ cpp/include/utilities/host_scalar_comm.cuh | 389 +++++++++++++++ cpp/src/experimental/coarsen_graph.cu | 1 - cpp/src/experimental/graph.cu | 2 +- cpp/src/experimental/graph_view.cu | 2 +- cpp/src/experimental/louvain.cuh | 1 - cpp/src/experimental/relabel.cu | 1 - cpp/src/experimental/renumber_edgelist.cu | 3 +- 23 files changed, 596 insertions(+), 511 deletions(-) create mode 100644 cpp/include/utilities/dataframe_buffer.cuh rename cpp/include/utilities/{comm_utils.cuh => device_comm.cuh} (67%) create mode 100644 cpp/include/utilities/host_scalar_comm.cuh diff --git a/cpp/include/experimental/detail/graph_utils.cuh b/cpp/include/experimental/detail/graph_utils.cuh index e68eaaa463a..bd6053b1e61 100644 --- a/cpp/include/experimental/detail/graph_utils.cuh +++ b/cpp/include/experimental/detail/graph_utils.cuh @@ -17,7 +17,8 @@ #include #include -#include +#include +#include #include #include @@ -199,10 +200,10 @@ auto shuffle_values(raft::comms::comms_t const &comm, std::partial_sum(rx_counts.begin(), rx_counts.end() - 1, rx_offsets.begin() + 1); auto rx_value_buffer = - allocate_comm_buffer::value_type>( + allocate_dataframe_buffer::value_type>( rx_offsets.back(), stream); auto rx_value_first = - get_comm_buffer_begin::value_type>( + get_dataframe_buffer_begin::value_type>( rx_value_buffer); int num_tx_dst_ranks{0}; diff --git a/cpp/include/patterns/any_of_adj_matrix_row.cuh b/cpp/include/patterns/any_of_adj_matrix_row.cuh index 199e7c230ef..81fd1956886 100644 --- a/cpp/include/patterns/any_of_adj_matrix_row.cuh +++ b/cpp/include/patterns/any_of_adj_matrix_row.cuh @@ -16,8 +16,8 @@ #pragma once #include -#include #include +#include #include #include diff --git a/cpp/include/patterns/copy_to_adj_matrix_row_col.cuh b/cpp/include/patterns/copy_to_adj_matrix_row_col.cuh index d13066185da..874c7cc6eb5 100644 --- a/cpp/include/patterns/copy_to_adj_matrix_row_col.cuh +++ b/cpp/include/patterns/copy_to_adj_matrix_row_col.cuh @@ -18,8 +18,10 @@ #include #include #include -#include +#include +#include #include +#include #include #include @@ -120,10 +122,10 @@ void copy_to_matrix_major(raft::handle_t const& handle, for (int i = 0; i < row_comm_size; ++i) { rmm::device_uvector rx_vertices(row_comm_rank == i ? 
size_t{0} : rx_counts[i], handle.get_stream()); - auto rx_tmp_buffer = - allocate_comm_buffer::value_type>( - rx_counts[i], handle.get_stream()); - auto rx_value_first = get_comm_buffer_begin< + auto rx_tmp_buffer = allocate_dataframe_buffer< + typename std::iterator_traits::value_type>(rx_counts[i], + handle.get_stream()); + auto rx_value_first = get_dataframe_buffer_begin< typename std::iterator_traits::value_type>(rx_tmp_buffer); if (row_comm_rank == i) { @@ -314,12 +316,11 @@ void copy_to_matrix_minor(raft::handle_t const& handle, vertex_partition_device_t vertex_partition(graph_view); rmm::device_uvector dst_vertices(rx_count, handle.get_stream()); - auto dst_tmp_buffer = - allocate_comm_buffer::value_type>( - rx_count, handle.get_stream()); - auto dst_value_first = - get_comm_buffer_begin::value_type>( - dst_tmp_buffer); + auto dst_tmp_buffer = allocate_dataframe_buffer< + typename std::iterator_traits::value_type>(rx_count, + handle.get_stream()); + auto dst_value_first = get_dataframe_buffer_begin< + typename std::iterator_traits::value_type>(dst_tmp_buffer); if (comm_src_rank == comm_rank) { thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), vertex_first, @@ -335,10 +336,10 @@ void copy_to_matrix_minor(raft::handle_t const& handle, vertex_value_input_first, dst_value_first); } else { - auto src_tmp_buffer = - allocate_comm_buffer::value_type>( - tx_count, handle.get_stream()); - auto src_value_first = get_comm_buffer_begin< + auto src_tmp_buffer = allocate_dataframe_buffer< + typename std::iterator_traits::value_type>(tx_count, + handle.get_stream()); + auto src_value_first = get_dataframe_buffer_begin< typename std::iterator_traits::value_type>(src_tmp_buffer); auto map_first = @@ -383,10 +384,10 @@ void copy_to_matrix_minor(raft::handle_t const& handle, for (int i = 0; i < col_comm_size; ++i) { rmm::device_uvector rx_vertices(col_comm_rank == i ? size_t{0} : rx_counts[i], handle.get_stream()); - auto rx_tmp_buffer = - allocate_comm_buffer::value_type>( - rx_counts[i], handle.get_stream()); - auto rx_value_first = get_comm_buffer_begin< + auto rx_tmp_buffer = allocate_dataframe_buffer< + typename std::iterator_traits::value_type>(rx_counts[i], + handle.get_stream()); + auto rx_value_first = get_dataframe_buffer_begin< typename std::iterator_traits::value_type>(rx_tmp_buffer); // FIXME: these broadcast operations can be placed between ncclGroupStart() and diff --git a/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh b/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh index 688255fac04..bca4abf5424 100644 --- a/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh +++ b/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh @@ -19,7 +19,8 @@ #include #include #include -#include +#include +#include #include #include @@ -377,8 +378,8 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, ? 
graph_view.get_number_of_local_adj_matrix_partition_rows() : graph_view.get_number_of_local_adj_matrix_partition_cols() : vertex_t{0}; - auto minor_tmp_buffer = allocate_comm_buffer(minor_tmp_buffer_size, handle.get_stream()); - auto minor_buffer_first = get_comm_buffer_begin(minor_tmp_buffer); + auto minor_tmp_buffer = allocate_dataframe_buffer(minor_tmp_buffer_size, handle.get_stream()); + auto minor_buffer_first = get_dataframe_buffer_begin(minor_tmp_buffer); if (in != GraphViewType::is_adj_matrix_transposed) { auto minor_init = init; @@ -424,8 +425,9 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, : graph_view.get_vertex_partition_size(col_comm_rank * row_comm_size + i) : vertex_t{0}; } - auto major_tmp_buffer = allocate_comm_buffer(major_tmp_buffer_size, handle.get_stream()); - auto major_buffer_first = get_comm_buffer_begin(major_tmp_buffer); + auto major_tmp_buffer = + allocate_dataframe_buffer(major_tmp_buffer_size, handle.get_stream()); + auto major_buffer_first = get_dataframe_buffer_begin(major_tmp_buffer); auto major_init = T{}; if (in == GraphViewType::is_adj_matrix_transposed) { diff --git a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh index 345cd52c10d..cb4481dfd01 100644 --- a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh +++ b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -298,7 +299,7 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( } rmm::device_uvector major_vertices(0, handle.get_stream()); - auto e_op_result_buffer = allocate_comm_buffer(0, handle.get_stream()); + auto e_op_result_buffer = allocate_dataframe_buffer(0, handle.get_stream()); for (size_t i = 0; i < loop_count; ++i) { matrix_partition_device_t matrix_partition( graph_view, (GraphViewType::is_multi_gpu && !graph_view.is_hypergraph_partitioned()) ? 
0 : i); @@ -411,8 +412,8 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( } auto tmp_e_op_result_buffer = - allocate_comm_buffer(tmp_major_vertices.size(), handle.get_stream()); - auto tmp_e_op_result_buffer_first = get_comm_buffer_begin(tmp_e_op_result_buffer); + allocate_dataframe_buffer(tmp_major_vertices.size(), handle.get_stream()); + auto tmp_e_op_result_buffer_first = get_dataframe_buffer_begin(tmp_e_op_result_buffer); triplet_first = thrust::make_zip_iterator(thrust::make_tuple( tmp_major_vertices.begin(), tmp_minor_keys.begin(), tmp_key_aggregated_edge_weights.begin())); @@ -463,7 +464,7 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( : size_t{0}, handle.get_stream()); auto rx_tmp_e_op_result_buffer = - allocate_comm_buffer(rx_major_vertices.size(), handle.get_stream()); + allocate_dataframe_buffer(rx_major_vertices.size(), handle.get_stream()); device_gatherv(sub_comm, tmp_major_vertices.data(), @@ -475,7 +476,7 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( handle.get_stream()); device_gatherv(sub_comm, tmp_e_op_result_buffer_first, - get_comm_buffer_begin(rx_tmp_e_op_result_buffer), + get_dataframe_buffer_begin(rx_tmp_e_op_result_buffer), tmp_major_vertices.size(), rx_sizes, rx_displs, @@ -508,7 +509,7 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), major_vertices.begin(), major_vertices.end(), - get_comm_buffer_begin(e_op_result_buffer)); + get_dataframe_buffer_begin(e_op_result_buffer)); auto num_uniques = thrust::count_if( rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), @@ -535,7 +536,7 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), major_vertices.begin(), major_vertices.end(), - get_comm_buffer_begin(e_op_result_buffer), + get_dataframe_buffer_begin(e_op_result_buffer), thrust::make_discard_iterator(), thrust::make_permutation_iterator( vertex_value_output_first, diff --git a/cpp/include/patterns/count_if_e.cuh b/cpp/include/patterns/count_if_e.cuh index 4f0f0a7a43e..7ef5356f4f7 100644 --- a/cpp/include/patterns/count_if_e.cuh +++ b/cpp/include/patterns/count_if_e.cuh @@ -18,8 +18,8 @@ #include #include #include -#include #include +#include #include #include diff --git a/cpp/include/patterns/count_if_v.cuh b/cpp/include/patterns/count_if_v.cuh index c90b259cdde..b45f8cd0705 100644 --- a/cpp/include/patterns/count_if_v.cuh +++ b/cpp/include/patterns/count_if_v.cuh @@ -16,8 +16,8 @@ #pragma once #include -#include #include +#include #include #include diff --git a/cpp/include/patterns/reduce_v.cuh b/cpp/include/patterns/reduce_v.cuh index 12224dc55f4..0e0b7ff2ea4 100644 --- a/cpp/include/patterns/reduce_v.cuh +++ b/cpp/include/patterns/reduce_v.cuh @@ -16,8 +16,8 @@ #pragma once #include -#include #include +#include #include diff --git a/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh b/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh index cffdea057ca..bd10be6eb77 100644 --- a/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh +++ b/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh @@ -17,6 +17,7 @@ #include #include +#include #include #include @@ -130,7 +131,7 @@ template std::tuple, - decltype(allocate_comm_buffer(0, cudaStream_t{nullptr}))> + decltype(allocate_dataframe_buffer(0, cudaStream_t{nullptr}))> transform_reduce_by_adj_matrix_row_col_key_e( raft::handle_t const& handle, 
GraphViewType const& graph_view, @@ -158,7 +159,7 @@ transform_reduce_by_adj_matrix_row_col_key_e( } rmm::device_uvector keys(0, handle.get_stream()); - auto value_buffer = allocate_comm_buffer(0, handle.get_stream()); + auto value_buffer = allocate_dataframe_buffer(0, handle.get_stream()); for (size_t i = 0; i < loop_count; ++i) { matrix_partition_device_t matrix_partition( graph_view, (GraphViewType::is_multi_gpu && !graph_view.is_hypergraph_partitioned()) ? 0 : i); @@ -186,7 +187,7 @@ transform_reduce_by_adj_matrix_row_col_key_e( thrust::plus()); rmm::device_uvector tmp_keys(num_edges, handle.get_stream()); - auto tmp_value_buffer = allocate_comm_buffer(tmp_keys.size(), handle.get_stream()); + auto tmp_value_buffer = allocate_dataframe_buffer(tmp_keys.size(), handle.get_stream()); if (graph_view.get_vertex_partition_size(comm_root_rank) > 0) { raft::grid_1d_thread_t update_grid(graph_view.get_vertex_partition_size(comm_root_rank), @@ -206,7 +207,7 @@ transform_reduce_by_adj_matrix_row_col_key_e( adj_matrix_row_col_key_first, e_op, tmp_keys.data(), - get_comm_buffer_begin(tmp_value_buffer)); + get_dataframe_buffer_begin(tmp_value_buffer)); } if (GraphViewType::is_multi_gpu) { @@ -216,7 +217,7 @@ transform_reduce_by_adj_matrix_row_col_key_e( thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), tmp_keys.begin(), tmp_keys.end(), - get_comm_buffer_begin(tmp_value_buffer)); + get_dataframe_buffer_begin(tmp_value_buffer)); auto num_uniques = thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), @@ -227,21 +228,21 @@ transform_reduce_by_adj_matrix_row_col_key_e( }); rmm::device_uvector unique_keys(num_uniques, handle.get_stream()); auto value_for_unique_key_buffer = - allocate_comm_buffer(unique_keys.size(), handle.get_stream()); + allocate_dataframe_buffer(unique_keys.size(), handle.get_stream()); thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), tmp_keys.begin(), tmp_keys.end(), - get_comm_buffer_begin(tmp_value_buffer), + get_dataframe_buffer_begin(tmp_value_buffer), unique_keys.begin(), - get_comm_buffer_begin(value_for_unique_key_buffer)); + get_dataframe_buffer_begin(value_for_unique_key_buffer)); auto key_func = detail::compute_gpu_id_from_vertex_t{comm_size}; thrust::sort_by_key( rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), unique_keys.begin(), unique_keys.end(), - get_comm_buffer_begin(value_for_unique_key_buffer), + get_dataframe_buffer_begin(value_for_unique_key_buffer), [key_func] __device__(auto lhs, auto rhs) { return key_func(lhs) < key_func(rhs); }); auto key_first = thrust::make_transform_iterator( @@ -255,14 +256,14 @@ transform_reduce_by_adj_matrix_row_col_key_e( tx_value_counts.begin()); rmm::device_uvector rx_unique_keys(0, handle.get_stream()); - auto rx_value_for_unique_key_buffer = allocate_comm_buffer(0, handle.get_stream()); + auto rx_value_for_unique_key_buffer = allocate_dataframe_buffer(0, handle.get_stream()); std::tie(rx_unique_keys, std::ignore) = cugraph::experimental::detail::shuffle_values( comm, unique_keys.begin(), tx_value_counts, handle.get_stream()); std::tie(rx_value_for_unique_key_buffer, std::ignore) = cugraph::experimental::detail::shuffle_values( comm, - get_comm_buffer_begin(value_for_unique_key_buffer), + get_dataframe_buffer_begin(value_for_unique_key_buffer), tx_value_counts, handle.get_stream()); @@ -281,23 +282,23 @@ transform_reduce_by_adj_matrix_row_col_key_e( // reserve address space to avoid expensive reallocation. 
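    // [editor's note] Illustrative sketch, not part of this patch: the per-rank send-count idiom
    // used just above (and throughout this commit series) before detail::shuffle_values(). Given a
    // hypothetical device vector d_target_ranks already sorted by destination rank, reducing a
    // constant iterator by key yields one count per rank. The sketch assumes every rank in
    // [0, comm_size) owns at least one element; otherwise the compacted counts would have to be
    // scattered back to their rank positions.
    //
    //   rmm::device_uvector<size_t> tx_value_counts(comm_size, handle.get_stream());
    //   thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
    //                         d_target_ranks.begin(), d_target_ranks.end(),
    //                         thrust::make_constant_iterator(size_t{1}),
    //                         thrust::make_discard_iterator(),
    //                         tx_value_counts.begin());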
// https://devblogs.nvidia.com/introducing-low-level-gpu-virtual-memory-management keys.resize(cur_size + tmp_keys.size(), handle.get_stream()); - resize_comm_buffer(value_buffer, keys.size(), handle.get_stream()); + resize_dataframe_buffer(value_buffer, keys.size(), handle.get_stream()); thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), tmp_keys.begin(), tmp_keys.end(), keys.begin() + cur_size); thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - get_comm_buffer_begin(tmp_value_buffer), - get_comm_buffer_begin(tmp_value_buffer) + tmp_keys.size(), - get_comm_buffer_begin(value_buffer) + cur_size); + get_dataframe_buffer_begin(tmp_value_buffer), + get_dataframe_buffer_begin(tmp_value_buffer) + tmp_keys.size(), + get_dataframe_buffer_begin(value_buffer) + cur_size); } if (GraphViewType::is_multi_gpu) { thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), keys.begin(), keys.end(), - get_comm_buffer_begin(value_buffer)); + get_dataframe_buffer_begin(value_buffer)); auto num_uniques = thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), @@ -308,14 +309,14 @@ transform_reduce_by_adj_matrix_row_col_key_e( }); rmm::device_uvector unique_keys(num_uniques, handle.get_stream()); auto value_for_unique_key_buffer = - allocate_comm_buffer(unique_keys.size(), handle.get_stream()); + allocate_dataframe_buffer(unique_keys.size(), handle.get_stream()); thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), keys.begin(), keys.end(), - get_comm_buffer_begin(value_buffer), + get_dataframe_buffer_begin(value_buffer), unique_keys.begin(), - get_comm_buffer_begin(value_for_unique_key_buffer)); + get_dataframe_buffer_begin(value_for_unique_key_buffer)); keys = std::move(unique_keys); value_buffer = std::move(value_for_unique_key_buffer); diff --git a/cpp/include/patterns/transform_reduce_e.cuh b/cpp/include/patterns/transform_reduce_e.cuh index 797facd4657..342add517bd 100644 --- a/cpp/include/patterns/transform_reduce_e.cuh +++ b/cpp/include/patterns/transform_reduce_e.cuh @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/cpp/include/patterns/transform_reduce_v.cuh b/cpp/include/patterns/transform_reduce_v.cuh index 02538c36f47..2eb3f03251c 100644 --- a/cpp/include/patterns/transform_reduce_v.cuh +++ b/cpp/include/patterns/transform_reduce_v.cuh @@ -16,8 +16,8 @@ #pragma once #include -#include #include +#include #include diff --git a/cpp/include/patterns/transform_reduce_v_with_adj_matrix_row.cuh b/cpp/include/patterns/transform_reduce_v_with_adj_matrix_row.cuh index f5af03d647c..703da8a318b 100644 --- a/cpp/include/patterns/transform_reduce_v_with_adj_matrix_row.cuh +++ b/cpp/include/patterns/transform_reduce_v_with_adj_matrix_row.cuh @@ -16,8 +16,8 @@ #pragma once #include -#include #include +#include #include diff --git a/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh b/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh index 0a36e105aa0..5150af1f57c 100644 --- a/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh +++ b/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh @@ -21,8 +21,9 @@ #include #include #include -#include +#include #include +#include #include #include diff --git a/cpp/include/patterns/vertex_frontier.cuh b/cpp/include/patterns/vertex_frontier.cuh index ccb9e1a5a0d..dc3a5893ef3 100644 --- a/cpp/include/patterns/vertex_frontier.cuh +++ b/cpp/include/patterns/vertex_frontier.cuh @@ 
-15,8 +15,8 @@ */ #pragma once -#include #include +#include #include #include diff --git a/cpp/include/utilities/dataframe_buffer.cuh b/cpp/include/utilities/dataframe_buffer.cuh new file mode 100644 index 00000000000..beec7fc3c8b --- /dev/null +++ b/cpp/include/utilities/dataframe_buffer.cuh @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include + +#include +#include + +#include +#include + +#include + +namespace cugraph { +namespace experimental { + +namespace detail { + +template +auto allocate_dataframe_buffer_tuple_element_impl(size_t buffer_size, cudaStream_t stream) +{ + using element_t = typename thrust::tuple_element::type; + return rmm::device_uvector(buffer_size, stream); +} + +template +auto allocate_dataframe_buffer_tuple_impl(std::index_sequence, + size_t buffer_size, + cudaStream_t stream) +{ + return std::make_tuple( + allocate_dataframe_buffer_tuple_element_impl(buffer_size, stream)...); +} + +template +void resize_dataframe_buffer_tuple_element_impl(BufferType& buffer, + size_t new_buffer_size, + cudaStream_t stream) +{ + std::get(buffer).resize(new_buffer_size, stream); + resize_dataframe_buffer_tuple_element_impl( + buffer, new_buffer_size, stream); +} + +template +void resize_dataframe_buffer_tuple_impl(BufferType& buffer, + size_t new_buffer_size, + cudaStream_t stream) +{ +} + +template +auto get_dataframe_buffer_begin_tuple_element_impl(BufferType& buffer) +{ + using element_t = typename thrust::tuple_element::type; + return std::get(buffer).begin(); +} + +template +auto get_dataframe_buffer_begin_tuple_impl(std::index_sequence, BufferType& buffer) +{ + // thrust::make_tuple instead of std::make_tuple as this is fed to thrust::make_zip_iterator. 
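  // [editor's note] Illustrative usage sketch, not part of this patch: for a thrust::tuple value
  // type the dataframe buffer introduced in this header is a structure-of-arrays (one
  // rmm::device_uvector per tuple element), and get_dataframe_buffer_begin() zips the per-element
  // iterators back into a single tuple-valued iterator. Assuming a valid cudaStream_t stream and
  // the usual thrust/rmm headers:
  //
  //   using value_t = thrust::tuple<int32_t, float>;
  //   auto buffer = allocate_dataframe_buffer<value_t>(size_t{128}, stream);
  //   // buffer is a std::tuple<rmm::device_uvector<int32_t>, rmm::device_uvector<float>>
  //   auto first = get_dataframe_buffer_begin<value_t>(buffer);
  //   thrust::fill(rmm::exec_policy(stream)->on(stream),
  //                first, first + 128, thrust::make_tuple(int32_t{0}, float{0.0f}));
  //   resize_dataframe_buffer<value_t>(buffer, size_t{256}, stream);  // grows every element vector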
+ return thrust::make_tuple( + get_dataframe_buffer_begin_tuple_element_impl(buffer)...); +} + +} // namespace detail + +template ::value>* = nullptr> +auto allocate_dataframe_buffer(size_t buffer_size, cudaStream_t stream) +{ + return rmm::device_uvector(buffer_size, stream); +} + +template ::value>* = nullptr> +auto allocate_dataframe_buffer(size_t buffer_size, cudaStream_t stream) +{ + size_t constexpr tuple_size = thrust::tuple_size::value; + return detail::allocate_dataframe_buffer_tuple_impl( + std::make_index_sequence(), buffer_size, stream); +} + +template ::value>* = nullptr> +void resize_dataframe_buffer(BufferType& buffer, size_t new_buffer_size, cudaStream_t stream) +{ + buffer.resize(new_buffer_size, stream); +} + +template ::value>* = nullptr> +void resize_dataframe_buffer(BufferType& buffer, size_t new_buffer_size, cudaStream_t stream) +{ + size_t constexpr tuple_size = thrust::tuple_size::value; + detail::resize_dataframe_buffer_tuple_impl( + buffer, new_buffer_size, stream); +} + +template ::value>* = nullptr> +auto get_dataframe_buffer_begin(BufferType& buffer) +{ + return buffer.begin(); +} + +template ::value>* = nullptr> +auto get_dataframe_buffer_begin(BufferType& buffer) +{ + size_t constexpr tuple_size = thrust::tuple_size::value; + return thrust::make_zip_iterator(detail::get_dataframe_buffer_begin_tuple_impl( + std::make_index_sequence(), buffer)); +} + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/include/utilities/comm_utils.cuh b/cpp/include/utilities/device_comm.cuh similarity index 67% rename from cpp/include/utilities/comm_utils.cuh rename to cpp/include/utilities/device_comm.cuh index 219c717e970..5eb6184aa29 100644 --- a/cpp/include/utilities/comm_utils.cuh +++ b/cpp/include/utilities/device_comm.cuh @@ -24,102 +24,13 @@ #include #include -#include #include -// FIXME: split this file to three: host_scalar_comm_utils.cuh, device_comm_utils.cuh, and -// buffer_utils.cuh namespace cugraph { namespace experimental { namespace detail { -template -struct update_vector_of_tuple_scalar_elements_from_tuple_impl { - void update(std::vector& tuple_scalar_elements, TupleType const& tuple) const - { - using element_t = typename thrust::tuple_element::type; - static_assert(sizeof(element_t) <= sizeof(int64_t)); - auto ptr = reinterpret_cast(tuple_scalar_elements.data() + I); - *ptr = thrust::get(tuple); - update_vector_of_tuple_scalar_elements_from_tuple_impl().update( - tuple_scalar_elements, tuple); - } -}; - -template -struct update_vector_of_tuple_scalar_elements_from_tuple_impl { - void update(std::vector& tuple_scalar_elements, TupleType const& tuple) const { return; } -}; - -template -struct update_tuple_from_vector_of_tuple_scalar_elements_impl { - void update(TupleType& tuple, std::vector const& tuple_scalar_elements) const - { - using element_t = typename thrust::tuple_element::type; - static_assert(sizeof(element_t) <= sizeof(int64_t)); - auto ptr = reinterpret_cast(tuple_scalar_elements.data() + I); - thrust::get(tuple) = *ptr; - update_tuple_from_vector_of_tuple_scalar_elements_impl().update( - tuple, tuple_scalar_elements); - } -}; - -template -struct update_tuple_from_vector_of_tuple_scalar_elements_impl { - void update(TupleType& tuple, std::vector const& tuple_scalar_elements) const { return; } -}; - -template -struct host_allreduce_tuple_scalar_element_impl { - void run(raft::comms::comms_t const& comm, - rmm::device_uvector& tuple_scalar_elements, - cudaStream_t stream) const - { - using element_t = typename 
thrust::tuple_element::type; - static_assert(sizeof(element_t) <= sizeof(int64_t)); - auto ptr = reinterpret_cast(tuple_scalar_elements.data() + I); - comm.allreduce(ptr, ptr, 1, raft::comms::op_t::SUM, stream); - host_allreduce_tuple_scalar_element_impl().run( - comm, tuple_scalar_elements, stream); - } -}; - -template -struct host_allreduce_tuple_scalar_element_impl { - void run(raft::comms::comms_t const& comm, - rmm::device_uvector& tuple_scalar_elements, - cudaStream_t stream) const - { - } -}; - -template -struct host_reduce_tuple_scalar_element_impl { - void run(raft::comms::comms_t const& comm, - rmm::device_uvector& tuple_scalar_elements, - int root, - cudaStream_t stream) const - { - using element_t = typename thrust::tuple_element::type; - static_assert(sizeof(element_t) <= sizeof(int64_t)); - auto ptr = reinterpret_cast(tuple_scalar_elements.data() + I); - comm.reduce(ptr, ptr, 1, raft::comms::op_t::SUM, root, stream); - host_reduce_tuple_scalar_element_impl().run( - comm, tuple_scalar_elements, root, stream); - } -}; - -template -struct host_reduce_tuple_scalar_element_impl { - void run(raft::comms::comms_t const& comm, - rmm::device_uvector& tuple_scalar_elements, - int root, - cudaStream_t stream) const - { - } -}; - template T* iter_to_raw_ptr(T* ptr) { @@ -725,317 +636,8 @@ struct device_gatherv_tuple_iterator_element_impl -auto allocate_comm_buffer_tuple_element_impl(size_t buffer_size, cudaStream_t stream) -{ - using element_t = typename thrust::tuple_element::type; - return rmm::device_uvector(buffer_size, stream); -} - -template -auto allocate_comm_buffer_tuple_impl(std::index_sequence, - size_t buffer_size, - cudaStream_t stream) -{ - return std::make_tuple( - allocate_comm_buffer_tuple_element_impl(buffer_size, stream)...); -} - -template -void resize_comm_buffer_tuple_element_impl(BufferType& buffer, size_t new_buffer_size, cudaStream_t stream) -{ - std::get(buffer).resize(new_buffer_size, stream); - resize_comm_buffer_tuple_element_impl(buffer, new_buffer_size, stream); -} - -template -void resize_comm_buffer_tuple_impl(BufferType& buffer, size_t new_buffer_size, cudaStream_t stream) {} - -template -auto get_comm_buffer_begin_tuple_element_impl(BufferType& buffer) -{ - using element_t = typename thrust::tuple_element::type; - return std::get(buffer).begin(); -} - -template -auto get_comm_buffer_begin_tuple_impl(std::index_sequence, BufferType& buffer) -{ - // thrust::make_tuple instead of std::make_tuple as this is fed to thrust::make_zip_iterator. 
- return thrust::make_tuple(get_comm_buffer_begin_tuple_element_impl(buffer)...); -} - } // namespace detail -template -std::enable_if_t::value, T> host_scalar_allreduce( - raft::comms::comms_t const& comm, T input, cudaStream_t stream) -{ - rmm::device_uvector d_input(1, stream); - raft::update_device(d_input.data(), &input, 1, stream); - comm.allreduce(d_input.data(), d_input.data(), 1, raft::comms::op_t::SUM, stream); - T h_input{}; - raft::update_host(&h_input, d_input.data(), 1, stream); - auto status = comm.sync_stream(stream); - CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); - return h_input; -} - -template -std::enable_if_t::value, T> -host_scalar_allreduce(raft::comms::comms_t const& comm, T input, cudaStream_t stream) -{ - size_t constexpr tuple_size = thrust::tuple_size::value; - std::vector h_tuple_scalar_elements(tuple_size); - rmm::device_uvector d_tuple_scalar_elements(tuple_size, stream); - T ret{}; - - detail::update_vector_of_tuple_scalar_elements_from_tuple_impl().update( - h_tuple_scalar_elements, input); - raft::update_device( - d_tuple_scalar_elements.data(), h_tuple_scalar_elements.data(), tuple_size, stream); - detail::host_allreduce_tuple_scalar_element_impl().run( - comm, d_tuple_scalar_elements, stream); - raft::update_host( - h_tuple_scalar_elements.data(), d_tuple_scalar_elements.data(), tuple_size, stream); - auto status = comm.sync_stream(stream); - CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); - detail::update_tuple_from_vector_of_tuple_scalar_elements_impl().update( - ret, h_tuple_scalar_elements); - - return ret; -} - -// Return value is valid only in root (return value may better be std::optional in C++17 or later) -template -std::enable_if_t::value, T> host_scalar_reduce( - raft::comms::comms_t const& comm, T input, int root, cudaStream_t stream) -{ - rmm::device_uvector d_input(1, stream); - raft::update_device(d_input.data(), &input, 1, stream); - comm.reduce(d_input.data(), d_input.data(), 1, raft::comms::op_t::SUM, stream); - T h_input{}; - if (comm.get_rank() == root) { raft::update_host(&h_input, d_input.data(), 1, stream); } - auto status = comm.sync_stream(stream); - CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); - return h_input; -} - -// Return value is valid only in root (return value may better be std::optional in C++17 or later) -template -std::enable_if_t::value, T> -host_scalar_reduce(raft::comms::comms_t const& comm, T input, int root, cudaStream_t stream) -{ - size_t constexpr tuple_size = thrust::tuple_size::value; - std::vector h_tuple_scalar_elements(tuple_size); - rmm::device_uvector d_tuple_scalar_elements(tuple_size, stream); - T ret{}; - - detail::update_vector_of_tuple_scalar_elements_from_tuple_impl().update( - h_tuple_scalar_elements, input); - raft::update_device( - d_tuple_scalar_elements.data(), h_tuple_scalar_elements.data(), tuple_size, stream); - detail::host_reduce_tuple_scalar_element_impl().run( - comm, d_tuple_scalar_elements, root, stream); - if (comm.get_rank() == root) { - raft::update_host( - h_tuple_scalar_elements.data(), d_tuple_scalar_elements.data(), tuple_size, stream); - } - auto status = comm.sync_stream(stream); - CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); - if (comm.get_rank() == root) { - detail::update_tuple_from_vector_of_tuple_scalar_elements_impl() - .update(ret, h_tuple_scalar_elements); - } - - return ret; -} - -template 
-std::enable_if_t::value, T> host_scalar_bcast( - raft::comms::comms_t const& comm, T input, int root, cudaStream_t stream) -{ - rmm::device_uvector d_input(1, stream); - if (comm.get_rank() == root) { raft::update_device(d_input.data(), &input, 1, stream); } - comm.bcast(d_input.data(), 1, root, stream); - auto h_input = input; - if (comm.get_rank() != root) { raft::update_host(&h_input, d_input.data(), 1, stream); } - auto status = comm.sync_stream(stream); - CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); - return h_input; -} - -template -std::enable_if_t::value, T> -host_scalar_bcast(raft::comms::comms_t const& comm, T input, int root, cudaStream_t stream) -{ - size_t constexpr tuple_size = thrust::tuple_size::value; - std::vector h_tuple_scalar_elements(tuple_size); - rmm::device_uvector d_tuple_scalar_elements(tuple_size, stream); - auto ret = input; - - if (comm.get_rank() == root) { - detail::update_vector_of_tuple_scalar_elements_from_tuple_impl() - .update(h_tuple_scalar_elements, input); - raft::update_device( - d_tuple_scalar_elements.data(), h_tuple_scalar_elements.data(), tuple_size, stream); - } - comm.bcast(d_tuple_scalar_elements.data(), d_tuple_scalar_elements.size(), root, stream); - if (comm.get_rank() != root) { - raft::update_host( - h_tuple_scalar_elements.data(), d_tuple_scalar_elements.data(), tuple_size, stream); - } - auto status = comm.sync_stream(stream); - CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); - if (comm.get_rank() != root) { - detail::update_tuple_from_vector_of_tuple_scalar_elements_impl() - .update(ret, h_tuple_scalar_elements); - } - - return ret; -} - -template -std::enable_if_t::value, std::vector> host_scalar_allgather( - raft::comms::comms_t const& comm, T input, cudaStream_t stream) -{ - std::vector rx_counts(comm.get_size(), size_t{1}); - std::vector displacements(rx_counts.size(), size_t{0}); - std::iota(displacements.begin(), displacements.end(), size_t{0}); - rmm::device_uvector d_outputs(rx_counts.size(), stream); - raft::update_device(d_outputs.data() + comm.get_rank(), &input, 1, stream); - // FIXME: better use allgather - comm.allgatherv(d_outputs.data() + comm.get_rank(), - d_outputs.data(), - rx_counts.data(), - displacements.data(), - stream); - std::vector h_outputs(rx_counts.size()); - raft::update_host(h_outputs.data(), d_outputs.data(), rx_counts.size(), stream); - auto status = comm.sync_stream(stream); - CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); - return h_outputs; -} - -template -std::enable_if_t::value, std::vector> -host_scalar_allgather(raft::comms::comms_t const& comm, T input, cudaStream_t stream) -{ - size_t constexpr tuple_size = thrust::tuple_size::value; - std::vector rx_counts(comm.get_size(), tuple_size); - std::vector displacements(rx_counts.size(), size_t{0}); - for (size_t i = 0; i < displacements.size(); ++i) { displacements[i] = i * tuple_size; } - std::vector h_tuple_scalar_elements(tuple_size); - rmm::device_uvector d_allgathered_tuple_scalar_elements(comm.get_size() * tuple_size, - stream); - - detail::update_vector_of_tuple_scalar_elements_from_tuple_impl().update( - h_tuple_scalar_elements, input); - raft::update_device(d_allgathered_tuple_scalar_elements.data() + comm.get_rank() * tuple_size, - h_tuple_scalar_elements.data(), - tuple_size, - stream); - // FIXME: better use allgather - comm.allgatherv(d_allgathered_tuple_scalar_elements.data() + comm.get_rank() * tuple_size, - 
d_allgathered_tuple_scalar_elements.data(), - rx_counts.data(), - displacements.data(), - stream); - std::vector h_allgathered_tuple_scalar_elements(comm.get_size() * tuple_size); - raft::update_host(h_allgathered_tuple_scalar_elements.data(), - d_allgathered_tuple_scalar_elements.data(), - comm.get_size() * tuple_size, - stream); - auto status = comm.sync_stream(stream); - CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); - - std::vector ret(comm.get_size()); - for (size_t i = 0; i < ret.size(); ++i) { - std::vector h_tuple_scalar_elements( - h_allgathered_tuple_scalar_elements.data() + i * tuple_size, - h_allgathered_tuple_scalar_elements.data() + (i + 1) * tuple_size); - detail::update_tuple_from_vector_of_tuple_scalar_elements_impl() - .update(ret[i], h_tuple_scalar_elements); - } - - return ret; -} - -// Return value is valid only in root (return value may better be std::optional in C++17 or later) -template -std::enable_if_t::value, std::vector> host_scalar_gather( - raft::comms::comms_t const& comm, T input, int root, cudaStream_t stream) -{ - rmm::device_uvector d_outputs(comm.get_rank() == root ? comm.get_size() : int{1}, stream); - raft::update_device( - comm.get_rank() == root ? d_outputs.data() + comm.get_rank() : d_outputs.data(), - &input, - 1, - stream); - comm.gather(comm.get_rank() == root ? d_outputs.data() + comm.get_rank() : d_outputs.data(), - d_outputs.data(), - size_t{1}, - root, - stream); - std::vector h_outputs(comm.get_rank() == root ? comm.get_size() : 0); - if (comm.get_rank() == root) { - raft::update_host(h_outputs.data(), d_outputs.data(), comm.get_size(), stream); - } - auto status = comm.sync_stream(stream); - CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); - return h_outputs; -} - -// Return value is valid only in root (return value may better be std::optional in C++17 or later) -template -std::enable_if_t::value, std::vector> -host_scalar_gather(raft::comms::comms_t const& comm, T input, int root, cudaStream_t stream) -{ - size_t constexpr tuple_size = thrust::tuple_size::value; - std::vector h_tuple_scalar_elements(tuple_size); - rmm::device_uvector d_gathered_tuple_scalar_elements( - comm.get_rank() == root ? comm.get_size() * tuple_size : tuple_size, stream); - - detail::update_vector_of_tuple_scalar_elements_from_tuple_impl().update( - h_tuple_scalar_elements, input); - raft::update_device(comm.get_rank() == root - ? d_gathered_tuple_scalar_elements.data() + comm.get_rank() * tuple_size - : d_gathered_tuple_scalar_elements.data(), - h_tuple_scalar_elements.data(), - tuple_size, - stream); - comm.gather(comm.get_rank() == root - ? d_gathered_tuple_scalar_elements.data() + comm.get_rank() * tuple_size - : d_gathered_tuple_scalar_elements.data(), - d_gathered_tuple_scalar_elements.data(), - tuple_size, - root, - stream); - std::vector h_gathered_tuple_scalar_elements( - comm.get_rank() == root ? 
comm.get_size() * tuple_size : size_t{0}); - if (comm.get_rank() == root) { - raft::update_host(h_gathered_tuple_scalar_elements.data(), - d_gathered_tuple_scalar_elements.data(), - comm.get_size() * tuple_size, - stream); - } - auto status = comm.sync_stream(stream); - CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); - - std::vector ret(comm.get_size()); - if (comm.get_rank() == root) { - for (size_t i = 0; i < ret.size(); ++i) { - std::vector h_tuple_scalar_elements( - h_gathered_tuple_scalar_elements.data() + i * tuple_size, - h_gathered_tuple_scalar_elements.data() + (i + 1) * tuple_size); - detail::update_tuple_from_vector_of_tuple_scalar_elements_impl() - .update(ret[i], h_tuple_scalar_elements); - } - } - - return ret; -} - template std::enable_if_t< std::is_arithmetic::value_type>::value, @@ -1396,51 +998,5 @@ device_gatherv(raft::comms::comms_t const& comm, .run(comm, input_first, output_first, sendcount, recvcounts, displacements, root, stream); } -template ::value>* = nullptr> -auto allocate_comm_buffer(size_t buffer_size, cudaStream_t stream) -{ - return rmm::device_uvector(buffer_size, stream); -} - -template ::value>* = nullptr> -auto allocate_comm_buffer(size_t buffer_size, cudaStream_t stream) -{ - size_t constexpr tuple_size = thrust::tuple_size::value; - return detail::allocate_comm_buffer_tuple_impl( - std::make_index_sequence(), buffer_size, stream); -} - -template ::value>* = nullptr> -void resize_comm_buffer(BufferType& buffer, size_t new_buffer_size, cudaStream_t stream) -{ - buffer.resize(new_buffer_size, stream); -} - -template ::value>* = nullptr> -void resize_comm_buffer(BufferType& buffer, size_t new_buffer_size, cudaStream_t stream) -{ - size_t constexpr tuple_size = thrust::tuple_size::value; - detail::resize_comm_buffer_tuple_impl( - buffer, new_buffer_size, stream); -} - -template ::value>* = nullptr> -auto get_comm_buffer_begin(BufferType& buffer) -{ - return buffer.begin(); -} - -template ::value>* = nullptr> -auto get_comm_buffer_begin(BufferType& buffer) -{ - size_t constexpr tuple_size = thrust::tuple_size::value; - return thrust::make_zip_iterator( - detail::get_comm_buffer_begin_tuple_impl(std::make_index_sequence(), buffer)); -} - } // namespace experimental } // namespace cugraph diff --git a/cpp/include/utilities/host_scalar_comm.cuh b/cpp/include/utilities/host_scalar_comm.cuh new file mode 100644 index 00000000000..b0bd644a6ce --- /dev/null +++ b/cpp/include/utilities/host_scalar_comm.cuh @@ -0,0 +1,389 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include + +#include +#include + +#include +#include + +namespace cugraph { +namespace experimental { + +namespace detail { + +template +struct update_vector_of_tuple_scalar_elements_from_tuple_impl { + void update(std::vector& tuple_scalar_elements, TupleType const& tuple) const + { + using element_t = typename thrust::tuple_element::type; + static_assert(sizeof(element_t) <= sizeof(int64_t)); + auto ptr = reinterpret_cast(tuple_scalar_elements.data() + I); + *ptr = thrust::get(tuple); + update_vector_of_tuple_scalar_elements_from_tuple_impl().update( + tuple_scalar_elements, tuple); + } +}; + +template +struct update_vector_of_tuple_scalar_elements_from_tuple_impl { + void update(std::vector& tuple_scalar_elements, TupleType const& tuple) const { return; } +}; + +template +struct update_tuple_from_vector_of_tuple_scalar_elements_impl { + void update(TupleType& tuple, std::vector const& tuple_scalar_elements) const + { + using element_t = typename thrust::tuple_element::type; + static_assert(sizeof(element_t) <= sizeof(int64_t)); + auto ptr = reinterpret_cast(tuple_scalar_elements.data() + I); + thrust::get(tuple) = *ptr; + update_tuple_from_vector_of_tuple_scalar_elements_impl().update( + tuple, tuple_scalar_elements); + } +}; + +template +struct update_tuple_from_vector_of_tuple_scalar_elements_impl { + void update(TupleType& tuple, std::vector const& tuple_scalar_elements) const { return; } +}; + +template +struct host_allreduce_tuple_scalar_element_impl { + void run(raft::comms::comms_t const& comm, + rmm::device_uvector& tuple_scalar_elements, + cudaStream_t stream) const + { + using element_t = typename thrust::tuple_element::type; + static_assert(sizeof(element_t) <= sizeof(int64_t)); + auto ptr = reinterpret_cast(tuple_scalar_elements.data() + I); + comm.allreduce(ptr, ptr, 1, raft::comms::op_t::SUM, stream); + host_allreduce_tuple_scalar_element_impl().run( + comm, tuple_scalar_elements, stream); + } +}; + +template +struct host_allreduce_tuple_scalar_element_impl { + void run(raft::comms::comms_t const& comm, + rmm::device_uvector& tuple_scalar_elements, + cudaStream_t stream) const + { + } +}; + +template +struct host_reduce_tuple_scalar_element_impl { + void run(raft::comms::comms_t const& comm, + rmm::device_uvector& tuple_scalar_elements, + int root, + cudaStream_t stream) const + { + using element_t = typename thrust::tuple_element::type; + static_assert(sizeof(element_t) <= sizeof(int64_t)); + auto ptr = reinterpret_cast(tuple_scalar_elements.data() + I); + comm.reduce(ptr, ptr, 1, raft::comms::op_t::SUM, root, stream); + host_reduce_tuple_scalar_element_impl().run( + comm, tuple_scalar_elements, root, stream); + } +}; + +template +struct host_reduce_tuple_scalar_element_impl { + void run(raft::comms::comms_t const& comm, + rmm::device_uvector& tuple_scalar_elements, + int root, + cudaStream_t stream) const + { + } +}; + +} // namespace detail + +template +std::enable_if_t::value, T> host_scalar_allreduce( + raft::comms::comms_t const& comm, T input, cudaStream_t stream) +{ + rmm::device_uvector d_input(1, stream); + raft::update_device(d_input.data(), &input, 1, stream); + comm.allreduce(d_input.data(), d_input.data(), 1, raft::comms::op_t::SUM, stream); + T h_input{}; + raft::update_host(&h_input, d_input.data(), 1, stream); + auto status = comm.sync_stream(stream); + CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); + return h_input; +} + +template +std::enable_if_t::value, T> 
+host_scalar_allreduce(raft::comms::comms_t const& comm, T input, cudaStream_t stream) +{ + size_t constexpr tuple_size = thrust::tuple_size::value; + std::vector h_tuple_scalar_elements(tuple_size); + rmm::device_uvector d_tuple_scalar_elements(tuple_size, stream); + T ret{}; + + detail::update_vector_of_tuple_scalar_elements_from_tuple_impl().update( + h_tuple_scalar_elements, input); + raft::update_device( + d_tuple_scalar_elements.data(), h_tuple_scalar_elements.data(), tuple_size, stream); + detail::host_allreduce_tuple_scalar_element_impl().run( + comm, d_tuple_scalar_elements, stream); + raft::update_host( + h_tuple_scalar_elements.data(), d_tuple_scalar_elements.data(), tuple_size, stream); + auto status = comm.sync_stream(stream); + CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); + detail::update_tuple_from_vector_of_tuple_scalar_elements_impl().update( + ret, h_tuple_scalar_elements); + + return ret; +} + +// Return value is valid only in root (return value may better be std::optional in C++17 or later) +template +std::enable_if_t::value, T> host_scalar_reduce( + raft::comms::comms_t const& comm, T input, int root, cudaStream_t stream) +{ + rmm::device_uvector d_input(1, stream); + raft::update_device(d_input.data(), &input, 1, stream); + comm.reduce(d_input.data(), d_input.data(), 1, raft::comms::op_t::SUM, stream); + T h_input{}; + if (comm.get_rank() == root) { raft::update_host(&h_input, d_input.data(), 1, stream); } + auto status = comm.sync_stream(stream); + CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); + return h_input; +} + +// Return value is valid only in root (return value may better be std::optional in C++17 or later) +template +std::enable_if_t::value, T> +host_scalar_reduce(raft::comms::comms_t const& comm, T input, int root, cudaStream_t stream) +{ + size_t constexpr tuple_size = thrust::tuple_size::value; + std::vector h_tuple_scalar_elements(tuple_size); + rmm::device_uvector d_tuple_scalar_elements(tuple_size, stream); + T ret{}; + + detail::update_vector_of_tuple_scalar_elements_from_tuple_impl().update( + h_tuple_scalar_elements, input); + raft::update_device( + d_tuple_scalar_elements.data(), h_tuple_scalar_elements.data(), tuple_size, stream); + detail::host_reduce_tuple_scalar_element_impl().run( + comm, d_tuple_scalar_elements, root, stream); + if (comm.get_rank() == root) { + raft::update_host( + h_tuple_scalar_elements.data(), d_tuple_scalar_elements.data(), tuple_size, stream); + } + auto status = comm.sync_stream(stream); + CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); + if (comm.get_rank() == root) { + detail::update_tuple_from_vector_of_tuple_scalar_elements_impl() + .update(ret, h_tuple_scalar_elements); + } + + return ret; +} + +template +std::enable_if_t::value, T> host_scalar_bcast( + raft::comms::comms_t const& comm, T input, int root, cudaStream_t stream) +{ + rmm::device_uvector d_input(1, stream); + if (comm.get_rank() == root) { raft::update_device(d_input.data(), &input, 1, stream); } + comm.bcast(d_input.data(), 1, root, stream); + auto h_input = input; + if (comm.get_rank() != root) { raft::update_host(&h_input, d_input.data(), 1, stream); } + auto status = comm.sync_stream(stream); + CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); + return h_input; +} + +template +std::enable_if_t::value, T> +host_scalar_bcast(raft::comms::comms_t const& comm, T input, int root, cudaStream_t stream) +{ + 
size_t constexpr tuple_size = thrust::tuple_size::value; + std::vector h_tuple_scalar_elements(tuple_size); + rmm::device_uvector d_tuple_scalar_elements(tuple_size, stream); + auto ret = input; + + if (comm.get_rank() == root) { + detail::update_vector_of_tuple_scalar_elements_from_tuple_impl() + .update(h_tuple_scalar_elements, input); + raft::update_device( + d_tuple_scalar_elements.data(), h_tuple_scalar_elements.data(), tuple_size, stream); + } + comm.bcast(d_tuple_scalar_elements.data(), d_tuple_scalar_elements.size(), root, stream); + if (comm.get_rank() != root) { + raft::update_host( + h_tuple_scalar_elements.data(), d_tuple_scalar_elements.data(), tuple_size, stream); + } + auto status = comm.sync_stream(stream); + CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); + if (comm.get_rank() != root) { + detail::update_tuple_from_vector_of_tuple_scalar_elements_impl() + .update(ret, h_tuple_scalar_elements); + } + + return ret; +} + +template +std::enable_if_t::value, std::vector> host_scalar_allgather( + raft::comms::comms_t const& comm, T input, cudaStream_t stream) +{ + std::vector rx_counts(comm.get_size(), size_t{1}); + std::vector displacements(rx_counts.size(), size_t{0}); + std::iota(displacements.begin(), displacements.end(), size_t{0}); + rmm::device_uvector d_outputs(rx_counts.size(), stream); + raft::update_device(d_outputs.data() + comm.get_rank(), &input, 1, stream); + // FIXME: better use allgather + comm.allgatherv(d_outputs.data() + comm.get_rank(), + d_outputs.data(), + rx_counts.data(), + displacements.data(), + stream); + std::vector h_outputs(rx_counts.size()); + raft::update_host(h_outputs.data(), d_outputs.data(), rx_counts.size(), stream); + auto status = comm.sync_stream(stream); + CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); + return h_outputs; +} + +template +std::enable_if_t::value, std::vector> +host_scalar_allgather(raft::comms::comms_t const& comm, T input, cudaStream_t stream) +{ + size_t constexpr tuple_size = thrust::tuple_size::value; + std::vector rx_counts(comm.get_size(), tuple_size); + std::vector displacements(rx_counts.size(), size_t{0}); + for (size_t i = 0; i < displacements.size(); ++i) { displacements[i] = i * tuple_size; } + std::vector h_tuple_scalar_elements(tuple_size); + rmm::device_uvector d_allgathered_tuple_scalar_elements(comm.get_size() * tuple_size, + stream); + + detail::update_vector_of_tuple_scalar_elements_from_tuple_impl().update( + h_tuple_scalar_elements, input); + raft::update_device(d_allgathered_tuple_scalar_elements.data() + comm.get_rank() * tuple_size, + h_tuple_scalar_elements.data(), + tuple_size, + stream); + // FIXME: better use allgather + comm.allgatherv(d_allgathered_tuple_scalar_elements.data() + comm.get_rank() * tuple_size, + d_allgathered_tuple_scalar_elements.data(), + rx_counts.data(), + displacements.data(), + stream); + std::vector h_allgathered_tuple_scalar_elements(comm.get_size() * tuple_size); + raft::update_host(h_allgathered_tuple_scalar_elements.data(), + d_allgathered_tuple_scalar_elements.data(), + comm.get_size() * tuple_size, + stream); + auto status = comm.sync_stream(stream); + CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); + + std::vector ret(comm.get_size()); + for (size_t i = 0; i < ret.size(); ++i) { + std::vector h_tuple_scalar_elements( + h_allgathered_tuple_scalar_elements.data() + i * tuple_size, + h_allgathered_tuple_scalar_elements.data() + (i + 1) * tuple_size); + 
detail::update_tuple_from_vector_of_tuple_scalar_elements_impl() + .update(ret[i], h_tuple_scalar_elements); + } + + return ret; +} + +// Return value is valid only in root (return value may better be std::optional in C++17 or later) +template +std::enable_if_t::value, std::vector> host_scalar_gather( + raft::comms::comms_t const& comm, T input, int root, cudaStream_t stream) +{ + rmm::device_uvector d_outputs(comm.get_rank() == root ? comm.get_size() : int{1}, stream); + raft::update_device( + comm.get_rank() == root ? d_outputs.data() + comm.get_rank() : d_outputs.data(), + &input, + 1, + stream); + comm.gather(comm.get_rank() == root ? d_outputs.data() + comm.get_rank() : d_outputs.data(), + d_outputs.data(), + size_t{1}, + root, + stream); + std::vector h_outputs(comm.get_rank() == root ? comm.get_size() : 0); + if (comm.get_rank() == root) { + raft::update_host(h_outputs.data(), d_outputs.data(), comm.get_size(), stream); + } + auto status = comm.sync_stream(stream); + CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); + return h_outputs; +} + +// Return value is valid only in root (return value may better be std::optional in C++17 or later) +template +std::enable_if_t::value, std::vector> +host_scalar_gather(raft::comms::comms_t const& comm, T input, int root, cudaStream_t stream) +{ + size_t constexpr tuple_size = thrust::tuple_size::value; + std::vector h_tuple_scalar_elements(tuple_size); + rmm::device_uvector d_gathered_tuple_scalar_elements( + comm.get_rank() == root ? comm.get_size() * tuple_size : tuple_size, stream); + + detail::update_vector_of_tuple_scalar_elements_from_tuple_impl().update( + h_tuple_scalar_elements, input); + raft::update_device(comm.get_rank() == root + ? d_gathered_tuple_scalar_elements.data() + comm.get_rank() * tuple_size + : d_gathered_tuple_scalar_elements.data(), + h_tuple_scalar_elements.data(), + tuple_size, + stream); + comm.gather(comm.get_rank() == root + ? d_gathered_tuple_scalar_elements.data() + comm.get_rank() * tuple_size + : d_gathered_tuple_scalar_elements.data(), + d_gathered_tuple_scalar_elements.data(), + tuple_size, + root, + stream); + std::vector h_gathered_tuple_scalar_elements( + comm.get_rank() == root ? 
comm.get_size() * tuple_size : size_t{0}); + if (comm.get_rank() == root) { + raft::update_host(h_gathered_tuple_scalar_elements.data(), + d_gathered_tuple_scalar_elements.data(), + comm.get_size() * tuple_size, + stream); + } + auto status = comm.sync_stream(stream); + CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); + + std::vector ret(comm.get_size()); + if (comm.get_rank() == root) { + for (size_t i = 0; i < ret.size(); ++i) { + std::vector h_tuple_scalar_elements( + h_gathered_tuple_scalar_elements.data() + i * tuple_size, + h_gathered_tuple_scalar_elements.data() + (i + 1) * tuple_size); + detail::update_tuple_from_vector_of_tuple_scalar_elements_impl() + .update(ret[i], h_tuple_scalar_elements); + } + } + + return ret; +} + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/src/experimental/coarsen_graph.cu b/cpp/src/experimental/coarsen_graph.cu index 64d603a42d2..9d6dc6e73e5 100644 --- a/cpp/src/experimental/coarsen_graph.cu +++ b/cpp/src/experimental/coarsen_graph.cu @@ -19,7 +19,6 @@ #include #include #include -#include #include #include diff --git a/cpp/src/experimental/graph.cu b/cpp/src/experimental/graph.cu index 8371361b670..38157c83f1c 100644 --- a/cpp/src/experimental/graph.cu +++ b/cpp/src/experimental/graph.cu @@ -17,8 +17,8 @@ #include #include #include -#include #include +#include #include #include diff --git a/cpp/src/experimental/graph_view.cu b/cpp/src/experimental/graph_view.cu index ed43bad5a03..8289a231e31 100644 --- a/cpp/src/experimental/graph_view.cu +++ b/cpp/src/experimental/graph_view.cu @@ -17,8 +17,8 @@ #include #include #include -#include #include +#include #include #include diff --git a/cpp/src/experimental/louvain.cuh b/cpp/src/experimental/louvain.cuh index 1f6f8633bcd..1f71f8233dc 100644 --- a/cpp/src/experimental/louvain.cuh +++ b/cpp/src/experimental/louvain.cuh @@ -23,7 +23,6 @@ #include #include #include -#include #include #include diff --git a/cpp/src/experimental/relabel.cu b/cpp/src/experimental/relabel.cu index 3a21c465db2..34ac5c71de9 100644 --- a/cpp/src/experimental/relabel.cu +++ b/cpp/src/experimental/relabel.cu @@ -19,7 +19,6 @@ #include #include #include -#include #include #include diff --git a/cpp/src/experimental/renumber_edgelist.cu b/cpp/src/experimental/renumber_edgelist.cu index 1c34d5b1cb7..8f1dead303c 100644 --- a/cpp/src/experimental/renumber_edgelist.cu +++ b/cpp/src/experimental/renumber_edgelist.cu @@ -17,8 +17,9 @@ #include #include #include -#include +#include #include +#include #include #include From 83a0ed6a8fbfcd3843f38ac39e48b473894b3b7a Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Wed, 23 Dec 2020 16:16:24 -0500 Subject: [PATCH 060/343] refactor shuffle_values --- .../experimental/detail/graph_utils.cuh | 111 ------ ...ransform_reduce_key_aggregated_out_nbr.cuh | 61 ++-- ...orm_reduce_by_adj_matrix_row_col_key_e.cuh | 31 +- cpp/include/utilities/shuffle_comm.cuh | 315 ++++++++++++++++++ cpp/src/experimental/coarsen_graph.cu | 48 +-- cpp/src/experimental/relabel.cu | 59 +--- cpp/src/experimental/renumber_edgelist.cu | 29 +- 7 files changed, 385 insertions(+), 269 deletions(-) create mode 100644 cpp/include/utilities/shuffle_comm.cuh diff --git a/cpp/include/experimental/detail/graph_utils.cuh b/cpp/include/experimental/detail/graph_utils.cuh index bd6053b1e61..68e2816c1a1 100644 --- a/cpp/include/experimental/detail/graph_utils.cuh +++ b/cpp/include/experimental/detail/graph_utils.cuh @@ -117,30 +117,6 @@ rmm::device_uvector compute_major_degree( return 
degrees; } -// FIXME: better if I don't need to do this. Haven't found a better way to concatenate a variable -// which can be either a tuple or not with another variable to create an aggregated/flattened tuple. -#if 1 -template -struct is_std_tuple : std::false_type { -}; - -template -struct is_std_tuple> : std::true_type { -}; - -template -auto to_tuple(T &&val, std::enable_if_t::value, void> * = nullptr) -{ - return std::forward(val); -} - -template -auto to_tuple(T &&val, std::enable_if_t::value, void> * = nullptr) -{ - return std::make_tuple(std::forward(val)); -} -#endif - // compute the numbers of nonzeros in rows (of the graph adjacency matrix, if store_transposed = // false) or columns (of the graph adjacency matrix, if store_transposed = true) template @@ -158,93 +134,6 @@ rmm::device_uvector compute_major_degree( return compute_major_degree(handle, tmp_offsets, partition); } -// FIXME: better move this to elsewhere -template -auto shuffle_values(raft::comms::comms_t const &comm, - TxValueIterator tx_value_first, - rmm::device_uvector const &tx_value_counts, - cudaStream_t stream) -{ - auto const comm_size = comm.get_size(); - - rmm::device_uvector rx_value_counts(comm_size, stream); - - // FIXME: this needs to be replaced with AlltoAll once NCCL 2.8 is released. - std::vector tx_counts(comm_size, size_t{1}); - std::vector tx_offsets(comm_size); - std::iota(tx_offsets.begin(), tx_offsets.end(), size_t{0}); - std::vector tx_dst_ranks(comm_size); - std::iota(tx_dst_ranks.begin(), tx_dst_ranks.end(), int{0}); - std::vector rx_counts(comm_size, size_t{1}); - std::vector rx_offsets(comm_size); - std::iota(rx_offsets.begin(), rx_offsets.end(), size_t{0}); - std::vector rx_src_ranks(comm_size); - std::iota(rx_src_ranks.begin(), rx_src_ranks.end(), int{0}); - device_multicast_sendrecv(comm, - tx_value_counts.data(), - tx_counts, - tx_offsets, - tx_dst_ranks, - rx_value_counts.data(), - rx_counts, - rx_offsets, - rx_src_ranks, - stream); - - raft::update_host(tx_counts.data(), tx_value_counts.data(), comm_size, stream); - raft::update_host(rx_counts.data(), rx_value_counts.data(), comm_size, stream); - - CUDA_TRY(cudaStreamSynchronize(stream)); // tx_counts & rx_counts should be up-to-date - - std::partial_sum(tx_counts.begin(), tx_counts.end() - 1, tx_offsets.begin() + 1); - std::partial_sum(rx_counts.begin(), rx_counts.end() - 1, rx_offsets.begin() + 1); - - auto rx_value_buffer = - allocate_dataframe_buffer::value_type>( - rx_offsets.back(), stream); - auto rx_value_first = - get_dataframe_buffer_begin::value_type>( - rx_value_buffer); - - int num_tx_dst_ranks{0}; - int num_rx_src_ranks{0}; - for (int i = 0; i < comm_size; ++i) { - if (tx_counts[i] != 0) { - tx_counts[num_tx_dst_ranks] = tx_counts[i]; - tx_offsets[num_tx_dst_ranks] = tx_offsets[i]; - tx_dst_ranks[num_tx_dst_ranks] = tx_dst_ranks[i]; - ++num_tx_dst_ranks; - } - if (rx_counts[i] != 0) { - rx_counts[num_rx_src_ranks] = rx_counts[i]; - rx_offsets[num_rx_src_ranks] = rx_offsets[i]; - rx_src_ranks[num_rx_src_ranks] = rx_src_ranks[i]; - } - } - tx_counts.resize(num_tx_dst_ranks); - tx_offsets.resize(num_tx_dst_ranks); - tx_dst_ranks.resize(num_tx_dst_ranks); - rx_counts.resize(num_rx_src_ranks); - rx_offsets.resize(num_rx_src_ranks); - rx_src_ranks.resize(num_rx_src_ranks); - - // FIXME: this needs to be replaced with AlltoAll once NCCL 2.8 is released - // (if num_tx_dst_ranks == num_rx_src_ranks == comm_size). 
- device_multicast_sendrecv(comm, - tx_value_first, - tx_counts, - tx_offsets, - tx_dst_ranks, - rx_value_first, - rx_counts, - rx_offsets, - rx_src_ranks, - stream); - - return std::tuple_cat(to_tuple(std::move(rx_value_buffer)), - std::make_tuple(std::move(rx_value_counts))); -} - template struct degree_from_offsets_t { edge_t const *offsets{nullptr}; diff --git a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh index cb4481dfd01..b30c3ca19ac 100644 --- a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh +++ b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -231,28 +232,16 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( unique_keys.end()); unique_keys.resize(thrust::distance(unique_keys.begin(), last), handle.get_stream()); - auto key_func = detail::compute_gpu_id_from_vertex_t{comm_size}; - thrust::sort( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rmm::device_uvector rx_unique_keys(0, handle.get_stream()); + std::vector rx_value_counts{}; + std::tie(rx_unique_keys, rx_value_counts) = sort_and_shuffle_values( + comm, unique_keys.begin(), unique_keys.end(), - [key_func] __device__(auto lhs, auto rhs) { return key_func(lhs) < key_func(rhs); }); - - auto key_first = thrust::make_transform_iterator( - unique_keys.begin(), [key_func] __device__(auto val) { return key_func(val); }); - rmm::device_uvector tx_value_counts(comm_size, handle.get_stream()); - thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - key_first, - key_first + unique_keys.size(), - thrust::make_constant_iterator(size_t{1}), - thrust::make_discard_iterator(), - tx_value_counts.begin()); - - rmm::device_uvector rx_unique_keys(0, handle.get_stream()); - rmm::device_uvector rx_value_counts(0, handle.get_stream()); - - std::tie(rx_unique_keys, rx_value_counts) = cugraph::experimental::detail::shuffle_values( - comm, unique_keys.begin(), tx_value_counts, handle.get_stream()); + [key_func = detail::compute_gpu_id_from_vertex_t{comm_size}] __device__(auto val) { + return key_func(val); + }, + handle.get_stream()); rmm::device_uvector values_for_unique_keys(rx_unique_keys.size(), handle.get_stream()); @@ -264,8 +253,7 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( rmm::device_uvector rx_values_for_unique_keys(0, handle.get_stream()); std::tie(rx_values_for_unique_keys, std::ignore) = - cugraph::experimental::detail::shuffle_values( - comm, values_for_unique_keys.begin(), rx_value_counts, handle.get_stream()); + shuffle_values(comm, values_for_unique_keys.begin(), rx_value_counts, handle.get_stream()); CUDA_TRY(cudaStreamSynchronize( handle.get_stream())); // cuco::static_map currently does not take stream @@ -379,29 +367,18 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( thrust::make_zip_iterator(thrust::make_tuple(tmp_major_vertices.begin(), tmp_minor_keys.begin(), tmp_key_aggregated_edge_weights.begin())); - auto key_func = detail::compute_gpu_id_from_vertex_t{sub_comm_size}; - thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - triplet_first, - triplet_first + tmp_major_vertices.size(), - [key_func] __device__(auto lhs, auto rhs) { - return key_func(thrust::get<1>(lhs) < key_func(thrust::get<1>(rhs))); - }); - auto key_first = thrust::make_transform_iterator( - triplet_first, [key_func] __device__(auto 
val) { return key_func(thrust::get<1>(val)); }); - rmm::device_uvector tx_value_counts(sub_comm.get_size(), handle.get_stream()); - thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - key_first, - key_first + tmp_major_vertices.size(), - thrust::make_constant_iterator(size_t{1}), - thrust::make_discard_iterator(), - tx_value_counts.begin()); - rmm::device_uvector rx_major_vertices(0, handle.get_stream()); rmm::device_uvector rx_minor_keys(0, handle.get_stream()); rmm::device_uvector rx_key_aggregated_edge_weights(0, handle.get_stream()); - - std::tie(rx_major_vertices, rx_minor_keys, rx_key_aggregated_edge_weights, std::ignore) = - detail::shuffle_values(sub_comm, triplet_first, tx_value_counts, handle.get_stream()); + std::forward_as_tuple( + std::tie(rx_major_vertices, rx_minor_keys, rx_key_aggregated_edge_weights), std::ignore) = + sort_and_shuffle_values( + sub_comm, + triplet_first, + triplet_first + tmp_major_vertices.size(), + [key_func = detail::compute_gpu_id_from_vertex_t{sub_comm_size}] __device__( + auto val) { return key_func(thrust::get<1>(val)); }, + handle.get_stream()); tmp_major_vertices = std::move(rx_major_vertices); tmp_minor_keys = std::move(rx_minor_keys); diff --git a/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh b/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh index bd10be6eb77..55a9ad7f323 100644 --- a/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh +++ b/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -237,34 +238,16 @@ transform_reduce_by_adj_matrix_row_col_key_e( unique_keys.begin(), get_dataframe_buffer_begin(value_for_unique_key_buffer)); - auto key_func = detail::compute_gpu_id_from_vertex_t{comm_size}; - thrust::sort_by_key( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - unique_keys.begin(), - unique_keys.end(), - get_dataframe_buffer_begin(value_for_unique_key_buffer), - [key_func] __device__(auto lhs, auto rhs) { return key_func(lhs) < key_func(rhs); }); - - auto key_first = thrust::make_transform_iterator( - unique_keys.begin(), [key_func] __device__(auto val) { return key_func(val); }); - rmm::device_uvector tx_value_counts(comm_size, handle.get_stream()); - thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - key_first, - key_first + unique_keys.size(), - thrust::make_constant_iterator(size_t{1}), - thrust::make_discard_iterator(), - tx_value_counts.begin()); - rmm::device_uvector rx_unique_keys(0, handle.get_stream()); auto rx_value_for_unique_key_buffer = allocate_dataframe_buffer(0, handle.get_stream()); - - std::tie(rx_unique_keys, std::ignore) = cugraph::experimental::detail::shuffle_values( - comm, unique_keys.begin(), tx_value_counts, handle.get_stream()); - std::tie(rx_value_for_unique_key_buffer, std::ignore) = - cugraph::experimental::detail::shuffle_values( + std::tie(rx_unique_keys, rx_value_for_unique_key_buffer, std::ignore) = + sort_and_shuffle_kv_pairs( comm, + unique_keys.begin(), + unique_keys.end(), get_dataframe_buffer_begin(value_for_unique_key_buffer), - tx_value_counts, + [key_func = detail::compute_gpu_id_from_vertex_t{comm_size}] __device__( + auto val) { return key_func(val); }, handle.get_stream()); // FIXME: we can reduce after shuffle diff --git a/cpp/include/utilities/shuffle_comm.cuh b/cpp/include/utilities/shuffle_comm.cuh new file mode 100644 index 00000000000..e55fc29d229 
--- /dev/null +++ b/cpp/include/utilities/shuffle_comm.cuh @@ -0,0 +1,315 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +#include +#include +#include + +#include +#include +#include + +namespace cugraph { +namespace experimental { + +namespace detail { + +template +rmm::device_uvector sort_and_count(raft::comms::comms_t const &comm, + ValueIterator tx_value_first /* [INOUT */, + ValueIterator tx_value_last /* [INOUT */, + ValueToGPUIdOp value_to_gpu_id_op, + cudaStream_t stream) +{ + auto const comm_size = comm.get_size(); + + thrust::sort(rmm::exec_policy(stream)->on(stream), + tx_value_first, + tx_value_last, + [value_to_gpu_id_op] __device__(auto lhs, auto rhs) { + return value_to_gpu_id_op(lhs) < value_to_gpu_id_op(rhs); + }); + + auto gpu_id_first = thrust::make_transform_iterator( + tx_value_first, + [value_to_gpu_id_op] __device__(auto value) { return value_to_gpu_id_op(value); }); + rmm::device_uvector d_tx_value_counts(comm_size, stream); + thrust::reduce_by_key(rmm::exec_policy(stream)->on(stream), + gpu_id_first, + gpu_id_first + thrust::distance(tx_value_first, tx_value_last), + thrust::make_constant_iterator(size_t{1}), + thrust::make_discard_iterator(), + d_tx_value_counts.begin()); + std::vector tx_value_counts(comm_size); + raft::update_host(tx_value_counts.data(), d_tx_value_counts.data(), comm_size, stream); + + return std::move(d_tx_value_counts); +} + +template +rmm::device_uvector sort_and_count(raft::comms::comms_t const &comm, + VertexIterator tx_key_first /* [INOUT */, + VertexIterator tx_key_last /* [INOUT */, + ValueIterator tx_value_first /* [INOUT */, + KeyToGPUIdOp key_to_gpu_id_op, + cudaStream_t stream) +{ + auto const comm_size = comm.get_size(); + + thrust::sort_by_key(rmm::exec_policy(stream)->on(stream), + tx_key_first, + tx_key_last, + tx_value_first, + [key_to_gpu_id_op] __device__(auto lhs, auto rhs) { + return key_to_gpu_id_op(lhs) < key_to_gpu_id_op(rhs); + }); + + auto gpu_id_first = thrust::make_transform_iterator( + tx_key_first, [key_to_gpu_id_op] __device__(auto key) { return key_to_gpu_id_op(key); }); + rmm::device_uvector d_tx_value_counts(comm_size, stream); + thrust::reduce_by_key(rmm::exec_policy(stream)->on(stream), + gpu_id_first, + gpu_id_first + thrust::distance(tx_key_first, tx_key_last), + thrust::make_constant_iterator(size_t{1}), + thrust::make_discard_iterator(), + d_tx_value_counts.begin()); + + return std::move(d_tx_value_counts); +} + +// inline to suppress a complaint about ODR violation +inline std::tuple, + std::vector, + std::vector, + std::vector, + std::vector, + std::vector> +compute_tx_rx_counts_offsets_ranks(raft::comms::comms_t const &comm, + rmm::device_uvector const &d_tx_value_counts, + cudaStream_t stream) +{ + auto const comm_size = comm.get_size(); + + rmm::device_uvector d_rx_value_counts(comm_size, stream); + + // FIXME: this needs to be replaced with AlltoAll once NCCL 2.8 is released. 
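+  // The exchange below emulates an all-to-all of one size_t per rank with grouped
+  // send/recv: every rank sends element i of d_tx_value_counts to rank i and
+  // receives one count from each rank into d_rx_value_counts. For example (with
+  // hypothetical numbers), if comm_size = 3 and this rank's d_tx_value_counts is
+  // {4, 0, 7}, then rank 0 receives 4, rank 1 receives 0 and rank 2 receives 7
+  // from this rank, while d_rx_value_counts[r] ends up holding the number of
+  // values rank r will later send to this rank.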
+ std::vector tx_counts(comm_size, size_t{1}); + std::vector tx_offsets(comm_size); + std::iota(tx_offsets.begin(), tx_offsets.end(), size_t{0}); + std::vector tx_dst_ranks(comm_size); + std::iota(tx_dst_ranks.begin(), tx_dst_ranks.end(), int{0}); + std::vector rx_counts(comm_size, size_t{1}); + std::vector rx_offsets(comm_size); + std::iota(rx_offsets.begin(), rx_offsets.end(), size_t{0}); + std::vector rx_src_ranks(comm_size); + std::iota(rx_src_ranks.begin(), rx_src_ranks.end(), int{0}); + device_multicast_sendrecv(comm, + d_tx_value_counts.data(), + tx_counts, + tx_offsets, + tx_dst_ranks, + d_rx_value_counts.data(), + rx_counts, + rx_offsets, + rx_src_ranks, + stream); + + raft::update_host(tx_counts.data(), d_tx_value_counts.data(), comm_size, stream); + raft::update_host(rx_counts.data(), d_rx_value_counts.data(), comm_size, stream); + + CUDA_TRY(cudaStreamSynchronize(stream)); // rx_counts should be up-to-date + + std::partial_sum(tx_counts.begin(), tx_counts.end() - 1, tx_offsets.begin() + 1); + std::partial_sum(rx_counts.begin(), rx_counts.end() - 1, rx_offsets.begin() + 1); + + int num_tx_dst_ranks{0}; + int num_rx_src_ranks{0}; + for (int i = 0; i < comm_size; ++i) { + if (tx_counts[i] != 0) { + tx_counts[num_tx_dst_ranks] = tx_counts[i]; + tx_offsets[num_tx_dst_ranks] = tx_offsets[i]; + tx_dst_ranks[num_tx_dst_ranks] = tx_dst_ranks[i]; + ++num_tx_dst_ranks; + } + if (rx_counts[i] != 0) { + rx_counts[num_rx_src_ranks] = rx_counts[i]; + rx_offsets[num_rx_src_ranks] = rx_offsets[i]; + rx_src_ranks[num_rx_src_ranks] = rx_src_ranks[i]; + ++num_rx_src_ranks; + } + } + tx_counts.resize(num_tx_dst_ranks); + tx_offsets.resize(num_tx_dst_ranks); + tx_dst_ranks.resize(num_tx_dst_ranks); + rx_counts.resize(num_rx_src_ranks); + rx_offsets.resize(num_rx_src_ranks); + rx_src_ranks.resize(num_rx_src_ranks); + + return std::make_tuple(tx_counts, tx_offsets, tx_dst_ranks, rx_counts, rx_offsets, rx_src_ranks); +} + +} // namespace detail + +template +auto shuffle_values(raft::comms::comms_t const &comm, + TxValueIterator tx_value_first, + std::vector const &tx_value_counts, + cudaStream_t stream) +{ + auto const comm_size = comm.get_size(); + + rmm::device_uvector d_tx_value_counts(comm_size, stream); + raft::update_device(d_tx_value_counts.data(), tx_value_counts.data(), comm_size, stream); + + CUDA_TRY(cudaStreamSynchronize(stream)); // tx_value_counts should be up-to-date + + std::vector tx_counts{}; + std::vector tx_offsets{}; + std::vector tx_dst_ranks{}; + std::vector rx_counts{}; + std::vector rx_offsets{}; + std::vector rx_src_ranks{}; + std::tie(tx_counts, tx_offsets, tx_dst_ranks, rx_counts, rx_offsets, rx_src_ranks) = + detail::compute_tx_rx_counts_offsets_ranks(comm, d_tx_value_counts, stream); + + auto rx_value_buffer = + allocate_dataframe_buffer::value_type>( + rx_offsets.back(), stream); + + // FIXME: this needs to be replaced with AlltoAll once NCCL 2.8 is released + // (if num_tx_dst_ranks == num_rx_src_ranks == comm_size). 
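+  // A hypothetical call site (tx_values and tx_value_counts are placeholder names):
+  // the input values must already be grouped by destination rank, with
+  // tx_value_counts[i] values destined for rank i; the total number of elements
+  // received equals rx_offsets.back() + rx_counts.back().
+  //
+  //   rmm::device_uvector<int32_t> tx_values(...);   // values grouped by destination GPU
+  //   std::vector<size_t> tx_value_counts(...);      // one count per destination GPU
+  //   rmm::device_uvector<int32_t> rx_values(0, stream);
+  //   std::vector<size_t> rx_value_counts{};
+  //   std::tie(rx_values, rx_value_counts) =
+  //     shuffle_values(comm, tx_values.begin(), tx_value_counts, stream);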
+ device_multicast_sendrecv( + comm, + tx_value_first, + tx_counts, + tx_offsets, + tx_dst_ranks, + get_dataframe_buffer_begin::value_type>( + rx_value_buffer), + rx_counts, + rx_offsets, + rx_src_ranks, + stream); + + return std::make_tuple(std::move(rx_value_buffer), rx_counts); +} + +template +auto sort_and_shuffle_values(raft::comms::comms_t const &comm, + ValueIterator tx_value_first /* [INOUT */, + ValueIterator tx_value_last /* [INOUT */, + ValueToGPUIdOp value_to_gpu_id_op, + cudaStream_t stream) +{ + auto const comm_size = comm.get_size(); + + auto d_tx_value_counts = + detail::sort_and_count(comm, tx_value_first, tx_value_last, value_to_gpu_id_op, stream); + + std::vector tx_counts{}; + std::vector tx_offsets{}; + std::vector tx_dst_ranks{}; + std::vector rx_counts{}; + std::vector rx_offsets{}; + std::vector rx_src_ranks{}; + std::tie(tx_counts, tx_offsets, tx_dst_ranks, rx_counts, rx_offsets, rx_src_ranks) = + detail::compute_tx_rx_counts_offsets_ranks(comm, d_tx_value_counts, stream); + + auto rx_value_buffer = + allocate_dataframe_buffer::value_type>( + rx_offsets.back() + rx_counts.back(), stream); + + // FIXME: this needs to be replaced with AlltoAll once NCCL 2.8 is released + // (if num_tx_dst_ranks == num_rx_src_ranks == comm_size). + device_multicast_sendrecv( + comm, + tx_value_first, + tx_counts, + tx_offsets, + tx_dst_ranks, + get_dataframe_buffer_begin::value_type>( + rx_value_buffer), + rx_counts, + rx_offsets, + rx_src_ranks, + stream); + + return std::make_tuple(std::move(rx_value_buffer), rx_counts); +} + +template +auto sort_and_shuffle_kv_pairs(raft::comms::comms_t const &comm, + VertexIterator tx_key_first /* [INOUT */, + VertexIterator tx_key_last /* [INOUT */, + ValueIterator tx_value_first /* [INOUT */, + KeyToGPUIdOp key_to_gpu_id_op, + cudaStream_t stream) +{ + auto d_tx_value_counts = detail::sort_and_count( + comm, tx_key_first, tx_key_last, tx_value_first, key_to_gpu_id_op, stream); + + std::vector tx_counts{}; + std::vector tx_offsets{}; + std::vector tx_dst_ranks{}; + std::vector rx_counts{}; + std::vector rx_offsets{}; + std::vector rx_src_ranks{}; + std::tie(tx_counts, tx_offsets, tx_dst_ranks, rx_counts, rx_offsets, rx_src_ranks) = + detail::compute_tx_rx_counts_offsets_ranks(comm, d_tx_value_counts, stream); + + rmm::device_uvector::value_type> rx_keys( + rx_offsets.back() + rx_counts.back(), stream); + auto rx_value_buffer = + allocate_dataframe_buffer::value_type>( + rx_keys.size(), stream); + + // FIXME: this needs to be replaced with AlltoAll once NCCL 2.8 is released + // (if num_tx_dst_ranks == num_rx_src_ranks == comm_size). + device_multicast_sendrecv(comm, + tx_key_first, + tx_counts, + tx_offsets, + tx_dst_ranks, + rx_keys.begin(), + rx_counts, + rx_offsets, + rx_src_ranks, + stream); + + // FIXME: this needs to be replaced with AlltoAll once NCCL 2.8 is released + // (if num_tx_dst_ranks == num_rx_src_ranks == comm_size). 
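+  // Worked example of the bookkeeping above (hypothetical numbers): if, after the
+  // sort, this rank holds 5 values destined for rank 0, 2 for rank 1 and 3 for
+  // rank 2, then tx_counts = {5, 2, 3}, tx_offsets = {0, 5, 7} and
+  // tx_dst_ranks = {0, 1, 2}; destinations with a zero count are dropped from
+  // these vectors, so the exchange below only pairs ranks that actually transfer
+  // data.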
+ device_multicast_sendrecv( + comm, + tx_value_first, + tx_counts, + tx_offsets, + tx_dst_ranks, + get_dataframe_buffer_begin::value_type>( + rx_value_buffer), + rx_counts, + rx_offsets, + rx_src_ranks, + stream); + + return std::make_tuple(std::move(rx_keys), std::move(rx_value_buffer), rx_counts); +} + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/src/experimental/coarsen_graph.cu b/cpp/src/experimental/coarsen_graph.cu index 9d6dc6e73e5..b5296e2034a 100644 --- a/cpp/src/experimental/coarsen_graph.cu +++ b/cpp/src/experimental/coarsen_graph.cu @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -326,39 +327,26 @@ coarsen_graph( thrust::make_zip_iterator(thrust::make_tuple(coarsened_edgelist_major_vertices.begin(), coarsened_edgelist_minor_vertices.begin(), coarsened_edgelist_weights.begin())); - auto key_func = detail::compute_gpu_id_from_edge_t{ - graph_view.is_hypergraph_partitioned(), - comm.get_size(), - row_comm.get_size(), - col_comm.get_size()}; - thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - edge_first, - edge_first + coarsened_edgelist_major_vertices.size(), - [key_func] __device__(auto lhs, auto rhs) { - return store_transposed ? (key_func(thrust::get<1>(lhs), thrust::get<0>(lhs)) < - key_func(thrust::get<1>(rhs), thrust::get<0>(rhs))) - : (key_func(thrust::get<0>(lhs), thrust::get<1>(lhs)) < - key_func(thrust::get<0>(rhs), thrust::get<1>(rhs))); - }); - auto key_first = thrust::make_transform_iterator(edge_first, [key_func] __device__(auto val) { - return store_transposed ? key_func(thrust::get<1>(val), thrust::get<0>(val)) - : key_func(thrust::get<0>(val), thrust::get<1>(val)); - }); - rmm::device_uvector tx_value_counts(comm.get_size(), handle.get_stream()); - thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - key_first, - key_first + coarsened_edgelist_major_vertices.size(), - thrust::make_constant_iterator(size_t{1}), - thrust::make_discard_iterator(), - tx_value_counts.begin()); - rmm::device_uvector rx_edgelist_major_vertices(0, handle.get_stream()); rmm::device_uvector rx_edgelist_minor_vertices(0, handle.get_stream()); rmm::device_uvector rx_edgelist_weights(0, handle.get_stream()); - - std::tie( - rx_edgelist_major_vertices, rx_edgelist_minor_vertices, rx_edgelist_weights, std::ignore) = - detail::shuffle_values(handle.get_comms(), edge_first, tx_value_counts, handle.get_stream()); + std::forward_as_tuple( + std::tie(rx_edgelist_major_vertices, rx_edgelist_minor_vertices, rx_edgelist_weights), + std::ignore) = + sort_and_shuffle_values( + handle.get_comms(), + edge_first, + edge_first + coarsened_edgelist_major_vertices.size(), + [key_func = + detail::compute_gpu_id_from_edge_t{ + graph_view.is_hypergraph_partitioned(), + comm.get_size(), + row_comm.get_size(), + col_comm.get_size()}] __device__(auto val) { + return store_transposed ? 
key_func(thrust::get<1>(val), thrust::get<0>(val)) + : key_func(thrust::get<0>(val), thrust::get<1>(val)); + }, + handle.get_stream()); sort_and_coarsen_edgelist(rx_edgelist_major_vertices, rx_edgelist_minor_vertices, diff --git a/cpp/src/experimental/relabel.cu b/cpp/src/experimental/relabel.cu index 34ac5c71de9..f8cfe810a97 100644 --- a/cpp/src/experimental/relabel.cu +++ b/cpp/src/experimental/relabel.cu @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -85,25 +86,14 @@ rmm::device_uvector relabel( handle.get_stream()); auto pair_first = thrust::make_zip_iterator( thrust::make_tuple(label_pair_old_labels.begin(), label_pair_new_labels.begin())); - thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - pair_first, - pair_first + std::get<0>(old_new_label_pairs).size(), - [key_func] __device__(auto lhs, auto rhs) { - return key_func(thrust::get<0>(lhs)) < key_func(thrust::get<0>(rhs)); - }); - auto key_first = thrust::make_transform_iterator( - label_pair_old_labels.begin(), [key_func] __device__(auto val) { return key_func(val); }); - rmm::device_uvector tx_value_counts(comm_size, handle.get_stream()); - thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - key_first, - key_first + label_pair_old_labels.size(), - thrust::make_constant_iterator(size_t{1}), - thrust::make_discard_iterator(), - tx_value_counts.begin()); - - std::tie(rx_label_pair_old_labels, rx_label_pair_new_labels, std::ignore) = - cugraph::experimental::detail::shuffle_values( - handle.get_comms(), pair_first, tx_value_counts, handle.get_stream()); + std::forward_as_tuple(std::tie(rx_label_pair_old_labels, rx_label_pair_new_labels), + std::ignore) = + sort_and_shuffle_values( + handle.get_comms(), + pair_first, + pair_first + label_pair_old_labels.size(), + [key_func] __device__(auto val) { return key_func(thrust::get<0>(val)); }, + handle.get_stream()); CUDA_TRY(cudaStreamSynchronize( handle.get_stream())); // label_pair_old_labels and label_pair_new_labels will become @@ -133,28 +123,14 @@ rmm::device_uvector relabel( // shuffle unique_old_labels, relabel using the intermediate relabel map, and shuffle back { - thrust::sort( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rmm::device_uvector rx_unique_old_labels(0, handle.get_stream()); + std::vector rx_value_counts{}; + std::tie(rx_unique_old_labels, rx_value_counts) = sort_and_shuffle_values( + handle.get_comms(), unique_old_labels.begin(), unique_old_labels.end(), - [key_func] __device__(auto lhs, auto rhs) { return key_func(lhs) < key_func(rhs); }); - - auto key_first = thrust::make_transform_iterator( - unique_old_labels.begin(), [key_func] __device__(auto val) { return key_func(val); }); - rmm::device_uvector tx_value_counts(comm_size, handle.get_stream()); - thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - key_first, - key_first + unique_old_labels.size(), - thrust::make_constant_iterator(size_t{1}), - thrust::make_discard_iterator(), - tx_value_counts.begin()); - - rmm::device_uvector rx_unique_old_labels(0, handle.get_stream()); - rmm::device_uvector rx_value_counts(0, handle.get_stream()); - - std::tie(rx_unique_old_labels, rx_value_counts) = - cugraph::experimental::detail::shuffle_values( - handle.get_comms(), unique_old_labels.begin(), tx_value_counts, handle.get_stream()); + [key_func] __device__(auto val) { return key_func(val); }, + handle.get_stream()); CUDA_TRY(cudaStreamSynchronize( handle.get_stream())); // 
cuco::static_map currently does not take stream @@ -165,9 +141,8 @@ rmm::device_uvector relabel( rx_unique_old_labels .begin()); // now rx_unique_old_lables hold new labels for the corresponding old labels - std::tie(new_labels_for_unique_old_labels, std::ignore) = - cugraph::experimental::detail::shuffle_values( - handle.get_comms(), rx_unique_old_labels.begin(), rx_value_counts, handle.get_stream()); + std::tie(new_labels_for_unique_old_labels, std::ignore) = shuffle_values( + handle.get_comms(), rx_unique_old_labels.begin(), rx_value_counts, handle.get_stream()); CUDA_TRY(cudaStreamSynchronize( handle.get_stream())); // tx_value_counts & rx_value_counts will become out-of-scope diff --git a/cpp/src/experimental/renumber_edgelist.cu b/cpp/src/experimental/renumber_edgelist.cu index 8f1dead303c..27061429f4a 100644 --- a/cpp/src/experimental/renumber_edgelist.cu +++ b/cpp/src/experimental/renumber_edgelist.cu @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -131,28 +132,16 @@ rmm::device_uvector compute_renumber_map( auto const comm_size = comm.get_size(); auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(labels.begin(), counts.begin())); - auto key_func = detail::compute_gpu_id_from_vertex_t{comm_size}; - thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - pair_first, - pair_first + labels.size(), - [key_func] __device__(auto lhs, auto rhs) { - return key_func(thrust::get<0>(lhs)) < key_func(thrust::get<0>(rhs)); - }); - auto key_first = thrust::make_transform_iterator( - labels.begin(), [key_func] __device__(auto val) { return key_func(val); }); - rmm::device_uvector tx_value_counts(comm_size, handle.get_stream()); - thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - key_first, - key_first + labels.size(), - thrust::make_constant_iterator(size_t{1}), - thrust::make_discard_iterator(), - tx_value_counts.begin()); - rmm::device_uvector rx_labels(0, handle.get_stream()); rmm::device_uvector rx_counts(0, handle.get_stream()); - - std::tie(rx_labels, rx_counts, std::ignore) = cugraph::experimental::detail::shuffle_values( - handle.get_comms(), pair_first, tx_value_counts, handle.get_stream()); + std::forward_as_tuple(std::tie(rx_labels, rx_counts), std::ignore) = sort_and_shuffle_values( + comm, + pair_first, + pair_first + labels.size(), + [key_func = detail::compute_gpu_id_from_vertex_t{comm_size}] __device__(auto val) { + return key_func(thrust::get<0>(val)); + }, + handle.get_stream()); labels.resize(rx_labels.size(), handle.get_stream()); counts.resize(labels.size(), handle.get_stream()); From 521952ff19b7bbdffa25c28c92183e7e1e4b86bd Mon Sep 17 00:00:00 2001 From: Ishika Roy Date: Mon, 28 Dec 2020 13:16:55 -0600 Subject: [PATCH 061/343] bug fixes --- python/cugraph/structure/graph.py | 2 ++ python/cugraph/structure/number_map.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/python/cugraph/structure/graph.py b/python/cugraph/structure/graph.py index 53c3a4e656c..16314ec8b6a 100644 --- a/python/cugraph/structure/graph.py +++ b/python/cugraph/structure/graph.py @@ -1373,6 +1373,8 @@ def nodes(self): return self.renumber_map.implementation.df["0"] else: return cudf.concat([df["src"], df["dst"]]).unique() + if self.adjlist is not None: + return cudf.Series(np.arange(0, self.number_of_nodes())) if "all_nodes" in self._nodes.keys(): return self._nodes["all_nodes"] else: diff --git a/python/cugraph/structure/number_map.py b/python/cugraph/structure/number_map.py index 
b9ed8eb2e58..f47a8bbb53e 100644 --- a/python/cugraph/structure/number_map.py +++ b/python/cugraph/structure/number_map.py @@ -88,7 +88,7 @@ def __init__(self, df, src_col_names, dst_col_names, id_type, self.df[newname] = tmp[newname].append(tmp_dst[oldname]) self.df['count'] = tmp['count'].append(tmp_dst['count']) else: - for newname, oldname in zip(self.col_names, dst_col_names): + for newname in self.col_names: self.df[newname] = tmp[newname] self.df['count'] = tmp['count'] From e8f0631401eb528ae3acdc0a9235d646016124c1 Mon Sep 17 00:00:00 2001 From: Ishika Roy Date: Mon, 28 Dec 2020 13:20:58 -0600 Subject: [PATCH 062/343] add changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 01c32b8f8d2..639122f6fe3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ ## Improvements ## Bug Fixes +- PR #1319 Fix graph nodes function and renumbering from series # cuGraph 0.17.0 (Date TBD) From a80ca57bd704ed5d88dc7e1eba630e4f5dc4a7fe Mon Sep 17 00:00:00 2001 From: Ishika Roy Date: Sun, 3 Jan 2021 23:43:23 -0600 Subject: [PATCH 063/343] remove unused code --- python/cugraph/structure/graph.py | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/python/cugraph/structure/graph.py b/python/cugraph/structure/graph.py index 16314ec8b6a..7afd9b76b48 100644 --- a/python/cugraph/structure/graph.py +++ b/python/cugraph/structure/graph.py @@ -653,32 +653,6 @@ def from_dask_cudf_edgelist( self.destination_columns = destination self.store_tranposed = None - def compute_local_data(self, by, load_balance=True): - """ - Compute the local edges, vertices and offsets for a distributed - graph stored as a dask-cudf dataframe and initialize the - communicator. Performs global sorting and load_balancing. - - Parameters - ---------- - by : str - by argument is the column by which we want to sort and - partition. It should be the source column name for generating - CSR format and destination column name for generating CSC - format. - load_balance : bool - Set as True to perform load_balancing after global sorting of - dask-cudf DataFrame. This ensures that the data is uniformly - distributed among multiple GPUs to avoid over-loading. - """ - if self.distributed: - data = get_local_data(self, by, load_balance) - self.local_data = {} - self.local_data["data"] = data - self.local_data["by"] = by - else: - raise Exception("Graph should be a distributed graph") - def view_edge_list(self): """ Display the edge list. Compute it if needed. From 9ab78a9776e0f881b984c3c55581d93ad354b511 Mon Sep 17 00:00:00 2001 From: Charles Hastings Date: Mon, 4 Jan 2021 08:46:34 -0800 Subject: [PATCH 064/343] don't need to skip includes on Pascal, update comment --- cpp/tests/experimental/louvain_test.cu | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/cpp/tests/experimental/louvain_test.cu b/cpp/tests/experimental/louvain_test.cu index 16543a4ad18..4a47b1a1aca 100644 --- a/cpp/tests/experimental/louvain_test.cu +++ b/cpp/tests/experimental/louvain_test.cu @@ -21,17 +21,8 @@ #include #include -// "FIXME": remove this check -// -// Disable louvain(experimental::graph_view_t,...) 
-// versions for GPU architectures < 700 -// (cuco/static_map.cuh depends on features not supported on or before Pascal) -// -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 700 #include -#else #include -#endif #include @@ -84,11 +75,10 @@ class Tests_Louvain : public ::testing::TestWithParam { auto graph_view = graph.view(); - // "FIXME": remove this check + // "FIXME": remove this check once we drop support for Pascal // - // Disable louvain(experimental::graph_view_t,...) - // versions for GPU architectures < 700 - // (cuco/static_map.cuh depends on features not supported on or before Pascal) + // Calling louvain on Pascal will throw an exception, we'll check that + // this is the behavior while we still support Pascal (device_prop.major < 7) // cudaDeviceProp device_prop; CUDA_CHECK(cudaGetDeviceProperties(&device_prop, 0)); From 049b088dd42183b21c4a559cc8634241f551104f Mon Sep 17 00:00:00 2001 From: Rick Ratzel <3039903+rlratzel@users.noreply.github.com> Date: Mon, 4 Jan 2021 12:52:25 -0600 Subject: [PATCH 065/343] Updates to support nightly MG test automation(#1308) This PR includes various updates to support nightly automated MG test runs, including: * Adding a marker which nightly scripts use to run on all visible GPUs instead of a hardcoded number of GPUs, since the scripts rely on knowing the number of GPUs being used in the tests by setting the `CUDA_VISIBLE_DEVICES` env var. * In the nightly scripts, the marker is used like so: `pytest -m "not preset_gpu_count" ...` * Added a `client.wait_for_workers()` call to various setups to both match the approach taken by the `MGContext` class, and to ensure workers are running. This seemed to increase reliability in the test runs. * _side note: we should decide to use only the `MGContext` class or the `client_connection` pytest fixture in these tests, since they both aim to accomplish the same thing._ Authors: - Rick Ratzel Approvers: - null URL: https://github.com/rapidsai/cugraph/pull/1308 --- python/cugraph/dask/common/mg_utils.py | 43 ++++++++++++++++++- python/cugraph/tests/dask/mg_context.py | 28 +++++------- .../test_mg_batch_betweenness_centrality.py | 6 ++- ...st_mg_batch_edge_betweenness_centrality.py | 6 ++- python/cugraph/tests/dask/test_mg_bfs.py | 19 +++----- python/cugraph/tests/dask/test_mg_comms.py | 19 +++----- python/cugraph/tests/dask/test_mg_degree.py | 21 +++------ .../tests/dask/test_mg_katz_centrality.py | 22 +++------- python/cugraph/tests/dask/test_mg_louvain.py | 19 +++----- python/cugraph/tests/dask/test_mg_pagerank.py | 21 +++------ python/cugraph/tests/dask/test_mg_renumber.py | 19 +++----- .../cugraph/tests/dask/test_mg_replication.py | 7 ++- python/cugraph/tests/dask/test_mg_sssp.py | 19 +++----- python/cugraph/tests/dask/test_mg_utility.py | 19 +++----- python/cugraph/tests/test_symmetrize.py | 19 +++----- python/pytest.ini | 1 + 16 files changed, 130 insertions(+), 158 deletions(-) diff --git a/python/cugraph/dask/common/mg_utils.py b/python/cugraph/dask/common/mg_utils.py index 7556afb122a..9604c26c1f7 100644 --- a/python/cugraph/dask/common/mg_utils.py +++ b/python/cugraph/dask/common/mg_utils.py @@ -10,9 +10,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
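
The nightly flow described in the PR #1308 text above is easiest to picture from the driver side. Below is a minimal sketch, not taken from the CI scripts themselves, of how a nightly driver might combine the `CUDA_VISIBLE_DEVICES` convention with the new `preset_gpu_count` marker; the test directory and log message are illustrative assumptions.

import os
import subprocess

# Size the run to whatever GPUs the nightly job exposed; mirrors the
# CUDA_VISIBLE_DEVICES convention described in the PR text above.
visible_gpus = os.environ.get("CUDA_VISIBLE_DEVICES", "0").strip().split(",")
print("running MG tests across {} visible GPU(s)".format(len(visible_gpus)))

# Deselect the tests that pin a hard-coded GPU count; only the variants that
# adapt to the visible devices run here. The test path is illustrative.
subprocess.check_call(
    ["pytest", "-m", "not preset_gpu_count", "python/cugraph/tests/dask"])
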
-from cugraph.raft.dask.common.utils import default_client + +import os + import numba.cuda +from dask_cuda import LocalCUDACluster +from dask.distributed import Client + +from cugraph.raft.dask.common.utils import default_client +import cugraph.comms as Comms + # FIXME: We currently look for the default client from dask, as such is the # if there is a dask client running without any GPU we will still try @@ -41,3 +49,36 @@ def is_single_gpu(): return False else: return True + + +def get_visible_devices(): + _visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES") + if _visible_devices is None: + # FIXME: We assume that if the variable is unset there is only one GPU + visible_devices = ["0"] + else: + visible_devices = _visible_devices.strip().split(",") + return visible_devices + + +def setup_local_dask_cluster(p2p=True): + """ + Performs steps to setup a Dask cluster using LocalCUDACluster and returns + the LocalCUDACluster and corresponding client instance. + """ + cluster = LocalCUDACluster() + client = Client(cluster) + client.wait_for_workers(len(get_visible_devices())) + Comms.initialize(p2p) + + return (cluster, client) + + +def teardown_local_dask_cluster(cluster, client): + """ + Performs steps to destroy a Dask cluster and a corresponding client + instance. + """ + Comms.destroy() + client.close() + cluster.close() diff --git a/python/cugraph/tests/dask/mg_context.py b/python/cugraph/tests/dask/mg_context.py index a72cf1c4b04..45dc75767fa 100644 --- a/python/cugraph/tests/dask/mg_context.py +++ b/python/cugraph/tests/dask/mg_context.py @@ -12,12 +12,15 @@ # limitations under the License. import time -import os + +import pytest from dask.distributed import Client + +from cugraph.dask.common.mg_utils import get_visible_devices from dask_cuda import LocalCUDACluster as CUDACluster import cugraph.comms as Comms -import pytest + # Maximal number of verifications of the number of workers DEFAULT_MAX_ATTEMPT = 100 @@ -26,22 +29,13 @@ DEFAULT_WAIT_TIME = 0.5 -def get_visible_devices(): - _visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES") - if _visible_devices is None: - # FIXME: We assume that if the variable is unset there is only one GPU - visible_devices = ["0"] - else: - visible_devices = _visible_devices.strip().split(",") - return visible_devices - - def skip_if_not_enough_devices(required_devices): - visible_devices = get_visible_devices() - number_of_visible_devices = len(visible_devices) - if required_devices > number_of_visible_devices: - pytest.skip("Not enough devices available to " - "test MG({})".format(required_devices)) + if required_devices is not None: + visible_devices = get_visible_devices() + number_of_visible_devices = len(visible_devices) + if required_devices > number_of_visible_devices: + pytest.skip("Not enough devices available to " + "test MG({})".format(required_devices)) class MGContext: diff --git a/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py b/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py index 4d04bf6df85..4b0f6629bc3 100644 --- a/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py +++ b/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py @@ -37,7 +37,11 @@ # Parameters # ============================================================================= DATASETS = ["../datasets/karate.csv"] -MG_DEVICE_COUNT_OPTIONS = [1, 2, 3, 4] +MG_DEVICE_COUNT_OPTIONS = [pytest.param(1, marks=pytest.mark.preset_gpu_count), + pytest.param(2, marks=pytest.mark.preset_gpu_count), + pytest.param(3, 
marks=pytest.mark.preset_gpu_count), + pytest.param(4, marks=pytest.mark.preset_gpu_count), + None] RESULT_DTYPE_OPTIONS = [np.float64] diff --git a/python/cugraph/tests/dask/test_mg_batch_edge_betweenness_centrality.py b/python/cugraph/tests/dask/test_mg_batch_edge_betweenness_centrality.py index 1e4a1950c53..54b58c340aa 100644 --- a/python/cugraph/tests/dask/test_mg_batch_edge_betweenness_centrality.py +++ b/python/cugraph/tests/dask/test_mg_batch_edge_betweenness_centrality.py @@ -37,7 +37,11 @@ # Parameters # ============================================================================= DATASETS = ["../datasets/karate.csv"] -MG_DEVICE_COUNT_OPTIONS = [1, 2, 4] +MG_DEVICE_COUNT_OPTIONS = [pytest.param(1, marks=pytest.mark.preset_gpu_count), + pytest.param(2, marks=pytest.mark.preset_gpu_count), + pytest.param(3, marks=pytest.mark.preset_gpu_count), + pytest.param(4, marks=pytest.mark.preset_gpu_count), + None] RESULT_DTYPE_OPTIONS = [np.float64] diff --git a/python/cugraph/tests/dask/test_mg_bfs.py b/python/cugraph/tests/dask/test_mg_bfs.py index 553bbc698ff..63580461b17 100644 --- a/python/cugraph/tests/dask/test_mg_bfs.py +++ b/python/cugraph/tests/dask/test_mg_bfs.py @@ -12,28 +12,21 @@ # limitations under the License. import cugraph.dask as dcg -import cugraph.comms as Comms -from dask.distributed import Client import gc import pytest import cugraph import dask_cudf import cudf -from dask_cuda import LocalCUDACluster -from cugraph.dask.common.mg_utils import is_single_gpu +from cugraph.dask.common.mg_utils import (is_single_gpu, + setup_local_dask_cluster, + teardown_local_dask_cluster) -@pytest.fixture +@pytest.fixture(scope="module") def client_connection(): - cluster = LocalCUDACluster() - client = Client(cluster) - Comms.initialize(p2p=True) - + (cluster, client) = setup_local_dask_cluster(p2p=True) yield client - - Comms.destroy() - client.close() - cluster.close() + teardown_local_dask_cluster(cluster, client) @pytest.mark.skipif( diff --git a/python/cugraph/tests/dask/test_mg_comms.py b/python/cugraph/tests/dask/test_mg_comms.py index 29789461018..61a4944b5f1 100644 --- a/python/cugraph/tests/dask/test_mg_comms.py +++ b/python/cugraph/tests/dask/test_mg_comms.py @@ -12,28 +12,21 @@ # limitations under the License. import cugraph.dask as dcg -import cugraph.comms as Comms -from dask.distributed import Client import gc import pytest import cugraph import dask_cudf import cudf -from dask_cuda import LocalCUDACluster -from cugraph.dask.common.mg_utils import is_single_gpu +from cugraph.dask.common.mg_utils import (is_single_gpu, + setup_local_dask_cluster, + teardown_local_dask_cluster) -@pytest.fixture +@pytest.fixture(scope="module") def client_connection(): - cluster = LocalCUDACluster() - client = Client(cluster) - Comms.initialize(p2p=True) - + (cluster, client) = setup_local_dask_cluster(p2p=True) yield client - - Comms.destroy() - client.close() - cluster.close() + teardown_local_dask_cluster(cluster, client) @pytest.mark.skipif( diff --git a/python/cugraph/tests/dask/test_mg_degree.py b/python/cugraph/tests/dask/test_mg_degree.py index a6600104bc8..9f4c0d94319 100644 --- a/python/cugraph/tests/dask/test_mg_degree.py +++ b/python/cugraph/tests/dask/test_mg_degree.py @@ -11,30 +11,21 @@ # See the License for the specific language governing permissions and # limitations under the License. 
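
The MG_DEVICE_COUNT_OPTIONS lists above add a `None` entry alongside the integer counts, and skip_if_not_enough_devices() in mg_context.py now ignores `None`. The following is a minimal sketch, with an illustrative test name and assertion, of how a test consuming that parameter might treat the two cases; only get_visible_devices() is taken from the mg_utils.py changes in this patch.

import pytest
from cugraph.dask.common.mg_utils import get_visible_devices

@pytest.mark.parametrize(
    "mg_device_count",
    [pytest.param(2, marks=pytest.mark.preset_gpu_count), None])
def test_mg_device_count_sketch(mg_device_count):
    # None means "use every GPU in CUDA_VISIBLE_DEVICES"; the integer params
    # keep the preset_gpu_count mark so nightly runs can deselect them.
    if mg_device_count is None:
        mg_device_count = len(get_visible_devices())
    assert mg_device_count >= 1
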
-from dask.distributed import Client import gc import pytest import cudf -import cugraph.comms as Comms import cugraph import dask_cudf -from cugraph.dask.common.mg_utils import is_single_gpu +from cugraph.dask.common.mg_utils import (is_single_gpu, + setup_local_dask_cluster, + teardown_local_dask_cluster) -# Move to conftest -from dask_cuda import LocalCUDACluster - -@pytest.fixture +@pytest.fixture(scope="module") def client_connection(): - cluster = LocalCUDACluster() - client = Client(cluster) - Comms.initialize(p2p=True) - + (cluster, client) = setup_local_dask_cluster(p2p=True) yield client - - Comms.destroy() - client.close() - cluster.close() + teardown_local_dask_cluster(cluster, client) @pytest.mark.skipif( diff --git a/python/cugraph/tests/dask/test_mg_katz_centrality.py b/python/cugraph/tests/dask/test_mg_katz_centrality.py index 43d63f2fd5d..631457f7558 100644 --- a/python/cugraph/tests/dask/test_mg_katz_centrality.py +++ b/python/cugraph/tests/dask/test_mg_katz_centrality.py @@ -14,30 +14,20 @@ # import numpy as np import pytest import cugraph.dask as dcg -import cugraph.comms as Comms -from dask.distributed import Client import gc import cugraph import dask_cudf import cudf -from dask_cuda import LocalCUDACluster -from cugraph.dask.common.mg_utils import is_single_gpu +from cugraph.dask.common.mg_utils import (is_single_gpu, + setup_local_dask_cluster, + teardown_local_dask_cluster) -# The function selects personalization_perc% of accessible vertices in graph M -# and randomly assigns them personalization values - -@pytest.fixture +@pytest.fixture(scope="module") def client_connection(): - cluster = LocalCUDACluster() - client = Client(cluster) - Comms.initialize(p2p=True) - + (cluster, client) = setup_local_dask_cluster(p2p=True) yield client - - Comms.destroy() - client.close() - cluster.close() + teardown_local_dask_cluster(cluster, client) @pytest.mark.skipif( diff --git a/python/cugraph/tests/dask/test_mg_louvain.py b/python/cugraph/tests/dask/test_mg_louvain.py index 56401e338a4..61b2f41f474 100644 --- a/python/cugraph/tests/dask/test_mg_louvain.py +++ b/python/cugraph/tests/dask/test_mg_louvain.py @@ -14,13 +14,12 @@ import pytest import cugraph.dask as dcg -import cugraph.comms as Comms -from dask.distributed import Client import cugraph import dask_cudf -from dask_cuda import LocalCUDACluster from cugraph.tests import utils -from cugraph.dask.common.mg_utils import is_single_gpu +from cugraph.dask.common.mg_utils import (is_single_gpu, + setup_local_dask_cluster, + teardown_local_dask_cluster) try: from rapids_pytest_benchmark import setFixtureParamNames @@ -44,17 +43,9 @@ def setFixtureParamNames(*args, **kwargs): # Fixtures @pytest.fixture(scope="module") def client_connection(): - # setup - cluster = LocalCUDACluster() - client = Client(cluster) - Comms.initialize(p2p=True) - + (cluster, client) = setup_local_dask_cluster(p2p=True) yield client - - # teardown - Comms.destroy() - client.close() - cluster.close() + teardown_local_dask_cluster(cluster, client) @pytest.mark.skipif( diff --git a/python/cugraph/tests/dask/test_mg_pagerank.py b/python/cugraph/tests/dask/test_mg_pagerank.py index f6416903b89..4f0b45242dd 100644 --- a/python/cugraph/tests/dask/test_mg_pagerank.py +++ b/python/cugraph/tests/dask/test_mg_pagerank.py @@ -13,19 +13,18 @@ import numpy as np import pytest import cugraph.dask as dcg -import cugraph.comms as Comms -from dask.distributed import Client import gc import cugraph import dask_cudf import cudf -from dask_cuda import LocalCUDACluster 
-from cugraph.dask.common.mg_utils import is_single_gpu +from cugraph.dask.common.mg_utils import (is_single_gpu, + setup_local_dask_cluster, + teardown_local_dask_cluster) + # The function selects personalization_perc% of accessible vertices in graph M # and randomly assigns them personalization values - def personalize(vertices, personalization_perc): personalization = None if personalization_perc != 0: @@ -52,17 +51,11 @@ def personalize(vertices, personalization_perc): PERSONALIZATION_PERC = [0, 10, 50] -@pytest.fixture +@pytest.fixture(scope="module") def client_connection(): - cluster = LocalCUDACluster() - client = Client(cluster) - Comms.initialize(p2p=True) - + (cluster, client) = setup_local_dask_cluster(p2p=True) yield client - - Comms.destroy() - client.close() - cluster.close() + teardown_local_dask_cluster(cluster, client) @pytest.mark.skipif( diff --git a/python/cugraph/tests/dask/test_mg_renumber.py b/python/cugraph/tests/dask/test_mg_renumber.py index 8456241ff26..00a574bcf8e 100644 --- a/python/cugraph/tests/dask/test_mg_renumber.py +++ b/python/cugraph/tests/dask/test_mg_renumber.py @@ -20,29 +20,22 @@ import numpy as np import cugraph.dask as dcg -import cugraph.comms as Comms -from dask.distributed import Client import cugraph import dask_cudf import dask import cudf -from dask_cuda import LocalCUDACluster from cugraph.tests import utils from cugraph.structure.number_map import NumberMap -from cugraph.dask.common.mg_utils import is_single_gpu +from cugraph.dask.common.mg_utils import (is_single_gpu, + setup_local_dask_cluster, + teardown_local_dask_cluster) -@pytest.fixture +@pytest.fixture(scope="module") def client_connection(): - cluster = LocalCUDACluster() - client = Client(cluster) - Comms.initialize(p2p=True) - + (cluster, client) = setup_local_dask_cluster(p2p=True) yield client - - Comms.destroy() - client.close() - cluster.close() + teardown_local_dask_cluster(cluster, client) # Test all combinations of default/managed and pooled/non-pooled allocation diff --git a/python/cugraph/tests/dask/test_mg_replication.py b/python/cugraph/tests/dask/test_mg_replication.py index 2b8510cd9ff..bb43d6c0f7a 100644 --- a/python/cugraph/tests/dask/test_mg_replication.py +++ b/python/cugraph/tests/dask/test_mg_replication.py @@ -24,8 +24,11 @@ DATASETS_OPTIONS = utils.DATASETS_SMALL DIRECTED_GRAPH_OPTIONS = [False, True] -# MG_DEVICE_COUNT_OPTIONS = [1, 2, 3, 4] -MG_DEVICE_COUNT_OPTIONS = [1] +MG_DEVICE_COUNT_OPTIONS = [pytest.param(1, marks=pytest.mark.preset_gpu_count), + pytest.param(2, marks=pytest.mark.preset_gpu_count), + pytest.param(3, marks=pytest.mark.preset_gpu_count), + pytest.param(4, marks=pytest.mark.preset_gpu_count), + None] @pytest.mark.skipif( diff --git a/python/cugraph/tests/dask/test_mg_sssp.py b/python/cugraph/tests/dask/test_mg_sssp.py index ac4a60f1bdc..d75d76d7fd4 100644 --- a/python/cugraph/tests/dask/test_mg_sssp.py +++ b/python/cugraph/tests/dask/test_mg_sssp.py @@ -12,28 +12,21 @@ # limitations under the License. 
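
The comment in test_mg_pagerank.py above says personalize() selects personalization_perc% of the vertices and assigns them random personalization values, but most of its body falls outside this hunk. A rough sketch of that kind of helper follows, assuming `vertices` is a cudf.Series of vertex ids; the column names and normalization are assumptions, not the actual implementation.

import numpy as np
import cudf

def personalize_sketch(vertices, personalization_perc):
    # No personalization requested: PageRank falls back to the uniform case.
    if personalization_perc == 0:
        return None
    num_chosen = max(1, int(len(vertices) * personalization_perc / 100))
    chosen = np.random.choice(
        vertices.values_host, size=num_chosen, replace=False)
    values = np.random.random(num_chosen)
    # Normalize so the personalization values sum to 1 over the chosen subset.
    return cudf.DataFrame({"vertex": chosen, "values": values / values.sum()})
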
import cugraph.dask as dcg -import cugraph.comms as Comms -from dask.distributed import Client import gc import pytest import cugraph import dask_cudf import cudf -from dask_cuda import LocalCUDACluster -from cugraph.dask.common.mg_utils import is_single_gpu +from cugraph.dask.common.mg_utils import (is_single_gpu, + setup_local_dask_cluster, + teardown_local_dask_cluster) -@pytest.fixture +@pytest.fixture(scope="module") def client_connection(): - cluster = LocalCUDACluster() - client = Client(cluster) - Comms.initialize(p2p=True) - + (cluster, client) = setup_local_dask_cluster(p2p=True) yield client - - Comms.destroy() - client.close() - cluster.close() + teardown_local_dask_cluster(cluster, client) @pytest.mark.skipif( diff --git a/python/cugraph/tests/dask/test_mg_utility.py b/python/cugraph/tests/dask/test_mg_utility.py index 808f1bcfa70..334ef780178 100644 --- a/python/cugraph/tests/dask/test_mg_utility.py +++ b/python/cugraph/tests/dask/test_mg_utility.py @@ -12,16 +12,17 @@ # limitations under the License. import cugraph.dask as dcg -from dask.distributed import Client, default_client, futures_of, wait +from dask.distributed import default_client, futures_of, wait import gc import cugraph import dask_cudf import cugraph.comms as Comms -from dask_cuda import LocalCUDACluster import pytest from cugraph.dask.common.part_utils import concat_within_workers from cugraph.dask.common.read_utils import get_n_workers -from cugraph.dask.common.mg_utils import is_single_gpu +from cugraph.dask.common.mg_utils import (is_single_gpu, + setup_local_dask_cluster, + teardown_local_dask_cluster) import os import time import numpy as np @@ -35,17 +36,11 @@ def setup_function(): gc.collect() -@pytest.fixture +@pytest.fixture(scope="module") def client_connection(): - cluster = LocalCUDACluster() - client = Client(cluster) - Comms.initialize(p2p=True) - + (cluster, client) = setup_local_dask_cluster(p2p=True) yield client - - Comms.destroy() - client.close() - cluster.close() + teardown_local_dask_cluster(cluster, client) @pytest.mark.skipif( diff --git a/python/cugraph/tests/test_symmetrize.py b/python/cugraph/tests/test_symmetrize.py index 7ef8b33e97f..4080362ddfa 100644 --- a/python/cugraph/tests/test_symmetrize.py +++ b/python/cugraph/tests/test_symmetrize.py @@ -19,10 +19,9 @@ import cudf import cugraph from cugraph.tests import utils -import cugraph.comms as Comms -from dask.distributed import Client -from dask_cuda import LocalCUDACluster -from cugraph.dask.common.mg_utils import is_single_gpu +from cugraph.dask.common.mg_utils import (is_single_gpu, + setup_local_dask_cluster, + teardown_local_dask_cluster) def test_version(): @@ -188,17 +187,11 @@ def test_symmetrize_weighted(graph_file): compare(cu_M["0"], cu_M["1"], cu_M["2"], sym_src, sym_dst, sym_w) -@pytest.fixture +@pytest.fixture(scope="module") def client_connection(): - cluster = LocalCUDACluster() - client = Client(cluster) - Comms.initialize(p2p=True) - + (cluster, client) = setup_local_dask_cluster(p2p=True) yield client - - Comms.destroy() - client.close() - cluster.close() + teardown_local_dask_cluster(cluster, client) @pytest.mark.skipif( diff --git a/python/pytest.ini b/python/pytest.ini index 33c82fe48f7..fb8c6ea0948 100644 --- a/python/pytest.ini +++ b/python/pytest.ini @@ -11,6 +11,7 @@ markers = managedmem_off: RMM managed memory disabled poolallocator_on: RMM pool allocator enabled poolallocator_off: RMM pool allocator disabled + preset_gpu_count: Use a hard-coded number of GPUs for specific MG tests ETL: benchmarks for 
ETL steps small: small datasets tiny: tiny datasets From fd609e22dcdd9cd9d1665eda5e30631c81e53ee3 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Mon, 4 Jan 2021 22:23:26 -0500 Subject: [PATCH 066/343] cosmetic updates --- cpp/tests/experimental/katz_centrality_test.cpp | 4 ++-- cpp/tests/experimental/pagerank_test.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/tests/experimental/katz_centrality_test.cpp b/cpp/tests/experimental/katz_centrality_test.cpp index c2ac4340319..94639883100 100644 --- a/cpp/tests/experimental/katz_centrality_test.cpp +++ b/cpp/tests/experimental/katz_centrality_test.cpp @@ -197,8 +197,8 @@ class Tests_KatzCentrality : public ::testing::TestWithParam(graph_view.get_number_of_vertices())) * threshold_ratio; auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { - auto diff = std::abs(lhs - rhs); - return (diff < std::max(lhs, rhs) * threshold_ratio) || (diff < threshold_magnitude); + return std::abs(lhs - rhs) < + std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude); }; ASSERT_TRUE(std::equal(h_reference_katz_centralities.begin(), diff --git a/cpp/tests/experimental/pagerank_test.cpp b/cpp/tests/experimental/pagerank_test.cpp index 4763249aa9e..640326c1fa2 100644 --- a/cpp/tests/experimental/pagerank_test.cpp +++ b/cpp/tests/experimental/pagerank_test.cpp @@ -265,8 +265,8 @@ class Tests_PageRank : public ::testing::TestWithParam { auto threshold_magnitude = (epsilon / static_cast(graph_view.get_number_of_vertices())) * threshold_ratio; auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { - auto diff = std::abs(lhs - rhs); - return (diff < std::max(lhs, rhs) * threshold_ratio) || (diff < threshold_magnitude); + return std::abs(lhs - rhs) < + std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude); }; ASSERT_TRUE(std::equal(h_reference_pageranks.begin(), From 659bde30d0ef031a39e95b9fef676776772550c9 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Mon, 4 Jan 2021 23:42:29 -0500 Subject: [PATCH 067/343] add C++ coarsen_graph_test --- cpp/tests/CMakeLists.txt | 9 + cpp/tests/experimental/coarsen_graph_test.cpp | 371 ++++++++++++++++++ 2 files changed, 380 insertions(+) create mode 100644 cpp/tests/experimental/coarsen_graph_test.cpp diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 593c36359e2..c81fd68bdf4 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -272,6 +272,15 @@ set(EXPERIMENTAL_GRAPH_TEST_SRCS ConfigureTest(EXPERIMENTAL_GRAPH_TEST "${EXPERIMENTAL_GRAPH_TEST_SRCS}" "") +################################################################################################### +# - Experimental coarsening tests ----------------------------------------------------------------- + +set(EXPERIMENTAL_COARSEN_GRAPH_TEST_SRCS + "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/experimental/coarsen_graph_test.cpp") + +ConfigureTest(EXPERIMENTAL_COARSEN_GRAPH_TEST "${EXPERIMENTAL_COARSEN_GRAPH_TEST_SRCS}" "") + ################################################################################################### # - Experimental BFS tests ------------------------------------------------------------------------ diff --git a/cpp/tests/experimental/coarsen_graph_test.cpp b/cpp/tests/experimental/coarsen_graph_test.cpp new file mode 100644 index 00000000000..64f70d5c30b --- /dev/null +++ b/cpp/tests/experimental/coarsen_graph_test.cpp @@ -0,0 +1,371 @@ +/* + * Copyright (c) 2020, NVIDIA 
CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governin_from_mtxg permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + +template +std::enable_if_t::value, bool> is_valid_vertex(vertex_t num_vertices, + vertex_t v) +{ + return (v >= 0) && (v < num_vertices); +} + +template +std::enable_if_t::value, bool> is_valid_vertex(vertex_t num_vertices, + vertex_t v) +{ + return v < num_vertices; +} + +template +void check_coarsened_graph_results(edge_t* org_offsets, + vertex_t* org_indices, + weight_t* org_weights, + vertex_t* org_labels, + edge_t* coarse_offsets, + vertex_t* coarse_indices, + weight_t* coarse_weights, + vertex_t* coarse_vertex_labels, + vertex_t num_org_vertices, + vertex_t num_coarse_vertices) +{ + ASSERT_TRUE(((org_weights == nullptr) && (coarse_weights == nullptr)) || + ((org_weights != nullptr) && (coarse_weights != nullptr))); + ASSERT_TRUE(std::is_sorted(org_offsets, org_offsets + num_org_vertices)); + ASSERT_TRUE(std::count_if(org_indices, + org_indices + org_offsets[num_org_vertices], + [num_org_vertices](auto nbr) { + return !is_valid_vertex(num_org_vertices, nbr); + }) == 0); + ASSERT_TRUE(std::is_sorted(coarse_offsets, coarse_offsets + num_coarse_vertices)); + ASSERT_TRUE(std::count_if(coarse_indices, + coarse_indices + coarse_offsets[num_coarse_vertices], + [num_coarse_vertices](auto nbr) { + return !is_valid_vertex(num_coarse_vertices, nbr); + }) == 0); + ASSERT_TRUE(num_coarse_vertices <= num_org_vertices); + + std::vector unique_labels(num_org_vertices); + std::copy(org_labels, org_labels + num_org_vertices, unique_labels.begin()); + std::sort(unique_labels.begin(), unique_labels.end()); + auto last = std::unique(unique_labels.begin(), unique_labels.end()); + unique_labels.resize(std::distance(unique_labels.begin(), last)); + ASSERT_TRUE(unique_labels.size() == static_cast(num_coarse_vertices)); + + { + std::vector tmp_coarse_vertex_labels(coarse_vertex_labels, + coarse_vertex_labels + num_coarse_vertices); + std::sort(tmp_coarse_vertex_labels.begin(), tmp_coarse_vertex_labels.end()); + auto last = std::unique(tmp_coarse_vertex_labels.begin(), tmp_coarse_vertex_labels.end()); + ASSERT_TRUE(last == tmp_coarse_vertex_labels.end()); + ASSERT_TRUE( + std::equal(unique_labels.begin(), unique_labels.end(), tmp_coarse_vertex_labels.begin())); + } + + std::vector> label_org_vertex_pairs(num_org_vertices); + for (vertex_t i = 0; i < num_org_vertices; ++i) { + label_org_vertex_pairs[i] = std::make_tuple(org_labels[i], i); + } + std::sort(label_org_vertex_pairs.begin(), label_org_vertex_pairs.end()); + + std::vector unique_label_counts(unique_labels.size()); + std::vector unique_label_offsets(unique_label_counts.size() + 1, 0); + std::transform( + unique_labels.begin(), + unique_labels.end(), + unique_label_counts.begin(), + [&label_org_vertex_pairs](auto label) { + auto lb = std::lower_bound( + label_org_vertex_pairs.begin(), + 
label_org_vertex_pairs.end(), + std::make_tuple(label, cugraph::experimental::invalid_vertex_id::value), + [](auto lhs, auto rhs) { return std::get<0>(lhs) < std::get<0>(rhs); }); + auto ub = std::upper_bound( + label_org_vertex_pairs.begin(), + label_org_vertex_pairs.end(), + std::make_tuple(label, cugraph::experimental::invalid_vertex_id::value), + [](auto lhs, auto rhs) { return std::get<0>(lhs) < std::get<0>(rhs); }); + return static_cast(std::distance(lb, ub)); + }); + std::partial_sum( + unique_label_counts.begin(), unique_label_counts.end(), unique_label_offsets.begin() + 1); + + std::map label_to_coarse_vertex_map{}; + for (vertex_t i = 0; i < num_coarse_vertices; ++i) { + label_to_coarse_vertex_map[coarse_vertex_labels[i]] = i; + } + + for (size_t i = 0; i < unique_labels.size(); ++i) { + auto count = unique_label_counts[i]; + auto offset = unique_label_offsets[i]; + if (org_weights == nullptr) { + std::vector coarse_nbrs0{}; + for (vertex_t j = offset; j < offset + count; ++j) { + auto org_vertex = std::get<1>(label_org_vertex_pairs[j]); + for (auto k = org_offsets[org_vertex]; k < org_offsets[org_vertex + 1]; ++k) { + auto org_nbr = org_indices[k]; + auto coarse_nbr = label_to_coarse_vertex_map[org_labels[org_nbr]]; + coarse_nbrs0.push_back(coarse_nbr); + } + } + std::sort(coarse_nbrs0.begin(), coarse_nbrs0.end()); + auto last = std::unique(coarse_nbrs0.begin(), coarse_nbrs0.end()); + coarse_nbrs0.resize(std::distance(coarse_nbrs0.begin(), last)); + + auto coarse_vertex = label_to_coarse_vertex_map[unique_labels[i]]; + auto coarse_offset = coarse_offsets[coarse_vertex]; + auto coarse_count = coarse_offsets[coarse_vertex + 1] - coarse_offset; + std::vector coarse_nbrs1(coarse_indices + coarse_offset, + coarse_indices + coarse_offset + coarse_count); + std::sort(coarse_nbrs1.begin(), coarse_nbrs1.end()); + + ASSERT_TRUE(std::equal(coarse_nbrs0.begin(), coarse_nbrs0.end(), coarse_nbrs1.begin())); + } else { + std::vector> coarse_nbr_weight_pairs0{}; + for (vertex_t j = offset; j < offset + count; ++j) { + auto org_vertex = std::get<1>(label_org_vertex_pairs[j]); + for (auto k = org_offsets[org_vertex]; k < org_offsets[org_vertex + 1]; ++k) { + auto org_nbr = org_indices[k]; + auto org_weight = org_weights[k]; + auto coarse_nbr = label_to_coarse_vertex_map[org_labels[org_nbr]]; + coarse_nbr_weight_pairs0.push_back(std::make_tuple(coarse_nbr, org_weight)); + } + } + std::sort(coarse_nbr_weight_pairs0.begin(), coarse_nbr_weight_pairs0.end()); + // reduce by key + { + size_t run_start_idx = 0; + for (size_t i = 1; i < coarse_nbr_weight_pairs0.size(); ++i) { + auto& start = coarse_nbr_weight_pairs0[run_start_idx]; + auto& cur = coarse_nbr_weight_pairs0[i]; + if (std::get<0>(start) == std::get<1>(cur)) { + std::get<1>(start) += std::get<1>(cur); + std::get<0>(cur) = cugraph::experimental::invalid_vertex_id::value; + } else { + run_start_idx = i; + } + } + coarse_nbr_weight_pairs0.erase( + std::remove_if(coarse_nbr_weight_pairs0.begin(), + coarse_nbr_weight_pairs0.end(), + [](auto t) { + return std::get<0>(t) == + cugraph::experimental::invalid_vertex_id::value; + }), + coarse_nbr_weight_pairs0.end()); + } + + auto coarse_vertex = label_to_coarse_vertex_map[unique_labels[i]]; + auto coarse_offset = coarse_offsets[coarse_vertex]; + auto coarse_count = coarse_offsets[coarse_vertex + 1] - coarse_offset; + std::vector> coarse_nbr_weight_pairs1(coarse_count); + for (auto j = coarse_offset; j < coarse_offset + coarse_count; ++j) { + coarse_nbr_weight_pairs1[i - coarse_offset] = + 
std::make_tuple(coarse_indices[j], coarse_weights[j]); + } + std::sort(coarse_nbr_weight_pairs1.begin(), coarse_nbr_weight_pairs1.end()); + + auto threshold_ratio = weight_t{1e-4}; + auto threshold_magnitude = + ((std::accumulate( + coarse_weights, coarse_weights + coarse_offsets[num_coarse_vertices], weight_t{0.0}) / + static_cast(coarse_offsets[num_coarse_vertices])) * + threshold_ratio) * + threshold_ratio; + ASSERT_TRUE(std::equal( + coarse_nbr_weight_pairs0.begin(), + coarse_nbr_weight_pairs0.end(), + coarse_nbr_weight_pairs1.begin(), + [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { + return std::get<0>(lhs) == std::get<0>(rhs) + ? (std::abs(std::get<1>(lhs) - std::get<1>(rhs)) <= + std::max(std::max(std::abs(std::get<1>(lhs)), std::abs(std::get<1>(rhs))) * + threshold_ratio, + threshold_magnitude)) + : false; + })); + } + } + + return; +} + +typedef struct CoarsenGraph_Usecase_t { + std::string graph_file_full_path{}; + double coarsen_ratio{0.0}; + bool test_weighted{false}; + + CoarsenGraph_Usecase_t(std::string const& graph_file_path, + double coarsen_ratio, + bool test_weighted) + : coarsen_ratio(coarsen_ratio), test_weighted(test_weighted) + { + if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { + graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; + } else { + graph_file_full_path = graph_file_path; + } + }; +} CoarsenGraph_Usecase; + +class Tests_CoarsenGraph : public ::testing::TestWithParam { + public: + Tests_CoarsenGraph() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + template + void run_current_test(CoarsenGraph_Usecase const& configuration) + { + raft::handle_t handle{}; + + auto graph = cugraph::test:: + read_graph_from_matrix_market_file( + handle, configuration.graph_file_full_path, configuration.test_weighted); + auto graph_view = graph.view(); + + std::vector h_labels(graph_view.get_number_of_vertices()); + auto num_labels = static_cast(h_labels.size() * configuration.coarsen_ratio); + ASSERT_TRUE(num_labels > 0); + + std::random_device r{}; + std::default_random_engine generator{r()}; + std::uniform_int_distribution distribution{0, num_labels - 1}; + + std::for_each(h_labels.begin(), h_labels.end(), [&distribution, &generator](auto& label) { + label = distribution(generator); + }); + + rmm::device_uvector d_labels(h_labels.size(), handle.get_stream()); + raft::update_device(d_labels.data(), h_labels.data(), h_labels.size(), handle.get_stream()); + + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + + std::unique_ptr< + cugraph::experimental::graph_t> + coarse_graph{}; + rmm::device_uvector coarse_vertices_to_labels(0, handle.get_stream()); + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + std::tie(coarse_graph, coarse_vertices_to_labels) = + cugraph::experimental::coarsen_graph(handle, graph_view, d_labels.begin()); + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + std::vector h_org_offsets(graph_view.get_number_of_vertices() + 1); + std::vector h_org_indices(graph_view.get_number_of_edges()); + std::vector h_org_weights{}; + raft::update_host(h_org_offsets.data(), + graph_view.offsets(), + graph_view.get_number_of_vertices() + 1, + handle.get_stream()); + raft::update_host(h_org_indices.data(), + graph_view.indices(), + graph_view.get_number_of_edges(), + handle.get_stream()); + if (graph_view.is_weighted()) { + 
h_org_weights.assign(graph_view.get_number_of_edges(), weight_t{0.0}); + raft::update_host(h_org_weights.data(), + graph_view.weights(), + graph_view.get_number_of_edges(), + handle.get_stream()); + } + + auto coarse_graph_view = coarse_graph->view(); + + std::vector h_coarse_offsets(coarse_graph_view.get_number_of_vertices() + 1); + std::vector h_coarse_indices(coarse_graph_view.get_number_of_edges()); + std::vector h_coarse_weights{}; + raft::update_host(h_coarse_offsets.data(), + coarse_graph_view.offsets(), + coarse_graph_view.get_number_of_vertices() + 1, + handle.get_stream()); + raft::update_host(h_coarse_indices.data(), + coarse_graph_view.indices(), + coarse_graph_view.get_number_of_edges(), + handle.get_stream()); + if (graph_view.is_weighted()) { + h_coarse_weights.resize(coarse_graph_view.get_number_of_edges()); + raft::update_host(h_coarse_weights.data(), + coarse_graph_view.weights(), + coarse_graph_view.get_number_of_edges(), + handle.get_stream()); + } + + std::vector h_coarse_vertices_to_labels(h_coarse_vertices_to_labels.size()); + raft::update_host(h_coarse_vertices_to_labels.data(), + coarse_vertices_to_labels.data(), + coarse_vertices_to_labels.size(), + handle.get_stream()); + + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + + check_coarsened_graph_results(h_org_offsets.data(), + h_org_indices.data(), + h_org_weights.data(), + h_labels.data(), + h_coarse_offsets.data(), + h_coarse_indices.data(), + h_coarse_weights.data(), + h_coarse_vertices_to_labels.data(), + graph_view.get_number_of_vertices(), + coarse_graph_view.get_number_of_vertices()); + } +}; + +// FIXME: add tests for type combinations +TEST_P(Tests_CoarsenGraph, CheckInt32Int32FloatFloat) +{ + run_current_test(GetParam()); +} + +INSTANTIATE_TEST_CASE_P( + simple_test, + Tests_CoarsenGraph, + ::testing::Values(CoarsenGraph_Usecase("test/datasets/karate.mtx", 0.2, false), + CoarsenGraph_Usecase("test/datasets/karate.mtx", 0.2, true), + CoarsenGraph_Usecase("test/datasets/web-Google.mtx", 0.1, false), + CoarsenGraph_Usecase("test/datasets/web-Google.mtx", 0.1, true), + CoarsenGraph_Usecase("test/datasets/ljournal-2008.mtx", 0.1, false), + CoarsenGraph_Usecase("test/datasets/ljournal-2008.mtx", 0.1, true), + CoarsenGraph_Usecase("test/datasets/webbase-1M.mtx", 0.1, false), + CoarsenGraph_Usecase("test/datasets/webbase-1M.mtx", 0.1, true))); + +CUGRAPH_TEST_PROGRAM_MAIN() From 8a129f7e3429ad4c6fdde22c1a5cfd0f314b5c7a Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 5 Jan 2021 10:01:22 -0500 Subject: [PATCH 068/343] update GIT tag for cuco --- cpp/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 59376e91083..df2341b54fb 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -205,7 +205,7 @@ message("Fetching cuco") FetchContent_Declare( cuco GIT_REPOSITORY https://github.com/NVIDIA/cuCollections.git - GIT_TAG d965ed8dea8f56da8e260a6130dddf3ca351c45f + GIT_TAG 2196040f0562a0280292eebef5295d914f615e63 ) FetchContent_GetProperties(cuco) From 78c383d406b3f5711e6eeb7ff2999609ececa7eb Mon Sep 17 00:00:00 2001 From: Iroy30 <41401566+Iroy30@users.noreply.github.com> Date: Tue, 5 Jan 2021 10:14:58 -0600 Subject: [PATCH 069/343] add test for renumbering from series --- python/cugraph/tests/test_renumber.py | 32 +++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/python/cugraph/tests/test_renumber.py b/python/cugraph/tests/test_renumber.py index 91416942429..25911ba842c 100644 --- 
a/python/cugraph/tests/test_renumber.py +++ b/python/cugraph/tests/test_renumber.py @@ -163,7 +163,39 @@ def test_renumber_negative_col(): # Test all combinations of default/managed and pooled/non-pooled allocation +@pytest.mark.parametrize("graph_file", utils.DATASETS) +def test_renumber_files(graph_file): + gc.collect() + + M = utils.read_csv_for_nx(graph_file) + sources = cudf.Series(M["0"]) + destinations = cudf.Series(M["1"]) + + translate = 1000 + + df = cudf.DataFrame() + df["src"] = cudf.Series([x + translate for x in sources. + values_host]) + df["dst"] = cudf.Series([x + translate for x in destinations. + values_host]) + + numbering_series_1 = cugraph.structure.NumberMap() + numbering_series_1.from_series(df["src"]) + + numbering_series_2 = cugraph.structure.NumberMap() + numbering_series_2.from_series(df["dst"]) + + renumbered_src = numbering_series_1.add_internal_vertex_id( + df["src"], "src_id") + renumbered_dst = numbering_series_2.add_internal_vertex_id( + df["dst"], "dst_id") + + check_src = numbering_series_1.from_internal_vertex_id(renumbered_src, "src_id") + check_dst = numbering_series_2.from_internal_vertex_id(renumbered_dst, "dst_id") + assert check_src["0_y"].equals(check_src["0_x"]) + assert check_dst["0_y"].equals(check_dst["0_x"]) + @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_renumber_files(graph_file): From d3e846f9a2a7a2fac8ad0717b6b7768c7b6c2b07 Mon Sep 17 00:00:00 2001 From: Iroy30 <41401566+Iroy30@users.noreply.github.com> Date: Tue, 5 Jan 2021 10:42:12 -0600 Subject: [PATCH 070/343] Update test_renumber.py --- python/cugraph/tests/test_renumber.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/python/cugraph/tests/test_renumber.py b/python/cugraph/tests/test_renumber.py index 25911ba842c..6f88d5f85c4 100644 --- a/python/cugraph/tests/test_renumber.py +++ b/python/cugraph/tests/test_renumber.py @@ -164,9 +164,9 @@ def test_renumber_negative_col(): # Test all combinations of default/managed and pooled/non-pooled allocation @pytest.mark.parametrize("graph_file", utils.DATASETS) -def test_renumber_files(graph_file): +def test_renumber_series(graph_file): gc.collect() - + M = utils.read_csv_for_nx(graph_file) sources = cudf.Series(M["0"]) destinations = cudf.Series(M["1"]) @@ -179,10 +179,10 @@ def test_renumber_files(graph_file): df["dst"] = cudf.Series([x + translate for x in destinations. 
values_host]) - numbering_series_1 = cugraph.structure.NumberMap() + numbering_series_1 = NumberMap() numbering_series_1.from_series(df["src"]) - numbering_series_2 = cugraph.structure.NumberMap() + numbering_series_2 = NumberMap() numbering_series_2.from_series(df["dst"]) renumbered_src = numbering_series_1.add_internal_vertex_id( @@ -190,12 +190,14 @@ def test_renumber_files(graph_file): renumbered_dst = numbering_series_2.add_internal_vertex_id( df["dst"], "dst_id") - check_src = numbering_series_1.from_internal_vertex_id(renumbered_src, "src_id") - check_dst = numbering_series_2.from_internal_vertex_id(renumbered_dst, "dst_id") + check_src = numbering_series_1.from_internal_vertex_id(renumbered_src, + "src_id") + check_dst = numbering_series_2.from_internal_vertex_id(renumbered_dst, + "dst_id") assert check_src["0_y"].equals(check_src["0_x"]) assert check_dst["0_y"].equals(check_dst["0_x"]) - + @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_renumber_files(graph_file): From 44ffc541e9e606e2011c0af5b4f83a2280892c03 Mon Sep 17 00:00:00 2001 From: dillon-cullinan Date: Tue, 5 Jan 2021 10:09:35 -0800 Subject: [PATCH 071/343] FIX Setup trap after PATH is updated --- ci/benchmark/build.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ci/benchmark/build.sh b/ci/benchmark/build.sh index 5f74dca4044..921e96dbbb9 100644 --- a/ci/benchmark/build.sh +++ b/ci/benchmark/build.sh @@ -20,18 +20,18 @@ function cleanup { rm -f testoutput.txt } -# Set cleanup trap for Jenkins -if [ ! -z "$JENKINS_HOME" ] ; then - gpuci_logger "Jenkins environment detected, setting cleanup trap" - trap cleanup EXIT -fi - # Set path, build parallel level, and CUDA version cd $WORKSPACE export PATH=/opt/conda/bin:/usr/local/cuda/bin:$PATH export PARALLEL_LEVEL=${PARALLEL_LEVEL:-4} export CUDA_REL=${CUDA_VERSION%.*} +# Set cleanup trap for Jenkins +if [ ! 
-z "$JENKINS_HOME" ] ; then + gpuci_logger "Jenkins environment detected, setting cleanup trap" + trap cleanup EXIT +fi + # Set home export HOME=$WORKSPACE From e2557d079392029de9d1266ed14856cfc5b1f5e8 Mon Sep 17 00:00:00 2001 From: dillon-cullinan Date: Tue, 5 Jan 2021 10:13:49 -0800 Subject: [PATCH 072/343] DOC Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 42286c54df4..b22d92c902f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ ## Improvements ## Bug Fixes +- PR #1321 Fix benchmark script trap setup to come after the PATH variable update # cuGraph 0.17.0 (10 Dec 2020) ## New Features From ea2530505cfdfd4cd31249956a3620e120b8f6a1 Mon Sep 17 00:00:00 2001 From: Ishika Roy Date: Tue, 5 Jan 2021 15:35:28 -0600 Subject: [PATCH 073/343] review changes --- CHANGELOG.md | 1 - python/cugraph/dask/common/input_utils.py | 22 ------------ python/cugraph/structure/graph.py | 1 - python/cugraph/tests/dask/test_mg_renumber.py | 3 -- python/cugraph/tests/dask/test_mg_utility.py | 34 ------------------- 5 files changed, 61 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 639122f6fe3..01c32b8f8d2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,6 @@ ## Improvements ## Bug Fixes -- PR #1319 Fix graph nodes function and renumbering from series # cuGraph 0.17.0 (Date TBD) diff --git a/python/cugraph/dask/common/input_utils.py b/python/cugraph/dask/common/input_utils.py index 0140c9f06f9..1461b13f016 100644 --- a/python/cugraph/dask/common/input_utils.py +++ b/python/cugraph/dask/common/input_utils.py @@ -198,28 +198,6 @@ def get_obj(x): return x[0] if multiple else x return total, reduce(lambda a, b: a + b, total) -def _get_local_data(df, by): - df = df[0] - num_local_edges = len(df) - local_by_max = df[by].iloc[-1] - local_max = df[['src', 'dst']].max().max() - return num_local_edges, local_by_max, local_max - - -def get_local_data(input_graph, by, load_balance=True): - input_graph.compute_renumber_edge_list(transposed=(by == 'dst')) - _ddf = input_graph.edgelist.edgelist_df - ddf = _ddf.sort_values(by=by, ignore_index=True) - - if load_balance: - ddf = load_balance_func(ddf, by=by) - - comms = Comms.get_comms() - data = DistributedDataHandler.create(data=ddf) - data.calculate_local_data(comms, by) - return data - - def get_mg_batch_data(dask_cudf_data): data = DistributedDataHandler.create(data=dask_cudf_data) return data diff --git a/python/cugraph/structure/graph.py b/python/cugraph/structure/graph.py index 7afd9b76b48..acc0ad8f066 100644 --- a/python/cugraph/structure/graph.py +++ b/python/cugraph/structure/graph.py @@ -14,7 +14,6 @@ from cugraph.structure import graph_primtypes_wrapper from cugraph.structure.symmetrize import symmetrize from cugraph.structure.number_map import NumberMap -from cugraph.dask.common.input_utils import get_local_data import cugraph.dask.common.mg_utils as mg_utils import cudf import dask_cudf diff --git a/python/cugraph/tests/dask/test_mg_renumber.py b/python/cugraph/tests/dask/test_mg_renumber.py index 8456241ff26..7963ab66be2 100644 --- a/python/cugraph/tests/dask/test_mg_renumber.py +++ b/python/cugraph/tests/dask/test_mg_renumber.py @@ -195,9 +195,6 @@ def test_dask_pagerank(client_connection): dg = cugraph.DiGraph() dg.from_dask_cudf_edgelist(ddf, "src", "dst") - # Pre compute local data - # dg.compute_local_data(by='dst') - expected_pr = cugraph.pagerank(g) result_pr = dcg.pagerank(dg).compute() diff --git a/python/cugraph/tests/dask/test_mg_utility.py 
b/python/cugraph/tests/dask/test_mg_utility.py index 808f1bcfa70..3079c2e0e98 100644 --- a/python/cugraph/tests/dask/test_mg_utility.py +++ b/python/cugraph/tests/dask/test_mg_utility.py @@ -74,40 +74,6 @@ def test_from_edgelist(client_connection): assert dg1.EdgeList == dg2.EdgeList -@pytest.mark.skipif( - is_single_gpu(), reason="skipping MG testing on Single GPU system" -) -def test_compute_local_data(client_connection): - - input_data_path = r"../datasets/karate.csv" - chunksize = dcg.get_chunksize(input_data_path) - ddf = dask_cudf.read_csv( - input_data_path, - chunksize=chunksize, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) - - dg = cugraph.DiGraph() - dg.from_dask_cudf_edgelist( - ddf, source="src", destination="dst", edge_attr="value" - ) - - # Compute_local_data - dg.compute_local_data(by="dst") - data = dg.local_data["data"] - by = dg.local_data["by"] - - assert by == "dst" - assert Comms.is_initialized() - - global_num_edges = data.local_data["edges"].sum() - assert global_num_edges == dg.number_of_edges() - global_num_verts = data.local_data["verts"].sum() - assert global_num_verts == dg.number_of_nodes() - - @pytest.mark.skipif( is_single_gpu(), reason="skipping MG testing on Single GPU system" ) From 499d9564645ec86c5c8030bb3e38a0a3ef57e1b1 Mon Sep 17 00:00:00 2001 From: Chuck Hastings <45364586+ChuckHastings@users.noreply.github.com> Date: Tue, 5 Jan 2021 19:22:26 -0500 Subject: [PATCH 074/343] Fix MNMG Louvain tests on Pascal architecture(#1322) MNMG Louvain uses a feature not supported on Pascal. This PR updates the python unit tests so that it expects an exception to be raised if running on a Pascal GPU. Authors: - Charles Hastings - Chuck Hastings Approvers: - Brad Rees - Alex Fender - Rick Ratzel URL: https://github.com/rapidsai/cugraph/pull/1322 --- python/cugraph/dask/common/mg_utils.py | 8 +++++-- python/cugraph/tests/dask/test_mg_louvain.py | 19 ++++++++++------ python/cugraph/utilities/utils.py | 24 ++++++++++++++++++++ python/setup.py | 4 ++-- 4 files changed, 44 insertions(+), 11 deletions(-) diff --git a/python/cugraph/dask/common/mg_utils.py b/python/cugraph/dask/common/mg_utils.py index 9604c26c1f7..1651a9e800c 100644 --- a/python/cugraph/dask/common/mg_utils.py +++ b/python/cugraph/dask/common/mg_utils.py @@ -19,7 +19,11 @@ from dask.distributed import Client from cugraph.raft.dask.common.utils import default_client -import cugraph.comms as Comms +# FIXME: cugraph/__init__.py also imports the comms module, but +# depending on the import environment, cugraph/comms/__init__.py +# may be imported instead. 
The following imports the comms.py +# module directly +from cugraph.comms import comms as Comms # FIXME: We currently look for the default client from dask, as such is the @@ -69,7 +73,7 @@ def setup_local_dask_cluster(p2p=True): cluster = LocalCUDACluster() client = Client(cluster) client.wait_for_workers(len(get_visible_devices())) - Comms.initialize(p2p) + Comms.initialize(p2p=p2p) return (cluster, client) diff --git a/python/cugraph/tests/dask/test_mg_louvain.py b/python/cugraph/tests/dask/test_mg_louvain.py index 61b2f41f474..a07eede8cb9 100644 --- a/python/cugraph/tests/dask/test_mg_louvain.py +++ b/python/cugraph/tests/dask/test_mg_louvain.py @@ -17,6 +17,7 @@ import cugraph import dask_cudf from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than from cugraph.dask.common.mg_utils import (is_single_gpu, setup_local_dask_cluster, teardown_local_dask_cluster) @@ -84,11 +85,15 @@ def test_mg_louvain_with_edgevals(daskGraphFromDataset): # FIXME: daskGraphFromDataset returns a DiGraph, which Louvain is currently # accepting. In the future, an MNMG symmeterize will need to be called to # create a Graph for Louvain. - parts, mod = dcg.louvain(daskGraphFromDataset) + if is_device_version_less_than((7, 0)): + with pytest.raises(RuntimeError): + parts, mod = dcg.louvain(daskGraphFromDataset) + else: + parts, mod = dcg.louvain(daskGraphFromDataset) - # FIXME: either call Nx with the same dataset and compare results, or - # hadcode golden results to compare to. - print() - print(parts.compute()) - print(mod) - print() + # FIXME: either call Nx with the same dataset and compare results, or + # hardcode golden results to compare to. + print() + print(parts.compute()) + print(mod) + print() diff --git a/python/cugraph/utilities/utils.py b/python/cugraph/utilities/utils.py index f1a320cd1ef..b77f6789abe 100644 --- a/python/cugraph/utilities/utils.py +++ b/python/cugraph/utilities/utils.py @@ -14,6 +14,11 @@ from numba import cuda import cudf +from rmm._cuda.gpu import ( + getDeviceAttribute, + cudaDeviceAttr, +) + # optional dependencies try: @@ -182,6 +187,25 @@ def is_cuda_version_less_than(min_version=(10, 2)): return False +def is_device_version_less_than(min_version=(7, 0)): + """ + Returns True if the version of CUDA being used is less than min_version + """ + major_version = getDeviceAttribute( + cudaDeviceAttr.cudaDevAttrComputeCapabilityMajor, 0 + ) + minor_version = getDeviceAttribute( + cudaDeviceAttr.cudaDevAttrComputeCapabilityMinor, 0 + ) + if major_version > min_version[0]: + return False + if major_version < min_version[0]: + return True + if minor_version < min_version[1]: + return True + return False + + # FIXME: if G is a Nx type, the weight attribute is assumed to be "weight", if # set. An additional optional parameter for the weight attr name when accepting # Nx graphs may be needed. 
From the Nx docs: diff --git a/python/setup.py b/python/setup.py index d99ff12cfa1..59292f32032 100644 --- a/python/setup.py +++ b/python/setup.py @@ -105,11 +105,11 @@ def run(self): "../thirdparty/cub", raft_include_dir, os.path.join( - conda_include_dir, "libcudf", "libcudacxx"), + conda_include_dir, "libcudacxx"), cuda_include_dir], library_dirs=[get_python_lib()], runtime_library_dirs=[conda_lib_dir], - libraries=['cugraph', 'cudf', 'nccl'], + libraries=['cugraph', 'nccl'], language='c++', extra_compile_args=['-std=c++14']) ] From 896db87e227c577831f9745d90951f79e51af9b5 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Wed, 6 Jan 2021 10:52:25 -0500 Subject: [PATCH 075/343] Update SG PageRank C++ tests(#1307) - Add const to input pointers. - Use a double type counter in std::accumulate as std::accumulate is inaccurate in adding a large number of a comparably sized values. - Fix random number generator seed. - Re-enable a temporarily disabled test case. - Relax the thresholds to skip comparison for lowly ranked vertices (with low scores which are more susceptible to the limited floating-point resolution) Authors: - Seunghwa Kang Approvers: - Andrei Schaffer (@aschaffer) - Brad Rees (@BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1307 --- cpp/tests/experimental/bfs_test.cpp | 4 +- .../experimental/katz_centrality_test.cpp | 11 +++-- cpp/tests/experimental/pagerank_test.cpp | 48 +++++++++++-------- cpp/tests/experimental/sssp_test.cpp | 6 +-- 4 files changed, 39 insertions(+), 30 deletions(-) diff --git a/cpp/tests/experimental/bfs_test.cpp b/cpp/tests/experimental/bfs_test.cpp index 2498ca4f3f5..82286b1e2fa 100644 --- a/cpp/tests/experimental/bfs_test.cpp +++ b/cpp/tests/experimental/bfs_test.cpp @@ -33,8 +33,8 @@ #include template -void bfs_reference(edge_t* offsets, - vertex_t* indices, +void bfs_reference(edge_t const* offsets, + vertex_t const* indices, vertex_t* distances, vertex_t* predecessors, vertex_t num_vertices, diff --git a/cpp/tests/experimental/katz_centrality_test.cpp b/cpp/tests/experimental/katz_centrality_test.cpp index c2ac4340319..cdbe3688248 100644 --- a/cpp/tests/experimental/katz_centrality_test.cpp +++ b/cpp/tests/experimental/katz_centrality_test.cpp @@ -35,10 +35,10 @@ #include template -void katz_centrality_reference(edge_t* offsets, - vertex_t* indices, - weight_t* weights, - result_t* betas, +void katz_centrality_reference(edge_t const* offsets, + vertex_t const* indices, + weight_t const* weights, + result_t const* betas, result_t* katz_centralities, vertex_t num_vertices, result_t alpha, @@ -195,7 +195,8 @@ class Tests_KatzCentrality : public ::testing::TestWithParam(graph_view.get_number_of_vertices())) * threshold_ratio; + (1.0 / static_cast(graph_view.get_number_of_vertices())) * + threshold_ratio; // skip comparison for low Katz Centrality verties (lowly ranked vertices) auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { auto diff = std::abs(lhs - rhs); return (diff < std::max(lhs, rhs) * threshold_ratio) || (diff < threshold_magnitude); diff --git a/cpp/tests/experimental/pagerank_test.cpp b/cpp/tests/experimental/pagerank_test.cpp index 4763249aa9e..70c83ef8192 100644 --- a/cpp/tests/experimental/pagerank_test.cpp +++ b/cpp/tests/experimental/pagerank_test.cpp @@ -36,11 +36,11 @@ #include template -void pagerank_reference(edge_t* offsets, - vertex_t* indices, - weight_t* weights, - vertex_t* personalization_vertices, - result_t* personalization_values, +void 
pagerank_reference(edge_t const* offsets, + vertex_t const* indices, + weight_t const* weights, + vertex_t const* personalization_vertices, + result_t const* personalization_values, result_t* pageranks, vertex_t num_vertices, vertex_t personalization_vector_size, @@ -52,7 +52,11 @@ void pagerank_reference(edge_t* offsets, if (num_vertices == 0) { return; } if (has_initial_guess) { - auto sum = std::accumulate(pageranks, pageranks + num_vertices, result_t{0.0}); + // use a double type counter (instead of result_t) to accumulate as std::accumulate is + // inaccurate in adding a large number of comparably sized numbers. In C++17 or later, + // std::reduce may be a better option. + auto sum = + static_cast(std::accumulate(pageranks, pageranks + num_vertices, double{0.0})); ASSERT_TRUE(sum > 0.0); std::for_each(pageranks, pageranks + num_vertices, [sum](auto& val) { val /= sum; }); } else { @@ -61,13 +65,14 @@ void pagerank_reference(edge_t* offsets, }); } + result_t personalization_sum{0.0}; if (personalization_vertices != nullptr) { - auto sum = std::accumulate( - personalization_values, personalization_values + personalization_vector_size, result_t{0.0}); - ASSERT_TRUE(sum > 0.0); - std::for_each(personalization_values, - personalization_values + personalization_vector_size, - [sum](auto& val) { val /= sum; }); + // use a double type counter (instead of result_t) to accumulate as std::accumulate is + // inaccurate in adding a large number of comparably sized numbers. In C++17 or later, + // std::reduce may be a better option. + personalization_sum = static_cast(std::accumulate( + personalization_values, personalization_values + personalization_vector_size, double{0.0})); + ASSERT_TRUE(personalization_sum > 0.0); } std::vector out_weight_sums(num_vertices, result_t{0.0}); @@ -102,7 +107,8 @@ void pagerank_reference(edge_t* offsets, if (personalization_vertices != nullptr) { for (vertex_t i = 0; i < personalization_vector_size; ++i) { auto v = personalization_vertices[i]; - pageranks[v] += (dangling_sum * alpha + (1.0 - alpha)) * personalization_values[i]; + pageranks[v] += (dangling_sum * alpha + (1.0 - alpha)) * + (personalization_values[i] / personalization_sum); } } result_t diff_sum{0.0}; @@ -177,8 +183,7 @@ class Tests_PageRank : public ::testing::TestWithParam { std::vector h_personalization_vertices{}; std::vector h_personalization_values{}; if (configuration.personalization_ratio > 0.0) { - std::random_device r{}; - std::default_random_engine generator{r()}; + std::default_random_engine generator{}; std::uniform_real_distribution distribution{0.0, 1.0}; h_personalization_vertices.resize(graph_view.get_number_of_local_vertices()); std::iota(h_personalization_vertices.begin(), @@ -195,8 +200,11 @@ class Tests_PageRank : public ::testing::TestWithParam { std::for_each(h_personalization_values.begin(), h_personalization_values.end(), [&distribution, &generator](auto& val) { val = distribution(generator); }); - auto sum = std::accumulate( - h_personalization_values.begin(), h_personalization_values.end(), result_t{0.0}); + // use a double type counter (instead of result_t) to accumulate as std::accumulate is + // inaccurate in adding a large number of comparably sized numbers. In C++17 or later, + // std::reduce may be a better option. 
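        // [illustrative aside, not part of the committed patch] The precision issue the
        // comment above refers to can be reproduced with single-precision literals alone:
        // once a float accumulator reaches 2^24, adding a term of 1.0f has no effect,
        // because the increment falls below the resolution of a 24-bit significand, e.g.
        //
        //   float f = 16777216.0f;   // 2^24
        //   f += 1.0f;               // f is still 16777216.0f
        //   double d = 16777216.0;
        //   d += 1.0;                // d is 16777217.0 as expected
        //
        // Accumulating in double and casting back (as done below) sidesteps this for the
        // value counts used in these tests; the comment above also notes std::reduce as a
        // C++17 alternative.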
+ auto sum = static_cast(std::accumulate( + h_personalization_values.begin(), h_personalization_values.end(), double{0.0})); std::for_each(h_personalization_values.begin(), h_personalization_values.end(), [sum](auto& val) { val /= sum; }); @@ -263,7 +271,8 @@ class Tests_PageRank : public ::testing::TestWithParam { auto threshold_ratio = 1e-3; auto threshold_magnitude = - (epsilon / static_cast(graph_view.get_number_of_vertices())) * threshold_ratio; + (1.0 / static_cast(graph_view.get_number_of_vertices())) * + threshold_ratio; // skip comparison for low PageRank verties (lowly ranked vertices) auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { auto diff = std::abs(lhs - rhs); return (diff < std::max(lhs, rhs) * threshold_ratio) || (diff < threshold_magnitude); @@ -299,8 +308,7 @@ INSTANTIATE_TEST_CASE_P( PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.0, true), PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.5, true), PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.0, false), - // FIXME: Re-enable test after failures are addressed - // PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.5, false), + PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.5, false), PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.0, true), PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.5, true))); diff --git a/cpp/tests/experimental/sssp_test.cpp b/cpp/tests/experimental/sssp_test.cpp index 49eaca56f56..2f7cc499d35 100644 --- a/cpp/tests/experimental/sssp_test.cpp +++ b/cpp/tests/experimental/sssp_test.cpp @@ -36,9 +36,9 @@ // Dijkstra's algorithm template -void sssp_reference(edge_t* offsets, - vertex_t* indices, - weight_t* weights, +void sssp_reference(edge_t const* offsets, + vertex_t const* indices, + weight_t const* weights, weight_t* distances, vertex_t* predecessors, vertex_t num_vertices, From 4d8036e0ffd4b86c5035292495b9761537247cee Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Wed, 6 Jan 2021 11:17:25 -0500 Subject: [PATCH 076/343] debug coarsen_graph test --- cpp/tests/experimental/coarsen_graph_test.cpp | 191 ++++++++++-------- 1 file changed, 109 insertions(+), 82 deletions(-) diff --git a/cpp/tests/experimental/coarsen_graph_test.cpp b/cpp/tests/experimental/coarsen_graph_test.cpp index 64f70d5c30b..77c32113066 100644 --- a/cpp/tests/experimental/coarsen_graph_test.cpp +++ b/cpp/tests/experimental/coarsen_graph_test.cpp @@ -78,104 +78,138 @@ void check_coarsened_graph_results(edge_t* org_offsets, }) == 0); ASSERT_TRUE(num_coarse_vertices <= num_org_vertices); - std::vector unique_labels(num_org_vertices); - std::copy(org_labels, org_labels + num_org_vertices, unique_labels.begin()); - std::sort(unique_labels.begin(), unique_labels.end()); - auto last = std::unique(unique_labels.begin(), unique_labels.end()); - unique_labels.resize(std::distance(unique_labels.begin(), last)); - ASSERT_TRUE(unique_labels.size() == static_cast(num_coarse_vertices)); + std::vector org_unique_vertices(num_org_vertices); + std::iota(org_unique_vertices.begin(), org_unique_vertices.end(), vertex_t{0}); + org_unique_vertices.erase( + std::remove_if(org_unique_vertices.begin(), + org_unique_vertices.end(), + [org_offsets](auto v) { return org_offsets[v + 1] == org_offsets[v]; }), + org_unique_vertices.end()); + org_unique_vertices.insert( + org_unique_vertices.end(), org_indices, org_indices + org_offsets[num_org_vertices]); + std::sort(org_unique_vertices.begin(), org_unique_vertices.end()); + org_unique_vertices.resize( + std::distance(org_unique_vertices.begin(), 
+ std::unique(org_unique_vertices.begin(), org_unique_vertices.end()))); + + std::vector org_unique_labels(org_unique_vertices.size()); + std::transform(org_unique_vertices.begin(), + org_unique_vertices.end(), + org_unique_labels.begin(), + [org_labels](auto v) { return org_labels[v]; }); + std::sort(org_unique_labels.begin(), org_unique_labels.end()); + org_unique_labels.resize(std::distance( + org_unique_labels.begin(), std::unique(org_unique_labels.begin(), org_unique_labels.end()))); + + ASSERT_TRUE(org_unique_labels.size() == static_cast(num_coarse_vertices)); { std::vector tmp_coarse_vertex_labels(coarse_vertex_labels, coarse_vertex_labels + num_coarse_vertices); std::sort(tmp_coarse_vertex_labels.begin(), tmp_coarse_vertex_labels.end()); - auto last = std::unique(tmp_coarse_vertex_labels.begin(), tmp_coarse_vertex_labels.end()); - ASSERT_TRUE(last == tmp_coarse_vertex_labels.end()); - ASSERT_TRUE( - std::equal(unique_labels.begin(), unique_labels.end(), tmp_coarse_vertex_labels.begin())); + ASSERT_TRUE(std::unique(tmp_coarse_vertex_labels.begin(), tmp_coarse_vertex_labels.end()) == + tmp_coarse_vertex_labels.end()); + ASSERT_TRUE(std::equal( + org_unique_labels.begin(), org_unique_labels.end(), tmp_coarse_vertex_labels.begin())); } - std::vector> label_org_vertex_pairs(num_org_vertices); - for (vertex_t i = 0; i < num_org_vertices; ++i) { - label_org_vertex_pairs[i] = std::make_tuple(org_labels[i], i); + std::vector> label_org_vertex_pairs(org_unique_vertices.size()); + for (size_t i = 0; i < org_unique_vertices.size(); ++i) { + auto v = org_unique_vertices[i]; + label_org_vertex_pairs[i] = std::make_tuple(org_labels[v], v); } std::sort(label_org_vertex_pairs.begin(), label_org_vertex_pairs.end()); - std::vector unique_label_counts(unique_labels.size()); - std::vector unique_label_offsets(unique_label_counts.size() + 1, 0); - std::transform( - unique_labels.begin(), - unique_labels.end(), - unique_label_counts.begin(), - [&label_org_vertex_pairs](auto label) { - auto lb = std::lower_bound( - label_org_vertex_pairs.begin(), - label_org_vertex_pairs.end(), - std::make_tuple(label, cugraph::experimental::invalid_vertex_id::value), - [](auto lhs, auto rhs) { return std::get<0>(lhs) < std::get<0>(rhs); }); - auto ub = std::upper_bound( - label_org_vertex_pairs.begin(), - label_org_vertex_pairs.end(), - std::make_tuple(label, cugraph::experimental::invalid_vertex_id::value), - [](auto lhs, auto rhs) { return std::get<0>(lhs) < std::get<0>(rhs); }); - return static_cast(std::distance(lb, ub)); - }); - std::partial_sum( - unique_label_counts.begin(), unique_label_counts.end(), unique_label_offsets.begin() + 1); - std::map label_to_coarse_vertex_map{}; for (vertex_t i = 0; i < num_coarse_vertices; ++i) { label_to_coarse_vertex_map[coarse_vertex_labels[i]] = i; } - for (size_t i = 0; i < unique_labels.size(); ++i) { - auto count = unique_label_counts[i]; - auto offset = unique_label_offsets[i]; + auto threshold_ratio = (org_weights == nullptr) ? weight_t{1.0} /* irrelevant */ : weight_t{1e-4}; + auto threshold_magnitude = + (org_weights == nullptr) + ? 
weight_t{1.0} /* irrelevant */ + : (std::accumulate( + coarse_weights, coarse_weights + coarse_offsets[num_coarse_vertices], weight_t{0.0}) / + static_cast(coarse_offsets[num_coarse_vertices])) * + threshold_ratio; + + for (size_t i = 0; i < org_unique_labels.size(); ++i) { // for each vertex in the coarse graph + auto lb = std::lower_bound( + label_org_vertex_pairs.begin(), + label_org_vertex_pairs.end(), + std::make_tuple(org_unique_labels[i], + cugraph::experimental::invalid_vertex_id::value /* dummy */), + [](auto lhs, auto rhs) { return std::get<0>(lhs) < std::get<0>(rhs); }); + auto ub = std::upper_bound( + label_org_vertex_pairs.begin(), + label_org_vertex_pairs.end(), + std::make_tuple(org_unique_labels[i], + cugraph::experimental::invalid_vertex_id::value /* dummy */), + [](auto lhs, auto rhs) { return std::get<0>(lhs) < std::get<0>(rhs); }); + auto count = std::distance(lb, ub); + auto offset = std::distance(label_org_vertex_pairs.begin(), lb); if (org_weights == nullptr) { std::vector coarse_nbrs0{}; - for (vertex_t j = offset; j < offset + count; ++j) { - auto org_vertex = std::get<1>(label_org_vertex_pairs[j]); - for (auto k = org_offsets[org_vertex]; k < org_offsets[org_vertex + 1]; ++k) { - auto org_nbr = org_indices[k]; - auto coarse_nbr = label_to_coarse_vertex_map[org_labels[org_nbr]]; - coarse_nbrs0.push_back(coarse_nbr); - } - } + std::for_each( + lb, + ub, + [org_offsets, org_indices, org_labels, &label_to_coarse_vertex_map, &coarse_nbrs0](auto t) { + auto org_vertex = std::get<1>(t); + std::vector tmp_nbrs(org_offsets[org_vertex + 1] - org_offsets[org_vertex]); + std::transform(org_indices + org_offsets[org_vertex], + org_indices + org_offsets[org_vertex + 1], + tmp_nbrs.begin(), + [org_labels, &label_to_coarse_vertex_map](auto nbr) { + return label_to_coarse_vertex_map[org_labels[nbr]]; + }); + coarse_nbrs0.insert(coarse_nbrs0.end(), tmp_nbrs.begin(), tmp_nbrs.end()); + }); std::sort(coarse_nbrs0.begin(), coarse_nbrs0.end()); - auto last = std::unique(coarse_nbrs0.begin(), coarse_nbrs0.end()); - coarse_nbrs0.resize(std::distance(coarse_nbrs0.begin(), last)); + coarse_nbrs0.resize( + std::distance(coarse_nbrs0.begin(), std::unique(coarse_nbrs0.begin(), coarse_nbrs0.end()))); - auto coarse_vertex = label_to_coarse_vertex_map[unique_labels[i]]; + auto coarse_vertex = label_to_coarse_vertex_map[org_unique_labels[i]]; auto coarse_offset = coarse_offsets[coarse_vertex]; auto coarse_count = coarse_offsets[coarse_vertex + 1] - coarse_offset; std::vector coarse_nbrs1(coarse_indices + coarse_offset, coarse_indices + coarse_offset + coarse_count); std::sort(coarse_nbrs1.begin(), coarse_nbrs1.end()); + ASSERT_TRUE(coarse_nbrs0.size() == coarse_nbrs1.size()); ASSERT_TRUE(std::equal(coarse_nbrs0.begin(), coarse_nbrs0.end(), coarse_nbrs1.begin())); } else { std::vector> coarse_nbr_weight_pairs0{}; - for (vertex_t j = offset; j < offset + count; ++j) { - auto org_vertex = std::get<1>(label_org_vertex_pairs[j]); - for (auto k = org_offsets[org_vertex]; k < org_offsets[org_vertex + 1]; ++k) { - auto org_nbr = org_indices[k]; - auto org_weight = org_weights[k]; - auto coarse_nbr = label_to_coarse_vertex_map[org_labels[org_nbr]]; - coarse_nbr_weight_pairs0.push_back(std::make_tuple(coarse_nbr, org_weight)); - } - } + std::for_each(lb, + ub, + [org_offsets, + org_indices, + org_weights, + org_labels, + &label_to_coarse_vertex_map, + &coarse_nbr_weight_pairs0](auto t) { + auto org_vertex = std::get<1>(t); + std::vector> tmp_pairs( + org_offsets[org_vertex + 1] - org_offsets[org_vertex]); 
+ for (auto j = org_offsets[org_vertex]; j < org_offsets[org_vertex + 1]; ++j) { + tmp_pairs[j - org_offsets[org_vertex]] = std::make_tuple( + label_to_coarse_vertex_map[org_labels[org_indices[j]]], org_weights[j]); + } + coarse_nbr_weight_pairs0.insert( + coarse_nbr_weight_pairs0.end(), tmp_pairs.begin(), tmp_pairs.end()); + }); std::sort(coarse_nbr_weight_pairs0.begin(), coarse_nbr_weight_pairs0.end()); // reduce by key { size_t run_start_idx = 0; - for (size_t i = 1; i < coarse_nbr_weight_pairs0.size(); ++i) { + for (size_t j = 1; j < coarse_nbr_weight_pairs0.size(); ++j) { auto& start = coarse_nbr_weight_pairs0[run_start_idx]; - auto& cur = coarse_nbr_weight_pairs0[i]; - if (std::get<0>(start) == std::get<1>(cur)) { + auto& cur = coarse_nbr_weight_pairs0[j]; + if (std::get<0>(start) == std::get<0>(cur)) { std::get<1>(start) += std::get<1>(cur); std::get<0>(cur) = cugraph::experimental::invalid_vertex_id::value; } else { - run_start_idx = i; + run_start_idx = j; } } coarse_nbr_weight_pairs0.erase( @@ -188,23 +222,16 @@ void check_coarsened_graph_results(edge_t* org_offsets, coarse_nbr_weight_pairs0.end()); } - auto coarse_vertex = label_to_coarse_vertex_map[unique_labels[i]]; - auto coarse_offset = coarse_offsets[coarse_vertex]; - auto coarse_count = coarse_offsets[coarse_vertex + 1] - coarse_offset; - std::vector> coarse_nbr_weight_pairs1(coarse_count); - for (auto j = coarse_offset; j < coarse_offset + coarse_count; ++j) { - coarse_nbr_weight_pairs1[i - coarse_offset] = + auto coarse_vertex = label_to_coarse_vertex_map[org_unique_labels[i]]; + std::vector> coarse_nbr_weight_pairs1( + coarse_offsets[coarse_vertex + 1] - coarse_offsets[coarse_vertex]); + for (auto j = coarse_offsets[coarse_vertex]; j < coarse_offsets[coarse_vertex + 1]; ++j) { + coarse_nbr_weight_pairs1[j - coarse_offsets[coarse_vertex]] = std::make_tuple(coarse_indices[j], coarse_weights[j]); } std::sort(coarse_nbr_weight_pairs1.begin(), coarse_nbr_weight_pairs1.end()); - auto threshold_ratio = weight_t{1e-4}; - auto threshold_magnitude = - ((std::accumulate( - coarse_weights, coarse_weights + coarse_offsets[num_coarse_vertices], weight_t{0.0}) / - static_cast(coarse_offsets[num_coarse_vertices])) * - threshold_ratio) * - threshold_ratio; + ASSERT_TRUE(coarse_nbr_weight_pairs0.size() == coarse_nbr_weight_pairs1.size()); ASSERT_TRUE(std::equal( coarse_nbr_weight_pairs0.begin(), coarse_nbr_weight_pairs0.end(), @@ -260,12 +287,14 @@ class Tests_CoarsenGraph : public ::testing::TestWithParam handle, configuration.graph_file_full_path, configuration.test_weighted); auto graph_view = graph.view(); + if (graph_view.get_number_of_vertices() == 0) { + return; + } + std::vector h_labels(graph_view.get_number_of_vertices()); - auto num_labels = static_cast(h_labels.size() * configuration.coarsen_ratio); - ASSERT_TRUE(num_labels > 0); + auto num_labels = std::max(static_cast(h_labels.size() * configuration.coarsen_ratio), vertex_t{1}); - std::random_device r{}; - std::default_random_engine generator{r()}; + std::default_random_engine generator{}; std::uniform_int_distribution distribution{0, num_labels - 1}; std::for_each(h_labels.begin(), h_labels.end(), [&distribution, &generator](auto& label) { @@ -329,7 +358,7 @@ class Tests_CoarsenGraph : public ::testing::TestWithParam handle.get_stream()); } - std::vector h_coarse_vertices_to_labels(h_coarse_vertices_to_labels.size()); + std::vector h_coarse_vertices_to_labels(coarse_vertices_to_labels.size()); raft::update_host(h_coarse_vertices_to_labels.data(), 
coarse_vertices_to_labels.data(), coarse_vertices_to_labels.size(), @@ -363,8 +392,6 @@ INSTANTIATE_TEST_CASE_P( CoarsenGraph_Usecase("test/datasets/karate.mtx", 0.2, true), CoarsenGraph_Usecase("test/datasets/web-Google.mtx", 0.1, false), CoarsenGraph_Usecase("test/datasets/web-Google.mtx", 0.1, true), - CoarsenGraph_Usecase("test/datasets/ljournal-2008.mtx", 0.1, false), - CoarsenGraph_Usecase("test/datasets/ljournal-2008.mtx", 0.1, true), CoarsenGraph_Usecase("test/datasets/webbase-1M.mtx", 0.1, false), CoarsenGraph_Usecase("test/datasets/webbase-1M.mtx", 0.1, true))); From dfab2eeb6c5fecebae5bd86d5a4d7bed6d9c4147 Mon Sep 17 00:00:00 2001 From: Ishika Roy Date: Wed, 6 Jan 2021 12:57:14 -0600 Subject: [PATCH 077/343] flake8 ' --- python/cugraph/dask/common/input_utils.py | 10 ++++++++-- python/cugraph/tests/dask/test_mg_utility.py | 1 - 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/python/cugraph/dask/common/input_utils.py b/python/cugraph/dask/common/input_utils.py index 1461b13f016..7df0892181a 100644 --- a/python/cugraph/dask/common/input_utils.py +++ b/python/cugraph/dask/common/input_utils.py @@ -21,8 +21,7 @@ import cugraph.comms.comms as Comms from cugraph.raft.dask.common.utils import get_client -from cugraph.dask.common.part_utils import (_extract_partitions, - load_balance_func) +from cugraph.dask.common.part_utils import _extract_partitions from dask.distributed import default_client from toolz import first from functools import reduce @@ -174,6 +173,13 @@ def calculate_local_data(self, comms, by): self.max_vertex_id = max_vid +def _get_local_data(df, by): + df = df[0] + num_local_edges = len(df) + local_by_max = df[by].iloc[-1] + local_max = df[['src', 'dst']].max().max() + return num_local_edges, local_by_max, local_max + """ Internal methods, API subject to change """ diff --git a/python/cugraph/tests/dask/test_mg_utility.py b/python/cugraph/tests/dask/test_mg_utility.py index 3079c2e0e98..7cd379bb885 100644 --- a/python/cugraph/tests/dask/test_mg_utility.py +++ b/python/cugraph/tests/dask/test_mg_utility.py @@ -16,7 +16,6 @@ import gc import cugraph import dask_cudf -import cugraph.comms as Comms from dask_cuda import LocalCUDACluster import pytest from cugraph.dask.common.part_utils import concat_within_workers From 0af875ad7d3795f460a4e3a195619e9ad8e5d358 Mon Sep 17 00:00:00 2001 From: Iroy30 <41401566+Iroy30@users.noreply.github.com> Date: Wed, 6 Jan 2021 13:03:54 -0600 Subject: [PATCH 078/343] Update input_utils.py --- python/cugraph/dask/common/input_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/cugraph/dask/common/input_utils.py b/python/cugraph/dask/common/input_utils.py index 7df0892181a..bbc914da502 100644 --- a/python/cugraph/dask/common/input_utils.py +++ b/python/cugraph/dask/common/input_utils.py @@ -180,6 +180,7 @@ def _get_local_data(df, by): local_max = df[['src', 'dst']].max().max() return num_local_edges, local_by_max, local_max + """ Internal methods, API subject to change """ From 2551c6f82bdfe6e4688a1e080f8b08b50ed9b1b6 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 7 Jan 2021 11:39:28 -0500 Subject: [PATCH 079/343] add coarsen_grpah functions taking both vertices and edge list --- cpp/include/experimental/graph_functions.hpp | 78 ++++++++++++++++ cpp/src/experimental/coarsen_graph.cu | 97 +++++++++++++++++--- 2 files changed, 161 insertions(+), 14 deletions(-) diff --git a/cpp/include/experimental/graph_functions.hpp b/cpp/include/experimental/graph_functions.hpp index 04037a3b5d7..9fe999da319 100644 --- 
a/cpp/include/experimental/graph_functions.hpp +++ b/cpp/include/experimental/graph_functions.hpp @@ -28,6 +28,7 @@ namespace cugraph { namespace experimental { +// FIXME: add do_expensive_check /** * @brief renumber edgelist (multi-GPU) * @@ -65,6 +66,7 @@ renumber_edgelist(raft::handle_t const& handle, rmm::device_uvector& edgelist_minor_vertices /* [INOUT] */, bool is_hypergraph_partitioned); +// FIXME: add do_expensive_check /** * @brief renumber edgelist (single-GPU) * @@ -89,6 +91,81 @@ std::enable_if_t> renumber_edgelist( rmm::device_uvector& edgelist_major_vertices /* [INOUT] */, rmm::device_uvector& edgelist_minor_vertices /* [INOUT] */); +// FIXME: add do_expensive_check +/** + * @brief renumber edgelist (multi-GPU) + * + * This version takes the vertex set in addition; this allows renumbering to include isolated + * vertices. This function assumes that vertices and edges are pre-shuffled to their target + * processes using the compute_gpu_id_from_vertex_t & compute_gpu_id_from_edge_t functors, + * respectively. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam edge_t Type of edge identifiers. Needs to be an integral type. + * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) + * or multi-GPU (true). + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param vertices Part of the entire set of vertices in the graph to be renumbered. Applying the + * compute_gpu_id_from_vertex_t to every vertex should return the local GPU ID for this function to + * work (vertices should be pre-shuffled). + * @param edgelist_major_vertices Edge source vertex IDs (if the graph adjacency matrix is stored as + * is) or edge destination vertex IDs (if the transposed graph adjacency matrix is stored). Vertex + * IDs are updated in-place ([INOUT] parameter). Applying the compute_gpu_id_from_edge_t functor to + * every (source, destination) pair should return the local GPU ID for this function to work (edges + * should be pre-shuffled). + * @param edgelist_minor_vertices Edge destination vertex IDs (if the graph adjacency matrix is + * stored as is) or edge source vertex IDs (if the transposed graph adjacency matrix is stored). + * Vertex IDs are updated in-place ([INOUT] parameter). + * @param is_hypergraph_partitioned Flag indicating whether we are assuming hypergraph partitioning + * (this flag will be removed in the future). Applying the compute_gpu_id_from_edge_t functor to + * every (source, destination) pair should return the local GPU ID for this function to work (edges + * should be pre-shuffled). + * @return std::tuple, partition_t, vertex_t, edge_t> + * Quadruplet of labels (vertex IDs before renumbering) for the entire set of vertices (assigned to + * this process in multi-GPU), partition_t object storing graph partitioning information, total + * number of vertices, and total number of edges. 
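 *
 * A minimal call sketch, illustrative only: `handle`, `vertices`,
 * `edgelist_major_vertices`, and `edgelist_minor_vertices` are assumed to already exist
 * and to be pre-shuffled as described above, and hypergraph partitioning is assumed to
 * be disabled; the variable declarations and std::tie pattern mirror the usage in
 * coarsen_graph.cu:
 *
 *   rmm::device_uvector<vertex_t> renumber_map_labels(0, handle.get_stream());
 *   partition_t<vertex_t> partition(std::vector<vertex_t>{}, false, 0, 0, 0, 0);
 *   vertex_t number_of_vertices{};
 *   edge_t number_of_edges{};
 *   std::tie(renumber_map_labels, partition, number_of_vertices, number_of_edges) =
 *     renumber_edgelist<vertex_t, edge_t, true>(handle,
 *                                               vertices,
 *                                               edgelist_major_vertices,
 *                                               edgelist_minor_vertices,
 *                                               false);  // is_hypergraph_partitioned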
+ */ +template +std::enable_if_t, partition_t, vertex_t, edge_t>> +renumber_edgelist(raft::handle_t const& handle, + rmm::device_uvector const& vertices, + rmm::device_uvector& edgelist_major_vertices /* [INOUT] */, + rmm::device_uvector& edgelist_minor_vertices /* [INOUT] */, + bool is_hypergraph_partitioned); + +// FIXME: add do_expensive_check +/** + * @brief renumber edgelist (single-GPU) + * + * This version takes the vertex set in addition; this allows renumbering to include isolated + * vertices. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam edge_t Type of edge identifiers. Needs to be an integral type. + * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) + * or multi-GPU (true). + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param vertices The entire set of vertices in the graph to be renumbered. + * @param edgelist_major_vertices Edge source vertex IDs (if the graph adjacency matrix is stored as + * is) or edge destination vertex IDs (if the transposed graph adjacency matrix is stored). Vertex + * IDs are updated in-place ([INOUT] parameter). + * @param edgelist_minor_vertices Edge destination vertex IDs (if the graph adjacency matrix is + * stored as is) or edge source vertex IDs (if the transposed graph adjacency matrix is stored). + * Vertex IDs are updated in-place ([INOUT] parameter). + * @return rmm::device_uvector Labels (vertex IDs before renumbering) for the entire set + * of vertices. + */ +template +std::enable_if_t> renumber_edgelist( + raft::handle_t const& handle, + rmm::device_uvector const& vertices, + rmm::device_uvector& edgelist_major_vertices /* [INOUT] */, + rmm::device_uvector& edgelist_minor_vertices /* [INOUT] */); + +// FIXME: add do_expensive_check /** * @brief Compute the coarsened graph. * @@ -123,6 +200,7 @@ coarsen_graph( graph_view_t const& graph_view, vertex_t const* labels); +// FIXME: add do_expensive_check /** * @brief Relabel old labels to new labels. * diff --git a/cpp/src/experimental/coarsen_graph.cu b/cpp/src/experimental/coarsen_graph.cu index b5296e2034a..737a29b0cc8 100644 --- a/cpp/src/experimental/coarsen_graph.cu +++ b/cpp/src/experimental/coarsen_graph.cu @@ -363,6 +363,47 @@ coarsen_graph( coarsened_edgelist_weights = std::move(rx_edgelist_weights); } + // 3. 
find unique labels for this GPU + + rmm::device_uvector unique_labels(graph_view.get_number_of_local_vertices(), + handle.get_stream()); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + labels, + labels + unique_labels.size(), + unique_labels.begin()); + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + unique_labels.begin(), + unique_labels.end()); + unique_labels.resize( + thrust::distance(unique_labels.begin(), + thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + unique_labels.begin(), + unique_labels.end())), + handle.get_stream()); + + rmm::device_uvector rx_unique_labels(0, handle.get_stream()); + std::tie(rx_unique_labels, std::ignore) = sort_and_shuffle_values( + handle.get_comms(), + unique_labels.begin(), + unique_labels.end(), + [key_func = detail::compute_gpu_id_from_vertex_t{comm.get_size()}] __device__( + auto val) { return key_func(val); }, + handle.get_stream()); + + unique_labels = std::move(rx_unique_labels); + + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + unique_labels.begin(), + unique_labels.end()); + unique_labels.resize( + thrust::distance(unique_labels.begin(), + thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + unique_labels.begin(), + unique_labels.end())), + handle.get_stream()); + + // 4. renumber + rmm::device_uvector renumber_map_labels(0, handle.get_stream()); partition_t partition( std::vector{}, graph_view.is_hypergraph_partitioned(), 0, 0, 0, 0); @@ -370,21 +411,22 @@ coarsen_graph( edge_t number_of_edges{}; std::tie(renumber_map_labels, partition, number_of_vertices, number_of_edges) = renumber_edgelist(handle, + unique_labels, coarsened_edgelist_major_vertices, coarsened_edgelist_minor_vertices, graph_view.is_hypergraph_partitioned()); - // 4. build a graph + // 5. build a graph std::vector> edgelists{}; if (graph_view.is_hypergraph_partitioned()) { CUGRAPH_FAIL("unimplemented."); } else { edgelists.resize(1); - edgelists[0].p_src_vertices = store_transposed ? coarsened_edgelist_minor_vertices.data() - : coarsened_edgelist_major_vertices.data(); - edgelists[0].p_dst_vertices = store_transposed ? coarsened_edgelist_major_vertices.data() - : coarsened_edgelist_minor_vertices.data(); + edgelists[0].p_src_vertices = store_transposed ? coarsened_edgelist_minor_vertices.data() + : coarsened_edgelist_major_vertices.data(); + edgelists[0].p_dst_vertices = store_transposed ? 
coarsened_edgelist_major_vertices.data() + : coarsened_edgelist_minor_vertices.data(); edgelists[0].p_edge_weights = coarsened_edgelist_weights.data(); edgelists[0].number_of_edges = static_cast(coarsened_edgelist_major_vertices.size()); } @@ -434,14 +476,30 @@ coarsen_graph( graph_view.get_number_of_vertices(), handle.get_stream()); + rmm::device_uvector unique_labels(graph_view.get_number_of_vertices(), + handle.get_stream()); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + labels, + labels + unique_labels.size(), + unique_labels.begin()); + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + unique_labels.begin(), + unique_labels.end()); + unique_labels.resize( + thrust::distance(unique_labels.begin(), + thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + unique_labels.begin(), + unique_labels.end())), + handle.get_stream()); + auto renumber_map_labels = renumber_edgelist( - handle, coarsened_edgelist_major_vertices, coarsened_edgelist_minor_vertices); + handle, unique_labels, coarsened_edgelist_major_vertices, coarsened_edgelist_minor_vertices); edgelist_t edgelist{}; - edgelist.p_src_vertices = store_transposed ? coarsened_edgelist_minor_vertices.data() - : coarsened_edgelist_major_vertices.data(); - edgelist.p_dst_vertices = store_transposed ? coarsened_edgelist_major_vertices.data() - : coarsened_edgelist_minor_vertices.data(); + edgelist.p_src_vertices = store_transposed ? coarsened_edgelist_minor_vertices.data() + : coarsened_edgelist_major_vertices.data(); + edgelist.p_dst_vertices = store_transposed ? coarsened_edgelist_major_vertices.data() + : coarsened_edgelist_minor_vertices.data(); edgelist.p_edge_weights = coarsened_edgelist_weights.data(); edgelist.number_of_edges = static_cast(coarsened_edgelist_major_vertices.size()); @@ -474,12 +532,23 @@ coarsen_graph( // explicit instantiation +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t const *labels); + template std::tuple>, rmm::device_uvector> -coarsen_graph( - raft::handle_t const &handle, - graph_view_t const &graph_view, - int32_t const *labels); +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t const *labels); + +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t const *labels); template std::tuple>, rmm::device_uvector> From cfb446c0f4cb3999bd83ebcf7efac3ada8bbc1b1 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 7 Jan 2021 11:41:06 -0500 Subject: [PATCH 080/343] update renumber_edgelist to optionally take vertex list --- cpp/src/experimental/renumber_edgelist.cu | 132 +++++++++++++++++++++- 1 file changed, 126 insertions(+), 6 deletions(-) diff --git a/cpp/src/experimental/renumber_edgelist.cu b/cpp/src/experimental/renumber_edgelist.cu index 27061429f4a..dc54538eaad 100644 --- a/cpp/src/experimental/renumber_edgelist.cu +++ b/cpp/src/experimental/renumber_edgelist.cu @@ -26,6 +26,7 @@ #include #include +#include #include #include #include @@ -45,6 +46,8 @@ namespace detail { template rmm::device_uvector compute_renumber_map( raft::handle_t const &handle, + vertex_t const *vertices, + vertex_t num_local_vertices /* relevant only if vertices != nullptr */, rmm::device_uvector const &edgelist_major_vertices, rmm::device_uvector const &edgelist_minor_vertices) { @@ -87,10 +90,11 @@ rmm::device_uvector compute_renumber_map( 
minor_labels.resize(thrust::distance(minor_labels.begin(), minor_label_it), handle.get_stream()); minor_labels.shrink_to_fit(handle.get_stream()); - // 3. merge major and minor labels + // 3. merge major and minor labels and vertex labels rmm::device_uvector merged_labels(major_labels.size() + minor_labels.size(), handle.get_stream()); + rmm::device_uvector merged_counts(merged_labels.size(), handle.get_stream()); thrust::merge_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), major_labels.begin(), @@ -101,12 +105,14 @@ rmm::device_uvector compute_renumber_map( thrust::make_constant_iterator(edge_t{0}), merged_labels.begin(), merged_counts.begin()); + major_labels.resize(0, handle.get_stream()); major_counts.resize(0, handle.get_stream()); minor_labels.resize(0, handle.get_stream()); major_labels.shrink_to_fit(handle.get_stream()); major_counts.shrink_to_fit(handle.get_stream()); minor_labels.shrink_to_fit(handle.get_stream()); + rmm::device_uvector labels(merged_labels.size(), handle.get_stream()); rmm::device_uvector counts(labels.size(), handle.get_stream()); auto pair_it = @@ -165,7 +171,41 @@ rmm::device_uvector compute_renumber_map( labels.shrink_to_fit(handle.get_stream()); } - // 5. sort by degree + // 5. if vertices != nullptr, add isolated vertices + + rmm::device_uvector isolated_vertices(0, handle.get_stream()); + if (vertices != nullptr) { + auto num_isolated_vertices = thrust::count_if( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertices, + vertices + num_local_vertices, + [label_first = labels.begin(), label_last = labels.end()] __device__(auto v) { + return !thrust::binary_search(thrust::seq, label_first, label_last, v); + }); + isolated_vertices.resize(num_isolated_vertices, handle.get_stream()); + thrust::copy_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertices, + vertices + num_local_vertices, + isolated_vertices.begin(), + [label_first = labels.begin(), label_last = labels.end()] __device__(auto v) { + return !thrust::binary_search(thrust::seq, label_first, label_last, v); + }); + } + + if (isolated_vertices.size() > 0) { + labels.resize(labels.size() + isolated_vertices.size(), handle.get_stream()); + counts.resize(labels.size(), handle.get_stream()); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + isolated_vertices.begin(), + isolated_vertices.end(), + labels.end() - isolated_vertices.size()); + thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + counts.end() - isolated_vertices.size(), + counts.end(), + edge_t{0}); + } + + // 6. sort by degree thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), counts.begin(), @@ -180,12 +220,12 @@ rmm::device_uvector compute_renumber_map( return std::move(labels); } -} // namespace detail - template std::enable_if_t, partition_t, vertex_t, edge_t>> renumber_edgelist(raft::handle_t const &handle, + vertex_t const *vertices, + vertex_t num_local_vertices /* relevant only if vertices != nullptr */, rmm::device_uvector &edgelist_major_vertices /* [INOUT] */, rmm::device_uvector &edgelist_minor_vertices /* [INOUT] */, bool is_hypergraph_partitioned) @@ -203,7 +243,7 @@ renumber_edgelist(raft::handle_t const &handle, // 1. compute renumber map auto renumber_map_labels = detail::compute_renumber_map( - handle, edgelist_major_vertices, edgelist_minor_vertices); + handle, vertices, num_local_vertices, edgelist_major_vertices, edgelist_minor_vertices); // 2. 
initialize partition_t object, number_of_vertices, and number_of_edges for the coarsened // graph @@ -348,11 +388,13 @@ renumber_edgelist(raft::handle_t const &handle, template std::enable_if_t> renumber_edgelist( raft::handle_t const &handle, + vertex_t const *vertices, + vertex_t num_vertices /* relevant only if vertices != nullptr */, rmm::device_uvector &edgelist_major_vertices /* [INOUT] */, rmm::device_uvector &edgelist_minor_vertices /* [INOUT] */) { auto renumber_map_labels = detail::compute_renumber_map( - handle, edgelist_major_vertices, edgelist_minor_vertices); + handle, vertices, num_vertices, edgelist_major_vertices, edgelist_minor_vertices); double constexpr load_factor = 0.7; @@ -380,6 +422,70 @@ std::enable_if_t> renumber_edgelist( return std::move(renumber_map_labels); } +} // namespace detail + +template +std::enable_if_t, partition_t, vertex_t, edge_t>> +renumber_edgelist(raft::handle_t const &handle, + rmm::device_uvector &edgelist_major_vertices /* [INOUT] */, + rmm::device_uvector &edgelist_minor_vertices /* [INOUT] */, + bool is_hypergraph_partitioned) +{ + return detail::renumber_edgelist(handle, + static_cast(nullptr), + vertex_t{0}, + edgelist_major_vertices, + edgelist_minor_vertices, + is_hypergraph_partitioned); +} + +template +std::enable_if_t> renumber_edgelist( + raft::handle_t const &handle, + rmm::device_uvector &edgelist_major_vertices /* [INOUT] */, + rmm::device_uvector &edgelist_minor_vertices /* [INOUT] */) +{ + return detail::renumber_edgelist(handle, + static_cast(nullptr), + vertex_t{0} /* dummy */, + edgelist_major_vertices, + edgelist_minor_vertices); +} + +template +std::enable_if_t, partition_t, vertex_t, edge_t>> +renumber_edgelist(raft::handle_t const &handle, + rmm::device_uvector const &vertices, + rmm::device_uvector &edgelist_major_vertices /* [INOUT] */, + rmm::device_uvector &edgelist_minor_vertices /* [INOUT] */, + bool is_hypergraph_partitioned) +{ + return detail::renumber_edgelist( + handle, + vertices.data(), + static_cast(vertices.size()), + edgelist_major_vertices, + edgelist_minor_vertices, + is_hypergraph_partitioned); +} + +template +std::enable_if_t> renumber_edgelist( + raft::handle_t const &handle, + rmm::device_uvector const &vertices, + rmm::device_uvector &edgelist_major_vertices /* [INOUT] */, + rmm::device_uvector &edgelist_minor_vertices /* [INOUT] */) +{ + return detail::renumber_edgelist( + handle, + vertices.data(), + static_cast(vertices.size()), + edgelist_major_vertices, + edgelist_minor_vertices); +} + // explicit instantiation template std::tuple, partition_t, int32_t, int32_t> @@ -389,8 +495,22 @@ renumber_edgelist( rmm::device_uvector &edgelist_minor_vertices /* [INOUT] */, bool is_hypergraph_partitioned); +template std::tuple, partition_t, int32_t, int32_t> +renumber_edgelist( + raft::handle_t const &handle, + rmm::device_uvector const &vertices, + rmm::device_uvector &edgelist_major_vertices /* [INOUT] */, + rmm::device_uvector &edgelist_minor_vertices /* [INOUT] */, + bool is_hypergraph_partitioned); + +template rmm::device_uvector renumber_edgelist( + raft::handle_t const &handle, + rmm::device_uvector &edgelist_major_vertices /* [INOUT] */, + rmm::device_uvector &edgelist_minor_vertices /* [INOUT] */); + template rmm::device_uvector renumber_edgelist( raft::handle_t const &handle, + rmm::device_uvector const &vertices, rmm::device_uvector &edgelist_major_vertices /* [INOUT] */, rmm::device_uvector &edgelist_minor_vertices /* [INOUT] */); From b8d73587d4e2dbbfee3efefd7edb2791f9a710aa Mon Sep 17 00:00:00 
2001 From: Seunghwa Kang Date: Thu, 7 Jan 2021 11:41:31 -0500 Subject: [PATCH 081/343] add coarsen_graph test (C++ SG) --- cpp/tests/experimental/coarsen_graph_test.cpp | 45 ++++++++----------- 1 file changed, 19 insertions(+), 26 deletions(-) diff --git a/cpp/tests/experimental/coarsen_graph_test.cpp b/cpp/tests/experimental/coarsen_graph_test.cpp index 77c32113066..b3b730b776e 100644 --- a/cpp/tests/experimental/coarsen_graph_test.cpp +++ b/cpp/tests/experimental/coarsen_graph_test.cpp @@ -78,23 +78,10 @@ void check_coarsened_graph_results(edge_t* org_offsets, }) == 0); ASSERT_TRUE(num_coarse_vertices <= num_org_vertices); - std::vector org_unique_vertices(num_org_vertices); - std::iota(org_unique_vertices.begin(), org_unique_vertices.end(), vertex_t{0}); - org_unique_vertices.erase( - std::remove_if(org_unique_vertices.begin(), - org_unique_vertices.end(), - [org_offsets](auto v) { return org_offsets[v + 1] == org_offsets[v]; }), - org_unique_vertices.end()); - org_unique_vertices.insert( - org_unique_vertices.end(), org_indices, org_indices + org_offsets[num_org_vertices]); - std::sort(org_unique_vertices.begin(), org_unique_vertices.end()); - org_unique_vertices.resize( - std::distance(org_unique_vertices.begin(), - std::unique(org_unique_vertices.begin(), org_unique_vertices.end()))); - - std::vector org_unique_labels(org_unique_vertices.size()); - std::transform(org_unique_vertices.begin(), - org_unique_vertices.end(), + std::vector org_unique_labels(num_org_vertices); + std::iota(org_unique_labels.begin(), org_unique_labels.end(), vertex_t{0}); + std::transform(org_unique_labels.begin(), + org_unique_labels.end(), org_unique_labels.begin(), [org_labels](auto v) { return org_labels[v]; }); std::sort(org_unique_labels.begin(), org_unique_labels.end()); @@ -113,10 +100,9 @@ void check_coarsened_graph_results(edge_t* org_offsets, org_unique_labels.begin(), org_unique_labels.end(), tmp_coarse_vertex_labels.begin())); } - std::vector> label_org_vertex_pairs(org_unique_vertices.size()); - for (size_t i = 0; i < org_unique_vertices.size(); ++i) { - auto v = org_unique_vertices[i]; - label_org_vertex_pairs[i] = std::make_tuple(org_labels[v], v); + std::vector> label_org_vertex_pairs(num_org_vertices); + for (vertex_t i = 0; i < num_org_vertices; ++i) { + label_org_vertex_pairs[i] = std::make_tuple(org_labels[i], i); } std::sort(label_org_vertex_pairs.begin(), label_org_vertex_pairs.end()); @@ -287,12 +273,11 @@ class Tests_CoarsenGraph : public ::testing::TestWithParam handle, configuration.graph_file_full_path, configuration.test_weighted); auto graph_view = graph.view(); - if (graph_view.get_number_of_vertices() == 0) { - return; - } + if (graph_view.get_number_of_vertices() == 0) { return; } std::vector h_labels(graph_view.get_number_of_vertices()); - auto num_labels = std::max(static_cast(h_labels.size() * configuration.coarsen_ratio), vertex_t{1}); + auto num_labels = + std::max(static_cast(h_labels.size() * configuration.coarsen_ratio), vertex_t{1}); std::default_random_engine generator{}; std::uniform_int_distribution distribution{0, num_labels - 1}; @@ -380,7 +365,13 @@ class Tests_CoarsenGraph : public ::testing::TestWithParam }; // FIXME: add tests for type combinations -TEST_P(Tests_CoarsenGraph, CheckInt32Int32FloatFloat) + +TEST_P(Tests_CoarsenGraph, CheckInt32Int32FloatFloatTransposed) +{ + run_current_test(GetParam()); +} + +TEST_P(Tests_CoarsenGraph, CheckInt32Int32FloatFloatUntransposed) { run_current_test(GetParam()); } @@ -392,6 +383,8 @@ INSTANTIATE_TEST_CASE_P( 
CoarsenGraph_Usecase("test/datasets/karate.mtx", 0.2, true), CoarsenGraph_Usecase("test/datasets/web-Google.mtx", 0.1, false), CoarsenGraph_Usecase("test/datasets/web-Google.mtx", 0.1, true), + CoarsenGraph_Usecase("test/datasets/ljournal-2008.mtx", 0.1, false), + CoarsenGraph_Usecase("test/datasets/ljournal-2008.mtx", 0.1, true), CoarsenGraph_Usecase("test/datasets/webbase-1M.mtx", 0.1, false), CoarsenGraph_Usecase("test/datasets/webbase-1M.mtx", 0.1, true))); From 038d856d7dda709b6642512ac9735df301dd7e5d Mon Sep 17 00:00:00 2001 From: Joseph Nke Date: Thu, 7 Jan 2021 12:00:45 -0500 Subject: [PATCH 082/343] working dir --- test_betweenness_centrality_refac.py | 546 +++++++++++++++++++++++++++ utils.py | 476 +++++++++++++++++++++++ 2 files changed, 1022 insertions(+) create mode 100755 test_betweenness_centrality_refac.py create mode 100755 utils.py diff --git a/test_betweenness_centrality_refac.py b/test_betweenness_centrality_refac.py new file mode 100755 index 00000000000..5d5aae36e86 --- /dev/null +++ b/test_betweenness_centrality_refac.py @@ -0,0 +1,546 @@ +# Copyright (c) 2019-2020, NVIDIA CORPORATION.: +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import gc + +import pytest + +import cugraph +from cugraph.tests import utils +import random +import numpy as np +import cudf +import cupy + +# Temporarily suppress warnings till networkX fixes deprecation warnings +# (Using or importing the ABCs from 'collections' instead of from +# 'collections.abc' is deprecated, and in 3.8 it will stop working) for +# python 3.7. Also, this import networkx needs to be relocated in the +# third-party group once this gets fixed. 
+import warnings + +with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=DeprecationWarning) + import networkx as nx + +# ============================================================================= +# Parameters +# ============================================================================= +DIRECTED_GRAPH_OPTIONS = [False, True] +WEIGHTED_GRAPH_OPTIONS = [False, True] +ENDPOINTS_OPTIONS = [False, True] +NORMALIZED_OPTIONS = [False, True] +DEFAULT_EPSILON = 0.0001 + +SUBSET_SIZE_OPTIONS = [4, None] +SUBSET_SEED_OPTIONS = [42] + +# NOTE: The following is not really being exploited in the tests as the +# datasets that are used are too small to compare, but it ensures that both +# path are actually sane +RESULT_DTYPE_OPTIONS = [np.float32, np.float64] + + +# ============================================================================= +# Comparison functions +# ============================================================================= +def calc_betweenness_centrality( + Fixture_params_dts_sml, + directed=True, #no need of this + k=None, + normalized=False, + weight=None, + endpoints=False, + seed=None, + result_dtype=np.float64, + use_k_full=False, + multi_gpu_batch=False, + edgevals=False, #no need of this +): + """ Generate both cugraph and networkx betweenness centrality + + Parameters + ---------- + graph_file : string + Path to COO Graph representation in .csv format + + directed : bool, optional, default=True + + k : int or None, optional, default=None + int: Number of sources to sample from + None: All sources are used to compute + + normalized : bool + True: Normalize Betweenness Centrality scores + False: Scores are left unnormalized + + weight : cudf.DataFrame: + Not supported as of 06/2020 + + endpoints : bool + True: Endpoints are included when computing scores + False: Endpoints are not considered + + seed : int or None, optional, default=None + Seed for random sampling of the starting point + + result_dtype : numpy.dtype + Expected type of the result, either np.float32 or np.float64 + + use_k_full : bool + When True, if k is None replaces k by the number of sources of the + Graph + + multi_gpu_batch : bool + When True, enable mg batch after constructing the graph + + edgevals: bool + When True, enable tests with weighted graph, should be ignored + during computation. + + Returns + ------- + + sorted_df : cudf.DataFrame + Contains 'vertex' and 'cu_bc' 'ref_bc' columns, where 'cu_bc' + and 'ref_bc' are the two betweenness centrality scores to compare. + The dataframe is expected to be sorted based on 'vertex', so that we + can use cupy.isclose to compare the scores. 
+ """ + G = None + Gnx = None + + G, Gnx = Fixture_params_dts_sml + + + + assert G is not None and Gnx is not None + if multi_gpu_batch: + G.enable_batch() + + calc_func = None + if k is not None and seed is not None: + calc_func = _calc_bc_subset + elif k is not None: + calc_func = _calc_bc_subset_fixed + else: # We processed to a comparison using every sources + if use_k_full: + k = Gnx.number_of_nodes() + calc_func = _calc_bc_full + sorted_df = calc_func( + G, + Gnx, + k=k, + normalized=normalized, + weight=weight, + endpoints=endpoints, + seed=seed, + result_dtype=result_dtype, + ) + + return sorted_df + + +def _calc_bc_subset( + G, Gnx, normalized, weight, endpoints, k, seed, result_dtype +): + # NOTE: Networkx API does not allow passing a list of vertices + # And the sampling is operated on Gnx.nodes() directly + # We first mimic acquisition of the nodes to compare with same sources + random.seed(seed) # It will be called again in nx's call + sources = random.sample(Gnx.nodes(), k) + df = cugraph.betweenness_centrality( + G, + k=sources, + normalized=normalized, + weight=weight, + endpoints=endpoints, + result_dtype=result_dtype, + ) + sorted_df = df.sort_values("vertex").rename( + columns={"betweenness_centrality": "cu_bc"}, copy=False + ).reset_index(drop=True) + + nx_bc = nx.betweenness_centrality( + Gnx, + k=k, + normalized=normalized, + weight=weight, + endpoints=endpoints, + seed=seed, + ) + + _, nx_bc = zip(*sorted(nx_bc.items())) + nx_df = cudf.DataFrame({"ref_bc": nx_bc}) + + merged_sorted_df = cudf.concat([sorted_df, nx_df], axis=1, sort=False) + + return merged_sorted_df + + +def _calc_bc_subset_fixed( + G, Gnx, normalized, weight, endpoints, k, seed, result_dtype +): + assert isinstance(k, int), ( + "This test is meant for verifying coherence " + "when k is given as an int" + ) + # In the fixed set we compare cu_bc against itself as we random.seed(seed) + # on the same seed and then sample on the number of vertices themselves + if seed is None: + seed = 123 # random.seed(None) uses time, but we want same sources + random.seed(seed) # It will be called again in cugraph's call + sources = random.sample(range(G.number_of_vertices()), k) + + if G.renumbered: + sources_df = cudf.DataFrame({'src': sources}) + sources = G.unrenumber(sources_df, 'src')['src'].to_pandas().tolist() + + # The first call is going to proceed to the random sampling in the same + # fashion as the lines above + df = cugraph.betweenness_centrality( + G, + k=k, + normalized=normalized, + weight=weight, + endpoints=endpoints, + seed=seed, + result_dtype=result_dtype, + ) + sorted_df = df.sort_values("vertex").rename( + columns={"betweenness_centrality": "cu_bc"}, copy=False + ).reset_index(drop=True) + + # The second call is going to process source that were already sampled + # We set seed to None as k : int, seed : not none should not be normal + # behavior + df2 = cugraph.betweenness_centrality( + G, + k=sources, + normalized=normalized, + weight=weight, + endpoints=endpoints, + seed=None, + result_dtype=result_dtype, + ) + sorted_df2 = df2.sort_values("vertex").rename( + columns={"betweenness_centrality": "ref_bc"}, copy=False + ).reset_index(drop=True) + + merged_sorted_df = cudf.concat( + [sorted_df, sorted_df2["ref_bc"]], axis=1, sort=False + ) + + return merged_sorted_df + + +def _calc_bc_full( + G, Gnx, normalized, weight, endpoints, k, seed, result_dtype +): + df = cugraph.betweenness_centrality( + G, + k=k, + normalized=normalized, + weight=weight, + endpoints=endpoints, + 
result_dtype=result_dtype, + ) + assert ( + df["betweenness_centrality"].dtype == result_dtype + ), "'betweenness_centrality' column has not the expected type" + nx_bc = nx.betweenness_centrality( + Gnx, k=k, normalized=normalized, weight=weight, endpoints=endpoints + ) + + sorted_df = df.sort_values("vertex").rename( + columns={"betweenness_centrality": "cu_bc"}, copy=False + ).reset_index(drop=True) + _, nx_bc = zip(*sorted(nx_bc.items())) + nx_df = cudf.DataFrame({"ref_bc": nx_bc}) + + merged_sorted_df = cudf.concat([sorted_df, nx_df], axis=1, sort=False) + + return merged_sorted_df + + +# ============================================================================= +# Utils +# ============================================================================= +# NOTE: We assume that both column are ordered in such way that values +# at ith positions are expected to be compared in both columns +# i.e: sorted_df[idx][first_key] should be compared to +# sorted_df[idx][second_key] +def compare_scores(sorted_df, first_key, second_key, epsilon=DEFAULT_EPSILON): + errors = sorted_df[ + ~cupy.isclose( + sorted_df[first_key], sorted_df[second_key], rtol=epsilon + ) + ] + num_errors = len(errors) + if num_errors > 0: + print(errors) + assert ( + num_errors == 0 + ), "Mismatch were found when comparing '{}' and '{}' (rtol = {})".format( + first_key, second_key, epsilon + ) + + +def prepare_test(): + gc.collect() + + + +# ============================================================================= +# Pytest Fixtures +# ============================================================================= +DIRECTED = [pytest.param(d) for d in DIRECTED_GRAPH_OPTIONS] +DATASETS_SMALL = [pytest.param(d) for d in utils.DATASETS_SMALL] +DATASETS_UNRENUMBERED = [pytest.param(d) for d in utils.DATASETS_UNRENUMBERED] +WEIGHTED_GRAPH_OPTIONS = [pytest.param(w) for w in WEIGHTED_GRAPH_OPTIONS] + + +fixture_params_dts_sml = utils.genFixtureParamsProduct( + (DATASETS_SMALL, "grph"), + (DIRECTED, "dirctd"), + (WEIGHTED_GRAPH_OPTIONS, "wgtd_gph_opts")) + +fixture_params_dts_urnbrd = utils.genFixtureParamsProduct( + (DATASETS_UNRENUMBERED, "grph"), + (DIRECTED, "dirctd"), + (WEIGHTED_GRAPH_OPTIONS, "wgtd_gph_opts")) + +@pytest.fixture(scope="module", params=fixture_params_dts_sml) +def Fixture_params_dts_sml(request): + return utils.build_cu_and_nx_graphs(*request.param) + + + + +@pytest.fixture(scope="module", params=fixture_params_dts_urnbrd) +def Fixture_params_dts_urnbrd(request): + return utils.build_cu_and_nx_graphs(*request.param) + + +def betweenness_centrality_dts_sml(Fixture_params_dts_sml): + + _,Gnx=Fixture_params_dts_sml + +""" +nx.betweenness_centrality( + Gnx, + k=k, + normalized=normalized, + weight=weight, + endpoints=endpoints, + seed=seed, + ) +""" + + +# ============================================================================= +# Tests +# ============================================================================= +@pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) +@pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) +@pytest.mark.parametrize("weight", [None]) +@pytest.mark.parametrize("endpoints", ENDPOINTS_OPTIONS) +@pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS) +@pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) +def test_betweenness_centralityy( + Fixture_params_dts_sml, + subset_size, + normalized, + weight, + endpoints, + subset_seed, + result_dtype, +): + prepare_test() + sorted_df = calc_betweenness_centrality( + Fixture_params_dts_sml, + 
+        normalized=normalized,
+        k=subset_size,
+        weight=weight,
+        endpoints=endpoints,
+        seed=subset_seed,
+        result_dtype=result_dtype,
+    )
+    compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc")
+
+
+@pytest.mark.parametrize("subset_size", [None])
+@pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS)
+@pytest.mark.parametrize("weight", [None])
+@pytest.mark.parametrize("endpoints", ENDPOINTS_OPTIONS)
+@pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS)
+@pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS)
+@pytest.mark.parametrize("use_k_full", [True])
+def test_betweenness_centrality_k_full(
+    Fixture_params_dts_sml,
+    subset_size,
+    normalized,
+    weight,
+    endpoints,
+    subset_seed,
+    result_dtype,
+    use_k_full,
+):
+    """Tests full betweenness centrality by using k = G.number_of_vertices()
+    instead of k=None; checks that k scales properly"""
+    prepare_test()
+    sorted_df = calc_betweenness_centrality(
+        Fixture_params_dts_sml,
+        normalized=normalized,
+        k=subset_size,
+        weight=weight,
+        endpoints=endpoints,
+        seed=subset_seed,
+        result_dtype=result_dtype,
+        use_k_full=use_k_full,
+    )
+    compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc")
+
+
+# NOTE: This test should only be executed on unrenumbered datasets: the
+# comparison function first performs a random sampling over the number of
+# vertices (i.e. direct offsets) in the graph structure instead of the
+# actual vertex identifiers
+@pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS)
+@pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS)
+@pytest.mark.parametrize("weight", [None])
+@pytest.mark.parametrize("endpoints", ENDPOINTS_OPTIONS)
+@pytest.mark.parametrize("subset_seed", [None])
+@pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS)
+def test_betweenness_centrality_fixed_sample(
+    Fixture_params_dts_urnbrd,
+    subset_size,
+    normalized,
+    weight,
+    endpoints,
+    subset_seed,
+    result_dtype,
+):
+    """Test Betweenness Centrality using a subset
+
+    Only k sources are considered for an approximate Betweenness Centrality
+    """
+    prepare_test()
+    sorted_df = calc_betweenness_centrality(
+        Fixture_params_dts_urnbrd,
+        k=subset_size,
+        normalized=normalized,
+        weight=weight,
+        endpoints=endpoints,
+        seed=subset_seed,
+        result_dtype=result_dtype,
+    )
+    compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc")
+
+
+@pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS)
+@pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS)
+@pytest.mark.parametrize("weight", [[]])
+@pytest.mark.parametrize("endpoints", ENDPOINTS_OPTIONS)
+@pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS)
+@pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS)
+def test_betweenness_centrality_weight_except(
+    Fixture_params_dts_sml,
+    subset_size,
+    normalized,
+    weight,
+    endpoints,
+    subset_seed,
+    result_dtype,
+):
+    """Calls betweenness_centrality with weight
+
+    As of 05/28/2020, weight is not supported and should raise
+    a NotImplementedError
+    """
+    prepare_test()
+    with pytest.raises(NotImplementedError):
+        sorted_df = calc_betweenness_centrality(
+            Fixture_params_dts_sml,
+            k=subset_size,
+            normalized=normalized,
+            weight=weight,
+            endpoints=endpoints,
+            seed=subset_seed,
+            result_dtype=result_dtype,
+        )
+        compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc")
+
+
+@pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS)
+@pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS)
+@pytest.mark.parametrize("weight", [None])
+@pytest.mark.parametrize("endpoints", ENDPOINTS_OPTIONS)
+@pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS)
+@pytest.mark.parametrize("result_dtype", [str])
+def test_betweenness_invalid_dtype(
+    Fixture_params_dts_sml,
+    subset_size,
+    normalized,
+    weight,
+    endpoints,
+    subset_seed,
+    result_dtype,
+):
+    """Test calls betweenness_centrality with an invalid result_dtype"""
+
+    prepare_test()
+    with pytest.raises(TypeError):
+        sorted_df = calc_betweenness_centrality(
+            Fixture_params_dts_sml,
+            k=subset_size,
+            normalized=normalized,
+            weight=weight,
+            endpoints=endpoints,
+            seed=subset_seed,
+            result_dtype=result_dtype,
+        )
+        compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc")
+
+
+@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL)
+@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
+@pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS)
+def test_betweenness_centrality_nx(
+    graph_file,
+    directed,
+    edgevals
+):
+    prepare_test()
+
+    Gnx = utils.generate_nx_graph_from_file(graph_file, directed, edgevals)
+
+    nx_bc = nx.betweenness_centrality(Gnx)
+    cu_bc = cugraph.betweenness_centrality(Gnx)
+
+    # Calculating mismatch
+    networkx_bc = sorted(nx_bc.items(), key=lambda x: x[0])
+    cugraph_bc = sorted(cu_bc.items(), key=lambda x: x[0])
+    err = 0
+    assert len(cugraph_bc) == len(networkx_bc)
+    for i in range(len(cugraph_bc)):
+        if (
+            abs(cugraph_bc[i][1] - networkx_bc[i][1]) > 0.01
+            and cugraph_bc[i][0] == networkx_bc[i][0]
+        ):
+            err = err + 1
+            print(f"{cugraph_bc[i][1]} and {networkx_bc[i][1]}")
+    print("Mismatches:", err)
+    assert err < (0.01 * len(cugraph_bc))
+
+
diff --git a/utils.py b/utils.py
new file mode 100755
index 00000000000..e09cae01a0b
--- /dev/null
+++ b/utils.py
@@ -0,0 +1,476 @@
+# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
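+
+# Shared test utilities for the cuGraph test suite. This module provides
+# dataset path resolution (get_rapids_dataset_root_dir and the DATASETS_*
+# lists), CSV readers for pandas/cudf/dask_cudf, helpers that build cuGraph
+# and NetworkX graphs (or CuPy/SciPy matrices) from the same input file,
+# genFixtureParamsProduct for combining pytest fixture parameters, and
+# compare_mst for the spanning tree tests.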
+ +import os +from itertools import product + +# Assume test environment has the following dependencies installed +import pytest +import pandas as pd +import networkx as nx +import numpy as np +import cupy as cp +from cupyx.scipy.sparse.coo import coo_matrix as cp_coo_matrix +from cupyx.scipy.sparse.csr import csr_matrix as cp_csr_matrix +from cupyx.scipy.sparse.csc import csc_matrix as cp_csc_matrix +from scipy.sparse.coo import coo_matrix as sp_coo_matrix +from scipy.sparse.csr import csr_matrix as sp_csr_matrix +from scipy.sparse.csc import csc_matrix as sp_csc_matrix + +import cudf +import dask_cudf + +import cugraph +from cugraph.dask.common.mg_utils import get_client + +#RAPIDS_DATASET_ROOT_DIR= "/datasets" + +CUPY_MATRIX_TYPES = [cp_coo_matrix, cp_csr_matrix, cp_csc_matrix] +SCIPY_MATRIX_TYPES = [sp_coo_matrix, sp_csr_matrix, sp_csc_matrix] + +# +# Datasets +# + +RAPIDS_DATASET_ROOT_DIR= "../datasets" + +def get_rapids_dataset_root_dir(): + envVar=os.getenv('RAPIDS_DATASET_ROOT_DIR') + if(envVar!=None): + return envVar + return RAPIDS_DATASET_ROOT_DIR + + + + + +rapidsDatasetRootDir = get_rapids_dataset_root_dir() + +DATASETS_UNDIRECTED = [os.path.join(rapidsDatasetRootDir,"karate.csv"), os.path.join(rapidsDatasetRootDir, "dolphins.csv")] + +DATASETS_UNDIRECTED_WEIGHTS = [ + os.path.join(rapidsDatasetRootDir, "netscience.csv") +] + +DATASETS_UNRENUMBERED = [os.path.join(rapidsDatasetRootDir, "karate-disjoint.csv")] + +DATASETS = [ + os.path.join(rapidsDatasetRootDir, "karate-disjoint.csv"), + os.path.join(rapidsDatasetRootDir, "dolphins.csv"), + os.path.join(rapidsDatasetRootDir, "netscience.csv"), +] +# '../datasets/email-Eu-core.csv'] + +STRONGDATASETS = [ + os.path.join(rapidsDatasetRootDir, "dolphins.csv"), + os.path.join(rapidsDatasetRootDir, "netscience.csv"), + os.path.join(rapidsDatasetRootDir, "email-Eu-core.csv"), +] + +DATASETS_KTRUSS = [ + os.path.join(rapidsDatasetRootDir,"polbooks.csv"), + os.path.join(rapidsDatasetRootDir,"/ref/ktruss/","polbooks.csv") +] + +DATASETS_SMALL = [ + os.path.join(rapidsDatasetRootDir, "karate.csv"), + os.path.join(rapidsDatasetRootDir, "dolphins.csv"), + os.path.join(rapidsDatasetRootDir, "polbooks.csv"), +] + +MATRIX_INPUT_TYPES = [ + pytest.param( + cp_coo_matrix, marks=pytest.mark.cupy_types, id="CuPy.coo_matrix" + ), + pytest.param( + cp_csr_matrix, marks=pytest.mark.cupy_types, id="CuPy.csr_matrix" + ), + pytest.param( + cp_csc_matrix, marks=pytest.mark.cupy_types, id="CuPy.csc_matrix" + ), +] + +NX_INPUT_TYPES = [ + pytest.param(nx.Graph, marks=pytest.mark.nx_types, id="nx.Graph"), +] + +NX_DIR_INPUT_TYPES = [ + pytest.param(nx.Graph, marks=pytest.mark.nx_types, id="nx.DiGraph"), +] + +CUGRAPH_INPUT_TYPES = [ + pytest.param( + cugraph.Graph, marks=pytest.mark.cugraph_types, id="cugraph.Graph" + ), +] + +CUGRAPH_DIR_INPUT_TYPES = [ + pytest.param( + cugraph.DiGraph, marks=pytest.mark.cugraph_types, id="cugraph.DiGraph" + ), +] + + + +def get_rapids_dataset_root_dir(): + envVar=os.getenv('RAPIDS_DATASET_ROOT_DIR') + if(envVar!=None): + return envVar + return RAPIDS_DATASET_ROOT_DIR + + + + + +def read_csv_for_nx(csv_file, read_weights_in_sp=True, read_weights=True): + print("Reading " + str(csv_file) + "...") + if read_weights: + if read_weights_in_sp is True: + df = pd.read_csv( + csv_file, + delimiter=" ", + header=None, + names=["0", "1", "weight"], + dtype={"0": "int32", "1": "int32", "weight": "float32"}, + ) + else: + df = pd.read_csv( + csv_file, + delimiter=" ", + header=None, + names=["0", "1", "weight"], + dtype={"0": 
"int32", "1": "int32", "weight": "float64"}, + ) + else: + df = pd.read_csv( + csv_file, + delimiter=" ", + header=None, + names=["0", "1"], + usecols=["0", "1"], + dtype={"0": "int32", "1": "int32"}, + ) + return df + + +def create_obj_from_csv( + csv_file_name, obj_type, csv_has_weights=True, edgevals=False +): + """ + Return an object based on obj_type populated with the contents of + csv_file_name + """ + if obj_type in [cugraph.Graph, cugraph.DiGraph]: + return generate_cugraph_graph_from_file( + csv_file_name, + directed=(obj_type is cugraph.DiGraph), + edgevals=edgevals, + ) + + elif obj_type in SCIPY_MATRIX_TYPES + CUPY_MATRIX_TYPES: + # FIXME: assuming float32 + if csv_has_weights: + (rows, cols, weights) = np.genfromtxt( + csv_file_name, delimiter=" ", dtype=np.float32, unpack=True + ) + else: + (rows, cols) = np.genfromtxt( + csv_file_name, delimiter=" ", dtype=np.float32, unpack=True + ) + + if (csv_has_weights is False) or (edgevals is False): + # COO matrices must have a value array. Also if edgevals are to be + # ignored (False), reset all weights to 1. + weights = np.array([1] * len(rows)) + + if obj_type in CUPY_MATRIX_TYPES: + coo = cp_coo_matrix( + (cp.asarray(weights), (cp.asarray(rows), cp.asarray(cols))), + dtype=np.float32, + ) + else: + coo = sp_coo_matrix( + (weights, (np.array(rows, dtype=int), + np.array(cols, dtype=int))), + ) + + if obj_type in [cp_csr_matrix, sp_csr_matrix]: + return coo.tocsr(copy=False) + elif obj_type in [cp_csc_matrix, sp_csc_matrix]: + return coo.tocsc(copy=False) + else: + return coo + + elif obj_type in [nx.Graph, nx.DiGraph]: + return generate_nx_graph_from_file( + csv_file_name, directed=(obj_type is nx.DiGraph), edgevals=edgevals + ) + + else: + raise TypeError(f"unsupported type: {obj_type}") + + +def read_csv_file(csv_file, read_weights_in_sp=True): + print("Reading " + str(csv_file) + "...") + if read_weights_in_sp is True: + return cudf.read_csv( + csv_file, + delimiter=" ", + dtype=["int32", "int32", "float32"], + header=None, + ) + else: + return cudf.read_csv( + csv_file, + delimiter=" ", + dtype=["int32", "int32", "float64"], + header=None, + ) + + +def read_dask_cudf_csv_file( + csv_file, read_weights_in_sp=True, single_partition=True +): + print("Reading " + str(csv_file) + "...") + if read_weights_in_sp is True: + if single_partition: + chunksize = os.path.getsize(csv_file) + return dask_cudf.read_csv( + csv_file, + chunksize=chunksize, + delimiter=" ", + names=["src", "dst", "weight"], + dtype=["int32", "int32", "float32"], + header=None, + ) + else: + return dask_cudf.read_csv( + csv_file, + delimiter=" ", + names=["src", "dst", "weight"], + dtype=["int32", "int32", "float32"], + header=None, + ) + else: + if single_partition: + chunksize = os.path.getsize(csv_file) + return dask_cudf.read_csv( + csv_file, + chunksize=chunksize, + delimiter=" ", + names=["src", "dst", "weight"], + dtype=["int32", "int32", "float32"], + header=None, + ) + else: + return dask_cudf.read_csv( + csv_file, + delimiter=" ", + names=["src", "dst", "weight"], + dtype=["int32", "int32", "float64"], + header=None, + ) + + +def generate_nx_graph_from_file(graph_file, directed=True, edgevals=False): + M = read_csv_for_nx(graph_file, read_weights_in_sp=edgevals) + edge_attr = "weight" if edgevals else None + Gnx = nx.from_pandas_edgelist( + M, + create_using=(nx.DiGraph() if directed else nx.Graph()), + source="0", + target="1", + edge_attr=edge_attr, + ) + return Gnx + + +def generate_cugraph_graph_from_file( + graph_file, directed=True, 
edgevals=False +): + cu_M = read_csv_file(graph_file) + G = cugraph.DiGraph() if directed else cugraph.Graph() + + if edgevals: + G.from_cudf_edgelist(cu_M, source="0", destination="1", edge_attr="2") + else: + G.from_cudf_edgelist(cu_M, source="0", destination="1") + return G + + +def generate_mg_batch_cugraph_graph_from_file(graph_file, directed=True): + client = get_client() + _ddf = read_dask_cudf_csv_file(graph_file) + ddf = client.persist(_ddf) + G = cugraph.DiGraph() if directed else cugraph.Graph() + G.from_dask_cudf_edgelist(ddf) + return G + + +def build_cu_and_nx_graphs(graph_file, directed=True, edgevals=False): + G = generate_cugraph_graph_from_file(graph_file, directed=directed, + edgevals=edgevals) + Gnx = generate_nx_graph_from_file(graph_file, directed=directed, + edgevals=edgevals) + return G, Gnx + + +def build_mg_batch_cu_and_nx_graphs(graph_file, directed=True): + G = generate_mg_batch_cugraph_graph_from_file( + graph_file, directed=directed + ) + Gnx = generate_nx_graph_from_file(graph_file, directed=directed) + return G, Gnx + + +def random_edgelist( + e=1024, + ef=16, + dtypes={"src": np.int32, "dst": np.int32, "val": float}, + drop_duplicates=True, + seed=None, +): + """Create a random edge list + + Parameters + ---------- + e : int + Number of edges + ef : int + Edge factor (average number of edges per vertex) + dtypes : dict + Mapping of column names to types. + Supported type is {"src": int, "dst": int, "val": float} + drop_duplicates + Drop duplicates + seed : int (optional) + Randomstate seed + + Examples + -------- + >>> from cugraph.tests import utils + >>> # genrates 20 df with 100M edges each and write to disk + >>> for x in range(20): + >>> df = utils.random_edgelist(e=100000000, ef=64, + >>> dtypes={'src':np.int32, 'dst':np.int32}, + >>> seed=x) + >>> df.to_csv('df'+str(x), header=False, index=False) + >>> #df.to_parquet('files_parquet/df'+str(x), index=False) + """ + state = np.random.RandomState(seed) + columns = dict( + (k, make[dt](e // ef, e, state)) for k, dt in dtypes.items() + ) + + df = pd.DataFrame(columns) + if drop_duplicates: + df = df.drop_duplicates(subset=["src", "dst"]) + print("Generated " + str(df.shape[0]) + " edges") + return df + + +def make_int32(v, e, rstate): + return rstate.randint(low=0, high=v, size=e, dtype=np.int32) + + +def make_int64(v, e, rstate): + return rstate.randint(low=0, high=v, size=e, dtype=np.int64) + + +def make_float(v, e, rstate): + return rstate.rand(e) + + +make = {float: make_float, np.int32: make_int32, np.int64: make_int64} + + +def genFixtureParamsProduct(*args): + """ + Returns the cartesian product of the param lists passed in. The lists must + be flat lists of pytest.param objects, and the result will be a flat list + of pytest.param objects with values and meta-data combined accordingly. A + flat list of pytest.param objects is required for pytest fixtures to + properly recognize the params. The combinations also include ids generated + from the param values and id names associated with each list. 
For example: + + genFixtureParamsProduct( ([pytest.param(True, marks=[pytest.mark.A_good]), + pytest.param(False, marks=[pytest.mark.A_bad])], + "A"), + ([pytest.param(True, marks=[pytest.mark.B_good]), + pytest.param(False, marks=[pytest.mark.B_bad])], + "B") ) + + results in fixture param combinations: + + True, True - marks=[A_good, B_good] - id="A=True,B=True" + True, False - marks=[A_good, B_bad] - id="A=True,B=False" + False, True - marks=[A_bad, B_good] - id="A=False,B=True" + False, False - marks=[A_bad, B_bad] - id="A=False,B=False" + + Simply using itertools.product on the lists would result in a list of + sublists of individual param objects (ie. not "merged"), which would not be + recognized properly as params for a fixture by pytest. + + NOTE: This function is only needed for parameterized fixtures. + Tests/benchmarks will automatically get this behavior when specifying + multiple @pytest.mark.parameterize(param_name, param_value_list) + decorators. + """ + # Enforce that each arg is a list of pytest.param objs and separate params + # and IDs. + paramLists = [] + ids = [] + paramType = pytest.param().__class__ + for (paramList, id) in args: + for param in paramList: + assert isinstance(param, paramType) + paramLists.append(paramList) + ids.append(id) + + retList = [] + for paramCombo in product(*paramLists): + values = [p.values[0] for p in paramCombo] + marks = [m for p in paramCombo for m in p.marks] + comboid = ",".join( + ["%s=%s" % (id, p.values[0]) for (p, id) in zip(paramCombo, ids)] + ) + retList.append(pytest.param(values, marks=marks, id=comboid)) + return retList + + +# shared between min and max spanning tree tests +def compare_mst(mst_cugraph, mst_nx): + mst_nx_df = nx.to_pandas_edgelist(mst_nx) + edgelist_df = mst_cugraph.view_edge_list() + assert len(mst_nx_df) == len(edgelist_df) + + # check cycles + Gnx = nx.from_pandas_edgelist( + edgelist_df.to_pandas(), + create_using=nx.Graph(), + source="src", + target="dst", + ) + try: + lc = nx.find_cycle(Gnx, source=None, orientation="ignore") + print(lc) + except nx.NetworkXNoCycle: + pass + + # check total weight + cg_sum = edgelist_df["weights"].sum() + nx_sum = mst_nx_df["weight"].sum() + print(cg_sum) + print(nx_sum) + assert np.isclose(cg_sum, nx_sum) + From d1405dd9b254b115e36ea452880b54dd5aa16f2a Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 7 Jan 2021 17:21:38 -0500 Subject: [PATCH 083/343] add initial API for induced subgraph --- cpp/src/experimental/induced_subgraph.cu | 94 ++++++++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 cpp/src/experimental/induced_subgraph.cu diff --git a/cpp/src/experimental/induced_subgraph.cu b/cpp/src/experimental/induced_subgraph.cu new file mode 100644 index 00000000000..e8ffdd9c4f5 --- /dev/null +++ b/cpp/src/experimental/induced_subgraph.cu @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+namespace cugraph {
+namespace experimental {
+namespace detail {
+}  // detail
+
+/**
+ * @brief extract induced subgraph(s).
+ *
+ * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
+ * @tparam edge_t Type of edge identifiers. Needs to be an integral type.
+ * @tparam weight_t Type of edge weights. Needs to be a floating point type.
+ * @tparam store_transposed Flag indicating whether to store the graph adjacency matrix as is or as
+ * transposed.
+ * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false)
+ * or multi-GPU (true).
+ * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
+ * handles to various CUDA libraries) to run graph algorithms.
+ * @param graph_view Graph view object; we extract induced subgraphs from @p graph_view.
+ * @param subgraph_offsets Pointer to subgraph vertex offsets (size == @p num_subgraphs + 1).
+ * @param subgraph_vertices Pointer to subgraph vertices (size == @p subgraph_offsets[@p num_subgraphs]).
+ * @param num_subgraphs Number of induced subgraphs to extract.
+ * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`).
+ * @return std::tuple, rmm::device_uvector, rmm::device_uvector, rmm::device_uvector> Quadruplet of edge source vertices, edge destination vertices, edge weights, and edge offsets for each induced subgraph.
+ */
+template
+std::tuple, rmm::device_uvector, rmm::device_uvector, rmm::device_uvector>
+extract_induced_subgraph(
+  raft::handle_t const &handle,
+  graph_view_t const &graph_view,
+  size_t const *subgraph_offsets /* size == num_subgraphs + 1 */,
+  vertex_t const *subgraph_vertices /* size == subgraph_offsets[num_subgraphs] */,
+  size_t num_subgraphs,
+  bool do_expensive_check = false) {
+  // FIXME: this code is inefficient for vertices whose local degrees are much larger than the
+  // number of vertices in the subgraphs. We may later add additional code to handle such cases.
+  // FIXME: we may compare the performance (speed & memory footprint; the hash table based approach
+  // uses extra memory) of hash table based and binary search based approaches.
+
+  rmm::device_uvector subgraph_major_offsets(0, handle.get_stream());
+  rmm::device_uvector subgraph_majors(0, handle.get_stream());
+  rmm::device_uvector subgraph_minor_offsets(0, handle.get_stream());  // relevant only if multi_gpu
+  rmm::device_uvector subgraph_minors(0, handle.get_stream());  // relevant only if multi_gpu
+
+  // 1.
construct (subgraph_idx, vertex, local_degree) triplets + + std::vector h_subgraph_offsets(num_subgraphs + 1); + raft::update_host(h_subgraph_offsets.data(), subgraph_offsets, num_subgraphs + 1, handle.get_stream()); + + // construct (subgraph_idx, v, local_degree) + + // sort triplets by local_degree (non-ascending) + + // find number of edges for each subgraph + + // allocate memory + + // enumerate edges for each subgraph +} + +} // namespace experimental +} // namespace cugraph From 844000e2de2d9d47febf48a4c7aa9aea86fa59ed Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Fri, 8 Jan 2021 22:30:58 -0500 Subject: [PATCH 084/343] fix API inconsistencies in graph functions.hpp (compare to graph primitives based algorithms in algorithms.hpp) --- .../experimental/detail/graph_utils.cuh | 8 +- cpp/include/experimental/graph_functions.hpp | 101 +++-- cpp/src/experimental/coarsen_graph.cu | 81 ++-- cpp/src/experimental/relabel.cu | 99 +++-- cpp/src/experimental/renumber_edgelist.cu | 380 +++++++++++++----- 5 files changed, 466 insertions(+), 203 deletions(-) diff --git a/cpp/include/experimental/detail/graph_utils.cuh b/cpp/include/experimental/detail/graph_utils.cuh index 68e2816c1a1..cad6eb12de5 100644 --- a/cpp/include/experimental/detail/graph_utils.cuh +++ b/cpp/include/experimental/detail/graph_utils.cuh @@ -152,18 +152,18 @@ struct compute_gpu_id_from_vertex_t { } }; -template +template struct compute_gpu_id_from_edge_t { bool hypergraph_partitioned{false}; int comm_size{0}; int row_comm_size{0}; int col_comm_size{0}; - __device__ int operator()(vertex_t src, vertex_t dst) const + __device__ int operator()(vertex_t major, vertex_t minor) const { cuco::detail::MurmurHash3_32 hash_func{}; - auto major_comm_rank = static_cast(hash_func(store_transposed ? dst : src) % comm_size); - auto minor_comm_rank = static_cast(hash_func(store_transposed ? src : dst) % comm_size); + auto major_comm_rank = static_cast(hash_func(major) % comm_size); + auto minor_comm_rank = static_cast(hash_func(minor) % comm_size); if (hypergraph_partitioned) { return (minor_comm_rank / col_comm_size) * row_comm_size + (major_comm_rank % row_comm_size); } else { diff --git a/cpp/include/experimental/graph_functions.hpp b/cpp/include/experimental/graph_functions.hpp index 9fe999da319..564a4177578 100644 --- a/cpp/include/experimental/graph_functions.hpp +++ b/cpp/include/experimental/graph_functions.hpp @@ -28,7 +28,6 @@ namespace cugraph { namespace experimental { -// FIXME: add do_expensive_check /** * @brief renumber edgelist (multi-GPU) * @@ -44,15 +43,17 @@ namespace experimental { * @param edgelist_major_vertices Edge source vertex IDs (if the graph adjacency matrix is stored as * is) or edge destination vertex IDs (if the transposed graph adjacency matrix is stored). Vertex * IDs are updated in-place ([INOUT] parameter). Applying the compute_gpu_id_from_edge_t functor to - * every (source, destination) pair should return the local GPU ID for this function to work (edges - * should be pre-shuffled). + * every (major, minor) pair should return the local GPU ID for this function to work (edges should + * be pre-shuffled). * @param edgelist_minor_vertices Edge destination vertex IDs (if the graph adjacency matrix is * stored as is) or edge source vertex IDs (if the transposed graph adjacency matrix is stored). - * Vertex IDs are updated in-place ([INOUT] parameter). + * Vertex IDs are updated in-place ([INOUT] parameter). 
Applying the compute_gpu_id_from_edge_t + * functor to every (major, minor) pair should return the local GPU ID for this function to work + * (edges should be pre-shuffled). + * @param num_edgelist_edges Number of edges in the edgelist. * @param is_hypergraph_partitioned Flag indicating whether we are assuming hypergraph partitioning - * (this flag will be removed in the future). Applying the compute_gpu_id_from_edge_t functor to - * every (source, destination) pair should return the local GPU ID for this function to work (edges - * should be pre-shuffled). + * (this flag will be removed in the future). + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). * @return std::tuple, partition_t, vertex_t, edge_t> * Quadruplet of labels (vertex IDs before renumbering) for the entire set of vertices (assigned to * this process in multi-GPU), partition_t object storing graph partitioning information, total @@ -62,11 +63,12 @@ template std::enable_if_t, partition_t, vertex_t, edge_t>> renumber_edgelist(raft::handle_t const& handle, - rmm::device_uvector& edgelist_major_vertices /* [INOUT] */, - rmm::device_uvector& edgelist_minor_vertices /* [INOUT] */, - bool is_hypergraph_partitioned); + vertex_t* edgelist_major_vertices /* [INOUT] */, + vertex_t* edgelist_minor_vertices /* [INOUT] */, + edge_t num_edgelist_edges, + bool is_hypergraph_partitioned, + bool do_expensive_check = false); -// FIXME: add do_expensive_check /** * @brief renumber edgelist (single-GPU) * @@ -82,16 +84,19 @@ renumber_edgelist(raft::handle_t const& handle, * @param edgelist_minor_vertices Edge destination vertex IDs (if the graph adjacency matrix is * stored as is) or edge source vertex IDs (if the transposed graph adjacency matrix is stored). * Vertex IDs are updated in-place ([INOUT] parameter). + * @param num_edgelist_edges Number of edges in the edgelist. + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). * @return rmm::device_uvector Labels (vertex IDs before renumbering) for the entire set * of vertices. */ template std::enable_if_t> renumber_edgelist( raft::handle_t const& handle, - rmm::device_uvector& edgelist_major_vertices /* [INOUT] */, - rmm::device_uvector& edgelist_minor_vertices /* [INOUT] */); + vertex_t* edgelist_major_vertices /* [INOUT] */, + vertex_t* edgelist_minor_vertices /* [INOUT] */, + edge_t num_edgelist_edges, + bool do_expensive_check = false); -// FIXME: add do_expensive_check /** * @brief renumber edgelist (multi-GPU) * @@ -106,21 +111,24 @@ std::enable_if_t> renumber_edgelist( * or multi-GPU (true). * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. - * @param vertices Part of the entire set of vertices in the graph to be renumbered. Applying the - * compute_gpu_id_from_vertex_t to every vertex should return the local GPU ID for this function to - * work (vertices should be pre-shuffled). + * @param local_vertices Part of the entire set of vertices in the graph to be renumbered. Applying + * the compute_gpu_id_from_vertex_t to every vertex should return the local GPU ID for this function + * to work (vertices should be pre-shuffled). + * @param num_local_vertices Number of local vertices. * @param edgelist_major_vertices Edge source vertex IDs (if the graph adjacency matrix is stored as * is) or edge destination vertex IDs (if the transposed graph adjacency matrix is stored). 
Vertex * IDs are updated in-place ([INOUT] parameter). Applying the compute_gpu_id_from_edge_t functor to - * every (source, destination) pair should return the local GPU ID for this function to work (edges - * should be pre-shuffled). + * every (major, minor) pair should return the local GPU ID for this function to work (edges should + * be pre-shuffled). * @param edgelist_minor_vertices Edge destination vertex IDs (if the graph adjacency matrix is * stored as is) or edge source vertex IDs (if the transposed graph adjacency matrix is stored). - * Vertex IDs are updated in-place ([INOUT] parameter). + * Vertex IDs are updated in-place ([INOUT] parameter). Applying the compute_gpu_id_from_edge_t + * functor to every (major, minor) pair should return the local GPU ID for this function to work + * (edges should be pre-shuffled). + * @param num_edgelist_edges Number of edges in the edgelist. * @param is_hypergraph_partitioned Flag indicating whether we are assuming hypergraph partitioning - * (this flag will be removed in the future). Applying the compute_gpu_id_from_edge_t functor to - * every (source, destination) pair should return the local GPU ID for this function to work (edges - * should be pre-shuffled). + * (this flag will be removed in the future). + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). * @return std::tuple, partition_t, vertex_t, edge_t> * Quadruplet of labels (vertex IDs before renumbering) for the entire set of vertices (assigned to * this process in multi-GPU), partition_t object storing graph partitioning information, total @@ -130,12 +138,14 @@ template std::enable_if_t, partition_t, vertex_t, edge_t>> renumber_edgelist(raft::handle_t const& handle, - rmm::device_uvector const& vertices, - rmm::device_uvector& edgelist_major_vertices /* [INOUT] */, - rmm::device_uvector& edgelist_minor_vertices /* [INOUT] */, - bool is_hypergraph_partitioned); + vertex_t const* local_vertices, + vertex_t num_local_vertices, + vertex_t* edgelist_major_vertices /* [INOUT] */, + vertex_t* edgelist_minor_vertices /* [INOUT] */, + edge_t num_edgelist_edges, + bool is_hypergraph_partitioned, + bool do_expensive_check = false); -// FIXME: add do_expensive_check /** * @brief renumber edgelist (single-GPU) * @@ -149,23 +159,28 @@ renumber_edgelist(raft::handle_t const& handle, * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. * @param vertices The entire set of vertices in the graph to be renumbered. + * @param num_vertices Number of vertices. * @param edgelist_major_vertices Edge source vertex IDs (if the graph adjacency matrix is stored as * is) or edge destination vertex IDs (if the transposed graph adjacency matrix is stored). Vertex * IDs are updated in-place ([INOUT] parameter). * @param edgelist_minor_vertices Edge destination vertex IDs (if the graph adjacency matrix is * stored as is) or edge source vertex IDs (if the transposed graph adjacency matrix is stored). * Vertex IDs are updated in-place ([INOUT] parameter). + * @param num_edgelist_edges Number of edges in the edgelist. + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). * @return rmm::device_uvector Labels (vertex IDs before renumbering) for the entire set * of vertices. 
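+ *
+ * A minimal single-GPU usage sketch (illustrative only; `vertices`, `edgelist_majors`, and
+ * `edgelist_minors` are hypothetical, pre-populated rmm::device_uvector<int32_t> objects):
+ * @code
+ * // renumber the edge list in-place and obtain the new-ID -> old-ID mapping
+ * auto renumber_map_labels = renumber_edgelist<int32_t, int32_t, false>(
+ *   handle,
+ *   vertices.data(),
+ *   static_cast<int32_t>(vertices.size()),
+ *   edgelist_majors.data(),
+ *   edgelist_minors.data(),
+ *   static_cast<int32_t>(edgelist_majors.size()));
+ * // renumber_map_labels[i] holds the original vertex ID of renumbered vertex ID i.
+ * @endcode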
*/ template std::enable_if_t> renumber_edgelist( raft::handle_t const& handle, - rmm::device_uvector const& vertices, - rmm::device_uvector& edgelist_major_vertices /* [INOUT] */, - rmm::device_uvector& edgelist_minor_vertices /* [INOUT] */); + vertex_t const* vertices, + vertex_t num_vertices, + vertex_t* edgelist_major_vertices /* [INOUT] */, + vertex_t* edgelist_minor_vertices /* [INOUT] */, + edge_t num_edgelist_edges, + bool do_expensive_check = false); -// FIXME: add do_expensive_check /** * @brief Compute the coarsened graph. * @@ -184,6 +199,7 @@ std::enable_if_t> renumber_edgelist( * handles to various CUDA libraries) to run graph algorithms. * @param graph_view Graph view object of the input graph to be coarsened. * @param labels Vertex labels (assigned to this process in multi-GPU) to be used in coarsening. + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). * @return std::tuple>, rmm::device_uvector> Tuple of the coarsened graph and labels mapped to the * vertices (assigned to this process in multi-GPU) in the coarsened graph. @@ -198,9 +214,9 @@ std::tuple const& graph_view, - vertex_t const* labels); + vertex_t const* labels, + bool do_expensive_check = false); -// FIXME: add do_expensive_check /** * @brief Relabel old labels to new labels. * @@ -209,18 +225,23 @@ coarsen_graph( * or multi-GPU (true). * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. - * @param old_labels Old labels to be relabeled. * @param old_new_label_pairs Pairs of an old label and the corresponding new label (each process * holds only part of the entire old labels and the corresponding new labels; partitioning can be * arbitrary). + * @param num_label_pairs Number of (old, new) label pairs. + * @param labels Labels to be relabeled. This initially holds old labels. Old labels are updated to + * new labels in-place ([INOUT] parameter). + * @param num_labels Number of labels to be relabeled. + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). * @return rmm::device_uvector New labels corresponding to the @p old_labels. */ template -rmm::device_uvector relabel( - raft::handle_t const& handle, - rmm::device_uvector const& old_labels, - std::tuple, rmm::device_uvector> const& - old_new_label_pairs); +void relabel(raft::handle_t const& handle, + std::tuple old_new_label_pairs, + vertex_t num_label_pairs, + vertex_t* labels /* [INOUT] */, + vertex_t num_labels, + bool do_expensive_check = false); } // namespace experimental } // namespace cugraph diff --git a/cpp/src/experimental/coarsen_graph.cu b/cpp/src/experimental/coarsen_graph.cu index 737a29b0cc8..56b8a4f2989 100644 --- a/cpp/src/experimental/coarsen_graph.cu +++ b/cpp/src/experimental/coarsen_graph.cu @@ -208,7 +208,8 @@ std::enable_if_t< coarsen_graph( raft::handle_t const &handle, graph_view_t const &graph_view, - vertex_t const *labels) + vertex_t const *labels, + bool do_expensive_check) { auto &comm = handle.get_comms(); auto const comm_size = comm.get_size(); @@ -220,6 +221,10 @@ coarsen_graph( auto const col_comm_size = col_comm.get_size(); auto const col_comm_rank = col_comm.get_rank(); + if (do_expensive_check) { + // currently, nothing to do + } + // 1. 
locally construct coarsened edge list // FIXME: we don't need adj_matrix_major_labels if we apply the same partitioning scheme @@ -338,13 +343,11 @@ coarsen_graph( edge_first, edge_first + coarsened_edgelist_major_vertices.size(), [key_func = - detail::compute_gpu_id_from_edge_t{ - graph_view.is_hypergraph_partitioned(), - comm.get_size(), - row_comm.get_size(), - col_comm.get_size()}] __device__(auto val) { - return store_transposed ? key_func(thrust::get<1>(val), thrust::get<0>(val)) - : key_func(thrust::get<0>(val), thrust::get<1>(val)); + detail::compute_gpu_id_from_edge_t{graph_view.is_hypergraph_partitioned(), + comm.get_size(), + row_comm.get_size(), + col_comm.get_size()}] __device__(auto val) { + return key_func(thrust::get<0>(val), thrust::get<1>(val)); }, handle.get_stream()); @@ -410,11 +413,15 @@ coarsen_graph( vertex_t number_of_vertices{}; edge_t number_of_edges{}; std::tie(renumber_map_labels, partition, number_of_vertices, number_of_edges) = - renumber_edgelist(handle, - unique_labels, - coarsened_edgelist_major_vertices, - coarsened_edgelist_minor_vertices, - graph_view.is_hypergraph_partitioned()); + renumber_edgelist( + handle, + unique_labels.data(), + static_cast(unique_labels.size()), + coarsened_edgelist_major_vertices.data(), + coarsened_edgelist_minor_vertices.data(), + static_cast(coarsened_edgelist_major_vertices.size()), + graph_view.is_hypergraph_partitioned(), + do_expensive_check); // 5. build a graph @@ -423,10 +430,10 @@ coarsen_graph( CUGRAPH_FAIL("unimplemented."); } else { edgelists.resize(1); - edgelists[0].p_src_vertices = store_transposed ? coarsened_edgelist_minor_vertices.data() - : coarsened_edgelist_major_vertices.data(); - edgelists[0].p_dst_vertices = store_transposed ? coarsened_edgelist_major_vertices.data() - : coarsened_edgelist_minor_vertices.data(); + edgelists[0].p_src_vertices = store_transposed ? coarsened_edgelist_minor_vertices.data() + : coarsened_edgelist_major_vertices.data(); + edgelists[0].p_dst_vertices = store_transposed ? coarsened_edgelist_major_vertices.data() + : coarsened_edgelist_minor_vertices.data(); edgelists[0].p_edge_weights = coarsened_edgelist_weights.data(); edgelists[0].number_of_edges = static_cast(coarsened_edgelist_major_vertices.size()); } @@ -456,8 +463,13 @@ std::enable_if_t< coarsen_graph( raft::handle_t const &handle, graph_view_t const &graph_view, - vertex_t const *labels) + vertex_t const *labels, + bool do_expensive_check) { + if (do_expensive_check) { + // currently, nothing to do + } + rmm::device_uvector coarsened_edgelist_major_vertices(0, handle.get_stream()); rmm::device_uvector coarsened_edgelist_minor_vertices(0, handle.get_stream()); rmm::device_uvector coarsened_edgelist_weights(0, handle.get_stream()); @@ -493,13 +505,19 @@ coarsen_graph( handle.get_stream()); auto renumber_map_labels = renumber_edgelist( - handle, unique_labels, coarsened_edgelist_major_vertices, coarsened_edgelist_minor_vertices); + handle, + unique_labels.data(), + static_cast(unique_labels.size()), + coarsened_edgelist_major_vertices.data(), + coarsened_edgelist_minor_vertices.data(), + static_cast(coarsened_edgelist_major_vertices.size()), + do_expensive_check); edgelist_t edgelist{}; - edgelist.p_src_vertices = store_transposed ? coarsened_edgelist_minor_vertices.data() - : coarsened_edgelist_major_vertices.data(); - edgelist.p_dst_vertices = store_transposed ? coarsened_edgelist_major_vertices.data() - : coarsened_edgelist_minor_vertices.data(); + edgelist.p_src_vertices = store_transposed ? 
coarsened_edgelist_minor_vertices.data() + : coarsened_edgelist_major_vertices.data(); + edgelist.p_dst_vertices = store_transposed ? coarsened_edgelist_major_vertices.data() + : coarsened_edgelist_minor_vertices.data(); edgelist.p_edge_weights = coarsened_edgelist_weights.data(); edgelist.number_of_edges = static_cast(coarsened_edgelist_major_vertices.size()); @@ -525,9 +543,10 @@ std::tuple const &graph_view, - vertex_t const *labels) + vertex_t const *labels, + bool do_expensive_check) { - return detail::coarsen_graph(handle, graph_view, labels); + return detail::coarsen_graph(handle, graph_view, labels, do_expensive_check); } // explicit instantiation @@ -536,25 +555,29 @@ template std::tuple rmm::device_uvector> coarsen_graph(raft::handle_t const &handle, graph_view_t const &graph_view, - int32_t const *labels); + int32_t const *labels, + bool do_expensive_check); template std::tuple>, rmm::device_uvector> coarsen_graph(raft::handle_t const &handle, graph_view_t const &graph_view, - int32_t const *labels); + int32_t const *labels, + bool do_expensive_check); template std::tuple>, rmm::device_uvector> coarsen_graph(raft::handle_t const &handle, graph_view_t const &graph_view, - int32_t const *labels); + int32_t const *labels, + bool do_expensive_check); template std::tuple>, rmm::device_uvector> coarsen_graph(raft::handle_t const &handle, graph_view_t const &graph_view, - int32_t const *labels); + int32_t const *labels, + bool do_expensive_check); } // namespace experimental } // namespace cugraph diff --git a/cpp/src/experimental/relabel.cu b/cpp/src/experimental/relabel.cu index f8cfe810a97..fec59592a72 100644 --- a/cpp/src/experimental/relabel.cu +++ b/cpp/src/experimental/relabel.cu @@ -42,32 +42,38 @@ namespace cugraph { namespace experimental { template -rmm::device_uvector relabel( - raft::handle_t const &handle, - rmm::device_uvector const &old_labels, - std::tuple, rmm::device_uvector> const - &old_new_label_pairs) +void relabel(raft::handle_t const& handle, + std::tuple old_new_label_pairs, + vertex_t num_label_pairs, + vertex_t* labels /* [INOUT] */, + vertex_t num_labels, + bool do_expensive_check) { double constexpr load_factor = 0.7; - rmm::device_uvector new_labels(0, handle.get_stream()); - if (multi_gpu) { - auto &comm = handle.get_comms(); + auto& comm = handle.get_comms(); auto const comm_size = comm.get_size(); auto key_func = detail::compute_gpu_id_from_vertex_t{comm_size}; // find unique old labels (to be relabeled) - rmm::device_uvector unique_old_labels(old_labels, handle.get_stream()); + rmm::device_uvector unique_old_labels(num_labels, handle.get_stream()); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + labels, + labels + num_labels, + unique_old_labels.data()); thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), unique_old_labels.begin(), unique_old_labels.end()); - auto it = thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - unique_old_labels.begin(), - unique_old_labels.end()); - unique_old_labels.resize(thrust::distance(unique_old_labels.begin(), it), handle.get_stream()); + unique_old_labels.resize( + thrust::distance( + unique_old_labels.begin(), + thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + unique_old_labels.begin(), + unique_old_labels.end())), + handle.get_stream()); unique_old_labels.shrink_to_fit(handle.get_stream()); // collect new labels for the unique old labels @@ -80,10 +86,16 @@ rmm::device_uvector relabel( 
rmm::device_uvector rx_label_pair_old_labels(0, handle.get_stream()); rmm::device_uvector rx_label_pair_new_labels(0, handle.get_stream()); { - rmm::device_uvector label_pair_old_labels(std::get<0>(old_new_label_pairs), - handle.get_stream()); - rmm::device_uvector label_pair_new_labels(std::get<1>(old_new_label_pairs), - handle.get_stream()); + rmm::device_uvector label_pair_old_labels(num_label_pairs, handle.get_stream()); + rmm::device_uvector label_pair_new_labels(num_label_pairs, handle.get_stream()); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + std::get<0>(old_new_label_pairs), + std::get<0>(old_new_label_pairs) + num_label_pairs, + label_pair_old_labels.begin()); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + std::get<1>(old_new_label_pairs), + std::get<1>(old_new_label_pairs) + num_label_pairs, + label_pair_new_labels.begin()); auto pair_first = thrust::make_zip_iterator( thrust::make_tuple(label_pair_old_labels.begin(), label_pair_new_labels.begin())); std::forward_as_tuple(std::tie(rx_label_pair_old_labels, rx_label_pair_new_labels), @@ -91,7 +103,7 @@ rmm::device_uvector relabel( sort_and_shuffle_values( handle.get_comms(), pair_first, - pair_first + label_pair_old_labels.size(), + pair_first + num_label_pairs, [key_func] __device__(auto val) { return key_func(thrust::get<0>(val)); }, handle.get_stream()); @@ -162,43 +174,52 @@ rmm::device_uvector relabel( }); relabel_map.insert(pair_first, pair_first + unique_old_labels.size()); - new_labels.resize(old_labels.size(), handle.get_stream()); - relabel_map.find(old_labels.begin(), old_labels.end(), new_labels.begin()); + relabel_map.find(labels, labels + num_labels, labels); } else { cuco::static_map relabel_map( - static_cast(static_cast(std::get<0>(old_new_label_pairs).size()) / - load_factor), + static_cast(static_cast(num_label_pairs) / load_factor), invalid_vertex_id::value, invalid_vertex_id::value); auto pair_first = thrust::make_transform_iterator( - thrust::make_zip_iterator(thrust::make_tuple(std::get<0>(old_new_label_pairs).begin(), - std::get<1>(old_new_label_pairs).begin())), + thrust::make_zip_iterator( + thrust::make_tuple(std::get<0>(old_new_label_pairs), std::get<1>(old_new_label_pairs))), [] __device__(auto val) { return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); }); - relabel_map.insert(pair_first, pair_first + std::get<0>(old_new_label_pairs).size()); - new_labels.resize(old_labels.size(), handle.get_stream()); - relabel_map.find(old_labels.begin(), old_labels.end(), new_labels.begin()); + relabel_map.insert(pair_first, pair_first + num_label_pairs); + relabel_map.find(labels, labels + num_labels, labels); + } + + if (do_expensive_check) { + CUGRAPH_EXPECTS( + thrust::count(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + labels, + labels + num_labels, + invalid_vertex_id::value) == 0, + "Invalid input argument: labels include old label values missing in old_new_label_pairs."); } - return std::move(new_labels); + return; } // explicit instantiation -template rmm::device_uvector relabel( - raft::handle_t const &handle, - rmm::device_uvector const &old_labels, - std::tuple, rmm::device_uvector> const - &old_new_label_pairs); - -template rmm::device_uvector relabel( - raft::handle_t const &handle, - rmm::device_uvector const &old_labels, - std::tuple, rmm::device_uvector> const - &old_new_label_pairs); +template void relabel(raft::handle_t const& handle, + std::tuple old_new_label_pairs, + int32_t 
num_label_pairs, + int32_t* labels, + int32_t num_labels, + bool do_expensive_check); + +template void relabel( + raft::handle_t const& handle, + std::tuple old_new_label_pairs, + int32_t num_label_pairs, + int32_t* labels, + int32_t num_labels, + bool do_expensive_check); } // namespace experimental } // namespace cugraph diff --git a/cpp/src/experimental/renumber_edgelist.cu b/cpp/src/experimental/renumber_edgelist.cu index dc54538eaad..41d1680a5bb 100644 --- a/cpp/src/experimental/renumber_edgelist.cu +++ b/cpp/src/experimental/renumber_edgelist.cu @@ -45,18 +45,23 @@ namespace detail { template rmm::device_uvector compute_renumber_map( - raft::handle_t const &handle, - vertex_t const *vertices, + raft::handle_t const& handle, + vertex_t const* vertices, vertex_t num_local_vertices /* relevant only if vertices != nullptr */, - rmm::device_uvector const &edgelist_major_vertices, - rmm::device_uvector const &edgelist_minor_vertices) + vertex_t const* edgelist_major_vertices, + vertex_t const* edgelist_minor_vertices, + edge_t num_edgelist_edges) { // FIXME: compare this sort based approach with hash based approach in both speed and memory // footprint // 1. acquire (unique major label, count) pairs - rmm::device_uvector tmp_labels(edgelist_major_vertices, handle.get_stream()); + rmm::device_uvector tmp_labels(num_edgelist_edges, handle.get_stream()); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edgelist_major_vertices, + edgelist_major_vertices + num_edgelist_edges, + tmp_labels.begin()); thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), tmp_labels.begin(), tmp_labels.end()); @@ -79,7 +84,11 @@ rmm::device_uvector compute_renumber_map( // 2. acquire unique minor labels - rmm::device_uvector minor_labels(edgelist_minor_vertices, handle.get_stream()); + rmm::device_uvector minor_labels(num_edgelist_edges, handle.get_stream()); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edgelist_minor_vertices, + edgelist_minor_vertices + num_edgelist_edges, + minor_labels.begin()); thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), minor_labels.begin(), minor_labels.end()); @@ -134,7 +143,7 @@ rmm::device_uvector compute_renumber_map( // 4. if multi-GPU, shuffle and reduce (label, count) pairs if (multi_gpu) { - auto &comm = handle.get_comms(); + auto& comm = handle.get_comms(); auto const comm_size = comm.get_size(); auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(labels.begin(), counts.begin())); @@ -220,30 +229,181 @@ rmm::device_uvector compute_renumber_map( return std::move(labels); } +template +void expensive_check_edgelist( + raft::handle_t const& handle, + vertex_t const* local_vertices, + vertex_t num_local_vertices /* relevant only if local_vertices != nullptr */, + vertex_t const* edgelist_major_vertices, + vertex_t const* edgelist_minor_vertices, + edge_t num_edgelist_edges, + bool is_hypergraph_partitioned /* relevant only if multi_gpu == true */) +{ + rmm::device_uvector sorted_local_vertices( + local_vertices != nullptr ? 
num_local_vertices : vertex_t{0}, handle.get_stream()); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + local_vertices, + local_vertices + num_local_vertices, + sorted_local_vertices.begin()); + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + sorted_local_vertices.begin(), + sorted_local_vertices.end()); + + if (multi_gpu) { + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_size = row_comm.get_size(); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_size = col_comm.get_size(); + + CUGRAPH_EXPECTS( + thrust::count_if( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + local_vertices, + local_vertices + num_local_vertices, + [comm_rank, + key_func = + detail::compute_gpu_id_from_vertex_t{comm_size}] __device__(auto val) { + return key_func(val) != comm_rank; + }) == 0, + "Invalid input argument: local_vertices should be pre-shuffled."); + + auto edge_first = thrust::make_zip_iterator( + thrust::make_tuple(edgelist_major_vertices, edgelist_minor_vertices)); + CUGRAPH_EXPECTS( + thrust::count_if( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edge_first, + edge_first + num_edgelist_edges, + [comm_rank, + key_func = + detail::compute_gpu_id_from_edge_t{is_hypergraph_partitioned, + comm_size, + row_comm_size, + col_comm_size}] __device__(auto edge) { + return key_func(thrust::get<0>(edge), thrust::get<1>(edge)) == comm_rank; + }) == 0, + "Invalid input argument: edgelist_major_vertices & edgelist_minor_vertices should be " + "pre-shuffled."); + + if (local_vertices != nullptr) { + rmm::device_uvector unique_edge_vertices(num_edgelist_edges * 2, + handle.get_stream()); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edgelist_major_vertices, + edgelist_major_vertices + num_edgelist_edges, + unique_edge_vertices.begin()); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edgelist_minor_vertices, + edgelist_minor_vertices + num_edgelist_edges, + unique_edge_vertices.begin() + num_edgelist_edges); + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + unique_edge_vertices.begin(), + unique_edge_vertices.end()); + unique_edge_vertices.resize( + thrust::distance( + unique_edge_vertices.begin(), + thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + unique_edge_vertices.begin(), + unique_edge_vertices.end())), + handle.get_stream()); + + rmm::device_uvector rx_unique_edge_vertices(0, handle.get_stream()); + std::tie(rx_unique_edge_vertices, std::ignore) = sort_and_shuffle_values( + handle.get_comms(), + unique_edge_vertices.begin(), + unique_edge_vertices.end(), + [key_func = detail::compute_gpu_id_from_vertex_t{comm_size}] __device__( + auto val) { return key_func(val); }, + handle.get_stream()); + + unique_edge_vertices = std::move(rx_unique_edge_vertices); + + CUGRAPH_EXPECTS( + thrust::count_if( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + unique_edge_vertices.begin(), + unique_edge_vertices.end(), + [num_local_vertices, + sorted_local_vertices = sorted_local_vertices.data()] __device__(auto v) { + return !thrust::binary_search( + thrust::seq, sorted_local_vertices, sorted_local_vertices + num_local_vertices, v); + }) == 0, 
+ "Invalid input argument: edgelist_major_vertices and/or edgelist_minor_vertices have " + "invalid vertex ID(s)."); + } + } else { + if (local_vertices != nullptr) { + CUGRAPH_EXPECTS( + thrust::count_if( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edgelist_major_vertices, + edgelist_major_vertices + num_edgelist_edges, + [num_local_vertices, + sorted_local_vertices = sorted_local_vertices.data()] __device__(auto v) { + return !thrust::binary_search( + thrust::seq, sorted_local_vertices, sorted_local_vertices + num_local_vertices, v); + }) == 0, + "Invalid input argument: edgelist_major_vertices has invalid vertex ID(s)."); + + CUGRAPH_EXPECTS( + thrust::count_if( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edgelist_major_vertices, + edgelist_major_vertices + num_edgelist_edges, + [num_local_vertices, + sorted_local_vertices = sorted_local_vertices.data()] __device__(auto v) { + return !thrust::binary_search( + thrust::seq, sorted_local_vertices, sorted_local_vertices + num_local_vertices, v); + }) == 0, + "Invalid input argument: edgelist_major_vertices has invalid vertex ID(s)."); + } + } +} + template std::enable_if_t, partition_t, vertex_t, edge_t>> -renumber_edgelist(raft::handle_t const &handle, - vertex_t const *vertices, - vertex_t num_local_vertices /* relevant only if vertices != nullptr */, - rmm::device_uvector &edgelist_major_vertices /* [INOUT] */, - rmm::device_uvector &edgelist_minor_vertices /* [INOUT] */, - bool is_hypergraph_partitioned) +renumber_edgelist(raft::handle_t const& handle, + vertex_t const* local_vertices, + vertex_t num_local_vertices /* relevant only if local_vertices != nullptr */, + vertex_t* edgelist_major_vertices /* [INOUT] */, + vertex_t* edgelist_minor_vertices /* [INOUT] */, + edge_t num_edgelist_edges, + bool is_hypergraph_partitioned, + bool do_expensive_check) { - auto &comm = handle.get_comms(); + auto& comm = handle.get_comms(); auto const comm_size = comm.get_size(); auto const comm_rank = comm.get_rank(); - auto &row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); auto const row_comm_size = row_comm.get_size(); auto const row_comm_rank = row_comm.get_rank(); - auto &col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); auto const col_comm_size = col_comm.get_size(); auto const col_comm_rank = col_comm.get_rank(); + if (do_expensive_check) { + expensive_check_edgelist(handle, + local_vertices, + num_local_vertices, + edgelist_major_vertices, + edgelist_minor_vertices, + num_edgelist_edges, + is_hypergraph_partitioned); + } + // 1. compute renumber map - auto renumber_map_labels = detail::compute_renumber_map( - handle, vertices, num_local_vertices, edgelist_major_vertices, edgelist_minor_vertices); + auto renumber_map_labels = + detail::compute_renumber_map(handle, + local_vertices, + num_local_vertices, + edgelist_major_vertices, + edgelist_minor_vertices, + num_edgelist_edges); // 2. 
initialize partition_t object, number_of_vertices, and number_of_edges for the coarsened // graph @@ -263,8 +423,7 @@ renumber_edgelist(raft::handle_t const &handle, col_comm_rank); auto number_of_vertices = vertex_partition_offsets.back(); - auto number_of_edges = host_scalar_allreduce( - comm, static_cast(edgelist_major_vertices.size()), handle.get_stream()); + auto number_of_edges = host_scalar_allreduce(comm, num_edgelist_edges, handle.get_stream()); // 3. renumber edges @@ -309,9 +468,9 @@ renumber_edgelist(raft::handle_t const &handle, return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); }); renumber_map.insert(pair_first, pair_first + renumber_map_major_labels.size()); - renumber_map.find(edgelist_major_vertices.begin(), - edgelist_major_vertices.end(), - edgelist_major_vertices.begin()); + renumber_map.find(edgelist_major_vertices, + edgelist_major_vertices + num_edgelist_edges, + edgelist_major_vertices); } { @@ -375,9 +534,9 @@ renumber_edgelist(raft::handle_t const &handle, return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); }); renumber_map.insert(pair_first, pair_first + renumber_map_minor_labels.size()); - renumber_map.find(edgelist_minor_vertices.begin(), - edgelist_minor_vertices.end(), - edgelist_minor_vertices.begin()); + renumber_map.find(edgelist_minor_vertices, + edgelist_minor_vertices + num_edgelist_edges, + edgelist_minor_vertices); } } @@ -387,14 +546,31 @@ renumber_edgelist(raft::handle_t const &handle, template std::enable_if_t> renumber_edgelist( - raft::handle_t const &handle, - vertex_t const *vertices, + raft::handle_t const& handle, + vertex_t const* vertices, vertex_t num_vertices /* relevant only if vertices != nullptr */, - rmm::device_uvector &edgelist_major_vertices /* [INOUT] */, - rmm::device_uvector &edgelist_minor_vertices /* [INOUT] */) + vertex_t* edgelist_major_vertices /* [INOUT] */, + vertex_t* edgelist_minor_vertices /* [INOUT] */, + edge_t num_edgelist_edges, + bool do_expensive_check) { - auto renumber_map_labels = detail::compute_renumber_map( - handle, vertices, num_vertices, edgelist_major_vertices, edgelist_minor_vertices); + if (do_expensive_check) { + expensive_check_edgelist(handle, + vertices, + num_vertices, + edgelist_major_vertices, + edgelist_minor_vertices, + num_edgelist_edges, + false); + } + + auto renumber_map_labels = + detail::compute_renumber_map(handle, + vertices, + num_vertices, + edgelist_major_vertices, + edgelist_minor_vertices, + num_edgelist_edges); double constexpr load_factor = 0.7; @@ -412,12 +588,10 @@ std::enable_if_t> renumber_edgelist( return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); }); renumber_map.insert(pair_first, pair_first + renumber_map_labels.size()); - renumber_map.find(edgelist_major_vertices.begin(), - edgelist_major_vertices.end(), - edgelist_major_vertices.begin()); - renumber_map.find(edgelist_minor_vertices.begin(), - edgelist_minor_vertices.end(), - edgelist_minor_vertices.begin()); + renumber_map.find( + edgelist_major_vertices, edgelist_major_vertices + num_edgelist_edges, edgelist_major_vertices); + renumber_map.find( + edgelist_minor_vertices, edgelist_minor_vertices + num_edgelist_edges, edgelist_minor_vertices); return std::move(renumber_map_labels); } @@ -427,92 +601,116 @@ std::enable_if_t> renumber_edgelist( template std::enable_if_t, partition_t, vertex_t, edge_t>> -renumber_edgelist(raft::handle_t const &handle, - rmm::device_uvector &edgelist_major_vertices /* [INOUT] */, - rmm::device_uvector &edgelist_minor_vertices /* [INOUT] 
*/, - bool is_hypergraph_partitioned) +renumber_edgelist(raft::handle_t const& handle, + vertex_t* edgelist_major_vertices /* [INOUT] */, + vertex_t* edgelist_minor_vertices /* [INOUT] */, + edge_t num_edgelist_edges, + bool is_hypergraph_partitioned, + bool do_expensive_check) { return detail::renumber_edgelist(handle, - static_cast(nullptr), + static_cast(nullptr), vertex_t{0}, edgelist_major_vertices, edgelist_minor_vertices, - is_hypergraph_partitioned); + num_edgelist_edges, + is_hypergraph_partitioned, + do_expensive_check); } template std::enable_if_t> renumber_edgelist( - raft::handle_t const &handle, - rmm::device_uvector &edgelist_major_vertices /* [INOUT] */, - rmm::device_uvector &edgelist_minor_vertices /* [INOUT] */) + raft::handle_t const& handle, + vertex_t* edgelist_major_vertices /* [INOUT] */, + vertex_t* edgelist_minor_vertices /* [INOUT] */, + edge_t num_edgelist_edges, + bool do_expensive_check) { return detail::renumber_edgelist(handle, - static_cast(nullptr), + static_cast(nullptr), vertex_t{0} /* dummy */, edgelist_major_vertices, - edgelist_minor_vertices); + edgelist_minor_vertices, + num_edgelist_edges, + do_expensive_check); } template std::enable_if_t, partition_t, vertex_t, edge_t>> -renumber_edgelist(raft::handle_t const &handle, - rmm::device_uvector const &vertices, - rmm::device_uvector &edgelist_major_vertices /* [INOUT] */, - rmm::device_uvector &edgelist_minor_vertices /* [INOUT] */, - bool is_hypergraph_partitioned) +renumber_edgelist(raft::handle_t const& handle, + vertex_t const* local_vertices, + vertex_t num_local_vertices, + vertex_t* edgelist_major_vertices /* [INOUT] */, + vertex_t* edgelist_minor_vertices /* [INOUT] */, + edge_t num_edgelist_edges, + bool is_hypergraph_partitioned, + bool do_expensive_check) { - return detail::renumber_edgelist( - handle, - vertices.data(), - static_cast(vertices.size()), - edgelist_major_vertices, - edgelist_minor_vertices, - is_hypergraph_partitioned); + return detail::renumber_edgelist(handle, + local_vertices, + num_local_vertices, + edgelist_major_vertices, + edgelist_minor_vertices, + num_edgelist_edges, + is_hypergraph_partitioned, + do_expensive_check); } template std::enable_if_t> renumber_edgelist( - raft::handle_t const &handle, - rmm::device_uvector const &vertices, - rmm::device_uvector &edgelist_major_vertices /* [INOUT] */, - rmm::device_uvector &edgelist_minor_vertices /* [INOUT] */) + raft::handle_t const& handle, + vertex_t const* vertices, + vertex_t num_vertices, + vertex_t* edgelist_major_vertices /* [INOUT] */, + vertex_t* edgelist_minor_vertices /* [INOUT] */, + edge_t num_edgelist_edges, + bool do_expensive_check) { - return detail::renumber_edgelist( - handle, - vertices.data(), - static_cast(vertices.size()), - edgelist_major_vertices, - edgelist_minor_vertices); + return detail::renumber_edgelist(handle, + vertices, + num_vertices, + edgelist_major_vertices, + edgelist_minor_vertices, + num_edgelist_edges, + do_expensive_check); } // explicit instantiation template std::tuple, partition_t, int32_t, int32_t> -renumber_edgelist( - raft::handle_t const &handle, - rmm::device_uvector &edgelist_major_vertices /* [INOUT] */, - rmm::device_uvector &edgelist_minor_vertices /* [INOUT] */, - bool is_hypergraph_partitioned); - -template std::tuple, partition_t, int32_t, int32_t> -renumber_edgelist( - raft::handle_t const &handle, - rmm::device_uvector const &vertices, - rmm::device_uvector &edgelist_major_vertices /* [INOUT] */, - rmm::device_uvector &edgelist_minor_vertices /* [INOUT] */, - 
bool is_hypergraph_partitioned); +renumber_edgelist(raft::handle_t const& handle, + int32_t* edgelist_major_vertices /* [INOUT] */, + int32_t* edgelist_minor_vertices /* [INOUT] */, + int32_t num_edgelist_edges, + bool is_hypergraph_partitioned, + bool do_expensive_check); template rmm::device_uvector renumber_edgelist( - raft::handle_t const &handle, - rmm::device_uvector &edgelist_major_vertices /* [INOUT] */, - rmm::device_uvector &edgelist_minor_vertices /* [INOUT] */); + raft::handle_t const& handle, + int32_t* edgelist_major_vertices /* [INOUT] */, + int32_t* edgelist_minor_vertices /* [INOUT] */, + int32_t num_edgelist_edges, + bool do_expensive_check); + +template std::tuple, partition_t, int32_t, int32_t> +renumber_edgelist(raft::handle_t const& handle, + int32_t const* local_vertices, + int32_t num_local_vertices, + int32_t* edgelist_major_vertices /* [INOUT] */, + int32_t* edgelist_minor_vertices /* [INOUT] */, + int32_t num_edgelist_edges, + bool is_hypergraph_partitioned, + bool do_expensive_check); template rmm::device_uvector renumber_edgelist( - raft::handle_t const &handle, - rmm::device_uvector const &vertices, - rmm::device_uvector &edgelist_major_vertices /* [INOUT] */, - rmm::device_uvector &edgelist_minor_vertices /* [INOUT] */); + raft::handle_t const& handle, + int32_t const* vertices, + int32_t num_vertices, + int32_t* edgelist_major_vertices /* [INOUT] */, + int32_t* edgelist_minor_vertices /* [INOUT] */, + int32_t num_edgelist_edges, + bool do_expensive_check); } // namespace experimental } // namespace cugraph From 8fa39fdd5bee29d8b6905edae9c29e9ebf6151af Mon Sep 17 00:00:00 2001 From: BradReesWork Date: Mon, 11 Jan 2021 09:00:20 -0500 Subject: [PATCH 085/343] improving error checking and docs --- .../cugraph/community/spectral_clustering.py | 68 +++++++++++++------ 1 file changed, 48 insertions(+), 20 deletions(-) diff --git a/python/cugraph/community/spectral_clustering.py b/python/cugraph/community/spectral_clustering.py index b5f175e8237..2ee0600a7ed 100644 --- a/python/cugraph/community/spectral_clustering.py +++ b/python/cugraph/community/spectral_clustering.py @@ -32,23 +32,23 @@ def spectralBalancedCutClustering( Parameters ---------- G : cugraph.Graph or networkx.Graph - cuGraph graph descriptor + graph descriptor num_clusters : integer - Specifies the number of clusters to find + Specifies the number of clusters to find, must be greater than 1 num_eigen_vects : integer Specifies the number of eigenvectors to use. Must be lower or equal to - num_clusters. + num_clusters. Default is 2 evs_tolerance: float - Specifies the tolerance to use in the eigensolver + Specifies the tolerance to use in the eigensolver. Default is 0.00001 evs_max_iter: integer - Specifies the maximum number of iterations for the eigensolver + Specifies the maximum number of iterations for the eigensolver. Default is 100 kmean_tolerance: float - Specifies the tolerance to use in the k-means solver + Specifies the tolerance to use in the k-means solver. Default is 0.00001 kmean_max_iter: integer - Specifies the maximum number of iterations for the k-means solver + Specifies the maximum number of iterations for the k-means solver. 
Default is 100 Returns @@ -73,6 +73,8 @@ def spectralBalancedCutClustering( >>> df = cugraph.spectralBalancedCutClustering(G, 5) """ + # Error checking in C++ code + G, isNx = check_nx_graph(G) df = spectral_clustering_wrapper.spectralBalancedCutClustering( @@ -109,24 +111,24 @@ def spectralModularityMaximizationClustering( Parameters ---------- - G : cugraph.Graph + G : cugraph.Graph or networkx.Graph cuGraph graph descriptor. This graph should have edge weights. num_clusters : integer Specifies the number of clusters to find num_eigen_vects : integer Specifies the number of eigenvectors to use. Must be lower or equal to - num_clusters + num_clusters. Default is 2 evs_tolerance: float - Specifies the tolerance to use in the eigensolver + Specifies the tolerance to use in the eigensolver. Default is 0.00001 evs_max_iter: integer - Specifies the maximum number of iterations for the eigensolver + Specifies the maximum number of iterations for the eigensolver. Default is 100 kmean_tolerance: float - Specifies the tolerance to use in the k-means solver + Specifies the tolerance to use in the k-means solver. Default is 0.00001 kmean_max_iter: integer - Specifies the maximum number of iterations for the k-means solver + Specifies the maximum number of iterations for the k-means solver. Default is 100 Returns @@ -148,6 +150,8 @@ def spectralModularityMaximizationClustering( >>> df = cugraph.spectralModularityMaximizationClustering(G, 5) """ + # Error checking in C++ code + G, isNx = check_nx_graph(G) df = spectral_clustering_wrapper.spectralModularityMaximizationClustering( @@ -173,12 +177,15 @@ def analyzeClustering_modularity(G, n_clusters, clustering, vertex_col_name='vertex', cluster_col_name='cluster'): """ - Compute the modularity score for a partitioning/clustering + Compute the modularity score for a given partitioning/clustering. + The assumption is that “clustering” is the results from a call + from a special clustering algorithm and contains columns named + “vertex” and “cluster”. Parameters ---------- - G : cugraph.Graph - cuGraph graph descriptor. This graph should have edge weights. + G : cugraph.Graph or networkx.Graph + graph descriptor. This graph should have edge weights. n_clusters : integer Specifies the number of clusters in the given clustering clustering : cudf.DataFrame @@ -204,10 +211,17 @@ def analyzeClustering_modularity(G, n_clusters, clustering, >>> G = cugraph.Graph() >>> G.from_cudf_edgelist(M, source='0', destination='1', edge_attr='2') >>> df = cugraph.spectralBalancedCutClustering(G, 5) - >>> score = cugraph.analyzeClustering_modularity(G, 5, df, - >>> 'vertex', 'cluster') + >>> score = cugraph.analyzeClustering_modularity(G, 5, df) """ + if type(vertex_col_name) is not str: + raise Exception("vertex_col_name must be a string") + + if type(cluster_col_name) is not str: + raise Exception("cluster_col_name must be a string") + + G, isNx = check_nx_graph(G) + if G.renumbered: clustering = G.add_internal_vertex_id(clustering, vertex_col_name, @@ -228,6 +242,9 @@ def analyzeClustering_edge_cut(G, n_clusters, clustering, cluster_col_name='cluster'): """ Compute the edge cut score for a partitioning/clustering + The assumption is that “clustering” is the results from a call + from a special clustering algorithm and contains columns named + “vertex” and “cluster”. 
Parameters ---------- @@ -258,10 +275,15 @@ def analyzeClustering_edge_cut(G, n_clusters, clustering, >>> G = cugraph.Graph() >>> G.from_cudf_edgelist(M, source='0', destination='1', edge_attr=None) >>> df = cugraph.spectralBalancedCutClustering(G, 5) - >>> score = cugraph.analyzeClustering_edge_cut(G, 5, df, - >>> 'vertex', 'cluster') + >>> score = cugraph.analyzeClustering_edge_cut(G, 5, df) """ + if type(vertex_col_name) is not str: + raise Exception("vertex_col_name must be a string") + + if type(cluster_col_name) is not str: + raise Exception("cluster_col_name must be a string") + G, isNx = check_nx_graph(G) if G.renumbered: @@ -318,6 +340,12 @@ def analyzeClustering_ratio_cut(G, n_clusters, clustering, >>> 'vertex', 'cluster') """ + if type(vertex_col_name) is not str: + raise Exception("vertex_col_name must be a string") + + if type(cluster_col_name) is not str: + raise Exception("cluster_col_name must be a string") + if G.renumbered: clustering = G.add_internal_vertex_id(clustering, vertex_col_name, From 68745a4fd242cf5a324e1c6b949b968bd71d4fd0 Mon Sep 17 00:00:00 2001 From: BradReesWork Date: Mon, 11 Jan 2021 11:48:55 -0500 Subject: [PATCH 086/343] issues 1117 - replaced Invalid API with Invalid input --- cpp/src/centrality/betweenness_centrality.cu | 4 +- cpp/src/community/ECG.cu | 4 +- .../community/extract_subgraph_by_vertex.cu | 4 +- cpp/src/components/connectivity.cu | 8 ++-- cpp/src/cores/core_number.cu | 8 ++-- cpp/src/experimental/graph.cu | 22 +++++----- cpp/src/experimental/graph_view.cu | 40 +++++++++---------- cpp/src/linear_assignment/hungarian.cu | 6 +-- cpp/src/link_analysis/gunrock_hits.cpp | 6 +-- cpp/src/link_analysis/pagerank.cu | 10 ++--- cpp/src/link_analysis/pagerank_1D.cu | 6 +-- cpp/src/link_analysis/pagerank_1D.cuh | 14 +++---- cpp/src/link_prediction/jaccard.cu | 10 ++--- cpp/src/link_prediction/overlap.cu | 10 ++--- cpp/src/traversal/sssp.cu | 4 +- 15 files changed, 78 insertions(+), 78 deletions(-) diff --git a/cpp/src/centrality/betweenness_centrality.cu b/cpp/src/centrality/betweenness_centrality.cu index 8ff62f7ddb6..c0a34de5f70 100644 --- a/cpp/src/centrality/betweenness_centrality.cu +++ b/cpp/src/centrality/betweenness_centrality.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -116,7 +116,7 @@ void verify_betweenness_centrality_input(result_t *result, static_assert(std::is_same::value || std::is_same::value, "result_t should be float or double"); - CUGRAPH_EXPECTS(result != nullptr, "Invalid API parameter: betwenness pointer is NULL"); + CUGRAPH_EXPECTS(result != nullptr, "Invalid input argument: betwenness pointer is NULL"); CUGRAPH_EXPECTS(number_of_sources >= 0, "Number of sources must be positive or equal to 0."); if (number_of_sources != 0) { CUGRAPH_EXPECTS(sources != nullptr, diff --git a/cpp/src/community/ECG.cu b/cpp/src/community/ECG.cu index ce7e9dd1ad2..65f1724f6e0 100644 --- a/cpp/src/community/ECG.cu +++ b/cpp/src/community/ECG.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -114,7 +114,7 @@ void ecg(raft::handle_t const &handle, vertex_t ensemble_size, vertex_t *clustering) { - CUGRAPH_EXPECTS(graph.edge_data != nullptr, "API error, louvain expects a weighted graph"); + CUGRAPH_EXPECTS(graph.edge_data != nullptr, "Invalid input argument: louvain expects a weighted graph"); CUGRAPH_EXPECTS(clustering != nullptr, "Invalid input argument: clustering is NULL"); cudaStream_t stream{0}; diff --git a/cpp/src/community/extract_subgraph_by_vertex.cu b/cpp/src/community/extract_subgraph_by_vertex.cu index c39b7f8ad0a..eb7b1d494a0 100644 --- a/cpp/src/community/extract_subgraph_by_vertex.cu +++ b/cpp/src/community/extract_subgraph_by_vertex.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -119,7 +119,7 @@ std::unique_ptr> extract_subgraph_vertex(GraphCOOView::value> connected_components_impl( { using ByteT = unsigned char; // minimum addressable unit - CUGRAPH_EXPECTS(graph.offsets != nullptr, "Invalid API parameter: graph.offsets is nullptr"); - CUGRAPH_EXPECTS(graph.indices != nullptr, "Invalid API parameter: graph.indices is nullptr"); + CUGRAPH_EXPECTS(graph.offsets != nullptr, "Invalid input argument: graph.offsets is nullptr"); + CUGRAPH_EXPECTS(graph.indices != nullptr, "Invalid input argument: graph.indices is nullptr"); VT nrows = graph.number_of_vertices; @@ -90,7 +90,7 @@ void connected_components(GraphCSRView const &graph, { cudaStream_t stream{nullptr}; - CUGRAPH_EXPECTS(labels != nullptr, "Invalid API parameter: labels parameter is NULL"); + CUGRAPH_EXPECTS(labels != nullptr, "Invalid input argument: labels parameter is NULL"); return detail::connected_components_impl(graph, connectivity_type, labels, stream); } diff --git a/cpp/src/cores/core_number.cu b/cpp/src/cores/core_number.cu index cd2b928a81e..091ba07ccc6 100644 --- a/cpp/src/cores/core_number.cu +++ b/cpp/src/cores/core_number.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -151,9 +151,9 @@ std::unique_ptr> k_core(GraphCOOView const &in_ VT num_vertex_ids, rmm::mr::device_memory_resource *mr) { - CUGRAPH_EXPECTS(vertex_id != nullptr, "Invalid API parameter: vertex_id is NULL"); - CUGRAPH_EXPECTS(core_number != nullptr, "Invalid API parameter: core_number is NULL"); - CUGRAPH_EXPECTS(k >= 0, "Invalid API parameter: k must be >= 0"); + CUGRAPH_EXPECTS(vertex_id != nullptr, "Invalid input argument: vertex_id is NULL"); + CUGRAPH_EXPECTS(core_number != nullptr, "Invalid input argument: core_number is NULL"); + CUGRAPH_EXPECTS(k >= 0, "Invalid input argument: k must be >= 0"); return detail::extract_subgraph(in_graph, vertex_id, core_number, k, num_vertex_ids, mr); } diff --git a/cpp/src/experimental/graph.cu b/cpp/src/experimental/graph.cu index 3a2b7126d22..0bea8a5c98b 100644 --- a/cpp/src/experimental/graph.cu +++ b/cpp/src/experimental/graph.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -233,7 +233,7 @@ graph_tget_handle_ptr()->get_stream(); CUGRAPH_EXPECTS(edgelists.size() > 0, - "Invalid API parameter: edgelists.size() should be non-zero."); + "Invalid input argument: edgelists.size() should be non-zero."); bool is_weighted = edgelists[0].p_edge_weights != nullptr; @@ -246,14 +246,14 @@ graph_t(col_comm_size))) || (!(partition.is_hypergraph_partitioned()) && (edgelists.size() == 1)), - "Invalid API parameter: errneous edgelists.size()."); + "Invalid input argument: errneous edgelists.size()."); // optional expensive checks (part 1/3) @@ -278,17 +278,17 @@ graph_t{ major_first, major_last, minor_first, minor_last}) == 0, - "Invalid API parameter: edgelists[] have out-of-range values."); + "Invalid input argument: edgelists[] have out-of-range values."); } number_of_local_edges_sum = host_scalar_allreduce(comm, number_of_local_edges_sum, default_stream); CUGRAPH_EXPECTS(number_of_local_edges_sum == this->get_number_of_edges(), - "Invalid API parameter: the sum of local edges doe counts not match with " + "Invalid input argument: the sum of local edges doe counts not match with " "number_of_local_edges."); CUGRAPH_EXPECTS( partition.get_vertex_partition_last(comm_size - 1) == number_of_vertices, - "Invalid API parameter: vertex partition should cover [0, number_of_vertices)."); + "Invalid input argument: vertex partition should cover [0, number_of_vertices)."); } // convert edge list (COO) to compressed sparse format (CSR or CSC) @@ -327,7 +327,7 @@ graph_t{}), - "Invalid API parameter: sorted_by_global_degree_within_vertex_partition is " + "Invalid input argument: sorted_by_global_degree_within_vertex_partition is " "set to true, but degrees are not non-ascending."); } @@ -427,7 +427,7 @@ graph_t{ 0, this->get_number_of_vertices(), 0, this->get_number_of_vertices()}) == 0, - "Invalid API parameter: edgelist have out-of-range values."); + "Invalid input argument: edgelist have out-of-range values."); // FIXME: check for symmetricity may better be implemetned with transpose(). if (this->is_symmetric()) {} @@ -476,7 +476,7 @@ graph_tget_number_of_vertices(), thrust::greater{}), - "Invalid API parameter: sorted_by_degree is set to true, but degrees are not " + "Invalid input argument: sorted_by_degree is set to true, but degrees are not " "non-ascending."); } diff --git a/cpp/src/experimental/graph_view.cu b/cpp/src/experimental/graph_view.cu index 04d2ea990df..6abfecb74f2 100644 --- a/cpp/src/experimental/graph_view.cu +++ b/cpp/src/experimental/graph_view.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -87,18 +87,18 @@ graph_view_t(row_comm_size))) || (!(partition.is_hypergraph_partitioned()) && (adj_matrix_partition_offsets.size() == 1)), - "Invalid API parameter: errneous adj_matrix_partition_offsets.size()."); + "Invalid input argument: errneous adj_matrix_partition_offsets.size()."); CUGRAPH_EXPECTS((sorted_by_global_degree_within_vertex_partition && (vertex_partition_segment_offsets.size() == @@ -106,7 +106,7 @@ graph_view_ton(default_stream), adj_matrix_partition_offsets[i], adj_matrix_partition_offsets[i] + (major_last - major_first + 1)), - "Invalid API parameter: adj_matrix_partition_offsets[] is not sorted."); + "Invalid input argument: adj_matrix_partition_offsets[] is not sorted."); edge_t number_of_local_edges{}; raft::update_host(&number_of_local_edges, adj_matrix_partition_offsets[i] + (major_last - major_first), @@ -148,12 +148,12 @@ graph_view_t{minor_first, minor_last}) == 0, - "Invalid API parameter: adj_matrix_partition_indices[] have out-of-range vertex IDs."); + "Invalid input argument: adj_matrix_partition_indices[] have out-of-range vertex IDs."); } number_of_local_edges_sum = host_scalar_allreduce( this->get_handle_ptr()->get_comms(), number_of_local_edges_sum, default_stream); CUGRAPH_EXPECTS(number_of_local_edges_sum == this->get_number_of_edges(), - "Invalid API parameter: the sum of local edges doe counts not match with " + "Invalid input argument: the sum of local edges doe counts not match with " "number_of_local_edges."); if (sorted_by_global_degree_within_vertex_partition) { @@ -162,7 +162,7 @@ graph_view_t{}), - "Invalid API parameter: sorted_by_global_degree_within_vertex_partition is " + "Invalid Invalid input argument: sorted_by_global_degree_within_vertex_partition is " "set to true, but degrees are not non-ascending."); for (int i = 0; i < (partition.is_hypergraph_partitioned() ? col_comm_size : row_comm_size); @@ -171,11 +171,11 @@ graph_view_tis_symmetric()) {} @@ -233,7 +233,7 @@ graph_view_ton(default_stream), offsets, offsets + (this->get_number_of_vertices() + 1)), - "Invalid API parameter: offsets is not sorted."); + "Invalid input argument: offsets is not sorted."); // better use thrust::any_of once https://github.com/thrust/thrust/issues/1016 is resolved CUGRAPH_EXPECTS( @@ -251,7 +251,7 @@ graph_view_tget_number_of_edges(), out_of_range_t{0, this->get_number_of_vertices()}) == 0, - "Invalid API parameter: adj_matrix_partition_indices[] have out-of-range vertex IDs."); + "Invalid input argument: adj_matrix_partition_indices[] have out-of-range vertex IDs."); if (sorted_by_degree) { auto degree_first = @@ -261,14 +261,14 @@ graph_view_tget_number_of_vertices(), thrust::greater{}), - "Invalid API parameter: sorted_by_degree is set to true, but degrees are not " + "Invalid input argument: sorted_by_degree is set to true, but degrees are not " "non-ascending."); CUGRAPH_EXPECTS(std::is_sorted(segment_offsets.begin(), segment_offsets.end()), - "Invalid API parameter: erroneous segment_offsets."); - CUGRAPH_EXPECTS(segment_offsets[0] == 0, "Invalid API parameter: segment_offsets."); + "Invalid input argument: erroneous segment_offsets."); + CUGRAPH_EXPECTS(segment_offsets[0] == 0, "Invalid input argument segment_offsets."); CUGRAPH_EXPECTS(segment_offsets.back() == this->get_number_of_vertices(), - "Invalid API parameter: segment_offsets."); + "Invalid input argument: segment_offsets."); } // FIXME: check for symmetricity may better be implemetned with transpose(). 
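[Editor's note: illustration only, not part of the patch] The hunks above and below apply one mechanical change: precondition failures raised through CUGRAPH_EXPECTS now use the prefix "Invalid input argument" instead of "Invalid API parameter". A minimal, self-contained sketch of the resulting message style follows; CHECK_INPUT, renumber_stub, and main are illustrative stand-ins, not cuGraph APIs.

    #include <cstdio>
    #include <stdexcept>
    #include <string>

    // Stand-in for cugraph's CUGRAPH_EXPECTS macro, which throws when the condition is false.
    #define CHECK_INPUT(cond, msg)                                                          \
      do {                                                                                  \
        if (!(cond)) throw std::invalid_argument(std::string("cuGraph failure: ") + (msg)); \
      } while (0)

    // Hypothetical entry point showing the standardized "Invalid input argument: ..." prefix.
    void renumber_stub(int const* edgelist_major_vertices,
                       int const* edgelist_minor_vertices,
                       int num_edgelist_edges)
    {
      CHECK_INPUT(edgelist_major_vertices != nullptr,
                  "Invalid input argument: edgelist_major_vertices is NULL");
      CHECK_INPUT(edgelist_minor_vertices != nullptr,
                  "Invalid input argument: edgelist_minor_vertices is NULL");
      CHECK_INPUT(num_edgelist_edges >= 0,
                  "Invalid input argument: num_edgelist_edges must be >= 0");
      // ... a real implementation would renumber the edge list here ...
    }

    int main()
    {
      try {
        renumber_stub(nullptr, nullptr, 10);
      } catch (std::invalid_argument const& e) {
        std::printf("%s\n", e.what());  // cuGraph failure: Invalid input argument: edgelist_major_vertices is NULL
      }
      return 0;
    }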
diff --git a/cpp/src/linear_assignment/hungarian.cu b/cpp/src/linear_assignment/hungarian.cu index 164a386c6dd..40f7be52c90 100644 --- a/cpp/src/linear_assignment/hungarian.cu +++ b/cpp/src/linear_assignment/hungarian.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -67,9 +67,9 @@ weight_t hungarian_sparse(raft::handle_t const &handle, vertex_t *assignment, cudaStream_t stream) { - CUGRAPH_EXPECTS(assignment != nullptr, "Invalid API parameter: assignment pointer is NULL"); + CUGRAPH_EXPECTS(assignment != nullptr, "Invalid input argument: assignment pointer is NULL"); CUGRAPH_EXPECTS(graph.edge_data != nullptr, - "Invalid API parameter: graph must have edge data (costs)"); + "Invalid input argument: graph must have edge data (costs)"); #ifdef TIMING HighResTimer hr_timer; diff --git a/cpp/src/link_analysis/gunrock_hits.cpp b/cpp/src/link_analysis/gunrock_hits.cpp index 8662c3bea79..5ffaacfe7a6 100644 --- a/cpp/src/link_analysis/gunrock_hits.cpp +++ b/cpp/src/link_analysis/gunrock_hits.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -42,9 +42,9 @@ void hits(cugraph::GraphCSRView const &graph, weight_t *hubs, weight_t *authorities) { - CUGRAPH_EXPECTS(hubs != nullptr, "Invalid API parameter: hubs array should be of size V"); + CUGRAPH_EXPECTS(hubs != nullptr, "Invalid input argument: hubs array should be of size V"); CUGRAPH_EXPECTS(authorities != nullptr, - "Invalid API parameter: authorities array should be of size V"); + "Invalid input argument: authorities array should be of size V"); // // NOTE: gunrock doesn't support passing a starting value diff --git a/cpp/src/link_analysis/pagerank.cu b/cpp/src/link_analysis/pagerank.cu index e5da24e328d..7d0122c506e 100644 --- a/cpp/src/link_analysis/pagerank.cu +++ b/cpp/src/link_analysis/pagerank.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved. 
* * NVIDIA CORPORATION and its licensors retain all intellectual property * and proprietary rights in and to this software, related documentation @@ -307,10 +307,10 @@ void pagerank_impl(raft::handle_t const &handle, if (personalization_subset_size != 0) { CUGRAPH_EXPECTS(personalization_subset != nullptr, - "Invalid API parameter: personalization_subset array should be of size " + "Invalid input argument: personalization_subset array should be of size " "personalization_subset_size"); CUGRAPH_EXPECTS(personalization_values != nullptr, - "Invalid API parameter: personalization_values array should be of size " + "Invalid input argument: personalization_values array should be of size " "personalization_subset_size"); CUGRAPH_EXPECTS(personalization_subset_size <= m, "Personalization size should be smaller than V"); @@ -378,11 +378,11 @@ void pagerank(raft::handle_t const &handle, int64_t max_iter, bool has_guess) { - CUGRAPH_EXPECTS(pagerank != nullptr, "Invalid API parameter: Pagerank array should be of size V"); + CUGRAPH_EXPECTS(pagerank != nullptr, "Invalid input argument: Pagerank array should be of size V"); // Multi-GPU if (handle.comms_initialized()) { CUGRAPH_EXPECTS(has_guess == false, - "Invalid API parameter: Multi-GPU Pagerank does not guess, please use the " + "Invalid input argument: Multi-GPU Pagerank does not guess, please use the " "single GPU version for this feature"); CUGRAPH_EXPECTS(max_iter > 0, "The number of iteration must be positive"); cugraph::mg::pagerank(handle, diff --git a/cpp/src/link_analysis/pagerank_1D.cu b/cpp/src/link_analysis/pagerank_1D.cu index 3774a364cf1..2447290000c 100644 --- a/cpp/src/link_analysis/pagerank_1D.cu +++ b/cpp/src/link_analysis/pagerank_1D.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -106,10 +106,10 @@ void Pagerank::setup(WT _alpha, // personalize if (personalization_subset_size != 0) { CUGRAPH_EXPECTS(personalization_subset != nullptr, - "Invalid API parameter: personalization_subset array should be of size " + "Invalid input argument: personalization_subset array should be of size " "personalization_subset_size"); CUGRAPH_EXPECTS(personalization_values != nullptr, - "Invalid API parameter: personalization_values array should be of size " + "Invalid input argument: personalization_values array should be of size " "personalization_subset_size"); CUGRAPH_EXPECTS(personalization_subset_size <= v_glob, "Personalization size should be smaller than V"); diff --git a/cpp/src/link_analysis/pagerank_1D.cuh b/cpp/src/link_analysis/pagerank_1D.cuh index feb410daa9a..de2e049df53 100644 --- a/cpp/src/link_analysis/pagerank_1D.cuh +++ b/cpp/src/link_analysis/pagerank_1D.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
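[Editor's note: illustration only, not part of the patch] The pagerank_1D.cuh hunk below only rewords the error strings; the preconditions themselves are unchanged. For readers skimming the diff, they amount to the checks sketched in this standalone helper (pagerank_inputs_ok is an illustrative name, not a cuGraph API):

    // Non-null CSR offsets/indices and output buffer, damping factor strictly inside (0, 1),
    // and a positive iteration count, mirroring the CUGRAPH_EXPECTS calls in the hunk below.
    bool pagerank_inputs_ok(void const* offsets,
                            void const* indices,
                            void* pagerank_result,
                            double damping_factor,
                            long n_iter)
    {
      return offsets != nullptr && indices != nullptr && pagerank_result != nullptr &&
             damping_factor > 0.0 && damping_factor < 1.0 && n_iter > 0;
    }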
@@ -90,17 +90,17 @@ int pagerank(raft::handle_t const &handle, const double tolerance = 1e-5) { // null pointers check - CUGRAPH_EXPECTS(G.offsets != nullptr, "Invalid API parameter - offsets is null"); - CUGRAPH_EXPECTS(G.indices != nullptr, "Invalid API parameter - indidices is null"); + CUGRAPH_EXPECTS(G.offsets != nullptr, "Invalid input argument - offsets is null"); + CUGRAPH_EXPECTS(G.indices != nullptr, "Invalid input argument - indidices is null"); CUGRAPH_EXPECTS(pagerank_result != nullptr, - "Invalid API parameter - pagerank output memory must be allocated"); + "Invalid input argument - pagerank output memory must be allocated"); // parameter values CUGRAPH_EXPECTS(damping_factor > 0.0, - "Invalid API parameter - invalid damping factor value (alpha<0)"); + "Invalid input argument - invalid damping factor value (alpha<0)"); CUGRAPH_EXPECTS(damping_factor < 1.0, - "Invalid API parameter - invalid damping factor value (alpha>1)"); - CUGRAPH_EXPECTS(n_iter > 0, "Invalid API parameter - n_iter must be > 0"); + "Invalid input argument - invalid damping factor value (alpha>1)"); + CUGRAPH_EXPECTS(n_iter > 0, "Invalid input argument - n_iter must be > 0"); rmm::device_vector degree(G.number_of_vertices); diff --git a/cpp/src/link_prediction/jaccard.cu b/cpp/src/link_prediction/jaccard.cu index 70952974b39..83a4ec6e713 100644 --- a/cpp/src/link_prediction/jaccard.cu +++ b/cpp/src/link_prediction/jaccard.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -315,7 +315,7 @@ int jaccard_pairs(vertex_t n, template void jaccard(GraphCSRView const &graph, WT const *weights, WT *result) { - CUGRAPH_EXPECTS(result != nullptr, "Invalid API parameter: result pointer is NULL"); + CUGRAPH_EXPECTS(result != nullptr, "Invalid input argument: result pointer is NULL"); rmm::device_vector weight_i(graph.number_of_edges); rmm::device_vector weight_s(graph.number_of_edges); @@ -352,9 +352,9 @@ void jaccard_list(GraphCSRView const &graph, VT const *second, WT *result) { - CUGRAPH_EXPECTS(result != nullptr, "Invalid API parameter: result pointer is NULL"); - CUGRAPH_EXPECTS(first != nullptr, "Invalid API parameter: first is NULL"); - CUGRAPH_EXPECTS(second != nullptr, "Invalid API parameter: second in NULL"); + CUGRAPH_EXPECTS(result != nullptr, "Invalid input argument: result pointer is NULL"); + CUGRAPH_EXPECTS(first != nullptr, "Invalid input argument: first is NULL"); + CUGRAPH_EXPECTS(second != nullptr, "Invalid input argument: second in NULL"); rmm::device_vector weight_i(num_pairs, WT{0.0}); rmm::device_vector weight_s(num_pairs); diff --git a/cpp/src/link_prediction/overlap.cu b/cpp/src/link_prediction/overlap.cu index e3f80b50d9a..83fdc799649 100644 --- a/cpp/src/link_prediction/overlap.cu +++ b/cpp/src/link_prediction/overlap.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -316,7 +316,7 @@ int overlap_pairs(vertex_t n, template void overlap(GraphCSRView const &graph, WT const *weights, WT *result) { - CUGRAPH_EXPECTS(result != nullptr, "Invalid API parameter: result pointer is NULL"); + CUGRAPH_EXPECTS(result != nullptr, "Invalid input argument: result pointer is NULL"); rmm::device_vector weight_i(graph.number_of_edges); rmm::device_vector weight_s(graph.number_of_edges); @@ -353,9 +353,9 @@ void overlap_list(GraphCSRView const &graph, VT const *second, WT *result) { - CUGRAPH_EXPECTS(result != nullptr, "Invalid API parameter: result pointer is NULL"); - CUGRAPH_EXPECTS(first != nullptr, "Invalid API parameter: first column is NULL"); - CUGRAPH_EXPECTS(second != nullptr, "Invalid API parameter: second column is NULL"); + CUGRAPH_EXPECTS(result != nullptr, "Invalid input argument: result pointer is NULL"); + CUGRAPH_EXPECTS(first != nullptr, "Invalid input argument: first column is NULL"); + CUGRAPH_EXPECTS(second != nullptr, "Invalid input argument: second column is NULL"); rmm::device_vector weight_i(num_pairs); rmm::device_vector weight_s(num_pairs); diff --git a/cpp/src/traversal/sssp.cu b/cpp/src/traversal/sssp.cu index 4018c9d9878..6ffbbbf462b 100644 --- a/cpp/src/traversal/sssp.cu +++ b/cpp/src/traversal/sssp.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -247,7 +247,7 @@ void sssp(GraphCSRView const &graph, VT *predecessors, const VT source_vertex) { - CUGRAPH_EXPECTS(distances || predecessors, "Invalid API parameter, both outputs are nullptr"); + CUGRAPH_EXPECTS(distances || predecessors, "Invalid input argument, both outputs are nullptr"); if (typeid(VT) != typeid(int)) CUGRAPH_FAIL("Unsupported vertex id data type, please use int"); if (typeid(ET) != typeid(int)) CUGRAPH_FAIL("Unsupported edge id data type, please use int"); From 502fe3e729459aef2918f16ab52d7939a3aafa79 Mon Sep 17 00:00:00 2001 From: BradReesWork Date: Mon, 11 Jan 2021 11:50:26 -0500 Subject: [PATCH 087/343] issues 1117 --- cpp/src/layout/force_atlas2.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/layout/force_atlas2.cu b/cpp/src/layout/force_atlas2.cu index ef00f504d86..7c4c1e26907 100644 --- a/cpp/src/layout/force_atlas2.cu +++ b/cpp/src/layout/force_atlas2.cu @@ -38,7 +38,7 @@ void force_atlas2(GraphCOOView &graph, bool verbose, internals::GraphBasedDimRedCallback *callback) { - CUGRAPH_EXPECTS(pos != nullptr, "Invalid API parameter: pos array should be of size 2 * V"); + CUGRAPH_EXPECTS(pos != nullptr, "Invalid input argument: pos array should be of size 2 * V"); CUGRAPH_EXPECTS(graph.number_of_vertices != 0, "Invalid input: Graph is empty"); if (!barnes_hut_optimize) { From 9963b6d2389410b4f5e01d0f83c8841675162f2c Mon Sep 17 00:00:00 2001 From: Joseph Nke Date: Mon, 11 Jan 2021 13:33:47 -0500 Subject: [PATCH 088/343] oversubscription --- test_betweenness_oversubs.py | 578 +++++++++++++++++++++++++++++++++++ 1 file changed, 578 insertions(+) create mode 100755 test_betweenness_oversubs.py diff --git a/test_betweenness_oversubs.py b/test_betweenness_oversubs.py new file mode 100755 index 00000000000..7d7012b6876 --- /dev/null +++ b/test_betweenness_oversubs.py @@ -0,0 +1,578 @@ +# Copyright (c) 2019-2020, NVIDIA CORPORATION.: +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in 
compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import gc + +import pytest + +import cugraph +from cugraph.tests import utils +import random +import numpy as np +import cudf +import cupy +import rmm + + +# Temporarily suppress warnings till networkX fixes deprecation warnings +# (Using or importing the ABCs from 'collections' instead of from +# 'collections.abc' is deprecated, and in 3.8 it will stop working) for +# python 3.7. Also, this import networkx needs to be relocated in the +# third-party group once this gets fixed. +import warnings + +#rmm.mr.set_current_device_resource(rmm.mr.ManagedMemoryResource()) +#rmm.reinitialize(managed_memory=True) +#assert(rmm.is_initialized()) + + +with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=DeprecationWarning) + import networkx as nx + +# ============================================================================= +# Parameters +# ============================================================================= +DIRECTED_GRAPH_OPTIONS = [False, True] +WEIGHTED_GRAPH_OPTIONS = [False, True] +ENDPOINTS_OPTIONS = [False, True] +NORMALIZED_OPTIONS = [False, True] +DEFAULT_EPSILON = 0.0001 + +SUBSET_SIZE_OPTIONS = [4, None] +SUBSET_SEED_OPTIONS = [42] + +# NOTE: The following is not really being exploited in the tests as the +# datasets that are used are too small to compare, but it ensures that both +# path are actually sane +RESULT_DTYPE_OPTIONS = [np.float32, np.float64] + + +# ============================================================================= +# Comparison functions +# ============================================================================= +def calc_betweenness_centrality( + Fixture_params_dts_sml, + directed=True, #no need of this + k=None, + normalized=False, + weight=None, + endpoints=False, + seed=None, + result_dtype=np.float64, + use_k_full=False, + multi_gpu_batch=False, + edgevals=False, #no need of this +): + """ Generate both cugraph and networkx betweenness centrality + + Parameters + ---------- + graph_file : string + Path to COO Graph representation in .csv format + + directed : bool, optional, default=True + + k : int or None, optional, default=None + int: Number of sources to sample from + None: All sources are used to compute + + normalized : bool + True: Normalize Betweenness Centrality scores + False: Scores are left unnormalized + + weight : cudf.DataFrame: + Not supported as of 06/2020 + + endpoints : bool + True: Endpoints are included when computing scores + False: Endpoints are not considered + + seed : int or None, optional, default=None + Seed for random sampling of the starting point + + result_dtype : numpy.dtype + Expected type of the result, either np.float32 or np.float64 + + use_k_full : bool + When True, if k is None replaces k by the number of sources of the + Graph + + multi_gpu_batch : bool + When True, enable mg batch after constructing the graph + + edgevals: bool + When True, enable tests with weighted graph, should be ignored + during computation. 
+ + Returns + ------- + + sorted_df : cudf.DataFrame + Contains 'vertex' and 'cu_bc' 'ref_bc' columns, where 'cu_bc' + and 'ref_bc' are the two betweenness centrality scores to compare. + The dataframe is expected to be sorted based on 'vertex', so that we + can use cupy.isclose to compare the scores. + """ + G = None + Gnx = None + + G, Gnx = Fixture_params_dts_sml + + + + assert G is not None and Gnx is not None + if multi_gpu_batch: + G.enable_batch() + + calc_func = None + if k is not None and seed is not None: + calc_func = _calc_bc_subset + elif k is not None: + calc_func = _calc_bc_subset_fixed + else: # We processed to a comparison using every sources + if use_k_full: + k = Gnx.number_of_nodes() + calc_func = _calc_bc_full + sorted_df = calc_func( + G, + Gnx, + k=k, + normalized=normalized, + weight=weight, + endpoints=endpoints, + seed=seed, + result_dtype=result_dtype, + ) + + return sorted_df + + +def _calc_bc_subset( + G, Gnx, normalized, weight, endpoints, k, seed, result_dtype +): + # NOTE: Networkx API does not allow passing a list of vertices + # And the sampling is operated on Gnx.nodes() directly + # We first mimic acquisition of the nodes to compare with same sources + random.seed(seed) # It will be called again in nx's call + sources = random.sample(Gnx.nodes(), k) + df = cugraph.betweenness_centrality( + G, + k=sources, + normalized=normalized, + weight=weight, + endpoints=endpoints, + result_dtype=result_dtype, + ) + sorted_df = df.sort_values("vertex").rename( + columns={"betweenness_centrality": "cu_bc"}, copy=False + ).reset_index(drop=True) + + nx_bc = nx.betweenness_centrality( + Gnx, + k=k, + normalized=normalized, + weight=weight, + endpoints=endpoints, + seed=seed, + ) + + _, nx_bc = zip(*sorted(nx_bc.items())) + nx_df = cudf.DataFrame({"ref_bc": nx_bc}) + + merged_sorted_df = cudf.concat([sorted_df, nx_df], axis=1, sort=False) + + return merged_sorted_df + + +def _calc_bc_subset_fixed( + G, Gnx, normalized, weight, endpoints, k, seed, result_dtype +): + assert isinstance(k, int), ( + "This test is meant for verifying coherence " + "when k is given as an int" + ) + # In the fixed set we compare cu_bc against itself as we random.seed(seed) + # on the same seed and then sample on the number of vertices themselves + if seed is None: + seed = 123 # random.seed(None) uses time, but we want same sources + random.seed(seed) # It will be called again in cugraph's call + sources = random.sample(range(G.number_of_vertices()), k) + + if G.renumbered: + sources_df = cudf.DataFrame({'src': sources}) + sources = G.unrenumber(sources_df, 'src')['src'].to_pandas().tolist() + + # The first call is going to proceed to the random sampling in the same + # fashion as the lines above + df = cugraph.betweenness_centrality( + G, + k=k, + normalized=normalized, + weight=weight, + endpoints=endpoints, + seed=seed, + result_dtype=result_dtype, + ) + sorted_df = df.sort_values("vertex").rename( + columns={"betweenness_centrality": "cu_bc"}, copy=False + ).reset_index(drop=True) + + # The second call is going to process source that were already sampled + # We set seed to None as k : int, seed : not none should not be normal + # behavior + df2 = cugraph.betweenness_centrality( + G, + k=sources, + normalized=normalized, + weight=weight, + endpoints=endpoints, + seed=None, + result_dtype=result_dtype, + ) + sorted_df2 = df2.sort_values("vertex").rename( + columns={"betweenness_centrality": "ref_bc"}, copy=False + ).reset_index(drop=True) + + merged_sorted_df = cudf.concat( + [sorted_df, 
sorted_df2["ref_bc"]], axis=1, sort=False + ) + + return merged_sorted_df + + +def _calc_bc_full( + G, Gnx, normalized, weight, endpoints, k, seed, result_dtype +): + df = cugraph.betweenness_centrality( + G, + k=k, + normalized=normalized, + weight=weight, + endpoints=endpoints, + result_dtype=result_dtype, + ) + assert ( + df["betweenness_centrality"].dtype == result_dtype + ), "'betweenness_centrality' column has not the expected type" + nx_bc = nx.betweenness_centrality( + Gnx, k=k, normalized=normalized, weight=weight, endpoints=endpoints + ) + + sorted_df = df.sort_values("vertex").rename( + columns={"betweenness_centrality": "cu_bc"}, copy=False + ).reset_index(drop=True) + _, nx_bc = zip(*sorted(nx_bc.items())) + nx_df = cudf.DataFrame({"ref_bc": nx_bc}) + + merged_sorted_df = cudf.concat([sorted_df, nx_df], axis=1, sort=False) + + return merged_sorted_df + + +# ============================================================================= +# Utils +# ============================================================================= +# NOTE: We assume that both column are ordered in such way that values +# at ith positions are expected to be compared in both columns +# i.e: sorted_df[idx][first_key] should be compared to +# sorted_df[idx][second_key] +def compare_scores(sorted_df, first_key, second_key, epsilon=DEFAULT_EPSILON): + errors = sorted_df[ + ~cupy.isclose( + sorted_df[first_key], sorted_df[second_key], rtol=epsilon + ) + ] + num_errors = len(errors) + if num_errors > 0: + print(errors) + assert ( + num_errors == 0 + ), "Mismatch were found when comparing '{}' and '{}' (rtol = {})".format( + first_key, second_key, epsilon + ) + + +def prepare_test(): + gc.collect() + + + +# ============================================================================= +# Pytest Fixtures +# ============================================================================= +DIRECTED = [pytest.param(d) for d in DIRECTED_GRAPH_OPTIONS] +DATASETS_SMALL = [pytest.param(d) for d in utils.DATASETS_SMALL] +DATASETS_LARGE = [pytest.param(d) for d in utils.DATASETS_LARGE] +DATASETS_UNRENUMBERED = [pytest.param(d) for d in utils.DATASETS_UNRENUMBERED] +WEIGHTED_GRAPH_OPTIONS = [pytest.param(w) for w in WEIGHTED_GRAPH_OPTIONS] + + +fixture_params_dts_sml = utils.genFixtureParamsProduct( + (DATASETS_SMALL, "grph"), + (DIRECTED, "dirctd"), + (WEIGHTED_GRAPH_OPTIONS, "wgtd_gph_opts")) + +fixture_params_dts_urnbrd = utils.genFixtureParamsProduct( + (DATASETS_UNRENUMBERED, "grph"), + (DIRECTED, "dirctd"), + (WEIGHTED_GRAPH_OPTIONS, "wgtd_gph_opts")) + +#parameters for large graph +fixture_params_dts_lrg = utils.genFixtureParamsProduct( + (DATASETS_LARGE, "grph"), + (DIRECTED, "dirctd"), + (WEIGHTED_GRAPH_OPTIONS, "wgtd_gph_opts")) + + +@pytest.fixture(scope="module", params=fixture_params_dts_sml) +def Fixture_params_dts_sml(request): + return utils.build_cu_and_nx_graphs(*request.param) + + + + +@pytest.fixture(scope="module", params=fixture_params_dts_urnbrd) +def Fixture_params_dts_urnbrd(request): + return utils.build_cu_and_nx_graphs(*request.param) + + + + +#Fixture added for the large dataset +@pytest.fixture(scope="module", params=fixture_params_dts_lrg) +def Fixture_params_dts_lrg(request): + try: + return utils.build_cu_and_nx_graphs(*request.param) + except MemoryError: + rmm.reinitialize(managed_memory=True) + assert(rmm.is_initialized()) + return utils.build_cu_and_nx_graphs(*request.param) + + + + + + +# ============================================================================= +# Tests +# 
============================================================================= +@pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) +@pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) +@pytest.mark.parametrize("weight", [None]) +@pytest.mark.parametrize("endpoints", ENDPOINTS_OPTIONS) +@pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS) +@pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) +def test_betweenness_centrality( + Fixture_params_dts_sml, + subset_size, + normalized, + weight, + endpoints, + subset_seed, + result_dtype, +): + prepare_test() + sorted_df = calc_betweenness_centrality( + Fixture_params_dts_sml, + normalized=normalized, + k=subset_size, + weight=weight, + endpoints=endpoints, + seed=subset_seed, + result_dtype=result_dtype, + ) + compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") + +#Test for the large dataset. omitting all parameters except G and Gnx - +def test_betweenness_centrality_large( + Fixture_params_dts_lrg +): + prepare_test() + sorted_df = calc_betweenness_centrality( #used default paramters of this function + Fixture_params_dts_lrg + ) + compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") + + + + + + +@pytest.mark.parametrize("subset_size", [None]) +@pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) +@pytest.mark.parametrize("weight", [None]) +@pytest.mark.parametrize("endpoints", ENDPOINTS_OPTIONS) +@pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS) +@pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) +@pytest.mark.parametrize("use_k_full", [True]) +def test_betweenness_centrality_k_full( + Fixture_params_dts_sml, + subset_size, + normalized, + weight, + endpoints, + subset_seed, + result_dtype, + use_k_full, +): + """Tests full betweenness centrality by using k = G.number_of_vertices() + instead of k=None, checks that k scales properly""" + prepare_test() + sorted_df = calc_betweenness_centrality( + Fixture_params_dts_sml, + normalized=normalized, + k=subset_size, + weight=weight, + endpoints=endpoints, + seed=subset_seed, + result_dtype=result_dtype, + use_k_full=use_k_full, + ) + compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") + + +# NOTE: This test should only be execute on unrenumbered datasets +# the function operating the comparison inside is first proceeding +# to a random sampling over the number of vertices (thus direct offsets) +# in the graph structure instead of actual vertices identifiers +@pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) +@pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) +@pytest.mark.parametrize("weight", [None]) +@pytest.mark.parametrize("endpoints", ENDPOINTS_OPTIONS) +@pytest.mark.parametrize("subset_seed", [None]) +@pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) +def test_betweenness_centrality_fixed_sample( + Fixture_params_dts_urnbrd, + subset_size, + normalized, + weight, + endpoints, + subset_seed, + result_dtype, +): + """Test Betweenness Centrality using a subset + + Only k sources are considered for an approximate Betweenness Centrality + """ + prepare_test() + sorted_df = calc_betweenness_centrality( + Fixture_params_dts_urnbrd, + k=subset_size, + normalized=normalized, + weight=weight, + endpoints=endpoints, + seed=subset_seed, + result_dtype=result_dtype, + ) + compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") + + +@pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) +@pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) 
+@pytest.mark.parametrize("weight", [[]]) +@pytest.mark.parametrize("endpoints", ENDPOINTS_OPTIONS) +@pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS) +@pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) +def test_betweenness_centrality_weight_except( + Fixture_params_dts_sml, + subset_size, + normalized, + weight, + endpoints, + subset_seed, + result_dtype, +): + """Calls betwenness_centrality with weight + + As of 05/28/2020, weight is not supported and should raise + a NotImplementedError + """ + prepare_test() + with pytest.raises(NotImplementedError): + sorted_df = calc_betweenness_centrality( + Fixture_params_dts_sml, + k=subset_size, + normalized=normalized, + weight=weight, + endpoints=endpoints, + seed=subset_seed, + result_dtype=result_dtype, + ) + compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") + + + +@pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) +@pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) +@pytest.mark.parametrize("weight", [None]) +@pytest.mark.parametrize("endpoints", ENDPOINTS_OPTIONS) +@pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS) +@pytest.mark.parametrize("result_dtype", [str]) +def test_betweenness_invalid_dtype( + Fixture_params_dts_sml, + subset_size, + normalized, + weight, + endpoints, + subset_seed, + result_dtype, +): + """Test calls edge_betwenness_centrality an invalid type""" + + prepare_test() + with pytest.raises(TypeError): + sorted_df = calc_betweenness_centrality( + Fixture_params_dts_sml, + k=subset_size, + normalized=normalized, + weight=weight, + endpoints=endpoints, + seed=subset_seed, + result_dtype=result_dtype, + ) + compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") + + +@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) +@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) +@pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS) +def test_betweenness_centrality_nx( + graph_file, + directed, + edgevals +): + prepare_test() + + Gnx = utils.generate_nx_graph_from_file(graph_file, directed, edgevals) + + nx_bc = nx.betweenness_centrality(Gnx) + cu_bc = cugraph.betweenness_centrality(Gnx) + + # Calculating mismatch + networkx_bc = sorted(nx_bc.items(), key=lambda x: x[0]) + cugraph_bc = sorted(cu_bc.items(), key=lambda x: x[0]) + err = 0 + assert len(cugraph_bc) == len(networkx_bc) + for i in range(len(cugraph_bc)): + if ( + abs(cugraph_bc[i][1] - networkx_bc[i][1]) > 0.01 + and cugraph_bc[i][0] == networkx_bc[i][0] + ): + err = err + 1 + print(f"{cugraph_bc[i][1]} and {cugraph_bc[i][1]}") + print("Mismatches:", err) + assert err < (0.01 * len(cugraph_bc)) + + + From d753e7056667cb9e0b589e54cc0886fe9e27acdd Mon Sep 17 00:00:00 2001 From: BradReesWork Date: Mon, 11 Jan 2021 14:09:36 -0500 Subject: [PATCH 089/343] cleaned up imports --- python/cugraph/centrality/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cugraph/centrality/__init__.py b/python/cugraph/centrality/__init__.py index da882a61850..83095d64292 100644 --- a/python/cugraph/centrality/__init__.py +++ b/python/cugraph/centrality/__init__.py @@ -12,7 +12,7 @@ # limitations under the License. 
from cugraph.centrality.katz_centrality import katz_centrality -from cugraph.centrality.betweenness_centrality import betweenness_centrality from cugraph.centrality.betweenness_centrality import ( + betweenness_centrality, edge_betweenness_centrality, ) From d5b12546a31f11dcf21a00a30ea09dbcff7d9332 Mon Sep 17 00:00:00 2001 From: BradReesWork Date: Mon, 11 Jan 2021 14:09:48 -0500 Subject: [PATCH 090/343] better docs --- .../centrality/betweenness_centrality.py | 40 +++++++++++-------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/python/cugraph/centrality/betweenness_centrality.py b/python/cugraph/centrality/betweenness_centrality.py index 93bdce7c515..7f4d25dfddf 100644 --- a/python/cugraph/centrality/betweenness_centrality.py +++ b/python/cugraph/centrality/betweenness_centrality.py @@ -36,8 +36,10 @@ def betweenness_centrality( Betweenness centrality is a measure of the number of shortest paths that pass through a vertex. A vertex with a high betweenness centrality score has more paths passing through it and is therefore believed to be more - important. Rather than doing an all-pair shortest path, a sample of k - starting vertices can be used. + important. + + To improve performance. rather than doing an all-pair shortest path, + a sample of k starting vertices can be used. CuGraph does not currently support the 'endpoints' and 'weight' parameters as seen in the corresponding networkX call. @@ -52,19 +54,18 @@ def betweenness_centrality( k : int or list or None, optional, default=None If k is not None, use k node samples to estimate betweenness. Higher - values give better approximation - If k is a list, use the content of the list for estimation: the list - should contain vertices identifiers. - If k is None (the default), all the vertices are used to estimate - betweenness. - Vertices obtained through sampling or defined as a list will be used as - sources for traversals inside the algorithm. + values give better approximation. If k is a list, use the content + of the list for estimation: the list should contain vertices + identifiers. If k is None (the default), all the vertices are used + to estimate betweenness. Vertices obtained through sampling or + defined as a list will be used assources for traversals inside the + algorithm. normalized : bool, optional Default is True. If true, the betweenness values are normalized by - 2 / ((n - 1) * (n - 2)) for Graphs (undirected), and - 1 / ((n - 1) * (n - 2)) for DiGraphs (directed graphs) + __2 / ((n - 1) * (n - 2))__ for Graphs (undirected), and + __1 / ((n - 1) * (n - 2))__ for DiGraphs (directed graphs) where n is the number of nodes in G. Normalization will ensure that values are in [0, 1], this normalization scales for the highest possible value where one @@ -143,17 +144,24 @@ def betweenness_centrality( else: return df - +# NOTE: result_type=float could be an intuitive way to indicate the result type def edge_betweenness_centrality( - G, k=None, normalized=True, weight=None, seed=None, result_dtype=np.float64 + G, + k=None, + normalized=True, + weight=None, + seed=None, + result_dtype=np.float64 ): """ Compute the edge betweenness centrality for all edges of the graph G. Betweenness centrality is a measure of the number of shortest paths that pass over an edge. An edge with a high betweenness centrality score has more paths passing over it and is therefore believed to be - more important. Rather than doing an all-pair shortest path, a sample - of k starting vertices can be used. + more important. 
+ + To improve performance, rather than doing an all-pair shortest path, + a sample of k starting vertices can be used. CuGraph does not currently support the 'weight' parameter as seen in the corresponding networkX call. @@ -168,7 +176,7 @@ def edge_betweenness_centrality( k : int or list or None, optional, default=None If k is not None, use k node samples to estimate betweenness. Higher - values give better approximation + values give better approximation. If k is a list, use the content of the list for estimation: the list should contain vertices identifiers. Vertices obtained through sampling or defined as a list will be used as From d197f68ea8b774a4d784dc81b3ab8f1c7aca500c Mon Sep 17 00:00:00 2001 From: BradReesWork Date: Mon, 11 Jan 2021 14:13:58 -0500 Subject: [PATCH 091/343] drop duplicate edge centrality entry --- docs/source/api.rst | 7 ------- 1 file changed, 7 deletions(-) diff --git a/docs/source/api.rst b/docs/source/api.rst index 459e5fbf4f1..b36fc6449e5 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -40,13 +40,6 @@ Betweenness Centrality :members: :undoc-members: -Edge Betweenness Centrality ---------------------------- - -.. automodule:: cugraph.centrality.edge_betweenness_centrality - :members: - :undoc-members: - Katz Centrality --------------- From 843be5262f0d504c72d474e3f43b805f7ef86ef5 Mon Sep 17 00:00:00 2001 From: BradReesWork Date: Mon, 11 Jan 2021 14:14:09 -0500 Subject: [PATCH 092/343] added more introp doc --- docs/source/cugraph_intro.md | 66 +++++++++++++++++++++++++++++++++++- 1 file changed, 65 insertions(+), 1 deletion(-) diff --git a/docs/source/cugraph_intro.md b/docs/source/cugraph_intro.md index 5bf2b715462..073e13d37ca 100644 --- a/docs/source/cugraph_intro.md +++ b/docs/source/cugraph_intro.md @@ -1,8 +1,72 @@ # cuGraph Introduction +The Data Scientist has a collection of techniques within their +proverbial toolbox. Data engineering, statistical analysis, and +machine learning are among the most commonly known. However, there +are numerous cases where the focus of the analysis is on the +relationship between data elements. In those cases, the data is best +represented as a graph. Graph analysis, also called network analysis, +is a collection of algorithms for answering questions posed against +graph data. Graph analysis is not new. +The first graph problem was posed by Euler in 1736, the [Seven Bridges of +Konigsberg](https://en.wikipedia.org/wiki/Seven_Bridges_of_K%C3%B6nigsberg), +and laid the foundation for the mathematical field of graph theory. +The application of graph analysis covers a wide variety of fields, including +marketing, biology, physics, computer science, sociology, and cyber to name a few. -## Terminology +RAPIDS cuGraph is a library of graph algorithms that seamlessly integrates +into the RAPIDS data science ecosystem and allows the data scientist to easily +call graph algorithms using data stored in a GPU DataFrame, NetworkX Graphs, or even +CuPy or SciPy sparse Matrix. + + +# Vision +The vision of RAPIDS cuGraph is to ___make graph analysis ubiquitous to the +point that users just think in terms of analysis and not technologies or +frameworks___. This is a goal that many of us on the cuGraph team have been +working on for almost twenty years. Many of the early attempts focused on +solving one problem or using one technique. Those early attempts worked for +the initial goal but tended to break as the scope changed (e.g., shifting +to solving a dynamic graph problem with a static graph solution). 
The limiting +factors usually came down to compute power, ease-of-use, or choosing a data +structure that was not suited for all problems. NVIDIA GPUs, CUDA, and RAPIDS +have totally changed the paradigm and the goal of an accelerated unified graph +analytic library is now possible. + +The compute power of the latest NVIDIA GPUs (RAPIDS supports Pascal and later +GPU architectures) make graph analytics 20x faster on average over NetworkX. +Moreover, the internal memory speed within a GPU allows cuGraph to rapidly +switch the data structure to best suit the needs of the analytic rather than +being restricted to a single data structure. cuGraph is working with several +frameworks for both static and dynamic graph data structures so that we always +have a solution to any graph problem. Since Python has emerged as the de facto +language for data science, allowing interactivity and the ability to run graph +analytics in Python makes cuGraph familiar and approachable. RAPIDS wraps all +the graph analytic goodness mentioned above with the ability to perform +high-speed ETL, statistics, and machine learning. To make things even better, +RAPIDS and DASK allows cuGraph to scale to multiple GPUs to support +multi-billion edge graphs. + +Every release of RAPIDS is accompanied with one or more wonderful BLOG(s) +about the features of that release (for example, see the release 0.8 blog [1] +or Life after Hadoop). This article is slightly different. While current +features will be discussed, the main focus is on presenting the vision of +cuGraph and how we envision users will interact with the libraries. The cuGraph +team has been working hard to provide a rich set of initial features. Over the +past three releases (RAPIDS release 0.6 was the first to include cuGraph), the +team has provided over a dozen algorithms. The initial goal was to simply get +graph analytics released and available with a familiar NetworkX-like API. That +is great in the short term since it allows an easy path to switch to RAPIDS. +However, cuGraph development will slowly shift towards improving ease-of-use, +interoperability, and integration with the rest of RAPIDS. That is not an easy +task since there is still so much to be added to the cuGraph ecosystem. Don’t +worry, the goal of getting new analytics out will continue since there is a very +long list of algorithms to release. + + + +# Terminology cuGraph is a collection of GPU accelerated graph algorithms and graph utility functions. The application of graph analysis covers a lot of areas. From d1f9d1633eb1bdd68b595ebab617aa74a0b1a3ed Mon Sep 17 00:00:00 2001 From: BradReesWork Date: Mon, 11 Jan 2021 14:20:11 -0500 Subject: [PATCH 093/343] style fix --- .../cugraph/centrality/betweenness_centrality.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/python/cugraph/centrality/betweenness_centrality.py b/python/cugraph/centrality/betweenness_centrality.py index 7f4d25dfddf..afccd191ddf 100644 --- a/python/cugraph/centrality/betweenness_centrality.py +++ b/python/cugraph/centrality/betweenness_centrality.py @@ -36,9 +36,9 @@ def betweenness_centrality( Betweenness centrality is a measure of the number of shortest paths that pass through a vertex. A vertex with a high betweenness centrality score has more paths passing through it and is therefore believed to be more - important. - - To improve performance. rather than doing an all-pair shortest path, + important. + + To improve performance. 
rather than doing an all-pair shortest path, a sample of k starting vertices can be used. CuGraph does not currently support the 'endpoints' and 'weight' parameters @@ -144,7 +144,7 @@ def betweenness_centrality( else: return df -# NOTE: result_type=float could be an intuitive way to indicate the result type + def edge_betweenness_centrality( G, k=None, @@ -158,9 +158,9 @@ def edge_betweenness_centrality( Betweenness centrality is a measure of the number of shortest paths that pass over an edge. An edge with a high betweenness centrality score has more paths passing over it and is therefore believed to be - more important. - - To improve performance, rather than doing an all-pair shortest path, + more important. + + To improve performance, rather than doing an all-pair shortest path, a sample of k starting vertices can be used. CuGraph does not currently support the 'weight' parameter From 85f0a3a919c92a5983146b0ca620951de8a0d199 Mon Sep 17 00:00:00 2001 From: Joseph <68436579+jolorunyomi@users.noreply.github.com> Date: Mon, 11 Jan 2021 13:22:29 -0600 Subject: [PATCH 094/343] Create labeler.yml(#1318) # Summary This PR adds a GitHub action (PR Labeler) to auto label PRs based on a labeling rule and changes in PR. Authors: - Joseph <68436579+jolorunyomi@users.noreply.github.com> Approvers: - AJ Schmidt (@ajschmidt8) - Rick Ratzel (@rlratzel) - AJ Schmidt (@ajschmidt8) URL: https://github.com/rapidsai/cugraph/pull/1318 --- .github/labeler.yml | 30 ++++++++++++++++++++++++++++++ github/workflows/labeler.yml | 11 +++++++++++ 2 files changed, 41 insertions(+) create mode 100644 .github/labeler.yml create mode 100644 github/workflows/labeler.yml diff --git a/.github/labeler.yml b/.github/labeler.yml new file mode 100644 index 00000000000..621d0fde833 --- /dev/null +++ b/.github/labeler.yml @@ -0,0 +1,30 @@ +# https://github.com/actions/labeler#common-examples +# Adapted from https://github.com/rapidsai/cugraph/blob/main/.github/CODEOWNERS +# Labels culled from https://github.com/rapidsai/cugraph/labels + +python: + - 'python/**' + - 'notebooks/**' + - 'benchmarks/**' + +doc: + - 'docs/**' + - '**/*.md' + +datasets: + - 'datasets/**' + +cuGraph: + - 'cpp/**' + +CMake: + - '**/CMakeLists.txt' + - '**/cmake/**' + +Ops: + - '.github/**' + - 'ci/**' + - 'conda/**' + - '**/Dockerfile' + - '**/.dockerignore' + - 'docker/**' diff --git a/github/workflows/labeler.yml b/github/workflows/labeler.yml new file mode 100644 index 00000000000..23956a02fbd --- /dev/null +++ b/github/workflows/labeler.yml @@ -0,0 +1,11 @@ +name: "Pull Request Labeler" +on: +- pull_request_target + +jobs: + triage: + runs-on: ubuntu-latest + steps: + - uses: actions/labeler@main + with: + repo-token: "${{ secrets.GITHUB_TOKEN }}" From 61d9ec3229da94058d6a6260fceb992fd668452f Mon Sep 17 00:00:00 2001 From: BradReesWork Date: Mon, 11 Jan 2021 14:33:42 -0500 Subject: [PATCH 095/343] clang fixes --- cpp/src/community/ECG.cu | 3 ++- cpp/src/experimental/graph.cu | 13 ++++++----- cpp/src/experimental/graph_view.cu | 35 ++++++++++++++++-------------- cpp/src/link_analysis/pagerank.cu | 3 ++- 4 files changed, 30 insertions(+), 24 deletions(-) diff --git a/cpp/src/community/ECG.cu b/cpp/src/community/ECG.cu index 65f1724f6e0..ea21f87ff7e 100644 --- a/cpp/src/community/ECG.cu +++ b/cpp/src/community/ECG.cu @@ -114,7 +114,8 @@ void ecg(raft::handle_t const &handle, vertex_t ensemble_size, vertex_t *clustering) { - CUGRAPH_EXPECTS(graph.edge_data != nullptr, "Invalid input argument: louvain expects a weighted graph"); + 
CUGRAPH_EXPECTS(graph.edge_data != nullptr, + "Invalid input argument: louvain expects a weighted graph"); CUGRAPH_EXPECTS(clustering != nullptr, "Invalid input argument: clustering is NULL"); cudaStream_t stream{0}; diff --git a/cpp/src/experimental/graph.cu b/cpp/src/experimental/graph.cu index 0bea8a5c98b..dc70909900f 100644 --- a/cpp/src/experimental/graph.cu +++ b/cpp/src/experimental/graph.cu @@ -472,12 +472,13 @@ graph_ton(default_stream), - degree_first, - degree_first + this->get_number_of_vertices(), - thrust::greater{}), - "Invalid input argument: sorted_by_degree is set to true, but degrees are not " - "non-ascending."); + CUGRAPH_EXPECTS( + thrust::is_sorted(rmm::exec_policy(default_stream)->on(default_stream), + degree_first, + degree_first + this->get_number_of_vertices(), + thrust::greater{}), + "Invalid input argument: sorted_by_degree is set to true, but degrees are not " + "non-ascending."); } static_assert(detail::num_segments_per_vertex_partition == 3); diff --git a/cpp/src/experimental/graph_view.cu b/cpp/src/experimental/graph_view.cu index 6abfecb74f2..e791a88d2d4 100644 --- a/cpp/src/experimental/graph_view.cu +++ b/cpp/src/experimental/graph_view.cu @@ -89,10 +89,11 @@ graph_view_ton(default_stream), - degrees.begin(), - degrees.end(), - thrust::greater{}), - "Invalid Invalid input argument: sorted_by_global_degree_within_vertex_partition is " - "set to true, but degrees are not non-ascending."); + CUGRAPH_EXPECTS( + thrust::is_sorted(rmm::exec_policy(default_stream)->on(default_stream), + degrees.begin(), + degrees.end(), + thrust::greater{}), + "Invalid Invalid input argument: sorted_by_global_degree_within_vertex_partition is " + "set to true, but degrees are not non-ascending."); for (int i = 0; i < (partition.is_hypergraph_partitioned() ? col_comm_size : row_comm_size); ++i) { @@ -257,12 +259,13 @@ graph_view_t{offsets}); - CUGRAPH_EXPECTS(thrust::is_sorted(rmm::exec_policy(default_stream)->on(default_stream), - degree_first, - degree_first + this->get_number_of_vertices(), - thrust::greater{}), - "Invalid input argument: sorted_by_degree is set to true, but degrees are not " - "non-ascending."); + CUGRAPH_EXPECTS( + thrust::is_sorted(rmm::exec_policy(default_stream)->on(default_stream), + degree_first, + degree_first + this->get_number_of_vertices(), + thrust::greater{}), + "Invalid input argument: sorted_by_degree is set to true, but degrees are not " + "non-ascending."); CUGRAPH_EXPECTS(std::is_sorted(segment_offsets.begin(), segment_offsets.end()), "Invalid input argument: erroneous segment_offsets."); diff --git a/cpp/src/link_analysis/pagerank.cu b/cpp/src/link_analysis/pagerank.cu index 7d0122c506e..2dcd3d73f61 100644 --- a/cpp/src/link_analysis/pagerank.cu +++ b/cpp/src/link_analysis/pagerank.cu @@ -378,7 +378,8 @@ void pagerank(raft::handle_t const &handle, int64_t max_iter, bool has_guess) { - CUGRAPH_EXPECTS(pagerank != nullptr, "Invalid input argument: Pagerank array should be of size V"); + CUGRAPH_EXPECTS(pagerank != nullptr, + "Invalid input argument: Pagerank array should be of size V"); // Multi-GPU if (handle.comms_initialized()) { CUGRAPH_EXPECTS(has_guess == false, From 7971a367aa730a7b65156c8ecfa54b73f676f086 Mon Sep 17 00:00:00 2001 From: Rick Ratzel <3039903+rlratzel@users.noreply.github.com> Date: Mon, 11 Jan 2021 14:22:25 -0600 Subject: [PATCH 096/343] Updated git utils used by copyright.py for compatibility with current CI env(#1325) Updated git utils used by copyright.py for compatibility with current CI env, added debug prints. 
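As a rough illustration, a minimal sketch of driving the updated helper the way the
CI environment would (the branch and hash values are placeholders, and the snippet
assumes PYTHONPATH=ci/utils as set up in ci/checks/style.sh; only TARGET_BRANCH,
COMMIT_HASH and modifiedFiles() come from the change itself):

    # sketch: exercise ci/utils/git_helpers.py with CI-style environment variables
    import os

    os.environ["TARGET_BRANCH"] = "branch-0.18"  # placeholder target branch
    os.environ["COMMIT_HASH"] = "0123abc"        # placeholder commit hash

    import git_helpers  # resolved via PYTHONPATH=ci/utils

    # The CI code path is taken only when both variables are set and the
    # checked-out branch is literally named "current-pr-branch"; otherwise
    # modifiedFiles() falls back to listing uncommitted files.
    files = git_helpers.modifiedFiles(pathFilter=lambda f: f.endswith(".py"))
    print(files)
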
NOTE: I'm intentionally not updating the copyright date on the changed file to test that the check is working in the actual CI environment here. Once verified, I'll change it from a draft PR and mark it ready for review. _verified_ Authors: - Rick Ratzel Approvers: - Ray Douglass (@raydouglass) URL: https://github.com/rapidsai/cugraph/pull/1325 --- ci/checks/style.sh | 7 +++--- ci/utils/git_helpers.py | 51 +++++++++++++++++++++++++++++------------ 2 files changed, 40 insertions(+), 18 deletions(-) diff --git a/ci/checks/style.sh b/ci/checks/style.sh index 978ac03d85b..e590e4aafa7 100755 --- a/ci/checks/style.sh +++ b/ci/checks/style.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2018-2020, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. ######################## # cuGraph Style Tester # ######################## @@ -52,13 +52,14 @@ COPYRIGHT=`env PYTHONPATH=ci/utils python ci/checks/copyright.py --git-modified- CR_RETVAL=$? ERRORCODE=$((ERRORCODE | ${CR_RETVAL})) -# Output results if failure otherwise show pass if [ "$CR_RETVAL" != "0" ]; then echo -e "\n\n>>>> FAILED: copyright check; begin output\n\n" echo -e "$COPYRIGHT" echo -e "\n\n>>>> FAILED: copyright check; end output\n\n" else - echo -e "\n\n>>>> PASSED: copyright check\n\n" + echo -e "\n\n>>>> PASSED: copyright check; begin debug output\n\n" + echo -e "$COPYRIGHT" + echo -e "\n\n>>>> PASSED: copyright check; end debug output\n\n" fi exit ${ERRORCODE} diff --git a/ci/utils/git_helpers.py b/ci/utils/git_helpers.py index 83ad73fe283..a0c413b75f4 100644 --- a/ci/utils/git_helpers.py +++ b/ci/utils/git_helpers.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -59,14 +59,24 @@ def uncommittedFiles(): return ret -def changedFilesBetween(b1, b2): - """Returns a list of files changed between branches b1 and b2""" +def changedFilesBetween(baseName, branchName, commitHash): + """ + Returns a list of files changed between branches baseName and latest commit + of branchName. + """ current = branch() - __git("checkout", "--quiet", b1) - __git("checkout", "--quiet", b2) - files = __gitdiff("--name-only", "--ignore-submodules", "%s...%s" % - (b1, b2)) - __git("checkout", "--quiet", current) + # checkout "base" branch + __git("checkout", "--force", baseName) + # checkout branch for comparing + __git("checkout", "--force", branchName) + # checkout latest commit from branch + __git("checkout", "-fq", commitHash) + + files = __gitdiff("--name-only", "--ignore-submodules", + f"{baseName}..{branchName}") + + # restore the original branch + __git("checkout", "--force", current) return files.splitlines() @@ -87,10 +97,10 @@ def changesInFileBetween(file, b1, b2, pathFilter=None): def modifiedFiles(pathFilter=None): """ - If inside a CI-env (ie. currentBranch=current-pr-branch and the env-var - PR_TARGET_BRANCH is defined), then lists out all files modified between - these 2 branches. Else, lists out all the uncommitted files in the current - branch. + If inside a CI-env (ie. TARGET_BRANCH and COMMIT_HASH are defined, and + current branch is "current-pr-branch"), then lists out all files modified + between these 2 branches. Else, lists out all the uncommitted files in the + current branch. Such utility function is helpful while putting checker scripts as part of cmake, as well as CI process. 
This way, during development, only the files @@ -98,15 +108,26 @@ def modifiedFiles(pathFilter=None): process ALL files modified by the dev, as submiited in the PR, will be checked. This happens, all the while using the same script. """ - if "PR_TARGET_BRANCH" in os.environ and branch() == "current-pr-branch": - allFiles = changedFilesBetween(os.environ["PR_TARGET_BRANCH"], - branch()) + targetBranch = os.environ.get("TARGET_BRANCH") + commitHash = os.environ.get("COMMIT_HASH") + currentBranch = branch() + print(f" [DEBUG] TARGET_BRANCH={targetBranch}, COMMIT_HASH={commitHash}, " + f"currentBranch={currentBranch}") + + if targetBranch and commitHash and (currentBranch == "current-pr-branch"): + print(" [DEBUG] Assuming a CI environment.") + allFiles = changedFilesBetween(targetBranch, currentBranch, commitHash) else: + print(" [DEBUG] Did not detect CI environment.") allFiles = uncommittedFiles() + files = [] for f in allFiles: if pathFilter is None or pathFilter(f): files.append(f) + + filesToCheckString = "\n\t".join(files) if files else "" + print(f" [DEBUG] Found files to check:\n\t{filesToCheckString}\n") return files From 4b1c86d347f7b2a70ba123132e65d07361521743 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Mon, 11 Jan 2021 22:50:15 -0500 Subject: [PATCH 097/343] add induced_subgraph.cu to CMakeLists.txt --- cpp/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index bd122fc1fb2..4d5dd8ffb15 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -367,6 +367,7 @@ add_library(cugraph SHARED src/centrality/betweenness_centrality.cu src/experimental/graph.cu src/experimental/graph_view.cu + src/experimental/induced_subgraph.cu src/experimental/bfs.cu src/experimental/sssp.cu src/experimental/pagerank.cu From f7a9d3d8700360383a298095ebbe2cb9544176ad Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Mon, 11 Jan 2021 22:53:26 -0500 Subject: [PATCH 098/343] induced subgraph error check --- cpp/src/experimental/induced_subgraph.cu | 96 +++++++++++++++++++----- 1 file changed, 78 insertions(+), 18 deletions(-) diff --git a/cpp/src/experimental/induced_subgraph.cu b/cpp/src/experimental/induced_subgraph.cu index e8ffdd9c4f5..7588bc80778 100644 --- a/cpp/src/experimental/induced_subgraph.cu +++ b/cpp/src/experimental/induced_subgraph.cu @@ -35,15 +35,15 @@ namespace cugraph { namespace experimental { namespace detail { -} // detail +} // namespace detail /** - * @brief extract induced subgraph(s). - * + * @brief extract induced subgraph(s). + * * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. * @tparam edge_t Type of edge identifiers. Needs to be an integral type. * @tparam weight_t Type of edge weights. Needs to be a floating point type. - * @tparam store_transposed + * @tparam store_transposed * @tparam store_transposed Flag indicating whether to store the graph adjacency matrix as is or as * transposed. * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) @@ -52,37 +52,97 @@ namespace detail { * handles to various CUDA libraries) to run graph algorithms. * @param graph_view Graph view object of, we extract induced subgraphs from @p graph_view. * @param subgraph_offsets Pointer to subgraph vertex offsets (size == @p num_subgraphs + 1). - * @param subgraph_vertices Pointer to subgraph vertices (size == @p subgraph_offsets[@p num_subgraphs]). + * @param subgraph_vertices Pointer to subgraph vertices (size == @p subgraph_offsets[@p + * num_subgraphs]). 
* @param num_subgraphs Number of induced subgraphs to extract. * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). - * @return std::tuple, rmm::device_uvector, rmm::device_uvector, rmm::device_uvector> Quadraplet of edge source vertices, edge destination vertices, edge weights, and edge offsets for each induced subgraph. + * @return std::tuple, rmm::device_uvector, + * rmm::device_uvector, rmm::device_uvector> Quadraplet of edge source vertices, + * edge destination vertices, edge weights, and edge offsets for each induced subgraph. */ -template -std::tuple, rmm::device_uvector, rmm::device_uvector, rmm::device_uvector> +template +std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector> extract_induced_subgraph( raft::handle_t const &handle, graph_view_t const &graph_view, - size_t const * subgraph_offsets/* size == num_subgraphs + 1 */, - vertex_t const * subgraph_vertices/* size == subgraph_offsets[num_subgraphs] */, + size_t const *subgraph_offsets /* size == num_subgraphs + 1 */, + vertex_t const *subgraph_vertices /* size == subgraph_offsets[num_subgraphs] */, size_t num_subgraphs, - bool do_expensive_check = false) { - // FIXME: this code is inefficient for the vertices with their local degrees much larger than the number of vertices in the subgraphs. We may later add additional code to handle such cases. - // FIXME: we may consider the performance (speed & memory footprint, hash based approach uses extra-memory) of hash table based and binary search based approaches + bool do_expensive_check = false) +{ + // FIXME: this code is inefficient for the vertices with their local degrees much larger than the + // number of vertices in the subgraphs. We may later add additional code to handle such cases. 
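+  // Overall flow: optionally validate the input offsets/vertices (when
+  // do_expensive_check is set), then, for each subgraph, gather the edges
+  // whose two endpoints both appear in that subgraph's vertex list.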
+ // FIXME: we may consider the performance (speed & memory footprint, hash based approach uses + // extra-memory) of hash table based and binary search based approaches + + size_t num_aggregate_subgraph_vertices{}; + raft::update_host( + &num_aggregate_subgraph_vertices, subgraph_offsets + num_subgraphs, 1, handle.get_stream()); + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + + if (do_expensive_check) { + size_t should_be_zero{std::numeric_limits::max()}; + raft::update_host(&should_be_zero, subgraph_offsets, 1, handle.get_stream()); + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + CUGRAPH_EXPECTS(should_be_zero == 0, + "Invalid input argument: subgraph_offsets[0] should be 0."); + CUGRAPH_EXPECTS( + thrust::is_sorted(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + subgraph_offsets, + subgraph_offsets + (num_subgraphs + 1)), + "Invalid input argument: subgraph_offsets is not sorted."); + vertex_partition_device_t> + vertex_partition(graph_view); + CUGRAPH_EXPECTS(thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + subgraph_vertices, + subgraph_vertices + num_aggregate_subgraph_vertices, + [vertex_partition] __device__(auto v) { + return !vertex_partition.is_valid_vertex(v) || + !vertex_partition.is_local_vertex_nocheck(v); + }), + "Invalid input argument: subgraph_vertices has invalid vertex IDs."); + } rmm::device_uvector subgraph_major_offsets(0, handle.get_stream()); rmm::device_uvector subgraph_majors(0, handle.get_stream()); - rmm::device_uvector subgraph_minor_offsets(0, handle.get_stream()); // relevant only if multi_gpu - rmm::device_uvector subgraph_minors(0, handle.get_stream()); // relevant only if multi_gpu + rmm::device_uvector subgraph_minor_offsets( + 0, handle.get_stream()); // relevant only if multi_gpu + rmm::device_uvector subgraph_minors(0, + handle.get_stream()); // relevant only if multi_gpu + + copy_to_adj_matrix_row(handle, + graph_view, + subgraph_vertices + subgraph_major_offsets[i], + subgraph_vertices + subgraph_) - // 1. construct (subgraph_idx, vertex, local_degree) triplets + // 1. 
construct (subgraph_idx, vertex, local_degree) triplets - std::vector h_subgraph_offsets(num_subgraphs + 1); - raft::update_host(h_subgraph_offsets.data(), subgraph_offsets, num_subgraphs + 1, handle.get_stream()); + size_t num_subgraph_vertices{}; + raft::update_host( + &num_subgraph_vertices, subgraph_offsets + num_subgraphs, 1, handle.get_stream()); + + rmm::device_uvector subgraph_indices(num_subgraph_vertices, handle.get_stream()); + repeat( + subgraph_offsets, subgraph_offsets + num_subgraphs, subgraph_vertices, subgraph_indices.data()); + + rmm::device_uvector subgraph_vertices(subgraph_indices.size(), handle.get_stream()); + thrust::copy(); + auto local_degrees = graph_view.get_local_degrees(subgraph_vertices, num_subgraph_vertices); // construct (subgraph_idx, v, local_degree) // sort triplets by local_degree (non-ascending) + auto = thrust::make_zip_iterator(thrust::make_tuple()); + thrust::sort(); + // find number of edges for each subgraph // allocate memory From 498cea5405ae40c839ce2b1f6f09312427c31b48 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Mon, 11 Jan 2021 23:03:51 -0500 Subject: [PATCH 099/343] clang-format --- cpp/include/experimental/graph_view.hpp | 6 ++++-- cpp/include/utilities/dataframe_buffer.cuh | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/cpp/include/experimental/graph_view.hpp b/cpp/include/experimental/graph_view.hpp index 3312cb266fc..75950b21e2b 100644 --- a/cpp/include/experimental/graph_view.hpp +++ b/cpp/include/experimental/graph_view.hpp @@ -400,7 +400,8 @@ class graph_view_tget_number_of_vertices(); } - edge_t get_number_of_local_adj_matrix_partition_edges(size_t adj_matrix_partition_idx) const { + edge_t get_number_of_local_adj_matrix_partition_edges(size_t adj_matrix_partition_idx) const + { assert(adj_matrix_partition_idx == 0); return this->get_number_of_edges(); } diff --git a/cpp/include/utilities/dataframe_buffer.cuh b/cpp/include/utilities/dataframe_buffer.cuh index beec7fc3c8b..76d2af22a61 100644 --- a/cpp/include/utilities/dataframe_buffer.cuh +++ b/cpp/include/utilities/dataframe_buffer.cuh @@ -20,8 +20,8 @@ #include #include -#include #include +#include #include From 784d9c42cee011b254d0aef5b45398d9dbc1392f Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Mon, 11 Jan 2021 23:17:07 -0500 Subject: [PATCH 100/343] update copyright year --- cpp/CMakeLists.txt | 2 +- cpp/include/experimental/detail/graph_utils.cuh | 2 +- cpp/include/experimental/graph.hpp | 2 +- cpp/include/experimental/graph_functions.hpp | 2 +- cpp/include/experimental/graph_view.hpp | 2 +- cpp/include/matrix_partition_device.cuh | 2 +- cpp/include/patterns/any_of_adj_matrix_row.cuh | 2 +- cpp/include/patterns/copy_to_adj_matrix_row_col.cuh | 2 +- cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh | 2 +- .../patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh | 2 +- cpp/include/patterns/count_if_e.cuh | 2 +- cpp/include/patterns/count_if_v.cuh | 2 +- cpp/include/patterns/reduce_op.cuh | 2 +- cpp/include/patterns/reduce_v.cuh | 2 +- .../patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh | 2 +- cpp/include/patterns/transform_reduce_e.cuh | 2 +- cpp/include/patterns/transform_reduce_v.cuh | 2 +- cpp/include/patterns/transform_reduce_v_with_adj_matrix_row.cuh | 2 +- cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh | 2 +- cpp/include/patterns/vertex_frontier.cuh | 2 +- cpp/include/utilities/dataframe_buffer.cuh | 2 +- cpp/include/utilities/device_comm.cuh | 2 +- cpp/include/utilities/host_scalar_comm.cuh | 2 +- 
cpp/include/utilities/shuffle_comm.cuh | 2 +- cpp/src/experimental/bfs.cu | 2 +- cpp/src/experimental/coarsen_graph.cu | 2 +- cpp/src/experimental/graph.cu | 2 +- cpp/src/experimental/graph_view.cu | 2 +- cpp/src/experimental/katz_centrality.cu | 2 +- cpp/src/experimental/louvain.cuh | 2 +- cpp/src/experimental/pagerank.cu | 2 +- cpp/src/experimental/relabel.cu | 2 +- cpp/src/experimental/renumber_edgelist.cu | 2 +- cpp/src/experimental/sssp.cu | 2 +- cpp/tests/CMakeLists.txt | 2 +- cpp/tests/experimental/coarsen_graph_test.cpp | 2 +- cpp/tests/experimental/katz_centrality_test.cpp | 2 +- cpp/tests/experimental/pagerank_test.cpp | 2 +- 38 files changed, 38 insertions(+), 38 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index df2341b54fb..cef05e5de19 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -1,5 +1,5 @@ #============================================================================= -# Copyright (c) 2018-2020, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/cpp/include/experimental/detail/graph_utils.cuh b/cpp/include/experimental/detail/graph_utils.cuh index cad6eb12de5..3ac2e2163c6 100644 --- a/cpp/include/experimental/detail/graph_utils.cuh +++ b/cpp/include/experimental/detail/graph_utils.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/experimental/graph.hpp b/cpp/include/experimental/graph.hpp index b3e57cc3adb..5e7e48016c7 100644 --- a/cpp/include/experimental/graph.hpp +++ b/cpp/include/experimental/graph.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/experimental/graph_functions.hpp b/cpp/include/experimental/graph_functions.hpp index 564a4177578..70db662f903 100644 --- a/cpp/include/experimental/graph_functions.hpp +++ b/cpp/include/experimental/graph_functions.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/experimental/graph_view.hpp b/cpp/include/experimental/graph_view.hpp index 75950b21e2b..d2ae1150970 100644 --- a/cpp/include/experimental/graph_view.hpp +++ b/cpp/include/experimental/graph_view.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/matrix_partition_device.cuh b/cpp/include/matrix_partition_device.cuh index b5564a47f30..b41119e7be6 100644 --- a/cpp/include/matrix_partition_device.cuh +++ b/cpp/include/matrix_partition_device.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/cpp/include/patterns/any_of_adj_matrix_row.cuh b/cpp/include/patterns/any_of_adj_matrix_row.cuh index 81fd1956886..a367ec2a50c 100644 --- a/cpp/include/patterns/any_of_adj_matrix_row.cuh +++ b/cpp/include/patterns/any_of_adj_matrix_row.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/patterns/copy_to_adj_matrix_row_col.cuh b/cpp/include/patterns/copy_to_adj_matrix_row_col.cuh index 874c7cc6eb5..83347fc726d 100644 --- a/cpp/include/patterns/copy_to_adj_matrix_row_col.cuh +++ b/cpp/include/patterns/copy_to_adj_matrix_row_col.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh b/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh index bca4abf5424..21978d985e2 100644 --- a/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh +++ b/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh index b30c3ca19ac..018a665d1ee 100644 --- a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh +++ b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/patterns/count_if_e.cuh b/cpp/include/patterns/count_if_e.cuh index 7ef5356f4f7..63b31f9c44e 100644 --- a/cpp/include/patterns/count_if_e.cuh +++ b/cpp/include/patterns/count_if_e.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/patterns/count_if_v.cuh b/cpp/include/patterns/count_if_v.cuh index b45f8cd0705..6b28cd7ae12 100644 --- a/cpp/include/patterns/count_if_v.cuh +++ b/cpp/include/patterns/count_if_v.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/patterns/reduce_op.cuh b/cpp/include/patterns/reduce_op.cuh index f52fa86555a..d92d3352d08 100644 --- a/cpp/include/patterns/reduce_op.cuh +++ b/cpp/include/patterns/reduce_op.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/cpp/include/patterns/reduce_v.cuh b/cpp/include/patterns/reduce_v.cuh index 0e0b7ff2ea4..b232d37b78d 100644 --- a/cpp/include/patterns/reduce_v.cuh +++ b/cpp/include/patterns/reduce_v.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh b/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh index 55a9ad7f323..715152bc99f 100644 --- a/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh +++ b/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/patterns/transform_reduce_e.cuh b/cpp/include/patterns/transform_reduce_e.cuh index 342add517bd..946c15a16a0 100644 --- a/cpp/include/patterns/transform_reduce_e.cuh +++ b/cpp/include/patterns/transform_reduce_e.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/patterns/transform_reduce_v.cuh b/cpp/include/patterns/transform_reduce_v.cuh index 2eb3f03251c..17ffb89206a 100644 --- a/cpp/include/patterns/transform_reduce_v.cuh +++ b/cpp/include/patterns/transform_reduce_v.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/patterns/transform_reduce_v_with_adj_matrix_row.cuh b/cpp/include/patterns/transform_reduce_v_with_adj_matrix_row.cuh index 703da8a318b..39aca7cacae 100644 --- a/cpp/include/patterns/transform_reduce_v_with_adj_matrix_row.cuh +++ b/cpp/include/patterns/transform_reduce_v_with_adj_matrix_row.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh b/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh index 5150af1f57c..e20a2b4238b 100644 --- a/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh +++ b/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/patterns/vertex_frontier.cuh b/cpp/include/patterns/vertex_frontier.cuh index dc3a5893ef3..2126a27ee5a 100644 --- a/cpp/include/patterns/vertex_frontier.cuh +++ b/cpp/include/patterns/vertex_frontier.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/utilities/dataframe_buffer.cuh b/cpp/include/utilities/dataframe_buffer.cuh index 76d2af22a61..06352b8e217 100644 --- a/cpp/include/utilities/dataframe_buffer.cuh +++ b/cpp/include/utilities/dataframe_buffer.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/utilities/device_comm.cuh b/cpp/include/utilities/device_comm.cuh index 5eb6184aa29..6fec0c8717f 100644 --- a/cpp/include/utilities/device_comm.cuh +++ b/cpp/include/utilities/device_comm.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/utilities/host_scalar_comm.cuh b/cpp/include/utilities/host_scalar_comm.cuh index b0bd644a6ce..2ecfd913813 100644 --- a/cpp/include/utilities/host_scalar_comm.cuh +++ b/cpp/include/utilities/host_scalar_comm.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/utilities/shuffle_comm.cuh b/cpp/include/utilities/shuffle_comm.cuh index e55fc29d229..6e84668bb8d 100644 --- a/cpp/include/utilities/shuffle_comm.cuh +++ b/cpp/include/utilities/shuffle_comm.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/experimental/bfs.cu b/cpp/src/experimental/bfs.cu index e99dbcaebc6..7adfbd7fbd7 100644 --- a/cpp/src/experimental/bfs.cu +++ b/cpp/src/experimental/bfs.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/experimental/coarsen_graph.cu b/cpp/src/experimental/coarsen_graph.cu index 56b8a4f2989..4b773f7152f 100644 --- a/cpp/src/experimental/coarsen_graph.cu +++ b/cpp/src/experimental/coarsen_graph.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/experimental/graph.cu b/cpp/src/experimental/graph.cu index 38157c83f1c..3360f86a5cc 100644 --- a/cpp/src/experimental/graph.cu +++ b/cpp/src/experimental/graph.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/cpp/src/experimental/graph_view.cu b/cpp/src/experimental/graph_view.cu index 8289a231e31..41d14b218f3 100644 --- a/cpp/src/experimental/graph_view.cu +++ b/cpp/src/experimental/graph_view.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/experimental/katz_centrality.cu b/cpp/src/experimental/katz_centrality.cu index e93b39efbe4..1ab824f1c91 100644 --- a/cpp/src/experimental/katz_centrality.cu +++ b/cpp/src/experimental/katz_centrality.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/experimental/louvain.cuh b/cpp/src/experimental/louvain.cuh index 5bea9b79e18..55a182ecec5 100644 --- a/cpp/src/experimental/louvain.cuh +++ b/cpp/src/experimental/louvain.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/experimental/pagerank.cu b/cpp/src/experimental/pagerank.cu index 35533041b67..058cbfe5966 100644 --- a/cpp/src/experimental/pagerank.cu +++ b/cpp/src/experimental/pagerank.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/experimental/relabel.cu b/cpp/src/experimental/relabel.cu index fec59592a72..793b40ee4f1 100644 --- a/cpp/src/experimental/relabel.cu +++ b/cpp/src/experimental/relabel.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/experimental/renumber_edgelist.cu b/cpp/src/experimental/renumber_edgelist.cu index 41d1680a5bb..e8408f2ecd6 100644 --- a/cpp/src/experimental/renumber_edgelist.cu +++ b/cpp/src/experimental/renumber_edgelist.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/experimental/sssp.cu b/cpp/src/experimental/sssp.cu index 7561f569227..4996b3734cb 100644 --- a/cpp/src/experimental/sssp.cu +++ b/cpp/src/experimental/sssp.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 5e09040fe52..941ef4774f6 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -1,6 +1,6 @@ #============================================================================= # -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/cpp/tests/experimental/coarsen_graph_test.cpp b/cpp/tests/experimental/coarsen_graph_test.cpp index b3b730b776e..8395fdfa93b 100644 --- a/cpp/tests/experimental/coarsen_graph_test.cpp +++ b/cpp/tests/experimental/coarsen_graph_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/tests/experimental/katz_centrality_test.cpp b/cpp/tests/experimental/katz_centrality_test.cpp index 52394763024..3e9f0b478a0 100644 --- a/cpp/tests/experimental/katz_centrality_test.cpp +++ b/cpp/tests/experimental/katz_centrality_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/tests/experimental/pagerank_test.cpp b/cpp/tests/experimental/pagerank_test.cpp index d2d0685e12c..53143bf0bf3 100644 --- a/cpp/tests/experimental/pagerank_test.cpp +++ b/cpp/tests/experimental/pagerank_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From 1f2345de3e12b36b9deab86af3e2efc6bb95e254 Mon Sep 17 00:00:00 2001 From: Iroy30 <41401566+Iroy30@users.noreply.github.com> Date: Tue, 12 Jan 2021 08:22:26 -0600 Subject: [PATCH 101/343] Fix mnmg cleanup exceptions(#1326) Fixes #1328 Authors: - Ishika Roy Approvers: - Brad Rees (@BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1326 --- python/cugraph/structure/number_map.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cugraph/structure/number_map.py b/python/cugraph/structure/number_map.py index f47a8bbb53e..f1b8949eb5d 100644 --- a/python/cugraph/structure/number_map.py +++ b/python/cugraph/structure/number_map.py @@ -340,7 +340,7 @@ def compute(self): numbering_map, cudf.Series(base_addresses), val_types ) - self.ddf = numbering_map.persist() + self.ddf = numbering_map self.numbered = True def to_internal_vertex_id(self, ddf, col_names): From c1c30b4a1ab660e75ddc82c5fd8550752951be88 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 12 Jan 2021 10:44:42 -0500 Subject: [PATCH 102/343] tmp commit-induced subgraph implementation --- cpp/src/experimental/induced_subgraph.cu | 146 ++++++++++++++++------- 1 file changed, 106 insertions(+), 40 deletions(-) diff --git a/cpp/src/experimental/induced_subgraph.cu b/cpp/src/experimental/induced_subgraph.cu index 7588bc80778..2e89c1830a6 100644 --- a/cpp/src/experimental/induced_subgraph.cu +++ b/cpp/src/experimental/induced_subgraph.cu @@ -53,7 +53,7 @@ namespace detail { * @param graph_view Graph view object of, we extract induced subgraphs from @p graph_view. * @param subgraph_offsets Pointer to subgraph vertex offsets (size == @p num_subgraphs + 1). * @param subgraph_vertices Pointer to subgraph vertices (size == @p subgraph_offsets[@p - * num_subgraphs]). + * num_subgraphs]). @p subgraph_vertices for each subgraph should be sorted in ascending order. * @param num_subgraphs Number of induced subgraphs to extract. 
* @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). * @return std::tuple, rmm::device_uvector, @@ -78,16 +78,20 @@ extract_induced_subgraph( bool do_expensive_check = false) { // FIXME: this code is inefficient for the vertices with their local degrees much larger than the - // number of vertices in the subgraphs. We may later add additional code to handle such cases. + // number of vertices in the subgraphs (in this case, searching that the subgraph vertices are + // included in the local neighbors is more efficient than searching the local neighbors are + // included in the subgraph vertices). We may later add additional code to handle such cases. // FIXME: we may consider the performance (speed & memory footprint, hash based approach uses // extra-memory) of hash table based and binary search based approaches - size_t num_aggregate_subgraph_vertices{}; - raft::update_host( - &num_aggregate_subgraph_vertices, subgraph_offsets + num_subgraphs, 1, handle.get_stream()); - CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + // 1. check input arguments if (do_expensive_check) { + size_t num_aggregate_subgraph_vertices{}; + raft::update_host( + &num_aggregate_subgraph_vertices, subgraph_offsets + num_subgraphs, 1, handle.get_stream()); + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + size_t should_be_zero{std::numeric_limits::max()}; raft::update_host(&should_be_zero, subgraph_offsets, 1, handle.get_stream()); CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); @@ -108,46 +112,108 @@ extract_induced_subgraph( !vertex_partition.is_local_vertex_nocheck(v); }), "Invalid input argument: subgraph_vertices has invalid vertex IDs."); - } - - rmm::device_uvector subgraph_major_offsets(0, handle.get_stream()); - rmm::device_uvector subgraph_majors(0, handle.get_stream()); - rmm::device_uvector subgraph_minor_offsets( - 0, handle.get_stream()); // relevant only if multi_gpu - rmm::device_uvector subgraph_minors(0, - handle.get_stream()); // relevant only if multi_gpu - - copy_to_adj_matrix_row(handle, - graph_view, - subgraph_vertices + subgraph_major_offsets[i], - subgraph_vertices + subgraph_) - - // 1. 
construct (subgraph_idx, vertex, local_degree) triplets - - size_t num_subgraph_vertices{}; - raft::update_host( - &num_subgraph_vertices, subgraph_offsets + num_subgraphs, 1, handle.get_stream()); - rmm::device_uvector subgraph_indices(num_subgraph_vertices, handle.get_stream()); - repeat( - subgraph_offsets, subgraph_offsets + num_subgraphs, subgraph_vertices, subgraph_indices.data()); - - rmm::device_uvector subgraph_vertices(subgraph_indices.size(), handle.get_stream()); - thrust::copy(); - auto local_degrees = graph_view.get_local_degrees(subgraph_vertices, num_subgraph_vertices); - - // construct (subgraph_idx, v, local_degree) + CUGRAPH_EXPECTS( + thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(num_subgraphs), + [subgraph_offsets, subgraph_vertices] __device__(auto i) { + return !thrust::is_sorted(thrust::seq, + subgraph_vertices + subgraph_offsets[i], + subgraph_vertices + subgraph_offsets[i + 1]) || + (thrust::unique(thrust::seq, + subgraph_vertices + subgraph_offsets[i], + subgraph_vertices + subgraph_offsets[i + 1]) != + subgraph_vertices + subgraph_offsets[i + 1]); + }) == 0, + "Invalid input argument: subgraph_vertices for each subgraph idx should be sorted in " + "ascending order and unique."); + } - // sort triplets by local_degree (non-ascending) + // 2. extract induced subgraphs - auto = thrust::make_zip_iterator(thrust::make_tuple()); - thrust::sort(); + if (multi_gpu) { + CUGRAPH_FAIL("Unimplemented."); + } else { + size_t num_aggregate_subgraph_vertices{}; + raft::update_host( + &num_aggregate_subgraph_vertices, subgraph_offsets + num_subgraphs, 1, handle.get_stream()); + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); - // find number of edges for each subgraph + rmm::device_uvector subgraph_edge_offsets(num_aggregate_subgraph_vertices + 1, + handle.get_stream()); + + matrix_partition matrix_partition(graph_view, 0); + thrust::tabulate( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + subgraph_edge_offsets.begin(), + subgraph_edge_offsets.end() - 1, + [subgraph_offsets, subgraph_vertices, num_subgraphs, matrix_partition] __device__(auto i) { + auto subgraph_idx = thrust::distance( + subgraph_offsets + 1, + thrust::lower_bound( + thrust::seq, subgraph_offsets + 1, subgraph_offsets + num_subgraphs + 1, size_t{i})); + vertex_t const *indices{nullptr}; + weight_t cosnt *weights{nullptr}; + edge_t local_degree{}; + auto major_offset = + matrix_partition.get_major_offset_from_major_nocheck(subgraph_vertices[i]); + thrust::tie(indices, weights, local_degree) = + matrix_partition.get_local_edges(major_offset); + return thrust::count_if( + thrust::seq, + indices, + indices + local_degree, + [vertex_first = subgraph_offsets + subgraph_idx, + vertex_last = subgraph_offsets + (subgraph_idx + 1)] __device__(auto nbr) { + return thrust::binary_search(thrust::seq, vertex_first, vertex_last, nbr); + }); + }); + thrust::exclusive_scan(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + subgraph_edge_offsets, + subgraph_edge_offsets + num_aggregate_subgraph_vertices + 1, + subgraph_edge_offsets); + + size_t num_aggregate_edges{}; + raft::update_host(&num_aggregate_edges, + subgraph_edge_offsets + num_aggregate_subgraph_vertices, + 1, + handle.get_stream()); + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); - // allocate memory + rmm::device_uvector edge_majors(num_aggregate_edges, handle.get_stream()); + rmm::device_uvector 
edge_minors(num_aggregate_edges, handle.get_stream()); + rmm::device_uvector edge_weights(graph_view.is_weighted() ? num_aggregate_edges : size_t{0}, + handle.get_stream()); + + thrust::for_each( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(num_subgraphs), + [subgraph_offsets, subgraph_vertices, num_subgraphs, matrix_partition, subgraph_edge_offsets = subgraph_edge_offsets.data()] __device__(auto i) { + auto subgraph_idx = thrust::distance( + subgraph_offsets + 1, + thrust::lower_bound( + thrust::seq, subgraph_offsets + 1, subgraph_offsets + num_subgraphs + 1, size_t{i})); + vertex_t const *indices{nullptr}; + weight_t cosnt *weights{nullptr}; + edge_t local_degree{}; + auto major_offset = + matrix_partition.get_major_offset_from_major_nocheck(subgraph_vertices[i]); + thrust::tie(indices, weights, local_degree) = + matrix_partition.get_local_edges(major_offset); + thrust::copy_if( + thrust::seq, + thrust::make_zip_iterator(thrust::make_constant_iterator(subgraph_vertices[i]), indices, weights, , + indices + local_degree, + [vertex_first = subgraph_offsets + subgraph_idx, + vertex_last = subgraph_offsets + (subgraph_idx + 1)] __device__(auto nbr) { + return thrust::binary_search(thrust::seq, vertex_first, vertex_last, nbr); + }); + }); + } - // enumerate edges for each subgraph + return std::make_tuple(std::move(), std::move(), std::move(), std::move()); } } // namespace experimental From fc1b1d262ea8bb6038ef7bdd4b46b85325002b42 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 12 Jan 2021 11:24:14 -0500 Subject: [PATCH 103/343] temporarily disable raft comms gather & gatherv calls till the RAFT gather & gatherv PR is merged --- cpp/include/utilities/device_comm.cuh | 5 +++++ cpp/include/utilities/host_scalar_comm.cuh | 10 ++++++++++ 2 files changed, 15 insertions(+) diff --git a/cpp/include/utilities/device_comm.cuh b/cpp/include/utilities/device_comm.cuh index 6fec0c8717f..8c3b0f86a47 100644 --- a/cpp/include/utilities/device_comm.cuh +++ b/cpp/include/utilities/device_comm.cuh @@ -589,6 +589,10 @@ device_gatherv_impl(raft::comms::comms_t const& comm, { static_assert(std::is_same::value_type, typename std::iterator_traits::value_type>::value); + // FIXME: should be enabled once the RAFT gather & gatherv PR is merged +#if 1 + CUGRAPH_FAIL("Unimplemented."); +#else comm.gatherv(iter_to_raw_ptr(input_first), iter_to_raw_ptr(output_first), sendcount, @@ -596,6 +600,7 @@ device_gatherv_impl(raft::comms::comms_t const& comm, displacements.data(), root, stream); +#endif } template diff --git a/cpp/include/utilities/host_scalar_comm.cuh b/cpp/include/utilities/host_scalar_comm.cuh index 2ecfd913813..dda0ce1f091 100644 --- a/cpp/include/utilities/host_scalar_comm.cuh +++ b/cpp/include/utilities/host_scalar_comm.cuh @@ -321,11 +321,16 @@ std::enable_if_t::value, std::vector> host_scalar_gathe &input, 1, stream); + // FIXME: should be enabled once the RAFT gather & gatherv PR is merged +#if 1 + CUGRAPH_FAIL("Unimplemented."); +#else comm.gather(comm.get_rank() == root ? d_outputs.data() + comm.get_rank() : d_outputs.data(), d_outputs.data(), size_t{1}, root, stream); +#endif std::vector h_outputs(comm.get_rank() == root ? 
comm.get_size() : 0); if (comm.get_rank() == root) { raft::update_host(h_outputs.data(), d_outputs.data(), comm.get_size(), stream); @@ -353,6 +358,10 @@ host_scalar_gather(raft::comms::comms_t const& comm, T input, int root, cudaStre h_tuple_scalar_elements.data(), tuple_size, stream); + // FIXME: should be enabled once the RAFT gather & gatherv PR is merged +#if 1 + CUGRAPH_FAIL("Unimplemented."); +#else comm.gather(comm.get_rank() == root ? d_gathered_tuple_scalar_elements.data() + comm.get_rank() * tuple_size : d_gathered_tuple_scalar_elements.data(), @@ -360,6 +369,7 @@ host_scalar_gather(raft::comms::comms_t const& comm, T input, int root, cudaStre tuple_size, root, stream); +#endif std::vector h_gathered_tuple_scalar_elements( comm.get_rank() == root ? comm.get_size() * tuple_size : size_t{0}); if (comm.get_rank() == root) { From 9cc117269e7074409c29a408e21cd898fab70b3d Mon Sep 17 00:00:00 2001 From: Joseph Nke Date: Tue, 12 Jan 2021 12:03:58 -0500 Subject: [PATCH 104/343] refac --- .../test_betweenness_centrality_refac.py | 14 - utils.py => python/cugraph/tests/utils_env.py | 0 test_betweenness_oversubs.py | 578 ------------------ 3 files changed, 592 deletions(-) rename test_betweenness_centrality_refac.py => python/cugraph/tests/test_betweenness_centrality_refac.py (98%) rename utils.py => python/cugraph/tests/utils_env.py (100%) delete mode 100755 test_betweenness_oversubs.py diff --git a/test_betweenness_centrality_refac.py b/python/cugraph/tests/test_betweenness_centrality_refac.py similarity index 98% rename from test_betweenness_centrality_refac.py rename to python/cugraph/tests/test_betweenness_centrality_refac.py index 5d5aae36e86..e6194e52a7e 100755 --- a/test_betweenness_centrality_refac.py +++ b/python/cugraph/tests/test_betweenness_centrality_refac.py @@ -332,20 +332,6 @@ def Fixture_params_dts_urnbrd(request): return utils.build_cu_and_nx_graphs(*request.param) -def betweenness_centrality_dts_sml(Fixture_params_dts_sml): - - _,Gnx=Fixture_params_dts_sml - -""" -nx.betweenness_centrality( - Gnx, - k=k, - normalized=normalized, - weight=weight, - endpoints=endpoints, - seed=seed, - ) -""" # ============================================================================= diff --git a/utils.py b/python/cugraph/tests/utils_env.py similarity index 100% rename from utils.py rename to python/cugraph/tests/utils_env.py diff --git a/test_betweenness_oversubs.py b/test_betweenness_oversubs.py deleted file mode 100755 index 7d7012b6876..00000000000 --- a/test_betweenness_oversubs.py +++ /dev/null @@ -1,578 +0,0 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION.: -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import gc - -import pytest - -import cugraph -from cugraph.tests import utils -import random -import numpy as np -import cudf -import cupy -import rmm - - -# Temporarily suppress warnings till networkX fixes deprecation warnings -# (Using or importing the ABCs from 'collections' instead of from -# 'collections.abc' is deprecated, and in 3.8 it will stop working) for -# python 3.7. 
Also, this import networkx needs to be relocated in the -# third-party group once this gets fixed. -import warnings - -#rmm.mr.set_current_device_resource(rmm.mr.ManagedMemoryResource()) -#rmm.reinitialize(managed_memory=True) -#assert(rmm.is_initialized()) - - -with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=DeprecationWarning) - import networkx as nx - -# ============================================================================= -# Parameters -# ============================================================================= -DIRECTED_GRAPH_OPTIONS = [False, True] -WEIGHTED_GRAPH_OPTIONS = [False, True] -ENDPOINTS_OPTIONS = [False, True] -NORMALIZED_OPTIONS = [False, True] -DEFAULT_EPSILON = 0.0001 - -SUBSET_SIZE_OPTIONS = [4, None] -SUBSET_SEED_OPTIONS = [42] - -# NOTE: The following is not really being exploited in the tests as the -# datasets that are used are too small to compare, but it ensures that both -# path are actually sane -RESULT_DTYPE_OPTIONS = [np.float32, np.float64] - - -# ============================================================================= -# Comparison functions -# ============================================================================= -def calc_betweenness_centrality( - Fixture_params_dts_sml, - directed=True, #no need of this - k=None, - normalized=False, - weight=None, - endpoints=False, - seed=None, - result_dtype=np.float64, - use_k_full=False, - multi_gpu_batch=False, - edgevals=False, #no need of this -): - """ Generate both cugraph and networkx betweenness centrality - - Parameters - ---------- - graph_file : string - Path to COO Graph representation in .csv format - - directed : bool, optional, default=True - - k : int or None, optional, default=None - int: Number of sources to sample from - None: All sources are used to compute - - normalized : bool - True: Normalize Betweenness Centrality scores - False: Scores are left unnormalized - - weight : cudf.DataFrame: - Not supported as of 06/2020 - - endpoints : bool - True: Endpoints are included when computing scores - False: Endpoints are not considered - - seed : int or None, optional, default=None - Seed for random sampling of the starting point - - result_dtype : numpy.dtype - Expected type of the result, either np.float32 or np.float64 - - use_k_full : bool - When True, if k is None replaces k by the number of sources of the - Graph - - multi_gpu_batch : bool - When True, enable mg batch after constructing the graph - - edgevals: bool - When True, enable tests with weighted graph, should be ignored - during computation. - - Returns - ------- - - sorted_df : cudf.DataFrame - Contains 'vertex' and 'cu_bc' 'ref_bc' columns, where 'cu_bc' - and 'ref_bc' are the two betweenness centrality scores to compare. - The dataframe is expected to be sorted based on 'vertex', so that we - can use cupy.isclose to compare the scores. 
- """ - G = None - Gnx = None - - G, Gnx = Fixture_params_dts_sml - - - - assert G is not None and Gnx is not None - if multi_gpu_batch: - G.enable_batch() - - calc_func = None - if k is not None and seed is not None: - calc_func = _calc_bc_subset - elif k is not None: - calc_func = _calc_bc_subset_fixed - else: # We processed to a comparison using every sources - if use_k_full: - k = Gnx.number_of_nodes() - calc_func = _calc_bc_full - sorted_df = calc_func( - G, - Gnx, - k=k, - normalized=normalized, - weight=weight, - endpoints=endpoints, - seed=seed, - result_dtype=result_dtype, - ) - - return sorted_df - - -def _calc_bc_subset( - G, Gnx, normalized, weight, endpoints, k, seed, result_dtype -): - # NOTE: Networkx API does not allow passing a list of vertices - # And the sampling is operated on Gnx.nodes() directly - # We first mimic acquisition of the nodes to compare with same sources - random.seed(seed) # It will be called again in nx's call - sources = random.sample(Gnx.nodes(), k) - df = cugraph.betweenness_centrality( - G, - k=sources, - normalized=normalized, - weight=weight, - endpoints=endpoints, - result_dtype=result_dtype, - ) - sorted_df = df.sort_values("vertex").rename( - columns={"betweenness_centrality": "cu_bc"}, copy=False - ).reset_index(drop=True) - - nx_bc = nx.betweenness_centrality( - Gnx, - k=k, - normalized=normalized, - weight=weight, - endpoints=endpoints, - seed=seed, - ) - - _, nx_bc = zip(*sorted(nx_bc.items())) - nx_df = cudf.DataFrame({"ref_bc": nx_bc}) - - merged_sorted_df = cudf.concat([sorted_df, nx_df], axis=1, sort=False) - - return merged_sorted_df - - -def _calc_bc_subset_fixed( - G, Gnx, normalized, weight, endpoints, k, seed, result_dtype -): - assert isinstance(k, int), ( - "This test is meant for verifying coherence " - "when k is given as an int" - ) - # In the fixed set we compare cu_bc against itself as we random.seed(seed) - # on the same seed and then sample on the number of vertices themselves - if seed is None: - seed = 123 # random.seed(None) uses time, but we want same sources - random.seed(seed) # It will be called again in cugraph's call - sources = random.sample(range(G.number_of_vertices()), k) - - if G.renumbered: - sources_df = cudf.DataFrame({'src': sources}) - sources = G.unrenumber(sources_df, 'src')['src'].to_pandas().tolist() - - # The first call is going to proceed to the random sampling in the same - # fashion as the lines above - df = cugraph.betweenness_centrality( - G, - k=k, - normalized=normalized, - weight=weight, - endpoints=endpoints, - seed=seed, - result_dtype=result_dtype, - ) - sorted_df = df.sort_values("vertex").rename( - columns={"betweenness_centrality": "cu_bc"}, copy=False - ).reset_index(drop=True) - - # The second call is going to process source that were already sampled - # We set seed to None as k : int, seed : not none should not be normal - # behavior - df2 = cugraph.betweenness_centrality( - G, - k=sources, - normalized=normalized, - weight=weight, - endpoints=endpoints, - seed=None, - result_dtype=result_dtype, - ) - sorted_df2 = df2.sort_values("vertex").rename( - columns={"betweenness_centrality": "ref_bc"}, copy=False - ).reset_index(drop=True) - - merged_sorted_df = cudf.concat( - [sorted_df, sorted_df2["ref_bc"]], axis=1, sort=False - ) - - return merged_sorted_df - - -def _calc_bc_full( - G, Gnx, normalized, weight, endpoints, k, seed, result_dtype -): - df = cugraph.betweenness_centrality( - G, - k=k, - normalized=normalized, - weight=weight, - endpoints=endpoints, - 
result_dtype=result_dtype, - ) - assert ( - df["betweenness_centrality"].dtype == result_dtype - ), "'betweenness_centrality' column has not the expected type" - nx_bc = nx.betweenness_centrality( - Gnx, k=k, normalized=normalized, weight=weight, endpoints=endpoints - ) - - sorted_df = df.sort_values("vertex").rename( - columns={"betweenness_centrality": "cu_bc"}, copy=False - ).reset_index(drop=True) - _, nx_bc = zip(*sorted(nx_bc.items())) - nx_df = cudf.DataFrame({"ref_bc": nx_bc}) - - merged_sorted_df = cudf.concat([sorted_df, nx_df], axis=1, sort=False) - - return merged_sorted_df - - -# ============================================================================= -# Utils -# ============================================================================= -# NOTE: We assume that both column are ordered in such way that values -# at ith positions are expected to be compared in both columns -# i.e: sorted_df[idx][first_key] should be compared to -# sorted_df[idx][second_key] -def compare_scores(sorted_df, first_key, second_key, epsilon=DEFAULT_EPSILON): - errors = sorted_df[ - ~cupy.isclose( - sorted_df[first_key], sorted_df[second_key], rtol=epsilon - ) - ] - num_errors = len(errors) - if num_errors > 0: - print(errors) - assert ( - num_errors == 0 - ), "Mismatch were found when comparing '{}' and '{}' (rtol = {})".format( - first_key, second_key, epsilon - ) - - -def prepare_test(): - gc.collect() - - - -# ============================================================================= -# Pytest Fixtures -# ============================================================================= -DIRECTED = [pytest.param(d) for d in DIRECTED_GRAPH_OPTIONS] -DATASETS_SMALL = [pytest.param(d) for d in utils.DATASETS_SMALL] -DATASETS_LARGE = [pytest.param(d) for d in utils.DATASETS_LARGE] -DATASETS_UNRENUMBERED = [pytest.param(d) for d in utils.DATASETS_UNRENUMBERED] -WEIGHTED_GRAPH_OPTIONS = [pytest.param(w) for w in WEIGHTED_GRAPH_OPTIONS] - - -fixture_params_dts_sml = utils.genFixtureParamsProduct( - (DATASETS_SMALL, "grph"), - (DIRECTED, "dirctd"), - (WEIGHTED_GRAPH_OPTIONS, "wgtd_gph_opts")) - -fixture_params_dts_urnbrd = utils.genFixtureParamsProduct( - (DATASETS_UNRENUMBERED, "grph"), - (DIRECTED, "dirctd"), - (WEIGHTED_GRAPH_OPTIONS, "wgtd_gph_opts")) - -#parameters for large graph -fixture_params_dts_lrg = utils.genFixtureParamsProduct( - (DATASETS_LARGE, "grph"), - (DIRECTED, "dirctd"), - (WEIGHTED_GRAPH_OPTIONS, "wgtd_gph_opts")) - - -@pytest.fixture(scope="module", params=fixture_params_dts_sml) -def Fixture_params_dts_sml(request): - return utils.build_cu_and_nx_graphs(*request.param) - - - - -@pytest.fixture(scope="module", params=fixture_params_dts_urnbrd) -def Fixture_params_dts_urnbrd(request): - return utils.build_cu_and_nx_graphs(*request.param) - - - - -#Fixture added for the large dataset -@pytest.fixture(scope="module", params=fixture_params_dts_lrg) -def Fixture_params_dts_lrg(request): - try: - return utils.build_cu_and_nx_graphs(*request.param) - except MemoryError: - rmm.reinitialize(managed_memory=True) - assert(rmm.is_initialized()) - return utils.build_cu_and_nx_graphs(*request.param) - - - - - - -# ============================================================================= -# Tests -# ============================================================================= -@pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) -@pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) -@pytest.mark.parametrize("weight", [None]) -@pytest.mark.parametrize("endpoints", 
ENDPOINTS_OPTIONS) -@pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS) -@pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) -def test_betweenness_centrality( - Fixture_params_dts_sml, - subset_size, - normalized, - weight, - endpoints, - subset_seed, - result_dtype, -): - prepare_test() - sorted_df = calc_betweenness_centrality( - Fixture_params_dts_sml, - normalized=normalized, - k=subset_size, - weight=weight, - endpoints=endpoints, - seed=subset_seed, - result_dtype=result_dtype, - ) - compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") - -#Test for the large dataset. omitting all parameters except G and Gnx - -def test_betweenness_centrality_large( - Fixture_params_dts_lrg -): - prepare_test() - sorted_df = calc_betweenness_centrality( #used default paramters of this function - Fixture_params_dts_lrg - ) - compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") - - - - - - -@pytest.mark.parametrize("subset_size", [None]) -@pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) -@pytest.mark.parametrize("weight", [None]) -@pytest.mark.parametrize("endpoints", ENDPOINTS_OPTIONS) -@pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS) -@pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) -@pytest.mark.parametrize("use_k_full", [True]) -def test_betweenness_centrality_k_full( - Fixture_params_dts_sml, - subset_size, - normalized, - weight, - endpoints, - subset_seed, - result_dtype, - use_k_full, -): - """Tests full betweenness centrality by using k = G.number_of_vertices() - instead of k=None, checks that k scales properly""" - prepare_test() - sorted_df = calc_betweenness_centrality( - Fixture_params_dts_sml, - normalized=normalized, - k=subset_size, - weight=weight, - endpoints=endpoints, - seed=subset_seed, - result_dtype=result_dtype, - use_k_full=use_k_full, - ) - compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") - - -# NOTE: This test should only be execute on unrenumbered datasets -# the function operating the comparison inside is first proceeding -# to a random sampling over the number of vertices (thus direct offsets) -# in the graph structure instead of actual vertices identifiers -@pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) -@pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) -@pytest.mark.parametrize("weight", [None]) -@pytest.mark.parametrize("endpoints", ENDPOINTS_OPTIONS) -@pytest.mark.parametrize("subset_seed", [None]) -@pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) -def test_betweenness_centrality_fixed_sample( - Fixture_params_dts_urnbrd, - subset_size, - normalized, - weight, - endpoints, - subset_seed, - result_dtype, -): - """Test Betweenness Centrality using a subset - - Only k sources are considered for an approximate Betweenness Centrality - """ - prepare_test() - sorted_df = calc_betweenness_centrality( - Fixture_params_dts_urnbrd, - k=subset_size, - normalized=normalized, - weight=weight, - endpoints=endpoints, - seed=subset_seed, - result_dtype=result_dtype, - ) - compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") - - -@pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) -@pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) -@pytest.mark.parametrize("weight", [[]]) -@pytest.mark.parametrize("endpoints", ENDPOINTS_OPTIONS) -@pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS) -@pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) -def test_betweenness_centrality_weight_except( - 
Fixture_params_dts_sml, - subset_size, - normalized, - weight, - endpoints, - subset_seed, - result_dtype, -): - """Calls betwenness_centrality with weight - - As of 05/28/2020, weight is not supported and should raise - a NotImplementedError - """ - prepare_test() - with pytest.raises(NotImplementedError): - sorted_df = calc_betweenness_centrality( - Fixture_params_dts_sml, - k=subset_size, - normalized=normalized, - weight=weight, - endpoints=endpoints, - seed=subset_seed, - result_dtype=result_dtype, - ) - compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") - - - -@pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) -@pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) -@pytest.mark.parametrize("weight", [None]) -@pytest.mark.parametrize("endpoints", ENDPOINTS_OPTIONS) -@pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS) -@pytest.mark.parametrize("result_dtype", [str]) -def test_betweenness_invalid_dtype( - Fixture_params_dts_sml, - subset_size, - normalized, - weight, - endpoints, - subset_seed, - result_dtype, -): - """Test calls edge_betwenness_centrality an invalid type""" - - prepare_test() - with pytest.raises(TypeError): - sorted_df = calc_betweenness_centrality( - Fixture_params_dts_sml, - k=subset_size, - normalized=normalized, - weight=weight, - endpoints=endpoints, - seed=subset_seed, - result_dtype=result_dtype, - ) - compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") - - -@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) -@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) -@pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS) -def test_betweenness_centrality_nx( - graph_file, - directed, - edgevals -): - prepare_test() - - Gnx = utils.generate_nx_graph_from_file(graph_file, directed, edgevals) - - nx_bc = nx.betweenness_centrality(Gnx) - cu_bc = cugraph.betweenness_centrality(Gnx) - - # Calculating mismatch - networkx_bc = sorted(nx_bc.items(), key=lambda x: x[0]) - cugraph_bc = sorted(cu_bc.items(), key=lambda x: x[0]) - err = 0 - assert len(cugraph_bc) == len(networkx_bc) - for i in range(len(cugraph_bc)): - if ( - abs(cugraph_bc[i][1] - networkx_bc[i][1]) > 0.01 - and cugraph_bc[i][0] == networkx_bc[i][0] - ): - err = err + 1 - print(f"{cugraph_bc[i][1]} and {cugraph_bc[i][1]}") - print("Mismatches:", err) - assert err < (0.01 * len(cugraph_bc)) - - - From ab5367013a0b930b30e5597496a3c571534ae3b9 Mon Sep 17 00:00:00 2001 From: Joseph Nke Date: Tue, 12 Jan 2021 13:29:55 -0500 Subject: [PATCH 105/343] =refac --- .../tests/test_betweenness_centrality.py | 106 ++-- .../test_betweenness_centrality_refac.py | 532 ------------------ python/cugraph/tests/utils.py | 61 +- python/cugraph/tests/utils_env.py | 476 ---------------- 4 files changed, 99 insertions(+), 1076 deletions(-) mode change 100644 => 100755 python/cugraph/tests/test_betweenness_centrality.py delete mode 100755 python/cugraph/tests/test_betweenness_centrality_refac.py mode change 100644 => 100755 python/cugraph/tests/utils.py delete mode 100755 python/cugraph/tests/utils_env.py diff --git a/python/cugraph/tests/test_betweenness_centrality.py b/python/cugraph/tests/test_betweenness_centrality.py old mode 100644 new mode 100755 index 33b2842645d..cccd00108e9 --- a/python/cugraph/tests/test_betweenness_centrality.py +++ b/python/cugraph/tests/test_betweenness_centrality.py @@ -55,8 +55,8 @@ # Comparison functions # ============================================================================= def calc_betweenness_centrality( - graph_file, 
- directed=True, + Fixture_params_dts_sml, + directed=True, #no need of this k=None, normalized=False, weight=None, @@ -65,7 +65,7 @@ def calc_betweenness_centrality( result_dtype=np.float64, use_k_full=False, multi_gpu_batch=False, - edgevals=False, + edgevals=False, #no need of this ): """ Generate both cugraph and networkx betweenness centrality @@ -120,8 +120,9 @@ def calc_betweenness_centrality( G = None Gnx = None - G, Gnx = utils.build_cu_and_nx_graphs(graph_file, directed=directed, - edgevals=edgevals) + G, Gnx = Fixture_params_dts_sml + + assert G is not None and Gnx is not None if multi_gpu_batch: @@ -297,48 +298,72 @@ def compare_scores(sorted_df, first_key, second_key, epsilon=DEFAULT_EPSILON): def prepare_test(): gc.collect() + + + +# ============================================================================= +# Pytest Fixtures +# ============================================================================= +DIRECTED = [pytest.param(d) for d in DIRECTED_GRAPH_OPTIONS] +DATASETS_SMALL = [pytest.param(d) for d in utils.DATASETS_SMALL] +DATASETS_UNRENUMBERED = [pytest.param(d) for d in utils.DATASETS_UNRENUMBERED] +WEIGHTED_GRAPH_OPTIONS = [pytest.param(w) for w in WEIGHTED_GRAPH_OPTIONS] + + +fixture_params_dts_sml = utils.genFixtureParamsProduct( + (DATASETS_SMALL, "grph"), + (DIRECTED, "dirctd"), + (WEIGHTED_GRAPH_OPTIONS, "wgtd_gph_opts")) + +fixture_params_dts_urnbrd = utils.genFixtureParamsProduct( + (DATASETS_UNRENUMBERED, "grph"), + (DIRECTED, "dirctd"), + (WEIGHTED_GRAPH_OPTIONS, "wgtd_gph_opts")) + +@pytest.fixture(scope="module", params=fixture_params_dts_sml) +def Fixture_params_dts_sml(request): + return utils.build_cu_and_nx_graphs(*request.param) + + + + +@pytest.fixture(scope="module", params=fixture_params_dts_urnbrd) +def Fixture_params_dts_urnbrd(request): + return utils.build_cu_and_nx_graphs(*request.param) + + # ============================================================================= # Tests # ============================================================================= -@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) -@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) @pytest.mark.parametrize("weight", [None]) @pytest.mark.parametrize("endpoints", ENDPOINTS_OPTIONS) @pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS) @pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) -@pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS) def test_betweenness_centrality( - graph_file, - directed, + Fixture_params_dts_sml, subset_size, normalized, weight, endpoints, subset_seed, result_dtype, - edgevals ): prepare_test() sorted_df = calc_betweenness_centrality( - graph_file, - directed=directed, + Fixture_params_dts_sml, normalized=normalized, k=subset_size, weight=weight, endpoints=endpoints, seed=subset_seed, result_dtype=result_dtype, - edgevals=edgevals, ) compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") - -@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) -@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", [None]) @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) @pytest.mark.parametrize("weight", [None]) @@ -346,10 +371,8 @@ def test_betweenness_centrality( @pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS) @pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) 
@pytest.mark.parametrize("use_k_full", [True]) -@pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS) def test_betweenness_centrality_k_full( - graph_file, - directed, + Fixture_params_dts_sml, subset_size, normalized, weight, @@ -357,14 +380,12 @@ def test_betweenness_centrality_k_full( subset_seed, result_dtype, use_k_full, - edgevals ): """Tests full betweenness centrality by using k = G.number_of_vertices() instead of k=None, checks that k scales properly""" prepare_test() sorted_df = calc_betweenness_centrality( - graph_file, - directed=directed, + Fixture_params_dts_sml, normalized=normalized, k=subset_size, weight=weight, @@ -372,7 +393,6 @@ def test_betweenness_centrality_k_full( seed=subset_seed, result_dtype=result_dtype, use_k_full=use_k_full, - edgevals=edgevals ) compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") @@ -381,25 +401,20 @@ def test_betweenness_centrality_k_full( # the function operating the comparison inside is first proceeding # to a random sampling over the number of vertices (thus direct offsets) # in the graph structure instead of actual vertices identifiers -@pytest.mark.parametrize("graph_file", utils.DATASETS_UNRENUMBERED) -@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) @pytest.mark.parametrize("weight", [None]) @pytest.mark.parametrize("endpoints", ENDPOINTS_OPTIONS) @pytest.mark.parametrize("subset_seed", [None]) @pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) -@pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS) def test_betweenness_centrality_fixed_sample( - graph_file, - directed, + Fixture_params_dts_urnbrd, subset_size, normalized, weight, endpoints, subset_seed, result_dtype, - edgevals ): """Test Betweenness Centrality using a subset @@ -407,38 +422,31 @@ def test_betweenness_centrality_fixed_sample( """ prepare_test() sorted_df = calc_betweenness_centrality( - graph_file, - directed=directed, + Fixture_params_dts_urnbrd, k=subset_size, normalized=normalized, weight=weight, endpoints=endpoints, seed=subset_seed, result_dtype=result_dtype, - edgevals=edgevals ) compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") - -@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) -@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) + @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) @pytest.mark.parametrize("weight", [[]]) @pytest.mark.parametrize("endpoints", ENDPOINTS_OPTIONS) @pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS) @pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) -@pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS) def test_betweenness_centrality_weight_except( - graph_file, - directed, + Fixture_params_dts_sml, subset_size, normalized, weight, endpoints, subset_seed, result_dtype, - edgevals ): """Calls betwenness_centrality with weight @@ -448,53 +456,45 @@ def test_betweenness_centrality_weight_except( prepare_test() with pytest.raises(NotImplementedError): sorted_df = calc_betweenness_centrality( - graph_file, - directed=directed, + Fixture_params_dts_sml, k=subset_size, normalized=normalized, weight=weight, endpoints=endpoints, seed=subset_seed, result_dtype=result_dtype, - edgevals=edgevals ) compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") -@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) 
-@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) + @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @pytest.mark.parametrize("weight", [None]) @pytest.mark.parametrize("endpoints", ENDPOINTS_OPTIONS) @pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS) @pytest.mark.parametrize("result_dtype", [str]) -@pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS) def test_betweenness_invalid_dtype( - graph_file, - directed, + Fixture_params_dts_sml, subset_size, normalized, weight, endpoints, subset_seed, result_dtype, - edgevals ): """Test calls edge_betwenness_centrality an invalid type""" prepare_test() with pytest.raises(TypeError): sorted_df = calc_betweenness_centrality( - graph_file, - directed=directed, + Fixture_params_dts_sml, k=subset_size, normalized=normalized, weight=weight, endpoints=endpoints, seed=subset_seed, result_dtype=result_dtype, - edgevals=edgevals ) compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") @@ -528,3 +528,5 @@ def test_betweenness_centrality_nx( print(f"{cugraph_bc[i][1]} and {cugraph_bc[i][1]}") print("Mismatches:", err) assert err < (0.01 * len(cugraph_bc)) + + diff --git a/python/cugraph/tests/test_betweenness_centrality_refac.py b/python/cugraph/tests/test_betweenness_centrality_refac.py deleted file mode 100755 index e6194e52a7e..00000000000 --- a/python/cugraph/tests/test_betweenness_centrality_refac.py +++ /dev/null @@ -1,532 +0,0 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION.: -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import gc - -import pytest - -import cugraph -from cugraph.tests import utils -import random -import numpy as np -import cudf -import cupy - -# Temporarily suppress warnings till networkX fixes deprecation warnings -# (Using or importing the ABCs from 'collections' instead of from -# 'collections.abc' is deprecated, and in 3.8 it will stop working) for -# python 3.7. Also, this import networkx needs to be relocated in the -# third-party group once this gets fixed. 
-import warnings - -with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=DeprecationWarning) - import networkx as nx - -# ============================================================================= -# Parameters -# ============================================================================= -DIRECTED_GRAPH_OPTIONS = [False, True] -WEIGHTED_GRAPH_OPTIONS = [False, True] -ENDPOINTS_OPTIONS = [False, True] -NORMALIZED_OPTIONS = [False, True] -DEFAULT_EPSILON = 0.0001 - -SUBSET_SIZE_OPTIONS = [4, None] -SUBSET_SEED_OPTIONS = [42] - -# NOTE: The following is not really being exploited in the tests as the -# datasets that are used are too small to compare, but it ensures that both -# path are actually sane -RESULT_DTYPE_OPTIONS = [np.float32, np.float64] - - -# ============================================================================= -# Comparison functions -# ============================================================================= -def calc_betweenness_centrality( - Fixture_params_dts_sml, - directed=True, #no need of this - k=None, - normalized=False, - weight=None, - endpoints=False, - seed=None, - result_dtype=np.float64, - use_k_full=False, - multi_gpu_batch=False, - edgevals=False, #no need of this -): - """ Generate both cugraph and networkx betweenness centrality - - Parameters - ---------- - graph_file : string - Path to COO Graph representation in .csv format - - directed : bool, optional, default=True - - k : int or None, optional, default=None - int: Number of sources to sample from - None: All sources are used to compute - - normalized : bool - True: Normalize Betweenness Centrality scores - False: Scores are left unnormalized - - weight : cudf.DataFrame: - Not supported as of 06/2020 - - endpoints : bool - True: Endpoints are included when computing scores - False: Endpoints are not considered - - seed : int or None, optional, default=None - Seed for random sampling of the starting point - - result_dtype : numpy.dtype - Expected type of the result, either np.float32 or np.float64 - - use_k_full : bool - When True, if k is None replaces k by the number of sources of the - Graph - - multi_gpu_batch : bool - When True, enable mg batch after constructing the graph - - edgevals: bool - When True, enable tests with weighted graph, should be ignored - during computation. - - Returns - ------- - - sorted_df : cudf.DataFrame - Contains 'vertex' and 'cu_bc' 'ref_bc' columns, where 'cu_bc' - and 'ref_bc' are the two betweenness centrality scores to compare. - The dataframe is expected to be sorted based on 'vertex', so that we - can use cupy.isclose to compare the scores. 
- """ - G = None - Gnx = None - - G, Gnx = Fixture_params_dts_sml - - - - assert G is not None and Gnx is not None - if multi_gpu_batch: - G.enable_batch() - - calc_func = None - if k is not None and seed is not None: - calc_func = _calc_bc_subset - elif k is not None: - calc_func = _calc_bc_subset_fixed - else: # We processed to a comparison using every sources - if use_k_full: - k = Gnx.number_of_nodes() - calc_func = _calc_bc_full - sorted_df = calc_func( - G, - Gnx, - k=k, - normalized=normalized, - weight=weight, - endpoints=endpoints, - seed=seed, - result_dtype=result_dtype, - ) - - return sorted_df - - -def _calc_bc_subset( - G, Gnx, normalized, weight, endpoints, k, seed, result_dtype -): - # NOTE: Networkx API does not allow passing a list of vertices - # And the sampling is operated on Gnx.nodes() directly - # We first mimic acquisition of the nodes to compare with same sources - random.seed(seed) # It will be called again in nx's call - sources = random.sample(Gnx.nodes(), k) - df = cugraph.betweenness_centrality( - G, - k=sources, - normalized=normalized, - weight=weight, - endpoints=endpoints, - result_dtype=result_dtype, - ) - sorted_df = df.sort_values("vertex").rename( - columns={"betweenness_centrality": "cu_bc"}, copy=False - ).reset_index(drop=True) - - nx_bc = nx.betweenness_centrality( - Gnx, - k=k, - normalized=normalized, - weight=weight, - endpoints=endpoints, - seed=seed, - ) - - _, nx_bc = zip(*sorted(nx_bc.items())) - nx_df = cudf.DataFrame({"ref_bc": nx_bc}) - - merged_sorted_df = cudf.concat([sorted_df, nx_df], axis=1, sort=False) - - return merged_sorted_df - - -def _calc_bc_subset_fixed( - G, Gnx, normalized, weight, endpoints, k, seed, result_dtype -): - assert isinstance(k, int), ( - "This test is meant for verifying coherence " - "when k is given as an int" - ) - # In the fixed set we compare cu_bc against itself as we random.seed(seed) - # on the same seed and then sample on the number of vertices themselves - if seed is None: - seed = 123 # random.seed(None) uses time, but we want same sources - random.seed(seed) # It will be called again in cugraph's call - sources = random.sample(range(G.number_of_vertices()), k) - - if G.renumbered: - sources_df = cudf.DataFrame({'src': sources}) - sources = G.unrenumber(sources_df, 'src')['src'].to_pandas().tolist() - - # The first call is going to proceed to the random sampling in the same - # fashion as the lines above - df = cugraph.betweenness_centrality( - G, - k=k, - normalized=normalized, - weight=weight, - endpoints=endpoints, - seed=seed, - result_dtype=result_dtype, - ) - sorted_df = df.sort_values("vertex").rename( - columns={"betweenness_centrality": "cu_bc"}, copy=False - ).reset_index(drop=True) - - # The second call is going to process source that were already sampled - # We set seed to None as k : int, seed : not none should not be normal - # behavior - df2 = cugraph.betweenness_centrality( - G, - k=sources, - normalized=normalized, - weight=weight, - endpoints=endpoints, - seed=None, - result_dtype=result_dtype, - ) - sorted_df2 = df2.sort_values("vertex").rename( - columns={"betweenness_centrality": "ref_bc"}, copy=False - ).reset_index(drop=True) - - merged_sorted_df = cudf.concat( - [sorted_df, sorted_df2["ref_bc"]], axis=1, sort=False - ) - - return merged_sorted_df - - -def _calc_bc_full( - G, Gnx, normalized, weight, endpoints, k, seed, result_dtype -): - df = cugraph.betweenness_centrality( - G, - k=k, - normalized=normalized, - weight=weight, - endpoints=endpoints, - 
result_dtype=result_dtype, - ) - assert ( - df["betweenness_centrality"].dtype == result_dtype - ), "'betweenness_centrality' column has not the expected type" - nx_bc = nx.betweenness_centrality( - Gnx, k=k, normalized=normalized, weight=weight, endpoints=endpoints - ) - - sorted_df = df.sort_values("vertex").rename( - columns={"betweenness_centrality": "cu_bc"}, copy=False - ).reset_index(drop=True) - _, nx_bc = zip(*sorted(nx_bc.items())) - nx_df = cudf.DataFrame({"ref_bc": nx_bc}) - - merged_sorted_df = cudf.concat([sorted_df, nx_df], axis=1, sort=False) - - return merged_sorted_df - - -# ============================================================================= -# Utils -# ============================================================================= -# NOTE: We assume that both column are ordered in such way that values -# at ith positions are expected to be compared in both columns -# i.e: sorted_df[idx][first_key] should be compared to -# sorted_df[idx][second_key] -def compare_scores(sorted_df, first_key, second_key, epsilon=DEFAULT_EPSILON): - errors = sorted_df[ - ~cupy.isclose( - sorted_df[first_key], sorted_df[second_key], rtol=epsilon - ) - ] - num_errors = len(errors) - if num_errors > 0: - print(errors) - assert ( - num_errors == 0 - ), "Mismatch were found when comparing '{}' and '{}' (rtol = {})".format( - first_key, second_key, epsilon - ) - - -def prepare_test(): - gc.collect() - - - -# ============================================================================= -# Pytest Fixtures -# ============================================================================= -DIRECTED = [pytest.param(d) for d in DIRECTED_GRAPH_OPTIONS] -DATASETS_SMALL = [pytest.param(d) for d in utils.DATASETS_SMALL] -DATASETS_UNRENUMBERED = [pytest.param(d) for d in utils.DATASETS_UNRENUMBERED] -WEIGHTED_GRAPH_OPTIONS = [pytest.param(w) for w in WEIGHTED_GRAPH_OPTIONS] - - -fixture_params_dts_sml = utils.genFixtureParamsProduct( - (DATASETS_SMALL, "grph"), - (DIRECTED, "dirctd"), - (WEIGHTED_GRAPH_OPTIONS, "wgtd_gph_opts")) - -fixture_params_dts_urnbrd = utils.genFixtureParamsProduct( - (DATASETS_UNRENUMBERED, "grph"), - (DIRECTED, "dirctd"), - (WEIGHTED_GRAPH_OPTIONS, "wgtd_gph_opts")) - -@pytest.fixture(scope="module", params=fixture_params_dts_sml) -def Fixture_params_dts_sml(request): - return utils.build_cu_and_nx_graphs(*request.param) - - - - -@pytest.fixture(scope="module", params=fixture_params_dts_urnbrd) -def Fixture_params_dts_urnbrd(request): - return utils.build_cu_and_nx_graphs(*request.param) - - - - -# ============================================================================= -# Tests -# ============================================================================= -@pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) -@pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) -@pytest.mark.parametrize("weight", [None]) -@pytest.mark.parametrize("endpoints", ENDPOINTS_OPTIONS) -@pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS) -@pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) -def test_betweenness_centralityy( - Fixture_params_dts_sml, - subset_size, - normalized, - weight, - endpoints, - subset_seed, - result_dtype, -): - prepare_test() - sorted_df = calc_betweenness_centrality( - Fixture_params_dts_sml, - normalized=normalized, - k=subset_size, - weight=weight, - endpoints=endpoints, - seed=subset_seed, - result_dtype=result_dtype, - ) - compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") - -@pytest.mark.parametrize("subset_size", 
[None]) -@pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) -@pytest.mark.parametrize("weight", [None]) -@pytest.mark.parametrize("endpoints", ENDPOINTS_OPTIONS) -@pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS) -@pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) -@pytest.mark.parametrize("use_k_full", [True]) -def test_betweenness_centrality_k_full( - Fixture_params_dts_sml, - subset_size, - normalized, - weight, - endpoints, - subset_seed, - result_dtype, - use_k_full, -): - """Tests full betweenness centrality by using k = G.number_of_vertices() - instead of k=None, checks that k scales properly""" - prepare_test() - sorted_df = calc_betweenness_centrality( - Fixture_params_dts_sml, - normalized=normalized, - k=subset_size, - weight=weight, - endpoints=endpoints, - seed=subset_seed, - result_dtype=result_dtype, - use_k_full=use_k_full, - ) - compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") - - -# NOTE: This test should only be execute on unrenumbered datasets -# the function operating the comparison inside is first proceeding -# to a random sampling over the number of vertices (thus direct offsets) -# in the graph structure instead of actual vertices identifiers -@pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) -@pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) -@pytest.mark.parametrize("weight", [None]) -@pytest.mark.parametrize("endpoints", ENDPOINTS_OPTIONS) -@pytest.mark.parametrize("subset_seed", [None]) -@pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) -def test_betweenness_centrality_fixed_sample( - Fixture_params_dts_urnbrd, - subset_size, - normalized, - weight, - endpoints, - subset_seed, - result_dtype, -): - """Test Betweenness Centrality using a subset - - Only k sources are considered for an approximate Betweenness Centrality - """ - prepare_test() - sorted_df = calc_betweenness_centrality( - Fixture_params_dts_urnbrd, - k=subset_size, - normalized=normalized, - weight=weight, - endpoints=endpoints, - seed=subset_seed, - result_dtype=result_dtype, - ) - compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") - - -@pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) -@pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) -@pytest.mark.parametrize("weight", [[]]) -@pytest.mark.parametrize("endpoints", ENDPOINTS_OPTIONS) -@pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS) -@pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) -def test_betweenness_centrality_weight_except( - Fixture_params_dts_sml, - subset_size, - normalized, - weight, - endpoints, - subset_seed, - result_dtype, -): - """Calls betwenness_centrality with weight - - As of 05/28/2020, weight is not supported and should raise - a NotImplementedError - """ - prepare_test() - with pytest.raises(NotImplementedError): - sorted_df = calc_betweenness_centrality( - Fixture_params_dts_sml, - k=subset_size, - normalized=normalized, - weight=weight, - endpoints=endpoints, - seed=subset_seed, - result_dtype=result_dtype, - ) - compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") - - - -@pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) -@pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) -@pytest.mark.parametrize("weight", [None]) -@pytest.mark.parametrize("endpoints", ENDPOINTS_OPTIONS) -@pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS) -@pytest.mark.parametrize("result_dtype", [str]) -def test_betweenness_invalid_dtype( - Fixture_params_dts_sml, - 
subset_size, - normalized, - weight, - endpoints, - subset_seed, - result_dtype, -): - """Test calls edge_betwenness_centrality an invalid type""" - - prepare_test() - with pytest.raises(TypeError): - sorted_df = calc_betweenness_centrality( - Fixture_params_dts_sml, - k=subset_size, - normalized=normalized, - weight=weight, - endpoints=endpoints, - seed=subset_seed, - result_dtype=result_dtype, - ) - compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") - - -@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) -@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) -@pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS) -def test_betweenness_centrality_nx( - graph_file, - directed, - edgevals -): - prepare_test() - - Gnx = utils.generate_nx_graph_from_file(graph_file, directed, edgevals) - - nx_bc = nx.betweenness_centrality(Gnx) - cu_bc = cugraph.betweenness_centrality(Gnx) - - # Calculating mismatch - networkx_bc = sorted(nx_bc.items(), key=lambda x: x[0]) - cugraph_bc = sorted(cu_bc.items(), key=lambda x: x[0]) - err = 0 - assert len(cugraph_bc) == len(networkx_bc) - for i in range(len(cugraph_bc)): - if ( - abs(cugraph_bc[i][1] - networkx_bc[i][1]) > 0.01 - and cugraph_bc[i][0] == networkx_bc[i][0] - ): - err = err + 1 - print(f"{cugraph_bc[i][1]} and {cugraph_bc[i][1]}") - print("Mismatches:", err) - assert err < (0.01 * len(cugraph_bc)) - - diff --git a/python/cugraph/tests/utils.py b/python/cugraph/tests/utils.py old mode 100644 new mode 100755 index 164c6efb084..e09cae01a0b --- a/python/cugraph/tests/utils.py +++ b/python/cugraph/tests/utils.py @@ -33,6 +33,7 @@ import cugraph from cugraph.dask.common.mg_utils import get_client +#RAPIDS_DATASET_ROOT_DIR= "/datasets" CUPY_MATRIX_TYPES = [cp_coo_matrix, cp_csr_matrix, cp_csc_matrix] SCIPY_MATRIX_TYPES = [sp_coo_matrix, sp_csr_matrix, sp_csc_matrix] @@ -40,46 +41,62 @@ # # Datasets # -DATASETS_UNDIRECTED = ["../datasets/karate.csv", "../datasets/dolphins.csv"] + +RAPIDS_DATASET_ROOT_DIR= "../datasets" + +def get_rapids_dataset_root_dir(): + envVar=os.getenv('RAPIDS_DATASET_ROOT_DIR') + if(envVar!=None): + return envVar + return RAPIDS_DATASET_ROOT_DIR + + + + + +rapidsDatasetRootDir = get_rapids_dataset_root_dir() + +DATASETS_UNDIRECTED = [os.path.join(rapidsDatasetRootDir,"karate.csv"), os.path.join(rapidsDatasetRootDir, "dolphins.csv")] DATASETS_UNDIRECTED_WEIGHTS = [ - "../datasets/netscience.csv", + os.path.join(rapidsDatasetRootDir, "netscience.csv") ] -DATASETS_UNRENUMBERED = ["../datasets/karate-disjoint.csv"] +DATASETS_UNRENUMBERED = [os.path.join(rapidsDatasetRootDir, "karate-disjoint.csv")] DATASETS = [ - "../datasets/karate-disjoint.csv", - "../datasets/dolphins.csv", - "../datasets/netscience.csv", + os.path.join(rapidsDatasetRootDir, "karate-disjoint.csv"), + os.path.join(rapidsDatasetRootDir, "dolphins.csv"), + os.path.join(rapidsDatasetRootDir, "netscience.csv"), ] # '../datasets/email-Eu-core.csv'] STRONGDATASETS = [ - "../datasets/dolphins.csv", - "../datasets/netscience.csv", - "../datasets/email-Eu-core.csv", + os.path.join(rapidsDatasetRootDir, "dolphins.csv"), + os.path.join(rapidsDatasetRootDir, "netscience.csv"), + os.path.join(rapidsDatasetRootDir, "email-Eu-core.csv"), ] DATASETS_KTRUSS = [ - ("../datasets/polbooks.csv", "../datasets/ref/ktruss/polbooks.csv") + os.path.join(rapidsDatasetRootDir,"polbooks.csv"), + os.path.join(rapidsDatasetRootDir,"/ref/ktruss/","polbooks.csv") ] DATASETS_SMALL = [ - "../datasets/karate.csv", - "../datasets/dolphins.csv", - 
"../datasets/polbooks.csv", + os.path.join(rapidsDatasetRootDir, "karate.csv"), + os.path.join(rapidsDatasetRootDir, "dolphins.csv"), + os.path.join(rapidsDatasetRootDir, "polbooks.csv"), ] MATRIX_INPUT_TYPES = [ pytest.param( - cp_coo_matrix, marks=pytest.mark.matrix_types, id="CuPy.coo_matrix" + cp_coo_matrix, marks=pytest.mark.cupy_types, id="CuPy.coo_matrix" ), pytest.param( - cp_csr_matrix, marks=pytest.mark.matrix_types, id="CuPy.csr_matrix" + cp_csr_matrix, marks=pytest.mark.cupy_types, id="CuPy.csr_matrix" ), pytest.param( - cp_csc_matrix, marks=pytest.mark.matrix_types, id="CuPy.csc_matrix" + cp_csc_matrix, marks=pytest.mark.cupy_types, id="CuPy.csc_matrix" ), ] @@ -104,6 +121,17 @@ ] + +def get_rapids_dataset_root_dir(): + envVar=os.getenv('RAPIDS_DATASET_ROOT_DIR') + if(envVar!=None): + return envVar + return RAPIDS_DATASET_ROOT_DIR + + + + + def read_csv_for_nx(csv_file, read_weights_in_sp=True, read_weights=True): print("Reading " + str(csv_file) + "...") if read_weights: @@ -445,3 +473,4 @@ def compare_mst(mst_cugraph, mst_nx): print(cg_sum) print(nx_sum) assert np.isclose(cg_sum, nx_sum) + diff --git a/python/cugraph/tests/utils_env.py b/python/cugraph/tests/utils_env.py deleted file mode 100755 index e09cae01a0b..00000000000 --- a/python/cugraph/tests/utils_env.py +++ /dev/null @@ -1,476 +0,0 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -from itertools import product - -# Assume test environment has the following dependencies installed -import pytest -import pandas as pd -import networkx as nx -import numpy as np -import cupy as cp -from cupyx.scipy.sparse.coo import coo_matrix as cp_coo_matrix -from cupyx.scipy.sparse.csr import csr_matrix as cp_csr_matrix -from cupyx.scipy.sparse.csc import csc_matrix as cp_csc_matrix -from scipy.sparse.coo import coo_matrix as sp_coo_matrix -from scipy.sparse.csr import csr_matrix as sp_csr_matrix -from scipy.sparse.csc import csc_matrix as sp_csc_matrix - -import cudf -import dask_cudf - -import cugraph -from cugraph.dask.common.mg_utils import get_client - -#RAPIDS_DATASET_ROOT_DIR= "/datasets" - -CUPY_MATRIX_TYPES = [cp_coo_matrix, cp_csr_matrix, cp_csc_matrix] -SCIPY_MATRIX_TYPES = [sp_coo_matrix, sp_csr_matrix, sp_csc_matrix] - -# -# Datasets -# - -RAPIDS_DATASET_ROOT_DIR= "../datasets" - -def get_rapids_dataset_root_dir(): - envVar=os.getenv('RAPIDS_DATASET_ROOT_DIR') - if(envVar!=None): - return envVar - return RAPIDS_DATASET_ROOT_DIR - - - - - -rapidsDatasetRootDir = get_rapids_dataset_root_dir() - -DATASETS_UNDIRECTED = [os.path.join(rapidsDatasetRootDir,"karate.csv"), os.path.join(rapidsDatasetRootDir, "dolphins.csv")] - -DATASETS_UNDIRECTED_WEIGHTS = [ - os.path.join(rapidsDatasetRootDir, "netscience.csv") -] - -DATASETS_UNRENUMBERED = [os.path.join(rapidsDatasetRootDir, "karate-disjoint.csv")] - -DATASETS = [ - os.path.join(rapidsDatasetRootDir, "karate-disjoint.csv"), - os.path.join(rapidsDatasetRootDir, "dolphins.csv"), - os.path.join(rapidsDatasetRootDir, "netscience.csv"), -] -# '../datasets/email-Eu-core.csv'] - -STRONGDATASETS = [ - os.path.join(rapidsDatasetRootDir, "dolphins.csv"), - os.path.join(rapidsDatasetRootDir, "netscience.csv"), - os.path.join(rapidsDatasetRootDir, "email-Eu-core.csv"), -] - -DATASETS_KTRUSS = [ - os.path.join(rapidsDatasetRootDir,"polbooks.csv"), - os.path.join(rapidsDatasetRootDir,"/ref/ktruss/","polbooks.csv") -] - -DATASETS_SMALL = [ - os.path.join(rapidsDatasetRootDir, "karate.csv"), - os.path.join(rapidsDatasetRootDir, "dolphins.csv"), - os.path.join(rapidsDatasetRootDir, "polbooks.csv"), -] - -MATRIX_INPUT_TYPES = [ - pytest.param( - cp_coo_matrix, marks=pytest.mark.cupy_types, id="CuPy.coo_matrix" - ), - pytest.param( - cp_csr_matrix, marks=pytest.mark.cupy_types, id="CuPy.csr_matrix" - ), - pytest.param( - cp_csc_matrix, marks=pytest.mark.cupy_types, id="CuPy.csc_matrix" - ), -] - -NX_INPUT_TYPES = [ - pytest.param(nx.Graph, marks=pytest.mark.nx_types, id="nx.Graph"), -] - -NX_DIR_INPUT_TYPES = [ - pytest.param(nx.Graph, marks=pytest.mark.nx_types, id="nx.DiGraph"), -] - -CUGRAPH_INPUT_TYPES = [ - pytest.param( - cugraph.Graph, marks=pytest.mark.cugraph_types, id="cugraph.Graph" - ), -] - -CUGRAPH_DIR_INPUT_TYPES = [ - pytest.param( - cugraph.DiGraph, marks=pytest.mark.cugraph_types, id="cugraph.DiGraph" - ), -] - - - -def get_rapids_dataset_root_dir(): - envVar=os.getenv('RAPIDS_DATASET_ROOT_DIR') - if(envVar!=None): - return envVar - return RAPIDS_DATASET_ROOT_DIR - - - - - -def read_csv_for_nx(csv_file, read_weights_in_sp=True, read_weights=True): - print("Reading " + str(csv_file) + "...") - if read_weights: - if read_weights_in_sp is True: - df = pd.read_csv( - csv_file, - delimiter=" ", - header=None, - names=["0", "1", "weight"], - dtype={"0": "int32", "1": "int32", "weight": "float32"}, - ) - else: - df = pd.read_csv( - csv_file, - delimiter=" ", - header=None, - names=["0", "1", "weight"], - dtype={"0": 
"int32", "1": "int32", "weight": "float64"}, - ) - else: - df = pd.read_csv( - csv_file, - delimiter=" ", - header=None, - names=["0", "1"], - usecols=["0", "1"], - dtype={"0": "int32", "1": "int32"}, - ) - return df - - -def create_obj_from_csv( - csv_file_name, obj_type, csv_has_weights=True, edgevals=False -): - """ - Return an object based on obj_type populated with the contents of - csv_file_name - """ - if obj_type in [cugraph.Graph, cugraph.DiGraph]: - return generate_cugraph_graph_from_file( - csv_file_name, - directed=(obj_type is cugraph.DiGraph), - edgevals=edgevals, - ) - - elif obj_type in SCIPY_MATRIX_TYPES + CUPY_MATRIX_TYPES: - # FIXME: assuming float32 - if csv_has_weights: - (rows, cols, weights) = np.genfromtxt( - csv_file_name, delimiter=" ", dtype=np.float32, unpack=True - ) - else: - (rows, cols) = np.genfromtxt( - csv_file_name, delimiter=" ", dtype=np.float32, unpack=True - ) - - if (csv_has_weights is False) or (edgevals is False): - # COO matrices must have a value array. Also if edgevals are to be - # ignored (False), reset all weights to 1. - weights = np.array([1] * len(rows)) - - if obj_type in CUPY_MATRIX_TYPES: - coo = cp_coo_matrix( - (cp.asarray(weights), (cp.asarray(rows), cp.asarray(cols))), - dtype=np.float32, - ) - else: - coo = sp_coo_matrix( - (weights, (np.array(rows, dtype=int), - np.array(cols, dtype=int))), - ) - - if obj_type in [cp_csr_matrix, sp_csr_matrix]: - return coo.tocsr(copy=False) - elif obj_type in [cp_csc_matrix, sp_csc_matrix]: - return coo.tocsc(copy=False) - else: - return coo - - elif obj_type in [nx.Graph, nx.DiGraph]: - return generate_nx_graph_from_file( - csv_file_name, directed=(obj_type is nx.DiGraph), edgevals=edgevals - ) - - else: - raise TypeError(f"unsupported type: {obj_type}") - - -def read_csv_file(csv_file, read_weights_in_sp=True): - print("Reading " + str(csv_file) + "...") - if read_weights_in_sp is True: - return cudf.read_csv( - csv_file, - delimiter=" ", - dtype=["int32", "int32", "float32"], - header=None, - ) - else: - return cudf.read_csv( - csv_file, - delimiter=" ", - dtype=["int32", "int32", "float64"], - header=None, - ) - - -def read_dask_cudf_csv_file( - csv_file, read_weights_in_sp=True, single_partition=True -): - print("Reading " + str(csv_file) + "...") - if read_weights_in_sp is True: - if single_partition: - chunksize = os.path.getsize(csv_file) - return dask_cudf.read_csv( - csv_file, - chunksize=chunksize, - delimiter=" ", - names=["src", "dst", "weight"], - dtype=["int32", "int32", "float32"], - header=None, - ) - else: - return dask_cudf.read_csv( - csv_file, - delimiter=" ", - names=["src", "dst", "weight"], - dtype=["int32", "int32", "float32"], - header=None, - ) - else: - if single_partition: - chunksize = os.path.getsize(csv_file) - return dask_cudf.read_csv( - csv_file, - chunksize=chunksize, - delimiter=" ", - names=["src", "dst", "weight"], - dtype=["int32", "int32", "float32"], - header=None, - ) - else: - return dask_cudf.read_csv( - csv_file, - delimiter=" ", - names=["src", "dst", "weight"], - dtype=["int32", "int32", "float64"], - header=None, - ) - - -def generate_nx_graph_from_file(graph_file, directed=True, edgevals=False): - M = read_csv_for_nx(graph_file, read_weights_in_sp=edgevals) - edge_attr = "weight" if edgevals else None - Gnx = nx.from_pandas_edgelist( - M, - create_using=(nx.DiGraph() if directed else nx.Graph()), - source="0", - target="1", - edge_attr=edge_attr, - ) - return Gnx - - -def generate_cugraph_graph_from_file( - graph_file, directed=True, 
edgevals=False -): - cu_M = read_csv_file(graph_file) - G = cugraph.DiGraph() if directed else cugraph.Graph() - - if edgevals: - G.from_cudf_edgelist(cu_M, source="0", destination="1", edge_attr="2") - else: - G.from_cudf_edgelist(cu_M, source="0", destination="1") - return G - - -def generate_mg_batch_cugraph_graph_from_file(graph_file, directed=True): - client = get_client() - _ddf = read_dask_cudf_csv_file(graph_file) - ddf = client.persist(_ddf) - G = cugraph.DiGraph() if directed else cugraph.Graph() - G.from_dask_cudf_edgelist(ddf) - return G - - -def build_cu_and_nx_graphs(graph_file, directed=True, edgevals=False): - G = generate_cugraph_graph_from_file(graph_file, directed=directed, - edgevals=edgevals) - Gnx = generate_nx_graph_from_file(graph_file, directed=directed, - edgevals=edgevals) - return G, Gnx - - -def build_mg_batch_cu_and_nx_graphs(graph_file, directed=True): - G = generate_mg_batch_cugraph_graph_from_file( - graph_file, directed=directed - ) - Gnx = generate_nx_graph_from_file(graph_file, directed=directed) - return G, Gnx - - -def random_edgelist( - e=1024, - ef=16, - dtypes={"src": np.int32, "dst": np.int32, "val": float}, - drop_duplicates=True, - seed=None, -): - """Create a random edge list - - Parameters - ---------- - e : int - Number of edges - ef : int - Edge factor (average number of edges per vertex) - dtypes : dict - Mapping of column names to types. - Supported type is {"src": int, "dst": int, "val": float} - drop_duplicates - Drop duplicates - seed : int (optional) - Randomstate seed - - Examples - -------- - >>> from cugraph.tests import utils - >>> # genrates 20 df with 100M edges each and write to disk - >>> for x in range(20): - >>> df = utils.random_edgelist(e=100000000, ef=64, - >>> dtypes={'src':np.int32, 'dst':np.int32}, - >>> seed=x) - >>> df.to_csv('df'+str(x), header=False, index=False) - >>> #df.to_parquet('files_parquet/df'+str(x), index=False) - """ - state = np.random.RandomState(seed) - columns = dict( - (k, make[dt](e // ef, e, state)) for k, dt in dtypes.items() - ) - - df = pd.DataFrame(columns) - if drop_duplicates: - df = df.drop_duplicates(subset=["src", "dst"]) - print("Generated " + str(df.shape[0]) + " edges") - return df - - -def make_int32(v, e, rstate): - return rstate.randint(low=0, high=v, size=e, dtype=np.int32) - - -def make_int64(v, e, rstate): - return rstate.randint(low=0, high=v, size=e, dtype=np.int64) - - -def make_float(v, e, rstate): - return rstate.rand(e) - - -make = {float: make_float, np.int32: make_int32, np.int64: make_int64} - - -def genFixtureParamsProduct(*args): - """ - Returns the cartesian product of the param lists passed in. The lists must - be flat lists of pytest.param objects, and the result will be a flat list - of pytest.param objects with values and meta-data combined accordingly. A - flat list of pytest.param objects is required for pytest fixtures to - properly recognize the params. The combinations also include ids generated - from the param values and id names associated with each list. 
For example: - - genFixtureParamsProduct( ([pytest.param(True, marks=[pytest.mark.A_good]), - pytest.param(False, marks=[pytest.mark.A_bad])], - "A"), - ([pytest.param(True, marks=[pytest.mark.B_good]), - pytest.param(False, marks=[pytest.mark.B_bad])], - "B") ) - - results in fixture param combinations: - - True, True - marks=[A_good, B_good] - id="A=True,B=True" - True, False - marks=[A_good, B_bad] - id="A=True,B=False" - False, True - marks=[A_bad, B_good] - id="A=False,B=True" - False, False - marks=[A_bad, B_bad] - id="A=False,B=False" - - Simply using itertools.product on the lists would result in a list of - sublists of individual param objects (ie. not "merged"), which would not be - recognized properly as params for a fixture by pytest. - - NOTE: This function is only needed for parameterized fixtures. - Tests/benchmarks will automatically get this behavior when specifying - multiple @pytest.mark.parameterize(param_name, param_value_list) - decorators. - """ - # Enforce that each arg is a list of pytest.param objs and separate params - # and IDs. - paramLists = [] - ids = [] - paramType = pytest.param().__class__ - for (paramList, id) in args: - for param in paramList: - assert isinstance(param, paramType) - paramLists.append(paramList) - ids.append(id) - - retList = [] - for paramCombo in product(*paramLists): - values = [p.values[0] for p in paramCombo] - marks = [m for p in paramCombo for m in p.marks] - comboid = ",".join( - ["%s=%s" % (id, p.values[0]) for (p, id) in zip(paramCombo, ids)] - ) - retList.append(pytest.param(values, marks=marks, id=comboid)) - return retList - - -# shared between min and max spanning tree tests -def compare_mst(mst_cugraph, mst_nx): - mst_nx_df = nx.to_pandas_edgelist(mst_nx) - edgelist_df = mst_cugraph.view_edge_list() - assert len(mst_nx_df) == len(edgelist_df) - - # check cycles - Gnx = nx.from_pandas_edgelist( - edgelist_df.to_pandas(), - create_using=nx.Graph(), - source="src", - target="dst", - ) - try: - lc = nx.find_cycle(Gnx, source=None, orientation="ignore") - print(lc) - except nx.NetworkXNoCycle: - pass - - # check total weight - cg_sum = edgelist_df["weights"].sum() - nx_sum = mst_nx_df["weight"].sum() - print(cg_sum) - print(nx_sum) - assert np.isclose(cg_sum, nx_sum) - From 8e71a6d9f2730469aadc1bb232f33f1dc3ed5b87 Mon Sep 17 00:00:00 2001 From: Joseph Nke Date: Tue, 12 Jan 2021 16:50:58 -0500 Subject: [PATCH 106/343] refac --- python/cugraph/tests/utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/cugraph/tests/utils.py b/python/cugraph/tests/utils.py index e09cae01a0b..dde53748780 100755 --- a/python/cugraph/tests/utils.py +++ b/python/cugraph/tests/utils.py @@ -78,8 +78,7 @@ def get_rapids_dataset_root_dir(): ] DATASETS_KTRUSS = [ - os.path.join(rapidsDatasetRootDir,"polbooks.csv"), - os.path.join(rapidsDatasetRootDir,"/ref/ktruss/","polbooks.csv") + (os.path.join(rapidsDatasetRootDir,"polbooks.csv"), os.path.join(rapidsDatasetRootDir,"ref/ktruss","polbooks.csv")) ] DATASETS_SMALL = [ From cf58b0e447b5d355f91437026e91df9d59dd556f Mon Sep 17 00:00:00 2001 From: Joseph Nke Date: Tue, 12 Jan 2021 18:47:02 -0500 Subject: [PATCH 107/343] refac --- python/cugraph/tests/test_ecg.py | 15 ++++++++------- python/cugraph/tests/test_force_atlas2.py | 11 ++++++----- python/cugraph/tests/test_jaccard.py | 5 +++-- python/cugraph/tests/test_utils.py | 7 ++++--- 4 files changed, 21 insertions(+), 17 deletions(-) diff --git a/python/cugraph/tests/test_ecg.py b/python/cugraph/tests/test_ecg.py index 
4dc01c389cc..f597b49d99a 100644 --- a/python/cugraph/tests/test_ecg.py +++ b/python/cugraph/tests/test_ecg.py @@ -14,7 +14,7 @@ import gc import pytest - +import os import networkx as nx import cugraph from cugraph.tests import utils @@ -30,18 +30,18 @@ def cugraph_call(G, min_weight, ensemble_size): def golden_call(graph_file): - if graph_file == "../datasets/dolphins.csv": + if graph_file == os.path.join(utils.rapidsDatasetRootDir, "dolphins.csv"): return 0.4962422251701355 - if graph_file == "../datasets/karate.csv": + if graph_file == os.path.join(utils.rapidsDatasetRootDir,"karate.csv"): return 0.38428664207458496 - if graph_file == "../datasets/netscience.csv": + if graph_file == os.path.join(utils.rapidsDatasetRootDir, "netscience.csv"): return 0.9279554486274719 DATASETS = [ - "../datasets/karate.csv", - "../datasets/dolphins.csv", - "../datasets/netscience.csv", + os.path.join(utils.rapidsDatasetRootDir,"karate.csv"), + os.path.join(utils.rapidsDatasetRootDir, "dolphins.csv"), + os.path.join(utils.rapidsDatasetRootDir, "netscience.csv"), ] MIN_WEIGHTS = [0.05, 0.10, 0.15] @@ -84,3 +84,4 @@ def test_ecg_clustering_nx(graph_file, min_weight, ensemble_size): # Get the modularity score for partitioning versus random assignment _ = cugraph.ecg(G, min_weight, ensemble_size, "weight") + diff --git a/python/cugraph/tests/test_force_atlas2.py b/python/cugraph/tests/test_force_atlas2.py index 4de49cb4088..373cb55d367 100644 --- a/python/cugraph/tests/test_force_atlas2.py +++ b/python/cugraph/tests/test_force_atlas2.py @@ -12,7 +12,7 @@ # limitations under the License. import time - +import os import pytest import cugraph @@ -61,10 +61,10 @@ def cugraph_call(cu_M, max_iter, pos_list, outbound_attraction_distribution, DATASETS = [ - ("../datasets/karate.csv", 0.70), - ("../datasets/polbooks.csv", 0.75), - ("../datasets/dolphins.csv", 0.66), - ("../datasets/netscience.csv", 0.66), + (os.path.join(utils.rapidsDatasetRootDir, "karate.csv"), 0.70), + (os.path.join(utils.rapidsDatasetRootDir, "polbooks.csv"), 0.75), + (os.path.join(utils.rapidsDatasetRootDir, "dolphins.csv"), 0.66), + (os.path.join(utils.rapidsDatasetRootDir, "netscience.csv"), 0.66), ] MAX_ITERATIONS = [500] BARNES_HUT_OPTIMIZE = [False, True] @@ -132,3 +132,4 @@ def test_force_atlas2(graph_file, score, max_iter, assert test_callback.on_epoch_end_called_count == max_iter # verify `on_train_end` was only called once assert test_callback.on_train_end_called_count == 1 + diff --git a/python/cugraph/tests/test_jaccard.py b/python/cugraph/tests/test_jaccard.py index 3c3f6224d83..7158a6968e5 100644 --- a/python/cugraph/tests/test_jaccard.py +++ b/python/cugraph/tests/test_jaccard.py @@ -13,7 +13,7 @@ import gc import time - +import os import pytest import cugraph @@ -113,7 +113,7 @@ def test_jaccard(graph_file): assert err == 0 -@pytest.mark.parametrize("graph_file", ["../datasets/netscience.csv"]) +@pytest.mark.parametrize("graph_file", [os.path.join(utils.rapidsDatasetRootDir, "netscience.csv")]) def test_jaccard_edgevals(graph_file): gc.collect() @@ -220,3 +220,4 @@ def test_jaccard_nx(graph_file): # FIXME: Nx does a full all-pair Jaccard. 
# cuGraph does a limited 1-hop Jaccard # assert nx_j == cg_j + diff --git a/python/cugraph/tests/test_utils.py b/python/cugraph/tests/test_utils.py index 22af649ea2e..20bfbeaba1d 100644 --- a/python/cugraph/tests/test_utils.py +++ b/python/cugraph/tests/test_utils.py @@ -13,7 +13,7 @@ import gc import pytest - +import os import cugraph from cugraph.tests import utils @@ -22,7 +22,7 @@ def test_bfs_paths(): with pytest.raises(ValueError) as ErrorMsg: gc.collect() - graph_file = '../datasets/karate.csv' + graph_file = os.path.join(utils.rapidsDatasetRootDir,"karate.csv") cu_M = utils.read_csv_file(graph_file) @@ -47,7 +47,7 @@ def test_bfs_paths_array(): with pytest.raises(ValueError) as ErrorMsg: gc.collect() - graph_file = '../datasets/karate.csv' + graph_file = os.path.join(utils.rapidsDatasetRootDir,"karate.csv") cu_M = utils.read_csv_file(graph_file) @@ -66,3 +66,4 @@ def test_bfs_paths_array(): answer = cugraph.utils.get_traversed_path_list(df, 100) assert "not in the result set" in str(ErrorMsg) + From 4ea1e7fb914e7438d291b95db4eed4f0caab8673 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 12 Jan 2021 22:43:24 -0500 Subject: [PATCH 108/343] cuco::static_map does not support Pascal, throw an exception and do not compile code using cuco_static_map on Pascal --- cpp/src/experimental/coarsen_graph.cu | 1 - .../experimental/include_cuco_static_map.cuh | 33 +++++++++++++++++ cpp/src/experimental/louvain.cuh | 17 +-------- cpp/src/experimental/relabel.cu | 9 ++++- cpp/src/experimental/renumber_edgelist.cu | 37 ++++++++++++++++++- 5 files changed, 78 insertions(+), 19 deletions(-) create mode 100644 cpp/src/experimental/include_cuco_static_map.cuh diff --git a/cpp/src/experimental/coarsen_graph.cu b/cpp/src/experimental/coarsen_graph.cu index 4b773f7152f..154ee9e2093 100644 --- a/cpp/src/experimental/coarsen_graph.cu +++ b/cpp/src/experimental/coarsen_graph.cu @@ -30,7 +30,6 @@ #include #include #include -#include #include #include diff --git a/cpp/src/experimental/include_cuco_static_map.cuh b/cpp/src/experimental/include_cuco_static_map.cuh new file mode 100644 index 00000000000..9e54acef72c --- /dev/null +++ b/cpp/src/experimental/include_cuco_static_map.cuh @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +// "FIXME": remove the guards below and references to CUCO_STATIC_MAP_DEFINED +// +// cuco/static_map.cuh depends on features not supported on or before Pascal. +// +// If we build for sm_60 or before, the inclusion of cuco/static_map.cuh wil +// result in compilation errors. +// +// If we're Pascal or before we do nothing here and will suppress including +// some code below. If we are later than Pascal we define CUCO_STATIC_MAP_DEFINED +// which will result in the full implementation being pulled in. 
+// +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 700 +#else +#define CUCO_STATIC_MAP_DEFINED +#include +#endif diff --git a/cpp/src/experimental/louvain.cuh b/cpp/src/experimental/louvain.cuh index 55a182ecec5..4257953d390 100644 --- a/cpp/src/experimental/louvain.cuh +++ b/cpp/src/experimental/louvain.cuh @@ -31,22 +31,7 @@ #include #include -// "FIXME": remove the guards below and references to CUCO_STATIC_MAP_DEFINED -// -// cuco/static_map.cuh depends on features not supported on or before Pascal. -// -// If we build for sm_60 or before, the inclusion of cuco/static_map.cuh wil -// result in compilation errors. -// -// If we're Pascal or before we do nothing here and will suppress including -// some code below. If we are later than Pascal we define CUCO_STATIC_MAP_DEFINED -// which will result in the full implementation being pulled in. -// -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 700 -#else -#define CUCO_STATIC_MAP_DEFINED -#include -#endif +#include //#define TIMING diff --git a/cpp/src/experimental/relabel.cu b/cpp/src/experimental/relabel.cu index 793b40ee4f1..18df0d1c506 100644 --- a/cpp/src/experimental/relabel.cu +++ b/cpp/src/experimental/relabel.cu @@ -14,6 +14,8 @@ * limitations under the License. */ +#include + #include #include #include @@ -30,7 +32,6 @@ #include #include #include -#include #include #include @@ -51,6 +52,11 @@ void relabel(raft::handle_t const& handle, { double constexpr load_factor = 0.7; + // FIXME: remove this check once we drop Pascal support + CUGRAPH_EXPECTS(handle.get_device_properties().major >= 7, + "Relabel not supported on Pascal and older architectures."); + +#ifdef CUCO_STATIC_MAP_DEFINED if (multi_gpu) { auto& comm = handle.get_comms(); auto const comm_size = comm.get_size(); @@ -200,6 +206,7 @@ void relabel(raft::handle_t const& handle, invalid_vertex_id::value) == 0, "Invalid input argument: labels include old label values missing in old_new_label_pairs."); } +#endif return; } diff --git a/cpp/src/experimental/renumber_edgelist.cu b/cpp/src/experimental/renumber_edgelist.cu index e8408f2ecd6..86f1b07efeb 100644 --- a/cpp/src/experimental/renumber_edgelist.cu +++ b/cpp/src/experimental/renumber_edgelist.cu @@ -14,6 +14,8 @@ * limitations under the License. 
*/ +#include + #include #include #include @@ -31,7 +33,6 @@ #include #include #include -#include #include #include @@ -43,6 +44,7 @@ namespace cugraph { namespace experimental { namespace detail { +#ifdef CUCO_STATIC_MAP_DEFINED template rmm::device_uvector compute_renumber_map( raft::handle_t const& handle, @@ -362,6 +364,7 @@ void expensive_check_edgelist( } } } +#endif template std::enable_if_t= 7, + "Relabel not supported on Pascal and older architectures."); + +#ifdef CUCO_STATIC_MAP_DEFINED auto& comm = handle.get_comms(); auto const comm_size = comm.get_size(); auto const comm_rank = comm.get_rank(); @@ -542,6 +550,13 @@ renumber_edgelist(raft::handle_t const& handle, return std::make_tuple( std::move(renumber_map_labels), partition, number_of_vertices, number_of_edges); +#else + return std::make_tuple( + rmm::device_uvector(0, handle.get_stream()), + partition_t(std::vector(), false, int{0}, int{0}, int{0}, int{0}), + vertex_t{0}, + edge_t{0}); +#endif } template @@ -554,6 +569,11 @@ std::enable_if_t> renumber_edgelist( edge_t num_edgelist_edges, bool do_expensive_check) { + // FIXME: remove this check once we drop Pascal support + CUGRAPH_EXPECTS(handle.get_device_properties().major >= 7, + "Relabel not supported on Pascal and older architectures."); + +#ifdef CUCO_STATIC_MAP_DEFINED if (do_expensive_check) { expensive_check_edgelist(handle, vertices, @@ -594,6 +614,9 @@ std::enable_if_t> renumber_edgelist( edgelist_minor_vertices, edgelist_minor_vertices + num_edgelist_edges, edgelist_minor_vertices); return std::move(renumber_map_labels); +#else + return rmm::device_uvector(0, handle.get_stream()); +#endif } } // namespace detail @@ -608,6 +631,9 @@ renumber_edgelist(raft::handle_t const& handle, bool is_hypergraph_partitioned, bool do_expensive_check) { + // FIXME: remove this check once we drop Pascal support + CUGRAPH_EXPECTS(handle.get_device_properties().major >= 7, + "Relabel not supported on Pascal and older architectures."); return detail::renumber_edgelist(handle, static_cast(nullptr), vertex_t{0}, @@ -626,6 +652,9 @@ std::enable_if_t> renumber_edgelist( edge_t num_edgelist_edges, bool do_expensive_check) { + // FIXME: remove this check once we drop Pascal support + CUGRAPH_EXPECTS(handle.get_device_properties().major >= 7, + "Relabel not supported on Pascal and older architectures."); return detail::renumber_edgelist(handle, static_cast(nullptr), vertex_t{0} /* dummy */, @@ -647,6 +676,9 @@ renumber_edgelist(raft::handle_t const& handle, bool is_hypergraph_partitioned, bool do_expensive_check) { + // FIXME: remove this check once we drop Pascal support + CUGRAPH_EXPECTS(handle.get_device_properties().major >= 7, + "Relabel not supported on Pascal and older architectures."); return detail::renumber_edgelist(handle, local_vertices, num_local_vertices, @@ -667,6 +699,9 @@ std::enable_if_t> renumber_edgelist( edge_t num_edgelist_edges, bool do_expensive_check) { + // FIXME: remove this check once we drop Pascal support + CUGRAPH_EXPECTS(handle.get_device_properties().major >= 7, + "Relabel not supported on Pascal and older architectures."); return detail::renumber_edgelist(handle, vertices, num_vertices, From f14df80f3932e5cb808956c9a56c5f4e8351f61f Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Wed, 13 Jan 2021 02:26:10 -0500 Subject: [PATCH 109/343] disable coarsen_grpah testing on Pascal or older --- cpp/tests/experimental/coarsen_graph_test.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cpp/tests/experimental/coarsen_graph_test.cpp 
b/cpp/tests/experimental/coarsen_graph_test.cpp index 8395fdfa93b..4b56fd843dc 100644 --- a/cpp/tests/experimental/coarsen_graph_test.cpp +++ b/cpp/tests/experimental/coarsen_graph_test.cpp @@ -268,6 +268,11 @@ class Tests_CoarsenGraph : public ::testing::TestWithParam { raft::handle_t handle{}; + // FIXME: remove this once we drop Pascal support + if (handle.get_device_properties().major < 7) { // Pascal is not supported, skip testing + return; + } + auto graph = cugraph::test:: read_graph_from_matrix_market_file( handle, configuration.graph_file_full_path, configuration.test_weighted); From 192848cc8970b8660513286e9f8bd91d07d49db8 Mon Sep 17 00:00:00 2001 From: BradReesWork Date: Wed, 13 Jan 2021 15:32:29 -0500 Subject: [PATCH 110/343] updated intro --- docs/source/cugraph_intro.md | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/docs/source/cugraph_intro.md b/docs/source/cugraph_intro.md index 073e13d37ca..cbbb9ef4e4c 100644 --- a/docs/source/cugraph_intro.md +++ b/docs/source/cugraph_intro.md @@ -48,23 +48,6 @@ high-speed ETL, statistics, and machine learning. To make things even better, RAPIDS and DASK allows cuGraph to scale to multiple GPUs to support multi-billion edge graphs. -Every release of RAPIDS is accompanied with one or more wonderful BLOG(s) -about the features of that release (for example, see the release 0.8 blog [1] -or Life after Hadoop). This article is slightly different. While current -features will be discussed, the main focus is on presenting the vision of -cuGraph and how we envision users will interact with the libraries. The cuGraph -team has been working hard to provide a rich set of initial features. Over the -past three releases (RAPIDS release 0.6 was the first to include cuGraph), the -team has provided over a dozen algorithms. The initial goal was to simply get -graph analytics released and available with a familiar NetworkX-like API. That -is great in the short term since it allows an easy path to switch to RAPIDS. -However, cuGraph development will slowly shift towards improving ease-of-use, -interoperability, and integration with the rest of RAPIDS. That is not an easy -task since there is still so much to be added to the cuGraph ecosystem. Don’t -worry, the goal of getting new analytics out will continue since there is a very -long list of algorithms to release. - - # Terminology From d1d2b78a9335dc7aa8b7b93df56ea4a7e9f6c212 Mon Sep 17 00:00:00 2001 From: BradReesWork Date: Wed, 13 Jan 2021 16:03:01 -0500 Subject: [PATCH 111/343] updated perf gain --- docs/source/cugraph_intro.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/cugraph_intro.md b/docs/source/cugraph_intro.md index cbbb9ef4e4c..142395fb719 100644 --- a/docs/source/cugraph_intro.md +++ b/docs/source/cugraph_intro.md @@ -35,7 +35,7 @@ have totally changed the paradigm and the goal of an accelerated unified graph analytic library is now possible. The compute power of the latest NVIDIA GPUs (RAPIDS supports Pascal and later -GPU architectures) make graph analytics 20x faster on average over NetworkX. +GPU architectures) make graph analytics 1000x faster on average over NetworkX. Moreover, the internal memory speed within a GPU allows cuGraph to rapidly switch the data structure to best suit the needs of the analytic rather than being restricted to a single data structure. 
cuGraph is working with several From 2ebd8729b87d12fcacc76a8f6b22b142973eecfc Mon Sep 17 00:00:00 2001 From: BradReesWork Date: Wed, 13 Jan 2021 16:19:35 -0500 Subject: [PATCH 112/343] updated copyright year --- cpp/src/layout/force_atlas2.cu | 2 +- python/cugraph/centrality/__init__.py | 2 +- python/cugraph/centrality/betweenness_centrality.py | 2 +- python/cugraph/community/spectral_clustering.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/src/layout/force_atlas2.cu b/cpp/src/layout/force_atlas2.cu index 7c4c1e26907..6da9b77b45d 100644 --- a/cpp/src/layout/force_atlas2.cu +++ b/cpp/src/layout/force_atlas2.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/python/cugraph/centrality/__init__.py b/python/cugraph/centrality/__init__.py index 83095d64292..f33df2fe61a 100644 --- a/python/cugraph/centrality/__init__.py +++ b/python/cugraph/centrality/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at diff --git a/python/cugraph/centrality/betweenness_centrality.py b/python/cugraph/centrality/betweenness_centrality.py index afccd191ddf..c71c6e92dae 100644 --- a/python/cugraph/centrality/betweenness_centrality.py +++ b/python/cugraph/centrality/betweenness_centrality.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at diff --git a/python/cugraph/community/spectral_clustering.py b/python/cugraph/community/spectral_clustering.py index 2ee0600a7ed..443e2169711 100644 --- a/python/cugraph/community/spectral_clustering.py +++ b/python/cugraph/community/spectral_clustering.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at From 66a306118f2009afa76b647890596d7100f802cb Mon Sep 17 00:00:00 2001 From: Joseph Nke Date: Thu, 14 Jan 2021 00:43:58 -0500 Subject: [PATCH 113/343] refac --- .../tests/test_betweenness_centrality.py | 27 +++----- python/cugraph/tests/test_ecg.py | 20 +++--- python/cugraph/tests/test_force_atlas2.py | 17 +++-- python/cugraph/tests/test_jaccard.py | 8 ++- python/cugraph/tests/test_k_truss_subgraph.py | 3 +- python/cugraph/tests/test_utils.py | 8 +-- python/cugraph/tests/utils.py | 68 ++++++++----------- 7 files changed, 69 insertions(+), 82 deletions(-) diff --git a/python/cugraph/tests/test_betweenness_centrality.py b/python/cugraph/tests/test_betweenness_centrality.py index cccd00108e9..1d025c33a41 100755 --- a/python/cugraph/tests/test_betweenness_centrality.py +++ b/python/cugraph/tests/test_betweenness_centrality.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION.: +# Copyright (c) 2020-2021, NVIDIA CORPORATION.: # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -56,7 +56,7 @@ # ============================================================================= def calc_betweenness_centrality( Fixture_params_dts_sml, - directed=True, #no need of this + directed=True, k=None, normalized=False, weight=None, @@ -65,7 +65,7 @@ def calc_betweenness_centrality( result_dtype=np.float64, use_k_full=False, multi_gpu_batch=False, - edgevals=False, #no need of this + edgevals=False, ): """ Generate both cugraph and networkx betweenness centrality @@ -121,8 +121,6 @@ def calc_betweenness_centrality( Gnx = None G, Gnx = Fixture_params_dts_sml - - assert G is not None and Gnx is not None if multi_gpu_batch: @@ -298,12 +296,11 @@ def compare_scores(sorted_df, first_key, second_key, epsilon=DEFAULT_EPSILON): def prepare_test(): gc.collect() - - + # ============================================================================= # Pytest Fixtures -# ============================================================================= +# ============================================================================= DIRECTED = [pytest.param(d) for d in DIRECTED_GRAPH_OPTIONS] DATASETS_SMALL = [pytest.param(d) for d in utils.DATASETS_SMALL] DATASETS_UNRENUMBERED = [pytest.param(d) for d in utils.DATASETS_UNRENUMBERED] @@ -313,27 +310,24 @@ def prepare_test(): fixture_params_dts_sml = utils.genFixtureParamsProduct( (DATASETS_SMALL, "grph"), (DIRECTED, "dirctd"), - (WEIGHTED_GRAPH_OPTIONS, "wgtd_gph_opts")) + (WEIGHTED_GRAPH_OPTIONS, "wgtd_gph_opts")) fixture_params_dts_urnbrd = utils.genFixtureParamsProduct( (DATASETS_UNRENUMBERED, "grph"), (DIRECTED, "dirctd"), - (WEIGHTED_GRAPH_OPTIONS, "wgtd_gph_opts")) + (WEIGHTED_GRAPH_OPTIONS, "wgtd_gph_opts")) + @pytest.fixture(scope="module", params=fixture_params_dts_sml) def Fixture_params_dts_sml(request): return utils.build_cu_and_nx_graphs(*request.param) - - @pytest.fixture(scope="module", params=fixture_params_dts_urnbrd) def Fixture_params_dts_urnbrd(request): return utils.build_cu_and_nx_graphs(*request.param) - - # ============================================================================= # Tests # ============================================================================= @@ -364,6 +358,7 @@ def test_betweenness_centrality( ) compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") + @pytest.mark.parametrize("subset_size", [None]) @pytest.mark.parametrize("normalized", 
NORMALIZED_OPTIONS) @pytest.mark.parametrize("weight", [None]) @@ -432,7 +427,7 @@ def test_betweenness_centrality_fixed_sample( ) compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") - + @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) @pytest.mark.parametrize("weight", [[]]) @@ -467,7 +462,6 @@ def test_betweenness_centrality_weight_except( compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") - @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @pytest.mark.parametrize("weight", [None]) @@ -529,4 +523,3 @@ def test_betweenness_centrality_nx( print("Mismatches:", err) assert err < (0.01 * len(cugraph_bc)) - diff --git a/python/cugraph/tests/test_ecg.py b/python/cugraph/tests/test_ecg.py index f597b49d99a..f2a01be9287 100644 --- a/python/cugraph/tests/test_ecg.py +++ b/python/cugraph/tests/test_ecg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -14,10 +14,10 @@ import gc import pytest -import os import networkx as nx import cugraph from cugraph.tests import utils +from pathlib import PurePath def cugraph_call(G, min_weight, ensemble_size): @@ -30,18 +30,18 @@ def cugraph_call(G, min_weight, ensemble_size): def golden_call(graph_file): - if graph_file == os.path.join(utils.rapidsDatasetRootDir, "dolphins.csv"): + if graph_file == PurePath(utils.RAPIDS_DATASET_ROOT_DIR)/"dolphins.csv": return 0.4962422251701355 - if graph_file == os.path.join(utils.rapidsDatasetRootDir,"karate.csv"): + if graph_file == PurePath(utils.RAPIDS_DATASET_ROOT_DIR)/"karate.csv": return 0.38428664207458496 - if graph_file == os.path.join(utils.rapidsDatasetRootDir, "netscience.csv"): + if graph_file == PurePath(utils.RAPIDS_DATASET_ROOT_DIR)/"netscience.csv": return 0.9279554486274719 -DATASETS = [ - os.path.join(utils.rapidsDatasetRootDir,"karate.csv"), - os.path.join(utils.rapidsDatasetRootDir, "dolphins.csv"), - os.path.join(utils.rapidsDatasetRootDir, "netscience.csv"), +DATASETS = [PurePath(utils.RAPIDS_DATASET_ROOT_DIR)/f for f in [ + "karate.csv", + "dolphins.csv", + "netscience.csv"] ] MIN_WEIGHTS = [0.05, 0.10, 0.15] @@ -69,7 +69,7 @@ def test_ecg_clustering(graph_file, min_weight, ensemble_size): assert cu_score > (0.95 * golden_score) -@pytest.mark.parametrize("graph_file", DATASETS) +@pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("min_weight", MIN_WEIGHTS) @pytest.mark.parametrize("ensemble_size", ENSEMBLE_SIZES) def test_ecg_clustering_nx(graph_file, min_weight, ensemble_size): diff --git a/python/cugraph/tests/test_force_atlas2.py b/python/cugraph/tests/test_force_atlas2.py index 373cb55d367..55f53e3d60b 100644 --- a/python/cugraph/tests/test_force_atlas2.py +++ b/python/cugraph/tests/test_force_atlas2.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -12,7 +12,6 @@ # limitations under the License. 
import time -import os import pytest import cugraph @@ -20,6 +19,7 @@ from cugraph.tests import utils from sklearn.manifold import trustworthiness import scipy.io +from pathlib import PurePath # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -61,11 +61,14 @@ def cugraph_call(cu_M, max_iter, pos_list, outbound_attraction_distribution, DATASETS = [ - (os.path.join(utils.rapidsDatasetRootDir, "karate.csv"), 0.70), - (os.path.join(utils.rapidsDatasetRootDir, "polbooks.csv"), 0.75), - (os.path.join(utils.rapidsDatasetRootDir, "dolphins.csv"), 0.66), - (os.path.join(utils.rapidsDatasetRootDir, "netscience.csv"), 0.66), + (PurePath(utils.RAPIDS_DATASET_ROOT_DIR)/f,)+(d,) for (f, d) in [ + ("karate.csv", 0.70), + ("polbooks.csv", 0.75), + ("dolphins.csv", 0.66), + ("netscience.csv", 0.66)] ] + + MAX_ITERATIONS = [500] BARNES_HUT_OPTIMIZE = [False, True] @@ -120,7 +123,7 @@ def test_force_atlas2(graph_file, score, max_iter, iterations on a given graph. """ - matrix_file = graph_file[:-4] + ".mtx" + matrix_file = PurePath(graph_file).with_suffix(".mtx") M = scipy.io.mmread(matrix_file) M = M.todense() cu_trust = trustworthiness(M, cu_pos[["x", "y"]].to_pandas()) diff --git a/python/cugraph/tests/test_jaccard.py b/python/cugraph/tests/test_jaccard.py index 7158a6968e5..522b64794da 100644 --- a/python/cugraph/tests/test_jaccard.py +++ b/python/cugraph/tests/test_jaccard.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -13,11 +13,11 @@ import gc import time -import os import pytest import cugraph from cugraph.tests import utils +from pathlib import PurePath # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -113,7 +113,9 @@ def test_jaccard(graph_file): assert err == 0 -@pytest.mark.parametrize("graph_file", [os.path.join(utils.rapidsDatasetRootDir, "netscience.csv")]) +@pytest.mark.parametrize("graph_file", [PurePath( + utils.RAPIDS_DATASET_ROOT_DIR)/"netscience.csv"] +) def test_jaccard_edgevals(graph_file): gc.collect() diff --git a/python/cugraph/tests/test_k_truss_subgraph.py b/python/cugraph/tests/test_k_truss_subgraph.py index e9ccac81cf6..507eb6a3d24 100644 --- a/python/cugraph/tests/test_k_truss_subgraph.py +++ b/python/cugraph/tests/test_k_truss_subgraph.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -99,3 +99,4 @@ def test_ktruss_subgraph_Graph_nx(graph_file, nx_ground_truth): nx_df = nx.to_pandas_edgelist(k_truss_nx) assert len(df) == len(nx_df) + diff --git a/python/cugraph/tests/test_utils.py b/python/cugraph/tests/test_utils.py index 20bfbeaba1d..954306e7528 100644 --- a/python/cugraph/tests/test_utils.py +++ b/python/cugraph/tests/test_utils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -13,16 +13,16 @@ import gc import pytest -import os import cugraph from cugraph.tests import utils +from pathlib import PurePath def test_bfs_paths(): with pytest.raises(ValueError) as ErrorMsg: gc.collect() - graph_file = os.path.join(utils.rapidsDatasetRootDir,"karate.csv") + graph_file = PurePath(utils.RAPIDS_DATASET_ROOT_DIR)/"karate.csv" cu_M = utils.read_csv_file(graph_file) @@ -47,7 +47,7 @@ def test_bfs_paths_array(): with pytest.raises(ValueError) as ErrorMsg: gc.collect() - graph_file = os.path.join(utils.rapidsDatasetRootDir,"karate.csv") + graph_file = PurePath(utils.RAPIDS_DATASET_ROOT_DIR)/"karate.csv" cu_M = utils.read_csv_file(graph_file) diff --git a/python/cugraph/tests/utils.py b/python/cugraph/tests/utils.py index dde53748780..77d6bff65ad 100755 --- a/python/cugraph/tests/utils.py +++ b/python/cugraph/tests/utils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -26,14 +26,13 @@ from scipy.sparse.coo import coo_matrix as sp_coo_matrix from scipy.sparse.csr import csr_matrix as sp_csr_matrix from scipy.sparse.csc import csc_matrix as sp_csc_matrix - +from pathlib import PurePath import cudf import dask_cudf import cugraph from cugraph.dask.common.mg_utils import get_client -#RAPIDS_DATASET_ROOT_DIR= "/datasets" CUPY_MATRIX_TYPES = [cp_coo_matrix, cp_csr_matrix, cp_csc_matrix] SCIPY_MATRIX_TYPES = [sp_coo_matrix, sp_csr_matrix, sp_csc_matrix] @@ -42,51 +41,51 @@ # Datasets # -RAPIDS_DATASET_ROOT_DIR= "../datasets" - -def get_rapids_dataset_root_dir(): - envVar=os.getenv('RAPIDS_DATASET_ROOT_DIR') - if(envVar!=None): - return envVar - return RAPIDS_DATASET_ROOT_DIR - - - +RAPIDS_DATASET_ROOT_DIR = os.getenv("RAPIDS_DATASET_ROOT_DIR", "../datasets") -rapidsDatasetRootDir = get_rapids_dataset_root_dir() - -DATASETS_UNDIRECTED = [os.path.join(rapidsDatasetRootDir,"karate.csv"), os.path.join(rapidsDatasetRootDir, "dolphins.csv")] +DATASETS_UNDIRECTED = [PurePath(RAPIDS_DATASET_ROOT_DIR)/f for + f in ["karate.csv", "dolphins.csv"]] DATASETS_UNDIRECTED_WEIGHTS = [ - os.path.join(rapidsDatasetRootDir, "netscience.csv") + PurePath(RAPIDS_DATASET_ROOT_DIR)/"netscience.csv" ] -DATASETS_UNRENUMBERED = [os.path.join(rapidsDatasetRootDir, "karate-disjoint.csv")] +DATASETS_UNRENUMBERED = [PurePath( + RAPIDS_DATASET_ROOT_DIR)/"karate-disjoint.csv" +] -DATASETS = [ - os.path.join(rapidsDatasetRootDir, "karate-disjoint.csv"), - os.path.join(rapidsDatasetRootDir, "dolphins.csv"), - os.path.join(rapidsDatasetRootDir, "netscience.csv"), +DATASETS = [PurePath(RAPIDS_DATASET_ROOT_DIR)/f for f in [ + "karate-disjoint.csv", + "dolphins.csv", + "netscience.csv"] ] + + # '../datasets/email-Eu-core.csv'] STRONGDATASETS = [ - os.path.join(rapidsDatasetRootDir, "dolphins.csv"), - os.path.join(rapidsDatasetRootDir, "netscience.csv"), - os.path.join(rapidsDatasetRootDir, "email-Eu-core.csv"), + PurePath(RAPIDS_DATASET_ROOT_DIR)/f for f in [ + "dolphins.csv", + "netscience.csv", + "email-Eu-core.csv"] ] + DATASETS_KTRUSS = [ - (os.path.join(rapidsDatasetRootDir,"polbooks.csv"), os.path.join(rapidsDatasetRootDir,"ref/ktruss","polbooks.csv")) + (str(PurePath(RAPIDS_DATASET_ROOT_DIR)/"polbooks.csv"), + str(PurePath(RAPIDS_DATASET_ROOT_DIR)/"ref/ktruss/polbooks.csv")) ] + DATASETS_SMALL = [ - 
os.path.join(rapidsDatasetRootDir, "karate.csv"), - os.path.join(rapidsDatasetRootDir, "dolphins.csv"), - os.path.join(rapidsDatasetRootDir, "polbooks.csv"), + PurePath(RAPIDS_DATASET_ROOT_DIR)/f for f in [ + "karate.csv", + "dolphins.csv", + "polbooks.csv"] ] + MATRIX_INPUT_TYPES = [ pytest.param( cp_coo_matrix, marks=pytest.mark.cupy_types, id="CuPy.coo_matrix" @@ -120,17 +119,6 @@ def get_rapids_dataset_root_dir(): ] - -def get_rapids_dataset_root_dir(): - envVar=os.getenv('RAPIDS_DATASET_ROOT_DIR') - if(envVar!=None): - return envVar - return RAPIDS_DATASET_ROOT_DIR - - - - - def read_csv_for_nx(csv_file, read_weights_in_sp=True, read_weights=True): print("Reading " + str(csv_file) + "...") if read_weights: From 82aaa968ec9e4b9d72666dd61f819b48379f2e90 Mon Sep 17 00:00:00 2001 From: Chuck Hastings Date: Thu, 14 Jan 2021 14:23:46 -0500 Subject: [PATCH 114/343] expose dense version of hungarian algorithm --- python/cugraph/__init__.py | 2 +- python/cugraph/linear_assignment/__init__.py | 2 +- python/cugraph/linear_assignment/lap.pxd | 12 +++++- python/cugraph/linear_assignment/lap.py | 43 ++++++++++++++++++- .../cugraph/linear_assignment/lap_wrapper.pyx | 36 ++++++++++++++-- python/cugraph/tests/test_hungarian.py | 9 +--- 6 files changed, 87 insertions(+), 17 deletions(-) diff --git a/python/cugraph/__init__.py b/python/cugraph/__init__.py index d752c868237..06318ed6bd6 100644 --- a/python/cugraph/__init__.py +++ b/python/cugraph/__init__.py @@ -89,7 +89,7 @@ from cugraph.proto.components import strong_connected_component from cugraph.proto.structure import find_bicliques -from cugraph.linear_assignment import hungarian +from cugraph.linear_assignment import hungarian, dense_hungarian from cugraph.layout import force_atlas2 from cugraph.raft import raft_include_test from cugraph.comms import comms diff --git a/python/cugraph/linear_assignment/__init__.py b/python/cugraph/linear_assignment/__init__.py index 9bf09b67ed9..69a51591c19 100644 --- a/python/cugraph/linear_assignment/__init__.py +++ b/python/cugraph/linear_assignment/__init__.py @@ -11,4 +11,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -from cugraph.linear_assignment.lap import hungarian +from cugraph.linear_assignment.lap import hungarian, dense_hungarian diff --git a/python/cugraph/linear_assignment/lap.pxd b/python/cugraph/linear_assignment/lap.pxd index f7991405b7f..5c4c09b0662 100644 --- a/python/cugraph/linear_assignment/lap.pxd +++ b/python/cugraph/linear_assignment/lap.pxd @@ -20,9 +20,19 @@ from cugraph.structure.graph_primtypes cimport * cdef extern from "algorithms.hpp" namespace "cugraph": - cdef void hungarian[VT,ET,WT]( + cdef WT hungarian[VT,ET,WT]( const handle_t &handle, const GraphCOOView[VT,ET,WT] &graph, VT num_workers, const VT *workers, VT *assignment) except + + + +cdef extern from "algorithms.hpp": + + cdef WT dense_hungarian "cugraph::dense::hungarian" [VT,WT]( + const handle_t &handle, + const WT *costs, + VT num_rows, + VT num_columns, + VT *assignment) except + diff --git a/python/cugraph/linear_assignment/lap.py b/python/cugraph/linear_assignment/lap.py index 5c501d17935..9b073a0d62d 100644 --- a/python/cugraph/linear_assignment/lap.py +++ b/python/cugraph/linear_assignment/lap.py @@ -46,6 +46,8 @@ def hungarian(G, workers): Returns ------- + cost : matches costs.dtype + The cost of the overall assignment df : cudf.DataFrame df['vertex'][i] gives the vertex id of the i'th vertex. 
Only vertices in the workers list are defined in this column. @@ -60,7 +62,7 @@ def hungarian(G, workers): >>> dtype=['int32', 'int32', 'float32'], header=None) >>> G = cugraph.Graph() >>> G.from_cudf_edgelist(M, source='0', destination='1', edge_attr='2') - >>> df = cugraph.hungarian(G, workers) + >>> cost, df = cugraph.hungarian(G, workers) """ @@ -69,9 +71,46 @@ def hungarian(G, workers): else: local_workers = workers - df = lap_wrapper.hungarian(G, local_workers) + df = lap_wrapper.sparse_hungarian(G, local_workers) if G.renumbered: df = G.unrenumber(df, 'vertex') return df + + +def dense_hungarian(costs, num_rows, num_columns): + """ + Execute the Hungarian algorithm against a dense bipartite + graph representation. + + The Hungarian algorithm identifies the lowest cost matching of vertices + such that all workers that can be assigned work are assigned exactly + on job. + + Parameters + ---------- + costs : cudf.Series + A dense representation (row major order) of the bipartite + graph. Each row represents a worker, each column represents + a task, cost[i][j] represents the cost of worker i performing + task j. + num_rows : int + Number of rows in the matrix + num_columns : int + Number of columns in the matrix + + + Returns + ------- + cost : matches costs.dtype + The cost of the overall assignment + assignment : cudf.Series + assignment[i] gives the vertex id of the task assigned to the + worker i + + FIXME: Update this with a real example... + + """ + + return lap_wrapper.dense_hungarian(costs, num_rows, num_columns) diff --git a/python/cugraph/linear_assignment/lap_wrapper.pyx b/python/cugraph/linear_assignment/lap_wrapper.pyx index caaa837e859..d31985f5420 100644 --- a/python/cugraph/linear_assignment/lap_wrapper.pyx +++ b/python/cugraph/linear_assignment/lap_wrapper.pyx @@ -17,6 +17,7 @@ # cython: language_level = 3 from cugraph.linear_assignment.lap cimport hungarian as c_hungarian +from cugraph.linear_assignment.lap cimport dense_hungarian as c_dense_hungarian from cugraph.structure.graph_primtypes cimport * from cugraph.structure import graph_primtypes_wrapper from libc.stdint cimport uintptr_t @@ -25,7 +26,7 @@ from cugraph.structure.graph import Graph as type_Graph import cudf import numpy as np -def hungarian(input_graph, workers): +def sparse_hungarian(input_graph, workers): """ Call the hungarian algorithm """ @@ -76,10 +77,37 @@ def hungarian(input_graph, workers): if weights.dtype == np.float32: g_float = GraphCOOView[int,int,float](c_src, c_dst, c_weights, num_verts, num_edges) - c_hungarian[int,int,float](handle_[0], g_float, len(workers), c_workers, c_assignment) + cost = c_hungarian[int,int,float](handle_[0], g_float, len(workers), c_workers, c_assignment) else: g_double = GraphCOOView[int,int,double](c_src, c_dst, c_weights, num_verts, num_edges) - c_hungarian[int,int,double](handle_[0], g_double, len(workers), c_workers, c_assignment) + cost = c_hungarian[int,int,double](handle_[0], g_double, len(workers), c_workers, c_assignment) - return df + return cost, df + + +def dense_hungarian(costs, num_rows, num_columns): + """ + Call the dense hungarian algorithm + """ + if type(costs) is not cudf.Series: + raise("costs must be a cudf.Series") + + cdef unique_ptr[handle_t] handle_ptr + handle_ptr.reset(new handle_t()) + handle_ = handle_ptr.get(); + + assignment = cudf.Series(np.zeros(num_rows, dtype=np.int32)) + + cdef uintptr_t c_costs = costs.__cuda_array_interface__['data'][0] + cdef uintptr_t c_assignment = assignment.__cuda_array_interface__['data'][0] + + + if 
costs.dtype == np.float32: + cost = c_dense_hungarian[int,float](handle_[0], c_costs, num_rows, num_columns, c_assignment) + elif costs.dtype == np.float64: + cost = c_dense_hungarian[int,double](handle_[0], c_costs, num_rows, num_columns, c_assignment) + else: + raise("unsported type: ", costs.dtype) + + return cost, assignment diff --git a/python/cugraph/tests/test_hungarian.py b/python/cugraph/tests/test_hungarian.py index 280903bc303..1dd6e8e29f3 100644 --- a/python/cugraph/tests/test_hungarian.py +++ b/python/cugraph/tests/test_hungarian.py @@ -80,7 +80,7 @@ def test_hungarian(managed, pool, v1_size, v2_size, weight_limit): np.float) start = timer() - matching = cugraph.hungarian(g, v1) + cugraph_cost, matching = cugraph.hungarian(g, v1) end = timer() print('cugraph time: ', (end - start)) @@ -93,13 +93,6 @@ def test_hungarian(managed, pool, v1_size, v2_size, weight_limit): scipy_cost = m[np_matching[0], np_matching[1]].sum() - cugraph_df = matching.merge(g.edgelist.edgelist_df, - left_on=['vertex', 'assignment'], - right_on=['src', 'dst'], - how='left') - - cugraph_cost = cugraph_df['weights'].sum() - print('scipy_cost = ', scipy_cost) print('cugraph_cost = ', cugraph_cost) From c1f868c6fd06d93966699fd0b89bdfb72d9899b3 Mon Sep 17 00:00:00 2001 From: BradReesWork Date: Thu, 14 Jan 2021 14:25:28 -0500 Subject: [PATCH 115/343] update bad error message --- cpp/src/structure/graph.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/structure/graph.cu b/cpp/src/structure/graph.cu index 63ef725c3b7..2ce8a54d736 100644 --- a/cpp/src/structure/graph.cu +++ b/cpp/src/structure/graph.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From 334a4ae7ef2bd82037db50c65efe3e19c8c7aab2 Mon Sep 17 00:00:00 2001 From: Chuck Hastings Date: Thu, 14 Jan 2021 14:46:18 -0500 Subject: [PATCH 116/343] update documentation --- README.md | 4 ++- cpp/src/linear_assignment/README-hungarian.md | 36 +++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 cpp/src/linear_assignment/README-hungarian.md diff --git a/README.md b/README.md index 8fee5451ac3..e7d0aea43fe 100644 --- a/README.md +++ b/README.md @@ -56,10 +56,12 @@ for i in range(len(df_page)): | | Core Number | Single-GPU | | | Layout | | | | | | Force Atlas 2 | Single-GPU | | +| Linear Assignment| | | | +| | Hungarian | Single-GPU | [README](cpp/src/linear_assignment/README-hungarian.md) | | Link Analysis| | | | | | Pagerank | Multi-GPU | | | | Personal Pagerank | Multi-GPU | | -| | HITS | Single-GPU | leverages Gunrock | +| | HITS | Single-GPU | leverages Gunrock | | Link Prediction | | | | | | Jaccard Similarity | Single-GPU | | | | Weighted Jaccard Similarity | Single-GPU | | diff --git a/cpp/src/linear_assignment/README-hungarian.md b/cpp/src/linear_assignment/README-hungarian.md new file mode 100644 index 00000000000..42dabd7cfbc --- /dev/null +++ b/cpp/src/linear_assignment/README-hungarian.md @@ -0,0 +1,36 @@ +# LAP +Implementation of ***O(n^3) Alternating Tree Variant*** of Hungarian Algorithm on NVIDIA CUDA-enabled GPU. + +This implementation solves a batch of ***k*** **Linear Assignment Problems (LAP)**, each with ***nxn*** matrix of single floating point cost values. At optimality, the algorithm produces an assignment with ***minimum*** cost. 
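A minimal sketch of exercising the dense Python binding (`cugraph.dense_hungarian`) exposed earlier in this series is shown below, assuming the bindings from these patches are built; the 3x3 cost values are purely illustrative, and the binding is flagged as unstable later in the series.

```python
# Sketch only: assumes the cuGraph Python bindings from this patch series.
# costs must be a flattened, row-major cudf.Series of float32 or float64 values.
import numpy as np
import cudf
import cugraph

# Hypothetical 3x3 cost matrix: rows are workers, columns are tasks.
costs = np.array([[4.0, 1.0, 3.0],
                  [2.0, 0.0, 5.0],
                  [3.0, 2.0, 2.0]], dtype=np.float32)

# cost is the total weight of the optimal matching; assignment[i] is the
# column (task) matched to row (worker) i.
cost, assignment = cugraph.dense_hungarian(cudf.Series(costs.ravel()), 3, 3)
print(cost, assignment)
```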
+ +The API can be used to query optimal primal and dual costs, optimal assignment vector, and optimal row/column dual vectors for each subproblem in the batch. + +cuGraph exposes the Hungarian algorithm, the actual implementation is contained in the RAFT library which contains some common tools and kernels shared between cuGraph and cuML. + +Following parameters can be used to tune the performance of algorithm: + +1. epsilon: (in raft/lap/lap_kernels.cuh) This parameter controls the tolerance on the floating point precision. Setting this too small will result in increased solution time because the algorithm will search for precise solutions. Setting it too high may cause some inaccuracies. + +2. BLOCKDIMX, BLOCKDIMY: (in raft/lap/lap_functions.cuh) These parameters control threads_per_block to be used along the given dimension. Set these according to the device specifications and occupancy calculation. + +***This library is licensed under Apache License 2.0. Please cite our paper, if this library helps you in your research.*** + +- Harvard citation style + + Date, K. and Nagi, R., 2016. GPU-accelerated Hungarian algorithms for the Linear Assignment Problem. Parallel Computing, 57, pp.52-72. + +- BibTeX Citation block to be used in LaTeX bibliography file: + +``` +@article{date2016gpu, + title={GPU-accelerated Hungarian algorithms for the Linear Assignment Problem}, + author={Date, Ketan and Nagi, Rakesh}, + journal={Parallel Computing}, + volume={57}, + pages={52--72}, + year={2016}, + publisher={Elsevier} +} +``` + +The paper is available online on [ScienceDirect](https://www.sciencedirect.com/science/article/abs/pii/S016781911630045X). From a390e521f0437fc06975ed7ea7aa008baff91258 Mon Sep 17 00:00:00 2001 From: Chuck Hastings Date: Thu, 14 Jan 2021 14:53:11 -0500 Subject: [PATCH 117/343] fix flake8 issue --- python/cugraph/linear_assignment/lap.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cugraph/linear_assignment/lap.py b/python/cugraph/linear_assignment/lap.py index 9b073a0d62d..c2671ab09d6 100644 --- a/python/cugraph/linear_assignment/lap.py +++ b/python/cugraph/linear_assignment/lap.py @@ -107,7 +107,7 @@ def dense_hungarian(costs, num_rows, num_columns): The cost of the overall assignment assignment : cudf.Series assignment[i] gives the vertex id of the task assigned to the - worker i + worker i FIXME: Update this with a real example... From e7cce37991ce9d5cae5cac6c74ee1e0d3a57a9a5 Mon Sep 17 00:00:00 2001 From: Chuck Hastings Date: Thu, 14 Jan 2021 14:55:51 -0500 Subject: [PATCH 118/343] update copyright year --- python/cugraph/__init__.py | 2 +- python/cugraph/linear_assignment/__init__.py | 2 +- python/cugraph/linear_assignment/lap.pxd | 2 +- python/cugraph/linear_assignment/lap.py | 2 +- python/cugraph/linear_assignment/lap_wrapper.pyx | 2 +- python/cugraph/tests/test_hungarian.py | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/python/cugraph/__init__.py b/python/cugraph/__init__.py index 06318ed6bd6..81fc3936c0e 100644 --- a/python/cugraph/__init__.py +++ b/python/cugraph/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at diff --git a/python/cugraph/linear_assignment/__init__.py b/python/cugraph/linear_assignment/__init__.py index 69a51591c19..557bbbdf170 100644 --- a/python/cugraph/linear_assignment/__init__.py +++ b/python/cugraph/linear_assignment/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at diff --git a/python/cugraph/linear_assignment/lap.pxd b/python/cugraph/linear_assignment/lap.pxd index 5c4c09b0662..ad437ba09b4 100644 --- a/python/cugraph/linear_assignment/lap.pxd +++ b/python/cugraph/linear_assignment/lap.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at diff --git a/python/cugraph/linear_assignment/lap.py b/python/cugraph/linear_assignment/lap.py index c2671ab09d6..e0a04caf732 100644 --- a/python/cugraph/linear_assignment/lap.py +++ b/python/cugraph/linear_assignment/lap.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at diff --git a/python/cugraph/linear_assignment/lap_wrapper.pyx b/python/cugraph/linear_assignment/lap_wrapper.pyx index d31985f5420..0769ef42f0f 100644 --- a/python/cugraph/linear_assignment/lap_wrapper.pyx +++ b/python/cugraph/linear_assignment/lap_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at diff --git a/python/cugraph/tests/test_hungarian.py b/python/cugraph/tests/test_hungarian.py index 1dd6e8e29f3..bf21cd3420f 100644 --- a/python/cugraph/tests/test_hungarian.py +++ b/python/cugraph/tests/test_hungarian.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at From 5ec2ce3f5c2ff8e36d17f71b9dd2c23af501a9af Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 14 Jan 2021 19:50:34 -0600 Subject: [PATCH 119/343] fixed style error --- python/cugraph/tests/test_betweenness_centrality.py | 1 - python/cugraph/tests/test_ecg.py | 1 - python/cugraph/tests/test_force_atlas2.py | 1 - python/cugraph/tests/test_jaccard.py | 1 - python/cugraph/tests/test_k_truss_subgraph.py | 1 - python/cugraph/tests/test_utils.py | 1 - python/cugraph/tests/utils.py | 1 - 7 files changed, 7 deletions(-) diff --git a/python/cugraph/tests/test_betweenness_centrality.py b/python/cugraph/tests/test_betweenness_centrality.py index 1d025c33a41..e2abdc86042 100755 --- a/python/cugraph/tests/test_betweenness_centrality.py +++ b/python/cugraph/tests/test_betweenness_centrality.py @@ -522,4 +522,3 @@ def test_betweenness_centrality_nx( print(f"{cugraph_bc[i][1]} and {cugraph_bc[i][1]}") print("Mismatches:", err) assert err < (0.01 * len(cugraph_bc)) - diff --git a/python/cugraph/tests/test_ecg.py b/python/cugraph/tests/test_ecg.py index f2a01be9287..53c4647decc 100644 --- a/python/cugraph/tests/test_ecg.py +++ b/python/cugraph/tests/test_ecg.py @@ -84,4 +84,3 @@ def test_ecg_clustering_nx(graph_file, min_weight, ensemble_size): # Get the modularity score for partitioning versus random assignment _ = cugraph.ecg(G, min_weight, ensemble_size, "weight") - diff --git a/python/cugraph/tests/test_force_atlas2.py b/python/cugraph/tests/test_force_atlas2.py index 55f53e3d60b..e2179436a48 100644 --- a/python/cugraph/tests/test_force_atlas2.py +++ b/python/cugraph/tests/test_force_atlas2.py @@ -135,4 +135,3 @@ def test_force_atlas2(graph_file, score, max_iter, assert test_callback.on_epoch_end_called_count == max_iter # verify `on_train_end` was only called once assert test_callback.on_train_end_called_count == 1 - diff --git a/python/cugraph/tests/test_jaccard.py b/python/cugraph/tests/test_jaccard.py index 522b64794da..b61101ef1d0 100644 --- a/python/cugraph/tests/test_jaccard.py +++ b/python/cugraph/tests/test_jaccard.py @@ -222,4 +222,3 @@ def test_jaccard_nx(graph_file): # FIXME: Nx does a full all-pair Jaccard. 
# cuGraph does a limited 1-hop Jaccard # assert nx_j == cg_j - diff --git a/python/cugraph/tests/test_k_truss_subgraph.py b/python/cugraph/tests/test_k_truss_subgraph.py index 507eb6a3d24..7650fc6d2b5 100644 --- a/python/cugraph/tests/test_k_truss_subgraph.py +++ b/python/cugraph/tests/test_k_truss_subgraph.py @@ -99,4 +99,3 @@ def test_ktruss_subgraph_Graph_nx(graph_file, nx_ground_truth): nx_df = nx.to_pandas_edgelist(k_truss_nx) assert len(df) == len(nx_df) - diff --git a/python/cugraph/tests/test_utils.py b/python/cugraph/tests/test_utils.py index 954306e7528..55410817f90 100644 --- a/python/cugraph/tests/test_utils.py +++ b/python/cugraph/tests/test_utils.py @@ -66,4 +66,3 @@ def test_bfs_paths_array(): answer = cugraph.utils.get_traversed_path_list(df, 100) assert "not in the result set" in str(ErrorMsg) - diff --git a/python/cugraph/tests/utils.py b/python/cugraph/tests/utils.py index 77d6bff65ad..6fa9dd577ee 100755 --- a/python/cugraph/tests/utils.py +++ b/python/cugraph/tests/utils.py @@ -460,4 +460,3 @@ def compare_mst(mst_cugraph, mst_nx): print(cg_sum) print(nx_sum) assert np.isclose(cg_sum, nx_sum) - From dcc1dfec4003213fbcf94a6885e98342b957bf10 Mon Sep 17 00:00:00 2001 From: Rick Ratzel <3039903+rlratzel@users.noreply.github.com> Date: Thu, 14 Jan 2021 21:22:26 -0600 Subject: [PATCH 120/343] Set a specific known working commit hash for gunrock instead of "dev"(#1336) Set a specific known working commit hash for gunrock instead of "dev", since current gunrock dev is causing cugraph build failures. NOTE: the commit is a known working commit, no newer than Jan. 04 2021. This should be updated with a more recent stable commit later, if possible. Authors: - Rick Ratzel Approvers: - Chuck Hastings (@ChuckHastings) - Seunghwa Kang (@seunghwak) URL: https://github.com/rapidsai/cugraph/pull/1336 --- cpp/CMakeLists.txt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index cef05e5de19..ac73553ca00 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -260,9 +260,13 @@ ExternalProject_Add(cuhornet set(GUNROCK_DIR ${CMAKE_CURRENT_BINARY_DIR}/gunrock CACHE STRING "Path to gunrock repo") set(GUNROCK_INCLUDE_DIR ${GUNROCK_DIR}/src/gunrock_ext CACHE STRING "Path to gunrock includes") +# FIXME: gunrock commit eb13a501edf10dfa1ff2ddd3c05e9de5ec7220ff is a known +# working commit, no newer than Jan. 04 2021. This should be updated with a more +# recent stable commit if possible (or this FIXME removed if not). 
+ ExternalProject_Add(gunrock_ext GIT_REPOSITORY https://github.com/gunrock/gunrock.git - GIT_TAG dev + GIT_TAG eb13a501edf10dfa1ff2ddd3c05e9de5ec7220ff PREFIX ${GUNROCK_DIR} CMAKE_ARGS -DCMAKE_INSTALL_PREFIX= -DGUNROCK_BUILD_SHARED_LIBS=OFF From 8e2d64cf5fcb04592defdf035c10b74e31e46998 Mon Sep 17 00:00:00 2001 From: BradReesWork Date: Fri, 15 Jan 2021 10:00:20 -0500 Subject: [PATCH 121/343] slowly moving to better error messages --- cpp/src/experimental/graph_view.cu | 34 +++++++++++++++--------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/cpp/src/experimental/graph_view.cu b/cpp/src/experimental/graph_view.cu index e791a88d2d4..ae2b26e5675 100644 --- a/cpp/src/experimental/graph_view.cu +++ b/cpp/src/experimental/graph_view.cu @@ -87,19 +87,19 @@ graph_view_t(row_comm_size))) || (!(partition.is_hypergraph_partitioned()) && (adj_matrix_partition_offsets.size() == 1)), - "Invalid input argument: errneous adj_matrix_partition_offsets.size()."); + "Internal Error: erroneous adj_matrix_partition_offsets.size()."); CUGRAPH_EXPECTS((sorted_by_global_degree_within_vertex_partition && (vertex_partition_segment_offsets.size() == @@ -107,7 +107,7 @@ graph_view_ton(default_stream), adj_matrix_partition_offsets[i], adj_matrix_partition_offsets[i] + (major_last - major_first + 1)), - "Invalid input argument: adj_matrix_partition_offsets[] is not sorted."); + "Internal Error: adj_matrix_partition_offsets[] is not sorted."); edge_t number_of_local_edges{}; raft::update_host(&number_of_local_edges, adj_matrix_partition_offsets[i] + (major_last - major_first), @@ -149,12 +149,12 @@ graph_view_t{minor_first, minor_last}) == 0, - "Invalid input argument: adj_matrix_partition_indices[] have out-of-range vertex IDs."); + "Internal Error: adj_matrix_partition_indices[] have out-of-range vertex IDs."); } number_of_local_edges_sum = host_scalar_allreduce( this->get_handle_ptr()->get_comms(), number_of_local_edges_sum, default_stream); CUGRAPH_EXPECTS(number_of_local_edges_sum == this->get_number_of_edges(), - "Invalid input argument: the sum of local edges doe counts not match with " + "Internal Error: the sum of local edges counts does not match with " "number_of_local_edges."); if (sorted_by_global_degree_within_vertex_partition) { @@ -173,11 +173,11 @@ graph_view_tis_symmetric()) {} @@ -235,7 +235,7 @@ graph_view_ton(default_stream), offsets, offsets + (this->get_number_of_vertices() + 1)), - "Invalid input argument: offsets is not sorted."); + "Internal Error: offsets is not sorted."); // better use thrust::any_of once https://github.com/thrust/thrust/issues/1016 is resolved CUGRAPH_EXPECTS( @@ -253,7 +253,7 @@ graph_view_tget_number_of_edges(), out_of_range_t{0, this->get_number_of_vertices()}) == 0, - "Invalid input argument: adj_matrix_partition_indices[] have out-of-range vertex IDs."); + "Internal Error: adj_matrix_partition_indices[] have out-of-range vertex IDs."); if (sorted_by_degree) { auto degree_first = @@ -264,11 +264,11 @@ graph_view_tget_number_of_vertices(), thrust::greater{}), - "Invalid input argument: sorted_by_degree is set to true, but degrees are not " - "non-ascending."); + "Internal Error: sorted_by_degree is set to true, but degrees are not " + "in ascending order."); CUGRAPH_EXPECTS(std::is_sorted(segment_offsets.begin(), segment_offsets.end()), - "Invalid input argument: erroneous segment_offsets."); + "Internal Error: erroneous segment_offsets."); CUGRAPH_EXPECTS(segment_offsets[0] == 0, "Invalid input argument segment_offsets."); 
CUGRAPH_EXPECTS(segment_offsets.back() == this->get_number_of_vertices(), "Invalid input argument: segment_offsets."); From 8a2b1bf10274d88ab3fda3bb2498c43c0ef6a345 Mon Sep 17 00:00:00 2001 From: BradReesWork Date: Fri, 15 Jan 2021 10:00:28 -0500 Subject: [PATCH 122/343] fixed typo --- python/cugraph/centrality/betweenness_centrality.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cugraph/centrality/betweenness_centrality.py b/python/cugraph/centrality/betweenness_centrality.py index c71c6e92dae..3b7cfe6b68f 100644 --- a/python/cugraph/centrality/betweenness_centrality.py +++ b/python/cugraph/centrality/betweenness_centrality.py @@ -55,7 +55,7 @@ def betweenness_centrality( k : int or list or None, optional, default=None If k is not None, use k node samples to estimate betweenness. Higher values give better approximation. If k is a list, use the content - of the list for estimation: the list should contain vertices + of the list for estimation: the list should contain vertex identifiers. If k is None (the default), all the vertices are used to estimate betweenness. Vertices obtained through sampling or defined as a list will be used assources for traversals inside the From 82bc998bd918e5d34b3f85805f3524de61c06a93 Mon Sep 17 00:00:00 2001 From: BradReesWork Date: Fri, 15 Jan 2021 11:05:33 -0500 Subject: [PATCH 123/343] clang format --- cpp/src/experimental/graph_view.cu | 36 +++++++++++++----------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/cpp/src/experimental/graph_view.cu b/cpp/src/experimental/graph_view.cu index ae2b26e5675..5d3e685617f 100644 --- a/cpp/src/experimental/graph_view.cu +++ b/cpp/src/experimental/graph_view.cu @@ -89,11 +89,10 @@ graph_view_tis_symmetric()) {} @@ -231,11 +229,10 @@ graph_view_t{offsets}); - CUGRAPH_EXPECTS( - thrust::is_sorted(rmm::exec_policy(default_stream)->on(default_stream), - degree_first, - degree_first + this->get_number_of_vertices(), - thrust::greater{}), - "Internal Error: sorted_by_degree is set to true, but degrees are not " - "in ascending order."); + CUGRAPH_EXPECTS(thrust::is_sorted(rmm::exec_policy(default_stream)->on(default_stream), + degree_first, + degree_first + this->get_number_of_vertices(), + thrust::greater{}), + "Internal Error: sorted_by_degree is set to true, but degrees are not " + "in ascending order."); CUGRAPH_EXPECTS(std::is_sorted(segment_offsets.begin(), segment_offsets.end()), "Internal Error: erroneous segment_offsets."); From 647e94b48d9a72afc8c54bbe8277f4663c9ec605 Mon Sep 17 00:00:00 2001 From: Chuck Hastings Date: Fri, 15 Jan 2021 14:35:30 -0500 Subject: [PATCH 124/343] note that the dense API is unstable for now --- python/cugraph/linear_assignment/lap.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/cugraph/linear_assignment/lap.py b/python/cugraph/linear_assignment/lap.py index e0a04caf732..c634d9aceb4 100644 --- a/python/cugraph/linear_assignment/lap.py +++ b/python/cugraph/linear_assignment/lap.py @@ -84,6 +84,8 @@ def dense_hungarian(costs, num_rows, num_columns): Execute the Hungarian algorithm against a dense bipartite graph representation. + *NOTE*: This API is unstable and subject to change + The Hungarian algorithm identifies the lowest cost matching of vertices such that all workers that can be assigned work are assigned exactly on job. 
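[Editor's note] Since the patch above only adds the stability warning, a minimal, non-authoritative usage sketch of the dense API may help reviewers see the intended call pattern. The call and the two-value return are taken from the test added later in this series (python/cugraph/tests/test_hungarian.py), not from separately documented behaviour, so treat the exact return shape as an assumption.

```python
import numpy as np
import cudf
import cugraph

n = 4
# Square cost matrix: entry [i, j] is the cost of assigning worker i to job j.
costs = np.random.uniform(0, 10, size=(n, n)).astype(np.float32)

# The dense API takes the flattened matrix plus its dimensions.
cost_series = cudf.Series(costs.flatten())

# Two-value return (total cost, per-worker assignment) mirrors the usage in
# test_hungarian.py added later in this series; re-check if the API changes.
total_cost, assignment = cugraph.dense_hungarian(cost_series, n, n)

print(total_cost)   # minimal total assignment cost
print(assignment)   # assignment chosen for each worker
```

Because the API is explicitly marked unstable, this sketch should be re-checked against the test whenever the signature changes.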
From c4b7fc33a837ca4991193c5fd1fdc0d5b6c0afaf Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Sat, 16 Jan 2021 01:12:50 +0000 Subject: [PATCH 125/343] refac --- python/cugraph/tests/test_betweenness_centrality.py | 2 +- python/cugraph/tests/test_ecg.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cugraph/tests/test_betweenness_centrality.py b/python/cugraph/tests/test_betweenness_centrality.py index e2abdc86042..3385a546a6c 100755 --- a/python/cugraph/tests/test_betweenness_centrality.py +++ b/python/cugraph/tests/test_betweenness_centrality.py @@ -120,7 +120,7 @@ def calc_betweenness_centrality( G = None Gnx = None - G, Gnx = Fixture_params_dts_sml + G, Gnx = Fixture_params_dts assert G is not None and Gnx is not None if multi_gpu_batch: diff --git a/python/cugraph/tests/test_ecg.py b/python/cugraph/tests/test_ecg.py index 53c4647decc..86f9ed343ce 100644 --- a/python/cugraph/tests/test_ecg.py +++ b/python/cugraph/tests/test_ecg.py @@ -69,7 +69,7 @@ def test_ecg_clustering(graph_file, min_weight, ensemble_size): assert cu_score > (0.95 * golden_score) -@pytest.mark.parametrize("graph_file", utils.DATASETS) +@pytest.mark.parametrize("graph_file", DATASETS) @pytest.mark.parametrize("min_weight", MIN_WEIGHTS) @pytest.mark.parametrize("ensemble_size", ENSEMBLE_SIZES) def test_ecg_clustering_nx(graph_file, min_weight, ensemble_size): From e3c4edfae8684225d1ab1198eccd65e5f6aef3aa Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Sat, 16 Jan 2021 01:21:04 +0000 Subject: [PATCH 126/343] refac --- python/cugraph/tests/test_betweenness_centrality.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/cugraph/tests/test_betweenness_centrality.py b/python/cugraph/tests/test_betweenness_centrality.py index 3385a546a6c..19d0eb7e4d4 100755 --- a/python/cugraph/tests/test_betweenness_centrality.py +++ b/python/cugraph/tests/test_betweenness_centrality.py @@ -55,7 +55,7 @@ # Comparison functions # ============================================================================= def calc_betweenness_centrality( - Fixture_params_dts_sml, + Fixture_params_dts, directed=True, k=None, normalized=False, @@ -120,7 +120,7 @@ def calc_betweenness_centrality( G = None Gnx = None - G, Gnx = Fixture_params_dts + G, Gnx = Fixture_params_dts_sml assert G is not None and Gnx is not None if multi_gpu_batch: @@ -469,7 +469,7 @@ def test_betweenness_centrality_weight_except( @pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS) @pytest.mark.parametrize("result_dtype", [str]) def test_betweenness_invalid_dtype( - Fixture_params_dts_sml, + Fixture_params_dts_smls, subset_size, normalized, weight, From 4c4ef8adf3ab4e49934a453dd9e5dce0146e2736 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Fri, 15 Jan 2021 20:16:09 -0600 Subject: [PATCH 127/343] env_fixed --- python/cugraph/tests/test_betweenness_centrality.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cugraph/tests/test_betweenness_centrality.py b/python/cugraph/tests/test_betweenness_centrality.py index 19d0eb7e4d4..fa989e03ea9 100755 --- a/python/cugraph/tests/test_betweenness_centrality.py +++ b/python/cugraph/tests/test_betweenness_centrality.py @@ -120,7 +120,7 @@ def calc_betweenness_centrality( G = None Gnx = None - G, Gnx = Fixture_params_dts_sml + G, Gnx = Fixture_params_dts assert G is not None and Gnx is not None if multi_gpu_batch: @@ -469,7 +469,7 @@ def test_betweenness_centrality_weight_except( @pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS) 
@pytest.mark.parametrize("result_dtype", [str]) def test_betweenness_invalid_dtype( - Fixture_params_dts_smls, + Fixture_params_dts_sml, subset_size, normalized, weight, From e403accdd09386332ba2aad838ca3321d319116a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20Mart=C3=ADnez?= <26169771+miguelusque@users.noreply.github.com> Date: Sun, 17 Jan 2021 18:58:29 +0100 Subject: [PATCH 128/343] Fix #1340 - Use generic from_edgelist() methods Hi! I have updated this notebook to make use of the new generic from_edgelist() methods. I have also updated the notebook to mention that graphs can be created from cudf, dask_cudf and Pandas dataframes. Hope it helps! Regards, Miguel --- notebooks/link_analysis/Pagerank.ipynb | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/notebooks/link_analysis/Pagerank.ipynb b/notebooks/link_analysis/Pagerank.ipynb index c43561ff48c..1397739c75c 100755 --- a/notebooks/link_analysis/Pagerank.ipynb +++ b/notebooks/link_analysis/Pagerank.ipynb @@ -190,7 +190,7 @@ "metadata": {}, "source": [ "### Read in the data - GPU\n", - "cuGraph depends on cuDF for data loading and the initial Dataframe creation\n", + "cuGraph graphs can be created from cuDF, dask_cuDF and Pandas dataframes\n", "\n", "The data file contains an edge list, which represents the connection of a vertex to another. The `source` to `destination` pairs is in what is known as Coordinate Format (COO). In this test case, the data is just two columns. However a third, `weight`, column is also possible" ] @@ -219,8 +219,7 @@ "outputs": [], "source": [ "# create a Graph using the source (src) and destination (dst) vertex pairs from the Dataframe \n", - "G = cugraph.Graph()\n", - "G.from_cudf_edgelist(gdf, source='src', destination='dst')" + "G = cugraph.from_edgelist(gdf, source='src', destination='dst')" ] }, { From b319a21c0f7ab8e9758e8448d4140ef98e1c6c43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20Mart=C3=ADnez?= <26169771+miguelusque@users.noreply.github.com> Date: Sun, 17 Jan 2021 19:26:51 +0100 Subject: [PATCH 129/343] Update updated date I forgot to update the updated date. --- notebooks/link_analysis/Pagerank.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/link_analysis/Pagerank.ipynb b/notebooks/link_analysis/Pagerank.ipynb index 1397739c75c..523a8e7c65e 100755 --- a/notebooks/link_analysis/Pagerank.ipynb +++ b/notebooks/link_analysis/Pagerank.ipynb @@ -11,7 +11,7 @@ "Notebook Credits\n", "* Original Authors: Bradley Rees and James Wyles\n", "* Created: 08/13/2019\n", - "* Updated: 08/16/2020\n", + "* Updated: 01/17/2022\n", "\n", "RAPIDS Versions: 0.14 \n", "\n", From d6239629b75238d812b7c325dd78841e6ef83081 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20Mart=C3=ADnez?= <26169771+miguelusque@users.noreply.github.com> Date: Sun, 17 Jan 2021 19:29:15 +0100 Subject: [PATCH 130/343] Update updated date I wrote the wrong year in the previous commit. Sorry! 
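[Editor's note] As a quick illustration of the generic edge-list API the notebook commits above switch to: the cuDF call below is taken directly from the updated Pagerank notebook cell, while the pandas variant is only inferred from the new markdown text stating that cuDF, dask_cuDF and Pandas dataframes are all accepted, so treat it as an assumption.

```python
import cudf
import pandas as pd
import cugraph

edges = {"src": [0, 1, 2, 3], "dst": [1, 2, 3, 0]}

# cuDF path, as in the updated notebook cell
gdf = cudf.DataFrame(edges)
G = cugraph.from_edgelist(gdf, source="src", destination="dst")

# pandas path, inferred from the notebook's new markdown description
pdf = pd.DataFrame(edges)
G2 = cugraph.from_edgelist(pdf, source="src", destination="dst")

# the rest of the notebook then runs PageRank on the resulting graph
scores = cugraph.pagerank(G)
print(scores.sort_values("pagerank", ascending=False).head())
```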
--- notebooks/link_analysis/Pagerank.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/link_analysis/Pagerank.ipynb b/notebooks/link_analysis/Pagerank.ipynb index 523a8e7c65e..a81e1ccf6c3 100755 --- a/notebooks/link_analysis/Pagerank.ipynb +++ b/notebooks/link_analysis/Pagerank.ipynb @@ -11,7 +11,7 @@ "Notebook Credits\n", "* Original Authors: Bradley Rees and James Wyles\n", "* Created: 08/13/2019\n", - "* Updated: 01/17/2022\n", + "* Updated: 01/17/2021\n", "\n", "RAPIDS Versions: 0.14 \n", "\n", From 27b79d538e048037dfeddf863fd72b70e921150e Mon Sep 17 00:00:00 2001 From: Chuck Hastings Date: Tue, 19 Jan 2021 15:38:48 -0500 Subject: [PATCH 131/343] Pin gunrock to v1.2 for version 0.18 --- cpp/CMakeLists.txt | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index ac73553ca00..5e87a8c549d 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -260,13 +260,9 @@ ExternalProject_Add(cuhornet set(GUNROCK_DIR ${CMAKE_CURRENT_BINARY_DIR}/gunrock CACHE STRING "Path to gunrock repo") set(GUNROCK_INCLUDE_DIR ${GUNROCK_DIR}/src/gunrock_ext CACHE STRING "Path to gunrock includes") -# FIXME: gunrock commit eb13a501edf10dfa1ff2ddd3c05e9de5ec7220ff is a known -# working commit, no newer than Jan. 04 2021. This should be updated with a more -# recent stable commit if possible (or this FIXME removed if not). - ExternalProject_Add(gunrock_ext GIT_REPOSITORY https://github.com/gunrock/gunrock.git - GIT_TAG eb13a501edf10dfa1ff2ddd3c05e9de5ec7220ff + GIT_TAG v1.2 PREFIX ${GUNROCK_DIR} CMAKE_ARGS -DCMAKE_INSTALL_PREFIX= -DGUNROCK_BUILD_SHARED_LIBS=OFF From 6bd2ede5e274f32a58792d948a500a65edf11a59 Mon Sep 17 00:00:00 2001 From: Chuck Hastings Date: Tue, 19 Jan 2021 17:09:35 -0500 Subject: [PATCH 132/343] address review comments --- python/cugraph/linear_assignment/lap.pxd | 20 ++++++------ python/cugraph/tests/test_hungarian.py | 41 +++++++++++++++++------- 2 files changed, 39 insertions(+), 22 deletions(-) diff --git a/python/cugraph/linear_assignment/lap.pxd b/python/cugraph/linear_assignment/lap.pxd index ad437ba09b4..782d5cfef60 100644 --- a/python/cugraph/linear_assignment/lap.pxd +++ b/python/cugraph/linear_assignment/lap.pxd @@ -20,19 +20,19 @@ from cugraph.structure.graph_primtypes cimport * cdef extern from "algorithms.hpp" namespace "cugraph": - cdef WT hungarian[VT,ET,WT]( + cdef weight_t hungarian[vertex_t,edge_t,weight_t]( const handle_t &handle, - const GraphCOOView[VT,ET,WT] &graph, - VT num_workers, - const VT *workers, - VT *assignment) except + + const GraphCOOView[vertex_t,edge_t,weight_t] &graph, + vertex_t num_workers, + const vertex_t *workers, + vertex_t *assignment) except + cdef extern from "algorithms.hpp": - cdef WT dense_hungarian "cugraph::dense::hungarian" [VT,WT]( + cdef weight_t dense_hungarian "cugraph::dense::hungarian" [vertex_t,weight_t]( const handle_t &handle, - const WT *costs, - VT num_rows, - VT num_columns, - VT *assignment) except + + const weight_t *costs, + vertex_t num_rows, + vertex_t num_columns, + vertex_t *assignment) except + diff --git a/python/cugraph/tests/test_hungarian.py b/python/cugraph/tests/test_hungarian.py index bf21cd3420f..7801ae610e9 100644 --- a/python/cugraph/tests/test_hungarian.py +++ b/python/cugraph/tests/test_hungarian.py @@ -21,7 +21,6 @@ import cudf import cugraph from scipy.optimize import linear_sum_assignment -import rmm def create_random_bipartite(v1, v2, size, dtype): @@ -54,7 +53,8 @@ def create_random_bipartite(v1, v2, size, dtype): 
return df1['src'], g, a -SPARSE_SIZES = [[5, 5, 100], [500, 500, 10000], [5000, 5000, 100000]] +SPARSE_SIZES = [[5, 5, 100], [500, 500, 10000]] +DENSE_SIZES = [[5, 100], [500, 10000]] def setup_function(): @@ -66,14 +66,6 @@ def setup_function(): list(product([False, True], [False, True]))) @pytest.mark.parametrize('v1_size, v2_size, weight_limit', SPARSE_SIZES) def test_hungarian(managed, pool, v1_size, v2_size, weight_limit): - rmm.reinitialize( - managed_memory=managed, - pool_allocator=pool, - initial_pool_size=2 << 27 - ) - - assert(rmm.is_initialized()) - v1, g, m = create_random_bipartite(v1_size, v2_size, weight_limit, @@ -93,7 +85,32 @@ def test_hungarian(managed, pool, v1_size, v2_size, weight_limit): scipy_cost = m[np_matching[0], np_matching[1]].sum() - print('scipy_cost = ', scipy_cost) - print('cugraph_cost = ', cugraph_cost) + assert(scipy_cost == cugraph_cost) + + +# Test all combinations of default/managed and pooled/non-pooled allocation +@pytest.mark.parametrize('managed, pool', + list(product([False, True], [False, True]))) +@pytest.mark.parametrize('n, weight_limit', DENSE_SIZES) +def test_dense_hungarian(managed, pool, n, weight_limit): + C = np.random.uniform( + 0, weight_limit, size=(n, n) + ).round().astype(np.float32) + + C_series = cudf.Series(C.flatten()) + + start = timer() + cugraph_cost, matching = cugraph.dense_hungarian(C_series, n, n) + end = timer() + + print('cugraph time: ', (end - start)) + + start = timer() + np_matching = linear_sum_assignment(C) + end = timer() + + print('scipy time: ', (end - start)) + + scipy_cost = C[np_matching[0], np_matching[1]].sum() assert(scipy_cost == cugraph_cost) From 143e04771f4d7af082d6c42b89c89c1e54562e2e Mon Sep 17 00:00:00 2001 From: Devin Robison Date: Tue, 19 Jan 2021 15:38:15 -0700 Subject: [PATCH 133/343] Updates to reflect RAFT PR #120 API changes --- python/cugraph/comms/comms.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/cugraph/comms/comms.py b/python/cugraph/comms/comms.py index 925f4a1a060..88941877538 100644 --- a/python/cugraph/comms/comms.py +++ b/python/cugraph/comms/comms.py @@ -12,7 +12,7 @@ # limitations under the License. 
from cugraph.raft.dask.common.comms import Comms as raftComms -from cugraph.raft.dask.common.comms import worker_state +from cugraph.raft.dask.common.comms import get_raft_comm_state from cugraph.raft.common.handle import Handle from cugraph.comms.comms_wrapper import init_subcomms as c_init_subcomms from dask.distributed import default_client @@ -196,12 +196,12 @@ def get_default_handle(): # Functions to be called from within workers def get_handle(sID): - sessionstate = worker_state(sID) + sessionstate = get_raft_comm_state(sID) return sessionstate['handle'] def get_worker_id(sID): - sessionstate = worker_state(sID) + sessionstate = get_raft_comm_state(sID) return sessionstate['wid'] @@ -216,5 +216,5 @@ def get_n_workers(sID=None): if sID is None: return read_utils.get_n_workers() else: - sessionstate = worker_state(sID) + sessionstate = get_raft_comm_state(sID) return sessionstate['nworkers'] From 1933f5dd965af6abb682c7141485baa105678ca9 Mon Sep 17 00:00:00 2001 From: Chuck Hastings Date: Tue, 19 Jan 2021 18:18:14 -0500 Subject: [PATCH 134/343] clean up some unused things --- python/cugraph/tests/test_hungarian.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/python/cugraph/tests/test_hungarian.py b/python/cugraph/tests/test_hungarian.py index 7801ae610e9..4183bcc2c89 100644 --- a/python/cugraph/tests/test_hungarian.py +++ b/python/cugraph/tests/test_hungarian.py @@ -12,7 +12,6 @@ # limitations under the License. import gc -from itertools import product from timeit import default_timer as timer import numpy as np @@ -61,11 +60,8 @@ def setup_function(): gc.collect() -# Test all combinations of default/managed and pooled/non-pooled allocation -@pytest.mark.parametrize('managed, pool', - list(product([False, True], [False, True]))) @pytest.mark.parametrize('v1_size, v2_size, weight_limit', SPARSE_SIZES) -def test_hungarian(managed, pool, v1_size, v2_size, weight_limit): +def test_hungarian(v1_size, v2_size, weight_limit): v1, g, m = create_random_bipartite(v1_size, v2_size, weight_limit, @@ -88,11 +84,8 @@ def test_hungarian(managed, pool, v1_size, v2_size, weight_limit): assert(scipy_cost == cugraph_cost) -# Test all combinations of default/managed and pooled/non-pooled allocation -@pytest.mark.parametrize('managed, pool', - list(product([False, True], [False, True]))) @pytest.mark.parametrize('n, weight_limit', DENSE_SIZES) -def test_dense_hungarian(managed, pool, n, weight_limit): +def test_dense_hungarian(n, weight_limit): C = np.random.uniform( 0, weight_limit, size=(n, n) ).round().astype(np.float32) From 655e1ed2975625f5ceeefe758a445a8fa21fd29c Mon Sep 17 00:00:00 2001 From: Ishika Roy Date: Tue, 19 Jan 2021 21:49:00 -0600 Subject: [PATCH 135/343] review changes --- python/cugraph/structure/graph.py | 20 +------------------- python/cugraph/structure/symmetrize.py | 13 +++++++++++++ python/cugraph/tests/test_multigraph.py | 12 ++++++++++-- 3 files changed, 24 insertions(+), 21 deletions(-) diff --git a/python/cugraph/structure/graph.py b/python/cugraph/structure/graph.py index 9b35bbe3195..496ad45edad 100644 --- a/python/cugraph/structure/graph.py +++ b/python/cugraph/structure/graph.py @@ -112,24 +112,6 @@ def __init__( self.batch_transposed_adjlists = None if m_graph is not None: - """if (type(self) is Graph and type(m_graph) is MultiGraph) or ( - type(self) is DiGraph and type(m_graph) is MultiDiGraph - ): - elist = m_graph.view_edge_list() - if m_graph.edgelist.weights: - weights = "weights" - else: - weights = None - 
self.from_cudf_edgelist(elist, - source = "src", - destination = "dst", - edge_attr = weights) - else: - msg = ( - "Graph can be initialized using MultiGraph " - "and DiGraph can be initialized using MultiDiGraph" - ) - raise Exception(msg)""" if type(m_graph) is MultiGraph or type(m_graph) is MultiDiGraph: elist = m_graph.view_edge_list() if m_graph.edgelist.weights: @@ -1527,7 +1509,7 @@ def add_internal_vertex_id( class DiGraph(Graph): - def __init__(self, m_graph=None, edge_attr=None): + def __init__(self, m_graph=None): super().__init__( m_graph=m_graph, symmetrized=True ) diff --git a/python/cugraph/structure/symmetrize.py b/python/cugraph/structure/symmetrize.py index 2c70649ded0..0f4ca90a97c 100644 --- a/python/cugraph/structure/symmetrize.py +++ b/python/cugraph/structure/symmetrize.py @@ -42,6 +42,13 @@ def symmetrize_df(df, src_name, dst_name, multi=False, symmetrize=True): Name of the column in the data frame containing the source ids dst_name : string Name of the column in the data frame containing the destination ids + multi : bool + Set to True if graph is a Multi(Di)Graph. This allows multiple + edges instead of dropping them. + symmetrize : bool + Default is True to perform symmetrization. If False only duplicate + edges are dropped. + Examples -------- >>> import cugraph.dask as dcg @@ -109,6 +116,12 @@ def symmetrize_ddf(df, src_name, dst_name, weight_name=None): Name of the column in the data frame containing the source ids dst_name : string Name of the column in the data frame containing the destination ids + multi : bool + Set to True if graph is a Multi(Di)Graph. This allows multiple + edges instead of dropping them. + symmetrize : bool + Default is True to perform symmetrization. If False only duplicate + edges are dropped. Examples -------- diff --git a/python/cugraph/tests/test_multigraph.py b/python/cugraph/tests/test_multigraph.py index 66fc95d7f84..cb659bc7e24 100644 --- a/python/cugraph/tests/test_multigraph.py +++ b/python/cugraph/tests/test_multigraph.py @@ -6,9 +6,16 @@ import numpy as np +# ============================================================================= +# Pytest Setup / Teardown - called for each test function +# ============================================================================= +def setup_function(): + gc.collect() + + @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_multigraph(graph_file): - gc.collect() + # FIXME: Migrate to new test fixtures for Graph setup once available cuM = utils.read_csv_file(graph_file) G = cugraph.MultiDiGraph() G.from_cudf_edgelist(cuM, source="0", destination="1", edge_attr="2") @@ -41,6 +48,7 @@ def test_multigraph(graph_file): @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_Graph_from_MultiGraph(graph_file): + # FIXME: Migrate to new test fixtures for Graph setup once available cuM = utils.read_csv_file(graph_file) GM = cugraph.MultiGraph() GM.from_cudf_edgelist(cuM, source="0", destination="1", edge_attr="2") @@ -73,7 +81,7 @@ def test_Graph_from_MultiGraph(graph_file): @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_multigraph_sssp(graph_file): - gc.collect() + # FIXME: Migrate to new test fixtures for Graph setup once available cuM = utils.read_csv_file(graph_file) G = cugraph.MultiDiGraph() G.from_cudf_edgelist(cuM, source="0", destination="1", edge_attr="2") From 2857d5d458f5a1f3185dbc3264ecd724f88afa5b Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Wed, 20 Jan 2021 10:09:50 -0600 Subject: [PATCH 136/343] refac_revision --- 
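[Editor's note] Tying together the Multi(Di)Graph changes in this series (the m_graph constructor path and the symmetrize/multi flags above, plus the class docstrings added later), here is a short conversion sketch. The column names are arbitrary, and the de-duplication behaviour is summarized from the new docstrings ("drops parallel edges") rather than verified here.

```python
import cudf
import cugraph

# Edge list with a repeated (0, 1) edge to exercise the parallel-edge path
edges = cudf.DataFrame({
    "src": [0, 0, 1, 2],
    "dst": [1, 1, 2, 0],
    "wt":  [1.0, 2.0, 1.0, 3.0],
})

# MultiGraph keeps the parallel edges (cf. the new `multi` flag documented on
# symmetrize_df)
GM = cugraph.MultiGraph()
GM.from_cudf_edgelist(edges, source="src", destination="dst", edge_attr="wt")

# Initializing a Graph from it follows the new m_graph code path and, per the
# added docstrings, drops the parallel edges
G = cugraph.Graph(m_graph=GM)

print(len(GM.view_edge_list()), len(G.view_edge_list()))
```

The new `multi` and `symmetrize` parameters documented on symmetrize_df() are what this path relies on to either keep the duplicate edges (Multi(Di)Graph) or drop them (Graph/DiGraph).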
.../tests/test_betweenness_centrality.py | 36 +++++++++---------- python/cugraph/tests/test_force_atlas2.py | 2 +- python/cugraph/tests/test_k_truss_subgraph.py | 6 +++- python/cugraph/tests/utils.py | 12 +++---- 4 files changed, 30 insertions(+), 26 deletions(-) diff --git a/python/cugraph/tests/test_betweenness_centrality.py b/python/cugraph/tests/test_betweenness_centrality.py index fa989e03ea9..f338e5aa633 100755 --- a/python/cugraph/tests/test_betweenness_centrality.py +++ b/python/cugraph/tests/test_betweenness_centrality.py @@ -55,7 +55,7 @@ # Comparison functions # ============================================================================= def calc_betweenness_centrality( - Fixture_params_dts, + graph_obj_tuple, directed=True, k=None, normalized=False, @@ -120,7 +120,7 @@ def calc_betweenness_centrality( G = None Gnx = None - G, Gnx = Fixture_params_dts + G, Gnx = graph_obj_tuple assert G is not None and Gnx is not None if multi_gpu_batch: @@ -307,24 +307,24 @@ def prepare_test(): WEIGHTED_GRAPH_OPTIONS = [pytest.param(w) for w in WEIGHTED_GRAPH_OPTIONS] -fixture_params_dts_sml = utils.genFixtureParamsProduct( +small_graph_fixture_params = utils.genFixtureParamsProduct( (DATASETS_SMALL, "grph"), (DIRECTED, "dirctd"), (WEIGHTED_GRAPH_OPTIONS, "wgtd_gph_opts")) -fixture_params_dts_urnbrd = utils.genFixtureParamsProduct( +unrenumbered_graph_fixture_params = utils.genFixtureParamsProduct( (DATASETS_UNRENUMBERED, "grph"), (DIRECTED, "dirctd"), (WEIGHTED_GRAPH_OPTIONS, "wgtd_gph_opts")) -@pytest.fixture(scope="module", params=fixture_params_dts_sml) -def Fixture_params_dts_sml(request): +@pytest.fixture(scope="module", params=small_graph_fixture_params) +def get_cu_nx_graph_datasets_small(request): return utils.build_cu_and_nx_graphs(*request.param) -@pytest.fixture(scope="module", params=fixture_params_dts_urnbrd) -def Fixture_params_dts_urnbrd(request): +@pytest.fixture(scope="module", params=unrenumbered_graph_fixture_params) +def get_cu_nx_graph_datasets_unrenumbered(request): return utils.build_cu_and_nx_graphs(*request.param) @@ -338,7 +338,7 @@ def Fixture_params_dts_urnbrd(request): @pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS) @pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) def test_betweenness_centrality( - Fixture_params_dts_sml, + get_cu_nx_graph_datasets_small, subset_size, normalized, weight, @@ -348,7 +348,7 @@ def test_betweenness_centrality( ): prepare_test() sorted_df = calc_betweenness_centrality( - Fixture_params_dts_sml, + get_cu_nx_graph_datasets_small, normalized=normalized, k=subset_size, weight=weight, @@ -367,7 +367,7 @@ def test_betweenness_centrality( @pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) @pytest.mark.parametrize("use_k_full", [True]) def test_betweenness_centrality_k_full( - Fixture_params_dts_sml, + get_cu_nx_graph_datasets_small, subset_size, normalized, weight, @@ -380,7 +380,7 @@ def test_betweenness_centrality_k_full( instead of k=None, checks that k scales properly""" prepare_test() sorted_df = calc_betweenness_centrality( - Fixture_params_dts_sml, + get_cu_nx_graph_datasets_small, normalized=normalized, k=subset_size, weight=weight, @@ -403,7 +403,7 @@ def test_betweenness_centrality_k_full( @pytest.mark.parametrize("subset_seed", [None]) @pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) def test_betweenness_centrality_fixed_sample( - Fixture_params_dts_urnbrd, + get_cu_nx_graph_datasets_unrenumbered, subset_size, normalized, weight, @@ -417,7 +417,7 @@ def 
test_betweenness_centrality_fixed_sample( """ prepare_test() sorted_df = calc_betweenness_centrality( - Fixture_params_dts_urnbrd, + get_cu_nx_graph_datasets_unrenumbered, k=subset_size, normalized=normalized, weight=weight, @@ -435,7 +435,7 @@ def test_betweenness_centrality_fixed_sample( @pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS) @pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) def test_betweenness_centrality_weight_except( - Fixture_params_dts_sml, + get_cu_nx_graph_datasets_small, subset_size, normalized, weight, @@ -451,7 +451,7 @@ def test_betweenness_centrality_weight_except( prepare_test() with pytest.raises(NotImplementedError): sorted_df = calc_betweenness_centrality( - Fixture_params_dts_sml, + get_cu_nx_graph_datasets_small, k=subset_size, normalized=normalized, weight=weight, @@ -469,7 +469,7 @@ def test_betweenness_centrality_weight_except( @pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS) @pytest.mark.parametrize("result_dtype", [str]) def test_betweenness_invalid_dtype( - Fixture_params_dts_sml, + get_cu_nx_graph_datasets_small, subset_size, normalized, weight, @@ -482,7 +482,7 @@ def test_betweenness_invalid_dtype( prepare_test() with pytest.raises(TypeError): sorted_df = calc_betweenness_centrality( - Fixture_params_dts_sml, + get_cu_nx_graph_datasets_small, k=subset_size, normalized=normalized, weight=weight, diff --git a/python/cugraph/tests/test_force_atlas2.py b/python/cugraph/tests/test_force_atlas2.py index e2179436a48..f399460e5e6 100644 --- a/python/cugraph/tests/test_force_atlas2.py +++ b/python/cugraph/tests/test_force_atlas2.py @@ -123,7 +123,7 @@ def test_force_atlas2(graph_file, score, max_iter, iterations on a given graph. """ - matrix_file = PurePath(graph_file).with_suffix(".mtx") + matrix_file = graph_file.with_suffix(".mtx") M = scipy.io.mmread(matrix_file) M = M.todense() cu_trust = trustworthiness(M, cu_pos[["x", "y"]].to_pandas()) diff --git a/python/cugraph/tests/test_k_truss_subgraph.py b/python/cugraph/tests/test_k_truss_subgraph.py index 7650fc6d2b5..95bf5e3e7f4 100644 --- a/python/cugraph/tests/test_k_truss_subgraph.py +++ b/python/cugraph/tests/test_k_truss_subgraph.py @@ -41,7 +41,11 @@ # currently in networkx master and will hopefully will make it to a release # soon. 
def ktruss_ground_truth(graph_file): - G = nx.read_edgelist(graph_file, nodetype=int, data=(("weights", float),)) + G = nx.read_edgelist( + str(graph_file), + nodetype=int, + data=(("weights", float),) + ) df = nx.to_pandas_edgelist(G) return df diff --git a/python/cugraph/tests/utils.py b/python/cugraph/tests/utils.py index 6fa9dd577ee..ab12601c171 100755 --- a/python/cugraph/tests/utils.py +++ b/python/cugraph/tests/utils.py @@ -72,9 +72,9 @@ ] -DATASETS_KTRUSS = [ - (str(PurePath(RAPIDS_DATASET_ROOT_DIR)/"polbooks.csv"), - str(PurePath(RAPIDS_DATASET_ROOT_DIR)/"ref/ktruss/polbooks.csv")) +DATASETS_KTRUSS = [( + PurePath(RAPIDS_DATASET_ROOT_DIR)/"polbooks.csv", + PurePath(RAPIDS_DATASET_ROOT_DIR)/"ref/ktruss/polbooks.csv") ] @@ -88,13 +88,13 @@ MATRIX_INPUT_TYPES = [ pytest.param( - cp_coo_matrix, marks=pytest.mark.cupy_types, id="CuPy.coo_matrix" + cp_coo_matrix, marks=pytest.mark.matrix_types, id="CuPy.coo_matrix" ), pytest.param( - cp_csr_matrix, marks=pytest.mark.cupy_types, id="CuPy.csr_matrix" + cp_csr_matrix, marks=pytest.mark.matrix_types, id="CuPy.csr_matrix" ), pytest.param( - cp_csc_matrix, marks=pytest.mark.cupy_types, id="CuPy.csc_matrix" + cp_csc_matrix, marks=pytest.mark.matrix_types, id="CuPy.csc_matrix" ), ] From 71ecf352c140c100af8b449b0a16da930ae4e121 Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Wed, 20 Jan 2021 13:43:32 -0600 Subject: [PATCH 137/343] Update CODEOWNERS to add configuration for new cugraph-doc-codeowners group. Updated SOURCEBUILD.md to include most recent build.sh options. --- .github/CODEOWNERS | 16 +++++++++++++++- SOURCEBUILD.md | 16 ++++++++++------ 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index d35c5c02218..be2396dd2d5 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,3 +1,17 @@ +# https://docs.github.com/en/github/creating-cloning-and-archiving-repositories/about-code-owners +# Order matters - match of higest importance goes last (last match wins) + +#doc code owners +datasets/ @rapidsai/cugraph-doc-codeowners +notebooks/ @rapidsai/cugraph-doc-codeowners +docs/ @rapidsai/cugraph-doc-codeowners +**/*.txt @rapidsai/cugraph-doc-codeowners +**/*.md @rapidsai/cugraph-doc-codeowners +**/*.rst @rapidsai/cugraph-doc-codeowners +**/*.ipynb @rapidsai/cugraph-doc-codeowners +**/*.pdf @rapidsai/cugraph-doc-codeowners +**/*.png @rapidsai/cugraph-doc-codeowners + #cpp code owners cpp/ @rapidsai/cugraph-cpp-codeowners @@ -9,7 +23,7 @@ python/ @rapidsai/cugraph-python-codeowners **/cmake/ @rapidsai/cugraph-cmake-codeowners #build/ops code owners -.github/ @rapidsai/ops-codeowners +.github/ @rapidsai/ops-codeowners ci/ @rapidsai/ops-codeowners conda/ @rapidsai/ops-codeowners **/Dockerfile @rapidsai/ops-codeowners diff --git a/SOURCEBUILD.md b/SOURCEBUILD.md index 8acd90c4f7f..03fef9821a1 100644 --- a/SOURCEBUILD.md +++ b/SOURCEBUILD.md @@ -1,6 +1,6 @@ # Building from Source -The following instructions are for users wishing to build cuGraph from source code. These instructions are tested on supported distributions of Linux, CUDA, and Python - See [RAPIDS Getting Started](https://rapids.ai/start.html) for list of supported environments. Other operating systems _might be_ compatible, but are not currently tested. +The following instructions are for users wishing to build cuGraph from source code. These instructions are tested on supported distributions of Linux, CUDA, and Python - See [RAPIDS Getting Started](https://rapids.ai/start.html) for list of supported environments. 
Other operating systems _might be_ compatible, but are not currently tested. The cuGraph package include both a C/C++ CUDA portion and a python portion. Both libraries need to be installed in order for cuGraph to operate correctly. @@ -97,17 +97,21 @@ There are several other options available on the build script for advanced users `build.sh` options: ```bash build.sh [ ...] [ ...] - clean - remove all existing build artifacts and configuration (start over) - libcugraph - build the cugraph C++ code - cugraph - build the cugraph Python package - + where is: + clean - remove all existing build artifacts and configuration (start over) + libcugraph - build the cugraph C++ code + cugraph - build the cugraph Python package + docs - build the docs and is: -v - verbose build mode -g - build for debug -n - no install step + --allgpuarch - build for all supported GPU architectures --show_depr_warn - show cmake deprecation warnings -h - print this text + default action (no args) is to build and install 'libcugraph' then 'cugraph' then 'docs' targets + examples: $ ./build.sh clean # remove prior build artifacts (start over) $ ./build.sh libcugraph -v # compile and install libcugraph with verbose output @@ -189,7 +193,7 @@ Run either the C++ or the Python tests with datasets ```bash cd $CUGRAPH_HOME/datasets - source get_test_data.sh #This takes about 10 minutes and download 1GB data (>5 GB uncompressed) + source get_test_data.sh #This takes about 10 minutes and downloads 1GB data (>5 GB uncompressed) ``` Run the C++ tests on large input: From 7bcbc0fd0da7fd929adff8b9a9063f26b9af7430 Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Wed, 20 Jan 2021 14:11:11 -0600 Subject: [PATCH 138/343] Reverting changes to SOURCEBUILD.md so they can be applied later to test new CODEOWNERS after it's been merged. --- SOURCEBUILD.md | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/SOURCEBUILD.md b/SOURCEBUILD.md index 03fef9821a1..8acd90c4f7f 100644 --- a/SOURCEBUILD.md +++ b/SOURCEBUILD.md @@ -1,6 +1,6 @@ # Building from Source -The following instructions are for users wishing to build cuGraph from source code. These instructions are tested on supported distributions of Linux, CUDA, and Python - See [RAPIDS Getting Started](https://rapids.ai/start.html) for list of supported environments. Other operating systems _might be_ compatible, but are not currently tested. +The following instructions are for users wishing to build cuGraph from source code. These instructions are tested on supported distributions of Linux, CUDA, and Python - See [RAPIDS Getting Started](https://rapids.ai/start.html) for list of supported environments. Other operating systems _might be_ compatible, but are not currently tested. The cuGraph package include both a C/C++ CUDA portion and a python portion. Both libraries need to be installed in order for cuGraph to operate correctly. @@ -97,21 +97,17 @@ There are several other options available on the build script for advanced users `build.sh` options: ```bash build.sh [ ...] [ ...] 
- where is: - clean - remove all existing build artifacts and configuration (start over) - libcugraph - build the cugraph C++ code - cugraph - build the cugraph Python package - docs - build the docs + clean - remove all existing build artifacts and configuration (start over) + libcugraph - build the cugraph C++ code + cugraph - build the cugraph Python package + and is: -v - verbose build mode -g - build for debug -n - no install step - --allgpuarch - build for all supported GPU architectures --show_depr_warn - show cmake deprecation warnings -h - print this text - default action (no args) is to build and install 'libcugraph' then 'cugraph' then 'docs' targets - examples: $ ./build.sh clean # remove prior build artifacts (start over) $ ./build.sh libcugraph -v # compile and install libcugraph with verbose output @@ -193,7 +189,7 @@ Run either the C++ or the Python tests with datasets ```bash cd $CUGRAPH_HOME/datasets - source get_test_data.sh #This takes about 10 minutes and downloads 1GB data (>5 GB uncompressed) + source get_test_data.sh #This takes about 10 minutes and download 1GB data (>5 GB uncompressed) ``` Run the C++ tests on large input: From 8cfe7b872fc11645532581deb83cce5bc5ee0073 Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Wed, 20 Jan 2021 14:19:03 -0600 Subject: [PATCH 139/343] Fixed typo in comment. --- .github/CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index be2396dd2d5..ebffd18ca5a 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,5 +1,5 @@ # https://docs.github.com/en/github/creating-cloning-and-archiving-repositories/about-code-owners -# Order matters - match of higest importance goes last (last match wins) +# Order matters - match of highest importance goes last (last match wins) #doc code owners datasets/ @rapidsai/cugraph-doc-codeowners From fe6e57bc262b5649135bb8bc632106f7bf20fc1c Mon Sep 17 00:00:00 2001 From: Devin Robison Date: Wed, 20 Jan 2021 14:24:01 -0700 Subject: [PATCH 140/343] Update raft git tag --- cpp/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index ac73553ca00..6ed07f58b9f 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -320,7 +320,7 @@ else(DEFINED ENV{RAFT_PATH}) ExternalProject_Add(raft GIT_REPOSITORY https://github.com/rapidsai/raft.git - GIT_TAG f75d7b437bf1da3df749108161b8a0505fb6b7b3 + GIT_TAG 9dbf2c8a9134ce8135f7fe947ec523d874fcab6a PREFIX ${RAFT_DIR} CONFIGURE_COMMAND "" BUILD_COMMAND "" From 8170d0ec0a9fb6046c83a4ba19e04fa72cef9a5c Mon Sep 17 00:00:00 2001 From: Devin Robison Date: Wed, 20 Jan 2021 14:59:26 -0700 Subject: [PATCH 141/343] Update copyright year for style checking --- python/cugraph/comms/comms.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cugraph/comms/comms.py b/python/cugraph/comms/comms.py index 88941877538..85fc426f373 100644 --- a/python/cugraph/comms/comms.py +++ b/python/cugraph/comms/comms.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2020, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at From 8a5bdc371012612db83256b30d7335f32bc09c44 Mon Sep 17 00:00:00 2001 From: Iroy30 <41401566+Iroy30@users.noreply.github.com> Date: Wed, 20 Jan 2021 22:41:41 -0600 Subject: [PATCH 142/343] Add doc --- python/cugraph/structure/graph.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/python/cugraph/structure/graph.py b/python/cugraph/structure/graph.py index 496ad45edad..85c1b8bce35 100644 --- a/python/cugraph/structure/graph.py +++ b/python/cugraph/structure/graph.py @@ -1509,6 +1509,9 @@ def add_internal_vertex_id( class DiGraph(Graph): + """ + cuGraph directed graph class. Drops parallel edges. + """ def __init__(self, m_graph=None): super().__init__( m_graph=m_graph, symmetrized=True @@ -1516,10 +1519,16 @@ def __init__(self, m_graph=None): class MultiGraph(Graph): + """ + cuGraph class to create and store undirected graphs with parallel edges. + """ def __init__(self, renumbered=True): super().__init__(multi=True) class MultiDiGraph(Graph): + """ + cuGraph class to create and store directed graphs with parallel edges. + """ def __init__(self, renumbered=True): super().__init__(symmetrized=True, multi=True) From 2280295217dbbe0433cbaff474375606e69bebe0 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 21 Jan 2021 00:00:41 -0500 Subject: [PATCH 143/343] initial implementation of extract_induced_subgraph --- cpp/include/experimental/graph_functions.hpp | 44 ++++ cpp/src/experimental/induced_subgraph.cu | 217 ++++++++++++------- 2 files changed, 182 insertions(+), 79 deletions(-) diff --git a/cpp/include/experimental/graph_functions.hpp b/cpp/include/experimental/graph_functions.hpp index 70db662f903..98e36c265df 100644 --- a/cpp/include/experimental/graph_functions.hpp +++ b/cpp/include/experimental/graph_functions.hpp @@ -243,5 +243,49 @@ void relabel(raft::handle_t const& handle, vertex_t num_labels, bool do_expensive_check = false); +/** + * @brief extract induced subgraph(s). + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam edge_t Type of edge identifiers. Needs to be an integral type. + * @tparam weight_t Type of edge weights. + * @tparam store_transposed Flag indicating whether to store the graph adjacency matrix as is or as + * transposed. + * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) + * or multi-GPU (true). + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Graph view object, we extract induced subgraphs from @p graph_view. + * @param subgraph_offsets Pointer to subgraph vertex offsets (size == @p num_subgraphs + 1). + * @param subgraph_vertices Pointer to subgraph vertices (size == @p subgraph_offsets[@p + * num_subgraphs]). The elements of @p subgraph_vertices for each subgraph should be sorted in + * ascending order and unique. + * @param num_subgraphs Number of induced subgraphs to extract. + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). + * @return std::tuple, rmm::device_uvector, + * rmm::device_uvector, rmm::device_uvector> Quadraplet of edge major (destination + * if @p store_transposed is true, source otherwise) vertices, edge minor (source if @p + * store_transposed is true, destination otherwise) vertices, edge weights, and edge offsets for + * each induced subgraphs (size == num_subgraphs + 1). 
The sizes of the edge major & minor vertices + * are edge_offsets[num_subgraphs]. The size of the edge weights is either + * edge_offsets[num_subgraphs] (if @p graph_view is weighted) or 0 (if @p graph_view is unweighted). + */ +template +std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector> +extract_induced_subgraph( + raft::handle_t const& handle, + graph_view_t const& graph_view, + size_t const* subgraph_offsets /* size == num_subgraphs + 1 */, + vertex_t const* subgraph_vertices /* size == subgraph_offsets[num_subgraphs] */, + size_t num_subgraphs, + bool do_expensive_check = false); + } // namespace experimental } // namespace cugraph diff --git a/cpp/src/experimental/induced_subgraph.cu b/cpp/src/experimental/induced_subgraph.cu index 2e89c1830a6..734b6529098 100644 --- a/cpp/src/experimental/induced_subgraph.cu +++ b/cpp/src/experimental/induced_subgraph.cu @@ -16,9 +16,9 @@ #include #include -#include +#include #include -#include +#include #include #include @@ -26,40 +26,15 @@ #include #include +#include #include #include -#include #include namespace cugraph { namespace experimental { -namespace detail { -} // namespace detail -/** - * @brief extract induced subgraph(s). - * - * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. - * @tparam edge_t Type of edge identifiers. Needs to be an integral type. - * @tparam weight_t Type of edge weights. Needs to be a floating point type. - * @tparam store_transposed - * @tparam store_transposed Flag indicating whether to store the graph adjacency matrix as is or as - * transposed. - * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) - * or multi-GPU (true). - * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and - * handles to various CUDA libraries) to run graph algorithms. - * @param graph_view Graph view object of, we extract induced subgraphs from @p graph_view. - * @param subgraph_offsets Pointer to subgraph vertex offsets (size == @p num_subgraphs + 1). - * @param subgraph_vertices Pointer to subgraph vertices (size == @p subgraph_offsets[@p - * num_subgraphs]). @p subgraph_vertices for each subgraph should be sorted in ascending order. - * @param num_subgraphs Number of induced subgraphs to extract. - * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). - * @return std::tuple, rmm::device_uvector, - * rmm::device_uvector, rmm::device_uvector> Quadraplet of edge source vertices, - * edge destination vertices, edge weights, and edge offsets for each induced subgraph. - */ template , rmm::device_uvector> extract_induced_subgraph( raft::handle_t const &handle, - graph_view_t const &graph_view, + graph_view_t const &graph_view, size_t const *subgraph_offsets /* size == num_subgraphs + 1 */, vertex_t const *subgraph_vertices /* size == subgraph_offsets[num_subgraphs] */, size_t num_subgraphs, - bool do_expensive_check = false) + bool do_expensive_check) { // FIXME: this code is inefficient for the vertices with their local degrees much larger than the // number of vertices in the subgraphs (in this case, searching that the subgraph vertices are @@ -87,16 +62,15 @@ extract_induced_subgraph( // 1. 
check input arguments if (do_expensive_check) { + size_t should_be_zero{std::numeric_limits::max()}; size_t num_aggregate_subgraph_vertices{}; + raft::update_host(&should_be_zero, subgraph_offsets, 1, handle.get_stream()); raft::update_host( &num_aggregate_subgraph_vertices, subgraph_offsets + num_subgraphs, 1, handle.get_stream()); CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); - - size_t should_be_zero{std::numeric_limits::max()}; - raft::update_host(&should_be_zero, subgraph_offsets, 1, handle.get_stream()); - CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); CUGRAPH_EXPECTS(should_be_zero == 0, "Invalid input argument: subgraph_offsets[0] should be 0."); + CUGRAPH_EXPECTS( thrust::is_sorted(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), subgraph_offsets, @@ -114,18 +88,23 @@ extract_induced_subgraph( "Invalid input argument: subgraph_vertices has invalid vertex IDs."); CUGRAPH_EXPECTS( - thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - thrust::make_counting_iterator(size_t{0}), - thrust::make_counting_iterator(num_subgraphs), - [subgraph_offsets, subgraph_vertices] __device__(auto i) { - return !thrust::is_sorted(thrust::seq, - subgraph_vertices + subgraph_offsets[i], - subgraph_vertices + subgraph_offsets[i + 1]) || - (thrust::unique(thrust::seq, - subgraph_vertices + subgraph_offsets[i], - subgraph_vertices + subgraph_offsets[i + 1]) != - subgraph_vertices + subgraph_offsets[i + 1]); - }) == 0, + thrust::count_if( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(num_subgraphs), + [subgraph_offsets, subgraph_vertices] __device__(auto i) { + // vertices are sorted and unique + return !thrust::is_sorted(thrust::seq, + subgraph_vertices + subgraph_offsets[i], + subgraph_vertices + subgraph_offsets[i + 1]) || + (thrust::count_if( + thrust::seq, + thrust::make_counting_iterator(subgraph_offsets[i]), + thrust::make_counting_iterator(subgraph_offsets[i + 1]), + [subgraph_vertices, last = subgraph_offsets[i + 1] - 1] __device__(auto i) { + return (i != last) && (subgraph_vertices[i] == subgraph_vertices[i + 1]); + }) != 0); + }) == 0, "Invalid input argument: subgraph_vertices for each subgraph idx should be sorted in " "ascending order and unique."); } @@ -134,87 +113,167 @@ extract_induced_subgraph( if (multi_gpu) { CUGRAPH_FAIL("Unimplemented."); + return std::make_tuple(rmm::device_uvector(0, handle.get_stream()), + rmm::device_uvector(0, handle.get_stream()), + rmm::device_uvector(0, handle.get_stream()), + rmm::device_uvector(0, handle.get_stream())); } else { + // 2-1. 
Phase 1: calculate memory requirements + size_t num_aggregate_subgraph_vertices{}; raft::update_host( &num_aggregate_subgraph_vertices, subgraph_offsets + num_subgraphs, 1, handle.get_stream()); CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); - rmm::device_uvector subgraph_edge_offsets(num_aggregate_subgraph_vertices + 1, - handle.get_stream()); + rmm::device_uvector subgraph_vertex_output_offsets( + num_aggregate_subgraph_vertices + 1, + handle.get_stream()); // for each element of subgraph_vertices - matrix_partition matrix_partition(graph_view, 0); - thrust::tabulate( + matrix_partition_device_t> + matrix_partition(graph_view, 0); + thrust::transform( rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - subgraph_edge_offsets.begin(), - subgraph_edge_offsets.end() - 1, + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(num_aggregate_subgraph_vertices), + subgraph_vertex_output_offsets.begin(), [subgraph_offsets, subgraph_vertices, num_subgraphs, matrix_partition] __device__(auto i) { auto subgraph_idx = thrust::distance( subgraph_offsets + 1, - thrust::lower_bound( - thrust::seq, subgraph_offsets + 1, subgraph_offsets + num_subgraphs + 1, size_t{i})); + thrust::upper_bound(thrust::seq, subgraph_offsets, subgraph_offsets + num_subgraphs, i)); vertex_t const *indices{nullptr}; - weight_t cosnt *weights{nullptr}; + weight_t const *weights{nullptr}; edge_t local_degree{}; auto major_offset = matrix_partition.get_major_offset_from_major_nocheck(subgraph_vertices[i]); thrust::tie(indices, weights, local_degree) = matrix_partition.get_local_edges(major_offset); + // FIXME: this is inefficient for high local degree vertices return thrust::count_if( thrust::seq, indices, indices + local_degree, - [vertex_first = subgraph_offsets + subgraph_idx, - vertex_last = subgraph_offsets + (subgraph_idx + 1)] __device__(auto nbr) { + [vertex_first = subgraph_vertices + subgraph_offsets[subgraph_idx], + vertex_last = + subgraph_vertices + subgraph_offsets[subgraph_idx + 1]] __device__(auto nbr) { return thrust::binary_search(thrust::seq, vertex_first, vertex_last, nbr); }); }); thrust::exclusive_scan(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - subgraph_edge_offsets, - subgraph_edge_offsets + num_aggregate_subgraph_vertices + 1, - subgraph_edge_offsets); + subgraph_vertex_output_offsets.begin(), + subgraph_vertex_output_offsets.end(), + subgraph_vertex_output_offsets.begin()); size_t num_aggregate_edges{}; raft::update_host(&num_aggregate_edges, - subgraph_edge_offsets + num_aggregate_subgraph_vertices, + subgraph_vertex_output_offsets.data() + num_aggregate_subgraph_vertices, 1, handle.get_stream()); CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + // 2-2. Phase 2: find the edges in the induced subgraphs + rmm::device_uvector edge_majors(num_aggregate_edges, handle.get_stream()); rmm::device_uvector edge_minors(num_aggregate_edges, handle.get_stream()); - rmm::device_uvector edge_weights(graph_view.is_weighted() ? num_aggregate_edges : size_t{0}, - handle.get_stream()); + rmm::device_uvector edge_weights( + graph_view.is_weighted() ? 
num_aggregate_edges : size_t{0}, handle.get_stream()); thrust::for_each( rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), thrust::make_counting_iterator(size_t{0}), - thrust::make_counting_iterator(num_subgraphs), - [subgraph_offsets, subgraph_vertices, num_subgraphs, matrix_partition, subgraph_edge_offsets = subgraph_edge_offsets.data()] __device__(auto i) { + thrust::make_counting_iterator(num_aggregate_subgraph_vertices), + [subgraph_offsets, + subgraph_vertices, + num_subgraphs, + matrix_partition, + subgraph_vertex_output_offsets = subgraph_vertex_output_offsets.data(), + edge_majors = edge_majors.data(), + edge_minors = edge_minors.data(), + edge_weights = edge_weights.data()] __device__(auto i) { auto subgraph_idx = thrust::distance( - subgraph_offsets + 1, - thrust::lower_bound( - thrust::seq, subgraph_offsets + 1, subgraph_offsets + num_subgraphs + 1, size_t{i})); + subgraph_offsets, + thrust::upper_bound( + thrust::seq, subgraph_offsets + 1, subgraph_offsets + num_subgraphs, size_t{i})); vertex_t const *indices{nullptr}; - weight_t cosnt *weights{nullptr}; + weight_t const *weights{nullptr}; edge_t local_degree{}; auto major_offset = matrix_partition.get_major_offset_from_major_nocheck(subgraph_vertices[i]); thrust::tie(indices, weights, local_degree) = matrix_partition.get_local_edges(major_offset); - thrust::copy_if( - thrust::seq, - thrust::make_zip_iterator(thrust::make_constant_iterator(subgraph_vertices[i]), indices, weights, , - indices + local_degree, - [vertex_first = subgraph_offsets + subgraph_idx, - vertex_last = subgraph_offsets + (subgraph_idx + 1)] __device__(auto nbr) { - return thrust::binary_search(thrust::seq, vertex_first, vertex_last, nbr); - }); + if (weights != nullptr) { + auto triplet_first = thrust::make_zip_iterator(thrust::make_tuple( + thrust::make_constant_iterator(subgraph_vertices[i]), indices, weights)); + // FIXME: this is inefficient for high local degree vertices + thrust::copy_if( + thrust::seq, + triplet_first, + triplet_first + local_degree, + thrust::make_zip_iterator(thrust::make_tuple(edge_majors, edge_minors, edge_weights)) + + subgraph_vertex_output_offsets[i], + [vertex_first = subgraph_vertices + subgraph_offsets[subgraph_idx], + vertex_last = + subgraph_vertices + subgraph_offsets[subgraph_idx + 1]] __device__(auto t) { + return thrust::binary_search( + thrust::seq, vertex_first, vertex_last, thrust::get<1>(t)); + }); + } else { + auto pair_first = thrust::make_zip_iterator( + thrust::make_tuple(thrust::make_constant_iterator(subgraph_vertices[i]), indices)); + // FIXME: this is inefficient for high local degree vertices + thrust::copy_if(thrust::seq, + pair_first, + pair_first + local_degree, + thrust::make_zip_iterator(thrust::make_tuple(edge_majors, edge_minors)) + + subgraph_vertex_output_offsets[i], + [vertex_first = subgraph_offsets + subgraph_idx, + vertex_last = subgraph_offsets + (subgraph_idx + 1)] __device__(auto t) { + return thrust::binary_search( + thrust::seq, vertex_first, vertex_last, thrust::get<1>(t)); + }); + } }); - } - return std::make_tuple(std::move(), std::move(), std::move(), std::move()); + rmm::device_uvector subgraph_edge_offsets(num_subgraphs + 1, handle.get_stream()); + thrust::gather(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + subgraph_offsets, + subgraph_offsets + (num_subgraphs + 1), + subgraph_vertex_output_offsets.begin(), + subgraph_edge_offsets.begin()); + + CUDA_TRY( + cudaStreamSynchronize(handle.get_stream())); // subgraph_vertex_output_offsets will become 
+ // out-of-scope once this function returns + + return std::make_tuple(std::move(edge_majors), + std::move(edge_minors), + std::move(edge_weights), + std::move(subgraph_edge_offsets)); + } } +// explicit instantiation + +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector> +extract_induced_subgraph(raft::handle_t const &handle, + graph_view_t const &graph_view, + size_t const *subgraph_offsets, + int32_t const *subgraph_vertices, + size_t num_subgraphs, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector> +extract_induced_subgraph(raft::handle_t const &handle, + graph_view_t const &graph_view, + size_t const *subgraph_offsets, + int32_t const *subgraph_vertices, + size_t num_subgraphs, + bool do_expensive_check); + } // namespace experimental } // namespace cugraph From ec0d6f49416b66f58b98584a982cba4d92733f2f Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 21 Jan 2021 16:09:10 -0500 Subject: [PATCH 144/343] fixed C++ test naming mistakes --- cpp/tests/experimental/coarsen_graph_test.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/tests/experimental/coarsen_graph_test.cpp b/cpp/tests/experimental/coarsen_graph_test.cpp index 4b56fd843dc..b790dfffa69 100644 --- a/cpp/tests/experimental/coarsen_graph_test.cpp +++ b/cpp/tests/experimental/coarsen_graph_test.cpp @@ -371,12 +371,12 @@ class Tests_CoarsenGraph : public ::testing::TestWithParam // FIXME: add tests for type combinations -TEST_P(Tests_CoarsenGraph, CheckInt32Int32FloatFloatTransposed) +TEST_P(Tests_CoarsenGraph, CheckInt32Int32FloatTransposed) { run_current_test(GetParam()); } -TEST_P(Tests_CoarsenGraph, CheckInt32Int32FloatFloatUntransposed) +TEST_P(Tests_CoarsenGraph, CheckInt32Int32FloatUntransposed) { run_current_test(GetParam()); } From 772c7ffeff3c56a18339e735c24ed0d6fd9dfbb6 Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Thu, 21 Jan 2021 16:18:52 -0600 Subject: [PATCH 145/343] Updated SOURCEBUILD.md to include the latest build.sh options. --- SOURCEBUILD.md | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/SOURCEBUILD.md b/SOURCEBUILD.md index 8acd90c4f7f..03fef9821a1 100644 --- a/SOURCEBUILD.md +++ b/SOURCEBUILD.md @@ -1,6 +1,6 @@ # Building from Source -The following instructions are for users wishing to build cuGraph from source code. These instructions are tested on supported distributions of Linux, CUDA, and Python - See [RAPIDS Getting Started](https://rapids.ai/start.html) for list of supported environments. Other operating systems _might be_ compatible, but are not currently tested. +The following instructions are for users wishing to build cuGraph from source code. These instructions are tested on supported distributions of Linux, CUDA, and Python - See [RAPIDS Getting Started](https://rapids.ai/start.html) for list of supported environments. Other operating systems _might be_ compatible, but are not currently tested. The cuGraph package include both a C/C++ CUDA portion and a python portion. Both libraries need to be installed in order for cuGraph to operate correctly. @@ -97,17 +97,21 @@ There are several other options available on the build script for advanced users `build.sh` options: ```bash build.sh [ ...] [ ...] 
- clean - remove all existing build artifacts and configuration (start over) - libcugraph - build the cugraph C++ code - cugraph - build the cugraph Python package - + where is: + clean - remove all existing build artifacts and configuration (start over) + libcugraph - build the cugraph C++ code + cugraph - build the cugraph Python package + docs - build the docs and is: -v - verbose build mode -g - build for debug -n - no install step + --allgpuarch - build for all supported GPU architectures --show_depr_warn - show cmake deprecation warnings -h - print this text + default action (no args) is to build and install 'libcugraph' then 'cugraph' then 'docs' targets + examples: $ ./build.sh clean # remove prior build artifacts (start over) $ ./build.sh libcugraph -v # compile and install libcugraph with verbose output @@ -189,7 +193,7 @@ Run either the C++ or the Python tests with datasets ```bash cd $CUGRAPH_HOME/datasets - source get_test_data.sh #This takes about 10 minutes and download 1GB data (>5 GB uncompressed) + source get_test_data.sh #This takes about 10 minutes and downloads 1GB data (>5 GB uncompressed) ``` Run the C++ tests on large input: From 42a886c034ff49fe10227932a4ed46c18a685368 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 21 Jan 2021 17:33:51 -0500 Subject: [PATCH 146/343] rename extract_induced_subgraph to extract_induced_subgraph"s" --- cpp/include/experimental/graph_functions.hpp | 2 +- cpp/src/experimental/induced_subgraph.cu | 26 ++++++++++---------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/cpp/include/experimental/graph_functions.hpp b/cpp/include/experimental/graph_functions.hpp index 98e36c265df..7b4bb466b97 100644 --- a/cpp/include/experimental/graph_functions.hpp +++ b/cpp/include/experimental/graph_functions.hpp @@ -279,7 +279,7 @@ std::tuple, rmm::device_uvector, rmm::device_uvector, rmm::device_uvector> -extract_induced_subgraph( +extract_induced_subgraphs( raft::handle_t const& handle, graph_view_t const& graph_view, size_t const* subgraph_offsets /* size == num_subgraphs + 1 */, diff --git a/cpp/src/experimental/induced_subgraph.cu b/cpp/src/experimental/induced_subgraph.cu index 734b6529098..bb3f590f25c 100644 --- a/cpp/src/experimental/induced_subgraph.cu +++ b/cpp/src/experimental/induced_subgraph.cu @@ -44,7 +44,7 @@ std::tuple, rmm::device_uvector, rmm::device_uvector, rmm::device_uvector> -extract_induced_subgraph( +extract_induced_subgraphs( raft::handle_t const &handle, graph_view_t const &graph_view, size_t const *subgraph_offsets /* size == num_subgraphs + 1 */, @@ -257,23 +257,23 @@ template std::tuple, rmm::device_uvector, rmm::device_uvector, rmm::device_uvector> -extract_induced_subgraph(raft::handle_t const &handle, - graph_view_t const &graph_view, - size_t const *subgraph_offsets, - int32_t const *subgraph_vertices, - size_t num_subgraphs, - bool do_expensive_check); +extract_induced_subgraphs(raft::handle_t const &handle, + graph_view_t const &graph_view, + size_t const *subgraph_offsets, + int32_t const *subgraph_vertices, + size_t num_subgraphs, + bool do_expensive_check); template std::tuple, rmm::device_uvector, rmm::device_uvector, rmm::device_uvector> -extract_induced_subgraph(raft::handle_t const &handle, - graph_view_t const &graph_view, - size_t const *subgraph_offsets, - int32_t const *subgraph_vertices, - size_t num_subgraphs, - bool do_expensive_check); +extract_induced_subgraphs(raft::handle_t const &handle, + graph_view_t const &graph_view, + size_t const *subgraph_offsets, + int32_t const 
*subgraph_vertices, + size_t num_subgraphs, + bool do_expensive_check); } // namespace experimental } // namespace cugraph From cd135621f3bb04f3511bd8aa7874ab2424f1bad2 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Fri, 22 Jan 2021 11:04:25 -0500 Subject: [PATCH 147/343] add induced sugraph test --- cpp/tests/CMakeLists.txt | 9 + .../experimental/induced_subgraph_test.cpp | 296 ++++++++++++++++++ 2 files changed, 305 insertions(+) create mode 100644 cpp/tests/experimental/induced_subgraph_test.cpp diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 941ef4774f6..59b84a88fe7 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -281,6 +281,15 @@ set(EXPERIMENTAL_COARSEN_GRAPH_TEST_SRCS ConfigureTest(EXPERIMENTAL_COARSEN_GRAPH_TEST "${EXPERIMENTAL_COARSEN_GRAPH_TEST_SRCS}" "") +################################################################################################### +# - Experimental induced subgraph tests ----------------------------------------------------------- + +set(EXPERIMENTAL_INDUCED_SUBGRAPH_TEST_SRCS + "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/experimental/induced_subgraph_test.cpp") + +ConfigureTest(EXPERIMENTAL_INDUCED_SUBGRAPH_TEST "${EXPERIMENTAL_INDUCED_SUBGRAPH_TEST_SRCS}" "") + ################################################################################################### # - Experimental BFS tests ------------------------------------------------------------------------ diff --git a/cpp/tests/experimental/induced_subgraph_test.cpp b/cpp/tests/experimental/induced_subgraph_test.cpp new file mode 100644 index 00000000000..3734d322fff --- /dev/null +++ b/cpp/tests/experimental/induced_subgraph_test.cpp @@ -0,0 +1,296 @@ +/* + * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governin_from_mtxg permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +template +std::tuple, std::vector, std::vector, std::vector> +extract_induced_subgraph_reference(edge_t const* offsets, + vertex_t const* indices, + weight_t const* weights, + size_t const* subgraph_offsets, + vertex_t const* subgraph_vertices, + vertex_t num_vertices, + size_t num_subgraphs) +{ + std::vector edgelist_majors{}; + std::vector edgelist_minors{}; + std::vector edgelist_weights{}; + std::vector subgraph_edge_offsets{0}; + + for (size_t i = 0; i < num_subgraphs; ++i) { + std::for_each(subgraph_vertices + subgraph_offsets[i], + subgraph_vertices + subgraph_offsets[i + 1], + [offsets, + indices, + weights, + subgraph_vertices, + subgraph_offsets, + &edgelist_majors, + &edgelist_minors, + &edgelist_weights, + i](auto v) { + auto first = offsets[v]; + auto last = offsets[v + 1]; + for (auto j = first; j < last; ++j) { + if (std::binary_search(subgraph_vertices + subgraph_offsets[i], + subgraph_vertices + subgraph_offsets[i + 1], + indices[j])) { + edgelist_majors.push_back(v); + edgelist_minors.push_back(indices[j]); + if (weights != nullptr) { edgelist_weights.push_back(weights[j]); } + } + } + }); + subgraph_edge_offsets.push_back(edgelist_majors.size()); + } + + return std::make_tuple(edgelist_majors, edgelist_minors, edgelist_weights, subgraph_edge_offsets); +} + +typedef struct InducedSubgraph_Usecase_t { + std::string graph_file_full_path{}; + std::vector subgraph_sizes{}; + bool test_weighted{false}; + + InducedSubgraph_Usecase_t(std::string const& graph_file_path, + std::vector const& subgraph_sizes, + bool test_weighted) + : subgraph_sizes(subgraph_sizes), test_weighted(test_weighted) + { + if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { + graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; + } else { + graph_file_full_path = graph_file_path; + } + }; +} InducedSubgraph_Usecase; + +class Tests_InducedSubgraph : public ::testing::TestWithParam { + public: + Tests_InducedSubgraph() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + template + void run_current_test(InducedSubgraph_Usecase const& configuration) + { + raft::handle_t handle{}; + + auto graph = cugraph::test:: + read_graph_from_matrix_market_file( + handle, configuration.graph_file_full_path, configuration.test_weighted); + auto graph_view = graph.view(); + + std::vector h_offsets(graph_view.get_number_of_vertices() + 1); + std::vector h_indices(graph_view.get_number_of_edges()); + std::vector h_weights{}; + raft::update_host(h_offsets.data(), + graph_view.offsets(), + graph_view.get_number_of_vertices() + 1, + handle.get_stream()); + raft::update_host(h_indices.data(), + graph_view.indices(), + graph_view.get_number_of_edges(), + handle.get_stream()); + if (graph_view.is_weighted()) { + h_weights.assign(graph_view.get_number_of_edges(), weight_t{0.0}); + raft::update_host(h_weights.data(), + graph_view.weights(), + graph_view.get_number_of_edges(), + handle.get_stream()); + } + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + + std::vector h_subgraph_offsets(configuration.subgraph_sizes.size() + 1, 0); + std::partial_sum(configuration.subgraph_sizes.begin(), + configuration.subgraph_sizes.end(), + h_subgraph_offsets.begin() + 1); + std::vector h_subgraph_vertices( + 
h_subgraph_offsets.back(), cugraph::experimental::invalid_vertex_id::value); + std::default_random_engine generator{}; + std::uniform_int_distribution distribution{0, + graph_view.get_number_of_vertices() - 1}; + + for (size_t i = 0; i < configuration.subgraph_sizes.size(); ++i) { + auto start = h_subgraph_offsets[i]; + auto last = h_subgraph_offsets[i + 1]; + while (start < last) { + std::for_each(h_subgraph_vertices.begin() + start, + h_subgraph_vertices.begin() + last, + [&distribution, &generator](auto& val) { val = distribution(generator); }); + std::sort(h_subgraph_vertices.begin() + start, h_subgraph_vertices.begin() + last); + start += std::distance( + h_subgraph_vertices.begin() + start, + std::unique(h_subgraph_vertices.begin() + start, h_subgraph_vertices.begin() + last)); + } + } + + rmm::device_uvector d_subgraph_offsets(h_subgraph_offsets.size(), handle.get_stream()); + rmm::device_uvector d_subgraph_vertices(d_subgraph_vertices.size(), + handle.get_stream()); + raft::update_device(d_subgraph_offsets.data(), + h_subgraph_offsets.data(), + h_subgraph_offsets.size(), + handle.get_stream()); + raft::update_device(d_subgraph_vertices.data(), + h_subgraph_vertices.data(), + h_subgraph_vertices.size(), + handle.get_stream()); + + std::vector h_reference_subgraph_edgelist_majors{}; + std::vector h_reference_subgraph_edgelist_minors{}; + std::vector h_reference_subgraph_edgelist_weights{}; + std::vector h_reference_subgraph_edge_offsets{}; + std::tie(h_reference_subgraph_edgelist_majors, + h_reference_subgraph_edgelist_minors, + h_reference_subgraph_edgelist_weights, + h_reference_subgraph_edge_offsets) = + extract_induced_subgraph_reference( + h_offsets.data(), + h_indices.data(), + h_weights.size() > 0 ? h_weights.data() : static_cast(nullptr), + h_subgraph_offsets.data(), + h_subgraph_vertices.data(), + graph_view.get_number_of_vertices(), + configuration.subgraph_sizes.size()); + + rmm::device_uvector d_subgraph_edgelist_majors(0, handle.get_stream()); + rmm::device_uvector d_subgraph_edgelist_minors(0, handle.get_stream()); + rmm::device_uvector d_subgraph_edgelist_weights(0, handle.get_stream()); + rmm::device_uvector d_subgraph_edge_offsets(0, handle.get_stream()); + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + // FIXME: turn-off do_expensive_check once verified. 
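      // Hedged illustration (editorial sketch, not part of the committed patch): per the
      // extract_induced_subgraphs declaration in cpp/include/experimental/graph_functions.hpp,
      // the function returns a tuple of (edge majors, edge minors, edge weights, per-subgraph
      // edge offsets), so a caller would normally capture the result into the buffers declared
      // above before comparing against the reference, e.g.:
      //
      //   std::tie(d_subgraph_edgelist_majors,
      //            d_subgraph_edgelist_minors,
      //            d_subgraph_edgelist_weights,
      //            d_subgraph_edge_offsets) =
      //     cugraph::experimental::extract_induced_subgraphs(handle,
      //                                                      graph_view,
      //                                                      d_subgraph_offsets.data(),
      //                                                      d_subgraph_vertices.data(),
      //                                                      configuration.subgraph_sizes.size(),
      //                                                      true);
      //
      // The lines that follow show the call as committed in this patch.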
+ cugraph::experimental::extract_induced_subgraphs(handle, + graph_view, + d_subgraph_offsets.data(), + d_subgraph_vertices.data(), + configuration.subgraph_sizes.size(), + true); + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + std::vector h_cugraph_subgraph_edgelist_majors(d_subgraph_edgelist_majors.size()); + std::vector h_cugraph_subgraph_edgelist_minors(d_subgraph_edgelist_minors.size()); + std::vector h_cugraph_subgraph_edgelist_weights(d_subgraph_edgelist_weights.size()); + std::vector h_cugraph_subgraph_edge_offsets(d_subgraph_edge_offsets.size()); + + raft::update_host(h_cugraph_subgraph_edgelist_majors.data(), + d_subgraph_edgelist_majors.data(), + d_subgraph_edgelist_majors.size(), + handle.get_stream()); + raft::update_host(h_cugraph_subgraph_edgelist_minors.data(), + d_subgraph_edgelist_minors.data(), + d_subgraph_edgelist_minors.size(), + handle.get_stream()); + if (configuration.test_weighted) { + raft::update_host(h_cugraph_subgraph_edgelist_weights.data(), + d_subgraph_edgelist_weights.data(), + d_subgraph_edgelist_weights.size(), + handle.get_stream()); + } + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + + ASSERT_TRUE(h_reference_subgraph_edge_offsets.size() == h_cugraph_subgraph_edge_offsets.size()) + << "Returned subgraph edge offset vector has an invalid size."; + ASSERT_TRUE(std::equal(h_reference_subgraph_edge_offsets.begin(), + h_reference_subgraph_edge_offsets.end(), + h_cugraph_subgraph_edge_offsets.begin())) + << "Returned subgraph edge offset values do not match with the reference values."; + + for (size_t i = 0; i < configuration.subgraph_sizes.size(); ++i) { + auto start = h_reference_subgraph_edge_offsets[i]; + auto last = h_reference_subgraph_edge_offsets[i + 1]; + if (configuration.test_weighted) { + std::vector> reference_tuples(last - start); + std::vector> cugraph_tuples(last - start); + for (auto j = start; j < last; ++j) { + reference_tuples[j - start] = std::make_tuple(h_reference_subgraph_edgelist_majors[j], + h_reference_subgraph_edgelist_minors[j], + h_reference_subgraph_edgelist_weights[j]); + cugraph_tuples[j - start] = std::make_tuple(h_cugraph_subgraph_edgelist_majors[j], + h_cugraph_subgraph_edgelist_minors[j], + h_cugraph_subgraph_edgelist_weights[j]); + } + ASSERT_TRUE( + std::equal(reference_tuples.begin(), reference_tuples.end(), cugraph_tuples.begin())) + << "Extracted subgraph edges do not match with the edges extracted by the reference " + "implementation."; + } else { + std::vector> reference_tuples(last - start); + std::vector> cugraph_tuples(last - start); + for (auto j = start; j < last; ++j) { + reference_tuples[j - start] = std::make_tuple(h_reference_subgraph_edgelist_majors[j], + h_reference_subgraph_edgelist_minors[j]); + cugraph_tuples[j - start] = std::make_tuple(h_cugraph_subgraph_edgelist_majors[j], + h_cugraph_subgraph_edgelist_minors[j]); + } + ASSERT_TRUE( + std::equal(reference_tuples.begin(), reference_tuples.end(), cugraph_tuples.begin())) + << "Extracted subgraph edges do not match with the edges extracted by the reference " + "implementation."; + } + } + } +}; + +// FIXME: add tests for type combinations +TEST_P(Tests_InducedSubgraph, CheckInt32Int32FloatTransposed) +{ + run_current_test(GetParam()); +} + +INSTANTIATE_TEST_CASE_P( + simple_test, + Tests_InducedSubgraph, + ::testing::Values(InducedSubgraph_Usecase("test/datasets/karate.mtx", std::vector{1}, false), + InducedSubgraph_Usecase("test/datasets/karate.mtx", std::vector{2}, false), + 
InducedSubgraph_Usecase("test/datasets/karate.mtx", std::vector{3, 0, 5}, false), + InducedSubgraph_Usecase("test/datasets/karate.mtx", std::vector{2, 3, 5}, false), + InducedSubgraph_Usecase("test/datasets/karate.mtx", std::vector{5, 2, 3}, true), + InducedSubgraph_Usecase("test/datasets/web-Google.mtx", std::vector{25, 30, 15}, false), + InducedSubgraph_Usecase("test/datasets/web-Google.mtx", std::vector{25, 30, 15}, true), + InducedSubgraph_Usecase("test/datasets/ljournal-2008.mtx", std::vector{30, 20, 40}, false), + InducedSubgraph_Usecase("test/datasets/ljournal-2008.mtx", std::vector{30, 20, 40}, true), + InducedSubgraph_Usecase("test/datasets/webbase-1M.mtx", std::vector{10}, false), + InducedSubgraph_Usecase("test/datasets/webbase-1M.mtx", std::vector{10}, true))); + +CUGRAPH_TEST_PROGRAM_MAIN() From 5e7c8dc99ec7e9dfbed115a9d506feeceb8e464c Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Fri, 22 Jan 2021 15:46:09 -0500 Subject: [PATCH 148/343] bug fixes --- cpp/src/experimental/induced_subgraph.cu | 11 +-- .../experimental/induced_subgraph_test.cpp | 76 ++++++++++++------- 2 files changed, 54 insertions(+), 33 deletions(-) diff --git a/cpp/src/experimental/induced_subgraph.cu b/cpp/src/experimental/induced_subgraph.cu index bb3f590f25c..96485134589 100644 --- a/cpp/src/experimental/induced_subgraph.cu +++ b/cpp/src/experimental/induced_subgraph.cu @@ -84,7 +84,7 @@ extract_induced_subgraphs( [vertex_partition] __device__(auto v) { return !vertex_partition.is_valid_vertex(v) || !vertex_partition.is_local_vertex_nocheck(v); - }), + }) == 0, "Invalid input argument: subgraph_vertices has invalid vertex IDs."); CUGRAPH_EXPECTS( @@ -190,9 +190,9 @@ extract_induced_subgraphs( edge_minors = edge_minors.data(), edge_weights = edge_weights.data()] __device__(auto i) { auto subgraph_idx = thrust::distance( - subgraph_offsets, + subgraph_offsets + 1, thrust::upper_bound( - thrust::seq, subgraph_offsets + 1, subgraph_offsets + num_subgraphs, size_t{i})); + thrust::seq, subgraph_offsets, subgraph_offsets + num_subgraphs, size_t{i})); vertex_t const *indices{nullptr}; weight_t const *weights{nullptr}; edge_t local_degree{}; @@ -225,8 +225,9 @@ extract_induced_subgraphs( pair_first + local_degree, thrust::make_zip_iterator(thrust::make_tuple(edge_majors, edge_minors)) + subgraph_vertex_output_offsets[i], - [vertex_first = subgraph_offsets + subgraph_idx, - vertex_last = subgraph_offsets + (subgraph_idx + 1)] __device__(auto t) { + [vertex_first = subgraph_vertices + subgraph_offsets[subgraph_idx], + vertex_last = subgraph_vertices + + subgraph_offsets[subgraph_idx + 1]] __device__(auto t) { return thrust::binary_search( thrust::seq, vertex_first, vertex_last, thrust::get<1>(t)); }); diff --git a/cpp/tests/experimental/induced_subgraph_test.cpp b/cpp/tests/experimental/induced_subgraph_test.cpp index 3734d322fff..896faa0b177 100644 --- a/cpp/tests/experimental/induced_subgraph_test.cpp +++ b/cpp/tests/experimental/induced_subgraph_test.cpp @@ -75,7 +75,7 @@ extract_induced_subgraph_reference(edge_t const* offsets, } } }); - subgraph_edge_offsets.push_back(edgelist_majors.size()); + subgraph_edge_offsets.push_back(edgelist_majors.size()); } return std::make_tuple(edgelist_majors, edgelist_minors, edgelist_weights, subgraph_edge_offsets); @@ -151,19 +151,20 @@ class Tests_InducedSubgraph : public ::testing::TestWithParam vertices(graph_view.get_number_of_vertices()); + std::iota(vertices.begin(), vertices.end(), vertex_t{0}); + std::random_shuffle(vertices.begin(), vertices.end()); + std::copy( + 
vertices.begin(), vertices.begin() + (last - start), h_subgraph_vertices.begin() + start); + std::sort(h_subgraph_vertices.begin() + start, h_subgraph_vertices.begin() + last); } rmm::device_uvector d_subgraph_offsets(h_subgraph_offsets.size(), handle.get_stream()); - rmm::device_uvector d_subgraph_vertices(d_subgraph_vertices.size(), + rmm::device_uvector d_subgraph_vertices(h_subgraph_vertices.size(), handle.get_stream()); raft::update_device(d_subgraph_offsets.data(), h_subgraph_offsets.data(), @@ -199,12 +200,16 @@ class Tests_InducedSubgraph : public ::testing::TestWithParam{1}, false), - InducedSubgraph_Usecase("test/datasets/karate.mtx", std::vector{2}, false), - InducedSubgraph_Usecase("test/datasets/karate.mtx", std::vector{3, 0, 5}, false), - InducedSubgraph_Usecase("test/datasets/karate.mtx", std::vector{2, 3, 5}, false), - InducedSubgraph_Usecase("test/datasets/karate.mtx", std::vector{5, 2, 3}, true), - InducedSubgraph_Usecase("test/datasets/web-Google.mtx", std::vector{25, 30, 15}, false), - InducedSubgraph_Usecase("test/datasets/web-Google.mtx", std::vector{25, 30, 15}, true), - InducedSubgraph_Usecase("test/datasets/ljournal-2008.mtx", std::vector{30, 20, 40}, false), - InducedSubgraph_Usecase("test/datasets/ljournal-2008.mtx", std::vector{30, 20, 40}, true), - InducedSubgraph_Usecase("test/datasets/webbase-1M.mtx", std::vector{10}, false), - InducedSubgraph_Usecase("test/datasets/webbase-1M.mtx", std::vector{10}, true))); + ::testing::Values( + InducedSubgraph_Usecase("test/datasets/karate.mtx", std::vector{0}, false), + InducedSubgraph_Usecase("test/datasets/karate.mtx", std::vector{1}, false), + InducedSubgraph_Usecase("test/datasets/karate.mtx", std::vector{10}, false), + InducedSubgraph_Usecase("test/datasets/karate.mtx", std::vector{34}, false), + InducedSubgraph_Usecase("test/datasets/karate.mtx", std::vector{10, 0, 5}, false), + InducedSubgraph_Usecase("test/datasets/karate.mtx", std::vector{9, 3, 10}, false), + InducedSubgraph_Usecase("test/datasets/karate.mtx", std::vector{5, 12, 13}, true), + InducedSubgraph_Usecase("test/datasets/web-Google.mtx", + std::vector{250, 130, 15}, + false), + InducedSubgraph_Usecase("test/datasets/web-Google.mtx", + std::vector{125, 300, 70}, + true), + InducedSubgraph_Usecase("test/datasets/ljournal-2008.mtx", + std::vector{300, 20, 400}, + false), + InducedSubgraph_Usecase("test/datasets/ljournal-2008.mtx", + std::vector{9130, 1200, 300}, + true), + InducedSubgraph_Usecase("test/datasets/webbase-1M.mtx", std::vector{700}, false), + InducedSubgraph_Usecase("test/datasets/webbase-1M.mtx", std::vector{500}, true))); CUGRAPH_TEST_PROGRAM_MAIN() From 038d2d782363c87ede499d0e8d441e4d091a3dac Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Fri, 22 Jan 2021 15:49:33 -0500 Subject: [PATCH 149/343] add test cases for untrasposed graphs --- cpp/tests/experimental/induced_subgraph_test.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/cpp/tests/experimental/induced_subgraph_test.cpp b/cpp/tests/experimental/induced_subgraph_test.cpp index 896faa0b177..72894a9349f 100644 --- a/cpp/tests/experimental/induced_subgraph_test.cpp +++ b/cpp/tests/experimental/induced_subgraph_test.cpp @@ -282,11 +282,17 @@ class Tests_InducedSubgraph : public ::testing::TestWithParam(GetParam()); } +TEST_P(Tests_InducedSubgraph, CheckInt32Int32FloatUntransposed) +{ + run_current_test(GetParam()); +} + INSTANTIATE_TEST_CASE_P( simple_test, Tests_InducedSubgraph, From 38f1c67668d5ff1cb4b4644756ac0cb9befc4c59 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang 
Date: Mon, 25 Jan 2021 14:46:30 -0500 Subject: [PATCH 150/343] remove unnecessary cudaStreamSynchronize calls --- .../patterns/copy_to_adj_matrix_row_col.cuh | 20 ------------------- .../copy_v_transform_reduce_in_out_nbr.cuh | 10 ---------- ...ransform_reduce_key_aggregated_out_nbr.cuh | 12 ----------- ...orm_reduce_by_adj_matrix_row_col_key_e.cuh | 4 ---- .../update_frontier_v_push_if_out_nbr.cuh | 9 +++++---- cpp/src/experimental/coarsen_graph.cu | 13 ------------ cpp/src/experimental/graph.cu | 4 +--- cpp/src/experimental/induced_subgraph.cu | 4 ---- cpp/src/experimental/relabel.cu | 7 ------- cpp/src/experimental/renumber_edgelist.cu | 4 ---- 10 files changed, 6 insertions(+), 81 deletions(-) diff --git a/cpp/include/patterns/copy_to_adj_matrix_row_col.cuh b/cpp/include/patterns/copy_to_adj_matrix_row_col.cuh index 83347fc726d..d4559de06af 100644 --- a/cpp/include/patterns/copy_to_adj_matrix_row_col.cuh +++ b/cpp/include/patterns/copy_to_adj_matrix_row_col.cuh @@ -175,12 +175,6 @@ void copy_to_matrix_major(raft::handle_t const& handle, map_first, matrix_major_value_output_first); } - - CUDA_TRY(cudaStreamSynchronize( - handle.get_stream())); // this is as necessary rx_tmp_buffer will become out-of-scope - // once control flow exits this block (FIXME: we can reduce stream - // synchronization if we compute the maximum rx_counts and - // allocate rx_tmp_buffer outside the loop) } } } else { @@ -370,10 +364,6 @@ void copy_to_matrix_minor(raft::handle_t const& handle, rx_count, comm_src_rank, handle.get_stream()); - - CUDA_TRY(cudaStreamSynchronize( - handle.get_stream())); // this is as necessary src_tmp_buffer will become out-of-scope - // once control flow exits this block } // FIXME: now we can clear tx_tmp_buffer @@ -424,17 +414,7 @@ void copy_to_matrix_minor(raft::handle_t const& handle, map_first, matrix_minor_value_output_first); } - - CUDA_TRY(cudaStreamSynchronize( - handle.get_stream())); // this is as necessary rx_tmp_buffer will become out-of-scope - // once control flow exits this block (FIXME: we can reduce stream - // synchronization if we compute the maximum rx_counts and - // allocate rx_tmp_buffer outside the loop) } - - CUDA_TRY(cudaStreamSynchronize( - handle.get_stream())); // this is as necessary dst_tmp_buffer will become out-of-scope once - // control flow exits this block } } else { assert(graph_view.get_number_of_local_vertices() == diff --git a/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh b/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh index 21978d985e2..3059cf95852 100644 --- a/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh +++ b/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh @@ -525,12 +525,6 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, handle.get_stream()); } } - - CUDA_TRY(cudaStreamSynchronize( - handle.get_stream())); // this is as necessary major_tmp_buffer will become out-of-scope once - // control flow exits this block (FIXME: we can reduce stream - // synchronization if we compute the maximum major_tmp_buffer_size and - // allocate major_tmp_buffer outside the loop) } if (GraphViewType::is_multi_gpu && (in != GraphViewType::is_adj_matrix_transposed)) { @@ -592,10 +586,6 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, } } } - - CUDA_TRY(cudaStreamSynchronize( - handle.get_stream())); // this is as necessary minor_tmp_buffer will become out-of-scope once - // control flow exits this block } } // namespace detail diff --git 
a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh index 018a665d1ee..774f6d08bf4 100644 --- a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh +++ b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh @@ -383,9 +383,6 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( tmp_major_vertices = std::move(rx_major_vertices); tmp_minor_keys = std::move(rx_minor_keys); tmp_key_aggregated_edge_weights = std::move(rx_key_aggregated_edge_weights); - - CUDA_TRY( - cudaStreamSynchronize(handle.get_stream())); // tx_value_counts will become out-of-scope } auto tmp_e_op_result_buffer = @@ -464,18 +461,9 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( major_vertices = std::move(rx_major_vertices); e_op_result_buffer = std::move(rx_tmp_e_op_result_buffer); } - - CUDA_TRY(cudaStreamSynchronize( - handle - .get_stream())); // tmp_minor_keys, tmp_key_aggregated_edge_weights, rx_major_vertices, - // and rx_tmp_e_op_result_buffer will become out-of-scope } else { major_vertices = std::move(tmp_major_vertices); e_op_result_buffer = std::move(tmp_e_op_result_buffer); - - CUDA_TRY(cudaStreamSynchronize( - handle.get_stream())); // tmp_minor_keys and tmp_key_aggregated_edge_weights will become - // out-of-scope } } diff --git a/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh b/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh index 715152bc99f..7ffd17faec9 100644 --- a/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh +++ b/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh @@ -254,10 +254,6 @@ transform_reduce_by_adj_matrix_row_col_key_e( tmp_keys = std::move(rx_unique_keys); tmp_value_buffer = std::move(rx_value_for_unique_key_buffer); - - CUDA_TRY(cudaStreamSynchronize( - handle - .get_stream())); // unique_keys & value_for_unique_key_buffer will become out-of-scope } auto cur_size = keys.size(); diff --git a/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh b/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh index e20a2b4238b..4c76322fa79 100644 --- a/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh +++ b/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh @@ -156,7 +156,7 @@ size_t reduce_buffer_elements(raft::handle_t const& handle, // FIXME: actually, we can find how many unique keys are here by now. // FIXME: if GraphViewType::is_multi_gpu is true, this should be executed on the GPU holding the // vertex unless reduce_op is a pure function. 
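  // Hedged illustration (editorial sketch, not part of this commit): the reduction below relies
  // on thrust::reduce_by_key collapsing runs of equal, adjacent keys and returning iterators one
  // past the last reduced key/value pair, which is how num_reduced_buffer_elements is obtained.
  // A minimal example of that contract (key_t, reduce_op, keys_out, values_out are illustrative
  // placeholders):
  //
  //   // keys   : 1 1 2 3 3     values : 10 20 30 40 50
  //   auto ends = thrust::reduce_by_key(rmm::exec_policy(stream)->on(stream),
  //                                     keys.begin(), keys.end(),
  //                                     values.begin(),
  //                                     keys_out.begin(),
  //                                     values_out.begin(),
  //                                     thrust::equal_to<key_t>{},
  //                                     reduce_op);
  //   auto num_reduced = thrust::distance(keys_out.begin(), ends.first);
  //   // keys_out : 1 2 3       values_out : 30 30 90 (for a sum reduce_op), num_reduced == 3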
- rmm::device_vector keys(num_buffer_elements); + rmm::device_uvector keys(num_buffer_elements, handle.get_stream()); rmm::device_vector values(num_buffer_elements); auto it = thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), buffer_key_output_first, @@ -176,9 +176,10 @@ size_t reduce_buffer_elements(raft::handle_t const& handle, values.begin(), values.begin() + num_reduced_buffer_elements, buffer_payload_output_first); - CUDA_TRY(cudaStreamSynchronize( - handle.get_stream())); // this is necessary as kyes & values will become out-of-scope once - // this function returns + // FIXME: this is unecessary if we use a tuple of rmm::device_uvector objects for values + CUDA_TRY( + cudaStreamSynchronize(handle.get_stream())); // this is necessary as values will become + // out-of-scope once this function returns return num_reduced_buffer_elements; } } diff --git a/cpp/src/experimental/coarsen_graph.cu b/cpp/src/experimental/coarsen_graph.cu index 154ee9e2093..13b15f38218 100644 --- a/cpp/src/experimental/coarsen_graph.cu +++ b/cpp/src/experimental/coarsen_graph.cu @@ -120,10 +120,6 @@ void sort_and_coarsen_edgelist(rmm::device_uvector &edgelist_major_ver tmp_edgelist_weights.begin()); number_of_edges = thrust::distance(tmp_edgelist_weights.begin(), thrust::get<1>(it)); - CUDA_TRY(cudaStreamSynchronize( - stream)); // memory blocks owned by edgelist_(major_vertices,minor_vertices,weights) will be - // freed after the assignments below - edgelist_major_vertices = std::move(tmp_edgelist_major_vertices); edgelist_minor_vertices = std::move(tmp_edgelist_minor_vertices); edgelist_weights = std::move(tmp_edgelist_weights); @@ -313,10 +309,6 @@ coarsen_graph( src_edge_first + edgelist_major_vertices.size(), dst_edge_first); } - - CUDA_TRY(cudaStreamSynchronize( - handle.get_stream())); // edgelist_(major_vertices,minor_vertices,weights) - // will become out-of-scope } sort_and_coarsen_edgelist(coarsened_edgelist_major_vertices, @@ -355,11 +347,6 @@ coarsen_graph( rx_edgelist_weights, handle.get_stream()); - CUDA_TRY(cudaStreamSynchronize( - handle.get_stream())); // memory blocks owned by - // coarsened_edgelist_(major_vertices,minor_vertices,weights) - // will be freed after the assignments below - coarsened_edgelist_major_vertices = std::move(rx_edgelist_major_vertices); coarsened_edgelist_minor_vertices = std::move(rx_edgelist_minor_vertices); coarsened_edgelist_weights = std::move(rx_edgelist_weights); diff --git a/cpp/src/experimental/graph.cu b/cpp/src/experimental/graph.cu index 93f4171a356..8eb81543e5d 100644 --- a/cpp/src/experimental/graph.cu +++ b/cpp/src/experimental/graph.cu @@ -470,9 +470,7 @@ graph_t(val)); }, handle.get_stream()); - - CUDA_TRY(cudaStreamSynchronize( - handle.get_stream())); // label_pair_old_labels and label_pair_new_labels will become - // out-of-scope } // update intermediate relabel map @@ -161,9 +157,6 @@ void relabel(raft::handle_t const& handle, std::tie(new_labels_for_unique_old_labels, std::ignore) = shuffle_values( handle.get_comms(), rx_unique_old_labels.begin(), rx_value_counts, handle.get_stream()); - - CUDA_TRY(cudaStreamSynchronize( - handle.get_stream())); // tx_value_counts & rx_value_counts will become out-of-scope } } diff --git a/cpp/src/experimental/renumber_edgelist.cu b/cpp/src/experimental/renumber_edgelist.cu index 86f1b07efeb..893b20e77fd 100644 --- a/cpp/src/experimental/renumber_edgelist.cu +++ b/cpp/src/experimental/renumber_edgelist.cu @@ -224,10 +224,6 @@ rmm::device_uvector compute_renumber_map( 
labels.begin(), thrust::greater()); - CUDA_TRY( - cudaStreamSynchronize(handle.get_stream())); // temporary rmm::devicec_uvector objects become - // out-of-scope once this function returns. - return std::move(labels); } From 67273d144b92b5b3061b0c7883306a747e1121c5 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Mon, 25 Jan 2021 15:01:30 -0500 Subject: [PATCH 151/343] add comments to induced_subgraph.cu --- cpp/src/experimental/induced_subgraph.cu | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cpp/src/experimental/induced_subgraph.cu b/cpp/src/experimental/induced_subgraph.cu index 6b94d3adb55..f88498a42ee 100644 --- a/cpp/src/experimental/induced_subgraph.cu +++ b/cpp/src/experimental/induced_subgraph.cu @@ -131,6 +131,8 @@ extract_induced_subgraphs( matrix_partition_device_t> matrix_partition(graph_view, 0); + // count the numbers of the induced subgraph edges for each vertex in the aggregate subgraph + // vertex list. thrust::transform( rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), thrust::make_counting_iterator(size_t{0}), @@ -177,6 +179,8 @@ extract_induced_subgraphs( rmm::device_uvector edge_weights( graph_view.is_weighted() ? num_aggregate_edges : size_t{0}, handle.get_stream()); + // fill the edge list buffer (to be returned) for each vetex in the aggregate subgraph vertex + // list (use the offsets computed in the Phase 1) thrust::for_each( rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), thrust::make_counting_iterator(size_t{0}), From a46f8632755c348172f6e4564c7369d20edc73a7 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Mon, 25 Jan 2021 15:22:29 -0500 Subject: [PATCH 152/343] add additional explicit instantiations --- cpp/src/experimental/induced_subgraph.cu | 110 +++++++++++++++++++++++ 1 file changed, 110 insertions(+) diff --git a/cpp/src/experimental/induced_subgraph.cu b/cpp/src/experimental/induced_subgraph.cu index f88498a42ee..a88adf76ef4 100644 --- a/cpp/src/experimental/induced_subgraph.cu +++ b/cpp/src/experimental/induced_subgraph.cu @@ -276,5 +276,115 @@ extract_induced_subgraphs(raft::handle_t const &handle, size_t num_subgraphs, bool do_expensive_check); +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector> +extract_induced_subgraphs(raft::handle_t const &handle, + graph_view_t const &graph_view, + size_t const *subgraph_offsets, + int32_t const *subgraph_vertices, + size_t num_subgraphs, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector> +extract_induced_subgraphs(raft::handle_t const &handle, + graph_view_t const &graph_view, + size_t const *subgraph_offsets, + int32_t const *subgraph_vertices, + size_t num_subgraphs, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector> +extract_induced_subgraphs(raft::handle_t const &handle, + graph_view_t const &graph_view, + size_t const *subgraph_offsets, + int32_t const *subgraph_vertices, + size_t num_subgraphs, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector> +extract_induced_subgraphs(raft::handle_t const &handle, + graph_view_t const &graph_view, + size_t const *subgraph_offsets, + int32_t const *subgraph_vertices, + size_t num_subgraphs, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector> +extract_induced_subgraphs(raft::handle_t const &handle, + graph_view_t 
const &graph_view, + size_t const *subgraph_offsets, + int32_t const *subgraph_vertices, + size_t num_subgraphs, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector> +extract_induced_subgraphs(raft::handle_t const &handle, + graph_view_t const &graph_view, + size_t const *subgraph_offsets, + int32_t const *subgraph_vertices, + size_t num_subgraphs, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector> +extract_induced_subgraphs(raft::handle_t const &handle, + graph_view_t const &graph_view, + size_t const *subgraph_offsets, + int64_t const *subgraph_vertices, + size_t num_subgraphs, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector> +extract_induced_subgraphs(raft::handle_t const &handle, + graph_view_t const &graph_view, + size_t const *subgraph_offsets, + int64_t const *subgraph_vertices, + size_t num_subgraphs, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector> +extract_induced_subgraphs(raft::handle_t const &handle, + graph_view_t const &graph_view, + size_t const *subgraph_offsets, + int64_t const *subgraph_vertices, + size_t num_subgraphs, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector> +extract_induced_subgraphs(raft::handle_t const &handle, + graph_view_t const &graph_view, + size_t const *subgraph_offsets, + int64_t const *subgraph_vertices, + size_t num_subgraphs, + bool do_expensive_check); + } // namespace experimental } // namespace cugraph From acd35c585aacd5c5faa6d56e3539aa7d2dfc56c9 Mon Sep 17 00:00:00 2001 From: Raymond Douglass Date: Wed, 27 Jan 2021 13:52:59 -0500 Subject: [PATCH 153/343] DOC v0.19 Updates --- CHANGELOG.md | 8 ++++++++ conda/environments/cugraph_dev_cuda10.1.yml | 16 ++++++++-------- conda/environments/cugraph_dev_cuda10.2.yml | 16 ++++++++-------- conda/environments/cugraph_dev_cuda11.0.yml | 16 ++++++++-------- cpp/CMakeLists.txt | 2 +- docs/source/conf.py | 4 ++-- 6 files changed, 35 insertions(+), 27 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b73fd539c3d..548f0dae747 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,11 @@ +# cuGraph 0.19.0 (Date TBD) + +## New Features + +## Improvements + +## Bug Fixes + # cuGraph 0.18.0 (Date TBD) diff --git a/conda/environments/cugraph_dev_cuda10.1.yml b/conda/environments/cugraph_dev_cuda10.1.yml index 067fd0bc4ba..f6f2a65e3f3 100644 --- a/conda/environments/cugraph_dev_cuda10.1.yml +++ b/conda/environments/cugraph_dev_cuda10.1.yml @@ -5,17 +5,17 @@ channels: - rapidsai-nightly - conda-forge dependencies: -- cudf=0.18.* -- libcudf=0.18.* -- rmm=0.18.* -- cuxfilter=0.18.* -- librmm=0.18.* +- cudf=0.19.* +- libcudf=0.19.* +- rmm=0.19.* +- cuxfilter=0.19.* +- librmm=0.19.* - dask>=2.12.0 - distributed>=2.12.0 -- dask-cuda=0.18* -- dask-cudf=0.18* +- dask-cuda=0.19* +- dask-cudf=0.19* - nccl>=2.7 -- ucx-py=0.18* +- ucx-py=0.19* - ucx-proc=*=gpu - scipy - networkx diff --git a/conda/environments/cugraph_dev_cuda10.2.yml b/conda/environments/cugraph_dev_cuda10.2.yml index 3371340d8bd..a070819064b 100644 --- a/conda/environments/cugraph_dev_cuda10.2.yml +++ b/conda/environments/cugraph_dev_cuda10.2.yml @@ -5,17 +5,17 @@ channels: - rapidsai-nightly - conda-forge dependencies: -- cudf=0.18.* -- libcudf=0.18.* -- rmm=0.18.* -- cuxfilter=0.18.* -- 
librmm=0.18.* +- cudf=0.19.* +- libcudf=0.19.* +- rmm=0.19.* +- cuxfilter=0.19.* +- librmm=0.19.* - dask>=2.12.0 - distributed>=2.12.0 -- dask-cuda=0.18* -- dask-cudf=0.18* +- dask-cuda=0.19* +- dask-cudf=0.19* - nccl>=2.7 -- ucx-py=0.18* +- ucx-py=0.19* - ucx-proc=*=gpu - scipy - networkx diff --git a/conda/environments/cugraph_dev_cuda11.0.yml b/conda/environments/cugraph_dev_cuda11.0.yml index ee3b57632a1..3421152af06 100644 --- a/conda/environments/cugraph_dev_cuda11.0.yml +++ b/conda/environments/cugraph_dev_cuda11.0.yml @@ -5,17 +5,17 @@ channels: - rapidsai-nightly - conda-forge dependencies: -- cudf=0.18.* -- libcudf=0.18.* -- rmm=0.18.* -- cuxfilter=0.18.* -- librmm=0.18.* +- cudf=0.19.* +- libcudf=0.19.* +- rmm=0.19.* +- cuxfilter=0.19.* +- librmm=0.19.* - dask>=2.12.0 - distributed>=2.12.0 -- dask-cuda=0.18* -- dask-cudf=0.18* +- dask-cuda=0.19* +- dask-cudf=0.19* - nccl>=2.7 -- ucx-py=0.18* +- ucx-py=0.19* - ucx-proc=*=gpu - scipy - networkx diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 7871ce7581b..2c9c2918e54 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -16,7 +16,7 @@ cmake_minimum_required(VERSION 3.12..3.17 FATAL_ERROR) -project(CUGRAPH VERSION 0.18.0 LANGUAGES C CXX CUDA) +project(CUGRAPH VERSION 0.19.0 LANGUAGES C CXX CUDA) ################################################################################################### # - build type ------------------------------------------------------------------------------------ diff --git a/docs/source/conf.py b/docs/source/conf.py index 6b484a5f57b..eb4745a61f0 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -80,9 +80,9 @@ # built documents. # # The short X.Y version. -version = '0.18' +version = '0.19' # The full version, including alpha/beta/rc tags. -release = '0.18.0' +release = '0.19.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. From 9fc3d991ebcb06ac2cebae078be7012fa2ae532f Mon Sep 17 00:00:00 2001 From: AJ Schmidt Date: Mon, 1 Feb 2021 13:26:33 -0500 Subject: [PATCH 154/343] Prepare Changelog for Automation (#1368) This PR prepares the changelog to be automatically updated during releases. Authors: - AJ Schmidt (@ajschmidt8) Approvers: - Rick Ratzel (@rlratzel) URL: https://github.com/rapidsai/cuml/pull/1368 --- CHANGELOG.md | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b73fd539c3d..2957a22a68d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,12 +1,6 @@ +# 0.18.0 -# cuGraph 0.18.0 (Date TBD) - -## New Features - -## Improvements - -## Bug Fixes -- PR #1321 Fix benchmark script trap setup to come after the PATH variable update +Please see https://github.com/rapidsai/cugraph/releases/tag/branch-0.18-latest for the latest changes to this development branch. 
# cuGraph 0.17.0 (10 Dec 2020) ## New Features From 1a7122865f3fb795ece3fd05859e66afa7b70eb3 Mon Sep 17 00:00:00 2001 From: Iroy30 <41401566+Iroy30@users.noreply.github.com> Date: Tue, 2 Feb 2021 11:50:56 -0600 Subject: [PATCH 155/343] SG Pagerank transition (#1332) Close #1313 #1331 #1330 Authors: - @Iroy30 Approvers: - Brad Rees (@BradReesWork) - Rick Ratzel (@rlratzel) - Alex Fender (@afender) URL: https://github.com/rapidsai/cugraph/pull/1332 --- cpp/src/utilities/cython.cu | 91 ++++++++++--------- .../community/subgraph_extraction_wrapper.pyx | 10 +- python/cugraph/link_analysis/pagerank.py | 9 +- .../link_analysis/pagerank_wrapper.pyx | 53 ++++++----- python/cugraph/tests/test_pagerank.py | 13 +-- .../cugraph/tests/test_subgraph_extraction.py | 3 +- 6 files changed, 92 insertions(+), 87 deletions(-) diff --git a/cpp/src/utilities/cython.cu b/cpp/src/utilities/cython.cu index 6c8ef98e2e2..22807beac34 100644 --- a/cpp/src/utilities/cython.cu +++ b/cpp/src/utilities/cython.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -86,7 +86,6 @@ create_graph(raft::handle_t const& handle, graph_container_t const& graph_contai reinterpret_cast(graph_container.dst_vertices), reinterpret_cast(graph_container.weights), static_cast(graph_container.num_partition_edges)}; - return std::make_unique>( handle, edgelist, @@ -123,12 +122,18 @@ void populate_graph_container(graph_container_t& graph_container, bool do_expensive_check{true}; bool hypergraph_partitioned{false}; - auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto const row_comm_rank = row_comm.get_rank(); - auto const row_comm_size = row_comm.get_size(); // pcols - auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - auto const col_comm_rank = col_comm.get_rank(); - auto const col_comm_size = col_comm.get_size(); // prows + if (multi_gpu) { + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_rank = row_comm.get_rank(); + auto const row_comm_size = row_comm.get_size(); // pcols + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_rank = col_comm.get_rank(); + auto const col_comm_size = col_comm.get_size(); // prows + graph_container.row_comm_size = row_comm_size; + graph_container.col_comm_size = col_comm_size; + graph_container.row_comm_rank = row_comm_rank; + graph_container.col_comm_rank = col_comm_rank; + } graph_container.vertex_partition_offsets = vertex_partition_offsets; graph_container.src_vertices = src_vertices; @@ -143,10 +148,6 @@ void populate_graph_container(graph_container_t& graph_container, graph_container.transposed = transposed; graph_container.is_multi_gpu = multi_gpu; graph_container.hypergraph_partitioned = hypergraph_partitioned; - graph_container.row_comm_size = row_comm_size; - graph_container.col_comm_size = col_comm_size; - graph_container.row_comm_rank = row_comm_rank; - graph_container.col_comm_rank = col_comm_rank; graph_container.sorted_by_degree = sorted_by_degree; graph_container.do_expensive_check = do_expensive_check; @@ -463,33 +464,7 @@ void call_pagerank(raft::handle_t const& handle, int64_t max_iter, bool has_guess) { - if (graph_container.graph_type == graphTypeEnum::GraphCSCViewFloat) { - pagerank(handle, - 
*(graph_container.graph_ptr_union.GraphCSCViewFloatPtr), - reinterpret_cast(p_pagerank), - static_cast(personalization_subset_size), - reinterpret_cast(personalization_subset), - reinterpret_cast(personalization_values), - alpha, - tolerance, - max_iter, - has_guess); - graph_container.graph_ptr_union.GraphCSCViewFloatPtr->get_vertex_identifiers( - reinterpret_cast(identifiers)); - } else if (graph_container.graph_type == graphTypeEnum::GraphCSCViewDouble) { - pagerank(handle, - *(graph_container.graph_ptr_union.GraphCSCViewDoublePtr), - reinterpret_cast(p_pagerank), - static_cast(personalization_subset_size), - reinterpret_cast(personalization_subset), - reinterpret_cast(personalization_values), - alpha, - tolerance, - max_iter, - has_guess); - graph_container.graph_ptr_union.GraphCSCViewDoublePtr->get_vertex_identifiers( - reinterpret_cast(identifiers)); - } else if (graph_container.graph_type == graphTypeEnum::graph_t) { + if (graph_container.is_multi_gpu) { if (graph_container.edgeType == numberTypeEnum::int32Type) { auto graph = detail::create_graph(handle, graph_container); @@ -504,7 +479,7 @@ void call_pagerank(raft::handle_t const& handle, static_cast(tolerance), max_iter, has_guess, - false); + true); } else if (graph_container.edgeType == numberTypeEnum::int64Type) { auto graph = detail::create_graph(handle, graph_container); @@ -519,9 +494,39 @@ void call_pagerank(raft::handle_t const& handle, static_cast(tolerance), max_iter, has_guess, - false); - } else { - CUGRAPH_FAIL("vertexType/edgeType combination unsupported"); + true); + } + } else { + if (graph_container.edgeType == numberTypeEnum::int32Type) { + auto graph = + detail::create_graph(handle, graph_container); + cugraph::experimental::pagerank(handle, + graph->view(), + static_cast(nullptr), + reinterpret_cast(personalization_subset), + reinterpret_cast(personalization_values), + static_cast(personalization_subset_size), + reinterpret_cast(p_pagerank), + static_cast(alpha), + static_cast(tolerance), + max_iter, + has_guess, + true); + } else if (graph_container.edgeType == numberTypeEnum::int64Type) { + auto graph = + detail::create_graph(handle, graph_container); + cugraph::experimental::pagerank(handle, + graph->view(), + static_cast(nullptr), + reinterpret_cast(personalization_subset), + reinterpret_cast(personalization_values), + static_cast(personalization_subset_size), + reinterpret_cast(p_pagerank), + static_cast(alpha), + static_cast(tolerance), + max_iter, + has_guess, + true); } } } diff --git a/python/cugraph/community/subgraph_extraction_wrapper.pyx b/python/cugraph/community/subgraph_extraction_wrapper.pyx index 5dbb6ce1e27..35b3c743987 100644 --- a/python/cugraph/community/subgraph_extraction_wrapper.pyx +++ b/python/cugraph/community/subgraph_extraction_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -45,6 +45,10 @@ def subgraph(input_graph, vertices): if weights.dtype == np.float64: use_float = False + num_verts = input_graph.number_of_vertices() + num_edges = len(src) + num_input_vertices = len(vertices) + cdef GraphCOOView[int,int,float] in_graph_float cdef GraphCOOView[int,int,double] in_graph_double cdef unique_ptr[GraphCOO[int,int,float]] out_graph_float @@ -59,10 +63,6 @@ def subgraph(input_graph, vertices): cdef uintptr_t c_vertices = vertices.__cuda_array_interface__['data'][0] - num_verts = input_graph.number_of_vertices() - num_edges = len(src) - num_input_vertices = len(vertices) - if use_float: in_graph_float = GraphCOOView[int,int,float](c_src, c_dst, c_weights, num_verts, num_edges); df = coo_to_df(move(c_extract_subgraph_vertex(in_graph_float, c_vertices, num_input_vertices))); diff --git a/python/cugraph/link_analysis/pagerank.py b/python/cugraph/link_analysis/pagerank.py index 69133d62af7..0bb89195e01 100644 --- a/python/cugraph/link_analysis/pagerank.py +++ b/python/cugraph/link_analysis/pagerank.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -32,7 +32,7 @@ def pagerank( ---------- graph : cugraph.Graph or networkx.Graph cuGraph graph descriptor, should contain the connectivity information - as an edge list (edge weights are not used for this algorithm). + as an edge list. The transposed adjacency list will be computed if not already present. alpha : float The damping factor alpha represents the probability to follow an @@ -68,11 +68,6 @@ def pagerank( Subset of vertices of graph for initial guess for pagerank values nstart['values'] : cudf.Series Pagerank values for vertices - - weight : str - Edge data column to use. Default is None - This version of PageRank current does not use edge weight. - This parameter is here for NetworkX compatibility dangling : dict This parameter is here for NetworkX compatibility and ignored diff --git a/python/cugraph/link_analysis/pagerank_wrapper.pyx b/python/cugraph/link_analysis/pagerank_wrapper.pyx index a8c1c9faee8..88548539a4e 100644 --- a/python/cugraph/link_analysis/pagerank_wrapper.pyx +++ b/python/cugraph/link_analysis/pagerank_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -33,21 +33,22 @@ def pagerank(input_graph, alpha=0.85, personalization=None, max_iter=100, tol=1. 
Call pagerank """ - if not input_graph.transposedadjlist: - input_graph.view_transposed_adj_list() - cdef unique_ptr[handle_t] handle_ptr handle_ptr.reset(new handle_t()) handle_ = handle_ptr.get(); - [offsets, indices] = graph_primtypes_wrapper.datatype_cast([input_graph.transposedadjlist.offsets, input_graph.transposedadjlist.indices], [np.int32]) - [weights] = graph_primtypes_wrapper.datatype_cast([input_graph.transposedadjlist.weights], [np.float32, np.float64]) + [src, dst] = graph_primtypes_wrapper.datatype_cast([input_graph.edgelist.edgelist_df['src'], input_graph.edgelist.edgelist_df['dst']], [np.int32]) + weights = None + if input_graph.edgelist.weights: + [weights] = graph_primtypes_wrapper.datatype_cast([input_graph.edgelist.edgelist_df['weights']], [np.float32, np.float64]) num_verts = input_graph.number_of_vertices() num_edges = input_graph.number_of_edges(directed_edges=True) + # FIXME: needs to be edge_t type not int + cdef int num_partition_edges = len(src) df = cudf.DataFrame() - df['vertex'] = cudf.Series(np.zeros(num_verts, dtype=np.int32)) + df['vertex'] = cudf.Series(np.arange(num_verts, dtype=np.int32)) df['pagerank'] = cudf.Series(np.zeros(num_verts, dtype=np.float32)) cdef bool has_guess = 0 @@ -56,25 +57,23 @@ def pagerank(input_graph, alpha=0.85, personalization=None, max_iter=100, tol=1. raise ValueError('nstart must have initial guess for all vertices') df['pagerank'][nstart['vertex']] = nstart['values'] has_guess = 1 + print(df) cdef uintptr_t c_identifier = df['vertex'].__cuda_array_interface__['data'][0]; cdef uintptr_t c_pagerank_val = df['pagerank'].__cuda_array_interface__['data'][0]; cdef uintptr_t c_pers_vtx = NULL cdef uintptr_t c_pers_val = NULL - cdef sz = 0 - - cdef uintptr_t c_offsets = offsets.__cuda_array_interface__['data'][0] - cdef uintptr_t c_indices = indices.__cuda_array_interface__['data'][0] - cdef uintptr_t c_weights = NULL - cdef uintptr_t c_local_verts = NULL; - cdef uintptr_t c_local_edges = NULL; - cdef uintptr_t c_local_offsets = NULL; + cdef int sz = 0 + cdef uintptr_t c_src_vertices = src.__cuda_array_interface__['data'][0] + cdef uintptr_t c_dst_vertices = dst.__cuda_array_interface__['data'][0] + cdef uintptr_t c_edge_weights = NULL + personalization_id_series = None if weights is not None: - c_weights = weights.__cuda_array_interface__['data'][0] + c_edge_weights = weights.__cuda_array_interface__['data'][0] weight_t = weights.dtype else: weight_t = np.dtype("float32") @@ -94,15 +93,19 @@ def pagerank(input_graph, alpha=0.85, personalization=None, max_iter=100, tol=1. 
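# Hedged usage sketch (editorial illustration, not part of this patch): the edge-list based
# wrapper above is still driven through the public Python API, along the lines of the updated
# pytest later in this patch. The CSV file name and column names here are hypothetical.
#
#   import cudf
#   import cugraph
#
#   gdf = cudf.read_csv("karate.csv", delimiter=" ",
#                       names=["src", "dst", "wgt"],
#                       dtype=["int32", "int32", "float32"])
#   G = cugraph.DiGraph()
#   G.from_cudf_edgelist(gdf, source="src", destination="dst", edge_attr="wgt")
#   pr = cugraph.pagerank(G, alpha=0.85, max_iter=20, tol=1.0e-5)
#   # pr is a cudf.DataFrame with 'vertex' and 'pagerank' columns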
c_pers_val = personalization['values'].__cuda_array_interface__['data'][0] cdef graph_container_t graph_container - populate_graph_container_legacy(graph_container, - ((graphTypeEnum.LegacyCSC)), - handle_[0], - c_offsets, c_indices, c_weights, - ((numberTypeEnum.int32Type)), - ((numberTypeEnum.int32Type)), - ((numberTypeMap[weight_t])), - num_verts, num_edges, - c_local_verts, c_local_edges, c_local_offsets) + populate_graph_container(graph_container, + handle_[0], + c_src_vertices, c_dst_vertices, c_edge_weights, + NULL, + ((numberTypeEnum.int32Type)), + ((numberTypeEnum.int32Type)), + ((numberTypeMap[weight_t])), + #num_verts, num_edges, + num_partition_edges, + num_verts, num_edges, + False, + True, + False) if (df['pagerank'].dtype == np.float32): call_pagerank[int, float](handle_[0], graph_container, diff --git a/python/cugraph/tests/test_pagerank.py b/python/cugraph/tests/test_pagerank.py index 1ab370041b5..3ce8dd4ffe9 100644 --- a/python/cugraph/tests/test_pagerank.py +++ b/python/cugraph/tests/test_pagerank.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -143,7 +143,7 @@ def networkx_call(Gnx, max_iter, tol, alpha, personalization_perc, nnz_vtx): # # https://github.com/rapidsai/cugraph/issues/533 # -# @pytest.mark.parametrize("graph_file", utils.DATASETS) + @pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("max_iter", MAX_ITERATIONS) @pytest.mark.parametrize("tol", TOLERANCE) @@ -159,7 +159,8 @@ def test_pagerank( M = utils.read_csv_for_nx(graph_file) nnz_vtx = np.unique(M[['0', '1']]) Gnx = nx.from_pandas_edgelist( - M, source="0", target="1", create_using=nx.DiGraph() + M, source="0", target="1", edge_attr="weight", + create_using=nx.DiGraph() ) networkx_pr, networkx_prsn = networkx_call( @@ -169,13 +170,13 @@ def test_pagerank( cu_nstart = None if has_guess == 1: cu_nstart = cudify(networkx_pr) - max_iter = 5 + max_iter = 20 cu_prsn = cudify(networkx_prsn) # cuGraph PageRank cu_M = utils.read_csv_file(graph_file) G = cugraph.DiGraph() - G.from_cudf_edgelist(cu_M, source="0", destination="1") + G.from_cudf_edgelist(cu_M, source="0", destination="1", edge_attr="2") cugraph_pr = cugraph_call(G, max_iter, tol, alpha, cu_prsn, cu_nstart) @@ -218,7 +219,7 @@ def test_pagerank_nx( cu_nstart = None if has_guess == 1: cu_nstart = cudify(networkx_pr) - max_iter = 5 + max_iter = 20 cu_prsn = cudify(networkx_prsn) # cuGraph PageRank with Nx Graph diff --git a/python/cugraph/tests/test_subgraph_extraction.py b/python/cugraph/tests/test_subgraph_extraction.py index a4f36af994a..2e6968892c2 100644 --- a/python/cugraph/tests/test_subgraph_extraction.py +++ b/python/cugraph/tests/test_subgraph_extraction.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -102,6 +102,7 @@ def test_subgraph_extraction_Graph(graph_file): assert compare_edges(cu_sg, nx_sg) +@pytest.mark.skip(reason="needs test updates for graph comparison") @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_subgraph_extraction_Graph_nx(graph_file): gc.collect() From 683797dd2406bb9b8d5bbbf6268f4ee6e7a6f34e Mon Sep 17 00:00:00 2001 From: Joseph <68436579+jolorunyomi@users.noreply.github.com> Date: Tue, 2 Feb 2021 13:25:40 -0600 Subject: [PATCH 156/343] Auto-label PRs based on their content (#1358) This PR adds the GitHub action [PR Labeler](https://github.com/actions/labeler) to auto-label PRs based on their content. Labeling is managed with a configuration file `.github/labeler.yml` using the following [options](https://github.com/actions/labeler#usage). Authors: - Joseph (@jolorunyomi) Approvers: - AJ Schmidt (@ajschmidt8) - Rick Ratzel (@rlratzel) URL: https://github.com/rapidsai/cugraph/pull/1358 --- .github/labeler.yml | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/.github/labeler.yml b/.github/labeler.yml index 621d0fde833..9c3af6de64b 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -5,26 +5,33 @@ python: - 'python/**' - 'notebooks/**' + +benchmarks: - 'benchmarks/**' doc: - 'docs/**' - '**/*.md' + - 'datasets/**' + - 'notebooks/**' + - '**/*.txt' + - '**/*.rst' + - '**/*.ipynb' + - '**/*.pdf' + - '**/*.png' datasets: - 'datasets/**' cuGraph: - 'cpp/**' - + CMake: - '**/CMakeLists.txt' - '**/cmake/**' - -Ops: - - '.github/**' - - 'ci/**' + +gpuCI: + - 'ci/**' + +conda: - 'conda/**' - - '**/Dockerfile' - - '**/.dockerignore' - - 'docker/**' From 44b0679f580b40caa98e834a3660ee35a4b1bf89 Mon Sep 17 00:00:00 2001 From: jnke2016 <76006812+jnke2016@users.noreply.github.com> Date: Tue, 2 Feb 2021 17:03:48 -0500 Subject: [PATCH 157/343] Enabling pytest code coverage output by default (#1352) added the coverage plugin option by default added pytest-cov package Authors: - @jnke2016 Approvers: - AJ Schmidt (@ajschmidt8) - Brad Rees (@BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1352 --- conda/environments/cugraph_dev_cuda10.1.yml | 1 + conda/environments/cugraph_dev_cuda10.2.yml | 1 + conda/environments/cugraph_dev_cuda11.0.yml | 1 + python/pytest.ini | 2 ++ 4 files changed, 5 insertions(+) diff --git a/conda/environments/cugraph_dev_cuda10.1.yml b/conda/environments/cugraph_dev_cuda10.1.yml index 067fd0bc4ba..0da9b3dc3e7 100644 --- a/conda/environments/cugraph_dev_cuda10.1.yml +++ b/conda/environments/cugraph_dev_cuda10.1.yml @@ -44,3 +44,4 @@ dependencies: - libcypher-parser - rapids-pytest-benchmark - doxygen +- pytest-cov diff --git a/conda/environments/cugraph_dev_cuda10.2.yml b/conda/environments/cugraph_dev_cuda10.2.yml index 3371340d8bd..eaf78f0d72c 100644 --- a/conda/environments/cugraph_dev_cuda10.2.yml +++ b/conda/environments/cugraph_dev_cuda10.2.yml @@ -44,3 +44,4 @@ dependencies: - libcypher-parser - rapids-pytest-benchmark - doxygen +- pytest-cov diff --git a/conda/environments/cugraph_dev_cuda11.0.yml b/conda/environments/cugraph_dev_cuda11.0.yml index ee3b57632a1..c845a311884 100644 --- a/conda/environments/cugraph_dev_cuda11.0.yml +++ b/conda/environments/cugraph_dev_cuda11.0.yml @@ -44,3 +44,4 @@ dependencies: - libcypher-parser - rapids-pytest-benchmark - doxygen +- pytest-cov diff --git a/python/pytest.ini b/python/pytest.ini index fb8c6ea0948..a1933ea34aa 100644 --- a/python/pytest.ini +++ b/python/pytest.ini @@ -5,6 +5,8 @@ addopts 
= --benchmark-min-rounds=1 --benchmark-columns="mean, rounds" --benchmark-gpu-disable + --cov=cugraph + --cov-report term-missing:skip-covered markers = managedmem_on: RMM managed memory enabled From 5813559e8ba7d8198c0c520bc7b73bedb297f3b2 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Tue, 2 Feb 2021 18:07:52 -0500 Subject: [PATCH 158/343] Bug fixes for MNMG coarsen_graph, renumber_edgelist, relabel (#1364) Bug fixes for MNMG coarsen_graph, renumber_edgelist, relabel Authors: - Seunghwa Kang (@seunghwak) Approvers: - Andrei Schaffer (@aschaffer) - Rick Ratzel (@rlratzel) - Chuck Hastings (@ChuckHastings) - Alex Fender (@afender) URL: https://github.com/rapidsai/cugraph/pull/1364 --- cpp/include/utilities/shuffle_comm.cuh | 54 ++++++++++++++++------- cpp/src/experimental/coarsen_graph.cu | 8 +++- cpp/src/experimental/graph.cu | 6 +-- cpp/src/experimental/relabel.cu | 3 ++ cpp/src/experimental/renumber_edgelist.cu | 4 +- 5 files changed, 51 insertions(+), 24 deletions(-) diff --git a/cpp/include/utilities/shuffle_comm.cuh b/cpp/include/utilities/shuffle_comm.cuh index 6e84668bb8d..05fe51184ca 100644 --- a/cpp/include/utilities/shuffle_comm.cuh +++ b/cpp/include/utilities/shuffle_comm.cuh @@ -50,15 +50,24 @@ rmm::device_uvector sort_and_count(raft::comms::comms_t const &comm, auto gpu_id_first = thrust::make_transform_iterator( tx_value_first, [value_to_gpu_id_op] __device__(auto value) { return value_to_gpu_id_op(value); }); + rmm::device_uvector d_tx_dst_ranks(comm_size, stream); rmm::device_uvector d_tx_value_counts(comm_size, stream); - thrust::reduce_by_key(rmm::exec_policy(stream)->on(stream), - gpu_id_first, - gpu_id_first + thrust::distance(tx_value_first, tx_value_last), - thrust::make_constant_iterator(size_t{1}), - thrust::make_discard_iterator(), - d_tx_value_counts.begin()); - std::vector tx_value_counts(comm_size); - raft::update_host(tx_value_counts.data(), d_tx_value_counts.data(), comm_size, stream); + auto last = thrust::reduce_by_key(rmm::exec_policy(stream)->on(stream), + gpu_id_first, + gpu_id_first + thrust::distance(tx_value_first, tx_value_last), + thrust::make_constant_iterator(size_t{1}), + d_tx_dst_ranks.begin(), + d_tx_value_counts.begin()); + if (thrust::distance(d_tx_value_counts.begin(), thrust::get<1>(last)) < comm_size) { + rmm::device_uvector d_counts(comm_size, stream); + thrust::fill(rmm::exec_policy(stream)->on(stream), d_counts.begin(), d_counts.end(), size_t{0}); + thrust::scatter(rmm::exec_policy(stream)->on(stream), + d_tx_value_counts.begin(), + thrust::get<1>(last), + d_tx_dst_ranks.begin(), + d_counts.begin()); + d_tx_value_counts = std::move(d_counts); + } return std::move(d_tx_value_counts); } @@ -83,13 +92,24 @@ rmm::device_uvector sort_and_count(raft::comms::comms_t const &comm, auto gpu_id_first = thrust::make_transform_iterator( tx_key_first, [key_to_gpu_id_op] __device__(auto key) { return key_to_gpu_id_op(key); }); + rmm::device_uvector d_tx_dst_ranks(comm_size, stream); rmm::device_uvector d_tx_value_counts(comm_size, stream); - thrust::reduce_by_key(rmm::exec_policy(stream)->on(stream), - gpu_id_first, - gpu_id_first + thrust::distance(tx_key_first, tx_key_last), - thrust::make_constant_iterator(size_t{1}), - thrust::make_discard_iterator(), - d_tx_value_counts.begin()); + auto last = thrust::reduce_by_key(rmm::exec_policy(stream)->on(stream), + gpu_id_first, + gpu_id_first + thrust::distance(tx_key_first, tx_key_last), + thrust::make_constant_iterator(size_t{1}), + 
d_tx_dst_ranks.begin(), + d_tx_value_counts.begin()); + if (thrust::distance(d_tx_value_counts.begin(), thrust::get<1>(last)) < comm_size) { + rmm::device_uvector d_counts(comm_size, stream); + thrust::fill(rmm::exec_policy(stream)->on(stream), d_counts.begin(), d_counts.end(), size_t{0}); + thrust::scatter(rmm::exec_policy(stream)->on(stream), + d_tx_value_counts.begin(), + thrust::get<1>(last), + d_tx_dst_ranks.begin(), + d_counts.begin()); + d_tx_value_counts = std::move(d_counts); + } return std::move(d_tx_value_counts); } @@ -191,7 +211,7 @@ auto shuffle_values(raft::comms::comms_t const &comm, auto rx_value_buffer = allocate_dataframe_buffer::value_type>( - rx_offsets.back(), stream); + rx_offsets.size() > 0 ? rx_offsets.back() + rx_counts.back() : size_t{0}, stream); // FIXME: this needs to be replaced with AlltoAll once NCCL 2.8 is released // (if num_tx_dst_ranks == num_rx_src_ranks == comm_size). @@ -234,7 +254,7 @@ auto sort_and_shuffle_values(raft::comms::comms_t const &comm, auto rx_value_buffer = allocate_dataframe_buffer::value_type>( - rx_offsets.back() + rx_counts.back(), stream); + rx_offsets.size() > 0 ? rx_offsets.back() + rx_counts.back() : size_t{0}, stream); // FIXME: this needs to be replaced with AlltoAll once NCCL 2.8 is released // (if num_tx_dst_ranks == num_rx_src_ranks == comm_size). @@ -275,7 +295,7 @@ auto sort_and_shuffle_kv_pairs(raft::comms::comms_t const &comm, detail::compute_tx_rx_counts_offsets_ranks(comm, d_tx_value_counts, stream); rmm::device_uvector::value_type> rx_keys( - rx_offsets.back() + rx_counts.back(), stream); + rx_offsets.size() > 0 ? rx_offsets.back() + rx_counts.back() : size_t{0}, stream); auto rx_value_buffer = allocate_dataframe_buffer::value_type>( rx_keys.size(), stream); diff --git a/cpp/src/experimental/coarsen_graph.cu b/cpp/src/experimental/coarsen_graph.cu index 13b15f38218..7e312540896 100644 --- a/cpp/src/experimental/coarsen_graph.cu +++ b/cpp/src/experimental/coarsen_graph.cu @@ -394,8 +394,12 @@ coarsen_graph( // 4. 
renumber rmm::device_uvector renumber_map_labels(0, handle.get_stream()); - partition_t partition( - std::vector{}, graph_view.is_hypergraph_partitioned(), 0, 0, 0, 0); + partition_t partition(std::vector(comm_size + 1, 0), + graph_view.is_hypergraph_partitioned(), + row_comm_size, + col_comm_size, + row_comm_rank, + col_comm_rank); vertex_t number_of_vertices{}; edge_t number_of_edges{}; std::tie(renumber_map_labels, partition, number_of_vertices, number_of_edges) = diff --git a/cpp/src/experimental/graph.cu b/cpp/src/experimental/graph.cu index 8eb81543e5d..5cf393bfce4 100644 --- a/cpp/src/experimental/graph.cu +++ b/cpp/src/experimental/graph.cu @@ -238,9 +238,9 @@ graph_tget_number_of_edges(), - "Invalid input argument: the sum of local edges doe counts not match with " - "number_of_local_edges."); + CUGRAPH_EXPECTS( + number_of_local_edges_sum == this->get_number_of_edges(), + "Invalid input argument: the sum of local edge counts does not match with number_of_edges."); CUGRAPH_EXPECTS( partition.get_vertex_partition_last(comm_size - 1) == number_of_vertices, diff --git a/cpp/src/experimental/relabel.cu b/cpp/src/experimental/relabel.cu index 0885f9561cf..f2cb1e616c8 100644 --- a/cpp/src/experimental/relabel.cu +++ b/cpp/src/experimental/relabel.cu @@ -116,6 +116,9 @@ void relabel(raft::handle_t const& handle, // update intermediate relabel map + CUDA_TRY(cudaStreamSynchronize( + handle.get_stream())); // cuco::static_map currently does not take stream + cuco::static_map relabel_map{ static_cast(static_cast(rx_label_pair_old_labels.size()) / load_factor), invalid_vertex_id::value, diff --git a/cpp/src/experimental/renumber_edgelist.cu b/cpp/src/experimental/renumber_edgelist.cu index 893b20e77fd..9ecdd624878 100644 --- a/cpp/src/experimental/renumber_edgelist.cu +++ b/cpp/src/experimental/renumber_edgelist.cu @@ -281,7 +281,7 @@ void expensive_check_edgelist( comm_size, row_comm_size, col_comm_size}] __device__(auto edge) { - return key_func(thrust::get<0>(edge), thrust::get<1>(edge)) == comm_rank; + return key_func(thrust::get<0>(edge), thrust::get<1>(edge)) != comm_rank; }) == 0, "Invalid input argument: edgelist_major_vertices & edgelist_minor_vertices should be " "pre-shuffled."); @@ -447,7 +447,7 @@ renumber_edgelist(raft::handle_t const& handle, handle.get_stream()); std::vector recvcounts(row_comm_size); for (int i = 0; i < row_comm_size; ++i) { - recvcounts[i] = partition.get_vertex_partition_size(row_comm_rank * row_comm_size + i); + recvcounts[i] = partition.get_vertex_partition_size(col_comm_rank * row_comm_size + i); } std::vector displacements(row_comm_size, 0); std::partial_sum(recvcounts.begin(), recvcounts.end() - 1, displacements.begin() + 1); From 4e112a39674d97ef97e00548a73ed8f72af3ff46 Mon Sep 17 00:00:00 2001 From: Brad Rees <34135411+BradReesWork@users.noreply.github.com> Date: Wed, 3 Feb 2021 15:42:49 -0500 Subject: [PATCH 159/343] Update the conda environments and README file (#1369) updated the conda environments to have cmake 3.18 and update the README file Authors: - Brad Rees (@BradReesWork) Approvers: - AJ Schmidt (@ajschmidt8) - Rick Ratzel (@rlratzel) - Alex Fender (@afender) URL: https://github.com/rapidsai/cugraph/pull/1369 --- README.md | 40 +++++++++++---------- SOURCEBUILD.md | 2 +- conda/environments/cugraph_dev_cuda10.1.yml | 2 +- conda/environments/cugraph_dev_cuda10.2.yml | 2 +- conda/environments/cugraph_dev_cuda11.0.yml | 2 +- docs/source/cugraph_blogs.rst | 6 ++++ 6 files changed, 31 insertions(+), 23 deletions(-) diff --git a/README.md 
b/README.md
index e7d0aea43fe..2f2f13b3167 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@ The [RAPIDS](https://rapids.ai) cuGraph library is a collection of GPU accelerat
 **NOTE:** For the latest stable [README.md](https://github.com/rapidsai/cugraph/blob/main/README.md) ensure you are on the latest branch.
-
+As an example, the following Python snippet loads graph data and computes PageRank:
 ```python
 import cugraph
@@ -30,6 +30,17 @@ for i in range(len(df_page)):
         " PageRank is " + str(df_page['pagerank'].iloc[i]))
 ```
+## Getting cuGraph
+There are 3 ways to get cuGraph :
+1. [Quick start with Docker Repo](#quick)
+2. [Conda Installation](#conda)
+3. [Build from Source](#source)
+
+
+---
+# Currently Supported Features
+As of Release 0.18 - including 0.18 nightly
+
 ## Supported Algorithms
@@ -67,7 +78,7 @@ for i in range(len(df_page)):
 | | Weighted Jaccard Similarity | Single-GPU | |
 | | Overlap Similarity | Single-GPU | |
 | Traversal | | | |
-| | Breadth First Search (BFS) | Multi-GPU | |
+| | Breadth First Search (BFS) | Multi-GPU | with cutoff support |
 | | Single Source Shortest Path (SSSP) | Multi-GPU | |
 | Structure | | | |
 | | Renumbering | Single-GPU | multiple columns, any data type |
@@ -76,7 +87,6 @@ for i in range(len(df_page)):
 | | Hungarian Algorithm | Single-GPU | |
 | | Minimum Spanning Tree | Single-GPU | |
 | | Maximum Spanning Tree | Single-GPU | |
-
 | | |
@@ -85,13 +95,13 @@ for i in range(len(df_page)):
 | --------------- | --------------------------------------------------- |
 | Graph | An undirected Graph |
 | DiGraph | A Directed Graph |
-| _Multigraph_ | _coming in 0.18_ |
-| _MultiDigraph_ | _coming in 0.18_ |
+| Multigraph | A Graph with multiple edges between a vertex pair |
+| MultiDigraph | A Directed Graph with multiple edges between a vertex pair |
 | | |
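The table above lists the Python-level graph classes. As an illustration only (not part of the patch itself), a minimal sketch of constructing these classes from a cuDF edge list is shown below; the file name and column names are hypothetical, and the multigraph constructor is assumed to follow the same pattern as `Graph` and `DiGraph`:

```python
import cudf
import cugraph

# Hypothetical edge list; "my_edges.csv" and its column names are placeholders.
gdf = cudf.read_csv("my_edges.csv", names=["src", "dst", "wt"],
                    dtype=["int32", "int32", "float32"])

G = cugraph.Graph()        # undirected
DG = cugraph.DiGraph()     # directed
MG = cugraph.MultiGraph()  # undirected, multiple edges per vertex pair (assumed constructor)

# Populate each graph from the same cuDF edge list.
for g in (G, DG, MG):
    g.from_cudf_edgelist(gdf, source="src", destination="dst", edge_attr="wt")
```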
 ## Supported Data Types
-cuGraph supports the creation of a graph several data types:
+cuGraph supports graph creation with Source and Destination being expressed as:
 * cuDF DataFrame
 * Pandas DataFrame
@@ -125,22 +135,14 @@ The amount of memory required is dependent on the graph structure and the analyt
 The use of managed memory for oversubscription can also be used to exceed the above memory limitations. See the recent blog on _Tackling Large Graphs with RAPIDS cuGraph and CUDA Unified Memory on GPUs_: https://medium.com/rapids-ai/tackling-large-graphs-with-rapids-cugraph-and-unified-virtual-memory-b5b69a065d4
+
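Following the "Supported Data Types" list above, here is a small sketch (again illustrative, not part of the patch) of feeding the same edge list to cuGraph as either a cuDF DataFrame or a Pandas DataFrame; `from_pandas_edgelist` is assumed here to mirror `from_cudf_edgelist`:

```python
import cudf
import pandas as pd
import cugraph

# Tiny in-memory edge list used for both inputs.
pdf = pd.DataFrame({"src": [0, 1, 2], "dst": [1, 2, 0], "wt": [1.0, 2.0, 3.0]})
gdf = cudf.DataFrame.from_pandas(pdf)

G_cudf = cugraph.Graph()
G_cudf.from_cudf_edgelist(gdf, source="src", destination="dst", edge_attr="wt")

G_pandas = cugraph.Graph()
G_pandas.from_pandas_edgelist(pdf, source="src", destination="dst", edge_attr="wt")
```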
-## Getting cuGraph -### Intro -There are 3 ways to get cuGraph : -1. [Quick start with Docker Demo Repo](#quick) -2. [Conda Installation](#conda) -3. [Build from Source](#source) - - - - +--- ## Quick Start -Please see the [Demo Docker Repository](https://hub.docker.com/r/rapidsai/rapidsai/), choosing a tag based on the NVIDIA CUDA version you’re running. This provides a ready to run Docker container with example notebooks and data, showcasing how you can utilize all of the RAPIDS libraries: cuDF, cuML, and cuGraph. +Please see the [Docker Repository](https://hub.docker.com/r/rapidsai/rapidsai/), choosing a tag based on the NVIDIA CUDA version you’re running. This provides a ready to run Docker container with example notebooks and data, showcasing how you can utilize all of the RAPIDS libraries: cuDF, cuML, and cuGraph. -### Conda +## Conda It is easy to install cuGraph using conda. You can get a minimal conda installation with [Miniconda](https://conda.io/miniconda.html) or get the full installation with [Anaconda](https://www.anaconda.com/download). Install and update cuGraph using the conda command: @@ -160,7 +162,7 @@ conda install -c nvidia -c rapidsai -c numba -c conda-forge -c defaults cugraph Note: This conda installation only applies to Linux and Python versions 3.7/3.8. -### Build from Source and Contributing +## Build from Source and Contributing Please see our [guide for building cuGraph from source](SOURCEBUILD.md) diff --git a/SOURCEBUILD.md b/SOURCEBUILD.md index 03fef9821a1..0cbf6ccdaa3 100644 --- a/SOURCEBUILD.md +++ b/SOURCEBUILD.md @@ -9,7 +9,7 @@ The cuGraph package include both a C/C++ CUDA portion and a python portion. Bot __Compiler__: * `gcc` version 5.4+ * `nvcc` version 10.0+ -* `cmake` version 3.12+ +* `cmake` version 3.18+ __CUDA:__ * CUDA 10.1+ diff --git a/conda/environments/cugraph_dev_cuda10.1.yml b/conda/environments/cugraph_dev_cuda10.1.yml index 0da9b3dc3e7..59fc988bde3 100644 --- a/conda/environments/cugraph_dev_cuda10.1.yml +++ b/conda/environments/cugraph_dev_cuda10.1.yml @@ -23,7 +23,7 @@ dependencies: - cudatoolkit=10.1 - clang=8.0.1 - clang-tools=8.0.1 -- cmake>=3.12 +- cmake>=3.18 - python>=3.6,<3.9 - notebook>=0.5.0 - boost diff --git a/conda/environments/cugraph_dev_cuda10.2.yml b/conda/environments/cugraph_dev_cuda10.2.yml index eaf78f0d72c..a60730a5476 100644 --- a/conda/environments/cugraph_dev_cuda10.2.yml +++ b/conda/environments/cugraph_dev_cuda10.2.yml @@ -23,7 +23,7 @@ dependencies: - cudatoolkit=10.2 - clang=8.0.1 - clang-tools=8.0.1 -- cmake>=3.12 +- cmake>=3.18 - python>=3.6,<3.9 - notebook>=0.5.0 - boost diff --git a/conda/environments/cugraph_dev_cuda11.0.yml b/conda/environments/cugraph_dev_cuda11.0.yml index c845a311884..d85afa7d667 100644 --- a/conda/environments/cugraph_dev_cuda11.0.yml +++ b/conda/environments/cugraph_dev_cuda11.0.yml @@ -23,7 +23,7 @@ dependencies: - cudatoolkit=11.0 - clang=8.0.1 - clang-tools=8.0.1 -- cmake>=3.12 +- cmake>=3.18 - python>=3.6,<3.9 - notebook>=0.5.0 - boost diff --git a/docs/source/cugraph_blogs.rst b/docs/source/cugraph_blogs.rst index 84e31d40a19..cbbc93a1b14 100644 --- a/docs/source/cugraph_blogs.rst +++ b/docs/source/cugraph_blogs.rst @@ -23,6 +23,12 @@ BLOGS * `Status of RAPIDS cuGraph — Refactoring Code And Rethinking Graphs `_ * `Tackling Large Graphs with RAPIDS cuGraph and CUDA Unified Memory on GPUs `_ * `RAPIDS cuGraph adds NetworkX and DiGraph Compatibility `_ + * `Large Graph Visualization with RAPIDS cuGraph `_ + +2021 +------ + * + Media From 
7c7c79e24ec897e14ae067007153eff787a01150 Mon Sep 17 00:00:00 2001 From: Andrei Schaffer <37386037+aschaffer@users.noreply.github.com> Date: Wed, 3 Feb 2021 15:51:18 -0600 Subject: [PATCH 160/343] Implement MNMG Renumber (#1355) This exposes to cython / python newly added C++ functions `sort_and_shuffle_values()` and `renumber_edgelist()` in order to perform MNMG renumbering. Authors: - Andrei Schaffer (@aschaffer) Approvers: - Chuck Hastings (@ChuckHastings) - Seunghwa Kang (@seunghwak) - Alex Fender (@afender) URL: https://github.com/rapidsai/cugraph/pull/1355 --- cpp/include/experimental/graph.hpp | 4 +- cpp/include/utilities/cython.hpp | 185 ++++++- cpp/src/experimental/renumber_edgelist.cu | 78 ++- cpp/src/utilities/cython.cu | 175 +++++++ python/cugraph/dask/structure/renumber.py | 71 +++ .../dask/structure/renumber_wrapper.pyx | 460 ++++++++++++++++++ python/cugraph/structure/graph_primtypes.pxd | 87 +++- 7 files changed, 1054 insertions(+), 6 deletions(-) create mode 100644 python/cugraph/dask/structure/renumber.py create mode 100644 python/cugraph/dask/structure/renumber_wrapper.pyx diff --git a/cpp/include/experimental/graph.hpp b/cpp/include/experimental/graph.hpp index 5e7e48016c7..cc21f7c5013 100644 --- a/cpp/include/experimental/graph.hpp +++ b/cpp/include/experimental/graph.hpp @@ -70,7 +70,7 @@ class graph_t view() + graph_view_t view() const { std::vector offsets(adj_matrix_partition_offsets_.size(), nullptr); std::vector indices(adj_matrix_partition_indices_.size(), nullptr); @@ -132,7 +132,7 @@ class graph_tget_number_of_vertices(); } - graph_view_t view() + graph_view_t view() const { return graph_view_t( *(this->get_handle_ptr()), diff --git a/cpp/include/utilities/cython.hpp b/cpp/include/utilities/cython.hpp index cd621a516ea..d95d410af29 100644 --- a/cpp/include/utilities/cython.hpp +++ b/cpp/include/utilities/cython.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,6 +18,7 @@ #include #include #include +#include namespace cugraph { namespace cython { @@ -109,6 +110,165 @@ struct graph_container_t { experimental::graph_properties_t graph_props; }; +// replacement for std::tuple<,,>, since std::tuple is not +// supported in cython +// +template +struct major_minor_weights_t { + explicit major_minor_weights_t(raft::handle_t const& handle) + : shuffled_major_vertices_(0, handle.get_stream()), + shuffled_minor_vertices_(0, handle.get_stream()), + shuffled_weights_(0, handle.get_stream()) + { + } + rmm::device_uvector& get_major(void) { return shuffled_major_vertices_; } + + rmm::device_uvector& get_minor(void) { return shuffled_minor_vertices_; } + + rmm::device_uvector& get_weights(void) { return shuffled_weights_; } + + std::pair, size_t> get_major_wrap( + void) // const: triggers errors in Cython autogen-ed C++ + { + return std::make_pair(std::make_unique(shuffled_major_vertices_.release()), + sizeof(vertex_t)); + } + + std::pair, size_t> get_minor_wrap(void) // const + { + return std::make_pair(std::make_unique(shuffled_minor_vertices_.release()), + sizeof(vertex_t)); + } + + std::pair, size_t> get_weights_wrap(void) // const + { + return std::make_pair(std::make_unique(shuffled_weights_.release()), + sizeof(weight_t)); + } + + private: + rmm::device_uvector shuffled_major_vertices_; + rmm::device_uvector shuffled_minor_vertices_; + rmm::device_uvector shuffled_weights_; +}; + +// wrapper for renumber_edgelist() return +// (unrenumbering maps, etc.) +// +template +struct renum_quad_t { + explicit renum_quad_t(raft::handle_t const& handle) + : dv_(0, handle.get_stream()), part_(std::vector(), false, 0, 0, 0, 0) + { + } + + rmm::device_uvector& get_dv(void) { return dv_; } + + std::pair, size_t> get_dv_wrap( + void) // const: see above explanation + { + return std::make_pair(std::make_unique(dv_.release()), sizeof(vertex_t)); + } + + cugraph::experimental::partition_t& get_partition(void) { return part_; } + vertex_t& get_num_vertices(void) { return nv_; } + edge_t& get_num_edges(void) { return ne_; } + + // `partition_t` pass-through getters + // + int get_part_row_size() const { return part_.get_row_size(); } + + int get_part_col_size() const { return part_.get_col_size(); } + + int get_part_comm_rank() const { return part_.get_comm_rank(); } + + // FIXME: part_.get_vertex_partition_offsets() returns a std::vector + // + std::unique_ptr> get_partition_offsets(void) // const + { + return std::make_unique>(part_.get_vertex_partition_offsets()); + } + + std::pair get_part_local_vertex_range() const + { + auto tpl_v = part_.get_local_vertex_range(); + return std::make_pair(std::get<0>(tpl_v), std::get<1>(tpl_v)); + } + + vertex_t get_part_local_vertex_first() const { return part_.get_local_vertex_first(); } + + vertex_t get_part_local_vertex_last() const { return part_.get_local_vertex_last(); } + + std::pair get_part_vertex_partition_range(size_t vertex_partition_idx) const + { + auto tpl_v = part_.get_vertex_partition_range(vertex_partition_idx); + return std::make_pair(std::get<0>(tpl_v), std::get<1>(tpl_v)); + } + + vertex_t get_part_vertex_partition_first(size_t vertex_partition_idx) const + { + return part_.get_vertex_partition_first(vertex_partition_idx); + } + + vertex_t get_part_vertex_partition_last(size_t vertex_partition_idx) const + { + return part_.get_vertex_partition_last(vertex_partition_idx); + } + + vertex_t get_part_vertex_partition_size(size_t vertex_partition_idx) const + { + return 
part_.get_vertex_partition_size(vertex_partition_idx); + } + + size_t get_part_number_of_matrix_partitions() const + { + return part_.get_number_of_matrix_partitions(); + } + + std::pair get_part_matrix_partition_major_range(size_t partition_idx) const + { + auto tpl_v = part_.get_matrix_partition_major_range(partition_idx); + return std::make_pair(std::get<0>(tpl_v), std::get<1>(tpl_v)); + } + + vertex_t get_part_matrix_partition_major_first(size_t partition_idx) const + { + return part_.get_matrix_partition_major_first(partition_idx); + } + + vertex_t get_part_matrix_partition_major_last(size_t partition_idx) const + { + return part_.get_matrix_partition_major_last(partition_idx); + } + + vertex_t get_part_matrix_partition_major_value_start_offset(size_t partition_idx) const + { + return part_.get_part_matrix_partition_major_value_start_offset(partition_idx); + } + + std::pair get_part_matrix_partition_minor_range() const + { + auto tpl_v = part_.get_matrix_partition_minor_range(); + return std::make_pair(std::get<0>(tpl_v), std::get<1>(tpl_v)); + } + + vertex_t get_part_matrix_partition_minor_first() const + { + return part_.get_matrix_partition_minor_first(); + } + + vertex_t get_part_matrix_partition_minor_last() const + { + return part_.get_matrix_partition_minor_last(); + } + + private: + rmm::device_uvector dv_; + cugraph::experimental::partition_t part_; + vertex_t nv_; + edge_t ne_; +}; + // FIXME: finish description for vertex_partition_offsets // // Factory function for populating an empty graph container with a new graph @@ -246,6 +406,29 @@ void call_sssp(raft::handle_t const& handle, vertex_t* predecessors, const vertex_t source_vertex); +// wrapper for shuffling: +// +template +std::unique_ptr> call_shuffle( + raft::handle_t const& handle, + vertex_t* edgelist_major_vertices, // [IN / OUT]: sort_and_shuffle_values() sorts in-place + vertex_t* edgelist_minor_vertices, // [IN / OUT] + weight_t* edgelist_weights, // [IN / OUT] + edge_t num_edgelist_edges, + bool is_hypergraph_partitioned); // = false + +// Wrapper for calling renumber_edeglist() inplace: +// +template +std::unique_ptr> call_renumber( + raft::handle_t const& handle, + vertex_t* shuffled_edgelist_major_vertices /* [INOUT] */, + vertex_t* shuffled_edgelist_minor_vertices /* [INOUT] */, + edge_t num_edgelist_edges, + bool is_hypergraph_partitioned, + bool do_expensive_check, + bool multi_gpu); + // Helper for setting up subcommunicators, typically called as part of the // user-initiated comms initialization in Python. 
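The comment above refers to the user-initiated comms initialization done on the Python side, and the `call_shuffle()` / `call_renumber()` wrappers declared here are what the new `cugraph.dask.structure.renumber` module (added by this same patch; its diff appears further below) drives. A minimal end-to-end sketch, with the cluster setup, file name, and column names being assumptions rather than anything taken from these patches:

```python
from dask.distributed import Client
from dask_cuda import LocalCUDACluster
import dask_cudf
import cugraph
import cugraph.comms.comms as Comms
from cugraph.dask.structure.renumber import renumber  # module added by this patch

# Hypothetical single-node multi-GPU setup.
cluster = LocalCUDACluster()
client = Client(cluster)
Comms.initialize(p2p=True)  # the user-initiated comms init mentioned in the comment above

# "edges.csv" and its column names are placeholders.
ddf = dask_cudf.read_csv("edges.csv", names=["src", "dst", "value"],
                         dtype=["int32", "int32", "float32"])
G = cugraph.DiGraph()
G.from_dask_cudf_edgelist(ddf, source="src", destination="dst", edge_attr="value")

# Per-worker shuffle + renumber, backed by the call_shuffle()/call_renumber() wrappers above.
renumbered_ddf = renumber(G)

Comms.destroy()
client.close()
cluster.close()
```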
// diff --git a/cpp/src/experimental/renumber_edgelist.cu b/cpp/src/experimental/renumber_edgelist.cu index 9ecdd624878..1f9a5a573fa 100644 --- a/cpp/src/experimental/renumber_edgelist.cu +++ b/cpp/src/experimental/renumber_edgelist.cu @@ -707,8 +707,10 @@ std::enable_if_t> renumber_edgelist( do_expensive_check); } -// explicit instantiation - +// explicit instantiation directives (EIDir's): +// +// instantiations for +// template std::tuple, partition_t, int32_t, int32_t> renumber_edgelist(raft::handle_t const& handle, int32_t* edgelist_major_vertices /* [INOUT] */, @@ -743,5 +745,77 @@ template rmm::device_uvector renumber_edgelist int32_t num_edgelist_edges, bool do_expensive_check); +// instantiations for +// +template std::tuple, partition_t, int32_t, int64_t> +renumber_edgelist(raft::handle_t const& handle, + int32_t* edgelist_major_vertices /* [INOUT] */, + int32_t* edgelist_minor_vertices /* [INOUT] */, + int64_t num_edgelist_edges, + bool is_hypergraph_partitioned, + bool do_expensive_check); + +template rmm::device_uvector renumber_edgelist( + raft::handle_t const& handle, + int32_t* edgelist_major_vertices /* [INOUT] */, + int32_t* edgelist_minor_vertices /* [INOUT] */, + int64_t num_edgelist_edges, + bool do_expensive_check); + +template std::tuple, partition_t, int32_t, int64_t> +renumber_edgelist(raft::handle_t const& handle, + int32_t const* local_vertices, + int32_t num_local_vertices, + int32_t* edgelist_major_vertices /* [INOUT] */, + int32_t* edgelist_minor_vertices /* [INOUT] */, + int64_t num_edgelist_edges, + bool is_hypergraph_partitioned, + bool do_expensive_check); + +template rmm::device_uvector renumber_edgelist( + raft::handle_t const& handle, + int32_t const* vertices, + int32_t num_vertices, + int32_t* edgelist_major_vertices /* [INOUT] */, + int32_t* edgelist_minor_vertices /* [INOUT] */, + int64_t num_edgelist_edges, + bool do_expensive_check); + +// instantiations for +// +template std::tuple, partition_t, int64_t, int64_t> +renumber_edgelist(raft::handle_t const& handle, + int64_t* edgelist_major_vertices /* [INOUT] */, + int64_t* edgelist_minor_vertices /* [INOUT] */, + int64_t num_edgelist_edges, + bool is_hypergraph_partitioned, + bool do_expensive_check); + +template rmm::device_uvector renumber_edgelist( + raft::handle_t const& handle, + int64_t* edgelist_major_vertices /* [INOUT] */, + int64_t* edgelist_minor_vertices /* [INOUT] */, + int64_t num_edgelist_edges, + bool do_expensive_check); + +template std::tuple, partition_t, int64_t, int64_t> +renumber_edgelist(raft::handle_t const& handle, + int64_t const* local_vertices, + int64_t num_local_vertices, + int64_t* edgelist_major_vertices /* [INOUT] */, + int64_t* edgelist_minor_vertices /* [INOUT] */, + int64_t num_edgelist_edges, + bool is_hypergraph_partitioned, + bool do_expensive_check); + +template rmm::device_uvector renumber_edgelist( + raft::handle_t const& handle, + int64_t const* vertices, + int64_t num_vertices, + int64_t* edgelist_major_vertices /* [INOUT] */, + int64_t* edgelist_minor_vertices /* [INOUT] */, + int64_t num_edgelist_edges, + bool do_expensive_check); + } // namespace experimental } // namespace cugraph diff --git a/cpp/src/utilities/cython.cu b/cpp/src/utilities/cython.cu index 22807beac34..7f221304a0a 100644 --- a/cpp/src/utilities/cython.cu +++ b/cpp/src/utilities/cython.cu @@ -15,16 +15,20 @@ */ #include +#include +#include #include #include #include #include #include #include +#include #include #include #include +#include namespace cugraph { namespace cython { @@ 
-691,6 +695,100 @@ void call_sssp(raft::handle_t const& handle, } } +// wrapper for shuffling: +// +template +std::unique_ptr> call_shuffle( + raft::handle_t const& handle, + vertex_t* edgelist_major_vertices, // [IN / OUT]: sort_and_shuffle_values() sorts in-place + vertex_t* edgelist_minor_vertices, // [IN / OUT] + weight_t* edgelist_weights, // [IN / OUT] + edge_t num_edgelist_edges, + bool is_hypergraph_partitioned) // = false +{ + auto& comm = handle.get_comms(); + + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + + auto zip_edge = thrust::make_zip_iterator( + thrust::make_tuple(edgelist_major_vertices, edgelist_minor_vertices, edgelist_weights)); + + std::unique_ptr> ptr_ret = + std::make_unique>(handle); + + std::forward_as_tuple( + std::tie(ptr_ret->get_major(), ptr_ret->get_minor(), ptr_ret->get_weights()), + std::ignore) = + cugraph::experimental::sort_and_shuffle_values( + comm, // handle.get_comms(), + zip_edge, + zip_edge + num_edgelist_edges, + [key_func = + cugraph::experimental::detail::compute_gpu_id_from_edge_t{ + is_hypergraph_partitioned, + comm.get_size(), + row_comm.get_size(), + col_comm.get_size()}] __device__(auto val) { + return key_func(thrust::get<0>(val), thrust::get<1>(val)); + }, + handle.get_stream()); + + return ptr_ret; // RVO-ed +} + +// Wrapper for calling renumber_edeglist() inplace: +// TODO: check if return type needs further handling... +// +template +std::unique_ptr> call_renumber( + raft::handle_t const& handle, + vertex_t* shuffled_edgelist_major_vertices /* [INOUT] */, + vertex_t* shuffled_edgelist_minor_vertices /* [INOUT] */, + edge_t num_edgelist_edges, + bool is_hypergraph_partitioned, + bool do_expensive_check, + bool multi_gpu) // bc. 
cython cannot take non-type template params +{ + // caveat: return values have different types on the 2 branches below: + // + std::unique_ptr> p_ret = + std::make_unique>(handle); + + if (multi_gpu) { + std::tie( + p_ret->get_dv(), p_ret->get_partition(), p_ret->get_num_vertices(), p_ret->get_num_edges()) = + cugraph::experimental::renumber_edgelist( + handle, + shuffled_edgelist_major_vertices, + shuffled_edgelist_minor_vertices, + num_edgelist_edges, + is_hypergraph_partitioned, + do_expensive_check); + } else { + auto ret_f = cugraph::experimental::renumber_edgelist( + handle, + shuffled_edgelist_major_vertices, + shuffled_edgelist_minor_vertices, + num_edgelist_edges, + do_expensive_check); + + auto tot_vertices = static_cast(ret_f.size()); + + p_ret->get_dv() = std::move(ret_f); + cugraph::experimental::partition_t part_sg( + std::vector{0, tot_vertices}, false, 1, 1, 0, 0); + + p_ret->get_partition() = std::move(part_sg); + + p_ret->get_num_vertices() = tot_vertices; + p_ret->get_num_edges() = num_edgelist_edges; + } + + return p_ret; // RVO-ed (copy ellision) +} + // Helper for setting up subcommunicators void init_subcomms(raft::handle_t& handle, size_t row_comm_size) { @@ -870,5 +968,82 @@ template void call_sssp(raft::handle_t const& handle, int64_t* predecessors, const int64_t source_vertex); +template std::unique_ptr> call_shuffle( + raft::handle_t const& handle, + int32_t* edgelist_major_vertices, + int32_t* edgelist_minor_vertices, + float* edgelist_weights, + int32_t num_edgelist_edges, + bool is_hypergraph_partitioned); + +template std::unique_ptr> call_shuffle( + raft::handle_t const& handle, + int32_t* edgelist_major_vertices, + int32_t* edgelist_minor_vertices, + float* edgelist_weights, + int64_t num_edgelist_edges, + bool is_hypergraph_partitioned); + +template std::unique_ptr> call_shuffle( + raft::handle_t const& handle, + int32_t* edgelist_major_vertices, + int32_t* edgelist_minor_vertices, + double* edgelist_weights, + int32_t num_edgelist_edges, + bool is_hypergraph_partitioned); + +template std::unique_ptr> call_shuffle( + raft::handle_t const& handle, + int32_t* edgelist_major_vertices, + int32_t* edgelist_minor_vertices, + double* edgelist_weights, + int64_t num_edgelist_edges, + bool is_hypergraph_partitioned); + +template std::unique_ptr> call_shuffle( + raft::handle_t const& handle, + int64_t* edgelist_major_vertices, + int64_t* edgelist_minor_vertices, + float* edgelist_weights, + int64_t num_edgelist_edges, + bool is_hypergraph_partitioned); + +template std::unique_ptr> call_shuffle( + raft::handle_t const& handle, + int64_t* edgelist_major_vertices, + int64_t* edgelist_minor_vertices, + double* edgelist_weights, + int64_t num_edgelist_edges, + bool is_hypergraph_partitioned); + +// TODO: add the remaining relevant EIDIr's: +// +template std::unique_ptr> call_renumber( + raft::handle_t const& handle, + int32_t* shuffled_edgelist_major_vertices /* [INOUT] */, + int32_t* shuffled_edgelist_minor_vertices /* [INOUT] */, + int32_t num_edgelist_edges, + bool is_hypergraph_partitioned, + bool do_expensive_check, + bool multi_gpu); + +template std::unique_ptr> call_renumber( + raft::handle_t const& handle, + int32_t* shuffled_edgelist_major_vertices /* [INOUT] */, + int32_t* shuffled_edgelist_minor_vertices /* [INOUT] */, + int64_t num_edgelist_edges, + bool is_hypergraph_partitioned, + bool do_expensive_check, + bool multi_gpu); + +template std::unique_ptr> call_renumber( + raft::handle_t const& handle, + int64_t* shuffled_edgelist_major_vertices /* [INOUT] */, 
+ int64_t* shuffled_edgelist_minor_vertices /* [INOUT] */, + int64_t num_edgelist_edges, + bool is_hypergraph_partitioned, + bool do_expensive_check, + bool multi_gpu); + } // namespace cython } // namespace cugraph diff --git a/python/cugraph/dask/structure/renumber.py b/python/cugraph/dask/structure/renumber.py new file mode 100644 index 00000000000..606a6bc4dc1 --- /dev/null +++ b/python/cugraph/dask/structure/renumber.py @@ -0,0 +1,71 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from dask.distributed import wait, default_client +from cugraph.dask.common.input_utils import get_distributed_data +from cugraph.dask.structure import renumber_wrapper as renumber_w +import cugraph.comms.comms as Comms +import dask_cudf + + +def call_renumber(sID, + data, + num_verts, + num_edges, + is_mnmg): + wid = Comms.get_worker_id(sID) + handle = Comms.get_handle(sID) + return renumber_w.mg_renumber(data[0], + num_verts, + num_edges, + wid, + handle, + is_mnmg) + + +def renumber(input_graph): + + client = default_client() + + ddf = input_graph.edgelist.edgelist_df + + num_edges = len(ddf) + + if isinstance(ddf, dask_cudf.DataFrame): + is_mnmg = True + else: + is_mnmg = False + + num_verts = input_graph.number_of_vertices() + + if is_mnmg: + data = get_distributed_data(ddf) + result = [client.submit(call_renumber, + Comms.get_session_id(), + wf[1], + num_verts, + num_edges, + is_mnmg, + workers=[wf[0]]) + for idx, wf in enumerate(data.worker_to_parts.items())] + wait(result) + ddf = dask_cudf.from_delayed(result) + else: + call_renumber(Comms.get_session_id(), + ddf, + num_verts, + num_edges, + is_mnmg) + return ddf diff --git a/python/cugraph/dask/structure/renumber_wrapper.pyx b/python/cugraph/dask/structure/renumber_wrapper.pyx new file mode 100644 index 00000000000..40dd80aeb67 --- /dev/null +++ b/python/cugraph/dask/structure/renumber_wrapper.pyx @@ -0,0 +1,460 @@ +# +# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from cugraph.structure.utils_wrapper import * +import cudf +from cugraph.structure.graph_primtypes cimport * +import cugraph.structure.graph_primtypes_wrapper as graph_primtypes_wrapper +from libc.stdint cimport uintptr_t +from cython.operator cimport dereference as deref +import numpy as np + +from libcpp.utility cimport move +from rmm._lib.device_buffer cimport device_buffer, DeviceBuffer + +cdef renumber_helper(shuffled_vertices_t* ptr_maj_min_w): + # extract shuffled result: + # + cdef pair[unique_ptr[device_buffer], size_t] pair_s_major = deref(ptr_maj_min_w).get_major_wrap() + cdef pair[unique_ptr[device_buffer], size_t] pair_s_minor = deref(ptr_maj_min_w).get_minor_wrap() + cdef pair[unique_ptr[device_buffer], size_t] pair_s_weights = deref(ptr_maj_min_w).get_weights_wrap() + + shufled_major_buffer = DeviceBuffer.c_from_unique_ptr(move(pair_s_major.first)) + shufled_major_buffer = Buffer(shufled_major_buffer) + + shufled_major_series = cudf.Series(data=shufled_major_buffer, dtype=vertex_t) + + shufled_minor_buffer = DeviceBuffer.c_from_unique_ptr(move(pair_s_minor.first)) + shufled_minor_buffer = Buffer(shufled_minor_buffer) + + shufled_minor_series = cudf.Series(data=shufled_minor_buffer, dtype=vertex_t) + + shufled_weights_buffer = DeviceBuffer.c_from_unique_ptr(move(pair_s_weights.first)) + shufled_weights_buffer = Buffer(shufled_weights_buffer) + + shufled_weights_series = cudf.Series(data=shufled_weights_buffer, dtype=weight_t) + + shuffled_df = cudf.DataFrame() + shuffled_df['src']=shuffled_major_series + shuffled_df['dst']=shuffled_minor_series + shuffled_df['weights']= shuffled_weights_series + + return shuffled_df + +def mg_renumber(input_df, # maybe use cpdef ? + num_global_verts, + num_global_edges, + rank, + handle, + is_multi_gpu): + """ + Call MNMG renumber + """ + cdef size_t handle_size_t = handle.getHandle() + # TODO: get handle_t out of handle... + handle_ptr = handle_size_t + + src = input_df['src'] + dst = input_df['dst'] + cdef uintptr_t c_edge_weights = NULL # set below... + + vertex_t = src.dtype + if num_global_edges > (2**31 - 1): + edge_t = np.dtype("int64") + else: + edge_t = np.dtype("int32") + if "value" in input_df.columns: + weights = input_df['value'] + weight_t = weights.dtype + c_edge_weights = weights.__cuda_array_interface__['data'][0] + else: + weight_t = np.dtype("float32") + + if (vertex_t != np.dtype("int32") and vertex_t != np.dtype("int64")): + raise Exception("Incorrect vertex_t type.") + if (edge_t != np.dtype("int32") and edge_t != np.dtype("int64")): + raise Exception("Incorrect edge_t type.") + if (weight_t != np.dtype("float32") and weight_t != np.dtype("float64")): + raise Exception("Incorrect weight_t type.") + if (vertex_t != np.dtype("int32") and edge_t != np.dtype("int64")): + raise Exception("Incompatible vertex_t and edge_t types.") + + # FIXME: needs to be edge_t type not int + cdef int num_partition_edges = len(src) + + cdef uintptr_t c_src_vertices = src.__cuda_array_interface__['data'][0] + cdef uintptr_t c_dst_vertices = dst.__cuda_array_interface__['data'][0] + + cdef bool is_hyper_partitioned = False # for now + + cdef uintptr_t shuffled_major = NULL + cdef uintptr_t shuffled_minor = NULL + + cdef bool do_check = False # ? for now... 
+ cdef bool mg_flag = is_multi_gpu # run Single-GPU or MNMG + + cdef pair[unique_ptr[device_buffer], size_t] pair_original + cdef pair[unique_ptr[device_buffer], size_t] pair_partition + + # tparams: vertex_t, weight_t: + # + cdef unique_ptr[major_minor_weights_t[int, float]] ptr_shuffled_32_32 + cdef unique_ptr[major_minor_weights_t[int, double]] ptr_shuffled_32_64 + cdef unique_ptr[major_minor_weights_t[long, float]] ptr_shuffled_64_32 + cdef unique_ptr[major_minor_weights_t[long, double]] ptr_shuffled_64_64 + + # tparams: vertex_t, edge_t: + # + cdef unique_ptr[renum_quad_t[int, int]] ptr_renum_quad_32_32 + cdef unique_ptr[renum_quad_t[int, long]] ptr_renum_quad_32_64 + cdef unique_ptr[renum_quad_t[long, long]] ptr_renum_quad_64_64 + + # tparam: vertex_t: + # + cdef unique_ptr[vector[int]] uniq_partition_vector_32 + cdef unique_ptr[vector[long]] uniq_partition_vector_64 + + cdef size_t rank_indx = rank + + if (vertex_t == np.dtype("int32")): + if ( edge_t == np.dtype("int32")): + if( weight_t == np.dtype("float32")): + ptr_shuffled_32_32.reset(call_shuffle[int, int, float](deref(handle_ptr), + c_src_vertices, + c_dst_vertices, + c_edge_weights, + num_partition_edges, + is_hyper_partitioned).release()) + + shuffled_df = renumber_helper(ptr_shuffled_32_32.get()) + + shuffled_src = shufled_df['src'] + shuffled_dst = shufled_df['dst'] + + shuffled_major = shuffled_src.__cuda_array_interface__['data'][0] + shuffled_minor = shuffled_dst.__cuda_array_interface__['data'][0] + + ptr_renum_quad_32_32.reset(call_renumber[int, int](deref(handle_ptr), + shuffled_major, + shuffled_minor, + num_partition_edges, + is_hyper_partitioned, + do_check, + mg_flag).release()) + + pair_original = ptr_renum_quad_32_32.get().get_dv_wrap() # original vertices: see helper + + + original_buffer = DeviceBuffer.c_from_unique_ptr(move(pair_original.first)) + original_buffer = Buffer(original_buffer) + + original_series = cudf.Series(data=original_buffer, dtype=vertex_t) + + # extract unique_ptr[partition_offsets]: + # + uniq_partition_vector_32 = move(ptr_renum_quad_32_32.get().get_partition_offsets()) + + # create series out of a partition range from rank to rank+1: + # + if is_multi_gpu: + new_series = cudf.Series(np.arange(uniq_partition_vector_32.get()[0].at(rank_indx), + uniq_partition_vector_32.get()[0].at(rank_indx+1)), + dtype=vertex_t) + else: + new_series = cudf.Series(np.arange(0, num_global_verts), dtype=vertex_t) + + # create new cudf df + # + # and add the previous series to it: + # + renumbered_map = cudf.DataFrame() + renumbered_map['original_ids'] = original_series + renumbered_map['new_ids'] = new_series + + return renumbered_map, shuffled_df + elif( weight_t == np.dtype("float64")): + ptr_shuffled_32_64.reset(call_shuffle[int, int, double](deref(handle_ptr), + c_src_vertices, + c_dst_vertices, + c_edge_weights, + num_partition_edges, + is_hyper_partitioned).release()) + + shuffled_df = renumber_helper(ptr_shuffled_32_64.get()) + + shuffled_src = shufled_df['src'] + shuffled_dst = shufled_df['dst'] + + shuffled_major = shuffled_src.__cuda_array_interface__['data'][0] + shuffled_minor = shuffled_dst.__cuda_array_interface__['data'][0] + + ptr_renum_quad_32_32.reset(call_renumber[int, int](deref(handle_ptr), + shuffled_major, + shuffled_minor, + num_partition_edges, + is_hyper_partitioned, + do_check, + mg_flag).release()) + + pair_original = ptr_renum_quad_32_32.get().get_dv_wrap() # original vertices: see helper + + + original_buffer = DeviceBuffer.c_from_unique_ptr(move(pair_original.first)) + 
original_buffer = Buffer(original_buffer) + + original_series = cudf.Series(data=original_buffer, dtype=vertex_t) + + # extract unique_ptr[partition_offsets]: + # + uniq_partition_vector_32 = move(ptr_renum_quad_32_32.get().get_partition_offsets()) + + # create series out of a partition range from rank to rank+1: + # + if is_multi_gpu: + new_series = cudf.Series(np.arange(uniq_partition_vector_32.get()[0].at(rank_indx), + uniq_partition_vector_32.get()[0].at(rank_indx+1)), + dtype=vertex_t) + else: + new_series = cudf.Series(np.arange(0, num_global_verts), dtype=vertex_t) + + # create new cudf df + # + # and add the previous series to it: + # + renumbered_map = cudf.DataFrame() + renumbered_map['original_ids'] = original_series + renumbered_map['new_ids'] = new_series + + return renumbered_map, shuffled_df + elif ( edge_t == np.dtype("int64")): + if( weight_t == np.dtype("float32")): + ptr_shuffled_32_32.reset(call_shuffle[int, long, float](deref(handle_ptr), + c_src_vertices, + c_dst_vertices, + c_edge_weights, + num_partition_edges, + is_hyper_partitioned).release()) + + shuffled_df = renumber_helper(ptr_shuffled_32_32.get()) + + shuffled_src = shufled_df['src'] + shuffled_dst = shufled_df['dst'] + + shuffled_major = shuffled_src.__cuda_array_interface__['data'][0] + shuffled_minor = shuffled_dst.__cuda_array_interface__['data'][0] + + ptr_renum_quad_32_64.reset(call_renumber[int, long](deref(handle_ptr), + shuffled_major, + shuffled_minor, + num_partition_edges, + is_hyper_partitioned, + do_check, + mg_flag).release()) + + pair_original = ptr_renum_quad_32_64.get().get_dv_wrap() # original vertices: see helper + + + original_buffer = DeviceBuffer.c_from_unique_ptr(move(pair_original.first)) + original_buffer = Buffer(original_buffer) + + original_series = cudf.Series(data=original_buffer, dtype=vertex_t) + + # extract unique_ptr[partition_offsets]: + # + uniq_partition_vector_32 = move(ptr_renum_quad_32_64.get().get_partition_offsets()) + + # create series out of a partition range from rank to rank+1: + # + if is_multi_gpu: + new_series = cudf.Series(np.arange(uniq_partition_vector_32.get()[0].at(rank_indx), + uniq_partition_vector_32.get()[0].at(rank_indx+1)), + dtype=vertex_t) + else: + new_series = cudf.Series(np.arange(0, num_global_verts), dtype=vertex_t) + + # create new cudf df + # + # and add the previous series to it: + # + renumbered_map = cudf.DataFrame() + renumbered_map['original_ids'] = original_series + renumbered_map['new_ids'] = new_series + + return renumbered_map, shuffled_df + elif( weight_t == np.dtype("float64")): + ptr_shuffled_32_64.reset(call_shuffle[int, long, double](deref(handle_ptr), + c_src_vertices, + c_dst_vertices, + c_edge_weights, + num_partition_edges, + is_hyper_partitioned).release()) + + shuffled_df = renumber_helper(ptr_shuffled_32_64.get()) + + shuffled_src = shufled_df['src'] + shuffled_dst = shufled_df['dst'] + + shuffled_major = shuffled_src.__cuda_array_interface__['data'][0] + shuffled_minor = shuffled_dst.__cuda_array_interface__['data'][0] + + ptr_renum_quad_32_64.reset(call_renumber[int, long](deref(handle_ptr), + shuffled_major, + shuffled_minor, + num_partition_edges, + is_hyper_partitioned, + do_check, + mg_flag).release()) + + pair_original = ptr_renum_quad_32_64.get().get_dv_wrap() # original vertices: see helper + + + original_buffer = DeviceBuffer.c_from_unique_ptr(move(pair_original.first)) + original_buffer = Buffer(original_buffer) + + original_series = cudf.Series(data=original_buffer, dtype=vertex_t) + + # extract 
unique_ptr[partition_offsets]: + # + uniq_partition_vector_32 = move(ptr_renum_quad_32_64.get().get_partition_offsets()) + + # create series out of a partition range from rank to rank+1: + # + if is_multi_gpu: + new_series = cudf.Series(np.arange(uniq_partition_vector_32.get()[0].at(rank_indx), + uniq_partition_vector_32.get()[0].at(rank_indx+1)), + dtype=vertex_t) + else: + new_series = cudf.Series(np.arange(0, num_global_verts), dtype=vertex_t) + + # create new cudf df + # + # and add the previous series to it: + # + renumbered_map = cudf.DataFrame() + renumbered_map['original_ids'] = original_series + renumbered_map['new_ids'] = new_series + + return renumbered_map, shuffled_df + elif (vertex_t == np.dtype("int64")): + if ( edge_t == np.dtype("int64")): + if( weight_t == np.dtype("float32")): + ptr_shuffled_64_32.reset(call_shuffle[long, long, float](deref(handle_ptr), + c_src_vertices, + c_dst_vertices, + c_edge_weights, + num_partition_edges, + is_hyper_partitioned).release()) + + shuffled_df = renumber_helper(ptr_shuffled_64_32.get()) + + shuffled_src = shufled_df['src'] + shuffled_dst = shufled_df['dst'] + + shuffled_major = shuffled_src.__cuda_array_interface__['data'][0] + shuffled_minor = shuffled_dst.__cuda_array_interface__['data'][0] + + ptr_renum_quad_64_64.reset(call_renumber[long, long](deref(handle_ptr), + shuffled_major, + shuffled_minor, + num_partition_edges, + is_hyper_partitioned, + do_check, + mg_flag).release()) + + pair_original = ptr_renum_quad_64_64.get().get_dv_wrap() # original vertices: see helper + + + original_buffer = DeviceBuffer.c_from_unique_ptr(move(pair_original.first)) + original_buffer = Buffer(original_buffer) + + original_series = cudf.Series(data=original_buffer, dtype=vertex_t) + + # extract unique_ptr[partition_offsets]: + # + uniq_partition_vector_64 = move(ptr_renum_quad_64_64.get().get_partition_offsets()) + + # create series out of a partition range from rank to rank+1: + # + if is_multi_gpu: + new_series = cudf.Series(np.arange(uniq_partition_vector_64.get()[0].at(rank_indx), + uniq_partition_vector_64.get()[0].at(rank_indx+1)), + dtype=vertex_t) + else: + new_series = cudf.Series(np.arange(0, num_global_verts), dtype=vertex_t) + + # create new cudf df + # + # and add the previous series to it: + # + renumbered_map = cudf.DataFrame() + renumbered_map['original_ids'] = original_series + renumbered_map['new_ids'] = new_series + + return renumbered_map, shuffled_df + elif( weight_t == np.dtype("float64")): + ptr_shuffled_64_64.reset(call_shuffle[long, long, double](deref(handle_ptr), + c_src_vertices, + c_dst_vertices, + c_edge_weights, + num_partition_edges, + is_hyper_partitioned).release()) + + shuffled_df = renumber_helper(ptr_shuffled_64_64.get()) + + shuffled_src = shufled_df['src'] + shuffled_dst = shufled_df['dst'] + + shuffled_major = shuffled_src.__cuda_array_interface__['data'][0] + shuffled_minor = shuffled_dst.__cuda_array_interface__['data'][0] + + ptr_renum_quad_64_64.reset(call_renumber[long, long](deref(handle_ptr), + shuffled_major, + shuffled_minor, + num_partition_edges, + is_hyper_partitioned, + do_check, + mg_flag).release()) + + pair_original = ptr_renum_quad_64_64.get().get_dv_wrap() # original vertices: see helper + + + original_buffer = DeviceBuffer.c_from_unique_ptr(move(pair_original.first)) + original_buffer = Buffer(original_buffer) + + original_series = cudf.Series(data=original_buffer, dtype=vertex_t) + + # extract unique_ptr[partition_offsets]: + # + uniq_partition_vector_64 = 
move(ptr_renum_quad_64_64.get().get_partition_offsets()) + + # create series out of a partition range from rank to rank+1: + # + if is_multi_gpu: + new_series = cudf.Series(np.arange(uniq_partition_vector_64.get()[0].at(rank_indx), + uniq_partition_vector_64.get()[0].at(rank_indx+1)), + dtype=vertex_t) + else: + new_series = cudf.Series(np.arange(0, num_global_verts), dtype=vertex_t) + + # create new cudf df + # + # and add the previous series to it: + # + renumbered_map = cudf.DataFrame() + renumbered_map['original_ids'] = original_series + renumbered_map['new_ids'] = new_series + + return renumbered_map, shuffled_df diff --git a/python/cugraph/structure/graph_primtypes.pxd b/python/cugraph/structure/graph_primtypes.pxd index e46f4092dd4..f109e5938a3 100644 --- a/python/cugraph/structure/graph_primtypes.pxd +++ b/python/cugraph/structure/graph_primtypes.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -18,6 +18,8 @@ from libcpp cimport bool from libcpp.memory cimport unique_ptr +from libcpp.utility cimport pair +from libcpp.vector cimport vector from rmm._lib.device_buffer cimport device_buffer @@ -142,6 +144,89 @@ cdef extern from "functions.hpp" namespace "cugraph": ET *map_size) except + +# renumber_edgelist() interface: +# +# +# 1. `cdef extern partition_t`: +# +cdef extern from "experimental/graph_view.hpp" namespace "cugraph::experimental": + + cdef cppclass partition_t[vertex_t]: + pass + + +# 2. return type for shuffle: +# +cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": + + cdef cppclass major_minor_weights_t[vertex_t, weight_t]: + major_minor_weights_t(const handle_t &handle) + pair[unique_ptr[device_buffer], size_t] get_major_wrap() + pair[unique_ptr[device_buffer], size_t] get_minor_wrap() + pair[unique_ptr[device_buffer], size_t] get_weights_wrap() + + +ctypedef fused shuffled_vertices_t: + major_minor_weights_t[int, float] + major_minor_weights_t[int, double] + major_minor_weights_t[long, float] + major_minor_weights_t[long, double] + +# 3. 
return type for renumber: +# +cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": + + cdef cppclass renum_quad_t[vertex_t, edge_t]: + renum_quad_t(const handle_t &handle) + pair[unique_ptr[device_buffer], size_t] get_dv_wrap() + vertex_t& get_num_vertices() + edge_t& get_num_edges() + int get_part_row_size() + int get_part_col_size() + int get_part_comm_rank() + unique_ptr[vector[vertex_t]] get_partition_offsets() + pair[vertex_t, vertex_t] get_part_local_vertex_range() + vertex_t get_part_local_vertex_first() + vertex_t get_part_local_vertex_last() + pair[vertex_t, vertex_t] get_part_vertex_partition_range(size_t vertex_partition_idx) + vertex_t get_part_vertex_partition_first(size_t vertex_partition_idx) + vertex_t get_part_vertex_partition_last(size_t vertex_partition_idx) + vertex_t get_part_vertex_partition_size(size_t vertex_partition_idx) + size_t get_part_number_of_matrix_partitions() + vertex_t get_part_matrix_partition_major_first(size_t partition_idx) + vertex_t get_part_matrix_partition_major_last(size_t partition_idx) + vertex_t get_part_matrix_partition_major_value_start_offset(size_t partition_idx) + pair[vertex_t, vertex_t] get_part_matrix_partition_minor_range() + vertex_t get_part_matrix_partition_minor_first() + vertex_t get_part_matrix_partition_minor_last() + +# 4. `sort_and_shuffle_values()` wrapper: +# +cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": + + cdef unique_ptr[major_minor_weights_t[vertex_t, weight_t]] call_shuffle[vertex_t, edge_t, weight_t]( + const handle_t &handle, + vertex_t *edgelist_major_vertices, + vertex_t *edgelist_minor_vertices, + weight_t* edgelist_weights, + edge_t num_edges, + bool is_hyper_partitioned) except + + + +# 5. `renumber_edgelist()` wrapper +# +cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": + + cdef unique_ptr[renum_quad_t[vertex_t, edge_t]] call_renumber[vertex_t, edge_t]( + const handle_t &handle, + vertex_t *edgelist_major_vertices, + vertex_t *edgelist_minor_vertices, + edge_t num_edges, + bool is_hyper_partitioned, + bool do_check, + bool multi_gpu) except + + + cdef extern from "" namespace "std" nogil: cdef unique_ptr[GraphCOO[int,int,float]] move(unique_ptr[GraphCOO[int,int,float]]) cdef unique_ptr[GraphCOO[int,int,double]] move(unique_ptr[GraphCOO[int,int,double]]) From 7266cdb34756dc8e9deab458b3b9dcf4498677d1 Mon Sep 17 00:00:00 2001 From: Rick Ratzel <3039903+rlratzel@users.noreply.github.com> Date: Wed, 3 Feb 2021 18:04:24 -0600 Subject: [PATCH 161/343] Update CMakeLists.txt files for consistency with RAPIDS and to support cugraph as an external project and other tech debt removal (#1367) This PR makes cuGraph's cmake files more consistent with other RAPIDS libs by matching the minimum required cmake version, adding `project()` statements to cugraph's thirdparty modules, and using `CMAKE_CURRENT_SOURCE_DIR` appropriately so paths are relative to the CMakeLists.txt file rather than the top-level cmake dir of the project (since that may not be the cugraph cpp dir in the case of cugraph being used as an external project by another application). This also adds a `CUDA_ARCHITECTURES=OFF` setting to suppress the warning printed for each test target. This setting may be replaced/changed once the findcudatoolkit feature is used in a future cmake version. 
This also removes the Arrow and GTest cmake files since Arrow is not a direct dependency and those files were not being used, and GTest is now a build requirement in the conda dev environment and does not need to be built from source (the conda dev env files have been updated accordingly). This PR also addresses much of #1075 , but not completely since gunrock is still using `ExternalProject` due to (I think) updates that need to be made to their cmake files to support this. This was tested by observing a successful clean build, however it was **not** tested by creating a separate cmake application to simulate cugraph being used as a 3rd party package. Note: the changes in this PR were modeled after https://github.com/rapidsai/rmm/pull/541 closes #1137 closes #1266 Authors: - Rick Ratzel (@rlratzel) Approvers: - Chuck Hastings (@ChuckHastings) - AJ Schmidt (@ajschmidt8) - Brad Rees (@BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1367 --- conda/environments/builddocs.yml | 19 --- conda/environments/cugraph_dev_cuda10.1.yml | 2 + conda/environments/cugraph_dev_cuda10.2.yml | 2 + conda/environments/cugraph_dev_cuda11.0.yml | 2 + conda/recipes/libcugraph/meta.yaml | 4 +- cpp/CMakeLists.txt | 146 +++++++++--------- cpp/cmake/Modules/ConfigureArrow.cmake | 98 ------------ cpp/cmake/Modules/ConfigureGoogleTest.cmake | 49 ------ .../Templates/Arrow.CMakeLists.txt.cmake | 19 --- .../Templates/GoogleTest.CMakeLists.txt.cmake | 19 --- cpp/tests/CMakeLists.txt | 73 +++++---- python/setuputils.py | 47 ++++-- 12 files changed, 162 insertions(+), 318 deletions(-) delete mode 100644 conda/environments/builddocs.yml delete mode 100644 cpp/cmake/Modules/ConfigureArrow.cmake delete mode 100644 cpp/cmake/Modules/ConfigureGoogleTest.cmake delete mode 100644 cpp/cmake/Templates/Arrow.CMakeLists.txt.cmake delete mode 100644 cpp/cmake/Templates/GoogleTest.CMakeLists.txt.cmake diff --git a/conda/environments/builddocs.yml b/conda/environments/builddocs.yml deleted file mode 100644 index 89bd44a5542..00000000000 --- a/conda/environments/builddocs.yml +++ /dev/null @@ -1,19 +0,0 @@ -name: builddocs -channels: -- rapidsai -- pytorch -- conda-forge -- numba -- defaults -dependencies: -- python=3.6* -- cugraph=0.8* -- cudatoolkit=9.2 -- cudf=0.8* -- pyarrow=0.12.1.* -- cython=0.29* -- pip: - - numpydoc - - sphinx - - sphinx-rtd-theme - - sphinxcontrib-websupport diff --git a/conda/environments/cugraph_dev_cuda10.1.yml b/conda/environments/cugraph_dev_cuda10.1.yml index 59fc988bde3..369a1f1205a 100644 --- a/conda/environments/cugraph_dev_cuda10.1.yml +++ b/conda/environments/cugraph_dev_cuda10.1.yml @@ -45,3 +45,5 @@ dependencies: - rapids-pytest-benchmark - doxygen - pytest-cov +- gtest +- gmock diff --git a/conda/environments/cugraph_dev_cuda10.2.yml b/conda/environments/cugraph_dev_cuda10.2.yml index a60730a5476..82903a6c2a5 100644 --- a/conda/environments/cugraph_dev_cuda10.2.yml +++ b/conda/environments/cugraph_dev_cuda10.2.yml @@ -45,3 +45,5 @@ dependencies: - rapids-pytest-benchmark - doxygen - pytest-cov +- gtest +- gmock diff --git a/conda/environments/cugraph_dev_cuda11.0.yml b/conda/environments/cugraph_dev_cuda11.0.yml index d85afa7d667..4720183e0b0 100644 --- a/conda/environments/cugraph_dev_cuda11.0.yml +++ b/conda/environments/cugraph_dev_cuda11.0.yml @@ -45,3 +45,5 @@ dependencies: - rapids-pytest-benchmark - doxygen - pytest-cov +- gtest +- gmock diff --git a/conda/recipes/libcugraph/meta.yaml b/conda/recipes/libcugraph/meta.yaml index 211ec920d27..955c723bd27 100644 --- 
a/conda/recipes/libcugraph/meta.yaml +++ b/conda/recipes/libcugraph/meta.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2018, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. # Usage: # conda build -c nvidia -c rapidsai -c conda-forge -c defaults . @@ -32,6 +32,8 @@ requirements: - nccl>=2.7 - ucx-py {{ minor_version }} - ucx-proc=*=gpu + - gtest + - gmock run: - libcudf={{ minor_version }} - {{ pin_compatible('cudatoolkit', max_pin='x.x') }} diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 7871ce7581b..19bfd24a591 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -14,7 +14,7 @@ # limitations under the License. #============================================================================= -cmake_minimum_required(VERSION 3.12..3.17 FATAL_ERROR) +cmake_minimum_required(VERSION 3.18...3.18 FATAL_ERROR) project(CUGRAPH VERSION 0.18.0 LANGUAGES C CXX CUDA) @@ -156,19 +156,11 @@ endif(OpenMP_FOUND) # - find gtest ------------------------------------------------------------------------------------ if(BUILD_TESTS) - include(ConfigureGoogleTest) - - if(GTEST_FOUND) - message(STATUS - "Google C++ Testing Framework (Google Test) found in ${GTEST_ROOT}") - else() - message(AUTHOR_WARNING - "Google C++ Testing Framework (Google Test) not found: automated tests are disabled.") - endif(GTEST_FOUND) + find_package(GTest REQUIRED) endif(BUILD_TESTS) ################################################################################################### -# - RMM ------------------------------------------------------------------------------------------- +# - find RMM -------------------------------------------------------------------------------------- find_path(RMM_INCLUDE "rmm" HINTS @@ -178,6 +170,18 @@ find_path(RMM_INCLUDE "rmm" message(STATUS "RMM: RMM_INCLUDE set to ${RMM_INCLUDE}") +################################################################################################### +# - find NCCL ------------------------------------------------------------------------------------- + +if(NOT NCCL_PATH) + find_package(NCCL REQUIRED) +else() + message("-- Manually set NCCL PATH to ${NCCL_PATH}") + set(NCCL_INCLUDE_DIRS ${NCCL_PATH}/include) + set(NCCL_LIBRARIES ${NCCL_PATH}/lib/libnccl.so) +endif(NOT NCCL_PATH) + + ################################################################################################### # - Fetch Content --------------------------------------------------------------------------------- include(FetchContent) @@ -235,26 +239,60 @@ endif() set(LIBCUDACXX_INCLUDE_DIR "${libcudacxx_SOURCE_DIR}/include") message("set LIBCUDACXX_INCLUDE_DIR to: ${LIBCUDACXX_INCLUDE_DIR}") +# - CUHORNET +FetchContent_Declare( + cuhornet + GIT_REPOSITORY https://github.com/rapidsai/cuhornet.git + GIT_TAG 9cb8e8803852bd895a9c95c0fe778ad6eeefa7ad + GIT_SHALLOW true + SOURCE_SUBDIR hornet +) + +FetchContent_GetProperties(cuhornet) +if(NOT cuhornet_POPULATED) + message("populating cuhornet") + FetchContent_Populate(cuhornet) + # We are not using the cuhornet CMake targets, so no need to call `add_subdirectory()`. 
+endif() +set(CUHORNET_INCLUDE_DIR ${cuhornet_SOURCE_DIR} CACHE STRING "Path to cuhornet includes") + +# - raft - (header only) +# Only cloned if RAFT_PATH env variable is not defined +if(DEFINED ENV{RAFT_PATH}) + message(STATUS "RAFT_PATH environment variable detected.") + message(STATUS "RAFT_DIR set to $ENV{RAFT_PATH}") + set(RAFT_DIR "$ENV{RAFT_PATH}") + +else(DEFINED ENV{RAFT_PATH}) + message(STATUS "RAFT_PATH environment variable NOT detected, cloning RAFT") + + FetchContent_Declare( + raft + GIT_REPOSITORY https://github.com/rapidsai/raft.git + GIT_TAG 9dbf2c8a9134ce8135f7fe947ec523d874fcab6a + SOURCE_SUBDIR raft + ) + + FetchContent_GetProperties(raft) + if(NOT raft_POPULATED) + message("populating raft") + FetchContent_Populate(raft) + # We are not using any raft CMake targets, so no need to call `add_subdirectory()`. + endif() + + set(RAFT_DIR "${raft_SOURCE_DIR}") +endif(DEFINED ENV{RAFT_PATH}) ################################################################################################### # - External Projects ----------------------------------------------------------------------------- # https://cmake.org/cmake/help/v3.0/module/ExternalProject.html -include(ExternalProject) - -# - CUHORNET -set(CUHORNET_DIR ${CMAKE_CURRENT_BINARY_DIR}/cuhornet CACHE STRING "Path to cuhornet repo") -set(CUHORNET_INCLUDE_DIR ${CUHORNET_DIR}/src/cuhornet CACHE STRING "Path to cuhornet includes") +# FIXME: gunrock is the only external package still using ExternalProject +# instead of FetchContent. Consider migrating to FetchContent soon (this may +# require updates to the gunrock cmake files to support this). -ExternalProject_Add(cuhornet - GIT_REPOSITORY https://github.com/rapidsai/cuhornet.git - GIT_TAG 9cb8e8803852bd895a9c95c0fe778ad6eeefa7ad - PREFIX ${CUHORNET_DIR} - CONFIGURE_COMMAND "" - BUILD_COMMAND "" - INSTALL_COMMAND "" -) +include(ExternalProject) # - GUNROCK set(GUNROCK_DIR ${CMAKE_CURRENT_BINARY_DIR}/gunrock CACHE STRING "Path to gunrock repo") @@ -280,53 +318,9 @@ ExternalProject_Add(gunrock_ext ) add_library(gunrock STATIC IMPORTED) - add_dependencies(gunrock gunrock_ext) - set_property(TARGET gunrock PROPERTY IMPORTED_LOCATION ${GUNROCK_DIR}/src/gunrock_ext-build/lib/libgunrock.a) -# - NCCL -if(NOT NCCL_PATH) - find_package(NCCL REQUIRED) -else() - message("-- Manually set NCCL PATH to ${NCCL_PATH}") - set(NCCL_INCLUDE_DIRS ${NCCL_PATH}/include) - set(NCCL_LIBRARIES ${NCCL_PATH}/lib/libnccl.so) -endif(NOT NCCL_PATH) - -# - raft - (header only) ----------------------------------------------------- - -# Only cloned if RAFT_PATH env variable is not defined - -if(DEFINED ENV{RAFT_PATH}) - message(STATUS "RAFT_PATH environment variable detected.") - message(STATUS "RAFT_DIR set to $ENV{RAFT_PATH}") - set(RAFT_DIR "$ENV{RAFT_PATH}") - - ExternalProject_Add(raft - DOWNLOAD_COMMAND "" - SOURCE_DIR ${RAFT_DIR} - CONFIGURE_COMMAND "" - BUILD_COMMAND "" - INSTALL_COMMAND "") - -else(DEFINED ENV{RAFT_PATH}) - message(STATUS "RAFT_PATH environment variable NOT detected, cloning RAFT") - set(RAFT_DIR ${CMAKE_CURRENT_BINARY_DIR}/raft CACHE STRING "Path to RAFT repo") - - ExternalProject_Add(raft - GIT_REPOSITORY https://github.com/rapidsai/raft.git - GIT_TAG 9dbf2c8a9134ce8135f7fe947ec523d874fcab6a - PREFIX ${RAFT_DIR} - CONFIGURE_COMMAND "" - BUILD_COMMAND "" - INSTALL_COMMAND "") - - # Redefining RAFT_DIR so it coincides with the one inferred by env variable. 
- set(RAFT_DIR "${RAFT_DIR}/src/raft/") -endif(DEFINED ENV{RAFT_PATH}) - - ################################################################################################### # - library targets ------------------------------------------------------------------------------- @@ -383,7 +377,6 @@ add_library(cugraph SHARED # wait until after cugunrock is constructed. # add_dependencies(cugraph gunrock_ext) -add_dependencies(cugraph raft) ################################################################################################### # - include paths --------------------------------------------------------------------------------- @@ -465,16 +458,27 @@ target_link_libraries(cugraph PRIVATE ${OpenMP_CXX_LIB_NAMES}) endif(OpenMP_CXX_FOUND) +# CUDA_ARCHITECTURES=OFF implies cmake will not pass arch flags to the +# compiler. CUDA_ARCHITECTURES must be set to a non-empty value to prevent cmake +# warnings about policy CMP0104. With this setting, arch flags must be manually +# set! ("evaluate_gpu_archs(GPU_ARCHS)" is the current mechanism used in +# cpp/CMakeLists.txt for setting arch options). Run "cmake --help-policy +# CMP0104" for policy details. +# NOTE: the CUDA_ARCHITECTURES=OFF setting may be removed after migrating to the +# findcudatoolkit features in cmake 3.17+ +set_target_properties(cugraph PROPERTIES + CUDA_ARCHITECTURES OFF) + ################################################################################################### # - generate tests -------------------------------------------------------------------------------- if(BUILD_TESTS) if(GTEST_FOUND) # target_link_directories is added in cmake 3.13, and cmake advises to use this instead of - # link_directoires (we should switch to target_link_directories once 3.13 becomes the + # link_directories (we should switch to target_link_directories once 3.13 becomes the # minimum required version). link_directories(${GTEST_LIBRARY_DIR}) - add_subdirectory(${CMAKE_SOURCE_DIR}/tests) + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tests) endif(GTEST_FOUND) endif(BUILD_TESTS) diff --git a/cpp/cmake/Modules/ConfigureArrow.cmake b/cpp/cmake/Modules/ConfigureArrow.cmake deleted file mode 100644 index b27e53dd415..00000000000 --- a/cpp/cmake/Modules/ConfigureArrow.cmake +++ /dev/null @@ -1,98 +0,0 @@ -set(ARROW_ROOT ${CMAKE_BINARY_DIR}/arrow) - -set(ARROW_CMAKE_ARGS " -DARROW_WITH_LZ4=OFF" - " -DARROW_WITH_ZSTD=OFF" - " -DARROW_WITH_BROTLI=OFF" - " -DARROW_WITH_SNAPPY=OFF" - " -DARROW_WITH_ZLIB=OFF" - " -DARROW_BUILD_STATIC=ON" - " -DARROW_BUILD_SHARED=OFF" - " -DARROW_BOOST_USE_SHARED=ON" - " -DARROW_BUILD_TESTS=OFF" - " -DARROW_TEST_LINKAGE=OFF" - " -DARROW_TEST_MEMCHECK=OFF" - " -DARROW_BUILD_BENCHMARKS=OFF" - " -DARROW_IPC=ON" - " -DARROW_COMPUTE=OFF" - " -DARROW_CUDA=OFF" - " -DARROW_JEMALLOC=OFF" - " -DARROW_BOOST_VENDORED=OFF" - " -DARROW_PYTHON=OFF" - " -DARROW_USE_GLOG=OFF" - " -DCMAKE_VERBOSE_MAKEFILE=ON") - -configure_file("${CMAKE_SOURCE_DIR}/cmake/Templates/Arrow.CMakeLists.txt.cmake" - "${ARROW_ROOT}/CMakeLists.txt") - -file(MAKE_DIRECTORY "${ARROW_ROOT}/build") -file(MAKE_DIRECTORY "${ARROW_ROOT}/install") - -execute_process( - COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" . 
- RESULT_VARIABLE ARROW_CONFIG - WORKING_DIRECTORY ${ARROW_ROOT}) - -if(ARROW_CONFIG) - message(FATAL_ERROR "Configuring Arrow failed: " ${ARROW_CONFIG}) -endif(ARROW_CONFIG) - -set(PARALLEL_BUILD -j) -if($ENV{PARALLEL_LEVEL}) - set(NUM_JOBS $ENV{PARALLEL_LEVEL}) - set(PARALLEL_BUILD "${PARALLEL_BUILD}${NUM_JOBS}") -endif($ENV{PARALLEL_LEVEL}) - -if(${NUM_JOBS}) - if(${NUM_JOBS} EQUAL 1) - message(STATUS "ARROW BUILD: Enabling Sequential CMake build") - elseif(${NUM_JOBS} GREATER 1) - message(STATUS "ARROW BUILD: Enabling Parallel CMake build with ${NUM_JOBS} jobs") - endif(${NUM_JOBS} EQUAL 1) -else() - message(STATUS "ARROW BUILD: Enabling Parallel CMake build with all threads") -endif(${NUM_JOBS}) - -execute_process( - COMMAND ${CMAKE_COMMAND} --build .. -- ${PARALLEL_BUILD} - RESULT_VARIABLE ARROW_BUILD - WORKING_DIRECTORY ${ARROW_ROOT}/build) - -if(ARROW_BUILD) - message(FATAL_ERROR "Building Arrow failed: " ${ARROW_BUILD}) -endif(ARROW_BUILD) - -set(ARROW_GENERATED_IPC_DIR - "${ARROW_ROOT}/build/src/arrow/ipc") - -configure_file(${ARROW_GENERATED_IPC_DIR}/File_generated.h ${CMAKE_SOURCE_DIR}/include/cudf/ipc_generated/File_generated.h COPYONLY) -configure_file(${ARROW_GENERATED_IPC_DIR}/Message_generated.h ${CMAKE_SOURCE_DIR}/include/cudf/ipc_generated/Message_generated.h COPYONLY) -configure_file(${ARROW_GENERATED_IPC_DIR}/Schema_generated.h ${CMAKE_SOURCE_DIR}/include/cudf/ipc_generated/Schema_generated.h COPYONLY) -configure_file(${ARROW_GENERATED_IPC_DIR}/Tensor_generated.h ${CMAKE_SOURCE_DIR}/include/cudf/ipc_generated/Tensor_generated.h COPYONLY) - -message(STATUS "Arrow installed here: " ${ARROW_ROOT}/install) -set(ARROW_LIBRARY_DIR "${ARROW_ROOT}/install/lib") -set(ARROW_INCLUDE_DIR "${ARROW_ROOT}/install/include") - -find_library(ARROW_LIB arrow - NO_DEFAULT_PATH - HINTS "${ARROW_LIBRARY_DIR}") - -if(ARROW_LIB) - message(STATUS "Arrow library: " ${ARROW_LIB}) - set(ARROW_FOUND TRUE) -endif(ARROW_LIB) - -set(FLATBUFFERS_ROOT "${ARROW_ROOT}/build/flatbuffers_ep-prefix/src/flatbuffers_ep-install") - -message(STATUS "FlatBuffers installed here: " ${FLATBUFFERS_ROOT}) -set(FLATBUFFERS_INCLUDE_DIR "${FLATBUFFERS_ROOT}/include") -set(FLATBUFFERS_LIBRARY_DIR "${FLATBUFFERS_ROOT}/lib") - -add_definitions(-DARROW_METADATA_V4) -add_definitions(-DARROW_VERSION=1210) - - - - - - diff --git a/cpp/cmake/Modules/ConfigureGoogleTest.cmake b/cpp/cmake/Modules/ConfigureGoogleTest.cmake deleted file mode 100644 index 9fac40f4649..00000000000 --- a/cpp/cmake/Modules/ConfigureGoogleTest.cmake +++ /dev/null @@ -1,49 +0,0 @@ -set(GTEST_ROOT "${CMAKE_BINARY_DIR}/googletest") - -set(GTEST_CMAKE_ARGS "") - #" -Dgtest_build_samples=ON" - #" -DCMAKE_VERBOSE_MAKEFILE=ON") - -configure_file("${CMAKE_SOURCE_DIR}/cmake/Templates/GoogleTest.CMakeLists.txt.cmake" - "${GTEST_ROOT}/CMakeLists.txt") - -file(MAKE_DIRECTORY "${GTEST_ROOT}/build") -file(MAKE_DIRECTORY "${GTEST_ROOT}/install") - -execute_process(COMMAND ${CMAKE_COMMAND} -G ${CMAKE_GENERATOR} . 
- RESULT_VARIABLE GTEST_CONFIG - WORKING_DIRECTORY ${GTEST_ROOT}) - -if(GTEST_CONFIG) - message(FATAL_ERROR "Configuring GoogleTest failed: " ${GTEST_CONFIG}) -endif(GTEST_CONFIG) - -set(PARALLEL_BUILD -j) -if($ENV{PARALLEL_LEVEL}) - set(NUM_JOBS $ENV{PARALLEL_LEVEL}) - set(PARALLEL_BUILD "${PARALLEL_BUILD}${NUM_JOBS}") -endif($ENV{PARALLEL_LEVEL}) - -if(${NUM_JOBS}) - if(${NUM_JOBS} EQUAL 1) - message(STATUS "GTEST BUILD: Enabling Sequential CMake build") - elseif(${NUM_JOBS} GREATER 1) - message(STATUS "GTEST BUILD: Enabling Parallel CMake build with ${NUM_JOBS} jobs") - endif(${NUM_JOBS} EQUAL 1) -else() - message(STATUS "GTEST BUILD: Enabling Parallel CMake build with all threads") -endif(${NUM_JOBS}) - -execute_process(COMMAND ${CMAKE_COMMAND} --build .. -- ${PARALLEL_BUILD} - RESULT_VARIABLE GTEST_BUILD - WORKING_DIRECTORY ${GTEST_ROOT}/build) - -if(GTEST_BUILD) - message(FATAL_ERROR "Building GoogleTest failed: " ${GTEST_BUILD}) -endif(GTEST_BUILD) - -message(STATUS "GoogleTest installed here: " ${GTEST_ROOT}/install) -set(GTEST_INCLUDE_DIR "${GTEST_ROOT}/install/include") -set(GTEST_LIBRARY_DIR "${GTEST_ROOT}/install/lib") -set(GTEST_FOUND TRUE) - diff --git a/cpp/cmake/Templates/Arrow.CMakeLists.txt.cmake b/cpp/cmake/Templates/Arrow.CMakeLists.txt.cmake deleted file mode 100644 index b1eaf3f0efa..00000000000 --- a/cpp/cmake/Templates/Arrow.CMakeLists.txt.cmake +++ /dev/null @@ -1,19 +0,0 @@ -cmake_minimum_required(VERSION 3.12) - -include(ExternalProject) - -ExternalProject_Add(Arrow - GIT_REPOSITORY https://github.com/apache/arrow.git - GIT_TAG apache-arrow-0.12.1 - SOURCE_DIR "${ARROW_ROOT}/arrow" - SOURCE_SUBDIR "cpp" - BINARY_DIR "${ARROW_ROOT}/build" - INSTALL_DIR "${ARROW_ROOT}/install" - CMAKE_ARGS ${ARROW_CMAKE_ARGS} -DCMAKE_INSTALL_PREFIX=${ARROW_ROOT}/install) - - - - - - - diff --git a/cpp/cmake/Templates/GoogleTest.CMakeLists.txt.cmake b/cpp/cmake/Templates/GoogleTest.CMakeLists.txt.cmake deleted file mode 100644 index 66e1dc85a50..00000000000 --- a/cpp/cmake/Templates/GoogleTest.CMakeLists.txt.cmake +++ /dev/null @@ -1,19 +0,0 @@ -cmake_minimum_required(VERSION 3.12) - -include(ExternalProject) - -ExternalProject_Add(GoogleTest - GIT_REPOSITORY https://github.com/google/googletest.git - GIT_TAG release-1.8.0 - SOURCE_DIR "${GTEST_ROOT}/googletest" - BINARY_DIR "${GTEST_ROOT}/build" - INSTALL_DIR "${GTEST_ROOT}/install" - CMAKE_ARGS ${GTEST_CMAKE_ARGS} -DCMAKE_INSTALL_PREFIX=${GTEST_ROOT}/install) - - - - - - - - diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 59b84a88fe7..3d1fe88cc7d 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -30,21 +30,31 @@ function(ConfigureTest CMAKE_TEST_NAME CMAKE_TEST_SRC CMAKE_EXTRA_LIBS) "${CUCO_INCLUDE_DIR}" "${LIBCUDACXX_INCLUDE_DIR}" "${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}" - "${GTEST_INCLUDE_DIR}" "${RMM_INCLUDE}" "${CUDF_INCLUDE}" "${CUDF_INCLUDE}/libcudf/libcudacxx" "${NCCL_INCLUDE_DIRS}" - "${CMAKE_SOURCE_DIR}/../thirdparty/mmio" - "${CMAKE_SOURCE_DIR}/include" - "${CMAKE_SOURCE_DIR}/src" + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio" + "${CMAKE_CURRENT_SOURCE_DIR}/../include" + "${CMAKE_CURRENT_SOURCE_DIR}/../src" "${CMAKE_CURRENT_SOURCE_DIR}" "${RAFT_DIR}/cpp/include" ) target_link_libraries(${CMAKE_TEST_NAME} PRIVATE - gtest gmock_main gmock cugraph ${CUDF_LIBRARY} ${CMAKE_EXTRA_LIBS} ${NCCL_LIBRARIES} cudart cuda cublas cusparse cusolver curand) + cugraph + GTest::GTest + GTest::Main + ${CUDF_LIBRARY} + ${NCCL_LIBRARIES} + cudart + cuda + cublas + cusparse + cusolver + 
curand + ${CMAKE_EXTRA_LIBS}) if(OpenMP_CXX_FOUND) target_link_libraries(${CMAKE_TEST_NAME} PRIVATE @@ -96,8 +106,17 @@ function(ConfigureTest CMAKE_TEST_NAME CMAKE_TEST_SRC CMAKE_EXTRA_LIBS) ### BUILD_RPATH "${TARGET_BUILD_RPATH}") ${OpenMP_CXX_LIB_NAMES}) endif(OpenMP_CXX_FOUND) + + # CUDA_ARCHITECTURES=OFF implies cmake will not pass arch flags to the + # compiler. CUDA_ARCHITECTURES must be set to a non-empty value to prevent + # cmake warnings about policy CMP0104. With this setting, arch flags must be + # manually set! ("evaluate_gpu_archs(GPU_ARCHS)" is the current mechanism + # used in cpp/CMakeLists.txt for setting arch options). + # Run "cmake --help-policy CMP0104" for policy details. + # NOTE: the CUDA_ARCHITECTURES=OFF setting may be removed after migrating to + # the findcudatoolkit features in cmake 3.17+ set_target_properties(${CMAKE_TEST_NAME} PROPERTIES - RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/gtests/") + CUDA_ARCHITECTURES OFF) add_test(NAME ${CMAKE_TEST_NAME} COMMAND ${CMAKE_TEST_NAME}) endfunction() @@ -120,7 +139,7 @@ endif(RAPIDS_DATASET_ROOT_DIR) # - katz centrality tests ------------------------------------------------------------------------- set(KATZ_TEST_SRC - "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" "${CMAKE_CURRENT_SOURCE_DIR}/centrality/katz_centrality_test.cu") ConfigureTest(KATZ_TEST "${KATZ_TEST_SRC}" "") @@ -129,13 +148,13 @@ set(KATZ_TEST_SRC # - betweenness centrality tests ------------------------------------------------------------------ set(BETWEENNESS_TEST_SRC - "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" "${CMAKE_CURRENT_SOURCE_DIR}/centrality/betweenness_centrality_test.cu") ConfigureTest(BETWEENNESS_TEST "${BETWEENNESS_TEST_SRC}" "") set(EDGE_BETWEENNESS_TEST_SRC - "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" "${CMAKE_CURRENT_SOURCE_DIR}/centrality/edge_betweenness_centrality_test.cu") ConfigureTest(EDGE_BETWEENNESS_TEST "${EDGE_BETWEENNESS_TEST_SRC}" "") @@ -144,7 +163,7 @@ set(EDGE_BETWEENNESS_TEST_SRC # - pagerank tests -------------------------------------------------------------------------------- set(PAGERANK_TEST_SRC - "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" "${CMAKE_CURRENT_SOURCE_DIR}/pagerank/pagerank_test.cpp") ConfigureTest(PAGERANK_TEST "${PAGERANK_TEST_SRC}" "") @@ -153,7 +172,7 @@ ConfigureTest(PAGERANK_TEST "${PAGERANK_TEST_SRC}" "") # - SSSP tests ------------------------------------------------------------------------------------ set(SSSP_TEST_SRCS - "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" "${CMAKE_CURRENT_SOURCE_DIR}/traversal/sssp_test.cu") ConfigureTest(SSSP_TEST "${SSSP_TEST_SRCS}" "") @@ -162,7 +181,7 @@ ConfigureTest(SSSP_TEST "${SSSP_TEST_SRCS}" "") # - BFS tests ------------------------------------------------------------------------------------- set(BFS_TEST_SRCS - "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" "${CMAKE_CURRENT_SOURCE_DIR}/traversal/bfs_test.cu") ConfigureTest(BFS_TEST "${BFS_TEST_SRCS}" "") @@ -171,7 +190,7 @@ ConfigureTest(BFS_TEST "${BFS_TEST_SRCS}" "") # - LOUVAIN tests --------------------------------------------------------------------------------- set(LOUVAIN_TEST_SRC - 
"${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" "${CMAKE_CURRENT_SOURCE_DIR}/community/louvain_test.cu") ConfigureTest(LOUVAIN_TEST "${LOUVAIN_TEST_SRC}" "") @@ -180,7 +199,7 @@ ConfigureTest(LOUVAIN_TEST "${LOUVAIN_TEST_SRC}" "") # - LEIDEN tests --------------------------------------------------------------------------------- set(LEIDEN_TEST_SRC - "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" "${CMAKE_CURRENT_SOURCE_DIR}/community/leiden_test.cpp") ConfigureTest(LEIDEN_TEST "${LEIDEN_TEST_SRC}" "") @@ -213,7 +232,7 @@ ConfigureTest(TRIANGLE_TEST "${TRIANGLE_TEST_SRC}" "") # - RENUMBERING tests ----------------------------------------------------------------------------- set(RENUMBERING_TEST_SRC - "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" "${CMAKE_CURRENT_SOURCE_DIR}/renumber/renumber_test.cu") ConfigureTest(RENUMBERING_TEST "${RENUMBERING_TEST_SRC}" "") @@ -222,7 +241,7 @@ ConfigureTest(RENUMBERING_TEST "${RENUMBERING_TEST_SRC}" "") # - FORCE ATLAS 2 tests -------------------------------------------------------------------------- set(FA2_TEST_SRC - "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" "${CMAKE_CURRENT_SOURCE_DIR}/layout/force_atlas2_test.cu") ConfigureTest(FA2_TEST "${FA2_TEST_SRC}" "") @@ -231,7 +250,7 @@ ConfigureTest(FA2_TEST "${FA2_TEST_SRC}" "") # - CONNECTED COMPONENTS tests ------------------------------------------------------------------- set(CONNECT_TEST_SRC - "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" "${CMAKE_CURRENT_SOURCE_DIR}/components/con_comp_test.cu") ConfigureTest(CONNECT_TEST "${CONNECT_TEST_SRC}" "") @@ -240,7 +259,7 @@ ConfigureTest(CONNECT_TEST "${CONNECT_TEST_SRC}" "") # - STRONGLY CONNECTED COMPONENTS tests ---------------------------------------------------------- set(SCC_TEST_SRC - "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" "${CMAKE_CURRENT_SOURCE_DIR}/components/scc_test.cu") ConfigureTest(SCC_TEST "${SCC_TEST_SRC}" "") @@ -257,7 +276,7 @@ ConfigureTest(HUNGARIAN_TEST "${HUNGARIAN_TEST_SRC}" "") # - MST tests ---------------------------------------------------------------------------- set(MST_TEST_SRC - "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" "${CMAKE_CURRENT_SOURCE_DIR}/tree/mst_test.cu") ConfigureTest(MST_TEST "${MST_TEST_SRC}" "") @@ -267,7 +286,7 @@ ConfigureTest(MST_TEST "${MST_TEST_SRC}" "") # - Experimental Graph tests ---------------------------------------------------------------------- set(EXPERIMENTAL_GRAPH_TEST_SRCS - "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" "${CMAKE_CURRENT_SOURCE_DIR}/experimental/graph_test.cpp") ConfigureTest(EXPERIMENTAL_GRAPH_TEST "${EXPERIMENTAL_GRAPH_TEST_SRCS}" "") @@ -276,7 +295,7 @@ ConfigureTest(EXPERIMENTAL_GRAPH_TEST "${EXPERIMENTAL_GRAPH_TEST_SRCS}" "") # - Experimental coarsening tests ----------------------------------------------------------------- set(EXPERIMENTAL_COARSEN_GRAPH_TEST_SRCS - "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" "${CMAKE_CURRENT_SOURCE_DIR}/experimental/coarsen_graph_test.cpp") 
ConfigureTest(EXPERIMENTAL_COARSEN_GRAPH_TEST "${EXPERIMENTAL_COARSEN_GRAPH_TEST_SRCS}" "") @@ -285,7 +304,7 @@ ConfigureTest(EXPERIMENTAL_COARSEN_GRAPH_TEST "${EXPERIMENTAL_COARSEN_GRAPH_TEST # - Experimental induced subgraph tests ----------------------------------------------------------- set(EXPERIMENTAL_INDUCED_SUBGRAPH_TEST_SRCS - "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" "${CMAKE_CURRENT_SOURCE_DIR}/experimental/induced_subgraph_test.cpp") ConfigureTest(EXPERIMENTAL_INDUCED_SUBGRAPH_TEST "${EXPERIMENTAL_INDUCED_SUBGRAPH_TEST_SRCS}" "") @@ -294,7 +313,7 @@ ConfigureTest(EXPERIMENTAL_INDUCED_SUBGRAPH_TEST "${EXPERIMENTAL_INDUCED_SUBGRAP # - Experimental BFS tests ------------------------------------------------------------------------ set(EXPERIMENTAL_BFS_TEST_SRCS - "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" "${CMAKE_CURRENT_SOURCE_DIR}/experimental/bfs_test.cpp") ConfigureTest(EXPERIMENTAL_BFS_TEST "${EXPERIMENTAL_BFS_TEST_SRCS}" "") @@ -303,7 +322,7 @@ ConfigureTest(EXPERIMENTAL_BFS_TEST "${EXPERIMENTAL_BFS_TEST_SRCS}" "") # - Experimental SSSP tests ----------------------------------------------------------------------- set(EXPERIMENTAL_SSSP_TEST_SRCS - "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" "${CMAKE_CURRENT_SOURCE_DIR}/experimental/sssp_test.cpp") ConfigureTest(EXPERIMENTAL_SSSP_TEST "${EXPERIMENTAL_SSSP_TEST_SRCS}" "") @@ -312,7 +331,7 @@ ConfigureTest(EXPERIMENTAL_SSSP_TEST "${EXPERIMENTAL_SSSP_TEST_SRCS}" "") # - Experimental PAGERANK tests ------------------------------------------------------------------- set(EXPERIMENTAL_PAGERANK_TEST_SRCS - "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" "${CMAKE_CURRENT_SOURCE_DIR}/experimental/pagerank_test.cpp") ConfigureTest(EXPERIMENTAL_PAGERANK_TEST "${EXPERIMENTAL_PAGERANK_TEST_SRCS}" "") @@ -321,7 +340,7 @@ ConfigureTest(EXPERIMENTAL_PAGERANK_TEST "${EXPERIMENTAL_PAGERANK_TEST_SRCS}" "" # - Experimental LOUVAIN tests ------------------------------------------------------------------- set(EXPERIMENTAL_LOUVAIN_TEST_SRCS - "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" "${CMAKE_CURRENT_SOURCE_DIR}/experimental/louvain_test.cu") ConfigureTest(EXPERIMENTAL_LOUVAIN_TEST "${EXPERIMENTAL_LOUVAIN_TEST_SRCS}" "") @@ -330,7 +349,7 @@ ConfigureTest(EXPERIMENTAL_LOUVAIN_TEST "${EXPERIMENTAL_LOUVAIN_TEST_SRCS}" "") # - Experimental KATZ_CENTRALITY tests ------------------------------------------------------------ set(EXPERIMENTAL_KATZ_CENTRALITY_TEST_SRCS - "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" "${CMAKE_CURRENT_SOURCE_DIR}/experimental/katz_centrality_test.cpp") ConfigureTest(EXPERIMENTAL_KATZ_CENTRALITY_TEST "${EXPERIMENTAL_KATZ_CENTRALITY_TEST_SRCS}" "") diff --git a/python/setuputils.py b/python/setuputils.py index 360526c2b56..47eaf74d4b6 100644 --- a/python/setuputils.py +++ b/python/setuputils.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2018-2020, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -79,8 +79,8 @@ def use_raft_package(raft_path, cpp_build_path, if you want to change RAFT location. 
- Uses RAFT located in $RAFT_PATH if $RAFT_PATH exists. - Otherwise it will look for RAFT in the libcugraph build folder, - located either in the default location ../cpp/build or in - $CUGRAPH_BUILD_PATH. + located either in the default locations ../cpp/build/raft, + ../cpp/build/_deps/raft-src, or in $CUGRAPH_BUILD_PATH. -Otherwise it will clone RAFT into _external_repositories. - Branch/git tag cloned is located in git_info_file in this case. @@ -88,17 +88,25 @@ def use_raft_package(raft_path, cpp_build_path, ------- raft_include_path: Str Path to the C++ include folder of RAFT + """ if os.path.isdir('cugraph/raft'): raft_path = os.path.realpath('cugraph/raft') # walk up two dirs from `python/raft` raft_path = os.path.join(raft_path, '..', '..') print("-- Using existing RAFT folder") + elif cpp_build_path and os.path.isdir(os.path.join(cpp_build_path, + '_deps/raft-src')): + raft_path = os.path.join(cpp_build_path, '_deps/raft-src') + raft_path = os.path.realpath(raft_path) + print("-- Using existing RAFT folder in CPP build dir from cmake " + "FetchContent") elif cpp_build_path and os.path.isdir(os.path.join(cpp_build_path, 'raft/src/raft')): raft_path = os.path.join(cpp_build_path, 'raft/src/raft') raft_path = os.path.realpath(raft_path) - print("-- Using existing RAFT folder in CPP build dir") + print("-- Using existing RAFT folder in CPP build dir from cmake " + "ExternalProject") elif isinstance(raft_path, (str, os.PathLike)): print('-- Using RAFT_PATH argument') elif os.environ.get('RAFT_PATH', False) is not False: @@ -254,8 +262,8 @@ def get_repo_cmake_info(names, file_path): `ExternalProject_Add(name` file_path : String Relative path of the location of the CMakeLists.txt (or the cmake - module which contains ExternalProject_Add definitions) to extract - the information. + module which contains FetchContent_Declare or ExternalProject_Add + definitions) to extract the information. Returns ------- @@ -264,22 +272,31 @@ def get_repo_cmake_info(names, file_path): where results[name][0] is the url of the repo and repo_info[repo][1] is the tag/commit hash to be cloned as specified by cmake. - """ with open(file_path, encoding='utf-8') as f: s = f.read() results = {} + cmake_ext_proj_decls = ["FetchContent_Declare", "ExternalProject_Add"] + for name in names: - res = re.findall(r'ExternalProject_Add\(' + re.escape(name) - + '\s.*GIT_REPOSITORY.*\s.*GIT_TAG.*', # noqa: W605 - s) - - res = re.sub(' +', ' ', res[0]) - res = res.split(' ') - res = [res[2][:-1], res[4]] - results[name] = res + res = None + for decl in cmake_ext_proj_decls: + res = re.search(f'{decl}\(\s*' # noqa: W605 + + '(' + re.escape(name) + ')' + + '\s+.*GIT_REPOSITORY\s+(\S+)\s+.+' # noqa: W605 + + '\s+.*GIT_TAG\s+(\S+)', # noqa: W605 + s) + if res: + break + if res is None: + raise RuntimeError('Could not find any of the following ' + f'statements: {cmake_ext_proj_decls}, for ' + f'module "{name}" in file "{file_path}" with ' + 'GIT_REPOSITORY and GIT_TAG settings') + + results[res.group(1)] = [res.group(2), res.group(3)] return results From 95d4decf77fafc8e25f80a5996c49273f9b0d759 Mon Sep 17 00:00:00 2001 From: Alex Fender Date: Thu, 4 Feb 2021 15:14:43 -0600 Subject: [PATCH 162/343] EgoNet (#1365) ### Description Let the egonet graph of a node x be the subgraph that includes the neighborhood of x and all edges between them. Here is a basic description (1-hop, single seed) : - Add center node x to the graph. - Go through all the neighbors y of this center node x, add edge (x, y) to the graph. 
- For each neighbor y of center node x, go through all the neighbors z of center node x, if there is an edge between y and z in the original graph, add edge (y, z) to our new graph. ### Proposed solution Rather than doing custom one/two hops features, we propose a generic k-hops solution leveraging BFS with cutoff to identify neighbors within a given radius. In addition to the single source version (matching what's available in Nx), we propose to handle multiple sources (seeds) at once which allows better performances. This PR also enables a path in the experimental stack for returning multiple graphs (edge list format) from CUDA prims to python without using the legacy classes. As future work, we want to enable concurrency for the cutoff BFS for each seed. This is dependent of #957 Close #475 Authors: - Alex Fender (@afender) - @Iroy30 Approvers: - @Iroy30 - Brad Rees (@BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1365 --- README.md | 1 + cpp/CMakeLists.txt | 1 + cpp/include/algorithms.hpp | 37 ++- cpp/include/utilities/cython.hpp | 42 +++- cpp/src/community/egonet.cu | 210 ++++++++++++++++++ cpp/src/utilities/cython.cu | 76 +++++++ cpp/tests/CMakeLists.txt | 8 + cpp/tests/community/egonet_test.cu | 173 +++++++++++++++ docs/source/api.rst | 7 + python/cugraph/__init__.py | 2 + python/cugraph/community/__init__.py | 4 +- python/cugraph/community/egonet.pxd | 23 ++ python/cugraph/community/egonet.py | 145 ++++++++++++ python/cugraph/community/egonet_wrapper.pyx | 116 ++++++++++ python/cugraph/structure/graph_primtypes.pxd | 13 ++ python/cugraph/structure/number_map.py | 4 +- python/cugraph/tests/test_egonet.py | 101 +++++++++ python/cugraph/tree/minimum_spanning_tree.pxd | 2 +- python/cugraph/utilities/utils.py | 3 +- 19 files changed, 957 insertions(+), 11 deletions(-) create mode 100644 cpp/src/community/egonet.cu create mode 100644 cpp/tests/community/egonet_test.cu create mode 100644 python/cugraph/community/egonet.pxd create mode 100644 python/cugraph/community/egonet.py create mode 100644 python/cugraph/community/egonet_wrapper.pyx create mode 100644 python/cugraph/tests/test_egonet.py diff --git a/README.md b/README.md index 2f2f13b3167..03abd6c72af 100644 --- a/README.md +++ b/README.md @@ -51,6 +51,7 @@ As of Release 0.18 - including 0.18 nightly | | Betweenness Centrality | Single-GPU | | | | Edge Betweenness Centrality | Single-GPU | | | Community | | | | +| | EgoNet | Single-GPU | | | | Leiden | Single-GPU | | | | Louvain | Multi-GPU | | | | Ensemble Clustering for Graphs | Single-GPU | | diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 19bfd24a591..000f6837747 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -354,6 +354,7 @@ add_library(cugraph SHARED src/community/ECG.cu src/community/triangles_counting.cu src/community/extract_subgraph_by_vertex.cu + src/community/egonet.cu src/cores/core_number.cu src/traversal/two_hop_neighbors.cu src/components/connectivity.cu diff --git a/cpp/include/algorithms.hpp b/cpp/include/algorithms.hpp index a57e550521e..2a7caed9d7b 100644 --- a/cpp/include/algorithms.hpp +++ b/cpp/include/algorithms.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ * limitations under the License. 
*/ #pragma once - +#include #include #include #include @@ -815,6 +815,7 @@ template std::unique_ptr> extract_subgraph_vertex(GraphCOOView const &graph, VT const *vertices, VT num_vertices); +} // namespace subgraph /** * @brief Wrapper function for Nvgraph balanced cut clustering @@ -837,7 +838,6 @@ std::unique_ptr> extract_subgraph_vertex(GraphCOOView @@ -1191,6 +1191,35 @@ void katz_centrality(raft::handle_t const &handle, bool has_initial_guess = false, bool normalize = false, bool do_expensive_check = false); - +/** + * @brief returns induced EgoNet subgraph(s) of neighbors centered at nodes in source_vertex within + * a given radius. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam edge_t Type of edge identifiers. Needs to be an integral type. + * @tparam weight_t Type of edge weights. Needs to be a floating point type. + * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) + * or multi-GPU (true). + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Graph view object of, we extract induced egonet subgraphs from @p graph_view. + * @param source_vertex Pointer to egonet center vertices (size == @p n_subgraphs). + * @param n_subgraphs Number of induced EgoNet subgraphs to extract (ie. number of elements in @p + * source_vertex). + * @param radius Include all neighbors of distance <= radius from @p source_vertex. + * @return std::tuple, rmm::device_uvector, + * rmm::device_uvector, rmm::device_uvector> Quadraplet of edge source vertices, + * edge destination vertices, edge weights, and edge offsets for each induced EgoNet subgraph. + */ +template +std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector> +extract_ego(raft::handle_t const &handle, + graph_view_t const &graph_view, + vertex_t *source_vertex, + vertex_t n_subgraphs, + vertex_t radius); } // namespace experimental } // namespace cugraph diff --git a/cpp/include/utilities/cython.hpp b/cpp/include/utilities/cython.hpp index d95d410af29..a22553777e2 100644 --- a/cpp/include/utilities/cython.hpp +++ b/cpp/include/utilities/cython.hpp @@ -110,6 +110,39 @@ struct graph_container_t { experimental::graph_properties_t graph_props; }; +/** + * @brief Owning struct. Allows returning multiple edge lists and edge offsets. + * cython only + * + * @param number_of_vertices The total number of vertices + * @param number_of_edges The total number of edges (number of elements in src_indices, + dst_indices and edge_data) + * @param number_of_subgraph The number of subgraphs, number of elements in subgraph_offsets - 1 + * @param source_indices This array of size E (number of edges) contains + * the index of the + * source for each edge. Indices must be in the range [0, V-1]. + * @param destination_indices This array of size E (number of edges) contains + * the index of the + * destination for each edge. Indices must be in the range [0, V-1]. + * @param edge_data This array size E (number of edges) contains + * the weight for each + * edge. This array can be null in which case the graph is considered + * unweighted. 
+ * @param subgraph_offsets This array size number_of_subgraph + 1 contains edge offsets + for each subgraph + + + */ +struct cy_multi_edgelists_t { + size_t number_of_vertices; + size_t number_of_edges; + size_t number_of_subgraph; + std::unique_ptr src_indices; + std::unique_ptr dst_indices; + std::unique_ptr edge_data; + std::unique_ptr subgraph_offsets; +}; + // replacement for std::tuple<,,>, since std::tuple is not // supported in cython // @@ -268,7 +301,6 @@ struct renum_quad_t { vertex_t nv_; edge_t ne_; }; - // FIXME: finish description for vertex_partition_offsets // // Factory function for populating an empty graph container with a new graph @@ -406,6 +438,14 @@ void call_sssp(raft::handle_t const& handle, vertex_t* predecessors, const vertex_t source_vertex); +// Wrapper for calling egonet through a graph container +template +std::unique_ptr call_egonet(raft::handle_t const& handle, + graph_container_t const& graph_container, + vertex_t* source_vertex, + vertex_t n_subgraphs, + vertex_t radius); + // wrapper for shuffling: // template diff --git a/cpp/src/community/egonet.cu b/cpp/src/community/egonet.cu new file mode 100644 index 00000000000..fa788aa307b --- /dev/null +++ b/cpp/src/community/egonet.cu @@ -0,0 +1,210 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Alex Fender afender@nvida.com +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include +#include "experimental/graph.hpp" +#include "utilities/graph_utils.cuh" + +#include +#include + +namespace { + +/* +Description +Let the egonet graph of a node x be the subgraph that includes node x, the neighborhood of x, and +all edges between them. Naive algorithm +- Add center node x to the graph. +- Go through all the neighbors y of this center node x, add edge (x, y) to the graph. +- For each neighbor y of center node x, go through all the neighbors z of center node x, if there is +an edge between y and z in original graph, add edge (y, z) to our new graph. + +Rather than doing custom one/two hops features, we propose a generic k-hops solution leveraging BFS +cutoff and subgraph extraction +*/ + +template +std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector> +extract( + raft::handle_t const &handle, + cugraph::experimental::graph_view_t const &csr_view, + vertex_t *source_vertex, + vertex_t n_subgraphs, + vertex_t radius) +{ + auto v = csr_view.get_number_of_vertices(); + auto e = csr_view.get_number_of_edges(); + auto stream = handle.get_stream(); + float avg_degree = e / v; + rmm::device_vector neighbors_offsets(n_subgraphs + 1); + rmm::device_vector neighbors; + + // It is the right thing to accept device memory for source_vertex + // FIXME consider adding a device API to BFS (ie. 
accept source on the device) + std::vector h_source_vertex(n_subgraphs); + raft::update_host(&h_source_vertex[0], source_vertex, n_subgraphs, stream); + + // reserve some reasonable memory, but could grow larger than that + neighbors.reserve(v + avg_degree * n_subgraphs * radius); + neighbors_offsets[0] = 0; + // each source should be done concurently in the future + for (vertex_t i = 0; i < n_subgraphs; i++) { + // BFS with cutoff + rmm::device_vector reached(v); + rmm::device_vector predecessors(v); // not used + bool direction_optimizing = false; + cugraph::experimental::bfs(handle, + csr_view, + reached.data().get(), + predecessors.data().get(), + h_source_vertex[i], + direction_optimizing, + radius); + + // identify reached vertex ids from distance array + thrust::transform(rmm::exec_policy(stream)->on(stream), + thrust::make_counting_iterator(vertex_t{0}), + thrust::make_counting_iterator(v), + reached.begin(), + reached.begin(), + [sentinel = std::numeric_limits::max()] __device__( + auto id, auto val) { return val < sentinel ? id : sentinel; }); + + // removes unreached data + auto reached_end = thrust::remove(rmm::exec_policy(stream)->on(stream), + reached.begin(), + reached.end(), + std::numeric_limits::max()); + + // update extraction input + size_t n_reached = thrust::distance(reached.begin(), reached_end); + neighbors_offsets[i + 1] = neighbors_offsets[i] + n_reached; + if (neighbors_offsets[i + 1] > neighbors.capacity()) + neighbors.reserve(neighbors_offsets[i + 1] * 2); + neighbors.insert(neighbors.end(), reached.begin(), reached_end); + } + + // extract + return cugraph::experimental::extract_induced_subgraphs( + handle, csr_view, neighbors_offsets.data().get(), neighbors.data().get(), n_subgraphs); +} +} // namespace +namespace cugraph { +namespace experimental { +template +std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector> +extract_ego(raft::handle_t const &handle, + graph_view_t const &graph_view, + vertex_t *source_vertex, + vertex_t n_subgraphs, + vertex_t radius) +{ + if (multi_gpu) { + CUGRAPH_FAIL("Unimplemented."); + return std::make_tuple(rmm::device_uvector(0, handle.get_stream()), + rmm::device_uvector(0, handle.get_stream()), + rmm::device_uvector(0, handle.get_stream()), + rmm::device_uvector(0, handle.get_stream())); + } + CUGRAPH_EXPECTS(n_subgraphs > 0, "Need at least one source to extract the egonet from"); + CUGRAPH_EXPECTS(n_subgraphs < graph_view.get_number_of_vertices(), + "Can't have more sources to extract from than vertices in the graph"); + CUGRAPH_EXPECTS(radius > 0, "Radius should be at least 1"); + CUGRAPH_EXPECTS(radius < graph_view.get_number_of_vertices(), "radius is too large"); + // source_vertex range is checked in bfs. 
+ + return extract( + handle, graph_view, source_vertex, n_subgraphs, radius); +} + +// SG FP32 +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector> +extract_ego(raft::handle_t const &, + graph_view_t const &, + int32_t *, + int32_t, + int32_t); +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector> +extract_ego(raft::handle_t const &, + graph_view_t const &, + int32_t *, + int32_t, + int32_t); +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector> +extract_ego(raft::handle_t const &, + graph_view_t const &, + int64_t *, + int64_t, + int64_t); + +// SG FP64 +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector> +extract_ego(raft::handle_t const &, + graph_view_t const &, + int32_t *, + int32_t, + int32_t); +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector> +extract_ego(raft::handle_t const &, + graph_view_t const &, + int32_t *, + int32_t, + int32_t); +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector> +extract_ego(raft::handle_t const &, + graph_view_t const &, + int64_t *, + int64_t, + int64_t); +} // namespace experimental +} // namespace cugraph diff --git a/cpp/src/utilities/cython.cu b/cpp/src/utilities/cython.cu index 7f221304a0a..ce7b45c1c08 100644 --- a/cpp/src/utilities/cython.cu +++ b/cpp/src/utilities/cython.cu @@ -647,6 +647,55 @@ void call_bfs(raft::handle_t const& handle, } } +// Wrapper for calling extract_egonet through a graph container +// FIXME : this should not be a legacy COO and it is not clear how to handle C++ api return type as +// is.graph_container Need to figure out how to return edge lists +template +std::unique_ptr call_egonet(raft::handle_t const& handle, + graph_container_t const& graph_container, + vertex_t* source_vertex, + vertex_t n_subgraphs, + vertex_t radius) +{ + if (graph_container.edgeType == numberTypeEnum::int32Type) { + auto graph = + detail::create_graph(handle, graph_container); + auto g = cugraph::experimental::extract_ego(handle, + graph->view(), + reinterpret_cast(source_vertex), + static_cast(n_subgraphs), + static_cast(radius)); + cy_multi_edgelists_t coo_contents{ + 0, // not used + std::get<0>(g).size(), + static_cast(n_subgraphs), + std::make_unique(std::get<0>(g).release()), + std::make_unique(std::get<1>(g).release()), + std::make_unique(std::get<2>(g).release()), + std::make_unique(std::get<3>(g).release())}; + return std::make_unique(std::move(coo_contents)); + } else if (graph_container.edgeType == numberTypeEnum::int64Type) { + auto graph = + detail::create_graph(handle, graph_container); + auto g = cugraph::experimental::extract_ego(handle, + graph->view(), + reinterpret_cast(source_vertex), + static_cast(n_subgraphs), + static_cast(radius)); + cy_multi_edgelists_t coo_contents{ + 0, // not used + std::get<0>(g).size(), + static_cast(n_subgraphs), + std::make_unique(std::get<0>(g).release()), + std::make_unique(std::get<1>(g).release()), + std::make_unique(std::get<2>(g).release()), + std::make_unique(std::get<3>(g).release())}; + return std::make_unique(std::move(coo_contents)); + } else { + CUGRAPH_FAIL("vertexType/edgeType combination unsupported"); + } +} + // Wrapper for calling SSSP through a graph container template void call_sssp(raft::handle_t const& handle, @@ -939,6 +988,33 @@ template void call_bfs(raft::handle_t const& handle, double* sp_counters, const int64_t start_vertex, bool directed); 
+template std::unique_ptr call_egonet( + raft::handle_t const& handle, + graph_container_t const& graph_container, + int32_t* source_vertex, + int32_t n_subgraphs, + int32_t radius); + +template std::unique_ptr call_egonet( + raft::handle_t const& handle, + graph_container_t const& graph_container, + int32_t* source_vertex, + int32_t n_subgraphs, + int32_t radius); + +template std::unique_ptr call_egonet( + raft::handle_t const& handle, + graph_container_t const& graph_container, + int64_t* source_vertex, + int64_t n_subgraphs, + int64_t radius); + +template std::unique_ptr call_egonet( + raft::handle_t const& handle, + graph_container_t const& graph_container, + int64_t* source_vertex, + int64_t n_subgraphs, + int64_t radius); template void call_sssp(raft::handle_t const& handle, graph_container_t const& graph_container, diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 3d1fe88cc7d..0ab8251e7e7 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -228,6 +228,14 @@ set(TRIANGLE_TEST_SRC ConfigureTest(TRIANGLE_TEST "${TRIANGLE_TEST_SRC}" "") +################################################################################################### +# - EGO tests -------------------------------------------------------------------------------- + +set(EGO_TEST_SRC + "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/community/egonet_test.cu") + +ConfigureTest(EGO_TEST "${EGO_TEST_SRC}" "") ################################################################################################### # - RENUMBERING tests ----------------------------------------------------------------------------- diff --git a/cpp/tests/community/egonet_test.cu b/cpp/tests/community/egonet_test.cu new file mode 100644 index 00000000000..ec031228998 --- /dev/null +++ b/cpp/tests/community/egonet_test.cu @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +typedef struct InducedEgo_Usecase_t { + std::string graph_file_full_path{}; + std::vector ego_sources{}; + int32_t radius; + bool test_weighted{false}; + + InducedEgo_Usecase_t(std::string const& graph_file_path, + std::vector const& ego_sources, + int32_t radius, + bool test_weighted) + : ego_sources(ego_sources), radius(radius), test_weighted(test_weighted) + { + if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { + graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; + } else { + graph_file_full_path = graph_file_path; + } + }; +} InducedEgo_Usecase; + +class Tests_InducedEgo : public ::testing::TestWithParam { + public: + Tests_InducedEgo() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + template + void run_current_test(InducedEgo_Usecase const& configuration) + { + raft::handle_t handle{}; + + auto graph = cugraph::test:: + read_graph_from_matrix_market_file( + handle, configuration.graph_file_full_path, configuration.test_weighted); + auto graph_view = graph.view(); + + rmm::device_uvector d_ego_sources(configuration.ego_sources.size(), + handle.get_stream()); + + raft::update_device(d_ego_sources.data(), + configuration.ego_sources.data(), + configuration.ego_sources.size(), + handle.get_stream()); + + rmm::device_uvector d_ego_edgelist_src(0, handle.get_stream()); + rmm::device_uvector d_ego_edgelist_dst(0, handle.get_stream()); + rmm::device_uvector d_ego_edgelist_weights(0, handle.get_stream()); + rmm::device_uvector d_ego_edge_offsets(0, handle.get_stream()); + + std::tie(d_ego_edgelist_src, d_ego_edgelist_dst, d_ego_edgelist_weights, d_ego_edge_offsets) = + cugraph::experimental::extract_ego(handle, + graph_view, + d_ego_sources.data(), + static_cast(configuration.ego_sources.size()), + configuration.radius); + + std::vector h_cugraph_ego_edge_offsets(d_ego_edge_offsets.size()); + std::vector h_cugraph_ego_edgelist_src(d_ego_edgelist_src.size()); + std::vector h_cugraph_ego_edgelist_dst(d_ego_edgelist_dst.size()); + raft::update_host(h_cugraph_ego_edgelist_src.data(), + d_ego_edgelist_src.data(), + d_ego_edgelist_src.size(), + handle.get_stream()); + raft::update_host(h_cugraph_ego_edgelist_dst.data(), + d_ego_edgelist_dst.data(), + d_ego_edgelist_dst.size(), + handle.get_stream()); + raft::update_host(h_cugraph_ego_edge_offsets.data(), + d_ego_edge_offsets.data(), + d_ego_edge_offsets.size(), + handle.get_stream()); + ASSERT_TRUE(d_ego_edge_offsets.size() == (configuration.ego_sources.size() + 1)); + ASSERT_TRUE(d_ego_edgelist_src.size() == d_ego_edgelist_dst.size()); + if (configuration.test_weighted) + ASSERT_TRUE(d_ego_edgelist_src.size() == d_ego_edgelist_weights.size()); + ASSERT_TRUE(h_cugraph_ego_edge_offsets[configuration.ego_sources.size()] == + d_ego_edgelist_src.size()); + for (size_t i = 0; i < configuration.ego_sources.size(); i++) + ASSERT_TRUE(h_cugraph_ego_edge_offsets[i] < h_cugraph_ego_edge_offsets[i + 1]); + auto n_vertices = graph_view.get_number_of_vertices(); + for (size_t i = 0; i < d_ego_edgelist_src.size(); i++) { + ASSERT_TRUE(h_cugraph_ego_edgelist_src[i] >= 0); + ASSERT_TRUE(h_cugraph_ego_edgelist_src[i] < n_vertices); + ASSERT_TRUE(h_cugraph_ego_edgelist_dst[i] >= 0); + ASSERT_TRUE(h_cugraph_ego_edgelist_dst[i] < n_vertices); + } + + 
/* + // For inspecting data + std::vector h_cugraph_ego_edgelist_weights(d_ego_edgelist_weights.size()); + if (configuration.test_weighted) { + raft::update_host(h_cugraph_ego_edgelist_weights.data(), + d_ego_edgelist_weights.data(), + d_ego_edgelist_weights.size(), + handle.get_stream()); + } + raft::print_host_vector("offsets", + &h_cugraph_ego_edge_offsets[0], + h_cugraph_ego_edge_offsets.size(), + std::cout); + raft::print_host_vector("src", + &h_cugraph_ego_edgelist_src[0], + h_cugraph_ego_edgelist_src.size(), + std::cout); + raft::print_host_vector("dst", + &h_cugraph_ego_edgelist_dst[0], + h_cugraph_ego_edgelist_dst.size(), + std::cout); + raft::print_host_vector("weights", + &h_cugraph_ego_edgelist_weights[0], + h_cugraph_ego_edgelist_weights.size(), + std::cout); + */ + } +}; + +TEST_P(Tests_InducedEgo, CheckInt32Int32FloatUntransposed) +{ + run_current_test(GetParam()); +} + +INSTANTIATE_TEST_CASE_P( + simple_test, + Tests_InducedEgo, + ::testing::Values( + InducedEgo_Usecase("test/datasets/karate.mtx", std::vector{0}, 1, false), + InducedEgo_Usecase("test/datasets/karate.mtx", std::vector{0}, 2, false), + InducedEgo_Usecase("test/datasets/karate.mtx", std::vector{1}, 3, false), + InducedEgo_Usecase("test/datasets/karate.mtx", std::vector{10, 0, 5}, 2, false), + InducedEgo_Usecase("test/datasets/karate.mtx", std::vector{9, 3, 10}, 2, false), + InducedEgo_Usecase("test/datasets/karate.mtx", std::vector{5, 12, 13}, 2, true))); + +CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/docs/source/api.rst b/docs/source/api.rst index b36fc6449e5..dcdf3e6ff33 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -51,6 +51,13 @@ Katz Centrality Community ========= +EgoNet +------------------------------------ + +.. automodule:: cugraph.community.egonet + :members: + :undoc-members: + Ensemble clustering for graphs (ECG) ------------------------------------ diff --git a/python/cugraph/__init__.py b/python/cugraph/__init__.py index 455da7fa51f..46013903a38 100644 --- a/python/cugraph/__init__.py +++ b/python/cugraph/__init__.py @@ -24,6 +24,8 @@ analyzeClustering_ratio_cut, subgraph, triangles, + ego_graph, + batched_ego_graphs, ) from cugraph.structure import ( diff --git a/python/cugraph/community/__init__.py b/python/cugraph/community/__init__.py index d3bb6472894..9cc92637e20 100644 --- a/python/cugraph/community/__init__.py +++ b/python/cugraph/community/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -25,3 +25,5 @@ from cugraph.community.triangle_count import triangles from cugraph.community.ktruss_subgraph import ktruss_subgraph from cugraph.community.ktruss_subgraph import k_truss +from cugraph.community.egonet import ego_graph +from cugraph.community.egonet import batched_ego_graphs diff --git a/python/cugraph/community/egonet.pxd b/python/cugraph/community/egonet.pxd new file mode 100644 index 00000000000..3ddf929674f --- /dev/null +++ b/python/cugraph/community/egonet.pxd @@ -0,0 +1,23 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from cugraph.structure.graph_primtypes cimport * + +cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": + cdef unique_ptr[cy_multi_edgelists_t] call_egonet[vertex_t, weight_t]( + const handle_t &handle, + const graph_container_t &g, + vertex_t* source_vertex, + vertex_t n_subgraphs, + vertex_t radius) except + diff --git a/python/cugraph/community/egonet.py b/python/cugraph/community/egonet.py new file mode 100644 index 00000000000..9ff12158b13 --- /dev/null +++ b/python/cugraph/community/egonet.py @@ -0,0 +1,145 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from cugraph.community import egonet_wrapper +import cudf +from cugraph.utilities import ( + ensure_cugraph_obj, + import_optional, +) +from cugraph.utilities import cugraph_to_nx + +# optional dependencies used for handling different input types +nx = import_optional("networkx") + + +def _convert_graph_to_output_type(G, input_type): + """ + Given a cugraph.Graph, convert it to a new type appropriate for the + graph algos in this module, based on input_type. + """ + if (nx is not None) and (input_type in [nx.Graph, nx.DiGraph]): + return cugraph_to_nx(G) + + else: + return G + + +def _convert_df_series_to_output_type(df, offsets, input_type): + """ + Given a cudf.DataFrame df, convert it to a new type appropriate for the + graph algos in this module, based on input_type. + """ + if (nx is not None) and (input_type in [nx.Graph, nx.DiGraph]): + return df.to_pandas(), offsets.values_host.tolist() + + else: + return df, offsets + + +def ego_graph(G, n, radius=1, center=True, undirected=False, distance=None): + """ + Compute the induced subgraph of neighbors centered at node n, + within a given radius. + + Parameters + ---------- + G : cugraph.Graph, networkx.Graph, CuPy or SciPy sparse matrix + Graph or matrix object, which should contain the connectivity + information. Edge weights, if present, should be single or double + precision floating point values. + n : integer + A single node + radius: integer, optional + Include all neighbors of distance<=radius from n. + center: bool, optional + Defaults to True. False is not supported + undirected: bool, optional + Defaults to False. True is not supported + distance: key, optional + Distances are counted in hops from n. Other cases are not supported. + + Returns + ------- + G_ego : cuGraph.Graph or networkx.Graph + A graph descriptor with a minimum spanning tree or forest. 
+ The networkx graph will not have all attributes copied over + """ + + (G, input_type) = ensure_cugraph_obj(G, nx_weight_attr="weight") + result_graph = type(G)() + + if G.renumbered is True: + n = G.lookup_internal_vertex_id(cudf.Series([n])) + + df, offsets = egonet_wrapper.egonet(G, n, radius) + + if G.renumbered: + df = G.unrenumber(df, "src") + df = G.unrenumber(df, "dst") + + if G.edgelist.weights: + result_graph.from_cudf_edgelist( + df, source="src", destination="dst", edge_attr="weight" + ) + else: + result_graph.from_cudf_edgelist(df, source="src", destination="dst") + return _convert_graph_to_output_type(result_graph, input_type) + + +def batched_ego_graphs( + G, seeds, radius=1, center=True, undirected=False, distance=None +): + """ + Compute the induced subgraph of neighbors for each node in seeds + within a given radius. + + Parameters + ---------- + G : cugraph.Graph, networkx.Graph, CuPy or SciPy sparse matrix + Graph or matrix object, which should contain the connectivity + information. Edge weights, if present, should be single or double + precision floating point values. + seeds : cudf.Series or list + Specifies the seeds of the induced egonet subgraphs + radius: integer, optional + Include all neighbors of distance<=radius from n. + center: bool, optional + Defaults to True. False is not supported + undirected: bool, optional + Defaults to False. True is not supported + distance: key, optional + Distances are counted in hops from n. Other cases are not supported. + + Returns + ------- + ego_edge_lists : cudf.DataFrame or pandas.DataFrame + GPU data frame containing all induced sources identifiers, + destination identifiers, edge weights + seeds_offsets: cudf.Series + Series containing the starting offset in the returned edge list + for each seed. + """ + + (G, input_type) = ensure_cugraph_obj(G, nx_weight_attr="weight") + + if G.renumbered is True: + seeds = G.lookup_internal_vertex_id(cudf.Series(seeds)) + + df, offsets = egonet_wrapper.egonet(G, seeds, radius) + + if G.renumbered: + df = G.unrenumber(df, "src", preserve_order=True) + df = G.unrenumber(df, "dst", preserve_order=True) + + return _convert_df_series_to_output_type(df, offsets, input_type) diff --git a/python/cugraph/community/egonet_wrapper.pyx b/python/cugraph/community/egonet_wrapper.pyx new file mode 100644 index 00000000000..122dedbfabd --- /dev/null +++ b/python/cugraph/community/egonet_wrapper.pyx @@ -0,0 +1,116 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
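+#
+# Cython wrapper around cugraph::cython::call_egonet: the COO edge list of
+# the input graph is packed into a graph_container_t, call_egonet is
+# dispatched on the edge weight type (float32 or float64), and the returned
+# device buffers (source/destination vertices, optional weights, and the
+# per-seed subgraph offsets) are converted back into cudf objects.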
+ +from cugraph.community.egonet cimport call_egonet +from cugraph.structure.graph_primtypes cimport * +from libcpp cimport bool +from libc.stdint cimport uintptr_t +from cugraph.structure import graph_primtypes_wrapper +import cudf +import rmm +import numpy as np +import numpy.ctypeslib as ctypeslib +from rmm._lib.device_buffer cimport DeviceBuffer +from cudf.core.buffer import Buffer + + +def egonet(input_graph, vertices, radius=1): + """ + Call egonet + """ + # FIXME: Offsets and indices are currently hardcoded to int, but this may + # not be acceptable in the future. + numberTypeMap = {np.dtype("int32") : numberTypeEnum.int32Type, + np.dtype("int64") : numberTypeEnum.int64Type, + np.dtype("float32") : numberTypeEnum.floatType, + np.dtype("double") : numberTypeEnum.doubleType} + + [src, dst] = [input_graph.edgelist.edgelist_df['src'], input_graph.edgelist.edgelist_df['dst']] + vertex_t = src.dtype + edge_t = np.dtype("int32") + weights = None + if input_graph.edgelist.weights: + weights = input_graph.edgelist.edgelist_df['weights'] + + num_verts = input_graph.number_of_vertices() + num_edges = input_graph.number_of_edges(directed_edges=True) + num_partition_edges = num_edges + + cdef uintptr_t c_src_vertices = src.__cuda_array_interface__['data'][0] + cdef uintptr_t c_dst_vertices = dst.__cuda_array_interface__['data'][0] + cdef uintptr_t c_edge_weights = NULL + if weights is not None: + c_edge_weights = weights.__cuda_array_interface__['data'][0] + weight_t = weights.dtype + else: + weight_t = np.dtype("float32") + + # Pointers for egonet + cdef uintptr_t c_source_vertex_ptr = vertices.__cuda_array_interface__['data'][0] + n_subgraphs = vertices.size + + cdef unique_ptr[handle_t] handle_ptr + handle_ptr.reset(new handle_t()) + handle_ = handle_ptr.get(); + + cdef graph_container_t graph_container + populate_graph_container(graph_container, + handle_[0], + c_src_vertices, c_dst_vertices, c_edge_weights, + NULL, + ((numberTypeMap[vertex_t])), + ((numberTypeMap[edge_t])), + ((numberTypeMap[weight_t])), + num_partition_edges, + num_verts, + num_edges, + False, + False, False) + + if(weight_t==np.dtype("float32")): + el_struct_ptr = move(call_egonet[int, float](handle_[0], + graph_container, + c_source_vertex_ptr, + n_subgraphs, + radius)) + else: + el_struct_ptr = move(call_egonet[int, double](handle_[0], + graph_container, + c_source_vertex_ptr, + n_subgraphs, + radius)) + + el_struct = move(el_struct_ptr.get()[0]) + src = DeviceBuffer.c_from_unique_ptr(move(el_struct.src_indices)) + dst = DeviceBuffer.c_from_unique_ptr(move(el_struct.dst_indices)) + wgt = DeviceBuffer.c_from_unique_ptr(move(el_struct.edge_data)) + src = Buffer(src) + dst = Buffer(dst) + wgt = Buffer(wgt) + + src = cudf.Series(data=src, dtype=vertex_t) + dst = cudf.Series(data=dst, dtype=vertex_t) + + df = cudf.DataFrame() + df['src'] = src + df['dst'] = dst + if wgt.nbytes != 0: + wgt = cudf.Series(data=wgt, dtype=weight_t) + df['weight'] = wgt + + offsets = DeviceBuffer.c_from_unique_ptr(move(el_struct.subgraph_offsets)) + offsets = Buffer(offsets) + offsets = cudf.Series(data=offsets, dtype="int") + + return df, offsets + diff --git a/python/cugraph/structure/graph_primtypes.pxd b/python/cugraph/structure/graph_primtypes.pxd index f109e5938a3..1a2891494ff 100644 --- a/python/cugraph/structure/graph_primtypes.pxd +++ b/python/cugraph/structure/graph_primtypes.pxd @@ -326,3 +326,16 @@ cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": int *local_vertices, int *local_edges, int *local_offsets) except + + 
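+    # Edge lists of several extracted subgraphs concatenated together, as
+    # returned by call_egonet: subgraph i occupies the half-open range
+    # [subgraph_offsets[i], subgraph_offsets[i + 1]) of src_indices,
+    # dst_indices and edge_data.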
+ cdef cppclass cy_multi_edgelists_t: + size_t number_of_vertices + size_t number_of_edges + size_t number_of_subgraph + unique_ptr[device_buffer] src_indices + unique_ptr[device_buffer] dst_indices + unique_ptr[device_buffer] edge_data + unique_ptr[device_buffer] subgraph_offsets + +cdef extern from "" namespace "std" nogil: + cdef cy_multi_edgelists_t move(cy_multi_edgelists_t) + cdef unique_ptr[cy_multi_edgelists_t] move(unique_ptr[cy_multi_edgelists_t]) diff --git a/python/cugraph/structure/number_map.py b/python/cugraph/structure/number_map.py index f1b8949eb5d..deb2b9f4114 100644 --- a/python/cugraph/structure/number_map.py +++ b/python/cugraph/structure/number_map.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -895,7 +895,7 @@ def unrenumber(self, df, column_name, preserve_order=False): if preserve_order: df = df.sort_values( index_name - ).drop(index_name).reset_index(drop=True) + ).drop(columns=index_name).reset_index(drop=True) if type(df) is dask_cudf.DataFrame: return df.map_partitions( diff --git a/python/cugraph/tests/test_egonet.py b/python/cugraph/tests/test_egonet.py new file mode 100644 index 00000000000..009fd1252f1 --- /dev/null +++ b/python/cugraph/tests/test_egonet.py @@ -0,0 +1,101 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import gc + +import pytest + +import cugraph +from cugraph.tests import utils + +# Temporarily suppress warnings till networkX fixes deprecation warnings +# (Using or importing the ABCs from 'collections' instead of from +# 'collections.abc' is deprecated, and in 3.8 it will stop working) for +# python 3.7. Also, this import networkx needs to be relocated in the +# third-party group once this gets fixed. 
+import warnings + +with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=DeprecationWarning) + import networkx as nx + +print("Networkx version : {} ".format(nx.__version__)) + +SEEDS = [0, 5, 13] +RADIUS = [1, 2, 3] + + +@pytest.mark.parametrize("graph_file", utils.DATASETS) +@pytest.mark.parametrize("seed", SEEDS) +@pytest.mark.parametrize("radius", RADIUS) +def test_ego_graph_nx(graph_file, seed, radius): + gc.collect() + + # Nx + df = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True) + Gnx = nx.from_pandas_edgelist( + df, create_using=nx.Graph(), source="0", target="1", edge_attr="weight" + ) + ego_nx = nx.ego_graph(Gnx, seed, radius=radius) + + # cugraph + ego_cugraph = cugraph.ego_graph(Gnx, seed, radius=radius) + + assert nx.is_isomorphic(ego_nx, ego_cugraph) + + +@pytest.mark.parametrize("graph_file", utils.DATASETS) +@pytest.mark.parametrize("seeds", [[0, 5, 13]]) +@pytest.mark.parametrize("radius", [1, 2, 3]) +def test_batched_ego_graphs(graph_file, seeds, radius): + """ + Compute the induced subgraph of neighbors for each node in seeds + within a given radius. + Parameters + ---------- + G : cugraph.Graph, networkx.Graph, CuPy or SciPy sparse matrix + Graph or matrix object, which should contain the connectivity + information. Edge weights, if present, should be single or double + precision floating point values. + seeds : cudf.Series + Specifies the seeds of the induced egonet subgraphs + radius: integer, optional + Include all neighbors of distance<=radius from n. + + Returns + ------- + ego_edge_lists : cudf.DataFrame + GPU data frame containing all induced sources identifiers, + destination identifiers, edge weights + seeds_offsets: cudf.Series + Series containing the starting offset in the returned edge list + for each seed. + """ + gc.collect() + + # Nx + df = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True) + Gnx = nx.from_pandas_edgelist( + df, create_using=nx.Graph(), source="0", target="1", edge_attr="weight" + ) + + # cugraph + df, offsets = cugraph.batched_ego_graphs(Gnx, seeds, radius=radius) + for i in range(len(seeds)): + ego_nx = nx.ego_graph(Gnx, seeds[i], radius=radius) + ego_df = df[offsets[i]:offsets[i+1]] + ego_cugraph = nx.from_pandas_edgelist(ego_df, + source="src", + target="dst", + edge_attr="weight") + assert nx.is_isomorphic(ego_nx, ego_cugraph) diff --git a/python/cugraph/tree/minimum_spanning_tree.pxd b/python/cugraph/tree/minimum_spanning_tree.pxd index 8cea2bee0cc..a38aee96605 100644 --- a/python/cugraph/tree/minimum_spanning_tree.pxd +++ b/python/cugraph/tree/minimum_spanning_tree.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at diff --git a/python/cugraph/utilities/utils.py b/python/cugraph/utilities/utils.py index d0140c02bca..39b789d7f79 100644 --- a/python/cugraph/utilities/utils.py +++ b/python/cugraph/utilities/utils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -223,7 +223,6 @@ def ensure_cugraph_obj(obj, nx_weight_attr=None, matrix_graph_type=None): from cugraph.utilities.nx_factory import convert_from_nx input_type = type(obj) - print(input_type) if input_type in [Graph, DiGraph, MultiGraph, MultiDiGraph]: return (obj, input_type) From d8d35e44150c9ea9aaabab7a547f2063e4bd73c3 Mon Sep 17 00:00:00 2001 From: Iroy30 <41401566+Iroy30@users.noreply.github.com> Date: Fri, 5 Feb 2021 09:57:50 -0600 Subject: [PATCH 163/343] update subgraph tests and remove legacy pagerank (#1378) Authors: - @Iroy30 Approvers: - Brad Rees (@BradReesWork) - Alex Fender (@afender) URL: https://github.com/rapidsai/cugraph/pull/1378 --- cpp/CMakeLists.txt | 2 - cpp/include/algorithms.hpp | 72 --- cpp/src/link_analysis/pagerank.cu | 433 ------------------ cpp/src/link_analysis/pagerank_1D.cu | 188 -------- cpp/src/link_analysis/pagerank_1D.cuh | 125 ----- cpp/src/structure/graph.cu | 2 + cpp/tests/CMakeLists.txt | 9 - cpp/tests/pagerank/pagerank_test.cpp | 206 --------- python/cugraph/tests/test_ecg.py | 4 +- python/cugraph/tests/test_k_truss_subgraph.py | 5 +- .../cugraph/tests/test_subgraph_extraction.py | 6 +- 11 files changed, 8 insertions(+), 1044 deletions(-) delete mode 100644 cpp/src/link_analysis/pagerank.cu delete mode 100644 cpp/src/link_analysis/pagerank_1D.cu delete mode 100644 cpp/src/link_analysis/pagerank_1D.cuh delete mode 100644 cpp/tests/pagerank/pagerank_test.cpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 000f6837747..58b5b386fc8 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -337,8 +337,6 @@ add_library(cugraph SHARED src/utilities/cython.cu src/structure/graph.cu src/linear_assignment/hungarian.cu - src/link_analysis/pagerank.cu - src/link_analysis/pagerank_1D.cu src/link_analysis/gunrock_hits.cpp src/traversal/bfs.cu src/traversal/sssp.cu diff --git a/cpp/include/algorithms.hpp b/cpp/include/algorithms.hpp index 2a7caed9d7b..5ad0a374364 100644 --- a/cpp/include/algorithms.hpp +++ b/cpp/include/algorithms.hpp @@ -22,78 +22,6 @@ namespace cugraph { -/** - * @brief Find the PageRank vertex values for a graph. - * - * cuGraph computes an approximation of the Pagerank eigenvector using the power method. - * The number of iterations depends on the properties of the network itself; it increases - * when the tolerance descreases and/or alpha increases toward the limiting value of 1. - * The user is free to use default values or to provide inputs for the initial guess, - * tolerance and maximum number of iterations. - - * - * @throws cugraph::logic_error with a custom message when an error - occurs. - * - * @tparam VT Type of vertex identifiers. Supported value : int (signed, - 32-bit) - * @tparam ET Type of edge identifiers. Supported value : int (signed, - 32-bit) - * @tparam WT Type of edge weights. Supported value : float or double. - * - * @param[in] handle Library handle (RAFT). If a communicator is set in the handle, - the multi GPU version will be selected. - * @param[in] graph cuGraph graph descriptor, should contain the connectivity - information as a transposed adjacency list (CSC). Edge weights are not used for this algorithm. - * @param[in] alpha The damping factor alpha represents the probability to follow - an outgoing edge, standard value is 0.85. Thus, 1.0-alpha is the probability to “teleport” to a - random vertex. Alpha should be greater than 0.0 and strictly lower than 1.0. - * The initial guess must not be the vector of 0s. 
Any value other - than 1 or 0 is treated as an invalid value. - * @param[in] pagerank Array of size V. Should contain the initial guess if - has_guess=true. In this case the initial guess cannot be the vector of 0s. Memory is provided and - owned by the caller. - * @param[in] personalization_subset_size (optional) Supported on single-GPU, on the roadmap for - Multi-GPU. The number of vertices for to personalize. Initialized to 0 by default. - * @param[in] personalization_subset (optional) Supported on single-GPU, on the roadmap for - Multi-GPU..= Array of size personalization_subset_size containing vertices for running personalized - pagerank. Initialized to nullptr by default. Memory is provided and owned by the caller. - * @param[in] personalization_values (optional) Supported on single-GPU, on the roadmap for - Multi-GPU. Array of size personalization_subset_size containing values associated with - personalization_subset vertices. Initialized to nullptr by default. Memory is provided and owned by - the caller. - * @param[in] tolerance Supported on single-GPU. Set the tolerance the approximation, - this parameter should be a small magnitude value. - * The lower the tolerance the better the approximation. If this - value is 0.0f, cuGraph will use the default value which is 1.0E-5. - * Setting too small a tolerance can lead to non-convergence due - to numerical roundoff. Usually values between 0.01 and 0.00001 are acceptable. - * @param[in] max_iter (optional) The maximum number of iterations before an answer is - returned. This can be used to limit the execution time and do an early exit before the solver - reaches the convergence tolerance. - * If this value is lower or equal to 0 cuGraph will use the - default value, which is 500. - * @param[in] has_guess (optional) Supported on single-GPU. This parameter is used to - notify cuGraph if it should use a user-provided initial guess. False means the user does not have a - guess, in this case cuGraph will use a uniform vector set to 1/V. - * If the value is True, cuGraph will read the pagerank parameter - and use this as an initial guess. - * @param[out] *pagerank The PageRank : pagerank[i] is the PageRank of vertex i. Memory - remains provided and owned by the caller. - * - */ -template -void pagerank(raft::handle_t const &handle, - GraphCSCView const &graph, - WT *pagerank, - VT personalization_subset_size = 0, - VT *personalization_subset = nullptr, - WT *personalization_values = nullptr, - double alpha = 0.85, - double tolerance = 1e-5, - int64_t max_iter = 500, - bool has_guess = false); - /** * @brief Compute jaccard similarity coefficient for all vertices * diff --git a/cpp/src/link_analysis/pagerank.cu b/cpp/src/link_analysis/pagerank.cu deleted file mode 100644 index 2dcd3d73f61..00000000000 --- a/cpp/src/link_analysis/pagerank.cu +++ /dev/null @@ -1,433 +0,0 @@ -/* - * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved. - * - * NVIDIA CORPORATION and its licensors retain all intellectual property - * and proprietary rights in and to this software, related documentation - * and any modifications thereto. Any use, reproduction, disclosure or - * distribution of this software and related documentation without an express - * license agreement from NVIDIA CORPORATION is strictly prohibited. 
- * - */ - -// Pagerank solver -// Author: Alex Fender afender@nvidia.com - -#include -#include -#include -#include -#include -#include -#include -#include -#include "cub/cub.cuh" - -#include -#include -#include - -#include -#include "pagerank_1D.cuh" -#include "utilities/graph_utils.cuh" - -#include - -namespace cugraph { -namespace detail { - -#ifdef DEBUG -#define PR_VERBOSE -#endif - -template -bool pagerankIteration(raft::handle_t const &handle, - IndexType n, - IndexType e, - IndexType const *cscPtr, - IndexType const *cscInd, - ValueType *cscVal, - ValueType alpha, - ValueType *a, - ValueType *b, - float tolerance, - int iter, - int max_iter, - ValueType *&tmp, - void *cub_d_temp_storage, - size_t cub_temp_storage_bytes, - ValueType *&pr, - ValueType *residual) -{ - ValueType dot_res; -//#if defined(CUDART_VERSION) and CUDART_VERSION >= 11000 -#if 1 - { - raft::matrix::sparse_matrix_t const r_csr_m{ - handle, cscPtr, cscInd, cscVal, n, e}; - r_csr_m.mv(1.0, tmp, 0.0, pr); - } -#else - CUDA_TRY(cub::DeviceSpmv::CsrMV(cub_d_temp_storage, - cub_temp_storage_bytes, - cscVal, - (IndexType *)cscPtr, - (IndexType *)cscInd, - tmp, - pr, - n, - n, - e)); -#endif - scal(n, alpha, pr); - dot_res = dot(n, a, tmp); - axpy(n, dot_res, b, pr); - scal(n, (ValueType)1.0 / nrm2(n, pr), pr); - axpy(n, (ValueType)-1.0, pr, tmp); - *residual = nrm2(n, tmp); - if (*residual < tolerance) { - scal(n, (ValueType)1.0 / nrm1(n, pr), pr); - return true; - } else { - if (iter < max_iter) { - // FIXME: Copy the pagerank vector results to the tmp vector, since there - // are still raw pointers in pagerank pointing to tmp vector locations - // that were std::swapped out in the solver. A thrust::swap would - // probably be more efficent if the vectors were passed everywhere instead - // of pointers. std::swap is unsafe though. Just copying for now, as this - // may soon be replaced by the pattern accelerator. 
- copy(n, pr, tmp); - } else { - scal(n, (ValueType)1.0 / nrm1(n, pr), pr); - } - return false; - } -} - -template -int pagerankSolver(raft::handle_t const &handle, - IndexType n, - IndexType e, - IndexType const *cscPtr, - IndexType const *cscInd, - ValueType *cscVal, - IndexType *prsVtx, - ValueType *prsVal, - IndexType prsLen, - bool has_personalization, - ValueType alpha, - ValueType *a, - bool has_guess, - float tolerance, - int max_iter, - ValueType *&pagerank_vector, - ValueType *&residual) -{ - int max_it, i = 0; - float tol; - bool converged = false; - ValueType randomProbability = static_cast(1.0 / n); - ValueType *tmp_d{nullptr}; - ValueType *b_d{nullptr}; - void *cub_d_temp_storage = NULL; - size_t cub_temp_storage_bytes = 0; - - if (max_iter > 0) - max_it = max_iter; - else - max_it = 500; - - if (tolerance == 0.0f) - tol = 1.0E-6f; - else if (tolerance < 1.0f && tolerance > 0.0f) - tol = tolerance; - else - return -1; - - if (alpha <= 0.0f || alpha >= 1.0f) return -1; - - rmm::device_vector b(n); - b_d = b.data().get(); - -#if 1 /* temporary solution till https://github.com/NVlabs/cub/issues/162 is resolved */ - thrust::device_vector tmp(n); - tmp_d = tmp.data().get(); -#else - rmm::device_vector tmp(n); - tmp_d = pr.data().get(); -#endif - // FIXME: this should take a passed CUDA strema instead of default nullptr - CHECK_CUDA(nullptr); - - if (!has_guess) { - fill(n, pagerank_vector, randomProbability); - fill(n, tmp_d, randomProbability); - } else { - copy(n, pagerank_vector, tmp_d); - } - - if (has_personalization) { - ValueType sum = nrm1(prsLen, prsVal); - if (static_cast(0) == sum) { - fill(n, b_d, randomProbability); - } else { - scal(n, static_cast(1.0 / sum), prsVal); - fill(n, b_d, static_cast(0)); - scatter(prsLen, prsVal, b_d, prsVtx); - } - } else { - fill(n, b_d, randomProbability); - } - update_dangling_nodes(n, a, alpha); - -//#if defined(CUDART_VERSION) and CUDART_VERSION >= 11000 -#if 1 - { - raft::matrix::sparse_matrix_t const r_csr_m{ - handle, cscPtr, cscInd, cscVal, n, e}; - r_csr_m.mv(1.0, tmp_d, 0.0, pagerank_vector); - } -#else - CUDA_TRY(cub::DeviceSpmv::CsrMV(cub_d_temp_storage, - cub_temp_storage_bytes, - cscVal, - (IndexType *)cscPtr, - (IndexType *)cscInd, - tmp_d, - pagerank_vector, - n, - n, - e)); -#endif - // Allocate temporary storage - rmm::device_buffer cub_temp_storage(cub_temp_storage_bytes); - cub_d_temp_storage = cub_temp_storage.data(); - -#ifdef PR_VERBOSE - std::stringstream ss; - ss.str(std::string()); - ss << " ------------------PageRank------------------" << std::endl; - ss << " --------------------------------------------" << std::endl; - ss << std::setw(10) << "Iteration" << std::setw(15) << "Residual" << std::endl; - ss << " --------------------------------------------" << std::endl; - std::cout << ss.str(); -#endif - - while (!converged && i < max_it) { - i++; - converged = pagerankIteration(handle, - n, - e, - cscPtr, - cscInd, - cscVal, - alpha, - a, - b_d, - tol, - i, - max_it, - tmp_d, - cub_d_temp_storage, - cub_temp_storage_bytes, - pagerank_vector, - residual); -#ifdef PR_VERBOSE - ss.str(std::string()); - ss << std::setw(10) << i; - ss.precision(3); - ss << std::setw(15) << std::scientific << *residual << std::endl; - std::cout << ss.str(); -#endif - } -#ifdef PR_VERBOSE - std::cout << " --------------------------------------------" << std::endl; -#endif - - return converged ? 
0 : 1; -} - -// template int pagerankSolver ( int n, int e, int *cscPtr, int *cscInd,half *cscVal, -// half alpha, half *a, bool has_guess, float tolerance, int max_iter, half * &pagerank_vector, half -// * &residual); -template int pagerankSolver(raft::handle_t const &handle, - int n, - int e, - int const *cscPtr, - int const *cscInd, - float *cscVal, - int *prsVtx, - float *prsVal, - int prsLen, - bool has_personalization, - float alpha, - float *a, - bool has_guess, - float tolerance, - int max_iter, - float *&pagerank_vector, - float *&residual); -template int pagerankSolver(raft::handle_t const &handle, - int n, - int e, - const int *cscPtr, - int const *cscInd, - double *cscVal, - int *prsVtx, - double *prsVal, - int prsLen, - bool has_personalization, - double alpha, - double *a, - bool has_guess, - float tolerance, - int max_iter, - double *&pagerank_vector, - double *&residual); - -template -void pagerank_impl(raft::handle_t const &handle, - GraphCSCView const &graph, - WT *pagerank, - VT personalization_subset_size = 0, - VT *personalization_subset = nullptr, - WT *personalization_values = nullptr, - double alpha = 0.85, - double tolerance = 1e-5, - int64_t max_iter = 100, - bool has_guess = false) -{ - bool has_personalization = false; - int prsLen = 0; - VT m = graph.number_of_vertices; - ET nnz = graph.number_of_edges; - int status{0}; - WT *d_pr{nullptr}, *d_val{nullptr}, *d_leaf_vector{nullptr}; - WT res = 1.0; - WT *residual = &res; - - if (personalization_subset_size != 0) { - CUGRAPH_EXPECTS(personalization_subset != nullptr, - "Invalid input argument: personalization_subset array should be of size " - "personalization_subset_size"); - CUGRAPH_EXPECTS(personalization_values != nullptr, - "Invalid input argument: personalization_values array should be of size " - "personalization_subset_size"); - CUGRAPH_EXPECTS(personalization_subset_size <= m, - "Personalization size should be smaller than V"); - has_personalization = true; - prsLen = static_cast(personalization_subset_size); - } - -#if 1 /* temporary solution till https://github.com/NVlabs/cub/issues/162 is resolved */ - thrust::device_vector pr(m); - d_pr = pr.data().get(); -#else - rmm::device_vector pr(m); - d_pr = pr.data().get(); -#endif - - rmm::device_vector leaf_vector(m); - rmm::device_vector val(nnz); - - d_leaf_vector = leaf_vector.data().get(); - d_val = val.data().get(); - - // The templating for HT_matrix_csc_coo assumes that m, nnz and data are all the same type - HT_matrix_csc_coo(m, nnz, graph.offsets, graph.indices, d_val, d_leaf_vector); - - if (has_guess) { copy(m, (WT *)pagerank, d_pr); } - - status = pagerankSolver(handle, - m, - nnz, - graph.offsets, - graph.indices, - d_val, - personalization_subset, - personalization_values, - prsLen, - has_personalization, - alpha, - d_leaf_vector, - has_guess, - tolerance, - max_iter, - d_pr, - residual); - - switch (status) { - case 0: break; - case -1: CUGRAPH_FAIL("Error : bad parameters in Pagerank"); - case 1: break; // Warning : Pagerank did not reached the desired tolerance - default: CUGRAPH_FAIL("Pagerank exec failed"); - } - - copy(m, d_pr, (WT *)pagerank); -} -} // namespace detail - -template -void pagerank(raft::handle_t const &handle, - GraphCSCView const &graph, - WT *pagerank, - VT personalization_subset_size, - VT *personalization_subset, - WT *personalization_values, - double alpha, - double tolerance, - int64_t max_iter, - bool has_guess) -{ - CUGRAPH_EXPECTS(pagerank != nullptr, - "Invalid input argument: Pagerank array should be of size 
V"); - // Multi-GPU - if (handle.comms_initialized()) { - CUGRAPH_EXPECTS(has_guess == false, - "Invalid input argument: Multi-GPU Pagerank does not guess, please use the " - "single GPU version for this feature"); - CUGRAPH_EXPECTS(max_iter > 0, "The number of iteration must be positive"); - cugraph::mg::pagerank(handle, - graph, - pagerank, - personalization_subset_size, - personalization_subset, - personalization_values, - alpha, - max_iter, - tolerance); - } else // Single GPU - return detail::pagerank_impl(handle, - graph, - pagerank, - personalization_subset_size, - personalization_subset, - personalization_values, - alpha, - tolerance, - max_iter, - has_guess); -} - -// explicit instantiation -template void pagerank(raft::handle_t const &handle, - GraphCSCView const &graph, - float *pagerank, - int personalization_subset_size, - int *personalization_subset, - float *personalization_values, - double alpha, - double tolerance, - int64_t max_iter, - bool has_guess); -template void pagerank(raft::handle_t const &handle, - GraphCSCView const &graph, - double *pagerank, - int personalization_subset_size, - int *personalization_subset, - double *personalization_values, - double alpha, - double tolerance, - int64_t max_iter, - bool has_guess); - -} // namespace cugraph diff --git a/cpp/src/link_analysis/pagerank_1D.cu b/cpp/src/link_analysis/pagerank_1D.cu deleted file mode 100644 index 2447290000c..00000000000 --- a/cpp/src/link_analysis/pagerank_1D.cu +++ /dev/null @@ -1,188 +0,0 @@ -/* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Author: Alex Fender afender@nvidia.com - -#include -#include -#include "pagerank_1D.cuh" -#include "utilities/graph_utils.cuh" - -namespace cugraph { -namespace mg { - -template -__global__ void transition_kernel(const size_t e, const VT *ind, const VT *degree, WT *val) -{ - for (auto i = threadIdx.x + blockIdx.x * blockDim.x; i < e; i += gridDim.x * blockDim.x) - val[i] = 1.0 / degree[ind[i]]; // Degree contains IN degree. So all degree[ind[i]] were - // incremented by definition (no div by 0). 
-} - -template -Pagerank::Pagerank(const raft::handle_t &handle_, GraphCSCView const &G) - : comm(handle_.get_comms()), - bookmark(G.number_of_vertices), - prev_pr(G.number_of_vertices), - val(G.local_edges[comm.get_rank()]), - handle(handle_), - has_personalization(false) -{ - v_glob = G.number_of_vertices; - v_loc = G.local_vertices[comm.get_rank()]; - e_loc = G.local_edges[comm.get_rank()]; - part_off = G.local_offsets; - local_vertices = G.local_vertices; - off = G.offsets; - ind = G.indices; - blocks = handle_.get_device_properties().maxGridSize[0]; - threads = handle_.get_device_properties().maxThreadsPerBlock; - sm_count = handle_.get_device_properties().multiProcessorCount; - - is_setup = false; -} - -template -Pagerank::~Pagerank() -{ -} - -template -void Pagerank::transition_vals(const VT *degree) -{ - if (e_loc > 0) { - int threads = std::min(e_loc, this->threads); - int blocks = std::min(32 * sm_count, this->blocks); - transition_kernel<<>>(e_loc, ind, degree, val.data().get()); - CHECK_CUDA(nullptr); - } -} - -template -void Pagerank::flag_leafs(const VT *degree) -{ - if (v_glob > 0) { - int threads = std::min(v_glob, this->threads); - int blocks = std::min(32 * sm_count, this->blocks); - cugraph::detail::flag_leafs_kernel - <<>>(v_glob, degree, bookmark.data().get()); - CHECK_CUDA(nullptr); - } -} - -// Artificially create the google matrix by setting val and bookmark -template -void Pagerank::setup(WT _alpha, - VT *degree, - VT personalization_subset_size, - VT *personalization_subset, - WT *personalization_values) -{ - if (!is_setup) { - alpha = _alpha; - WT zero = 0.0; - WT one = 1.0; - // Update dangling node vector - cugraph::detail::fill(v_glob, bookmark.data().get(), zero); - flag_leafs(degree); - cugraph::detail::update_dangling_nodes(v_glob, bookmark.data().get(), alpha); - - // Transition matrix - transition_vals(degree); - - // personalize - if (personalization_subset_size != 0) { - CUGRAPH_EXPECTS(personalization_subset != nullptr, - "Invalid input argument: personalization_subset array should be of size " - "personalization_subset_size"); - CUGRAPH_EXPECTS(personalization_values != nullptr, - "Invalid input argument: personalization_values array should be of size " - "personalization_subset_size"); - CUGRAPH_EXPECTS(personalization_subset_size <= v_glob, - "Personalization size should be smaller than V"); - - WT sum = cugraph::detail::nrm1(personalization_subset_size, personalization_values); - if (sum != zero) { - has_personalization = true; - personalization_vector.resize(v_glob); - cugraph::detail::fill(v_glob, personalization_vector.data().get(), zero); - cugraph::detail::scal(v_glob, one / sum, personalization_values); - cugraph::detail::scatter(personalization_subset_size, - personalization_values, - personalization_vector.data().get(), - personalization_subset); - } - } - is_setup = true; - } else - CUGRAPH_FAIL("MG PageRank : Setup can be called only once"); -} - -// run the power iteration on the google matrix -template -int Pagerank::solve(int max_iter, float tolerance, WT *pagerank) -{ - if (is_setup) { - WT dot_res; - WT one = 1.0; - WT *pr = pagerank; - cugraph::detail::fill(v_glob, pagerank, one / v_glob); - cugraph::detail::fill(v_glob, prev_pr.data().get(), one / v_glob); - // This cuda sync was added to fix #426 - // This should not be requiered in theory - // This is not needed on one GPU at this time - cudaDeviceSynchronize(); - dot_res = cugraph::detail::dot(v_glob, bookmark.data().get(), pr); - MGcsrmv spmv_solver( - handle, local_vertices, 
part_off, off, ind, val.data().get(), pagerank); - - WT residual; - int i; - for (i = 0; i < max_iter; ++i) { - spmv_solver.run(pagerank); - cugraph::detail::scal(v_glob, alpha, pr); - - // personalization - if (has_personalization) - cugraph::detail::axpy(v_glob, dot_res, personalization_vector.data().get(), pr); - else - cugraph::detail::addv(v_glob, dot_res * (one / v_glob), pr); - - dot_res = cugraph::detail::dot(v_glob, bookmark.data().get(), pr); - cugraph::detail::scal(v_glob, one / cugraph::detail::nrm2(v_glob, pr), pr); - - // convergence check - cugraph::detail::axpy(v_glob, (WT)-1.0, pr, prev_pr.data().get()); - residual = cugraph::detail::nrm2(v_glob, prev_pr.data().get()); - if (residual < tolerance) - break; - else - cugraph::detail::copy(v_glob, pr, prev_pr.data().get()); - } - cugraph::detail::scal(v_glob, one / cugraph::detail::nrm1(v_glob, pr), pr); - return i; - } else { - CUGRAPH_FAIL("MG PageRank : Solve was called before setup"); - } -} - -template class Pagerank; -template class Pagerank; - -} // namespace mg -} // namespace cugraph - -#include "utilities/eidir_graph_utils.hpp" diff --git a/cpp/src/link_analysis/pagerank_1D.cuh b/cpp/src/link_analysis/pagerank_1D.cuh deleted file mode 100644 index de2e049df53..00000000000 --- a/cpp/src/link_analysis/pagerank_1D.cuh +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Author: Alex Fender afender@nvidia.com - -#pragma once - -#include -#include -#include - -#include "utilities/error.hpp" -#include "utilities/spmv_1D.cuh" - -namespace cugraph { -namespace mg { - -template -class Pagerank { - private: - VT v_glob{}; // global number of vertices - VT v_loc{}; // local number of vertices - ET e_loc{}; // local number of edges - WT alpha{}; // damping factor - bool has_personalization; - // CUDA - const raft::comms::comms_t &comm; // info about the mg comm setup - cudaStream_t stream; - int blocks; - int threads; - int sm_count; - - // Vertex offsets for each partition. - VT *part_off; - VT *local_vertices; - - // Google matrix - ET *off; - VT *ind; - - rmm::device_vector val; // values of the substochastic matrix - rmm::device_vector bookmark; // constant vector with dangling node info - rmm::device_vector prev_pr; // record the last pagerank for convergence check - rmm::device_vector personalization_vector; // personalization vector after reconstruction - - bool is_setup; - raft::handle_t const &handle; // raft handle propagation for SpMV, etc. 
- - public: - Pagerank(const raft::handle_t &handle, const GraphCSCView &G); - ~Pagerank(); - - void transition_vals(const VT *degree); - - void flag_leafs(const VT *degree); - - // Artificially create the google matrix by setting val and bookmark - void setup(WT _alpha, - VT *degree, - VT personalization_subset_size, - VT *personalization_subset, - WT *personalization_values); - - // run the power iteration on the google matrix, return the number of iterations - int solve(int max_iter, float tolerance, WT *pagerank); -}; - -template -int pagerank(raft::handle_t const &handle, - const GraphCSCView &G, - WT *pagerank_result, - VT personalization_subset_size, - VT *personalization_subset, - WT *personalization_values, - const double damping_factor = 0.85, - const int64_t n_iter = 100, - const double tolerance = 1e-5) -{ - // null pointers check - CUGRAPH_EXPECTS(G.offsets != nullptr, "Invalid input argument - offsets is null"); - CUGRAPH_EXPECTS(G.indices != nullptr, "Invalid input argument - indidices is null"); - CUGRAPH_EXPECTS(pagerank_result != nullptr, - "Invalid input argument - pagerank output memory must be allocated"); - - // parameter values - CUGRAPH_EXPECTS(damping_factor > 0.0, - "Invalid input argument - invalid damping factor value (alpha<0)"); - CUGRAPH_EXPECTS(damping_factor < 1.0, - "Invalid input argument - invalid damping factor value (alpha>1)"); - CUGRAPH_EXPECTS(n_iter > 0, "Invalid input argument - n_iter must be > 0"); - - rmm::device_vector degree(G.number_of_vertices); - - // in-degree of CSC (equivalent to out-degree of original edge list) - G.degree(degree.data().get(), DegreeDirection::IN); - - // Allocate and intialize Pagerank class - Pagerank pr_solver(handle, G); - - // Set all constants info - pr_solver.setup(damping_factor, - degree.data().get(), - personalization_subset_size, - personalization_subset, - personalization_values); - - // Run pagerank - return pr_solver.solve(n_iter, tolerance, pagerank_result); -} - -} // namespace mg -} // namespace cugraph diff --git a/cpp/src/structure/graph.cu b/cpp/src/structure/graph.cu index 2ce8a54d736..056ad39fefc 100644 --- a/cpp/src/structure/graph.cu +++ b/cpp/src/structure/graph.cu @@ -148,3 +148,5 @@ template class GraphCOOView; template class GraphCompressedSparseBaseView; template class GraphCompressedSparseBaseView; } // namespace cugraph + +#include "utilities/eidir_graph_utils.hpp" diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 0ab8251e7e7..f81c98b2f4c 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -159,15 +159,6 @@ set(EDGE_BETWEENNESS_TEST_SRC ConfigureTest(EDGE_BETWEENNESS_TEST "${EDGE_BETWEENNESS_TEST_SRC}" "") -################################################################################################### -# - pagerank tests -------------------------------------------------------------------------------- - -set(PAGERANK_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/pagerank/pagerank_test.cpp") - -ConfigureTest(PAGERANK_TEST "${PAGERANK_TEST_SRC}" "") - ################################################################################################### # - SSSP tests ------------------------------------------------------------------------------------ diff --git a/cpp/tests/pagerank/pagerank_test.cpp b/cpp/tests/pagerank/pagerank_test.cpp deleted file mode 100644 index 48705f7f324..00000000000 --- a/cpp/tests/pagerank/pagerank_test.cpp +++ /dev/null @@ -1,206 +0,0 @@ -/* - * Copyright (c) 
2018-2020, NVIDIA CORPORATION. All rights reserved. - * - * NVIDIA CORPORATION and its licensors retain all intellectual property - * and proprietary rights in and to this software, related documentation - * and any modifications thereto. Any use, reproduction, disclosure or - * distribution of this software and related documentation without an express - * license agreement from NVIDIA CORPORATION is strictly prohibited. - * - */ - -// Pagerank solver tests -// Author: Alex Fender afender@nvidia.com - -#include -#include -#include - -#include -#include - -#include -#include - -#include - -#include - -#include - -// do the perf measurements -// enabled by command line parameter s'--perf' -static int PERF = 0; - -// iterations for perf tests -// enabled by command line parameter '--perf-iters" -static int PERF_MULTIPLIER = 5; - -typedef struct Pagerank_Usecase_t { - std::string matrix_file; - std::string result_file; - Pagerank_Usecase_t(const std::string& a, const std::string& b) - { - // assume relative paths are relative to RAPIDS_DATASET_ROOT_DIR - const std::string& rapidsDatasetRootDir = cugraph::test::get_rapids_dataset_root_dir(); - if ((a != "") && (a[0] != '/')) { - matrix_file = rapidsDatasetRootDir + "/" + a; - } else { - matrix_file = a; - } - if ((b != "") && (b[0] != '/')) { - result_file = rapidsDatasetRootDir + "/" + b; - } else { - result_file = b; - } - } - Pagerank_Usecase_t& operator=(const Pagerank_Usecase_t& rhs) - { - matrix_file = rhs.matrix_file; - result_file = rhs.result_file; - return *this; - } -} Pagerank_Usecase; - -class Tests_Pagerank : public ::testing::TestWithParam { - public: - Tests_Pagerank() {} - static void SetupTestCase() {} - static void TearDownTestCase() - { - if (PERF) { - for (unsigned int i = 0; i < pagerank_time.size(); ++i) { - std::cout << pagerank_time[i] / PERF_MULTIPLIER << std::endl; - } - } - } - virtual void SetUp() {} - virtual void TearDown() {} - - static std::vector pagerank_time; - - template - void run_current_test(const Pagerank_Usecase& param) - { - const ::testing::TestInfo* const test_info = - ::testing::UnitTest::GetInstance()->current_test_info(); - std::stringstream ss; - std::string test_id = std::string(test_info->test_case_name()) + std::string(".") + - std::string(test_info->name()) + std::string("_") + - cugraph::test::getFileName(param.matrix_file) + std::string("_") + - ss.str().c_str(); - - int m, k, nnz; - MM_typecode mc; - - float tol = 1E-5f; - - // Default parameters - /* - float alpha = 0.85; - int max_iter = 500; - bool has_guess = false; - */ - - HighResClock hr_clock; - double time_tmp; - - FILE* fpin = fopen(param.matrix_file.c_str(), "r"); - ASSERT_NE(fpin, nullptr) << "fopen (" << param.matrix_file << ") failure."; - - ASSERT_EQ(cugraph::test::mm_properties(fpin, 1, &mc, &m, &k, &nnz), 0) - << "could not read Matrix Market file properties" - << "\n"; - ASSERT_TRUE(mm_is_matrix(mc)); - ASSERT_TRUE(mm_is_coordinate(mc)); - ASSERT_FALSE(mm_is_complex(mc)); - ASSERT_FALSE(mm_is_skew(mc)); - - // Allocate memory on host - std::vector cooRowInd(nnz), cooColInd(nnz); - std::vector cooVal(nnz), pagerank(m); - - // device alloc - rmm::device_uvector pagerank_vector(static_cast(m), nullptr); - T* d_pagerank = pagerank_vector.data(); - - // Read - ASSERT_EQ((cugraph::test::mm_to_coo( - fpin, 1, nnz, &cooRowInd[0], &cooColInd[0], &cooVal[0], NULL)), - 0) - << "could not read matrix data" - << "\n"; - ASSERT_EQ(fclose(fpin), 0); - - // Pagerank runs on CSC, so feed COOtoCSR the row/col backwards. 
- raft::handle_t handle; - cugraph::GraphCOOView G_coo(&cooColInd[0], &cooRowInd[0], &cooVal[0], m, nnz); - auto G_unique = cugraph::coo_to_csr(G_coo); - cugraph::GraphCSCView G(G_unique->view().offsets, - G_unique->view().indices, - G_unique->view().edge_data, - G_unique->view().number_of_vertices, - G_unique->view().number_of_edges); - - cudaDeviceSynchronize(); - if (PERF) { - hr_clock.start(); - for (int i = 0; i < PERF_MULTIPLIER; ++i) { - cugraph::pagerank(handle, G, d_pagerank); - cudaDeviceSynchronize(); - } - hr_clock.stop(&time_tmp); - pagerank_time.push_back(time_tmp); - } else { - cudaProfilerStart(); - cugraph::pagerank(handle, G, d_pagerank); - cudaProfilerStop(); - cudaDeviceSynchronize(); - } - - // Check vs golden data - if (param.result_file.length() > 0) { - std::vector calculated_res(m); - - CUDA_TRY(cudaMemcpy(&calculated_res[0], d_pagerank, sizeof(T) * m, cudaMemcpyDeviceToHost)); - std::sort(calculated_res.begin(), calculated_res.end()); - fpin = fopen(param.result_file.c_str(), "rb"); - ASSERT_TRUE(fpin != NULL) << " Cannot read file with reference data: " << param.result_file - << std::endl; - std::vector expected_res(m); - ASSERT_EQ(cugraph::test::read_binary_vector(fpin, m, expected_res), 0); - fclose(fpin); - T err; - int n_err = 0; - for (int i = 0; i < m; i++) { - err = fabs(expected_res[i] - calculated_res[i]); - if (err > tol * 1.1) { - n_err++; // count the number of mismatches - } - } - if (n_err) { - EXPECT_LE(n_err, 0.001 * m); // we tolerate 0.1% of values with a litte difference - } - } - } -}; - -std::vector Tests_Pagerank::pagerank_time; - -TEST_P(Tests_Pagerank, CheckFP32_T) { run_current_test(GetParam()); } - -TEST_P(Tests_Pagerank, CheckFP64_T) { run_current_test(GetParam()); } - -// --gtest_filter=*simple_test* -INSTANTIATE_TEST_CASE_P( - simple_test, - Tests_Pagerank, - ::testing::Values(Pagerank_Usecase("test/datasets/karate.mtx", ""), - Pagerank_Usecase("test/datasets/web-Google.mtx", - "test/ref/pagerank/web-Google.pagerank_val_0.85.bin"), - Pagerank_Usecase("test/datasets/ljournal-2008.mtx", - "test/ref/pagerank/ljournal-2008.pagerank_val_0.85.bin"), - Pagerank_Usecase("test/datasets/webbase-1M.mtx", - "test/ref/pagerank/webbase-1M.pagerank_val_0.85.bin"))); - -CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/python/cugraph/tests/test_ecg.py b/python/cugraph/tests/test_ecg.py index 86f9ed343ce..0512ef3b1a5 100644 --- a/python/cugraph/tests/test_ecg.py +++ b/python/cugraph/tests/test_ecg.py @@ -83,4 +83,6 @@ def test_ecg_clustering_nx(graph_file, min_weight, ensemble_size): ) # Get the modularity score for partitioning versus random assignment - _ = cugraph.ecg(G, min_weight, ensemble_size, "weight") + df_dict = cugraph.ecg(G, min_weight, ensemble_size, "weight") + + assert isinstance(df_dict, dict) diff --git a/python/cugraph/tests/test_k_truss_subgraph.py b/python/cugraph/tests/test_k_truss_subgraph.py index 95bf5e3e7f4..a86490fb561 100644 --- a/python/cugraph/tests/test_k_truss_subgraph.py +++ b/python/cugraph/tests/test_k_truss_subgraph.py @@ -97,9 +97,6 @@ def test_ktruss_subgraph_Graph_nx(graph_file, nx_ground_truth): create_using=nx.Graph() ) k_subgraph = cugraph.k_truss(G, k) - df = nx.to_pandas_edgelist(k_subgraph) - k_truss_nx = nx.k_truss(G, k) - nx_df = nx.to_pandas_edgelist(k_truss_nx) - assert len(df) == len(nx_df) + assert nx.is_isomorphic(k_subgraph, k_truss_nx) diff --git a/python/cugraph/tests/test_subgraph_extraction.py b/python/cugraph/tests/test_subgraph_extraction.py index 2e6968892c2..9e9eccc4347 100644 --- 
a/python/cugraph/tests/test_subgraph_extraction.py +++ b/python/cugraph/tests/test_subgraph_extraction.py @@ -102,7 +102,6 @@ def test_subgraph_extraction_Graph(graph_file): assert compare_edges(cu_sg, nx_sg) -@pytest.mark.skip(reason="needs test updates for graph comparison") @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_subgraph_extraction_Graph_nx(graph_file): gc.collect() @@ -124,10 +123,9 @@ def test_subgraph_extraction_Graph_nx(graph_file): ) nx_sub = nx.subgraph(G, verts) - nx_df = nx.to_pandas_edgelist(nx_sub).to_dict() cu_verts = cudf.Series(verts) cu_sub = cugraph.subgraph(G, cu_verts) - cu_df = nx.to_pandas_edgelist(cu_sub).to_dict() - assert nx_df == cu_df + for (u, v) in cu_sub.edges(): + assert nx_sub.has_edge(u, v) From 039b857524b79f31e776a4b97264fe7a5105008f Mon Sep 17 00:00:00 2001 From: Chuck Hastings <45364586+ChuckHastings@users.noreply.github.com> Date: Fri, 5 Feb 2021 14:43:30 -0500 Subject: [PATCH 164/343] Build a Dendrogram class, adapt Louvain/Leiden/ECG to use it (#1359) Preparing for MNMG Leiden and ECG identified an area for code cleanup. The original cuGraph implementation of Louvain would flatten the hierarchical clustering as it was computed, filling (and returning) the final clustering. This adds an awkward step in the middle of the Louvain computation. Additionally, since Louvain (and Leiden and ECG which derive from it) is actually a hierarchical clustering algorithm it would be nice to generate the actual Dendrogram. This PR implements a Dendrogram class, a function for flattening the Dendrogram, and modifies Louvain, Leiden and ECG to use the Dendrogram class. It was suggested that the Dendrogram class could be moved to raft, decided to defer that until later, it's easy enough to move. Authors: - Chuck Hastings (@ChuckHastings) Approvers: - Alex Fender (@afender) - Brad Rees (@BradReesWork) - Andrei Schaffer (@aschaffer) - Rick Ratzel (@rlratzel) - AJ Schmidt (@ajschmidt8) URL: https://github.com/rapidsai/cugraph/pull/1359 --- ci/gpu/notebook_list.py | 48 +++ ci/gpu/test-notebooks.sh | 33 +- ci/test.sh | 4 +- cpp/CMakeLists.txt | 2 +- cpp/src/community/dendrogram.cuh | 68 +++ cpp/src/community/{ECG.cu => ecg.cu} | 98 +++-- cpp/src/community/flatten_dendrogram.cuh | 53 +++ cpp/src/community/leiden.cu | 27 +- cpp/src/community/leiden.cuh | 86 ++-- cpp/src/community/louvain.cu | 40 +- cpp/src/community/louvain.cuh | 405 +++++++++--------- cpp/src/experimental/louvain.cuh | 214 +++------ cpp/tests/experimental/louvain_test.cu | 2 +- notebooks/community/ECG.ipynb | 1 + notebooks/community/Louvain.ipynb | 1 + notebooks/community/Subgraph-Extraction.ipynb | 1 + notebooks/community/Triangle-Counting.ipynb | 2 +- .../cugraph_benchmarks/bfs_benchmark.ipynb | 1 + .../louvain_benchmark.ipynb | 1 + .../nx_cugraph_bc_benchmarking.ipynb | 1 + .../pagerank_benchmark.ipynb | 1 + notebooks/cugraph_benchmarks/release.ipynb | 1 + .../cugraph_benchmarks/sssp_benchmark.ipynb | 1 + notebooks/demo/uvm.ipynb | 1 + python/cugraph/tests/test_ecg.py | 59 ++- python/cugraph/tests/test_leiden.py | 27 +- python/cugraph/tests/test_louvain.py | 79 ++-- 27 files changed, 712 insertions(+), 545 deletions(-) create mode 100644 ci/gpu/notebook_list.py create mode 100644 cpp/src/community/dendrogram.cuh rename cpp/src/community/{ECG.cu => ecg.cu} (72%) create mode 100644 cpp/src/community/flatten_dendrogram.cuh diff --git a/ci/gpu/notebook_list.py b/ci/gpu/notebook_list.py new file mode 100644 index 00000000000..bb54913ac8d --- /dev/null +++ b/ci/gpu/notebook_list.py @@ 
-0,0 +1,48 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re +import sys +import glob + +from numba import cuda + +# +# Not strictly true... however what we mean is +# Pascal or earlier +# +pascal = False + +device = cuda.get_current_device() +cc = getattr(device, 'COMPUTE_CAPABILITY') +if (cc[0] < 7): + pascal = True + +for filename in glob.iglob('**/*.ipynb', recursive=True): + skip = False + for line in open(filename, 'r'): + if re.search('# Skip notebook test', line): + skip = True + print(f'SKIPPING {filename} (marked as skip)', file=sys.stderr) + break; + elif re.search('dask', line): + print(f'SKIPPING {filename} (suspected Dask usage, not currently automatable)', file=sys.stderr) + skip = True + break; + elif pascal and re.search('# Does not run on Pascal', line): + print(f'SKIPPING {filename} (does not run on Pascal)', file=sys.stderr) + skip = True + break; + + if not skip: + print(filename) diff --git a/ci/gpu/test-notebooks.sh b/ci/gpu/test-notebooks.sh index 389d3be0bfd..f5f768d7f12 100755 --- a/ci/gpu/test-notebooks.sh +++ b/ci/gpu/test-notebooks.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -20,11 +20,6 @@ LIBCUDF_KERNEL_CACHE_PATH=${WORKSPACE}/.jitcache cd ${NOTEBOOKS_DIR} TOPLEVEL_NB_FOLDERS=$(find . -name *.ipynb |cut -d'/' -f2|sort -u) -# Add notebooks that should be skipped here -# (space-separated list of filenames without paths) - -SKIPNBS="uvm.ipynb bfs_benchmark.ipynb louvain_benchmark.ipynb pagerank_benchmark.ipynb sssp_benchmark.ipynb release.ipynb nx_cugraph_bc_benchmarking.ipynb" - ## Check env env @@ -37,26 +32,14 @@ for folder in ${TOPLEVEL_NB_FOLDERS}; do echo "FOLDER: ${folder}" echo "========================================" cd ${NOTEBOOKS_DIR}/${folder} - for nb in $(find . 
-name "*.ipynb"); do + for nb in $(python ${WORKSPACE}/ci/gpu/notebook_list.py); do nbBasename=$(basename ${nb}) - # Skip all NBs that use dask (in the code or even in their name) - if ((echo ${nb}|grep -qi dask) || \ - (grep -q dask ${nb})); then - echo "--------------------------------------------------------------------------------" - echo "SKIPPING: ${nb} (suspected Dask usage, not currently automatable)" - echo "--------------------------------------------------------------------------------" - elif (echo " ${SKIPNBS} " | grep -q " ${nbBasename} "); then - echo "--------------------------------------------------------------------------------" - echo "SKIPPING: ${nb} (listed in skip list)" - echo "--------------------------------------------------------------------------------" - else - cd $(dirname ${nb}) - nvidia-smi - ${NBTEST} ${nbBasename} - EXITCODE=$((EXITCODE | $?)) - rm -rf ${LIBCUDF_KERNEL_CACHE_PATH}/* - cd ${NOTEBOOKS_DIR}/${folder} - fi + cd $(dirname ${nb}) + nvidia-smi + ${NBTEST} ${nbBasename} + EXITCODE=$((EXITCODE | $?)) + rm -rf ${LIBCUDF_KERNEL_CACHE_PATH}/* + cd ${NOTEBOOKS_DIR}/${folder} done done diff --git a/ci/test.sh b/ci/test.sh index db9390461c0..c173088862d 100755 --- a/ci/test.sh +++ b/ci/test.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -64,7 +64,7 @@ else cd $WORKSPACE/ci/artifacts/cugraph/cpu/conda_work/cpp/build fi -for gt in gtests/*; do +for gt in tests/*_TEST; do test_name=$(basename $gt) echo "Running GoogleTest $test_name" ${gt} ${GTEST_FILTER} ${GTEST_ARGS} diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 58b5b386fc8..e12382bf344 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -349,7 +349,7 @@ add_library(cugraph SHARED src/community/louvain.cu src/community/leiden.cu src/community/ktruss.cu - src/community/ECG.cu + src/community/ecg.cu src/community/triangles_counting.cu src/community/extract_subgraph_by_vertex.cu src/community/egonet.cu diff --git a/cpp/src/community/dendrogram.cuh b/cpp/src/community/dendrogram.cuh new file mode 100644 index 00000000000..414f5f3854d --- /dev/null +++ b/cpp/src/community/dendrogram.cuh @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include + +#include +#include + +namespace cugraph { + +template +class Dendrogram { + public: + void add_level(vertex_t num_verts, + cudaStream_t stream = 0, + rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) + { + level_ptr_.push_back( + std::make_unique(num_verts * sizeof(vertex_t), stream, mr)); + level_size_.push_back(num_verts); + } + + size_t current_level() const { return level_size_.size() - 1; } + + size_t num_levels() const { return level_size_.size(); } + + vertex_t const *get_level_ptr_nocheck(size_t level) const + { + return static_cast(level_ptr_[level]->data()); + } + + vertex_t *get_level_ptr_nocheck(size_t level) + { + return static_cast(level_ptr_[level]->data()); + } + + vertex_t get_level_size_nocheck(size_t level) const { return level_size_[level]; } + + vertex_t const *current_level_begin() const { return get_level_ptr_nocheck(current_level()); } + + vertex_t const *current_level_end() const { return current_level_begin() + current_level_size(); } + + vertex_t *current_level_begin() { return get_level_ptr_nocheck(current_level()); } + + vertex_t *current_level_end() { return current_level_begin() + current_level_size(); } + + vertex_t current_level_size() const { return get_level_size_nocheck(current_level()); } + + private: + std::vector level_size_; + std::vector> level_ptr_; +}; + +} // namespace cugraph diff --git a/cpp/src/community/ECG.cu b/cpp/src/community/ecg.cu similarity index 72% rename from cpp/src/community/ECG.cu rename to cpp/src/community/ecg.cu index ea21f87ff7e..994204ecd32 100644 --- a/cpp/src/community/ECG.cu +++ b/cpp/src/community/ecg.cu @@ -15,13 +15,15 @@ */ #include +#include +#include +#include +#include #include #include -#include + #include -#include -#include "utilities/graph_utils.cuh" namespace { template @@ -41,26 +43,23 @@ binsearch_maxle(const IndexType *vec, const IndexType val, IndexType low, IndexT } } +// FIXME: This shouldn't need to be a custom kernel, this +// seems like it should just be a thrust::transform template -__global__ void match_check_kernel(IdxT size, - IdxT num_verts, - IdxT *offsets, - IdxT *indices, - IdxT *permutation, - IdxT *parts, - ValT *weights) +__global__ void match_check_kernel( + IdxT size, IdxT num_verts, IdxT *offsets, IdxT *indices, IdxT *parts, ValT *weights) { IdxT tid = blockIdx.x * blockDim.x + threadIdx.x; while (tid < size) { IdxT source = binsearch_maxle(offsets, tid, (IdxT)0, num_verts); IdxT dest = indices[tid]; - if (parts[permutation[source]] == parts[permutation[dest]]) weights[tid] += 1; + if (parts[source] == parts[dest]) weights[tid] += 1; tid += gridDim.x * blockDim.x; } } struct prg { - __host__ __device__ float operator()(int n) + __device__ float operator()(int n) { thrust::default_random_engine rng; thrust::uniform_real_distribution dist(0.0, 1.0); @@ -93,7 +92,7 @@ struct update_functor { template void get_permutation_vector(T size, T seed, T *permutation, cudaStream_t stream) { - rmm::device_vector randoms_v(size); + rmm::device_uvector randoms_v(size, stream); thrust::counting_iterator index(seed); thrust::transform( @@ -103,6 +102,31 @@ void get_permutation_vector(T size, T seed, T *permutation, cudaStream_t stream) rmm::exec_policy(stream)->on(stream), randoms_v.begin(), randoms_v.end(), permutation); } +template +class EcgLouvain : public cugraph::Louvain { + public: + using graph_t = graph_type; + using vertex_t = typename graph_type::vertex_type; + using edge_t = typename graph_type::edge_type; + using weight_t = 
typename graph_type::weight_type; + + EcgLouvain(raft::handle_t const &handle, graph_type const &graph, vertex_t seed) + : cugraph::Louvain(handle, graph), seed_(seed) + { + } + + void initialize_dendrogram_level(vertex_t num_vertices) override + { + this->dendrogram_->add_level(num_vertices); + + get_permutation_vector( + num_vertices, seed_, this->dendrogram_->current_level_begin(), this->stream_); + } + + private: + vertex_t seed_; +}; + } // anonymous namespace namespace cugraph { @@ -114,37 +138,34 @@ void ecg(raft::handle_t const &handle, vertex_t ensemble_size, vertex_t *clustering) { + using graph_type = GraphCSRView; + CUGRAPH_EXPECTS(graph.edge_data != nullptr, - "Invalid input argument: louvain expects a weighted graph"); - CUGRAPH_EXPECTS(clustering != nullptr, "Invalid input argument: clustering is NULL"); + "Invalid input argument: ecg expects a weighted graph"); + CUGRAPH_EXPECTS(clustering != nullptr, + "Invalid input argument: clustering is NULL, should be a device pointer to " + "memory for storing the result"); cudaStream_t stream{0}; - rmm::device_vector ecg_weights_v(graph.edge_data, - graph.edge_data + graph.number_of_edges); + rmm::device_uvector ecg_weights_v(graph.number_of_edges, handle.get_stream()); + + thrust::copy(rmm::exec_policy(stream)->on(stream), + graph.edge_data, + graph.edge_data + graph.number_of_edges, + ecg_weights_v.data()); vertex_t size{graph.number_of_vertices}; - vertex_t seed{1}; - auto permuted_graph = std::make_unique>( - size, graph.number_of_edges, graph.has_data()); + // FIXME: This seed should be a parameter + vertex_t seed{1}; // Iterate over each member of the ensemble for (vertex_t i = 0; i < ensemble_size; i++) { - // Take random permutation of the graph - rmm::device_vector permutation_v(size); - vertex_t *d_permutation = permutation_v.data().get(); - - get_permutation_vector(size, seed, d_permutation, stream); + EcgLouvain runner(handle, graph, seed); seed += size; - detail::permute_graph(graph, d_permutation, permuted_graph->view()); - - // Run one level of Louvain clustering on the random permutation - rmm::device_vector parts_v(size); - vertex_t *d_parts = parts_v.data().get(); - - cugraph::louvain(handle, permuted_graph->view(), d_parts, size_t{1}); + weight_t wt = runner(size_t{1}, weight_t{1}); // For each edge in the graph determine whether the endpoints are in the same partition // Keep a sum for each edge of the total number of times its endpoints are in the same partition @@ -155,17 +176,16 @@ void ecg(raft::handle_t const &handle, graph.number_of_vertices, graph.offsets, graph.indices, - permutation_v.data().get(), - d_parts, - ecg_weights_v.data().get()); + runner.get_dendrogram().get_level_ptr_nocheck(0), + ecg_weights_v.data()); } // Set weights = min_weight + (1 - min-weight)*sum/ensemble_size update_functor uf(min_weight, ensemble_size); thrust::transform(rmm::exec_policy(stream)->on(stream), - ecg_weights_v.data().get(), - ecg_weights_v.data().get() + graph.number_of_edges, - ecg_weights_v.data().get(), + ecg_weights_v.begin(), + ecg_weights_v.end(), + ecg_weights_v.begin(), uf); // Run Louvain on the original graph using the computed weights @@ -173,7 +193,7 @@ void ecg(raft::handle_t const &handle, GraphCSRView louvain_graph; louvain_graph.indices = graph.indices; louvain_graph.offsets = graph.offsets; - louvain_graph.edge_data = ecg_weights_v.data().get(); + louvain_graph.edge_data = ecg_weights_v.data(); louvain_graph.number_of_vertices = graph.number_of_vertices; louvain_graph.number_of_edges = 
graph.number_of_edges; diff --git a/cpp/src/community/flatten_dendrogram.cuh b/cpp/src/community/flatten_dendrogram.cuh new file mode 100644 index 00000000000..892fe2d1c51 --- /dev/null +++ b/cpp/src/community/flatten_dendrogram.cuh @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +#include +#include + +namespace cugraph { + +template +void partition_at_level(raft::handle_t const &handle, + Dendrogram const &dendrogram, + vertex_t const *d_vertex_ids, + vertex_t *d_partition, + size_t level) +{ + vertex_t local_num_verts = dendrogram.get_level_size_nocheck(0); + + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + d_vertex_ids, + d_vertex_ids + local_num_verts, + d_partition); + + std::for_each(thrust::make_counting_iterator(0), + thrust::make_counting_iterator(level), + [&handle, &dendrogram, d_vertex_ids, &d_partition, local_num_verts](size_t l) { + cugraph::experimental::relabel( + handle, + std::tuple( + d_vertex_ids, dendrogram.get_level_ptr_nocheck(l)), + dendrogram.get_level_size_nocheck(l), + d_partition, + local_num_verts); + }); +} + +} // namespace cugraph diff --git a/cpp/src/community/leiden.cu b/cpp/src/community/leiden.cu index 9e5a847cdf0..427e62d3286 100644 --- a/cpp/src/community/leiden.cu +++ b/cpp/src/community/leiden.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,8 +14,11 @@ * limitations under the License. 
*/ +#include #include +#include + namespace cugraph { template @@ -27,11 +30,29 @@ std::pair leiden(raft::handle_t const &handle, { CUGRAPH_EXPECTS(graph.edge_data != nullptr, "Invalid input argument: leiden expects a weighted graph"); - CUGRAPH_EXPECTS(clustering != nullptr, "Invalid input argument: clustering is null"); + CUGRAPH_EXPECTS(clustering != nullptr, + "Invalid input argument: clustering is null, should be a device pointer to " + "memory for storing the result"); Leiden> runner(handle, graph); + weight_t wt = runner(max_level, resolution); + + rmm::device_uvector vertex_ids_v(graph.number_of_vertices, handle.get_stream()); + + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::make_counting_iterator(0), // MNMG - base vertex id + thrust::make_counting_iterator( + graph.number_of_vertices), // MNMG - base vertex id + number_of_vertices + vertex_ids_v.begin()); + + partition_at_level(handle, + runner.get_dendrogram(), + vertex_ids_v.data(), + clustering, + runner.get_dendrogram().num_levels()); - return runner(clustering, max_level, resolution); + // FIXME: Consider returning the Dendrogram at some point + return std::make_pair(runner.get_dendrogram().num_levels(), wt); } // Explicit template instantations diff --git a/cpp/src/community/leiden.cuh b/cpp/src/community/leiden.cuh index f2f84433284..141f8beac40 100644 --- a/cpp/src/community/leiden.cuh +++ b/cpp/src/community/leiden.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,8 @@ #include +#include + namespace cugraph { template @@ -28,7 +30,8 @@ class Leiden : public Louvain { using weight_t = typename graph_type::weight_type; Leiden(raft::handle_t const &handle, graph_type const &graph) - : Louvain(handle, graph), constraint_v_(graph.number_of_vertices) + : Louvain(handle, graph), + constraint_v_(graph.number_of_vertices, handle.get_stream()) { } @@ -38,22 +41,28 @@ class Leiden : public Louvain { { this->timer_start("update_clustering_constrained"); - rmm::device_vector next_cluster_v(this->cluster_v_); - rmm::device_vector delta_Q_v(graph.number_of_edges); - rmm::device_vector cluster_hash_v(graph.number_of_edges); - rmm::device_vector old_cluster_sum_v(graph.number_of_vertices); + rmm::device_uvector next_cluster_v(this->dendrogram_->current_level_size(), + this->stream_); + rmm::device_uvector delta_Q_v(graph.number_of_edges, this->stream_); + rmm::device_uvector cluster_hash_v(graph.number_of_edges, this->stream_); + rmm::device_uvector old_cluster_sum_v(graph.number_of_vertices, this->stream_); - vertex_t const *d_src_indices = this->src_indices_v_.data().get(); + vertex_t const *d_src_indices = this->src_indices_v_.data(); vertex_t const *d_dst_indices = graph.indices; - vertex_t *d_cluster_hash = cluster_hash_v.data().get(); - vertex_t *d_cluster = this->cluster_v_.data().get(); - weight_t const *d_vertex_weights = this->vertex_weights_v_.data().get(); - weight_t *d_cluster_weights = this->cluster_weights_v_.data().get(); - weight_t *d_delta_Q = delta_Q_v.data().get(); - vertex_t *d_constraint = constraint_v_.data().get(); + vertex_t *d_cluster_hash = cluster_hash_v.data(); + vertex_t *d_cluster = this->dendrogram_->current_level_begin(); + weight_t const *d_vertex_weights = this->vertex_weights_v_.data(); + weight_t *d_cluster_weights = this->cluster_weights_v_.data(); 
+ weight_t *d_delta_Q = delta_Q_v.data(); + vertex_t *d_constraint = constraint_v_.data(); + + thrust::copy(rmm::exec_policy(this->stream_)->on(this->stream_), + this->dendrogram_->current_level_begin(), + this->dendrogram_->current_level_end(), + next_cluster_v.data()); - weight_t new_Q = - this->modularity(total_edge_weight, resolution, graph, this->cluster_v_.data().get()); + weight_t new_Q = this->modularity( + total_edge_weight, resolution, graph, this->dendrogram_->current_level_begin()); weight_t cur_Q = new_Q - 1; @@ -83,13 +92,13 @@ class Leiden : public Louvain { up_down = !up_down; - new_Q = this->modularity(total_edge_weight, resolution, graph, next_cluster_v.data().get()); + new_Q = this->modularity(total_edge_weight, resolution, graph, next_cluster_v.data()); if (new_Q > cur_Q) { thrust::copy(rmm::exec_policy(this->stream_)->on(this->stream_), next_cluster_v.begin(), next_cluster_v.end(), - this->cluster_v_.begin()); + this->dendrogram_->current_level_begin()); } } @@ -97,9 +106,7 @@ class Leiden : public Louvain { return cur_Q; } - std::pair operator()(vertex_t *d_cluster_vec, - size_t max_level, - weight_t resolution) + weight_t operator()(size_t max_level, weight_t resolution) override { size_t num_level{0}; @@ -109,57 +116,50 @@ class Leiden : public Louvain { weight_t best_modularity = weight_t{-1}; - // - // Initialize every cluster to reference each vertex to itself - // - thrust::sequence(rmm::exec_policy(this->stream_)->on(this->stream_), - this->cluster_v_.begin(), - this->cluster_v_.end()); - thrust::copy(rmm::exec_policy(this->stream_)->on(this->stream_), - this->cluster_v_.begin(), - this->cluster_v_.end(), - d_cluster_vec); - // // Our copy of the graph. Each iteration of the outer loop will // shrink this copy of the graph. 
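(Editorial aside, not part of the patch: this operator(), and Louvain::operator() later in the diff, now follow the same per-pass pattern with the new Dendrogram. A minimal standalone sketch of that pattern follows, assuming the Dendrogram interface from dendrogram.cuh above; record_one_level and the include paths are illustrative assumptions, not names from the patch.)

#include <community/dendrogram.cuh>    // path assumed; defines cugraph::Dendrogram

#include <rmm/thrust_rmm_allocator.h>  // rmm::exec_policy, as used in this branch
#include <thrust/sequence.h>

template <typename vertex_t>
void record_one_level(cugraph::Dendrogram<vertex_t> &dendrogram,
                      vertex_t num_vertices,
                      cudaStream_t stream)
{
  // Allocate storage for this pass's per-vertex cluster assignment...
  dendrogram.add_level(num_vertices, stream);

  // ...and start every vertex in its own cluster, which is what
  // initialize_dendrogram_level() does in the code below.
  thrust::sequence(rmm::exec_policy(stream)->on(stream),
                   dendrogram.current_level_begin(),
                   dendrogram.current_level_end());

  // The clustering pass then updates the current level in place; after
  // shrink_graph() the next pass adds a smaller level, and the flattening
  // step later reads levels 0..num_levels()-1 back through
  // get_level_ptr_nocheck() / get_level_size_nocheck().
}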
// - GraphCSRView current_graph(this->offsets_v_.data().get(), - this->indices_v_.data().get(), - this->weights_v_.data().get(), + GraphCSRView current_graph(this->offsets_v_.data(), + this->indices_v_.data(), + this->weights_v_.data(), this->number_of_vertices_, this->number_of_edges_); - current_graph.get_source_indices(this->src_indices_v_.data().get()); + current_graph.get_source_indices(this->src_indices_v_.data()); while (num_level < max_level) { + // + // Initialize every cluster to reference each vertex to itself + // + this->dendrogram_->add_level(current_graph.number_of_vertices); + + thrust::sequence(rmm::exec_policy(this->stream_)->on(this->stream_), + this->dendrogram_->current_level_begin(), + this->dendrogram_->current_level_end()); + this->compute_vertex_and_cluster_weights(current_graph); weight_t new_Q = this->update_clustering(total_edge_weight, resolution, current_graph); - thrust::copy(rmm::exec_policy(this->stream_)->on(this->stream_), - this->cluster_v_.begin(), - this->cluster_v_.end(), - constraint_v_.begin()); - new_Q = update_clustering_constrained(total_edge_weight, resolution, current_graph); if (new_Q <= best_modularity) { break; } best_modularity = new_Q; - this->shrink_graph(current_graph, d_cluster_vec); + this->shrink_graph(current_graph); num_level++; } this->timer_display(std::cout); - return std::make_pair(num_level, best_modularity); + return best_modularity; } private: - rmm::device_vector constraint_v_; + rmm::device_uvector constraint_v_; }; } // namespace cugraph diff --git a/cpp/src/community/louvain.cu b/cpp/src/community/louvain.cu index 81a68a31663..aef6fcdafde 100644 --- a/cpp/src/community/louvain.cu +++ b/cpp/src/community/louvain.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,10 +14,13 @@ * limitations under the License. 
*/ +#include #include #include #include +#include + namespace cugraph { namespace detail { @@ -31,10 +34,29 @@ std::pair louvain(raft::handle_t const &handle, { CUGRAPH_EXPECTS(graph_view.edge_data != nullptr, "Invalid input argument: louvain expects a weighted graph"); - CUGRAPH_EXPECTS(clustering != nullptr, "Invalid input argument: clustering is null"); + CUGRAPH_EXPECTS(clustering != nullptr, + "Invalid input argument: clustering is null, should be a device pointer to " + "memory for storing the result"); Louvain> runner(handle, graph_view); - return runner(clustering, max_level, resolution); + weight_t wt = runner(max_level, resolution); + + rmm::device_uvector vertex_ids_v(graph_view.number_of_vertices, handle.get_stream()); + + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::make_counting_iterator(0), // MNMG - base vertex id + thrust::make_counting_iterator( + graph_view.number_of_vertices), // MNMG - base vertex id + number_of_vertices + vertex_ids_v.begin()); + + partition_at_level(handle, + runner.get_dendrogram(), + vertex_ids_v.data(), + clustering, + runner.get_dendrogram().num_levels()); + + // FIXME: Consider returning the Dendrogram at some point + return std::make_pair(runner.get_dendrogram().num_levels(), wt); } template @@ -45,7 +67,9 @@ std::pair louvain( size_t max_level, weight_t resolution) { - CUGRAPH_EXPECTS(clustering != nullptr, "Invalid input argument: clustering is null"); + CUGRAPH_EXPECTS(clustering != nullptr, + "Invalid input argument: clustering is null, should be a device pointer to " + "memory for storing the result"); // "FIXME": remove this check and the guards below // @@ -61,7 +85,13 @@ std::pair louvain( } else { experimental::Louvain> runner(handle, graph_view); - return runner(clustering, max_level, resolution); + + weight_t wt = runner(max_level, resolution); + // TODO: implement this... + // runner.get_dendrogram().partition_at_level(clustering, runner.get_dendrogram().num_levels()); + + // FIXME: Consider returning the Dendrogram at some point + return std::make_pair(runner.get_dendrogram().num_levels(), wt); } } diff --git a/cpp/src/community/louvain.cuh b/cpp/src/community/louvain.cuh index 7ca3638f42b..f13c64867cb 100644 --- a/cpp/src/community/louvain.cuh +++ b/cpp/src/community/louvain.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,11 +17,13 @@ #include -#include - #include #include +#include + +#include + //#define TIMING #ifdef TIMING @@ -44,26 +46,42 @@ class Louvain { hr_timer_(), #endif handle_(handle), + dendrogram_(std::make_unique>()), // FIXME: Don't really need to copy here but would need // to change the logic to populate this properly // in generate_superverticies_graph. 
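(Editorial aside, not part of the patch: the partition_at_level() calls above flatten the dendrogram by composing the per-level maps with relabel(). Below is a host-side toy of the same idea, assuming a single GPU with contiguous vertex ids starting at 0; flatten_toy is an illustrative name, not part of the API.)

#include <cstddef>
#include <cstdint>
#include <vector>

// levels[0] has one entry per original vertex; levels[l] has one entry per
// cluster produced by level l-1, so the final clustering is the composition
// of the per-level maps -- which is what partition_at_level() applies on the
// device via cugraph::experimental::relabel().
std::vector<int32_t> flatten_toy(std::vector<std::vector<int32_t>> const &levels)
{
  std::vector<int32_t> clustering(levels.empty() ? 0 : levels[0].size());
  for (std::size_t v = 0; v < clustering.size(); ++v) {
    clustering[v] = static_cast<int32_t>(v);  // start from the vertex ids
  }

  for (auto const &level : levels) {
    for (auto &c : clustering) { c = level[c]; }  // relabel by this level's map
  }
  return clustering;
}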
// - offsets_v_(graph.offsets, graph.offsets + graph.number_of_vertices + 1), - indices_v_(graph.indices, graph.indices + graph.number_of_edges), - weights_v_(graph.edge_data, graph.edge_data + graph.number_of_edges), - src_indices_v_(graph.number_of_edges), - vertex_weights_v_(graph.number_of_vertices), - cluster_weights_v_(graph.number_of_vertices), - cluster_v_(graph.number_of_vertices), - tmp_arr_v_(graph.number_of_vertices), - cluster_inverse_v_(graph.number_of_vertices), + offsets_v_(graph.number_of_vertices + 1, handle.get_stream()), + indices_v_(graph.number_of_edges, handle.get_stream()), + weights_v_(graph.number_of_edges, handle.get_stream()), + src_indices_v_(graph.number_of_edges, handle.get_stream()), + vertex_weights_v_(graph.number_of_vertices, handle.get_stream()), + cluster_weights_v_(graph.number_of_vertices, handle.get_stream()), + tmp_arr_v_(graph.number_of_vertices, handle.get_stream()), + cluster_inverse_v_(graph.number_of_vertices, handle.get_stream()), number_of_vertices_(graph.number_of_vertices), number_of_edges_(graph.number_of_edges), stream_(handle.get_stream()) { + thrust::copy(rmm::exec_policy(stream_)->on(stream_), + graph.offsets, + graph.offsets + graph.number_of_vertices + 1, + offsets_v_.begin()); + + thrust::copy(rmm::exec_policy(stream_)->on(stream_), + graph.indices, + graph.indices + graph.number_of_edges, + indices_v_.begin()); + + thrust::copy(rmm::exec_policy(stream_)->on(stream_), + graph.edge_data, + graph.edge_data + graph.number_of_edges, + weights_v_.begin()); } + virtual ~Louvain() {} + weight_t modularity(weight_t total_edge_weight, weight_t resolution, graph_t const &graph, @@ -71,43 +89,45 @@ class Louvain { { vertex_t n_verts = graph.number_of_vertices; - rmm::device_vector inc(n_verts, weight_t{0.0}); - rmm::device_vector deg(n_verts, weight_t{0.0}); + rmm::device_uvector inc(n_verts, stream_); + rmm::device_uvector deg(n_verts, stream_); - edge_t const *d_offsets = graph.offsets; - vertex_t const *d_indices = graph.indices; - weight_t const *d_weights = graph.edge_data; - weight_t *d_inc = inc.data().get(); - weight_t *d_deg = deg.data().get(); + thrust::fill(rmm::exec_policy(stream_)->on(stream_), inc.begin(), inc.end(), weight_t{0.0}); + thrust::fill(rmm::exec_policy(stream_)->on(stream_), deg.begin(), deg.end(), weight_t{0.0}); // FIXME: Already have weighted degree computed in main loop, // could pass that in rather than computing d_deg... 
which // would save an atomicAdd (synchronization) // - thrust::for_each( - rmm::exec_policy(stream_)->on(stream_), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(graph.number_of_vertices), - [d_inc, d_deg, d_offsets, d_indices, d_weights, d_cluster] __device__(vertex_t v) { - vertex_t community = d_cluster[v]; - weight_t increase{0.0}; - weight_t degree{0.0}; - - for (edge_t loc = d_offsets[v]; loc < d_offsets[v + 1]; ++loc) { - vertex_t neighbor = d_indices[loc]; - degree += d_weights[loc]; - if (d_cluster[neighbor] == community) { increase += d_weights[loc]; } - } + thrust::for_each(rmm::exec_policy(stream_)->on(stream_), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(graph.number_of_vertices), + [d_inc = inc.data(), + d_deg = deg.data(), + d_offsets = graph.offsets, + d_indices = graph.indices, + d_weights = graph.edge_data, + d_cluster] __device__(vertex_t v) { + vertex_t community = d_cluster[v]; + weight_t increase{0.0}; + weight_t degree{0.0}; + + for (edge_t loc = d_offsets[v]; loc < d_offsets[v + 1]; ++loc) { + vertex_t neighbor = d_indices[loc]; + degree += d_weights[loc]; + if (d_cluster[neighbor] == community) { increase += d_weights[loc]; } + } - if (degree > weight_t{0.0}) atomicAdd(d_deg + community, degree); - if (increase > weight_t{0.0}) atomicAdd(d_inc + community, increase); - }); + if (degree > weight_t{0.0}) atomicAdd(d_deg + community, degree); + if (increase > weight_t{0.0}) atomicAdd(d_inc + community, increase); + }); weight_t Q = thrust::transform_reduce( rmm::exec_policy(stream_)->on(stream_), thrust::make_counting_iterator(0), thrust::make_counting_iterator(graph.number_of_vertices), - [d_deg, d_inc, total_edge_weight, resolution] __device__(vertex_t community) { + [d_deg = deg.data(), d_inc = inc.data(), total_edge_weight, resolution] __device__( + vertex_t community) { return ((d_inc[community] / total_edge_weight) - resolution * (d_deg[community] * d_deg[community]) / (total_edge_weight * total_edge_weight)); @@ -118,37 +138,35 @@ class Louvain { return Q; } - virtual std::pair operator()(vertex_t *d_cluster_vec, - size_t max_level, - weight_t resolution) - { - size_t num_level{0}; + Dendrogram &get_dendrogram() const { return *dendrogram_; } + + std::unique_ptr> move_dendrogram() { return dendrogram_; } + virtual weight_t operator()(size_t max_level, weight_t resolution) + { weight_t total_edge_weight = thrust::reduce(rmm::exec_policy(stream_)->on(stream_), weights_v_.begin(), weights_v_.end()); weight_t best_modularity = weight_t{-1}; - // - // Initialize every cluster to reference each vertex to itself - // - thrust::sequence(rmm::exec_policy(stream_)->on(stream_), cluster_v_.begin(), cluster_v_.end()); - thrust::copy( - rmm::exec_policy(stream_)->on(stream_), cluster_v_.begin(), cluster_v_.end(), d_cluster_vec); - // // Our copy of the graph. Each iteration of the outer loop will // shrink this copy of the graph. 
// - GraphCSRView current_graph(offsets_v_.data().get(), - indices_v_.data().get(), - weights_v_.data().get(), + GraphCSRView current_graph(offsets_v_.data(), + indices_v_.data(), + weights_v_.data(), number_of_vertices_, number_of_edges_); - current_graph.get_source_indices(src_indices_v_.data().get()); + current_graph.get_source_indices(src_indices_v_.data()); + + while (dendrogram_->num_levels() < max_level) { + // + // Initialize every cluster to reference each vertex to itself + // + initialize_dendrogram_level(current_graph.number_of_vertices); - while (num_level < max_level) { compute_vertex_and_cluster_weights(current_graph); weight_t new_Q = update_clustering(total_edge_weight, resolution, current_graph); @@ -157,14 +175,17 @@ class Louvain { best_modularity = new_Q; - shrink_graph(current_graph, d_cluster_vec); + shrink_graph(current_graph); - num_level++; + // TODO: Note, somehow after shrink_graph - having converted to device_uvector - the + // modularity of the new graph is too small... + // Was that always true? Perhaps I need to discard the bottom of the dendrogram + // in the break statement above? } timer_display(std::cout); - return std::make_pair(num_level, best_modularity); + return best_modularity; } protected: @@ -190,6 +211,15 @@ class Louvain { #endif } + virtual void initialize_dendrogram_level(vertex_t num_vertices) + { + dendrogram_->add_level(num_vertices); + + thrust::sequence(rmm::exec_policy(stream_)->on(stream_), + dendrogram_->current_level_begin(), + dendrogram_->current_level_end()); + } + public: void compute_vertex_and_cluster_weights(graph_type const &graph) { @@ -198,8 +228,8 @@ class Louvain { edge_t const *d_offsets = graph.offsets; vertex_t const *d_indices = graph.indices; weight_t const *d_weights = graph.edge_data; - weight_t *d_vertex_weights = vertex_weights_v_.data().get(); - weight_t *d_cluster_weights = cluster_weights_v_.data().get(); + weight_t *d_vertex_weights = vertex_weights_v_.data(); + weight_t *d_cluster_weights = cluster_weights_v_.data(); // // MNMG: copy_v_transform_reduce_out_nbr, then copy @@ -229,18 +259,25 @@ class Louvain { // // MNMG: This is the hard one, see writeup // - rmm::device_vector next_cluster_v(cluster_v_); - rmm::device_vector delta_Q_v(graph.number_of_edges); - rmm::device_vector cluster_hash_v(graph.number_of_edges); - rmm::device_vector old_cluster_sum_v(graph.number_of_vertices); - vertex_t *d_cluster_hash = cluster_hash_v.data().get(); - vertex_t *d_cluster = cluster_v_.data().get(); - weight_t const *d_vertex_weights = vertex_weights_v_.data().get(); - weight_t *d_cluster_weights = cluster_weights_v_.data().get(); - weight_t *d_delta_Q = delta_Q_v.data().get(); + // TODO: will this work, or do I need to use the size and then copy? 
+ rmm::device_uvector next_cluster_v(dendrogram_->current_level_size(), stream_); + rmm::device_uvector delta_Q_v(graph.number_of_edges, stream_); + rmm::device_uvector cluster_hash_v(graph.number_of_edges, stream_); + rmm::device_uvector old_cluster_sum_v(graph.number_of_vertices, stream_); + + vertex_t *d_cluster = dendrogram_->current_level_begin(); + weight_t const *d_vertex_weights = vertex_weights_v_.data(); + weight_t *d_cluster_weights = cluster_weights_v_.data(); + weight_t *d_delta_Q = delta_Q_v.data(); + + thrust::copy(rmm::exec_policy(stream_)->on(stream_), + dendrogram_->current_level_begin(), + dendrogram_->current_level_end(), + next_cluster_v.data()); - weight_t new_Q = modularity(total_edge_weight, resolution, graph, cluster_v_.data().get()); + weight_t new_Q = + modularity(total_edge_weight, resolution, graph, dendrogram_->current_level_begin()); weight_t cur_Q = new_Q - 1; @@ -259,13 +296,13 @@ class Louvain { up_down = !up_down; - new_Q = modularity(total_edge_weight, resolution, graph, next_cluster_v.data().get()); + new_Q = modularity(total_edge_weight, resolution, graph, next_cluster_v.data()); if (new_Q > cur_Q) { thrust::copy(rmm::exec_policy(stream_)->on(stream_), next_cluster_v.begin(), next_cluster_v.end(), - cluster_v_.begin()); + dendrogram_->current_level_begin()); } } @@ -276,45 +313,37 @@ class Louvain { void compute_delta_modularity(weight_t total_edge_weight, weight_t resolution, graph_type const &graph, - rmm::device_vector &cluster_hash_v, - rmm::device_vector &old_cluster_sum_v, - rmm::device_vector &delta_Q_v) + rmm::device_uvector &cluster_hash_v, + rmm::device_uvector &old_cluster_sum_v, + rmm::device_uvector &delta_Q_v) { - vertex_t const *d_src_indices = src_indices_v_.data().get(); - vertex_t const *d_dst_indices = graph.indices; edge_t const *d_offsets = graph.offsets; weight_t const *d_weights = graph.edge_data; - vertex_t const *d_cluster = cluster_v_.data().get(); - weight_t const *d_vertex_weights = vertex_weights_v_.data().get(); - weight_t const *d_cluster_weights = cluster_weights_v_.data().get(); + vertex_t const *d_cluster = dendrogram_->current_level_begin(); + weight_t const *d_vertex_weights = vertex_weights_v_.data(); + weight_t const *d_cluster_weights = cluster_weights_v_.data(); - vertex_t *d_cluster_hash = cluster_hash_v.data().get(); - weight_t *d_delta_Q = delta_Q_v.data().get(); - weight_t *d_old_cluster_sum = old_cluster_sum_v.data().get(); + vertex_t *d_cluster_hash = cluster_hash_v.data(); + weight_t *d_delta_Q = delta_Q_v.data(); + weight_t *d_old_cluster_sum = old_cluster_sum_v.data(); weight_t *d_new_cluster_sum = d_delta_Q; - thrust::fill(cluster_hash_v.begin(), cluster_hash_v.end(), vertex_t{-1}); - thrust::fill(delta_Q_v.begin(), delta_Q_v.end(), weight_t{0.0}); - thrust::fill(old_cluster_sum_v.begin(), old_cluster_sum_v.end(), weight_t{0.0}); + thrust::fill(rmm::exec_policy(stream_)->on(stream_), + cluster_hash_v.begin(), + cluster_hash_v.end(), + vertex_t{-1}); + thrust::fill( + rmm::exec_policy(stream_)->on(stream_), delta_Q_v.begin(), delta_Q_v.end(), weight_t{0.0}); + thrust::fill(rmm::exec_policy(stream_)->on(stream_), + old_cluster_sum_v.begin(), + old_cluster_sum_v.end(), + weight_t{0.0}); - // MNMG: New technique using reduce_by_key. Would require a segmented sort - // or a pair of sorts on each node, so probably slower than what's here. - // This might still be faster even in MNMG... 
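(Editorial aside, not part of the patch: a recurring pattern in this conversion is that rmm::device_uvector, unlike rmm::device_vector, allocates uninitialized memory and hands out raw pointers, so the value-initialization and copies that device_vector performed implicitly now appear as explicit thrust calls with an execution policy bound to the stream. A hedged sketch of both patterns follows; device_uvector_patterns is an illustrative name and the header paths are assumed to match this branch.)

#include <rmm/device_uvector.hpp>
#include <rmm/thrust_rmm_allocator.h>  // rmm::exec_policy, as used throughout this diff

#include <thrust/copy.h>
#include <thrust/fill.h>

#include <cstddef>

template <typename weight_t>
void device_uvector_patterns(weight_t const *d_src, std::size_t n, cudaStream_t stream)
{
  // What used to be rmm::device_vector<weight_t> zeros(n, weight_t{0});
  rmm::device_uvector<weight_t> zeros(n, stream);
  thrust::fill(rmm::exec_policy(stream)->on(stream), zeros.begin(), zeros.end(), weight_t{0});

  // What used to be rmm::device_vector<weight_t> copied(d_src, d_src + n);
  rmm::device_uvector<weight_t> copied(n, stream);
  thrust::copy(rmm::exec_policy(stream)->on(stream), d_src, d_src + n, copied.data());
}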
- // - // - // FIXME: Eventually this should use cuCollections concurrent map - // implementation, but that won't be available for a while. - // - // For each source vertex, we're going to build a hash - // table to the destination cluster ids. We can use - // the offsets ranges to define the bounds of the hash - // table. - // thrust::for_each(rmm::exec_policy(stream_)->on(stream_), thrust::make_counting_iterator(0), thrust::make_counting_iterator(graph.number_of_edges), - [d_src_indices, - d_dst_indices, + [d_src_indices = src_indices_v_.data(), + d_dst_indices = graph.indices, d_cluster, d_offsets, d_cluster_hash, @@ -355,7 +384,7 @@ class Louvain { [total_edge_weight, resolution, d_cluster_hash, - d_src_indices, + d_src_indices = src_indices_v_.data(), d_cluster, d_vertex_weights, d_delta_Q, @@ -383,33 +412,37 @@ class Louvain { } void assign_nodes(graph_type const &graph, - rmm::device_vector &cluster_hash_v, - rmm::device_vector &next_cluster_v, - rmm::device_vector &delta_Q_v, + rmm::device_uvector &cluster_hash_v, + rmm::device_uvector &next_cluster_v, + rmm::device_uvector &delta_Q_v, bool up_down) { - rmm::device_vector temp_vertices_v(graph.number_of_vertices); - rmm::device_vector temp_cluster_v(graph.number_of_vertices, vertex_t{-1}); - rmm::device_vector temp_delta_Q_v(graph.number_of_vertices, weight_t{0.0}); + rmm::device_uvector temp_vertices_v(graph.number_of_vertices, stream_); + rmm::device_uvector temp_cluster_v(graph.number_of_vertices, stream_); + rmm::device_uvector temp_delta_Q_v(graph.number_of_vertices, stream_); + + thrust::fill(rmm::exec_policy(stream_)->on(stream_), + temp_cluster_v.begin(), + temp_cluster_v.end(), + vertex_t{-1}); - weight_t *d_delta_Q = delta_Q_v.data().get(); - vertex_t *d_next_cluster = next_cluster_v.data().get(); - vertex_t *d_cluster_hash = cluster_hash_v.data().get(); - weight_t const *d_vertex_weights = vertex_weights_v_.data().get(); - weight_t *d_cluster_weights = cluster_weights_v_.data().get(); + thrust::fill(rmm::exec_policy(stream_)->on(stream_), + temp_delta_Q_v.begin(), + temp_delta_Q_v.end(), + weight_t{0}); auto cluster_reduce_iterator = - thrust::make_zip_iterator(thrust::make_tuple(d_cluster_hash, d_delta_Q)); + thrust::make_zip_iterator(thrust::make_tuple(cluster_hash_v.begin(), delta_Q_v.begin())); - auto output_edge_iterator2 = thrust::make_zip_iterator( - thrust::make_tuple(temp_cluster_v.data().get(), temp_delta_Q_v.data().get())); + auto output_edge_iterator2 = + thrust::make_zip_iterator(thrust::make_tuple(temp_cluster_v.begin(), temp_delta_Q_v.begin())); auto cluster_reduce_end = thrust::reduce_by_key(rmm::exec_policy(stream_)->on(stream_), src_indices_v_.begin(), src_indices_v_.end(), cluster_reduce_iterator, - temp_vertices_v.data().get(), + temp_vertices_v.data(), output_edge_iterator2, thrust::equal_to(), [] __device__(auto pair1, auto pair2) { @@ -422,22 +455,18 @@ class Louvain { return pair2; }); - vertex_t final_size = thrust::distance(temp_vertices_v.data().get(), cluster_reduce_end.first); - - vertex_t *d_temp_vertices = temp_vertices_v.data().get(); - vertex_t *d_temp_clusters = temp_cluster_v.data().get(); - weight_t *d_temp_delta_Q = temp_delta_Q_v.data().get(); + vertex_t final_size = thrust::distance(temp_vertices_v.data(), cluster_reduce_end.first); thrust::for_each(rmm::exec_policy(stream_)->on(stream_), thrust::make_counting_iterator(0), thrust::make_counting_iterator(final_size), - [d_temp_delta_Q, - up_down, - d_next_cluster, - d_temp_vertices, - d_vertex_weights, - d_temp_clusters, - 
d_cluster_weights] __device__(vertex_t id) { + [up_down, + d_temp_delta_Q = temp_delta_Q_v.data(), + d_next_cluster = next_cluster_v.data(), + d_temp_vertices = temp_vertices_v.data(), + d_vertex_weights = vertex_weights_v_.data(), + d_temp_clusters = temp_cluster_v.data(), + d_cluster_weights = cluster_weights_v_.data()] __device__(vertex_t id) { if ((d_temp_clusters[id] >= 0) && (d_temp_delta_Q[id] > weight_t{0.0})) { vertex_t new_cluster = d_temp_clusters[id]; vertex_t old_cluster = d_next_cluster[d_temp_vertices[id]]; @@ -453,38 +482,38 @@ class Louvain { }); } - void shrink_graph(graph_t &graph, vertex_t *d_cluster_vec) + void shrink_graph(graph_t &graph) { timer_start("shrinking graph"); // renumber the clusters to the range 0..(num_clusters-1) - vertex_t num_clusters = renumber_clusters(d_cluster_vec); - cluster_weights_v_.resize(num_clusters); + vertex_t num_clusters = renumber_clusters(); + cluster_weights_v_.resize(num_clusters, stream_); // shrink our graph to represent the graph of supervertices generate_superverticies_graph(graph, num_clusters); - // assign each new vertex to its own cluster - thrust::sequence(rmm::exec_policy(stream_)->on(stream_), cluster_v_.begin(), cluster_v_.end()); - timer_stop(stream_); } - vertex_t renumber_clusters(vertex_t *d_cluster_vec) + vertex_t renumber_clusters() { - vertex_t *d_tmp_array = tmp_arr_v_.data().get(); - vertex_t *d_cluster_inverse = cluster_inverse_v_.data().get(); - vertex_t *d_cluster = cluster_v_.data().get(); + vertex_t *d_tmp_array = tmp_arr_v_.data(); + vertex_t *d_cluster_inverse = cluster_inverse_v_.data(); + vertex_t *d_cluster = dendrogram_->current_level_begin(); - vertex_t old_num_clusters = cluster_v_.size(); + vertex_t old_num_clusters = dendrogram_->current_level_size(); // // New technique. 
Initialize cluster_inverse_v_ to 0 // - thrust::fill(cluster_inverse_v_.begin(), cluster_inverse_v_.end(), vertex_t{0}); + thrust::fill(rmm::exec_policy(stream_)->on(stream_), + cluster_inverse_v_.begin(), + cluster_inverse_v_.end(), + vertex_t{0}); // - // Iterate over every element c in cluster_v_ and set cluster_inverse_v to 1 + // Iterate over every element c in the current clustering and set cluster_inverse_v to 1 // auto first_1 = thrust::make_constant_iterator(1); auto last_1 = first_1 + old_num_clusters; @@ -492,7 +521,7 @@ class Louvain { thrust::scatter(rmm::exec_policy(stream_)->on(stream_), first_1, last_1, - cluster_v_.begin(), + dendrogram_->current_level_begin(), cluster_inverse_v_.begin()); // @@ -506,7 +535,7 @@ class Louvain { [d_cluster_inverse] __device__(const vertex_t idx) { return d_cluster_inverse[idx] == 1; }); vertex_t new_num_clusters = thrust::distance(tmp_arr_v_.begin(), copy_end); - tmp_arr_v_.resize(new_num_clusters); + tmp_arr_v_.resize(new_num_clusters, stream_); // // Now we can set each value in cluster_inverse of a cluster to its index @@ -525,32 +554,16 @@ class Louvain { d_cluster[i] = d_cluster_inverse[d_cluster[i]]; }); - thrust::for_each(rmm::exec_policy(stream_)->on(stream_), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(number_of_vertices_), - [d_cluster_vec, d_cluster] __device__(vertex_t i) { - d_cluster_vec[i] = d_cluster[d_cluster_vec[i]]; - }); - - cluster_inverse_v_.resize(new_num_clusters); - cluster_v_.resize(new_num_clusters); + cluster_inverse_v_.resize(new_num_clusters, stream_); return new_num_clusters; } void generate_superverticies_graph(graph_t &graph, vertex_t num_clusters) { - rmm::device_vector new_src_v(graph.number_of_edges); - rmm::device_vector new_dst_v(graph.number_of_edges); - rmm::device_vector new_weight_v(graph.number_of_edges); - - vertex_t *d_old_src = src_indices_v_.data().get(); - vertex_t *d_old_dst = graph.indices; - weight_t *d_old_weight = graph.edge_data; - vertex_t *d_new_src = new_src_v.data().get(); - vertex_t *d_new_dst = new_dst_v.data().get(); - vertex_t *d_clusters = cluster_v_.data().get(); - weight_t *d_new_weight = new_weight_v.data().get(); + rmm::device_uvector new_src_v(graph.number_of_edges, stream_); + rmm::device_uvector new_dst_v(graph.number_of_edges, stream_); + rmm::device_uvector new_weight_v(graph.number_of_edges, stream_); // // Renumber the COO @@ -558,13 +571,13 @@ class Louvain { thrust::for_each(rmm::exec_policy(stream_)->on(stream_), thrust::make_counting_iterator(0), thrust::make_counting_iterator(graph.number_of_edges), - [d_old_src, - d_old_dst, - d_old_weight, - d_new_src, - d_new_dst, - d_new_weight, - d_clusters] __device__(edge_t e) { + [d_old_src = src_indices_v_.data(), + d_old_dst = graph.indices, + d_old_weight = graph.edge_data, + d_new_src = new_src_v.data(), + d_new_dst = new_dst_v.data(), + d_new_weight = new_weight_v.data(), + d_clusters = dendrogram_->current_level_begin()] __device__(edge_t e) { d_new_src[e] = d_clusters[d_old_src[e]]; d_new_dst[e] = d_clusters[d_old_dst[e]]; d_new_weight[e] = d_old_weight[e]; @@ -572,39 +585,42 @@ class Louvain { thrust::stable_sort_by_key( rmm::exec_policy(stream_)->on(stream_), - d_new_dst, - d_new_dst + graph.number_of_edges, - thrust::make_zip_iterator(thrust::make_tuple(d_new_src, d_new_weight))); + new_dst_v.begin(), + new_dst_v.end(), + thrust::make_zip_iterator(thrust::make_tuple(new_src_v.begin(), new_weight_v.begin()))); thrust::stable_sort_by_key( rmm::exec_policy(stream_)->on(stream_), - 
d_new_src, - d_new_src + graph.number_of_edges, - thrust::make_zip_iterator(thrust::make_tuple(d_new_dst, d_new_weight))); + new_src_v.begin(), + new_src_v.end(), + thrust::make_zip_iterator(thrust::make_tuple(new_dst_v.begin(), new_weight_v.begin()))); // // Now we reduce by key to combine the weights of duplicate // edges. // - auto start = thrust::make_zip_iterator(thrust::make_tuple(d_new_src, d_new_dst)); - auto new_start = thrust::make_zip_iterator(thrust::make_tuple(d_old_src, d_old_dst)); - auto new_end = thrust::reduce_by_key(rmm::exec_policy(stream_)->on(stream_), + auto start = + thrust::make_zip_iterator(thrust::make_tuple(new_src_v.begin(), new_dst_v.begin())); + auto new_start = + thrust::make_zip_iterator(thrust::make_tuple(src_indices_v_.data(), graph.indices)); + auto new_end = thrust::reduce_by_key(rmm::exec_policy(stream_)->on(stream_), start, start + graph.number_of_edges, - d_new_weight, + new_weight_v.begin(), new_start, - d_old_weight, + graph.edge_data, thrust::equal_to>(), thrust::plus()); graph.number_of_edges = thrust::distance(new_start, new_end.first); graph.number_of_vertices = num_clusters; - detail::fill_offset(d_old_src, graph.offsets, num_clusters, graph.number_of_edges, stream_); + detail::fill_offset( + src_indices_v_.data(), graph.offsets, num_clusters, graph.number_of_edges, stream_); CHECK_CUDA(stream_); - src_indices_v_.resize(graph.number_of_edges); - indices_v_.resize(graph.number_of_edges); - weights_v_.resize(graph.number_of_edges); + src_indices_v_.resize(graph.number_of_edges, stream_); + indices_v_.resize(graph.number_of_edges, stream_); + weights_v_.resize(graph.number_of_edges, stream_); } protected: @@ -613,27 +629,28 @@ class Louvain { edge_t number_of_edges_; cudaStream_t stream_; + std::unique_ptr> dendrogram_; + // // Copy of graph // - rmm::device_vector offsets_v_; - rmm::device_vector indices_v_; - rmm::device_vector weights_v_; - rmm::device_vector src_indices_v_; + rmm::device_uvector offsets_v_; + rmm::device_uvector indices_v_; + rmm::device_uvector weights_v_; + rmm::device_uvector src_indices_v_; // // Weights and clustering across iterations of algorithm // - rmm::device_vector vertex_weights_v_; - rmm::device_vector cluster_weights_v_; - rmm::device_vector cluster_v_; + rmm::device_uvector vertex_weights_v_; + rmm::device_uvector cluster_weights_v_; // // Temporaries used within kernels. Each iteration uses less // of this memory // - rmm::device_vector tmp_arr_v_; - rmm::device_vector cluster_inverse_v_; + rmm::device_uvector tmp_arr_v_; + rmm::device_uvector cluster_inverse_v_; #ifdef TIMING HighResTimer hr_timer_; diff --git a/cpp/src/experimental/louvain.cuh b/cpp/src/experimental/louvain.cuh index 4257953d390..cbd831a67bc 100644 --- a/cpp/src/experimental/louvain.cuh +++ b/cpp/src/experimental/louvain.cuh @@ -33,6 +33,8 @@ #include +#include + //#define TIMING #ifdef TIMING @@ -374,9 +376,9 @@ create_graph(raft::handle_t const &handle, // as above would allow us to eventually run the single GPU version of single level Louvain // on the contracted graphs - which should be more efficient. // -// FIXME: We should return the dendogram and let the python layer clean it up (or have a -// separate C++ function to flatten the dendogram). There are customers that might -// like the dendogram and the implementation would be a bit cleaner if we did the +// FIXME: We should return the dendrogram and let the python layer clean it up (or have a +// separate C++ function to flatten the dendrogram). 
There are customers that might +// like the dendrogram and the implementation would be a bit cleaner if we did the // collapsing as a separate step // template @@ -398,6 +400,7 @@ class Louvain { hr_timer_(), #endif handle_(handle), + dendrogram_(std::make_unique>()), current_graph_view_(graph_view), compute_partition_(graph_view), local_num_vertices_(graph_view.get_number_of_local_vertices()), @@ -406,7 +409,6 @@ class Louvain { local_num_edges_(graph_view.get_number_of_edges()), vertex_weights_v_(graph_view.get_number_of_local_vertices()), cluster_weights_v_(graph_view.get_number_of_local_vertices()), - cluster_v_(graph_view.get_number_of_local_vertices()), number_of_vertices_(graph_view.get_number_of_local_vertices()), stream_(handle.get_stream()) { @@ -440,11 +442,12 @@ class Louvain { } } - virtual std::pair operator()(vertex_t *d_cluster_vec, - size_t max_level, - weight_t resolution) + Dendrogram &get_dendrogram() const { return *dendrogram_; } + + std::unique_ptr> move_dendrogram() { return dendrogram_; } + + virtual weight_t operator()(size_t max_level, weight_t resolution) { - size_t num_level{0}; weight_t best_modularity = weight_t{-1}; #ifdef CUCO_STATIC_MAP_DEFINED @@ -457,17 +460,12 @@ class Louvain { [] __device__(auto, auto, weight_t wt, auto, auto) { return wt; }, weight_t{0}); - // - // Initialize every cluster to reference each vertex to itself - // - thrust::sequence(rmm::exec_policy(stream_)->on(stream_), - cluster_v_.begin(), - cluster_v_.end(), - base_vertex_id_); - thrust::copy( - rmm::exec_policy(stream_)->on(stream_), cluster_v_.begin(), cluster_v_.end(), d_cluster_vec); + while (dendrogram_->num_levels() < max_level) { + // + // Initialize every cluster to reference each vertex to itself + // + initialize_dendrogram_level(current_graph_view_.get_number_of_local_vertices()); - while (num_level < max_level) { compute_vertex_and_cluster_weights(); weight_t new_Q = update_clustering(total_edge_weight, resolution); @@ -476,15 +474,13 @@ class Louvain { best_modularity = new_Q; - shrink_graph(d_cluster_vec); - - num_level++; + shrink_graph(); } timer_display(std::cout); #endif - return std::make_pair(num_level, best_modularity); + return best_modularity; } protected: @@ -512,6 +508,17 @@ class Louvain { #endif } + protected: + void initialize_dendrogram_level(vertex_t num_vertices) + { + dendrogram_->add_level(num_vertices); + + thrust::sequence(rmm::exec_policy(stream_)->on(stream_), + dendrogram_->current_level_begin(), + dendrogram_->current_level_end(), + base_vertex_id_); + } + public: weight_t modularity(weight_t total_edge_weight, weight_t resolution) { @@ -561,23 +568,16 @@ class Louvain { cluster_weights_v_.begin()); cache_vertex_properties( - vertex_weights_v_, src_vertex_weights_cache_v_, dst_vertex_weights_cache_v_); + vertex_weights_v_.begin(), src_vertex_weights_cache_v_, dst_vertex_weights_cache_v_); cache_vertex_properties( - cluster_weights_v_, src_cluster_weights_cache_v_, dst_cluster_weights_cache_v_); + cluster_weights_v_.begin(), src_cluster_weights_cache_v_, dst_cluster_weights_cache_v_); timer_stop(stream_); } - // - // FIXME: Consider returning d_src_cache and d_dst_cache - // (as a pair). This would be a nice optimization - // for single GPU, as we wouldn't need to make 3 copies - // of the data, could return a pair of device pointers to - // local_input_v. 
- // - template - void cache_vertex_properties(rmm::device_vector const &local_input_v, + template + void cache_vertex_properties(iterator_t const &local_input_iterator, rmm::device_vector &src_cache_v, rmm::device_vector &dst_cache_v, bool src = true, @@ -586,13 +586,13 @@ class Louvain { if (src) { src_cache_v.resize(current_graph_view_.get_number_of_local_adj_matrix_partition_rows()); copy_to_adj_matrix_row( - handle_, current_graph_view_, local_input_v.begin(), src_cache_v.begin()); + handle_, current_graph_view_, local_input_iterator, src_cache_v.begin()); } if (dst) { dst_cache_v.resize(current_graph_view_.get_number_of_local_adj_matrix_partition_cols()); copy_to_adj_matrix_col( - handle_, current_graph_view_, local_input_v.begin(), dst_cache_v.begin()); + handle_, current_graph_view_, local_input_iterator, dst_cache_v.begin()); } } @@ -601,9 +601,10 @@ class Louvain { { timer_start("update_clustering"); - rmm::device_vector next_cluster_v(cluster_v_); + rmm::device_vector next_cluster_v(dendrogram_->current_level_begin(), + dendrogram_->current_level_end()); - cache_vertex_properties(next_cluster_v, src_cluster_cache_v_, dst_cluster_cache_v_); + cache_vertex_properties(next_cluster_v.begin(), src_cluster_cache_v_, dst_cluster_cache_v_); weight_t new_Q = modularity(total_edge_weight, resolution); weight_t cur_Q = new_Q - 1; @@ -620,7 +621,7 @@ class Louvain { up_down = !up_down; - cache_vertex_properties(next_cluster_v, src_cluster_cache_v_, dst_cluster_cache_v_); + cache_vertex_properties(next_cluster_v.begin(), src_cluster_cache_v_, dst_cluster_cache_v_); new_Q = modularity(total_edge_weight, resolution); @@ -628,12 +629,13 @@ class Louvain { thrust::copy(rmm::exec_policy(stream_)->on(stream_), next_cluster_v.begin(), next_cluster_v.end(), - cluster_v_.begin()); + dendrogram_->current_level_begin()); } } // cache the final clustering locally on each cpu - cache_vertex_properties(cluster_v_, src_cluster_cache_v_, dst_cluster_cache_v_); + cache_vertex_properties( + dendrogram_->current_level_begin(), src_cluster_cache_v_, dst_cluster_cache_v_); timer_stop(stream_); return cur_Q; @@ -662,7 +664,7 @@ class Louvain { old_cluster_sum_v.begin()); cache_vertex_properties( - old_cluster_sum_v, src_old_cluster_sum_cache_v, empty_cache_weight_v_, true, false); + old_cluster_sum_v.begin(), src_old_cluster_sum_cache_v, empty_cache_weight_v_, true, false); detail::src_cluster_equality_comparator_t compare( src_indices_v_.data().get(), @@ -1118,7 +1120,7 @@ class Louvain { }); cache_vertex_properties( - cluster_weights_v_, src_cluster_weights_cache_v_, dst_cluster_weights_cache_v_); + cluster_weights_v_.begin(), src_cluster_weights_cache_v_, dst_cluster_weights_cache_v_); } template @@ -1204,7 +1206,7 @@ class Louvain { return std::make_pair(relevant_edges_v, relevant_edge_weights_v); } - void shrink_graph(vertex_t *d_cluster_vec) + void shrink_graph() { timer_start("shrinking graph"); @@ -1216,18 +1218,12 @@ class Louvain { // renumber the clusters to the range 0..(num_clusters-1) vertex_t num_clusters = renumber_clusters(hash_map); - renumber_result(hash_map, d_cluster_vec, num_clusters); + // TODO: renumber result needs to be moved to the dendrogram + // renumber_result(hash_map, num_clusters); // shrink our graph to represent the graph of supervertices generate_supervertices_graph(hash_map, num_clusters); - // assign each new vertex to its own cluster - // MNMG: This can be done locally with no communication required - thrust::sequence(rmm::exec_policy(stream_)->on(stream_), - 
cluster_v_.begin(), - cluster_v_.end(), - base_vertex_id_); - timer_stop(stream_); } @@ -1401,119 +1397,6 @@ class Louvain { } } - void renumber_result(cuco::static_map const &hash_map, - vertex_t *d_cluster_vec, - vertex_t num_clusters) - { - if (graph_view_t::is_multi_gpu) { - // - // FIXME: Perhaps there's a general purpose function hidden here... - // Given a set of vertex_t values, and a distributed set of - // vertex properties, go to the proper node and retrieve - // the vertex properties and return them to this gpu. - // - std::size_t capacity{static_cast((local_num_vertices_) / 0.7)}; - cuco::static_map result_hash_map( - capacity, std::numeric_limits::max(), std::numeric_limits::max()); - - auto cluster_iter = thrust::make_transform_iterator(d_cluster_vec, [] __device__(vertex_t c) { - return detail::create_cuco_pair_t()(c); - }); - - result_hash_map.insert(cluster_iter, cluster_iter + local_num_vertices_); - - rmm::device_vector used_cluster_ids_v(result_hash_map.get_size()); - - auto transform_iter = thrust::make_transform_iterator( - thrust::make_counting_iterator(0), - [d_result_hash_map = result_hash_map.get_device_view()] __device__(std::size_t idx) { - return d_result_hash_map.begin_slot()[idx].first.load(); - }); - - used_cluster_ids_v = detail::remove_elements_from_vector( - used_cluster_ids_v, - transform_iter, - transform_iter + result_hash_map.get_capacity(), - [vmax = std::numeric_limits::max()] __device__(vertex_t cluster) { - return cluster != vmax; - }, - stream_); - - auto partition_cluster_ids_iter = thrust::make_transform_iterator( - used_cluster_ids_v.begin(), - [d_vertex_device_view = compute_partition_.vertex_device_view()] __device__(vertex_t v) { - return d_vertex_device_view(v); - }); - - rmm::device_vector old_cluster_ids_v = - variable_shuffle(handle_, - used_cluster_ids_v.size(), - used_cluster_ids_v.begin(), - partition_cluster_ids_iter); - - rmm::device_vector original_gpus_v = - variable_shuffle( - handle_, - used_cluster_ids_v.size(), - thrust::make_constant_iterator(rank_), - partition_cluster_ids_iter); - - // Now each GPU has old cluster ids, let's compute new cluster ids - rmm::device_vector new_cluster_ids_v(old_cluster_ids_v.size()); - - thrust::transform(rmm::exec_policy(stream_)->on(stream_), - old_cluster_ids_v.begin(), - old_cluster_ids_v.end(), - new_cluster_ids_v.begin(), - [base_vertex_id = base_vertex_id_, - d_cluster = cluster_v_.data().get(), - d_hash_map = hash_map.get_device_view()] __device__(vertex_t cluster_id) { - vertex_t c = d_cluster[cluster_id - base_vertex_id]; - auto pos = d_hash_map.find(c); - return pos->second.load(); - }); - - // Shuffle everything back - old_cluster_ids_v = variable_shuffle( - handle_, old_cluster_ids_v.size(), old_cluster_ids_v.begin(), original_gpus_v.begin()); - new_cluster_ids_v = variable_shuffle( - handle_, new_cluster_ids_v.size(), new_cluster_ids_v.begin(), original_gpus_v.begin()); - - // Update result_hash_map - thrust::for_each_n( - rmm::exec_policy(stream_)->on(stream_), - thrust::make_zip_iterator( - thrust::make_tuple(old_cluster_ids_v.begin(), new_cluster_ids_v.begin())), - old_cluster_ids_v.size(), - [d_result_hash_map = result_hash_map.get_device_view()] __device__(auto pair) mutable { - auto pos = d_result_hash_map.find(thrust::get<0>(pair)); - pos->second.store(thrust::get<1>(pair)); - }); - - thrust::transform( - rmm::exec_policy(stream_)->on(stream_), - d_cluster_vec, - d_cluster_vec + number_of_vertices_, - d_cluster_vec, - [d_result_hash_map = 
result_hash_map.get_device_view()] __device__(vertex_t c) { - auto pos = d_result_hash_map.find(c); - return pos->second.load(); - }); - - } else { - thrust::transform(rmm::exec_policy(stream_)->on(stream_), - d_cluster_vec, - d_cluster_vec + number_of_vertices_, - d_cluster_vec, - [d_hash_map = hash_map.get_device_view(), - d_dst_cluster = dst_cluster_cache_v_.data()] __device__(vertex_t v) { - vertex_t c = d_dst_cluster[v]; - auto pos = d_hash_map.find(c); - return pos->second.load(); - }); - } - } - void generate_supervertices_graph(cuco::static_map const &hash_map, vertex_t num_clusters) { @@ -1672,6 +1555,8 @@ class Louvain { raft::handle_t const &handle_; cudaStream_t stream_; + std::unique_ptr> dendrogram_; + vertex_t number_of_vertices_; vertex_t base_vertex_id_{0}; vertex_t base_src_vertex_id_{0}; @@ -1707,7 +1592,6 @@ class Louvain { rmm::device_vector src_cluster_weights_cache_v_{}; rmm::device_vector dst_cluster_weights_cache_v_{}; - rmm::device_vector cluster_v_; rmm::device_vector src_cluster_cache_v_{}; rmm::device_vector dst_cluster_cache_v_{}; diff --git a/cpp/tests/experimental/louvain_test.cu b/cpp/tests/experimental/louvain_test.cu index 4a47b1a1aca..35a26923df6 100644 --- a/cpp/tests/experimental/louvain_test.cu +++ b/cpp/tests/experimental/louvain_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/notebooks/community/ECG.ipynb b/notebooks/community/ECG.ipynb index d7595dadb26..4a9eedd3c3a 100644 --- a/notebooks/community/ECG.ipynb +++ b/notebooks/community/ECG.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# Ensemble Clustering for Graphs (ECG)\n", + "# Does not run on Pascal\n", "In this notebook, we will use cuGraph to identify the cluster in a test graph using the Ensemble Clustering for Graph approach. \n", "\n", "\n", diff --git a/notebooks/community/Louvain.ipynb b/notebooks/community/Louvain.ipynb index e5e5e6a04ed..bfb8e299f49 100755 --- a/notebooks/community/Louvain.ipynb +++ b/notebooks/community/Louvain.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# Louvain Community Detection\n", + "# Does not run on Pascal\n", "\n", "\n", "In this notebook, we will use cuGraph to identify the cluster in a test graph using the Louvain algorithm \n", diff --git a/notebooks/community/Subgraph-Extraction.ipynb b/notebooks/community/Subgraph-Extraction.ipynb index e068ef53aa5..cac52262d4d 100755 --- a/notebooks/community/Subgraph-Extraction.ipynb +++ b/notebooks/community/Subgraph-Extraction.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# Subgraph Extraction\n", + "# Does not run on Pascal\n", "\n", "In this notebook, we will use cuGraph to extract a subgraph from the test graph. \n", "\n", diff --git a/notebooks/community/Triangle-Counting.ipynb b/notebooks/community/Triangle-Counting.ipynb index 09d7906a526..19d3f838fc6 100755 --- a/notebooks/community/Triangle-Counting.ipynb +++ b/notebooks/community/Triangle-Counting.ipynb @@ -21,7 +21,7 @@ "\n", "\n", "## Introduction\n", - "Triancle Counting, as the name implies, finds the number of triangles in a graph. Triangles are important in computing the clustering Coefficient and can be used for clustering. \n", + "Triangle Counting, as the name implies, finds the number of triangles in a graph. Triangles are important in computing the clustering Coefficient and can be used for clustering. 
\n", "\n", "\n", "To compute the Pagerank scores for a graph in cuGraph we use:
\n", diff --git a/notebooks/cugraph_benchmarks/bfs_benchmark.ipynb b/notebooks/cugraph_benchmarks/bfs_benchmark.ipynb index 58eb94bf0ee..6ae695e206e 100644 --- a/notebooks/cugraph_benchmarks/bfs_benchmark.ipynb +++ b/notebooks/cugraph_benchmarks/bfs_benchmark.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# BFS Performance Benchmarking\n", + "# Skip notebook test\n", "\n", "This notebook benchmarks performance of running BFS within cuGraph against NetworkX. \n", "\n", diff --git a/notebooks/cugraph_benchmarks/louvain_benchmark.ipynb b/notebooks/cugraph_benchmarks/louvain_benchmark.ipynb index a12b7c4bcc2..00e99a28617 100644 --- a/notebooks/cugraph_benchmarks/louvain_benchmark.ipynb +++ b/notebooks/cugraph_benchmarks/louvain_benchmark.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# Louvain Performance Benchmarking\n", + "# Skip notebook test\n", "\n", "This notebook benchmarks performance improvement of running the Louvain clustering algorithm within cuGraph against NetworkX. The test is run over eight test networks (graphs) and then results plotted. \n", "

\n", diff --git a/notebooks/cugraph_benchmarks/nx_cugraph_bc_benchmarking.ipynb b/notebooks/cugraph_benchmarks/nx_cugraph_bc_benchmarking.ipynb index 6f76868f9a4..403c317ac0a 100644 --- a/notebooks/cugraph_benchmarks/nx_cugraph_bc_benchmarking.ipynb +++ b/notebooks/cugraph_benchmarks/nx_cugraph_bc_benchmarking.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# Benchmarking NetworkX compatibility\n", + "# Skip notebook test\n", "This notebook benchmark the use of a NetworkX Graph object as input into algorithms.

\n", "The intention of the feature is to be able to drop cuGraph into existing NetworkX code in spot where performance is not optimal.\n", "\n", diff --git a/notebooks/cugraph_benchmarks/pagerank_benchmark.ipynb b/notebooks/cugraph_benchmarks/pagerank_benchmark.ipynb index c2933a10c7d..d0416efdd87 100644 --- a/notebooks/cugraph_benchmarks/pagerank_benchmark.ipynb +++ b/notebooks/cugraph_benchmarks/pagerank_benchmark.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# PageRank Performance Benchmarking\n", + "# Skip notebook test\n", "\n", "This notebook benchmarks performance of running PageRank within cuGraph against NetworkX. NetworkX contains several implementations of PageRank. This benchmark will compare cuGraph versus the defaukt Nx implementation as well as the SciPy version\n", "\n", diff --git a/notebooks/cugraph_benchmarks/release.ipynb b/notebooks/cugraph_benchmarks/release.ipynb index d3110da3621..3c6da55abc0 100644 --- a/notebooks/cugraph_benchmarks/release.ipynb +++ b/notebooks/cugraph_benchmarks/release.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# Release Benchmarking\n", + "# Skip notebook test\n", "\n", "With every release, RAPIDS publishes a release slide deck that includes the current performance state of cuGraph. \n", "This notebook, starting with release 0.15, runs all the various algorithms to computes the performance gain. \n", diff --git a/notebooks/cugraph_benchmarks/sssp_benchmark.ipynb b/notebooks/cugraph_benchmarks/sssp_benchmark.ipynb index 2d040e0acaf..32b562e7a1e 100644 --- a/notebooks/cugraph_benchmarks/sssp_benchmark.ipynb +++ b/notebooks/cugraph_benchmarks/sssp_benchmark.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# SSSP Performance Benchmarking\n", + "# Skip notebook test\n", "\n", "This notebook benchmarks performance of running SSSP within cuGraph against NetworkX. \n", "\n", diff --git a/notebooks/demo/uvm.ipynb b/notebooks/demo/uvm.ipynb index d279be8ed54..8fa2b08b6d1 100644 --- a/notebooks/demo/uvm.ipynb +++ b/notebooks/demo/uvm.ipynb @@ -6,6 +6,7 @@ "source": [ "# Oversubscribing GPU memory in cuGraph\n", "#### Author : Alex Fender\n", + "# Skip notebook test\n", "\n", "In this notebook, we will show how to **scale to 4x larger graphs than before** without incurring a performance drop using managed memory features in cuGraph. We will compute the PageRank of each user in Twitter's dataset on a single GPU as an example. 
This technique applies to all features.\n", "\n", diff --git a/python/cugraph/tests/test_ecg.py b/python/cugraph/tests/test_ecg.py index 0512ef3b1a5..ba705a787ee 100644 --- a/python/cugraph/tests/test_ecg.py +++ b/python/cugraph/tests/test_ecg.py @@ -16,32 +16,38 @@ import pytest import networkx as nx import cugraph + from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than + from pathlib import PurePath def cugraph_call(G, min_weight, ensemble_size): df = cugraph.ecg(G, min_weight, ensemble_size) num_parts = df["partition"].max() + 1 - score = cugraph.analyzeClustering_modularity(G, num_parts, df, - 'vertex', 'partition') + score = cugraph.analyzeClustering_modularity( + G, num_parts, df, "vertex", "partition" + ) return score, num_parts def golden_call(graph_file): - if graph_file == PurePath(utils.RAPIDS_DATASET_ROOT_DIR)/"dolphins.csv": + if graph_file == PurePath(utils.RAPIDS_DATASET_ROOT_DIR) / "dolphins.csv": return 0.4962422251701355 - if graph_file == PurePath(utils.RAPIDS_DATASET_ROOT_DIR)/"karate.csv": + if graph_file == PurePath(utils.RAPIDS_DATASET_ROOT_DIR) / "karate.csv": return 0.38428664207458496 - if graph_file == PurePath(utils.RAPIDS_DATASET_ROOT_DIR)/"netscience.csv": + if ( + graph_file + == PurePath(utils.RAPIDS_DATASET_ROOT_DIR) / "netscience.csv" + ): return 0.9279554486274719 -DATASETS = [PurePath(utils.RAPIDS_DATASET_ROOT_DIR)/f for f in [ - "karate.csv", - "dolphins.csv", - "netscience.csv"] +DATASETS = [ + PurePath(utils.RAPIDS_DATASET_ROOT_DIR) / f + for f in ["karate.csv", "dolphins.csv", "netscience.csv"] ] MIN_WEIGHTS = [0.05, 0.10, 0.15] @@ -55,20 +61,32 @@ def golden_call(graph_file): def test_ecg_clustering(graph_file, min_weight, ensemble_size): gc.collect() - # Read in the graph and get a cugraph object - cu_M = utils.read_csv_file(graph_file, read_weights_in_sp=False) - G = cugraph.Graph() - G.from_cudf_edgelist(cu_M, source="0", destination="1", edge_attr="2") + if is_device_version_less_than((7, 0)): + cu_M = utils.read_csv_file(graph_file, read_weights_in_sp=False) + G = cugraph.Graph() + G.from_cudf_edgelist(cu_M, source="0", destination="1", edge_attr="2") - # Get the modularity score for partitioning versus random assignment - cu_score, num_parts = cugraph_call(G, min_weight, ensemble_size) - golden_score = golden_call(graph_file) + # Get the modularity score for partitioning versus random assignment + with pytest.raises(RuntimeError): + cu_score, num_parts = cugraph_call(G, min_weight, ensemble_size) + else: + # Read in the graph and get a cugraph object + cu_M = utils.read_csv_file(graph_file, read_weights_in_sp=False) + G = cugraph.Graph() + G.from_cudf_edgelist(cu_M, source="0", destination="1", edge_attr="2") + + # Get the modularity score for partitioning versus random assignment + cu_score, num_parts = cugraph_call(G, min_weight, ensemble_size) + golden_score = golden_call(graph_file) - # Assert that the partitioning has better modularity than the random - # assignment - assert cu_score > (0.95 * golden_score) + # Assert that the partitioning has better modularity than the random + # assignment + assert cu_score > (0.95 * golden_score) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", DATASETS) @pytest.mark.parametrize("min_weight", MIN_WEIGHTS) @pytest.mark.parametrize("ensemble_size", ENSEMBLE_SIZES) @@ -78,8 +96,7 @@ def test_ecg_clustering_nx(graph_file, min_weight, ensemble_size): # Read in the graph and get a 
NetworkX graph M = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True) G = nx.from_pandas_edgelist( - M, source="0", target="1", edge_attr="weight", - create_using=nx.Graph() + M, source="0", target="1", edge_attr="weight", create_using=nx.Graph() ) # Get the modularity score for partitioning versus random assignment diff --git a/python/cugraph/tests/test_leiden.py b/python/cugraph/tests/test_leiden.py index d6a7f86b5c5..89203d5014c 100644 --- a/python/cugraph/tests/test_leiden.py +++ b/python/cugraph/tests/test_leiden.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -20,6 +20,8 @@ import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than + # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from # 'collections.abc' is deprecated, and in 3.8 it will stop working) for @@ -53,6 +55,9 @@ def cugraph_louvain(G, edgevals=False): return parts, mod +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_leiden(graph_file): gc.collect() @@ -73,6 +78,9 @@ def test_leiden(graph_file): assert leiden_mod >= (0.99 * louvain_mod) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_leiden_nx(graph_file): gc.collect() @@ -81,18 +89,13 @@ def test_leiden_nx(graph_file): NM = utils.read_csv_for_nx(graph_file) if edgevals: - G = nx.from_pandas_edgelist(NM, - create_using=nx.Graph(), - source="0", - target="1" - ) + G = nx.from_pandas_edgelist( + NM, create_using=nx.Graph(), source="0", target="1" + ) else: - G = nx.from_pandas_edgelist(NM, - create_using=nx.Graph(), - source="0", - target="1", - edge_attr="2" - ) + G = nx.from_pandas_edgelist( + NM, create_using=nx.Graph(), source="0", target="1", edge_attr="2" + ) leiden_parts, leiden_mod = cugraph_leiden(G, edgevals=True) louvain_parts, louvain_mod = cugraph_louvain(G, edgevals=True) diff --git a/python/cugraph/tests/test_louvain.py b/python/cugraph/tests/test_louvain.py index d6b0030eb73..50e9ccaa4c5 100644 --- a/python/cugraph/tests/test_louvain.py +++ b/python/cugraph/tests/test_louvain.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -18,6 +18,7 @@ import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -71,51 +72,63 @@ def networkx_call(M): def test_louvain_with_edgevals(graph_file): gc.collect() - M = utils.read_csv_for_nx(graph_file) - cu_M = utils.read_csv_file(graph_file) - cu_parts, cu_mod = cugraph_call(cu_M, edgevals=True) + if is_device_version_less_than((7, 0)): + cu_M = utils.read_csv_file(graph_file) + with pytest.raises(RuntimeError): + cu_parts, cu_mod = cugraph_call(cu_M) + else: + M = utils.read_csv_for_nx(graph_file) + cu_M = utils.read_csv_file(graph_file) + cu_parts, cu_mod = cugraph_call(cu_M, edgevals=True) - nx_parts = networkx_call(M) - # Calculating modularity scores for comparison - Gnx = nx.from_pandas_edgelist( - M, source="0", target="1", edge_attr="weight", create_using=nx.Graph() - ) + nx_parts = networkx_call(M) + # Calculating modularity scores for comparison + Gnx = nx.from_pandas_edgelist( + M, source="0", target="1", + edge_attr="weight", create_using=nx.Graph() + ) - cu_parts = cu_parts.to_pandas() - cu_map = dict(zip(cu_parts['vertex'], cu_parts['partition'])) + cu_parts = cu_parts.to_pandas() + cu_map = dict(zip(cu_parts["vertex"], cu_parts["partition"])) - assert set(nx_parts.keys()) == set(cu_map.keys()) + assert set(nx_parts.keys()) == set(cu_map.keys()) - cu_mod_nx = community.modularity(cu_map, Gnx) - nx_mod = community.modularity(nx_parts, Gnx) + cu_mod_nx = community.modularity(cu_map, Gnx) + nx_mod = community.modularity(nx_parts, Gnx) - assert len(cu_parts) == len(nx_parts) - assert cu_mod > (0.82 * nx_mod) - assert abs(cu_mod - cu_mod_nx) < 0.0001 + assert len(cu_parts) == len(nx_parts) + assert cu_mod > (0.82 * nx_mod) + assert abs(cu_mod - cu_mod_nx) < 0.0001 @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_louvain(graph_file): gc.collect() - M = utils.read_csv_for_nx(graph_file) - cu_M = utils.read_csv_file(graph_file) - cu_parts, cu_mod = cugraph_call(cu_M) - nx_parts = networkx_call(M) + if is_device_version_less_than((7, 0)): + cu_M = utils.read_csv_file(graph_file) + with pytest.raises(RuntimeError): + cu_parts, cu_mod = cugraph_call(cu_M) + else: + M = utils.read_csv_for_nx(graph_file) + cu_M = utils.read_csv_file(graph_file) + cu_parts, cu_mod = cugraph_call(cu_M) + nx_parts = networkx_call(M) - # Calculating modularity scores for comparison - Gnx = nx.from_pandas_edgelist( - M, source="0", target="1", edge_attr="weight", create_using=nx.Graph() - ) + # Calculating modularity scores for comparison + Gnx = nx.from_pandas_edgelist( + M, source="0", target="1", + edge_attr="weight", create_using=nx.Graph() + ) - cu_parts = cu_parts.to_pandas() - cu_map = dict(zip(cu_parts['vertex'], cu_parts['partition'])) + cu_parts = cu_parts.to_pandas() + cu_map = dict(zip(cu_parts["vertex"], cu_parts["partition"])) - assert set(nx_parts.keys()) == set(cu_map.keys()) + assert set(nx_parts.keys()) == set(cu_map.keys()) - cu_mod_nx = community.modularity(cu_map, Gnx) - nx_mod = community.modularity(nx_parts, Gnx) + cu_mod_nx = community.modularity(cu_map, Gnx) + nx_mod = community.modularity(nx_parts, Gnx) - assert len(cu_parts) == len(nx_parts) - assert cu_mod > (0.82 * nx_mod) - assert abs(cu_mod - cu_mod_nx) < 0.0001 + assert len(cu_parts) == len(nx_parts) + assert cu_mod > (0.82 * nx_mod) + assert abs(cu_mod - 
cu_mod_nx) < 0.0001 From 466e29a587251703796e27acbb660e4234a69b77 Mon Sep 17 00:00:00 2001 From: Rick Ratzel <3039903+rlratzel@users.noreply.github.com> Date: Tue, 9 Feb 2021 21:27:10 -0600 Subject: [PATCH 165/343] Updated CI scripts to use a different error handling convention, updated LD_LIBRARY_PATH for project flash runs (#1386) * Updated CI scripts to use a different error handling convention * Updated LD_LIBRARY_PATH for project flash runs * Added extra logging to report status after each test command * Added comments * Removed unused "top-20 slowest" report * Minor updates for consistency Tested locally by simulating various error conditions (removed .so files, inserted errors in NBs, killed processes, etc.) and checked exit codes. Still need to verify in a project Flash env, but using CI for that. Authors: - Rick Ratzel (@rlratzel) Approvers: - AJ Schmidt (@ajschmidt8) - Ray Douglass (@raydouglass) - Dillon Cullinan (@dillon-cullinan) URL: https://github.com/rapidsai/cugraph/pull/1386 --- ci/getGTestTimes.sh | 46 ------------------------------------- ci/gpu/build.sh | 28 ++++++++++++++--------- ci/gpu/test-notebooks.sh | 20 ++++++++++++---- ci/test.sh | 49 ++++++++++++++++++++++++---------------- ci/utils/nbtest.sh | 16 +++++++++++-- 5 files changed, 76 insertions(+), 83 deletions(-) delete mode 100755 ci/getGTestTimes.sh diff --git a/ci/getGTestTimes.sh b/ci/getGTestTimes.sh deleted file mode 100755 index 8a3752d76e2..00000000000 --- a/ci/getGTestTimes.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/bin/bash -# Copyright (c) 2019-2020, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# This script will print the gtest results sorted by runtime. This will print -# the results two ways: first by printing all tests sorted by runtime, then by -# printing all tests grouped by test binary with tests sorted by runtime within -# the group. -# -# To use this script, capture the test run output to a file then run this script -# with the file as the first arg, or just redirect test output to this script. 
- -awk '/^Running GoogleTest .+$/ { - testbinary = $3 - } - /^\[ OK \].+$/ { - testtime = substr($(NF-1),2) - newtestdata = testbinary ":" substr($0,14) - alltestdata = alltestdata newtestdata "\n" - testdata[testbinary] = testdata[testbinary] newtestdata "\n" - totaltime = totaltime + testtime - } - END { - # Print all tests sorted by time - system("echo \"" alltestdata "\" | sort -r -t\\( -nk2") - print "\n================================================================================" - # Print test binaries with tests sorted by time - print "Tests grouped by test binary:" - for (testbinary in testdata) { - print testbinary - system("echo \"" testdata[testbinary] "\" | sort -r -t\\( -nk2") - } - print "\n================================================================================" - print totaltime " milliseconds = " totaltime/60000 " minutes" - } -' $1 diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 019d03e21da..0fef7b62f8d 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -1,10 +1,10 @@ #!/usr/bin/env bash -# Copyright (c) 2018-2020, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. ########################################## # cuGraph GPU build & testscript for CI # ########################################## -set -e -set -o pipefail +set -e # abort the script on error, this will change for running tests (see below) +set -o pipefail # piped commands propagate their error NUMARGS=$# ARGS=$* @@ -98,10 +98,15 @@ fi # TEST - Run GoogleTest and py.tests for libcugraph and cuGraph ################################################################################ -set +e -Eo pipefail -EXITCODE=0 +# Switch to +e to allow failing commands to continue the script, which is needed +# so all testing commands run regardless of pass/fail. This requires the desired +# exit code to be managed using the ERR trap. +set +e # allow script to continue on error +set -E # ERR traps are inherited by subcommands trap "EXITCODE=1" ERR +EXITCODE=0 + if hasArg --skip-tests; then gpuci_logger "Skipping Tests" else @@ -117,18 +122,19 @@ else TEST_MODE_FLAG="" fi + gpuci_logger "Running cuGraph test.sh..." ${WORKSPACE}/ci/test.sh ${TEST_MODE_FLAG} | tee testoutput.txt + gpuci_logger "Ran cuGraph test.sh : return code was: $?, gpu/build.sh exit code is now: $EXITCODE" - echo -e "\nTOP 20 SLOWEST TESTS:\n" - # Wrap in echo to prevent non-zero exit since this command is non-essential - echo "$(${WORKSPACE}/ci/getGTestTimes.sh testoutput.txt | head -20)" - + gpuci_logger "Running cuGraph notebook test script..." ${WORKSPACE}/ci/gpu/test-notebooks.sh 2>&1 | tee nbtest.log + gpuci_logger "Ran cuGraph notebook test script : return code was: $?, gpu/build.sh exit code is now: $EXITCODE" python ${WORKSPACE}/ci/utils/nbtestlog2junitxml.py nbtest.log fi -if [ -n "\${CODECOV_TOKEN}" ]; then - codecov -t \$CODECOV_TOKEN +if [ -n "${CODECOV_TOKEN}" ]; then + codecov -t $CODECOV_TOKEN fi +gpuci_logger "gpu/build.sh returning value: $EXITCODE" return ${EXITCODE} diff --git a/ci/gpu/test-notebooks.sh b/ci/gpu/test-notebooks.sh index f5f768d7f12..650132f116d 100755 --- a/ci/gpu/test-notebooks.sh +++ b/ci/gpu/test-notebooks.sh @@ -12,10 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-#RAPIDS_DIR=/rapids +# Any failing command will set EXITCODE to non-zero +set -e # abort the script on error, this will change for running tests (see below) +set -o pipefail # piped commands propagate their error +set -E # ERR traps are inherited by subcommands +trap "EXITCODE=1" ERR + NOTEBOOKS_DIR=${WORKSPACE}/notebooks NBTEST=${WORKSPACE}/ci/utils/nbtest.sh LIBCUDF_KERNEL_CACHE_PATH=${WORKSPACE}/.jitcache +EXITCODE=0 cd ${NOTEBOOKS_DIR} TOPLEVEL_NB_FOLDERS=$(find . -name *.ipynb |cut -d'/' -f2|sort -u) @@ -23,7 +29,10 @@ TOPLEVEL_NB_FOLDERS=$(find . -name *.ipynb |cut -d'/' -f2|sort -u) ## Check env env -EXITCODE=0 +# Do not abort the script on error. This allows all tests to run regardless of +# pass/fail but relies on the ERR trap above to manage the EXITCODE for the +# script. +set +e # Always run nbtest in all TOPLEVEL_NB_FOLDERS, set EXITCODE to failure # if any run fails @@ -32,12 +41,14 @@ for folder in ${TOPLEVEL_NB_FOLDERS}; do echo "FOLDER: ${folder}" echo "========================================" cd ${NOTEBOOKS_DIR}/${folder} - for nb in $(python ${WORKSPACE}/ci/gpu/notebook_list.py); do + NBLIST=$(python ${WORKSPACE}/ci/gpu/notebook_list.py) + for nb in ${NBLIST}; do nbBasename=$(basename ${nb}) cd $(dirname ${nb}) nvidia-smi ${NBTEST} ${nbBasename} - EXITCODE=$((EXITCODE | $?)) + echo "Ran nbtest for $nb : return code was: $?, test script exit code is now: $EXITCODE" + echo rm -rf ${LIBCUDF_KERNEL_CACHE_PATH}/* cd ${NOTEBOOKS_DIR}/${folder} done @@ -45,4 +56,5 @@ done nvidia-smi +echo "Notebook test script exiting with value: $EXITCODE" exit ${EXITCODE} diff --git a/ci/test.sh b/ci/test.sh index c173088862d..db060d3a55a 100755 --- a/ci/test.sh +++ b/ci/test.sh @@ -12,9 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -# note: do not use set -e in order to allow all gtest invocations to take place, -# and instead keep track of exit status and exit with an overall exit status -set -o pipefail +# Any failing command will set EXITCODE to non-zero +set -e # abort the script on error, this will change for running tests (see below) +set -o pipefail # piped commands propagate their error +set -E # ERR traps are inherited by subcommands +trap "EXITCODE=1" ERR NUMARGS=$# ARGS=$* @@ -22,7 +24,7 @@ THISDIR=$(cd $(dirname $0);pwd) CUGRAPH_ROOT=$(cd ${THISDIR}/..;pwd) GTEST_ARGS="--gtest_output=xml:${CUGRAPH_ROOT}/test-results/" DOWNLOAD_MODE="" -ERRORCODE=0 +EXITCODE=0 export RAPIDS_DATASET_ROOT_DIR=${CUGRAPH_ROOT}/datasets @@ -50,27 +52,20 @@ else echo "Download datasets..." cd ${RAPIDS_DATASET_ROOT_DIR} bash ./get_test_data.sh ${DOWNLOAD_MODE} - ERRORCODE=$((ERRORCODE | $?)) - # no need to run tests if dataset download fails - if (( ${ERRORCODE} != 0 )); then - exit ${ERRORCODE} - fi fi if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then cd ${CUGRAPH_ROOT}/cpp/build else - export LD_LIBRARY_PATH="$WORKSPACE/ci/artifacts/cugraph/cpu/conda_work/cpp/build:$LD_LIBRARY_PATH" + export LD_LIBRARY_PATH="$WORKSPACE/ci/artifacts/cugraph/cpu/conda_work/cpp/build:$CONDA_PREFIX/lib:$LD_LIBRARY_PATH" cd $WORKSPACE/ci/artifacts/cugraph/cpu/conda_work/cpp/build fi -for gt in tests/*_TEST; do - test_name=$(basename $gt) - echo "Running GoogleTest $test_name" - ${gt} ${GTEST_FILTER} ${GTEST_ARGS} - ERRORCODE=$((ERRORCODE | $?)) -done - +# FIXME: if possible, any install and build steps should be moved outside this +# script since a failing install/build step is treated as a failing test command +# and will not stop the script. 
This script is also only expected to run tests +# in a preconfigured environment, and install/build steps are unexpected side +# effects. if [[ "$PROJECT_FLASH" == "1" ]]; then CONDA_FILE=`find $WORKSPACE/ci/artifacts/cugraph/cpu/conda-bld/ -name "libcugraph*.tar.bz2"` CONDA_FILE=`basename "$CONDA_FILE" .tar.bz2` #get filename without extension @@ -83,14 +78,28 @@ if [[ "$PROJECT_FLASH" == "1" ]]; then $WORKSPACE/build.sh cugraph fi +# Do not abort the script on error from this point on. This allows all tests to +# run regardless of pass/fail, but relies on the ERR trap above to manage the +# EXITCODE for the script. +set +e + +echo "C++ gtests for cuGraph..." +for gt in tests/*_TEST; do + test_name=$(basename $gt) + echo "Running gtest $test_name" + ${gt} ${GTEST_FILTER} ${GTEST_ARGS} + echo "Ran gtest $test_name : return code was: $?, test script exit code is now: $EXITCODE" +done + echo "Python pytest for cuGraph..." cd ${CUGRAPH_ROOT}/python pytest --cache-clear --junitxml=${CUGRAPH_ROOT}/junit-cugraph.xml -v --cov-config=.coveragerc --cov=cugraph --cov-report=xml:${WORKSPACE}/python/cugraph/cugraph-coverage.xml --cov-report term --ignore=cugraph/raft --benchmark-disable -ERRORCODE=$((ERRORCODE | $?)) +echo "Ran Python pytest for cugraph : return code was: $?, test script exit code is now: $EXITCODE" echo "Python benchmarks for cuGraph (running as tests)..." cd ${CUGRAPH_ROOT}/benchmarks pytest -v -m "managedmem_on and poolallocator_on and tiny" --benchmark-disable -ERRORCODE=$((ERRORCODE | $?)) +echo "Ran Python benchmarks for cuGraph (running as tests) : return code was: $?, test script exit code is now: $EXITCODE" -exit ${ERRORCODE} +echo "Test script exiting with value: $EXITCODE" +exit ${EXITCODE} diff --git a/ci/utils/nbtest.sh b/ci/utils/nbtest.sh index 8c86baeaa09..ae8b52df106 100755 --- a/ci/utils/nbtest.sh +++ b/ci/utils/nbtest.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -12,6 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +# Any failing command will set EXITCODE to non-zero +set +e # do not abort the script on error +set -o pipefail # piped commands propagate their error +set -E # ERR traps are inherited by subcommands +trap "EXITCODE=1" ERR + +# Prepend the following code to all scripts generated from nbconvert. This +# allows all cell and line magic code to run and update the namespace as if +# running in jupyter, but will also tolerate failures due to running in a +# non-jupyter env. +# Note: depending on the assumptions of the notebook script, ignoring failures +# may not be acceptable (meaning the converted notebook simply cannot run +# outside of jupyter as-is), hence the warning. MAGIC_OVERRIDE_CODE=" def my_run_line_magic(*args, **kwargs): g=globals() @@ -58,7 +71,6 @@ for nb in $*; do NBEXITCODE=$? echo EXIT CODE: ${NBEXITCODE} echo - EXITCODE=$((EXITCODE | ${NBEXITCODE})) done exit ${EXITCODE} From 7e5859134950ba6da302356bdf5d31195b61efe2 Mon Sep 17 00:00:00 2001 From: Chuck Hastings <45364586+ChuckHastings@users.noreply.github.com> Date: Wed, 10 Feb 2021 09:32:49 -0500 Subject: [PATCH 166/343] Use new coarsen_graph primitive in Louvain (#1362) Modify experimental::louvain to use the new `coarsen_graph` primitive. 
This replaces the original implementation of `shrink_graph`. Authors: - Chuck Hastings (@ChuckHastings) Approvers: - Andrei Schaffer (@aschaffer) - Rick Ratzel (@rlratzel) - Seunghwa Kang (@seunghwak) URL: https://github.com/rapidsai/cugraph/pull/1362 --- benchmarks/bench_algos.py | 5 +- cpp/src/community/louvain.cu | 25 +- cpp/src/community/louvain.cuh | 10 - cpp/src/experimental/coarsen_graph.cu | 140 +++++++ cpp/src/experimental/louvain.cuh | 375 ++---------------- cpp/src/experimental/relabel.cu | 15 + cpp/tests/CMakeLists.txt | 4 +- .../community/{ecg_test.cu => ecg_test.cpp} | 63 ++- cpp/tests/community/leiden_test.cpp | 45 ++- .../{louvain_test.cu => louvain_test.cpp} | 98 +++-- 10 files changed, 357 insertions(+), 423 deletions(-) rename cpp/tests/community/{ecg_test.cu => ecg_test.cpp} (73%) rename cpp/tests/community/{louvain_test.cu => louvain_test.cpp} (62%) diff --git a/benchmarks/bench_algos.py b/benchmarks/bench_algos.py index 9be636ca480..f9f8bf9cf53 100644 --- a/benchmarks/bench_algos.py +++ b/benchmarks/bench_algos.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -32,6 +32,7 @@ def setFixtureParamNames(*args, **kwargs): import cugraph from cugraph.structure.number_map import NumberMap from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than import rmm from .params import FIXTURE_PARAMS @@ -212,6 +213,8 @@ def bench_jaccard(gpubenchmark, graphWithAdjListComputed): gpubenchmark(cugraph.jaccard, graphWithAdjListComputed) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_louvain(gpubenchmark, graphWithAdjListComputed): gpubenchmark(cugraph.louvain, graphWithAdjListComputed) diff --git a/cpp/src/community/louvain.cu b/cpp/src/community/louvain.cu index aef6fcdafde..a851777ad93 100644 --- a/cpp/src/community/louvain.cu +++ b/cpp/src/community/louvain.cu @@ -43,11 +43,10 @@ std::pair louvain(raft::handle_t const &handle, rmm::device_uvector vertex_ids_v(graph_view.number_of_vertices, handle.get_stream()); - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - thrust::make_counting_iterator(0), // MNMG - base vertex id - thrust::make_counting_iterator( - graph_view.number_of_vertices), // MNMG - base vertex id + number_of_vertices - vertex_ids_v.begin()); + thrust::sequence(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertex_ids_v.begin(), + vertex_ids_v.end(), + vertex_t{0}); partition_at_level(handle, runner.get_dendrogram(), @@ -87,8 +86,20 @@ std::pair louvain( runner(handle, graph_view); weight_t wt = runner(max_level, resolution); - // TODO: implement this... 
- // runner.get_dendrogram().partition_at_level(clustering, runner.get_dendrogram().num_levels()); + + rmm::device_uvector vertex_ids_v(graph_view.get_number_of_vertices(), + handle.get_stream()); + + thrust::sequence(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertex_ids_v.begin(), + vertex_ids_v.end(), + graph_view.get_local_vertex_first()); + + partition_at_level(handle, + runner.get_dendrogram(), + vertex_ids_v.data(), + clustering, + runner.get_dendrogram().num_levels()); // FIXME: Consider returning the Dendrogram at some point return std::make_pair(runner.get_dendrogram().num_levels(), wt); diff --git a/cpp/src/community/louvain.cuh b/cpp/src/community/louvain.cuh index f13c64867cb..e28f0f1746d 100644 --- a/cpp/src/community/louvain.cuh +++ b/cpp/src/community/louvain.cuh @@ -176,11 +176,6 @@ class Louvain { best_modularity = new_Q; shrink_graph(current_graph); - - // TODO: Note, somehow after shrink_graph - having converted to device_uvector - the - // modularity of the new graph is too small... - // Was that always true? Perhaps I need to discard the bottom of the dendrogram - // in the break statement above? } timer_display(std::cout); @@ -256,11 +251,6 @@ class Louvain { { timer_start("update_clustering"); - // - // MNMG: This is the hard one, see writeup - // - - // TODO: will this work, or do I need to use the size and then copy? rmm::device_uvector next_cluster_v(dendrogram_->current_level_size(), stream_); rmm::device_uvector delta_Q_v(graph.number_of_edges, stream_); rmm::device_uvector cluster_hash_v(graph.number_of_edges, stream_); diff --git a/cpp/src/experimental/coarsen_graph.cu b/cpp/src/experimental/coarsen_graph.cu index 7e312540896..02b0c388b31 100644 --- a/cpp/src/experimental/coarsen_graph.cu +++ b/cpp/src/experimental/coarsen_graph.cu @@ -569,5 +569,145 @@ coarsen_graph(raft::handle_t const &handle, int32_t const *labels, bool do_expensive_check); +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t const *labels, + bool do_expensive_check); + +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t const *labels, + bool do_expensive_check); + +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t const *labels, + bool do_expensive_check); + +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t const *labels, + bool do_expensive_check); + +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int64_t const *labels, + bool do_expensive_check); + +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int64_t const *labels, + bool do_expensive_check); + +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int64_t const *labels, + bool do_expensive_check); + +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int64_t const *labels, + bool do_expensive_check); + +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t const *labels, + bool do_expensive_check); + 
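/*
 * [Editor's sketch -- not part of this patch] The explicit instantiations in
 * this file expose coarsen_graph() as a reusable primitive; the Louvain
 * changes in this commit pair it with relabel() to implement the graph
 * "shrink" step that previously relied on a hand-rolled cuco::static_map
 * renumbering.  The function below shows that composition in isolation,
 * following the coarsen_graph/relabel declarations visible in this diff.
 * Header paths, the helper name shrink_by_clustering, and its parameter
 * names are illustrative assumptions, not library API.
 */
#include <experimental/graph.hpp>            // graph_t / graph_view_t (path assumed)
#include <experimental/graph_functions.hpp>  // coarsen_graph, relabel (path assumed)

#include <raft/handle.hpp>

#include <rmm/device_uvector.hpp>
#include <rmm/thrust_rmm_allocator.h>        // rmm::exec_policy (path assumed)

#include <thrust/sequence.h>

#include <memory>
#include <tuple>

// Collapse every cluster of graph_view into a supervertex and rewrite the
// current dendrogram level (num_cluster_entries values behind d_cluster) so
// that it stores coarse-graph vertex ids instead of the original cluster labels.
template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
std::unique_ptr<cugraph::experimental::graph_t<vertex_t, edge_t, weight_t, false, multi_gpu>>
shrink_by_clustering(
  raft::handle_t const &handle,
  cugraph::experimental::graph_view_t<vertex_t, edge_t, weight_t, false, multi_gpu> const
    &graph_view,
  vertex_t *d_cluster,
  vertex_t num_cluster_entries)
{
  std::unique_ptr<cugraph::experimental::graph_t<vertex_t, edge_t, weight_t, false, multi_gpu>>
    coarse_graph{};
  rmm::device_uvector<vertex_t> numbering_map(0, handle.get_stream());

  // 1. Coarsen: vertices sharing a cluster label become one supervertex and
  //    parallel edges are combined.  numbering_map[i] holds the old cluster
  //    label that became coarse vertex (first local coarse id + i).
  std::tie(coarse_graph, numbering_map) =
    cugraph::experimental::coarsen_graph(handle, graph_view, d_cluster, false);

  auto coarse_view = coarse_graph->view();

  // 2. Enumerate the coarse vertex ids that pair with numbering_map.
  rmm::device_uvector<vertex_t> new_ids(numbering_map.size(), handle.get_stream());
  thrust::sequence(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
                   new_ids.begin(),
                   new_ids.end(),
                   coarse_view.get_local_vertex_first());

  // 3. Relabel the dendrogram level in place: old cluster label -> coarse
  //    vertex id, so the next Louvain pass (and the final flattening of the
  //    dendrogram into a clustering) can index the coarse graph directly.
  cugraph::experimental::relabel<vertex_t, multi_gpu>(
    handle,
    std::make_tuple(static_cast<vertex_t const *>(numbering_map.begin()),
                    static_cast<vertex_t const *>(new_ids.begin())),
    static_cast<vertex_t>(numbering_map.size()),
    d_cluster,
    num_cluster_entries,
    false);

  return coarse_graph;
}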
+template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t const *labels, + bool do_expensive_check); + +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t const *labels, + bool do_expensive_check); + +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t const *labels, + bool do_expensive_check); + +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t const *labels, + bool do_expensive_check); + +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t const *labels, + bool do_expensive_check); + +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t const *labels, + bool do_expensive_check); + +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t const *labels, + bool do_expensive_check); + +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int64_t const *labels, + bool do_expensive_check); + +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int64_t const *labels, + bool do_expensive_check); + +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int64_t const *labels, + bool do_expensive_check); + +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int64_t const *labels, + bool do_expensive_check); + } // namespace experimental } // namespace cugraph diff --git a/cpp/src/experimental/louvain.cuh b/cpp/src/experimental/louvain.cuh index cbd831a67bc..f162cd17a61 100644 --- a/cpp/src/experimental/louvain.cuh +++ b/cpp/src/experimental/louvain.cuh @@ -26,6 +26,7 @@ #include +#include #include #include #include @@ -35,6 +36,8 @@ #include +#include + //#define TIMING #ifdef TIMING @@ -418,11 +421,16 @@ class Louvain { base_src_vertex_id_ = graph_view.get_local_adj_matrix_partition_row_first(0); base_dst_vertex_id_ = graph_view.get_local_adj_matrix_partition_col_first(0); - raft::copy(&local_num_edges_, - graph_view.offsets() + graph_view.get_local_adj_matrix_partition_row_last(0) - - graph_view.get_local_adj_matrix_partition_row_first(0), - 1, - stream_); + local_num_edges_ = thrust::transform_reduce( + thrust::host, + thrust::make_counting_iterator(0), + thrust::make_counting_iterator( + graph_view.get_number_of_local_adj_matrix_partitions()), + [&graph_view](auto indx) { + return graph_view.get_number_of_local_adj_matrix_partition_edges(indx); + }, + size_t{0}, + thrust::plus()); CUDA_TRY(cudaStreamSynchronize(stream_)); } @@ -1205,350 +1213,55 @@ class Louvain { return std::make_pair(relevant_edges_v, relevant_edge_weights_v); } +#endif void shrink_graph() { timer_start("shrinking graph"); - std::size_t capacity{static_cast((local_num_rows_ + local_num_cols_) / 0.7)}; - - cuco::static_map hash_map( - capacity, std::numeric_limits::max(), std::numeric_limits::max()); - - // renumber the clusters to the range 0..(num_clusters-1) - vertex_t 
num_clusters = renumber_clusters(hash_map); - - // TODO: renumber result needs to be moved to the dendrogram - // renumber_result(hash_map, num_clusters); - - // shrink our graph to represent the graph of supervertices - generate_supervertices_graph(hash_map, num_clusters); - - timer_stop(stream_); - } - - vertex_t renumber_clusters(cuco::static_map &hash_map) - { - rmm::device_vector cluster_inverse_v(local_num_vertices_, vertex_t{0}); - - // - // FIXME: Faster to iterate from graph_.get_vertex_partition_first() - // to graph_.get_vertex_partition_last()? That would potentially - // result in adding a cluster that isn't used on this GPU, - // although I don't think it would break the result in any way. - // - // This would also eliminate this use of src_indices_v_. - // - auto it_src = thrust::make_transform_iterator( - src_indices_v_.begin(), - [base_src_vertex_id = base_src_vertex_id_, - d_src_cluster_cache = src_cluster_cache_v_.data().get()] __device__(auto idx) { - return detail::create_cuco_pair_t()( - d_src_cluster_cache[idx - base_src_vertex_id]); - }); - - auto it_dst = thrust::make_transform_iterator( - current_graph_view_.indices(), - [base_dst_vertex_id = base_dst_vertex_id_, - d_dst_cluster_cache = dst_cluster_cache_v_.data().get()] __device__(auto idx) { - return detail::create_cuco_pair_t()( - d_dst_cluster_cache[idx - base_dst_vertex_id]); - }); - - hash_map.insert(it_src, it_src + local_num_edges_); - hash_map.insert(it_dst, it_dst + local_num_edges_); - - // Now I need to get the keys into an array and shuffle them - rmm::device_vector used_cluster_ids_v(hash_map.get_size()); - - auto transform_iter = thrust::make_transform_iterator( - thrust::make_counting_iterator(0), - [d_hash_map = hash_map.get_device_view()] __device__(std::size_t idx) { - return d_hash_map.begin_slot()[idx].first.load(); - }); - - used_cluster_ids_v = detail::remove_elements_from_vector( - used_cluster_ids_v, - transform_iter, - transform_iter + hash_map.get_capacity(), - [vmax = std::numeric_limits::max()] __device__(vertex_t cluster) { - return cluster != vmax; - }, - stream_); - - auto partition_cluster_ids_iter = thrust::make_transform_iterator( - used_cluster_ids_v.begin(), - [d_vertex_device_view = compute_partition_.vertex_device_view()] __device__(vertex_t v) { - return d_vertex_device_view(v); - }); - - rmm::device_vector original_gpus_v; - rmm::device_vector my_cluster_ids_v = - variable_shuffle( - handle_, used_cluster_ids_v.size(), used_cluster_ids_v.begin(), partition_cluster_ids_iter); - - if (graph_view_t::is_multi_gpu) { - original_gpus_v = variable_shuffle( - handle_, - used_cluster_ids_v.size(), - thrust::make_constant_iterator(rank_), - partition_cluster_ids_iter); - } + rmm::device_uvector numbering_map(0, stream_); - // - // Now my_cluster_ids contains the cluster ids that this gpu is - // responsible for. I'm going to set cluster_inverse_v to one - // for each cluster in this list. 
- // - thrust::for_each( - rmm::exec_policy(stream_)->on(stream_), - my_cluster_ids_v.begin(), - my_cluster_ids_v.end(), - [base_vertex_id = base_vertex_id_, - d_cluster_inverse = cluster_inverse_v.data().get()] __device__(vertex_t cluster) { - d_cluster_inverse[cluster - base_vertex_id] = 1; - }); - - rmm::device_vector my_cluster_ids_deduped_v = detail::remove_elements_from_vector( - my_cluster_ids_v, - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(cluster_inverse_v.size()), - [d_cluster_inverse = cluster_inverse_v.data().get()] __device__(auto idx) { - return d_cluster_inverse[idx] == 1; - }, - stream_); - - // - // Need to gather everything to be able to compute base addresses - // - vertex_t base_address{0}; - - if (graph_view_t::is_multi_gpu) { - int num_gpus{1}; - rmm::device_vector sizes_v(num_gpus + 1, my_cluster_ids_deduped_v.size()); - - handle_.get_comms().allgather( - sizes_v.data().get() + num_gpus, sizes_v.data().get(), num_gpus, stream_); - - base_address = thrust::reduce(rmm::exec_policy(stream_)->on(stream_), - sizes_v.begin(), - sizes_v.begin() + rank_, - vertex_t{0}); - } - - // - // Now let's update cluster_inverse_v to contain - // the mapping of old cluster id to new vertex id - // - thrust::fill( - cluster_inverse_v.begin(), cluster_inverse_v.end(), std::numeric_limits::max()); - - thrust::for_each_n(rmm::exec_policy(stream_)->on(stream_), - thrust::make_counting_iterator(0), - my_cluster_ids_deduped_v.size(), - [base_address, - d_my_cluster_ids_deduped = my_cluster_ids_deduped_v.data().get(), - d_cluster_inverse = cluster_inverse_v.data().get()] __device__(auto idx) { - d_cluster_inverse[d_my_cluster_ids_deduped[idx]] = idx + base_address; - }); - - // - // Now I need to shuffle back to original gpus the - // subset of my mapping that is required - // - rmm::device_vector new_vertex_ids_v = - variable_shuffle( - handle_, - my_cluster_ids_v.size(), - thrust::make_transform_iterator(my_cluster_ids_v.begin(), - [d_cluster_inverse = cluster_inverse_v.data().get(), - base_vertex_id = base_vertex_id_] __device__(auto v) { - return d_cluster_inverse[v - base_vertex_id]; - }), - original_gpus_v.begin()); - - if (graph_view_t::is_multi_gpu) { - my_cluster_ids_v = variable_shuffle( - handle_, my_cluster_ids_v.size(), my_cluster_ids_v.begin(), original_gpus_v.begin()); - } - - // - // Now update the hash map with the new vertex id - // - thrust::for_each_n(rmm::exec_policy(stream_)->on(stream_), - thrust::make_zip_iterator( - thrust::make_tuple(my_cluster_ids_v.begin(), new_vertex_ids_v.begin())), - my_cluster_ids_v.size(), - [d_hash_map = hash_map.get_device_view()] __device__(auto p) mutable { - auto pos = d_hash_map.find(thrust::get<0>(p)); - pos->second.store(thrust::get<1>(p)); - }); - - // - // At this point we have a renumbered COO that is - // improperly distributed around the cluster, which - // will be fixed by generate_supervertices_graph - // - if (graph_t::is_multi_gpu) { - return host_scalar_allreduce( - handle_.get_comms(), static_cast(my_cluster_ids_deduped_v.size()), stream_); - } else { - return static_cast(my_cluster_ids_deduped_v.size()); - } - } - - void generate_supervertices_graph(cuco::static_map const &hash_map, - vertex_t num_clusters) - { - rmm::device_vector new_src_v(local_num_edges_); - rmm::device_vector new_dst_v(local_num_edges_); - rmm::device_vector new_weight_v(current_graph_view_.weights(), - current_graph_view_.weights() + local_num_edges_); - - thrust::transform(rmm::exec_policy(stream_)->on(stream_), - 
src_indices_v_.begin(), - src_indices_v_.end(), - new_src_v.begin(), - [base_src_vertex_id = base_src_vertex_id_, - d_src_cluster = src_cluster_cache_v_.data().get(), - d_hash_map = hash_map.get_device_view()] __device__(vertex_t v) { - vertex_t c = d_src_cluster[v - base_src_vertex_id]; - auto pos = d_hash_map.find(c); - return pos->second.load(); - }); - - thrust::transform(rmm::exec_policy(stream_)->on(stream_), - current_graph_view_.indices(), - current_graph_view_.indices() + local_num_edges_, - new_dst_v.begin(), - [base_dst_vertex_id = base_dst_vertex_id_, - d_dst_cluster = dst_cluster_cache_v_.data().get(), - d_hash_map = hash_map.get_device_view()] __device__(vertex_t v) { - vertex_t c = d_dst_cluster[v - base_dst_vertex_id]; - auto pos = d_hash_map.find(c); - return pos->second.load(); - }); - - // Combine common edges on local gpu - std::tie(new_src_v, new_dst_v, new_weight_v) = - combine_local_edges(new_src_v, new_dst_v, new_weight_v); - - if (graph_view_t::is_multi_gpu) { - // - // Shuffle the data to the proper GPU - // FIXME: This needs some performance exploration. It is - // possible (likely?) that the shrunken graph is - // more dense than the original graph. Perhaps that - // changes the dynamic of partitioning efficiently. - // - // For now, we're going to keep the partitioning the same, - // but because we've renumbered to lower numbers, fewer - // partitions will actually have data. - // - rmm::device_vector partition_v(new_src_v.size()); - - thrust::transform( - rmm::exec_policy(stream_)->on(stream_), - thrust::make_zip_iterator(thrust::make_tuple(new_src_v.begin(), new_dst_v.begin())), - thrust::make_zip_iterator(thrust::make_tuple(new_src_v.end(), new_dst_v.end())), - partition_v.begin(), - [d_edge_device_view = compute_partition_.edge_device_view()] __device__( - thrust::tuple tuple) { - return d_edge_device_view(thrust::get<0>(tuple), thrust::get<1>(tuple)); - }); - - new_src_v = variable_shuffle( - handle_, partition_v.size(), new_src_v.begin(), partition_v.begin()); - - new_dst_v = variable_shuffle( - handle_, partition_v.size(), new_dst_v.begin(), partition_v.begin()); - - new_weight_v = variable_shuffle( - handle_, partition_v.size(), new_weight_v.begin(), partition_v.begin()); - - // - // Now everything is on the correct node, again combine like edges - // - std::tie(new_src_v, new_dst_v, new_weight_v) = - combine_local_edges(new_src_v, new_dst_v, new_weight_v); - } - - // - // Now I have a COO of the new graph, distributed according to the - // original clustering (eventually this likely fits on one GPU and - // everything else is empty). 
- // - current_graph_ = - detail::create_graph(handle_, - new_src_v, - new_dst_v, - new_weight_v, - num_clusters, - experimental::graph_properties_t{true, true}, - current_graph_view_); + std::tie(current_graph_, numbering_map) = + coarsen_graph(handle_, current_graph_view_, dendrogram_->current_level_begin()); current_graph_view_ = current_graph_->view(); - src_indices_v_.resize(new_src_v.size()); - local_num_vertices_ = current_graph_view_.get_number_of_local_vertices(); local_num_rows_ = current_graph_view_.get_number_of_local_adj_matrix_partition_rows(); local_num_cols_ = current_graph_view_.get_number_of_local_adj_matrix_partition_cols(); - local_num_edges_ = new_src_v.size(); + base_vertex_id_ = current_graph_view_.get_local_vertex_first(); + + local_num_edges_ = thrust::transform_reduce( + thrust::host, + thrust::make_counting_iterator(0), + thrust::make_counting_iterator( + current_graph_view_.get_number_of_local_adj_matrix_partitions()), + [this](auto indx) { + return current_graph_view_.get_number_of_local_adj_matrix_partition_edges(indx); + }, + size_t{0}, + thrust::plus()); + + src_indices_v_.resize(local_num_edges_); cugraph::detail::offsets_to_indices( current_graph_view_.offsets(), local_num_rows_, src_indices_v_.data().get()); - } -#endif - std:: - tuple, rmm::device_vector, rmm::device_vector> - combine_local_edges(rmm::device_vector &src_v, - rmm::device_vector &dst_v, - rmm::device_vector &weight_v) - { - thrust::stable_sort_by_key( - rmm::exec_policy(stream_)->on(stream_), - dst_v.begin(), - dst_v.end(), - thrust::make_zip_iterator(thrust::make_tuple(src_v.begin(), weight_v.begin()))); - thrust::stable_sort_by_key( - rmm::exec_policy(stream_)->on(stream_), - src_v.begin(), - src_v.end(), - thrust::make_zip_iterator(thrust::make_tuple(dst_v.begin(), weight_v.begin()))); + rmm::device_uvector numbering_indices(numbering_map.size(), stream_); + thrust::sequence(rmm::exec_policy(stream_)->on(stream_), + numbering_indices.begin(), + numbering_indices.end(), + base_vertex_id_); - rmm::device_vector combined_src_v(src_v.size()); - rmm::device_vector combined_dst_v(src_v.size()); - rmm::device_vector combined_weight_v(src_v.size()); + relabel( + handle_, + std::make_tuple(static_cast(numbering_map.begin()), + static_cast(numbering_indices.begin())), + local_num_vertices_, + dendrogram_->current_level_begin(), + dendrogram_->current_level_size()); - // - // Now we reduce by key to combine the weights of duplicate - // edges. 
- // - auto start = thrust::make_zip_iterator(thrust::make_tuple(src_v.begin(), dst_v.begin())); - auto new_start = - thrust::make_zip_iterator(thrust::make_tuple(combined_src_v.begin(), combined_dst_v.begin())); - auto new_end = thrust::reduce_by_key(rmm::exec_policy(stream_)->on(stream_), - start, - start + src_v.size(), - weight_v.begin(), - new_start, - combined_weight_v.begin(), - thrust::equal_to>(), - thrust::plus()); - - auto num_edges = thrust::distance(new_start, new_end.first); - - combined_src_v.resize(num_edges); - combined_dst_v.resize(num_edges); - combined_weight_v.resize(num_edges); - - return std::make_tuple(combined_src_v, combined_dst_v, combined_weight_v); + timer_stop(stream_); } protected: diff --git a/cpp/src/experimental/relabel.cu b/cpp/src/experimental/relabel.cu index f2cb1e616c8..4a36cac180d 100644 --- a/cpp/src/experimental/relabel.cu +++ b/cpp/src/experimental/relabel.cu @@ -224,5 +224,20 @@ template void relabel( int32_t num_labels, bool do_expensive_check); +template void relabel(raft::handle_t const& handle, + std::tuple old_new_label_pairs, + int64_t num_label_pairs, + int64_t* labels, + int64_t num_labels, + bool do_expensive_check); + +template void relabel( + raft::handle_t const& handle, + std::tuple old_new_label_pairs, + int64_t num_label_pairs, + int64_t* labels, + int64_t num_labels, + bool do_expensive_check); + } // namespace experimental } // namespace cugraph diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index f81c98b2f4c..1b93b848515 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -182,7 +182,7 @@ ConfigureTest(BFS_TEST "${BFS_TEST_SRCS}" "") set(LOUVAIN_TEST_SRC "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/community/louvain_test.cu") + "${CMAKE_CURRENT_SOURCE_DIR}/community/louvain_test.cpp") ConfigureTest(LOUVAIN_TEST "${LOUVAIN_TEST_SRC}" "") @@ -199,7 +199,7 @@ ConfigureTest(LEIDEN_TEST "${LEIDEN_TEST_SRC}" "") # - ECG tests --------------------------------------------------------------------------------- set(ECG_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/community/ecg_test.cu") + "${CMAKE_CURRENT_SOURCE_DIR}/community/ecg_test.cpp") ConfigureTest(ECG_TEST "${ECG_TEST_SRC}" "") diff --git a/cpp/tests/community/ecg_test.cu b/cpp/tests/community/ecg_test.cpp similarity index 73% rename from cpp/tests/community/ecg_test.cu rename to cpp/tests/community/ecg_test.cpp index 85b80b1610b..a13ee2fe360 100644 --- a/cpp/tests/community/ecg_test.cu +++ b/cpp/tests/community/ecg_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. 
* * NVIDIA CORPORATION and its licensors retain all intellectual property * and proprietary rights in and to this software, related documentation @@ -77,6 +77,10 @@ TEST(ecg, success) TEST(ecg, dolphin) { + raft::handle_t handle; + + auto stream = handle.get_stream(); + std::vector off_h = {0, 6, 14, 18, 21, 22, 26, 32, 37, 43, 50, 55, 56, 57, 65, 77, 84, 90, 99, 106, 110, 119, 125, 126, 129, 135, 138, 141, 146, 151, 160, 165, 166, 169, 179, 184, 185, 192, 203, @@ -103,38 +107,55 @@ TEST(ecg, dolphin) int num_verts = off_h.size() - 1; int num_edges = ind_h.size(); - thrust::host_vector cluster_id(num_verts, -1); + std::vector cluster_id(num_verts, -1); - rmm::device_vector offsets_v(off_h); - rmm::device_vector indices_v(ind_h); - rmm::device_vector weights_v(w_h); - rmm::device_vector result_v(cluster_id); + rmm::device_uvector offsets_v(num_verts + 1, stream); + rmm::device_uvector indices_v(num_edges, stream); + rmm::device_uvector weights_v(num_edges, stream); + rmm::device_uvector result_v(num_verts, stream); + + raft::update_device(offsets_v.data(), off_h.data(), off_h.size(), stream); + raft::update_device(indices_v.data(), ind_h.data(), ind_h.size(), stream); + raft::update_device(weights_v.data(), w_h.data(), w_h.size(), stream); cugraph::GraphCSRView graph_csr( - offsets_v.data().get(), indices_v.data().get(), weights_v.data().get(), num_verts, num_edges); + offsets_v.data(), indices_v.data(), weights_v.data(), num_verts, num_edges); - raft::handle_t handle; - cugraph::ecg(handle, graph_csr, .05, 16, result_v.data().get()); + // "FIXME": remove this check once we drop support for Pascal + // + // Calling louvain on Pascal will throw an exception, we'll check that + // this is the behavior while we still support Pascal (device_prop.major < 7) + // + if (handle.get_device_properties().major < 7) { + EXPECT_THROW( + (cugraph::ecg(handle, graph_csr, .05, 16, result_v.data())), + cugraph::logic_error); + } else { + cugraph::ecg(handle, graph_csr, .05, 16, result_v.data()); - cluster_id = result_v; - int max = *max_element(cluster_id.begin(), cluster_id.end()); - int min = *min_element(cluster_id.begin(), cluster_id.end()); + raft::update_host(cluster_id.data(), result_v.data(), num_verts, stream); - ASSERT_EQ((min >= 0), 1); + CUDA_TRY(cudaDeviceSynchronize()); - std::set cluster_ids; - for (auto c : cluster_id) { cluster_ids.insert(c); } + int max = *max_element(cluster_id.begin(), cluster_id.end()); + int min = *min_element(cluster_id.begin(), cluster_id.end()); - ASSERT_EQ(cluster_ids.size(), size_t(max + 1)); + ASSERT_EQ((min >= 0), 1); - float modularity{0.0}; + std::set cluster_ids; + for (auto c : cluster_id) { cluster_ids.insert(c); } - cugraph::ext_raft::analyzeClustering_modularity( - graph_csr, max + 1, result_v.data().get(), &modularity); + ASSERT_EQ(cluster_ids.size(), size_t(max + 1)); + + float modularity{0.0}; + + cugraph::ext_raft::analyzeClustering_modularity( + graph_csr, max + 1, result_v.data(), &modularity); - float random_modularity{0.95 * 0.4962422251701355}; + float random_modularity{0.95 * 0.4962422251701355}; - ASSERT_GT(modularity, random_modularity); + ASSERT_GT(modularity, random_modularity); + } } CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/community/leiden_test.cpp b/cpp/tests/community/leiden_test.cpp index 764ab8bf6cb..9083400f85c 100644 --- a/cpp/tests/community/leiden_test.cpp +++ b/cpp/tests/community/leiden_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. 
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. * * NVIDIA CORPORATION and its licensors retain all intellectual property * and proprietary rights in and to this software, related documentation @@ -19,6 +19,10 @@ TEST(leiden_karate, success) { + raft::handle_t handle; + + auto stream = handle.get_stream(); + std::vector off_h = {0, 16, 25, 35, 41, 44, 48, 52, 56, 61, 63, 66, 67, 69, 74, 76, 78, 80, 82, 84, 87, 89, 91, 93, 98, 101, 104, 106, 110, 113, 117, 121, 127, 139, 156}; @@ -46,27 +50,38 @@ TEST(leiden_karate, success) std::vector cluster_id(num_verts, -1); - rmm::device_vector offsets_v(off_h); - rmm::device_vector indices_v(ind_h); - rmm::device_vector weights_v(w_h); - rmm::device_vector result_v(cluster_id); + rmm::device_uvector offsets_v(num_verts + 1, stream); + rmm::device_uvector indices_v(num_edges, stream); + rmm::device_uvector weights_v(num_edges, stream); + rmm::device_uvector result_v(num_verts, stream); + + raft::update_device(offsets_v.data(), off_h.data(), off_h.size(), stream); + raft::update_device(indices_v.data(), ind_h.data(), ind_h.size(), stream); + raft::update_device(weights_v.data(), w_h.data(), w_h.size(), stream); cugraph::GraphCSRView G( - offsets_v.data().get(), indices_v.data().get(), weights_v.data().get(), num_verts, num_edges); + offsets_v.data(), indices_v.data(), weights_v.data(), num_verts, num_edges); float modularity{0.0}; size_t num_level = 40; - raft::handle_t handle; - std::tie(num_level, modularity) = cugraph::leiden(handle, G, result_v.data().get()); + // "FIXME": remove this check once we drop support for Pascal + // + // Calling louvain on Pascal will throw an exception, we'll check that + // this is the behavior while we still support Pascal (device_prop.major < 7) + // + if (handle.get_device_properties().major < 7) { + EXPECT_THROW(cugraph::leiden(handle, G, result_v.data()), cugraph::logic_error); + } else { + std::tie(num_level, modularity) = cugraph::leiden(handle, G, result_v.data()); + + raft::update_host(cluster_id.data(), result_v.data(), num_verts, stream); - cudaMemcpy((void*)&(cluster_id[0]), - result_v.data().get(), - sizeof(int) * num_verts, - cudaMemcpyDeviceToHost); + CUDA_TRY(cudaDeviceSynchronize()); - int min = *min_element(cluster_id.begin(), cluster_id.end()); + int min = *min_element(cluster_id.begin(), cluster_id.end()); - ASSERT_GE(min, 0); - ASSERT_GE(modularity, 0.41116042 * 0.99); + ASSERT_GE(min, 0); + ASSERT_GE(modularity, 0.41116042 * 0.99); + } } diff --git a/cpp/tests/community/louvain_test.cu b/cpp/tests/community/louvain_test.cpp similarity index 62% rename from cpp/tests/community/louvain_test.cu rename to cpp/tests/community/louvain_test.cpp index 2bac0097212..d3024282be3 100644 --- a/cpp/tests/community/louvain_test.cu +++ b/cpp/tests/community/louvain_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. 
* * NVIDIA CORPORATION and its licensors retain all intellectual property * and proprietary rights in and to this software, related documentation @@ -15,10 +15,14 @@ #include -#include +#include TEST(louvain, success) { + raft::handle_t handle; + + auto stream = handle.get_stream(); + std::vector off_h = {0, 16, 25, 35, 41, 44, 48, 52, 56, 61, 63, 66, 67, 69, 74, 76, 78, 80, 82, 84, 87, 89, 91, 93, 98, 101, 104, 106, 110, 113, 117, 121, 127, 139, 156}; @@ -49,42 +53,54 @@ TEST(louvain, success) std::vector cluster_id(num_verts, -1); - rmm::device_vector offsets_v(off_h); - rmm::device_vector indices_v(ind_h); - rmm::device_vector weights_v(w_h); - rmm::device_vector result_v(cluster_id); + rmm::device_uvector offsets_v(num_verts + 1, stream); + rmm::device_uvector indices_v(num_edges, stream); + rmm::device_uvector weights_v(num_edges, stream); + rmm::device_uvector result_v(num_verts, stream); + + raft::update_device(offsets_v.data(), off_h.data(), off_h.size(), stream); + raft::update_device(indices_v.data(), ind_h.data(), ind_h.size(), stream); + raft::update_device(weights_v.data(), w_h.data(), w_h.size(), stream); cugraph::GraphCSRView G( - offsets_v.data().get(), indices_v.data().get(), weights_v.data().get(), num_verts, num_edges); + offsets_v.data(), indices_v.data(), weights_v.data(), num_verts, num_edges); float modularity{0.0}; size_t num_level = 40; - raft::handle_t handle; + // "FIXME": remove this check once we drop support for Pascal + // + // Calling louvain on Pascal will throw an exception, we'll check that + // this is the behavior while we still support Pascal (device_prop.major < 7) + // + if (handle.get_device_properties().major < 7) { + EXPECT_THROW(cugraph::louvain(handle, G, result_v.data()), cugraph::logic_error); + } else { + std::tie(num_level, modularity) = cugraph::louvain(handle, G, result_v.data()); - std::tie(num_level, modularity) = cugraph::louvain(handle, G, result_v.data().get()); + raft::update_host(cluster_id.data(), result_v.data(), num_verts, stream); - cudaMemcpy((void*)&(cluster_id[0]), - result_v.data().get(), - sizeof(int) * num_verts, - cudaMemcpyDeviceToHost); + CUDA_TRY(cudaDeviceSynchronize()); - int min = *min_element(cluster_id.begin(), cluster_id.end()); + int min = *min_element(cluster_id.begin(), cluster_id.end()); - std::cout << "modularity = " << modularity << std::endl; + std::cout << "modularity = " << modularity << std::endl; - ASSERT_GE(min, 0); - ASSERT_GE(modularity, 0.402777 * 0.95); - ASSERT_EQ(result_v, result_h); + ASSERT_GE(min, 0); + ASSERT_GE(modularity, 0.402777 * 0.95); + ASSERT_EQ(cluster_id, result_h); + } } TEST(louvain_renumbered, success) { + raft::handle_t handle; + + auto stream = handle.get_stream(); + std::vector off_h = {0, 16, 25, 30, 34, 38, 42, 44, 46, 48, 50, 52, 54, 56, 73, 85, 95, 101, 107, 112, 117, 121, 125, 129, - 132, 135, 138, 141, 144, 147, 149, 151, 153, 155, 156 - - }; + 132, 135, 138, 141, 144, 147, 149, 151, 153, 155, 156}; std::vector ind_h = { 1, 3, 7, 11, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 30, 33, 0, 5, 11, 15, 16, 19, 21, 25, 30, 4, 13, 14, 22, 27, 0, 9, 20, 24, 2, 13, 15, 26, 1, 13, 14, 18, 13, 15, 0, 16, @@ -110,32 +126,42 @@ TEST(louvain_renumbered, success) std::vector cluster_id(num_verts, -1); - rmm::device_vector offsets_v(off_h); - rmm::device_vector indices_v(ind_h); - rmm::device_vector weights_v(w_h); - rmm::device_vector result_v(cluster_id); + rmm::device_uvector offsets_v(num_verts + 1, stream); + rmm::device_uvector indices_v(num_edges, stream); + rmm::device_uvector 
weights_v(num_edges, stream); + rmm::device_uvector result_v(num_verts, stream); + + raft::update_device(offsets_v.data(), off_h.data(), off_h.size(), stream); + raft::update_device(indices_v.data(), ind_h.data(), ind_h.size(), stream); + raft::update_device(weights_v.data(), w_h.data(), w_h.size(), stream); cugraph::GraphCSRView G( - offsets_v.data().get(), indices_v.data().get(), weights_v.data().get(), num_verts, num_edges); + offsets_v.data(), indices_v.data(), weights_v.data(), num_verts, num_edges); float modularity{0.0}; size_t num_level = 40; - raft::handle_t handle; + // "FIXME": remove this check once we drop support for Pascal + // + // Calling louvain on Pascal will throw an exception, we'll check that + // this is the behavior while we still support Pascal (device_prop.major < 7) + // + if (handle.get_device_properties().major < 7) { + EXPECT_THROW(cugraph::louvain(handle, G, result_v.data()), cugraph::logic_error); + } else { + std::tie(num_level, modularity) = cugraph::louvain(handle, G, result_v.data()); - std::tie(num_level, modularity) = cugraph::louvain(handle, G, result_v.data().get()); + raft::update_host(cluster_id.data(), result_v.data(), num_verts, stream); - cudaMemcpy((void*)&(cluster_id[0]), - result_v.data().get(), - sizeof(int) * num_verts, - cudaMemcpyDeviceToHost); + CUDA_TRY(cudaDeviceSynchronize()); - int min = *min_element(cluster_id.begin(), cluster_id.end()); + int min = *min_element(cluster_id.begin(), cluster_id.end()); - std::cout << "modularity = " << modularity << std::endl; + std::cout << "modularity = " << modularity << std::endl; - ASSERT_GE(min, 0); - ASSERT_GE(modularity, 0.402777 * 0.95); + ASSERT_GE(min, 0); + ASSERT_GE(modularity, 0.402777 * 0.95); + } } CUGRAPH_TEST_PROGRAM_MAIN() From 62111aacb7c17d73ed56d6b4da250ab3221f5751 Mon Sep 17 00:00:00 2001 From: Rick Ratzel <3039903+rlratzel@users.noreply.github.com> Date: Wed, 10 Feb 2021 17:03:01 -0600 Subject: [PATCH 167/343] Added initial infrastructure for MG C++ testing and a Pagerank MG test using it (#1361) Added initial infrastructure for MG C++ testing and a Pagerank MG test using it. 
Still a WIP, need to: * Shuffle step is currently failing * `graph_t` ctor expensive check is failing * Finish comparison code to reference SG Pagerank results * Fix the `#include` guard hack in `test_utilities.hpp` * Lots of cleanup * Refactor common steps into proper `SetUp()` and `TearDown()` functions closes #1136 Authors: - Rick Ratzel (@rlratzel) - Seunghwa Kang (@seunghwak) Approvers: - Brad Rees (@BradReesWork) - Andrei Schaffer (@aschaffer) - Chuck Hastings (@ChuckHastings) URL: https://github.com/rapidsai/cugraph/pull/1361 --- build.sh | 17 +- cpp/CMakeLists.txt | 29 +- cpp/tests/CMakeLists.txt | 114 ++++-- cpp/tests/README.md | 31 ++ cpp/tests/pagerank/pagerank_mg_test.cpp | 229 +++++++++++ cpp/tests/utilities/base_fixture.hpp | 8 +- cpp/tests/utilities/mg_test_utilities.cu | 180 +++++++++ cpp/tests/utilities/mg_test_utilities.hpp | 77 ++++ cpp/tests/utilities/test_utilities.cpp | 442 ++++++++++++++++++++++ cpp/tests/utilities/test_utilities.hpp | 319 +--------------- 10 files changed, 1097 insertions(+), 349 deletions(-) create mode 100644 cpp/tests/README.md create mode 100644 cpp/tests/pagerank/pagerank_mg_test.cpp create mode 100644 cpp/tests/utilities/mg_test_utilities.cu create mode 100644 cpp/tests/utilities/mg_test_utilities.hpp create mode 100644 cpp/tests/utilities/test_utilities.cpp diff --git a/build.sh b/build.sh index b3d3463ed4e..dfe31089b0f 100755 --- a/build.sh +++ b/build.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # cugraph build script @@ -19,12 +19,13 @@ ARGS=$* REPODIR=$(cd $(dirname $0); pwd) LIBCUGRAPH_BUILD_DIR=${LIBCUGRAPH_BUILD_DIR:=${REPODIR}/cpp/build} -VALIDARGS="clean libcugraph cugraph docs -v -g -n --allgpuarch --show_depr_warn -h --help" +VALIDARGS="clean libcugraph cugraph cpp-mgtests docs -v -g -n --allgpuarch --show_depr_warn -h --help" HELP="$0 [ ...] [ ...] where is: clean - remove all existing build artifacts and configuration (start over) libcugraph - build the cugraph C++ code cugraph - build the cugraph Python package + cpp-mgtests - build libcugraph mnmg tests. Builds MPI communicator, adding MPI as a dependency. 
docs - build the docs and is: -v - verbose build mode @@ -48,6 +49,7 @@ VERBOSE="" BUILD_TYPE=Release INSTALL_TARGET=install BUILD_DISABLE_DEPRECATION_WARNING=ON +BUILD_CPP_MG_TESTS=OFF GPU_ARCH="" # Set defaults for vars that may not have been defined externally @@ -96,6 +98,9 @@ fi if hasArg --show_depr_warn; then BUILD_DISABLE_DEPRECATION_WARNING=OFF fi +if hasArg cpp-mgtests; then + BUILD_CPP_MG_TESTS=ON +fi # If clean given, run it prior to any other steps if hasArg clean; then @@ -127,9 +132,11 @@ if buildAll || hasArg libcugraph; then mkdir -p ${LIBCUGRAPH_BUILD_DIR} cd ${LIBCUGRAPH_BUILD_DIR} cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \ - ${GPU_ARCH} \ - -DDISABLE_DEPRECATION_WARNING=${BUILD_DISABLE_DEPRECATION_WARNING} \ - -DCMAKE_BUILD_TYPE=${BUILD_TYPE} ${REPODIR}/cpp + ${GPU_ARCH} \ + -DDISABLE_DEPRECATION_WARNING=${BUILD_DISABLE_DEPRECATION_WARNING} \ + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ + -DBUILD_CUGRAPH_MG_TESTS=${BUILD_CPP_MG_TESTS} \ + ${REPODIR}/cpp make -j${PARALLEL_LEVEL} VERBOSE=${VERBOSE} ${INSTALL_TARGET} fi diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index e12382bf344..61e882aad81 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -33,6 +33,11 @@ if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) "Debug" "Release" "MinSizeRel" "RelWithDebInfo") endif() +############################################################################## +# - User Options ------------------------------------------------------------ + +option(BUILD_CUGRAPH_MG_TESTS "Build cuGraph multigpu algorithm tests" OFF) + ################################################################################################### # - compiler options ------------------------------------------------------------------------------ @@ -181,6 +186,12 @@ else() set(NCCL_LIBRARIES ${NCCL_PATH}/lib/libnccl.so) endif(NOT NCCL_PATH) +################################################################################################### +# - find MPI - only enabled if MG tests are to be built + +if(BUILD_CUGRAPH_MG_TESTS) + find_package(MPI REQUIRED) +endif(BUILD_CUGRAPH_MG_TESTS) ################################################################################################### # - Fetch Content --------------------------------------------------------------------------------- @@ -324,14 +335,6 @@ set_property(TARGET gunrock PROPERTY IMPORTED_LOCATION ${GUNROCK_DIR}/src/gunroc ################################################################################################### # - library targets ------------------------------------------------------------------------------- -# target_link_directories is added in cmake 3.13, and cmake advises to use this instead of -# link_directoires (we should switch to target_link_directories once 3.13 becomes the minimum -# required version). -link_directories( - # CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES is an undocumented/unsupported variable containing the - # link directories for nvcc. - "${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}") - add_library(cugraph SHARED src/utilities/spmv_1D.cu src/utilities/cython.cu @@ -371,6 +374,12 @@ add_library(cugraph SHARED src/tree/mst.cu ) +target_link_directories(cugraph + PRIVATE + # CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES is an undocumented/unsupported variable containing the + # link directories for nvcc. + "${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}") + # # NOTE: This dependency will force the building of cugraph to # wait until after cugunrock is constructed. 
@@ -473,10 +482,6 @@ set_target_properties(cugraph PROPERTIES if(BUILD_TESTS) if(GTEST_FOUND) - # target_link_directories is added in cmake 3.13, and cmake advises to use this instead of - # link_directories (we should switch to target_link_directories once 3.13 becomes the - # minimum required version). - link_directories(${GTEST_LIBRARY_DIR}) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tests) endif(GTEST_FOUND) endif(BUILD_TESTS) diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 1b93b848515..3ee25d993b6 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -19,7 +19,7 @@ ################################################################################################### # - compiler function ----------------------------------------------------------------------------- -function(ConfigureTest CMAKE_TEST_NAME CMAKE_TEST_SRC CMAKE_EXTRA_LIBS) +function(ConfigureTest CMAKE_TEST_NAME CMAKE_TEST_SRC) add_executable(${CMAKE_TEST_NAME} ${CMAKE_TEST_SRC}) @@ -41,6 +41,12 @@ function(ConfigureTest CMAKE_TEST_NAME CMAKE_TEST_SRC CMAKE_EXTRA_LIBS) "${RAFT_DIR}/cpp/include" ) + target_link_directories(${CMAKE_TEST_NAME} + PRIVATE + # CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES is an undocumented/unsupported + # variable containing the link directories for nvcc. + "${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}") + target_link_libraries(${CMAKE_TEST_NAME} PRIVATE cugraph @@ -53,8 +59,7 @@ function(ConfigureTest CMAKE_TEST_NAME CMAKE_TEST_SRC CMAKE_EXTRA_LIBS) cublas cusparse cusolver - curand - ${CMAKE_EXTRA_LIBS}) + curand) if(OpenMP_CXX_FOUND) target_link_libraries(${CMAKE_TEST_NAME} PRIVATE @@ -135,95 +140,113 @@ endif(RAPIDS_DATASET_ROOT_DIR) ### test sources ################################################################################## ################################################################################################### +# FIXME: consider adding a "add_library(cugraph_testing SHARED ...) instead of +# adding the same test utility sources to each test target. There may need to be +# an additional cugraph_mg_testing lib due to the optional inclusion of MPI. 
+ ################################################################################################### # - katz centrality tests ------------------------------------------------------------------------- set(KATZ_TEST_SRC "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/centrality/katz_centrality_test.cu") - ConfigureTest(KATZ_TEST "${KATZ_TEST_SRC}" "") + ConfigureTest(KATZ_TEST "${KATZ_TEST_SRC}") ################################################################################################### # - betweenness centrality tests ------------------------------------------------------------------ set(BETWEENNESS_TEST_SRC "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/centrality/betweenness_centrality_test.cu") - ConfigureTest(BETWEENNESS_TEST "${BETWEENNESS_TEST_SRC}" "") + ConfigureTest(BETWEENNESS_TEST "${BETWEENNESS_TEST_SRC}") set(EDGE_BETWEENNESS_TEST_SRC "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/centrality/edge_betweenness_centrality_test.cu") - ConfigureTest(EDGE_BETWEENNESS_TEST "${EDGE_BETWEENNESS_TEST_SRC}" "") + ConfigureTest(EDGE_BETWEENNESS_TEST "${EDGE_BETWEENNESS_TEST_SRC}") ################################################################################################### # - SSSP tests ------------------------------------------------------------------------------------ set(SSSP_TEST_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/traversal/sssp_test.cu") -ConfigureTest(SSSP_TEST "${SSSP_TEST_SRCS}" "") +ConfigureTest(SSSP_TEST "${SSSP_TEST_SRCS}") ################################################################################################### # - BFS tests ------------------------------------------------------------------------------------- set(BFS_TEST_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/traversal/bfs_test.cu") -ConfigureTest(BFS_TEST "${BFS_TEST_SRCS}" "") +ConfigureTest(BFS_TEST "${BFS_TEST_SRCS}") ################################################################################################### # - LOUVAIN tests --------------------------------------------------------------------------------- set(LOUVAIN_TEST_SRC "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/community/louvain_test.cpp") -ConfigureTest(LOUVAIN_TEST "${LOUVAIN_TEST_SRC}" "") +ConfigureTest(LOUVAIN_TEST "${LOUVAIN_TEST_SRC}") ################################################################################################### # - LEIDEN tests --------------------------------------------------------------------------------- set(LEIDEN_TEST_SRC "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/community/leiden_test.cpp") -ConfigureTest(LEIDEN_TEST "${LEIDEN_TEST_SRC}" "") +ConfigureTest(LEIDEN_TEST "${LEIDEN_TEST_SRC}") ################################################################################################### # - ECG tests 
--------------------------------------------------------------------------------- set(ECG_TEST_SRC + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/community/ecg_test.cpp") -ConfigureTest(ECG_TEST "${ECG_TEST_SRC}" "") +ConfigureTest(ECG_TEST "${ECG_TEST_SRC}") ################################################################################################### # - Balanced cut clustering tests ----------------------------------------------------------------- set(BALANCED_TEST_SRC + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/community/balanced_edge_test.cpp") -ConfigureTest(BALANCED_TEST "${BALANCED_TEST_SRC}" "") +ConfigureTest(BALANCED_TEST "${BALANCED_TEST_SRC}") ################################################################################################### # - TRIANGLE tests -------------------------------------------------------------------------------- set(TRIANGLE_TEST_SRC + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/community/triangle_test.cu") -ConfigureTest(TRIANGLE_TEST "${TRIANGLE_TEST_SRC}" "") +ConfigureTest(TRIANGLE_TEST "${TRIANGLE_TEST_SRC}") ################################################################################################### # - EGO tests -------------------------------------------------------------------------------- set(EGO_TEST_SRC "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/community/egonet_test.cu") ConfigureTest(EGO_TEST "${EGO_TEST_SRC}" "") @@ -232,53 +255,60 @@ ConfigureTest(EGO_TEST "${EGO_TEST_SRC}" "") set(RENUMBERING_TEST_SRC "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/renumber/renumber_test.cu") -ConfigureTest(RENUMBERING_TEST "${RENUMBERING_TEST_SRC}" "") +ConfigureTest(RENUMBERING_TEST "${RENUMBERING_TEST_SRC}") ################################################################################################### # - FORCE ATLAS 2 tests -------------------------------------------------------------------------- set(FA2_TEST_SRC "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/layout/force_atlas2_test.cu") -ConfigureTest(FA2_TEST "${FA2_TEST_SRC}" "") +ConfigureTest(FA2_TEST "${FA2_TEST_SRC}") ################################################################################################### # - CONNECTED COMPONENTS tests ------------------------------------------------------------------- set(CONNECT_TEST_SRC "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/components/con_comp_test.cu") -ConfigureTest(CONNECT_TEST "${CONNECT_TEST_SRC}" "") +ConfigureTest(CONNECT_TEST "${CONNECT_TEST_SRC}") ################################################################################################### # - STRONGLY CONNECTED COMPONENTS tests ---------------------------------------------------------- set(SCC_TEST_SRC "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" 
"${CMAKE_CURRENT_SOURCE_DIR}/components/scc_test.cu") -ConfigureTest(SCC_TEST "${SCC_TEST_SRC}" "") +ConfigureTest(SCC_TEST "${SCC_TEST_SRC}") ################################################################################################### #-Hungarian (Linear Assignment Problem) tests --------------------------------------------------------------------- set(HUNGARIAN_TEST_SRC + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/linear_assignment/hungarian_test.cu") -ConfigureTest(HUNGARIAN_TEST "${HUNGARIAN_TEST_SRC}" "") +ConfigureTest(HUNGARIAN_TEST "${HUNGARIAN_TEST_SRC}") ################################################################################################### # - MST tests ---------------------------------------------------------------------------- set(MST_TEST_SRC "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/tree/mst_test.cu") -ConfigureTest(MST_TEST "${MST_TEST_SRC}" "") +ConfigureTest(MST_TEST "${MST_TEST_SRC}") ################################################################################################### @@ -286,72 +316,102 @@ ConfigureTest(MST_TEST "${MST_TEST_SRC}" "") set(EXPERIMENTAL_GRAPH_TEST_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/experimental/graph_test.cpp") -ConfigureTest(EXPERIMENTAL_GRAPH_TEST "${EXPERIMENTAL_GRAPH_TEST_SRCS}" "") +ConfigureTest(EXPERIMENTAL_GRAPH_TEST "${EXPERIMENTAL_GRAPH_TEST_SRCS}") ################################################################################################### # - Experimental coarsening tests ----------------------------------------------------------------- set(EXPERIMENTAL_COARSEN_GRAPH_TEST_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/experimental/coarsen_graph_test.cpp") -ConfigureTest(EXPERIMENTAL_COARSEN_GRAPH_TEST "${EXPERIMENTAL_COARSEN_GRAPH_TEST_SRCS}" "") +ConfigureTest(EXPERIMENTAL_COARSEN_GRAPH_TEST "${EXPERIMENTAL_COARSEN_GRAPH_TEST_SRCS}") ################################################################################################### # - Experimental induced subgraph tests ----------------------------------------------------------- set(EXPERIMENTAL_INDUCED_SUBGRAPH_TEST_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/experimental/induced_subgraph_test.cpp") -ConfigureTest(EXPERIMENTAL_INDUCED_SUBGRAPH_TEST "${EXPERIMENTAL_INDUCED_SUBGRAPH_TEST_SRCS}" "") +ConfigureTest(EXPERIMENTAL_INDUCED_SUBGRAPH_TEST "${EXPERIMENTAL_INDUCED_SUBGRAPH_TEST_SRCS}") ################################################################################################### # - Experimental BFS tests ------------------------------------------------------------------------ set(EXPERIMENTAL_BFS_TEST_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/experimental/bfs_test.cpp") -ConfigureTest(EXPERIMENTAL_BFS_TEST "${EXPERIMENTAL_BFS_TEST_SRCS}" "") +ConfigureTest(EXPERIMENTAL_BFS_TEST "${EXPERIMENTAL_BFS_TEST_SRCS}") 
################################################################################################### # - Experimental SSSP tests ----------------------------------------------------------------------- set(EXPERIMENTAL_SSSP_TEST_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/experimental/sssp_test.cpp") -ConfigureTest(EXPERIMENTAL_SSSP_TEST "${EXPERIMENTAL_SSSP_TEST_SRCS}" "") +ConfigureTest(EXPERIMENTAL_SSSP_TEST "${EXPERIMENTAL_SSSP_TEST_SRCS}") ################################################################################################### # - Experimental PAGERANK tests ------------------------------------------------------------------- set(EXPERIMENTAL_PAGERANK_TEST_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/experimental/pagerank_test.cpp") -ConfigureTest(EXPERIMENTAL_PAGERANK_TEST "${EXPERIMENTAL_PAGERANK_TEST_SRCS}" "") +ConfigureTest(EXPERIMENTAL_PAGERANK_TEST "${EXPERIMENTAL_PAGERANK_TEST_SRCS}") ################################################################################################### # - Experimental LOUVAIN tests ------------------------------------------------------------------- set(EXPERIMENTAL_LOUVAIN_TEST_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/experimental/louvain_test.cu") -ConfigureTest(EXPERIMENTAL_LOUVAIN_TEST "${EXPERIMENTAL_LOUVAIN_TEST_SRCS}" "") +ConfigureTest(EXPERIMENTAL_LOUVAIN_TEST "${EXPERIMENTAL_LOUVAIN_TEST_SRCS}") ################################################################################################### # - Experimental KATZ_CENTRALITY tests ------------------------------------------------------------ set(EXPERIMENTAL_KATZ_CENTRALITY_TEST_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/experimental/katz_centrality_test.cpp") -ConfigureTest(EXPERIMENTAL_KATZ_CENTRALITY_TEST "${EXPERIMENTAL_KATZ_CENTRALITY_TEST_SRCS}" "") +ConfigureTest(EXPERIMENTAL_KATZ_CENTRALITY_TEST "${EXPERIMENTAL_KATZ_CENTRALITY_TEST_SRCS}") + + +################################################################################################### +# - MG tests -------------------------------------------------------------------------------------- +if(BUILD_CUGRAPH_MG_TESTS) + if(MPI_CXX_FOUND) + ########################################################################################### + # - MG PAGERANK tests --------------------------------------------------------------------- + + set(MG_PAGERANK_TEST_SRCS + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/mg_test_utilities.cu" + "${CMAKE_CURRENT_SOURCE_DIR}/pagerank/pagerank_mg_test.cpp") + + ConfigureTest(MG_PAGERANK_TEST "${MG_PAGERANK_TEST_SRCS}") + target_link_libraries(MG_PAGERANK_TEST PRIVATE MPI::MPI_C MPI::MPI_CXX) + + else(MPI_CXX_FOUND) + message(FATAL_ERROR "OpenMPI NOT found, cannot build MG tests.") + endif(MPI_CXX_FOUND) +endif(BUILD_CUGRAPH_MG_TESTS) ################################################################################################### ### enable testing ################################################################################ diff --git 
a/cpp/tests/README.md b/cpp/tests/README.md new file mode 100644 index 00000000000..b5808822467 --- /dev/null +++ b/cpp/tests/README.md @@ -0,0 +1,31 @@ +# libcugraph C++ tests + +## Prerequisites +### Datasets +``` +/path/to/cuGraph> ./datasets/get_test_data.sh +/path/to/cuGraph> export RAPIDS_DATASET_ROOT_DIR=/path/to/cuGraph/datasets +``` +### System Requirements +* MPI (multi-GPU tests only) + ``` + conda install -c conda-forge openmpi + ``` + +## Building +``` +/path/to/cuGraph> ./build.sh libcugraph +``` +To build the multi-GPU tests: +``` +/path/to/cuGraph> ./build.sh libcugraph cpp-mgtests +``` + +## Running +``` + +``` +To run the multi-GPU tests (example using 2 GPUs): +``` +/path/to/cuGraph> mpirun -n 2 ./cpp/build/gtests/MG_PAGERANK_TEST +``` diff --git a/cpp/tests/pagerank/pagerank_mg_test.cpp b/cpp/tests/pagerank/pagerank_mg_test.cpp new file mode 100644 index 00000000000..7f789226bf1 --- /dev/null +++ b/cpp/tests/pagerank/pagerank_mg_test.cpp @@ -0,0 +1,229 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include + +#include + +#include + +//////////////////////////////////////////////////////////////////////////////// +// Test param object. This defines the input and expected output for a test, and +// will be instantiated as the parameter to the tests defined below using +// INSTANTIATE_TEST_CASE_P() +// +typedef struct Pagerank_Testparams_t { + std::string graph_file_full_path{}; + double personalization_ratio{0.0}; + bool test_weighted{false}; + + Pagerank_Testparams_t(std::string const& graph_file_path, + double personalization_ratio, + bool test_weighted) + : personalization_ratio(personalization_ratio), test_weighted(test_weighted) + { + if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { + graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; + } else { + graph_file_full_path = graph_file_path; + } + }; +} Pagerank_Testparams_t; + +//////////////////////////////////////////////////////////////////////////////// +// Parameterized test fixture, to be used with TEST_P(). This defines common +// setup and teardown steps as well as common utilities used by each E2E MG +// test. In this case, each test is identical except for the inputs and +// expected outputs, so the entire test is defined in the run_test() method. +// +class Pagerank_E2E_MG_Testfixture_t : public cugraph::test::MG_TestFixture_t, + public ::testing::WithParamInterface { + public: + Pagerank_E2E_MG_Testfixture_t() {} + + // Run once for each test instance + virtual void SetUp() {} + virtual void TearDown() {} + + // Return the results of running pagerank on a single GPU for the dataset in + // graph_file_path. 
+ template + std::vector get_sg_results(raft::handle_t& handle, + const std::string& graph_file_path, + const result_t alpha, + const result_t epsilon) + { + auto graph = + cugraph::test::read_graph_from_matrix_market_file( + handle, graph_file_path, true); // FIXME: should use param.test_weighted instead of true + + auto graph_view = graph.view(); + cudaStream_t stream = handle.get_stream(); + rmm::device_uvector d_pageranks(graph_view.get_number_of_vertices(), stream); + + cugraph::experimental::pagerank( + handle, + graph_view, + static_cast(nullptr), // adj_matrix_row_out_weight_sums + static_cast(nullptr), // personalization_vertices + static_cast(nullptr), // personalization_values + static_cast(0), // personalization_vector_size + d_pageranks.begin(), // pageranks + alpha, // alpha (damping factor) + epsilon, // error tolerance for convergence + std::numeric_limits::max(), // max_iterations + false, // has_initial_guess + true); // do_expensive_check + + std::vector h_pageranks(graph_view.get_number_of_vertices()); + raft::update_host(h_pageranks.data(), d_pageranks.data(), d_pageranks.size(), stream); + + return h_pageranks; + } + + // Compare the results of running pagerank on multiple GPUs to that of a + // single-GPU run for the configuration in param. + template + void run_test(const Pagerank_Testparams_t& param) + { + result_t constexpr alpha{0.85}; + result_t constexpr epsilon{1e-6}; + + raft::handle_t handle; + raft::comms::initialize_mpi_comms(&handle, MPI_COMM_WORLD); + const auto& comm = handle.get_comms(); + + cudaStream_t stream = handle.get_stream(); + + // Assuming 2 GPUs which means 1 row, 2 cols. 2 cols = row_comm_size of 2. + // FIXME: DO NOT ASSUME 2 GPUs, add code to compute prows, pcols + size_t row_comm_size{2}; + cugraph::partition_2d::subcomm_factory_t + subcomm_factory(handle, row_comm_size); + + int my_rank = comm.get_rank(); + + // FIXME: graph must be weighted! + std::unique_ptr> // store_transposed=true, + // multi_gpu=true + mg_graph_ptr{}; + rmm::device_uvector d_renumber_map_labels(0, handle.get_stream()); + + std::tie(mg_graph_ptr, d_renumber_map_labels) = cugraph::test:: + create_graph_for_gpu // store_transposed=true + (handle, param.graph_file_full_path); + + auto mg_graph_view = mg_graph_ptr->view(); + + rmm::device_uvector d_mg_pageranks(mg_graph_view.get_number_of_vertices(), stream); + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + cugraph::experimental::pagerank( + handle, + mg_graph_view, + static_cast(nullptr), // adj_matrix_row_out_weight_sums + static_cast(nullptr), // personalization_vertices + static_cast(nullptr), // personalization_values + static_cast(0), // personalization_vector_size + d_mg_pageranks.begin(), // pageranks + alpha, // alpha (damping factor) + epsilon, // error tolerance for convergence + std::numeric_limits::max(), // max_iterations + false, // has_initial_guess + true); // do_expensive_check + + std::vector h_mg_pageranks(mg_graph_view.get_number_of_vertices()); + + raft::update_host(h_mg_pageranks.data(), d_mg_pageranks.data(), d_mg_pageranks.size(), stream); + + std::vector h_renumber_map_labels(mg_graph_view.get_number_of_vertices()); + raft::update_host(h_renumber_map_labels.data(), + d_renumber_map_labels.data(), + d_renumber_map_labels.size(), + stream); + + // Compare MG to SG + // Each GPU will have pagerank values for their range, so ech GPU must + // compare to specific SG results for their respective range. 
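The FIXME above hard-codes `row_comm_size` for a 2-GPU run (1 row x 2 columns). A minimal sketch of how a near-square `prows x pcols` grid could instead be derived from the communicator size is shown below; `compute_2d_grid` is a hypothetical helper used only for illustration and is not part of this patch.

```cpp
#include <cmath>
#include <utility>

// Hypothetical helper (illustration only, not part of this patch): pick a
// near-square prows x pcols grid for a given number of GPUs, so that
// row_comm_size does not need to be hard-coded to 2.
inline std::pair<int, int> compute_2d_grid(int comm_size)
{
  // largest divisor of comm_size that does not exceed sqrt(comm_size)
  int prows = static_cast<int>(std::sqrt(static_cast<double>(comm_size)));
  while (comm_size % prows != 0) { --prows; }
  int pcols = comm_size / prows;
  return {prows, pcols};  // e.g. 2 GPUs -> (1, 2), 4 -> (2, 2), 8 -> (2, 4)
}
```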
+
+    auto h_sg_pageranks = get_sg_results(
+      handle, param.graph_file_full_path, alpha, epsilon);
+
+    // For this test, each GPU will have the full set of vertices and
+    // therefore the pageranks vectors should be equal in size.
+    ASSERT_EQ(h_sg_pageranks.size(), h_mg_pageranks.size());
+
+    auto threshold_ratio = 1e-3;
+    auto threshold_magnitude =
+      (1.0 / static_cast(mg_graph_view.get_number_of_vertices())) *
+      threshold_ratio;  // skip comparison for low PageRank vertices (lowly ranked vertices)
+    auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) {
+      return std::abs(lhs - rhs) <
+             std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude);
+    };
+
+    vertex_t mapped_vertex{0};
+    for (vertex_t i = 0;
+         i + mg_graph_view.get_local_vertex_first() < mg_graph_view.get_local_vertex_last();
+         ++i) {
+      mapped_vertex = h_renumber_map_labels[i];
+      ASSERT_TRUE(nearly_equal(h_mg_pageranks[i], h_sg_pageranks[mapped_vertex]))
+        << "MG PageRank value for vertex: " << i << " in rank: " << my_rank
+        << " has value: " << h_mg_pageranks[i]
+        << " which exceeds the error margin for comparing to SG value: "
+        << h_sg_pageranks[mapped_vertex];
+    }
+  }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+TEST_P(Pagerank_E2E_MG_Testfixture_t, CheckInt32Int32FloatFloat)
+{
+  run_test(GetParam());
+}
+
+INSTANTIATE_TEST_CASE_P(
+  e2e,
+  Pagerank_E2E_MG_Testfixture_t,
+
+  // FIXME: the personalization_ratio and test_weighted bool are not used
+  // (personalization vectors are not used, and all datasets are assumed
+  // weighted). Update this to use personalization vectors and non-weighted
+  // graphs.
+  ::testing::Values(Pagerank_Testparams_t("test/datasets/karate.mtx", 0.0, true),
+                    // FIXME: The commented datasets contain isolated vertices
+                    // which result in a different number of vertices in the
+                    // renumbered MG graph (because the renumbering function
+                    // does not include them) vs. the SG graph object used for
+                    // the pagerank comparison because the SG graph reads the
+                    // COO as-is without renumbering. Update the utility that
+                    // reads a .mtx and constructs a SG graph object to also
+                    // renumber and return the renumbered vertices vector. This
+                    // will result in a comparison of an equal number of
+                    // pagerank values.
+                    //
+                    // Pagerank_Testparams_t("test/datasets/web-Google.mtx", 0.0, true),
+                    // Pagerank_Testparams_t("test/datasets/ljournal-2008.mtx", 0.0, true),
+                    Pagerank_Testparams_t("test/datasets/webbase-1M.mtx", 0.0, true)));
+
+// FIXME: Enable proper RMM configuration by using CUGRAPH_TEST_PROGRAM_MAIN().
+// Currently seeing a RMM failure during init, need to investigate.
+// CUGRAPH_TEST_PROGRAM_MAIN()
diff --git a/cpp/tests/utilities/base_fixture.hpp b/cpp/tests/utilities/base_fixture.hpp
index 535b4b9c79e..3525db73425 100644
--- a/cpp/tests/utilities/base_fixture.hpp
+++ b/cpp/tests/utilities/base_fixture.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -32,6 +32,12 @@
 namespace cugraph {
 namespace test {
 
+// FIXME: The BaseFixture class is not used in any tests. This file is only
+// needed for the CUGRAPH_TEST_PROGRAM_MAIN macro and the code that it calls, so
+// consider removing the BaseFixture class and renaming this file, or moving
+// CUGRAPH_TEST_PROGRAM_MAIN to the test_utilities.hpp file and removing this
+// file completely.
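Taken together, `MG_TestFixture_t` (added below in `mg_test_utilities.hpp`) and the Pagerank `run_test()` above suggest the following shape for a new MG test. This is a condensed sketch, not a working test: the include paths, the `MyAlgo_MG_Test` name, and `compute_2d_grid` (the hypothetical helper sketched earlier) are assumptions, and the graph construction and algorithm call are only outlined in comments.

```cpp
#include <utilities/mg_test_utilities.hpp>  // assumed path for cugraph::test::MG_TestFixture_t

#include <raft/comms/mpi_comms.hpp>  // assumed path for raft::comms::initialize_mpi_comms
#include <raft/handle.hpp>

#include <gtest/gtest.h>
#include <mpi.h>

#include <utility>

std::pair<int, int> compute_2d_grid(int comm_size);  // hypothetical helper sketched earlier

// SetUpTestCase() in MG_TestFixture_t calls MPI_Init() and binds each rank to a GPU.
class MyAlgo_MG_Test : public cugraph::test::MG_TestFixture_t {
};

TEST_F(MyAlgo_MG_Test, Sketch)
{
  raft::handle_t handle;
  raft::comms::initialize_mpi_comms(&handle, MPI_COMM_WORLD);  // attach MPI comms to the handle
  auto const& comm = handle.get_comms();

  // Derive the 2D grid instead of assuming 2 GPUs.
  auto grid            = compute_2d_grid(comm.get_size());
  size_t row_comm_size = static_cast<size_t>(grid.second);
  (void)row_comm_size;

  // A real test would now create the row/column sub-communicators
  // (cugraph::partition_2d::subcomm_factory_t), build the per-GPU graph with
  // cugraph::test::create_graph_for_gpu(), run the MG algorithm, and compare
  // the unrenumbered result against a single-GPU reference, as the Pagerank
  // test above does.
}
```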
+ /** * @brief Base test fixture class from which all libcudf tests should inherit. * diff --git a/cpp/tests/utilities/mg_test_utilities.cu b/cpp/tests/utilities/mg_test_utilities.cu new file mode 100644 index 00000000000..26f2450b589 --- /dev/null +++ b/cpp/tests/utilities/mg_test_utilities.cu @@ -0,0 +1,180 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include +#include +#include + +namespace cugraph { +namespace test { + +// Given a raft handle and a path to a dataset (must be a .mtx file), returns a +// tuple containing: +// * graph_t instance for the partition accesible from the raft handle +// * vector of indices representing the original unrenumberd vertices +// +// This function creates a graph_t instance appropriate for MG graph +// applications from the edgelist graph data file passed in by filtering out the +// vertices not to be assigned to the GPU in this rank, then renumbering the +// vertices appropriately. The returned vector of vertices contains the original +// vertex IDs, ordered by the new sequential renumbered IDs (this is needed for +// unrenumbering). +template +std::tuple< + std::unique_ptr>, // multi_gpu=true + rmm::device_uvector> +create_graph_for_gpu(raft::handle_t& handle, const std::string& graph_file_path) +{ + const auto& comm = handle.get_comms(); + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + + int my_rank = comm.get_rank(); + + auto edgelist_from_mm = + ::cugraph::test::read_edgelist_from_matrix_market_file( + graph_file_path); + + edge_t total_number_edges = static_cast(edgelist_from_mm.h_rows.size()); + + ////////// + // Copy COO to device + rmm::device_uvector d_edgelist_rows(total_number_edges, handle.get_stream()); + rmm::device_uvector d_edgelist_cols(total_number_edges, handle.get_stream()); + rmm::device_uvector d_edgelist_weights(total_number_edges, handle.get_stream()); + + raft::update_device(d_edgelist_rows.data(), + edgelist_from_mm.h_rows.data(), + total_number_edges, + handle.get_stream()); + raft::update_device(d_edgelist_cols.data(), + edgelist_from_mm.h_cols.data(), + total_number_edges, + handle.get_stream()); + raft::update_device(d_edgelist_weights.data(), + edgelist_from_mm.h_weights.data(), + total_number_edges, + handle.get_stream()); + + ////////// + // Filter out edges that are not to be associated with this rank + // + // Create a edge_gpu_identifier, which will be used by the individual jobs to + // identify if a edge belongs to a particular rank + cugraph::experimental::detail::compute_gpu_id_from_edge_t edge_gpu_identifier{ + false, comm.get_size(), row_comm.get_size(), col_comm.get_size()}; + + auto edgelist_zip_it_begin = thrust::make_zip_iterator(thrust::make_tuple( + d_edgelist_rows.begin(), d_edgelist_cols.begin(), d_edgelist_weights.begin())); + bool is_transposed{store_transposed}; + + // 
Do the removal - note: remove_if does not delete items, it moves "removed" + // items to the back of the vector and returns the iterator (new_end) that + // represents the items kept. Actual removal of items can be done by + // resizing (see below). + auto new_end = thrust::remove_if( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edgelist_zip_it_begin, + edgelist_zip_it_begin + total_number_edges, + [my_rank, is_transposed, edge_gpu_identifier] __device__(auto tup) { + if (is_transposed) { + return (edge_gpu_identifier(thrust::get<1>(tup), thrust::get<0>(tup)) != my_rank); + } else { + return (edge_gpu_identifier(thrust::get<0>(tup), thrust::get<1>(tup)) != my_rank); + } + }); + + edge_t local_number_edges = thrust::distance(edgelist_zip_it_begin, new_end); + // Free the memory used for the items remove_if "removed". This not only + // frees memory, but keeps the actual vector sizes consistent with the data + // being used from this point forward. + d_edgelist_rows.resize(local_number_edges, handle.get_stream()); + d_edgelist_rows.shrink_to_fit(handle.get_stream()); + d_edgelist_cols.resize(local_number_edges, handle.get_stream()); + d_edgelist_cols.shrink_to_fit(handle.get_stream()); + d_edgelist_weights.resize(local_number_edges, handle.get_stream()); + d_edgelist_weights.shrink_to_fit(handle.get_stream()); + + ////////// + // renumber filtered edgelist_from_mm + vertex_t* major_vertices{nullptr}; + vertex_t* minor_vertices{nullptr}; + if (is_transposed) { + major_vertices = d_edgelist_cols.data(); + minor_vertices = d_edgelist_rows.data(); + } else { + major_vertices = d_edgelist_rows.data(); + minor_vertices = d_edgelist_cols.data(); + } + + rmm::device_uvector renumber_map_labels(0, handle.get_stream()); + cugraph::experimental::partition_t partition( + std::vector(comm.get_size() + 1, 0), + false, // is_hypergraph_partitioned() + row_comm.get_size(), + col_comm.get_size(), + row_comm.get_rank(), + col_comm.get_rank()); + vertex_t number_of_vertices{}; + edge_t number_of_edges{}; + std::tie(renumber_map_labels, partition, number_of_vertices, number_of_edges) = + ::cugraph::experimental::renumber_edgelist // multi_gpu=true + (handle, + major_vertices, // edgelist_major_vertices, INOUT of vertex_t* + minor_vertices, // edgelist_minor_vertices, INOUT of vertex_t* + local_number_edges, + false, // is_hypergraph_partitioned + true); // do_expensive_check + + cugraph::experimental::edgelist_t edgelist{ + d_edgelist_rows.data(), d_edgelist_cols.data(), d_edgelist_weights.data(), local_number_edges}; + + std::vector> edgelist_vect; + edgelist_vect.push_back(edgelist); + cugraph::experimental::graph_properties_t properties; + properties.is_symmetric = edgelist_from_mm.is_symmetric; + properties.is_multigraph = false; + + // Finally, create instance of graph_t using filtered & renumbered edgelist + return std::make_tuple( + std::make_unique< + cugraph::experimental::graph_t>( + handle, + edgelist_vect, + partition, + number_of_vertices, + total_number_edges, + properties, + false, // sorted_by_global_degree_within_vertex_partition + true), // do_expensive_check + std::move(renumber_map_labels)); +} + +// explicit instantiation +template std::tuple< + std::unique_ptr< + cugraph::experimental::graph_t>, // store_transposed=true + // multi_gpu=true + rmm::device_uvector> +create_graph_for_gpu(raft::handle_t& handle, const std::string& graph_file_path); + +} // namespace test +} // namespace cugraph diff --git a/cpp/tests/utilities/mg_test_utilities.hpp 
b/cpp/tests/utilities/mg_test_utilities.hpp new file mode 100644 index 00000000000..c23f6c43a6d --- /dev/null +++ b/cpp/tests/utilities/mg_test_utilities.hpp @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include + +#include + +#include + +namespace cugraph { +namespace test { + +// Given a raft handle and a path to a dataset (must be a .mtx file), returns a +// tuple containing: +// * graph_t instance for the partition accesible from the raft handle +// * 4-tuple containing renumber info resulting from renumbering the +// edgelist for the partition +template +std::tuple< + std::unique_ptr>, // multi_gpu=true + rmm::device_uvector> +create_graph_for_gpu(raft::handle_t& handle, const std::string& graph_file_path); + +/** + * @brief Base test fixture class, responsible for handling common operations + * needed by all MG tests. + * + * It's expected this class will be built out and refactored often as new MG C++ + * tests are added and new patterns evolve. + * + * Example: + * ``` + * class MyTestFixture : public cugraph::test::MG_TestFixture_t {}; + * ``` + **/ + +// FIXME: consider moving this to a separate file? (eg. mg_test_fixture.cpp)? + +class MG_TestFixture_t : public ::testing::Test { + public: + static void SetUpTestCase() + { + MPI_TRY(MPI_Init(NULL, NULL)); + + int rank, size; + MPI_TRY(MPI_Comm_rank(MPI_COMM_WORLD, &rank)); + MPI_TRY(MPI_Comm_size(MPI_COMM_WORLD, &size)); + + int nGpus; + CUDA_CHECK(cudaGetDeviceCount(&nGpus)); + + ASSERT( + nGpus >= size, "Number of GPUs are lesser than MPI ranks! ngpus=%d, nranks=%d", nGpus, size); + + CUDA_CHECK(cudaSetDevice(rank)); + } + + static void TearDownTestCase() { MPI_TRY(MPI_Finalize()); } +}; + +} // namespace test +} // namespace cugraph diff --git a/cpp/tests/utilities/test_utilities.cpp b/cpp/tests/utilities/test_utilities.cpp new file mode 100644 index 00000000000..abb416a632d --- /dev/null +++ b/cpp/tests/utilities/test_utilities.cpp @@ -0,0 +1,442 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include + +#include +#include +#include + +#include + +extern "C" { +#include "mmio.h" +} + +#include + +#include +#include +#include +#include + +namespace cugraph { +namespace test { + +std::string getFileName(const std::string& s) +{ + char sep = '/'; +#ifdef _WIN32 + sep = '\\'; +#endif + size_t i = s.rfind(sep, s.length()); + if (i != std::string::npos) { return (s.substr(i + 1, s.length() - i)); } + return (""); +} + +/// Read matrix properties from Matrix Market file +/** Matrix Market file is assumed to be a sparse matrix in coordinate + * format. + * + * @param f File stream for Matrix Market file. + * @param tg Boolean indicating whether to convert matrix to general + * format (from symmetric, Hermitian, or skew symmetric format). + * @param t (Output) MM_typecode with matrix properties. + * @param m (Output) Number of matrix rows. + * @param n (Output) Number of matrix columns. + * @param nnz (Output) Number of non-zero matrix entries. + * @return Zero if properties were read successfully. Otherwise + * non-zero. + */ +template +int mm_properties(FILE* f, int tg, MM_typecode* t, IndexType_* m, IndexType_* n, IndexType_* nnz) +{ + // Read matrix properties from file + int mint, nint, nnzint; + if (fseek(f, 0, SEEK_SET)) { + fprintf(stderr, "Error: could not set position in file\n"); + return -1; + } + if (mm_read_banner(f, t)) { + fprintf(stderr, "Error: could not read Matrix Market file banner\n"); + return -1; + } + if (!mm_is_matrix(*t) || !mm_is_coordinate(*t)) { + fprintf(stderr, "Error: file does not contain matrix in coordinate format\n"); + return -1; + } + if (mm_read_mtx_crd_size(f, &mint, &nint, &nnzint)) { + fprintf(stderr, "Error: could not read matrix dimensions\n"); + return -1; + } + if (!mm_is_pattern(*t) && !mm_is_real(*t) && !mm_is_integer(*t) && !mm_is_complex(*t)) { + fprintf(stderr, "Error: matrix entries are not valid type\n"); + return -1; + } + *m = mint; + *n = nint; + *nnz = nnzint; + + // Find total number of non-zero entries + if (tg && !mm_is_general(*t)) { + // Non-diagonal entries should be counted twice + *nnz *= 2; + + // Diagonal entries should not be double-counted + int st; + for (int i = 0; i < nnzint; ++i) { + // Read matrix entry + // MTX only supports int for row and col idx + int row, col; + double rval, ival; + if (mm_is_pattern(*t)) + st = fscanf(f, "%d %d\n", &row, &col); + else if (mm_is_real(*t) || mm_is_integer(*t)) + st = fscanf(f, "%d %d %lg\n", &row, &col, &rval); + else // Complex matrix + st = fscanf(f, "%d %d %lg %lg\n", &row, &col, &rval, &ival); + if (ferror(f) || (st == EOF)) { + fprintf(stderr, "Error: error %d reading Matrix Market file (entry %d)\n", st, i + 1); + return -1; + } + + // Check if entry is diagonal + if (row == col) --(*nnz); + } + } + + return 0; +} + +/// Read Matrix Market file and convert to COO format matrix +/** Matrix Market file is assumed to be a sparse matrix in coordinate + * format. + * + * @param f File stream for Matrix Market file. + * @param tg Boolean indicating whether to convert matrix to general + * format (from symmetric, Hermitian, or skew symmetric format). + * @param nnz Number of non-zero matrix entries. + * @param cooRowInd (Output) Row indices for COO matrix. Should have + * at least nnz entries. + * @param cooColInd (Output) Column indices for COO matrix. Should + * have at least nnz entries. + * @param cooRVal (Output) Real component of COO matrix + * entries. Should have at least nnz entries. Ignored if null + * pointer. 
+ * @param cooIVal (Output) Imaginary component of COO matrix + * entries. Should have at least nnz entries. Ignored if null + * pointer. + * @return Zero if matrix was read successfully. Otherwise non-zero. + */ +template +int mm_to_coo(FILE* f, + int tg, + IndexType_ nnz, + IndexType_* cooRowInd, + IndexType_* cooColInd, + ValueType_* cooRVal, + ValueType_* cooIVal) +{ + // Read matrix properties from file + MM_typecode t; + int m, n, nnzOld; + if (fseek(f, 0, SEEK_SET)) { + fprintf(stderr, "Error: could not set position in file\n"); + return -1; + } + if (mm_read_banner(f, &t)) { + fprintf(stderr, "Error: could not read Matrix Market file banner\n"); + return -1; + } + if (!mm_is_matrix(t) || !mm_is_coordinate(t)) { + fprintf(stderr, "Error: file does not contain matrix in coordinate format\n"); + return -1; + } + if (mm_read_mtx_crd_size(f, &m, &n, &nnzOld)) { + fprintf(stderr, "Error: could not read matrix dimensions\n"); + return -1; + } + if (!mm_is_pattern(t) && !mm_is_real(t) && !mm_is_integer(t) && !mm_is_complex(t)) { + fprintf(stderr, "Error: matrix entries are not valid type\n"); + return -1; + } + + // Add each matrix entry in file to COO format matrix + int i; // Entry index in Matrix Market file; can only be int in the MTX format + int j = 0; // Entry index in COO format matrix; can only be int in the MTX format + for (i = 0; i < nnzOld; ++i) { + // Read entry from file + int row, col; + double rval, ival; + int st; + if (mm_is_pattern(t)) { + st = fscanf(f, "%d %d\n", &row, &col); + rval = 1.0; + ival = 0.0; + } else if (mm_is_real(t) || mm_is_integer(t)) { + st = fscanf(f, "%d %d %lg\n", &row, &col, &rval); + ival = 0.0; + } else // Complex matrix + st = fscanf(f, "%d %d %lg %lg\n", &row, &col, &rval, &ival); + if (ferror(f) || (st == EOF)) { + fprintf(stderr, "Error: error %d reading Matrix Market file (entry %d)\n", st, i + 1); + return -1; + } + + // Switch to 0-based indexing + --row; + --col; + + // Record entry + cooRowInd[j] = row; + cooColInd[j] = col; + if (cooRVal != NULL) cooRVal[j] = rval; + if (cooIVal != NULL) cooIVal[j] = ival; + ++j; + + // Add symmetric complement of non-diagonal entries + if (tg && !mm_is_general(t) && (row != col)) { + // Modify entry value if matrix is skew symmetric or Hermitian + if (mm_is_skew(t)) { + rval = -rval; + ival = -ival; + } else if (mm_is_hermitian(t)) { + ival = -ival; + } + + // Record entry + cooRowInd[j] = col; + cooColInd[j] = row; + if (cooRVal != NULL) cooRVal[j] = rval; + if (cooIVal != NULL) cooIVal[j] = ival; + ++j; + } + } + return 0; +} + +int read_binary_vector(FILE* fpin, int n, std::vector& val) +{ + size_t is_read1; + + double* t_storage = new double[n]; + is_read1 = fread(t_storage, sizeof(double), n, fpin); + for (int i = 0; i < n; i++) { + if (t_storage[i] == DBL_MAX) + val[i] = FLT_MAX; + else if (t_storage[i] == -DBL_MAX) + val[i] = -FLT_MAX; + else + val[i] = static_cast(t_storage[i]); + } + delete[] t_storage; + + if (is_read1 != (size_t)n) { + printf("%s", "I/O fail\n"); + return 1; + } + return 0; +} + +int read_binary_vector(FILE* fpin, int n, std::vector& val) +{ + size_t is_read1; + + is_read1 = fread(&val[0], sizeof(double), n, fpin); + + if (is_read1 != (size_t)n) { + printf("%s", "I/O fail\n"); + return 1; + } + return 0; +} + +// FIXME: A similar function could be useful for CSC format +// There are functions above that operate coo -> csr and coo->csc +/** + * @tparam + */ +template +std::unique_ptr> generate_graph_csr_from_mm( + bool& directed, std::string mm_file) +{ + vertex_t 
number_of_vertices; + edge_t number_of_edges; + + FILE* fpin = fopen(mm_file.c_str(), "r"); + EXPECT_NE(fpin, nullptr); + + vertex_t number_of_columns = 0; + MM_typecode mm_typecode{0}; + EXPECT_EQ(mm_properties( + fpin, 1, &mm_typecode, &number_of_vertices, &number_of_columns, &number_of_edges), + 0); + EXPECT_TRUE(mm_is_matrix(mm_typecode)); + EXPECT_TRUE(mm_is_coordinate(mm_typecode)); + EXPECT_FALSE(mm_is_complex(mm_typecode)); + EXPECT_FALSE(mm_is_skew(mm_typecode)); + + directed = !mm_is_symmetric(mm_typecode); + + // Allocate memory on host + std::vector coo_row_ind(number_of_edges); + std::vector coo_col_ind(number_of_edges); + std::vector coo_val(number_of_edges); + + // Read + EXPECT_EQ((mm_to_coo( + fpin, 1, number_of_edges, &coo_row_ind[0], &coo_col_ind[0], &coo_val[0], NULL)), + 0); + EXPECT_EQ(fclose(fpin), 0); + + cugraph::GraphCOOView cooview( + &coo_row_ind[0], &coo_col_ind[0], &coo_val[0], number_of_vertices, number_of_edges); + + return cugraph::coo_to_csr(cooview); +} + +template +edgelist_from_market_matrix_file_t read_edgelist_from_matrix_market_file( + std::string const& graph_file_full_path) +{ + edgelist_from_market_matrix_file_t ret{}; + + MM_typecode mc{}; + vertex_t m{}; + edge_t nnz{}; + + FILE* file = fopen(graph_file_full_path.c_str(), "r"); + CUGRAPH_EXPECTS(file != nullptr, "fopen failure."); + + edge_t tmp_m{}; + edge_t tmp_k{}; + auto mm_ret = cugraph::test::mm_properties(file, 1, &mc, &tmp_m, &tmp_k, &nnz); + CUGRAPH_EXPECTS(mm_ret == 0, "could not read Matrix Market file properties."); + m = static_cast(tmp_m); + CUGRAPH_EXPECTS(mm_is_matrix(mc) && mm_is_coordinate(mc) && !mm_is_complex(mc) && !mm_is_skew(mc), + "invalid Matrix Market file properties."); + + ret.h_rows.assign(nnz, vertex_t{0}); + ret.h_cols.assign(nnz, vertex_t{0}); + ret.h_weights.assign(nnz, weight_t{0.0}); + ret.number_of_vertices = m; + ret.is_symmetric = mm_is_symmetric(mc); + + mm_ret = cugraph::test::mm_to_coo( + file, 1, nnz, ret.h_rows.data(), ret.h_cols.data(), ret.h_weights.data(), nullptr); + CUGRAPH_EXPECTS(mm_ret == 0, "could not read matrix data"); + + auto file_ret = fclose(file); + CUGRAPH_EXPECTS(file_ret == 0, "fclose failure."); + + return std::move(ret); +} + +template +cugraph::experimental::graph_t +read_graph_from_matrix_market_file(raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted) +{ + auto mm_graph = + read_edgelist_from_matrix_market_file(graph_file_full_path); + edge_t number_of_edges = static_cast(mm_graph.h_rows.size()); + + rmm::device_uvector d_edgelist_rows(number_of_edges, handle.get_stream()); + rmm::device_uvector d_edgelist_cols(number_of_edges, handle.get_stream()); + rmm::device_uvector d_edgelist_weights(test_weighted ? number_of_edges : 0, + handle.get_stream()); + + raft::update_device( + d_edgelist_rows.data(), mm_graph.h_rows.data(), number_of_edges, handle.get_stream()); + raft::update_device( + d_edgelist_cols.data(), mm_graph.h_cols.data(), number_of_edges, handle.get_stream()); + if (test_weighted) { + raft::update_device( + d_edgelist_weights.data(), mm_graph.h_weights.data(), number_of_edges, handle.get_stream()); + } + + cugraph::experimental::edgelist_t edgelist{ + d_edgelist_rows.data(), + d_edgelist_cols.data(), + test_weighted ? 
d_edgelist_weights.data() : nullptr, + number_of_edges}; + + return cugraph::experimental::graph_t( + handle, + edgelist, + mm_graph.number_of_vertices, + cugraph::experimental::graph_properties_t{mm_graph.is_symmetric, false}, + false, + true); +} + +// explicit instantiations + +template int mm_to_coo( + FILE* f, int tg, int nnz, int* cooRowInd, int* cooColInd, int* cooRVal, int* cooIVal); + +template int mm_to_coo( + FILE* f, int tg, int nnz, int* cooRowInd, int* cooColInd, double* cooRVal, double* cooIVal); + +template int mm_to_coo( + FILE* f, int tg, int nnz, int* cooRowInd, int* cooColInd, float* cooRVal, float* cooIVal); + +template std::unique_ptr> +generate_graph_csr_from_mm(bool& directed, std::string mm_file); + +template std::unique_ptr> generate_graph_csr_from_mm( + bool& directed, std::string mm_file); + +template std::unique_ptr> generate_graph_csr_from_mm( + bool& directed, std::string mm_file); + +template std::unique_ptr> generate_graph_csr_from_mm( + bool& directed, std::string mm_file); + +template cugraph::experimental::graph_t +read_graph_from_matrix_market_file(raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted); + +template cugraph::experimental::graph_t +read_graph_from_matrix_market_file(raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted); + +template cugraph::experimental::graph_t +read_graph_from_matrix_market_file(raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted); + +template cugraph::experimental::graph_t +read_graph_from_matrix_market_file( + raft::handle_t const& handle, std::string const& graph_file_full_path, bool test_weighted); + +template cugraph::experimental::graph_t +read_graph_from_matrix_market_file(raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted); + +template cugraph::experimental::graph_t +read_graph_from_matrix_market_file( + raft::handle_t const& handle, std::string const& graph_file_full_path, bool test_weighted); + +template cugraph::experimental::graph_t +read_graph_from_matrix_market_file( + raft::handle_t const& handle, std::string const& graph_file_full_path, bool test_weighted); + +} // namespace test +} // namespace cugraph diff --git a/cpp/tests/utilities/test_utilities.hpp b/cpp/tests/utilities/test_utilities.hpp index 518e7c2860e..406f09048e0 100644 --- a/cpp/tests/utilities/test_utilities.hpp +++ b/cpp/tests/utilities/test_utilities.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,37 +16,20 @@ #pragma once #include -#include -#include +#include -#include +#include +#include +#include extern "C" { #include "mmio.h" } -#include - -#include -#include -#include -#include - namespace cugraph { namespace test { -std::string getFileName(const std::string& s) -{ - char sep = '/'; - -#ifdef _WIN32 - sep = '\\'; -#endif - - size_t i = s.rfind(sep, s.length()); - if (i != std::string::npos) { return (s.substr(i + 1, s.length() - i)); } - return (""); -} +std::string getFileName(const std::string& s); /// Read matrix properties from Matrix Market file /** Matrix Market file is assumed to be a sparse matrix in coordinate @@ -63,64 +46,7 @@ std::string getFileName(const std::string& s) * non-zero. 
*/ template -int mm_properties(FILE* f, int tg, MM_typecode* t, IndexType_* m, IndexType_* n, IndexType_* nnz) -{ - // Read matrix properties from file - int mint, nint, nnzint; - if (fseek(f, 0, SEEK_SET)) { - fprintf(stderr, "Error: could not set position in file\n"); - return -1; - } - if (mm_read_banner(f, t)) { - fprintf(stderr, "Error: could not read Matrix Market file banner\n"); - return -1; - } - if (!mm_is_matrix(*t) || !mm_is_coordinate(*t)) { - fprintf(stderr, "Error: file does not contain matrix in coordinate format\n"); - return -1; - } - if (mm_read_mtx_crd_size(f, &mint, &nint, &nnzint)) { - fprintf(stderr, "Error: could not read matrix dimensions\n"); - return -1; - } - if (!mm_is_pattern(*t) && !mm_is_real(*t) && !mm_is_integer(*t) && !mm_is_complex(*t)) { - fprintf(stderr, "Error: matrix entries are not valid type\n"); - return -1; - } - *m = mint; - *n = nint; - *nnz = nnzint; - - // Find total number of non-zero entries - if (tg && !mm_is_general(*t)) { - // Non-diagonal entries should be counted twice - *nnz *= 2; - - // Diagonal entries should not be double-counted - int st; - for (int i = 0; i < nnzint; ++i) { - // Read matrix entry - // MTX only supports int for row and col idx - int row, col; - double rval, ival; - if (mm_is_pattern(*t)) - st = fscanf(f, "%d %d\n", &row, &col); - else if (mm_is_real(*t) || mm_is_integer(*t)) - st = fscanf(f, "%d %d %lg\n", &row, &col, &rval); - else // Complex matrix - st = fscanf(f, "%d %d %lg %lg\n", &row, &col, &rval, &ival); - if (ferror(f) || (st == EOF)) { - fprintf(stderr, "Error: error %d reading Matrix Market file (entry %d)\n", st, i + 1); - return -1; - } - - // Check if entry is diagonal - if (row == col) --(*nnz); - } - } - - return 0; -} +int mm_properties(FILE* f, int tg, MM_typecode* t, IndexType_* m, IndexType_* n, IndexType_* nnz); /// Read Matrix Market file and convert to COO format matrix /** Matrix Market file is assumed to be a sparse matrix in coordinate @@ -149,169 +75,20 @@ int mm_to_coo(FILE* f, IndexType_* cooRowInd, IndexType_* cooColInd, ValueType_* cooRVal, - ValueType_* cooIVal) -{ - // Read matrix properties from file - MM_typecode t; - int m, n, nnzOld; - if (fseek(f, 0, SEEK_SET)) { - fprintf(stderr, "Error: could not set position in file\n"); - return -1; - } - if (mm_read_banner(f, &t)) { - fprintf(stderr, "Error: could not read Matrix Market file banner\n"); - return -1; - } - if (!mm_is_matrix(t) || !mm_is_coordinate(t)) { - fprintf(stderr, "Error: file does not contain matrix in coordinate format\n"); - return -1; - } - if (mm_read_mtx_crd_size(f, &m, &n, &nnzOld)) { - fprintf(stderr, "Error: could not read matrix dimensions\n"); - return -1; - } - if (!mm_is_pattern(t) && !mm_is_real(t) && !mm_is_integer(t) && !mm_is_complex(t)) { - fprintf(stderr, "Error: matrix entries are not valid type\n"); - return -1; - } - - // Add each matrix entry in file to COO format matrix - int i; // Entry index in Matrix Market file; can only be int in the MTX format - int j = 0; // Entry index in COO format matrix; can only be int in the MTX format - for (i = 0; i < nnzOld; ++i) { - // Read entry from file - int row, col; - double rval, ival; - int st; - if (mm_is_pattern(t)) { - st = fscanf(f, "%d %d\n", &row, &col); - rval = 1.0; - ival = 0.0; - } else if (mm_is_real(t) || mm_is_integer(t)) { - st = fscanf(f, "%d %d %lg\n", &row, &col, &rval); - ival = 0.0; - } else // Complex matrix - st = fscanf(f, "%d %d %lg %lg\n", &row, &col, &rval, &ival); - if (ferror(f) || (st == EOF)) { - fprintf(stderr, "Error: 
error %d reading Matrix Market file (entry %d)\n", st, i + 1); - return -1; - } - - // Switch to 0-based indexing - --row; - --col; - - // Record entry - cooRowInd[j] = row; - cooColInd[j] = col; - if (cooRVal != NULL) cooRVal[j] = rval; - if (cooIVal != NULL) cooIVal[j] = ival; - ++j; - - // Add symmetric complement of non-diagonal entries - if (tg && !mm_is_general(t) && (row != col)) { - // Modify entry value if matrix is skew symmetric or Hermitian - if (mm_is_skew(t)) { - rval = -rval; - ival = -ival; - } else if (mm_is_hermitian(t)) { - ival = -ival; - } - - // Record entry - cooRowInd[j] = col; - cooColInd[j] = row; - if (cooRVal != NULL) cooRVal[j] = rval; - if (cooIVal != NULL) cooIVal[j] = ival; - ++j; - } - } - return 0; -} + ValueType_* cooIVal); -int read_binary_vector(FILE* fpin, int n, std::vector& val) -{ - size_t is_read1; - - double* t_storage = new double[n]; - is_read1 = fread(t_storage, sizeof(double), n, fpin); - for (int i = 0; i < n; i++) { - if (t_storage[i] == DBL_MAX) - val[i] = FLT_MAX; - else if (t_storage[i] == -DBL_MAX) - val[i] = -FLT_MAX; - else - val[i] = static_cast(t_storage[i]); - } - delete[] t_storage; - - if (is_read1 != (size_t)n) { - printf("%s", "I/O fail\n"); - return 1; - } - return 0; -} - -int read_binary_vector(FILE* fpin, int n, std::vector& val) -{ - size_t is_read1; +int read_binary_vector(FILE* fpin, int n, std::vector& val); - is_read1 = fread(&val[0], sizeof(double), n, fpin); - - if (is_read1 != (size_t)n) { - printf("%s", "I/O fail\n"); - return 1; - } - return 0; -} +int read_binary_vector(FILE* fpin, int n, std::vector& val); // FIXME: A similar function could be useful for CSC format // There are functions above that operate coo -> csr and coo->csc /** * @tparam */ -template -std::unique_ptr> generate_graph_csr_from_mm(bool& directed, - std::string mm_file) -{ - VT number_of_vertices; - ET number_of_edges; - - FILE* fpin = fopen(mm_file.c_str(), "r"); - EXPECT_NE(fpin, nullptr); - - VT number_of_columns = 0; - MM_typecode mm_typecode{0}; - EXPECT_EQ(mm_properties( - fpin, 1, &mm_typecode, &number_of_vertices, &number_of_columns, &number_of_edges), - 0); - EXPECT_TRUE(mm_is_matrix(mm_typecode)); - EXPECT_TRUE(mm_is_coordinate(mm_typecode)); - EXPECT_FALSE(mm_is_complex(mm_typecode)); - EXPECT_FALSE(mm_is_skew(mm_typecode)); - - directed = !mm_is_symmetric(mm_typecode); - - // Allocate memory on host - std::vector coo_row_ind(number_of_edges); - std::vector coo_col_ind(number_of_edges); - std::vector coo_val(number_of_edges); - - // Read - EXPECT_EQ((mm_to_coo( - fpin, 1, number_of_edges, &coo_row_ind[0], &coo_col_ind[0], &coo_val[0], NULL)), - 0); - EXPECT_EQ(fclose(fpin), 0); - - cugraph::GraphCOOView cooview( - &coo_row_ind[0], &coo_col_ind[0], &coo_val[0], number_of_vertices, number_of_edges); - - return cugraph::coo_to_csr(cooview); -} - -//////////////////////////////////////////////////////////////////////////////// -// FIXME: move this code to rapids-core -//////////////////////////////////////////////////////////////////////////////// +template +std::unique_ptr> generate_graph_csr_from_mm( + bool& directed, std::string mm_file); // Define RAPIDS_DATASET_ROOT_DIR using a preprocessor variable to // allow for a build to override the default. 
This is useful for @@ -342,79 +119,13 @@ struct edgelist_from_market_matrix_file_t { template edgelist_from_market_matrix_file_t read_edgelist_from_matrix_market_file( - std::string const& graph_file_full_path) -{ - edgelist_from_market_matrix_file_t ret{}; - - MM_typecode mc{}; - vertex_t m{}; - edge_t nnz{}; - - FILE* file = fopen(graph_file_full_path.c_str(), "r"); - CUGRAPH_EXPECTS(file != nullptr, "fopen failure."); - - edge_t tmp_m{}; - edge_t tmp_k{}; - auto mm_ret = cugraph::test::mm_properties(file, 1, &mc, &tmp_m, &tmp_k, &nnz); - CUGRAPH_EXPECTS(mm_ret == 0, "could not read Matrix Market file properties."); - m = static_cast(tmp_m); - CUGRAPH_EXPECTS(mm_is_matrix(mc) && mm_is_coordinate(mc) && !mm_is_complex(mc) && !mm_is_skew(mc), - "invalid Matrix Market file properties."); - - ret.h_rows.assign(nnz, vertex_t{0}); - ret.h_cols.assign(nnz, vertex_t{0}); - ret.h_weights.assign(nnz, weight_t{0.0}); - ret.number_of_vertices = m; - ret.is_symmetric = mm_is_symmetric(mc); - - mm_ret = cugraph::test::mm_to_coo( - file, 1, nnz, ret.h_rows.data(), ret.h_cols.data(), ret.h_weights.data(), nullptr); - CUGRAPH_EXPECTS(mm_ret == 0, "could not read matrix data"); - - auto file_ret = fclose(file); - CUGRAPH_EXPECTS(file_ret == 0, "fclose failure."); - - return std::move(ret); -} + std::string const& graph_file_full_path); template cugraph::experimental::graph_t read_graph_from_matrix_market_file(raft::handle_t const& handle, std::string const& graph_file_full_path, - bool test_weighted) -{ - auto mm_graph = - read_edgelist_from_matrix_market_file(graph_file_full_path); - edge_t number_of_edges = static_cast(mm_graph.h_rows.size()); - - rmm::device_uvector d_edgelist_rows(number_of_edges, handle.get_stream()); - rmm::device_uvector d_edgelist_cols(number_of_edges, handle.get_stream()); - rmm::device_uvector d_edgelist_weights(test_weighted ? number_of_edges : 0, - handle.get_stream()); - - raft::update_device( - d_edgelist_rows.data(), mm_graph.h_rows.data(), number_of_edges, handle.get_stream()); - raft::update_device( - d_edgelist_cols.data(), mm_graph.h_cols.data(), number_of_edges, handle.get_stream()); - if (test_weighted) { - raft::update_device( - d_edgelist_weights.data(), mm_graph.h_weights.data(), number_of_edges, handle.get_stream()); - } - - cugraph::experimental::edgelist_t edgelist{ - d_edgelist_rows.data(), - d_edgelist_cols.data(), - test_weighted ? d_edgelist_weights.data() : nullptr, - number_of_edges}; - - return cugraph::experimental::graph_t( - handle, - edgelist, - mm_graph.number_of_vertices, - cugraph::experimental::graph_properties_t{mm_graph.is_symmetric, false}, - false, - true); -} + bool test_weighted); } // namespace test } // namespace cugraph From 27430205b703b35d17bd17810212a981a32a790d Mon Sep 17 00:00:00 2001 From: Hugo Linsenmaier Date: Thu, 11 Feb 2021 06:17:38 -0800 Subject: [PATCH 168/343] Add SG TSP (#1360) This PR implements an approximated solution to the Traveling Salesperson Problem (TSP). The algorithm is exposed under ```traversal``` through a Python API taking 2D pos as input and returning a route. 
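For a rough sense of how the new entry point is called, here is a minimal, hypothetical C++ sketch against the `cugraph::traveling_salesperson` signature this PR adds in `cpp/include/algorithms.hpp` (the helper name and parameter values are illustrative, not library defaults; inputs are assumed to already be device-resident):

```cpp
#include <algorithms.hpp>           // declares cugraph::traveling_salesperson (added by this PR)
#include <raft/handle.hpp>
#include <rmm/device_uvector.hpp>

// Hypothetical helper: d_vtx holds 0..nodes-1, d_x/d_y the city coordinates,
// d_route receives the visiting order; the return value is the tour length.
float solve_tsp(raft::handle_t& handle,
                rmm::device_uvector<int> const& d_vtx,
                rmm::device_uvector<float> const& d_x,
                rmm::device_uvector<float> const& d_y,
                rmm::device_uvector<int>& d_route)
{
  int nodes = static_cast<int>(d_vtx.size());
  return cugraph::traveling_salesperson(handle,
                                        d_vtx.data(), d_x.data(), d_y.data(),
                                        nodes,
                                        /*restarts=*/4096,     // illustrative values,
                                        /*beam_search=*/true,  // not defaults
                                        /*k=*/4,
                                        /*nstart=*/0,
                                        /*verbose=*/false,
                                        d_route.data());
}
```
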
This PR relies on RAFT KNN: https://github.com/rapidsai/raft/pull/126 Solves: https://github.com/rapidsai/cugraph/issues/1185 Authors: - Hugo Linsenmaier (@hlinsen) Approvers: - AJ Schmidt (@ajschmidt8) - Brad Rees (@BradReesWork) - Rick Ratzel (@rlratzel) - Alex Fender (@afender) - Andrei Schaffer (@aschaffer) URL: https://github.com/rapidsai/cugraph/pull/1360 --- README.md | 1 + build.sh | 11 +- ci/test.sh | 9 +- conda/environments/cugraph_dev_cuda10.1.yml | 2 + conda/environments/cugraph_dev_cuda10.2.yml | 2 + conda/environments/cugraph_dev_cuda11.0.yml | 2 + conda/recipes/libcugraph/meta.yaml | 4 + cpp/CMakeLists.txt | 63 ++- cpp/cmake/Modules/FindFAISS.cmake | 98 +++++ cpp/cmake/faiss_cuda11.patch | 40 ++ cpp/include/algorithms.hpp | 38 ++ cpp/src/traversal/tsp.cu | 252 +++++++++++ cpp/src/traversal/tsp.hpp | 88 ++++ cpp/src/traversal/tsp_solver.hpp | 414 ++++++++++++++++++ cpp/src/traversal/tsp_utils.hpp | 85 ++++ cpp/tests/CMakeLists.txt | 10 + cpp/tests/traversal/tsp_test.cu | 245 +++++++++++ datasets/eil51.tsp | 58 +++ datasets/get_test_data.sh | 17 + datasets/gil262.tsp | 269 ++++++++++++ datasets/kroA100.tsp | 107 +++++ datasets/tsp225.tsp | 232 ++++++++++ python/cugraph/__init__.py | 3 +- .../link_analysis/pagerank_wrapper.pyx | 1 - .../tests/test_traveling_salesperson.py | 81 ++++ python/cugraph/tests/utils.py | 7 + python/cugraph/traversal/__init__.py | 5 +- .../traversal/traveling_salesperson.pxd | 34 ++ .../traversal/traveling_salesperson.py | 75 ++++ .../traveling_salesperson_wrapper.pyx | 79 ++++ 30 files changed, 2323 insertions(+), 9 deletions(-) create mode 100644 cpp/cmake/Modules/FindFAISS.cmake create mode 100644 cpp/cmake/faiss_cuda11.patch create mode 100644 cpp/src/traversal/tsp.cu create mode 100644 cpp/src/traversal/tsp.hpp create mode 100644 cpp/src/traversal/tsp_solver.hpp create mode 100644 cpp/src/traversal/tsp_utils.hpp create mode 100644 cpp/tests/traversal/tsp_test.cu create mode 100644 datasets/eil51.tsp create mode 100755 datasets/gil262.tsp create mode 100644 datasets/kroA100.tsp create mode 100644 datasets/tsp225.tsp create mode 100644 python/cugraph/tests/test_traveling_salesperson.py create mode 100644 python/cugraph/traversal/traveling_salesperson.pxd create mode 100644 python/cugraph/traversal/traveling_salesperson.py create mode 100644 python/cugraph/traversal/traveling_salesperson_wrapper.pyx diff --git a/README.md b/README.md index 03abd6c72af..62059e9c7b6 100644 --- a/README.md +++ b/README.md @@ -81,6 +81,7 @@ As of Release 0.18 - including 0.18 nightly | Traversal | | | | | | Breadth First Search (BFS) | Multi-GPU | with cutoff support | | | Single Source Shortest Path (SSSP) | Multi-GPU | | +| | Traveling Salesperson Problem (TSP) | Single-GPU | | | Structure | | | | | | Renumbering | Single-GPU | multiple columns, any data type | | | Symmetrize | Multi-GPU | | diff --git a/build.sh b/build.sh index dfe31089b0f..ef210e841c6 100755 --- a/build.sh +++ b/build.sh @@ -19,7 +19,7 @@ ARGS=$* REPODIR=$(cd $(dirname $0); pwd) LIBCUGRAPH_BUILD_DIR=${LIBCUGRAPH_BUILD_DIR:=${REPODIR}/cpp/build} -VALIDARGS="clean libcugraph cugraph cpp-mgtests docs -v -g -n --allgpuarch --show_depr_warn -h --help" +VALIDARGS="clean libcugraph cugraph docs -v -g -n --allgpuarch --buildfaiss --show_depr_warn -h --help" HELP="$0 [ ...] [ ...] where is: clean - remove all existing build artifacts and configuration (start over) @@ -32,6 +32,7 @@ HELP="$0 [ ...] [ ...] 
-g - build for debug -n - no install step --allgpuarch - build for all supported GPU architectures + --buildfaiss - build faiss statically into cugraph --show_depr_warn - show cmake deprecation warnings -h - print this text @@ -50,6 +51,7 @@ BUILD_TYPE=Release INSTALL_TARGET=install BUILD_DISABLE_DEPRECATION_WARNING=ON BUILD_CPP_MG_TESTS=OFF +BUILD_STATIC_FAISS=OFF GPU_ARCH="" # Set defaults for vars that may not have been defined externally @@ -95,6 +97,9 @@ fi if hasArg --allgpuarch; then GPU_ARCH="-DGPU_ARCHS=ALL" fi +if hasArg --buildfaiss; then + BUILD_STATIC_FAISS=ON +fi if hasArg --show_depr_warn; then BUILD_DISABLE_DEPRECATION_WARNING=OFF fi @@ -135,6 +140,7 @@ if buildAll || hasArg libcugraph; then ${GPU_ARCH} \ -DDISABLE_DEPRECATION_WARNING=${BUILD_DISABLE_DEPRECATION_WARNING} \ -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ + -DBUILD_STATIC_FAISS=${BUILD_STATIC_FAISS} \ -DBUILD_CUGRAPH_MG_TESTS=${BUILD_CPP_MG_TESTS} \ ${REPODIR}/cpp make -j${PARALLEL_LEVEL} VERBOSE=${VERBOSE} ${INSTALL_TARGET} @@ -159,7 +165,8 @@ if buildAll || hasArg docs; then cd ${LIBCUGRAPH_BUILD_DIR} cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \ -DDISABLE_DEPRECATION_WARNING=${BUILD_DISABLE_DEPRECATION_WARNING} \ - -DCMAKE_BUILD_TYPE=${BUILD_TYPE} ${REPODIR}/cpp + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} ${REPODIR}/cpp \ + -DBUILD_STATIC_FAISS=${BUILD_STATIC_FAISS} fi cd ${LIBCUGRAPH_BUILD_DIR} make -j${PARALLEL_LEVEL} VERBOSE=${VERBOSE} docs_cugraph diff --git a/ci/test.sh b/ci/test.sh index db060d3a55a..b0134e97246 100755 --- a/ci/test.sh +++ b/ci/test.sh @@ -67,13 +67,20 @@ fi # in a preconfigured environment, and install/build steps are unexpected side # effects. if [[ "$PROJECT_FLASH" == "1" ]]; then + export LIBCUGRAPH_BUILD_DIR="$WORKSPACE/ci/artifacts/cugraph/cpu/conda_work/cpp/build" + + # Faiss patch + echo "Update libcugraph.so" + cd $LIBCUGRAPH_BUILD_DIR + chrpath -d libcugraph.so + patchelf --replace-needed `patchelf --print-needed libcugraph.so | grep faiss` libfaiss.so libcugraph.so + CONDA_FILE=`find $WORKSPACE/ci/artifacts/cugraph/cpu/conda-bld/ -name "libcugraph*.tar.bz2"` CONDA_FILE=`basename "$CONDA_FILE" .tar.bz2` #get filename without extension CONDA_FILE=${CONDA_FILE//-/=} #convert to conda install echo "Installing $CONDA_FILE" conda install -c $WORKSPACE/ci/artifacts/cugraph/cpu/conda-bld/ "$CONDA_FILE" - export LIBCUGRAPH_BUILD_DIR="$WORKSPACE/ci/artifacts/cugraph/cpu/conda_work/cpp/build" echo "Build cugraph..." 
$WORKSPACE/build.sh cugraph fi diff --git a/conda/environments/cugraph_dev_cuda10.1.yml b/conda/environments/cugraph_dev_cuda10.1.yml index 369a1f1205a..a74cdbdb144 100644 --- a/conda/environments/cugraph_dev_cuda10.1.yml +++ b/conda/environments/cugraph_dev_cuda10.1.yml @@ -29,6 +29,8 @@ dependencies: - boost - cython>=0.29,<0.30 - pytest +- libfaiss=1.6.3 +- faiss-proc=*=cuda - scikit-learn>=0.23.1 - colorcet - holoviews diff --git a/conda/environments/cugraph_dev_cuda10.2.yml b/conda/environments/cugraph_dev_cuda10.2.yml index 82903a6c2a5..5077f2bb23e 100644 --- a/conda/environments/cugraph_dev_cuda10.2.yml +++ b/conda/environments/cugraph_dev_cuda10.2.yml @@ -29,6 +29,8 @@ dependencies: - boost - cython>=0.29,<0.30 - pytest +- libfaiss=1.6.3 +- faiss-proc=*=cuda - scikit-learn>=0.23.1 - colorcet - holoviews diff --git a/conda/environments/cugraph_dev_cuda11.0.yml b/conda/environments/cugraph_dev_cuda11.0.yml index 4720183e0b0..a93297ea758 100644 --- a/conda/environments/cugraph_dev_cuda11.0.yml +++ b/conda/environments/cugraph_dev_cuda11.0.yml @@ -29,6 +29,8 @@ dependencies: - boost - cython>=0.29,<0.30 - pytest +- libfaiss=1.6.3 +- faiss-proc=*=cuda - scikit-learn>=0.23.1 - colorcet - holoviews diff --git a/conda/recipes/libcugraph/meta.yaml b/conda/recipes/libcugraph/meta.yaml index 955c723bd27..cd83e5a9b7a 100644 --- a/conda/recipes/libcugraph/meta.yaml +++ b/conda/recipes/libcugraph/meta.yaml @@ -33,6 +33,8 @@ requirements: - ucx-py {{ minor_version }} - ucx-proc=*=gpu - gtest + - faiss-proc=*=cuda + - libfaiss=1.6.3 - gmock run: - libcudf={{ minor_version }} @@ -40,6 +42,8 @@ requirements: - nccl>=2.7 - ucx-py {{ minor_version }} - ucx-proc=*=gpu + - faiss-proc=*=cuda + - libfaiss=1.6.3 #test: # commands: diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 61e882aad81..1b15d04bbfd 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -38,6 +38,13 @@ endif() option(BUILD_CUGRAPH_MG_TESTS "Build cuGraph multigpu algorithm tests" OFF) +################################################################################################### +# - user options ------------------------------------------------------------------------------ + +set(BLAS_LIBRARIES "" CACHE STRING + "Location of BLAS library for FAISS build.") +option(BUILD_STATIC_FAISS "Build the FAISS library for nearest neighbors search on GPU" OFF) + ################################################################################################### # - compiler options ------------------------------------------------------------------------------ @@ -95,10 +102,12 @@ message("-- Building for GPU_ARCHS = ${GPU_ARCHS}") foreach(arch ${GPU_ARCHS}) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_${arch},code=sm_${arch}") set(GUNROCK_GENCODE_SM${arch} "ON") + set(FAISS_GPU_ARCHS "${FAISS_GPU_ARCHS} -gencode arch=compute_${arch},code=sm_${arch}") endforeach() list(GET GPU_ARCHS -1 ptx) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_${ptx},code=compute_${ptx}") +set(FAISS_GPU_ARCHS "${FAISS_GPU_ARCHS} -gencode arch=compute_${ptx},code=compute_${ptx}") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda --expt-relaxed-constexpr") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Werror=cross-execution-space-call -Wno-deprecated-declarations -Xptxas --disable-warnings") @@ -157,6 +166,15 @@ if(OpenMP_FOUND) endif(OpenMP_FOUND) +################################################################################################### +# - find blas 
------------------------------------------------------------------------------------- + +if(NOT DEFINED BLAS_LIBRARIES) + find_package( BLAS REQUIRED ) +else() + message(STATUS "Manually setting BLAS to ${BLAS_LIBRARIES}") +endif() + ################################################################################################### # - find gtest ------------------------------------------------------------------------------------ @@ -280,7 +298,7 @@ else(DEFINED ENV{RAFT_PATH}) FetchContent_Declare( raft GIT_REPOSITORY https://github.com/rapidsai/raft.git - GIT_TAG 9dbf2c8a9134ce8135f7fe947ec523d874fcab6a + GIT_TAG 4a79adcb0c0e87964dcdc9b9122f242b5235b702 SOURCE_SUBDIR raft ) @@ -332,6 +350,46 @@ add_library(gunrock STATIC IMPORTED) add_dependencies(gunrock gunrock_ext) set_property(TARGET gunrock PROPERTY IMPORTED_LOCATION ${GUNROCK_DIR}/src/gunrock_ext-build/lib/libgunrock.a) +# - FAISS +# FIXME: The commit currently being fetched from faiss is using autotools which +# is more convenient to build with ExternalProjectAdd. +# Consider migrating to FetchContent once the tagged commit is changed. + +if(BUILD_STATIC_FAISS) + set(FAISS_DIR ${CMAKE_CURRENT_BINARY_DIR}/faiss CACHE STRING + "Path to FAISS source directory") + ExternalProject_Add(faiss + GIT_REPOSITORY https://github.com/facebookresearch/faiss.git + GIT_TAG a5b850dec6f1cd6c88ab467bfd5e87b0cac2e41d + CONFIGURE_COMMAND LIBS=-pthread + CPPFLAGS=-w + LDFLAGS=-L${CMAKE_INSTALL_PREFIX}/lib + ${CMAKE_CURRENT_BINARY_DIR}/faiss/src/faiss/configure + --prefix=${CMAKE_CURRENT_BINARY_DIR}/faiss + --with-blas=${BLAS_LIBRARIES} + --with-cuda=${CUDA_TOOLKIT_ROOT_DIR} + --with-cuda-arch=${FAISS_GPU_ARCHS} + -v + PREFIX ${FAISS_DIR} + BUILD_COMMAND make -j${PARALLEL_LEVEL} VERBOSE=1 + BUILD_BYPRODUCTS ${FAISS_DIR}/lib/libfaiss.a + BUILD_ALWAYS 1 + INSTALL_COMMAND make -s install > /dev/null + UPDATE_COMMAND "" + BUILD_IN_SOURCE 1 + PATCH_COMMAND patch -p1 -N < ${CMAKE_CURRENT_SOURCE_DIR}/cmake/faiss_cuda11.patch || true) + + ExternalProject_Get_Property(faiss install_dir) + add_library(FAISS::FAISS STATIC IMPORTED) + add_dependencies(FAISS::FAISS faiss) + set_property(TARGET FAISS::FAISS PROPERTY + IMPORTED_LOCATION ${FAISS_DIR}/lib/libfaiss.a) + set(FAISS_INCLUDE_DIRS "${FAISS_DIR}/src") +else() + set(FAISS_INSTALL_DIR ENV{FAISS_ROOT}) + find_package(FAISS REQUIRED) +endif(BUILD_STATIC_FAISS) + ################################################################################################### # - library targets ------------------------------------------------------------------------------- @@ -343,6 +401,7 @@ add_library(cugraph SHARED src/link_analysis/gunrock_hits.cpp src/traversal/bfs.cu src/traversal/sssp.cu + src/traversal/tsp.cu src/link_prediction/jaccard.cu src/link_prediction/overlap.cu src/layout/force_atlas2.cu @@ -413,7 +472,7 @@ target_include_directories(cugraph # - link libraries -------------------------------------------------------------------------------- target_link_libraries(cugraph PRIVATE - gunrock cublas cusparse curand cusolver cudart cuda ${NCCL_LIBRARIES}) + gunrock cublas cusparse curand cusolver cudart cuda FAISS::FAISS ${NCCL_LIBRARIES}) if(OpenMP_CXX_FOUND) target_link_libraries(cugraph PRIVATE diff --git a/cpp/cmake/Modules/FindFAISS.cmake b/cpp/cmake/Modules/FindFAISS.cmake new file mode 100644 index 00000000000..7c456edfeef --- /dev/null +++ b/cpp/cmake/Modules/FindFAISS.cmake @@ -0,0 +1,98 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Based on FindPNG.cmake from cmake 3.14.3 + +#[=======================================================================[.rst: +FindFAISS +-------- + +Template to generate FindPKG_NAME.cmake CMake modules + +Find FAISS + +Imported targets +^^^^^^^^^^^^^^^^ + +This module defines the following :prop_tgt:`IMPORTED` target: + +``FAISS::FAISS`` + The libFAISS library, if found. + +Result variables +^^^^^^^^^^^^^^^^ + +This module will set the following variables in your project: + +``FAISS_INCLUDE_DIRS`` + where to find FAISS.hpp , etc. +``FAISS_LIBRARIES`` + the libraries to link against to use libFAISS. +``FAISS_FOUND`` + If false, do not try to use FAISS. +``FAISS_VERSION_STRING`` + the version of the FAISS library found + +#]=======================================================================] + +find_path(FAISS_LOCATION faiss/IndexFlat.h + HINTS ${FAISS_INSTALL_DIR} + PATH_SUFFIXES include include/) + +list(APPEND FAISS_NAMES faiss libfaiss) +set(_FAISS_VERSION_SUFFIXES ) + +foreach(v IN LISTS _FAISS_VERSION_SUFFIXES) + list(APPEND FAISS_NAMES faiss${v} libfaiss${v}) + list(APPEND FAISS_NAMES faiss.${v} libfaiss.${v}) +endforeach() +unset(_FAISS_VERSION_SUFFIXES) + +find_library(FAISS_LIBRARY_RELEASE NAMES ${FAISS_NAMES} + HINTS ${FAISS_INSTALL_DIR} + PATH_SUFFIXES lib) + +include(${CMAKE_ROOT}/Modules/SelectLibraryConfigurations.cmake) +select_library_configurations(FAISS) +mark_as_advanced(FAISS_LIBRARY_RELEASE) +unset(FAISS_NAMES) + +# Set by select_library_configurations(), but we want the one from +# find_package_handle_standard_args() below. 
+unset(FAISS_FOUND) + +if (FAISS_LIBRARY AND FAISS_LOCATION) + set(FAISS_INCLUDE_DIRS ${FAISS_LOCATION} ) + set(FAISS_LIBRARY ${FAISS_LIBRARY}) + + if(NOT TARGET FAISS::FAISS) + add_library(FAISS::FAISS UNKNOWN IMPORTED) + set_target_properties(FAISS::FAISS PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${FAISS_INCLUDE_DIRS}") + if(EXISTS "${FAISS_LIBRARY}") + set_target_properties(FAISS::FAISS PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES "CXX" + IMPORTED_LOCATION "${FAISS_LIBRARY}") + endif() + endif() +endif () + + +include(${CMAKE_ROOT}/Modules/FindPackageHandleStandardArgs.cmake) +find_package_handle_standard_args(FAISS + REQUIRED_VARS FAISS_LIBRARY FAISS_LOCATION + VERSION_VAR FAISS_VERSION_STRING) + +mark_as_advanced(FAISS_LOCATION FAISS_LIBRARY) diff --git a/cpp/cmake/faiss_cuda11.patch b/cpp/cmake/faiss_cuda11.patch new file mode 100644 index 00000000000..496ca0e7b23 --- /dev/null +++ b/cpp/cmake/faiss_cuda11.patch @@ -0,0 +1,40 @@ +diff --git a/configure b/configure +index ed40dae..f88ed0a 100755 +--- a/configure ++++ b/configure +@@ -2970,7 +2970,7 @@ ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ex + ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + +- ax_cxx_compile_alternatives="11 0x" ax_cxx_compile_cxx11_required=true ++ ax_cxx_compile_alternatives="14 11 0x" ax_cxx_compile_cxx11_required=true + ac_ext=cpp + ac_cpp='$CXXCPP $CPPFLAGS' + ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +diff --git a/gpu/utils/DeviceDefs.cuh b/gpu/utils/DeviceDefs.cuh +index 89d3dda..bc0f9b5 100644 +--- a/gpu/utils/DeviceDefs.cuh ++++ b/gpu/utils/DeviceDefs.cuh +@@ -13,7 +13,7 @@ + namespace faiss { namespace gpu { + + #ifdef __CUDA_ARCH__ +-#if __CUDA_ARCH__ <= 750 ++#if __CUDA_ARCH__ <= 800 + constexpr int kWarpSize = 32; + #else + #error Unknown __CUDA_ARCH__; please define parameters for compute capability +diff --git a/gpu/utils/MatrixMult-inl.cuh b/gpu/utils/MatrixMult-inl.cuh +index ede225e..4f7eb44 100644 +--- a/gpu/utils/MatrixMult-inl.cuh ++++ b/gpu/utils/MatrixMult-inl.cuh +@@ -51,6 +51,9 @@ rawGemm(cublasHandle_t handle, + auto cBT = GetCudaType::Type; + + // Always accumulate in f32 ++# if __CUDACC_VER_MAJOR__ >= 11 ++ cublasSetMathMode(handle, CUBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION); ++# endif + return cublasSgemmEx(handle, transa, transb, m, n, k, + &fAlpha, A, cAT, lda, + B, cBT, ldb, diff --git a/cpp/include/algorithms.hpp b/cpp/include/algorithms.hpp index 5ad0a374364..c666bce23ad 100644 --- a/cpp/include/algorithms.hpp +++ b/cpp/include/algorithms.hpp @@ -192,6 +192,44 @@ void force_atlas2(GraphCOOView &graph, bool verbose = false, internals::GraphBasedDimRedCallback *callback = nullptr); +/** + * @brief Finds an approximate solution to the traveling salesperson problem (TSP). + * cuGraph computes an approximation of the TSP problem using hill climbing + * optimization. + * + * The current implementation does not support a weighted graph. + * + * @throws cugraph::logic_error when an error occurs. + * @param[in] handle Library handle (RAFT). If a communicator is set in the + * handle, the multi GPU version will be selected. + * @param[in] vtx_ptr Device array containing the vertex identifiers used + * to initialize the route. + * @param[in] x_pos Device array containing starting x-axis positions. + * @param[in] y_pos Device array containing starting y-axis positions. + * @param[in] nodes Number of cities. + * @param[in] restarts Number of starts to try. The more restarts, + * the better the solution will be approximated. 
The number of restarts depends on the problem + * size and should be kept low for instances above 2k cities. + * @param[in] beam_search Specify if the initial solution should use KNN + * for an approximation solution. + * @param[in] k Beam width to use in the search. + * @param[in] nstart Start from a specific position. + * @param[in] verbose Logs configuration and iterative improvement. + * @param[out] route Device array containing the returned route. + * + */ +float traveling_salesperson(raft::handle_t &handle, + int const *vtx_ptr, + float const *x_pos, + float const *y_pos, + int nodes, + int restarts, + bool beam_search, + int k, + int nstart, + bool verbose, + int *route); + /** * @brief Compute betweenness centrality for a graph * diff --git a/cpp/src/traversal/tsp.cu b/cpp/src/traversal/tsp.cu new file mode 100644 index 00000000000..c669246bc49 --- /dev/null +++ b/cpp/src/traversal/tsp.cu @@ -0,0 +1,252 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include "tsp.hpp" +#include "tsp_solver.hpp" + +namespace cugraph { +namespace detail { + +TSP::TSP(raft::handle_t &handle, + int const *vtx_ptr, + float const *x_pos, + float const *y_pos, + int nodes, + int restarts, + bool beam_search, + int k, + int nstart, + bool verbose, + int *route) + : handle_(handle), + vtx_ptr_(vtx_ptr), + x_pos_(x_pos), + y_pos_(y_pos), + nodes_(nodes), + restarts_(restarts), + beam_search_(beam_search), + k_(k), + nstart_(nstart), + verbose_(verbose), + route_(route), + stream_(handle_.get_stream()), + max_blocks_(handle_.get_device_properties().maxGridSize[0]), + max_threads_(handle_.get_device_properties().maxThreadsPerBlock), + warp_size_(handle_.get_device_properties().warpSize), + sm_count_(handle_.get_device_properties().multiProcessorCount), + restart_batch_(4096) +{ + allocate(); +} + +void TSP::allocate() +{ + // Scalars + mylock_ = mylock_scalar_.data(); + best_tour_ = best_tour_scalar_.data(); + climbs_ = climbs_scalar_.data(); + + // Vectors + neighbors_vec_.resize((k_ + 1) * nodes_); + // pre-allocate workspace for climbs, each block needs a separate permutation space and search + // buffer. We allocate a work buffer that will store the computed distances, px, py and the route. + // We align it on the warp size. 
+ work_vec_.resize(sizeof(float) * restart_batch_ * + ((4 * nodes_ + 3 + warp_size_ - 1) / warp_size_ * warp_size_)); + + // Pointers + neighbors_ = neighbors_vec_.data().get(); + work_ = work_vec_.data().get(); +} + +float TSP::compute() +{ + float valid_coo_dist = 0.f; + int num_restart_batches = (restarts_ + restart_batch_ - 1) / restart_batch_; + int restart_resid = restarts_ - (num_restart_batches - 1) * restart_batch_; + int global_best = INT_MAX; + float *soln = nullptr; + int *route_sol = nullptr; + int best = 0; + std::vector h_x_pos; + std::vector h_y_pos; + h_x_pos.reserve(nodes_ + 1); + h_y_pos.reserve(nodes_ + 1); + + // Stats + int n_timers = 3; + long total_climbs = 0; + std::vector h_times; + struct timeval starttime, endtime; + + // KNN call + knn(); + + if (verbose_) { + std::cout << "Doing " << num_restart_batches - 1 << " batches of size " << restart_batch_ + << ", with " << restart_resid << " tail\n"; + std::cout << "configuration: " << nodes_ << " nodes, " << restarts_ << " restart\n"; + std::cout << "optimizing graph with kswap = " << kswaps << "\n"; + } + + // Tell the cache how we want it to behave + cudaFuncSetCacheConfig(search_solution, cudaFuncCachePreferEqual); + + int threads = best_thread_count(nodes_, max_threads_, sm_count_, warp_size_); + if (verbose_) std::cout << "Calculated best thread number = " << threads << "\n"; + + rmm::device_vector times(n_timers * threads + n_timers); + h_times.reserve(n_timers * threads + n_timers); + + gettimeofday(&starttime, NULL); + for (int b = 0; b < num_restart_batches; ++b) { + reset<<<1, 1, 0, stream_>>>(mylock_, best_tour_, climbs_); + CHECK_CUDA(stream_); + + if (b == num_restart_batches - 1) restart_batch_ = restart_resid; + + search_solution<<>>(mylock_, + best_tour_, + vtx_ptr_, + beam_search_, + k_, + nodes_, + neighbors_, + x_pos_, + y_pos_, + work_, + nstart_, + times.data().get(), + climbs_, + threads); + + CHECK_CUDA(stream_); + cudaDeviceSynchronize(); + + CUDA_TRY(cudaMemcpy(&best, best_tour_, sizeof(int), cudaMemcpyDeviceToHost)); + cudaDeviceSynchronize(); + if (verbose_) std::cout << "Best reported by kernel = " << best << "\n"; + + if (best < global_best) { + global_best = best; + CUDA_TRY(cudaMemcpyFromSymbol(&soln, best_soln, sizeof(void *))); + cudaDeviceSynchronize(); + CUDA_TRY(cudaMemcpyFromSymbol(&route_sol, best_route, sizeof(void *))); + cudaDeviceSynchronize(); + } + total_climbs += climbs_scalar_.value(stream_); + } + gettimeofday(&endtime, NULL); + double runtime = + endtime.tv_sec + endtime.tv_usec / 1e6 - starttime.tv_sec - starttime.tv_usec / 1e6; + long long moves = 1LL * total_climbs * (nodes_ - 2) * (nodes_ - 1) / 2; + + raft::copy(route_, route_sol, nodes_, stream_); + + CUDA_TRY(cudaMemcpy(h_x_pos.data(), soln, sizeof(float) * (nodes_ + 1), cudaMemcpyDeviceToHost)); + cudaDeviceSynchronize(); + CUDA_TRY(cudaMemcpy( + h_y_pos.data(), soln + nodes_ + 1, sizeof(float) * (nodes_ + 1), cudaMemcpyDeviceToHost)); + cudaDeviceSynchronize(); + + for (int i = 0; i < nodes_; ++i) { + if (verbose_) { std::cout << h_x_pos[i] << " " << h_y_pos[i] << "\n"; } + valid_coo_dist += euclidean_dist(h_x_pos.data(), h_y_pos.data(), i, i + 1); + } + + CUDA_TRY(cudaMemcpy(h_times.data(), + times.data().get(), + sizeof(float) * n_timers * threads + n_timers, + cudaMemcpyDeviceToHost)); + cudaDeviceSynchronize(); + + if (verbose_) { + std::cout << "Search runtime = " << runtime << ", " << moves * 1e-9 / runtime << " Gmoves/s\n"; + std::cout << "Optimized tour length = " << global_best << "\n"; + 
print_times(h_times, n_timers, handle_.get_device(), threads); + } + + return valid_coo_dist; +} + +void TSP::knn() +{ + if (verbose_) std::cout << "Looking at " << k_ << " nearest neighbors\n"; + + int dim = 2; + bool row_major_order = false; + + rmm::device_vector input(nodes_ * dim); + float *input_ptr = input.data().get(); + raft::copy(input_ptr, x_pos_, nodes_, stream_); + raft::copy(input_ptr + nodes_, y_pos_, nodes_, stream_); + + rmm::device_vector search_data(nodes_ * dim); + float *search_data_ptr = search_data.data().get(); + raft::copy(search_data_ptr, input_ptr, nodes_ * dim, stream_); + + rmm::device_vector distances(nodes_ * (k_ + 1)); + float *distances_ptr = distances.data().get(); + + std::vector input_vec; + std::vector sizes_vec; + input_vec.push_back(input_ptr); + sizes_vec.push_back(nodes_); + + // k neighbors + 1 is needed because the nearest neighbor of each point is + // the point itself that we don't want to take into account. + + raft::spatial::knn::brute_force_knn(handle_, + input_vec, + sizes_vec, + dim, + search_data_ptr, + nodes_, + neighbors_, + distances_ptr, + k_ + 1, + row_major_order, + row_major_order); +} +} // namespace detail + +float traveling_salesperson(raft::handle_t &handle, + int const *vtx_ptr, + float const *x_pos, + float const *y_pos, + int nodes, + int restarts, + bool beam_search, + int k, + int nstart, + bool verbose, + int *route) +{ + RAFT_EXPECTS(route != nullptr, "route should equal the number of nodes"); + RAFT_EXPECTS(nodes > 0, "nodes should be strictly positive"); + RAFT_EXPECTS(restarts > 0, "restarts should be strictly positive"); + RAFT_EXPECTS(nstart >= 0 && nstart < nodes, "nstart should be between 0 and nodes - 1"); + RAFT_EXPECTS(k > 0, "k should be strictly positive"); + + cugraph::detail::TSP tsp( + handle, vtx_ptr, x_pos, y_pos, nodes, restarts, beam_search, k, nstart, verbose, route); + return tsp.compute(); +} + +} // namespace cugraph diff --git a/cpp/src/traversal/tsp.hpp b/cpp/src/traversal/tsp.hpp new file mode 100644 index 00000000000..b065b779b96 --- /dev/null +++ b/cpp/src/traversal/tsp.hpp @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace cugraph { +namespace detail { +class TSP { + public: + TSP(raft::handle_t &handle, + int const *vtx_ptr, + float const *x_pos, + float const *y_pos, + int nodes, + int restarts, + bool beam_search, + int k, + int nstart, + bool verbose, + int *route); + + void allocate(); + float compute(); + void knn(); + ~TSP(){}; + + private: + // Config + raft::handle_t &handle_; + cudaStream_t stream_; + int max_blocks_; + int max_threads_; + int warp_size_; + int sm_count_; + // how large a grid we want to run, this is fixed + int restart_batch_; + + // TSP + int const *vtx_ptr_; + int *route_; + float const *x_pos_; + float const *y_pos_; + int nodes_; + int restarts_; + bool beam_search_; + int k_; + int nstart_; + bool verbose_; + + // Scalars + rmm::device_scalar mylock_scalar_; + rmm::device_scalar best_tour_scalar_; + rmm::device_scalar climbs_scalar_; + + int *mylock_; + int *best_tour_; + int *climbs_; + + // Vectors + rmm::device_vector neighbors_vec_; + rmm::device_vector work_vec_; + + int64_t *neighbors_; + int *work_; + int *work_route_; +}; +} // namespace detail +} // namespace cugraph diff --git a/cpp/src/traversal/tsp_solver.hpp b/cpp/src/traversal/tsp_solver.hpp new file mode 100644 index 00000000000..20d826cac5c --- /dev/null +++ b/cpp/src/traversal/tsp_solver.hpp @@ -0,0 +1,414 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "tsp_utils.hpp" + +namespace cugraph { +namespace detail { + +__device__ float *best_soln; +__device__ int *best_route; +extern __shared__ int shbuf[]; + +__global__ void reset(int *mylock, int *best_tour, int *climbs) +{ + *mylock = 0; + *best_tour = INT_MAX; + *climbs = 0; + best_soln = nullptr; + best_route = nullptr; +} + +// random permutation kernel +__device__ void random_init(float const *posx, + float const *posy, + int const *vtx_ptr, + int *path, + float *px, + float *py, + int const nstart, + int const nodes) +{ + // Fill values + for (int i = threadIdx.x; i <= nodes; i += blockDim.x) { + px[i] = posx[i]; + py[i] = posy[i]; + path[i] = vtx_ptr[i]; + } + + __syncthreads(); + + if (threadIdx.x == 0) { /* serial permutation as starting point */ + // swap to start at nstart node + raft::swapVals(px[0], px[nstart]); + raft::swapVals(py[0], py[nstart]); + raft::swapVals(path[0], path[nstart]); + + curandState rndstate; + curand_init(blockIdx.x, 0, 0, &rndstate); + for (int i = 1; i < nodes; i++) { + int j = curand(&rndstate) % (nodes - 1 - i) + i; + if (i == j) continue; + raft::swapVals(px[i], px[j]); + raft::swapVals(py[i], py[j]); + raft::swapVals(path[i], path[j]); + } + px[nodes] = px[0]; /* close the loop now, avoid special cases later */ + py[nodes] = py[0]; + path[nodes] = path[0]; + } +} + +// Use KNN as a starting solution +__device__ void knn_init(float const *posx, + float const *posy, + int const *vtx_ptr, + int64_t const *neighbors, + int *buf, + int *path, + float *px, + float *py, + int const nstart, + int const nodes, + int const K) +{ + for (int i = threadIdx.x; i < nodes; i += blockDim.x) buf[i] = 0; + + __syncthreads(); + + if (threadIdx.x == 0) { + curandState rndstate; + curand_init(blockIdx.x, 0, 0, &rndstate); + int progress = 0; + int initlen = 0; + + px[0] = posx[nstart]; + py[0] = posy[nstart]; + path[0] = vtx_ptr[nstart]; + int head = nstart; + int v = 0; + buf[head] = 1; + while (progress < nodes - 1) { // beam search as starting point + for (int i = 1; i <= progress; i++) buf[i] = 0; + progress = 0; // reset current location in path and visited array + initlen = 0; + int randjumps = 0; + while (progress < nodes - 1) { + int nj = curand(&rndstate) % K; + int linked = 0; + for (int nh = 0; nh < K; ++nh) { + // offset (idx / K) + 1 filters the points as their own nearest neighbors. 
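+          // The KNN table is row major with K + 1 columns per point and column 0 is the point
+          // itself, so offset == head + 1 remaps this K-wide index into the (K + 1)-wide row
+          // while skipping that self-neighbor column.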
+ int offset = (K * head + nj) / K + 1; + v = neighbors[K * head + nj + offset]; + if (v < nodes && buf[v] == 0) { + head = v; + progress += 1; + buf[head] = 1; + linked = 1; + break; + } + nj = (nj + 1) % K; + } + if (linked == 0) { + if (randjumps > nodes - 1) + break; // give up on this traversal, we failed to find a next link + randjumps += 1; + int nr = (head + 1) % nodes; // jump to next node + while (buf[nr] == 1) { nr = (nr + 1) % nodes; } + head = nr; + progress += 1; + buf[head] = 1; + } + // copy from input into beam-search order, update len + px[progress] = posx[head]; + py[progress] = posy[head]; + path[progress] = vtx_ptr[head]; + initlen += __float2int_rn(euclidean_dist(px, py, progress, progress - 1)); + } + } + px[nodes] = px[nstart]; + py[nodes] = py[nstart]; + path[nodes] = path[nstart]; + initlen += __float2int_rn(euclidean_dist(px, py, nodes, nstart)); + } +} + +__device__ void two_opt_search( + int *buf, float *px, float *py, int *shbuf, int *minchange, int *mini, int *minj, int const nodes) +{ + __shared__ float shmem_x[tilesize]; + __shared__ float shmem_y[tilesize]; + + for (int ii = 0; ii < nodes - 2; ii += blockDim.x) { + int i = ii + threadIdx.x; + float pxi0, pyi0, pxi1, pyi1, pxj1, pyj1; + if (i < nodes - 2) { + minchange[0] -= buf[i]; + pxi0 = px[i]; + pyi0 = py[i]; + pxi1 = px[i + 1]; + pyi1 = py[i + 1]; + pxj1 = px[nodes]; + pyj1 = py[nodes]; + } + for (int jj = nodes - 1; jj >= ii + 2; jj -= tilesize) { + int bound = jj - tilesize + 1; + for (int k = threadIdx.x; k < tilesize; k += blockDim.x) { + if (k + bound >= ii + 2) { + shmem_x[k] = px[k + bound]; + shmem_y[k] = py[k + bound]; + shbuf[k] = buf[k + bound]; + } + } + __syncthreads(); + + int lower = bound; + if (lower < (i + 2)) lower = i + 2; + for (int j = jj; j >= lower; j--) { + int jm = j - bound; + float pxj0 = shmem_x[jm]; + float pyj0 = shmem_y[jm]; + int delta = + shbuf[jm] + + __float2int_rn(sqrtf((pxi0 - pxj0) * (pxi0 - pxj0) + (pyi0 - pyj0) * (pyi0 - pyj0))) + + __float2int_rn(sqrtf((pxi1 - pxj1) * (pxi1 - pxj1) + (pyi1 - pyj1) * (pyi1 - pyj1))); + pxj1 = pxj0; + pyj1 = pyj0; + + if (delta < minchange[0]) { + minchange[0] = delta; + mini[0] = i; + minj[0] = j; + } + } + __syncthreads(); + } + + if (i < nodes - 2) { minchange[0] += buf[i]; } + } +} + +// This function being runned for each block +__device__ void hill_climbing( + float *px, float *py, int *buf, int *path, int *shbuf, int const nodes, int *climbs) +{ + __shared__ int best_change[kswaps]; + __shared__ int best_i[kswaps]; + __shared__ int best_j[kswaps]; + + int minchange; + int mini; + int minj; + int kswaps_active = kswaps; + int myswaps = 0; + + // Hill climbing, iteratively improve from the starting guess + do { + if (threadIdx.x == 0) { + for (int k = 0; k < kswaps; k++) { + best_change[k] = 0; + best_i[k] = 0; + best_j[k] = 0; + } + } + __syncthreads(); + for (int i = threadIdx.x; i < nodes; i += blockDim.x) { + buf[i] = -__float2int_rn(euclidean_dist(px, py, i, i + 1)); + } + __syncthreads(); + + // Reset + minchange = 0; + mini = 0; + minj = 0; + + // Find best indices + two_opt_search(buf, px, py, shbuf, &minchange, &mini, &minj, nodes); + __syncthreads(); + + // Stats only + if (threadIdx.x == 0) atomicAdd(climbs, 1); + + shbuf[threadIdx.x] = minchange; + + int j = blockDim.x; // warp reduction to find best thread results + do { + int k = (j + 1) / 2; + if ((threadIdx.x + k) < j) { + shbuf[threadIdx.x] = min(shbuf[threadIdx.x + k], shbuf[threadIdx.x]); + } + j = k; + __syncthreads(); + } while (j > 1); // thread winner 
for this k is in shbuf[0] + + if (threadIdx.x == 0) { + best_change[0] = shbuf[0]; // sort best result in shared + } + __syncthreads(); + + if (minchange == shbuf[0]) { // My thread is as good as the winner + shbuf[1] = threadIdx.x; // store thread ID in shbuf[1] + } + __syncthreads(); + + if (threadIdx.x == shbuf[1]) { // move from thread local to shared + best_i[0] = mini; // shared best indices for compatibility checks + best_j[0] = minj; + } + __syncthreads(); + + // look for more compatible swaps + for (int kmin = 1; kmin < kswaps_active; kmin++) { + // disallow swaps that conflict with ones already picked + for (int kchk = kmin - 1; kchk >= 0; --kchk) { + if ((mini < (best_j[kchk] + 1)) && (minj > (best_i[kchk] - 1))) { + minchange = shbuf[threadIdx.x] = 0; + } + __syncthreads(); + } + shbuf[threadIdx.x] = minchange; + + j = blockDim.x; + do { + int k = (j + 1) / 2; + if ((threadIdx.x + k) < j) { + shbuf[threadIdx.x] = min(shbuf[threadIdx.x + k], shbuf[threadIdx.x]); + } + j = k; + __syncthreads(); + } while (j > 1); // thread winner for this k is in shbuf[0] + + if (threadIdx.x == 0) { + best_change[kmin] = shbuf[0]; // store best result in shared + } + __syncthreads(); + + if (minchange == shbuf[0]) { // My thread is as good as the winner + shbuf[1] = threadIdx.x; // store thread ID in shbuf[1] + __threadfence_block(); + } + __syncthreads(); + + if (threadIdx.x == shbuf[1]) { // move from thread local to shared + best_i[kmin] = mini; // store swap targets + best_j[kmin] = minj; + __threadfence_block(); + } + __syncthreads(); + // look for the best compatible move + } // end loop over kmin + minchange = best_change[0]; + myswaps += 1; + for (int kmin = 0; kmin < kswaps_active; kmin++) { + int sum = best_i[kmin] + best_j[kmin] + 1; // = mini + minj +1 + // this is a reversal of all nodes included in the range [ i+1, j ] + for (int i = threadIdx.x; (i + i) < sum; i += blockDim.x) { + if (best_i[kmin] < i) { + int j = sum - i; + raft::swapVals(px[i], px[j]); + raft::swapVals(py[i], py[j]); + raft::swapVals(path[i], path[j]); + } + } + __syncthreads(); + } + } while (minchange < 0 && myswaps < 2 * nodes); +} + +__device__ void get_optimal_tour( + int *mylock, int *best_tour, float *px, float *py, int *path, int *shbuf, int const nodes) +{ + // Now find actual length of the last tour, result of the climb + int term = 0; + for (int i = threadIdx.x; i < nodes; i += blockDim.x) { + term += __float2int_rn(euclidean_dist(px, py, i, i + 1)); + } + shbuf[threadIdx.x] = term; + __syncthreads(); + + int j = blockDim.x; // block level reduction + do { + int k = (j + 1) / 2; + if ((threadIdx.x + k) < j) { shbuf[threadIdx.x] += shbuf[threadIdx.x + k]; } + j = k; // divide active warp size in half + __syncthreads(); + } while (j > 1); + term = shbuf[0]; + + if (threadIdx.x == 0) { + atomicMin(best_tour, term); + while (atomicExch(mylock, 1) != 0) + ; // acquire + if (best_tour[0] == term) { + best_soln = px; + best_route = path; + } + *mylock = 0; // release + __threadfence(); + } +} + +__global__ __launch_bounds__(2048, 2) void search_solution(int *mylock, + int *best_tour, + int const *vtx_ptr, + bool beam_search, + int const K, + int nodes, + int64_t const *neighbors, + float const *posx, + float const *posy, + int *work, + int const nstart, + float *times, + int *climbs, + int threads) +{ + int *buf = &work[blockIdx.x * ((4 * nodes + 3 + 31) / 32 * 32)]; + float *px = (float *)(&buf[nodes]); + float *py = &px[nodes + 1]; + int *path = (int *)(&py[nodes + 1]); + __shared__ int shbuf[tilesize]; + 
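+  // Phase timers: times[threadIdx.x] records the initial-tour build,
+  // times[threads + threadIdx.x + 1] the hill climbing, and
+  // times[2 * threads + threadIdx.x + 1] the tour retrieval; print_times
+  // reports thread 0's slots when verbose is set.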
clock_t start; + + start = clock64(); + if (!beam_search) + random_init(posx, posy, vtx_ptr, path, px, py, nstart, nodes); + else + knn_init(posx, posy, vtx_ptr, neighbors, buf, path, px, py, nstart, nodes, K); + __syncthreads(); + times[threadIdx.x] = clock64() - start; + + start = clock64(); + hill_climbing(px, py, buf, path, shbuf, nodes, climbs); + __syncthreads(); + times[threads + threadIdx.x + 1] = clock64() - start; + + start = clock64(); + get_optimal_tour(mylock, best_tour, px, py, path, shbuf, nodes); + times[2 * threads + threadIdx.x + 1] = clock64() - start; +} +} // namespace detail +} // namespace cugraph diff --git a/cpp/src/traversal/tsp_utils.hpp b/cpp/src/traversal/tsp_utils.hpp new file mode 100644 index 00000000000..3faa2efea3b --- /dev/null +++ b/cpp/src/traversal/tsp_utils.hpp @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#define tilesize 128 +#define kswaps 4 + +#include +#include +#include + +namespace cugraph { +namespace detail { + +__host__ __device__ inline float euclidean_dist(float *px, float *py, int a, int b) +{ + return sqrtf((px[a] - px[b]) * (px[a] - px[b]) + (py[a] - py[b]) * (py[a] - py[b])); +} + +static std::vector device_func = {"Find First", "Hill Climbing", "Retrieve Path"}; + +void print_times(std::vector &h_times, int const n_timers, int device, int threads) +{ + int clock_rate; + cudaDeviceGetAttribute(&clock_rate, cudaDevAttrClockRate, device); + + double total = 0; + h_times[0] /= (float)clock_rate; + total += h_times[0]; + for (int i = 1; i < n_timers; ++i) { + h_times[i * threads + 1] /= (float)clock_rate; + total += h_times[i * threads + 1]; + } + std::cout << "Stats: \n"; + std::cout << device_func[0] << " time: " << h_times[0] * 1e-3 << " " + << (h_times[0] / total) * 100.0 << "%\n"; + for (int i = 1; i < n_timers; ++i) { + std::cout << device_func[i] << " time: " << h_times[i * threads + 1] * 1e-3 << " " + << (h_times[i * threads + 1] / total) * 100.0 << "%\n"; + } +} + +// Get maximum number of threads we can run on based on number of nodes, +// shared memory usage, max threads per block and SM, max blocks for SM and registers per SM. 
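+// Heuristic: sweep candidate block sizes, estimate resident blocks from a 32 KB shared-memory
+// budget (capped by the SM count and the 2048 threads-per-SM limit), and keep the candidate
+// that maximizes threads * blocks.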
+int best_thread_count(int nodes, int max_threads, int sm_count, int warp_size) +{ + int smem, blocks, thr, perf; + int const max_threads_sm = 2048; + int max = nodes - 2; + int best = 0; + int bthr = 4; + + if (max > max_threads) max = max_threads; + + for (int threads = 1; threads <= max; ++threads) { + smem = sizeof(int) * threads + 2 * sizeof(float) * tilesize + sizeof(int) * tilesize; + blocks = (16384 * 2) / smem; + if (blocks > sm_count) blocks = sm_count; + thr = (threads + warp_size - 1) / warp_size * warp_size; + while (blocks * thr > max_threads_sm) blocks--; + perf = threads * blocks; + if (perf > best) { + best = perf; + bthr = threads; + } + } + + return bthr; +} +} // namespace detail +} // namespace cugraph diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 3ee25d993b6..5425c68e896 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -270,6 +270,16 @@ set(FA2_TEST_SRC ConfigureTest(FA2_TEST "${FA2_TEST_SRC}") +################################################################################################### +# - TSP tests -------------------------------------------------------------------------- + +set(TSP_TEST_SRC + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/traversal/tsp_test.cu") + + ConfigureTest(TSP_TEST "${TSP_TEST_SRC}" "") + ################################################################################################### # - CONNECTED COMPONENTS tests ------------------------------------------------------------------- diff --git a/cpp/tests/traversal/tsp_test.cu b/cpp/tests/traversal/tsp_test.cu new file mode 100644 index 00000000000..383427a56cf --- /dev/null +++ b/cpp/tests/traversal/tsp_test.cu @@ -0,0 +1,245 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. + * + * NVIDIA CORPORATION and its licensors retain all intellectual property + * and proprietary rights in and to this software, related documentation + * and any modifications thereto. Any use, reproduction, disclosure or + * distribution of this software and related documentation without an express + * license agreement from NVIDIA CORPORATION is strictly prohibited. + * + */ + +// TSP solver tests +// Author: Hugo Linsenmaier hlinsenmaier@nvidia.com + +/* + * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. + * + * NVIDIA CORPORATION and its licensors retain all intellectual property + * and proprietary rights in and to this software, related documentation + * and any modifications thereto. Any use, reproduction, disclosure or + * distribution of this software and related documentation without an express + * license agreement from NVIDIA CORPORATION is strictly prohibited. 
+ * + */ + +// TSP solver tests +// Author: Hugo Linsenmaier hlinsenmaier@nvidia.com + +#include +#include +#include + +#include +#include + +#include + +#include +#include + +#include + +#include +#include +#include + +typedef struct Tsp_Usecase_t { + std::string tsp_file; + float ref_cost; + Tsp_Usecase_t(const std::string& a, const float c) + { + // assume relative paths are relative to RAPIDS_DATASET_ROOT_DIR + const std::string& rapidsDatasetRootDir = cugraph::test::get_rapids_dataset_root_dir(); + if ((a != "") && (a[0] != '/')) { + tsp_file = rapidsDatasetRootDir + "/" + a; + } else { + tsp_file = a; + } + ref_cost = c; + } + Tsp_Usecase_t& operator=(const Tsp_Usecase_t& rhs) + { + tsp_file = rhs.tsp_file; + ref_cost = rhs.ref_cost; + return *this; + } +} Tsp_Usecase; + +static std::vector euc_2d{ + {"tsplib/datasets/a280.tsp", 2579}, {"tsplib/datasets/berlin52.tsp", 7542}, + {"tsplib/datasets/bier127.tsp", 118282}, {"tsplib/datasets/ch130.tsp", 6110}, + {"tsplib/datasets/ch150.tsp", 6528}, {"tsplib/datasets/d1291.tsp", 50801}, + {"tsplib/datasets/d1655.tsp", 62128}, {"tsplib/datasets/d198.tsp", 15780}, + {"tsplib/datasets/d2103.tsp", 80450}, {"tsplib/datasets/d493.tsp", 35002}, + {"tsplib/datasets/d657.tsp", 48912}, {"tsplib/datasets/eil101.tsp", 629}, + {"tsplib/datasets/eil51.tsp", 426}, {"tsplib/datasets/eil76.tsp", 538}, + {"tsplib/datasets/fl1400.tsp", 20127}, {"tsplib/datasets/fl1577.tsp", 22249}, + {"tsplib/datasets/fl417.tsp", 11861}, {"tsplib/datasets/gil262.tsp", 2378}, + {"tsplib/datasets/kroA100.tsp", 21282}, {"tsplib/datasets/kroA150.tsp", 26524}, + {"tsplib/datasets/kroA200.tsp", 29368}, {"tsplib/datasets/kroB100.tsp", 22141}, + {"tsplib/datasets/kroB150.tsp", 26130}, {"tsplib/datasets/kroB200.tsp", 29437}, + {"tsplib/datasets/kroC100.tsp", 20749}, {"tsplib/datasets/kroD100.tsp", 21294}, + {"tsplib/datasets/kroE100.tsp", 22068}, {"tsplib/datasets/lin105.tsp", 14379}, + {"tsplib/datasets/lin318.tsp", 42029}, {"tsplib/datasets/nrw1379.tsp", 56638}, + {"tsplib/datasets/p654.tsp", 34643}, {"tsplib/datasets/pcb1173.tsp", 56892}, + {"tsplib/datasets/pcb442.tsp", 50778}, {"tsplib/datasets/pr1002.tsp", 259045}, + {"tsplib/datasets/pr107.tsp", 44303}, {"tsplib/datasets/pr136.tsp", 96772}, + {"tsplib/datasets/pr144.tsp", 58537}, {"tsplib/datasets/pr152.tsp", 73682}, + {"tsplib/datasets/pr226.tsp", 80369}, {"tsplib/datasets/pr264.tsp", 49135}, + {"tsplib/datasets/pr299.tsp", 48191}, {"tsplib/datasets/pr439.tsp", 107217}, + {"tsplib/datasets/pr76.tsp", 108159}, {"tsplib/datasets/rat195.tsp", 2323}, + {"tsplib/datasets/rat575.tsp", 6773}, {"tsplib/datasets/rat783.tsp", 8806}, + {"tsplib/datasets/rat99.tsp", 1211}, {"tsplib/datasets/rd100.tsp", 7910}, + {"tsplib/datasets/rd400.tsp", 15281}, {"tsplib/datasets/rl1323.tsp", 270199}, + {"tsplib/datasets/st70.tsp", 675}, {"tsplib/datasets/ts225.tsp", 126643}, + {"tsplib/datasets/tsp225.tsp", 3916}, {"tsplib/datasets/u1060.tsp", 224094}, + {"tsplib/datasets/u1432.tsp", 152970}, {"tsplib/datasets/u159.tsp", 42080}, + {"tsplib/datasets/u574.tsp", 36905}, {"tsplib/datasets/u724.tsp", 41910}, + {"tsplib/datasets/vm1084.tsp", 239297}, +}; + +struct Route { + std::vector cities; + std::vector x_pos; + std::vector y_pos; +}; + +class Tests_Tsp : public ::testing::TestWithParam { + public: + Tests_Tsp() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + virtual void SetUp() {} + virtual void TearDown() {} + + void run_current_test(const Tsp_Usecase& param) + { + const ::testing::TestInfo* const test_info = + 
::testing::UnitTest::GetInstance()->current_test_info(); + std::stringstream ss; + std::string test_id = std::string(test_info->test_case_name()) + std::string(".") + + std::string(test_info->name()) + std::string("_") + + cugraph::test::getFileName(param.tsp_file) + std::string("_") + + ss.str().c_str(); + + float tol = 1E-1f; + HighResClock hr_clock; + double time_tmp; + Route input; + + std::cout << "File: " << param.tsp_file.c_str() << "\n"; + int nodes = load_tsp(param.tsp_file.c_str(), &input); + + // Device alloc + raft::handle_t handle; + rmm::device_uvector vertices(static_cast(nodes), nullptr); + rmm::device_uvector route(static_cast(nodes), nullptr); + rmm::device_uvector x_pos(static_cast(nodes), nullptr); + rmm::device_uvector y_pos(static_cast(nodes), nullptr); + + int* vtx_ptr = vertices.data(); + int* d_route = route.data(); + float* d_x_pos = x_pos.data(); + float* d_y_pos = y_pos.data(); + + CUDA_TRY(cudaMemcpy(vtx_ptr, input.cities.data(), sizeof(int) * nodes, cudaMemcpyHostToDevice)); + CUDA_TRY( + cudaMemcpy(d_x_pos, input.x_pos.data(), sizeof(float) * nodes, cudaMemcpyHostToDevice)); + CUDA_TRY( + cudaMemcpy(d_y_pos, input.y_pos.data(), sizeof(float) * nodes, cudaMemcpyHostToDevice)); + + // Default parameters + int restarts = 4096; + bool beam_search = true; + int k = 4; + int nstart = 0; + bool verbose = false; + + hr_clock.start(); + cudaDeviceSynchronize(); + cudaProfilerStart(); + + float final_cost = cugraph::traveling_salesperson( + handle, vtx_ptr, d_x_pos, d_y_pos, nodes, restarts, beam_search, k, nstart, verbose, d_route); + cudaProfilerStop(); + cudaDeviceSynchronize(); + hr_clock.stop(&time_tmp); + + std::vector h_route; + h_route.resize(nodes); + std::vector h_vertices; + h_vertices.resize(nodes); + CUDA_TRY(cudaMemcpy(h_route.data(), d_route, sizeof(int) * nodes, cudaMemcpyDeviceToHost)); + cudaDeviceSynchronize(); + CUDA_TRY(cudaMemcpy(h_vertices.data(), vtx_ptr, sizeof(int) * nodes, cudaMemcpyDeviceToHost)); + cudaDeviceSynchronize(); + + std::cout << "tsp_time: " << time_tmp << " us" << std::endl; + std::cout << "Ref cost is: " << param.ref_cost << "\n"; + std::cout << "Final cost is: " << final_cost << "\n"; + float err = fabs(final_cost - param.ref_cost); + err /= param.ref_cost; + std::cout << "Approximation error is: " << err * 100 << "%\n"; + EXPECT_LE(err, tol); + + // Check route goes through each vertex once + size_t u_nodes = nodes; + std::set node_set(h_route.begin(), h_route.end()); + ASSERT_EQ(node_set.size(), u_nodes); + + // Bound check + int max = *std::max_element(h_vertices.begin(), h_vertices.end()); + int min = *std::min_element(h_vertices.begin(), h_vertices.end()); + EXPECT_GE(*node_set.begin(), min); + EXPECT_LE(*node_set.rbegin(), max); + } + + private: + std::vector split(const std::string& s, char delimiter) + { + std::vector tokens; + std::string token; + std::istringstream tokenStream(s); + while (std::getline(tokenStream, token, delimiter)) { + if (token.size() == 0) continue; + tokens.push_back(token); + } + return tokens; + } + + // FIXME: At the moment TSP does not accept a graph_t as input and therefore + // deviates from the standard testing I/O pattern. Once other input types + // are supported we want to reconcile TSP testing with the rest of cugraph. 
+ int load_tsp(const char* fname, Route* input) + { + std::fstream fs; + fs.open(fname); + std::string line; + std::vector tokens; + int nodes = 0; + while (std::getline(fs, line) && line.find(':') != std::string::npos) { + tokens = split(line, ':'); + auto strip_token = split(tokens[0], ' ')[0]; + if (strip_token == "DIMENSION") nodes = std::stof(tokens[1]); + } + + while (std::getline(fs, line) && line.find(' ') != std::string::npos) { + tokens = split(line, ' '); + auto city_id = std::stof(tokens[0]); + auto x = std::stof(tokens[1]); + auto y = std::stof(tokens[2]); + input->cities.push_back(city_id); + input->x_pos.push_back(x); + input->y_pos.push_back(y); + } + fs.close(); + assert(nodes == input->cities.size()); + return nodes; + } +}; + +TEST_P(Tests_Tsp, CheckFP32_T) { run_current_test(GetParam()); } + +INSTANTIATE_TEST_CASE_P(simple_test, Tests_Tsp, ::testing::ValuesIn(euc_2d)); +CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/datasets/eil51.tsp b/datasets/eil51.tsp new file mode 100644 index 00000000000..543d1013c14 --- /dev/null +++ b/datasets/eil51.tsp @@ -0,0 +1,58 @@ +NAME : eil51 +COMMENT : 51-city problem (Christofides/Eilon) +TYPE : TSP +DIMENSION : 51 +EDGE_WEIGHT_TYPE : EUC_2D +NODE_COORD_SECTION +1 37 52 +2 49 49 +3 52 64 +4 20 26 +5 40 30 +6 21 47 +7 17 63 +8 31 62 +9 52 33 +10 51 21 +11 42 41 +12 31 32 +13 5 25 +14 12 42 +15 36 16 +16 52 41 +17 27 23 +18 17 33 +19 13 13 +20 57 58 +21 62 42 +22 42 57 +23 16 57 +24 8 52 +25 7 38 +26 27 68 +27 30 48 +28 43 67 +29 58 48 +30 58 27 +31 37 69 +32 38 46 +33 46 10 +34 61 33 +35 62 63 +36 63 69 +37 32 22 +38 45 35 +39 59 15 +40 5 6 +41 10 17 +42 21 10 +43 5 64 +44 30 15 +45 39 10 +46 32 39 +47 25 32 +48 25 55 +49 48 28 +50 56 37 +51 30 40 +EOF diff --git a/datasets/get_test_data.sh b/datasets/get_test_data.sh index 071a4b8dea3..3e0b6c55c37 100755 --- a/datasets/get_test_data.sh +++ b/datasets/get_test_data.sh @@ -1,3 +1,16 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ #!/bin/bash set -e set -o pipefail @@ -31,6 +44,10 @@ benchmark # ~1s download https://s3.us-east-2.amazonaws.com/rapidsai-data/cugraph/benchmark/hibench/hibench_1_small.tgz benchmark + +# ~0.6s download +https://rapidsai-data.s3.us-east-2.amazonaws.com/cugraph/test/tsplib/datasets.tar.gz +tsplib " EXTENDED_DATASET_DATA=" diff --git a/datasets/gil262.tsp b/datasets/gil262.tsp new file mode 100755 index 00000000000..cfcb15c3b78 --- /dev/null +++ b/datasets/gil262.tsp @@ -0,0 +1,269 @@ +NAME : gil262 +COMMENT : 262-city problem (Gillet/Johnson) +TYPE : TSP +DIMENSION : 262 +EDGE_WEIGHT_TYPE : EUC_2D +NODE_COORD_SECTION +1 -99 -97 +2 -59 50 +3 0 14 +4 -17 -66 +5 -69 -19 +6 31 12 +7 5 -41 +8 -12 10 +9 -64 70 +10 -12 85 +11 -18 64 +12 -77 -16 +13 -53 88 +14 83 -24 +15 24 41 +16 17 21 +17 42 96 +18 -65 0 +19 -47 -26 +20 85 36 +21 -35 -54 +22 54 -21 +23 64 -17 +24 55 89 +25 17 -25 +26 -61 66 +27 -61 26 +28 17 -72 +29 79 38 +30 -62 -2 +31 -90 -68 +32 52 66 +33 -54 -50 +34 8 -84 +35 37 -90 +36 -83 49 +37 35 -1 +38 7 59 +39 12 48 +40 57 95 +41 92 28 +42 -3 97 +43 -7 52 +44 42 -15 +45 77 -43 +46 59 -49 +47 25 91 +48 69 -19 +49 -82 -14 +50 74 -70 +51 69 59 +52 29 33 +53 -97 9 +54 -58 9 +55 28 93 +56 7 73 +57 -28 73 +58 -76 55 +59 41 42 +60 92 40 +61 -84 -29 +62 -12 42 +63 51 -45 +64 -37 46 +65 -97 35 +66 14 89 +67 60 58 +68 -63 -75 +69 -18 34 +70 -46 -82 +71 -86 -79 +72 -43 -30 +73 -44 7 +74 -3 -20 +75 36 41 +76 -30 -94 +77 79 -62 +78 51 70 +79 -61 -26 +80 6 94 +81 -19 -62 +82 -20 51 +83 -81 37 +84 7 31 +85 52 12 +86 83 -91 +87 -7 -92 +88 82 -74 +89 -70 85 +90 -83 -30 +91 71 -61 +92 85 11 +93 66 -48 +94 78 -87 +95 9 -79 +96 -36 4 +97 66 39 +98 92 -17 +99 -46 -79 +100 -30 -63 +101 -42 63 +102 20 42 +103 15 98 +104 1 -17 +105 64 20 +106 -96 85 +107 93 -29 +108 -40 -84 +109 86 35 +110 91 36 +111 62 -8 +112 -24 4 +113 11 96 +114 -53 62 +115 -28 -71 +116 7 -4 +117 95 -9 +118 -3 17 +119 53 -90 +120 58 -19 +121 -83 84 +122 -1 49 +123 -4 17 +124 -82 -3 +125 -43 47 +126 6 -6 +127 70 99 +128 68 -29 +129 -94 -30 +130 -94 -20 +131 -21 77 +132 64 37 +133 -70 -19 +134 88 65 +135 2 29 +136 33 57 +137 -70 6 +138 -38 -56 +139 -80 -95 +140 -5 -39 +141 8 -22 +142 -61 -76 +143 76 -22 +144 49 -71 +145 -30 -68 +146 1 34 +147 77 79 +148 -58 64 +149 82 -97 +150 -80 55 +151 81 -86 +152 39 -49 +153 -67 72 +154 -25 -89 +155 -44 -95 +156 32 -68 +157 -17 49 +158 93 49 +159 99 81 +160 10 -49 +161 63 -41 +162 38 39 +163 -28 39 +164 -2 -47 +165 38 8 +166 -42 -6 +167 -67 88 +168 19 93 +169 40 27 +170 -61 56 +171 43 33 +172 -18 -39 +173 -69 19 +174 75 -18 +175 31 85 +176 25 58 +177 -16 36 +178 91 15 +179 60 -39 +180 49 -47 +181 42 33 +182 16 -81 +183 -78 53 +184 53 -80 +185 -46 -26 +186 -25 -54 +187 69 -46 +188 0 -78 +189 -84 74 +190 -16 16 +191 -63 -14 +192 51 -77 +193 -39 61 +194 5 97 +195 -55 39 +196 70 -14 +197 0 95 +198 -45 7 +199 38 -24 +200 50 -37 +201 59 71 +202 -73 -96 +203 -29 72 +204 -47 12 +205 -88 -61 +206 -88 36 +207 -46 -3 +208 26 -37 +209 -39 -67 +210 92 27 +211 -80 -31 +212 93 -50 +213 -20 -5 +214 -22 73 +215 -4 -7 +216 54 -48 +217 -70 39 +218 54 -82 +219 29 41 +220 -87 51 +221 -96 -36 +222 49 8 +223 -5 54 +224 -26 43 +225 -11 60 +226 40 61 +227 82 35 +228 -92 12 +229 -93 -86 +230 -66 63 +231 -72 -87 +232 -57 -84 +233 23 52 +234 -56 -62 +235 -19 59 +236 63 -14 +237 -13 38 +238 -19 87 +239 44 -84 +240 98 -17 +241 -16 62 +242 3 66 +243 26 22 +244 -38 -81 +245 70 80 +246 17 -35 +247 96 -83 +248 -77 80 +249 -14 44 +250 -33 33 +251 33 -33 +252 70 0 +253 -50 60 +254 -50 -60 +255 75 0 +256 0 75 +257 -75 0 +258 0 -75 +259 
40 80 +260 40 -80 +261 -60 20 +262 -60 -20 +EOF diff --git a/datasets/kroA100.tsp b/datasets/kroA100.tsp new file mode 100644 index 00000000000..05ebae994ac --- /dev/null +++ b/datasets/kroA100.tsp @@ -0,0 +1,107 @@ +NAME: kroA100 +TYPE: TSP +COMMENT: 100-city problem A (Krolak/Felts/Nelson) +DIMENSION: 100 +EDGE_WEIGHT_TYPE : EUC_2D +NODE_COORD_SECTION +1 1380 939 +2 2848 96 +3 3510 1671 +4 457 334 +5 3888 666 +6 984 965 +7 2721 1482 +8 1286 525 +9 2716 1432 +10 738 1325 +11 1251 1832 +12 2728 1698 +13 3815 169 +14 3683 1533 +15 1247 1945 +16 123 862 +17 1234 1946 +18 252 1240 +19 611 673 +20 2576 1676 +21 928 1700 +22 53 857 +23 1807 1711 +24 274 1420 +25 2574 946 +26 178 24 +27 2678 1825 +28 1795 962 +29 3384 1498 +30 3520 1079 +31 1256 61 +32 1424 1728 +33 3913 192 +34 3085 1528 +35 2573 1969 +36 463 1670 +37 3875 598 +38 298 1513 +39 3479 821 +40 2542 236 +41 3955 1743 +42 1323 280 +43 3447 1830 +44 2936 337 +45 1621 1830 +46 3373 1646 +47 1393 1368 +48 3874 1318 +49 938 955 +50 3022 474 +51 2482 1183 +52 3854 923 +53 376 825 +54 2519 135 +55 2945 1622 +56 953 268 +57 2628 1479 +58 2097 981 +59 890 1846 +60 2139 1806 +61 2421 1007 +62 2290 1810 +63 1115 1052 +64 2588 302 +65 327 265 +66 241 341 +67 1917 687 +68 2991 792 +69 2573 599 +70 19 674 +71 3911 1673 +72 872 1559 +73 2863 558 +74 929 1766 +75 839 620 +76 3893 102 +77 2178 1619 +78 3822 899 +79 378 1048 +80 1178 100 +81 2599 901 +82 3416 143 +83 2961 1605 +84 611 1384 +85 3113 885 +86 2597 1830 +87 2586 1286 +88 161 906 +89 1429 134 +90 742 1025 +91 1625 1651 +92 1187 706 +93 1787 1009 +94 22 987 +95 3640 43 +96 3756 882 +97 776 392 +98 1724 1642 +99 198 1810 +100 3950 1558 +EOF diff --git a/datasets/tsp225.tsp b/datasets/tsp225.tsp new file mode 100644 index 00000000000..ac9e06cecc1 --- /dev/null +++ b/datasets/tsp225.tsp @@ -0,0 +1,232 @@ +NAME : tsp225 +COMMENT : A TSP problem (Reinelt) +TYPE : TSP +DIMENSION : 225 +EDGE_WEIGHT_TYPE : EUC_2D +NODE_COORD_SECTION +1 155.42 150.65 +2 375.92 164.65 +3 183.92 150.65 +4 205.42 150.65 +5 205.42 171.65 +6 226.42 171.65 +7 226.42 186.15 +8 226.42 207.15 +9 226.42 235.65 +10 226.42 264.15 +11 226.42 292.65 +12 226.42 314.15 +13 226.42 335.65 +14 205.42 335.65 +15 190.92 335.65 +16 190.92 328.15 +17 176.92 328.15 +18 176.92 299.65 +19 155.42 299.65 +20 155.42 328.15 +21 155.42 356.65 +22 183.92 356.65 +23 219.42 356.65 +24 240.92 356.65 +25 269.42 356.65 +26 290.42 356.65 +27 387.42 136.15 +28 318.92 356.65 +29 318.92 335.65 +30 318.92 328.15 +31 318.92 299.65 +32 297.92 299.65 +33 290.42 328.15 +34 290.42 335.65 +35 297.92 328.15 +36 254.92 335.65 +37 254.92 314.15 +38 254.92 292.65 +39 254.92 271.65 +40 254.92 243.15 +41 254.92 221.65 +42 254.92 193.15 +43 254.92 171.65 +44 276.42 171.65 +45 296.42 150.65 +46 276.42 150.65 +47 375.92 150.65 +48 308.92 150.65 +49 354.92 164.65 +50 338.42 174.65 +51 354.92 174.65 +52 338.42 200.15 +53 338.42 221.65 +54 354.92 221.65 +55 354.92 200.15 +56 361.92 200.15 +57 361.92 186.15 +58 383.42 186.15 +59 383.42 179.15 +60 404.42 179.15 +61 404.42 186.15 +62 418.92 186.15 +63 418.92 200.15 +64 432.92 200.15 +65 432.92 221.65 +66 418.92 221.65 +67 418.92 235.65 +68 397.42 235.65 +69 397.42 243.15 +70 375.92 243.15 +71 375.92 257.15 +72 368.92 257.15 +73 368.92 264.15 +74 347.42 264.15 +75 347.42 278.65 +76 336.42 278.65 +77 336.42 328.15 +78 347.42 328.15 +79 347.42 342.65 +80 368.92 342.65 +81 368.92 353.65 +82 418.92 353.65 +83 418.92 342.65 +84 432.92 342.65 +85 432.92 356.65 +86 447.42 356.65 +87 447.42 321.15 +88 447.42 292.65 +89 432.92 292.65 
+90 432.92 314.15 +91 418.92 314.15 +92 418.92 321.15 +93 397.42 321.15 +94 397.42 333.65 +95 375.92 333.65 +96 375.92 321.15 +97 361.92 321.15 +98 361.92 299.65 +99 375.92 299.65 +100 375.92 285.65 +101 397.42 285.65 +102 397.42 271.65 +103 418.92 271.65 +104 418.92 264.15 +105 439.92 264.15 +106 439.92 250.15 +107 454.42 250.15 +108 454.42 243.15 +109 461.42 243.15 +110 461.42 214.65 +111 461.42 193.15 +112 447.42 193.15 +113 447.42 179.15 +114 439.92 179.15 +115 439.92 167.65 +116 419.92 167.65 +117 419.92 150.65 +118 439.92 150.65 +119 454.42 150.65 +120 475.92 150.65 +121 475.92 171.65 +122 496.92 171.65 +123 496.92 193.15 +124 496.92 214.65 +125 496.92 243.15 +126 496.92 271.65 +127 496.92 292.65 +128 496.92 317.15 +129 496.92 335.65 +130 470.42 335.65 +131 470.42 356.65 +132 496.92 356.65 +133 347.42 150.65 +134 539.92 356.65 +135 560.92 356.65 +136 589.42 356.65 +137 589.42 342.65 +138 603.92 342.65 +139 610.92 342.65 +140 610.92 335.65 +141 610.92 321.15 +142 624.92 321.15 +143 624.92 278.65 +144 610.92 278.65 +145 610.92 257.15 +146 589.42 257.15 +147 589.42 250.15 +148 575.42 250.15 +149 560.92 250.15 +150 542.92 250.15 +151 542.92 264.15 +152 560.92 264.15 +153 575.42 264.15 +154 575.42 271.65 +155 582.42 271.65 +156 582.42 285.65 +157 596.42 285.65 +158 560.92 335.65 +159 596.42 314.15 +160 582.42 314.15 +161 582.42 321.15 +162 575.42 321.15 +163 575.42 335.65 +164 525.42 335.65 +165 525.42 314.15 +166 525.42 299.65 +167 525.42 281.65 +168 525.42 233.15 +169 525.42 214.65 +170 525.42 193.15 +171 525.42 171.65 +172 546.92 171.65 +173 546.92 150.65 +174 568.42 150.65 +175 475.92 160.65 +176 603.92 150.65 +177 624.92 150.65 +178 624.92 136.15 +179 596.42 136.15 +180 575.42 136.15 +181 553.92 136.15 +182 532.42 136.15 +183 575.42 356.65 +184 489.92 136.15 +185 468.42 136.15 +186 447.42 136.15 +187 425.92 136.15 +188 404.42 136.15 +189 370.42 136.15 +190 361.92 150.65 +191 340.42 136.15 +192 326.42 136.15 +193 301.92 136.15 +194 276.42 136.15 +195 254.92 136.15 +196 315.92 136.15 +197 212.42 136.15 +198 190.92 136.15 +199 338.92 150.65 +200 155.42 136.15 +201 624.92 299.65 +202 318.92 321.65 +203 155.42 314.15 +204 311.92 356.65 +205 355.42 136.15 +206 318.92 314.15 +207 362.92 164.65 +208 254.92 356.65 +209 383.42 333.65 +210 447.42 335.65 +211 470.42 345.65 +212 525.42 250.15 +213 546.92 335.65 +214 525.42 261.15 +215 525.42 356.65 +216 336.42 298.65 +217 336.42 313.15 +218 293.42 136.15 +219 336.42 306.15 +220 425.92 264.15 +221 391.42 353.65 +222 482.92 335.65 +223 429.92 167.65 +224 330.92 150.65 +225 368.42 150.65 +EOF diff --git a/python/cugraph/__init__.py b/python/cugraph/__init__.py index 46013903a38..8a847d1f1d4 100644 --- a/python/cugraph/__init__.py +++ b/python/cugraph/__init__.py @@ -81,7 +81,8 @@ sssp, shortest_path, filter_unreachable, - shortest_path_length + shortest_path_length, + traveling_salesperson ) from cugraph.tree import minimum_spanning_tree, maximum_spanning_tree diff --git a/python/cugraph/link_analysis/pagerank_wrapper.pyx b/python/cugraph/link_analysis/pagerank_wrapper.pyx index 88548539a4e..fea1939db6a 100644 --- a/python/cugraph/link_analysis/pagerank_wrapper.pyx +++ b/python/cugraph/link_analysis/pagerank_wrapper.pyx @@ -57,7 +57,6 @@ def pagerank(input_graph, alpha=0.85, personalization=None, max_iter=100, tol=1. 
raise ValueError('nstart must have initial guess for all vertices') df['pagerank'][nstart['vertex']] = nstart['values'] has_guess = 1 - print(df) cdef uintptr_t c_identifier = df['vertex'].__cuda_array_interface__['data'][0]; cdef uintptr_t c_pagerank_val = df['pagerank'].__cuda_array_interface__['data'][0]; diff --git a/python/cugraph/tests/test_traveling_salesperson.py b/python/cugraph/tests/test_traveling_salesperson.py new file mode 100644 index 00000000000..d43b55c43d0 --- /dev/null +++ b/python/cugraph/tests/test_traveling_salesperson.py @@ -0,0 +1,81 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from cugraph.tests import utils +import cudf +import cugraph +import gc +import numpy as np +import pytest + +# Temporarily suppress warnings till networkX fixes deprecation warnings +# (Using or importing the ABCs from 'collections' instead of from +# 'collections.abc' is deprecated, and in 3.8 it will stop working) for +# python 3.7. Also, this import networkx needs to be relocated in the +# third-party group once this gets fixed. +import warnings + +with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=DeprecationWarning) + import networkx as nx + +print("Networkx version : {} ".format(nx.__version__)) + + +# ============================================================================= +# Pytest Setup / Teardown - called for each test function +# ============================================================================= +def setup_function(): + gc.collect() + + +# ============================================================================= +# Helper functions +# ============================================================================= +def load_tsp(filename=None): + gdf = cudf.read_csv(filename, + delim_whitespace=True, + skiprows=6, + names=["vertex", "x", "y"], + dtypes={"vertex": "int32", + "x": "float32", + "y": "float32"} + ) + gdf = gdf.dropna() + gdf['vertex'] = gdf['vertex'].str.strip() + gdf['vertex'] = gdf['vertex'].astype("int32") + return gdf + + +# ============================================================================= +# Tests +# ============================================================================= +@pytest.mark.parametrize("tsplib_file, ref_cost", utils.DATASETS_TSPLIB) +def test_traveling_salesperson(gpubenchmark, tsplib_file, ref_cost): + pos_list = load_tsp(tsplib_file) + + cu_route, cu_cost = gpubenchmark(cugraph.traveling_salesperson, + pos_list, + restarts=4096) + + print("Cugraph cost: ", cu_cost) + print("Ref cost: ", ref_cost) + error = np.abs(cu_cost - ref_cost) / ref_cost + print("Approximation error is: {:.2f}%".format(error * 100)) + # Check we are within 5% of TSPLIB + assert(error * 100 < 5.) 
+ assert(cu_route.nunique() == pos_list.shape[0]) + assert(cu_route.shape[0] == pos_list.shape[0]) + min_val = pos_list["vertex"].min() + max_val = pos_list["vertex"].max() + assert(cu_route.clip(min_val, max_val).shape[0] == cu_route.shape[0]) diff --git a/python/cugraph/tests/utils.py b/python/cugraph/tests/utils.py index ab12601c171..c2c14e0c02d 100755 --- a/python/cugraph/tests/utils.py +++ b/python/cugraph/tests/utils.py @@ -77,6 +77,13 @@ PurePath(RAPIDS_DATASET_ROOT_DIR)/"ref/ktruss/polbooks.csv") ] +DATASETS_TSPLIB = [ + (PurePath(RAPIDS_DATASET_ROOT_DIR)/f,) + (d,) for (f, d) in [ + ("gil262.tsp", 2378), + ("eil51.tsp", 426), + ("kroA100.tsp", 21282), + ("tsp225.tsp", 3916)] +] DATASETS_SMALL = [ PurePath(RAPIDS_DATASET_ROOT_DIR)/f for f in [ diff --git a/python/cugraph/traversal/__init__.py b/python/cugraph/traversal/__init__.py index 58e37a7add0..5944ebe0865 100644 --- a/python/cugraph/traversal/__init__.py +++ b/python/cugraph/traversal/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -18,4 +18,5 @@ shortest_path, filter_unreachable, shortest_path_length -) \ No newline at end of file +) +from cugraph.traversal.traveling_salesperson import traveling_salesperson diff --git a/python/cugraph/traversal/traveling_salesperson.pxd b/python/cugraph/traversal/traveling_salesperson.pxd new file mode 100644 index 00000000000..956c7da0978 --- /dev/null +++ b/python/cugraph/traversal/traveling_salesperson.pxd @@ -0,0 +1,34 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# cython: profile=False +# distutils: language = c++ +# cython: embedsignature = True +# cython: language_level = 3 + +from cugraph.structure.graph_primtypes cimport * + +cdef extern from "algorithms.hpp" namespace "cugraph": + + cdef float traveling_salesperson(const handle_t &handle, + int *vtx_ptr, + float *x_pos, + float *y_pos, + int nodes, + int restarts, + bool beam_search, + int k, + int nstart, + bool verbose, + int *route) except + + diff --git a/python/cugraph/traversal/traveling_salesperson.py b/python/cugraph/traversal/traveling_salesperson.py new file mode 100644 index 00000000000..80f9cd7441b --- /dev/null +++ b/python/cugraph/traversal/traveling_salesperson.py @@ -0,0 +1,75 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from cugraph.traversal import traveling_salesperson_wrapper +from cugraph.structure.graph import null_check +import cudf + + +def traveling_salesperson(pos_list, + restarts=100000, + beam_search=True, + k=4, + nstart=1, + verbose=False, + ): + """ + Finds an approximate solution to the traveling salesperson problem (TSP). + cuGraph computes an approximation of the TSP problem using hill climbing + optimization. + + The current implementation does not support a weighted graph. + Parameters + ---------- + pos_list: cudf.DataFrame + Data frame with initial vertex positions containing three columns: + 'vertex' ids and 'x', 'y' positions. + restarts: int + Number of starts to try. The more restarts, the better the solution + will be approximated. The number of restarts depends on the problem + size and should be kept low for instances above 2k cities. + beam_search: bool + Specify if the initial solution should use KNN for an approximation + solution. + k: int + Beam width to use in the search. + nstart: int + Vertex id to use as starting position. + verbose: bool + Logs configuration and iterative improvement. + + Returns + ------- + route : cudf.Series + cudf.Series of size V containing the ordered list of vertices + than needs to be visited. + """ + + if not isinstance(pos_list, cudf.DataFrame): + raise TypeError("Instance should be cudf.DataFrame") + + null_check(pos_list['vertex']) + null_check(pos_list['x']) + null_check(pos_list['y']) + + if not pos_list[pos_list['vertex'] == nstart].index: + raise ValueError("nstart should be in vertex ids") + + route, cost = traveling_salesperson_wrapper.traveling_salesperson( + pos_list, + restarts, + beam_search, + k, + nstart, + verbose) + return route, cost diff --git a/python/cugraph/traversal/traveling_salesperson_wrapper.pyx b/python/cugraph/traversal/traveling_salesperson_wrapper.pyx new file mode 100644 index 00000000000..b728c3ff37d --- /dev/null +++ b/python/cugraph/traversal/traveling_salesperson_wrapper.pyx @@ -0,0 +1,79 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# cython: profile=False +# distutils: language = c++ +# cython: embedsignature = True +# cython: language_level = 3 + +from cugraph.traversal.traveling_salesperson cimport traveling_salesperson as c_traveling_salesperson +from cugraph.structure import graph_primtypes_wrapper +from cugraph.structure.graph_primtypes cimport * +from libcpp cimport bool +from libc.stdint cimport uintptr_t +from numba import cuda + +import cudf +import numpy as np + + +def traveling_salesperson(pos_list, + restarts=100000, + beam_search=True, + k=4, + nstart=1, + verbose=False, + renumber=True, +): + """ + Call traveling_salesperson + """ + + nodes = pos_list.shape[0] + cdef uintptr_t x_pos = NULL + cdef uintptr_t y_pos = NULL + + pos_list['x'] = pos_list['x'].astype(np.float32) + pos_list['y'] = pos_list['y'].astype(np.float32) + x_pos = pos_list['x'].__cuda_array_interface__['data'][0] + y_pos = pos_list['y'].__cuda_array_interface__['data'][0] + + cdef unique_ptr[handle_t] handle_ptr + handle_ptr.reset(new handle_t()) + handle_ = handle_ptr.get(); + + cdef float final_cost = 0.0 + + cdef uintptr_t route_ptr = NULL + route_arr = cuda.device_array(nodes, dtype=np.int32) + route_ptr = route_arr.device_ctypes_pointer.value + + cdef uintptr_t vtx_ptr = NULL + vtx_ptr = pos_list['vertex'].__cuda_array_interface__['data'][0] + + renumbered_nstart = pos_list[pos_list['vertex'] == nstart].index[0] + + final_cost = c_traveling_salesperson(handle_[0], + vtx_ptr, + x_pos, + y_pos, + nodes, + restarts, + beam_search, + k, + renumbered_nstart, + verbose, + route_ptr) + + route = cudf.Series(route_arr) + return route, final_cost From 574063d83515464e49fcf0eb97e6363dd439914a Mon Sep 17 00:00:00 2001 From: Jordan Jacobelli Date: Tue, 16 Feb 2021 19:51:50 +0100 Subject: [PATCH 169/343] Add GHA to mark issues/prs as stale/rotten (#1408) Issues and PRs without activity for 30d will be marked as stale. If there is no activity for 90d, they will be marked as rotten. Authors: - Jordan Jacobelli (@Ethyling) Approvers: - Brad Rees (@BradReesWork) - AJ Schmidt (@ajschmidt8) URL: https://github.com/rapidsai/cugraph/pull/1408 --- .github/workflows/stale.yaml | 65 ++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 .github/workflows/stale.yaml diff --git a/.github/workflows/stale.yaml b/.github/workflows/stale.yaml new file mode 100644 index 00000000000..3b7de7ec699 --- /dev/null +++ b/.github/workflows/stale.yaml @@ -0,0 +1,65 @@ +name: Mark stale and rotten issues and pull requests + +on: + schedule: + - cron: "0 * * * *" + +jobs: + mark-stale-issues: + runs-on: ubuntu-latest + steps: + - name: Mark Issues as Stale + uses: actions/stale@v3 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + stale-issue-message: > + This issue has been marked stale due to no recent activity in the past 30d. + Please close this issue if no further response or action is needed. + Otherwise, please respond with a comment indicating any updates or changes to the original issue and/or confirm this issue still needs to be addressed. + This issue will be marked rotten if there is no activity in the next 60d. + stale-issue-label: "stale" + days-before-issue-stale: 30 + days-before-issue-close: -1 + mark-stale-prs: + runs-on: ubuntu-latest + steps: + - name: Mark PRs as Stale + uses: actions/stale@v3 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + stale-pr-message: > + This PR has been marked stale due to no recent activity in the past 30d. + Please close this PR if it is no longer required. 
+ Otherwise, please respond with a comment indicating any updates. + This PR will be marked rotten if there is no activity in the next 60d. + stale-pr-label: "stale" + days-before-pr-stale: 30 + days-before-pr-close: -1 + mark-rotten-issues: + runs-on: ubuntu-latest + steps: + - name: Mark Issues as Rotten + uses: actions/stale@v3 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + stale-issue-message: > + This issue has been marked rotten due to no recent activity in the past 90d. + Please close this issue if no further response or action is needed. + Otherwise, please respond with a comment indicating any updates or changes to the original issue and/or confirm this issue still needs to be addressed. + stale-issue-label: "rotten" + days-before-issue-stale: 90 + days-before-issue-close: -1 + mark-rotten-prs: + runs-on: ubuntu-latest + steps: + - name: Mark PRs as Rotten + uses: actions/stale@v3 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + stale-pr-message: > + This PR has been marked rotten due to no recent activity in the past 90d. + Please close this PR if it is no longer required. + Otherwise, please respond with a comment indicating any updates. + stale-pr-label: "rotten" + days-before-pr-stale: 90 + days-before-pr-close: -1 From d78f3cb460544fae159a6004c9e1a7457476d5e5 Mon Sep 17 00:00:00 2001 From: Mike Wendt <1915404+mike-wendt@users.noreply.github.com> Date: Tue, 16 Feb 2021 21:54:13 -0500 Subject: [PATCH 170/343] Update stale GHA with exemptions & new labels (#1413) Follows #1408 Updates the stale GHA with the following changes: - [x] Uses `inactive-30d` and `inactive-90d` labels instead of `stale` and `rotten` - [x] Updates comments to reflect changes in labels - [x] Exempts the following labels from being marked `inactive-30d` or `inactive-90d` - `0 - Blocked` - `0 - Backlog` - `good first issue` Authors: - Mike Wendt (@mike-wendt) Approvers: - Ray Douglass (@raydouglass) URL: https://github.com/rapidsai/cugraph/pull/1413 --- .github/workflows/stale.yaml | 50 +++++++++++++++--------------------- 1 file changed, 21 insertions(+), 29 deletions(-) diff --git a/.github/workflows/stale.yaml b/.github/workflows/stale.yaml index 3b7de7ec699..8b65da69aa2 100644 --- a/.github/workflows/stale.yaml +++ b/.github/workflows/stale.yaml @@ -1,65 +1,57 @@ -name: Mark stale and rotten issues and pull requests +name: Mark inactive issues and pull requests on: schedule: - cron: "0 * * * *" jobs: - mark-stale-issues: + mark-inactive-30d: runs-on: ubuntu-latest steps: - - name: Mark Issues as Stale + - name: Mark 30 day inactive issues and pull requests uses: actions/stale@v3 with: repo-token: ${{ secrets.GITHUB_TOKEN }} stale-issue-message: > - This issue has been marked stale due to no recent activity in the past 30d. + This issue has been labeled `inactive-30d` due to no recent activity in the past 30 days. Please close this issue if no further response or action is needed. Otherwise, please respond with a comment indicating any updates or changes to the original issue and/or confirm this issue still needs to be addressed. - This issue will be marked rotten if there is no activity in the next 60d. - stale-issue-label: "stale" + This issue will be labeled `inactive-90d` if there is no activity in the next 60 days. 
+ stale-issue-label: "inactive-30d" + exempt-issue-labels: "0 - Blocked,0 - Backlog,good first issue" days-before-issue-stale: 30 days-before-issue-close: -1 - mark-stale-prs: - runs-on: ubuntu-latest - steps: - - name: Mark PRs as Stale - uses: actions/stale@v3 - with: - repo-token: ${{ secrets.GITHUB_TOKEN }} stale-pr-message: > - This PR has been marked stale due to no recent activity in the past 30d. + This PR has been labeled `inactive-30d` due to no recent activity in the past 30 days. Please close this PR if it is no longer required. Otherwise, please respond with a comment indicating any updates. - This PR will be marked rotten if there is no activity in the next 60d. - stale-pr-label: "stale" + This PR will be labeled `inactive-90d` if there is no activity in the next 60 days. + stale-pr-label: "inactive-30d" + exempt-pr-labels: "0 - Blocked,0 - Backlog,good first issue" days-before-pr-stale: 30 days-before-pr-close: -1 - mark-rotten-issues: + operations-per-run: 50 + mark-inactive-90d: runs-on: ubuntu-latest steps: - - name: Mark Issues as Rotten + - name: Mark 90 day inactive issues and pull requests uses: actions/stale@v3 with: repo-token: ${{ secrets.GITHUB_TOKEN }} stale-issue-message: > - This issue has been marked rotten due to no recent activity in the past 90d. + This issue has been labeled `inactive-90d` due to no recent activity in the past 90 days. Please close this issue if no further response or action is needed. Otherwise, please respond with a comment indicating any updates or changes to the original issue and/or confirm this issue still needs to be addressed. - stale-issue-label: "rotten" + stale-issue-label: "inactive-90d" + exempt-issue-labels: "0 - Blocked,0 - Backlog,good first issue" days-before-issue-stale: 90 days-before-issue-close: -1 - mark-rotten-prs: - runs-on: ubuntu-latest - steps: - - name: Mark PRs as Rotten - uses: actions/stale@v3 - with: - repo-token: ${{ secrets.GITHUB_TOKEN }} stale-pr-message: > - This PR has been marked rotten due to no recent activity in the past 90d. + This PR has been labeled `inactive-90d` due to no recent activity in the past 90 days. Please close this PR if it is no longer required. Otherwise, please respond with a comment indicating any updates. - stale-pr-label: "rotten" + stale-pr-label: "inactive-90d" + exempt-pr-labels: "0 - Blocked,0 - Backlog,good first issue" days-before-pr-stale: 90 days-before-pr-close: -1 + operations-per-run: 50 From cb2d841673c04daa44054623dc662fab6d25fbce Mon Sep 17 00:00:00 2001 From: Hugo Linsenmaier Date: Wed, 17 Feb 2021 11:23:30 -0800 Subject: [PATCH 171/343] TSP fix route return (#1412) The vertex list that was fed as input to TSP was of type `int64` which ended up being corrupted when passed down to the cpp layer as `vtx_ptr`. I updated the wrapper to cast the vertices to `int32`. In addition, I fixed the handling of nstart in the wrapper which was assuming vertex ids were starting at 1. 
Solves: https://github.com/rapidsai/cugraph/issues/1410 Authors: - Hugo Linsenmaier (@hlinsen) Approvers: - Brad Rees (@BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1412 --- python/cugraph/traversal/traveling_salesperson.py | 4 ++-- .../cugraph/traversal/traveling_salesperson_wrapper.pyx | 8 ++++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/python/cugraph/traversal/traveling_salesperson.py b/python/cugraph/traversal/traveling_salesperson.py index 80f9cd7441b..ae17555e4ea 100644 --- a/python/cugraph/traversal/traveling_salesperson.py +++ b/python/cugraph/traversal/traveling_salesperson.py @@ -20,7 +20,7 @@ def traveling_salesperson(pos_list, restarts=100000, beam_search=True, k=4, - nstart=1, + nstart=None, verbose=False, ): """ @@ -62,7 +62,7 @@ def traveling_salesperson(pos_list, null_check(pos_list['x']) null_check(pos_list['y']) - if not pos_list[pos_list['vertex'] == nstart].index: + if nstart is not None and not pos_list[pos_list['vertex'] == nstart].index: raise ValueError("nstart should be in vertex ids") route, cost = traveling_salesperson_wrapper.traveling_salesperson( diff --git a/python/cugraph/traversal/traveling_salesperson_wrapper.pyx b/python/cugraph/traversal/traveling_salesperson_wrapper.pyx index b728c3ff37d..5f87c42a638 100644 --- a/python/cugraph/traversal/traveling_salesperson_wrapper.pyx +++ b/python/cugraph/traversal/traveling_salesperson_wrapper.pyx @@ -31,7 +31,7 @@ def traveling_salesperson(pos_list, restarts=100000, beam_search=True, k=4, - nstart=1, + nstart=None, verbose=False, renumber=True, ): @@ -43,6 +43,7 @@ def traveling_salesperson(pos_list, cdef uintptr_t x_pos = NULL cdef uintptr_t y_pos = NULL + pos_list['vertex'] = pos_list['vertex'].astype(np.int32) pos_list['x'] = pos_list['x'].astype(np.float32) pos_list['y'] = pos_list['y'].astype(np.float32) x_pos = pos_list['x'].__cuda_array_interface__['data'][0] @@ -61,7 +62,10 @@ def traveling_salesperson(pos_list, cdef uintptr_t vtx_ptr = NULL vtx_ptr = pos_list['vertex'].__cuda_array_interface__['data'][0] - renumbered_nstart = pos_list[pos_list['vertex'] == nstart].index[0] + if nstart is None: + renumbered_nstart = 0 + else: + renumbered_nstart = pos_list[pos_list['vertex'] == nstart].index[0] final_cost = c_traveling_salesperson(handle_[0], vtx_ptr, From 369beee0f912e87bd8272ae500bc56711319f6d6 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Mon, 22 Feb 2021 15:42:16 -0500 Subject: [PATCH 172/343] Rename sort_and_shuffle to groupby_gpuid_and_shuffle (#1392) Rename to better reflect what this function should do than how it is currently implemented (which can change in the future for better performance). 
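For reference, the call sites keep exactly the same shape; only the name changes. A minimal illustrative sketch of the post-rename pattern follows (the helper name and the d_vertices buffer are placeholders for this description, not code added by this PR; it assumes a multi-GPU raft::handle_t, the utilities/shuffle_comm.cuh header touched below, and whichever header provides compute_gpu_id_from_vertex_t):

  // Illustrative only: group a device vector of vertex ids by the GPU that owns them
  // (as decided by compute_gpu_id_from_vertex_t) and shuffle each group to that GPU.
  template <typename vertex_t>
  rmm::device_uvector<vertex_t> shuffle_vertices_to_owner_gpu(
    raft::handle_t const& handle, rmm::device_uvector<vertex_t>& d_vertices)
  {
    auto& comm           = handle.get_comms();
    auto const comm_size = comm.get_size();

    rmm::device_uvector<vertex_t> rx_vertices(0, handle.get_stream());
    std::tie(rx_vertices, std::ignore) =
      cugraph::experimental::groupby_gpuid_and_shuffle_values(
        comm,
        d_vertices.begin(),
        d_vertices.end(),
        [key_func = cugraph::experimental::detail::compute_gpu_id_from_vertex_t<vertex_t>{
          comm_size}] __device__(auto val) { return key_func(val); },
        handle.get_stream());
    // rx_vertices now holds the values received by this rank; the ignored second tuple
    // element carries the per-rank receive counts.
    return rx_vertices;
  }

Any other value_to_gpu_id_op can be passed in the same position; the primitive itself does not care how the target GPU is computed.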
Authors: - Seunghwa Kang (@seunghwak) Approvers: - Chuck Hastings (@ChuckHastings) - Alex Fender (@afender) URL: https://github.com/rapidsai/cugraph/pull/1392 --- ...ransform_reduce_key_aggregated_out_nbr.cuh | 4 ++-- ...orm_reduce_by_adj_matrix_row_col_key_e.cuh | 2 +- cpp/include/utilities/cython.hpp | 3 ++- cpp/include/utilities/shuffle_comm.cuh | 22 +++++++++---------- cpp/src/experimental/coarsen_graph.cu | 4 ++-- cpp/src/experimental/relabel.cu | 4 ++-- cpp/src/experimental/renumber_edgelist.cu | 18 +++++++-------- cpp/src/utilities/cython.cu | 5 +++-- python/cugraph/structure/graph_primtypes.pxd | 2 +- 9 files changed, 33 insertions(+), 31 deletions(-) diff --git a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh index 774f6d08bf4..785f8197aff 100644 --- a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh +++ b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh @@ -234,7 +234,7 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( rmm::device_uvector rx_unique_keys(0, handle.get_stream()); std::vector rx_value_counts{}; - std::tie(rx_unique_keys, rx_value_counts) = sort_and_shuffle_values( + std::tie(rx_unique_keys, rx_value_counts) = groupby_gpuid_and_shuffle_values( comm, unique_keys.begin(), unique_keys.end(), @@ -372,7 +372,7 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( rmm::device_uvector rx_key_aggregated_edge_weights(0, handle.get_stream()); std::forward_as_tuple( std::tie(rx_major_vertices, rx_minor_keys, rx_key_aggregated_edge_weights), std::ignore) = - sort_and_shuffle_values( + groupby_gpuid_and_shuffle_values( sub_comm, triplet_first, triplet_first + tmp_major_vertices.size(), diff --git a/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh b/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh index 7ffd17faec9..70b6dc92752 100644 --- a/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh +++ b/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh @@ -241,7 +241,7 @@ transform_reduce_by_adj_matrix_row_col_key_e( rmm::device_uvector rx_unique_keys(0, handle.get_stream()); auto rx_value_for_unique_key_buffer = allocate_dataframe_buffer(0, handle.get_stream()); std::tie(rx_unique_keys, rx_value_for_unique_key_buffer, std::ignore) = - sort_and_shuffle_kv_pairs( + groupby_gpuid_and_shuffle_kv_pairs( comm, unique_keys.begin(), unique_keys.end(), diff --git a/cpp/include/utilities/cython.hpp b/cpp/include/utilities/cython.hpp index a22553777e2..e94190897b8 100644 --- a/cpp/include/utilities/cython.hpp +++ b/cpp/include/utilities/cython.hpp @@ -451,7 +451,8 @@ std::unique_ptr call_egonet(raft::handle_t const& handle, template std::unique_ptr> call_shuffle( raft::handle_t const& handle, - vertex_t* edgelist_major_vertices, // [IN / OUT]: sort_and_shuffle_values() sorts in-place + vertex_t* + edgelist_major_vertices, // [IN / OUT]: groupby_gpuid_and_shuffle_values() sorts in-place vertex_t* edgelist_minor_vertices, // [IN / OUT] weight_t* edgelist_weights, // [IN / OUT] edge_t num_edgelist_edges, diff --git a/cpp/include/utilities/shuffle_comm.cuh b/cpp/include/utilities/shuffle_comm.cuh index 05fe51184ca..7e04c7e1972 100644 --- a/cpp/include/utilities/shuffle_comm.cuh +++ b/cpp/include/utilities/shuffle_comm.cuh @@ -232,11 +232,11 @@ auto shuffle_values(raft::comms::comms_t const &comm, } template -auto sort_and_shuffle_values(raft::comms::comms_t const &comm, - 
ValueIterator tx_value_first /* [INOUT */, - ValueIterator tx_value_last /* [INOUT */, - ValueToGPUIdOp value_to_gpu_id_op, - cudaStream_t stream) +auto groupby_gpuid_and_shuffle_values(raft::comms::comms_t const &comm, + ValueIterator tx_value_first /* [INOUT */, + ValueIterator tx_value_last /* [INOUT */, + ValueToGPUIdOp value_to_gpu_id_op, + cudaStream_t stream) { auto const comm_size = comm.get_size(); @@ -275,12 +275,12 @@ auto sort_and_shuffle_values(raft::comms::comms_t const &comm, } template -auto sort_and_shuffle_kv_pairs(raft::comms::comms_t const &comm, - VertexIterator tx_key_first /* [INOUT */, - VertexIterator tx_key_last /* [INOUT */, - ValueIterator tx_value_first /* [INOUT */, - KeyToGPUIdOp key_to_gpu_id_op, - cudaStream_t stream) +auto groupby_gpuid_and_shuffle_kv_pairs(raft::comms::comms_t const &comm, + VertexIterator tx_key_first /* [INOUT */, + VertexIterator tx_key_last /* [INOUT */, + ValueIterator tx_value_first /* [INOUT */, + KeyToGPUIdOp key_to_gpu_id_op, + cudaStream_t stream) { auto d_tx_value_counts = detail::sort_and_count( comm, tx_key_first, tx_key_last, tx_value_first, key_to_gpu_id_op, stream); diff --git a/cpp/src/experimental/coarsen_graph.cu b/cpp/src/experimental/coarsen_graph.cu index 02b0c388b31..0cd551b0d73 100644 --- a/cpp/src/experimental/coarsen_graph.cu +++ b/cpp/src/experimental/coarsen_graph.cu @@ -329,7 +329,7 @@ coarsen_graph( std::forward_as_tuple( std::tie(rx_edgelist_major_vertices, rx_edgelist_minor_vertices, rx_edgelist_weights), std::ignore) = - sort_and_shuffle_values( + groupby_gpuid_and_shuffle_values( handle.get_comms(), edge_first, edge_first + coarsened_edgelist_major_vertices.size(), @@ -371,7 +371,7 @@ coarsen_graph( handle.get_stream()); rmm::device_uvector rx_unique_labels(0, handle.get_stream()); - std::tie(rx_unique_labels, std::ignore) = sort_and_shuffle_values( + std::tie(rx_unique_labels, std::ignore) = groupby_gpuid_and_shuffle_values( handle.get_comms(), unique_labels.begin(), unique_labels.end(), diff --git a/cpp/src/experimental/relabel.cu b/cpp/src/experimental/relabel.cu index 4a36cac180d..62bd6951f71 100644 --- a/cpp/src/experimental/relabel.cu +++ b/cpp/src/experimental/relabel.cu @@ -106,7 +106,7 @@ void relabel(raft::handle_t const& handle, thrust::make_tuple(label_pair_old_labels.begin(), label_pair_new_labels.begin())); std::forward_as_tuple(std::tie(rx_label_pair_old_labels, rx_label_pair_new_labels), std::ignore) = - sort_and_shuffle_values( + groupby_gpuid_and_shuffle_values( handle.get_comms(), pair_first, pair_first + num_label_pairs, @@ -142,7 +142,7 @@ void relabel(raft::handle_t const& handle, { rmm::device_uvector rx_unique_old_labels(0, handle.get_stream()); std::vector rx_value_counts{}; - std::tie(rx_unique_old_labels, rx_value_counts) = sort_and_shuffle_values( + std::tie(rx_unique_old_labels, rx_value_counts) = groupby_gpuid_and_shuffle_values( handle.get_comms(), unique_old_labels.begin(), unique_old_labels.end(), diff --git a/cpp/src/experimental/renumber_edgelist.cu b/cpp/src/experimental/renumber_edgelist.cu index 1f9a5a573fa..6a5a1c732c2 100644 --- a/cpp/src/experimental/renumber_edgelist.cu +++ b/cpp/src/experimental/renumber_edgelist.cu @@ -151,14 +151,14 @@ rmm::device_uvector compute_renumber_map( auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(labels.begin(), counts.begin())); rmm::device_uvector rx_labels(0, handle.get_stream()); rmm::device_uvector rx_counts(0, handle.get_stream()); - std::forward_as_tuple(std::tie(rx_labels, rx_counts), std::ignore) = 
sort_and_shuffle_values( - comm, - pair_first, - pair_first + labels.size(), - [key_func = detail::compute_gpu_id_from_vertex_t{comm_size}] __device__(auto val) { - return key_func(thrust::get<0>(val)); - }, - handle.get_stream()); + std::forward_as_tuple(std::tie(rx_labels, rx_counts), std::ignore) = + groupby_gpuid_and_shuffle_values( + comm, + pair_first, + pair_first + labels.size(), + [key_func = detail::compute_gpu_id_from_vertex_t{comm_size}] __device__( + auto val) { return key_func(thrust::get<0>(val)); }, + handle.get_stream()); labels.resize(rx_labels.size(), handle.get_stream()); counts.resize(labels.size(), handle.get_stream()); @@ -309,7 +309,7 @@ void expensive_check_edgelist( handle.get_stream()); rmm::device_uvector rx_unique_edge_vertices(0, handle.get_stream()); - std::tie(rx_unique_edge_vertices, std::ignore) = sort_and_shuffle_values( + std::tie(rx_unique_edge_vertices, std::ignore) = groupby_gpuid_and_shuffle_values( handle.get_comms(), unique_edge_vertices.begin(), unique_edge_vertices.end(), diff --git a/cpp/src/utilities/cython.cu b/cpp/src/utilities/cython.cu index ce7b45c1c08..e95a001cb91 100644 --- a/cpp/src/utilities/cython.cu +++ b/cpp/src/utilities/cython.cu @@ -749,7 +749,8 @@ void call_sssp(raft::handle_t const& handle, template std::unique_ptr> call_shuffle( raft::handle_t const& handle, - vertex_t* edgelist_major_vertices, // [IN / OUT]: sort_and_shuffle_values() sorts in-place + vertex_t* + edgelist_major_vertices, // [IN / OUT]: groupby_gpuid_and_shuffle_values() sorts in-place vertex_t* edgelist_minor_vertices, // [IN / OUT] weight_t* edgelist_weights, // [IN / OUT] edge_t num_edgelist_edges, @@ -770,7 +771,7 @@ std::unique_ptr> call_shuffle( std::forward_as_tuple( std::tie(ptr_ret->get_major(), ptr_ret->get_minor(), ptr_ret->get_weights()), std::ignore) = - cugraph::experimental::sort_and_shuffle_values( + cugraph::experimental::groupby_gpuid_and_shuffle_values( comm, // handle.get_comms(), zip_edge, zip_edge + num_edgelist_edges, diff --git a/python/cugraph/structure/graph_primtypes.pxd b/python/cugraph/structure/graph_primtypes.pxd index 1a2891494ff..07132df2598 100644 --- a/python/cugraph/structure/graph_primtypes.pxd +++ b/python/cugraph/structure/graph_primtypes.pxd @@ -200,7 +200,7 @@ cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": vertex_t get_part_matrix_partition_minor_first() vertex_t get_part_matrix_partition_minor_last() -# 4. `sort_and_shuffle_values()` wrapper: +# 4. 
`groupby_gpuid_and_shuffle_values()` wrapper: # cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": From 89bffa5d5a593eb7e0c3d6b9c319a3cfa217877a Mon Sep 17 00:00:00 2001 From: Jordan Jacobelli Date: Tue, 23 Feb 2021 16:34:33 +0100 Subject: [PATCH 173/343] ENH Build with `cmake --build` & Pass ccache variables to conda recipe & use Ninja in CI (#1415) - Build using `cmake --build` - Add ccache env variables to conda recipe - Use Ninja in CI Authors: - Jordan Jacobelli (@Ethyling) Approvers: - Dillon Cullinan (@dillon-cullinan) URL: https://github.com/rapidsai/cugraph/pull/1415 --- build.sh | 8 ++++---- ci/cpu/build.sh | 5 ++++- conda/recipes/libcugraph/meta.yaml | 7 +++++++ 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/build.sh b/build.sh index ef210e841c6..54634e2ca6e 100755 --- a/build.sh +++ b/build.sh @@ -46,7 +46,7 @@ CUGRAPH_BUILD_DIR=${REPODIR}/python/build BUILD_DIRS="${LIBCUGRAPH_BUILD_DIR} ${CUGRAPH_BUILD_DIR}" # Set defaults for vars modified by flags to this script -VERBOSE="" +VERBOSE_FLAG="" BUILD_TYPE=Release INSTALL_TARGET=install BUILD_DISABLE_DEPRECATION_WARNING=ON @@ -86,7 +86,7 @@ fi # Process flags if hasArg -v; then - VERBOSE=1 + VERBOSE_FLAG="-v" fi if hasArg -g; then BUILD_TYPE=Debug @@ -143,7 +143,7 @@ if buildAll || hasArg libcugraph; then -DBUILD_STATIC_FAISS=${BUILD_STATIC_FAISS} \ -DBUILD_CUGRAPH_MG_TESTS=${BUILD_CPP_MG_TESTS} \ ${REPODIR}/cpp - make -j${PARALLEL_LEVEL} VERBOSE=${VERBOSE} ${INSTALL_TARGET} + cmake --build "${LIBCUGRAPH_BUILD_DIR}" -j${PARALLEL_LEVEL} --target ${INSTALL_TARGET} ${VERBOSE_FLAG} fi # Build and install the cugraph Python package @@ -169,7 +169,7 @@ if buildAll || hasArg docs; then -DBUILD_STATIC_FAISS=${BUILD_STATIC_FAISS} fi cd ${LIBCUGRAPH_BUILD_DIR} - make -j${PARALLEL_LEVEL} VERBOSE=${VERBOSE} docs_cugraph + cmake --build "${LIBCUGRAPH_BUILD_DIR}" -j${PARALLEL_LEVEL} --target docs_cugraph ${VERBOSE_FLAG} cd ${REPODIR}/docs make html fi diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh index 2c6dc899be2..d69448cda4e 100755 --- a/ci/cpu/build.sh +++ b/ci/cpu/build.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2018-2020, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. 
######################################### # cuGraph CPU conda build script for CI # ######################################### @@ -24,6 +24,9 @@ fi export GPUCI_CONDA_RETRY_MAX=1 export GPUCI_CONDA_RETRY_SLEEP=30 +# Use Ninja to build +export CMAKE_GENERATOR="Ninja" + ################################################################################ # SETUP - Check environment ################################################################################ diff --git a/conda/recipes/libcugraph/meta.yaml b/conda/recipes/libcugraph/meta.yaml index cd83e5a9b7a..8f7495eab3c 100644 --- a/conda/recipes/libcugraph/meta.yaml +++ b/conda/recipes/libcugraph/meta.yaml @@ -21,6 +21,13 @@ build: - CUDAHOSTCXX - PARALLEL_LEVEL - VERSION_SUFFIX + - CCACHE_DIR + - CCACHE_NOHASHDIR + - CCACHE_COMPILERCHECK + - CMAKE_GENERATOR + - CMAKE_C_COMPILER_LAUNCHER + - CMAKE_CXX_COMPILER_LAUNCHER + - CMAKE_CUDA_COMPILER_LAUNCHER requirements: build: From 0cc951f709a0907d356883018978221274712f9d Mon Sep 17 00:00:00 2001 From: Raymond Douglass Date: Wed, 24 Feb 2021 11:04:01 -0500 Subject: [PATCH 174/343] update changelog --- CHANGELOG.md | 56 +++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 53 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2957a22a68d..3740c4227a1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,56 @@ -# 0.18.0 - -Please see https://github.com/rapidsai/cugraph/releases/tag/branch-0.18-latest for the latest changes to this development branch. +# cuGraph 0.18.0 (24 Feb 2021) + +## Bug Fixes 🐛 + +- Fixed TSP returned routes (#1412) @hlinsen +- Updated CI scripts to use a different error handling convention, updated LD_LIBRARY_PATH for project flash runs (#1386) @rlratzel +- Bug fixes for MNMG coarsen_graph, renumber_edgelist, relabel (#1364) @seunghwak +- Set a specific known working commit hash for gunrock instead of "dev" (#1336) @rlratzel +- Updated git utils used by copyright.py for compatibility with current CI env (#1325) @rlratzel +- Fix MNMG Louvain tests on Pascal architecture (#1322) @ChuckHastings +- FIX Set bash trap after PATH is updated (#1321) @dillon-cullinan +- Fix graph nodes function and renumbering from series (#1319) @Iroy30 +- Fix Branch 0.18 merge 0.17 (#1314) @BradReesWork +- Fix EXPERIMENTAL_LOUVAIN_TEST on Pascal (#1312) @ChuckHastings +- Updated cuxfilter to 0.18, removed datashader indirect dependency in conda dev .yml files (#1311) @rlratzel +- Update SG PageRank C++ tests (#1307) @seunghwak + +## Documentation 📖 + +- Enabled MultiGraph class and tests, updated SOURCEBUILD.md to include the latest build.sh options (#1351) @rlratzel + +## New Features 🚀 + +- New EgoNet extractor (#1365) @afender +- Implement induced subgraph extraction primitive (SG C++) (#1354) @seunghwak + +## Improvements 🛠️ + +- Update stale GHA with exemptions & new labels (#1413) @mike-wendt +- Add GHA to mark issues/prs as stale/rotten (#1408) @Ethyling +- update subgraph tests and remove legacy pagerank (#1378) @Iroy30 +- Update the conda environments and README file (#1369) @BradReesWork +- Prepare Changelog for Automation (#1368) @ajschmidt8 +- Update CMakeLists.txt files for consistency with RAPIDS and to support cugraph as an external project and other tech debt removal (#1367) @rlratzel +- Use new coarsen_graph primitive in Louvain (#1362) @ChuckHastings +- Added initial infrastructure for MG C++ testing and a Pagerank MG test using it (#1361) @rlratzel +- Add SG TSP (#1360) @hlinsen +- Build a Dendrogram class, adapt Louvain/Leiden/ECG to use 
it (#1359) @ChuckHastings +- Auto-label PRs based on their content (#1358) @jolorunyomi +- Implement MNMG Renumber (#1355) @aschaffer +- Enabling pytest code coverage output by default (#1352) @jnke2016 +- Added configuration for new cugraph-doc-codeowners review group (#1344) @rlratzel +- API update to match RAFT PR #120 (#1343) @drobison00 +- Pin gunrock to v1.2 for version 0.18 (#1342) @ChuckHastings +- Fix #1340 - Use generic from_edgelist() methods (#1341) @miguelusque +- Using RAPIDS_DATASET_ROOT_DIR env var in place of absolute path to datasets in tests (#1337) @jnke2016 +- Expose dense implementation of Hungarian algorithm (#1333) @ChuckHastings +- SG Pagerank transition (#1332) @Iroy30 +- improving error checking and docs (#1327) @BradReesWork +- Fix MNMG cleanup exceptions (#1326) @Iroy30 +- Create labeler.yml (#1318) @jolorunyomi +- Updates to support nightly MG test automation (#1308) @rlratzel +- Add C++ graph functions (coarsen_grpah, renumber_edgelist, relabel) and primitvies (transform_reduce_by_adj_matrix_row_key, transform_reduce_by_adj_matrix_col_key, copy_v_transform_reduce_key_aggregated_out_nbr) (#1257) @seunghwak # cuGraph 0.17.0 (10 Dec 2020) ## New Features From 06ac713c5e5700185abe28fbc261c84e2b7165a8 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Thu, 25 Feb 2021 17:08:15 -0500 Subject: [PATCH 175/343] Matching updates for RAFT comms updates (device_sendrecv, device_multicast_sendrecv, gather, gatherv) (#1391) - [x] Update cuGraph to use RAFT::comms_t's newly added device_sendrecv & device_multicast_sendrecv) - [x] Update cuGraph to use RAFT::comms_t's newly added gather & gatherv - [x] Update RAFT git tag once https://github.com/rapidsai/raft/pull/114 (currently merged in 0.18 but is not merged to 0.19) and https://github.com/rapidsai/raft/pull/144 are merged to 0.19 Ready for review but cannot be merged till RAFT PR 114 and 144 are merged to RAFT branch-0.19. 
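As a rough illustration of the call-shape change made in `device_comm.cuh` below (the raw `ncclGroupStart()/ncclSend()/ncclRecv()/ncclGroupEnd()` sequences are replaced with the corresponding `raft::comms_t` methods), a point-to-point exchange now reduces to a single `device_sendrecv` call. This is a minimal sketch only; the helper name `exchange_with_peer` and the include paths are illustrative and not part of this patch.

```cpp
#include <raft/comms/comms.hpp>  // raft::comms::comms_t (header path assumed from the surrounding code)
#include <cuda_runtime.h>

// Sketch of the post-patch pattern used inside device_sendrecv_impl():
// one device_sendrecv call instead of an ncclGroupStart/ncclSend/ncclRecv/ncclGroupEnd block.
template <typename value_t>
void exchange_with_peer(raft::comms::comms_t const& comm,
                        value_t const* send_buf, size_t tx_count, int dst,
                        value_t* recv_buf, size_t rx_count, int src,
                        cudaStream_t stream)
{
  comm.device_sendrecv(send_buf, tx_count, dst, recv_buf, rx_count, src, stream);
}
```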
Authors: - Seunghwa Kang (@seunghwak) Approvers: - Alex Fender (@afender) URL: https://github.com/rapidsai/cugraph/pull/1391 --- cpp/CMakeLists.txt | 2 +- cpp/include/utilities/device_comm.cuh | 55 +++++++--------------- cpp/include/utilities/host_scalar_comm.cuh | 10 ---- 3 files changed, 17 insertions(+), 50 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index b2d537edaa2..d211fe9ed5a 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -298,7 +298,7 @@ else(DEFINED ENV{RAFT_PATH}) FetchContent_Declare( raft GIT_REPOSITORY https://github.com/rapidsai/raft.git - GIT_TAG 4a79adcb0c0e87964dcdc9b9122f242b5235b702 + GIT_TAG a3461b201ea1c9f61571f1927274f739e775d2d2 SOURCE_SUBDIR raft ) diff --git a/cpp/include/utilities/device_comm.cuh b/cpp/include/utilities/device_comm.cuh index 8c3b0f86a47..24b9147ce3c 100644 --- a/cpp/include/utilities/device_comm.cuh +++ b/cpp/include/utilities/device_comm.cuh @@ -196,21 +196,13 @@ device_sendrecv_impl(raft::comms::comms_t const& comm, using value_type = typename std::iterator_traits::value_type; static_assert( std::is_same::value_type, value_type>::value); - // ncclSend/ncclRecv pair needs to be located inside ncclGroupStart/ncclGroupEnd to avoid deadlock - ncclGroupStart(); - ncclSend(iter_to_raw_ptr(input_first), - tx_count * sizeof(value_type), - ncclUint8, - dst, - comm.get_nccl_comm(), - stream); - ncclRecv(iter_to_raw_ptr(output_first), - rx_count * sizeof(value_type), - ncclUint8, - src, - comm.get_nccl_comm(), - stream); - ncclGroupEnd(); + comm.device_sendrecv(iter_to_raw_ptr(input_first), + tx_count, + dst, + iter_to_raw_ptr(output_first), + rx_count, + src, + stream); } template @@ -288,25 +280,15 @@ device_multicast_sendrecv_impl(raft::comms::comms_t const& comm, using value_type = typename std::iterator_traits::value_type; static_assert( std::is_same::value_type, value_type>::value); - // ncclSend/ncclRecv pair needs to be located inside ncclGroupStart/ncclGroupEnd to avoid deadlock - ncclGroupStart(); - for (size_t i = 0; i < tx_counts.size(); ++i) { - ncclSend(iter_to_raw_ptr(input_first + tx_offsets[i]), - tx_counts[i] * sizeof(value_type), - ncclUint8, - tx_dst_ranks[i], - comm.get_nccl_comm(), - stream); - } - for (size_t i = 0; i < rx_counts.size(); ++i) { - ncclRecv(iter_to_raw_ptr(output_first + rx_offsets[i]), - rx_counts[i] * sizeof(value_type), - ncclUint8, - rx_src_ranks[i], - comm.get_nccl_comm(), - stream); - } - ncclGroupEnd(); + comm.device_multicast_sendrecv(iter_to_raw_ptr(input_first), + tx_counts, + tx_offsets, + tx_dst_ranks, + iter_to_raw_ptr(output_first), + rx_counts, + rx_offsets, + rx_src_ranks, + stream); } template @@ -589,10 +571,6 @@ device_gatherv_impl(raft::comms::comms_t const& comm, { static_assert(std::is_same::value_type, typename std::iterator_traits::value_type>::value); - // FIXME: should be enabled once the RAFT gather & gatherv PR is merged -#if 1 - CUGRAPH_FAIL("Unimplemented."); -#else comm.gatherv(iter_to_raw_ptr(input_first), iter_to_raw_ptr(output_first), sendcount, @@ -600,7 +578,6 @@ device_gatherv_impl(raft::comms::comms_t const& comm, displacements.data(), root, stream); -#endif } template diff --git a/cpp/include/utilities/host_scalar_comm.cuh b/cpp/include/utilities/host_scalar_comm.cuh index dda0ce1f091..2ecfd913813 100644 --- a/cpp/include/utilities/host_scalar_comm.cuh +++ b/cpp/include/utilities/host_scalar_comm.cuh @@ -321,16 +321,11 @@ std::enable_if_t::value, std::vector> host_scalar_gathe &input, 1, stream); - // FIXME: should be enabled once the RAFT 
gather & gatherv PR is merged -#if 1 - CUGRAPH_FAIL("Unimplemented."); -#else comm.gather(comm.get_rank() == root ? d_outputs.data() + comm.get_rank() : d_outputs.data(), d_outputs.data(), size_t{1}, root, stream); -#endif std::vector h_outputs(comm.get_rank() == root ? comm.get_size() : 0); if (comm.get_rank() == root) { raft::update_host(h_outputs.data(), d_outputs.data(), comm.get_size(), stream); @@ -358,10 +353,6 @@ host_scalar_gather(raft::comms::comms_t const& comm, T input, int root, cudaStre h_tuple_scalar_elements.data(), tuple_size, stream); - // FIXME: should be enabled once the RAFT gather & gatherv PR is merged -#if 1 - CUGRAPH_FAIL("Unimplemented."); -#else comm.gather(comm.get_rank() == root ? d_gathered_tuple_scalar_elements.data() + comm.get_rank() * tuple_size : d_gathered_tuple_scalar_elements.data(), @@ -369,7 +360,6 @@ host_scalar_gather(raft::comms::comms_t const& comm, T input, int root, cudaStre tuple_size, root, stream); -#endif std::vector h_gathered_tuple_scalar_elements( comm.get_rank() == root ? comm.get_size() * tuple_size : size_t{0}); if (comm.get_rank() == root) { From 99d1328922b03a96734cf7b520263af66347e55c Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Thu, 25 Feb 2021 17:21:27 -0500 Subject: [PATCH 176/343] Adding new primitives: copy_v_transform_reduce_key_aggregated_out_nbr & transform_reduce_by_adj_matrix_row|col_key_e bug fixes (#1399) bug fixes Authors: - Seunghwa Kang (@seunghwak) Approvers: - Chuck Hastings (@ChuckHastings) - Brad Rees (@BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1399 --- ...ransform_reduce_key_aggregated_out_nbr.cuh | 20 +-- ...orm_reduce_by_adj_matrix_row_col_key_e.cuh | 128 ++++++++---------- cpp/include/utilities/device_comm.cuh | 8 +- 3 files changed, 75 insertions(+), 81 deletions(-) diff --git a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh index 785f8197aff..8490df1d17d 100644 --- a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh +++ b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh @@ -18,8 +18,10 @@ #include #include #include +#include #include #include +#include #include #include @@ -100,10 +102,10 @@ __global__ void for_all_major_for_all_nbr_low_degree( } thrust::fill(thrust::seq, major_vertices + local_offset, - major_vertices + local_offset + key_idx, + major_vertices + local_offset + key_idx + 1, matrix_partition.get_major_from_major_offset_nocheck(major_offset)); thrust::fill(thrust::seq, - major_vertices + local_offset + key_idx, + major_vertices + local_offset + key_idx + 1, major_vertices + local_offset + local_degree, invalid_vertex); } @@ -159,8 +161,7 @@ __global__ void for_all_major_for_all_nbr_low_degree( * pairs provided by @p map_key_first, @p map_key_last, and @p map_value_first (aggregated over the * entire set of processes in multi-GPU). * @param reduce_op Binary operator takes two input arguments and reduce the two variables to one. - * @param init Initial value to be added to the reduced @p key_aggregated_e_op return values for - * each vertex. + * @param init Initial value to be added to the reduced @p reduce_op return values for each vertex. * @param vertex_value_output_first Iterator pointing to the vertex property variables for the * first (inclusive) vertex (assigned to tihs process in multi-GPU). 
`vertex_value_output_last` * (exclusive) is deduced as @p vertex_value_output_first + @p @@ -191,6 +192,7 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( "GraphViewType should support the push model."); static_assert(std::is_same::value_type, typename GraphViewType::vertex_type>::value); + static_assert(is_arithmetic_or_thrust_tuple_of_arithmetic::value); using vertex_t = typename GraphViewType::vertex_type; using edge_t = typename GraphViewType::edge_type; @@ -393,7 +395,7 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( tmp_major_vertices.begin(), tmp_minor_keys.begin(), tmp_key_aggregated_edge_weights.begin())); thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), triplet_first, - triplet_first + major_vertices.size(), + triplet_first + tmp_major_vertices.size(), tmp_e_op_result_buffer_first, [adj_matrix_row_value_input_first, key_aggregated_e_op, @@ -408,7 +410,7 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( w, *(adj_matrix_row_value_input_first + matrix_partition.get_major_offset_from_major_nocheck(major)), - kv_map.find(key)->second); + kv_map.find(key)->second.load(cuda::std::memory_order_relaxed)); }); tmp_minor_keys.resize(0, handle.get_stream()); tmp_key_aggregated_edge_weights.resize(0, handle.get_stream()); @@ -488,11 +490,12 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( auto major_vertex_first = thrust::make_transform_iterator( thrust::make_counting_iterator(size_t{0}), [major_vertices = major_vertices.data()] __device__(auto i) { - return ((i == 0) || (major_vertices[i] == major_vertices[i - 1])) + return ((i == 0) || (major_vertices[i] != major_vertices[i - 1])) ? major_vertices[i] : invalid_vertex_id::value; }); thrust::copy_if( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), major_vertex_first, major_vertex_first + major_vertices.size(), unique_major_vertices.begin(), @@ -506,9 +509,10 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( thrust::make_permutation_iterator( vertex_value_output_first, thrust::make_transform_iterator( - major_vertices.begin(), + unique_major_vertices.begin(), [vertex_partition = vertex_partition_device_t(graph_view)] __device__( auto v) { return vertex_partition.get_local_vertex_offset_from_vertex_nocheck(v); })), + thrust::equal_to{}, reduce_op); thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), diff --git a/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh b/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh index 70b6dc92752..0b3588bc8c5 100644 --- a/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh +++ b/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh @@ -17,6 +17,8 @@ #include #include +#include +#include #include #include #include @@ -124,6 +126,35 @@ __global__ void for_all_major_for_all_nbr_low_degree( } } +// FIXME: better derive value_t from BufferType +template +std::tuple, BufferType> reduce_to_unique_kv_pairs( + rmm::device_uvector&& keys, BufferType&& value_buffer, cudaStream_t stream) +{ + thrust::sort_by_key(rmm::exec_policy(stream)->on(stream), + keys.begin(), + keys.end(), + get_dataframe_buffer_begin(value_buffer)); + auto num_uniques = + thrust::count_if(rmm::exec_policy(stream)->on(stream), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(keys.size()), + [keys = keys.data()] __device__(auto i) { + return ((i == 0) || (keys[i] != keys[i - 1])) ? 
true : false; + }); + + rmm::device_uvector unique_keys(num_uniques, stream); + auto value_for_unique_key_buffer = allocate_dataframe_buffer(unique_keys.size(), stream); + thrust::reduce_by_key(rmm::exec_policy(stream)->on(stream), + keys.begin(), + keys.end(), + get_dataframe_buffer_begin(value_buffer), + unique_keys.begin(), + get_dataframe_buffer_begin(value_for_unique_key_buffer)); + + return std::make_tuple(std::move(unique_keys), std::move(value_for_unique_key_buffer)); +} + template (tmp_value_buffer)); } + std::tie(tmp_keys, tmp_value_buffer) = reduce_to_unique_kv_pairs( + std::move(tmp_keys), std::move(tmp_value_buffer), handle.get_stream()); if (GraphViewType::is_multi_gpu) { auto& comm = handle.get_comms(); auto const comm_size = comm.get_size(); - thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - tmp_keys.begin(), - tmp_keys.end(), - get_dataframe_buffer_begin(tmp_value_buffer)); - - auto num_uniques = - thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - thrust::make_counting_iterator(size_t{0}), - thrust::make_counting_iterator(tmp_keys.size()), - [tmp_keys = tmp_keys.data()] __device__(auto i) { - return ((i == 0) || (tmp_keys[i] != tmp_keys[i - 1])) ? true : false; - }); - rmm::device_uvector unique_keys(num_uniques, handle.get_stream()); - auto value_for_unique_key_buffer = - allocate_dataframe_buffer(unique_keys.size(), handle.get_stream()); - - thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - tmp_keys.begin(), - tmp_keys.end(), - get_dataframe_buffer_begin(tmp_value_buffer), - unique_keys.begin(), - get_dataframe_buffer_begin(value_for_unique_key_buffer)); - rmm::device_uvector rx_unique_keys(0, handle.get_stream()); auto rx_value_for_unique_key_buffer = allocate_dataframe_buffer(0, handle.get_stream()); std::tie(rx_unique_keys, rx_value_for_unique_key_buffer, std::ignore) = groupby_gpuid_and_shuffle_kv_pairs( comm, - unique_keys.begin(), - unique_keys.end(), - get_dataframe_buffer_begin(value_for_unique_key_buffer), + tmp_keys.begin(), + tmp_keys.end(), + get_dataframe_buffer_begin(tmp_value_buffer), [key_func = detail::compute_gpu_id_from_vertex_t{comm_size}] __device__( auto val) { return key_func(val); }, handle.get_stream()); - // FIXME: we can reduce after shuffle - - tmp_keys = std::move(rx_unique_keys); - tmp_value_buffer = std::move(rx_value_for_unique_key_buffer); + std::tie(tmp_keys, tmp_value_buffer) = reduce_to_unique_kv_pairs( + std::move(rx_unique_keys), std::move(rx_value_for_unique_key_buffer), handle.get_stream()); } auto cur_size = keys.size(); - // FIXME: this can lead to frequent costly reallocation; we may be able to avoid this if we can - // reserve address space to avoid expensive reallocation. 
- // https://devblogs.nvidia.com/introducing-low-level-gpu-virtual-memory-management - keys.resize(cur_size + tmp_keys.size(), handle.get_stream()); - resize_dataframe_buffer(value_buffer, keys.size(), handle.get_stream()); - - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - tmp_keys.begin(), - tmp_keys.end(), - keys.begin() + cur_size); - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - get_dataframe_buffer_begin(tmp_value_buffer), - get_dataframe_buffer_begin(tmp_value_buffer) + tmp_keys.size(), - get_dataframe_buffer_begin(value_buffer) + cur_size); + if (cur_size == 0) { + keys = std::move(tmp_keys); + value_buffer = std::move(tmp_value_buffer); + } else { + // FIXME: this can lead to frequent costly reallocation; we may be able to avoid this if we + // can reserve address space to avoid expensive reallocation. + // https://devblogs.nvidia.com/introducing-low-level-gpu-virtual-memory-management + keys.resize(cur_size + tmp_keys.size(), handle.get_stream()); + resize_dataframe_buffer(value_buffer, keys.size(), handle.get_stream()); + + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + tmp_keys.begin(), + tmp_keys.end(), + keys.begin() + cur_size); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + get_dataframe_buffer_begin(tmp_value_buffer), + get_dataframe_buffer_begin(tmp_value_buffer) + tmp_keys.size(), + get_dataframe_buffer_begin(value_buffer) + cur_size); + } } if (GraphViewType::is_multi_gpu) { - thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - keys.begin(), - keys.end(), - get_dataframe_buffer_begin(value_buffer)); - - auto num_uniques = - thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - thrust::make_counting_iterator(size_t{0}), - thrust::make_counting_iterator(keys.size()), - [keys = keys.data()] __device__(auto i) { - return ((i == 0) || (keys[i] != keys[i - 1])) ? 
true : false; - }); - rmm::device_uvector unique_keys(num_uniques, handle.get_stream()); - auto value_for_unique_key_buffer = - allocate_dataframe_buffer(unique_keys.size(), handle.get_stream()); - - thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - keys.begin(), - keys.end(), - get_dataframe_buffer_begin(value_buffer), - unique_keys.begin(), - get_dataframe_buffer_begin(value_for_unique_key_buffer)); - - keys = std::move(unique_keys); - value_buffer = std::move(value_for_unique_key_buffer); + std::tie(keys, value_buffer) = reduce_to_unique_kv_pairs( + std::move(keys), std::move(value_buffer), handle.get_stream()); } // FIXME: add init diff --git a/cpp/include/utilities/device_comm.cuh b/cpp/include/utilities/device_comm.cuh index 24b9147ce3c..7b9956902cc 100644 --- a/cpp/include/utilities/device_comm.cuh +++ b/cpp/include/utilities/device_comm.cuh @@ -973,10 +973,10 @@ device_gatherv(raft::comms::comms_t const& comm, size_t constexpr tuple_size = thrust::tuple_size::value_type>::value; - detail::device_allgatherv_tuple_iterator_element_impl() + detail::device_gatherv_tuple_iterator_element_impl() .run(comm, input_first, output_first, sendcount, recvcounts, displacements, root, stream); } From 55896052e05f4e1d27def51391458cb08c3516ca Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Thu, 25 Feb 2021 17:21:52 -0500 Subject: [PATCH 177/343] Add new primitives: compute_in|out_degrees, compute_in|out_weight_sums to graph_view_t (#1394) Close https://github.com/rapidsai/cugraph/issues/1208 - [x] add compute_in|out_degrees, compute_in|out_weight_sums - [x] replace PageRank's custom code to compute out-weight-sums to use graph_view_t's compute_out_weight_sums - [x] add SG C++ tests Authors: - Seunghwa Kang (@seunghwak) Approvers: - Chuck Hastings (@ChuckHastings) - Alex Fender (@afender) URL: https://github.com/rapidsai/cugraph/pull/1394 --- .../experimental/detail/graph_utils.cuh | 22 +- cpp/include/experimental/graph_view.hpp | 12 + cpp/include/utilities/shuffle_comm.cuh | 4 +- cpp/src/experimental/graph.cu | 2 +- cpp/src/experimental/graph_view.cu | 228 +++++++++++++++++- cpp/src/experimental/pagerank.cu | 20 +- cpp/src/experimental/renumber_edgelist.cu | 4 +- cpp/tests/CMakeLists.txt | 20 ++ cpp/tests/experimental/degree_test.cpp | 165 +++++++++++++ cpp/tests/experimental/weight_sum_test.cpp | 186 ++++++++++++++ 10 files changed, 637 insertions(+), 26 deletions(-) create mode 100644 cpp/tests/experimental/degree_test.cpp create mode 100644 cpp/tests/experimental/weight_sum_test.cpp diff --git a/cpp/include/experimental/detail/graph_utils.cuh b/cpp/include/experimental/detail/graph_utils.cuh index 3ac2e2163c6..084d68b8ba4 100644 --- a/cpp/include/experimental/detail/graph_utils.cuh +++ b/cpp/include/experimental/detail/graph_utils.cuh @@ -25,6 +25,7 @@ #include #include +#include #include #include @@ -39,7 +40,7 @@ namespace detail { // compute the numbers of nonzeros in rows (of the graph adjacency matrix, if store_transposed = // false) or columns (of the graph adjacency matrix, if store_transposed = true) template -rmm::device_uvector compute_major_degree( +rmm::device_uvector compute_major_degrees( raft::handle_t const &handle, std::vector const &adj_matrix_partition_offsets, partition_t const &partition) @@ -120,7 +121,7 @@ rmm::device_uvector compute_major_degree( // compute the numbers of nonzeros in rows (of the graph adjacency matrix, if store_transposed = // false) or columns (of the graph adjacency 
matrix, if store_transposed = true) template -rmm::device_uvector compute_major_degree( +rmm::device_uvector compute_major_degrees( raft::handle_t const &handle, std::vector> const &adj_matrix_partition_offsets, partition_t const &partition) @@ -131,7 +132,22 @@ rmm::device_uvector compute_major_degree( adj_matrix_partition_offsets.end(), tmp_offsets.begin(), [](auto const &offsets) { return offsets.data(); }); - return compute_major_degree(handle, tmp_offsets, partition); + return compute_major_degrees(handle, tmp_offsets, partition); +} + +// compute the numbers of nonzeros in rows (of the graph adjacency matrix, if store_transposed = +// false) or columns (of the graph adjacency matrix, if store_transposed = true) +template +rmm::device_uvector compute_major_degrees(raft::handle_t const &handle, + edge_t const *offsets, + vertex_t number_of_vertices) +{ + rmm::device_uvector degrees(number_of_vertices, handle.get_stream()); + thrust::tabulate(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + degrees.begin(), + degrees.end(), + [offsets] __device__(auto i) { return offsets[i + 1] - offsets[i]; }); + return degrees; } template diff --git a/cpp/include/experimental/graph_view.hpp b/cpp/include/experimental/graph_view.hpp index d2ae1150970..7598841fc1a 100644 --- a/cpp/include/experimental/graph_view.hpp +++ b/cpp/include/experimental/graph_view.hpp @@ -494,6 +494,12 @@ class graph_view_t(nullptr); } + rmm::device_uvector compute_in_degrees(raft::handle_t const& handle) const; + rmm::device_uvector compute_out_degrees(raft::handle_t const& handle) const; + + rmm::device_uvector compute_in_weight_sums(raft::handle_t const& handle) const; + rmm::device_uvector compute_out_weight_sums(raft::handle_t const& handle) const; + private: std::vector adj_matrix_partition_offsets_{}; std::vector adj_matrix_partition_indices_{}; @@ -638,6 +644,12 @@ class graph_view_t compute_in_degrees(raft::handle_t const& handle) const; + rmm::device_uvector compute_out_degrees(raft::handle_t const& handle) const; + + rmm::device_uvector compute_in_weight_sums(raft::handle_t const& handle) const; + rmm::device_uvector compute_out_weight_sums(raft::handle_t const& handle) const; + private: edge_t const* offsets_{nullptr}; vertex_t const* indices_{nullptr}; diff --git a/cpp/include/utilities/shuffle_comm.cuh b/cpp/include/utilities/shuffle_comm.cuh index 7e04c7e1972..da86f76b11d 100644 --- a/cpp/include/utilities/shuffle_comm.cuh +++ b/cpp/include/utilities/shuffle_comm.cuh @@ -69,7 +69,7 @@ rmm::device_uvector sort_and_count(raft::comms::comms_t const &comm, d_tx_value_counts = std::move(d_counts); } - return std::move(d_tx_value_counts); + return d_tx_value_counts; } template @@ -111,7 +111,7 @@ rmm::device_uvector sort_and_count(raft::comms::comms_t const &comm, d_tx_value_counts = std::move(d_counts); } - return std::move(d_tx_value_counts); + return d_tx_value_counts; } // inline to suppress a complaint about ODR violation diff --git a/cpp/src/experimental/graph.cu b/cpp/src/experimental/graph.cu index 5cf393bfce4..498bb4eaefe 100644 --- a/cpp/src/experimental/graph.cu +++ b/cpp/src/experimental/graph.cu @@ -278,7 +278,7 @@ graph_tget_handle_ptr()), adj_matrix_partition_offsets_, partition_); // optional expensive checks (part 2/3) diff --git a/cpp/src/experimental/graph_view.cu b/cpp/src/experimental/graph_view.cu index df92fd94194..f443608e424 100644 --- a/cpp/src/experimental/graph_view.cu +++ b/cpp/src/experimental/graph_view.cu @@ -17,6 +17,7 @@ #include #include #include +#include #include 
#include @@ -70,6 +71,83 @@ std::vector update_adj_matrix_partition_edge_counts( return adj_matrix_partition_edge_counts; } +template +rmm::device_uvector compute_minor_degrees( + raft::handle_t const& handle, + graph_view_t const& graph_view) +{ + rmm::device_uvector minor_degrees(graph_view.get_number_of_local_vertices(), + handle.get_stream()); + if (store_transposed) { + copy_v_transform_reduce_out_nbr( + handle, + graph_view, + thrust::make_constant_iterator(0) /* dummy */, + thrust::make_constant_iterator(0) /* dummy */, + [] __device__(vertex_t src, vertex_t dst, weight_t w, auto src_val, auto dst_val) { + return edge_t{1}; + }, + edge_t{0}, + minor_degrees.data()); + } else { + copy_v_transform_reduce_in_nbr( + handle, + graph_view, + thrust::make_constant_iterator(0) /* dummy */, + thrust::make_constant_iterator(0) /* dummy */, + [] __device__(vertex_t src, vertex_t dst, weight_t w, auto src_val, auto dst_val) { + return edge_t{1}; + }, + edge_t{0}, + minor_degrees.data()); + } + + return minor_degrees; +} + +template +rmm::device_uvector compute_weight_sums( + raft::handle_t const& handle, + graph_view_t const& graph_view) +{ + rmm::device_uvector weight_sums(graph_view.get_number_of_local_vertices(), + handle.get_stream()); + if (major == store_transposed) { + copy_v_transform_reduce_in_nbr( + handle, + graph_view, + thrust::make_constant_iterator(0) /* dummy */, + thrust::make_constant_iterator(0) /* dummy */, + [] __device__(vertex_t src, vertex_t dst, weight_t w, auto src_val, auto dst_val) { + return w; + }, + weight_t{0.0}, + weight_sums.data()); + } else { + copy_v_transform_reduce_out_nbr( + handle, + graph_view, + thrust::make_constant_iterator(0) /* dummy */, + thrust::make_constant_iterator(0) /* dummy */, + [] __device__(vertex_t src, vertex_t dst, weight_t w, auto src_val, auto dst_val) { + return w; + }, + weight_t{0.0}, + weight_sums.data()); + } + + return weight_sums; +} + } // namespace template on(default_stream), degrees.begin(), @@ -301,6 +379,154 @@ graph_view_t +rmm::device_uvector +graph_view_t>:: + compute_in_degrees(raft::handle_t const& handle) const +{ + if (store_transposed) { + return detail::compute_major_degrees( + handle, this->adj_matrix_partition_offsets_, this->partition_); + } else { + return compute_minor_degrees(handle, *this); + } +} + +template +rmm::device_uvector +graph_view_t>::compute_in_degrees(raft::handle_t const& handle) const +{ + if (store_transposed) { + return detail::compute_major_degrees( + handle, this->offsets_, this->get_number_of_local_vertices()); + } else { + return compute_minor_degrees(handle, *this); + } +} + +template +rmm::device_uvector +graph_view_t>:: + compute_out_degrees(raft::handle_t const& handle) const +{ + if (store_transposed) { + return compute_minor_degrees(handle, *this); + } else { + return detail::compute_major_degrees( + handle, this->adj_matrix_partition_offsets_, this->partition_); + } +} + +template +rmm::device_uvector +graph_view_t>::compute_out_degrees(raft::handle_t const& handle) const +{ + if (store_transposed) { + return compute_minor_degrees(handle, *this); + } else { + return detail::compute_major_degrees( + handle, this->offsets_, this->get_number_of_local_vertices()); + } +} + +template +rmm::device_uvector +graph_view_t>:: + compute_in_weight_sums(raft::handle_t const& handle) const +{ + if (store_transposed) { + return compute_weight_sums(handle, *this); + } else { + return compute_weight_sums(handle, *this); + } +} + +template +rmm::device_uvector graph_view_t< + vertex_t, + 
edge_t, + weight_t, + store_transposed, + multi_gpu, + std::enable_if_t>::compute_in_weight_sums(raft::handle_t const& handle) const +{ + if (store_transposed) { + return compute_weight_sums(handle, *this); + } else { + return compute_weight_sums(handle, *this); + } +} + +template +rmm::device_uvector +graph_view_t>:: + compute_out_weight_sums(raft::handle_t const& handle) const +{ + if (store_transposed) { + return compute_weight_sums(handle, *this); + } else { + return compute_weight_sums(handle, *this); + } +} + +template +rmm::device_uvector graph_view_t< + vertex_t, + edge_t, + weight_t, + store_transposed, + multi_gpu, + std::enable_if_t>::compute_out_weight_sums(raft::handle_t const& handle) const +{ + if (store_transposed) { + return compute_weight_sums(handle, *this); + } else { + return compute_weight_sums(handle, *this); + } +} + // explicit instantiation template class graph_view_t; diff --git a/cpp/src/experimental/pagerank.cu b/cpp/src/experimental/pagerank.cu index 058cbfe5966..c498d2864b4 100644 --- a/cpp/src/experimental/pagerank.cu +++ b/cpp/src/experimental/pagerank.cu @@ -142,23 +142,9 @@ void pagerank(raft::handle_t const& handle, // 2. compute the sums of the out-going edge weights (if not provided) - rmm::device_uvector tmp_vertex_out_weight_sums(0, handle.get_stream()); - if (precomputed_vertex_out_weight_sums == nullptr) { - tmp_vertex_out_weight_sums.resize(pull_graph_view.get_number_of_local_vertices(), - handle.get_stream()); - // FIXME: better refactor this out (computing out-degree). - copy_v_transform_reduce_out_nbr( - handle, - pull_graph_view, - thrust::make_constant_iterator(0) /* dummy */, - thrust::make_constant_iterator(0) /* dummy */, - [alpha] __device__(vertex_t src, vertex_t dst, weight_t w, auto src_val, auto dst_val) { - return w; - }, - weight_t{0.0}, - tmp_vertex_out_weight_sums.data()); - } - + auto tmp_vertex_out_weight_sums = precomputed_vertex_out_weight_sums == nullptr + ? pull_graph_view.compute_out_weight_sums(handle) + : rmm::device_uvector(0, handle.get_stream()); auto vertex_out_weight_sums = precomputed_vertex_out_weight_sums != nullptr ? 
precomputed_vertex_out_weight_sums : tmp_vertex_out_weight_sums.data(); diff --git a/cpp/src/experimental/renumber_edgelist.cu b/cpp/src/experimental/renumber_edgelist.cu index 6a5a1c732c2..b093a9adb22 100644 --- a/cpp/src/experimental/renumber_edgelist.cu +++ b/cpp/src/experimental/renumber_edgelist.cu @@ -224,7 +224,7 @@ rmm::device_uvector compute_renumber_map( labels.begin(), thrust::greater()); - return std::move(labels); + return labels; } template @@ -609,7 +609,7 @@ std::enable_if_t> renumber_edgelist( renumber_map.find( edgelist_minor_vertices, edgelist_minor_vertices + num_edgelist_edges, edgelist_minor_vertices); - return std::move(renumber_map_labels); + return renumber_map_labels; #else return rmm::device_uvector(0, handle.get_stream()); #endif diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 5425c68e896..68b277871b1 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -331,6 +331,26 @@ set(EXPERIMENTAL_GRAPH_TEST_SRCS ConfigureTest(EXPERIMENTAL_GRAPH_TEST "${EXPERIMENTAL_GRAPH_TEST_SRCS}") +################################################################################################### +# - Experimental weight-sum tests ----------------------------------------------------------------- + +set(EXPERIMENTAL_WEIGHT_SUM_TEST_SRCS + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/experimental/weight_sum_test.cpp") + +ConfigureTest(EXPERIMENTAL_WEIGHT_SUM_TEST "${EXPERIMENTAL_WEIGHT_SUM_TEST_SRCS}") + +################################################################################################### +# - Experimental degree tests --------------------------------------------------------------------- + +set(EXPERIMENTAL_DEGREE_TEST_SRCS + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/experimental/degree_test.cpp") + +ConfigureTest(EXPERIMENTAL_DEGREE_TEST "${EXPERIMENTAL_DEGREE_TEST_SRCS}") + ################################################################################################### # - Experimental coarsening tests ----------------------------------------------------------------- diff --git a/cpp/tests/experimental/degree_test.cpp b/cpp/tests/experimental/degree_test.cpp new file mode 100644 index 00000000000..7c7b41cdacc --- /dev/null +++ b/cpp/tests/experimental/degree_test.cpp @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governin_from_mtxg permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +template +void degree_reference(edge_t const* offsets, + vertex_t const* indices, + edge_t* degrees, + vertex_t num_vertices, + bool major) +{ + if (major) { + std::adjacent_difference(offsets + 1, offsets + num_vertices + 1, degrees); + } else { + std::fill(degrees, degrees + num_vertices, edge_t{0}); + for (vertex_t i = 0; i < num_vertices; ++i) { + for (auto j = offsets[i]; j < offsets[i + 1]; ++j) { + auto nbr = indices[j]; + ++degrees[nbr]; + } + } + } + + return; +} + +typedef struct Degree_Usecase_t { + std::string graph_file_full_path{}; + + Degree_Usecase_t(std::string const& graph_file_path) + { + if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { + graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; + } else { + graph_file_full_path = graph_file_path; + } + }; +} Degree_Usecase; + +class Tests_Degree : public ::testing::TestWithParam { + public: + Tests_Degree() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + template + void run_current_test(Degree_Usecase const& configuration) + { + raft::handle_t handle{}; + + auto graph = cugraph::test:: + read_graph_from_matrix_market_file( + handle, configuration.graph_file_full_path, false); + auto graph_view = graph.view(); + + std::vector h_offsets(graph_view.get_number_of_vertices() + 1); + std::vector h_indices(graph_view.get_number_of_edges()); + raft::update_host(h_offsets.data(), + graph_view.offsets(), + graph_view.get_number_of_vertices() + 1, + handle.get_stream()); + raft::update_host(h_indices.data(), + graph_view.indices(), + graph_view.get_number_of_edges(), + handle.get_stream()); + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + + std::vector h_reference_in_degrees(graph_view.get_number_of_vertices()); + std::vector h_reference_out_degrees(graph_view.get_number_of_vertices()); + + degree_reference(h_offsets.data(), + h_indices.data(), + h_reference_in_degrees.data(), + graph_view.get_number_of_vertices(), + store_transposed); + + degree_reference(h_offsets.data(), + h_indices.data(), + h_reference_out_degrees.data(), + graph_view.get_number_of_vertices(), + !store_transposed); + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + auto d_in_degrees = graph_view.compute_in_degrees(handle); + auto d_out_degrees = graph_view.compute_out_degrees(handle); + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + std::vector h_cugraph_in_degrees(graph_view.get_number_of_vertices()); + std::vector h_cugraph_out_degrees(graph_view.get_number_of_vertices()); + + raft::update_host( + h_cugraph_in_degrees.data(), d_in_degrees.data(), d_in_degrees.size(), handle.get_stream()); + raft::update_host(h_cugraph_out_degrees.data(), + d_out_degrees.data(), + d_out_degrees.size(), + handle.get_stream()); + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + + ASSERT_TRUE(std::equal( + h_reference_in_degrees.begin(), h_reference_in_degrees.end(), h_cugraph_in_degrees.begin())) + << "In-degree values do not match with the reference values."; + ASSERT_TRUE(std::equal(h_reference_out_degrees.begin(), + h_reference_out_degrees.end(), + h_cugraph_out_degrees.begin())) + << "Out-degree values do not match with the reference values."; + } +}; + +// FIXME: add tests for type 
combinations + +TEST_P(Tests_Degree, CheckInt32Int32FloatTransposed) +{ + run_current_test(GetParam()); +} + +TEST_P(Tests_Degree, CheckInt32Int32FloatUntransposed) +{ + run_current_test(GetParam()); +} + +INSTANTIATE_TEST_CASE_P(simple_test, + Tests_Degree, + ::testing::Values(Degree_Usecase("test/datasets/karate.mtx"), + Degree_Usecase("test/datasets/web-Google.mtx"), + Degree_Usecase("test/datasets/ljournal-2008.mtx"), + Degree_Usecase("test/datasets/webbase-1M.mtx"))); + +CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/weight_sum_test.cpp b/cpp/tests/experimental/weight_sum_test.cpp new file mode 100644 index 00000000000..aeda7386314 --- /dev/null +++ b/cpp/tests/experimental/weight_sum_test.cpp @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governin_from_mtxg permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +template +void weight_sum_reference(edge_t const* offsets, + vertex_t const* indices, + weight_t const* weights, + weight_t* weight_sums, + vertex_t num_vertices, + bool major) +{ + if (!major) { std::fill(weight_sums, weight_sums + num_vertices, weight_t{0.0}); } + for (vertex_t i = 0; i < num_vertices; ++i) { + if (major) { + weight_sums[i] = + std::accumulate(weights + offsets[i], weights + offsets[i + 1], weight_t{0.0}); + } else { + for (auto j = offsets[i]; j < offsets[i + 1]; ++j) { + auto nbr = indices[j]; + weight_sums[nbr] += weights[j]; + } + } + } + + return; +} + +typedef struct WeightSum_Usecase_t { + std::string graph_file_full_path{}; + + WeightSum_Usecase_t(std::string const& graph_file_path) + { + if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { + graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; + } else { + graph_file_full_path = graph_file_path; + } + }; +} WeightSum_Usecase; + +class Tests_WeightSum : public ::testing::TestWithParam { + public: + Tests_WeightSum() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + template + void run_current_test(WeightSum_Usecase const& configuration) + { + raft::handle_t handle{}; + + auto graph = cugraph::test:: + read_graph_from_matrix_market_file( + handle, configuration.graph_file_full_path, true); + auto graph_view = graph.view(); + + std::vector h_offsets(graph_view.get_number_of_vertices() + 1); + std::vector h_indices(graph_view.get_number_of_edges()); + std::vector h_weights(graph_view.get_number_of_edges()); + raft::update_host(h_offsets.data(), + graph_view.offsets(), + graph_view.get_number_of_vertices() + 1, + handle.get_stream()); + raft::update_host(h_indices.data(), + graph_view.indices(), + graph_view.get_number_of_edges(), + handle.get_stream()); + raft::update_host(h_weights.data(), + graph_view.weights(), + graph_view.get_number_of_edges(), + 
handle.get_stream()); + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + + std::vector h_reference_in_weight_sums(graph_view.get_number_of_vertices()); + std::vector h_reference_out_weight_sums(graph_view.get_number_of_vertices()); + + weight_sum_reference(h_offsets.data(), + h_indices.data(), + h_weights.data(), + h_reference_in_weight_sums.data(), + graph_view.get_number_of_vertices(), + store_transposed); + + weight_sum_reference(h_offsets.data(), + h_indices.data(), + h_weights.data(), + h_reference_out_weight_sums.data(), + graph_view.get_number_of_vertices(), + !store_transposed); + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + auto d_in_weight_sums = graph_view.compute_in_weight_sums(handle); + auto d_out_weight_sums = graph_view.compute_out_weight_sums(handle); + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + std::vector h_cugraph_in_weight_sums(graph_view.get_number_of_vertices()); + std::vector h_cugraph_out_weight_sums(graph_view.get_number_of_vertices()); + + raft::update_host(h_cugraph_in_weight_sums.data(), + d_in_weight_sums.data(), + d_in_weight_sums.size(), + handle.get_stream()); + raft::update_host(h_cugraph_out_weight_sums.data(), + d_out_weight_sums.data(), + d_out_weight_sums.size(), + handle.get_stream()); + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + + auto threshold_ratio = weight_t{1e-4}; + auto threshold_magnitude = std::numeric_limits::min(); + auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { + return std::abs(lhs - rhs) < + std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude); + }; + + ASSERT_TRUE(std::equal(h_reference_in_weight_sums.begin(), + h_reference_in_weight_sums.end(), + h_cugraph_in_weight_sums.begin(), + nearly_equal)) + << "In-weight-sum values do not match with the reference values."; + ASSERT_TRUE(std::equal(h_reference_out_weight_sums.begin(), + h_reference_out_weight_sums.end(), + h_cugraph_out_weight_sums.begin(), + nearly_equal)) + << "Out-weight-sum values do not match with the reference values."; + } +}; + +// FIXME: add tests for type combinations + +TEST_P(Tests_WeightSum, CheckInt32Int32FloatTransposed) +{ + run_current_test(GetParam()); +} + +TEST_P(Tests_WeightSum, CheckInt32Int32FloatUntransposed) +{ + run_current_test(GetParam()); +} + +INSTANTIATE_TEST_CASE_P(simple_test, + Tests_WeightSum, + ::testing::Values(WeightSum_Usecase("test/datasets/karate.mtx"), + WeightSum_Usecase("test/datasets/web-Google.mtx"), + WeightSum_Usecase("test/datasets/ljournal-2008.mtx"), + WeightSum_Usecase("test/datasets/webbase-1M.mtx"))); + +CUGRAPH_TEST_PROGRAM_MAIN() From ca895946189ae6cb00daa5c5bde1e37cb78788e4 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Mon, 1 Mar 2021 09:58:55 -0500 Subject: [PATCH 178/343] Add boost 1.0 license file. (#1401) #1411 added code (to address #1329) that follows the BOOST 1.0 license and this PR adds the BOOST 1.0 license to cuGraph codebase. 
Authors: - Seunghwa Kang (@seunghwak) Approvers: - Brad Rees (@BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1401 --- thirdparty/LICENSES/LICENSE.boost | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 thirdparty/LICENSES/LICENSE.boost diff --git a/thirdparty/LICENSES/LICENSE.boost b/thirdparty/LICENSES/LICENSE.boost new file mode 100644 index 00000000000..36b7cd93cdf --- /dev/null +++ b/thirdparty/LICENSES/LICENSE.boost @@ -0,0 +1,23 @@ +Boost Software License - Version 1.0 - August 17th, 2003 + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
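Note on the primitives added in #1394 ([PATCH 177/343] above): `graph_view_t` now exposes `compute_in_degrees`, `compute_out_degrees`, `compute_in_weight_sums`, and `compute_out_weight_sums`. The following is a minimal single-GPU usage sketch mirroring what the new `degree_test.cpp`/`weight_sum_test.cpp` exercise; the function name and the chosen template arguments are illustrative only, not part of any patch in this series.

```cpp
#include <experimental/graph_view.hpp>
#include <raft/handle.hpp>
#include <rmm/device_uvector.hpp>

// Sketch: per-vertex degrees and weight sums are returned as device vectors,
// one entry per (local) vertex of the view.
template <typename vertex_t, typename edge_t, typename weight_t>
void compute_degree_and_weight_sums(
  raft::handle_t const& handle,
  cugraph::experimental::graph_view_t<vertex_t, edge_t, weight_t, false, false> const& graph_view)
{
  rmm::device_uvector<edge_t> in_degrees    = graph_view.compute_in_degrees(handle);
  rmm::device_uvector<edge_t> out_degrees   = graph_view.compute_out_degrees(handle);
  rmm::device_uvector<weight_t> in_w_sums   = graph_view.compute_in_weight_sums(handle);
  rmm::device_uvector<weight_t> out_w_sums  = graph_view.compute_out_weight_sums(handle);
  // PageRank now calls compute_out_weight_sums() itself when no precomputed sums are supplied.
}
```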
From 0adc558ea57ee4d07957dda589770d73d5d514cb Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Mon, 1 Mar 2021 14:02:33 -0500 Subject: [PATCH 179/343] Update C++ MG PageRank test (#1419) - [x] Add tests using graphs with isolated vertices - [x] Add personalized PageRank tests - [x] Test code refactoring - [x] Create libcugraphtestutil.a This PR fixes FIXMEs added in https://github.com/rapidsai/cugraph/pull/1361 to address https://github.com/rapidsai/cugraph/issues/1136 Authors: - Seunghwa Kang (@seunghwak) Approvers: - Rick Ratzel (@rlratzel) - Andrei Schaffer (@aschaffer) - Brad Rees (@BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1419 --- cpp/include/algorithms.hpp | 8 +- cpp/include/experimental/graph.hpp | 8 + cpp/include/experimental/graph_view.hpp | 4 + cpp/src/experimental/katz_centrality.cu | 28 +- cpp/src/experimental/pagerank.cu | 84 +- cpp/src/experimental/renumber_edgelist.cu | 9 +- cpp/tests/CMakeLists.txt | 102 +-- cpp/tests/community/egonet_test.cu | 8 +- cpp/tests/experimental/bfs_test.cpp | 9 +- cpp/tests/experimental/coarsen_graph_test.cpp | 8 +- cpp/tests/experimental/graph_test.cpp | 60 +- .../experimental/induced_subgraph_test.cpp | 8 +- .../experimental/katz_centrality_test.cpp | 7 +- cpp/tests/experimental/louvain_test.cu | 7 +- cpp/tests/experimental/pagerank_test.cpp | 11 +- cpp/tests/experimental/sssp_test.cpp | 9 +- cpp/tests/pagerank/mg_pagerank_test.cpp | 296 +++++++ cpp/tests/pagerank/pagerank_mg_test.cpp | 229 ----- cpp/tests/utilities/base_fixture.hpp | 71 +- cpp/tests/utilities/mg_test_utilities.cu | 180 ---- cpp/tests/utilities/mg_test_utilities.hpp | 77 -- cpp/tests/utilities/test_utilities.cpp | 442 ---------- cpp/tests/utilities/test_utilities.cu | 788 ++++++++++++++++++ cpp/tests/utilities/test_utilities.hpp | 41 +- 24 files changed, 1337 insertions(+), 1157 deletions(-) create mode 100644 cpp/tests/pagerank/mg_pagerank_test.cpp delete mode 100644 cpp/tests/pagerank/pagerank_mg_test.cpp delete mode 100644 cpp/tests/utilities/mg_test_utilities.cu delete mode 100644 cpp/tests/utilities/mg_test_utilities.hpp delete mode 100644 cpp/tests/utilities/test_utilities.cpp create mode 100644 cpp/tests/utilities/test_utilities.cu diff --git a/cpp/include/algorithms.hpp b/cpp/include/algorithms.hpp index c666bce23ad..0b8bd59587f 100644 --- a/cpp/include/algorithms.hpp +++ b/cpp/include/algorithms.hpp @@ -1100,9 +1100,9 @@ void sssp(raft::handle_t const &handle, template void pagerank(raft::handle_t const &handle, graph_view_t const &graph_view, - weight_t *adj_matrix_row_out_weight_sums, - vertex_t *personalization_vertices, - result_t *personalization_values, + weight_t const *adj_matrix_row_out_weight_sums, + vertex_t const *personalization_vertices, + result_t const *personalization_values, vertex_t personalization_vector_size, result_t *pageranks, result_t alpha, @@ -1148,7 +1148,7 @@ void pagerank(raft::handle_t const &handle, template void katz_centrality(raft::handle_t const &handle, graph_view_t const &graph_view, - result_t *betas, + result_t const *betas, result_t *katz_centralities, result_t alpha, result_t beta, diff --git a/cpp/include/experimental/graph.hpp b/cpp/include/experimental/graph.hpp index cc21f7c5013..6a10256e6f4 100644 --- a/cpp/include/experimental/graph.hpp +++ b/cpp/include/experimental/graph.hpp @@ -61,6 +61,8 @@ class graph_t() {} + graph_t(raft::handle_t const &handle, std::vector> const &edgelists, partition_t const &partition, @@ -123,6 +125,12 @@ class graph_t(), + 
offsets_(0, handle.get_stream()), + indices_(0, handle.get_stream()), + weights_(0, handle.get_stream()){}; + graph_t(raft::handle_t const &handle, edgelist_t const &edgelist, vertex_t number_of_vertices, diff --git a/cpp/include/experimental/graph_view.hpp b/cpp/include/experimental/graph_view.hpp index 7598841fc1a..5d3d09bb087 100644 --- a/cpp/include/experimental/graph_view.hpp +++ b/cpp/include/experimental/graph_view.hpp @@ -82,6 +82,8 @@ namespace experimental { template class partition_t { public: + partition_t() = default; + partition_t(std::vector const& vertex_partition_offsets, bool hypergraph_partitioned, int row_comm_size, @@ -247,6 +249,8 @@ size_t constexpr num_segments_per_vertex_partition{3}; template class graph_base_t { public: + graph_base_t() = default; + graph_base_t(raft::handle_t const& handle, vertex_t number_of_vertices, edge_t number_of_edges, diff --git a/cpp/src/experimental/katz_centrality.cu b/cpp/src/experimental/katz_centrality.cu index 1ab824f1c91..7ffef5053af 100644 --- a/cpp/src/experimental/katz_centrality.cu +++ b/cpp/src/experimental/katz_centrality.cu @@ -38,7 +38,7 @@ namespace detail { template void katz_centrality(raft::handle_t const &handle, GraphViewType const &pull_graph_view, - result_t *betas, + result_t const *betas, result_t *katz_centralities, result_t alpha, result_t beta, // relevant only if betas == nullptr @@ -173,7 +173,7 @@ void katz_centrality(raft::handle_t const &handle, template void katz_centrality(raft::handle_t const &handle, graph_view_t const &graph_view, - result_t *betas, + result_t const *betas, result_t *katz_centralities, result_t alpha, result_t beta, // relevant only if beta == nullptr @@ -200,7 +200,7 @@ void katz_centrality(raft::handle_t const &handle, template void katz_centrality(raft::handle_t const &handle, graph_view_t const &graph_view, - float *betas, + float const *betas, float *katz_centralities, float alpha, float beta, @@ -212,7 +212,7 @@ template void katz_centrality(raft::handle_t const &handle, template void katz_centrality(raft::handle_t const &handle, graph_view_t const &graph_view, - double *betas, + double const *betas, double *katz_centralities, double alpha, double beta, @@ -224,7 +224,7 @@ template void katz_centrality(raft::handle_t const &handle, template void katz_centrality(raft::handle_t const &handle, graph_view_t const &graph_view, - float *betas, + float const *betas, float *katz_centralities, float alpha, float beta, @@ -236,7 +236,7 @@ template void katz_centrality(raft::handle_t const &handle, template void katz_centrality(raft::handle_t const &handle, graph_view_t const &graph_view, - double *betas, + double const *betas, double *katz_centralities, double alpha, double beta, @@ -248,7 +248,7 @@ template void katz_centrality(raft::handle_t const &handle, template void katz_centrality(raft::handle_t const &handle, graph_view_t const &graph_view, - float *betas, + float const *betas, float *katz_centralities, float alpha, float beta, @@ -260,7 +260,7 @@ template void katz_centrality(raft::handle_t const &handle, template void katz_centrality(raft::handle_t const &handle, graph_view_t const &graph_view, - double *betas, + double const *betas, double *katz_centralities, double alpha, double beta, @@ -272,7 +272,7 @@ template void katz_centrality(raft::handle_t const &handle, template void katz_centrality(raft::handle_t const &handle, graph_view_t const &graph_view, - float *betas, + float const *betas, float *katz_centralities, float alpha, float beta, @@ -284,7 +284,7 @@ template void 
katz_centrality(raft::handle_t const &handle, template void katz_centrality(raft::handle_t const &handle, graph_view_t const &graph_view, - double *betas, + double const *betas, double *katz_centralities, double alpha, double beta, @@ -296,7 +296,7 @@ template void katz_centrality(raft::handle_t const &handle, template void katz_centrality(raft::handle_t const &handle, graph_view_t const &graph_view, - float *betas, + float const *betas, float *katz_centralities, float alpha, float beta, @@ -308,7 +308,7 @@ template void katz_centrality(raft::handle_t const &handle, template void katz_centrality(raft::handle_t const &handle, graph_view_t const &graph_view, - double *betas, + double const *betas, double *katz_centralities, double alpha, double beta, @@ -320,7 +320,7 @@ template void katz_centrality(raft::handle_t const &handle, template void katz_centrality(raft::handle_t const &handle, graph_view_t const &graph_view, - float *betas, + float const *betas, float *katz_centralities, float alpha, float beta, @@ -332,7 +332,7 @@ template void katz_centrality(raft::handle_t const &handle, template void katz_centrality(raft::handle_t const &handle, graph_view_t const &graph_view, - double *betas, + double const *betas, double *katz_centralities, double alpha, double beta, diff --git a/cpp/src/experimental/pagerank.cu b/cpp/src/experimental/pagerank.cu index c498d2864b4..e5874acb04f 100644 --- a/cpp/src/experimental/pagerank.cu +++ b/cpp/src/experimental/pagerank.cu @@ -44,9 +44,9 @@ namespace detail { template void pagerank(raft::handle_t const& handle, GraphViewType const& pull_graph_view, - typename GraphViewType::weight_type* precomputed_vertex_out_weight_sums, - typename GraphViewType::vertex_type* personalization_vertices, - result_t* personalization_values, + typename GraphViewType::weight_type const* precomputed_vertex_out_weight_sums, + typename GraphViewType::vertex_type const* personalization_vertices, + result_t const* personalization_values, typename GraphViewType::vertex_type personalization_vector_size, result_t* pageranks, result_t alpha, @@ -279,9 +279,9 @@ void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - weight_t* precomputed_vertex_out_weight_sums, - vertex_t* personalization_vertices, - result_t* personalization_values, + weight_t const* precomputed_vertex_out_weight_sums, + vertex_t const* personalization_vertices, + result_t const* personalization_values, vertex_t personalization_vector_size, result_t* pageranks, result_t alpha, @@ -308,9 +308,9 @@ void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - float* precomputed_vertex_out_weight_sums, - int32_t* personalization_vertices, - float* personalization_values, + float const* precomputed_vertex_out_weight_sums, + int32_t const* personalization_vertices, + float const* personalization_values, int32_t personalization_vector_size, float* pageranks, float alpha, @@ -321,9 +321,9 @@ template void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - double* precomputed_vertex_out_weight_sums, - int32_t* personalization_vertices, - double* personalization_values, + double const* precomputed_vertex_out_weight_sums, + int32_t const* personalization_vertices, + double const* personalization_values, int32_t personalization_vector_size, double* pageranks, double alpha, @@ -334,9 +334,9 @@ template void 
pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - float* precomputed_vertex_out_weight_sums, - int32_t* personalization_vertices, - float* personalization_values, + float const* precomputed_vertex_out_weight_sums, + int32_t const* personalization_vertices, + float const* personalization_values, int32_t personalization_vector_size, float* pageranks, float alpha, @@ -347,9 +347,9 @@ template void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - double* precomputed_vertex_out_weight_sums, - int32_t* personalization_vertices, - double* personalization_values, + double const* precomputed_vertex_out_weight_sums, + int32_t const* personalization_vertices, + double const* personalization_values, int32_t personalization_vector_size, double* pageranks, double alpha, @@ -360,9 +360,9 @@ template void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - float* precomputed_vertex_out_weight_sums, - int64_t* personalization_vertices, - float* personalization_values, + float const* precomputed_vertex_out_weight_sums, + int64_t const* personalization_vertices, + float const* personalization_values, int64_t personalization_vector_size, float* pageranks, float alpha, @@ -373,9 +373,9 @@ template void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - double* precomputed_vertex_out_weight_sums, - int64_t* personalization_vertices, - double* personalization_values, + double const* precomputed_vertex_out_weight_sums, + int64_t const* personalization_vertices, + double const* personalization_values, int64_t personalization_vector_size, double* pageranks, double alpha, @@ -386,9 +386,9 @@ template void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - float* precomputed_vertex_out_weight_sums, - int32_t* personalization_vertices, - float* personalization_values, + float const* precomputed_vertex_out_weight_sums, + int32_t const* personalization_vertices, + float const* personalization_values, int32_t personalization_vector_size, float* pageranks, float alpha, @@ -399,9 +399,9 @@ template void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - double* precomputed_vertex_out_weight_sums, - int32_t* personalization_vertices, - double* personalization_values, + double const* precomputed_vertex_out_weight_sums, + int32_t const* personalization_vertices, + double const* personalization_values, int32_t personalization_vector_size, double* pageranks, double alpha, @@ -412,9 +412,9 @@ template void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - float* precomputed_vertex_out_weight_sums, - int32_t* personalization_vertices, - float* personalization_values, + float const* precomputed_vertex_out_weight_sums, + int32_t const* personalization_vertices, + float const* personalization_values, int32_t personalization_vector_size, float* pageranks, float alpha, @@ -425,9 +425,9 @@ template void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - double* precomputed_vertex_out_weight_sums, - int32_t* personalization_vertices, - double* 
personalization_values, + double const* precomputed_vertex_out_weight_sums, + int32_t const* personalization_vertices, + double const* personalization_values, int32_t personalization_vector_size, double* pageranks, double alpha, @@ -438,9 +438,9 @@ template void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - float* precomputed_vertex_out_weight_sums, - int64_t* personalization_vertices, - float* personalization_values, + float const* precomputed_vertex_out_weight_sums, + int64_t const* personalization_vertices, + float const* personalization_values, int64_t personalization_vector_size, float* pageranks, float alpha, @@ -451,9 +451,9 @@ template void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - double* precomputed_vertex_out_weight_sums, - int64_t* personalization_vertices, - double* personalization_values, + double const* precomputed_vertex_out_weight_sums, + int64_t const* personalization_vertices, + double const* personalization_values, int64_t personalization_vector_size, double* pageranks, double alpha, diff --git a/cpp/src/experimental/renumber_edgelist.cu b/cpp/src/experimental/renumber_edgelist.cu index b093a9adb22..a8847167b87 100644 --- a/cpp/src/experimental/renumber_edgelist.cu +++ b/cpp/src/experimental/renumber_edgelist.cu @@ -547,11 +547,10 @@ renumber_edgelist(raft::handle_t const& handle, return std::make_tuple( std::move(renumber_map_labels), partition, number_of_vertices, number_of_edges); #else - return std::make_tuple( - rmm::device_uvector(0, handle.get_stream()), - partition_t(std::vector(), false, int{0}, int{0}, int{0}, int{0}), - vertex_t{0}, - edge_t{0}); + return std::make_tuple(rmm::device_uvector(0, handle.get_stream()), + partition_t{}, + vertex_t{0}, + edge_t{0}); #endif } diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 68b277871b1..a93aa0cfabb 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -16,6 +16,43 @@ # #============================================================================= +################################################################################################### +# - common test utils ----------------------------------------------------------------------------- + +add_library(cugraphtestutil STATIC + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cu" + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c") + +set_property(TARGET cugraphtestutil PROPERTY POSITION_INDEPENDENT_CODE ON) + +target_include_directories(cugraphtestutil + PRIVATE + "${CUB_INCLUDE_DIR}" + "${THRUST_INCLUDE_DIR}" + "${CUCO_INCLUDE_DIR}" + "${LIBCUDACXX_INCLUDE_DIR}" + "${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}" + "${RMM_INCLUDE}" + "${NCCL_INCLUDE_DIRS}" + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio" + "${CMAKE_CURRENT_SOURCE_DIR}/../include" + "${CMAKE_CURRENT_SOURCE_DIR}" + "${RAFT_DIR}/cpp/include" +) + +target_link_libraries(cugraphtestutil cugraph) + +# CUDA_ARCHITECTURES=OFF implies cmake will not pass arch flags to the +# compiler. CUDA_ARCHITECTURES must be set to a non-empty value to prevent +# cmake warnings about policy CMP0104. With this setting, arch flags must be +# manually set! ("evaluate_gpu_archs(GPU_ARCHS)" is the current mechanism +# used in cpp/CMakeLists.txt for setting arch options). +# Run "cmake --help-policy CMP0104" for policy details. 
+# NOTE: the CUDA_ARCHITECTURES=OFF setting may be removed after migrating to +# the findcudatoolkit features in cmake 3.17+ +set_target_properties(cugraphtestutil PROPERTIES + CUDA_ARCHITECTURES OFF) + ################################################################################################### # - compiler function ----------------------------------------------------------------------------- @@ -31,8 +68,6 @@ function(ConfigureTest CMAKE_TEST_NAME CMAKE_TEST_SRC) "${LIBCUDACXX_INCLUDE_DIR}" "${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}" "${RMM_INCLUDE}" - "${CUDF_INCLUDE}" - "${CUDF_INCLUDE}/libcudf/libcudacxx" "${NCCL_INCLUDE_DIRS}" "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio" "${CMAKE_CURRENT_SOURCE_DIR}/../include" @@ -49,6 +84,7 @@ function(ConfigureTest CMAKE_TEST_NAME CMAKE_TEST_SRC) target_link_libraries(${CMAKE_TEST_NAME} PRIVATE + cugraphtestutil cugraph GTest::GTest GTest::Main @@ -140,16 +176,10 @@ endif(RAPIDS_DATASET_ROOT_DIR) ### test sources ################################################################################## ################################################################################################### -# FIXME: consider adding a "add_library(cugraph_testing SHARED ...) instead of -# adding the same test utility sources to each test target. There may need to be -# an additional cugraph_mg_testing lib due to the optional inclusion of MPI. - ################################################################################################### # - katz centrality tests ------------------------------------------------------------------------- set(KATZ_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/centrality/katz_centrality_test.cu") ConfigureTest(KATZ_TEST "${KATZ_TEST_SRC}") @@ -158,15 +188,11 @@ set(KATZ_TEST_SRC # - betweenness centrality tests ------------------------------------------------------------------ set(BETWEENNESS_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/centrality/betweenness_centrality_test.cu") ConfigureTest(BETWEENNESS_TEST "${BETWEENNESS_TEST_SRC}") set(EDGE_BETWEENNESS_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/centrality/edge_betweenness_centrality_test.cu") ConfigureTest(EDGE_BETWEENNESS_TEST "${EDGE_BETWEENNESS_TEST_SRC}") @@ -175,8 +201,6 @@ set(EDGE_BETWEENNESS_TEST_SRC # - SSSP tests ------------------------------------------------------------------------------------ set(SSSP_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/traversal/sssp_test.cu") ConfigureTest(SSSP_TEST "${SSSP_TEST_SRCS}") @@ -185,8 +209,6 @@ ConfigureTest(SSSP_TEST "${SSSP_TEST_SRCS}") # - BFS tests ------------------------------------------------------------------------------------- set(BFS_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/traversal/bfs_test.cu") ConfigureTest(BFS_TEST "${BFS_TEST_SRCS}") @@ -195,8 +217,6 @@ ConfigureTest(BFS_TEST "${BFS_TEST_SRCS}") # - LOUVAIN tests --------------------------------------------------------------------------------- set(LOUVAIN_TEST_SRC - 
"${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/community/louvain_test.cpp") ConfigureTest(LOUVAIN_TEST "${LOUVAIN_TEST_SRC}") @@ -205,8 +225,6 @@ ConfigureTest(LOUVAIN_TEST "${LOUVAIN_TEST_SRC}") # - LEIDEN tests --------------------------------------------------------------------------------- set(LEIDEN_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/community/leiden_test.cpp") ConfigureTest(LEIDEN_TEST "${LEIDEN_TEST_SRC}") @@ -215,8 +233,6 @@ ConfigureTest(LEIDEN_TEST "${LEIDEN_TEST_SRC}") # - ECG tests --------------------------------------------------------------------------------- set(ECG_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/community/ecg_test.cpp") ConfigureTest(ECG_TEST "${ECG_TEST_SRC}") @@ -225,8 +241,6 @@ ConfigureTest(ECG_TEST "${ECG_TEST_SRC}") # - Balanced cut clustering tests ----------------------------------------------------------------- set(BALANCED_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/community/balanced_edge_test.cpp") ConfigureTest(BALANCED_TEST "${BALANCED_TEST_SRC}") @@ -235,8 +249,6 @@ ConfigureTest(BALANCED_TEST "${BALANCED_TEST_SRC}") # - TRIANGLE tests -------------------------------------------------------------------------------- set(TRIANGLE_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/community/triangle_test.cu") ConfigureTest(TRIANGLE_TEST "${TRIANGLE_TEST_SRC}") @@ -245,8 +257,6 @@ ConfigureTest(TRIANGLE_TEST "${TRIANGLE_TEST_SRC}") # - EGO tests -------------------------------------------------------------------------------- set(EGO_TEST_SRC - "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/community/egonet_test.cu") ConfigureTest(EGO_TEST "${EGO_TEST_SRC}" "") @@ -254,8 +264,6 @@ ConfigureTest(EGO_TEST "${EGO_TEST_SRC}" "") # - RENUMBERING tests ----------------------------------------------------------------------------- set(RENUMBERING_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/renumber/renumber_test.cu") ConfigureTest(RENUMBERING_TEST "${RENUMBERING_TEST_SRC}") @@ -264,8 +272,6 @@ ConfigureTest(RENUMBERING_TEST "${RENUMBERING_TEST_SRC}") # - FORCE ATLAS 2 tests -------------------------------------------------------------------------- set(FA2_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/layout/force_atlas2_test.cu") ConfigureTest(FA2_TEST "${FA2_TEST_SRC}") @@ -274,8 +280,6 @@ ConfigureTest(FA2_TEST "${FA2_TEST_SRC}") # - TSP tests -------------------------------------------------------------------------- set(TSP_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/traversal/tsp_test.cu") ConfigureTest(TSP_TEST "${TSP_TEST_SRC}" "") @@ -284,8 +288,6 @@ 
set(TSP_TEST_SRC # - CONNECTED COMPONENTS tests ------------------------------------------------------------------- set(CONNECT_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/components/con_comp_test.cu") ConfigureTest(CONNECT_TEST "${CONNECT_TEST_SRC}") @@ -294,8 +296,6 @@ ConfigureTest(CONNECT_TEST "${CONNECT_TEST_SRC}") # - STRONGLY CONNECTED COMPONENTS tests ---------------------------------------------------------- set(SCC_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/components/scc_test.cu") ConfigureTest(SCC_TEST "${SCC_TEST_SRC}") @@ -304,8 +304,6 @@ ConfigureTest(SCC_TEST "${SCC_TEST_SRC}") #-Hungarian (Linear Assignment Problem) tests --------------------------------------------------------------------- set(HUNGARIAN_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/linear_assignment/hungarian_test.cu") ConfigureTest(HUNGARIAN_TEST "${HUNGARIAN_TEST_SRC}") @@ -314,8 +312,6 @@ ConfigureTest(HUNGARIAN_TEST "${HUNGARIAN_TEST_SRC}") # - MST tests ---------------------------------------------------------------------------- set(MST_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/tree/mst_test.cu") ConfigureTest(MST_TEST "${MST_TEST_SRC}") @@ -325,8 +321,6 @@ ConfigureTest(MST_TEST "${MST_TEST_SRC}") # - Experimental Graph tests ---------------------------------------------------------------------- set(EXPERIMENTAL_GRAPH_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/experimental/graph_test.cpp") ConfigureTest(EXPERIMENTAL_GRAPH_TEST "${EXPERIMENTAL_GRAPH_TEST_SRCS}") @@ -355,8 +349,6 @@ ConfigureTest(EXPERIMENTAL_DEGREE_TEST "${EXPERIMENTAL_DEGREE_TEST_SRCS}") # - Experimental coarsening tests ----------------------------------------------------------------- set(EXPERIMENTAL_COARSEN_GRAPH_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/experimental/coarsen_graph_test.cpp") ConfigureTest(EXPERIMENTAL_COARSEN_GRAPH_TEST "${EXPERIMENTAL_COARSEN_GRAPH_TEST_SRCS}") @@ -365,8 +357,6 @@ ConfigureTest(EXPERIMENTAL_COARSEN_GRAPH_TEST "${EXPERIMENTAL_COARSEN_GRAPH_TEST # - Experimental induced subgraph tests ----------------------------------------------------------- set(EXPERIMENTAL_INDUCED_SUBGRAPH_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/experimental/induced_subgraph_test.cpp") ConfigureTest(EXPERIMENTAL_INDUCED_SUBGRAPH_TEST "${EXPERIMENTAL_INDUCED_SUBGRAPH_TEST_SRCS}") @@ -375,8 +365,6 @@ ConfigureTest(EXPERIMENTAL_INDUCED_SUBGRAPH_TEST "${EXPERIMENTAL_INDUCED_SUBGRAP # - Experimental BFS tests ------------------------------------------------------------------------ set(EXPERIMENTAL_BFS_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/experimental/bfs_test.cpp") ConfigureTest(EXPERIMENTAL_BFS_TEST 
"${EXPERIMENTAL_BFS_TEST_SRCS}") @@ -385,8 +373,6 @@ ConfigureTest(EXPERIMENTAL_BFS_TEST "${EXPERIMENTAL_BFS_TEST_SRCS}") # - Experimental SSSP tests ----------------------------------------------------------------------- set(EXPERIMENTAL_SSSP_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/experimental/sssp_test.cpp") ConfigureTest(EXPERIMENTAL_SSSP_TEST "${EXPERIMENTAL_SSSP_TEST_SRCS}") @@ -395,8 +381,6 @@ ConfigureTest(EXPERIMENTAL_SSSP_TEST "${EXPERIMENTAL_SSSP_TEST_SRCS}") # - Experimental PAGERANK tests ------------------------------------------------------------------- set(EXPERIMENTAL_PAGERANK_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/experimental/pagerank_test.cpp") ConfigureTest(EXPERIMENTAL_PAGERANK_TEST "${EXPERIMENTAL_PAGERANK_TEST_SRCS}") @@ -405,8 +389,6 @@ ConfigureTest(EXPERIMENTAL_PAGERANK_TEST "${EXPERIMENTAL_PAGERANK_TEST_SRCS}") # - Experimental LOUVAIN tests ------------------------------------------------------------------- set(EXPERIMENTAL_LOUVAIN_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/experimental/louvain_test.cu") ConfigureTest(EXPERIMENTAL_LOUVAIN_TEST "${EXPERIMENTAL_LOUVAIN_TEST_SRCS}") @@ -415,8 +397,6 @@ ConfigureTest(EXPERIMENTAL_LOUVAIN_TEST "${EXPERIMENTAL_LOUVAIN_TEST_SRCS}") # - Experimental KATZ_CENTRALITY tests ------------------------------------------------------------ set(EXPERIMENTAL_KATZ_CENTRALITY_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/experimental/katz_centrality_test.cpp") ConfigureTest(EXPERIMENTAL_KATZ_CENTRALITY_TEST "${EXPERIMENTAL_KATZ_CENTRALITY_TEST_SRCS}") @@ -424,16 +404,14 @@ ConfigureTest(EXPERIMENTAL_KATZ_CENTRALITY_TEST "${EXPERIMENTAL_KATZ_CENTRALITY_ ################################################################################################### # - MG tests -------------------------------------------------------------------------------------- + if(BUILD_CUGRAPH_MG_TESTS) if(MPI_CXX_FOUND) ########################################################################################### # - MG PAGERANK tests --------------------------------------------------------------------- set(MG_PAGERANK_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/mg_test_utilities.cu" - "${CMAKE_CURRENT_SOURCE_DIR}/pagerank/pagerank_mg_test.cpp") + "${CMAKE_CURRENT_SOURCE_DIR}/pagerank/mg_pagerank_test.cpp") ConfigureTest(MG_PAGERANK_TEST "${MG_PAGERANK_TEST_SRCS}") target_link_libraries(MG_PAGERANK_TEST PRIVATE MPI::MPI_C MPI::MPI_CXX) diff --git a/cpp/tests/community/egonet_test.cu b/cpp/tests/community/egonet_test.cu index ec031228998..ef2699bd1d0 100644 --- a/cpp/tests/community/egonet_test.cu +++ b/cpp/tests/community/egonet_test.cu @@ -69,9 +69,11 @@ class Tests_InducedEgo : public ::testing::TestWithParam { { raft::handle_t handle{}; - auto graph = cugraph::test:: - read_graph_from_matrix_market_file( - handle, configuration.graph_file_full_path, configuration.test_weighted); + cugraph::experimental::graph_t graph( + handle); + std::tie(graph, std::ignore) = 
cugraph::test:: + read_graph_from_matrix_market_file( + handle, configuration.graph_file_full_path, configuration.test_weighted, false); auto graph_view = graph.view(); rmm::device_uvector d_ego_sources(configuration.ego_sources.size(), diff --git a/cpp/tests/experimental/bfs_test.cpp b/cpp/tests/experimental/bfs_test.cpp index 82286b1e2fa..5b8add98560 100644 --- a/cpp/tests/experimental/bfs_test.cpp +++ b/cpp/tests/experimental/bfs_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -102,9 +102,10 @@ class Tests_BFS : public ::testing::TestWithParam { raft::handle_t handle{}; - auto graph = - cugraph::test::read_graph_from_matrix_market_file( - handle, configuration.graph_file_full_path, false); + cugraph::experimental::graph_t graph(handle); + std::tie(graph, std::ignore) = + cugraph::test::read_graph_from_matrix_market_file( + handle, configuration.graph_file_full_path, false, false); auto graph_view = graph.view(); std::vector h_offsets(graph_view.get_number_of_vertices() + 1); diff --git a/cpp/tests/experimental/coarsen_graph_test.cpp b/cpp/tests/experimental/coarsen_graph_test.cpp index b790dfffa69..941b33e5661 100644 --- a/cpp/tests/experimental/coarsen_graph_test.cpp +++ b/cpp/tests/experimental/coarsen_graph_test.cpp @@ -273,9 +273,11 @@ class Tests_CoarsenGraph : public ::testing::TestWithParam return; } - auto graph = cugraph::test:: - read_graph_from_matrix_market_file( - handle, configuration.graph_file_full_path, configuration.test_weighted); + cugraph::experimental::graph_t graph( + handle); + std::tie(graph, std::ignore) = cugraph::test:: + read_graph_from_matrix_market_file( + handle, configuration.graph_file_full_path, configuration.test_weighted, false); auto graph_view = graph.view(); if (graph_view.get_number_of_vertices() == 0) { return; } diff --git a/cpp/tests/experimental/graph_test.cpp b/cpp/tests/experimental/graph_test.cpp index b80de68f95c..949f6d2e08e 100644 --- a/cpp/tests/experimental/graph_test.cpp +++ b/cpp/tests/experimental/graph_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -91,10 +91,28 @@ class Tests_Graph : public ::testing::TestWithParam { template void run_current_test(Graph_Usecase const& configuration) { - auto mm_graph = - cugraph::test::read_edgelist_from_matrix_market_file( - configuration.graph_file_full_path); - edge_t number_of_edges = static_cast(mm_graph.h_rows.size()); + raft::handle_t handle{}; + + rmm::device_uvector d_rows(0, handle.get_stream()); + rmm::device_uvector d_cols(0, handle.get_stream()); + rmm::device_uvector d_weights(0, handle.get_stream()); + vertex_t number_of_vertices{}; + bool is_symmetric{}; + std::tie(d_rows, d_cols, d_weights, number_of_vertices, is_symmetric) = + cugraph::test::read_edgelist_from_matrix_market_file( + handle, configuration.graph_file_full_path, configuration.test_weighted); + edge_t number_of_edges = static_cast(d_rows.size()); + + std::vector h_rows(number_of_edges); + std::vector h_cols(number_of_edges); + std::vector h_weights(configuration.test_weighted ? 
number_of_edges : edge_t{0}); + + raft::update_host(h_rows.data(), d_rows.data(), number_of_edges, handle.get_stream()); + raft::update_host(h_cols.data(), d_cols.data(), number_of_edges, handle.get_stream()); + if (configuration.test_weighted) { + raft::update_host(h_weights.data(), d_weights.data(), number_of_edges, handle.get_stream()); + } + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); std::vector h_reference_offsets{}; std::vector h_reference_indices{}; @@ -102,28 +120,12 @@ class Tests_Graph : public ::testing::TestWithParam { std::tie(h_reference_offsets, h_reference_indices, h_reference_weights) = graph_reference( - mm_graph.h_rows.data(), - mm_graph.h_cols.data(), - configuration.test_weighted ? mm_graph.h_weights.data() : nullptr, - mm_graph.number_of_vertices, + h_rows.data(), + h_cols.data(), + configuration.test_weighted ? h_weights.data() : static_cast(nullptr), + number_of_vertices, number_of_edges); - raft::handle_t handle{}; - - rmm::device_uvector d_rows(number_of_edges, handle.get_stream()); - rmm::device_uvector d_cols(number_of_edges, handle.get_stream()); - rmm::device_uvector d_weights(configuration.test_weighted ? number_of_edges : 0, - handle.get_stream()); - - raft::update_device( - d_rows.data(), mm_graph.h_rows.data(), number_of_edges, handle.get_stream()); - raft::update_device( - d_cols.data(), mm_graph.h_cols.data(), number_of_edges, handle.get_stream()); - if (configuration.test_weighted) { - raft::update_device( - d_weights.data(), mm_graph.h_weights.data(), number_of_edges, handle.get_stream()); - } - cugraph::experimental::edgelist_t edgelist{ d_rows.data(), d_cols.data(), @@ -136,8 +138,8 @@ class Tests_Graph : public ::testing::TestWithParam { cugraph::experimental::graph_t( handle, edgelist, - mm_graph.number_of_vertices, - cugraph::experimental::graph_properties_t{mm_graph.is_symmetric, false}, + number_of_vertices, + cugraph::experimental::graph_properties_t{is_symmetric, false}, false, true); @@ -145,7 +147,7 @@ class Tests_Graph : public ::testing::TestWithParam { CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement - ASSERT_EQ(graph_view.get_number_of_vertices(), mm_graph.number_of_vertices); + ASSERT_EQ(graph_view.get_number_of_vertices(), number_of_vertices); ASSERT_EQ(graph_view.get_number_of_edges(), number_of_edges); std::vector h_cugraph_offsets(graph_view.get_number_of_vertices() + 1); @@ -174,7 +176,7 @@ class Tests_Graph : public ::testing::TestWithParam { std::equal(h_reference_offsets.begin(), h_reference_offsets.end(), h_cugraph_offsets.begin())) << "Graph compressed sparse format offsets do not match with the reference values."; ASSERT_EQ(h_reference_weights.size(), h_cugraph_weights.size()); - for (vertex_t i = 0; i < mm_graph.number_of_vertices; ++i) { + for (vertex_t i = 0; i < number_of_vertices; ++i) { auto start = h_reference_offsets[i]; auto degree = h_reference_offsets[i + 1] - start; if (configuration.test_weighted) { diff --git a/cpp/tests/experimental/induced_subgraph_test.cpp b/cpp/tests/experimental/induced_subgraph_test.cpp index 72894a9349f..4e0ca9e7d92 100644 --- a/cpp/tests/experimental/induced_subgraph_test.cpp +++ b/cpp/tests/experimental/induced_subgraph_test.cpp @@ -113,9 +113,11 @@ class Tests_InducedSubgraph : public ::testing::TestWithParam( - handle, configuration.graph_file_full_path, configuration.test_weighted); + cugraph::experimental::graph_t graph( + handle); + std::tie(graph, std::ignore) = cugraph::test:: + read_graph_from_matrix_market_file( + handle, 
configuration.graph_file_full_path, configuration.test_weighted, false); auto graph_view = graph.view(); std::vector h_offsets(graph_view.get_number_of_vertices() + 1); diff --git a/cpp/tests/experimental/katz_centrality_test.cpp b/cpp/tests/experimental/katz_centrality_test.cpp index 3e9f0b478a0..945248cc4de 100644 --- a/cpp/tests/experimental/katz_centrality_test.cpp +++ b/cpp/tests/experimental/katz_centrality_test.cpp @@ -117,9 +117,10 @@ class Tests_KatzCentrality : public ::testing::TestWithParam( - handle, configuration.graph_file_full_path, configuration.test_weighted); + cugraph::experimental::graph_t graph(handle); + std::tie(graph, std::ignore) = + cugraph::test::read_graph_from_matrix_market_file( + handle, configuration.graph_file_full_path, configuration.test_weighted, false); auto graph_view = graph.view(); std::vector h_offsets(graph_view.get_number_of_vertices() + 1); diff --git a/cpp/tests/experimental/louvain_test.cu b/cpp/tests/experimental/louvain_test.cu index 35a26923df6..56fb2c109bf 100644 --- a/cpp/tests/experimental/louvain_test.cu +++ b/cpp/tests/experimental/louvain_test.cu @@ -69,9 +69,10 @@ class Tests_Louvain : public ::testing::TestWithParam { std::cout << "read graph file: " << configuration.graph_file_full_path << std::endl; - auto graph = - cugraph::test::read_graph_from_matrix_market_file( - handle, configuration.graph_file_full_path, configuration.test_weighted); + cugraph::experimental::graph_t graph(handle); + std::tie(graph, std::ignore) = + cugraph::test::read_graph_from_matrix_market_file( + handle, configuration.graph_file_full_path, configuration.test_weighted, false); auto graph_view = graph.view(); diff --git a/cpp/tests/experimental/pagerank_test.cpp b/cpp/tests/experimental/pagerank_test.cpp index 53143bf0bf3..514f73e3311 100644 --- a/cpp/tests/experimental/pagerank_test.cpp +++ b/cpp/tests/experimental/pagerank_test.cpp @@ -155,9 +155,10 @@ class Tests_PageRank : public ::testing::TestWithParam { { raft::handle_t handle{}; - auto graph = - cugraph::test::read_graph_from_matrix_market_file( - handle, configuration.graph_file_full_path, configuration.test_weighted); + cugraph::experimental::graph_t graph(handle); + std::tie(graph, std::ignore) = + cugraph::test::read_graph_from_matrix_market_file( + handle, configuration.graph_file_full_path, configuration.test_weighted, false); auto graph_view = graph.view(); std::vector h_offsets(graph_view.get_number_of_vertices() + 1); @@ -225,11 +226,11 @@ class Tests_PageRank : public ::testing::TestWithParam { handle.get_stream()); } - std::vector h_reference_pageranks(graph_view.get_number_of_vertices()); - result_t constexpr alpha{0.85}; result_t constexpr epsilon{1e-6}; + std::vector h_reference_pageranks(graph_view.get_number_of_vertices()); + pagerank_reference(h_offsets.data(), h_indices.data(), h_weights.size() > 0 ? h_weights.data() : static_cast(nullptr), diff --git a/cpp/tests/experimental/sssp_test.cpp b/cpp/tests/experimental/sssp_test.cpp index 2f7cc499d35..7fd59d49a25 100644 --- a/cpp/tests/experimental/sssp_test.cpp +++ b/cpp/tests/experimental/sssp_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
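The single-GPU test hunks above (egonet, bfs, coarsen_graph, graph, induced_subgraph, katz_centrality, louvain, pagerank) and the sssp hunk below all migrate to the same construction pattern: the graph is default-constructed from the handle and then filled via std::tie, with read_graph_from_matrix_market_file now returning a (graph, renumber map) tuple and taking an extra trailing boolean. A minimal sketch of that pattern follows; the header paths, the template arguments, and the reading of the trailing flag as a renumber switch are assumptions for illustration, not text taken from this patch.

#include <experimental/graph.hpp>        // cugraph::experimental::graph_t (path assumed)
#include <utilities/test_utilities.hpp>  // cugraph::test::read_graph_from_matrix_market_file (path assumed)
#include <raft/handle.hpp>
#include <string>
#include <tuple>

void read_sg_test_graph(std::string const& graph_file_full_path, bool test_weighted)
{
  raft::handle_t handle{};
  // store_transposed = true, multi_gpu = false are assumed here to match the SG pagerank test above
  cugraph::experimental::graph_t<int32_t, int32_t, float, true, false> graph(handle);
  std::tie(graph, std::ignore) =
    cugraph::test::read_graph_from_matrix_market_file<int32_t, int32_t, float, true, false>(
      handle, graph_file_full_path, test_weighted, false);  // trailing flag assumed to mean "renumber"
  auto graph_view = graph.view();
  (void)graph_view;  // the tests above go on to copy offsets/indices/weights to host for the reference check
}

The new MG test below makes the same call with the trailing flag set to true and keeps the returned renumber map instead of discarding it.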
@@ -106,9 +106,10 @@ class Tests_SSSP : public ::testing::TestWithParam { { raft::handle_t handle{}; - auto graph = - cugraph::test::read_graph_from_matrix_market_file( - handle, configuration.graph_file_full_path, true); + cugraph::experimental::graph_t graph(handle); + std::tie(graph, std::ignore) = + cugraph::test::read_graph_from_matrix_market_file( + handle, configuration.graph_file_full_path, true, false); auto graph_view = graph.view(); std::vector h_offsets(graph_view.get_number_of_vertices() + 1); diff --git a/cpp/tests/pagerank/mg_pagerank_test.cpp b/cpp/tests/pagerank/mg_pagerank_test.cpp new file mode 100644 index 00000000000..cf9f452162b --- /dev/null +++ b/cpp/tests/pagerank/mg_pagerank_test.cpp @@ -0,0 +1,296 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include + +#include +#include +#include + +#include + +#include + +typedef struct Pagerank_Usecase_t { + std::string graph_file_full_path{}; + double personalization_ratio{0.0}; + bool test_weighted{false}; + + Pagerank_Usecase_t(std::string const& graph_file_path, + double personalization_ratio, + bool test_weighted) + : personalization_ratio(personalization_ratio), test_weighted(test_weighted) + { + if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { + graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; + } else { + graph_file_full_path = graph_file_path; + } + }; +} Pagerank_Usecase; + +class Tests_MGPageRank : public ::testing::TestWithParam { + public: + Tests_MGPageRank() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + // Compare the results of running pagerank on multiple GPUs to that of a single-GPU run + template + void run_current_test(Pagerank_Usecase const& configuration) + { + // 1. initialize handle + + raft::handle_t handle{}; + + raft::comms::initialize_mpi_comms(&handle, MPI_COMM_WORLD); + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + + auto row_comm_size = static_cast(sqrt(static_cast(comm_size))); + while (comm_size % row_comm_size != 0) { --row_comm_size; } + cugraph::partition_2d::subcomm_factory_t + subcomm_factory(handle, row_comm_size); + + // 2. 
create SG & MG graphs + + cugraph::experimental::graph_t sg_graph(handle); + rmm::device_uvector d_sg_renumber_map_labels(0, handle.get_stream()); + std::tie(sg_graph, d_sg_renumber_map_labels) = + cugraph::test::read_graph_from_matrix_market_file( + handle, configuration.graph_file_full_path, configuration.test_weighted, true); + + auto sg_graph_view = sg_graph.view(); + + cugraph::experimental::graph_t mg_graph(handle); + rmm::device_uvector d_mg_renumber_map_labels(0, handle.get_stream()); + std::tie(mg_graph, d_mg_renumber_map_labels) = + cugraph::test::read_graph_from_matrix_market_file( + handle, configuration.graph_file_full_path, configuration.test_weighted, true); + + auto mg_graph_view = mg_graph.view(); + + std::vector h_sg_renumber_map_labels(d_sg_renumber_map_labels.size()); + raft::update_host(h_sg_renumber_map_labels.data(), + d_sg_renumber_map_labels.data(), + d_sg_renumber_map_labels.size(), + handle.get_stream()); + + std::vector h_mg_renumber_map_labels(mg_graph_view.get_number_of_local_vertices()); + raft::update_host(h_mg_renumber_map_labels.data(), + d_mg_renumber_map_labels.data(), + d_mg_renumber_map_labels.size(), + handle.get_stream()); + + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + + // 2. generate personalization vertex/value pairs + + std::vector h_personalization_vertices{}; + std::vector h_personalization_values{}; + if (configuration.personalization_ratio > 0.0) { + std::default_random_engine generator{}; + std::uniform_real_distribution distribution{0.0, 1.0}; + h_personalization_vertices.resize(sg_graph_view.get_number_of_vertices()); + std::iota(h_personalization_vertices.begin(), h_personalization_vertices.end(), vertex_t{0}); + h_personalization_vertices.erase( + std::remove_if(h_personalization_vertices.begin(), + h_personalization_vertices.end(), + [&generator, &distribution, configuration](auto v) { + return distribution(generator) >= configuration.personalization_ratio; + }), + h_personalization_vertices.end()); + h_personalization_values.resize(h_personalization_vertices.size()); + std::for_each(h_personalization_values.begin(), + h_personalization_values.end(), + [&distribution, &generator](auto& val) { val = distribution(generator); }); + } + + result_t constexpr alpha{0.85}; + result_t constexpr epsilon{1e-6}; + + // 3. 
run SG pagerank + + std::vector h_sg_personalization_vertices{}; + std::vector h_sg_personalization_values{}; + if (h_personalization_vertices.size() > 0) { + for (vertex_t i = 0; i < sg_graph_view.get_number_of_vertices(); ++i) { + auto it = std::lower_bound(h_personalization_vertices.begin(), + h_personalization_vertices.end(), + h_sg_renumber_map_labels[i]); + if (*it == h_sg_renumber_map_labels[i]) { + h_sg_personalization_vertices.push_back(i); + h_sg_personalization_values.push_back( + h_personalization_values[std::distance(h_personalization_vertices.begin(), it)]); + } + } + } + + rmm::device_uvector d_sg_personalization_vertices( + h_sg_personalization_vertices.size(), handle.get_stream()); + rmm::device_uvector d_sg_personalization_values(d_sg_personalization_vertices.size(), + handle.get_stream()); + if (d_sg_personalization_vertices.size() > 0) { + raft::update_device(d_sg_personalization_vertices.data(), + h_sg_personalization_vertices.data(), + h_sg_personalization_vertices.size(), + handle.get_stream()); + raft::update_device(d_sg_personalization_values.data(), + h_sg_personalization_values.data(), + h_sg_personalization_values.size(), + handle.get_stream()); + } + + rmm::device_uvector d_sg_pageranks(sg_graph_view.get_number_of_vertices(), + handle.get_stream()); + + cugraph::experimental::pagerank(handle, + sg_graph_view, + static_cast(nullptr), + d_sg_personalization_vertices.data(), + d_sg_personalization_values.data(), + static_cast(d_sg_personalization_vertices.size()), + d_sg_pageranks.begin(), + alpha, + epsilon, + std::numeric_limits::max(), // max_iterations + false, + false); + + std::vector h_sg_pageranks(sg_graph_view.get_number_of_vertices()); + raft::update_host( + h_sg_pageranks.data(), d_sg_pageranks.data(), d_sg_pageranks.size(), handle.get_stream()); + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + + // 4. 
run MG pagerank + + std::vector h_mg_personalization_vertices{}; + std::vector h_mg_personalization_values{}; + if (h_personalization_vertices.size() > 0) { + for (vertex_t i = 0; i < mg_graph_view.get_number_of_local_vertices(); ++i) { + auto it = std::lower_bound(h_personalization_vertices.begin(), + h_personalization_vertices.end(), + h_mg_renumber_map_labels[i]); + if (*it == h_mg_renumber_map_labels[i]) { + h_mg_personalization_vertices.push_back(mg_graph_view.get_local_vertex_first() + i); + h_mg_personalization_values.push_back( + h_personalization_values[std::distance(h_personalization_vertices.begin(), it)]); + } + } + } + + rmm::device_uvector d_mg_personalization_vertices( + h_mg_personalization_vertices.size(), handle.get_stream()); + rmm::device_uvector d_mg_personalization_values(d_mg_personalization_vertices.size(), + handle.get_stream()); + if (d_mg_personalization_vertices.size() > 0) { + raft::update_device(d_mg_personalization_vertices.data(), + h_mg_personalization_vertices.data(), + h_mg_personalization_vertices.size(), + handle.get_stream()); + raft::update_device(d_mg_personalization_values.data(), + h_mg_personalization_values.data(), + h_mg_personalization_values.size(), + handle.get_stream()); + } + + rmm::device_uvector d_mg_pageranks(mg_graph_view.get_number_of_local_vertices(), + handle.get_stream()); + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + cugraph::experimental::pagerank(handle, + mg_graph_view, + static_cast(nullptr), + d_mg_personalization_vertices.data(), + d_mg_personalization_values.data(), + static_cast(d_mg_personalization_vertices.size()), + d_mg_pageranks.begin(), + alpha, + epsilon, + std::numeric_limits::max(), + false, + false); + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + std::vector h_mg_pageranks(mg_graph_view.get_number_of_local_vertices()); + raft::update_host( + h_mg_pageranks.data(), d_mg_pageranks.data(), d_mg_pageranks.size(), handle.get_stream()); + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + + // 5. 
compare SG & MG results + + std::vector h_sg_shuffled_pageranks(sg_graph_view.get_number_of_vertices(), + result_t{0.0}); + for (size_t i = 0; i < h_sg_pageranks.size(); ++i) { + h_sg_shuffled_pageranks[h_sg_renumber_map_labels[i]] = h_sg_pageranks[i]; + } + + auto threshold_ratio = 1e-3; + auto threshold_magnitude = + (1.0 / static_cast(mg_graph_view.get_number_of_vertices())) * + threshold_ratio; // skip comparison for low PageRank vertices (lowly ranked vertices) + auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { + return std::abs(lhs - rhs) < + std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude); + }; + + for (vertex_t i = 0; i < mg_graph_view.get_number_of_local_vertices(); ++i) { + auto mapped_vertex = h_mg_renumber_map_labels[i]; + ASSERT_TRUE(nearly_equal(h_mg_pageranks[i], h_sg_shuffled_pageranks[mapped_vertex])) + << "MG PageRank value for vertex: " << i << " in rank: " << comm_rank + << " has value: " << h_mg_pageranks[i] + << " which exceeds the error margin for comparing to SG value: " + << h_sg_shuffled_pageranks[mapped_vertex]; + } + } +}; + +TEST_P(Tests_MGPageRank, CheckInt32Int32FloatFloat) +{ + run_current_test(GetParam()); +} + +INSTANTIATE_TEST_CASE_P( + simple_test, + Tests_MGPageRank, + ::testing::Values(Pagerank_Usecase("test/datasets/karate.mtx", 0.0, false), + Pagerank_Usecase("test/datasets/karate.mtx", 0.5, false), + Pagerank_Usecase("test/datasets/karate.mtx", 0.0, true), + Pagerank_Usecase("test/datasets/karate.mtx", 0.5, true), + Pagerank_Usecase("test/datasets/web-Google.mtx", 0.0, false), + Pagerank_Usecase("test/datasets/web-Google.mtx", 0.5, false), + Pagerank_Usecase("test/datasets/web-Google.mtx", 0.0, true), + Pagerank_Usecase("test/datasets/web-Google.mtx", 0.5, true), + Pagerank_Usecase("test/datasets/ljournal-2008.mtx", 0.0, false), + Pagerank_Usecase("test/datasets/ljournal-2008.mtx", 0.5, false), + Pagerank_Usecase("test/datasets/ljournal-2008.mtx", 0.0, true), + Pagerank_Usecase("test/datasets/ljournal-2008.mtx", 0.5, true), + Pagerank_Usecase("test/datasets/webbase-1M.mtx", 0.0, false), + Pagerank_Usecase("test/datasets/webbase-1M.mtx", 0.5, false), + Pagerank_Usecase("test/datasets/webbase-1M.mtx", 0.0, true), + Pagerank_Usecase("test/datasets/webbase-1M.mtx", 0.5, true))); + +CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/pagerank/pagerank_mg_test.cpp b/cpp/tests/pagerank/pagerank_mg_test.cpp deleted file mode 100644 index 7f789226bf1..00000000000 --- a/cpp/tests/pagerank/pagerank_mg_test.cpp +++ /dev/null @@ -1,229 +0,0 @@ -/* - * Copyright (c) 2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include - -#include -#include - -#include - -#include - -//////////////////////////////////////////////////////////////////////////////// -// Test param object.
This defines the input and expected output for a test, and -// will be instantiated as the parameter to the tests defined below using -// INSTANTIATE_TEST_CASE_P() -// -typedef struct Pagerank_Testparams_t { - std::string graph_file_full_path{}; - double personalization_ratio{0.0}; - bool test_weighted{false}; - - Pagerank_Testparams_t(std::string const& graph_file_path, - double personalization_ratio, - bool test_weighted) - : personalization_ratio(personalization_ratio), test_weighted(test_weighted) - { - if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { - graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; - } else { - graph_file_full_path = graph_file_path; - } - }; -} Pagerank_Testparams_t; - -//////////////////////////////////////////////////////////////////////////////// -// Parameterized test fixture, to be used with TEST_P(). This defines common -// setup and teardown steps as well as common utilities used by each E2E MG -// test. In this case, each test is identical except for the inputs and -// expected outputs, so the entire test is defined in the run_test() method. -// -class Pagerank_E2E_MG_Testfixture_t : public cugraph::test::MG_TestFixture_t, - public ::testing::WithParamInterface { - public: - Pagerank_E2E_MG_Testfixture_t() {} - - // Run once for each test instance - virtual void SetUp() {} - virtual void TearDown() {} - - // Return the results of running pagerank on a single GPU for the dataset in - // graph_file_path. - template - std::vector get_sg_results(raft::handle_t& handle, - const std::string& graph_file_path, - const result_t alpha, - const result_t epsilon) - { - auto graph = - cugraph::test::read_graph_from_matrix_market_file( - handle, graph_file_path, true); // FIXME: should use param.test_weighted instead of true - - auto graph_view = graph.view(); - cudaStream_t stream = handle.get_stream(); - rmm::device_uvector d_pageranks(graph_view.get_number_of_vertices(), stream); - - cugraph::experimental::pagerank( - handle, - graph_view, - static_cast(nullptr), // adj_matrix_row_out_weight_sums - static_cast(nullptr), // personalization_vertices - static_cast(nullptr), // personalization_values - static_cast(0), // personalization_vector_size - d_pageranks.begin(), // pageranks - alpha, // alpha (damping factor) - epsilon, // error tolerance for convergence - std::numeric_limits::max(), // max_iterations - false, // has_initial_guess - true); // do_expensive_check - - std::vector h_pageranks(graph_view.get_number_of_vertices()); - raft::update_host(h_pageranks.data(), d_pageranks.data(), d_pageranks.size(), stream); - - return h_pageranks; - } - - // Compare the results of running pagerank on multiple GPUs to that of a - // single-GPU run for the configuration in param. - template - void run_test(const Pagerank_Testparams_t& param) - { - result_t constexpr alpha{0.85}; - result_t constexpr epsilon{1e-6}; - - raft::handle_t handle; - raft::comms::initialize_mpi_comms(&handle, MPI_COMM_WORLD); - const auto& comm = handle.get_comms(); - - cudaStream_t stream = handle.get_stream(); - - // Assuming 2 GPUs which means 1 row, 2 cols. 2 cols = row_comm_size of 2. - // FIXME: DO NOT ASSUME 2 GPUs, add code to compute prows, pcols - size_t row_comm_size{2}; - cugraph::partition_2d::subcomm_factory_t - subcomm_factory(handle, row_comm_size); - - int my_rank = comm.get_rank(); - - // FIXME: graph must be weighted! 
- std::unique_ptr> // store_transposed=true, - // multi_gpu=true - mg_graph_ptr{}; - rmm::device_uvector d_renumber_map_labels(0, handle.get_stream()); - - std::tie(mg_graph_ptr, d_renumber_map_labels) = cugraph::test:: - create_graph_for_gpu // store_transposed=true - (handle, param.graph_file_full_path); - - auto mg_graph_view = mg_graph_ptr->view(); - - rmm::device_uvector d_mg_pageranks(mg_graph_view.get_number_of_vertices(), stream); - CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement - - cugraph::experimental::pagerank( - handle, - mg_graph_view, - static_cast(nullptr), // adj_matrix_row_out_weight_sums - static_cast(nullptr), // personalization_vertices - static_cast(nullptr), // personalization_values - static_cast(0), // personalization_vector_size - d_mg_pageranks.begin(), // pageranks - alpha, // alpha (damping factor) - epsilon, // error tolerance for convergence - std::numeric_limits::max(), // max_iterations - false, // has_initial_guess - true); // do_expensive_check - - std::vector h_mg_pageranks(mg_graph_view.get_number_of_vertices()); - - raft::update_host(h_mg_pageranks.data(), d_mg_pageranks.data(), d_mg_pageranks.size(), stream); - - std::vector h_renumber_map_labels(mg_graph_view.get_number_of_vertices()); - raft::update_host(h_renumber_map_labels.data(), - d_renumber_map_labels.data(), - d_renumber_map_labels.size(), - stream); - - // Compare MG to SG - // Each GPU will have pagerank values for their range, so ech GPU must - // compare to specific SG results for their respective range. - - auto h_sg_pageranks = get_sg_results( - handle, param.graph_file_full_path, alpha, epsilon); - - // For this test, each GPU will have the full set of vertices and - // therefore the pageranks vectors should be equal in size. - ASSERT_EQ(h_sg_pageranks.size(), h_mg_pageranks.size()); - - auto threshold_ratio = 1e-3; - auto threshold_magnitude = - (1.0 / static_cast(mg_graph_view.get_number_of_vertices())) * - threshold_ratio; // skip comparison for low PageRank verties (lowly ranked vertices) - auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { - return std::abs(lhs - rhs) < - std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude); - }; - - vertex_t mapped_vertex{0}; - for (vertex_t i = 0; - i + mg_graph_view.get_local_vertex_first() < mg_graph_view.get_local_vertex_last(); - ++i) { - mapped_vertex = h_renumber_map_labels[i]; - ASSERT_TRUE(nearly_equal(h_mg_pageranks[i], h_sg_pageranks[mapped_vertex])) - << "MG PageRank value for vertex: " << i << " in rank: " << my_rank - << " has value: " << h_mg_pageranks[i] - << " which exceeds the error margin for comparing to SG value: " << h_sg_pageranks[i]; - } - } -}; - -//////////////////////////////////////////////////////////////////////////////// -TEST_P(Pagerank_E2E_MG_Testfixture_t, CheckInt32Int32FloatFloat) -{ - run_test(GetParam()); -} - -INSTANTIATE_TEST_CASE_P( - e2e, - Pagerank_E2E_MG_Testfixture_t, - - // FIXME: the personalization_ratio and use_weighted boo are not used - // (personilization vectors are not used, and all datasets are assumed - // weighted). update this to use personilization vectors and non-weighted - // graphs. - ::testing::Values(Pagerank_Testparams_t("test/datasets/karate.mtx", 0.0, true), - // FIXME: The commented datasets contain isolate vertices - // which result in a different number of vertices in the - // renumbered MG graph (because the renumbering function - // does not include them) vs. 
the SG graph object used for - // the pagerank comparison because the SG graph reads the - // COO as-is without renumbering. Update the utility that - // reads a .mtx and constructs a SG graph object to also - // renumber and return the renumber vertices vector. This - // will result in a comparison of an equal number of - // pagerank values. - // - // Pagerank_Testparams_t("test/datasets/web-Google.mtx", 0.0, true), - // Pagerank_Testparams_t("test/datasets/ljournal-2008.mtx", 0.0, true), - Pagerank_Testparams_t("test/datasets/webbase-1M.mtx", 0.0, true))); - -// FIXME: Enable proper RMM configuration by using CUGRAPH_TEST_PROGRAM_MAIN(). -// Currently seeing a RMM failure during init, need to investigate. -// CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/utilities/base_fixture.hpp b/cpp/tests/utilities/base_fixture.hpp index 3525db73425..e8f11acfbf4 100644 --- a/cpp/tests/utilities/base_fixture.hpp +++ b/cpp/tests/utilities/base_fixture.hpp @@ -32,18 +32,17 @@ namespace cugraph { namespace test { -// FIXME: The BaseFixture class is not used in any tests. This file is only -// needed for the CUGRAPH_TEST_PROGRAM_MAIN macro and the code that it calls, so -// consider removing the BaseFixture class and renaming this file, or moving -// CUGRAPH_TEST_PROGRAM_MAIN to the test_utilities.hpp file and removing this -// file completely. +// FIXME: The BaseFixture class is not used in any tests. This file is only needed for the +// CUGRAPH_TEST_PROGRAM_MAIN macro and the code that it calls, so consider removing the BaseFixture +// class and renaming this file, or moving CUGRAPH_TEST_PROGRAM_MAIN to the test_utilities.hpp file +// and removing this file completely. /** - * @brief Base test fixture class from which all libcudf tests should inherit. + * @brief Base test fixture class from which all libcugraph tests should inherit. * * Example: * ``` - * class MyTestFixture : public cudf::test::BaseFixture {}; + * class MyTestFixture : public cugraph::test::BaseFixture {}; * ``` **/ class BaseFixture : public ::testing::Test { @@ -51,8 +50,8 @@ class BaseFixture : public ::testing::Test { public: /** - * @brief Returns pointer to `device_memory_resource` that should be used for - * all tests inheriting from this fixture + * @brief Returns pointer to `device_memory_resource` that should be used for all tests inheriting + *from this fixture **/ rmm::mr::device_memory_resource *mr() { return _mr; } }; @@ -77,15 +76,14 @@ inline auto make_binning() } /** - * @brief Creates a memory resource for the unit test environment - * given the name of the allocation mode. + * @brief Creates a memory resource for the unit test environment given the name of the allocation + * mode. * - * The returned resource instance must be kept alive for the duration of - * the tests. Attaching the resource to a TestEnvironment causes - * issues since the environment objects are not destroyed until + * The returned resource instance must be kept alive for the duration of the tests. Attaching the + * resource to a TestEnvironment causes issues since the environment objects are not destroyed until * after the runtime is shutdown. * - * @throw cudf::logic_error if the `allocation_mode` is unsupported. + * @throw cugraph::logic_error if the `allocation_mode` is unsupported. * * @param allocation_mode String identifies which resource type. * Accepted types are "pool", "cuda", and "managed" only. 
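The rmm_mode option documented above selects the device memory resource for the whole test binary. A minimal sketch of a test main() using these helpers, assuming they behave as documented above and spelling out what the CUGRAPH_TEST_PROGRAM_MAIN macro in the next hunk expands to (the cxxopts value type is an assumption):

int main(int argc, char **argv)
{
  ::testing::InitGoogleTest(&argc, argv);
  auto const cmd_opts = parse_test_options(argc, argv);
  auto const rmm_mode = cmd_opts["rmm_mode"].as<std::string>();  // "pool" (default), "cuda", or "managed"
  auto resource       = cugraph::test::create_memory_resource(rmm_mode);
  rmm::mr::set_current_device_resource(resource.get());  // `resource` must stay alive for all tests
  return RUN_ALL_TESTS();
}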
@@ -105,17 +103,17 @@ inline std::shared_ptr create_memory_resource( } // namespace cugraph /** - * @brief Parses the cuDF test command line options. + * @brief Parses the cuGraph test command line options. * - * Currently only supports 'rmm_mode' string paramater, which set the rmm - * allocation mode. The default value of the parameter is 'pool'. + * Currently only supports the 'rmm_mode' string parameter, which sets the rmm allocation mode. The + * default value of the parameter is 'pool'. * * @return Parsing results in the form of cxxopts::ParseResult */ inline auto parse_test_options(int argc, char **argv) { try { - cxxopts::Options options(argv[0], " - cuDF tests command line options"); + cxxopts::Options options(argv[0], " - cuGraph tests command line options"); options.allow_unrecognised_options().add_options()( "rmm_mode", "RMM allocation mode", cxxopts::value()->default_value("pool")); @@ -128,13 +126,11 @@ inline auto parse_test_options(int argc, char **argv) /** * @brief Macro that defines main function for gtest programs that use rmm * - * Should be included in every test program that uses rmm allocators since - * it maintains the lifespan of the rmm default memory resource. - * This `main` function is a wrapper around the google test generated `main`, - * maintaining the original functionality. In addition, this custom `main` - * function parses the command line to customize test behavior, like the - * allocation mode used for creating the default memory resource. - * + * Should be included in every test program that uses rmm allocators since it maintains the lifespan + * of the rmm default memory resource. This `main` function is a wrapper around the google test + * generated `main`, maintaining the original functionality. In addition, this custom `main` + * function parses the command line to customize test behavior, like the allocation mode used for + * creating the default memory resource. */ #define CUGRAPH_TEST_PROGRAM_MAIN() \ int main(int argc, char **argv) \ { \ @@ -146,3 +142,26 @@ inline auto parse_test_options(int argc, char **argv) rmm::mr::set_current_device_resource(resource.get()); \ return RUN_ALL_TESTS(); \ } + +#define CUGRAPH_MG_TEST_PROGRAM_MAIN() \ + int main(int argc, char **argv) \ + { \ + MPI_TRY(MPI_Init(&argc, &argv)); \ + int comm_rank{}; \ + int comm_size{}; \ + MPI_TRY(MPI_Comm_rank(MPI_COMM_WORLD, &comm_rank)); \ + MPI_TRY(MPI_Comm_size(MPI_COMM_WORLD, &comm_size)); \ + int num_gpus{}; \ + CUDA_TRY(cudaGetDeviceCount(&num_gpus)); \ + CUGRAPH_EXPECTS( \ + comm_size <= num_gpus, "# MPI ranks (%d) > # GPUs (%d).", comm_size, num_gpus); \ + CUDA_TRY(cudaSetDevice(comm_rank)); \ + ::testing::InitGoogleTest(&argc, argv); \ + auto const cmd_opts = parse_test_options(argc, argv); \ + auto const rmm_mode = cmd_opts["rmm_mode"].as(); \ + auto resource = cugraph::test::create_memory_resource(rmm_mode); \ + rmm::mr::set_current_device_resource(resource.get()); \ + auto ret = RUN_ALL_TESTS(); \ + MPI_TRY(MPI_Finalize()); \ + return ret; \ + } diff --git a/cpp/tests/utilities/mg_test_utilities.cu b/cpp/tests/utilities/mg_test_utilities.cu deleted file mode 100644 index 26f2450b589..00000000000 --- a/cpp/tests/utilities/mg_test_utilities.cu +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Copyright (c) 2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include - -#include -#include -#include -#include - -namespace cugraph { -namespace test { - -// Given a raft handle and a path to a dataset (must be a .mtx file), returns a -// tuple containing: -// * graph_t instance for the partition accesible from the raft handle -// * vector of indices representing the original unrenumberd vertices -// -// This function creates a graph_t instance appropriate for MG graph -// applications from the edgelist graph data file passed in by filtering out the -// vertices not to be assigned to the GPU in this rank, then renumbering the -// vertices appropriately. The returned vector of vertices contains the original -// vertex IDs, ordered by the new sequential renumbered IDs (this is needed for -// unrenumbering). -template -std::tuple< - std::unique_ptr>, // multi_gpu=true - rmm::device_uvector> -create_graph_for_gpu(raft::handle_t& handle, const std::string& graph_file_path) -{ - const auto& comm = handle.get_comms(); - auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - - int my_rank = comm.get_rank(); - - auto edgelist_from_mm = - ::cugraph::test::read_edgelist_from_matrix_market_file( - graph_file_path); - - edge_t total_number_edges = static_cast(edgelist_from_mm.h_rows.size()); - - ////////// - // Copy COO to device - rmm::device_uvector d_edgelist_rows(total_number_edges, handle.get_stream()); - rmm::device_uvector d_edgelist_cols(total_number_edges, handle.get_stream()); - rmm::device_uvector d_edgelist_weights(total_number_edges, handle.get_stream()); - - raft::update_device(d_edgelist_rows.data(), - edgelist_from_mm.h_rows.data(), - total_number_edges, - handle.get_stream()); - raft::update_device(d_edgelist_cols.data(), - edgelist_from_mm.h_cols.data(), - total_number_edges, - handle.get_stream()); - raft::update_device(d_edgelist_weights.data(), - edgelist_from_mm.h_weights.data(), - total_number_edges, - handle.get_stream()); - - ////////// - // Filter out edges that are not to be associated with this rank - // - // Create a edge_gpu_identifier, which will be used by the individual jobs to - // identify if a edge belongs to a particular rank - cugraph::experimental::detail::compute_gpu_id_from_edge_t edge_gpu_identifier{ - false, comm.get_size(), row_comm.get_size(), col_comm.get_size()}; - - auto edgelist_zip_it_begin = thrust::make_zip_iterator(thrust::make_tuple( - d_edgelist_rows.begin(), d_edgelist_cols.begin(), d_edgelist_weights.begin())); - bool is_transposed{store_transposed}; - - // Do the removal - note: remove_if does not delete items, it moves "removed" - // items to the back of the vector and returns the iterator (new_end) that - // represents the items kept. Actual removal of items can be done by - // resizing (see below). 
- auto new_end = thrust::remove_if( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - edgelist_zip_it_begin, - edgelist_zip_it_begin + total_number_edges, - [my_rank, is_transposed, edge_gpu_identifier] __device__(auto tup) { - if (is_transposed) { - return (edge_gpu_identifier(thrust::get<1>(tup), thrust::get<0>(tup)) != my_rank); - } else { - return (edge_gpu_identifier(thrust::get<0>(tup), thrust::get<1>(tup)) != my_rank); - } - }); - - edge_t local_number_edges = thrust::distance(edgelist_zip_it_begin, new_end); - // Free the memory used for the items remove_if "removed". This not only - // frees memory, but keeps the actual vector sizes consistent with the data - // being used from this point forward. - d_edgelist_rows.resize(local_number_edges, handle.get_stream()); - d_edgelist_rows.shrink_to_fit(handle.get_stream()); - d_edgelist_cols.resize(local_number_edges, handle.get_stream()); - d_edgelist_cols.shrink_to_fit(handle.get_stream()); - d_edgelist_weights.resize(local_number_edges, handle.get_stream()); - d_edgelist_weights.shrink_to_fit(handle.get_stream()); - - ////////// - // renumber filtered edgelist_from_mm - vertex_t* major_vertices{nullptr}; - vertex_t* minor_vertices{nullptr}; - if (is_transposed) { - major_vertices = d_edgelist_cols.data(); - minor_vertices = d_edgelist_rows.data(); - } else { - major_vertices = d_edgelist_rows.data(); - minor_vertices = d_edgelist_cols.data(); - } - - rmm::device_uvector renumber_map_labels(0, handle.get_stream()); - cugraph::experimental::partition_t partition( - std::vector(comm.get_size() + 1, 0), - false, // is_hypergraph_partitioned() - row_comm.get_size(), - col_comm.get_size(), - row_comm.get_rank(), - col_comm.get_rank()); - vertex_t number_of_vertices{}; - edge_t number_of_edges{}; - std::tie(renumber_map_labels, partition, number_of_vertices, number_of_edges) = - ::cugraph::experimental::renumber_edgelist // multi_gpu=true - (handle, - major_vertices, // edgelist_major_vertices, INOUT of vertex_t* - minor_vertices, // edgelist_minor_vertices, INOUT of vertex_t* - local_number_edges, - false, // is_hypergraph_partitioned - true); // do_expensive_check - - cugraph::experimental::edgelist_t edgelist{ - d_edgelist_rows.data(), d_edgelist_cols.data(), d_edgelist_weights.data(), local_number_edges}; - - std::vector> edgelist_vect; - edgelist_vect.push_back(edgelist); - cugraph::experimental::graph_properties_t properties; - properties.is_symmetric = edgelist_from_mm.is_symmetric; - properties.is_multigraph = false; - - // Finally, create instance of graph_t using filtered & renumbered edgelist - return std::make_tuple( - std::make_unique< - cugraph::experimental::graph_t>( - handle, - edgelist_vect, - partition, - number_of_vertices, - total_number_edges, - properties, - false, // sorted_by_global_degree_within_vertex_partition - true), // do_expensive_check - std::move(renumber_map_labels)); -} - -// explicit instantiation -template std::tuple< - std::unique_ptr< - cugraph::experimental::graph_t>, // store_transposed=true - // multi_gpu=true - rmm::device_uvector> -create_graph_for_gpu(raft::handle_t& handle, const std::string& graph_file_path); - -} // namespace test -} // namespace cugraph diff --git a/cpp/tests/utilities/mg_test_utilities.hpp b/cpp/tests/utilities/mg_test_utilities.hpp deleted file mode 100644 index c23f6c43a6d..00000000000 --- a/cpp/tests/utilities/mg_test_utilities.hpp +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2021, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include - -#include - -#include - -namespace cugraph { -namespace test { - -// Given a raft handle and a path to a dataset (must be a .mtx file), returns a -// tuple containing: -// * graph_t instance for the partition accesible from the raft handle -// * 4-tuple containing renumber info resulting from renumbering the -// edgelist for the partition -template -std::tuple< - std::unique_ptr>, // multi_gpu=true - rmm::device_uvector> -create_graph_for_gpu(raft::handle_t& handle, const std::string& graph_file_path); - -/** - * @brief Base test fixture class, responsible for handling common operations - * needed by all MG tests. - * - * It's expected this class will be built out and refactored often as new MG C++ - * tests are added and new patterns evolve. - * - * Example: - * ``` - * class MyTestFixture : public cugraph::test::MG_TestFixture_t {}; - * ``` - **/ - -// FIXME: consider moving this to a separate file? (eg. mg_test_fixture.cpp)? - -class MG_TestFixture_t : public ::testing::Test { - public: - static void SetUpTestCase() - { - MPI_TRY(MPI_Init(NULL, NULL)); - - int rank, size; - MPI_TRY(MPI_Comm_rank(MPI_COMM_WORLD, &rank)); - MPI_TRY(MPI_Comm_size(MPI_COMM_WORLD, &size)); - - int nGpus; - CUDA_CHECK(cudaGetDeviceCount(&nGpus)); - - ASSERT( - nGpus >= size, "Number of GPUs are lesser than MPI ranks! ngpus=%d, nranks=%d", nGpus, size); - - CUDA_CHECK(cudaSetDevice(rank)); - } - - static void TearDownTestCase() { MPI_TRY(MPI_Finalize()); } -}; - -} // namespace test -} // namespace cugraph diff --git a/cpp/tests/utilities/test_utilities.cpp b/cpp/tests/utilities/test_utilities.cpp deleted file mode 100644 index abb416a632d..00000000000 --- a/cpp/tests/utilities/test_utilities.cpp +++ /dev/null @@ -1,442 +0,0 @@ -/* - * Copyright (c) 2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include - -#include -#include -#include - -#include - -extern "C" { -#include "mmio.h" -} - -#include - -#include -#include -#include -#include - -namespace cugraph { -namespace test { - -std::string getFileName(const std::string& s) -{ - char sep = '/'; -#ifdef _WIN32 - sep = '\\'; -#endif - size_t i = s.rfind(sep, s.length()); - if (i != std::string::npos) { return (s.substr(i + 1, s.length() - i)); } - return (""); -} - -/// Read matrix properties from Matrix Market file -/** Matrix Market file is assumed to be a sparse matrix in coordinate - * format. 
- * - * @param f File stream for Matrix Market file. - * @param tg Boolean indicating whether to convert matrix to general - * format (from symmetric, Hermitian, or skew symmetric format). - * @param t (Output) MM_typecode with matrix properties. - * @param m (Output) Number of matrix rows. - * @param n (Output) Number of matrix columns. - * @param nnz (Output) Number of non-zero matrix entries. - * @return Zero if properties were read successfully. Otherwise - * non-zero. - */ -template -int mm_properties(FILE* f, int tg, MM_typecode* t, IndexType_* m, IndexType_* n, IndexType_* nnz) -{ - // Read matrix properties from file - int mint, nint, nnzint; - if (fseek(f, 0, SEEK_SET)) { - fprintf(stderr, "Error: could not set position in file\n"); - return -1; - } - if (mm_read_banner(f, t)) { - fprintf(stderr, "Error: could not read Matrix Market file banner\n"); - return -1; - } - if (!mm_is_matrix(*t) || !mm_is_coordinate(*t)) { - fprintf(stderr, "Error: file does not contain matrix in coordinate format\n"); - return -1; - } - if (mm_read_mtx_crd_size(f, &mint, &nint, &nnzint)) { - fprintf(stderr, "Error: could not read matrix dimensions\n"); - return -1; - } - if (!mm_is_pattern(*t) && !mm_is_real(*t) && !mm_is_integer(*t) && !mm_is_complex(*t)) { - fprintf(stderr, "Error: matrix entries are not valid type\n"); - return -1; - } - *m = mint; - *n = nint; - *nnz = nnzint; - - // Find total number of non-zero entries - if (tg && !mm_is_general(*t)) { - // Non-diagonal entries should be counted twice - *nnz *= 2; - - // Diagonal entries should not be double-counted - int st; - for (int i = 0; i < nnzint; ++i) { - // Read matrix entry - // MTX only supports int for row and col idx - int row, col; - double rval, ival; - if (mm_is_pattern(*t)) - st = fscanf(f, "%d %d\n", &row, &col); - else if (mm_is_real(*t) || mm_is_integer(*t)) - st = fscanf(f, "%d %d %lg\n", &row, &col, &rval); - else // Complex matrix - st = fscanf(f, "%d %d %lg %lg\n", &row, &col, &rval, &ival); - if (ferror(f) || (st == EOF)) { - fprintf(stderr, "Error: error %d reading Matrix Market file (entry %d)\n", st, i + 1); - return -1; - } - - // Check if entry is diagonal - if (row == col) --(*nnz); - } - } - - return 0; -} - -/// Read Matrix Market file and convert to COO format matrix -/** Matrix Market file is assumed to be a sparse matrix in coordinate - * format. - * - * @param f File stream for Matrix Market file. - * @param tg Boolean indicating whether to convert matrix to general - * format (from symmetric, Hermitian, or skew symmetric format). - * @param nnz Number of non-zero matrix entries. - * @param cooRowInd (Output) Row indices for COO matrix. Should have - * at least nnz entries. - * @param cooColInd (Output) Column indices for COO matrix. Should - * have at least nnz entries. - * @param cooRVal (Output) Real component of COO matrix - * entries. Should have at least nnz entries. Ignored if null - * pointer. - * @param cooIVal (Output) Imaginary component of COO matrix - * entries. Should have at least nnz entries. Ignored if null - * pointer. - * @return Zero if matrix was read successfully. Otherwise non-zero. 
- */ -template -int mm_to_coo(FILE* f, - int tg, - IndexType_ nnz, - IndexType_* cooRowInd, - IndexType_* cooColInd, - ValueType_* cooRVal, - ValueType_* cooIVal) -{ - // Read matrix properties from file - MM_typecode t; - int m, n, nnzOld; - if (fseek(f, 0, SEEK_SET)) { - fprintf(stderr, "Error: could not set position in file\n"); - return -1; - } - if (mm_read_banner(f, &t)) { - fprintf(stderr, "Error: could not read Matrix Market file banner\n"); - return -1; - } - if (!mm_is_matrix(t) || !mm_is_coordinate(t)) { - fprintf(stderr, "Error: file does not contain matrix in coordinate format\n"); - return -1; - } - if (mm_read_mtx_crd_size(f, &m, &n, &nnzOld)) { - fprintf(stderr, "Error: could not read matrix dimensions\n"); - return -1; - } - if (!mm_is_pattern(t) && !mm_is_real(t) && !mm_is_integer(t) && !mm_is_complex(t)) { - fprintf(stderr, "Error: matrix entries are not valid type\n"); - return -1; - } - - // Add each matrix entry in file to COO format matrix - int i; // Entry index in Matrix Market file; can only be int in the MTX format - int j = 0; // Entry index in COO format matrix; can only be int in the MTX format - for (i = 0; i < nnzOld; ++i) { - // Read entry from file - int row, col; - double rval, ival; - int st; - if (mm_is_pattern(t)) { - st = fscanf(f, "%d %d\n", &row, &col); - rval = 1.0; - ival = 0.0; - } else if (mm_is_real(t) || mm_is_integer(t)) { - st = fscanf(f, "%d %d %lg\n", &row, &col, &rval); - ival = 0.0; - } else // Complex matrix - st = fscanf(f, "%d %d %lg %lg\n", &row, &col, &rval, &ival); - if (ferror(f) || (st == EOF)) { - fprintf(stderr, "Error: error %d reading Matrix Market file (entry %d)\n", st, i + 1); - return -1; - } - - // Switch to 0-based indexing - --row; - --col; - - // Record entry - cooRowInd[j] = row; - cooColInd[j] = col; - if (cooRVal != NULL) cooRVal[j] = rval; - if (cooIVal != NULL) cooIVal[j] = ival; - ++j; - - // Add symmetric complement of non-diagonal entries - if (tg && !mm_is_general(t) && (row != col)) { - // Modify entry value if matrix is skew symmetric or Hermitian - if (mm_is_skew(t)) { - rval = -rval; - ival = -ival; - } else if (mm_is_hermitian(t)) { - ival = -ival; - } - - // Record entry - cooRowInd[j] = col; - cooColInd[j] = row; - if (cooRVal != NULL) cooRVal[j] = rval; - if (cooIVal != NULL) cooIVal[j] = ival; - ++j; - } - } - return 0; -} - -int read_binary_vector(FILE* fpin, int n, std::vector& val) -{ - size_t is_read1; - - double* t_storage = new double[n]; - is_read1 = fread(t_storage, sizeof(double), n, fpin); - for (int i = 0; i < n; i++) { - if (t_storage[i] == DBL_MAX) - val[i] = FLT_MAX; - else if (t_storage[i] == -DBL_MAX) - val[i] = -FLT_MAX; - else - val[i] = static_cast(t_storage[i]); - } - delete[] t_storage; - - if (is_read1 != (size_t)n) { - printf("%s", "I/O fail\n"); - return 1; - } - return 0; -} - -int read_binary_vector(FILE* fpin, int n, std::vector& val) -{ - size_t is_read1; - - is_read1 = fread(&val[0], sizeof(double), n, fpin); - - if (is_read1 != (size_t)n) { - printf("%s", "I/O fail\n"); - return 1; - } - return 0; -} - -// FIXME: A similar function could be useful for CSC format -// There are functions above that operate coo -> csr and coo->csc -/** - * @tparam - */ -template -std::unique_ptr> generate_graph_csr_from_mm( - bool& directed, std::string mm_file) -{ - vertex_t number_of_vertices; - edge_t number_of_edges; - - FILE* fpin = fopen(mm_file.c_str(), "r"); - EXPECT_NE(fpin, nullptr); - - vertex_t number_of_columns = 0; - MM_typecode mm_typecode{0}; - EXPECT_EQ(mm_properties( - 
fpin, 1, &mm_typecode, &number_of_vertices, &number_of_columns, &number_of_edges), - 0); - EXPECT_TRUE(mm_is_matrix(mm_typecode)); - EXPECT_TRUE(mm_is_coordinate(mm_typecode)); - EXPECT_FALSE(mm_is_complex(mm_typecode)); - EXPECT_FALSE(mm_is_skew(mm_typecode)); - - directed = !mm_is_symmetric(mm_typecode); - - // Allocate memory on host - std::vector coo_row_ind(number_of_edges); - std::vector coo_col_ind(number_of_edges); - std::vector coo_val(number_of_edges); - - // Read - EXPECT_EQ((mm_to_coo( - fpin, 1, number_of_edges, &coo_row_ind[0], &coo_col_ind[0], &coo_val[0], NULL)), - 0); - EXPECT_EQ(fclose(fpin), 0); - - cugraph::GraphCOOView cooview( - &coo_row_ind[0], &coo_col_ind[0], &coo_val[0], number_of_vertices, number_of_edges); - - return cugraph::coo_to_csr(cooview); -} - -template -edgelist_from_market_matrix_file_t read_edgelist_from_matrix_market_file( - std::string const& graph_file_full_path) -{ - edgelist_from_market_matrix_file_t ret{}; - - MM_typecode mc{}; - vertex_t m{}; - edge_t nnz{}; - - FILE* file = fopen(graph_file_full_path.c_str(), "r"); - CUGRAPH_EXPECTS(file != nullptr, "fopen failure."); - - edge_t tmp_m{}; - edge_t tmp_k{}; - auto mm_ret = cugraph::test::mm_properties(file, 1, &mc, &tmp_m, &tmp_k, &nnz); - CUGRAPH_EXPECTS(mm_ret == 0, "could not read Matrix Market file properties."); - m = static_cast(tmp_m); - CUGRAPH_EXPECTS(mm_is_matrix(mc) && mm_is_coordinate(mc) && !mm_is_complex(mc) && !mm_is_skew(mc), - "invalid Matrix Market file properties."); - - ret.h_rows.assign(nnz, vertex_t{0}); - ret.h_cols.assign(nnz, vertex_t{0}); - ret.h_weights.assign(nnz, weight_t{0.0}); - ret.number_of_vertices = m; - ret.is_symmetric = mm_is_symmetric(mc); - - mm_ret = cugraph::test::mm_to_coo( - file, 1, nnz, ret.h_rows.data(), ret.h_cols.data(), ret.h_weights.data(), nullptr); - CUGRAPH_EXPECTS(mm_ret == 0, "could not read matrix data"); - - auto file_ret = fclose(file); - CUGRAPH_EXPECTS(file_ret == 0, "fclose failure."); - - return std::move(ret); -} - -template -cugraph::experimental::graph_t -read_graph_from_matrix_market_file(raft::handle_t const& handle, - std::string const& graph_file_full_path, - bool test_weighted) -{ - auto mm_graph = - read_edgelist_from_matrix_market_file(graph_file_full_path); - edge_t number_of_edges = static_cast(mm_graph.h_rows.size()); - - rmm::device_uvector d_edgelist_rows(number_of_edges, handle.get_stream()); - rmm::device_uvector d_edgelist_cols(number_of_edges, handle.get_stream()); - rmm::device_uvector d_edgelist_weights(test_weighted ? number_of_edges : 0, - handle.get_stream()); - - raft::update_device( - d_edgelist_rows.data(), mm_graph.h_rows.data(), number_of_edges, handle.get_stream()); - raft::update_device( - d_edgelist_cols.data(), mm_graph.h_cols.data(), number_of_edges, handle.get_stream()); - if (test_weighted) { - raft::update_device( - d_edgelist_weights.data(), mm_graph.h_weights.data(), number_of_edges, handle.get_stream()); - } - - cugraph::experimental::edgelist_t edgelist{ - d_edgelist_rows.data(), - d_edgelist_cols.data(), - test_weighted ? 
d_edgelist_weights.data() : nullptr, - number_of_edges}; - - return cugraph::experimental::graph_t( - handle, - edgelist, - mm_graph.number_of_vertices, - cugraph::experimental::graph_properties_t{mm_graph.is_symmetric, false}, - false, - true); -} - -// explicit instantiations - -template int mm_to_coo( - FILE* f, int tg, int nnz, int* cooRowInd, int* cooColInd, int* cooRVal, int* cooIVal); - -template int mm_to_coo( - FILE* f, int tg, int nnz, int* cooRowInd, int* cooColInd, double* cooRVal, double* cooIVal); - -template int mm_to_coo( - FILE* f, int tg, int nnz, int* cooRowInd, int* cooColInd, float* cooRVal, float* cooIVal); - -template std::unique_ptr> -generate_graph_csr_from_mm(bool& directed, std::string mm_file); - -template std::unique_ptr> generate_graph_csr_from_mm( - bool& directed, std::string mm_file); - -template std::unique_ptr> generate_graph_csr_from_mm( - bool& directed, std::string mm_file); - -template std::unique_ptr> generate_graph_csr_from_mm( - bool& directed, std::string mm_file); - -template cugraph::experimental::graph_t -read_graph_from_matrix_market_file(raft::handle_t const& handle, - std::string const& graph_file_full_path, - bool test_weighted); - -template cugraph::experimental::graph_t -read_graph_from_matrix_market_file(raft::handle_t const& handle, - std::string const& graph_file_full_path, - bool test_weighted); - -template cugraph::experimental::graph_t -read_graph_from_matrix_market_file(raft::handle_t const& handle, - std::string const& graph_file_full_path, - bool test_weighted); - -template cugraph::experimental::graph_t -read_graph_from_matrix_market_file( - raft::handle_t const& handle, std::string const& graph_file_full_path, bool test_weighted); - -template cugraph::experimental::graph_t -read_graph_from_matrix_market_file(raft::handle_t const& handle, - std::string const& graph_file_full_path, - bool test_weighted); - -template cugraph::experimental::graph_t -read_graph_from_matrix_market_file( - raft::handle_t const& handle, std::string const& graph_file_full_path, bool test_weighted); - -template cugraph::experimental::graph_t -read_graph_from_matrix_market_file( - raft::handle_t const& handle, std::string const& graph_file_full_path, bool test_weighted); - -} // namespace test -} // namespace cugraph diff --git a/cpp/tests/utilities/test_utilities.cu b/cpp/tests/utilities/test_utilities.cu new file mode 100644 index 00000000000..0a7b58b32cd --- /dev/null +++ b/cpp/tests/utilities/test_utilities.cu @@ -0,0 +1,788 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +extern "C" { +#include "mmio.h" +} + +#include +#include +#include +#include + +namespace cugraph { +namespace test { + +std::string getFileName(const std::string& s) +{ + char sep = '/'; +#ifdef _WIN32 + sep = '\\'; +#endif + size_t i = s.rfind(sep, s.length()); + if (i != std::string::npos) { return (s.substr(i + 1, s.length() - i)); } + return (""); +} + +/// Read matrix properties from Matrix Market file +/** Matrix Market file is assumed to be a sparse matrix in coordinate + * format. + * + * @param f File stream for Matrix Market file. + * @param tg Boolean indicating whether to convert matrix to general + * format (from symmetric, Hermitian, or skew symmetric format). + * @param t (Output) MM_typecode with matrix properties. + * @param m (Output) Number of matrix rows. + * @param n (Output) Number of matrix columns. + * @param nnz (Output) Number of non-zero matrix entries. + * @return Zero if properties were read successfully. Otherwise + * non-zero. + */ +template +int mm_properties(FILE* f, int tg, MM_typecode* t, IndexType_* m, IndexType_* n, IndexType_* nnz) +{ + // Read matrix properties from file + int mint, nint, nnzint; + if (fseek(f, 0, SEEK_SET)) { + fprintf(stderr, "Error: could not set position in file\n"); + return -1; + } + if (mm_read_banner(f, t)) { + fprintf(stderr, "Error: could not read Matrix Market file banner\n"); + return -1; + } + if (!mm_is_matrix(*t) || !mm_is_coordinate(*t)) { + fprintf(stderr, "Error: file does not contain matrix in coordinate format\n"); + return -1; + } + if (mm_read_mtx_crd_size(f, &mint, &nint, &nnzint)) { + fprintf(stderr, "Error: could not read matrix dimensions\n"); + return -1; + } + if (!mm_is_pattern(*t) && !mm_is_real(*t) && !mm_is_integer(*t) && !mm_is_complex(*t)) { + fprintf(stderr, "Error: matrix entries are not valid type\n"); + return -1; + } + *m = mint; + *n = nint; + *nnz = nnzint; + + // Find total number of non-zero entries + if (tg && !mm_is_general(*t)) { + // Non-diagonal entries should be counted twice + *nnz *= 2; + + // Diagonal entries should not be double-counted + int st; + for (int i = 0; i < nnzint; ++i) { + // Read matrix entry + // MTX only supports int for row and col idx + int row, col; + double rval, ival; + if (mm_is_pattern(*t)) + st = fscanf(f, "%d %d\n", &row, &col); + else if (mm_is_real(*t) || mm_is_integer(*t)) + st = fscanf(f, "%d %d %lg\n", &row, &col, &rval); + else // Complex matrix + st = fscanf(f, "%d %d %lg %lg\n", &row, &col, &rval, &ival); + if (ferror(f) || (st == EOF)) { + fprintf(stderr, "Error: error %d reading Matrix Market file (entry %d)\n", st, i + 1); + return -1; + } + + // Check if entry is diagonal + if (row == col) --(*nnz); + } + } + + return 0; +} + +/// Read Matrix Market file and convert to COO format matrix +/** Matrix Market file is assumed to be a sparse matrix in coordinate + * format. + * + * @param f File stream for Matrix Market file. + * @param tg Boolean indicating whether to convert matrix to general + * format (from symmetric, Hermitian, or skew symmetric format). + * @param nnz Number of non-zero matrix entries. + * @param cooRowInd (Output) Row indices for COO matrix. Should have + * at least nnz entries. + * @param cooColInd (Output) Column indices for COO matrix. Should + * have at least nnz entries. + * @param cooRVal (Output) Real component of COO matrix + * entries. Should have at least nnz entries. 
Ignored if null + * pointer. + * @param cooIVal (Output) Imaginary component of COO matrix + * entries. Should have at least nnz entries. Ignored if null + * pointer. + * @return Zero if matrix was read successfully. Otherwise non-zero. + */ +template +int mm_to_coo(FILE* f, + int tg, + IndexType_ nnz, + IndexType_* cooRowInd, + IndexType_* cooColInd, + ValueType_* cooRVal, + ValueType_* cooIVal) +{ + // Read matrix properties from file + MM_typecode t; + int m, n, nnzOld; + if (fseek(f, 0, SEEK_SET)) { + fprintf(stderr, "Error: could not set position in file\n"); + return -1; + } + if (mm_read_banner(f, &t)) { + fprintf(stderr, "Error: could not read Matrix Market file banner\n"); + return -1; + } + if (!mm_is_matrix(t) || !mm_is_coordinate(t)) { + fprintf(stderr, "Error: file does not contain matrix in coordinate format\n"); + return -1; + } + if (mm_read_mtx_crd_size(f, &m, &n, &nnzOld)) { + fprintf(stderr, "Error: could not read matrix dimensions\n"); + return -1; + } + if (!mm_is_pattern(t) && !mm_is_real(t) && !mm_is_integer(t) && !mm_is_complex(t)) { + fprintf(stderr, "Error: matrix entries are not valid type\n"); + return -1; + } + + // Add each matrix entry in file to COO format matrix + int i; // Entry index in Matrix Market file; can only be int in the MTX format + int j = 0; // Entry index in COO format matrix; can only be int in the MTX format + for (i = 0; i < nnzOld; ++i) { + // Read entry from file + int row, col; + double rval, ival; + int st; + if (mm_is_pattern(t)) { + st = fscanf(f, "%d %d\n", &row, &col); + rval = 1.0; + ival = 0.0; + } else if (mm_is_real(t) || mm_is_integer(t)) { + st = fscanf(f, "%d %d %lg\n", &row, &col, &rval); + ival = 0.0; + } else // Complex matrix + st = fscanf(f, "%d %d %lg %lg\n", &row, &col, &rval, &ival); + if (ferror(f) || (st == EOF)) { + fprintf(stderr, "Error: error %d reading Matrix Market file (entry %d)\n", st, i + 1); + return -1; + } + + // Switch to 0-based indexing + --row; + --col; + + // Record entry + cooRowInd[j] = row; + cooColInd[j] = col; + if (cooRVal != NULL) cooRVal[j] = rval; + if (cooIVal != NULL) cooIVal[j] = ival; + ++j; + + // Add symmetric complement of non-diagonal entries + if (tg && !mm_is_general(t) && (row != col)) { + // Modify entry value if matrix is skew symmetric or Hermitian + if (mm_is_skew(t)) { + rval = -rval; + ival = -ival; + } else if (mm_is_hermitian(t)) { + ival = -ival; + } + + // Record entry + cooRowInd[j] = col; + cooColInd[j] = row; + if (cooRVal != NULL) cooRVal[j] = rval; + if (cooIVal != NULL) cooIVal[j] = ival; + ++j; + } + } + return 0; +} + +// FIXME: A similar function could be useful for CSC format +// There are functions above that operate coo -> csr and coo->csc +/** + * @tparam + */ +template +std::unique_ptr> generate_graph_csr_from_mm( + bool& directed, std::string mm_file) +{ + vertex_t number_of_vertices; + edge_t number_of_edges; + + FILE* fpin = fopen(mm_file.c_str(), "r"); + CUGRAPH_EXPECTS(fpin != nullptr, "fopen (%s) failure.", mm_file.c_str()); + + vertex_t number_of_columns = 0; + MM_typecode mm_typecode{0}; + CUGRAPH_EXPECTS( + mm_properties( + fpin, 1, &mm_typecode, &number_of_vertices, &number_of_columns, &number_of_edges) == 0, + "mm_properties query failure."); + CUGRAPH_EXPECTS(mm_is_matrix(mm_typecode), "Invalid input file."); + CUGRAPH_EXPECTS(mm_is_coordinate(mm_typecode), "Invalid input file."); + CUGRAPH_EXPECTS(!mm_is_complex(mm_typecode), "Invalid input file."); + CUGRAPH_EXPECTS(!mm_is_skew(mm_typecode), "Invalid input file."); + + directed = 
!mm_is_symmetric(mm_typecode); + + // Allocate memory on host + std::vector coo_row_ind(number_of_edges); + std::vector coo_col_ind(number_of_edges); + std::vector coo_val(number_of_edges); + + // Read + CUGRAPH_EXPECTS( + (mm_to_coo( + fpin, 1, number_of_edges, &coo_row_ind[0], &coo_col_ind[0], &coo_val[0], NULL)) == 0, + "file read failure."); + CUGRAPH_EXPECTS(fclose(fpin) == 0, "fclose failure."); + + cugraph::GraphCOOView cooview( + &coo_row_ind[0], &coo_col_ind[0], &coo_val[0], number_of_vertices, number_of_edges); + + return cugraph::coo_to_csr(cooview); +} + +template +std::tuple, + rmm::device_uvector, + rmm::device_uvector, + vertex_t, + bool> +read_edgelist_from_matrix_market_file(raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted) +{ + MM_typecode mc{}; + vertex_t m{}; + size_t nnz{}; + + FILE* file = fopen(graph_file_full_path.c_str(), "r"); + CUGRAPH_EXPECTS(file != nullptr, "fopen failure."); + + size_t tmp_m{}; + size_t tmp_k{}; + auto mm_ret = cugraph::test::mm_properties(file, 1, &mc, &tmp_m, &tmp_k, &nnz); + CUGRAPH_EXPECTS(mm_ret == 0, "could not read Matrix Market file properties."); + m = static_cast(tmp_m); + CUGRAPH_EXPECTS(mm_is_matrix(mc) && mm_is_coordinate(mc) && !mm_is_complex(mc) && !mm_is_skew(mc), + "invalid Matrix Market file properties."); + + vertex_t number_of_vertices = m; + bool is_symmetric = mm_is_symmetric(mc); + + std::vector h_rows(nnz); + std::vector h_cols(nnz); + std::vector h_weights(nnz); + + mm_ret = cugraph::test::mm_to_coo( + file, 1, nnz, h_rows.data(), h_cols.data(), h_weights.data(), static_cast(nullptr)); + CUGRAPH_EXPECTS(mm_ret == 0, "could not read matrix data"); + + auto file_ret = fclose(file); + CUGRAPH_EXPECTS(file_ret == 0, "fclose failure."); + + rmm::device_uvector d_edgelist_rows(h_rows.size(), handle.get_stream()); + rmm::device_uvector d_edgelist_cols(h_cols.size(), handle.get_stream()); + rmm::device_uvector d_edgelist_weights(test_weighted ? h_weights.size() : size_t{0}, + handle.get_stream()); + + raft::update_device(d_edgelist_rows.data(), h_rows.data(), h_rows.size(), handle.get_stream()); + raft::update_device(d_edgelist_cols.data(), h_cols.data(), h_cols.size(), handle.get_stream()); + if (test_weighted) { + raft::update_device( + d_edgelist_weights.data(), h_weights.data(), h_weights.size(), handle.get_stream()); + } + + return std::make_tuple(std::move(d_edgelist_rows), + std::move(d_edgelist_cols), + std::move(d_edgelist_weights), + number_of_vertices, + is_symmetric); +} + +namespace detail { + +template +std::enable_if_t< + multi_gpu, + std::tuple< + cugraph::experimental::graph_t, + rmm::device_uvector>> +read_graph_from_matrix_market_file(raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber) +{ + CUGRAPH_EXPECTS(renumber, "renumber should be true if multi_gpu is true."); + + // 1. 
read from the matrix market file + + rmm::device_uvector d_edgelist_rows(0, handle.get_stream()); + rmm::device_uvector d_edgelist_cols(0, handle.get_stream()); + rmm::device_uvector d_edgelist_weights(0, handle.get_stream()); + vertex_t number_of_vertices{}; + bool is_symmetric{}; + std::tie(d_edgelist_rows, d_edgelist_cols, d_edgelist_weights, number_of_vertices, is_symmetric) = + read_edgelist_from_matrix_market_file( + handle, graph_file_full_path, test_weighted); + + rmm::device_uvector d_vertices(number_of_vertices, handle.get_stream()); + thrust::sequence(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + d_vertices.begin(), + d_vertices.end(), + vertex_t{0}); + + // 2. filter non-local vertices & edges + + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_size = row_comm.get_size(); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_size = col_comm.get_size(); + + auto vertex_key_func = + cugraph::experimental::detail::compute_gpu_id_from_vertex_t{comm_size}; + d_vertices.resize( + thrust::distance( + d_vertices.begin(), + thrust::remove_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + d_vertices.begin(), + d_vertices.end(), + [comm_rank, key_func = vertex_key_func] __device__(auto val) { + return key_func(val) != comm_rank; + })), + handle.get_stream()); + d_vertices.shrink_to_fit(handle.get_stream()); + + auto edge_key_func = cugraph::experimental::detail::compute_gpu_id_from_edge_t{ + false, comm_size, row_comm_size, col_comm_size}; + size_t number_of_local_edges{}; + if (test_weighted) { + auto edge_first = thrust::make_zip_iterator(thrust::make_tuple( + d_edgelist_rows.begin(), d_edgelist_cols.begin(), d_edgelist_weights.begin())); + number_of_local_edges = thrust::distance( + edge_first, + thrust::remove_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edge_first, + edge_first + d_edgelist_rows.size(), + [comm_rank, key_func = edge_key_func] __device__(auto e) { + auto major = store_transposed ? thrust::get<1>(e) : thrust::get<0>(e); + auto minor = store_transposed ? thrust::get<0>(e) : thrust::get<1>(e); + return key_func(major, minor) != comm_rank; + })); + } else { + auto edge_first = thrust::make_zip_iterator( + thrust::make_tuple(d_edgelist_rows.begin(), d_edgelist_cols.begin())); + number_of_local_edges = thrust::distance( + edge_first, + thrust::remove_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edge_first, + edge_first + d_edgelist_rows.size(), + [comm_rank, key_func = edge_key_func] __device__(auto e) { + auto major = store_transposed ? thrust::get<1>(e) : thrust::get<0>(e); + auto minor = store_transposed ? thrust::get<0>(e) : thrust::get<1>(e); + return key_func(major, minor) != comm_rank; + })); + } + + d_edgelist_rows.resize(number_of_local_edges, handle.get_stream()); + d_edgelist_rows.shrink_to_fit(handle.get_stream()); + d_edgelist_cols.resize(number_of_local_edges, handle.get_stream()); + d_edgelist_cols.shrink_to_fit(handle.get_stream()); + if (test_weighted) { + d_edgelist_weights.resize(number_of_local_edges, handle.get_stream()); + d_edgelist_weights.shrink_to_fit(handle.get_stream()); + } + + // 3. 
renumber + + rmm::device_uvector renumber_map_labels(0, handle.get_stream()); + cugraph::experimental::partition_t partition{}; + vertex_t aggregate_number_of_vertices{}; + edge_t number_of_edges{}; + // FIXME: set do_expensive_check to false once validated + std::tie(renumber_map_labels, partition, aggregate_number_of_vertices, number_of_edges) = + cugraph::experimental::renumber_edgelist( + handle, + d_vertices.data(), + static_cast(d_vertices.size()), + store_transposed ? d_edgelist_cols.data() : d_edgelist_rows.data(), + store_transposed ? d_edgelist_rows.data() : d_edgelist_cols.data(), + d_edgelist_rows.size(), + false, + true); + assert(aggregate_number_of_vertices == number_of_vertices); + + // 4. create a graph + + return std::make_tuple( + cugraph::experimental::graph_t( + handle, + std::vector>{ + cugraph::experimental::edgelist_t{ + d_edgelist_rows.data(), + d_edgelist_cols.data(), + test_weighted ? d_edgelist_weights.data() : nullptr, + static_cast(d_edgelist_rows.size())}}, + partition, + number_of_vertices, + number_of_edges, + cugraph::experimental::graph_properties_t{is_symmetric, false}, + true, + true), + std::move(renumber_map_labels)); +} + +template +std::enable_if_t< + !multi_gpu, + std::tuple< + cugraph::experimental::graph_t, + rmm::device_uvector>> +read_graph_from_matrix_market_file(raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber) +{ + rmm::device_uvector d_edgelist_rows(0, handle.get_stream()); + rmm::device_uvector d_edgelist_cols(0, handle.get_stream()); + rmm::device_uvector d_edgelist_weights(0, handle.get_stream()); + vertex_t number_of_vertices{}; + bool is_symmetric{}; + std::tie(d_edgelist_rows, d_edgelist_cols, d_edgelist_weights, number_of_vertices, is_symmetric) = + read_edgelist_from_matrix_market_file( + handle, graph_file_full_path, test_weighted); + + rmm::device_uvector d_vertices(number_of_vertices, handle.get_stream()); + thrust::sequence(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + d_vertices.begin(), + d_vertices.end(), + vertex_t{0}); + + // FIXME: set do_expensive_check to false once validated + auto renumber_map_labels = + renumber ? cugraph::experimental::renumber_edgelist( + handle, + d_vertices.data(), + static_cast(d_vertices.size()), + store_transposed ? d_edgelist_cols.data() : d_edgelist_rows.data(), + store_transposed ? d_edgelist_rows.data() : d_edgelist_cols.data(), + static_cast(d_edgelist_rows.size()), + true) + : rmm::device_uvector(0, handle.get_stream()); + + // FIXME: set do_expensive_check to false once validated + return std::make_tuple( + cugraph::experimental::graph_t( + handle, + cugraph::experimental::edgelist_t{ + d_edgelist_rows.data(), + d_edgelist_cols.data(), + test_weighted ? d_edgelist_weights.data() : nullptr, + static_cast(d_edgelist_rows.size())}, + number_of_vertices, + cugraph::experimental::graph_properties_t{is_symmetric, false}, + renumber ? 
true : false, + true), + std::move(renumber_map_labels)); +} + +} // namespace detail + +template +std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file(raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber) +{ + return detail:: + read_graph_from_matrix_market_file( + handle, graph_file_full_path, test_weighted, renumber); +} +// explicit instantiations + +template int32_t mm_to_coo(FILE* f, + int32_t tg, + int32_t nnz, + int32_t* cooRowInd, + int32_t* cooColInd, + int32_t* cooRVal, + int32_t* cooIVal); + +template int32_t mm_to_coo(FILE* f, + int32_t tg, + int32_t nnz, + int32_t* cooRowInd, + int32_t* cooColInd, + double* cooRVal, + double* cooIVal); + +template int32_t mm_to_coo(FILE* f, + int32_t tg, + int32_t nnz, + int32_t* cooRowInd, + int32_t* cooColInd, + float* cooRVal, + float* cooIVal); + +template std::unique_ptr> generate_graph_csr_from_mm( + bool& directed, std::string mm_file); + +template std::unique_ptr> generate_graph_csr_from_mm( + bool& directed, std::string mm_file); + +template std::unique_ptr> generate_graph_csr_from_mm( + bool& directed, std::string mm_file); + +template std::unique_ptr> generate_graph_csr_from_mm( + bool& directed, std::string mm_file); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + 
raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +} // namespace test +} // namespace cugraph diff --git a/cpp/tests/utilities/test_utilities.hpp b/cpp/tests/utilities/test_utilities.hpp index 406f09048e0..4b5517271f5 100644 --- a/cpp/tests/utilities/test_utilities.hpp +++ b/cpp/tests/utilities/test_utilities.hpp @@ -18,6 +18,8 @@ #include #include +#include + #include #include #include @@ -77,10 +79,6 @@ int mm_to_coo(FILE* f, ValueType_* cooRVal, ValueType_* cooIVal); -int read_binary_vector(FILE* fpin, int n, std::vector& val); - -int read_binary_vector(FILE* fpin, int n, std::vector& val); - // FIXME: A similar function could be useful for CSC format // There are functions above that operate coo -> csr and coo->csc /** @@ -108,24 +106,29 @@ static const std::string& get_rapids_dataset_root_dir() return rdrd; } +// returns a tuple of (rows, columns, weights, number_of_vertices, is_symmetric) template -struct edgelist_from_market_matrix_file_t { - std::vector h_rows{}; - std::vector h_cols{}; - std::vector h_weights{}; - vertex_t number_of_vertices{}; - bool is_symmetric{}; -}; - 
-template -edgelist_from_market_matrix_file_t read_edgelist_from_matrix_market_file( - std::string const& graph_file_full_path); - -template -cugraph::experimental::graph_t +std::tuple, + rmm::device_uvector, + rmm::device_uvector, + vertex_t, + bool> +read_edgelist_from_matrix_market_file(raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted); + +// renumber must be true if multi_gpu is true +template +std::tuple, + rmm::device_uvector> read_graph_from_matrix_market_file(raft::handle_t const& handle, std::string const& graph_file_full_path, - bool test_weighted); + bool test_weighted, + bool renumber); } // namespace test } // namespace cugraph From 860bc159634df9b963dd75fed3fc9a3f86ddc6e9 Mon Sep 17 00:00:00 2001 From: AJ Schmidt Date: Mon, 1 Mar 2021 15:36:33 -0500 Subject: [PATCH 180/343] prepare changelog (#1433) Prepare Changelog for Automation (#1433) This PR prepares the changelog to be automatically updated during releases. The contents of the pre-release body linked in this PR will be copied into CHANGELOG.md at release time. Authors: - AJ Schmidt (@ajschmidt8) Approvers: - Dillon Cullinan (@dillon-cullinan) URL: https://github.com/rapidsai/cugraph/pull/1433 --- CHANGELOG.md | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e58f3b9aa07..fe8e09f1e52 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,10 +1,6 @@ # cuGraph 0.19.0 (Date TBD) -## New Features - -## Improvements - -## Bug Fixes +Please see https://github.com/rapidsai/cugraph/releases/tag/branch-0.19-latest for the latest changes to this development branch. # cuGraph 0.18.0 (24 Feb 2021) From 07f3d71feb513298a149f282eb84ea46bc2296f9 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Tue, 2 Mar 2021 17:23:03 -0500 Subject: [PATCH 181/343] Add R-mat generator (#1411) Close #1329 (with https://github.com/rapidsai/cugraph/pull/1401) Authors: - Seunghwa Kang (@seunghwak) Approvers: - Brad Rees (@BradReesWork) - Alex Fender (@afender) - Andrei Schaffer (@aschaffer) URL: https://github.com/rapidsai/cugraph/pull/1411 --- cpp/CMakeLists.txt | 1 + cpp/include/experimental/graph_generator.hpp | 84 ++++++ .../experimental/generate_rmat_edgelist.cu | 149 +++++++++ cpp/src/experimental/scramble.cuh | 82 +++++ cpp/tests/CMakeLists.txt | 12 +- cpp/tests/experimental/coarsen_graph_test.cpp | 18 +- cpp/tests/experimental/degree_test.cpp | 8 +- cpp/tests/experimental/generate_rmat_test.cpp | 285 ++++++++++++++++++ cpp/tests/experimental/weight_sum_test.cpp | 8 +- cpp/tests/utilities/test_utilities.hpp | 14 + 10 files changed, 635 insertions(+), 26 deletions(-) create mode 100644 cpp/include/experimental/graph_generator.hpp create mode 100644 cpp/src/experimental/generate_rmat_edgelist.cu create mode 100644 cpp/src/experimental/scramble.cuh create mode 100644 cpp/tests/experimental/generate_rmat_test.cpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index d211fe9ed5a..108cb0748a8 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -420,6 +420,7 @@ add_library(cugraph SHARED src/components/connectivity.cu src/centrality/katz_centrality.cu src/centrality/betweenness_centrality.cu + src/experimental/generate_rmat_edgelist.cu src/experimental/graph.cu src/experimental/graph_view.cu src/experimental/coarsen_graph.cu diff --git a/cpp/include/experimental/graph_generator.hpp b/cpp/include/experimental/graph_generator.hpp new file mode 100644 index 00000000000..b8495ed7581 --- /dev/null +++ 
b/cpp/include/experimental/graph_generator.hpp @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +#include +#include + +namespace cugraph { +namespace experimental { + +/** + * @brief generate an edge list for an R-mat graph. + * + * This function allows multi-edges and self-loops similar to the Graph 500 reference + * implementation. + * + * @p scramble_vertex_ids needs to be set to `true` to generate a graph conforming to the Graph 500 + * specification (note that scrambling does not affect cuGraph's graph construction performance, so + * this is generally unnecessary). If `edge_factor` is given (e.g. Graph 500), set @p num_edges to + * (size_t{1} << @p scale) * `edge_factor`. To generate an undirected graph, set @p b == @p c and @p + * clip_and_flip = true. All the resulting edges will be placed in the lower triangular part + * (inculding the diagonal) of the graph adjacency matrix. + * + * For multi-GPU generation with `P` GPUs, @p seed should be set to different values in different + * GPUs to avoid every GPU generating the same set of edges. @p num_edges should be adjusted as + * well; e.g. assuming `edge_factor` is given, set @p num_edges = (size_t{1} << @p scale) * + * `edge_factor` / `P` + (rank < (((size_t{1} << @p scale) * `edge_factor`) % P) ? 1 : 0). + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param scale Scale factor to set the number of verties in the graph. Vertex IDs have values in + * [0, V), where V = 1 << @p scale. + * @param num_edges Number of edges to generate. + * @param a a, b, c, d (= 1.0 - (a + b + c)) in the R-mat graph generator (vist https://graph500.org + * for additional details). a, b, c, d should be non-negative and a + b + c should be no larger + * than 1.0. + * @param b a, b, c, d (= 1.0 - (a + b + c)) in the R-mat graph generator (vist https://graph500.org + * for additional details). a, b, c, d should be non-negative and a + b + c should be no larger + * than 1.0. + * @param c a, b, c, d (= 1.0 - (a + b + c)) in the R-mat graph generator (vist https://graph500.org + * for additional details). a, b, c, d should be non-negative and a + b + c should be no larger + * than 1.0. + * @param seed Seed value for the random number generator. + * @param clip_and_flip Flag controlling whether to generate edges only in the lower triangular part + * (including the diagonal) of the graph adjacency matrix (if set to `true`) or not (if set to + * `false`). + * @param scramble_vertex_ids Flag controlling whether to scramble vertex ID bits (if set to `true`) + * or not (if set to `false`); scrambling vertx ID bits breaks correlation between vertex ID values + * and vertex degrees. 
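As a usage illustration for the API documented above, here is a minimal, hypothetical sketch of generating a Graph 500 style edge list on a single GPU. It assumes, per the parameter documentation, that the third argument is the total number of edges to generate; the include paths, function name `rmat_usage_sketch`, and handle setup are assumptions for illustration, not part of the patch:

```
// Hypothetical sketch; only generate_rmat_edgelist itself comes from the patch.
#include <experimental/graph_generator.hpp>  // assumed include path

#include <raft/handle.hpp>
#include <rmm/device_uvector.hpp>

#include <cstdint>
#include <tuple>

void rmat_usage_sketch(raft::handle_t const& handle)
{
  size_t constexpr scale{10};
  size_t constexpr edge_factor{16};
  size_t constexpr num_edges = (size_t{1} << scale) * edge_factor;

  rmm::device_uvector<int32_t> srcs(0, handle.get_stream());
  rmm::device_uvector<int32_t> dsts(0, handle.get_stream());
  std::tie(srcs, dsts) = cugraph::experimental::generate_rmat_edgelist<int32_t>(
    handle,
    scale,
    num_edges,
    0.57 /* a */,
    0.19 /* b */,
    0.19 /* c */,
    uint64_t{0} /* seed */,
    false /* clip_and_flip */,
    true /* scramble_vertex_ids */);

  // srcs[i], dsts[i] now hold the i-th generated edge on the device; multi-edges
  // and self-loops may be present, as noted in the documentation above.
}
```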
The scramble code here follows the algorithm in the Graph 500 reference + * implementation version 3.0.0. + * @return std::tuple, rmm::device_uvector> A tuple of + * rmm::device_uvector objects for edge source vertex IDs and edge destination vertex IDs. + */ +template +std::tuple, rmm::device_uvector> generate_rmat_edgelist( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor = 16, + double a = 0.57, + double b = 0.19, + double c = 0.19, + uint64_t seed = 0, + bool clip_and_flip = false, + bool scramble_vertex_ids = false); + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/src/experimental/generate_rmat_edgelist.cu b/cpp/src/experimental/generate_rmat_edgelist.cu new file mode 100644 index 00000000000..0a6d666432f --- /dev/null +++ b/cpp/src/experimental/generate_rmat_edgelist.cu @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include + +#include +#include +#include +#include + +#include +#include + +#include + +namespace cugraph { +namespace experimental { + +template +std::tuple, rmm::device_uvector> generate_rmat_edgelist( + raft::handle_t const& handle, + size_t scale, + size_t num_edges, + double a, + double b, + double c, + uint64_t seed, + bool clip_and_flip, + bool scramble_vertex_ids) +{ + CUGRAPH_EXPECTS(size_t{1} << scale <= std::numeric_limits::max(), + "Invalid input argument: scale too large for vertex_t."); + CUGRAPH_EXPECTS((a >= 0.0) && (b >= 0.0) && (c >= 0.0) && (a + b + c <= 1.0), + "Invalid input argument: a, b, c should be non-negative and a + b + c should not " + "be larger than 1.0."); + + raft::random::Rng rng(seed + 10); + // to limit memory footprint (1024 is a tuning parameter) + auto max_edges_to_generate_per_iteration = + static_cast(handle.get_device_properties().multiProcessorCount) * 1024; + rmm::device_uvector rands( + std::min(num_edges, max_edges_to_generate_per_iteration) * 2 * scale, handle.get_stream()); + + rmm::device_uvector srcs(num_edges, handle.get_stream()); + rmm::device_uvector dsts(num_edges, handle.get_stream()); + + size_t num_edges_generated{0}; + while (num_edges_generated < num_edges) { + auto num_edges_to_generate = + std::min(num_edges - num_edges_generated, max_edges_to_generate_per_iteration); + auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(srcs.begin(), dsts.begin())) + + num_edges_generated; + rng.uniform( + rands.data(), num_edges_to_generate * 2 * scale, 0.0f, 1.0f, handle.get_stream()); + thrust::transform( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(num_edges_to_generate), + pair_first, + // if a + b == 0.0, a_norm is irrelevant, if (1.0 - (a+b)) == 0.0, c_norm is irrelevant + [scale, + clip_and_flip, + rands = rands.data(), + a_plus_b = a + b, + a_norm = (a + b) > 0.0 ? a / (a + b) : 0.0, + c_norm = (1.0 - (a + b)) > 0.0 ? 
c / (1.0 - (a + b)) : 0.0] __device__(auto i) { + vertex_t src{0}; + vertex_t dst{0}; + for (size_t bit = scale - 1; bit != 0; --bit) { + auto r0 = rands[i * 2 * scale + 2 * bit]; + auto r1 = rands[i * 2 * scale + 2 * bit + 1]; + auto src_bit_set = r0 > a_plus_b; + auto dst_bit_set = r1 > (src_bit_set ? c_norm : a_norm); + if (clip_and_flip) { + if (src == dst) { + if (!src_bit_set && dst_bit_set) { + src_bit_set = !src_bit_set; + dst_bit_set = !dst_bit_set; + } + } + } + src += src_bit_set ? static_cast(1 << bit) : 0; + dst += dst_bit_set ? static_cast(1 << bit) : 0; + } + return thrust::make_tuple(src, dst); + }); + num_edges_generated += num_edges_to_generate; + } + + if (scramble_vertex_ids) { + rands.resize(0, handle.get_stream()); + rands.shrink_to_fit(handle.get_stream()); + + auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(srcs.begin(), dsts.begin())); + thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + pair_first, + pair_first + srcs.size(), + pair_first, + [scale] __device__(auto pair) { + return thrust::make_tuple(detail::scramble(thrust::get<0>(pair), scale), + detail::scramble(thrust::get<1>(pair), scale)); + }); + } + + return std::make_tuple(std::move(srcs), std::move(dsts)); +} + +// explicit instantiation + +template std::tuple, rmm::device_uvector> +generate_rmat_edgelist(raft::handle_t const& handle, + size_t scale, + size_t num_edges, + double a, + double b, + double c, + uint64_t seed, + bool clip_and_flip, + bool scramble_vertex_ids); + +template std::tuple, rmm::device_uvector> +generate_rmat_edgelist(raft::handle_t const& handle, + size_t scale, + size_t num_edges, + double a, + double b, + double c, + uint64_t seed, + bool clip_and_flip, + bool scramble_vertex_ids); + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/src/experimental/scramble.cuh b/cpp/src/experimental/scramble.cuh new file mode 100644 index 00000000000..875bb5feff0 --- /dev/null +++ b/cpp/src/experimental/scramble.cuh @@ -0,0 +1,82 @@ +/* Copyright (C) 2009-2010 The Trustees of Indiana University. */ +/* */ +/* Use, modification and distribution is subject to the Boost Software */ +/* License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at */ +/* http://www.boost.org/LICENSE_1_0.txt) */ +/* */ +/* Authors: Jeremiah Willcock */ +/* Andrew Lumsdaine */ + +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
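As a reading aid for the per-bit quadrant selection in the kernel above, the following small host-side sketch (not part of the patch) spells out how the precomputed thresholds reproduce the R-mat quadrant probabilities a, b, c, and d = 1 - (a + b + c):

```
// Illustrative sketch only: mirrors the thresholds used by the device lambda above.
#include <cassert>
#include <cmath>

inline void rmat_quadrant_probabilities(double a, double b, double c)
{
  double const a_plus_b = a + b;  // P(source bit == 0), since the bit is set only when r0 > a_plus_b
  double const a_norm = (a + b) > 0.0 ? a / (a + b) : 0.0;                  // P(dest bit == 0 | source bit == 0)
  double const c_norm = (1.0 - (a + b)) > 0.0 ? c / (1.0 - (a + b)) : 0.0;  // P(dest bit == 0 | source bit == 1)

  double const p_upper_left  = a_plus_b * a_norm;                  // == a
  double const p_upper_right = a_plus_b * (1.0 - a_norm);          // == b
  double const p_lower_left  = (1.0 - a_plus_b) * c_norm;          // == c
  double const p_lower_right = (1.0 - a_plus_b) * (1.0 - c_norm);  // == d

  assert(std::abs(p_upper_left + p_upper_right + p_lower_left + p_lower_right - 1.0) < 1e-12);
}
```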
+ */ +#pragma once + +#include +#include + +namespace cugraph { +namespace experimental { +namespace detail { + +template +__device__ std::enable_if_t bitreversal(uvertex_t value) +{ + return __brevll(value); +} + +template +__device__ std::enable_if_t bitreversal(uvertex_t value) +{ + return __brev(value); +} + +template +__device__ std::enable_if_t bitreversal(uvertex_t value) +{ + return static_cast(__brev(value) >> 16); +} + +/* Apply a permutation to scramble vertex numbers; a randomly generated + * permutation is not used because applying it at scale is too expensive. */ +template +__device__ vertex_t scramble(vertex_t value, size_t lgN) +{ + constexpr size_t number_of_bits = sizeof(vertex_t) * 8; + + static_assert((number_of_bits == 64) || (number_of_bits == 32) || (number_of_bits == 16)); + assert((std::is_unsigned::value && lgN <= number_of_bits) || + (!std::is_unsigned::value && lgN < number_of_bits)); + assert(value >= 0); + + using uvertex_t = typename std::make_unsigned::type; + + constexpr auto scramble_value0 = static_cast( + sizeof(vertex_t) == 8 ? 606610977102444280 : (sizeof(vertex_t) == 4 ? 282475248 : 0)); + constexpr auto scramble_value1 = static_cast( + sizeof(vertex_t) == 8 ? 11680327234415193037 : (sizeof(vertex_t) == 4 ? 2617694917 : 8620)); + + auto v = static_cast(value); + v += scramble_value0 + scramble_value1; + v *= (scramble_value0 | static_cast(0x4519840211493211)); + v = bitreversal(v) >> (number_of_bits - lgN); + v *= (scramble_value1 | static_cast(0x3050852102C843A5)); + v = bitreversal(v) >> (number_of_bits - lgN); + return static_cast(v); +} + +} // namespace detail +} // namespace experimental +} // namespace cugraph diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index a93aa0cfabb..18dfdbc8f63 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -317,6 +317,14 @@ set(MST_TEST_SRC ConfigureTest(MST_TEST "${MST_TEST_SRC}") +################################################################################################### +# - Experimental R-mat graph generation tests ----------------------------------------------------- + +set(EXPERIMENTAL_GENERATE_RMAT_TEST_SRCS + "${CMAKE_CURRENT_SOURCE_DIR}/experimental/generate_rmat_test.cpp") + +ConfigureTest(EXPERIMENTAL_GENERATE_RMAT_TEST "${EXPERIMENTAL_GENERATE_RMAT_TEST_SRCS}" "") + ################################################################################################### # - Experimental Graph tests ---------------------------------------------------------------------- @@ -329,8 +337,6 @@ ConfigureTest(EXPERIMENTAL_GRAPH_TEST "${EXPERIMENTAL_GRAPH_TEST_SRCS}") # - Experimental weight-sum tests ----------------------------------------------------------------- set(EXPERIMENTAL_WEIGHT_SUM_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/experimental/weight_sum_test.cpp") ConfigureTest(EXPERIMENTAL_WEIGHT_SUM_TEST "${EXPERIMENTAL_WEIGHT_SUM_TEST_SRCS}") @@ -339,8 +345,6 @@ ConfigureTest(EXPERIMENTAL_WEIGHT_SUM_TEST "${EXPERIMENTAL_WEIGHT_SUM_TEST_SRCS} # - Experimental degree tests --------------------------------------------------------------------- set(EXPERIMENTAL_DEGREE_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/experimental/degree_test.cpp") ConfigureTest(EXPERIMENTAL_DEGREE_TEST "${EXPERIMENTAL_DEGREE_TEST_SRCS}") diff --git 
a/cpp/tests/experimental/coarsen_graph_test.cpp b/cpp/tests/experimental/coarsen_graph_test.cpp index 941b33e5661..789619f2cd9 100644 --- a/cpp/tests/experimental/coarsen_graph_test.cpp +++ b/cpp/tests/experimental/coarsen_graph_test.cpp @@ -36,20 +36,6 @@ #include #include -template -std::enable_if_t::value, bool> is_valid_vertex(vertex_t num_vertices, - vertex_t v) -{ - return (v >= 0) && (v < num_vertices); -} - -template -std::enable_if_t::value, bool> is_valid_vertex(vertex_t num_vertices, - vertex_t v) -{ - return v < num_vertices; -} - template void check_coarsened_graph_results(edge_t* org_offsets, vertex_t* org_indices, @@ -68,13 +54,13 @@ void check_coarsened_graph_results(edge_t* org_offsets, ASSERT_TRUE(std::count_if(org_indices, org_indices + org_offsets[num_org_vertices], [num_org_vertices](auto nbr) { - return !is_valid_vertex(num_org_vertices, nbr); + return !cugraph::test::is_valid_vertex(num_org_vertices, nbr); }) == 0); ASSERT_TRUE(std::is_sorted(coarse_offsets, coarse_offsets + num_coarse_vertices)); ASSERT_TRUE(std::count_if(coarse_indices, coarse_indices + coarse_offsets[num_coarse_vertices], [num_coarse_vertices](auto nbr) { - return !is_valid_vertex(num_coarse_vertices, nbr); + return !cugraph::test::is_valid_vertex(num_coarse_vertices, nbr); }) == 0); ASSERT_TRUE(num_coarse_vertices <= num_org_vertices); diff --git a/cpp/tests/experimental/degree_test.cpp b/cpp/tests/experimental/degree_test.cpp index 7c7b41cdacc..581b6b29f64 100644 --- a/cpp/tests/experimental/degree_test.cpp +++ b/cpp/tests/experimental/degree_test.cpp @@ -83,9 +83,11 @@ class Tests_Degree : public ::testing::TestWithParam { { raft::handle_t handle{}; - auto graph = cugraph::test:: - read_graph_from_matrix_market_file( - handle, configuration.graph_file_full_path, false); + cugraph::experimental::graph_t graph( + handle); + std::tie(graph, std::ignore) = cugraph::test:: + read_graph_from_matrix_market_file( + handle, configuration.graph_file_full_path, false, false); auto graph_view = graph.view(); std::vector h_offsets(graph_view.get_number_of_vertices() + 1); diff --git a/cpp/tests/experimental/generate_rmat_test.cpp b/cpp/tests/experimental/generate_rmat_test.cpp new file mode 100644 index 00000000000..249a1a3c6c8 --- /dev/null +++ b/cpp/tests/experimental/generate_rmat_test.cpp @@ -0,0 +1,285 @@ +/* + * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governin_from_mtxg permissions and + * limitations under the License. 
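Returning briefly to detail::scramble defined above: every step touches only the low lgN bits in an invertible way (adding a constant, multiplying by an odd constant, and keeping exactly lgN reversed bits), so the mapping is a permutation of [0, 2^lgN) and never collides vertex IDs. The host-side sketch below mirrors the 64-bit arithmetic for quick experimentation outside CUDA; the loop-based bit reversal stands in for __brevll and is not part of the patch.

  #include <cassert>
  #include <cstddef>
  #include <cstdint>
  #include <vector>

  // host stand-in for __brevll (simple loop, correctness only)
  uint64_t bitreverse64(uint64_t x)
  {
    uint64_t r{0};
    for (int i = 0; i < 64; ++i) { r = (r << 1) | ((x >> i) & uint64_t{1}); }
    return r;
  }

  // host mirror of detail::scramble for 64-bit vertex IDs (same constants as above)
  uint64_t scramble_host(uint64_t v, size_t lgN)
  {
    uint64_t const val0{606610977102444280ULL};
    uint64_t const val1{11680327234415193037ULL};
    v += val0 + val1;
    v *= (val0 | 0x4519840211493211ULL);
    v = bitreverse64(v) >> (64 - lgN);
    v *= (val1 | 0x3050852102C843A5ULL);
    v = bitreverse64(v) >> (64 - lgN);
    return v;
  }

  int main()
  {
    size_t const lgN{12};  // small enough to check exhaustively
    std::vector<bool> seen(size_t{1} << lgN, false);
    for (uint64_t v = 0; v < (uint64_t{1} << lgN); ++v) {
      auto s = scramble_host(v, lgN);
      assert((s < (uint64_t{1} << lgN)) && !seen[s]);
      seen[s] = true;
    }
    return 0;  // reaching here means the mapping is a bijection on [0, 2^lgN)
  }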
+ */ + +#include +#include + +#include + +#include +#include +#include +#include + +#include + +#include +#include +#include + +// this function assumes that vertex IDs are not scrambled +template +void validate_rmat_distribution( + std::tuple* edges, + size_t num_edges, + vertex_t src_first, + vertex_t src_last, + vertex_t dst_first, + vertex_t dst_last, + double a, + double b, + double c, + bool clip_and_flip, + size_t min_edges /* stop recursion if # edges < min_edges */, + double error_tolerance /* (computed a|b|c - input a|b|c) shoud be smaller than error_tolerance*/) +{ + // we cannot expect the ratios of the edges in the four quadrants of the graph adjacency matrix to + // converge close to a, b, c, d if num_edges is not large enough. + if (num_edges < min_edges) { return; } + + auto src_threshold = (src_first + src_last) / 2; + auto dst_threshold = (dst_first + dst_last) / 2; + + auto a_plus_b_last = std::partition(edges, edges + num_edges, [src_threshold](auto edge) { + return std::get<0>(edge) < src_threshold; + }); + auto a_last = std::partition( + edges, a_plus_b_last, [dst_threshold](auto edge) { return std::get<1>(edge) < dst_threshold; }); + auto c_last = std::partition(a_plus_b_last, edges + num_edges, [dst_threshold](auto edge) { + return std::get<1>(edge) < dst_threshold; + }); + + ASSERT_TRUE(std::abs((double)std::distance(edges, a_last) / num_edges - a) < error_tolerance) + << "# edges=" << num_edges << " computed a=" << (double)std::distance(edges, a_last) / num_edges + << " iput a=" << a << " error tolerance=" << error_tolerance << "."; + if (clip_and_flip && (src_first == dst_first) && + (src_last == dst_last)) { // if clip_and_flip and in the diagonal + ASSERT_TRUE(std::distance(a_last, a_plus_b_last) == 0); + ASSERT_TRUE(std::abs((double)std::distance(a_plus_b_last, c_last) / num_edges - (b + c)) < + error_tolerance) + << "# edges=" << num_edges + << " computed c=" << (double)std::distance(a_plus_b_last, c_last) / num_edges + << " iput (b + c)=" << (b + c) << " error tolerance=" << error_tolerance << "."; + } else { + ASSERT_TRUE(std::abs((double)std::distance(a_last, a_plus_b_last) / num_edges - b) < + error_tolerance) + << "# edges=" << num_edges + << " computed b=" << (double)std::distance(a_last, a_plus_b_last) / num_edges + << " iput b=" << b << " error tolerance=" << error_tolerance << "."; + ASSERT_TRUE(std::abs((double)std::distance(a_plus_b_last, c_last) / num_edges - c) < + error_tolerance) + << "# edges=" << num_edges + << " computed c=" << (double)std::distance(a_plus_b_last, c_last) / num_edges + << " iput c=" << c << " error tolerance=" << error_tolerance << "."; + } + + validate_rmat_distribution(edges, + std::distance(edges, a_last), + src_first, + src_threshold, + dst_first, + dst_threshold, + a, + b, + c, + clip_and_flip, + min_edges, + error_tolerance); + validate_rmat_distribution(a_last, + std::distance(a_last, a_plus_b_last), + src_first, + (src_first + src_last) / 2, + dst_threshold, + dst_last, + a, + b, + c, + clip_and_flip, + min_edges, + error_tolerance); + validate_rmat_distribution(a_plus_b_last, + std::distance(a_plus_b_last, c_last), + src_threshold, + src_last, + dst_first, + dst_threshold, + a, + b, + c, + clip_and_flip, + min_edges, + error_tolerance); + validate_rmat_distribution(c_last, + std::distance(c_last, edges + num_edges), + src_threshold, + src_last, + dst_threshold, + dst_last, + a, + b, + c, + clip_and_flip, + min_edges, + error_tolerance); + + return; +} + +typedef struct GenerateRmat_Usecase_t { + size_t scale{0}; + 
size_t edge_factor{0}; + double a{0.0}; + double b{0.0}; + double c{0.0}; + bool clip_and_flip{false}; + + GenerateRmat_Usecase_t( + size_t scale, size_t edge_factor, double a, double b, double c, bool clip_and_flip) + : scale(scale), edge_factor(edge_factor), a(a), b(b), c(c), clip_and_flip(clip_and_flip){}; +} GenerateRmat_Usecase; + +class Tests_GenerateRmat : public ::testing::TestWithParam { + public: + Tests_GenerateRmat() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + template + void run_current_test(GenerateRmat_Usecase const& configuration) + { + raft::handle_t handle{}; + + auto num_vertices = static_cast(size_t{1} << configuration.scale); + std::vector no_scramble_out_degrees(num_vertices, 0); + std::vector no_scramble_in_degrees(num_vertices, 0); + std::vector scramble_out_degrees(num_vertices, 0); + std::vector scramble_in_degrees(num_vertices, 0); + for (size_t scramble = 0; scramble < 2; ++scramble) { + rmm::device_uvector d_srcs(0, handle.get_stream()); + rmm::device_uvector d_dsts(0, handle.get_stream()); + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + std::tie(d_srcs, d_dsts) = cugraph::experimental::generate_rmat_edgelist( + handle, + configuration.scale, + (size_t{1} << configuration.scale) * configuration.edge_factor, + configuration.a, + configuration.b, + configuration.c, + uint64_t{0}, + configuration.clip_and_flip, + static_cast(scramble)); + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + std::vector h_cugraph_srcs(d_srcs.size()); + std::vector h_cugraph_dsts(d_dsts.size()); + + raft::update_host(h_cugraph_srcs.data(), d_srcs.data(), d_srcs.size(), handle.get_stream()); + raft::update_host(h_cugraph_dsts.data(), d_dsts.data(), d_dsts.size(), handle.get_stream()); + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + + ASSERT_TRUE( + (h_cugraph_srcs.size() == (size_t{1} << configuration.scale) * configuration.edge_factor) && + (h_cugraph_dsts.size() == (size_t{1} << configuration.scale) * configuration.edge_factor)) + << "Returned an invalid number of R-mat graph edges."; + ASSERT_TRUE( + std::count_if(h_cugraph_srcs.begin(), + h_cugraph_srcs.end(), + [num_vertices = static_cast(size_t{1} << configuration.scale)]( + auto v) { return !cugraph::test::is_valid_vertex(num_vertices, v); }) == 0) + << "Returned R-mat graph edges have invalid source vertex IDs."; + ASSERT_TRUE( + std::count_if(h_cugraph_dsts.begin(), + h_cugraph_dsts.end(), + [num_vertices = static_cast(size_t{1} << configuration.scale)]( + auto v) { return !cugraph::test::is_valid_vertex(num_vertices, v); }) == 0) + << "Returned R-mat graph edges have invalid destination vertex IDs."; + + if (!scramble) { + if (configuration.clip_and_flip) { + for (size_t i = 0; i < h_cugraph_srcs.size(); ++i) { + ASSERT_TRUE(h_cugraph_srcs[i] >= h_cugraph_dsts[i]); + } + } + + std::vector> h_cugraph_edges(h_cugraph_srcs.size()); + for (size_t i = 0; i < h_cugraph_srcs.size(); ++i) { + h_cugraph_edges[i] = std::make_tuple(h_cugraph_srcs[i], h_cugraph_dsts[i]); + } + + validate_rmat_distribution(h_cugraph_edges.data(), + h_cugraph_edges.size(), + vertex_t{0}, + num_vertices, + vertex_t{0}, + num_vertices, + configuration.a, + configuration.b, + configuration.c, + configuration.clip_and_flip, + size_t{100000}, + 0.01); + } + + if (scramble) { + std::for_each(h_cugraph_srcs.begin(), + h_cugraph_srcs.end(), + [&scramble_out_degrees](auto src) { 
scramble_out_degrees[src]++; }); + std::for_each(h_cugraph_dsts.begin(), + h_cugraph_dsts.end(), + [&scramble_in_degrees](auto dst) { scramble_in_degrees[dst]++; }); + std::sort(scramble_out_degrees.begin(), scramble_out_degrees.end()); + std::sort(scramble_in_degrees.begin(), scramble_in_degrees.end()); + } else { + std::for_each(h_cugraph_srcs.begin(), + h_cugraph_srcs.end(), + [&no_scramble_out_degrees](auto src) { no_scramble_out_degrees[src]++; }); + std::for_each(h_cugraph_dsts.begin(), + h_cugraph_dsts.end(), + [&no_scramble_in_degrees](auto dst) { no_scramble_in_degrees[dst]++; }); + std::sort(no_scramble_out_degrees.begin(), no_scramble_out_degrees.end()); + std::sort(no_scramble_in_degrees.begin(), no_scramble_in_degrees.end()); + } + } + + // this relies on the fact that the edge generator is deterministic. + // ideally, we should test that the two graphs are isomorphic, but this is NP hard; insted, we + // just check out-degree & in-degree distributions + ASSERT_TRUE(std::equal(no_scramble_out_degrees.begin(), + no_scramble_out_degrees.end(), + scramble_out_degrees.begin())); + ASSERT_TRUE(std::equal( + no_scramble_in_degrees.begin(), no_scramble_in_degrees.end(), scramble_in_degrees.begin())); + } +}; + +// FIXME: add tests for type combinations + +TEST_P(Tests_GenerateRmat, CheckInt32) { run_current_test(GetParam()); } + +INSTANTIATE_TEST_CASE_P(simple_test, + Tests_GenerateRmat, + ::testing::Values(GenerateRmat_Usecase(20, 16, 0.57, 0.19, 0.19, true), + GenerateRmat_Usecase(20, 16, 0.57, 0.19, 0.19, false), + GenerateRmat_Usecase(20, 16, 0.45, 0.22, 0.22, true), + GenerateRmat_Usecase(20, 16, 0.45, 0.22, 0.22, false))); + +CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/weight_sum_test.cpp b/cpp/tests/experimental/weight_sum_test.cpp index aeda7386314..9ab47b69baa 100644 --- a/cpp/tests/experimental/weight_sum_test.cpp +++ b/cpp/tests/experimental/weight_sum_test.cpp @@ -85,9 +85,11 @@ class Tests_WeightSum : public ::testing::TestWithParam { { raft::handle_t handle{}; - auto graph = cugraph::test:: - read_graph_from_matrix_market_file( - handle, configuration.graph_file_full_path, true); + cugraph::experimental::graph_t graph( + handle); + std::tie(graph, std::ignore) = cugraph::test:: + read_graph_from_matrix_market_file( + handle, configuration.graph_file_full_path, true, false); auto graph_view = graph.view(); std::vector h_offsets(graph_view.get_number_of_vertices() + 1); diff --git a/cpp/tests/utilities/test_utilities.hpp b/cpp/tests/utilities/test_utilities.hpp index 4b5517271f5..4682699df2d 100644 --- a/cpp/tests/utilities/test_utilities.hpp +++ b/cpp/tests/utilities/test_utilities.hpp @@ -130,5 +130,19 @@ read_graph_from_matrix_market_file(raft::handle_t const& handle, bool test_weighted, bool renumber); +template +std::enable_if_t::value, bool> is_valid_vertex(vertex_t num_vertices, + vertex_t v) +{ + return (v >= 0) && (v < num_vertices); +} + +template +std::enable_if_t::value, bool> is_valid_vertex(vertex_t num_vertices, + vertex_t v) +{ + return v < num_vertices; +} + } // namespace test } // namespace cugraph From 79745021553f227b2ed8a5a3508ddb7bda939c54 Mon Sep 17 00:00:00 2001 From: Chuck Hastings <45364586+ChuckHastings@users.noreply.github.com> Date: Wed, 3 Mar 2021 12:28:11 -0500 Subject: [PATCH 182/343] =?UTF-8?q?update=20default=20path=20of=20setup=20?= =?UTF-8?q?to=20use=20the=20new=20directory=20paths=20in=20build=20?= =?UTF-8?q?=E2=80=A6=20(#1425)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 
8bit A recent change caused the `setup.py` running in a clean checkout to fail unless either `RAFT_PATH` or `CUGRAPH_BUILD_PATH` were set. These are typically set if you run the build scripts, but if you try running the build steps individually by hand the script would fail unless you set one of these environment variables correctly. This PR fixes the default path (if neither environment variable is specified) to construct the proper location for looking up raft. Closes #1428 Authors: - Chuck Hastings (@ChuckHastings) Approvers: - Rick Ratzel (@rlratzel) - Brad Rees (@BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1425 --- python/setuputils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/setuputils.py b/python/setuputils.py index 47eaf74d4b6..d93051d05ef 100644 --- a/python/setuputils.py +++ b/python/setuputils.py @@ -152,7 +152,7 @@ def clone_repo_if_needed(name, cpp_build_path=None, repo_path = ( _get_repo_path() + '/python/_external_repositories/' + name + '/') else: - repo_path = os.path.join(cpp_build_path, name + '/src/' + name + '/') + repo_path = os.path.join(cpp_build_path, '_deps', name + '-src') return repo_path, repo_cloned From c1047ed79525d7ff7a5d484e19aaa048271c45da Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Thu, 4 Mar 2021 09:05:32 -0500 Subject: [PATCH 183/343] Replace rmm::device_vector & thrust::host_vector with rmm::device_uvector & std::vector, respectively. (#1421) - [x] Replace rmm::device_vector with rmm::device_uvector for better concurrency in multi-stream executions - [x] Replace thrust::host_vector with std::vector This PR partially addresses https://github.com/rapidsai/cugraph/issues/1390 Authors: - Seunghwa Kang (@seunghwak) Approvers: - Alex Fender (@afender) URL: https://github.com/rapidsai/cugraph/pull/1421 --- cpp/include/compute_partition.cuh | 26 ++++++---- cpp/include/patterns/count_if_e.cuh | 4 +- cpp/include/patterns/transform_reduce_e.cuh | 11 ++-- .../update_frontier_v_push_if_out_nbr.cuh | 26 +++++----- cpp/include/patterns/vertex_frontier.cuh | 51 ++++++++++++------- cpp/src/experimental/louvain.cuh | 2 +- 6 files changed, 72 insertions(+), 48 deletions(-) diff --git a/cpp/include/compute_partition.cuh b/cpp/include/compute_partition.cuh index c81a6237b31..5c03b0971f2 100644 --- a/cpp/include/compute_partition.cuh +++ b/cpp/include/compute_partition.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
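The hunk below follows the pattern used throughout this commit: rmm::device_uvector takes an explicit stream and performs no initialization or implicit host/device copies, so assignments that rmm::device_vector handled implicitly become explicit raft::update_device or thrust::fill calls. A minimal sketch of the idiom, with illustrative names and an assumed include path for raft::update_device:

  #include <raft/cudart_utils.h>  // raft::update_device (assumed include path)
  #include <raft/handle.hpp>
  #include <rmm/device_uvector.hpp>

  #include <vector>

  void copy_offsets_to_device(raft::handle_t const& handle, std::vector<int> const& h_offsets)
  {
    // stream-ordered, uninitialized allocation; no per-element default construction
    rmm::device_uvector<int> d_offsets(h_offsets.size(), handle.get_stream());

    // the copy device_vector's operator= used to perform implicitly is now explicit
    raft::update_device(d_offsets.data(), h_offsets.data(), h_offsets.size(), handle.get_stream());

    // ... kernels launched on handle.get_stream() may then consume d_offsets.data() ...
  }

Because nothing is value-initialized, call sites that relied on device_vector's zero-initialization gain an explicit thrust::fill, which is why the compute_partition.cuh and vertex_frontier.cuh hunks in this commit add fill and update_device/update_host calls.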
@@ -39,27 +39,32 @@ class compute_partition_t { using graph_view_t = graph_view_type; using vertex_t = typename graph_view_type::vertex_type; - compute_partition_t(graph_view_t const &graph_view) + compute_partition_t(raft::handle_t const &handle, graph_view_t const &graph_view) + : vertex_partition_offsets_v_(0, handle.get_stream()) { - init(graph_view); + init(handle, graph_view); } private: template * = nullptr> - void init(graph_view_t const &graph_view) + void init(raft::handle_t const &handle, graph_view_t const &graph_view) { } template * = nullptr> - void init(graph_view_t const &graph_view) + void init(raft::handle_t const &handle, graph_view_t const &graph_view) { auto partition = graph_view.get_partition(); row_size_ = partition.get_row_size(); col_size_ = partition.get_col_size(); size_ = row_size_ * col_size_; - vertex_partition_offsets_v_.resize(size_ + 1); - vertex_partition_offsets_v_ = partition.get_vertex_partition_offsets(); + vertex_partition_offsets_v_.resize(size_ + 1, handle.get_stream()); + auto vertex_partition_offsets = partition.get_vertex_partition_offsets(); + raft::update_device(vertex_partition_offsets_v_.data(), + vertex_partition_offsets.data(), + vertex_partition_offsets.size(), + handle.get_stream()); } public: @@ -166,7 +171,7 @@ class compute_partition_t { */ vertex_device_view_t vertex_device_view() const { - return vertex_device_view_t(vertex_partition_offsets_v_.data().get(), size_); + return vertex_device_view_t(vertex_partition_offsets_v_.data(), size_); } /** @@ -176,12 +181,11 @@ class compute_partition_t { */ edge_device_view_t edge_device_view() const { - return edge_device_view_t( - vertex_partition_offsets_v_.data().get(), row_size_, col_size_, size_); + return edge_device_view_t(vertex_partition_offsets_v_.data(), row_size_, col_size_, size_); } private: - rmm::device_vector vertex_partition_offsets_v_{}; + rmm::device_uvector vertex_partition_offsets_v_; int row_size_{1}; int col_size_{1}; int size_{1}; diff --git a/cpp/include/patterns/count_if_e.cuh b/cpp/include/patterns/count_if_e.cuh index 63b31f9c44e..99bfc80f643 100644 --- a/cpp/include/patterns/count_if_e.cuh +++ b/cpp/include/patterns/count_if_e.cuh @@ -201,7 +201,7 @@ typename GraphViewType::edge_type count_if_e( detail::count_if_e_for_all_block_size, handle.get_device_properties().maxGridSize[0]); - rmm::device_vector block_counts(update_grid.num_blocks); + rmm::device_uvector block_counts(update_grid.num_blocks, handle.get_stream()); detail::for_all_major_for_all_nbr_low_degree<< block_results(update_grid.num_blocks); + auto block_result_buffer = + allocate_dataframe_buffer(update_grid.num_blocks, handle.get_stream()); detail::for_all_major_for_all_nbr_low_degree<<(block_result_buffer), e_op); // FIXME: we have several options to implement this. 
With cooperative group support @@ -225,10 +226,10 @@ T transform_reduce_e(raft::handle_t const& handle, // synchronization point in varying timings and the number of SMs is not very big) auto partial_result = thrust::reduce(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - block_results.begin(), - block_results.end(), + get_dataframe_buffer_begin(block_result_buffer), + get_dataframe_buffer_begin(block_result_buffer) + update_grid.num_blocks, T(), - [] __device__(auto lhs, auto rhs) { return plus_edge_op_result(lhs, rhs); }); + [] __device__(T lhs, T rhs) { return plus_edge_op_result(lhs, rhs); }); result = plus_edge_op_result(result, partial_result); } diff --git a/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh b/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh index 4c76322fa79..4efd32bcac7 100644 --- a/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh +++ b/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -157,13 +158,14 @@ size_t reduce_buffer_elements(raft::handle_t const& handle, // FIXME: if GraphViewType::is_multi_gpu is true, this should be executed on the GPU holding the // vertex unless reduce_op is a pure function. rmm::device_uvector keys(num_buffer_elements, handle.get_stream()); - rmm::device_vector values(num_buffer_elements); + auto value_buffer = + allocate_dataframe_buffer(num_buffer_elements, handle.get_stream()); auto it = thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), buffer_key_output_first, buffer_key_output_first + num_buffer_elements, buffer_payload_output_first, keys.begin(), - values.begin(), + get_dataframe_buffer_begin(value_buffer), thrust::equal_to(), reduce_op); auto num_reduced_buffer_elements = @@ -173,13 +175,9 @@ size_t reduce_buffer_elements(raft::handle_t const& handle, keys.begin() + num_reduced_buffer_elements, buffer_key_output_first); thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - values.begin(), - values.begin() + num_reduced_buffer_elements, + get_dataframe_buffer_begin(value_buffer), + get_dataframe_buffer_begin(value_buffer) + num_reduced_buffer_elements, buffer_payload_output_first); - // FIXME: this is unecessary if we use a tuple of rmm::device_uvector objects for values - CUDA_TRY( - cudaStreamSynchronize(handle.get_stream())); // this is necessary as values will become - // out-of-scope once this function returns return num_reduced_buffer_elements; } } @@ -673,15 +671,19 @@ void update_frontier_v_push_if_out_nbr( num_buffer_elements, vertex_value_input_first, vertex_value_output_first, - std::get<0>(bucket_and_bucket_size_device_ptrs).get(), - std::get<1>(bucket_and_bucket_size_device_ptrs).get(), + std::get<0>(bucket_and_bucket_size_device_ptrs), + std::get<1>(bucket_and_bucket_size_device_ptrs), VertexFrontierType::kInvalidBucketIdx, invalid_vertex, v_op); auto bucket_sizes_device_ptr = std::get<1>(bucket_and_bucket_size_device_ptrs); - thrust::host_vector bucket_sizes( - bucket_sizes_device_ptr, bucket_sizes_device_ptr + VertexFrontierType::kNumBuckets); + std::vector bucket_sizes(VertexFrontierType::kNumBuckets); + raft::update_host(bucket_sizes.data(), + bucket_sizes_device_ptr, + VertexFrontierType::kNumBuckets, + handle.get_stream()); + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); for (size_t i = 0; i < VertexFrontierType::kNumBuckets; ++i) { vertex_frontier.get_bucket(i).set_size(bucket_sizes[i]); } diff --git 
a/cpp/include/patterns/vertex_frontier.cuh b/cpp/include/patterns/vertex_frontier.cuh index 2126a27ee5a..c11142d3cf7 100644 --- a/cpp/include/patterns/vertex_frontier.cuh +++ b/cpp/include/patterns/vertex_frontier.cuh @@ -147,13 +147,17 @@ template class Bucket { public: Bucket(raft::handle_t const& handle, size_t capacity) - : handle_ptr_(&handle), elements_(capacity, invalid_vertex_id::value) + : handle_ptr_(&handle), elements_(capacity, handle.get_stream()) { + thrust::fill(rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + elements_.begin(), + elements_.end(), + invalid_vertex_id::value); } void insert(vertex_t v) { - elements_[size_] = v; + raft::update_device(elements_.data() + size_, &v, 1, handle_ptr_->get_stream()); ++size_; } @@ -177,9 +181,9 @@ class Bucket { size_t capacity() const { return elements_.size(); } - auto const data() const { return elements_.data().get(); } + auto const data() const { return elements_.data(); } - auto data() { return elements_.data().get(); } + auto data() { return elements_.data(); } auto const begin() const { return elements_.begin(); } @@ -191,7 +195,7 @@ class Bucket { private: raft::handle_t const* handle_ptr_{nullptr}; - rmm::device_vector elements_{}; + rmm::device_uvector elements_; size_t size_{0}; }; @@ -206,13 +210,21 @@ class VertexFrontier { VertexFrontier(raft::handle_t const& handle, std::vector bucket_capacities) : handle_ptr_(&handle), - tmp_bucket_ptrs_(num_buckets, nullptr), - tmp_bucket_sizes_(num_buckets, 0), + tmp_bucket_ptrs_(num_buckets, handle.get_stream()), + tmp_bucket_sizes_(num_buckets, handle.get_stream()), buffer_ptrs_(kReduceInputTupleSize + 1 /* to store destination column number */, nullptr), buffer_idx_(0, handle_ptr_->get_stream()) { CUGRAPH_EXPECTS(bucket_capacities.size() == num_buckets, "invalid input argument bucket_capacities (size mismatch)"); + thrust::fill(rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + tmp_bucket_ptrs_.begin(), + tmp_bucket_ptrs_.end(), + static_cast(nullptr)); + thrust::fill(rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + tmp_bucket_sizes_.begin(), + tmp_bucket_sizes_.end(), + size_t{0}); for (size_t i = 0; i < num_buckets; ++i) { buckets_.emplace_back(handle, bucket_capacities[i]); } @@ -251,8 +263,8 @@ class VertexFrontier { 0, handle_ptr_->get_stream()>>>(this_bucket.begin(), this_bucket.end(), - std::get<0>(bucket_and_bucket_size_device_ptrs).get(), - std::get<1>(bucket_and_bucket_size_device_ptrs).get(), + std::get<0>(bucket_and_bucket_size_device_ptrs), + std::get<1>(bucket_and_bucket_size_device_ptrs), bucket_idx, kInvalidBucketIdx, invalid_vertex, @@ -269,8 +281,10 @@ class VertexFrontier { [] __device__(auto value) { return value == invalid_vertex; }); auto bucket_sizes_device_ptr = std::get<1>(bucket_and_bucket_size_device_ptrs); - thrust::host_vector bucket_sizes(bucket_sizes_device_ptr, - bucket_sizes_device_ptr + kNumBuckets); + std::vector bucket_sizes(kNumBuckets); + raft::update_host( + bucket_sizes.data(), bucket_sizes_device_ptr, kNumBuckets, handle_ptr_->get_stream()); + CUDA_TRY(cudaStreamSynchronize(handle_ptr_->get_stream())); for (size_t i = 0; i < kNumBuckets; ++i) { if (i != bucket_idx) { get_bucket(i).set_size(bucket_sizes[i]); } } @@ -283,14 +297,17 @@ class VertexFrontier { auto get_bucket_and_bucket_size_device_pointers() { - thrust::host_vector tmp_ptrs(buckets_.size(), nullptr); - thrust::host_vector tmp_sizes(buckets_.size(), 0); + std::vector tmp_ptrs(buckets_.size(), 
nullptr); + std::vector tmp_sizes(buckets_.size(), 0); for (size_t i = 0; i < buckets_.size(); ++i) { tmp_ptrs[i] = get_bucket(i).data(); tmp_sizes[i] = get_bucket(i).size(); } - tmp_bucket_ptrs_ = tmp_ptrs; - tmp_bucket_sizes_ = tmp_sizes; + raft::update_device( + tmp_bucket_ptrs_.data(), tmp_ptrs.data(), tmp_ptrs.size(), handle_ptr_->get_stream()); + raft::update_device( + tmp_bucket_sizes_.data(), tmp_sizes.data(), tmp_sizes.size(), handle_ptr_->get_stream()); + CUDA_TRY(cudaStreamSynchronize(handle_ptr_->get_stream())); return std::make_tuple(tmp_bucket_ptrs_.data(), tmp_bucket_sizes_.data()); } @@ -345,8 +362,8 @@ class VertexFrontier { raft::handle_t const* handle_ptr_{nullptr}; std::vector> buckets_{}; - rmm::device_vector tmp_bucket_ptrs_{}; - rmm::device_vector tmp_bucket_sizes_{}; + rmm::device_uvector tmp_bucket_ptrs_; + rmm::device_uvector tmp_bucket_sizes_; std::array tuple_element_sizes_ = compute_thrust_tuple_element_sizes()(); diff --git a/cpp/src/experimental/louvain.cuh b/cpp/src/experimental/louvain.cuh index f162cd17a61..fe8310a62ca 100644 --- a/cpp/src/experimental/louvain.cuh +++ b/cpp/src/experimental/louvain.cuh @@ -405,7 +405,7 @@ class Louvain { handle_(handle), dendrogram_(std::make_unique>()), current_graph_view_(graph_view), - compute_partition_(graph_view), + compute_partition_(handle, graph_view), local_num_vertices_(graph_view.get_number_of_local_vertices()), local_num_rows_(graph_view.get_number_of_local_adj_matrix_partition_rows()), local_num_cols_(graph_view.get_number_of_local_adj_matrix_partition_cols()), From 65f2f5b6d3f6c9c0ed88369175f9f14001724b85 Mon Sep 17 00:00:00 2001 From: Iroy30 <41401566+Iroy30@users.noreply.github.com> Date: Thu, 4 Mar 2021 11:20:25 -0600 Subject: [PATCH 184/343] Remove raft handle duplication (#1436) Closes #1407 Authors: - @Iroy30 Approvers: - Alex Fender (@afender) - Brad Rees (@BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1436 --- .../betweenness_centrality_wrapper.pyx | 8 +- .../edge_betweenness_centrality_wrapper.pyx | 4 +- python/cugraph/centrality/katz_centrality.pxd | 4 +- .../centrality/katz_centrality_wrapper.pyx | 7 +- python/cugraph/comms/comms.pxd | 4 +- python/cugraph/comms/comms_wrapper.pyx | 20 +- python/cugraph/community/egonet.pxd | 2 +- python/cugraph/community/egonet_wrapper.pyx | 4 +- .../community/ktruss_subgraph_wrapper.pyx | 8 +- python/cugraph/community/leiden_wrapper.pyx | 3 +- python/cugraph/community/louvain.pxd | 4 +- python/cugraph/community/louvain_wrapper.pyx | 5 +- .../community/spectral_clustering_wrapper.pyx | 5 +- .../community/subgraph_extraction_wrapper.pyx | 2 - .../community/triangle_count_wrapper.pyx | 4 +- .../components/connectivity_wrapper.pyx | 3 +- python/cugraph/cores/core_number_wrapper.pyx | 4 +- python/cugraph/cores/k_core_wrapper.pyx | 7 +- .../dask/centrality/mg_katz_centrality.pxd | 4 +- .../centrality/mg_katz_centrality_wrapper.pyx | 4 +- python/cugraph/dask/community/louvain.pxd | 4 +- .../dask/community/louvain_wrapper.pyx | 5 +- .../dask/link_analysis/mg_pagerank.pxd | 6 +- .../link_analysis/mg_pagerank_wrapper.pyx | 4 +- .../dask/structure/renumber_wrapper.pyx | 2 +- python/cugraph/dask/traversal/mg_bfs.pxd | 4 +- .../cugraph/dask/traversal/mg_bfs_wrapper.pyx | 4 +- python/cugraph/dask/traversal/mg_sssp.pxd | 4 +- .../dask/traversal/mg_sssp_wrapper.pyx | 4 +- .../cugraph/layout/force_atlas2_wrapper.pyx | 6 +- .../cugraph/linear_assignment/lap_wrapper.pyx | 3 +- python/cugraph/link_analysis/hits_wrapper.pyx | 7 +- 
python/cugraph/link_analysis/pagerank.pxd | 4 +- .../link_analysis/pagerank_wrapper.pyx | 5 +- .../link_prediction/jaccard_wrapper.pyx | 4 +- .../link_prediction/overlap_wrapper.pyx | 4 +- python/cugraph/structure/graph_primtypes.pxd | 153 +--------------- python/cugraph/structure/graph_utilities.pxd | 173 ++++++++++++++++++ python/cugraph/structure/utils.pxd | 5 +- python/cugraph/structure/utils_wrapper.pyx | 3 +- python/cugraph/traversal/bfs.pxd | 4 +- python/cugraph/traversal/bfs_wrapper.pyx | 7 +- python/cugraph/traversal/sssp.pxd | 4 +- python/cugraph/traversal/sssp_wrapper.pyx | 9 +- .../traveling_salesperson_wrapper.pyx | 1 - 45 files changed, 255 insertions(+), 280 deletions(-) create mode 100644 python/cugraph/structure/graph_utilities.pxd diff --git a/python/cugraph/centrality/betweenness_centrality_wrapper.pyx b/python/cugraph/centrality/betweenness_centrality_wrapper.pyx index e3d6e04006f..855de3327ba 100644 --- a/python/cugraph/centrality/betweenness_centrality_wrapper.pyx +++ b/python/cugraph/centrality/betweenness_centrality_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -17,18 +17,12 @@ # cython: language_level = 3 from cugraph.centrality.betweenness_centrality cimport betweenness_centrality as c_betweenness_centrality -from cugraph.centrality.betweenness_centrality cimport handle_t from cugraph.structure.graph import DiGraph from cugraph.structure.graph_primtypes cimport * from libc.stdint cimport uintptr_t from libcpp cimport bool import cudf import numpy as np -import numpy.ctypeslib as ctypeslib - -import dask_cudf -import dask_cuda - import cugraph.comms.comms as Comms from cugraph.dask.common.mg_utils import get_client import dask.distributed diff --git a/python/cugraph/centrality/edge_betweenness_centrality_wrapper.pyx b/python/cugraph/centrality/edge_betweenness_centrality_wrapper.pyx index 3c14d590750..136bde1b0e3 100644 --- a/python/cugraph/centrality/edge_betweenness_centrality_wrapper.pyx +++ b/python/cugraph/centrality/edge_betweenness_centrality_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -24,8 +24,6 @@ from libc.stdint cimport uintptr_t from libcpp cimport bool import cudf import numpy as np -import numpy.ctypeslib as ctypeslib - from cugraph.dask.common.mg_utils import get_client import cugraph.comms.comms as Comms import dask.distributed diff --git a/python/cugraph/centrality/katz_centrality.pxd b/python/cugraph/centrality/katz_centrality.pxd index ebf94c78263..ce9ab5291f6 100644 --- a/python/cugraph/centrality/katz_centrality.pxd +++ b/python/cugraph/centrality/katz_centrality.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
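The remaining wrappers in this commit all make the same substitution: rather than each .pxd re-declaring handle_t, they share the declarations in graph_utilities.pxd and cugraph.raft.common.handle, so a single raft::handle_t flows from the Python layer down to the C++ entry points. As a rough, illustrative C++-side sketch of what that object carries (not code from this patch):

  #include <raft/handle.hpp>

  #include <cuda_runtime.h>

  // cugraph::cython entry points take the handle by reference and pull their
  // resources (stream, cuBLAS/cuSPARSE handles, communicator) from it.
  void run_on_handle(raft::handle_t const& handle)
  {
    cudaStream_t stream = handle.get_stream();  // stream all work is ordered on
    // launch kernels / call libraries on `stream` ...
  }

  int main()
  {
    raft::handle_t handle{};  // in cuGraph, created once and shared via the Comms layer
    run_on_handle(handle);
    return 0;
  }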
# You may obtain a copy of the License at @@ -16,7 +16,7 @@ # cython: embedsignature = True # cython: language_level = 3 -from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * from libcpp cimport bool cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": diff --git a/python/cugraph/centrality/katz_centrality_wrapper.pyx b/python/cugraph/centrality/katz_centrality_wrapper.pyx index 088042395fd..d38a0b82824 100644 --- a/python/cugraph/centrality/katz_centrality_wrapper.pyx +++ b/python/cugraph/centrality/katz_centrality_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -17,13 +17,10 @@ # cython: language_level = 3 from cugraph.centrality.katz_centrality cimport call_katz_centrality -from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * from cugraph.structure import graph_primtypes_wrapper -from libcpp cimport bool from libc.stdint cimport uintptr_t - import cudf -import rmm import numpy as np diff --git a/python/cugraph/comms/comms.pxd b/python/cugraph/comms/comms.pxd index 44f7ee77562..3984ade9a9c 100644 --- a/python/cugraph/comms/comms.pxd +++ b/python/cugraph/comms/comms.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -16,7 +16,7 @@ # cython: embedsignature = True # cython: language_level = 3 -from cugraph.structure.graph_primtypes cimport handle_t +from cugraph.raft.common.handle cimport * cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": diff --git a/python/cugraph/comms/comms_wrapper.pyx b/python/cugraph/comms/comms_wrapper.pyx index c1148b4c887..09fa3b1c5c7 100644 --- a/python/cugraph/comms/comms_wrapper.pyx +++ b/python/cugraph/comms/comms_wrapper.pyx @@ -1,5 +1,23 @@ +# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. -from cugraph.structure.graph_primtypes cimport handle_t +# cython: profile=False +# distutils: language = c++ +# cython: embedsignature = True +# cython: language_level = 3 + + +from cugraph.raft.common.handle cimport * from cugraph.comms.comms cimport init_subcomms as c_init_subcomms diff --git a/python/cugraph/community/egonet.pxd b/python/cugraph/community/egonet.pxd index 3ddf929674f..cf1c84fb5f7 100644 --- a/python/cugraph/community/egonet.pxd +++ b/python/cugraph/community/egonet.pxd @@ -12,7 +12,7 @@ # limitations under the License. 
-from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": cdef unique_ptr[cy_multi_edgelists_t] call_egonet[vertex_t, weight_t]( diff --git a/python/cugraph/community/egonet_wrapper.pyx b/python/cugraph/community/egonet_wrapper.pyx index 122dedbfabd..7d2a1169e25 100644 --- a/python/cugraph/community/egonet_wrapper.pyx +++ b/python/cugraph/community/egonet_wrapper.pyx @@ -12,14 +12,12 @@ # limitations under the License. from cugraph.community.egonet cimport call_egonet -from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * from libcpp cimport bool from libc.stdint cimport uintptr_t from cugraph.structure import graph_primtypes_wrapper import cudf -import rmm import numpy as np -import numpy.ctypeslib as ctypeslib from rmm._lib.device_buffer cimport DeviceBuffer from cudf.core.buffer import Buffer diff --git a/python/cugraph/community/ktruss_subgraph_wrapper.pyx b/python/cugraph/community/ktruss_subgraph_wrapper.pyx index 9f8138f4d57..9f38b33d774 100644 --- a/python/cugraph/community/ktruss_subgraph_wrapper.pyx +++ b/python/cugraph/community/ktruss_subgraph_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -19,12 +19,6 @@ from cugraph.community.ktruss_subgraph cimport * from cugraph.structure.graph_primtypes cimport * from cugraph.structure import graph_primtypes_wrapper -from libcpp cimport bool -from libc.stdint cimport uintptr_t -from libc.float cimport FLT_MAX_EXP - -import cudf -import rmm import numpy as np diff --git a/python/cugraph/community/leiden_wrapper.pyx b/python/cugraph/community/leiden_wrapper.pyx index 70fcfcf701b..1b41134c625 100644 --- a/python/cugraph/community/leiden_wrapper.pyx +++ b/python/cugraph/community/leiden_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -22,7 +22,6 @@ from cugraph.structure import graph_primtypes_wrapper from libc.stdint cimport uintptr_t import cudf -import rmm import numpy as np diff --git a/python/cugraph/community/louvain.pxd b/python/cugraph/community/louvain.pxd index eca15ba3d20..1f75c13dbaf 100644 --- a/python/cugraph/community/louvain.pxd +++ b/python/cugraph/community/louvain.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -18,7 +18,7 @@ from libcpp.utility cimport pair -from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": diff --git a/python/cugraph/community/louvain_wrapper.pyx b/python/cugraph/community/louvain_wrapper.pyx index 6b218a0b962..c7ce4e8db66 100644 --- a/python/cugraph/community/louvain_wrapper.pyx +++ b/python/cugraph/community/louvain_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. 
+# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -17,12 +17,11 @@ # cython: language_level = 3 from cugraph.community cimport louvain as c_louvain -from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * from cugraph.structure import graph_primtypes_wrapper from libc.stdint cimport uintptr_t import cudf -import rmm import numpy as np diff --git a/python/cugraph/community/spectral_clustering_wrapper.pyx b/python/cugraph/community/spectral_clustering_wrapper.pyx index 0593d987c0d..7934a386bb7 100644 --- a/python/cugraph/community/spectral_clustering_wrapper.pyx +++ b/python/cugraph/community/spectral_clustering_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -23,12 +23,9 @@ from cugraph.community.spectral_clustering cimport analyzeClustering_edge_cut as from cugraph.community.spectral_clustering cimport analyzeClustering_ratio_cut as c_analyze_clustering_ratio_cut from cugraph.structure.graph_primtypes cimport * from cugraph.structure import graph_primtypes_wrapper -from libcpp cimport bool from libc.stdint cimport uintptr_t - import cugraph import cudf -import rmm import numpy as np diff --git a/python/cugraph/community/subgraph_extraction_wrapper.pyx b/python/cugraph/community/subgraph_extraction_wrapper.pyx index 35b3c743987..31c5d2372f0 100644 --- a/python/cugraph/community/subgraph_extraction_wrapper.pyx +++ b/python/cugraph/community/subgraph_extraction_wrapper.pyx @@ -20,9 +20,7 @@ from cugraph.community.subgraph_extraction cimport extract_subgraph_vertex as c_ from cugraph.structure.graph_primtypes cimport * from cugraph.structure import graph_primtypes_wrapper from libc.stdint cimport uintptr_t - import cudf -import rmm import numpy as np diff --git a/python/cugraph/community/triangle_count_wrapper.pyx b/python/cugraph/community/triangle_count_wrapper.pyx index d7cabd4676f..f1e842f9de4 100644 --- a/python/cugraph/community/triangle_count_wrapper.pyx +++ b/python/cugraph/community/triangle_count_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -21,9 +21,7 @@ from cugraph.structure.graph_primtypes cimport * from cugraph.structure import graph_primtypes_wrapper from libc.stdint cimport uintptr_t import numpy as np - import cudf -import rmm def triangles(input_graph): diff --git a/python/cugraph/components/connectivity_wrapper.pyx b/python/cugraph/components/connectivity_wrapper.pyx index 8b678d16ff8..76d279a8116 100644 --- a/python/cugraph/components/connectivity_wrapper.pyx +++ b/python/cugraph/components/connectivity_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -23,7 +23,6 @@ from cugraph.structure import graph_primtypes_wrapper from libc.stdint cimport uintptr_t from cugraph.structure.symmetrize import symmetrize from cugraph.structure.graph import Graph as type_Graph - import cudf import numpy as np diff --git a/python/cugraph/cores/core_number_wrapper.pyx b/python/cugraph/cores/core_number_wrapper.pyx index 3df1df5f8e9..9fcc3b4746c 100644 --- a/python/cugraph/cores/core_number_wrapper.pyx +++ b/python/cugraph/cores/core_number_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -20,9 +20,7 @@ cimport cugraph.cores.core_number as c_core from cugraph.structure.graph_primtypes cimport * from cugraph.structure import graph_primtypes_wrapper from libc.stdint cimport uintptr_t - import cudf -import rmm import numpy as np diff --git a/python/cugraph/cores/k_core_wrapper.pyx b/python/cugraph/cores/k_core_wrapper.pyx index 51ecec09dc5..a0ef99a8e8b 100644 --- a/python/cugraph/cores/k_core_wrapper.pyx +++ b/python/cugraph/cores/k_core_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -19,12 +19,7 @@ from cugraph.cores.k_core cimport k_core as c_k_core from cugraph.structure.graph_primtypes cimport * from cugraph.structure import graph_primtypes_wrapper -from libcpp cimport bool from libc.stdint cimport uintptr_t -from libc.float cimport FLT_MAX_EXP - -import cudf -import rmm import numpy as np diff --git a/python/cugraph/dask/centrality/mg_katz_centrality.pxd b/python/cugraph/dask/centrality/mg_katz_centrality.pxd index 345457b1963..fb1730da13b 100644 --- a/python/cugraph/dask/centrality/mg_katz_centrality.pxd +++ b/python/cugraph/dask/centrality/mg_katz_centrality.pxd @@ -1,5 +1,5 @@ # -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ # limitations under the License. # -from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * from libcpp cimport bool diff --git a/python/cugraph/dask/centrality/mg_katz_centrality_wrapper.pyx b/python/cugraph/dask/centrality/mg_katz_centrality_wrapper.pyx index b8cab4e4286..ccae26fe7e6 100644 --- a/python/cugraph/dask/centrality/mg_katz_centrality_wrapper.pyx +++ b/python/cugraph/dask/centrality/mg_katz_centrality_wrapper.pyx @@ -1,5 +1,5 @@ # -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -17,7 +17,7 @@ from cugraph.structure.utils_wrapper import * from cugraph.dask.centrality cimport mg_katz_centrality as c_katz_centrality import cudf -from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * import cugraph.structure.graph_primtypes_wrapper as graph_primtypes_wrapper from libc.stdint cimport uintptr_t from cython.operator cimport dereference as deref diff --git a/python/cugraph/dask/community/louvain.pxd b/python/cugraph/dask/community/louvain.pxd index b6b4cd23143..738309dac8a 100644 --- a/python/cugraph/dask/community/louvain.pxd +++ b/python/cugraph/dask/community/louvain.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -17,7 +17,7 @@ # cython: language_level = 3 from libcpp.utility cimport pair -from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": diff --git a/python/cugraph/dask/community/louvain_wrapper.pyx b/python/cugraph/dask/community/louvain_wrapper.pyx index c2a12cf81f3..f58630d07aa 100644 --- a/python/cugraph/dask/community/louvain_wrapper.pyx +++ b/python/cugraph/dask/community/louvain_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -19,8 +19,7 @@ from libc.stdint cimport uintptr_t from cugraph.dask.community cimport louvain as c_louvain -from cugraph.structure.graph_primtypes cimport * - +from cugraph.structure.graph_utilities cimport * import cudf import numpy as np diff --git a/python/cugraph/dask/link_analysis/mg_pagerank.pxd b/python/cugraph/dask/link_analysis/mg_pagerank.pxd index 91104d9127c..55bbc0dba7e 100644 --- a/python/cugraph/dask/link_analysis/mg_pagerank.pxd +++ b/python/cugraph/dask/link_analysis/mg_pagerank.pxd @@ -1,5 +1,5 @@ # -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ # limitations under the License. # -from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * from libcpp cimport bool @@ -31,4 +31,4 @@ cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": double alpha, double tolerance, long long max_iter, - bool has_guess) except + \ No newline at end of file + bool has_guess) except + diff --git a/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx b/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx index 1cd80397b17..12f2342559b 100644 --- a/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx +++ b/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx @@ -1,5 +1,5 @@ # -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -17,7 +17,7 @@ from cugraph.structure.utils_wrapper import * from cugraph.dask.link_analysis cimport mg_pagerank as c_pagerank import cudf -from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * import cugraph.structure.graph_primtypes_wrapper as graph_primtypes_wrapper from libc.stdint cimport uintptr_t from cython.operator cimport dereference as deref diff --git a/python/cugraph/dask/structure/renumber_wrapper.pyx b/python/cugraph/dask/structure/renumber_wrapper.pyx index 40dd80aeb67..1ab290cfb10 100644 --- a/python/cugraph/dask/structure/renumber_wrapper.pyx +++ b/python/cugraph/dask/structure/renumber_wrapper.pyx @@ -16,7 +16,7 @@ from cugraph.structure.utils_wrapper import * import cudf -from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * import cugraph.structure.graph_primtypes_wrapper as graph_primtypes_wrapper from libc.stdint cimport uintptr_t from cython.operator cimport dereference as deref diff --git a/python/cugraph/dask/traversal/mg_bfs.pxd b/python/cugraph/dask/traversal/mg_bfs.pxd index 82c6e97d668..afd209158c4 100644 --- a/python/cugraph/dask/traversal/mg_bfs.pxd +++ b/python/cugraph/dask/traversal/mg_bfs.pxd @@ -1,5 +1,5 @@ # -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ # limitations under the License. # -from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * from libcpp cimport bool diff --git a/python/cugraph/dask/traversal/mg_bfs_wrapper.pyx b/python/cugraph/dask/traversal/mg_bfs_wrapper.pyx index c92f28eb407..527cb2bcf0a 100644 --- a/python/cugraph/dask/traversal/mg_bfs_wrapper.pyx +++ b/python/cugraph/dask/traversal/mg_bfs_wrapper.pyx @@ -1,5 +1,5 @@ # -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -17,7 +17,7 @@ from cugraph.structure.utils_wrapper import * from cugraph.dask.traversal cimport mg_bfs as c_bfs import cudf -from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * import cugraph.structure.graph_primtypes_wrapper as graph_primtypes_wrapper from libc.stdint cimport uintptr_t diff --git a/python/cugraph/dask/traversal/mg_sssp.pxd b/python/cugraph/dask/traversal/mg_sssp.pxd index f846facd269..d56575da567 100644 --- a/python/cugraph/dask/traversal/mg_sssp.pxd +++ b/python/cugraph/dask/traversal/mg_sssp.pxd @@ -1,5 +1,5 @@ # -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # -from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * from libcpp cimport bool diff --git a/python/cugraph/dask/traversal/mg_sssp_wrapper.pyx b/python/cugraph/dask/traversal/mg_sssp_wrapper.pyx index b7aec103098..15d956836b4 100644 --- a/python/cugraph/dask/traversal/mg_sssp_wrapper.pyx +++ b/python/cugraph/dask/traversal/mg_sssp_wrapper.pyx @@ -1,5 +1,5 @@ # -# Copyright (c) 2020, NVIDIA CORPORATION. 
+# Copyright (c) 2020-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -17,7 +17,7 @@ from cugraph.structure.utils_wrapper import * from cugraph.dask.traversal cimport mg_sssp as c_sssp import cudf -from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * import cugraph.structure.graph_primtypes_wrapper as graph_primtypes_wrapper from libc.stdint cimport uintptr_t diff --git a/python/cugraph/layout/force_atlas2_wrapper.pyx b/python/cugraph/layout/force_atlas2_wrapper.pyx index 785ddda47bd..4515c577f78 100644 --- a/python/cugraph/layout/force_atlas2_wrapper.pyx +++ b/python/cugraph/layout/force_atlas2_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -19,15 +19,11 @@ from cugraph.layout.force_atlas2 cimport force_atlas2 as c_force_atlas2 from cugraph.structure import graph_primtypes_wrapper from cugraph.structure.graph_primtypes cimport * -from cugraph.structure import utils_wrapper from libcpp cimport bool from libc.stdint cimport uintptr_t - import cudf -import cudf._lib as libcudf from numba import cuda import numpy as np -import numpy.ctypeslib as ctypeslib cdef extern from "internals.hpp" namespace "cugraph::internals": cdef cppclass GraphBasedDimRedCallback diff --git a/python/cugraph/linear_assignment/lap_wrapper.pyx b/python/cugraph/linear_assignment/lap_wrapper.pyx index 0769ef42f0f..7cd2124b8d9 100644 --- a/python/cugraph/linear_assignment/lap_wrapper.pyx +++ b/python/cugraph/linear_assignment/lap_wrapper.pyx @@ -21,11 +21,10 @@ from cugraph.linear_assignment.lap cimport dense_hungarian as c_dense_hungarian from cugraph.structure.graph_primtypes cimport * from cugraph.structure import graph_primtypes_wrapper from libc.stdint cimport uintptr_t -from cugraph.structure.graph import Graph as type_Graph - import cudf import numpy as np + def sparse_hungarian(input_graph, workers): """ Call the hungarian algorithm diff --git a/python/cugraph/link_analysis/hits_wrapper.pyx b/python/cugraph/link_analysis/hits_wrapper.pyx index 3e19e38a023..2a2d33dea0b 100644 --- a/python/cugraph/link_analysis/hits_wrapper.pyx +++ b/python/cugraph/link_analysis/hits_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -18,13 +18,10 @@ from cugraph.link_analysis.hits cimport hits as c_hits from cugraph.structure.graph_primtypes cimport * -from libcpp cimport bool from libc.stdint cimport uintptr_t from cugraph.structure import graph_primtypes_wrapper import cudf -import rmm import numpy as np -import numpy.ctypeslib as ctypeslib def hits(input_graph, max_iter=100, tol=1.0e-5, nstart=None, normalized=True): @@ -48,8 +45,6 @@ def hits(input_graph, max_iter=100, tol=1.0e-5, nstart=None, normalized=True): df['hubs'] = cudf.Series(np.zeros(num_verts, dtype=np.float32)) df['authorities'] = cudf.Series(np.zeros(num_verts, dtype=np.float32)) - #cdef bool normalized = 1 - cdef uintptr_t c_identifier = df['vertex'].__cuda_array_interface__['data'][0]; cdef uintptr_t c_hubs = df['hubs'].__cuda_array_interface__['data'][0]; cdef uintptr_t c_authorities = df['authorities'].__cuda_array_interface__['data'][0]; diff --git a/python/cugraph/link_analysis/pagerank.pxd b/python/cugraph/link_analysis/pagerank.pxd index 79cb033f74b..2c8bea12016 100644 --- a/python/cugraph/link_analysis/pagerank.pxd +++ b/python/cugraph/link_analysis/pagerank.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -16,7 +16,7 @@ # cython: embedsignature = True # cython: language_level = 3 -from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * from libcpp cimport bool diff --git a/python/cugraph/link_analysis/pagerank_wrapper.pyx b/python/cugraph/link_analysis/pagerank_wrapper.pyx index fea1939db6a..81a68d42360 100644 --- a/python/cugraph/link_analysis/pagerank_wrapper.pyx +++ b/python/cugraph/link_analysis/pagerank_wrapper.pyx @@ -16,16 +16,13 @@ # cython: embedsignature = True # cython: language_level = 3 -#cimport cugraph.link_analysis.pagerank as c_pagerank from cugraph.link_analysis.pagerank cimport call_pagerank -from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * from libcpp cimport bool from libc.stdint cimport uintptr_t from cugraph.structure import graph_primtypes_wrapper import cudf -import rmm import numpy as np -import numpy.ctypeslib as ctypeslib def pagerank(input_graph, alpha=0.85, personalization=None, max_iter=100, tol=1.0e-5, nstart=None): diff --git a/python/cugraph/link_prediction/jaccard_wrapper.pyx b/python/cugraph/link_prediction/jaccard_wrapper.pyx index cacd13dec65..8d236c60ee2 100644 --- a/python/cugraph/link_prediction/jaccard_wrapper.pyx +++ b/python/cugraph/link_prediction/jaccard_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -21,8 +21,6 @@ from cugraph.link_prediction.jaccard cimport jaccard_list as c_jaccard_list from cugraph.structure.graph_primtypes cimport * from cugraph.structure import graph_primtypes_wrapper from libc.stdint cimport uintptr_t -from cython cimport floating - import cudf import numpy as np diff --git a/python/cugraph/link_prediction/overlap_wrapper.pyx b/python/cugraph/link_prediction/overlap_wrapper.pyx index 9e2f3ba49d7..4cb17aa21a6 100644 --- a/python/cugraph/link_prediction/overlap_wrapper.pyx +++ b/python/cugraph/link_prediction/overlap_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -21,8 +21,6 @@ from cugraph.link_prediction.overlap cimport overlap_list as c_overlap_list from cugraph.structure.graph_primtypes cimport * from cugraph.structure import graph_primtypes_wrapper from libc.stdint cimport uintptr_t -from cython cimport floating - import cudf import numpy as np diff --git a/python/cugraph/structure/graph_primtypes.pxd b/python/cugraph/structure/graph_primtypes.pxd index 07132df2598..3bb42633ac1 100644 --- a/python/cugraph/structure/graph_primtypes.pxd +++ b/python/cugraph/structure/graph_primtypes.pxd @@ -20,12 +20,9 @@ from libcpp cimport bool from libcpp.memory cimport unique_ptr from libcpp.utility cimport pair from libcpp.vector cimport vector - +from cugraph.raft.common.handle cimport * from rmm._lib.device_buffer cimport device_buffer -cdef extern from "raft/handle.hpp" namespace "raft": - cdef cppclass handle_t: - handle_t() except + cdef extern from "graph.hpp" namespace "cugraph": @@ -127,7 +124,6 @@ cdef extern from "graph.hpp" namespace "cugraph": GraphCSRView[VT,ET,WT] view() - cdef extern from "algorithms.hpp" namespace "cugraph": cdef unique_ptr[GraphCOO[VT, ET, WT]] get_two_hop_neighbors[VT,ET,WT]( @@ -144,89 +140,6 @@ cdef extern from "functions.hpp" namespace "cugraph": ET *map_size) except + -# renumber_edgelist() interface: -# -# -# 1. `cdef extern partition_t`: -# -cdef extern from "experimental/graph_view.hpp" namespace "cugraph::experimental": - - cdef cppclass partition_t[vertex_t]: - pass - - -# 2. return type for shuffle: -# -cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": - - cdef cppclass major_minor_weights_t[vertex_t, weight_t]: - major_minor_weights_t(const handle_t &handle) - pair[unique_ptr[device_buffer], size_t] get_major_wrap() - pair[unique_ptr[device_buffer], size_t] get_minor_wrap() - pair[unique_ptr[device_buffer], size_t] get_weights_wrap() - - -ctypedef fused shuffled_vertices_t: - major_minor_weights_t[int, float] - major_minor_weights_t[int, double] - major_minor_weights_t[long, float] - major_minor_weights_t[long, double] - -# 3. 
return type for renumber: -# -cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": - - cdef cppclass renum_quad_t[vertex_t, edge_t]: - renum_quad_t(const handle_t &handle) - pair[unique_ptr[device_buffer], size_t] get_dv_wrap() - vertex_t& get_num_vertices() - edge_t& get_num_edges() - int get_part_row_size() - int get_part_col_size() - int get_part_comm_rank() - unique_ptr[vector[vertex_t]] get_partition_offsets() - pair[vertex_t, vertex_t] get_part_local_vertex_range() - vertex_t get_part_local_vertex_first() - vertex_t get_part_local_vertex_last() - pair[vertex_t, vertex_t] get_part_vertex_partition_range(size_t vertex_partition_idx) - vertex_t get_part_vertex_partition_first(size_t vertex_partition_idx) - vertex_t get_part_vertex_partition_last(size_t vertex_partition_idx) - vertex_t get_part_vertex_partition_size(size_t vertex_partition_idx) - size_t get_part_number_of_matrix_partitions() - vertex_t get_part_matrix_partition_major_first(size_t partition_idx) - vertex_t get_part_matrix_partition_major_last(size_t partition_idx) - vertex_t get_part_matrix_partition_major_value_start_offset(size_t partition_idx) - pair[vertex_t, vertex_t] get_part_matrix_partition_minor_range() - vertex_t get_part_matrix_partition_minor_first() - vertex_t get_part_matrix_partition_minor_last() - -# 4. `groupby_gpuid_and_shuffle_values()` wrapper: -# -cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": - - cdef unique_ptr[major_minor_weights_t[vertex_t, weight_t]] call_shuffle[vertex_t, edge_t, weight_t]( - const handle_t &handle, - vertex_t *edgelist_major_vertices, - vertex_t *edgelist_minor_vertices, - weight_t* edgelist_weights, - edge_t num_edges, - bool is_hyper_partitioned) except + - - -# 5. `renumber_edgelist()` wrapper -# -cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": - - cdef unique_ptr[renum_quad_t[vertex_t, edge_t]] call_renumber[vertex_t, edge_t]( - const handle_t &handle, - vertex_t *edgelist_major_vertices, - vertex_t *edgelist_minor_vertices, - edge_t num_edges, - bool is_hyper_partitioned, - bool do_check, - bool multi_gpu) except + - - cdef extern from "" namespace "std" nogil: cdef unique_ptr[GraphCOO[int,int,float]] move(unique_ptr[GraphCOO[int,int,float]]) cdef unique_ptr[GraphCOO[int,int,double]] move(unique_ptr[GraphCOO[int,int,double]]) @@ -275,67 +188,3 @@ ctypedef fused GraphViewType: cdef coo_to_df(GraphCOOPtrType graph) cdef csr_to_series(GraphCSRPtrType graph) cdef GraphViewType get_graph_view(input_graph, bool weightless=*, GraphViewType* dummy=*) - - -# C++ utilities specifically for Cython -cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": - - ctypedef enum numberTypeEnum: - int32Type "cugraph::cython::numberTypeEnum::int32Type" - int64Type "cugraph::cython::numberTypeEnum::int64Type" - floatType "cugraph::cython::numberTypeEnum::floatType" - doubleType "cugraph::cython::numberTypeEnum::doubleType" - - cdef cppclass graph_container_t: - pass - - cdef void populate_graph_container( - graph_container_t &graph_container, - handle_t &handle, - void *src_vertices, - void *dst_vertices, - void *weights, - void *vertex_partition_offsets, - numberTypeEnum vertexType, - numberTypeEnum edgeType, - numberTypeEnum weightType, - size_t num_partition_edges, - size_t num_global_vertices, - size_t num_global_edges, - bool sorted_by_degree, - bool transposed, - bool multi_gpu) except + - - ctypedef enum graphTypeEnum: - LegacyCSR "cugraph::cython::graphTypeEnum::LegacyCSR" - LegacyCSC 
"cugraph::cython::graphTypeEnum::LegacyCSC" - LegacyCOO "cugraph::cython::graphTypeEnum::LegacyCOO" - - cdef void populate_graph_container_legacy( - graph_container_t &graph_container, - graphTypeEnum legacyType, - const handle_t &handle, - void *offsets, - void *indices, - void *weights, - numberTypeEnum offsetType, - numberTypeEnum indexType, - numberTypeEnum weightType, - size_t num_global_vertices, - size_t num_global_edges, - int *local_vertices, - int *local_edges, - int *local_offsets) except + - - cdef cppclass cy_multi_edgelists_t: - size_t number_of_vertices - size_t number_of_edges - size_t number_of_subgraph - unique_ptr[device_buffer] src_indices - unique_ptr[device_buffer] dst_indices - unique_ptr[device_buffer] edge_data - unique_ptr[device_buffer] subgraph_offsets - -cdef extern from "" namespace "std" nogil: - cdef cy_multi_edgelists_t move(cy_multi_edgelists_t) - cdef unique_ptr[cy_multi_edgelists_t] move(unique_ptr[cy_multi_edgelists_t]) diff --git a/python/cugraph/structure/graph_utilities.pxd b/python/cugraph/structure/graph_utilities.pxd new file mode 100644 index 00000000000..10c90f44cb8 --- /dev/null +++ b/python/cugraph/structure/graph_utilities.pxd @@ -0,0 +1,173 @@ +# Copyright (c) 2019-2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# cython: profile=False +# distutils: language = c++ +# cython: embedsignature = True +# cython: language_level = 3 + + +from cugraph.raft.common.handle cimport * +from libcpp cimport bool +from libcpp.memory cimport unique_ptr +from libcpp.utility cimport pair +from libcpp.vector cimport vector +from rmm._lib.device_buffer cimport device_buffer + +# C++ graph utilities +cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": + + ctypedef enum numberTypeEnum: + int32Type "cugraph::cython::numberTypeEnum::int32Type" + int64Type "cugraph::cython::numberTypeEnum::int64Type" + floatType "cugraph::cython::numberTypeEnum::floatType" + doubleType "cugraph::cython::numberTypeEnum::doubleType" + + cdef cppclass graph_container_t: + pass + + cdef void populate_graph_container( + graph_container_t &graph_container, + handle_t &handle, + void *src_vertices, + void *dst_vertices, + void *weights, + void *vertex_partition_offsets, + numberTypeEnum vertexType, + numberTypeEnum edgeType, + numberTypeEnum weightType, + size_t num_partition_edges, + size_t num_global_vertices, + size_t num_global_edges, + bool sorted_by_degree, + bool transposed, + bool multi_gpu) except + + + ctypedef enum graphTypeEnum: + LegacyCSR "cugraph::cython::graphTypeEnum::LegacyCSR" + LegacyCSC "cugraph::cython::graphTypeEnum::LegacyCSC" + LegacyCOO "cugraph::cython::graphTypeEnum::LegacyCOO" + + cdef void populate_graph_container_legacy( + graph_container_t &graph_container, + graphTypeEnum legacyType, + const handle_t &handle, + void *offsets, + void *indices, + void *weights, + numberTypeEnum offsetType, + numberTypeEnum indexType, + numberTypeEnum weightType, + size_t num_global_vertices, + size_t num_global_edges, + int *local_vertices, + int *local_edges, + int *local_offsets) except + + + cdef cppclass cy_multi_edgelists_t: + size_t number_of_vertices + size_t number_of_edges + size_t number_of_subgraph + unique_ptr[device_buffer] src_indices + unique_ptr[device_buffer] dst_indices + unique_ptr[device_buffer] edge_data + unique_ptr[device_buffer] subgraph_offsets + +cdef extern from "" namespace "std" nogil: + cdef device_buffer move(device_buffer) + cdef unique_ptr[device_buffer] move(unique_ptr[device_buffer]) + cdef cy_multi_edgelists_t move(cy_multi_edgelists_t) + cdef unique_ptr[cy_multi_edgelists_t] move(unique_ptr[cy_multi_edgelists_t]) + #cdef device_buffer move(device_buffer) + #cdef unique_ptr[device_buffer] move(unique_ptr[device_buffer]) + +# renumber_edgelist() interface utilities: +# +# +# 1. `cdef extern partition_t`: +# +cdef extern from "experimental/graph_view.hpp" namespace "cugraph::experimental": + + cdef cppclass partition_t[vertex_t]: + pass + + +# 2. return type for shuffle: +# +cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": + + cdef cppclass major_minor_weights_t[vertex_t, weight_t]: + major_minor_weights_t(const handle_t &handle) + pair[unique_ptr[device_buffer], size_t] get_major_wrap() + pair[unique_ptr[device_buffer], size_t] get_minor_wrap() + pair[unique_ptr[device_buffer], size_t] get_weights_wrap() + + +ctypedef fused shuffled_vertices_t: + major_minor_weights_t[int, float] + major_minor_weights_t[int, double] + major_minor_weights_t[long, float] + major_minor_weights_t[long, double] + +# 3. 
return type for renumber: +# +cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": + + cdef cppclass renum_quad_t[vertex_t, edge_t]: + renum_quad_t(const handle_t &handle) + pair[unique_ptr[device_buffer], size_t] get_dv_wrap() + vertex_t& get_num_vertices() + edge_t& get_num_edges() + int get_part_row_size() + int get_part_col_size() + int get_part_comm_rank() + unique_ptr[vector[vertex_t]] get_partition_offsets() + pair[vertex_t, vertex_t] get_part_local_vertex_range() + vertex_t get_part_local_vertex_first() + vertex_t get_part_local_vertex_last() + pair[vertex_t, vertex_t] get_part_vertex_partition_range(size_t vertex_partition_idx) + vertex_t get_part_vertex_partition_first(size_t vertex_partition_idx) + vertex_t get_part_vertex_partition_last(size_t vertex_partition_idx) + vertex_t get_part_vertex_partition_size(size_t vertex_partition_idx) + size_t get_part_number_of_matrix_partitions() + vertex_t get_part_matrix_partition_major_first(size_t partition_idx) + vertex_t get_part_matrix_partition_major_last(size_t partition_idx) + vertex_t get_part_matrix_partition_major_value_start_offset(size_t partition_idx) + pair[vertex_t, vertex_t] get_part_matrix_partition_minor_range() + vertex_t get_part_matrix_partition_minor_first() + vertex_t get_part_matrix_partition_minor_last() + +# 4. `sort_and_shuffle_values()` wrapper: +# +cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": + + cdef unique_ptr[major_minor_weights_t[vertex_t, weight_t]] call_shuffle[vertex_t, edge_t, weight_t]( + const handle_t &handle, + vertex_t *edgelist_major_vertices, + vertex_t *edgelist_minor_vertices, + weight_t* edgelist_weights, + edge_t num_edges, + bool is_hyper_partitioned) except + + +# 5. `renumber_edgelist()` wrapper +# +cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": + + cdef unique_ptr[renum_quad_t[vertex_t, edge_t]] call_renumber[vertex_t, edge_t]( + const handle_t &handle, + vertex_t *edgelist_major_vertices, + vertex_t *edgelist_minor_vertices, + edge_t num_edges, + bool is_hyper_partitioned, + bool do_check, + bool multi_gpu) except + diff --git a/python/cugraph/structure/utils.pxd b/python/cugraph/structure/utils.pxd index 0ec9c914347..c22e64841af 100644 --- a/python/cugraph/structure/utils.pxd +++ b/python/cugraph/structure/utils.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -19,9 +19,6 @@ from cugraph.structure.graph_primtypes cimport * from libcpp.memory cimport unique_ptr -cdef extern from "raft/handle.hpp" namespace "raft": - cdef cppclass handle_t: - handle_t() except + cdef extern from "functions.hpp" namespace "cugraph": diff --git a/python/cugraph/structure/utils_wrapper.pyx b/python/cugraph/structure/utils_wrapper.pyx index 00af5813056..65c1ca09750 100644 --- a/python/cugraph/structure/utils_wrapper.pyx +++ b/python/cugraph/structure/utils_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -22,7 +22,6 @@ from cugraph.structure.graph_primtypes cimport * from libc.stdint cimport uintptr_t import cudf -import rmm import numpy as np from rmm._lib.device_buffer cimport DeviceBuffer from cudf.core.buffer import Buffer diff --git a/python/cugraph/traversal/bfs.pxd b/python/cugraph/traversal/bfs.pxd index 5b73d23045c..0467bf05090 100644 --- a/python/cugraph/traversal/bfs.pxd +++ b/python/cugraph/traversal/bfs.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -16,7 +16,7 @@ # cython: embedsignature = True # cython: language_level = 3 -from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * from libcpp cimport bool diff --git a/python/cugraph/traversal/bfs_wrapper.pyx b/python/cugraph/traversal/bfs_wrapper.pyx index ae346aea953..f475842a7bf 100644 --- a/python/cugraph/traversal/bfs_wrapper.pyx +++ b/python/cugraph/traversal/bfs_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -17,14 +17,11 @@ # cython: language_level = 3 cimport cugraph.traversal.bfs as c_bfs -from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * from cugraph.structure import graph_primtypes_wrapper from libcpp cimport bool from libc.stdint cimport uintptr_t -from libc.float cimport FLT_MAX_EXP - import cudf -import rmm import numpy as np def bfs(input_graph, start, directed=True, diff --git a/python/cugraph/traversal/sssp.pxd b/python/cugraph/traversal/sssp.pxd index e4b709cb879..59253a5f1e4 100644 --- a/python/cugraph/traversal/sssp.pxd +++ b/python/cugraph/traversal/sssp.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -16,7 +16,7 @@ # cython: embedsignature = True # cython: language_level = 3 -from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": diff --git a/python/cugraph/traversal/sssp_wrapper.pyx b/python/cugraph/traversal/sssp_wrapper.pyx index 730fe0db94e..36e4797e0c8 100644 --- a/python/cugraph/traversal/sssp_wrapper.pyx +++ b/python/cugraph/traversal/sssp_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -18,17 +18,14 @@ cimport cugraph.traversal.sssp as c_sssp cimport cugraph.traversal.bfs as c_bfs -from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * from cugraph.structure import graph_primtypes_wrapper - from libcpp cimport bool from libc.stdint cimport uintptr_t -from libc.float cimport FLT_MAX_EXP - import cudf -import rmm import numpy as np + def sssp(input_graph, source): """ Call sssp diff --git a/python/cugraph/traversal/traveling_salesperson_wrapper.pyx b/python/cugraph/traversal/traveling_salesperson_wrapper.pyx index 5f87c42a638..6eccce57a37 100644 --- a/python/cugraph/traversal/traveling_salesperson_wrapper.pyx +++ b/python/cugraph/traversal/traveling_salesperson_wrapper.pyx @@ -22,7 +22,6 @@ from cugraph.structure.graph_primtypes cimport * from libcpp cimport bool from libc.stdint cimport uintptr_t from numba import cuda - import cudf import numpy as np From b7e68092f09f0062e632b22bea14e3175cbc4284 Mon Sep 17 00:00:00 2001 From: Chuck Hastings <45364586+ChuckHastings@users.noreply.github.com> Date: Thu, 4 Mar 2021 16:21:17 -0500 Subject: [PATCH 185/343] Create cuGraph developers guide (#1431) We have long needed a developer's guide. This PR creates one. The expectation is that this document will be adapted over time. I have copied the version from cuDF, edited things to make it cuGraph focused. There are documentation holes here that will need to eventually be filled in. However, it's probably useful to get something with holes in it that is approved so we can move forward with some documented standard. Authors: - Chuck Hastings (@ChuckHastings) Approvers: - Rick Ratzel (@rlratzel) - Brad Rees (@BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1431 --- cpp/docs/DEVELOPER_GUIDE.md | 277 ++++++++++++++++++++++++++++++++++++ 1 file changed, 277 insertions(+) create mode 100644 cpp/docs/DEVELOPER_GUIDE.md diff --git a/cpp/docs/DEVELOPER_GUIDE.md b/cpp/docs/DEVELOPER_GUIDE.md new file mode 100644 index 00000000000..ba24d68aca5 --- /dev/null +++ b/cpp/docs/DEVELOPER_GUIDE.md @@ -0,0 +1,277 @@ +# cuGraph C++ Developer Guide + +This document serves as a guide for contributors to cuGraph C++ code. Developers should also refer +to these additional files for further documentation of cuGraph best practices. + +* [Documentation Guide](TODO) for guidelines on documenting cuGraph code. +* [Testing Guide](TODO) for guidelines on writing unit tests. +* [Benchmarking Guide](TODO) for guidelines on writing unit benchmarks. + +# Overview + +cuGraph includes a C++ library that provides GPU-accelerated graph algorithms for processing +sparse graphs. + +## Lexicon + +This section defines terminology used within cuGraph + +### COO + +COOrdinate format is one of the standard formats for representing graph data. In COO format the +graph is represented as an array of source vertex ids, an array of destination vertex ids, and an +optional array of edge weights. Edge i is identified by source_vertex_id[i], destination_vertex_id[i] +and weight[i]. + +### MORE + +# Directory Structure and File Naming + +External/public cuGraph APIs are grouped based on functionality into an appropriately titled +header file in `cugraph/cpp/include/`. For example, `cugraph/cpp/include/graph.hpp` +contains the definition of the (legacy) graph objects. Note the `.hpp` +file extension used to indicate a C++ header file. + +Header files should use the `#pragma once` include guard. 
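For illustration, a public header organized this way might look roughly like the following sketch; the algorithm name and template parameters below are hypothetical, not an existing cuGraph API.

```c++
// Hypothetical public header under cugraph/cpp/include/ (illustrative only).
#pragma once

#include <raft/handle.hpp>

namespace cugraph {

// Declarations grouped by functionality; definitions live in .cpp/.cu files.
template <typename vertex_t, typename edge_t, typename weight_t>
void example_algorithm(raft::handle_t const &handle);

}  // namespace cugraph
```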
+
+## File extensions
+
+- `.hpp` : C++ header files
+- `.cpp` : C++ source files
+- `.cu` : CUDA C++ source files
+- `.cuh` : Headers containing CUDA device code
+
+Header files and source files should use `.hpp` and `.cpp` extensions unless they must
+be compiled by nvcc. `.cu` and `.cuh` files are more expensive to compile, so we want
+to minimize the use of these files to only when necessary. A good indicator of the need
+to use a `.cu` or `.cuh` file is the inclusion of `__device__` and other
+symbols that are only recognized by `nvcc`. Another indicator is Thrust
+algorithm APIs with a device execution policy (always `rmm::exec_policy` in cuGraph).
+
+## Code and Documentation Style and Formatting
+
+cuGraph code uses [snake_case](https://en.wikipedia.org/wiki/Snake_case) for all names except in a
+few cases: unit tests and test case names may use Pascal case, aka
+[UpperCamelCase](https://en.wikipedia.org/wiki/Camel_case). We do not use
+[Hungarian notation](https://en.wikipedia.org/wiki/Hungarian_notation), except for the following examples:
+ * device data variables should be prefaced by d_ if it makes the intent clearer
+ * host data variables should be prefaced by h_ if it makes the intent clearer
+ * template parameters defining a type should be suffixed with _t
+ * private member variables are typically suffixed with an underscore
+
+```c++
+template <typename graph_t>
+void algorithm_function(graph_t const &g)
+{
+  ...
+}
+
+template <typename vertex_t>
+class utility_class
+{
+  ...
+ private:
+  vertex_t num_vertices_{};
+}
+```
+
+C++ formatting is enforced using `clang-format`. You should configure `clang-format` on your
+machine to use the `cugraph/cpp/.clang-format` configuration file, and run `clang-format` on all
+changed code before committing it. The easiest way to do this is to configure your editor to
+"format on save".
+
+Aspects of code style not discussed in this document and not automatically enforceable are typically
+caught during code review, or not enforced.
+
+### C++ Guidelines
+
+In general, we recommend following
+[C++ Core Guidelines](https://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines). We also
+recommend watching Sean Parent's [C++ Seasoning talk](https://www.youtube.com/watch?v=W2tWOdzgXHA),
+and we try to follow his rules: "No raw loops. No raw pointers. No raw synchronization primitives."
+
+ * Prefer algorithms from STL and Thrust to raw loops.
+ * Prefer cugraph and RMM to raw pointers and raw memory allocation.
+
+Documentation is discussed in the [Documentation Guide](TODO).
+
+### Includes
+
+The following guidelines apply to organizing `#include` lines.
+
+ * Group includes by library (e.g. cuGraph, RMM, Thrust, STL). `clang-format` will respect the
+   groupings and sort the individual includes within a group lexicographically.
+ * Separate groups by a blank line.
+ * Order the groups from "nearest" to "farthest". In other words, local includes, then includes
+   from other RAPIDS libraries, then includes from related libraries, like `<thrust/...>`, then
+   includes from dependencies installed with cuGraph, and then standard headers (for example `<string>`,
+   `<iostream>`).
+ * Use <> instead of "" unless the header is in the same directory as the source file.
+ * Tools like `clangd` often auto-insert includes when they can, but they usually get the grouping
+   and brackets wrong.
+ * Always check that includes are only necessary for the file in which they are included.
+   Try to avoid excessive including especially in header files. Double check this when you remove
+   code.
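A hypothetical include block that follows the grouping and ordering rules above might look like this; the specific headers are only illustrative, not a prescribed list.

```c++
// Same-directory header uses quotes (hypothetical file name).
#include "component_detail.hpp"

// cuGraph / other RAPIDS headers.
#include <algorithms.hpp>
#include <raft/handle.hpp>
#include <rmm/device_uvector.hpp>

// Related libraries.
#include <thrust/transform.h>

// Standard headers.
#include <string>
#include <vector>
```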
+ +# cuGraph Data Structures + +Application data in cuGraph is contained in graph objects, but there are a variety of other +data structures you will use when developing cuGraph code. + +## Views and Ownership + +Resource ownership is an essential concept in cuGraph. In short, an "owning" object owns a +resource (such as device memory). It acquires that resource during construction and releases the +resource in destruction ([RAII](https://en.cppreference.com/w/cpp/language/raii)). A "non-owning" +object does not own resources. Any class in cuGraph with the `*_view` suffix is non-owning. + +## `rmm::device_memory_resource` + +cuGraph allocates all device memory via RMM memory resources (MR). See the +[RMM documentation](https://github.com/rapidsai/rmm/blob/main/README.md) for details. + +## Streams + +CUDA streams are not yet exposed in external cuGraph APIs. + +We are currently investigating the best technique for exposing this. + +### Memory Management + +cuGraph code generally eschews raw pointers and direct memory allocation. Use RMM classes built to +use `device_memory_resource`(*)s for device memory allocation with automated lifetime management. + +#### `rmm::device_buffer` +Allocates a specified number of bytes of untyped, uninitialized device memory using a +`device_memory_resource`. If no resource is explicitly provided, uses +`rmm::mr::get_current_device_resource()`. + +`rmm::device_buffer` is copyable and movable. A copy performs a deep copy of the `device_buffer`'s +device memory, whereas a move moves ownership of the device memory from one `device_buffer` to +another. + +```c++ +// Allocates at least 100 bytes of uninitialized device memory +// using the specified resource and stream +rmm::device_buffer buff(100, stream, mr); +void * raw_data = buff.data(); // Raw pointer to underlying device memory + +rmm::device_buffer copy(buff); // Deep copies `buff` into `copy` +rmm::device_buffer moved_to(std::move(buff)); // Moves contents of `buff` into `moved_to` + +custom_memory_resource *mr...; +rmm::device_buffer custom_buff(100, mr); // Allocates 100 bytes from the custom_memory_resource +``` + +#### `rmm::device_uvector` + +Similar to a `rmm::device_vector`, allocates a contiguous set of elements in device memory but with key +differences: +- As an optimization, elements are uninitialized and no synchronization occurs at construction. +This limits the types `T` to trivially copyable types. +- All operations are stream ordered (i.e., they accept a `cuda_stream_view` specifying the stream +on which the operation is performed). + +## Namespaces + +### External +All public cuGraph APIs should be placed in the `cugraph` namespace. Example: +```c++ +namespace cugraph{ + void public_function(...); +} // namespace cugraph +``` + +### Internal + +Many functions are not meant for public use, so place them in either the `detail` or an *anonymous* +namespace, depending on the situation. + +#### `detail` namespace + +Functions or objects that will be used across *multiple* translation units (i.e., source files), +should be exposed in an internal header file and placed in the `detail` namespace. Example: + +```c++ +// some_utilities.hpp +namespace cugraph{ +namespace detail{ +void reusable_helper_function(...); +} // namespace detail +} // namespace cugraph +``` + +#### Anonymous namespace + +Functions or objects that will only be used in a *single* translation unit should be defined in an +*anonymous* namespace in the source file where it is used. 
Example:
+```c++
+// some_file.cpp
+namespace{
+void isolated_helper_function(...);
+} // anonymous namespace
+```
+
+[**Anonymous namespaces should *never* be used in a header file.**](https://wiki.sei.cmu.edu/confluence/display/cplusplus/DCL59-CPP.+Do+not+define+an+unnamed+namespace+in+a+header+file)
+
+# Error Handling
+
+cuGraph follows conventions (and provides utilities) enforcing compile-time and run-time
+conditions and detecting and handling CUDA errors. Communication of errors is always via C++
+exceptions.
+
+## Runtime Conditions
+
+Use the `CUGRAPH_EXPECTS` macro to enforce runtime conditions necessary for correct execution.
+
+Example usage:
+```c++
+CUGRAPH_EXPECTS(lhs.type() == rhs.type(), "Column type mismatch");
+```
+
+The first argument is the conditional expression expected to resolve to `true` under normal
+conditions. If the conditional evaluates to `false`, then an error has occurred and an instance of
+`cugraph::logic_error` is thrown. The second argument to `CUGRAPH_EXPECTS` is a short description of the
+error that has occurred and is used for the exception's `what()` message.
+
+There are times where a particular code path, if reached, should indicate an error no matter what.
+For example, often the `default` case of a `switch` statement represents an invalid alternative.
+Use the `CUGRAPH_FAIL` macro for such errors. This is effectively the same as calling
+`CUGRAPH_EXPECTS(false, reason)`.
+
+Example:
+```c++
+CUGRAPH_FAIL("This code path should not be reached.");
+```
+
+### CUDA Error Checking
+
+Use the `CUDA_TRY` macro to check for the successful completion of CUDA runtime API functions. This
+macro throws a `cugraph::cuda_error` exception if the CUDA API return value is not `cudaSuccess`. The
+thrown exception includes a description of the CUDA error code in its `what()` message.
+
+Example:
+
+```c++
+CUDA_TRY( cudaMemcpy(&dst, &src, num_bytes, cudaMemcpyDefault) );
+```
+
+## Compile-Time Conditions
+
+Use `static_assert` to enforce compile-time conditions. For example,
+
+```c++
+template <typename T>
+void trivial_types_only(T t){
+  static_assert(std::is_trivial<T>::value, "This function requires a trivial type.");
+...
+}
+```
+
+# Data Types
+
+TBD
+
+# Type Dispatcher
+
+TBD

From e5250403fd0f4f2593ab829c57358152bc879306 Mon Sep 17 00:00:00 2001
From: Alex Fender
Date: Fri, 5 Mar 2021 08:15:52 -0600
Subject: [PATCH 186/343] Streams infra + support in egonet (#1435)

- Stream synchronization behavior: switched to per-thread default stream instead of the legacy default stream
- Update raft tag
- EgoNet upgrade to use `uvector` instead of `device_vector`
- EgoNet upgrade to execute on a different stream for each seed
- Perf analysis timers/app for EgoNet

Concurrency is limited by the number of available blocks on the device. Thrust-based codes may
request a lot of blocks without a way to control this. In practice, smaller graphs leverage
concurrency better than larger ones where tasks may end up waiting for available resources.
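The per-seed worker-stream pattern this change introduces can be sketched as follows (condensed from the egonet diff further below; buffer allocation, BFS, and error handling are omitted, and the pool cap of 128 mirrors the test code):

```c++
#include <raft/handle.hpp>

#include <algorithm>

// Sketch: issue each seed's work on its own worker stream, then join all
// worker streams before the next phase.
void process_seeds(int n_seeds)
{
  raft::handle_t handle(std::min(n_seeds, 128));   // worker stream pool

  for (int i = 0; i < n_seeds; ++i) {
    raft::handle_t light_handle(handle, i);        // bound to worker stream i
    auto stream_view = light_handle.get_stream_view();
    (void)stream_view;  // enqueue this seed's kernels / thrust calls here
  }

  handle.wait_on_internal_streams();               // join before the next phase
}
```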
We may wait on #1407 before reconciling and merging this Close #957 Authors: - Alex Fender (@afender) Approvers: - Seunghwa Kang (@seunghwak) - Andrei Schaffer (@aschaffer) URL: https://github.com/rapidsai/cugraph/pull/1435 --- cpp/CMakeLists.txt | 8 +- cpp/include/algorithms.hpp | 2 +- cpp/src/community/egonet.cu | 116 +++++++++---- cpp/src/experimental/induced_subgraph.cu | 11 +- cpp/src/utilities/high_res_timer.hpp | 4 +- cpp/tests/community/egonet_test.cu | 174 ++++++++++++++++++- python/cugraph/community/egonet_wrapper.pyx | 6 +- python/cugraph/structure/graph_primtypes.pxd | 1 - 8 files changed, 276 insertions(+), 46 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 108cb0748a8..b0365c3cfd6 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -113,7 +113,6 @@ set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda --expt-relaxed- set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Werror=cross-execution-space-call -Wno-deprecated-declarations -Xptxas --disable-warnings") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -Wall,-Wno-error=sign-compare,-Wno-error=unused-but-set-variable") - # Option to enable line info in CUDA device compilation to allow introspection when profiling / # memchecking option(CMAKE_CUDA_LINEINFO "Enable the -lineinfo option for nvcc (useful for cuda-memcheck / profiler" OFF) @@ -298,7 +297,8 @@ else(DEFINED ENV{RAFT_PATH}) FetchContent_Declare( raft GIT_REPOSITORY https://github.com/rapidsai/raft.git - GIT_TAG a3461b201ea1c9f61571f1927274f739e775d2d2 + GIT_TAG b055cf862a599fd45537d21a309edd8a6e06da4c + SOURCE_SUBDIR raft ) @@ -446,6 +446,10 @@ target_link_directories(cugraph # add_dependencies(cugraph gunrock_ext) +# Per-thread default stream option see https://docs.nvidia.com/cuda/cuda-runtime-api/stream-sync-behavior.html +# The per-thread default stream does not synchronize with other streams +target_compile_definitions(cugraph PUBLIC CUDA_API_PER_THREAD_DEFAULT_STREAM) + ################################################################################################### # - include paths --------------------------------------------------------------------------------- target_include_directories(cugraph diff --git a/cpp/include/algorithms.hpp b/cpp/include/algorithms.hpp index 0b8bd59587f..c3a4f3ec985 100644 --- a/cpp/include/algorithms.hpp +++ b/cpp/include/algorithms.hpp @@ -1167,7 +1167,7 @@ void katz_centrality(raft::handle_t const &handle, * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) * or multi-GPU (true). * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and - * handles to various CUDA libraries) to run graph algorithms. + * handles to various CUDA libraries) to run graph algorithms. Must have at least one worker stream. * @param graph_view Graph view object of, we extract induced egonet subgraphs from @p graph_view. * @param source_vertex Pointer to egonet center vertices (size == @p n_subgraphs). * @param n_subgraphs Number of induced EgoNet subgraphs to extract (ie. 
number of elements in @p diff --git a/cpp/src/community/egonet.cu b/cpp/src/community/egonet.cu index fa788aa307b..067d27f9a92 100644 --- a/cpp/src/community/egonet.cu +++ b/cpp/src/community/egonet.cu @@ -22,6 +22,9 @@ #include #include +#include +#include + #include #include @@ -34,6 +37,8 @@ #include #include +#include + namespace { /* @@ -61,58 +66,111 @@ extract( vertex_t n_subgraphs, vertex_t radius) { - auto v = csr_view.get_number_of_vertices(); - auto e = csr_view.get_number_of_edges(); - auto stream = handle.get_stream(); - float avg_degree = e / v; + auto v = csr_view.get_number_of_vertices(); + auto e = csr_view.get_number_of_edges(); + auto user_stream_view = handle.get_stream_view(); rmm::device_vector neighbors_offsets(n_subgraphs + 1); rmm::device_vector neighbors; - // It is the right thing to accept device memory for source_vertex - // FIXME consider adding a device API to BFS (ie. accept source on the device) std::vector h_source_vertex(n_subgraphs); - raft::update_host(&h_source_vertex[0], source_vertex, n_subgraphs, stream); + std::vector h_neighbors_offsets(n_subgraphs + 1); + + raft::update_host(&h_source_vertex[0], source_vertex, n_subgraphs, user_stream_view.value()); + + // Streams will allocate concurrently later + std::vector> reached{}; + reached.reserve(handle.get_num_internal_streams()); - // reserve some reasonable memory, but could grow larger than that - neighbors.reserve(v + avg_degree * n_subgraphs * radius); - neighbors_offsets[0] = 0; - // each source should be done concurently in the future + // h_source_vertex[i] is used by other streams in the for loop + user_stream_view.synchronize(); +#ifdef TIMING + HighResTimer hr_timer; + hr_timer.start("ego_neighbors"); +#endif for (vertex_t i = 0; i < n_subgraphs; i++) { + // get light handle from worker pool + raft::handle_t light_handle(handle, i); + auto worker_stream_view = light_handle.get_stream_view(); + + // Allocations and operations are attached to the worker stream + rmm::device_uvector local_reach(v, worker_stream_view); + reached.push_back(std::move(local_reach)); + // BFS with cutoff - rmm::device_vector reached(v); - rmm::device_vector predecessors(v); // not used + // consider adding a device API to BFS (ie. accept source on the device) + rmm::device_uvector predecessors(v, worker_stream_view); // not used bool direction_optimizing = false; - cugraph::experimental::bfs(handle, + thrust::fill(rmm::exec_policy(worker_stream_view), + reached[i].begin(), + reached[i].end(), + std::numeric_limits::max()); + thrust::fill( + rmm::exec_policy(worker_stream_view), reached[i].begin(), reached[i].begin() + 100, 1.0); + + cugraph::experimental::bfs(light_handle, csr_view, - reached.data().get(), - predecessors.data().get(), + reached[i].data(), + predecessors.data(), h_source_vertex[i], direction_optimizing, radius); // identify reached vertex ids from distance array - thrust::transform(rmm::exec_policy(stream)->on(stream), + thrust::transform(rmm::exec_policy(worker_stream_view), thrust::make_counting_iterator(vertex_t{0}), thrust::make_counting_iterator(v), - reached.begin(), - reached.begin(), + reached[i].begin(), + reached[i].begin(), [sentinel = std::numeric_limits::max()] __device__( auto id, auto val) { return val < sentinel ? 
id : sentinel; }); // removes unreached data - auto reached_end = thrust::remove(rmm::exec_policy(stream)->on(stream), - reached.begin(), - reached.end(), + auto reached_end = thrust::remove(rmm::exec_policy(worker_stream_view), + reached[i].begin(), + reached[i].end(), std::numeric_limits::max()); + // release temp storage + reached[i].resize(thrust::distance(reached[i].begin(), reached_end), worker_stream_view); + reached[i].shrink_to_fit(worker_stream_view); + } - // update extraction input - size_t n_reached = thrust::distance(reached.begin(), reached_end); - neighbors_offsets[i + 1] = neighbors_offsets[i] + n_reached; - if (neighbors_offsets[i + 1] > neighbors.capacity()) - neighbors.reserve(neighbors_offsets[i + 1] * 2); - neighbors.insert(neighbors.end(), reached.begin(), reached_end); + // wait on every one to identify their neighboors before proceeding to concatenation + handle.wait_on_internal_streams(); + + // Construct neighboors offsets (just a scan on neighborhod vector sizes) + h_neighbors_offsets[0] = 0; + for (vertex_t i = 0; i < n_subgraphs; i++) { + h_neighbors_offsets[i + 1] = h_neighbors_offsets[i] + reached[i].size(); + } + raft::update_device(neighbors_offsets.data().get(), + &h_neighbors_offsets[0], + n_subgraphs + 1, + user_stream_view.value()); + neighbors.resize(h_neighbors_offsets[n_subgraphs]); + user_stream_view.synchronize(); + + // Construct the neighboors list concurrently + for (vertex_t i = 0; i < n_subgraphs; i++) { + raft::handle_t light_handle(handle, i); + auto worker_stream_view = light_handle.get_stream_view(); + thrust::copy(rmm::exec_policy(worker_stream_view), + reached[i].begin(), + reached[i].end(), + neighbors.begin() + h_neighbors_offsets[i]); + + // reached info is not needed anymore + reached[i].resize(0, worker_stream_view); + reached[i].shrink_to_fit(worker_stream_view); } + // wait on every one before proceeding to grouped extraction + handle.wait_on_internal_streams(); + +#ifdef TIMING + hr_timer.stop(); + hr_timer.display(std::cout); +#endif + // extract return cugraph::experimental::extract_induced_subgraphs( handle, csr_view, neighbors_offsets.data().get(), neighbors.data().get(), n_subgraphs); @@ -207,4 +265,4 @@ extract_ego(raft::handle_t const &, int64_t, int64_t); } // namespace experimental -} // namespace cugraph +} // namespace cugraph \ No newline at end of file diff --git a/cpp/src/experimental/induced_subgraph.cu b/cpp/src/experimental/induced_subgraph.cu index a88adf76ef4..5cda36ad7e2 100644 --- a/cpp/src/experimental/induced_subgraph.cu +++ b/cpp/src/experimental/induced_subgraph.cu @@ -32,6 +32,8 @@ #include +#include + namespace cugraph { namespace experimental { @@ -52,6 +54,10 @@ extract_induced_subgraphs( size_t num_subgraphs, bool do_expensive_check) { +#ifdef TIMING + HighResTimer hr_timer; + hr_timer.start("extract_induced_subgraphs"); +#endif // FIXME: this code is inefficient for the vertices with their local degrees much larger than the // number of vertices in the subgraphs (in this case, searching that the subgraph vertices are // included in the local neighbors is more efficient than searching the local neighbors are @@ -244,7 +250,10 @@ extract_induced_subgraphs( subgraph_offsets + (num_subgraphs + 1), subgraph_vertex_output_offsets.begin(), subgraph_edge_offsets.begin()); - +#ifdef TIMING + hr_timer.stop(); + hr_timer.display(std::cout); +#endif return std::make_tuple(std::move(edge_majors), std::move(edge_minors), std::move(edge_weights), diff --git a/cpp/src/utilities/high_res_timer.hpp 
b/cpp/src/utilities/high_res_timer.hpp index f2d6bc6e13f..a731c5edc9d 100644 --- a/cpp/src/utilities/high_res_timer.hpp +++ b/cpp/src/utilities/high_res_timer.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,6 +20,8 @@ #include #include +//#define TIMING + class HighResTimer { public: HighResTimer() : timers() {} diff --git a/cpp/tests/community/egonet_test.cu b/cpp/tests/community/egonet_test.cu index ef2699bd1d0..a9224b42bc1 100644 --- a/cpp/tests/community/egonet_test.cu +++ b/cpp/tests/community/egonet_test.cu @@ -15,6 +15,7 @@ */ #include +#include #include #include @@ -35,6 +36,8 @@ #include #include +#include + typedef struct InducedEgo_Usecase_t { std::string graph_file_full_path{}; std::vector ego_sources{}; @@ -67,7 +70,8 @@ class Tests_InducedEgo : public ::testing::TestWithParam { template void run_current_test(InducedEgo_Usecase const& configuration) { - raft::handle_t handle{}; + int n_streams = std::min(configuration.ego_sources.size(), static_cast(128)); + raft::handle_t handle(n_streams); cugraph::experimental::graph_t graph( handle); @@ -88,14 +92,18 @@ class Tests_InducedEgo : public ::testing::TestWithParam { rmm::device_uvector d_ego_edgelist_dst(0, handle.get_stream()); rmm::device_uvector d_ego_edgelist_weights(0, handle.get_stream()); rmm::device_uvector d_ego_edge_offsets(0, handle.get_stream()); - + HighResTimer hr_timer; + hr_timer.start("egonet"); + cudaProfilerStart(); std::tie(d_ego_edgelist_src, d_ego_edgelist_dst, d_ego_edgelist_weights, d_ego_edge_offsets) = cugraph::experimental::extract_ego(handle, graph_view, d_ego_sources.data(), static_cast(configuration.ego_sources.size()), configuration.radius); - + cudaProfilerStop(); + hr_timer.stop(); + hr_timer.display(std::cout); std::vector h_cugraph_ego_edge_offsets(d_ego_edge_offsets.size()); std::vector h_cugraph_ego_edgelist_src(d_ego_edgelist_src.size()); std::vector h_cugraph_ego_edgelist_dst(d_ego_edgelist_dst.size()); @@ -118,13 +126,11 @@ class Tests_InducedEgo : public ::testing::TestWithParam { ASSERT_TRUE(h_cugraph_ego_edge_offsets[configuration.ego_sources.size()] == d_ego_edgelist_src.size()); for (size_t i = 0; i < configuration.ego_sources.size(); i++) - ASSERT_TRUE(h_cugraph_ego_edge_offsets[i] < h_cugraph_ego_edge_offsets[i + 1]); + ASSERT_TRUE(h_cugraph_ego_edge_offsets[i] <= h_cugraph_ego_edge_offsets[i + 1]); auto n_vertices = graph_view.get_number_of_vertices(); for (size_t i = 0; i < d_ego_edgelist_src.size(); i++) { - ASSERT_TRUE(h_cugraph_ego_edgelist_src[i] >= 0); - ASSERT_TRUE(h_cugraph_ego_edgelist_src[i] < n_vertices); - ASSERT_TRUE(h_cugraph_ego_edgelist_dst[i] >= 0); - ASSERT_TRUE(h_cugraph_ego_edgelist_dst[i] < n_vertices); + ASSERT_TRUE(cugraph::test::is_valid_vertex(n_vertices, h_cugraph_ego_edgelist_src[i])); + ASSERT_TRUE(cugraph::test::is_valid_vertex(n_vertices, h_cugraph_ego_edgelist_dst[i])); } /* @@ -170,6 +176,156 @@ INSTANTIATE_TEST_CASE_P( InducedEgo_Usecase("test/datasets/karate.mtx", std::vector{1}, 3, false), InducedEgo_Usecase("test/datasets/karate.mtx", std::vector{10, 0, 5}, 2, false), InducedEgo_Usecase("test/datasets/karate.mtx", std::vector{9, 3, 10}, 2, false), - InducedEgo_Usecase("test/datasets/karate.mtx", std::vector{5, 12, 13}, 2, true))); + InducedEgo_Usecase( + "test/datasets/karate.mtx", std::vector{5, 9, 3, 10, 12, 13}, 2, true))); +// For perf 
analysis +/* +INSTANTIATE_TEST_CASE_P( +simple_test, +Tests_InducedEgo, +::testing::Values( +InducedEgo_Usecase("test/datasets/soc-LiveJournal1.mtx", std::vector{0}, 1, false), +InducedEgo_Usecase("test/datasets/soc-LiveJournal1.mtx", std::vector{0}, 2, false), +InducedEgo_Usecase("test/datasets/soc-LiveJournal1.mtx", std::vector{0}, 3, false), +InducedEgo_Usecase("test/datasets/soc-LiveJournal1.mtx", std::vector{0}, 4, false), +InducedEgo_Usecase("test/datasets/soc-LiveJournal1.mtx", std::vector{0}, 5, false), +InducedEgo_Usecase( +"test/datasets/soc-LiveJournal1.mtx", std::vector{363617}, 2, false), +InducedEgo_Usecase( +"test/datasets/soc-LiveJournal1.mtx", +std::vector{ + 363617, 722214, 2337449, 2510183, 2513389, 225853, 2035807, 3836330, 1865496, 28755}, + 2, + false), + InducedEgo_Usecase( + "test/datasets/soc-LiveJournal1.mtx", + std::vector{ + 363617, 722214, 2337449, 2510183, 2513389, 225853, 2035807, 3836330, 1865496, 28755, + 2536834, 3070144, 3888415, 3131712, 2382526, 1040771, 2631543, 4607218, 4465829, + 3341686, 2772973, 2611175, 4526129, 2624421, 1220593, 2593137, 3270705, 1503899, + 1213033, 4840102, 4529036, 3421116, 4264831, 4089751, 4272322, 3486998, 2830318, + 320953, 2388331, 520808, 3023094, 1600294, 3631119, 1716614, 4829213, 1175844, 960680, + 847662, 3277365, 3957318, 3455123, 2454259, 670953, 4465677, 1027332, 2560721, 89061, + 1163406, 3109528, 3221856, 4714426, 2382774, 37828, 4433616, 3283229, 591911, + 4200188, 442522, 872207, 2437601, 741003, 266241, 914618, 3626195, 2021080, + 4679624, 777476, 2527796, 1114017, 640142, 49259, 4069879, 3869098, 1105040, + 4707804, 3208582, 3325885, 1450601, 4072548, 2037062, 2029646, 4575891, 1488598, 79105, + 4827273, 3795434, 4647518, 4733397, 3980718, 1184627}, + 2, + false), + InducedEgo_Usecase( + "test/datasets/soc-LiveJournal1.mtx", + std::vector{ + 363617, 722214, 2337449, 2510183, 2513389, 225853, 2035807, 3836330, 1865496, 28755, + 2536834, 3070144, 3888415, 3131712, 2382526, 1040771, 2631543, 4607218, 4465829, + 3341686, 2772973, 2611175, 4526129, 2624421, 1220593, 2593137, 3270705, 1503899, + 1213033, 4840102, 4529036, 3421116, 4264831, 4089751, 4272322, 3486998, 2830318, + 320953, 2388331, 520808, 3023094, 1600294, 3631119, 1716614, 4829213, 1175844, 960680, + 847662, 3277365, 3957318, 3455123, 2454259, 670953, 4465677, 1027332, 2560721, 89061, + 1163406, 3109528, 3221856, 4714426, 2382774, 37828, 4433616, 3283229, 591911, + 4200188, 442522, 872207, 2437601, 741003, 266241, 914618, 3626195, 2021080, + 4679624, 777476, 2527796, 1114017, 640142, 49259, 4069879, 3869098, 1105040, + 4707804, 3208582, 3325885, 1450601, 4072548, 2037062, 2029646, 4575891, 1488598, 79105, + 4827273, 3795434, 4647518, 4733397, 3980718, 1184627, 984983, 3114832, 1967741, + 1599818, 144593, 2698770, 2889449, 2495550, 1053813, 1193622, 686026, 3989015, + 2040719, 4693428, 3190376, 2926728, 3399030, 1664419, 662429, 4526841, 2186957, + 3752558, 2440046, 2930226, 3633006, 4058166, 3137060, 3499296, 2126343, 148971, + 2199672, 275811, 2813976, 2274536, 1189239, 1335942, 2465624, 2596042, 829684, 193400, + 2682845, 3691697, 4022437, 4051170, 4195175, 2876420, 3984220, 2174475, 326134, + 2606530, 2493046, 4706121, 1498980, 4576225, 1271339, 44832, 1875673, 4664940, + 134931, 736397, 4333554, 2751031, 2163610, 2879676, 3174153, 3317403, 2052464, + 1881883, 4757859, 3596257, 2358088, 2578758, 447504, 590720, 1717038, 1869795, + 1133885, 3027521, 840312, 2818881, 3654321, 2730947, 353585, 1134903, 2223378, + 1508824, 3662521, 1363776, 2712071, 
288441, 1204581, 3502242, 4645567, 2767267, + 1514366, 3956099, 1422145, 1216608, 2253360, 189132, 4238225, 1345783, 451571, 1599442, + 3237284, 4711405, 929446, 1857675, 150759, 1277633, 761210, 138628, 1026833, + 2599544, 2464737, 989203, 3399615, 2144292, 216142, 637312, 2044964, 716256, 1660632, + 1762919, 4784357, 2213415, 2764769, 291806, 609772, 3264819, 1870953, 1516385, + 235647, 1045474, 2664957, 819095, 1824119, 4045271, 4448109, 1676788, 4285177, + 1580502, 3546548, 2771971, 3927086, 1339779, 3156204, 1730998, 1172522, 2433024, + 4533449, 479930, 2010695, 672994, 3542039, 3176455, 26352, 2137735, 866910, + 4410835, 2623982, 3603159, 2555625, 2765653, 267865, 2015523, 1009052, 4713994, + 1600667, 2176195, 3179631, 4570390, 2018424, 3356384, 1784287, 894861, 3622099, + 1647273, 3044136, 950354, 1491760, 3416929, 3757300, 2244912, 4129215, 1600848, + 3867343, 72329, 919189, 992521, 3445975, 4712557, 4680974, 188419, 2612093, + 1991268, 3566207, 2281468, 3859078, 2492806, 3398628, 763441, 2679107, 2554420, + 2130132, 4664374, 1182901, 3890770, 4714667, 4209303, 4013060, 3617653, 2040022, + 3296519, 4190671, 1693353, 2678411, 3788834, 2781815, 191965, 1083926, 503974, 3529226, + 1650522, 1900976, 542080, 3423929, 3418905, 878165, 4701703, 3022790, 4316365, 76365, + 4053672, 1358185, 3830478, 4445661, 3210024, 1895915, 4541133, 2938808, 562788, + 3920065, 1458776, 4052046, 2967475, 1092809, 3203538, 159626, 3399464, 214467, + 3343982, 1811854, 3189045, 4272117, 4701563, 424807, 4341116, 760545, 4674683, + 1538018, 386762, 194237, 2162719, 1694433, 943728, 2389036, 2196653, 3085571, + 1513424, 3689413, 3278747, 4197291, 3324063, 3651090, 1737936, 2768803, 2768889, + 3108096, 4311775, 3569480, 886705, 733256, 2477493, 1735412, 2960895, 1983781, + 1861797, 3566460, 4537673, 1164093, 3499764, 4553071, 3518985, 847658, 918948, + 2922351, 1056144, 652895, 1013195, 780505, 1702928, 3562838, 1432719, 2405207, + 1054920, 641647, 2240939, 3617702, 383165, 652641, 879593, 1810739, 2096385, + 4497865, 4768530, 1743968, 3582014, 1025009, 3002122, 2422190, 527647, 1251821, + 2571153, 4095874, 3705333, 3637407, 1385567, 4043855, 4041930, 2433139, 1710383, + 1127734, 4362316, 711588, 817839, 3214775, 910077, 1313768, 2382229, 16864, 2081770, + 3095420, 3195272, 548711, 2259860, 1167323, 2435974, 425238, 2085179, 2630042, + 2632881, 2867923, 3703565, 1037695, 226617, 4379130, 1541468, 3581937, 605965, + 1137674, 4655221, 4769963, 1394370, 4425315, 2990132, 2364485, 1561137, 2713384, + 481509, 2900382, 934766, 2986774, 1767669, 298593, 2502539, 139296, 3794229, + 4002180, 4718138, 2909238, 423691, 3023810, 2784924, 2760160, 1971980, 316683, + 3828090, 3253691, 4839313, 1203624, 584938, 3901482, 1747543, 1572737, 3533226, + 774708, 1691195, 1037110, 1557763, 225120, 4424243, 3524086, 1717663, 4332507, + 3513592, 4274932, 1232118, 873498, 1416042, 2488925, 111391, 4704545, 4492545, + 445317, 1584812, 2187737, 2471948, 3731678, 219255, 2282627, 2589971, 2372185, + 4609096, 3673961, 2524410, 12823, 2437155, 3015974, 4188352, 3184084, 3690756, + 1222341, 1278376, 3652030, 4162647, 326548, 3930062, 3926100, 1551222, 2722165, + 4526695, 3997534, 4815513, 3139056, 2547644, 3028915, 4149092, 3656554, 2691582, + 2676699, 1878842, 260174, 3129900, 4379993, 182347, 2189338, 3783616, 2616666, + 2596952, 243007, 4179282, 2730, 1939894, 2332032, 3335636, 182332, 3112260, + 2174584, 587481, 4527368, 3154106, 3403059, 673206, 2150292, 446521, 1600204, + 4819428, 2591357, 48490, 2917012, 2285923, 1072926, 2824281, 
4364250, 956033, 311938, + 37251, 3729300, 2726300, 644966, 1623020, 1419070, 4646747, 2417222, 2680238, + 2561083, 1793801, 2349366, 339747, 611366, 4684147, 4356907, 1277161, 4510381, + 3218352, 4161658, 3200733, 1172372, 3997786, 3169266, 3353418, 2248955, 2875885, + 2365369, 498208, 2968066, 2681505, 2059048, 2097106, 3607540, 1121504, 2016789, + 1762605, 3138431, 866081, 3705757, 3833066, 2599788, 760816, 4046672, 1544367, + 2983906, 4842911, 209599, 1250954, 3333704, 561212, 4674336, 2831841, 3690724, + 2929360, 4830834, 1177524, 2487687, 3525137, 875283, 651241, 2110742, 1296646, + 1543739, 4349417, 2384725, 1931751, 1519208, 1520034, 3385008, 3219962, 734912, 170230, + 1741419, 729913, 2860117, 2362381, 1199807, 2424230, 177824, 125948, 2722701, + 4687548, 1140771, 3232742, 4522020, 4376360, 1125603, 590312, 2481884, 138951, + 4086775, 615155, 3395781, 4587272, 283209, 568470, 4296185, 4344150, 2454321, + 2672602, 838828, 4051647, 1709120, 3074610, 693235, 4356087, 3018806, 239410, + 2431497, 691186, 766276, 4462126, 859155, 2370304, 1571808, 1938673, 1694955, + 3871296, 4245059, 3987376, 301524, 2512461, 3410437, 3300380, 684922, 4581995, + 3599557, 683515, 1850634, 3704678, 1937490, 2035591, 3718533, 2065879, 3160765, + 1467884, 1912241, 2501509, 3668572, 3390469, 2501150, 612319, 713633, 1976262, 135946, + 3641535, 632083, 13414, 4217765, 4137712, 2550250, 3281035, 4179598, 961045, + 2020694, 4380006, 1345936, 289162, 1359035, 770872, 4509911, 3947317, 4719693, + 248568, 2625660, 1237232, 2153208, 4814282, 1259954, 3677369, 861222, 2883506, + 3339149, 3998335, 491017, 1609022, 2648112, 742132, 649609, 4206953, 3131106, + 3504814, 3344486, 611721, 3215620, 2856233, 4447505, 1949222, 1868345, 712710, 6966, + 4730666, 3181872, 2972889, 3038521, 3525444, 4385208, 1845613, 1124187, 2030476, + 4468651, 2478792, 3473580, 3783357, 1852991, 1648485, 871319, 1670723, 4458328, + 3218600, 1811100, 3443356, 2233873, 3035207, 2548692, 3337891, 3773674, 1552957, + 4782811, 3144712, 3523466, 1491315, 3955852, 1838410, 3164028, 1092543, 776459, + 2959379, 2541744, 4064418, 3908320, 2854145, 3960709, 1348188, 977678, 853619, + 1304291, 2848702, 1657913, 1319826, 3322665, 788037, 2913686, 4471279, 1766285, 348304, + 56570, 1892118, 4017244, 401006, 3524539, 4310134, 1624693, 4081113, 957511, 849400, + 129975, 2616130, 378537, 1556787, 3916162, 1039980, 4407778, 2027690, 4213675, + 839863, 683134, 75805, 2493150, 4215796, 81587, 751845, 1255588, 1947964, + 1950470, 859401, 3077088, 3931110, 2316256, 1523761, 4527477, 4237511, 1123513, + 4209796, 3584772, 4250563, 2091754, 1618766, 2139944, 4525352, 382159, 2955887, 41760, + 2313998, 496912, 3791570, 3904792, 3613654, 873959, 127076, 2537797, 2458107, + 4543265, 3661909, 26828, 271816, 17854, 2461269, 1776042, 1573899, 3409957, + 4335712, 4534313, 3392751, 1230124, 2159031, 4444015, 3373087, 3848014, 2026600, + 1382747, 3537242, 4536743, 4714155, 3788371, 3570849, 173741, 211962, 4377778, + 119369, 2856973, 2945854, 1508054, 4503932, 3141566, 1842177, 3448683, 3384614, + 2886508, 1573965, 990618, 3053734, 2918742, 4508753, 1032149, 60943, 4291620, + 722607, 2883224, 169359, 4356585, 3725543, 3678729, 341673, 3592828, 4077251, + 3382936, 3885685, 4630994, 1286698, 4449616, 1138430, 3113385, 4660578, 2539973, + 4562286, 4085089, 494737, 3967610, 2130702, 1823755, 1369324, 3796951, 956299, 141730, + 935144, 4381893, 4412545, 1382250, 3024476, 2364546, 3396164, 3573511, 314081, 577688, + 4154135, 1567018, 4047761, 2446220, 1148833, 4842497, 3967186, 
1175290, 3749667,
+      1209593, 3295627, 3169065, 2460328, 1838486, 1436923, 2843887, 3676426, 2079145,
+      2975635, 535071,  4287509, 3281107, 39606,   3115500, 3204573, 722131,  3124073},
+2,
+false)));*/
 CUGRAPH_TEST_PROGRAM_MAIN()
diff --git a/python/cugraph/community/egonet_wrapper.pyx b/python/cugraph/community/egonet_wrapper.pyx
index 7d2a1169e25..ff9f2b8b3de 100644
--- a/python/cugraph/community/egonet_wrapper.pyx
+++ b/python/cugraph/community/egonet_wrapper.pyx
@@ -56,9 +56,11 @@ def egonet(input_graph, vertices, radius=1):
     # Pointers for egonet
     cdef uintptr_t c_source_vertex_ptr = vertices.__cuda_array_interface__['data'][0]
     n_subgraphs = vertices.size
-
+    n_streams = 1
+    if n_subgraphs > 1 :
+        n_streams = min(n_subgraphs, 32)
     cdef unique_ptr[handle_t] handle_ptr
-    handle_ptr.reset(new handle_t())
+    handle_ptr.reset(new handle_t(n_streams))
     handle_ = handle_ptr.get();
 
     cdef graph_container_t graph_container
diff --git a/python/cugraph/structure/graph_primtypes.pxd b/python/cugraph/structure/graph_primtypes.pxd
index 3bb42633ac1..1e0d9626727 100644
--- a/python/cugraph/structure/graph_primtypes.pxd
+++ b/python/cugraph/structure/graph_primtypes.pxd
@@ -23,7 +23,6 @@ from libcpp.vector cimport vector
 from cugraph.raft.common.handle cimport *
 from rmm._lib.device_buffer cimport device_buffer
 
-
 cdef extern from "graph.hpp" namespace "cugraph":
 
     ctypedef enum PropType:

From 85012ca1b8c99d9fe90a2a3a516e1a62fe0324ec Mon Sep 17 00:00:00 2001
From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com>
Date: Fri, 5 Mar 2021 09:16:39 -0500
Subject: [PATCH 187/343] Update C++ MG PageRank and SG PageRank, Katz Centrality, BFS, and SSSP to use the new R-mat graph generator (#1438)

- [x] Refactor cuGraph C++ test library
- [x] Add a utility function to create a graph object from the R-mat generator
- [x] Update C++ MG PageRank and SG PageRank, Katz Centrality, BFS, and SSSP tests to use the new R-mat graph generator

This partially addresses https://github.com/rapidsai/cugraph/issues/1382 and is a prerequisite for graph primitives performance optimization.
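As a condensed sketch of the usage this patch enables (illustrative only, not part of the patch itself): a parameterized test can now be instantiated either from a Matrix Market file path or from a cugraph::test::rmat_params_t. The constructor overloads and the {scale, edge_factor, a, b, c, seed, undirected, scramble_vertex_ids} ordering are inferred from the bfs_test.cpp changes below; any other scaffolding is assumed.

    // Sketch: one test suite, two kinds of inputs (values copied from the diffs below).
    INSTANTIATE_TEST_CASE_P(
      simple_test,
      Tests_BFS,
      ::testing::Values(
        // read the graph from a Matrix Market file, BFS source vertex 0
        BFS_Usecase("test/datasets/karate.mtx", 0),
        // generate the graph with the R-mat generator (scale 10, edge factor 16,
        // a/b/c = 0.57/0.19/0.19, seed 0, directed, no vertex-id scrambling)
        BFS_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false},
                    0)));

The test fixtures then dispatch on input_graph_specifier.tag, calling read_graph_from_matrix_market_file for MATRIX_MARKET_FILE_PATH and the new generate_graph_from_rmat_params for RMAT_PARAMS, as the hunks below show.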
Authors: - Seunghwa Kang (@seunghwak) Approvers: - Andrei Schaffer (@aschaffer) - Chuck Hastings (@ChuckHastings) - Brad Rees (@BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1438 --- cpp/tests/CMakeLists.txt | 5 +- cpp/tests/experimental/bfs_test.cpp | 49 +- .../experimental/katz_centrality_test.cpp | 66 ++- cpp/tests/experimental/pagerank_test.cpp | 82 ++- cpp/tests/experimental/sssp_test.cpp | 49 +- cpp/tests/pagerank/mg_pagerank_test.cpp | 113 +++- .../utilities/generate_graph_from_edgelist.cu | 526 ++++++++++++++++++ ...ies.cu => matrix_market_file_utilities.cu} | 227 +------- cpp/tests/utilities/misc_utilities.cpp | 33 ++ cpp/tests/utilities/rmat_utilities.cu | 431 ++++++++++++++ cpp/tests/utilities/test_utilities.hpp | 54 +- 11 files changed, 1337 insertions(+), 298 deletions(-) create mode 100644 cpp/tests/utilities/generate_graph_from_edgelist.cu rename cpp/tests/utilities/{test_utilities.cu => matrix_market_file_utilities.cu} (71%) create mode 100644 cpp/tests/utilities/misc_utilities.cpp create mode 100644 cpp/tests/utilities/rmat_utilities.cu diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 18dfdbc8f63..1db2f9df42e 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -20,7 +20,10 @@ # - common test utils ----------------------------------------------------------------------------- add_library(cugraphtestutil STATIC - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cu" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/generate_graph_from_edgelist.cu" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/matrix_market_file_utilities.cu" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/rmat_utilities.cu" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/misc_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c") set_property(TARGET cugraphtestutil PROPERTY POSITION_INDEPENDENT_CODE ON) diff --git a/cpp/tests/experimental/bfs_test.cpp b/cpp/tests/experimental/bfs_test.cpp index 5b8add98560..ad9ece99ef9 100644 --- a/cpp/tests/experimental/bfs_test.cpp +++ b/cpp/tests/experimental/bfs_test.cpp @@ -73,17 +73,26 @@ void bfs_reference(edge_t const* offsets, } typedef struct BFS_Usecase_t { - std::string graph_file_full_path{}; + cugraph::test::input_graph_specifier_t input_graph_specifier{}; size_t source{false}; BFS_Usecase_t(std::string const& graph_file_path, size_t source) : source(source) { + std::string graph_file_full_path{}; if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; } else { graph_file_full_path = graph_file_path; } + input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH; + input_graph_specifier.graph_file_full_path = graph_file_full_path; }; + + BFS_Usecase_t(cugraph::test::rmat_params_t rmat_params, size_t source) : source(source) + { + input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::RMAT_PARAMS; + input_graph_specifier.rmat_params = rmat_params; + } } BFS_Usecase; class Tests_BFS : public ::testing::TestWithParam { @@ -104,8 +113,23 @@ class Tests_BFS : public ::testing::TestWithParam { cugraph::experimental::graph_t graph(handle); std::tie(graph, std::ignore) = - cugraph::test::read_graph_from_matrix_market_file( - handle, configuration.graph_file_full_path, false, false); + configuration.input_graph_specifier.tag == + cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH + ? 
cugraph::test:: + read_graph_from_matrix_market_file( + handle, configuration.input_graph_specifier.graph_file_full_path, false, false) + : cugraph::test::generate_graph_from_rmat_params( + handle, + configuration.input_graph_specifier.rmat_params.scale, + configuration.input_graph_specifier.rmat_params.edge_factor, + configuration.input_graph_specifier.rmat_params.a, + configuration.input_graph_specifier.rmat_params.b, + configuration.input_graph_specifier.rmat_params.c, + configuration.input_graph_specifier.rmat_params.seed, + configuration.input_graph_specifier.rmat_params.undirected, + configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, + false, + false); auto graph_view = graph.view(); std::vector h_offsets(graph_view.get_number_of_vertices() + 1); @@ -193,13 +217,16 @@ class Tests_BFS : public ::testing::TestWithParam { // FIXME: add tests for type combinations TEST_P(Tests_BFS, CheckInt32Int32) { run_current_test(GetParam()); } -INSTANTIATE_TEST_CASE_P(simple_test, - Tests_BFS, - ::testing::Values(BFS_Usecase("test/datasets/karate.mtx", 0), - BFS_Usecase("test/datasets/polbooks.mtx", 0), - BFS_Usecase("test/datasets/netscience.mtx", 0), - BFS_Usecase("test/datasets/netscience.mtx", 100), - BFS_Usecase("test/datasets/wiki2003.mtx", 1000), - BFS_Usecase("test/datasets/wiki-Talk.mtx", 1000))); +INSTANTIATE_TEST_CASE_P( + simple_test, + Tests_BFS, + ::testing::Values( + BFS_Usecase("test/datasets/karate.mtx", 0), + BFS_Usecase("test/datasets/polbooks.mtx", 0), + BFS_Usecase("test/datasets/netscience.mtx", 0), + BFS_Usecase("test/datasets/netscience.mtx", 100), + BFS_Usecase("test/datasets/wiki2003.mtx", 1000), + BFS_Usecase("test/datasets/wiki-Talk.mtx", 1000), + BFS_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, 0))); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/katz_centrality_test.cpp b/cpp/tests/experimental/katz_centrality_test.cpp index 945248cc4de..776bb60716c 100644 --- a/cpp/tests/experimental/katz_centrality_test.cpp +++ b/cpp/tests/experimental/katz_centrality_test.cpp @@ -89,18 +89,31 @@ void katz_centrality_reference(edge_t const* offsets, } typedef struct KatzCentrality_Usecase_t { - std::string graph_file_full_path{}; + cugraph::test::input_graph_specifier_t input_graph_specifier{}; + bool test_weighted{false}; KatzCentrality_Usecase_t(std::string const& graph_file_path, bool test_weighted) : test_weighted(test_weighted) { + std::string graph_file_full_path{}; if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; } else { graph_file_full_path = graph_file_path; } + input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH; + input_graph_specifier.graph_file_full_path = graph_file_full_path; }; + + KatzCentrality_Usecase_t(cugraph::test::rmat_params_t rmat_params, + double personalization_ratio, + bool test_weighted) + : test_weighted(test_weighted) + { + input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::RMAT_PARAMS; + input_graph_specifier.rmat_params = rmat_params; + } } KatzCentrality_Usecase; class Tests_KatzCentrality : public ::testing::TestWithParam { @@ -119,8 +132,26 @@ class Tests_KatzCentrality : public ::testing::TestWithParam graph(handle); std::tie(graph, std::ignore) = - cugraph::test::read_graph_from_matrix_market_file( - handle, configuration.graph_file_full_path, configuration.test_weighted, false); + 
configuration.input_graph_specifier.tag == + cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH + ? cugraph::test:: + read_graph_from_matrix_market_file( + handle, + configuration.input_graph_specifier.graph_file_full_path, + configuration.test_weighted, + false) + : cugraph::test::generate_graph_from_rmat_params( + handle, + configuration.input_graph_specifier.rmat_params.scale, + configuration.input_graph_specifier.rmat_params.edge_factor, + configuration.input_graph_specifier.rmat_params.a, + configuration.input_graph_specifier.rmat_params.b, + configuration.input_graph_specifier.rmat_params.c, + configuration.input_graph_specifier.rmat_params.seed, + configuration.input_graph_specifier.rmat_params.undirected, + configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, + configuration.test_weighted, + false); auto graph_view = graph.view(); std::vector h_offsets(graph_view.get_number_of_vertices() + 1); @@ -220,13 +251,26 @@ TEST_P(Tests_KatzCentrality, CheckInt32Int32FloatFloat) INSTANTIATE_TEST_CASE_P( simple_test, Tests_KatzCentrality, - ::testing::Values(KatzCentrality_Usecase("test/datasets/karate.mtx", false), - KatzCentrality_Usecase("test/datasets/karate.mtx", true), - KatzCentrality_Usecase("test/datasets/web-Google.mtx", false), - KatzCentrality_Usecase("test/datasets/web-Google.mtx", true), - KatzCentrality_Usecase("test/datasets/ljournal-2008.mtx", false), - KatzCentrality_Usecase("test/datasets/ljournal-2008.mtx", true), - KatzCentrality_Usecase("test/datasets/webbase-1M.mtx", false), - KatzCentrality_Usecase("test/datasets/webbase-1M.mtx", true))); + ::testing::Values( + KatzCentrality_Usecase("test/datasets/karate.mtx", false), + KatzCentrality_Usecase("test/datasets/karate.mtx", true), + KatzCentrality_Usecase("test/datasets/web-Google.mtx", false), + KatzCentrality_Usecase("test/datasets/web-Google.mtx", true), + KatzCentrality_Usecase("test/datasets/ljournal-2008.mtx", false), + KatzCentrality_Usecase("test/datasets/ljournal-2008.mtx", true), + KatzCentrality_Usecase("test/datasets/webbase-1M.mtx", false), + KatzCentrality_Usecase("test/datasets/webbase-1M.mtx", true), + KatzCentrality_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, + 0.0, + false), + KatzCentrality_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, + 0.5, + false), + KatzCentrality_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, + 0.0, + true), + KatzCentrality_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, + 0.5, + true))); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/pagerank_test.cpp b/cpp/tests/experimental/pagerank_test.cpp index 514f73e3311..ff3b073cbc7 100644 --- a/cpp/tests/experimental/pagerank_test.cpp +++ b/cpp/tests/experimental/pagerank_test.cpp @@ -124,7 +124,8 @@ void pagerank_reference(edge_t const* offsets, } typedef struct PageRank_Usecase_t { - std::string graph_file_full_path{}; + cugraph::test::input_graph_specifier_t input_graph_specifier{}; + double personalization_ratio{0.0}; bool test_weighted{false}; @@ -133,12 +134,24 @@ typedef struct PageRank_Usecase_t { bool test_weighted) : personalization_ratio(personalization_ratio), test_weighted(test_weighted) { + std::string graph_file_full_path{}; if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; } else { graph_file_full_path = graph_file_path; } + 
input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH; + input_graph_specifier.graph_file_full_path = graph_file_full_path; }; + + PageRank_Usecase_t(cugraph::test::rmat_params_t rmat_params, + double personalization_ratio, + bool test_weighted) + : personalization_ratio(personalization_ratio), test_weighted(test_weighted) + { + input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::RMAT_PARAMS; + input_graph_specifier.rmat_params = rmat_params; + } } PageRank_Usecase; class Tests_PageRank : public ::testing::TestWithParam { @@ -157,8 +170,26 @@ class Tests_PageRank : public ::testing::TestWithParam { cugraph::experimental::graph_t graph(handle); std::tie(graph, std::ignore) = - cugraph::test::read_graph_from_matrix_market_file( - handle, configuration.graph_file_full_path, configuration.test_weighted, false); + configuration.input_graph_specifier.tag == + cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH + ? cugraph::test:: + read_graph_from_matrix_market_file( + handle, + configuration.input_graph_specifier.graph_file_full_path, + configuration.test_weighted, + false) + : cugraph::test::generate_graph_from_rmat_params( + handle, + configuration.input_graph_specifier.rmat_params.scale, + configuration.input_graph_specifier.rmat_params.edge_factor, + configuration.input_graph_specifier.rmat_params.a, + configuration.input_graph_specifier.rmat_params.b, + configuration.input_graph_specifier.rmat_params.c, + configuration.input_graph_specifier.rmat_params.seed, + configuration.input_graph_specifier.rmat_params.undirected, + configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, + configuration.test_weighted, + false); auto graph_view = graph.view(); std::vector h_offsets(graph_view.get_number_of_vertices() + 1); @@ -296,21 +327,34 @@ TEST_P(Tests_PageRank, CheckInt32Int32FloatFloat) INSTANTIATE_TEST_CASE_P( simple_test, Tests_PageRank, - ::testing::Values(PageRank_Usecase("test/datasets/karate.mtx", 0.0, false), - PageRank_Usecase("test/datasets/karate.mtx", 0.5, false), - PageRank_Usecase("test/datasets/karate.mtx", 0.0, true), - PageRank_Usecase("test/datasets/karate.mtx", 0.5, true), - PageRank_Usecase("test/datasets/web-Google.mtx", 0.0, false), - PageRank_Usecase("test/datasets/web-Google.mtx", 0.5, false), - PageRank_Usecase("test/datasets/web-Google.mtx", 0.0, true), - PageRank_Usecase("test/datasets/web-Google.mtx", 0.5, true), - PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.0, false), - PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.5, false), - PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.0, true), - PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.5, true), - PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.0, false), - PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.5, false), - PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.0, true), - PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.5, true))); + ::testing::Values( + PageRank_Usecase("test/datasets/karate.mtx", 0.0, false), + PageRank_Usecase("test/datasets/karate.mtx", 0.5, false), + PageRank_Usecase("test/datasets/karate.mtx", 0.0, true), + PageRank_Usecase("test/datasets/karate.mtx", 0.5, true), + PageRank_Usecase("test/datasets/web-Google.mtx", 0.0, false), + PageRank_Usecase("test/datasets/web-Google.mtx", 0.5, false), + PageRank_Usecase("test/datasets/web-Google.mtx", 0.0, true), + PageRank_Usecase("test/datasets/web-Google.mtx", 0.5, true), + PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.0, 
false), + PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.5, false), + PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.0, true), + PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.5, true), + PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.0, false), + PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.5, false), + PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.0, true), + PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.5, true), + PageRank_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, + 0.0, + false), + PageRank_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, + 0.5, + false), + PageRank_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, + 0.0, + true), + PageRank_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, + 0.5, + true))); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/sssp_test.cpp b/cpp/tests/experimental/sssp_test.cpp index 7fd59d49a25..611abcb0d75 100644 --- a/cpp/tests/experimental/sssp_test.cpp +++ b/cpp/tests/experimental/sssp_test.cpp @@ -79,17 +79,26 @@ void sssp_reference(edge_t const* offsets, } typedef struct SSSP_Usecase_t { - std::string graph_file_full_path{}; + cugraph::test::input_graph_specifier_t input_graph_specifier{}; size_t source{false}; SSSP_Usecase_t(std::string const& graph_file_path, size_t source) : source(source) { + std::string graph_file_full_path{}; if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; } else { graph_file_full_path = graph_file_path; } + input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH; + input_graph_specifier.graph_file_full_path = graph_file_full_path; }; + + SSSP_Usecase_t(cugraph::test::rmat_params_t rmat_params, size_t source) : source(source) + { + input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::RMAT_PARAMS; + input_graph_specifier.rmat_params = rmat_params; + } } SSSP_Usecase; class Tests_SSSP : public ::testing::TestWithParam { @@ -108,8 +117,23 @@ class Tests_SSSP : public ::testing::TestWithParam { cugraph::experimental::graph_t graph(handle); std::tie(graph, std::ignore) = - cugraph::test::read_graph_from_matrix_market_file( - handle, configuration.graph_file_full_path, true, false); + configuration.input_graph_specifier.tag == + cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH + ? 
cugraph::test:: + read_graph_from_matrix_market_file( + handle, configuration.input_graph_specifier.graph_file_full_path, true, false) + : cugraph::test::generate_graph_from_rmat_params( + handle, + configuration.input_graph_specifier.rmat_params.scale, + configuration.input_graph_specifier.rmat_params.edge_factor, + configuration.input_graph_specifier.rmat_params.a, + configuration.input_graph_specifier.rmat_params.b, + configuration.input_graph_specifier.rmat_params.c, + configuration.input_graph_specifier.rmat_params.seed, + configuration.input_graph_specifier.rmat_params.undirected, + configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, + true, + false); auto graph_view = graph.view(); std::vector h_offsets(graph_view.get_number_of_vertices() + 1); @@ -209,16 +233,13 @@ class Tests_SSSP : public ::testing::TestWithParam { // FIXME: add tests for type combinations TEST_P(Tests_SSSP, CheckInt32Int32Float) { run_current_test(GetParam()); } -#if 0 -INSTANTIATE_TEST_CASE_P(simple_test, - Tests_SSSP, - ::testing::Values(SSSP_Usecase("test/datasets/karate.mtx", 0))); -#else -INSTANTIATE_TEST_CASE_P(simple_test, - Tests_SSSP, - ::testing::Values(SSSP_Usecase("test/datasets/karate.mtx", 0), - SSSP_Usecase("test/datasets/dblp.mtx", 0), - SSSP_Usecase("test/datasets/wiki2003.mtx", 1000))); -#endif +INSTANTIATE_TEST_CASE_P( + simple_test, + Tests_SSSP, + ::testing::Values( + SSSP_Usecase("test/datasets/karate.mtx", 0), + SSSP_Usecase("test/datasets/dblp.mtx", 0), + SSSP_Usecase("test/datasets/wiki2003.mtx", 1000), + SSSP_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, 0))); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/pagerank/mg_pagerank_test.cpp b/cpp/tests/pagerank/mg_pagerank_test.cpp index cf9f452162b..85ee9a4243e 100644 --- a/cpp/tests/pagerank/mg_pagerank_test.cpp +++ b/cpp/tests/pagerank/mg_pagerank_test.cpp @@ -28,25 +28,38 @@ #include -typedef struct Pagerank_Usecase_t { - std::string graph_file_full_path{}; +typedef struct PageRank_Usecase_t { + cugraph::test::input_graph_specifier_t input_graph_specifier{}; + double personalization_ratio{0.0}; bool test_weighted{false}; - Pagerank_Usecase_t(std::string const& graph_file_path, + PageRank_Usecase_t(std::string const& graph_file_path, double personalization_ratio, bool test_weighted) : personalization_ratio(personalization_ratio), test_weighted(test_weighted) { + std::string graph_file_full_path{}; if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; } else { graph_file_full_path = graph_file_path; } + input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH; + input_graph_specifier.graph_file_full_path = graph_file_full_path; }; -} Pagerank_Usecase; -class Tests_MGPageRank : public ::testing::TestWithParam { + PageRank_Usecase_t(cugraph::test::rmat_params_t rmat_params, + double personalization_ratio, + bool test_weighted) + : personalization_ratio(personalization_ratio), test_weighted(test_weighted) + { + input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::RMAT_PARAMS; + input_graph_specifier.rmat_params = rmat_params; + } +} PageRank_Usecase; + +class Tests_MGPageRank : public ::testing::TestWithParam { public: Tests_MGPageRank() {} static void SetupTestCase() {} @@ -57,7 +70,7 @@ class Tests_MGPageRank : public ::testing::TestWithParam { // Compare the results of running pagerank on multiple GPUs to that of a single-GPU run 
template - void run_current_test(Pagerank_Usecase const& configuration) + void run_current_test(PageRank_Usecase const& configuration) { // 1. initialize handle @@ -78,16 +91,51 @@ class Tests_MGPageRank : public ::testing::TestWithParam { cugraph::experimental::graph_t sg_graph(handle); rmm::device_uvector d_sg_renumber_map_labels(0, handle.get_stream()); std::tie(sg_graph, d_sg_renumber_map_labels) = - cugraph::test::read_graph_from_matrix_market_file( - handle, configuration.graph_file_full_path, configuration.test_weighted, true); + configuration.input_graph_specifier.tag == + cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH + ? cugraph::test:: + read_graph_from_matrix_market_file( + handle, + configuration.input_graph_specifier.graph_file_full_path, + configuration.test_weighted, + true) + : cugraph::test::generate_graph_from_rmat_params( + handle, + configuration.input_graph_specifier.rmat_params.scale, + configuration.input_graph_specifier.rmat_params.edge_factor, + configuration.input_graph_specifier.rmat_params.a, + configuration.input_graph_specifier.rmat_params.b, + configuration.input_graph_specifier.rmat_params.c, + configuration.input_graph_specifier.rmat_params.seed, + configuration.input_graph_specifier.rmat_params.undirected, + configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, + configuration.test_weighted, + true); auto sg_graph_view = sg_graph.view(); cugraph::experimental::graph_t mg_graph(handle); rmm::device_uvector d_mg_renumber_map_labels(0, handle.get_stream()); std::tie(mg_graph, d_mg_renumber_map_labels) = - cugraph::test::read_graph_from_matrix_market_file( - handle, configuration.graph_file_full_path, configuration.test_weighted, true); + configuration.input_graph_specifier.tag == + cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH + ? 
cugraph::test::read_graph_from_matrix_market_file( + handle, + configuration.input_graph_specifier.graph_file_full_path, + configuration.test_weighted, + true) + : cugraph::test::generate_graph_from_rmat_params( + handle, + configuration.input_graph_specifier.rmat_params.scale, + configuration.input_graph_specifier.rmat_params.edge_factor, + configuration.input_graph_specifier.rmat_params.a, + configuration.input_graph_specifier.rmat_params.b, + configuration.input_graph_specifier.rmat_params.c, + configuration.input_graph_specifier.rmat_params.seed, + configuration.input_graph_specifier.rmat_params.undirected, + configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, + configuration.test_weighted, + true); auto mg_graph_view = mg_graph.view(); @@ -276,21 +324,34 @@ TEST_P(Tests_MGPageRank, CheckInt32Int32FloatFloat) INSTANTIATE_TEST_CASE_P( simple_test, Tests_MGPageRank, - ::testing::Values(Pagerank_Usecase("test/datasets/karate.mtx", 0.0, false), - Pagerank_Usecase("test/datasets/karate.mtx", 0.5, false), - Pagerank_Usecase("test/datasets/karate.mtx", 0.0, true), - Pagerank_Usecase("test/datasets/karate.mtx", 0.5, true), - Pagerank_Usecase("test/datasets/web-Google.mtx", 0.0, false), - Pagerank_Usecase("test/datasets/web-Google.mtx", 0.5, false), - Pagerank_Usecase("test/datasets/web-Google.mtx", 0.0, true), - Pagerank_Usecase("test/datasets/web-Google.mtx", 0.5, true), - Pagerank_Usecase("test/datasets/ljournal-2008.mtx", 0.0, false), - Pagerank_Usecase("test/datasets/ljournal-2008.mtx", 0.5, false), - Pagerank_Usecase("test/datasets/ljournal-2008.mtx", 0.0, true), - Pagerank_Usecase("test/datasets/ljournal-2008.mtx", 0.5, true), - Pagerank_Usecase("test/datasets/webbase-1M.mtx", 0.0, false), - Pagerank_Usecase("test/datasets/webbase-1M.mtx", 0.5, false), - Pagerank_Usecase("test/datasets/webbase-1M.mtx", 0.0, true), - Pagerank_Usecase("test/datasets/webbase-1M.mtx", 0.5, true))); + ::testing::Values( + PageRank_Usecase("test/datasets/karate.mtx", 0.0, false), + PageRank_Usecase("test/datasets/karate.mtx", 0.5, false), + PageRank_Usecase("test/datasets/karate.mtx", 0.0, true), + PageRank_Usecase("test/datasets/karate.mtx", 0.5, true), + PageRank_Usecase("test/datasets/web-Google.mtx", 0.0, false), + PageRank_Usecase("test/datasets/web-Google.mtx", 0.5, false), + PageRank_Usecase("test/datasets/web-Google.mtx", 0.0, true), + PageRank_Usecase("test/datasets/web-Google.mtx", 0.5, true), + PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.0, false), + PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.5, false), + PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.0, true), + PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.5, true), + PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.0, false), + PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.5, false), + PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.0, true), + PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.5, true), + PageRank_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, + 0.0, + false), + PageRank_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, + 0.5, + false), + PageRank_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, + 0.0, + true), + PageRank_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, + 0.5, + true))); CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/utilities/generate_graph_from_edgelist.cu b/cpp/tests/utilities/generate_graph_from_edgelist.cu new 
file mode 100644 index 00000000000..1b9fe6051f7 --- /dev/null +++ b/cpp/tests/utilities/generate_graph_from_edgelist.cu @@ -0,0 +1,526 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include + +#include +#include +#include + +#include + +#include + +#include + +namespace cugraph { +namespace test { + +namespace detail { + +template +std::enable_if_t< + multi_gpu, + std::tuple< + cugraph::experimental::graph_t, + rmm::device_uvector>> +generate_graph_from_edgelist(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber) +{ + CUGRAPH_EXPECTS(renumber, "renumber should be true if multi_gpu is true."); + + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_size = row_comm.get_size(); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_size = col_comm.get_size(); + + vertex_t number_of_vertices = static_cast(vertices.size()); + + auto vertex_key_func = + cugraph::experimental::detail::compute_gpu_id_from_vertex_t{comm_size}; + vertices.resize(thrust::distance(vertices.begin(), + thrust::remove_if( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertices.begin(), + vertices.end(), + [comm_rank, key_func = vertex_key_func] __device__(auto val) { + return key_func(val) != comm_rank; + })), + handle.get_stream()); + vertices.shrink_to_fit(handle.get_stream()); + + auto edge_key_func = cugraph::experimental::detail::compute_gpu_id_from_edge_t{ + false, comm_size, row_comm_size, col_comm_size}; + size_t number_of_local_edges{}; + if (test_weighted) { + auto edge_first = thrust::make_zip_iterator( + thrust::make_tuple(edgelist_rows.begin(), edgelist_cols.begin(), edgelist_weights.begin())); + number_of_local_edges = thrust::distance( + edge_first, + thrust::remove_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edge_first, + edge_first + edgelist_rows.size(), + [comm_rank, key_func = edge_key_func] __device__(auto e) { + auto major = store_transposed ? thrust::get<1>(e) : thrust::get<0>(e); + auto minor = store_transposed ? thrust::get<0>(e) : thrust::get<1>(e); + return key_func(major, minor) != comm_rank; + })); + } else { + auto edge_first = + thrust::make_zip_iterator(thrust::make_tuple(edgelist_rows.begin(), edgelist_cols.begin())); + number_of_local_edges = thrust::distance( + edge_first, + thrust::remove_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edge_first, + edge_first + edgelist_rows.size(), + [comm_rank, key_func = edge_key_func] __device__(auto e) { + auto major = store_transposed ? 
thrust::get<1>(e) : thrust::get<0>(e); + auto minor = store_transposed ? thrust::get<0>(e) : thrust::get<1>(e); + return key_func(major, minor) != comm_rank; + })); + } + + edgelist_rows.resize(number_of_local_edges, handle.get_stream()); + edgelist_rows.shrink_to_fit(handle.get_stream()); + edgelist_cols.resize(number_of_local_edges, handle.get_stream()); + edgelist_cols.shrink_to_fit(handle.get_stream()); + if (test_weighted) { + edgelist_weights.resize(number_of_local_edges, handle.get_stream()); + edgelist_weights.shrink_to_fit(handle.get_stream()); + } + + // 3. renumber + + rmm::device_uvector renumber_map_labels(0, handle.get_stream()); + cugraph::experimental::partition_t partition{}; + vertex_t aggregate_number_of_vertices{}; + edge_t number_of_edges{}; + // FIXME: set do_expensive_check to false once validated + std::tie(renumber_map_labels, partition, aggregate_number_of_vertices, number_of_edges) = + cugraph::experimental::renumber_edgelist( + handle, + vertices.data(), + static_cast(vertices.size()), + store_transposed ? edgelist_cols.data() : edgelist_rows.data(), + store_transposed ? edgelist_rows.data() : edgelist_cols.data(), + edgelist_rows.size(), + false, + true); + assert(aggregate_number_of_vertices == number_of_vertices); + + // 4. create a graph + + return std::make_tuple( + cugraph::experimental::graph_t( + handle, + std::vector>{ + cugraph::experimental::edgelist_t{ + edgelist_rows.data(), + edgelist_cols.data(), + test_weighted ? edgelist_weights.data() : nullptr, + static_cast(edgelist_rows.size())}}, + partition, + number_of_vertices, + number_of_edges, + cugraph::experimental::graph_properties_t{is_symmetric, false}, + true, + true), + std::move(renumber_map_labels)); +} + +template +std::enable_if_t< + !multi_gpu, + std::tuple< + cugraph::experimental::graph_t, + rmm::device_uvector>> +generate_graph_from_edgelist(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber) +{ + vertex_t number_of_vertices = static_cast(vertices.size()); + + // FIXME: set do_expensive_check to false once validated + auto renumber_map_labels = + renumber ? cugraph::experimental::renumber_edgelist( + handle, + vertices.data(), + static_cast(vertices.size()), + store_transposed ? edgelist_cols.data() : edgelist_rows.data(), + store_transposed ? edgelist_rows.data() : edgelist_cols.data(), + static_cast(edgelist_rows.size()), + true) + : rmm::device_uvector(0, handle.get_stream()); + + // FIXME: set do_expensive_check to false once validated + return std::make_tuple( + cugraph::experimental::graph_t( + handle, + cugraph::experimental::edgelist_t{ + edgelist_rows.data(), + edgelist_cols.data(), + test_weighted ? edgelist_weights.data() : nullptr, + static_cast(edgelist_rows.size())}, + number_of_vertices, + cugraph::experimental::graph_properties_t{is_symmetric, false}, + renumber ? 
true : false, + true), + std::move(renumber_map_labels)); +} + +} // namespace detail + +template +std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber) +{ + return detail:: + generate_graph_from_edgelist( + handle, + std::move(vertices), + std::move(edgelist_rows), + std::move(edgelist_cols), + std::move(edgelist_weights), + is_symmetric, + test_weighted, + renumber); +} + +// explicit instantiations + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool 
test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool 
is_symmetric, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +} // namespace test +} // namespace cugraph diff --git a/cpp/tests/utilities/test_utilities.cu b/cpp/tests/utilities/matrix_market_file_utilities.cu similarity index 71% rename from cpp/tests/utilities/test_utilities.cu rename to cpp/tests/utilities/matrix_market_file_utilities.cu index 0a7b58b32cd..ddbbac603ee 100644 --- a/cpp/tests/utilities/test_utilities.cu +++ b/cpp/tests/utilities/matrix_market_file_utilities.cu @@ -15,45 +15,19 @@ */ #include -#include -#include -#include #include -#include #include #include #include -#include -#include -#include -#include #include -extern "C" { -#include "mmio.h" -} - -#include -#include -#include -#include +#include namespace cugraph { namespace test { -std::string getFileName(const std::string& s) -{ - char sep = '/'; -#ifdef _WIN32 - sep = '\\'; -#endif - size_t i = s.rfind(sep, s.length()); - if (i != std::string::npos) { return (s.substr(i + 1, s.length() - i)); } - return (""); -} - /// Read matrix properties from Matrix Market file /** Matrix Market file is assumed to be a sparse matrix in coordinate * format. @@ -339,155 +313,13 @@ read_edgelist_from_matrix_market_file(raft::handle_t const& handle, is_symmetric); } -namespace detail { - template -std::enable_if_t< - multi_gpu, - std::tuple< - cugraph::experimental::graph_t, - rmm::device_uvector>> -read_graph_from_matrix_market_file(raft::handle_t const& handle, - std::string const& graph_file_full_path, - bool test_weighted, - bool renumber) -{ - CUGRAPH_EXPECTS(renumber, "renumber should be true if multi_gpu is true."); - - // 1. 
read from the matrix market file - - rmm::device_uvector d_edgelist_rows(0, handle.get_stream()); - rmm::device_uvector d_edgelist_cols(0, handle.get_stream()); - rmm::device_uvector d_edgelist_weights(0, handle.get_stream()); - vertex_t number_of_vertices{}; - bool is_symmetric{}; - std::tie(d_edgelist_rows, d_edgelist_cols, d_edgelist_weights, number_of_vertices, is_symmetric) = - read_edgelist_from_matrix_market_file( - handle, graph_file_full_path, test_weighted); - - rmm::device_uvector d_vertices(number_of_vertices, handle.get_stream()); - thrust::sequence(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - d_vertices.begin(), - d_vertices.end(), - vertex_t{0}); - - // 2. filter non-local vertices & edges - - auto& comm = handle.get_comms(); - auto const comm_size = comm.get_size(); - auto const comm_rank = comm.get_rank(); - auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto const row_comm_size = row_comm.get_size(); - auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - auto const col_comm_size = col_comm.get_size(); - - auto vertex_key_func = - cugraph::experimental::detail::compute_gpu_id_from_vertex_t{comm_size}; - d_vertices.resize( - thrust::distance( - d_vertices.begin(), - thrust::remove_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - d_vertices.begin(), - d_vertices.end(), - [comm_rank, key_func = vertex_key_func] __device__(auto val) { - return key_func(val) != comm_rank; - })), - handle.get_stream()); - d_vertices.shrink_to_fit(handle.get_stream()); - - auto edge_key_func = cugraph::experimental::detail::compute_gpu_id_from_edge_t{ - false, comm_size, row_comm_size, col_comm_size}; - size_t number_of_local_edges{}; - if (test_weighted) { - auto edge_first = thrust::make_zip_iterator(thrust::make_tuple( - d_edgelist_rows.begin(), d_edgelist_cols.begin(), d_edgelist_weights.begin())); - number_of_local_edges = thrust::distance( - edge_first, - thrust::remove_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - edge_first, - edge_first + d_edgelist_rows.size(), - [comm_rank, key_func = edge_key_func] __device__(auto e) { - auto major = store_transposed ? thrust::get<1>(e) : thrust::get<0>(e); - auto minor = store_transposed ? thrust::get<0>(e) : thrust::get<1>(e); - return key_func(major, minor) != comm_rank; - })); - } else { - auto edge_first = thrust::make_zip_iterator( - thrust::make_tuple(d_edgelist_rows.begin(), d_edgelist_cols.begin())); - number_of_local_edges = thrust::distance( - edge_first, - thrust::remove_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - edge_first, - edge_first + d_edgelist_rows.size(), - [comm_rank, key_func = edge_key_func] __device__(auto e) { - auto major = store_transposed ? thrust::get<1>(e) : thrust::get<0>(e); - auto minor = store_transposed ? thrust::get<0>(e) : thrust::get<1>(e); - return key_func(major, minor) != comm_rank; - })); - } - - d_edgelist_rows.resize(number_of_local_edges, handle.get_stream()); - d_edgelist_rows.shrink_to_fit(handle.get_stream()); - d_edgelist_cols.resize(number_of_local_edges, handle.get_stream()); - d_edgelist_cols.shrink_to_fit(handle.get_stream()); - if (test_weighted) { - d_edgelist_weights.resize(number_of_local_edges, handle.get_stream()); - d_edgelist_weights.shrink_to_fit(handle.get_stream()); - } - - // 3. 
renumber - - rmm::device_uvector renumber_map_labels(0, handle.get_stream()); - cugraph::experimental::partition_t partition{}; - vertex_t aggregate_number_of_vertices{}; - edge_t number_of_edges{}; - // FIXME: set do_expensive_check to false once validated - std::tie(renumber_map_labels, partition, aggregate_number_of_vertices, number_of_edges) = - cugraph::experimental::renumber_edgelist( - handle, - d_vertices.data(), - static_cast(d_vertices.size()), - store_transposed ? d_edgelist_cols.data() : d_edgelist_rows.data(), - store_transposed ? d_edgelist_rows.data() : d_edgelist_cols.data(), - d_edgelist_rows.size(), - false, - true); - assert(aggregate_number_of_vertices == number_of_vertices); - - // 4. create a graph - - return std::make_tuple( - cugraph::experimental::graph_t( - handle, - std::vector>{ - cugraph::experimental::edgelist_t{ - d_edgelist_rows.data(), - d_edgelist_cols.data(), - test_weighted ? d_edgelist_weights.data() : nullptr, - static_cast(d_edgelist_rows.size())}}, - partition, - number_of_vertices, - number_of_edges, - cugraph::experimental::graph_properties_t{is_symmetric, false}, - true, - true), - std::move(renumber_map_labels)); -} - -template -std::enable_if_t< - !multi_gpu, - std::tuple< - cugraph::experimental::graph_t, - rmm::device_uvector>> +std::tuple, + rmm::device_uvector> read_graph_from_matrix_market_file(raft::handle_t const& handle, std::string const& graph_file_full_path, bool test_weighted, @@ -508,52 +340,17 @@ read_graph_from_matrix_market_file(raft::handle_t const& handle, d_vertices.end(), vertex_t{0}); - // FIXME: set do_expensive_check to false once validated - auto renumber_map_labels = - renumber ? cugraph::experimental::renumber_edgelist( - handle, - d_vertices.data(), - static_cast(d_vertices.size()), - store_transposed ? d_edgelist_cols.data() : d_edgelist_rows.data(), - store_transposed ? d_edgelist_rows.data() : d_edgelist_cols.data(), - static_cast(d_edgelist_rows.size()), - true) - : rmm::device_uvector(0, handle.get_stream()); - - // FIXME: set do_expensive_check to false once validated - return std::make_tuple( - cugraph::experimental::graph_t( - handle, - cugraph::experimental::edgelist_t{ - d_edgelist_rows.data(), - d_edgelist_cols.data(), - test_weighted ? d_edgelist_weights.data() : nullptr, - static_cast(d_edgelist_rows.size())}, - number_of_vertices, - cugraph::experimental::graph_properties_t{is_symmetric, false}, - renumber ? true : false, - true), - std::move(renumber_map_labels)); + return generate_graph_from_edgelist( + handle, + std::move(d_vertices), + std::move(d_edgelist_rows), + std::move(d_edgelist_cols), + std::move(d_edgelist_weights), + is_symmetric, + test_weighted, + renumber); } -} // namespace detail - -template -std::tuple, - rmm::device_uvector> -read_graph_from_matrix_market_file(raft::handle_t const& handle, - std::string const& graph_file_full_path, - bool test_weighted, - bool renumber) -{ - return detail:: - read_graph_from_matrix_market_file( - handle, graph_file_full_path, test_weighted, renumber); -} // explicit instantiations template int32_t mm_to_coo(FILE* f, diff --git a/cpp/tests/utilities/misc_utilities.cpp b/cpp/tests/utilities/misc_utilities.cpp new file mode 100644 index 00000000000..14f0df2f35d --- /dev/null +++ b/cpp/tests/utilities/misc_utilities.cpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include + +namespace cugraph { +namespace test { + +std::string getFileName(const std::string& s) +{ + char sep = '/'; +#ifdef _WIN32 + sep = '\\'; +#endif + size_t i = s.rfind(sep, s.length()); + if (i != std::string::npos) { return (s.substr(i + 1, s.length() - i)); } + return (""); +} + +} // namespace test +} // namespace cugraph diff --git a/cpp/tests/utilities/rmat_utilities.cu b/cpp/tests/utilities/rmat_utilities.cu new file mode 100644 index 00000000000..16ea7a486fc --- /dev/null +++ b/cpp/tests/utilities/rmat_utilities.cu @@ -0,0 +1,431 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include + +#include +#include + +#include +#include + +#include + +#include + +namespace cugraph { +namespace test { + +template +std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber) +{ + rmm::device_uvector d_edgelist_rows(0, handle.get_stream()); + rmm::device_uvector d_edgelist_cols(0, handle.get_stream()); + std::tie(d_edgelist_rows, d_edgelist_cols) = + cugraph::experimental::generate_rmat_edgelist( + handle, scale, edge_factor, a, b, c, seed, undirected ? true : false, scramble_vertex_ids); + if (undirected) { + // FIXME: need to symmetrize + CUGRAPH_FAIL("unimplemented."); + } + + rmm::device_uvector d_edgelist_weights(test_weighted ? 
d_edgelist_rows.size() : 0, + handle.get_stream()); + if (test_weighted) { + raft::random::Rng rng(seed + 1); + rng.uniform(d_edgelist_weights.data(), + d_edgelist_weights.size(), + weight_t{0.0}, + weight_t{1.0}, + handle.get_stream()); + } + + rmm::device_uvector d_vertices(static_cast(size_t{1} << scale), + handle.get_stream()); + thrust::sequence(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + d_vertices.begin(), + d_vertices.end(), + vertex_t{0}); + + return generate_graph_from_edgelist( + handle, + std::move(d_vertices), + std::move(d_edgelist_rows), + std::move(d_edgelist_cols), + std::move(d_edgelist_weights), + false, + test_weighted, + renumber); +} + +// explicit instantiations + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool 
scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + 
rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +} // namespace test +} // namespace cugraph diff --git a/cpp/tests/utilities/test_utilities.hpp b/cpp/tests/utilities/test_utilities.hpp index 4682699df2d..37e87c62247 100644 --- a/cpp/tests/utilities/test_utilities.hpp +++ b/cpp/tests/utilities/test_utilities.hpp @@ -19,8 +19,8 @@ #include #include +#include -#include #include #include @@ -130,6 +130,58 @@ read_graph_from_matrix_market_file(raft::handle_t const& handle, bool test_weighted, bool renumber); +template +std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +template +std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +struct rmat_params_t { + size_t scale{}; + size_t edge_factor{}; + double a{}; + double b{}; + double c{}; + uint64_t seed{}; + bool undirected{}; + bool scramble_vertex_ids{}; +}; + +struct input_graph_specifier_t { + enum { MATRIX_MARKET_FILE_PATH, RMAT_PARAMS } tag{}; + std::string graph_file_full_path{}; + rmat_params_t rmat_params{}; +}; + template std::enable_if_t::value, bool> is_valid_vertex(vertex_t num_vertices, vertex_t v) From 128abf0ca220d001d8d6ceb64d2a1ad8b0951fb2 Mon Sep 17 00:00:00 2001 From: Brad Rees <34135411+BradReesWork@users.noreply.github.com> Date: Tue, 9 Mar 2021 08:32:57 -0500 Subject: [PATCH 188/343] Update FAISS to 1.7.0 (#1444) upgraded FAISS to version 1.7 closes #1440 Authors: - Brad Rees (@BradReesWork) - Rick Ratzel (@rlratzel) Approvers: - Alex Fender (@afender) - AJ Schmidt (@ajschmidt8) - Rick Ratzel (@rlratzel) URL: https://github.com/rapidsai/cugraph/pull/1444 --- conda/environments/cugraph_dev_cuda10.1.yml | 2 +- conda/environments/cugraph_dev_cuda10.2.yml | 2 +- conda/environments/cugraph_dev_cuda11.0.yml | 2 +- conda/recipes/libcugraph/meta.yaml | 4 +-- cpp/CMakeLists.txt | 39 +++++++++++---------- 5 files changed, 25 insertions(+), 24 deletions(-) diff --git a/conda/environments/cugraph_dev_cuda10.1.yml b/conda/environments/cugraph_dev_cuda10.1.yml index 255366b0a82..1e5a4609d1a 100644 --- a/conda/environments/cugraph_dev_cuda10.1.yml +++ b/conda/environments/cugraph_dev_cuda10.1.yml @@ -29,7 +29,7 @@ dependencies: - boost - cython>=0.29,<0.30 - pytest -- libfaiss=1.6.3 +- libfaiss=1.7.0 - faiss-proc=*=cuda - scikit-learn>=0.23.1 - colorcet diff --git a/conda/environments/cugraph_dev_cuda10.2.yml b/conda/environments/cugraph_dev_cuda10.2.yml index e64d7c77b7d..ddc66126257 100644 --- a/conda/environments/cugraph_dev_cuda10.2.yml +++ b/conda/environments/cugraph_dev_cuda10.2.yml @@ -29,7 +29,7 @@ dependencies: - boost - cython>=0.29,<0.30 - pytest -- 
libfaiss=1.6.3 +- libfaiss=1.7.0 - faiss-proc=*=cuda - scikit-learn>=0.23.1 - colorcet diff --git a/conda/environments/cugraph_dev_cuda11.0.yml b/conda/environments/cugraph_dev_cuda11.0.yml index 1f05e4762ef..eed08f146ee 100644 --- a/conda/environments/cugraph_dev_cuda11.0.yml +++ b/conda/environments/cugraph_dev_cuda11.0.yml @@ -29,7 +29,7 @@ dependencies: - boost - cython>=0.29,<0.30 - pytest -- libfaiss=1.6.3 +- libfaiss=1.7.0 - faiss-proc=*=cuda - scikit-learn>=0.23.1 - colorcet diff --git a/conda/recipes/libcugraph/meta.yaml b/conda/recipes/libcugraph/meta.yaml index 8f7495eab3c..bd0dde28af9 100644 --- a/conda/recipes/libcugraph/meta.yaml +++ b/conda/recipes/libcugraph/meta.yaml @@ -41,7 +41,7 @@ requirements: - ucx-proc=*=gpu - gtest - faiss-proc=*=cuda - - libfaiss=1.6.3 + - conda-forge::libfaiss=1.7.0 - gmock run: - libcudf={{ minor_version }} @@ -50,7 +50,7 @@ requirements: - ucx-py {{ minor_version }} - ucx-proc=*=gpu - faiss-proc=*=cuda - - libfaiss=1.6.3 + - conda-forge::libfaiss=1.7.0 #test: # commands: diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index b0365c3cfd6..26a8f98e265 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -297,7 +297,7 @@ else(DEFINED ENV{RAFT_PATH}) FetchContent_Declare( raft GIT_REPOSITORY https://github.com/rapidsai/raft.git - GIT_TAG b055cf862a599fd45537d21a309edd8a6e06da4c + GIT_TAG 6455e05b3889db2b495cf3189b33c2b07bfbebf2 SOURCE_SUBDIR raft ) @@ -317,9 +317,9 @@ endif(DEFINED ENV{RAFT_PATH}) # https://cmake.org/cmake/help/v3.0/module/ExternalProject.html -# FIXME: gunrock is the only external package still using ExternalProject -# instead of FetchContent. Consider migrating to FetchContent soon (this may -# require updates to the gunrock cmake files to support this). +# FIXME: gunrock is still using ExternalProject instead of +# FetchContent. Consider migrating to FetchContent soon (this may require +# updates to the gunrock cmake files to support this). include(ExternalProject) @@ -360,31 +360,32 @@ if(BUILD_STATIC_FAISS) "Path to FAISS source directory") ExternalProject_Add(faiss GIT_REPOSITORY https://github.com/facebookresearch/faiss.git - GIT_TAG a5b850dec6f1cd6c88ab467bfd5e87b0cac2e41d + GIT_TAG 7c2d2388a492d65fdda934c7e74ae87acaeed066 CONFIGURE_COMMAND LIBS=-pthread CPPFLAGS=-w LDFLAGS=-L${CMAKE_INSTALL_PREFIX}/lib - ${CMAKE_CURRENT_BINARY_DIR}/faiss/src/faiss/configure - --prefix=${CMAKE_CURRENT_BINARY_DIR}/faiss - --with-blas=${BLAS_LIBRARIES} - --with-cuda=${CUDA_TOOLKIT_ROOT_DIR} - --with-cuda-arch=${FAISS_GPU_ARCHS} - -v + cmake -B build . 
+ -DCMAKE_BUILD_TYPE=Release + -DBUILD_TESTING=OFF + -DFAISS_ENABLE_PYTHON=OFF + -DBUILD_SHARED_LIBS=OFF + -DFAISS_ENABLE_GPU=ON + -DCUDAToolkit_ROOT=${CUDA_TOOLKIT_ROOT_DIR} + -DCUDA_ARCHITECTURES=${FAISS_GPU_ARCHS} + -DBLAS_LIBRARIES=${BLAS_LIBRARIES} PREFIX ${FAISS_DIR} - BUILD_COMMAND make -j${PARALLEL_LEVEL} VERBOSE=1 - BUILD_BYPRODUCTS ${FAISS_DIR}/lib/libfaiss.a + BUILD_COMMAND make -C build -j${PARALLEL_LEVEL} VERBOSE=1 + BUILD_BYPRODUCTS ${FAISS_DIR}/src/faiss/build/faiss/libfaiss.a BUILD_ALWAYS 1 - INSTALL_COMMAND make -s install > /dev/null + INSTALL_COMMAND "" UPDATE_COMMAND "" - BUILD_IN_SOURCE 1 - PATCH_COMMAND patch -p1 -N < ${CMAKE_CURRENT_SOURCE_DIR}/cmake/faiss_cuda11.patch || true) + BUILD_IN_SOURCE 1) ExternalProject_Get_Property(faiss install_dir) add_library(FAISS::FAISS STATIC IMPORTED) - add_dependencies(FAISS::FAISS faiss) set_property(TARGET FAISS::FAISS PROPERTY - IMPORTED_LOCATION ${FAISS_DIR}/lib/libfaiss.a) - set(FAISS_INCLUDE_DIRS "${FAISS_DIR}/src") + IMPORTED_LOCATION ${FAISS_DIR}/src/faiss/build/faiss/libfaiss.a) + set(FAISS_INCLUDE_DIRS "${FAISS_DIR}/src/faiss") else() set(FAISS_INSTALL_DIR ENV{FAISS_ROOT}) find_package(FAISS REQUIRED) From 6096b60cd453bae05680d84a36e5897fb71d80be Mon Sep 17 00:00:00 2001 From: Brad Rees <34135411+BradReesWork@users.noreply.github.com> Date: Wed, 10 Mar 2021 09:10:05 -0500 Subject: [PATCH 189/343] Updated NCCL to version 2.8.4 (#1445) update the environment to use NCCL 2.8.4 Authors: - Brad Rees (@BradReesWork) Approvers: - AJ Schmidt (@ajschmidt8) - @seunghwakang - Seunghwa Kang (@seunghwak) - Rick Ratzel (@rlratzel) URL: https://github.com/rapidsai/cugraph/pull/1445 --- conda/environments/cugraph_dev_cuda10.1.yml | 2 +- conda/environments/cugraph_dev_cuda10.2.yml | 2 +- conda/environments/cugraph_dev_cuda11.0.yml | 2 +- conda/recipes/cugraph/meta.yaml | 4 ++-- conda/recipes/libcugraph/meta.yaml | 4 ++-- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/conda/environments/cugraph_dev_cuda10.1.yml b/conda/environments/cugraph_dev_cuda10.1.yml index 1e5a4609d1a..f26c3dd45d9 100644 --- a/conda/environments/cugraph_dev_cuda10.1.yml +++ b/conda/environments/cugraph_dev_cuda10.1.yml @@ -14,7 +14,7 @@ dependencies: - distributed>=2.12.0 - dask-cuda=0.19* - dask-cudf=0.19* -- nccl>=2.7 +- nccl>=2.8.4 - ucx-py=0.19* - ucx-proc=*=gpu - scipy diff --git a/conda/environments/cugraph_dev_cuda10.2.yml b/conda/environments/cugraph_dev_cuda10.2.yml index ddc66126257..2848cc49dc7 100644 --- a/conda/environments/cugraph_dev_cuda10.2.yml +++ b/conda/environments/cugraph_dev_cuda10.2.yml @@ -14,7 +14,7 @@ dependencies: - distributed>=2.12.0 - dask-cuda=0.19* - dask-cudf=0.19* -- nccl>=2.7 +- nccl>=2.8.4 - ucx-py=0.19* - ucx-proc=*=gpu - scipy diff --git a/conda/environments/cugraph_dev_cuda11.0.yml b/conda/environments/cugraph_dev_cuda11.0.yml index eed08f146ee..82e8b409d13 100644 --- a/conda/environments/cugraph_dev_cuda11.0.yml +++ b/conda/environments/cugraph_dev_cuda11.0.yml @@ -14,7 +14,7 @@ dependencies: - distributed>=2.12.0 - dask-cuda=0.19* - dask-cudf=0.19* -- nccl>=2.7 +- nccl>=2.8.4 - ucx-py=0.19* - ucx-proc=*=gpu - scipy diff --git a/conda/recipes/cugraph/meta.yaml b/conda/recipes/cugraph/meta.yaml index 90f5bed942a..e714b61d774 100644 --- a/conda/recipes/cugraph/meta.yaml +++ b/conda/recipes/cugraph/meta.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2018, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. # Usage: # conda build -c nvidia -c rapidsai -c conda-forge -c defaults . 
@@ -37,7 +37,7 @@ requirements: - dask-cuda {{ minor_version }} - dask>=2.12.0 - distributed>=2.12.0 - - nccl>=2.7 + - nccl>=2.8.4 - ucx-py {{ minor_version }} - ucx-proc=*=gpu diff --git a/conda/recipes/libcugraph/meta.yaml b/conda/recipes/libcugraph/meta.yaml index bd0dde28af9..bb5e4b468a5 100644 --- a/conda/recipes/libcugraph/meta.yaml +++ b/conda/recipes/libcugraph/meta.yaml @@ -36,7 +36,7 @@ requirements: - cudatoolkit {{ cuda_version }}.* - boost-cpp>=1.66 - libcypher-parser - - nccl>=2.7 + - nccl>=2.8.4 - ucx-py {{ minor_version }} - ucx-proc=*=gpu - gtest @@ -46,7 +46,7 @@ requirements: run: - libcudf={{ minor_version }} - {{ pin_compatible('cudatoolkit', max_pin='x.x') }} - - nccl>=2.7 + - nccl>=2.8.4 - ucx-py {{ minor_version }} - ucx-proc=*=gpu - faiss-proc=*=cuda From 4535396403fa20b323f1323d92a58d6d2445f5aa Mon Sep 17 00:00:00 2001 From: AJ Schmidt Date: Wed, 10 Mar 2021 11:18:02 -0500 Subject: [PATCH 190/343] Update Changelog Link (#1446) The tag used for pre-releases was recently changed, so this PR updates the link in the changelog. Authors: - AJ Schmidt (@ajschmidt8) Approvers: - Jordan Jacobelli (@Ethyling) URL: https://github.com/rapidsai/cugraph/pull/1446 --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fe8e09f1e52..0011b99fbf3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # cuGraph 0.19.0 (Date TBD) -Please see https://github.com/rapidsai/cugraph/releases/tag/branch-0.19-latest for the latest changes to this development branch. +Please see https://github.com/rapidsai/cugraph/releases/tag/v0.19.0a for the latest changes to this development branch. # cuGraph 0.18.0 (24 Feb 2021) From 5e3551873339ce9544c7f55f7e77dec150311cdd Mon Sep 17 00:00:00 2001 From: Iroy30 <41401566+Iroy30@users.noreply.github.com> Date: Mon, 15 Mar 2021 11:21:24 -0500 Subject: [PATCH 191/343] Update and Test Renumber bindings (#1427) Authors: - @Iroy30 Approvers: - Seunghwa Kang (@seunghwak) - Andrei Schaffer (@aschaffer) URL: https://github.com/rapidsai/cugraph/pull/1427 --- cpp/include/utilities/cython.hpp | 9 +- cpp/src/utilities/cython.cu | 59 ++-- python/cugraph/dask/common/part_utils.py | 6 +- python/cugraph/dask/structure/renumber.py | 71 ---- python/cugraph/structure/new_number_map.py | 317 ++++++++++++++++++ .../{dask => }/structure/renumber_wrapper.pyx | 245 ++++++++------ 6 files changed, 508 insertions(+), 199 deletions(-) delete mode 100644 python/cugraph/dask/structure/renumber.py create mode 100644 python/cugraph/structure/new_number_map.py rename python/cugraph/{dask => }/structure/renumber_wrapper.pyx (73%) diff --git a/cpp/include/utilities/cython.hpp b/cpp/include/utilities/cython.hpp index e94190897b8..98e850abbf0 100644 --- a/cpp/include/utilities/cython.hpp +++ b/cpp/include/utilities/cython.hpp @@ -190,10 +190,7 @@ struct major_minor_weights_t { // template struct renum_quad_t { - explicit renum_quad_t(raft::handle_t const& handle) - : dv_(0, handle.get_stream()), part_(std::vector(), false, 0, 0, 0, 0) - { - } + explicit renum_quad_t(raft::handle_t const& handle) : dv_(0, handle.get_stream()), part_() {} rmm::device_uvector& get_dv(void) { return dv_; } @@ -298,8 +295,8 @@ struct renum_quad_t { private: rmm::device_uvector dv_; cugraph::experimental::partition_t part_; - vertex_t nv_; - edge_t ne_; + vertex_t nv_{0}; + edge_t ne_{0}; }; // FIXME: finish description for vertex_partition_offsets // diff --git a/cpp/src/utilities/cython.cu b/cpp/src/utilities/cython.cu index 
e95a001cb91..5382b4856f3 100644 --- a/cpp/src/utilities/cython.cu +++ b/cpp/src/utilities/cython.cu @@ -762,28 +762,49 @@ std::unique_ptr> call_shuffle( auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - auto zip_edge = thrust::make_zip_iterator( - thrust::make_tuple(edgelist_major_vertices, edgelist_minor_vertices, edgelist_weights)); - std::unique_ptr> ptr_ret = std::make_unique>(handle); - std::forward_as_tuple( - std::tie(ptr_ret->get_major(), ptr_ret->get_minor(), ptr_ret->get_weights()), - std::ignore) = - cugraph::experimental::groupby_gpuid_and_shuffle_values( - comm, // handle.get_comms(), - zip_edge, - zip_edge + num_edgelist_edges, - [key_func = - cugraph::experimental::detail::compute_gpu_id_from_edge_t{ - is_hypergraph_partitioned, - comm.get_size(), - row_comm.get_size(), - col_comm.get_size()}] __device__(auto val) { - return key_func(thrust::get<0>(val), thrust::get<1>(val)); - }, - handle.get_stream()); + if (edgelist_weights != nullptr) { + auto zip_edge = thrust::make_zip_iterator( + thrust::make_tuple(edgelist_major_vertices, edgelist_minor_vertices, edgelist_weights)); + + std::forward_as_tuple( + std::tie(ptr_ret->get_major(), ptr_ret->get_minor(), ptr_ret->get_weights()), + std::ignore) = + cugraph::experimental::groupby_gpuid_and_shuffle_values( + comm, // handle.get_comms(), + zip_edge, + zip_edge + num_edgelist_edges, + [key_func = + cugraph::experimental::detail::compute_gpu_id_from_edge_t{ + is_hypergraph_partitioned, + comm.get_size(), + row_comm.get_size(), + col_comm.get_size()}] __device__(auto val) { + return key_func(thrust::get<0>(val), thrust::get<1>(val)); + }, + handle.get_stream()); + } else { + auto zip_edge = thrust::make_zip_iterator( + thrust::make_tuple(edgelist_major_vertices, edgelist_minor_vertices)); + + std::forward_as_tuple(std::tie(ptr_ret->get_major(), ptr_ret->get_minor()), + std::ignore) = + cugraph::experimental::groupby_gpuid_and_shuffle_values( + comm, // handle.get_comms(), + zip_edge, + zip_edge + num_edgelist_edges, + [key_func = + cugraph::experimental::detail::compute_gpu_id_from_edge_t{ + is_hypergraph_partitioned, + comm.get_size(), + row_comm.get_size(), + col_comm.get_size()}] __device__(auto val) { + return key_func(thrust::get<0>(val), thrust::get<1>(val)); + }, + handle.get_stream()); + } return ptr_ret; // RVO-ed } diff --git a/python/cugraph/dask/common/part_utils.py b/python/cugraph/dask/common/part_utils.py index 505272fa563..ac0ff6a9a43 100644 --- a/python/cugraph/dask/common/part_utils.py +++ b/python/cugraph/dask/common/part_utils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -83,7 +83,9 @@ async def _extract_partitions(dask_obj, client=None): client = default_client() if client is None else client # dask.dataframe or dask.array if isinstance(dask_obj, (daskDataFrame, daskArray, daskSeries)): - parts = persist_distributed_data(dask_obj, client) + # parts = persist_distributed_data(dask_obj, client) + persisted = client.persist(dask_obj) + parts = futures_of(persisted) # iterable of dask collections (need to colocate them) elif isinstance(dask_obj, collections.Sequence): # NOTE: We colocate (X, y) here by zipping delayed diff --git a/python/cugraph/dask/structure/renumber.py b/python/cugraph/dask/structure/renumber.py deleted file mode 100644 index 606a6bc4dc1..00000000000 --- a/python/cugraph/dask/structure/renumber.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -from dask.distributed import wait, default_client -from cugraph.dask.common.input_utils import get_distributed_data -from cugraph.dask.structure import renumber_wrapper as renumber_w -import cugraph.comms.comms as Comms -import dask_cudf - - -def call_renumber(sID, - data, - num_verts, - num_edges, - is_mnmg): - wid = Comms.get_worker_id(sID) - handle = Comms.get_handle(sID) - return renumber_w.mg_renumber(data[0], - num_verts, - num_edges, - wid, - handle, - is_mnmg) - - -def renumber(input_graph): - - client = default_client() - - ddf = input_graph.edgelist.edgelist_df - - num_edges = len(ddf) - - if isinstance(ddf, dask_cudf.DataFrame): - is_mnmg = True - else: - is_mnmg = False - - num_verts = input_graph.number_of_vertices() - - if is_mnmg: - data = get_distributed_data(ddf) - result = [client.submit(call_renumber, - Comms.get_session_id(), - wf[1], - num_verts, - num_edges, - is_mnmg, - workers=[wf[0]]) - for idx, wf in enumerate(data.worker_to_parts.items())] - wait(result) - ddf = dask_cudf.from_delayed(result) - else: - call_renumber(Comms.get_session_id(), - ddf, - num_verts, - num_edges, - is_mnmg) - return ddf diff --git a/python/cugraph/structure/new_number_map.py b/python/cugraph/structure/new_number_map.py new file mode 100644 index 00000000000..f8a2164d2c4 --- /dev/null +++ b/python/cugraph/structure/new_number_map.py @@ -0,0 +1,317 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from dask.distributed import wait, default_client +from cugraph.dask.common.input_utils import get_distributed_data +from cugraph.structure import renumber_wrapper as c_renumber +import cugraph.comms as Comms +import dask_cudf +import numpy as np +import cudf +import cugraph.structure.number_map as legacy_number_map + + +def call_renumber(sID, + data, + num_edges, + is_mnmg, + store_transposed): + wid = Comms.get_worker_id(sID) + handle = Comms.get_handle(sID) + return c_renumber.renumber(data[0], + num_edges, + wid, + handle, + is_mnmg, + store_transposed) + + +class NumberMap: + + class SingleGPU: + def __init__(self, df, src_col_names, dst_col_names, id_type, + store_transposed): + self.col_names = NumberMap.compute_vals(src_col_names) + self.df = cudf.DataFrame() + self.id_type = id_type + self.store_transposed = store_transposed + self.numbered = False + + def to_internal_vertex_id(self, df, col_names): + tmp_df = df[col_names].rename( + columns=dict(zip(col_names, self.col_names)), copy=False + ) + index_name = NumberMap.generate_unused_column_name(df.columns) + tmp_df[index_name] = tmp_df.index + return ( + self.df.merge(tmp_df, on=self.col_names, how="right") + .sort_values(index_name) + .drop(columns=[index_name]) + .reset_index()["id"] + ) + + def from_internal_vertex_id( + self, df, internal_column_name, external_column_names + ): + tmp_df = self.df.merge( + df, + right_on=internal_column_name, + left_on="id", + how="right", + ) + if internal_column_name != "id": + tmp_df = tmp_df.drop(columns=["id"]) + if external_column_names is None: + return tmp_df + else: + return tmp_df.rename( + columns=dict(zip(self.col_names, external_column_names)), + copy=False, + ) + + class MultiGPU: + def __init__( + self, ddf, src_col_names, dst_col_names, id_type, store_transposed + ): + self.col_names = NumberMap.compute_vals(src_col_names) + self.val_types = NumberMap.compute_vals_types(ddf, src_col_names) + self.val_types["count"] = np.int32 + self.id_type = id_type + self.store_transposed = store_transposed + self.numbered = False + + def to_internal_vertex_id(self, ddf, col_names): + return self.ddf.merge( + ddf, + right_on=col_names, + left_on=self.col_names, + how="right", + )["global_id"] + + def from_internal_vertex_id( + self, df, internal_column_name, external_column_names + ): + tmp_df = self.ddf.merge( + df, + right_on=internal_column_name, + left_on="global_id", + how="right" + ).map_partitions(lambda df: df.drop(columns="global_id")) + + if external_column_names is None: + return tmp_df + else: + return tmp_df.map_partitions( + lambda df: + df.rename( + columns=dict( + zip(self.col_names, external_column_names) + ), + copy=False + ) + ) + + def __init__(self, id_type=np.int32): + self.implementation = None + self.id_type = id_type + + def compute_vals_types(df, column_names): + """ + Helper function to compute internal column names and types + """ + return { + str(i): df[column_names[i]].dtype for i in range(len(column_names)) + } + + def generate_unused_column_name(column_names): + """ + Helper function to generate an unused column name + """ + name = 'x' + while name in column_names: + name = name + "x" + + return name + + def compute_vals(column_names): + """ + Helper function to compute internal column names based on external + column names + """ + return [str(i) for i in range(len(column_names))] + + def renumber(df, src_col_names, dst_col_names, preserve_order=False, + store_transposed=False): + + if isinstance(src_col_names, list): + renumber_type = 'legacy' + # elif 
isinstance(df[src_col_names].dtype, string): + # renumber_type = 'legacy' + else: + renumber_type = 'experimental' + + if renumber_type == 'legacy': + renumber_map, renumbered_df = legacy_number_map.renumber( + df, + src_col_names, + dst_col_names, + preserve_order, + store_transposed) + # Add shuffling once algorithms are switched to new renumber + # (ddf, + # num_verts, + # partition_row_size, + # partition_col_size, + # vertex_partition_offsets) = shuffle(input_graph, transposed=True) + return renumber_map, renumbered_df + + renumber_map = NumberMap() + if not isinstance(src_col_names, list): + src_col_names = [src_col_names] + dst_col_names = [dst_col_names] + if type(df) is cudf.DataFrame: + renumber_map.implementation = NumberMap.SingleGPU( + df, src_col_names, dst_col_names, renumber_map.id_type, + store_transposed + ) + elif type(df) is dask_cudf.DataFrame: + renumber_map.implementation = NumberMap.MultiGPU( + df, src_col_names, dst_col_names, renumber_map.id_type, + store_transposed + ) + else: + raise Exception("df must be cudf.DataFrame or dask_cudf.DataFrame") + + num_edges = len(df) + + if isinstance(df, dask_cudf.DataFrame): + is_mnmg = True + else: + is_mnmg = False + + if is_mnmg: + client = default_client() + data = get_distributed_data(df) + result = [(client.submit(call_renumber, + Comms.get_session_id(), + wf[1], + num_edges, + is_mnmg, + store_transposed, + workers=[wf[0]]), wf[0]) + for idx, wf in enumerate(data.worker_to_parts.items())] + wait(result) + + def get_renumber_map(data): + return data[0] + + def get_renumbered_df(data): + return data[1] + + renumbering_map = dask_cudf.from_delayed( + [client.submit(get_renumber_map, + data, + workers=[wf]) + for (data, wf) in result]) + renumbered_df = dask_cudf.from_delayed( + [client.submit(get_renumbered_df, + data, + workers=[wf]) + for (data, wf) in result]) + + renumber_map.implementation.ddf = renumbering_map + renumber_map.implementation.numbered = True + + return renumbered_df, renumber_map + else: + renumbering_map, renumbered_df = c_renumber.renumber( + df, + num_edges, + 0, + Comms.get_default_handle(), + is_mnmg, + store_transposed) + renumber_map.implementation.df = renumbering_map + renumber_map.implementation.numbered = True + return renumbered_df, renumber_map + + def unrenumber(self, df, column_name, preserve_order=False): + """ + Given a DataFrame containing internal vertex ids in the identified + column, replace this with external vertex ids. If the renumbering + is from a single column, the output dataframe will use the same + name for the external vertex identifiers. If the renumbering is from + a multi-column input, the output columns will be labeled 0 through + n-1 with a suffix of _column_name. + Note that this function does not guarantee order or partitioning in + multi-GPU mode. + Parameters + ---------- + df: cudf.DataFrame or dask_cudf.DataFrame + A DataFrame containing internal vertex identifiers that will be + converted into external vertex identifiers. + column_name: string + Name of the column containing the internal vertex id. + preserve_order: (optional) bool + If True, preserve the ourder of the rows in the output + DataFrame to match the input DataFrame + Returns + --------- + df : cudf.DataFrame or dask_cudf.DataFrame + The original DataFrame columns exist unmodified. The external + vertex identifiers are added to the DataFrame, the internal + vertex identifier column is removed from the dataframe. 
+ Examples + -------- + >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', + >>> dtype=['int32', 'int32', 'float32'], header=None) + >>> + >>> df, number_map = NumberMap.renumber(df, '0', '1') + >>> + >>> G = cugraph.Graph() + >>> G.from_cudf_edgelist(df, 'src', 'dst') + >>> + >>> pr = cugraph.pagerank(G, alpha = 0.85, max_iter = 500, + >>> tol = 1.0e-05) + >>> + >>> pr = number_map.unrenumber(pr, 'vertex') + >>> + """ + if len(self.col_names) == 1: + # Output will be renamed to match input + mapping = {"0": column_name} + else: + # Output will be renamed to ${i}_${column_name} + mapping = {} + for nm in self.col_names: + mapping[nm] = nm + "_" + column_name + + if preserve_order: + index_name = NumberMap.generate_unused_column_name(df) + df[index_name] = df.index + + df = self.from_internal_vertex_id(df, column_name, drop=True) + + if preserve_order: + df = df.sort_values( + index_name + ).drop(columns=index_name).reset_index(drop=True) + + if type(df) is dask_cudf.DataFrame: + return df.map_partitions( + lambda df: df.rename(columns=mapping, copy=False) + ) + else: + return df.rename(columns=mapping, copy=False) diff --git a/python/cugraph/dask/structure/renumber_wrapper.pyx b/python/cugraph/structure/renumber_wrapper.pyx similarity index 73% rename from python/cugraph/dask/structure/renumber_wrapper.pyx rename to python/cugraph/structure/renumber_wrapper.pyx index 1ab290cfb10..302fcfe583b 100644 --- a/python/cugraph/dask/structure/renumber_wrapper.pyx +++ b/python/cugraph/structure/renumber_wrapper.pyx @@ -25,41 +25,45 @@ import numpy as np from libcpp.utility cimport move from rmm._lib.device_buffer cimport device_buffer, DeviceBuffer -cdef renumber_helper(shuffled_vertices_t* ptr_maj_min_w): +cdef renumber_helper(shuffled_vertices_t* ptr_maj_min_w, vertex_t, weights): # extract shuffled result: # cdef pair[unique_ptr[device_buffer], size_t] pair_s_major = deref(ptr_maj_min_w).get_major_wrap() cdef pair[unique_ptr[device_buffer], size_t] pair_s_minor = deref(ptr_maj_min_w).get_minor_wrap() cdef pair[unique_ptr[device_buffer], size_t] pair_s_weights = deref(ptr_maj_min_w).get_weights_wrap() - shufled_major_buffer = DeviceBuffer.c_from_unique_ptr(move(pair_s_major.first)) - shufled_major_buffer = Buffer(shufled_major_buffer) + shuffled_major_buffer = DeviceBuffer.c_from_unique_ptr(move(pair_s_major.first)) + shuffled_major_buffer = Buffer(shuffled_major_buffer) - shufled_major_series = cudf.Series(data=shufled_major_buffer, dtype=vertex_t) + shuffled_major_series = cudf.Series(data=shuffled_major_buffer, dtype=vertex_t) - shufled_minor_buffer = DeviceBuffer.c_from_unique_ptr(move(pair_s_minor.first)) - shufled_minor_buffer = Buffer(shufled_minor_buffer) - - shufled_minor_series = cudf.Series(data=shufled_minor_buffer, dtype=vertex_t) - - shufled_weights_buffer = DeviceBuffer.c_from_unique_ptr(move(pair_s_weights.first)) - shufled_weights_buffer = Buffer(shufled_weights_buffer) - - shufled_weights_series = cudf.Series(data=shufled_weights_buffer, dtype=weight_t) + shuffled_minor_buffer = DeviceBuffer.c_from_unique_ptr(move(pair_s_minor.first)) + shuffled_minor_buffer = Buffer(shuffled_minor_buffer) + shuffled_minor_series = cudf.Series(data=shuffled_minor_buffer, dtype=vertex_t) + shuffled_df = cudf.DataFrame() shuffled_df['src']=shuffled_major_series shuffled_df['dst']=shuffled_minor_series - shuffled_df['weights']= shuffled_weights_series + + if weights is not None: + weight_t = weights.dtype + shuffled_weights_buffer = DeviceBuffer.c_from_unique_ptr(move(pair_s_weights.first)) 
+ shuffled_weights_buffer = Buffer(shuffled_weights_buffer) + + shuffled_weights_series = cudf.Series(data=shuffled_weights_buffer, dtype=weight_t) + + shuffled_df['weights']= shuffled_weights_series return shuffled_df -def mg_renumber(input_df, # maybe use cpdef ? - num_global_verts, - num_global_edges, - rank, - handle, - is_multi_gpu): + +def renumber(input_df, # maybe use cpdef ? + num_global_edges, + rank, + handle, + is_multi_gpu, + transposed): """ Call MNMG renumber """ @@ -67,11 +71,16 @@ def mg_renumber(input_df, # maybe use cpdef ? # TODO: get handle_t out of handle... handle_ptr = handle_size_t - src = input_df['src'] - dst = input_df['dst'] + if not transposed: + major_vertices = input_df['src'] + minor_vertices = input_df['dst'] + else: + major_vertices = input_df['dst'] + minor_vertices = input_df['src'] + cdef uintptr_t c_edge_weights = NULL # set below... - vertex_t = src.dtype + vertex_t = major_vertices.dtype if num_global_edges > (2**31 - 1): edge_t = np.dtype("int64") else: @@ -81,6 +90,7 @@ def mg_renumber(input_df, # maybe use cpdef ? weight_t = weights.dtype c_edge_weights = weights.__cuda_array_interface__['data'][0] else: + weights = None weight_t = np.dtype("float32") if (vertex_t != np.dtype("int32") and vertex_t != np.dtype("int64")): @@ -93,10 +103,10 @@ def mg_renumber(input_df, # maybe use cpdef ? raise Exception("Incompatible vertex_t and edge_t types.") # FIXME: needs to be edge_t type not int - cdef int num_partition_edges = len(src) + cdef int num_partition_edges = len(major_vertices) - cdef uintptr_t c_src_vertices = src.__cuda_array_interface__['data'][0] - cdef uintptr_t c_dst_vertices = dst.__cuda_array_interface__['data'][0] + cdef uintptr_t c_major_vertices = major_vertices.__cuda_array_interface__['data'][0] + cdef uintptr_t c_minor_vertices = minor_vertices.__cuda_array_interface__['data'][0] cdef bool is_hyper_partitioned = False # for now @@ -132,27 +142,29 @@ def mg_renumber(input_df, # maybe use cpdef ? if (vertex_t == np.dtype("int32")): if ( edge_t == np.dtype("int32")): if( weight_t == np.dtype("float32")): - ptr_shuffled_32_32.reset(call_shuffle[int, int, float](deref(handle_ptr), - c_src_vertices, - c_dst_vertices, - c_edge_weights, - num_partition_edges, - is_hyper_partitioned).release()) - - shuffled_df = renumber_helper(ptr_shuffled_32_32.get()) - - shuffled_src = shufled_df['src'] - shuffled_dst = shufled_df['dst'] - + if(is_multi_gpu): + ptr_shuffled_32_32.reset(call_shuffle[int, int, float](deref(handle_ptr), + c_major_vertices, + c_minor_vertices, + c_edge_weights, + num_partition_edges, + is_hyper_partitioned).release()) + shuffled_df = renumber_helper(ptr_shuffled_32_32.get(), vertex_t, weights) + else: + shuffled_df = input_df + + shuffled_src = shuffled_df['src'] + shuffled_dst = shuffled_df['dst'] + num_partition_edges = len(shuffled_df) + shuffled_major = shuffled_src.__cuda_array_interface__['data'][0] shuffled_minor = shuffled_dst.__cuda_array_interface__['data'][0] - ptr_renum_quad_32_32.reset(call_renumber[int, int](deref(handle_ptr), shuffled_major, shuffled_minor, num_partition_edges, is_hyper_partitioned, - do_check, + 1, mg_flag).release()) pair_original = ptr_renum_quad_32_32.get().get_dv_wrap() # original vertices: see helper @@ -174,8 +186,9 @@ def mg_renumber(input_df, # maybe use cpdef ? 
uniq_partition_vector_32.get()[0].at(rank_indx+1)), dtype=vertex_t) else: - new_series = cudf.Series(np.arange(0, num_global_verts), dtype=vertex_t) - + new_series = cudf.Series(np.arange(uniq_partition_vector_32.get()[0].at(0), + uniq_partition_vector_32.get()[0].at(1)), + dtype=vertex_t) # create new cudf df # # and add the previous series to it: @@ -185,18 +198,23 @@ def mg_renumber(input_df, # maybe use cpdef ? renumbered_map['new_ids'] = new_series return renumbered_map, shuffled_df + elif( weight_t == np.dtype("float64")): - ptr_shuffled_32_64.reset(call_shuffle[int, int, double](deref(handle_ptr), - c_src_vertices, - c_dst_vertices, - c_edge_weights, - num_partition_edges, - is_hyper_partitioned).release()) - - shuffled_df = renumber_helper(ptr_shuffled_32_64.get()) - - shuffled_src = shufled_df['src'] - shuffled_dst = shufled_df['dst'] + if(is_multi_gpu): + ptr_shuffled_32_64.reset(call_shuffle[int, int, double](deref(handle_ptr), + c_major_vertices, + c_minor_vertices, + c_edge_weights, + num_partition_edges, + is_hyper_partitioned).release()) + + shuffled_df = renumber_helper(ptr_shuffled_32_64.get(), vertex_t, weights) + else: + shuffled_df = input_df + + shuffled_src = shuffled_df['src'] + shuffled_dst = shuffled_df['dst'] + num_partition_edges = len(shuffled_df) shuffled_major = shuffled_src.__cuda_array_interface__['data'][0] shuffled_minor = shuffled_dst.__cuda_array_interface__['data'][0] @@ -228,7 +246,9 @@ def mg_renumber(input_df, # maybe use cpdef ? uniq_partition_vector_32.get()[0].at(rank_indx+1)), dtype=vertex_t) else: - new_series = cudf.Series(np.arange(0, num_global_verts), dtype=vertex_t) + new_series = cudf.Series(np.arange(uniq_partition_vector_32.get()[0].at(0), + uniq_partition_vector_32.get()[0].at(1)), + dtype=vertex_t) # create new cudf df # @@ -241,17 +261,21 @@ def mg_renumber(input_df, # maybe use cpdef ? return renumbered_map, shuffled_df elif ( edge_t == np.dtype("int64")): if( weight_t == np.dtype("float32")): - ptr_shuffled_32_32.reset(call_shuffle[int, long, float](deref(handle_ptr), - c_src_vertices, - c_dst_vertices, - c_edge_weights, - num_partition_edges, - is_hyper_partitioned).release()) - - shuffled_df = renumber_helper(ptr_shuffled_32_32.get()) - - shuffled_src = shufled_df['src'] - shuffled_dst = shufled_df['dst'] + if(is_multi_gpu): + ptr_shuffled_32_32.reset(call_shuffle[int, long, float](deref(handle_ptr), + c_major_vertices, + c_minor_vertices, + c_edge_weights, + num_partition_edges, + is_hyper_partitioned).release()) + + shuffled_df = renumber_helper(ptr_shuffled_32_32.get(), vertex_t, weights) + else: + shuffled_df = input_df + + shuffled_src = shuffled_df['src'] + shuffled_dst = shuffled_df['dst'] + num_partition_edges = len(shuffled_df) shuffled_major = shuffled_src.__cuda_array_interface__['data'][0] shuffled_minor = shuffled_dst.__cuda_array_interface__['data'][0] @@ -283,8 +307,10 @@ def mg_renumber(input_df, # maybe use cpdef ? uniq_partition_vector_32.get()[0].at(rank_indx+1)), dtype=vertex_t) else: - new_series = cudf.Series(np.arange(0, num_global_verts), dtype=vertex_t) - + new_series = cudf.Series(np.arange(uniq_partition_vector_32.get()[0].at(0), + uniq_partition_vector_32.get()[0].at(1)), + dtype=vertex_t) + # create new cudf df # # and add the previous series to it: @@ -295,17 +321,21 @@ def mg_renumber(input_df, # maybe use cpdef ? 
return renumbered_map, shuffled_df elif( weight_t == np.dtype("float64")): - ptr_shuffled_32_64.reset(call_shuffle[int, long, double](deref(handle_ptr), - c_src_vertices, - c_dst_vertices, - c_edge_weights, - num_partition_edges, - is_hyper_partitioned).release()) - - shuffled_df = renumber_helper(ptr_shuffled_32_64.get()) + if(is_multi_gpu): + ptr_shuffled_32_64.reset(call_shuffle[int, long, double](deref(handle_ptr), + c_major_vertices, + c_minor_vertices, + c_edge_weights, + num_partition_edges, + is_hyper_partitioned).release()) + + shuffled_df = renumber_helper(ptr_shuffled_32_64.get(), vertex_t, weights) + else: + shuffled_df = input_df - shuffled_src = shufled_df['src'] - shuffled_dst = shufled_df['dst'] + shuffled_src = shuffled_df['src'] + shuffled_dst = shuffled_df['dst'] + num_partition_edges = len(shuffled_df) shuffled_major = shuffled_src.__cuda_array_interface__['data'][0] shuffled_minor = shuffled_dst.__cuda_array_interface__['data'][0] @@ -337,8 +367,9 @@ def mg_renumber(input_df, # maybe use cpdef ? uniq_partition_vector_32.get()[0].at(rank_indx+1)), dtype=vertex_t) else: - new_series = cudf.Series(np.arange(0, num_global_verts), dtype=vertex_t) - + new_series = cudf.Series(np.arange(uniq_partition_vector_32.get()[0].at(0), + uniq_partition_vector_32.get()[0].at(1)), + dtype=vertex_t) # create new cudf df # # and add the previous series to it: @@ -351,17 +382,21 @@ def mg_renumber(input_df, # maybe use cpdef ? elif (vertex_t == np.dtype("int64")): if ( edge_t == np.dtype("int64")): if( weight_t == np.dtype("float32")): - ptr_shuffled_64_32.reset(call_shuffle[long, long, float](deref(handle_ptr), - c_src_vertices, - c_dst_vertices, - c_edge_weights, - num_partition_edges, - is_hyper_partitioned).release()) - - shuffled_df = renumber_helper(ptr_shuffled_64_32.get()) + if(is_multi_gpu): + ptr_shuffled_64_32.reset(call_shuffle[long, long, float](deref(handle_ptr), + c_major_vertices, + c_minor_vertices, + c_edge_weights, + num_partition_edges, + is_hyper_partitioned).release()) + + shuffled_df = renumber_helper(ptr_shuffled_64_32.get(), vertex_t, weights) + else: + shuffled_df = input_df - shuffled_src = shufled_df['src'] - shuffled_dst = shufled_df['dst'] + shuffled_src = shuffled_df['src'] + shuffled_dst = shuffled_df['dst'] + num_partition_edges = len(shuffled_df) shuffled_major = shuffled_src.__cuda_array_interface__['data'][0] shuffled_minor = shuffled_dst.__cuda_array_interface__['data'][0] @@ -393,7 +428,9 @@ def mg_renumber(input_df, # maybe use cpdef ? uniq_partition_vector_64.get()[0].at(rank_indx+1)), dtype=vertex_t) else: - new_series = cudf.Series(np.arange(0, num_global_verts), dtype=vertex_t) + new_series = cudf.Series(np.arange(uniq_partition_vector_32.get()[0].at(0), + uniq_partition_vector_32.get()[0].at(1)), + dtype=vertex_t) # create new cudf df # @@ -405,17 +442,21 @@ def mg_renumber(input_df, # maybe use cpdef ? 
return renumbered_map, shuffled_df elif( weight_t == np.dtype("float64")): - ptr_shuffled_64_64.reset(call_shuffle[long, long, double](deref(handle_ptr), - c_src_vertices, - c_dst_vertices, - c_edge_weights, - num_partition_edges, - is_hyper_partitioned).release()) - - shuffled_df = renumber_helper(ptr_shuffled_64_64.get()) + if(is_multi_gpu): + ptr_shuffled_64_64.reset(call_shuffle[long, long, double](deref(handle_ptr), + c_major_vertices, + c_minor_vertices, + c_edge_weights, + num_partition_edges, + is_hyper_partitioned).release()) + + shuffled_df = renumber_helper(ptr_shuffled_64_64.get(), vertex_t, weights) + else: + shuffled_df = input_df - shuffled_src = shufled_df['src'] - shuffled_dst = shufled_df['dst'] + shuffled_src = shuffled_df['src'] + shuffled_dst = shuffled_df['dst'] + num_partition_edges = len(shuffled_df) shuffled_major = shuffled_src.__cuda_array_interface__['data'][0] shuffled_minor = shuffled_dst.__cuda_array_interface__['data'][0] @@ -447,7 +488,9 @@ def mg_renumber(input_df, # maybe use cpdef ? uniq_partition_vector_64.get()[0].at(rank_indx+1)), dtype=vertex_t) else: - new_series = cudf.Series(np.arange(0, num_global_verts), dtype=vertex_t) + new_series = cudf.Series(np.arange(uniq_partition_vector_32.get()[0].at(0), + uniq_partition_vector_32.get()[0].at(1)), + dtype=vertex_t) # create new cudf df # From 591f3fb31ded25634efdfc14821eeb5b367a757f Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Mon, 15 Mar 2021 12:21:41 -0400 Subject: [PATCH 192/343] Fix bugs in copy_v_transform_reduce_key_aggregated_out_nbr & groupby_gpuid_and_shuffle (#1434) Related to addressing Issue https://github.com/rapidsai/cugraph/issues/1381. Fix bugs in MNMG graph primitives to support Louvain. Authors: - Seunghwa Kang (@seunghwak) Approvers: - Chuck Hastings (@ChuckHastings) - Brad Rees (@BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1434 --- ...ransform_reduce_key_aggregated_out_nbr.cuh | 102 ++++-------- cpp/include/utilities/collect_comm.cuh | 153 ++++++++++++++++++ cpp/include/utilities/shuffle_comm.cuh | 28 ++++ cpp/include/utilities/thrust_tuple_utils.cuh | 3 +- 4 files changed, 215 insertions(+), 71 deletions(-) create mode 100644 cpp/include/utilities/collect_comm.cuh diff --git a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh index 8490df1d17d..11cf2cb1137 100644 --- a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh +++ b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh @@ -88,16 +88,17 @@ __global__ void for_all_major_for_all_nbr_low_degree( // in-place reduce_by_key vertex_t key_idx{0}; key_aggregated_edge_weights[local_offset + key_idx] = - weights != nullptr ? weights[0] : weight_t{1.0}; + weights != nullptr ? key_aggregated_edge_weights[local_offset] : weight_t{1.0}; + for (edge_t i = 1; i < local_degree; ++i) { if (minor_keys[local_offset + i] == minor_keys[local_offset + key_idx]) { key_aggregated_edge_weights[local_offset + key_idx] += - weights != nullptr ? weights[i] : weight_t{1.0}; + weights != nullptr ? key_aggregated_edge_weights[local_offset + i] : weight_t{1.0}; } else { ++key_idx; minor_keys[local_offset + key_idx] = minor_keys[local_offset + i]; key_aggregated_edge_weights[local_offset + key_idx] = - weights != nullptr ? weights[i] : weight_t{1.0}; + weights != nullptr ? 
key_aggregated_edge_weights[local_offset + i] : weight_t{1.0}; } } thrust::fill(thrust::seq, @@ -170,6 +171,7 @@ __global__ void for_all_major_for_all_nbr_low_degree( template insert(pair_first, pair_first + thrust::distance(map_key_first, map_key_last)); - if (GraphViewType::is_multi_gpu) { - auto& comm = handle.get_comms(); - auto const comm_size = comm.get_size(); - - rmm::device_uvector unique_keys( - graph_view.get_number_of_local_adj_matrix_partition_cols(), handle.get_stream()); - thrust::copy( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - adj_matrix_col_key_first, - adj_matrix_col_key_first + graph_view.get_number_of_local_adj_matrix_partition_cols(), - unique_keys.begin()); - thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - unique_keys.begin(), - unique_keys.end()); - auto last = thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - unique_keys.begin(), - unique_keys.end()); - unique_keys.resize(thrust::distance(unique_keys.begin(), last), handle.get_stream()); - - rmm::device_uvector rx_unique_keys(0, handle.get_stream()); - std::vector rx_value_counts{}; - std::tie(rx_unique_keys, rx_value_counts) = groupby_gpuid_and_shuffle_values( - comm, - unique_keys.begin(), - unique_keys.end(), - [key_func = detail::compute_gpu_id_from_vertex_t{comm_size}] __device__(auto val) { - return key_func(val); - }, - handle.get_stream()); - - rmm::device_uvector values_for_unique_keys(rx_unique_keys.size(), handle.get_stream()); - - CUDA_TRY(cudaStreamSynchronize( - handle.get_stream())); // cuco::static_map currently does not take stream - - kv_map_ptr->find(rx_unique_keys.begin(), rx_unique_keys.end(), values_for_unique_keys.begin()); - - rmm::device_uvector rx_values_for_unique_keys(0, handle.get_stream()); - - std::tie(rx_values_for_unique_keys, std::ignore) = - shuffle_values(comm, values_for_unique_keys.begin(), rx_value_counts, handle.get_stream()); - - CUDA_TRY(cudaStreamSynchronize( - handle.get_stream())); // cuco::static_map currently does not take stream - - kv_map_ptr.reset(); - - kv_map_ptr = std::make_unique>( - static_cast(static_cast(unique_keys.size()) / load_factor), - invalid_vertex_id::value, - invalid_vertex_id::value); - - auto pair_first = thrust::make_transform_iterator( - thrust::make_zip_iterator( - thrust::make_tuple(unique_keys.begin(), rx_values_for_unique_keys.begin())), - [] __device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); - }); - - kv_map_ptr->insert(pair_first, pair_first + unique_keys.size()); - } - // 2. aggregate each vertex out-going edges based on keys and transform-reduce. 
auto loop_count = size_t{1}; @@ -382,9 +322,31 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( auto val) { return key_func(thrust::get<1>(val)); }, handle.get_stream()); - tmp_major_vertices = std::move(rx_major_vertices); - tmp_minor_keys = std::move(rx_minor_keys); - tmp_key_aggregated_edge_weights = std::move(rx_key_aggregated_edge_weights); + auto pair_first = thrust::make_zip_iterator( + thrust::make_tuple(rx_major_vertices.begin(), rx_minor_keys.begin())); + thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + pair_first, + pair_first + rx_major_vertices.size(), + rx_key_aggregated_edge_weights.begin()); + tmp_major_vertices.resize(rx_major_vertices.size(), handle.get_stream()); + tmp_minor_keys.resize(tmp_major_vertices.size(), handle.get_stream()); + tmp_key_aggregated_edge_weights.resize(tmp_major_vertices.size(), handle.get_stream()); + auto pair_it = + thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + pair_first, + pair_first + rx_major_vertices.size(), + rx_key_aggregated_edge_weights.begin(), + thrust::make_zip_iterator(thrust::make_tuple( + tmp_major_vertices.begin(), tmp_minor_keys.begin())), + tmp_key_aggregated_edge_weights.begin()); + tmp_major_vertices.resize( + thrust::distance(tmp_key_aggregated_edge_weights.begin(), thrust::get<1>(pair_it)), + handle.get_stream()); + tmp_minor_keys.resize(tmp_major_vertices.size(), handle.get_stream()); + tmp_key_aggregated_edge_weights.resize(tmp_major_vertices.size(), handle.get_stream()); + tmp_major_vertices.shrink_to_fit(handle.get_stream()); + tmp_minor_keys.shrink_to_fit(handle.get_stream()); + tmp_key_aggregated_edge_weights.shrink_to_fit(handle.get_stream()); } auto tmp_e_op_result_buffer = diff --git a/cpp/include/utilities/collect_comm.cuh b/cpp/include/utilities/collect_comm.cuh new file mode 100644 index 00000000000..5ca58ebeb17 --- /dev/null +++ b/cpp/include/utilities/collect_comm.cuh @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include +#include +#include + +namespace cugraph { +namespace experimental { + +// for key = [map_key_first, map_key_last), key_to_gpu_id_op(key) should be coincide with +// comm.get_rank() +template +decltype(allocate_dataframe_buffer::value_type>( + 0, cudaStream_t{nullptr})) +collect_values_for_keys(raft::comms::comms_t const &comm, + VertexIterator0 map_key_first, + VertexIterator0 map_key_last, + ValueIterator map_value_first, + VertexIterator1 collect_key_first, + VertexIterator1 collect_key_last, + KeyToGPUIdOp key_to_gpu_id_op, + cudaStream_t stream) +{ + using vertex_t = typename std::iterator_traits::value_type; + static_assert( + std::is_same::value_type, vertex_t>::value); + using value_t = typename std::iterator_traits::value_type; + + double constexpr load_factor = 0.7; + + // FIXME: we may compare the performance & memory footprint of this hash based approach vs binary + // search based approach + + // 1. build a cuco::static_map object for the map k, v pairs. + + auto kv_map_ptr = std::make_unique>( + static_cast(static_cast(thrust::distance(map_key_first, map_key_last)) / + load_factor), + invalid_vertex_id::value, + invalid_vertex_id::value); + { + auto pair_first = thrust::make_transform_iterator( + thrust::make_zip_iterator(thrust::make_tuple(map_key_first, map_value_first)), + [] __device__(auto val) { + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + }); + kv_map_ptr->insert(pair_first, pair_first + thrust::distance(map_key_first, map_key_last)); + } + + // 2. collect values for the unique keys in [collect_key_first, collect_key_last) + + rmm::device_uvector unique_keys(thrust::distance(collect_key_first, collect_key_last), + stream); + thrust::copy( + rmm::exec_policy(stream)->on(stream), collect_key_first, collect_key_last, unique_keys.begin()); + // FIXME: sort and unique are unnecessary if the keys in [collect_key_first, collect_key_last) are + // already unique, if this cost becomes a performance bottlenec, we may add + // collect_values_for_unique_keys in the future + thrust::sort(rmm::exec_policy(stream)->on(stream), unique_keys.begin(), unique_keys.end()); + unique_keys.resize( + thrust::distance( + unique_keys.begin(), + thrust::unique(rmm::exec_policy(stream)->on(stream), unique_keys.begin(), unique_keys.end())), + stream); + + rmm::device_uvector values_for_unique_keys(0, stream); + { + rmm::device_uvector rx_unique_keys(0, stream); + std::vector rx_value_counts{}; + std::tie(rx_unique_keys, rx_value_counts) = groupby_gpuid_and_shuffle_values( + comm, + unique_keys.begin(), + unique_keys.end(), + [key_to_gpu_id_op] __device__(auto val) { return key_to_gpu_id_op(val); }, + stream); + + rmm::device_uvector values_for_rx_unique_keys(rx_unique_keys.size(), stream); + + CUDA_TRY(cudaStreamSynchronize(stream)); // cuco::static_map currently does not take stream + + kv_map_ptr->find( + rx_unique_keys.begin(), rx_unique_keys.end(), values_for_rx_unique_keys.begin()); + + rmm::device_uvector rx_values_for_unique_keys(0, stream); + std::tie(rx_values_for_unique_keys, std::ignore) = + shuffle_values(comm, values_for_rx_unique_keys.begin(), rx_value_counts, stream); + + values_for_unique_keys = std::move(rx_values_for_unique_keys); + } + + // 3. re-build a cuco::static_map object for the k, v pairs in unique_keys, + // values_for_unique_keys. 
+ + CUDA_TRY(cudaStreamSynchronize(stream)); // cuco::static_map currently does not take stream + + kv_map_ptr.reset(); + + kv_map_ptr = std::make_unique>( + static_cast(static_cast(unique_keys.size()) / load_factor), + invalid_vertex_id::value, + invalid_vertex_id::value); + { + auto pair_first = thrust::make_transform_iterator( + thrust::make_zip_iterator( + thrust::make_tuple(unique_keys.begin(), values_for_unique_keys.begin())), + [] __device__(auto val) { + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + }); + + kv_map_ptr->insert(pair_first, pair_first + unique_keys.size()); + } + + // 4. find values for [collect_key_first, collect_key_last) + + auto value_buffer = allocate_dataframe_buffer( + thrust::distance(collect_key_first, collect_key_last), stream); + kv_map_ptr->find( + collect_key_first, collect_key_last, get_dataframe_buffer_begin(value_buffer)); + + return value_buffer; +} + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/include/utilities/shuffle_comm.cuh b/cpp/include/utilities/shuffle_comm.cuh index da86f76b11d..8c363c9a346 100644 --- a/cpp/include/utilities/shuffle_comm.cuh +++ b/cpp/include/utilities/shuffle_comm.cuh @@ -228,6 +228,15 @@ auto shuffle_values(raft::comms::comms_t const &comm, rx_src_ranks, stream); + if (rx_counts.size() < static_cast(comm_size)) { + std::vector tmp_rx_counts(comm_size, size_t{0}); + for (size_t i = 0; i < rx_src_ranks.size(); ++i) { + assert(rx_src_ranks[i] < comm_size); + tmp_rx_counts[rx_src_ranks[i]] = rx_counts[i]; + } + rx_counts = std::move(tmp_rx_counts); + } + return std::make_tuple(std::move(rx_value_buffer), rx_counts); } @@ -271,6 +280,14 @@ auto groupby_gpuid_and_shuffle_values(raft::comms::comms_t const &comm, rx_src_ranks, stream); + if (rx_counts.size() < static_cast(comm_size)) { + std::vector tmp_rx_counts(comm_size, size_t{0}); + for (size_t i = 0; i < rx_src_ranks.size(); ++i) { + tmp_rx_counts[rx_src_ranks[i]] = rx_counts[i]; + } + rx_counts = std::move(tmp_rx_counts); + } + return std::make_tuple(std::move(rx_value_buffer), rx_counts); } @@ -282,6 +299,8 @@ auto groupby_gpuid_and_shuffle_kv_pairs(raft::comms::comms_t const &comm, KeyToGPUIdOp key_to_gpu_id_op, cudaStream_t stream) { + auto const comm_size = comm.get_size(); + auto d_tx_value_counts = detail::sort_and_count( comm, tx_key_first, tx_key_last, tx_value_first, key_to_gpu_id_op, stream); @@ -328,6 +347,15 @@ auto groupby_gpuid_and_shuffle_kv_pairs(raft::comms::comms_t const &comm, rx_src_ranks, stream); + if (rx_counts.size() < static_cast(comm_size)) { + std::vector tmp_rx_counts(comm_size, size_t{0}); + for (size_t i = 0; i < rx_src_ranks.size(); ++i) { + assert(rx_src_ranks[i] < comm_size); + tmp_rx_counts[rx_src_ranks[i]] = rx_counts[i]; + } + rx_counts = std::move(tmp_rx_counts); + } + return std::make_tuple(std::move(rx_keys), std::move(rx_value_buffer), rx_counts); } diff --git a/cpp/include/utilities/thrust_tuple_utils.cuh b/cpp/include/utilities/thrust_tuple_utils.cuh index 0ad71ba5e05..01843a583eb 100644 --- a/cpp/include/utilities/thrust_tuple_utils.cuh +++ b/cpp/include/utilities/thrust_tuple_utils.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -15,6 +15,7 @@ */ #pragma once +#include #include #include From fe0cfc7815017841ce281595e0d94f608029dc03 Mon Sep 17 00:00:00 2001 From: Rick Ratzel <3039903+rlratzel@users.noreply.github.com> Date: Tue, 16 Mar 2021 16:53:23 -0500 Subject: [PATCH 193/343] Update to make notebook_list.py compatible with numba 0.53 (#1455) A recent update to numba 0.53 in CI broke this script and caused CI failures. This makes the script compatible with both pre and post numba 0.53 versions. Tested in a local env with numba 0.53 installed. Authors: - Rick Ratzel (@rlratzel) Approvers: - Alex Fender (@afender) - Brad Rees (@BradReesWork) - Chuck Hastings (@ChuckHastings) - AJ Schmidt (@ajschmidt8) URL: https://github.com/rapidsai/cugraph/pull/1455 --- ci/gpu/notebook_list.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ci/gpu/notebook_list.py b/ci/gpu/notebook_list.py index bb54913ac8d..8748c434006 100644 --- a/ci/gpu/notebook_list.py +++ b/ci/gpu/notebook_list.py @@ -24,7 +24,9 @@ pascal = False device = cuda.get_current_device() -cc = getattr(device, 'COMPUTE_CAPABILITY') +# check for the attribute using both pre and post numba 0.53 names +cc = getattr(device, 'COMPUTE_CAPABILITY', None) or \ + getattr(device, 'compute_capability') if (cc[0] < 7): pascal = True From a7c4ebd906dbfa09e6509ab9aac502cce3c7695a Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 17 Mar 2021 23:44:08 +1100 Subject: [PATCH 194/343] Remove literals passed to `device_uvector::set_element_async` (#1453) After rapidsai/rmm#725 is merged, this PR updates cuspatial to eliminate passing literal values to device_uvector::set_element_async. Companion PR to rapidsai/cuspatial#367 Authors: - Mark Harris (@harrism) Approvers: - Seunghwa Kang (@seunghwak) - Alex Fender (@afender) - Andrei Schaffer (@aschaffer) URL: https://github.com/rapidsai/cugraph/pull/1453 --- cpp/src/experimental/graph.cu | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/cpp/src/experimental/graph.cu b/cpp/src/experimental/graph.cu index 498bb4eaefe..5abe141dafd 100644 --- a/cpp/src/experimental/graph.cu +++ b/cpp/src/experimental/graph.cu @@ -304,9 +304,15 @@ graph_t segment_offsets(detail::num_segments_per_vertex_partition + 1, default_stream); - segment_offsets.set_element_async(0, 0, default_stream); + + // temporaries are necessary because the &&-overload of device_uvector is deleted + // Note that we must sync `default_stream` before these temporaries go out of scope to + // avoid use after free. (The syncs are at the end of this function) + auto zero_vertex = vertex_t{0}; + auto vertex_count = static_cast(degrees.size()); + segment_offsets.set_element_async(0, zero_vertex, default_stream); segment_offsets.set_element_async( - detail::num_segments_per_vertex_partition, degrees.size(), default_stream); + detail::num_segments_per_vertex_partition, vertex_count, default_stream); thrust::upper_bound(rmm::exec_policy(default_stream)->on(default_stream), degrees.begin(), @@ -454,9 +460,16 @@ graph_t segment_offsets(detail::num_segments_per_vertex_partition + 1, default_stream); - segment_offsets.set_element_async(0, 0, default_stream); + + // temporaries are necessary because the &&-overload of device_uvector is deleted + // Note that we must sync `default_stream` before these temporaries go out of scope to + // avoid use after free. 
(The syncs are at the end of this function) + auto zero_vertex = vertex_t{0}; + auto vertex_count = static_cast(this->get_number_of_vertices()); + segment_offsets.set_element_async(0, zero_vertex, default_stream); + segment_offsets.set_element_async( - detail::num_segments_per_vertex_partition, this->get_number_of_vertices(), default_stream); + detail::num_segments_per_vertex_partition, vertex_count, default_stream); thrust::upper_bound(rmm::exec_policy(default_stream)->on(default_stream), degree_first, From c5ce11942835fbbf4c55a263b2f73cf5f3274c39 Mon Sep 17 00:00:00 2001 From: Brad Rees <34135411+BradReesWork@users.noreply.github.com> Date: Wed, 17 Mar 2021 12:17:46 -0400 Subject: [PATCH 195/343] Updating docs (#1448) * added Hungarian to api.rst * fixed missing examples * fixed syntax Authors: - Brad Rees (@BradReesWork) Approvers: - Rick Ratzel (@rlratzel) - Alex Fender (@afender) URL: https://github.com/rapidsai/cugraph/pull/1448 --- README.md | 1 - docs/source/api.rst | 62 ++++++++++++++++ docs/source/dask-cugraph.rst | 71 +++++++------------ python/cugraph/bsp/traversal/bfs_bsp.py | 10 ++- python/cugraph/community/egonet.py | 11 +++ python/cugraph/components/connectivity.py | 4 +- .../dask/centrality/katz_centrality.py | 6 +- python/cugraph/dask/community/louvain.py | 5 +- python/cugraph/dask/link_analysis/pagerank.py | 6 +- python/cugraph/dask/traversal/bfs.py | 6 +- python/cugraph/dask/traversal/sssp.py | 6 +- python/cugraph/tree/minimum_spanning_tree.py | 24 +++++-- 12 files changed, 142 insertions(+), 70 deletions(-) diff --git a/README.md b/README.md index 62059e9c7b6..77377fe2bbc 100644 --- a/README.md +++ b/README.md @@ -86,7 +86,6 @@ As of Release 0.18 - including 0.18 nightly | | Renumbering | Single-GPU | multiple columns, any data type | | | Symmetrize | Multi-GPU | | | Other | | | | -| | Hungarian Algorithm | Single-GPU | | | | Minimum Spanning Tree | Single-GPU | | | | Maximum Spanning Tree | Single-GPU | | | | | diff --git a/docs/source/api.rst b/docs/source/api.rst index dcdf3e6ff33..b02f8f488c5 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -48,6 +48,13 @@ Katz Centrality :undoc-members: +Katz Centrality (MG) +-------------------- + +.. automodule:: cugraph.dask.centrality.katz_centrality + :members: + :undoc-members: + Community ========= @@ -86,6 +93,14 @@ Louvain :members: :undoc-members: +Louvain (MG) +------------ + +.. automodule:: cugraph.dask.community.louvain + :members: + :undoc-members: + + Spectral Clustering ------------------- @@ -148,6 +163,17 @@ Force Atlas 2 :undoc-members: +Linear Assignment +================= + +Hungarian +------------- + +.. automodule:: cugraph.linear_assignment.hungarian + :members: + :undoc-members: + + Link Analysis ============= @@ -165,6 +191,13 @@ Pagerank :members: :undoc-members: +Pagerank (MG) +--------- + +.. automodule:: cugraph.dask.link_analysis.pagerank + :members: pagerank + :undoc-members: + Link Prediction =============== @@ -202,6 +235,13 @@ Breadth-first-search :members: :undoc-members: +Breadth-first-search (MG) +-------------------- + +.. automodule:: cugraph.dask.traversal.bfs + :members: + :undoc-members: + Single-source-shortest-path --------------------------- @@ -209,6 +249,13 @@ Single-source-shortest-path :members: :undoc-members: +Single-source-shortest-path (MG) +--------------------------- + +.. 
automodule:: cugraph.dask.traversal.sssp + :members: + :undoc-members: + Tree ========= @@ -227,3 +274,18 @@ Maximum Spanning Tree :members: :undoc-members: + +DASK MG Helper functions +=========================== + +.. automodule:: cugraph.comms.comms + :members: initialize + :undoc-members: + +.. automodule:: cugraph.comms.comms + :members: destroy + :undoc-members: + +.. automodule:: cugraph.dask.common.read_utils + :members: get_chunksize + :undoc-members: diff --git a/docs/source/dask-cugraph.rst b/docs/source/dask-cugraph.rst index b27ad382809..51487bfbf05 100644 --- a/docs/source/dask-cugraph.rst +++ b/docs/source/dask-cugraph.rst @@ -13,58 +13,41 @@ With cuGraph and Dask, whether you’re using a single NVIDIA GPU or multiple no If your graph comfortably fits in memory on a single GPU, you would want to use the single-GPU version of cuGraph. If you want to distribute your workflow across multiple GPUs and have more data than you can fit in memory on a single GPU, you would want to use cuGraph's multi-GPU features. +Example +======== -Distributed Graph Algorithms ----------------------------- +.. code-block:: python -.. automodule:: cugraph.dask.link_analysis.pagerank - :members: pagerank - :undoc-members: + from dask.distributed import Client, wait + from dask_cuda import LocalCUDACluster + import cugraph.comms as Comms + import cugraph.dask as dask_cugraph -.. automodule:: cugraph.dask.traversal.bfs - :members: bfs - :undoc-members: + cluster = LocalCUDACluster() + client = Client(cluster) + Comms.initialize(p2p=True) + # Helper function to set the reader chunk size to automatically get one partition per GPU + chunksize = dask_cugraph.get_chunksize(input_data_path) -Helper functions ----------------- + # Multi-GPU CSV reader + e_list = dask_cudf.read_csv(input_data_path, + chunksize = chunksize, + delimiter=' ', + names=['src', 'dst'], + dtype=['int32', 'int32']) -.. automodule:: cugraph.comms.comms - :members: initialize - :undoc-members: + G = cugraph.DiGraph() + G.from_dask_cudf_edgelist(e_list, source='src', destination='dst') -.. automodule:: cugraph.comms.comms - :members: destroy - :undoc-members: + # now run PageRank + pr_df = dask_cugraph.pagerank(G, tol=1e-4) -.. automodule:: cugraph.dask.common.read_utils - :members: get_chunksize - :undoc-members: + # All done, clean up + Comms.destroy() + client.close() + cluster.close() -Consolidation -============= -cuGraph can transparently interpret the Dask cuDF Dataframe as a regular Dataframe when loading the edge list. This is particularly helpful for workflows extracting a single GPU sized edge list from a distributed dataset. From there any existing single GPU feature will just work on this input. +| -For instance, consolidation allows leveraging Dask cuDF CSV reader to load file(s) on multiple GPUs and consolidate this input to a single GPU graph. Reading is often the time and memory bottleneck, with this feature users can call the Multi-GPU version of the reader without changing anything else. - -Batch Processing -================ - -cuGraph can leverage multi GPUs to increase processing speed for graphs that fit on a single GPU, providing faster analytics on such graphs. -You will be able to use the Graph the same way as you used to in a Single GPU environment, but analytics that support batch processing will automatically use the GPUs available to the dask client. -For example, Betweenness Centrality scores can be slow to obtain depending on the number of vertices used in the approximation. 
Thank to Multi GPUs Batch Processing, -you can create Single GPU graph as you would regularly do it using cuDF CSV reader, enable Batch analytics on it, and obtain scores much faster as each GPU will handle a sub-set of the sources. -In order to use Batch Analytics you need to set up a Dask Cluster and Client in addition to the cuGraph communicator, then you can simply call `enable_batch()` on you graph, and algorithms supporting batch processing will use multiple GPUs. - -Algorithms supporting Batch Processing --------------------------------------- -.. automodule:: cugraph.centrality - :members: betweenness_centrality - :undoc-members: - :noindex: - -.. automodule:: cugraph.centrality - :members: edge_betweenness_centrality - :undoc-members: - :noindex: diff --git a/python/cugraph/bsp/traversal/bfs_bsp.py b/python/cugraph/bsp/traversal/bfs_bsp.py index 28a71631443..9a2fd48e201 100644 --- a/python/cugraph/bsp/traversal/bfs_bsp.py +++ b/python/cugraph/bsp/traversal/bfs_bsp.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -11,7 +11,7 @@ # See the License for the specific language governing permissions and # limitations under the License. - +import warnings import cudf from collections import OrderedDict @@ -59,6 +59,12 @@ def bfs_df_pregel(_df, start, src_col='src', dst_col='dst', copy_data=True): """ + warnings.warn( + "This feature is deprecated and will be" + "dropped from cuGraph in release 0.20.", + FutureWarning, + ) + # extract the src and dst into a dataframe that can be modified if copy_data: coo_data = _df[[src_col, dst_col]] diff --git a/python/cugraph/community/egonet.py b/python/cugraph/community/egonet.py index 9ff12158b13..ca3c6149ece 100644 --- a/python/cugraph/community/egonet.py +++ b/python/cugraph/community/egonet.py @@ -74,6 +74,17 @@ def ego_graph(G, n, radius=1, center=True, undirected=False, distance=None): G_ego : cuGraph.Graph or networkx.Graph A graph descriptor with a minimum spanning tree or forest. The networkx graph will not have all attributes copied over + + Examples + -------- + >>> M = cudf.read_csv('datasets/karate.csv', + delimiter = ' ', + dtype=['int32', 'int32', 'float32'], + header=None) + >>> G = cugraph.Graph() + >>> G.from_cudf_edgelist(M, source='0', destination='1') + >>> ego_graph = cugraph.ego_graph(G, seed, radius=2) + """ (G, input_type) = ensure_cugraph_obj(G, nx_weight_attr="weight") diff --git a/python/cugraph/components/connectivity.py b/python/cugraph/components/connectivity.py index 7c68afd7ced..72f33ebfcbb 100644 --- a/python/cugraph/components/connectivity.py +++ b/python/cugraph/components/connectivity.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -378,7 +378,7 @@ def connected_components(G, header=None) >>> G = cugraph.Graph() >>> G.from_cudf_edgelist(M, source='0', destination='1', edge_attr=None) - >>> df = cugraph.strongly_connected_components(G) + >>> df = cugraph.connected_components(G, connection="weak") """ if connection == "weak": return weakly_connected_components(G, directed, diff --git a/python/cugraph/dask/centrality/katz_centrality.py b/python/cugraph/dask/centrality/katz_centrality.py index cf6ad95f974..e690e291928 100644 --- a/python/cugraph/dask/centrality/katz_centrality.py +++ b/python/cugraph/dask/centrality/katz_centrality.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -115,7 +115,8 @@ def katz_centrality(input_graph, Examples -------- >>> import cugraph.dask as dcg - >>> Comms.initialize(p2p=True) + >>> ... Init a DASK Cluster + >> see https://docs.rapids.ai/api/cugraph/stable/dask-cugraph.html >>> chunksize = dcg.get_chunksize(input_data_path) >>> ddf = dask_cudf.read_csv(input_data_path, chunksize=chunksize, delimiter=' ', @@ -125,7 +126,6 @@ def katz_centrality(input_graph, >>> dg.from_dask_cudf_edgelist(ddf, source='src', destination='dst', edge_attr='value') >>> pr = dcg.katz_centrality(dg) - >>> Comms.destroy() """ nstart = None diff --git a/python/cugraph/dask/community/louvain.py b/python/cugraph/dask/community/louvain.py index 11ecb78375f..495061c0f81 100644 --- a/python/cugraph/dask/community/louvain.py +++ b/python/cugraph/dask/community/louvain.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -55,7 +55,8 @@ def louvain(input_graph, max_iter=100, resolution=1.0): Examples -------- >>> import cugraph.dask as dcg - >>> Comms.initialize(p2p=True) + >>> ... Init a DASK Cluster + >> see https://docs.rapids.ai/api/cugraph/stable/dask-cugraph.html >>> chunksize = dcg.get_chunksize(input_data_path) >>> ddf = dask_cudf.read_csv('datasets/karate.csv', chunksize=chunksize, delimiter=' ', diff --git a/python/cugraph/dask/link_analysis/pagerank.py b/python/cugraph/dask/link_analysis/pagerank.py index 1e9d79e0aa6..d8a76f1231e 100644 --- a/python/cugraph/dask/link_analysis/pagerank.py +++ b/python/cugraph/dask/link_analysis/pagerank.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -105,7 +105,8 @@ def pagerank(input_graph, Examples -------- >>> import cugraph.dask as dcg - >>> Comms.initialize(p2p=True) + >>> ... 
Init a DASK Cluster + >> see https://docs.rapids.ai/api/cugraph/stable/dask-cugraph.html >>> chunksize = dcg.get_chunksize(input_data_path) >>> ddf = dask_cudf.read_csv(input_data_path, chunksize=chunksize, delimiter=' ', @@ -115,7 +116,6 @@ def pagerank(input_graph, >>> dg.from_dask_cudf_edgelist(ddf, source='src', destination='dst', edge_attr='value') >>> pr = dcg.pagerank(dg) - >>> Comms.destroy() """ from cugraph.structure.graph import null_check diff --git a/python/cugraph/dask/traversal/bfs.py b/python/cugraph/dask/traversal/bfs.py index 7a2c50a3bc0..51e0dc0de5d 100644 --- a/python/cugraph/dask/traversal/bfs.py +++ b/python/cugraph/dask/traversal/bfs.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -76,7 +76,8 @@ def bfs(graph, Examples -------- >>> import cugraph.dask as dcg - >>> Comms.initialize(p2p=True) + >>> ... Init a DASK Cluster + >> see https://docs.rapids.ai/api/cugraph/stable/dask-cugraph.html >>> chunksize = dcg.get_chunksize(input_data_path) >>> ddf = dask_cudf.read_csv(input_data_path, chunksize=chunksize, delimiter=' ', @@ -85,7 +86,6 @@ def bfs(graph, >>> dg = cugraph.DiGraph() >>> dg.from_dask_cudf_edgelist(ddf, 'src', 'dst') >>> df = dcg.bfs(dg, 0) - >>> Comms.destroy() """ client = default_client() diff --git a/python/cugraph/dask/traversal/sssp.py b/python/cugraph/dask/traversal/sssp.py index ce0c7908664..52f2b9b256c 100644 --- a/python/cugraph/dask/traversal/sssp.py +++ b/python/cugraph/dask/traversal/sssp.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -76,7 +76,8 @@ def sssp(graph, Examples -------- >>> import cugraph.dask as dcg - >>> Comms.initialize(p2p=True) + >>> ... Init a DASK Cluster + >> see https://docs.rapids.ai/api/cugraph/stable/dask-cugraph.html >>> chunksize = dcg.get_chunksize(input_data_path) >>> ddf = dask_cudf.read_csv(input_data_path, chunksize=chunksize, delimiter=' ', @@ -85,7 +86,6 @@ def sssp(graph, >>> dg = cugraph.DiGraph() >>> dg.from_dask_cudf_edgelist(ddf, 'src', 'dst') >>> df = dcg.sssp(dg, 0) - >>> Comms.destroy() """ client = default_client() diff --git a/python/cugraph/tree/minimum_spanning_tree.py b/python/cugraph/tree/minimum_spanning_tree.py index 25a365665df..45e996aa083 100644 --- a/python/cugraph/tree/minimum_spanning_tree.py +++ b/python/cugraph/tree/minimum_spanning_tree.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -17,7 +17,7 @@ from cugraph.utilities import cugraph_to_nx -def minimum_spanning_tree_subgraph(G): +def _minimum_spanning_tree_subgraph(G): mst_subgraph = Graph() if type(G) is not Graph: raise Exception("input graph must be undirected") @@ -32,7 +32,7 @@ def minimum_spanning_tree_subgraph(G): return mst_subgraph -def maximum_spanning_tree_subgraph(G): +def _maximum_spanning_tree_subgraph(G): mst_subgraph = Graph() if type(G) is not Graph: raise Exception("input graph must be undirected") @@ -68,28 +68,33 @@ def minimum_spanning_tree( ---------- G : cuGraph.Graph or networkx.Graph cuGraph graph descriptor with connectivity information. + weight : string default to the weights in the graph, if the graph edges do not have a weight attribute a default weight of 1 will be used. + algorithm : string Default to 'boruvka'. The parallel algorithm to use when finding a minimum spanning tree. + ignore_nan : bool Default to False + Returns ------- G_mst : cuGraph.Graph or networkx.Graph A graph descriptor with a minimum spanning tree or forest. The networkx graph will not have all attributes copied over + """ G, isNx = check_nx_graph(G) if isNx is True: - mst = minimum_spanning_tree_subgraph(G) + mst = _minimum_spanning_tree_subgraph(G) return cugraph_to_nx(mst) else: - return minimum_spanning_tree_subgraph(G) + return _minimum_spanning_tree_subgraph(G) def maximum_spanning_tree( @@ -103,25 +108,30 @@ def maximum_spanning_tree( ---------- G : cuGraph.Graph or networkx.Graph cuGraph graph descriptor with connectivity information. + weight : string default to the weights in the graph, if the graph edges do not have a weight attribute a default weight of 1 will be used. + algorithm : string Default to 'boruvka'. The parallel algorithm to use when finding a maximum spanning tree. + ignore_nan : bool Default to False + Returns ------- G_mst : cuGraph.Graph or networkx.Graph A graph descriptor with a maximum spanning tree or forest. The networkx graph will not have all attributes copied over + """ G, isNx = check_nx_graph(G) if isNx is True: - mst = maximum_spanning_tree_subgraph(G) + mst = _maximum_spanning_tree_subgraph(G) return cugraph_to_nx(mst) else: - return maximum_spanning_tree_subgraph(G) + return _maximum_spanning_tree_subgraph(G) From 1c8da699a11682d0fb639614bac7fc10aecd66ba Mon Sep 17 00:00:00 2001 From: Rick Ratzel <3039903+rlratzel@users.noreply.github.com> Date: Thu, 18 Mar 2021 08:22:24 -0500 Subject: [PATCH 196/343] Removed unused dependencies from libcugraph recipe, moved non-test script code from test script to gpu build script (#1468) * Removed unused dependencies from the `libcugraph` recipe. This is motivated by the CuPy project to integrate `libcugraph` as the graph analytics backend with minimal extra dependencies ( https://github.com/cupy/cupy/issues/4219, https://github.com/cupy/cupy/issues/2431, https://github.com/cupy/cupy/pull/4054 ) * Moved non-test script code from test script to gpu build script. The `FIXME` addressed for this was added after discussing with @raydouglass earlier, and will allow any Project Flash failures to fail the build immediately instead of attempting to then run tests. * Removed unused cudf lib reference from test cmake file. Tested by doing a successful local `conda build` of the recipe. 
Authors: - Rick Ratzel (@rlratzel) Approvers: - Brad Rees (@BradReesWork) - Chuck Hastings (@ChuckHastings) - AJ Schmidt (@ajschmidt8) URL: https://github.com/rapidsai/cugraph/pull/1468 --- ci/gpu/build.sh | 21 +++++++++++++++++++-- ci/test.sh | 24 ------------------------ conda/recipes/libcugraph/meta.yaml | 13 ++----------- cpp/tests/CMakeLists.txt | 1 - 4 files changed, 21 insertions(+), 38 deletions(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 0fef7b62f8d..7242b4a11f5 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -90,8 +90,25 @@ conda list --show-channel-urls ################################################################################ if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then - gpuci_logger "Build from source" - $WORKSPACE/build.sh -v clean libcugraph cugraph + gpuci_logger "Build from source" + $WORKSPACE/build.sh -v clean libcugraph cugraph +else + export LIBCUGRAPH_BUILD_DIR="$WORKSPACE/ci/artifacts/cugraph/cpu/conda_work/cpp/build" + + # Faiss patch + echo "Update libcugraph.so" + cd $LIBCUGRAPH_BUILD_DIR + chrpath -d libcugraph.so + patchelf --replace-needed `patchelf --print-needed libcugraph.so | grep faiss` libfaiss.so libcugraph.so + + CONDA_FILE=`find $WORKSPACE/ci/artifacts/cugraph/cpu/conda-bld/ -name "libcugraph*.tar.bz2"` + CONDA_FILE=`basename "$CONDA_FILE" .tar.bz2` #get filename without extension + CONDA_FILE=${CONDA_FILE//-/=} #convert to conda install + echo "Installing $CONDA_FILE" + conda install -c $WORKSPACE/ci/artifacts/cugraph/cpu/conda-bld/ "$CONDA_FILE" + + echo "Build cugraph..." + $WORKSPACE/build.sh cugraph fi ################################################################################ diff --git a/ci/test.sh b/ci/test.sh index b0134e97246..58cbb950f73 100755 --- a/ci/test.sh +++ b/ci/test.sh @@ -61,30 +61,6 @@ else cd $WORKSPACE/ci/artifacts/cugraph/cpu/conda_work/cpp/build fi -# FIXME: if possible, any install and build steps should be moved outside this -# script since a failing install/build step is treated as a failing test command -# and will not stop the script. This script is also only expected to run tests -# in a preconfigured environment, and install/build steps are unexpected side -# effects. -if [[ "$PROJECT_FLASH" == "1" ]]; then - export LIBCUGRAPH_BUILD_DIR="$WORKSPACE/ci/artifacts/cugraph/cpu/conda_work/cpp/build" - - # Faiss patch - echo "Update libcugraph.so" - cd $LIBCUGRAPH_BUILD_DIR - chrpath -d libcugraph.so - patchelf --replace-needed `patchelf --print-needed libcugraph.so | grep faiss` libfaiss.so libcugraph.so - - CONDA_FILE=`find $WORKSPACE/ci/artifacts/cugraph/cpu/conda-bld/ -name "libcugraph*.tar.bz2"` - CONDA_FILE=`basename "$CONDA_FILE" .tar.bz2` #get filename without extension - CONDA_FILE=${CONDA_FILE//-/=} #convert to conda install - echo "Installing $CONDA_FILE" - conda install -c $WORKSPACE/ci/artifacts/cugraph/cpu/conda-bld/ "$CONDA_FILE" - - echo "Build cugraph..." - $WORKSPACE/build.sh cugraph -fi - # Do not abort the script on error from this point on. This allows all tests to # run regardless of pass/fail, but relies on the ERR trap above to manage the # EXITCODE for the script. 
diff --git a/conda/recipes/libcugraph/meta.yaml b/conda/recipes/libcugraph/meta.yaml index bb5e4b468a5..2602b2d8608 100644 --- a/conda/recipes/libcugraph/meta.yaml +++ b/conda/recipes/libcugraph/meta.yaml @@ -32,31 +32,22 @@ build: requirements: build: - cmake>=3.12.4 - - libcudf={{ minor_version }} - cudatoolkit {{ cuda_version }}.* + - librmm {{ minor_version }}.* - boost-cpp>=1.66 - - libcypher-parser - nccl>=2.8.4 - - ucx-py {{ minor_version }} - ucx-proc=*=gpu - gtest + - gmock - faiss-proc=*=cuda - conda-forge::libfaiss=1.7.0 - - gmock run: - - libcudf={{ minor_version }} - {{ pin_compatible('cudatoolkit', max_pin='x.x') }} - nccl>=2.8.4 - - ucx-py {{ minor_version }} - ucx-proc=*=gpu - faiss-proc=*=cuda - conda-forge::libfaiss=1.7.0 -#test: -# commands: -# - test -f $PREFIX/include/cugraph.h - - about: home: http://rapids.ai/ license: Apache-2.0 diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 1db2f9df42e..5571cf5f124 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -91,7 +91,6 @@ function(ConfigureTest CMAKE_TEST_NAME CMAKE_TEST_SRC) cugraph GTest::GTest GTest::Main - ${CUDF_LIBRARY} ${NCCL_LIBRARIES} cudart cuda From ab4b77b4d6b32ecfbe965821f1eff737f1b06f07 Mon Sep 17 00:00:00 2001 From: Joseph Nke <76006812+jnke2016@users.noreply.github.com> Date: Thu, 18 Mar 2021 15:47:11 -0500 Subject: [PATCH 197/343] Add additional datasets to improve coverage (#1441) add datasets to test self-loops, string vertex IDs and isolated vertices Update README closes #1214 Authors: - Joseph Nke (@jnke2016) Approvers: - Brad Rees (@BradReesWork) - Rick Ratzel (@rlratzel) URL: https://github.com/rapidsai/cugraph/pull/1441 --- datasets/README.md | 199 ++++++++++++------- datasets/dolphins_multi_edge.csv | 325 +++++++++++++++++++++++++++++++ datasets/dolphins_s_loop.csv | 321 ++++++++++++++++++++++++++++++ datasets/get_test_data.sh | 10 +- datasets/karate_mod.mtx | 81 ++++++++ datasets/karate_multi_edge.csv | 160 +++++++++++++++ datasets/karate_s_loop.csv | 160 +++++++++++++++ datasets/karate_str.mtx | 78 ++++++++ python/cugraph/tests/utils.py | 15 ++ 9 files changed, 1281 insertions(+), 68 deletions(-) create mode 100644 datasets/dolphins_multi_edge.csv create mode 100644 datasets/dolphins_s_loop.csv create mode 100644 datasets/karate_mod.mtx create mode 100644 datasets/karate_multi_edge.csv create mode 100644 datasets/karate_s_loop.csv create mode 100644 datasets/karate_str.mtx diff --git a/datasets/README.md b/datasets/README.md index c7f76a91dfe..e42413fc996 100644 --- a/datasets/README.md +++ b/datasets/README.md @@ -1,67 +1,132 @@ -# Cugraph test and benchmark data - -## Python - -This directory contains small public datasets in `mtx` and `csv` format used by cuGraph's python tests. Graph details: - -| Graph | V | E | Directed | Weighted | -| ------------- | ----- | ----- | -------- | -------- | -| karate | 34 | 156 | No | No | -| dolphin | 62 | 318 | No | No | -| netscience | 1,589 | 5,484 | No | Yes | - -**karate** : The graph "karate" contains the network of friendships between the 34 members of a karate club at a US university, as described by Wayne Zachary in 1977. - -**dolphin** : The graph dolphins contains an undirected social network of frequent associations between 62 dolphins in a community living off Doubtful Sound, New Zealand, as compiled by Lusseau et al. (2003). - -**netscience** : The graph netscience contains a coauthorship network of scientists working on network theory and experiment, as compiled by M. Newman in May 2006. 
- -## C++ -Cugraph's C++ analytics tests need larger datasets (>5GB uncompressed) and reference results (>125MB uncompressed). They can be downloaded by running the provided script from the `datasets` directory. -``` -cd /datasets -./get_test_data.sh -``` -You may run this script from elsewhere and store C++ test input to another location. - -Before running the tests, you should let cuGraph know where to find the test input by using: -``` -export RAPIDS_DATASET_ROOT_DIR= -``` - -## Benchmarks -Cugraph benchmarks (which can be found [here](../benchmarks)) also use datasets installed to this folder. Because the datasets used for benchmarking are also quite large (~14GB uncompressed), they are not installed by default. To install datasets for benchmarks, run the same script shown above from the `datasets` directory using the `--benchmark` option: -``` -cd /datasets -./get_test_data.sh --benchmark -``` -The datasets installed for benchmarks currently include CSV files for use in creating both directed and undirected graphs: -``` -/datasets/csv - |- directed - |--- cit-Patents.csv (250M) - |--- soc-LiveJournal1.csv (965M) - |- undirected - |--- europe_osm.csv (1.8G) - |--- hollywood.csv (1.5G) - |--- soc-twitter-2010.csv (8.8G) -``` -The benchmark datasets are described below: -| Graph | V | E | Directed | Weighted | -| ----------------- | ---------- | ------------- | -------- | -------- | -| cit-Patents | 3,774,768 | 16,518,948 | Yes | No | -| soc-LiveJournal1 | 4,847,571 | 43,369,619 | Yes | No | -| europe_osm | 50,912,018 | 54,054,660 | No | No | -| hollywood | 1,139,905 | 57,515,616 | No | No | -| soc-twitter-2010 | 21,297,772 | 265,025,809 | No | No | - -**cit-Patents** : A citation graph that includes all citations made by patents granted between 1975 and 1999, totaling 16,522,438 citations. -**soc-LiveJournal** : A graph of the LiveJournal social network. -**europe_osm** : A graph of OpenStreetMap data for Europe. -**hollywood** : A graph of movie actors where vertices are actors, and two actors are joined by an edge whenever they appeared in a movie together. -**soc-twitter-2010** : A network of follower relationships from a snapshot of Twitter in 2010, where an edge from i to j indicates that j is a follower of i. - -_NOTE: the benchmark datasets were converted to a CSV format from their original format described in the reference URL below, and in doing so had edge weights and isolated vertices discarded._ - -## Reference -The SuiteSparse Matrix Collection (formerly the University of Florida Sparse Matrix Collection) : https://sparse.tamu.edu/ +# Cugraph test and benchmark data + +## Python + +This directory contains small public datasets in `mtx` and `csv` format used by cuGraph's python tests. Graph details: + +| Graph | V | E | Directed | Weighted | +| ------------- | ----- | ----- | -------- | -------- | +| karate | 34 | 156 | No | No | +| dolphin | 62 | 318 | No | No | +| netscience | 1,589 | 5,484 | No | Yes | + +**karate** : The graph "karate" contains the network of friendships between the 34 members of a karate club at a US university, as described by Wayne Zachary in 1977. + +**dolphin** : The graph dolphins contains an undirected social network of frequent associations between 62 dolphins in a community living off Doubtful Sound, New Zealand, as compiled by Lusseau et al. (2003). + +**netscience** : The graph netscience contains a coauthorship network of scientists working on network theory and experiment, as compiled by M. Newman in May 2006. 
+ + + +### Modified datasets + +The datasets below were added to provide input that contains self-loops, string vertex IDs, isolated vertices, and multiple edges. + +| Graph | V | E | Directed | Weighted | self-loops | Isolated V | String V IDs | Multi-edges | +| ------------------- | ------- | ---------- | -------- | --------- | ---------- | ---------- | ------------ | ----------- | +| karate_multi_edge | 34 | 160 | No | Yes | No | No | No | Yes | +| dolphins_multi_edge | 62 | 325 | No | Yes | No | No | No | Yes | +| karate_s_loop | 34 | 160 | No | Yes | Yes | No | No | No | +| dolphins_s_loop | 62 | 321 | No | Yes | Yes | No | No | No | +| karate_mod | 37 | 156 | No | No | No | Yes | No | No | +| karate_str | 34 | 156 | No | Yes | No | No | Yes | No | + +**karate_multi_edge** : The graph "karate_multi_edge" is a modified version of the "karate" graph where multi-edges were added + +**dolphins_multi_edge** : The graph "dolphins_multi_edge" is a modified version of the "dolphin" graph where multi-edges were added + +**karate_s_loop** : The graph "karate_s_loop" is a modified version of the "karate" graph where self-loops were added + +**dolphins_s_loop** : The graph "dolphins_s_loop" is a modified version of the "dolphin" graph where self-loops were added + +**karate_mod** : The graph "karate_mod" is a modified version of the "karate" graph where vertices and edges were added + +**karate_str** : The graph "karate_str" contains the network of friendships between the 34 members of a karate club at a US university, as described by Wayne Zachary in 1977. The integer vertices were replaced by strings + + +### Additional datasets + +Larger datasets containing self-loops can be downloaded by running the provided script from the `datasets` directory using the `--self_loops` +option: +``` +cd /datasets +./get_test_data.sh --self_loops +``` +``` +/datasets/self_loops + |-ca-AstroPh (5.3M) + |-ca-CondMat (2.8M) + |-ca-GrQc (348K) + |-ca-HepTh (763K) +``` +These datasets are not currently used by any tests or benchmarks + +| Graph | V | E | Directed | Weighted | self-loops | Isolated V | String V IDs | Multi-edges | +| ------------- | ------- | -------- | -------- | -------- | ---------- | ---------- | ------------ | ----------- | +| ca-AstroPh | 18,772 | 198,110 | No | No | Yes | No | No | No | +| ca-CondMat | 23,133 | 93,497 | No | Yes | Yes | No | No | No | +| ca-GrQc | 5,242 | 14,387 | No | No | Yes | No | No | No | +| ca-HepTh | 9,877 | 25,998 | No | Yes | Yes | No | No | No | + +**ca-AstroPh** : The graph "ca-AstroPh" covers scientific collaborations between authors papers submitted to Astro Physics category in the period from January 1993 to April 2003 (124 months), as described by J. Leskovec, J. Kleinberg and C. Faloutsos in 2007. + +**ca-CondMat** : The graph "ca-CondMat" covers scientific collaborations between authors papers submitted to Condense Matter category in the period from January 1993 to April 2003 (124 months), as described by J. Leskovec, J. Kleinberg and C. Faloutsos in 2007. + +**ca-GrQc** : The graph "ca-GrQc" covers scientific collaborations between authors papers submitted to General Relativity and Quantum Cosmology category in the period from January 1993 to April 2003 (124 months), as described by J. Leskovec, J. Kleinberg and C. Faloutsos in 2007. + +**ca-HepTh** : The graph "ca-HepTh" covers scientific collaborations between authors papers submitted to High Energy Physics - Theory category in the period from January 1993 to April 2003 (124 months), as described by J. 
Leskovec, J. Kleinberg and C. Faloutsos in 2007. + + +## Custom path to larger datasets directory + +Cugraph's C++ and Python analytics tests need larger datasets (>5GB uncompressed) and reference results (>125MB uncompressed). They can be downloaded by running the provided script from the `datasets` directory. +``` +cd /datasets +./get_test_data.sh +``` +You may run this script from elsewhere and store C++ or Python test input to another location. + +Before running the tests, you should let cuGraph know where to find the test input by using: +``` +export RAPIDS_DATASET_ROOT_DIR= +``` + + +## Benchmarks + +Cugraph benchmarks (which can be found [here](../benchmarks)) also use datasets installed to this folder. Because the datasets used for benchmarking are also quite large (~14GB uncompressed), they are not installed by default. To install datasets for benchmarks, run the same script shown above from the `datasets` directory using the `--benchmark` option: +``` +cd /datasets +./get_test_data.sh --benchmark +``` +The datasets installed for benchmarks currently include CSV files for use in creating both directed and undirected graphs: +``` +/datasets/csv + |- directed + |--- cit-Patents.csv (250M) + |--- soc-LiveJournal1.csv (965M) + |- undirected + |--- europe_osm.csv (1.8G) + |--- hollywood.csv (1.5G) + |--- soc-twitter-2010.csv (8.8G) +``` +The benchmark datasets are described below: +| Graph | V | E | Directed | Weighted | +| ----------------- | ---------- | ------------- | -------- | -------- | +| cit-Patents | 3,774,768 | 16,518,948 | Yes | No | +| soc-LiveJournal1 | 4,847,571 | 43,369,619 | Yes | No | +| europe_osm | 50,912,018 | 54,054,660 | No | No | +| hollywood | 1,139,905 | 57,515,616 | No | No | +| soc-twitter-2010 | 21,297,772 | 265,025,809 | No | No | + +**cit-Patents** : A citation graph that includes all citations made by patents granted between 1975 and 1999, totaling 16,522,438 citations. +**soc-LiveJournal** : A graph of the LiveJournal social network. +**europe_osm** : A graph of OpenStreetMap data for Europe. +**hollywood** : A graph of movie actors where vertices are actors, and two actors are joined by an edge whenever they appeared in a movie together. +**soc-twitter-2010** : A network of follower relationships from a snapshot of Twitter in 2010, where an edge from i to j indicates that j is a follower of i. 
+ +_NOTE: the benchmark datasets were converted to a CSV format from their original format described in the reference URL below, and in doing so had edge weights and isolated vertices discarded._ + +## Reference +The SuiteSparse Matrix Collection (formerly the University of Florida Sparse Matrix Collection) : https://sparse.tamu.edu/ +The Stanford Network Analysis Platform (SNAP) diff --git a/datasets/dolphins_multi_edge.csv b/datasets/dolphins_multi_edge.csv new file mode 100644 index 00000000000..cf6bc70918e --- /dev/null +++ b/datasets/dolphins_multi_edge.csv @@ -0,0 +1,325 @@ +10 0 1.0 +14 0 1.0 +15 0 1.0 +40 0 1.0 +42 0 1.0 +47 0 1.0 +17 1 1.0 +19 1 1.0 +26 1 1.0 +27 1 1.0 +28 1 1.0 +36 1 1.0 +41 1 1.0 +54 1 1.0 +10 2 1.0 +42 2 1.0 +44 2 1.0 +61 2 1.0 +8 3 1.0 +14 3 1.0 +59 3 1.0 +51 4 1.0 +9 5 1.0 +13 5 1.0 +56 5 1.0 +57 5 1.0 +9 6 1.0 +13 6 1.0 +17 6 1.0 +54 6 1.0 +56 6 1.0 +57 6 1.0 +19 7 1.0 +27 7 1.0 +30 7 1.0 +40 7 1.0 +54 7 1.0 +20 8 1.0 +28 8 1.0 +37 8 1.0 +45 8 1.0 +59 8 1.0 +13 9 1.0 +17 9 1.0 +32 9 1.0 +41 9 1.0 +57 9 1.0 +29 10 1.0 +42 10 1.0 +47 10 1.0 +51 11 1.0 +33 12 1.0 +17 13 1.0 +32 13 1.0 +41 13 1.0 +54 13 1.0 +57 13 1.0 +16 14 1.0 +24 14 1.0 +33 14 1.0 +34 14 1.0 +37 14 1.0 +38 14 1.0 +40 14 1.0 +43 14 1.0 +50 14 1.0 +52 14 1.0 +18 15 1.0 +24 15 1.0 +40 15 1.0 +45 15 1.0 +55 15 1.0 +59 15 1.0 +20 16 1.0 +33 16 1.0 +37 16 1.0 +38 16 1.0 +50 16 1.0 +22 17 1.0 +25 17 1.0 +27 17 1.0 +31 17 1.0 +57 17 1.0 +20 18 1.0 +21 18 1.0 +24 18 1.0 +29 18 1.0 +45 18 1.0 +51 18 1.0 +30 19 1.0 +54 19 1.0 +28 20 1.0 +36 20 1.0 +38 20 1.0 +44 20 1.0 +47 20 1.0 +50 20 1.0 +29 21 1.0 +33 21 1.0 +37 21 1.0 +45 21 1.0 +51 21 1.0 +36 23 1.0 +45 23 1.0 +51 23 1.0 +29 24 1.0 +45 24 1.0 +51 24 1.0 +26 25 1.0 +27 25 1.0 +27 26 1.0 +30 28 1.0 +47 28 1.0 +35 29 1.0 +43 29 1.0 +45 29 1.0 +51 29 1.0 +52 29 1.0 +42 30 1.0 +47 30 1.0 +60 32 1.0 +34 33 1.0 +37 33 1.0 +38 33 1.0 +40 33 1.0 +43 33 1.0 +50 33 1.0 +37 34 1.0 +44 34 1.0 +49 34 1.0 +37 36 1.0 +39 36 1.0 +40 36 1.0 +59 36 1.0 +40 37 1.0 +43 37 1.0 +45 37 1.0 +61 37 1.0 +43 38 1.0 +44 38 1.0 +52 38 1.0 +58 38 1.0 +57 39 1.0 +52 40 1.0 +54 41 1.0 +54 41 1.0 +57 41 1.0 +47 42 1.0 +50 42 1.0 +50 42 1.0 +46 43 1.0 +53 43 1.0 +50 45 1.0 +51 45 1.0 +59 45 1.0 +59 45 1.0 +49 46 1.0 +57 48 1.0 +51 50 1.0 +55 51 1.0 +61 53 1.0 +57 54 1.0 +0 10 1.0 +0 14 1.0 +0 15 1.0 +59 45 1.0 +0 40 1.0 +0 42 1.0 +0 47 1.0 +1 17 1.0 +1 19 1.0 +1 26 1.0 +1 27 1.0 +1 28 1.0 +1 36 1.0 +1 41 1.0 +1 54 1.0 +2 10 1.0 +2 42 1.0 +2 44 1.0 +2 61 1.0 +54 41 1.0 +3 8 1.0 +3 14 1.0 +3 59 1.0 +4 51 1.0 +56 6 1.0 +5 9 1.0 +5 13 1.0 +5 56 1.0 +5 57 1.0 +6 9 1.0 +6 13 1.0 +6 17 1.0 +6 54 1.0 +6 56 1.0 +6 57 1.0 +7 19 1.0 +7 27 1.0 +7 30 1.0 +7 40 1.0 +7 54 1.0 +8 20 1.0 +8 28 1.0 +8 37 1.0 +8 45 1.0 +2 61 1.0 +8 59 1.0 +9 13 1.0 +9 17 1.0 +9 32 1.0 +9 41 1.0 +9 57 1.0 +10 29 1.0 +10 42 1.0 +10 47 1.0 +11 51 1.0 +12 33 1.0 +13 17 1.0 +13 32 1.0 +13 41 1.0 +13 54 1.0 +13 57 1.0 +14 16 1.0 +14 24 1.0 +14 33 1.0 +14 34 1.0 +14 37 1.0 +14 38 1.0 +14 40 1.0 +14 43 1.0 +14 50 1.0 +14 52 1.0 +15 18 1.0 +15 24 1.0 +15 40 1.0 +15 45 1.0 +15 55 1.0 +15 59 1.0 +16 20 1.0 +16 33 1.0 +16 37 1.0 +16 38 1.0 +16 50 1.0 +17 22 1.0 +17 25 1.0 +17 27 1.0 +17 31 1.0 +17 57 1.0 +18 20 1.0 +18 21 1.0 +18 24 1.0 +18 29 1.0 +18 45 1.0 +18 51 1.0 +19 30 1.0 +19 54 1.0 +20 28 1.0 +20 36 1.0 +20 38 1.0 +20 44 1.0 +20 47 1.0 +20 50 1.0 +21 29 1.0 +21 33 1.0 +21 37 1.0 +21 45 1.0 +21 51 1.0 +23 36 1.0 +23 45 1.0 +23 51 1.0 +24 29 1.0 +24 45 1.0 +24 51 1.0 +25 26 1.0 +25 27 1.0 +26 27 1.0 +28 30 1.0 +28 47 1.0 
+29 35 1.0 +29 43 1.0 +29 45 1.0 +29 51 1.0 +29 52 1.0 +30 42 1.0 +30 47 1.0 +32 60 1.0 +33 34 1.0 +33 37 1.0 +33 38 1.0 +33 40 1.0 +33 43 1.0 +33 50 1.0 +34 37 1.0 +34 44 1.0 +34 49 1.0 +36 37 1.0 +36 39 1.0 +36 40 1.0 +36 59 1.0 +37 40 1.0 +37 43 1.0 +37 45 1.0 +37 61 1.0 +38 43 1.0 +38 44 1.0 +38 52 1.0 +38 58 1.0 +39 57 1.0 +40 52 1.0 +41 54 1.0 +41 57 1.0 +42 47 1.0 +42 50 1.0 +43 46 1.0 +43 53 1.0 +45 50 1.0 +45 51 1.0 +45 59 1.0 +46 49 1.0 +48 57 1.0 +50 51 1.0 +51 55 1.0 +53 61 1.0 +54 57 1.0 diff --git a/datasets/dolphins_s_loop.csv b/datasets/dolphins_s_loop.csv new file mode 100644 index 00000000000..703b8440afa --- /dev/null +++ b/datasets/dolphins_s_loop.csv @@ -0,0 +1,321 @@ +10 0 1.0 +14 0 1.0 +15 0 1.0 +40 0 1.0 +42 0 1.0 +47 0 1.0 +17 1 1.0 +19 1 1.0 +26 1 1.0 +27 1 1.0 +28 1 1.0 +36 1 1.0 +41 1 1.0 +54 1 1.0 +10 2 1.0 +42 2 1.0 +44 2 1.0 +61 2 1.0 +8 3 1.0 +14 3 1.0 +59 3 1.0 +51 4 1.0 +9 5 1.0 +13 5 1.0 +56 5 1.0 +57 5 1.0 +9 6 1.0 +13 6 1.0 +17 6 1.0 +54 6 1.0 +56 6 1.0 +57 6 1.0 +19 7 1.0 +27 7 1.0 +30 7 1.0 +40 7 1.0 +54 7 1.0 +20 8 1.0 +28 8 1.0 +37 8 1.0 +45 8 1.0 +59 8 1.0 +13 9 1.0 +17 9 1.0 +32 9 1.0 +41 9 1.0 +57 9 1.0 +29 10 1.0 +42 10 1.0 +47 10 1.0 +51 11 1.0 +33 12 1.0 +17 13 1.0 +32 13 1.0 +41 13 1.0 +54 13 1.0 +57 13 1.0 +16 14 1.0 +24 14 1.0 +33 14 1.0 +34 14 1.0 +37 14 1.0 +38 14 1.0 +40 14 1.0 +43 14 1.0 +50 14 1.0 +52 14 1.0 +18 15 1.0 +24 15 1.0 +40 15 1.0 +45 15 1.0 +55 15 1.0 +59 15 1.0 +20 16 1.0 +33 16 1.0 +37 16 1.0 +38 16 1.0 +50 16 1.0 +22 17 1.0 +25 17 1.0 +27 17 1.0 +31 17 1.0 +57 17 1.0 +20 18 1.0 +21 18 1.0 +24 18 1.0 +29 18 1.0 +45 18 1.0 +51 18 1.0 +30 19 1.0 +54 19 1.0 +28 20 1.0 +36 20 1.0 +38 20 1.0 +44 20 1.0 +47 20 1.0 +50 20 1.0 +29 21 1.0 +33 21 1.0 +37 21 1.0 +45 21 1.0 +51 21 1.0 +36 23 1.0 +45 23 1.0 +51 23 1.0 +29 24 1.0 +45 24 1.0 +51 24 1.0 +26 25 1.0 +27 25 1.0 +27 26 1.0 +30 28 1.0 +47 28 1.0 +35 29 1.0 +43 29 1.0 +45 29 1.0 +51 29 1.0 +52 29 1.0 +42 30 1.0 +47 30 1.0 +60 32 1.0 +34 33 1.0 +37 33 1.0 +38 33 1.0 +40 33 1.0 +43 33 1.0 +50 33 1.0 +37 34 1.0 +44 34 1.0 +49 34 1.0 +37 36 1.0 +39 36 1.0 +40 36 1.0 +59 36 1.0 +40 37 1.0 +43 37 1.0 +43 43 1.0 +45 37 1.0 +61 37 1.0 +43 38 1.0 +44 38 1.0 +52 38 1.0 +58 38 1.0 +57 39 1.0 +52 40 1.0 +52 52 1.0 +54 41 1.0 +57 41 1.0 +47 42 1.0 +50 42 1.0 +46 43 1.0 +53 43 1.0 +50 45 1.0 +51 45 1.0 +59 45 1.0 +49 46 1.0 +57 48 1.0 +51 50 1.0 +55 51 1.0 +61 53 1.0 +57 54 1.0 +0 10 1.0 +0 14 1.0 +0 15 1.0 +0 40 1.0 +0 42 1.0 +0 47 1.0 +1 17 1.0 +1 19 1.0 +1 26 1.0 +1 1 1.0 +1 27 1.0 +1 28 1.0 +1 36 1.0 +1 41 1.0 +1 54 1.0 +2 10 1.0 +2 42 1.0 +2 44 1.0 +2 61 1.0 +3 8 1.0 +3 14 1.0 +3 59 1.0 +4 51 1.0 +5 9 1.0 +5 13 1.0 +5 56 1.0 +5 57 1.0 +6 9 1.0 +6 13 1.0 +6 17 1.0 +6 54 1.0 +6 56 1.0 +6 57 1.0 +7 19 1.0 +7 27 1.0 +7 30 1.0 +7 40 1.0 +7 54 1.0 +8 20 1.0 +8 28 1.0 +8 37 1.0 +8 45 1.0 +8 59 1.0 +9 13 1.0 +9 17 1.0 +9 32 1.0 +9 41 1.0 +9 57 1.0 +10 29 1.0 +10 42 1.0 +10 47 1.0 +11 51 1.0 +12 33 1.0 +13 17 1.0 +13 32 1.0 +13 41 1.0 +13 54 1.0 +13 57 1.0 +14 16 1.0 +14 24 1.0 +14 33 1.0 +14 34 1.0 +14 37 1.0 +14 38 1.0 +14 40 1.0 +14 43 1.0 +14 50 1.0 +14 52 1.0 +15 18 1.0 +15 24 1.0 +15 40 1.0 +15 45 1.0 +15 55 1.0 +15 59 1.0 +16 20 1.0 +16 33 1.0 +16 37 1.0 +16 38 1.0 +16 50 1.0 +17 22 1.0 +17 25 1.0 +17 27 1.0 +17 31 1.0 +17 57 1.0 +18 20 1.0 +18 21 1.0 +18 24 1.0 +18 29 1.0 +18 45 1.0 +18 51 1.0 +19 30 1.0 +19 54 1.0 +20 28 1.0 +20 36 1.0 +20 38 1.0 +20 44 1.0 +20 47 1.0 +20 50 1.0 +21 29 1.0 +21 33 1.0 +21 37 1.0 +21 45 1.0 +21 51 1.0 +23 36 1.0 +23 45 1.0 +23 51 1.0 +24 29 1.0 +24 45 1.0 
+24 51 1.0 +25 26 1.0 +25 27 1.0 +26 27 1.0 +28 30 1.0 +28 47 1.0 +29 35 1.0 +29 43 1.0 +29 45 1.0 +29 51 1.0 +29 52 1.0 +30 42 1.0 +30 47 1.0 +32 60 1.0 +33 34 1.0 +33 37 1.0 +33 38 1.0 +33 40 1.0 +33 43 1.0 +33 50 1.0 +34 37 1.0 +34 44 1.0 +34 49 1.0 +36 37 1.0 +36 39 1.0 +36 40 1.0 +36 59 1.0 +37 40 1.0 +37 43 1.0 +37 45 1.0 +37 61 1.0 +38 43 1.0 +38 44 1.0 +38 52 1.0 +38 58 1.0 +39 57 1.0 +40 52 1.0 +41 54 1.0 +41 57 1.0 +42 47 1.0 +42 50 1.0 +43 46 1.0 +43 53 1.0 +45 50 1.0 +45 51 1.0 +45 59 1.0 +46 49 1.0 +48 57 1.0 +50 51 1.0 +51 55 1.0 +53 61 1.0 +54 57 1.0 diff --git a/datasets/get_test_data.sh b/datasets/get_test_data.sh index 3e0b6c55c37..0bd97b55cb5 100755 --- a/datasets/get_test_data.sh +++ b/datasets/get_test_data.sh @@ -61,6 +61,12 @@ BENCHMARK_DATASET_DATA=" https://rapidsai-data.s3.us-east-2.amazonaws.com/cugraph/benchmark/benchmark_csv_data.tgz csv " + +SELF_LOOPS_DATASET_DATA=" +# ~1s download +https://rapidsai-data.s3.us-east-2.amazonaws.com/cugraph/benchmark/benchmark_csv_data_self_loops.tgz +self_loops +" ################################################################################ # Do not change the script below this line if only adding/updating a dataset @@ -71,7 +77,7 @@ function hasArg { } if hasArg -h || hasArg --help; then - echo "$0 [--subset | --benchmark]" + echo "$0 [--subset | --benchmark | --self_loops]" exit 0 fi @@ -80,6 +86,8 @@ if hasArg "--benchmark"; then DATASET_DATA="${BENCHMARK_DATASET_DATA}" elif hasArg "--subset"; then DATASET_DATA="${BASE_DATASET_DATA}" +elif hasArg "--self_loops"; then + DATASET_DATA="${SELF_LOOPS_DATASET_DATA}" # Do not include benchmark datasets by default - too big else DATASET_DATA="${BASE_DATASET_DATA} ${EXTENDED_DATASET_DATA}" diff --git a/datasets/karate_mod.mtx b/datasets/karate_mod.mtx new file mode 100644 index 00000000000..3a562406800 --- /dev/null +++ b/datasets/karate_mod.mtx @@ -0,0 +1,81 @@ +2 1 +3 1 +4 1 +5 1 +6 1 +7 1 +8 1 +9 1 +11 1 +12 1 +13 1 +14 1 +18 1 +20 1 +22 1 +32 1 +3 2 +4 2 +8 2 +14 2 +18 2 +20 2 +22 2 +31 2 +4 3 +8 3 +9 3 +10 3 +14 3 +28 3 +29 3 +33 3 +8 4 +13 4 +14 4 +7 5 +11 5 +7 6 +11 6 +17 6 +17 7 +31 9 +33 9 +34 9 +34 10 +34 14 +33 15 +34 15 +33 16 +34 16 +33 19 +34 19 +34 20 +33 21 +34 21 +33 23 +34 23 +26 24 +28 24 +30 24 +33 24 +34 24 +26 25 +28 25 +32 25 +32 26 +30 27 +34 27 +34 28 +32 29 +34 29 +33 30 +34 30 +33 31 +34 31 +33 32 +34 32 +34 33 +35 +36 +37 diff --git a/datasets/karate_multi_edge.csv b/datasets/karate_multi_edge.csv new file mode 100644 index 00000000000..6f331b77a59 --- /dev/null +++ b/datasets/karate_multi_edge.csv @@ -0,0 +1,160 @@ +1 0 1.0 +2 0 1.0 +3 0 1.0 +4 0 1.0 +5 0 1.0 +6 0 1.0 +7 0 1.0 +8 0 1.0 +10 0 1.0 +11 0 1.0 +12 0 1.0 +13 0 1.0 +17 0 1.0 +19 0 1.0 +21 0 1.0 +31 0 1.0 +2 1 1.0 +3 1 1.0 +7 1 1.0 +13 1 1.0 +7 0 1.0 +17 1 1.0 +19 1 1.0 +21 1 1.0 +30 1 1.0 +3 2 1.0 +7 2 1.0 +8 2 1.0 +9 2 1.0 +13 2 1.0 +27 2 1.0 +28 2 1.0 +32 2 1.0 +7 3 1.0 +12 3 1.0 +13 3 1.0 +6 4 1.0 +10 4 1.0 +6 5 1.0 +10 5 1.0 +16 5 1.0 +16 6 1.0 +30 8 1.0 +32 8 1.0 +33 8 1.0 +28 2 1.0 +33 9 1.0 +33 13 1.0 +32 14 1.0 +33 14 1.0 +32 15 1.0 +33 15 1.0 +32 18 1.0 +33 18 1.0 +33 19 1.0 +32 20 1.0 +33 20 1.0 +32 22 1.0 +33 22 1.0 +25 23 1.0 +27 23 1.0 +29 23 1.0 +32 23 1.0 +33 23 1.0 +25 24 1.0 +27 24 1.0 +31 24 1.0 +31 25 1.0 +29 26 1.0 +33 26 1.0 +33 27 1.0 +31 28 1.0 +33 28 1.0 +32 29 1.0 +33 29 1.0 +32 22 1.0 +32 30 1.0 +33 30 1.0 +32 31 1.0 +33 31 1.0 +33 32 1.0 +0 1 1.0 +0 2 1.0 +0 3 1.0 +0 4 1.0 +0 5 1.0 +0 6 1.0 +0 7 1.0 +0 8 1.0 +0 10 1.0 +0 11 1.0 +0 12 1.0 +0 6 1.0 +0 13 1.0 +0 17 
1.0 +0 19 1.0 +0 21 1.0 +0 31 1.0 +1 2 1.0 +1 3 1.0 +1 7 1.0 +1 13 1.0 +1 17 1.0 +1 19 1.0 +1 21 1.0 +1 30 1.0 +2 3 1.0 +2 7 1.0 +2 8 1.0 +2 9 1.0 +2 13 1.0 +2 27 1.0 +2 28 1.0 +2 32 1.0 +3 7 1.0 +3 12 1.0 +3 13 1.0 +4 6 1.0 +4 10 1.0 +5 6 1.0 +5 10 1.0 +5 16 1.0 +6 16 1.0 +8 30 1.0 +8 32 1.0 +8 33 1.0 +9 33 1.0 +13 33 1.0 +14 32 1.0 +14 33 1.0 +15 32 1.0 +15 33 1.0 +18 32 1.0 +18 33 1.0 +19 33 1.0 +20 32 1.0 +20 33 1.0 +22 32 1.0 +22 33 1.0 +23 25 1.0 +23 27 1.0 +23 29 1.0 +23 32 1.0 +23 33 1.0 +24 25 1.0 +24 27 1.0 +24 31 1.0 +25 31 1.0 +26 29 1.0 +26 33 1.0 +27 33 1.0 +28 31 1.0 +28 33 1.0 +29 32 1.0 +29 33 1.0 +30 32 1.0 +30 33 1.0 +31 32 1.0 +31 33 1.0 +32 33 1.0 diff --git a/datasets/karate_s_loop.csv b/datasets/karate_s_loop.csv new file mode 100644 index 00000000000..3959e5f98b3 --- /dev/null +++ b/datasets/karate_s_loop.csv @@ -0,0 +1,160 @@ +1 0 1.0 +2 0 1.0 +3 0 1.0 +4 0 1.0 +5 0 1.0 +6 0 1.0 +7 0 1.0 +8 0 1.0 +10 0 1.0 +11 0 1.0 +12 0 1.0 +13 0 1.0 +17 0 1.0 +19 0 1.0 +21 0 1.0 +31 0 1.0 +2 1 1.0 +3 1 1.0 +7 1 1.0 +13 1 1.0 +17 1 1.0 +19 1 1.0 +21 1 1.0 +30 1 1.0 +3 2 1.0 +7 2 1.0 +8 2 1.0 +9 2 1.0 +13 2 1.0 +27 2 1.0 +28 2 1.0 +32 2 1.0 +7 3 1.0 +12 3 1.0 +13 3 1.0 +6 4 1.0 +10 4 1.0 +6 5 1.0 +10 5 1.0 +10 10 1.0 +16 5 1.0 +16 6 1.0 +30 8 1.0 +32 8 1.0 +33 8 1.0 +33 9 1.0 +33 13 1.0 +32 14 1.0 +33 14 1.0 +32 15 1.0 +33 15 1.0 +32 18 1.0 +33 18 1.0 +33 19 1.0 +32 20 1.0 +33 20 1.0 +32 22 1.0 +33 22 1.0 +25 23 1.0 +27 23 1.0 +29 23 1.0 +32 23 1.0 +33 23 1.0 +25 24 1.0 +27 24 1.0 +31 24 1.0 +31 25 1.0 +29 26 1.0 +33 26 1.0 +33 27 1.0 +31 28 1.0 +33 28 1.0 +32 29 1.0 +33 29 1.0 +32 30 1.0 +33 30 1.0 +32 31 1.0 +33 31 1.0 +33 32 1.0 +0 1 1.0 +0 2 1.0 +0 3 1.0 +0 4 1.0 +0 5 1.0 +0 6 1.0 +0 7 1.0 +0 8 1.0 +0 10 1.0 +0 11 1.0 +0 12 1.0 +0 13 1.0 +0 17 1.0 +0 19 1.0 +0 21 1.0 +0 31 1.0 +1 2 1.0 +1 3 1.0 +1 7 1.0 +1 13 1.0 +1 1 1.0 +1 17 1.0 +1 19 1.0 +1 21 1.0 +1 30 1.0 +2 3 1.0 +2 7 1.0 +2 8 1.0 +2 9 1.0 +2 13 1.0 +2 27 1.0 +2 28 1.0 +2 32 1.0 +3 7 1.0 +3 12 1.0 +3 13 1.0 +4 6 1.0 +4 10 1.0 +5 6 1.0 +5 10 1.0 +5 16 1.0 +6 16 1.0 +8 30 1.0 +8 32 1.0 +8 33 1.0 +9 33 1.0 +13 33 1.0 +13 13 1.0 +14 32 1.0 +14 33 1.0 +15 32 1.0 +15 33 1.0 +18 32 1.0 +18 33 1.0 +19 33 1.0 +20 32 1.0 +20 33 1.0 +22 32 1.0 +22 33 1.0 +23 25 1.0 +23 27 1.0 +23 29 1.0 +23 32 1.0 +23 33 1.0 +24 25 1.0 +24 27 1.0 +24 31 1.0 +25 31 1.0 +26 29 1.0 +26 33 1.0 +27 33 1.0 +28 31 1.0 +28 33 1.0 +29 32 1.0 +29 33 1.0 +30 32 1.0 +30 33 1.0 +31 32 1.0 +31 31 1.0 +31 33 1.0 +32 33 1.0 diff --git a/datasets/karate_str.mtx b/datasets/karate_str.mtx new file mode 100644 index 00000000000..0564d30f91d --- /dev/null +++ b/datasets/karate_str.mtx @@ -0,0 +1,78 @@ +9q a9 1 +ts a9 1 +kt a9 1 +j7 a9 1 +wr a9 1 +n3 a9 1 +2w a9 1 +8a a9 1 +ci a9 1 +cq a9 1 +ca a9 1 +gd a9 1 +y4 a9 1 +kx a9 1 +u3 a9 1 +id a9 1 +ts 9q 1 +kt 9q 1 +2w 9q 1 +gd 9q 1 +y4 9q 1 +kx 9q 1 +u3 9q 1 +7p 9q 1 +kt ts 1 +2w ts 1 +8a ts 1 +ax ts 1 +gd ts 1 +84 ts 1 +ar ts 1 +05 ts 1 +2w kt 1 +ca kt 1 +gd kt 1 +n3 j7 1 +ci j7 1 +n3 wr 1 +ci wr 1 +27 wr 1 +27 n3 1 +7p 8a 1 +05 8a 1 +ux 8a 1 +ux ax 1 +ux gd 1 +05 r9 1 +ux r9 1 +05 44 1 +ux 44 1 +05 a6 1 +ux a6 1 +ux kx 1 +05 d5 1 +ux d5 1 +05 gk 1 +ux gk 1 +fo em 1 +84 em 1 +wc em 1 +05 em 1 +ux em 1 +fo 1j 1 +84 1j 1 +id 1j 1 +id fo 1 +wc nm 1 +ux nm 1 +ux 84 1 +id ar 1 +ux ar 1 +05 wc 1 +ux wc 1 +05 7p 1 +ux 7p 1 +05 id 1 +ux id 1 +ux 05 1 diff --git a/python/cugraph/tests/utils.py b/python/cugraph/tests/utils.py index c2c14e0c02d..56e90b1f6bb 100755 --- a/python/cugraph/tests/utils.py +++ 
b/python/cugraph/tests/utils.py @@ -61,6 +61,21 @@ "netscience.csv"] ] +DATASETS_MULTI_EDGES = [PurePath(RAPIDS_DATASET_ROOT_DIR)/f for f in [ + "karate_multi_edge.csv", + "dolphins_multi_edge.csv"] +] + +DATASETS_STR_ISLT_V = [PurePath(RAPIDS_DATASET_ROOT_DIR)/f for f in [ + "karate_mod.mtx", + "karate_str.mtx"] +] + +DATASETS_SELF_LOOPS = [PurePath(RAPIDS_DATASET_ROOT_DIR)/f for f in [ + "karate_s_loop.csv", + "dolphins_s_loop.csv"] +] + # '../datasets/email-Eu-core.csv'] From 254a999fab9e925e7444c968c3942a5ef6e48d8d Mon Sep 17 00:00:00 2001 From: Rick Ratzel <3039903+rlratzel@users.noreply.github.com> Date: Mon, 22 Mar 2021 17:02:03 -0500 Subject: [PATCH 198/343] Added cmake function and .hpp template for generating version_config.hpp file. (#1476) Adds cmake function and .hpp template for generating a `version_config.hpp` file, similar to RMM's file of the same name. This allows C++ clients to include the file from the libcugraph install to query version information for reporting, checking compatibility, etc. Tested by building and installing libcugraph and checking that `version_config.hpp` was present in the conda environment and contained the correct information. closes #1472 FYI @anaruse Authors: - Rick Ratzel (@rlratzel) Approvers: - Brad Rees (@BradReesWork) - Alex Fender (@afender) URL: https://github.com/rapidsai/cugraph/pull/1476 --- cpp/CMakeLists.txt | 7 +++++++ cpp/cmake/Modules/Version.cmake | 18 ++++++++++++++++++ cpp/cmake/version_config.hpp.in | 20 ++++++++++++++++++++ 3 files changed, 45 insertions(+) create mode 100644 cpp/cmake/Modules/Version.cmake create mode 100644 cpp/cmake/version_config.hpp.in diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 26a8f98e265..34ea935e31d 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -18,6 +18,10 @@ cmake_minimum_required(VERSION 3.18...3.18 FATAL_ERROR) project(CUGRAPH VERSION 0.19.0 LANGUAGES C CXX CUDA) +# Write the version header +include(cmake/Modules/Version.cmake) +write_version() + ################################################################################################### # - build type ------------------------------------------------------------------------------------ @@ -560,6 +564,9 @@ install(TARGETS cugraph LIBRARY install(DIRECTORY include/ DESTINATION include/cugraph) +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/include/cugraph/version_config.hpp + DESTINATION include/cugraph) + install(DIRECTORY ${RAFT_DIR}/cpp/include/raft/ DESTINATION include/cugraph/raft) ################################################################################################### diff --git a/cpp/cmake/Modules/Version.cmake b/cpp/cmake/Modules/Version.cmake new file mode 100644 index 00000000000..15046784175 --- /dev/null +++ b/cpp/cmake/Modules/Version.cmake @@ -0,0 +1,18 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. 
+ +# Generate version_config.hpp from the version found in CMakeLists.txt +function(write_version) + message(STATUS "CUGRAPH VERSION: ${CUGRAPH_VERSION}") + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/version_config.hpp.in + ${CMAKE_CURRENT_BINARY_DIR}/include/cugraph/version_config.hpp @ONLY) +endfunction(write_version) diff --git a/cpp/cmake/version_config.hpp.in b/cpp/cmake/version_config.hpp.in new file mode 100644 index 00000000000..c669d1b97f3 --- /dev/null +++ b/cpp/cmake/version_config.hpp.in @@ -0,0 +1,20 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#define CUGRAPH_VERSION_MAJOR @CUGRAPH_VERSION_MAJOR@ +#define CUGRAPH_VERSION_MINOR @CUGRAPH_VERSION_MINOR@ +#define CUGRAPH_VERSION_PATCH @CUGRAPH_VERSION_PATCH@ From 7256f329773afe93deecd43f3cea3eceefc7fed1 Mon Sep 17 00:00:00 2001 From: Andrei Schaffer <37386037+aschaffer@users.noreply.github.com> Date: Tue, 23 Mar 2021 14:02:14 -0500 Subject: [PATCH 199/343] Fix for bug in SCC on self-loops (#1475) This provides fixes for strongly connected components on graphs with self-loops: https://github.com/rapidsai/cugraph/issues/1471. closes #1471 Authors: - Andrei Schaffer (@aschaffer) Approvers: - Brad Rees (@BradReesWork) - Rick Ratzel (@rlratzel) URL: https://github.com/rapidsai/cugraph/pull/1475 --- cpp/src/components/connectivity.cu | 2 +- cpp/src/components/scc_matrix.cuh | 119 +++++++------ cpp/tests/components/scc_test.cu | 275 ++++++++++++++++++++++++++--- 3 files changed, 310 insertions(+), 86 deletions(-) diff --git a/cpp/src/components/connectivity.cu b/cpp/src/components/connectivity.cu index f4c7bf1d35c..09412160b37 100644 --- a/cpp/src/components/connectivity.cu +++ b/cpp/src/components/connectivity.cu @@ -78,7 +78,7 @@ std::enable_if_t::value> connected_components_impl( stream); } else { SCC_Data sccd(nrows, graph.offsets, graph.indices); - sccd.run_scc(labels); + auto num_iters = sccd.run_scc(labels); } } } // namespace detail diff --git a/cpp/src/components/scc_matrix.cuh b/cpp/src/components/scc_matrix.cuh index 801f1fe0fad..c7f4506b74e 100644 --- a/cpp/src/components/scc_matrix.cuh +++ b/cpp/src/components/scc_matrix.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -71,12 +71,13 @@ struct SCC_Data { p_d_r_o_(p_d_r_o), p_d_c_i_(p_d_c_i), d_C(nrows * nrows, 0), - d_Cprev(nrows * nrows, 0) + d_Cprev(nrows * nrows, 0), + p_d_C_(d_C.data().get()) { init(); } - const thrust::device_vector& get_C(void) const { return d_C; } + ByteT const* get_Cptr(void) const { return p_d_C_; } size_t nrows(void) const { return nrows_; } @@ -100,13 +101,12 @@ struct SCC_Data { void get_labels(IndexT* d_labels) const { - auto* p_d_C = d_C.data().get(); - size_t n = nrows_; // for lambda capture, since I cannot capture `this` (host), or `nrows_` + size_t n = nrows_; // for lambda capture, since I cannot capture `this` (host), or `nrows_` thrust::transform(thrust::device, thrust::make_counting_iterator(0), thrust::make_counting_iterator(nrows_), d_labels, - [n, p_d_C] __device__(IndexT k) { + [n, p_d_C = p_d_C_] __device__(IndexT k) { auto begin = p_d_C + k * n; auto end = begin + n; ByteT one{1}; @@ -124,7 +124,6 @@ struct SCC_Data { size_t nrows = nrows_; size_t count = 0; - ByteT* p_d_C = d_C.data().get(); ByteT* p_d_Cprev = get_Cprev().data().get(); size_t n2 = nrows * nrows; @@ -136,57 +135,60 @@ struct SCC_Data { do { flag.set(0); - thrust::for_each(thrust::device, - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(n2), - [nrows, p_d_C, p_d_Cprev, p_d_flag, p_d_ro, p_d_ci] __device__(size_t indx) { - ByteT one{1}; - - auto i = indx / nrows; - auto j = indx % nrows; - - if ((i == j) || (p_d_Cprev[indx] == one)) - p_d_C[indx] = one; - else { - // this is where a hash-map could help: - // only need hashmap[(i,j)]={0,1} (`1` for "hit"); - // and only for new entries! - // already existent entries are covered by - // the `if`-branch above! - // Hence, hashmap[] can use limited space: - // M = max_l{number(new `1` entries)}, where - // l = #iterations in the do-loop! - // M ~ new `1` entries between A^k and A^{k+1}, - // k=1,2,... - // Might M actually be M ~ nnz(A) = |E| ?! - // Probably, because the primitive hash - //(via find_if) uses a search space of nnz(A) - // - // But, what if more than 1 entry pops-up in a row? - // Not an issue! Because the hash key is (i,j), and no - // more than one entry can exist in position (i,j)! - // - // And remember, we only need to store the new (i,j) keys - // that an iteration produces wrt to the previous iteration! - // - auto begin = p_d_ci + p_d_ro[i]; - auto end = p_d_ci + p_d_ro[i + 1]; - auto pos = thrust::find_if( - thrust::seq, begin, end, [one, j, nrows, p_d_Cprev, p_d_ci](IndexT k) { - return (p_d_Cprev[k * nrows + j] == one); - }); - - if (pos != end) p_d_C[indx] = one; - } - - if (p_d_C[indx] != p_d_Cprev[indx]) - *p_d_flag = 1; // race-condition: harmless, worst case many threads - // write the same value - }); + thrust::for_each( + thrust::device, + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(n2), + [nrows, p_d_C = p_d_C_, p_d_Cprev, p_d_flag, p_d_ro, p_d_ci] __device__(size_t indx) { + ByteT one{1}; + + auto i = indx / nrows; + auto j = indx % nrows; + + if ((i == j) || (p_d_Cprev[indx] == one)) { + p_d_C[indx] = one; + } else { + // this ammounts to A (^,v) B + // (where A = adjacency matrix defined by (p_ro, p_ci), + // B := p_d_Cprev; (^,v) := (*,+) semiring); + // Here's why: + // (A (^,v) B)[i][j] := A[i][.] (^,v) B[j][.] + // (where X[i][.] := i-th row of X; + // X[.][j] := j-th column of X); + // which is: + // 1, iff A[i][.] and B[j][.] 
have a 1 in the same location, + // 0, otherwise; + // + // i.e., corresponfing entry in p_d_C is 1 + // if B[k][j] == 1 for any column k in A's i-th row; + // hence, for each column k of row A[i][.], + // which is the set: + // k \in {p_ci + p_ro[i], ..., p_ci + p_ro[i+1] - 1}, + // check if (B[k][j] == 1), + // i.e., p_d_Cprev[k*nrows + j]) == 1: + // + auto begin = p_d_ci + p_d_ro[i]; + auto end = p_d_ci + p_d_ro[i + 1]; + auto pos = thrust::find_if( + thrust::seq, begin, end, [one, j, nrows, p_d_Cprev, p_d_ci](IndexT k) { + return (p_d_Cprev[k * nrows + j] == one); + }); + + if (pos != end) p_d_C[indx] = one; + } + + if (p_d_C[indx] != p_d_Cprev[indx]) + *p_d_flag = 1; // race-condition: harmless, + // worst case many threads + // write the _same_ value + }); ++count; cudaDeviceSynchronize(); - std::swap(p_d_C, p_d_Cprev); + std::swap(p_d_C_, p_d_Cprev); // Note 1: this swap makes `p_d_Cprev` the + // most recently updated matrix pointer + // at the end of this loop + // (see `Note 2` why this matters); } while (flag.is_set()); // C & Ct: @@ -196,11 +198,13 @@ struct SCC_Data { thrust::for_each(thrust::device, thrust::make_counting_iterator(0), thrust::make_counting_iterator(n2), - [nrows, p_d_C, p_d_Cprev] __device__(size_t indx) { + [nrows, p_d_C = p_d_C_, p_d_Cprev] __device__(size_t indx) { auto i = indx / nrows; auto j = indx % nrows; auto tindx = j * nrows + i; + // Note 2: per Note 1, p_d_Cprev is latest: + // p_d_C[indx] = (p_d_Cprev[indx]) & (p_d_Cprev[tindx]); }); @@ -215,6 +219,9 @@ struct SCC_Data { const IndexT* p_d_c_i_; // column indices thrust::device_vector d_C; thrust::device_vector d_Cprev; + ByteT* p_d_C_{nullptr}; // holds the most recent update, + // which can have storage in any of d_C or d_Cprev, + // because the pointers get swapped! thrust::device_vector& get_Cprev(void) { return d_Cprev; } }; diff --git a/cpp/tests/components/scc_test.cu b/cpp/tests/components/scc_test.cu index 9d5b55f34c6..a74b5a0ad27 100644 --- a/cpp/tests/components/scc_test.cu +++ b/cpp/tests/components/scc_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. * * NVIDIA CORPORATION and its licensors retain all intellectual property * and proprietary rights in and to this software, related documentation @@ -24,6 +24,9 @@ #include +#include +#include +#include #include #include @@ -57,41 +60,48 @@ struct Usecase { std::string matrix_file; }; -// checker of counts of labels for each component -// expensive, for testing purposes only; +// counts number of vertices in each component; +// (of same label); +// potentially expensive, for testing purposes only; // // params: -// p_d_labels: device array of labels of size nrows; -// nrows: |V| for graph G(V, E); -// d_v_counts: #labels for each component; (_not_ pre-allocated!) +// in: p_d_labels: device array of labels of size nrows; +// in: nrows: |V| for graph G(V, E); +// out: d_v_counts: #labels for each component; (_not_ pre-allocated!) 
+// return: number of components; // template -size_t get_component_sizes(const IndexT* p_d_labels, size_t nrows, DVector& d_v_counts) +size_t get_component_sizes(const IndexT* p_d_labels, + size_t nrows, + DVector& d_num_vs_per_component) { DVector d_sorted_l(p_d_labels, p_d_labels + nrows); thrust::sort(d_sorted_l.begin(), d_sorted_l.end()); - size_t counts = - thrust::distance(d_sorted_l.begin(), thrust::unique(d_sorted_l.begin(), d_sorted_l.end())); + auto pair_it = thrust::reduce_by_key(d_sorted_l.begin(), + d_sorted_l.end(), + thrust::make_constant_iterator(1), + thrust::make_discard_iterator(), // ignore... + d_num_vs_per_component.begin()); - IndexT* p_d_srt_l = d_sorted_l.data().get(); - - d_v_counts.resize(counts); - thrust::transform( - thrust::device, - d_sorted_l.begin(), - d_sorted_l.begin() + counts, - d_v_counts.begin(), - [p_d_srt_l, counts] __device__(IndexT indx) { - return thrust::count_if( - thrust::seq, p_d_srt_l, p_d_srt_l + counts, [indx](IndexT label) { return label == indx; }); - }); - - // sort the counts: - thrust::sort(d_v_counts.begin(), d_v_counts.end()); + size_t counts = thrust::distance(d_num_vs_per_component.begin(), pair_it.second); + d_num_vs_per_component.resize(counts); return counts; } + +template +DVector byte_matrix_to_int(const DVector& d_adj_byte_matrix) +{ + auto n2 = d_adj_byte_matrix.size(); + thrust::device_vector d_vec_matrix(n2, 0); + thrust::transform(d_adj_byte_matrix.begin(), + d_adj_byte_matrix.end(), + d_vec_matrix.begin(), + [] __device__(auto byte_v) { return static_cast(byte_v); }); + return d_vec_matrix; +} + } // namespace struct Tests_Strongly_CC : ::testing::TestWithParam { @@ -154,8 +164,8 @@ struct Tests_Strongly_CC : ::testing::TestWithParam { // Allocate memory on host std::vector cooRowInd(nnz); std::vector cooColInd(nnz); - std::vector labels(m); // for G(V, E), m := |V| - std::vector verts(m); + std::vector labels(nrows); // for G(V, E), m := |V| + std::vector verts(nrows); // Read: COO Format // @@ -166,11 +176,11 @@ struct Tests_Strongly_CC : ::testing::TestWithParam { << "\n"; ASSERT_EQ(fclose(fpin), 0); - cugraph::GraphCOOView G_coo(&cooRowInd[0], &cooColInd[0], nullptr, m, nnz); + cugraph::GraphCOOView G_coo(&cooRowInd[0], &cooColInd[0], nullptr, nrows, nnz); auto G_unique = cugraph::coo_to_csr(G_coo); cugraph::GraphCSRView G = G_unique->view(); - rmm::device_vector d_labels(m); + rmm::device_vector d_labels(nrows); size_t count = 0; @@ -190,7 +200,7 @@ struct Tests_Strongly_CC : ::testing::TestWithParam { } strongly_cc_counts.push_back(count); - DVector d_counts; + DVector d_counts(nrows); auto count_labels = get_component_sizes(d_labels.data().get(), nrows, d_counts); } }; @@ -208,4 +218,211 @@ INSTANTIATE_TEST_CASE_P( Usecase("test/datasets/cage6.mtx") // DG "small" enough to meet SCC GPU memory requirements )); +struct SCCSmallTest : public ::testing::Test { +}; + +// FIXME: we should take advantage of gtest parameterization over copy-and-paste reuse. 
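
A minimal sketch of what the FIXME above suggests (not part of this change): the small-graph TEST_F bodies that follow could be folded into a value-parameterized test, mirroring the Usecase / TEST_P / INSTANTIATE_TEST_CASE_P pattern already used by Tests_Strongly_CC earlier in this file. The SmallGraphCase struct, test names, and DVector<size_t> element type are illustrative assumptions; the expected component counts come from the TEST_F cases below.

struct SmallGraphCase {
  std::vector<int> srcs;
  std::vector<int> dsts;
  size_t nrows;
  size_t expected_components;
};

struct SCCSmallParamTest : public ::testing::TestWithParam<SmallGraphCase> {
};

TEST_P(SCCSmallParamTest, CountsComponents)
{
  auto param  = GetParam();
  size_t nnz  = param.srcs.size();
  ASSERT_EQ(nnz, param.dsts.size());

  // Same COO -> CSR -> strongly connected components flow as the TEST_F bodies below.
  cugraph::GraphCOOView<int, int, float> G_coo(
    param.srcs.data(), param.dsts.data(), nullptr, param.nrows, nnz);
  auto G_unique = cugraph::coo_to_csr(G_coo);
  cugraph::GraphCSRView<int, int, float> G = G_unique->view();

  rmm::device_vector<int> d_labels(param.nrows);
  cugraph::connected_components(G, cugraph::cugraph_cc_t::CUGRAPH_STRONG, d_labels.data().get());

  DVector<size_t> d_counts(param.nrows);
  auto count_components = get_component_sizes(d_labels.data().get(), param.nrows, d_counts);

  EXPECT_EQ(count_components, param.expected_components);
}

INSTANTIATE_TEST_CASE_P(
  scc_small_graphs,
  SCCSmallParamTest,
  ::testing::Values(SmallGraphCase{{0, 1, 2, 3, 3, 4}, {1, 0, 0, 1, 4, 3}, 5, 3},
                    SmallGraphCase{{0, 0, 1, 2}, {0, 1, 0, 0}, 3, 2}));
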
+// +TEST_F(SCCSmallTest, CustomGraphSimpleLoops) +{ + using IndexT = int; + + size_t nrows = 5; + size_t n2 = 2 * nrows * nrows; + + cudaDeviceProp prop; + int device = 0; + cudaGetDeviceProperties(&prop, device); + + ASSERT_TRUE(n2 < prop.totalGlobalMem); + + // Allocate memory on host + std::vector cooRowInd{0, 1, 2, 3, 3, 4}; + std::vector cooColInd{1, 0, 0, 1, 4, 3}; + std::vector labels(nrows); + std::vector verts(nrows); + + size_t nnz = cooRowInd.size(); + + EXPECT_EQ(nnz, cooColInd.size()); + + cugraph::GraphCOOView G_coo(&cooRowInd[0], &cooColInd[0], nullptr, nrows, nnz); + auto G_unique = cugraph::coo_to_csr(G_coo); + cugraph::GraphCSRView G = G_unique->view(); + + rmm::device_vector d_labels(nrows); + + cugraph::connected_components(G, cugraph::cugraph_cc_t::CUGRAPH_STRONG, d_labels.data().get()); + + DVector d_counts(nrows); + auto count_components = get_component_sizes(d_labels.data().get(), nrows, d_counts); + + EXPECT_EQ(count_components, static_cast(3)); + + std::vector v_counts(d_counts.size()); + + cudaMemcpy(v_counts.data(), + d_counts.data().get(), + sizeof(size_t) * v_counts.size(), + cudaMemcpyDeviceToHost); + + cudaDeviceSynchronize(); + + std::vector v_counts_exp{2, 1, 2}; + + EXPECT_EQ(v_counts, v_counts_exp); +} + +TEST_F(SCCSmallTest, /*DISABLED_*/ CustomGraphWithSelfLoops) +{ + using IndexT = int; + + size_t nrows = 5; + size_t n2 = 2 * nrows * nrows; + + cudaDeviceProp prop; + int device = 0; + cudaGetDeviceProperties(&prop, device); + + ASSERT_TRUE(n2 < prop.totalGlobalMem); + + // Allocate memory on host + std::vector cooRowInd{0, 0, 1, 1, 2, 2, 3, 3, 4}; + std::vector cooColInd{0, 1, 0, 1, 0, 2, 1, 3, 4}; + std::vector labels(nrows); + std::vector verts(nrows); + + size_t nnz = cooRowInd.size(); + + EXPECT_EQ(nnz, cooColInd.size()); + + cugraph::GraphCOOView G_coo(&cooRowInd[0], &cooColInd[0], nullptr, nrows, nnz); + auto G_unique = cugraph::coo_to_csr(G_coo); + cugraph::GraphCSRView G = G_unique->view(); + + rmm::device_vector d_labels(nrows); + + cugraph::connected_components(G, cugraph::cugraph_cc_t::CUGRAPH_STRONG, d_labels.data().get()); + + DVector d_counts(nrows); + auto count_components = get_component_sizes(d_labels.data().get(), nrows, d_counts); + + EXPECT_EQ(count_components, static_cast(4)); + + std::vector v_counts(d_counts.size()); + + cudaMemcpy(v_counts.data(), + d_counts.data().get(), + sizeof(size_t) * v_counts.size(), + cudaMemcpyDeviceToHost); + + cudaDeviceSynchronize(); + + std::vector v_counts_exp{2, 1, 1, 1}; + + EXPECT_EQ(v_counts, v_counts_exp); +} + +TEST_F(SCCSmallTest, SmallGraphWithSelfLoops1) +{ + using IndexT = int; + + size_t nrows = 3; + + std::vector cooRowInd{0, 0, 1, 2}; + std::vector cooColInd{0, 1, 0, 0}; + + std::vector v_counts_exp{2, 1}; + + std::vector labels(nrows); + std::vector verts(nrows); + + size_t nnz = cooRowInd.size(); + + EXPECT_EQ(nnz, cooColInd.size()); + + cugraph::GraphCOOView G_coo(&cooRowInd[0], &cooColInd[0], nullptr, nrows, nnz); + auto G_unique = cugraph::coo_to_csr(G_coo); + cugraph::GraphCSRView G = G_unique->view(); + + rmm::device_vector d_labels(nrows); + + cugraph::connected_components(G, cugraph::cugraph_cc_t::CUGRAPH_STRONG, d_labels.data().get()); + + DVector d_counts(nrows); + auto count_components = get_component_sizes(d_labels.data().get(), nrows, d_counts); + + // std::cout << "vertex labels:\n"; + // print_v(d_labels, std::cout); + + decltype(count_components) num_components_exp = 2; + + EXPECT_EQ(count_components, num_components_exp); +} + +TEST_F(SCCSmallTest, 
SmallGraphWithIsolated) +{ + using IndexT = int; + + size_t nrows = 3; + + std::vector cooRowInd{0, 0, 1}; + std::vector cooColInd{0, 1, 0}; + + std::vector v_counts_exp{2, 1}; + + std::vector labels(nrows); + std::vector verts(nrows); + + size_t nnz = cooRowInd.size(); + + EXPECT_EQ(nnz, cooColInd.size()); + + // Note: there seems to be a BUG in coo_to_csr() or view() + // COO format doesn't account for isolated vertices; + // + // cugraph::GraphCOOView G_coo(&cooRowInd[0], &cooColInd[0], nullptr, nrows, + // nnz); + // auto G_unique = cugraph::coo_to_csr(G_coo); + // cugraph::GraphCSRView G = G_unique->view(); + // + // + // size_t num_vertices = G.number_of_vertices; + // size_t num_edges = G.number_of_edges; + // + // EXPECT_EQ(num_vertices, nrows); //fails when G was constructed from COO + // EXPECT_EQ(num_edges, nnz); + + std::vector ro{0, 2, 3, 3}; + std::vector ci{0, 1, 0}; + + nnz = ci.size(); + + thrust::device_vector d_ro(ro); + thrust::device_vector d_ci(ci); + + cugraph::GraphCSRView G{ + d_ro.data().get(), d_ci.data().get(), nullptr, static_cast(nrows), static_cast(nnz)}; + + size_t num_vertices = G.number_of_vertices; + size_t num_edges = G.number_of_edges; + + EXPECT_EQ(num_vertices, nrows); + EXPECT_EQ(num_edges, nnz); + + rmm::device_vector d_labels(nrows); + + cugraph::connected_components(G, cugraph::cugraph_cc_t::CUGRAPH_STRONG, d_labels.data().get()); + + DVector d_counts(nrows); + auto count_components = get_component_sizes(d_labels.data().get(), nrows, d_counts); + + // std::cout << "vertex labels:\n"; + // print_v(d_labels, std::cout); + + decltype(count_components) num_components_exp = 2; + + EXPECT_EQ(count_components, num_components_exp); +} + CUGRAPH_TEST_PROGRAM_MAIN() From 5c05a88b7cf9e007d9d3e4f9519d115dbf781478 Mon Sep 17 00:00:00 2001 From: Raymond Douglass Date: Thu, 25 Mar 2021 14:19:09 -0400 Subject: [PATCH 200/343] DOC v0.20 Updates --- CHANGELOG.md | 4 ++++ conda/environments/cugraph_dev_cuda10.1.yml | 16 ++++++++-------- conda/environments/cugraph_dev_cuda10.2.yml | 16 ++++++++-------- conda/environments/cugraph_dev_cuda11.0.yml | 16 ++++++++-------- cpp/CMakeLists.txt | 2 +- docs/source/conf.py | 4 ++-- 6 files changed, 31 insertions(+), 27 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0011b99fbf3..a7b34d3e0fe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +# cuGraph 0.20.0 (Date TBD) + +Please see https://github.com/rapidsai//releases/tag/v0.20.0a for the latest changes to this development branch. + # cuGraph 0.19.0 (Date TBD) Please see https://github.com/rapidsai/cugraph/releases/tag/v0.19.0a for the latest changes to this development branch. 
diff --git a/conda/environments/cugraph_dev_cuda10.1.yml b/conda/environments/cugraph_dev_cuda10.1.yml index f26c3dd45d9..cc2b0538fb1 100644 --- a/conda/environments/cugraph_dev_cuda10.1.yml +++ b/conda/environments/cugraph_dev_cuda10.1.yml @@ -5,17 +5,17 @@ channels: - rapidsai-nightly - conda-forge dependencies: -- cudf=0.19.* -- libcudf=0.19.* -- rmm=0.19.* -- cuxfilter=0.19.* -- librmm=0.19.* +- cudf=0.20.* +- libcudf=0.20.* +- rmm=0.20.* +- cuxfilter=0.20.* +- librmm=0.20.* - dask>=2.12.0 - distributed>=2.12.0 -- dask-cuda=0.19* -- dask-cudf=0.19* +- dask-cuda=0.20* +- dask-cudf=0.20* - nccl>=2.8.4 -- ucx-py=0.19* +- ucx-py=0.20* - ucx-proc=*=gpu - scipy - networkx diff --git a/conda/environments/cugraph_dev_cuda10.2.yml b/conda/environments/cugraph_dev_cuda10.2.yml index 2848cc49dc7..06cd917db9d 100644 --- a/conda/environments/cugraph_dev_cuda10.2.yml +++ b/conda/environments/cugraph_dev_cuda10.2.yml @@ -5,17 +5,17 @@ channels: - rapidsai-nightly - conda-forge dependencies: -- cudf=0.19.* -- libcudf=0.19.* -- rmm=0.19.* -- cuxfilter=0.19.* -- librmm=0.19.* +- cudf=0.20.* +- libcudf=0.20.* +- rmm=0.20.* +- cuxfilter=0.20.* +- librmm=0.20.* - dask>=2.12.0 - distributed>=2.12.0 -- dask-cuda=0.19* -- dask-cudf=0.19* +- dask-cuda=0.20* +- dask-cudf=0.20* - nccl>=2.8.4 -- ucx-py=0.19* +- ucx-py=0.20* - ucx-proc=*=gpu - scipy - networkx diff --git a/conda/environments/cugraph_dev_cuda11.0.yml b/conda/environments/cugraph_dev_cuda11.0.yml index 82e8b409d13..00f202a6025 100644 --- a/conda/environments/cugraph_dev_cuda11.0.yml +++ b/conda/environments/cugraph_dev_cuda11.0.yml @@ -5,17 +5,17 @@ channels: - rapidsai-nightly - conda-forge dependencies: -- cudf=0.19.* -- libcudf=0.19.* -- rmm=0.19.* -- cuxfilter=0.19.* -- librmm=0.19.* +- cudf=0.20.* +- libcudf=0.20.* +- rmm=0.20.* +- cuxfilter=0.20.* +- librmm=0.20.* - dask>=2.12.0 - distributed>=2.12.0 -- dask-cuda=0.19* -- dask-cudf=0.19* +- dask-cuda=0.20* +- dask-cudf=0.20* - nccl>=2.8.4 -- ucx-py=0.19* +- ucx-py=0.20* - ucx-proc=*=gpu - scipy - networkx diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 34ea935e31d..7593a5cb89e 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -16,7 +16,7 @@ cmake_minimum_required(VERSION 3.18...3.18 FATAL_ERROR) -project(CUGRAPH VERSION 0.19.0 LANGUAGES C CXX CUDA) +project(CUGRAPH VERSION 0.20.0 LANGUAGES C CXX CUDA) # Write the version header include(cmake/Modules/Version.cmake) diff --git a/docs/source/conf.py b/docs/source/conf.py index eb4745a61f0..77053a3468a 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -80,9 +80,9 @@ # built documents. # # The short X.Y version. -version = '0.19' +version = '0.20' # The full version, including alpha/beta/rc tags. -release = '0.19.0' +release = '0.20.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. From 26a203dcb770be7e00e2422f8c78c08346a3cad6 Mon Sep 17 00:00:00 2001 From: Ray Douglass <3107146+raydouglass@users.noreply.github.com> Date: Thu, 25 Mar 2021 14:28:47 -0400 Subject: [PATCH 201/343] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a7b34d3e0fe..bd5b313e550 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # cuGraph 0.20.0 (Date TBD) -Please see https://github.com/rapidsai//releases/tag/v0.20.0a for the latest changes to this development branch. 
+Please see https://github.com/rapidsai/cugraph/releases/tag/v0.20.0a for the latest changes to this development branch. # cuGraph 0.19.0 (Date TBD) From 76fad0e2980ddce5a24cc55da94946b511b56a38 Mon Sep 17 00:00:00 2001 From: Dillon Cullinan Date: Thu, 25 Mar 2021 22:50:31 -0400 Subject: [PATCH 202/343] ENH Change conda build directories to work with ccache (#1452) This updates the default conda build directory to a custom path for gpuCI. Small changes to artifact paths as well to make this compatible with Project Flash. Authors: - Dillon Cullinan (@dillon-cullinan) Approvers: - AJ Schmidt (@ajschmidt8) URL: https://github.com/rapidsai/cugraph/pull/1452 --- ci/cpu/build.sh | 11 +++++++---- ci/cpu/upload.sh | 5 +++-- ci/gpu/build.sh | 5 +++-- conda/recipes/cugraph/meta.yaml | 2 +- 4 files changed, 14 insertions(+), 9 deletions(-) diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh index d69448cda4e..8d12b10a640 100755 --- a/ci/cpu/build.sh +++ b/ci/cpu/build.sh @@ -26,6 +26,7 @@ export GPUCI_CONDA_RETRY_SLEEP=30 # Use Ninja to build export CMAKE_GENERATOR="Ninja" +export CONDA_BLD_DIR="${WORKSPACE}/.conda-bld" ################################################################################ # SETUP - Check environment @@ -58,18 +59,20 @@ conda config --set ssl_verify False gpuci_logger "Build conda pkg for libcugraph" if [ "$BUILD_LIBCUGRAPH" == '1' ]; then if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then - conda build conda/recipes/libcugraph + gpuci_conda_retry build --no-build-id --croot ${CONDA_BLD_DIR} conda/recipes/libcugraph else - conda build --dirty --no-remove-work-dir conda/recipes/libcugraph + gpuci_conda_retry build --no-build-id --croot ${CONDA_BLD_DIR} --dirty --no-remove-work-dir conda/recipes/libcugraph + mkdir -p ${CONDA_BLD_DIR}/libcugraph/work + cp -r ${CONDA_BLD_DIR}/work/* ${CONDA_BLD_DIR}/libcugraph/work fi fi gpuci_logger "Build conda pkg for cugraph" if [ "$BUILD_CUGRAPH" == "1" ]; then if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then - conda build conda/recipes/cugraph --python=$PYTHON + gpuci_conda_retry build --croot ${CONDA_BLD_DIR} conda/recipes/cugraph --python=$PYTHON else - conda build conda/recipes/cugraph -c ci/artifacts/cugraph/cpu/conda-bld/ --dirty --no-remove-work-dir --python=$PYTHON + gpuci_conda_retry build --croot ${CONDA_BLD_DIR} conda/recipes/cugraph -c ci/artifacts/cugraph/cpu/.conda-bld/ --dirty --no-remove-work-dir --python=$PYTHON fi fi diff --git a/ci/cpu/upload.sh b/ci/cpu/upload.sh index 0fca82216c3..50e4c25b90b 100644 --- a/ci/cpu/upload.sh +++ b/ci/cpu/upload.sh @@ -1,4 +1,5 @@ #!/bin/bash +# Copyright (c) 2018-2021, NVIDIA CORPORATION. 
# # Adopted from https://github.com/tmcdonell/travis-scripts/blob/dfaac280ac2082cd6bcaba3217428347899f2975/update-accelerate-buildbot.sh @@ -29,8 +30,8 @@ fi gpuci_logger "Get conda file output locations" -export LIBCUGRAPH_FILE=`conda build conda/recipes/libcugraph --output` -export CUGRAPH_FILE=`conda build conda/recipes/cugraph --python=$PYTHON --output` +export LIBCUGRAPH_FILE=`conda build --no-build-id --croot ${CONDA_BLD_DIR} conda/recipes/libcugraph --output` +export CUGRAPH_FILE=`conda build --croot ${CONDA_BLD_DIR} conda/recipes/cugraph --python=$PYTHON --output` ################################################################################ # UPLOAD - Conda packages diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 7242b4a11f5..30dc7373e15 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -16,6 +16,7 @@ function hasArg { export PATH=/opt/conda/bin:/usr/local/cuda/bin:$PATH export PARALLEL_LEVEL=${PARALLEL_LEVEL:-4} export CUDA_REL=${CUDA_VERSION%.*} +export CONDA_ARTIFACT_PATH=${WORKSPACE}/ci/artifacts/cugraph/cpu/.conda-bld/ function cleanup { gpuci_logger "Removing datasets and temp files" @@ -101,11 +102,11 @@ else chrpath -d libcugraph.so patchelf --replace-needed `patchelf --print-needed libcugraph.so | grep faiss` libfaiss.so libcugraph.so - CONDA_FILE=`find $WORKSPACE/ci/artifacts/cugraph/cpu/conda-bld/ -name "libcugraph*.tar.bz2"` + CONDA_FILE=`find ${CONDA_ARTIFACT_PATH} -name "libcugraph*.tar.bz2"` CONDA_FILE=`basename "$CONDA_FILE" .tar.bz2` #get filename without extension CONDA_FILE=${CONDA_FILE//-/=} #convert to conda install echo "Installing $CONDA_FILE" - conda install -c $WORKSPACE/ci/artifacts/cugraph/cpu/conda-bld/ "$CONDA_FILE" + conda install -c ${CONDA_ARTIFACT_PATH} "$CONDA_FILE" echo "Build cugraph..." $WORKSPACE/build.sh cugraph diff --git a/conda/recipes/cugraph/meta.yaml b/conda/recipes/cugraph/meta.yaml index e714b61d774..1ef64ddbe72 100644 --- a/conda/recipes/cugraph/meta.yaml +++ b/conda/recipes/cugraph/meta.yaml @@ -10,7 +10,7 @@ package: version: {{ version }} source: - path: ../../.. + git_url: ../../.. build: number: {{ GIT_DESCRIBE_NUMBER }} From b85bd4788be6cca0f5e57410d222cbecf2786ccf Mon Sep 17 00:00:00 2001 From: Hugo Linsenmaier Date: Fri, 26 Mar 2021 15:28:18 +0100 Subject: [PATCH 203/343] TSP solver bug fix (#1480) Addressed comments from: https://github.com/rapidsai/cugraph/issues/1450 In addition: 1. Made raft handle const. 2. Split algorithm into multiple kernel calls and updated timers. 3. Removed global symbols to retrieve results in struct. Authors: - Hugo Linsenmaier (@hlinsen) Approvers: - Brad Rees (@BradReesWork) - Andrei Schaffer (@aschaffer) - Alex Fender (@afender) URL: https://github.com/rapidsai/cugraph/pull/1480 --- cpp/include/algorithms.hpp | 2 +- cpp/src/traversal/tsp.cu | 202 +++++++++++++++++-------------- cpp/src/traversal/tsp.hpp | 57 +++++++-- cpp/src/traversal/tsp_solver.hpp | 147 +++++++++------------- cpp/src/traversal/tsp_utils.hpp | 25 +--- cpp/tests/traversal/tsp_test.cu | 2 +- 6 files changed, 216 insertions(+), 219 deletions(-) diff --git a/cpp/include/algorithms.hpp b/cpp/include/algorithms.hpp index c3a4f3ec985..8a5474b389c 100644 --- a/cpp/include/algorithms.hpp +++ b/cpp/include/algorithms.hpp @@ -218,7 +218,7 @@ void force_atlas2(GraphCOOView &graph, * @param[out] route Device array containing the returned route. 
* */ -float traveling_salesperson(raft::handle_t &handle, +float traveling_salesperson(raft::handle_t const &handle, int const *vtx_ptr, float const *x_pos, float const *y_pos, diff --git a/cpp/src/traversal/tsp.cu b/cpp/src/traversal/tsp.cu index c669246bc49..a28ddbbaa3f 100644 --- a/cpp/src/traversal/tsp.cu +++ b/cpp/src/traversal/tsp.cu @@ -17,13 +17,15 @@ #include #include +#include + #include "tsp.hpp" #include "tsp_solver.hpp" namespace cugraph { namespace detail { -TSP::TSP(raft::handle_t &handle, +TSP::TSP(raft::handle_t const &handle, int const *vtx_ptr, float const *x_pos, float const *y_pos, @@ -50,56 +52,77 @@ TSP::TSP(raft::handle_t &handle, max_threads_(handle_.get_device_properties().maxThreadsPerBlock), warp_size_(handle_.get_device_properties().warpSize), sm_count_(handle_.get_device_properties().multiProcessorCount), - restart_batch_(4096) + restart_batch_(8192), + neighbors_vec_((k_ + 1) * nodes_, stream_), + work_vec_(restart_batch_ * ((4 * nodes_ + 3 + warp_size_ - 1) / warp_size_ * warp_size_), + stream_), + best_x_pos_vec_(1, stream_), + best_y_pos_vec_(1, stream_), + best_route_vec_(1, stream_) { - allocate(); + setup(); } -void TSP::allocate() +void TSP::setup() { - // Scalars - mylock_ = mylock_scalar_.data(); - best_tour_ = best_tour_scalar_.data(); - climbs_ = climbs_scalar_.data(); + mylock_ = mylock_scalar_.data(); - // Vectors - neighbors_vec_.resize((k_ + 1) * nodes_); + neighbors_ = neighbors_vec_.data(); // pre-allocate workspace for climbs, each block needs a separate permutation space and search // buffer. We allocate a work buffer that will store the computed distances, px, py and the route. // We align it on the warp size. - work_vec_.resize(sizeof(float) * restart_batch_ * - ((4 * nodes_ + 3 + warp_size_ - 1) / warp_size_ * warp_size_)); + work_ = work_vec_.data(); + + results_.best_x_pos = best_x_pos_vec_.data(); + results_.best_y_pos = best_y_pos_vec_.data(); + results_.best_route = best_route_vec_.data(); + results_.best_cost = best_cost_scalar_.data(); +} - // Pointers - neighbors_ = neighbors_vec_.data().get(); - work_ = work_vec_.data().get(); +void TSP::reset_batch() +{ + mylock_scalar_.set_value_zero(stream_); + auto const max{std::numeric_limits::max()}; + best_cost_scalar_.set_value(max, stream_); +} + +void TSP::get_initial_solution(int const batch) +{ + if (!beam_search_) { + random_init<<>>( + work_, x_pos_, y_pos_, vtx_ptr_, nstart_, nodes_, batch, restart_batch_); + CHECK_CUDA(stream_); + } else { + knn_init<<>>( + work_, x_pos_, y_pos_, vtx_ptr_, neighbors_, nstart_, nodes_, k_, batch, restart_batch_); + CHECK_CUDA(stream_); + } } float TSP::compute() { - float valid_coo_dist = 0.f; + float final_cost = 0.f; int num_restart_batches = (restarts_ + restart_batch_ - 1) / restart_batch_; int restart_resid = restarts_ - (num_restart_batches - 1) * restart_batch_; - int global_best = INT_MAX; - float *soln = nullptr; - int *route_sol = nullptr; + int global_best = std::numeric_limits::max(); int best = 0; + std::vector h_x_pos; std::vector h_y_pos; + std::vector h_route; h_x_pos.reserve(nodes_ + 1); h_y_pos.reserve(nodes_ + 1); - - // Stats - int n_timers = 3; - long total_climbs = 0; - std::vector h_times; - struct timeval starttime, endtime; - - // KNN call - knn(); + h_route.reserve(nodes_); + std::vector addr_best_x_pos(1); + std::vector addr_best_y_pos(1); + std::vector addr_best_route(1); + HighResTimer hr_timer; + auto create_timer = [&hr_timer, this](char const *name) { + return VerboseTimer(name, hr_timer, verbose_); + }; if 
(verbose_) { - std::cout << "Doing " << num_restart_batches - 1 << " batches of size " << restart_batch_ + std::cout << "Doing " << num_restart_batches << " batches of size " << restart_batch_ << ", with " << restart_resid << " tail\n"; std::cout << "configuration: " << nodes_ << " nodes, " << restarts_ << " restart\n"; std::cout << "optimizing graph with kswap = " << kswaps << "\n"; @@ -107,82 +130,75 @@ float TSP::compute() // Tell the cache how we want it to behave cudaFuncSetCacheConfig(search_solution, cudaFuncCachePreferEqual); + best_thread_num_ = best_thread_count(nodes_, max_threads_, sm_count_, warp_size_); - int threads = best_thread_count(nodes_, max_threads_, sm_count_, warp_size_); - if (verbose_) std::cout << "Calculated best thread number = " << threads << "\n"; + if (verbose_) std::cout << "Calculated best thread number = " << best_thread_num_ << "\n"; - rmm::device_vector times(n_timers * threads + n_timers); - h_times.reserve(n_timers * threads + n_timers); + if (beam_search_) { + auto timer = create_timer("knn"); + knn(); + } - gettimeofday(&starttime, NULL); - for (int b = 0; b < num_restart_batches; ++b) { - reset<<<1, 1, 0, stream_>>>(mylock_, best_tour_, climbs_); - CHECK_CUDA(stream_); + for (auto batch = 0; batch < num_restart_batches; ++batch) { + reset_batch(); + if (batch == num_restart_batches - 1) restart_batch_ = restart_resid; - if (b == num_restart_batches - 1) restart_batch_ = restart_resid; - - search_solution<<>>(mylock_, - best_tour_, - vtx_ptr_, - beam_search_, - k_, - nodes_, - neighbors_, - x_pos_, - y_pos_, - work_, - nstart_, - times.data().get(), - climbs_, - threads); + { + auto timer = create_timer("initial_sol"); + get_initial_solution(batch); + } - CHECK_CUDA(stream_); - cudaDeviceSynchronize(); + { + auto timer = create_timer("search_sol"); + search_solution<<>>( + results_, mylock_, vtx_ptr_, beam_search_, k_, nodes_, x_pos_, y_pos_, work_, nstart_); + CHECK_CUDA(stream_); + } + + { + auto timer = create_timer("optimal_tour"); + get_optimal_tour<<>>(results_, mylock_, work_, nodes_); + CHECK_CUDA(stream_); + } - CUDA_TRY(cudaMemcpy(&best, best_tour_, sizeof(int), cudaMemcpyDeviceToHost)); cudaDeviceSynchronize(); + best = best_cost_scalar_.value(stream_); + if (verbose_) std::cout << "Best reported by kernel = " << best << "\n"; if (best < global_best) { global_best = best; - CUDA_TRY(cudaMemcpyFromSymbol(&soln, best_soln, sizeof(void *))); - cudaDeviceSynchronize(); - CUDA_TRY(cudaMemcpyFromSymbol(&route_sol, best_route, sizeof(void *))); - cudaDeviceSynchronize(); + + raft::update_host(addr_best_x_pos.data(), results_.best_x_pos, 1, stream_); + raft::update_host(addr_best_y_pos.data(), results_.best_y_pos, 1, stream_); + raft::update_host(addr_best_route.data(), results_.best_route, 1, stream_); + CUDA_TRY(cudaStreamSynchronize(stream_)); + + raft::copy(h_x_pos.data(), addr_best_x_pos[0], nodes_ + 1, stream_); + raft::copy(h_y_pos.data(), addr_best_y_pos[0], nodes_ + 1, stream_); + raft::copy(h_route.data(), addr_best_route[0], nodes_, stream_); + raft::copy(route_, addr_best_route[0], nodes_, stream_); + CHECK_CUDA(stream_); } - total_climbs += climbs_scalar_.value(stream_); - } - gettimeofday(&endtime, NULL); - double runtime = - endtime.tv_sec + endtime.tv_usec / 1e6 - starttime.tv_sec - starttime.tv_usec / 1e6; - long long moves = 1LL * total_climbs * (nodes_ - 2) * (nodes_ - 1) / 2; - - raft::copy(route_, route_sol, nodes_, stream_); - - CUDA_TRY(cudaMemcpy(h_x_pos.data(), soln, sizeof(float) * (nodes_ + 1), cudaMemcpyDeviceToHost)); 
- cudaDeviceSynchronize(); - CUDA_TRY(cudaMemcpy( - h_y_pos.data(), soln + nodes_ + 1, sizeof(float) * (nodes_ + 1), cudaMemcpyDeviceToHost)); - cudaDeviceSynchronize(); - - for (int i = 0; i < nodes_; ++i) { - if (verbose_) { std::cout << h_x_pos[i] << " " << h_y_pos[i] << "\n"; } - valid_coo_dist += euclidean_dist(h_x_pos.data(), h_y_pos.data(), i, i + 1); } - CUDA_TRY(cudaMemcpy(h_times.data(), - times.data().get(), - sizeof(float) * n_timers * threads + n_timers, - cudaMemcpyDeviceToHost)); - cudaDeviceSynchronize(); + for (auto i = 0; i < nodes_; ++i) { + if (verbose_) { std::cout << h_route[i] << ": " << h_x_pos[i] << " " << h_y_pos[i] << "\n"; } + final_cost += euclidean_dist(h_x_pos.data(), h_y_pos.data(), i, i + 1); + } if (verbose_) { - std::cout << "Search runtime = " << runtime << ", " << moves * 1e-9 / runtime << " Gmoves/s\n"; + hr_timer.display(std::cout); std::cout << "Optimized tour length = " << global_best << "\n"; - print_times(h_times, n_timers, handle_.get_device(), threads); } - return valid_coo_dist; + return final_cost; } void TSP::knn() @@ -192,17 +208,17 @@ void TSP::knn() int dim = 2; bool row_major_order = false; - rmm::device_vector input(nodes_ * dim); - float *input_ptr = input.data().get(); + rmm::device_uvector input(nodes_ * dim, stream_); + float *input_ptr = input.data(); raft::copy(input_ptr, x_pos_, nodes_, stream_); raft::copy(input_ptr + nodes_, y_pos_, nodes_, stream_); - rmm::device_vector search_data(nodes_ * dim); - float *search_data_ptr = search_data.data().get(); + rmm::device_uvector search_data(nodes_ * dim, stream_); + float *search_data_ptr = search_data.data(); raft::copy(search_data_ptr, input_ptr, nodes_ * dim, stream_); - rmm::device_vector distances(nodes_ * (k_ + 1)); - float *distances_ptr = distances.data().get(); + rmm::device_uvector distances(nodes_ * (k_ + 1), stream_); + float *distances_ptr = distances.data(); std::vector input_vec; std::vector sizes_vec; @@ -226,7 +242,7 @@ void TSP::knn() } } // namespace detail -float traveling_salesperson(raft::handle_t &handle, +float traveling_salesperson(raft::handle_t const &handle, int const *vtx_ptr, float const *x_pos, float const *y_pos, diff --git a/cpp/src/traversal/tsp.hpp b/cpp/src/traversal/tsp.hpp index b065b779b96..1208f8c8790 100644 --- a/cpp/src/traversal/tsp.hpp +++ b/cpp/src/traversal/tsp.hpp @@ -16,18 +16,28 @@ #pragma once -#include -#include #include + +#include #include + +#include #include #include namespace cugraph { namespace detail { + +struct TSPResults { + float **best_x_pos; + float **best_y_pos; + int **best_route; + int *best_cost; +}; + class TSP { public: - TSP(raft::handle_t &handle, + TSP(raft::handle_t const &handle, int const *vtx_ptr, float const *x_pos, float const *y_pos, @@ -39,14 +49,16 @@ class TSP { bool verbose, int *route); - void allocate(); + void setup(); + void reset_batch(); + void get_initial_solution(int const batch); float compute(); void knn(); ~TSP(){}; private: // Config - raft::handle_t &handle_; + raft::handle_t const &handle_; cudaStream_t stream_; int max_blocks_; int max_threads_; @@ -54,6 +66,7 @@ class TSP { int sm_count_; // how large a grid we want to run, this is fixed int restart_batch_; + int best_thread_num_; // TSP int const *vtx_ptr_; @@ -69,20 +82,42 @@ class TSP { // Scalars rmm::device_scalar mylock_scalar_; - rmm::device_scalar best_tour_scalar_; - rmm::device_scalar climbs_scalar_; + rmm::device_scalar best_cost_scalar_; int *mylock_; - int *best_tour_; - int *climbs_; + int *best_cost_; // Vectors - 
rmm::device_vector neighbors_vec_; - rmm::device_vector work_vec_; + rmm::device_uvector neighbors_vec_; + rmm::device_uvector work_vec_; + rmm::device_uvector best_x_pos_vec_; + rmm::device_uvector best_y_pos_vec_; + rmm::device_uvector best_route_vec_; int64_t *neighbors_; int *work_; int *work_route_; + TSPResults results_; }; + +class VerboseTimer { + public: + VerboseTimer(char const *name, HighResTimer &hr_timer, bool verbose) + : name_(name), hr_timer_(hr_timer), verbose_(verbose) + { + if (verbose_) hr_timer_.start(name_); + } + + ~VerboseTimer() + { + if (verbose_) hr_timer_.stop(); + } + + private: + const char *name_; + HighResTimer &hr_timer_; + bool verbose_; +}; + } // namespace detail } // namespace cugraph diff --git a/cpp/src/traversal/tsp_solver.hpp b/cpp/src/traversal/tsp_solver.hpp index 20d826cac5c..c7b8cdaaf1d 100644 --- a/cpp/src/traversal/tsp_solver.hpp +++ b/cpp/src/traversal/tsp_solver.hpp @@ -29,29 +29,20 @@ namespace cugraph { namespace detail { -__device__ float *best_soln; -__device__ int *best_route; -extern __shared__ int shbuf[]; - -__global__ void reset(int *mylock, int *best_tour, int *climbs) -{ - *mylock = 0; - *best_tour = INT_MAX; - *climbs = 0; - best_soln = nullptr; - best_route = nullptr; -} - -// random permutation kernel -__device__ void random_init(float const *posx, +__global__ void random_init(int *work, + float const *posx, float const *posy, int const *vtx_ptr, - int *path, - float *px, - float *py, int const nstart, - int const nodes) + int const nodes, + int const batch, + int const restart_batch) { + int *buf = &work[blockIdx.x * ((4 * nodes + 3 + 31) / 32 * 32)]; + float *px = (float *)(&buf[nodes]); + float *py = &px[nodes + 1]; + int *path = (int *)(&py[nodes + 1]); + // Fill values for (int i = threadIdx.x; i <= nodes; i += blockDim.x) { px[i] = posx[i]; @@ -60,15 +51,15 @@ __device__ void random_init(float const *posx, } __syncthreads(); - - if (threadIdx.x == 0) { /* serial permutation as starting point */ + // serial permutation as starting point + if (threadIdx.x == 0) { // swap to start at nstart node raft::swapVals(px[0], px[nstart]); raft::swapVals(py[0], py[nstart]); raft::swapVals(path[0], path[nstart]); curandState rndstate; - curand_init(blockIdx.x, 0, 0, &rndstate); + curand_init(blockIdx.x + (restart_batch * batch), 0, 0, &rndstate); for (int i = 1; i < nodes; i++) { int j = curand(&rndstate) % (nodes - 1 - i) + i; if (i == j) continue; @@ -76,34 +67,37 @@ __device__ void random_init(float const *posx, raft::swapVals(py[i], py[j]); raft::swapVals(path[i], path[j]); } - px[nodes] = px[0]; /* close the loop now, avoid special cases later */ + // close the loop now, avoid special cases later + px[nodes] = px[0]; py[nodes] = py[0]; path[nodes] = path[0]; } } -// Use KNN as a starting solution -__device__ void knn_init(float const *posx, +__global__ void knn_init(int *work, + float const *posx, float const *posy, int const *vtx_ptr, int64_t const *neighbors, - int *buf, - int *path, - float *px, - float *py, int const nstart, int const nodes, - int const K) + int const K, + int const batch, + int const restart_batch) { + int *buf = &work[blockIdx.x * ((4 * nodes + 3 + 31) / 32 * 32)]; + float *px = (float *)(&buf[nodes]); + float *py = &px[nodes + 1]; + int *path = (int *)(&py[nodes + 1]); + for (int i = threadIdx.x; i < nodes; i += blockDim.x) buf[i] = 0; __syncthreads(); if (threadIdx.x == 0) { curandState rndstate; - curand_init(blockIdx.x, 0, 0, &rndstate); + curand_init(blockIdx.x + (restart_batch * batch), 0, 0, 
&rndstate); int progress = 0; - int initlen = 0; px[0] = posx[nstart]; py[0] = posy[nstart]; @@ -114,7 +108,6 @@ __device__ void knn_init(float const *posx, while (progress < nodes - 1) { // beam search as starting point for (int i = 1; i <= progress; i++) buf[i] = 0; progress = 0; // reset current location in path and visited array - initlen = 0; int randjumps = 0; while (progress < nodes - 1) { int nj = curand(&rndstate) % K; @@ -146,13 +139,11 @@ __device__ void knn_init(float const *posx, px[progress] = posx[head]; py[progress] = posy[head]; path[progress] = vtx_ptr[head]; - initlen += __float2int_rn(euclidean_dist(px, py, progress, progress - 1)); } } px[nodes] = px[nstart]; py[nodes] = py[nstart]; path[nodes] = path[nstart]; - initlen += __float2int_rn(euclidean_dist(px, py, nodes, nstart)); } } @@ -211,10 +202,23 @@ __device__ void two_opt_search( } } -// This function being runned for each block -__device__ void hill_climbing( - float *px, float *py, int *buf, int *path, int *shbuf, int const nodes, int *climbs) +__global__ __launch_bounds__(2048, 2) void search_solution(TSPResults results, + int *mylock, + int const *vtx_ptr, + bool beam_search, + int const K, + int nodes, + float const *posx, + float const *posy, + int *work, + int const nstart) { + int *buf = &work[blockIdx.x * ((4 * nodes + 3 + 31) / 32 * 32)]; + float *px = (float *)(&buf[nodes]); + float *py = &px[nodes + 1]; + int *path = (int *)(&py[nodes + 1]); + + __shared__ int shbuf[tilesize]; __shared__ int best_change[kswaps]; __shared__ int best_i[kswaps]; __shared__ int best_j[kswaps]; @@ -249,9 +253,6 @@ __device__ void hill_climbing( two_opt_search(buf, px, py, shbuf, &minchange, &mini, &minj, nodes); __syncthreads(); - // Stats only - if (threadIdx.x == 0) atomicAdd(climbs, 1); - shbuf[threadIdx.x] = minchange; int j = blockDim.x; // warp reduction to find best thread results @@ -338,77 +339,45 @@ __device__ void hill_climbing( } while (minchange < 0 && myswaps < 2 * nodes); } -__device__ void get_optimal_tour( - int *mylock, int *best_tour, float *px, float *py, int *path, int *shbuf, int const nodes) +__global__ void get_optimal_tour(TSPResults results, int *mylock, int *work, int const nodes) { + extern __shared__ int accumulator[]; + int climber_id = blockIdx.x; + int *buf = &work[climber_id * ((4 * nodes + 3 + 31) / 32 * 32)]; + float *px = (float *)(&buf[nodes]); + float *py = &px[nodes + 1]; + int *path = (int *)(&py[nodes + 1]); + // Now find actual length of the last tour, result of the climb int term = 0; for (int i = threadIdx.x; i < nodes; i += blockDim.x) { term += __float2int_rn(euclidean_dist(px, py, i, i + 1)); } - shbuf[threadIdx.x] = term; + accumulator[threadIdx.x] = term; __syncthreads(); int j = blockDim.x; // block level reduction do { int k = (j + 1) / 2; - if ((threadIdx.x + k) < j) { shbuf[threadIdx.x] += shbuf[threadIdx.x + k]; } + if ((threadIdx.x + k) < j) { accumulator[threadIdx.x] += accumulator[threadIdx.x + k]; } j = k; // divide active warp size in half __syncthreads(); } while (j > 1); - term = shbuf[0]; + term = accumulator[0]; if (threadIdx.x == 0) { - atomicMin(best_tour, term); + atomicMin(results.best_cost, term); while (atomicExch(mylock, 1) != 0) ; // acquire - if (best_tour[0] == term) { - best_soln = px; - best_route = path; + if (results.best_cost[0] == term) { + results.best_x_pos[0] = px; + results.best_y_pos[0] = py; + results.best_route[0] = path; } *mylock = 0; // release __threadfence(); } } -__global__ __launch_bounds__(2048, 2) void search_solution(int *mylock, 
- int *best_tour, - int const *vtx_ptr, - bool beam_search, - int const K, - int nodes, - int64_t const *neighbors, - float const *posx, - float const *posy, - int *work, - int const nstart, - float *times, - int *climbs, - int threads) -{ - int *buf = &work[blockIdx.x * ((4 * nodes + 3 + 31) / 32 * 32)]; - float *px = (float *)(&buf[nodes]); - float *py = &px[nodes + 1]; - int *path = (int *)(&py[nodes + 1]); - __shared__ int shbuf[tilesize]; - clock_t start; - - start = clock64(); - if (!beam_search) - random_init(posx, posy, vtx_ptr, path, px, py, nstart, nodes); - else - knn_init(posx, posy, vtx_ptr, neighbors, buf, path, px, py, nstart, nodes, K); - __syncthreads(); - times[threadIdx.x] = clock64() - start; - - start = clock64(); - hill_climbing(px, py, buf, path, shbuf, nodes, climbs); - __syncthreads(); - times[threads + threadIdx.x + 1] = clock64() - start; - - start = clock64(); - get_optimal_tour(mylock, best_tour, px, py, path, shbuf, nodes); - times[2 * threads + threadIdx.x + 1] = clock64() - start; -} } // namespace detail } // namespace cugraph diff --git a/cpp/src/traversal/tsp_utils.hpp b/cpp/src/traversal/tsp_utils.hpp index 3faa2efea3b..2a3445f1c81 100644 --- a/cpp/src/traversal/tsp_utils.hpp +++ b/cpp/src/traversal/tsp_utils.hpp @@ -26,34 +26,11 @@ namespace cugraph { namespace detail { -__host__ __device__ inline float euclidean_dist(float *px, float *py, int a, int b) +constexpr float euclidean_dist(float *px, float *py, int a, int b) { return sqrtf((px[a] - px[b]) * (px[a] - px[b]) + (py[a] - py[b]) * (py[a] - py[b])); } -static std::vector device_func = {"Find First", "Hill Climbing", "Retrieve Path"}; - -void print_times(std::vector &h_times, int const n_timers, int device, int threads) -{ - int clock_rate; - cudaDeviceGetAttribute(&clock_rate, cudaDevAttrClockRate, device); - - double total = 0; - h_times[0] /= (float)clock_rate; - total += h_times[0]; - for (int i = 1; i < n_timers; ++i) { - h_times[i * threads + 1] /= (float)clock_rate; - total += h_times[i * threads + 1]; - } - std::cout << "Stats: \n"; - std::cout << device_func[0] << " time: " << h_times[0] * 1e-3 << " " - << (h_times[0] / total) * 100.0 << "%\n"; - for (int i = 1; i < n_timers; ++i) { - std::cout << device_func[i] << " time: " << h_times[i * threads + 1] * 1e-3 << " " - << (h_times[i * threads + 1] / total) * 100.0 << "%\n"; - } -} - // Get maximum number of threads we can run on based on number of nodes, // shared memory usage, max threads per block and SM, max blocks for SM and registers per SM. int best_thread_count(int nodes, int max_threads, int sm_count, int warp_size) diff --git a/cpp/tests/traversal/tsp_test.cu b/cpp/tests/traversal/tsp_test.cu index 383427a56cf..9ebf464ae3e 100644 --- a/cpp/tests/traversal/tsp_test.cu +++ b/cpp/tests/traversal/tsp_test.cu @@ -132,7 +132,7 @@ class Tests_Tsp : public ::testing::TestWithParam { int nodes = load_tsp(param.tsp_file.c_str(), &input); // Device alloc - raft::handle_t handle; + raft::handle_t const handle; rmm::device_uvector vertices(static_cast(nodes), nullptr); rmm::device_uvector route(static_cast(nodes), nullptr); rmm::device_uvector x_pos(static_cast(nodes), nullptr); From 1f0f14eba2e6253423b1a58ca38989261308df6c Mon Sep 17 00:00:00 2001 From: Alex Fender Date: Fri, 26 Mar 2021 09:31:29 -0500 Subject: [PATCH 204/343] MS BFS python APIs + EgoNet updates (#1469) There are various things in this PR. 
Multi-Seed (MS) BFS: - API tentative - Saving research on memory feasability helper function (not in production) - Saving research on running the current BFS concurrently with streams and threads for analysis perf comparison (not in production) EgoNet: - Multithreading in EgoNet which deserializes execution and comes with mild performance improvements on large sizes - Some cleanup Authors: - Alex Fender (@afender) Approvers: - Chuck Hastings (@ChuckHastings) - @Iroy30 - Brad Rees (@BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1469 --- cpp/src/community/egonet.cu | 19 +- cpp/tests/CMakeLists.txt | 16 ++ cpp/tests/community/egonet_test.cu | 283 +++++++++++------------ cpp/tests/experimental/ms_bfs_test.cpp | 301 +++++++++++++++++++++++++ cpp/tests/experimental/streams.cu | 44 ++++ python/cugraph/__init__.py | 4 +- python/cugraph/tests/test_egonet.py | 32 +-- python/cugraph/traversal/__init__.py | 4 +- python/cugraph/traversal/ms_bfs.py | 282 +++++++++++++++++++++++ python/cugraph/utilities/utils.py | 106 +++++---- 10 files changed, 868 insertions(+), 223 deletions(-) create mode 100644 cpp/tests/experimental/ms_bfs_test.cpp create mode 100644 cpp/tests/experimental/streams.cu create mode 100644 python/cugraph/traversal/ms_bfs.py diff --git a/cpp/src/community/egonet.cu b/cpp/src/community/egonet.cu index 067d27f9a92..336a5c939b8 100644 --- a/cpp/src/community/egonet.cu +++ b/cpp/src/community/egonet.cu @@ -79,7 +79,12 @@ extract( // Streams will allocate concurrently later std::vector> reached{}; - reached.reserve(handle.get_num_internal_streams()); + reached.reserve(n_subgraphs); + for (vertex_t i = 0; i < n_subgraphs; i++) { + // Allocations and operations are attached to the worker stream + rmm::device_uvector local_reach(v, handle.get_internal_stream_view(i)); + reached.push_back(std::move(local_reach)); + } // h_source_vertex[i] is used by other streams in the for loop user_stream_view.synchronize(); @@ -87,15 +92,13 @@ extract( HighResTimer hr_timer; hr_timer.start("ego_neighbors"); #endif + +#pragma omp parallel for for (vertex_t i = 0; i < n_subgraphs; i++) { // get light handle from worker pool raft::handle_t light_handle(handle, i); auto worker_stream_view = light_handle.get_stream_view(); - // Allocations and operations are attached to the worker stream - rmm::device_uvector local_reach(v, worker_stream_view); - reached.push_back(std::move(local_reach)); - // BFS with cutoff // consider adding a device API to BFS (ie. 
accept source on the device) rmm::device_uvector predecessors(v, worker_stream_view); // not used @@ -149,10 +152,10 @@ extract( neighbors.resize(h_neighbors_offsets[n_subgraphs]); user_stream_view.synchronize(); - // Construct the neighboors list concurrently +// Construct the neighboors list concurrently +#pragma omp parallel for for (vertex_t i = 0; i < n_subgraphs; i++) { - raft::handle_t light_handle(handle, i); - auto worker_stream_view = light_handle.get_stream_view(); + auto worker_stream_view = handle.get_internal_stream_view(i); thrust::copy(rmm::exec_policy(worker_stream_view), reached[i].begin(), reached[i].end(), diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 5571cf5f124..1dc4a5d3eaa 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -318,6 +318,13 @@ set(MST_TEST_SRC ConfigureTest(MST_TEST "${MST_TEST_SRC}") +################################################################################################### +# - Experimental stream tests ----------------------------------------------------- + +set(EXPERIMENTAL_STREAM_SRCS + "${CMAKE_CURRENT_SOURCE_DIR}/experimental/streams.cu") + +ConfigureTest(EXPERIMENTAL_STREAM "${EXPERIMENTAL_STREAM_SRCS}" "") ################################################################################################### # - Experimental R-mat graph generation tests ----------------------------------------------------- @@ -375,6 +382,15 @@ set(EXPERIMENTAL_BFS_TEST_SRCS ConfigureTest(EXPERIMENTAL_BFS_TEST "${EXPERIMENTAL_BFS_TEST_SRCS}") +################################################################################################### +# - Experimental BFS tests ------------------------------------------------------------------------ + +set(EXPERIMENTAL_MSBFS_TEST_SRCS + "${CMAKE_CURRENT_SOURCE_DIR}/experimental/ms_bfs_test.cpp") + +ConfigureTest(EXPERIMENTAL_MSBFS_TEST "${EXPERIMENTAL_MSBFS_TEST_SRCS}") + + ################################################################################################### # - Experimental SSSP tests ----------------------------------------------------------------------- diff --git a/cpp/tests/community/egonet_test.cu b/cpp/tests/community/egonet_test.cu index a9224b42bc1..e7fea43be42 100644 --- a/cpp/tests/community/egonet_test.cu +++ b/cpp/tests/community/egonet_test.cu @@ -182,150 +182,141 @@ INSTANTIATE_TEST_CASE_P( // For perf analysis /* INSTANTIATE_TEST_CASE_P( -simple_test, -Tests_InducedEgo, -::testing::Values( -InducedEgo_Usecase("test/datasets/soc-LiveJournal1.mtx", std::vector{0}, 1, false), -InducedEgo_Usecase("test/datasets/soc-LiveJournal1.mtx", std::vector{0}, 2, false), -InducedEgo_Usecase("test/datasets/soc-LiveJournal1.mtx", std::vector{0}, 3, false), -InducedEgo_Usecase("test/datasets/soc-LiveJournal1.mtx", std::vector{0}, 4, false), -InducedEgo_Usecase("test/datasets/soc-LiveJournal1.mtx", std::vector{0}, 5, false), -InducedEgo_Usecase( -"test/datasets/soc-LiveJournal1.mtx", std::vector{363617}, 2, false), -InducedEgo_Usecase( -"test/datasets/soc-LiveJournal1.mtx", -std::vector{ - 363617, 722214, 2337449, 2510183, 2513389, 225853, 2035807, 3836330, 1865496, 28755}, - 2, - false), - InducedEgo_Usecase( - "test/datasets/soc-LiveJournal1.mtx", - std::vector{ - 363617, 722214, 2337449, 2510183, 2513389, 225853, 2035807, 3836330, 1865496, 28755, - 2536834, 3070144, 3888415, 3131712, 2382526, 1040771, 2631543, 4607218, 4465829, - 3341686, 2772973, 2611175, 4526129, 2624421, 1220593, 2593137, 3270705, 1503899, - 1213033, 4840102, 4529036, 3421116, 4264831, 
4089751, 4272322, 3486998, 2830318, - 320953, 2388331, 520808, 3023094, 1600294, 3631119, 1716614, 4829213, 1175844, 960680, - 847662, 3277365, 3957318, 3455123, 2454259, 670953, 4465677, 1027332, 2560721, 89061, - 1163406, 3109528, 3221856, 4714426, 2382774, 37828, 4433616, 3283229, 591911, - 4200188, 442522, 872207, 2437601, 741003, 266241, 914618, 3626195, 2021080, - 4679624, 777476, 2527796, 1114017, 640142, 49259, 4069879, 3869098, 1105040, - 4707804, 3208582, 3325885, 1450601, 4072548, 2037062, 2029646, 4575891, 1488598, 79105, - 4827273, 3795434, 4647518, 4733397, 3980718, 1184627}, - 2, - false), - InducedEgo_Usecase( - "test/datasets/soc-LiveJournal1.mtx", - std::vector{ - 363617, 722214, 2337449, 2510183, 2513389, 225853, 2035807, 3836330, 1865496, 28755, - 2536834, 3070144, 3888415, 3131712, 2382526, 1040771, 2631543, 4607218, 4465829, - 3341686, 2772973, 2611175, 4526129, 2624421, 1220593, 2593137, 3270705, 1503899, - 1213033, 4840102, 4529036, 3421116, 4264831, 4089751, 4272322, 3486998, 2830318, - 320953, 2388331, 520808, 3023094, 1600294, 3631119, 1716614, 4829213, 1175844, 960680, - 847662, 3277365, 3957318, 3455123, 2454259, 670953, 4465677, 1027332, 2560721, 89061, - 1163406, 3109528, 3221856, 4714426, 2382774, 37828, 4433616, 3283229, 591911, - 4200188, 442522, 872207, 2437601, 741003, 266241, 914618, 3626195, 2021080, - 4679624, 777476, 2527796, 1114017, 640142, 49259, 4069879, 3869098, 1105040, - 4707804, 3208582, 3325885, 1450601, 4072548, 2037062, 2029646, 4575891, 1488598, 79105, - 4827273, 3795434, 4647518, 4733397, 3980718, 1184627, 984983, 3114832, 1967741, - 1599818, 144593, 2698770, 2889449, 2495550, 1053813, 1193622, 686026, 3989015, - 2040719, 4693428, 3190376, 2926728, 3399030, 1664419, 662429, 4526841, 2186957, - 3752558, 2440046, 2930226, 3633006, 4058166, 3137060, 3499296, 2126343, 148971, - 2199672, 275811, 2813976, 2274536, 1189239, 1335942, 2465624, 2596042, 829684, 193400, - 2682845, 3691697, 4022437, 4051170, 4195175, 2876420, 3984220, 2174475, 326134, - 2606530, 2493046, 4706121, 1498980, 4576225, 1271339, 44832, 1875673, 4664940, - 134931, 736397, 4333554, 2751031, 2163610, 2879676, 3174153, 3317403, 2052464, - 1881883, 4757859, 3596257, 2358088, 2578758, 447504, 590720, 1717038, 1869795, - 1133885, 3027521, 840312, 2818881, 3654321, 2730947, 353585, 1134903, 2223378, - 1508824, 3662521, 1363776, 2712071, 288441, 1204581, 3502242, 4645567, 2767267, - 1514366, 3956099, 1422145, 1216608, 2253360, 189132, 4238225, 1345783, 451571, 1599442, - 3237284, 4711405, 929446, 1857675, 150759, 1277633, 761210, 138628, 1026833, - 2599544, 2464737, 989203, 3399615, 2144292, 216142, 637312, 2044964, 716256, 1660632, - 1762919, 4784357, 2213415, 2764769, 291806, 609772, 3264819, 1870953, 1516385, - 235647, 1045474, 2664957, 819095, 1824119, 4045271, 4448109, 1676788, 4285177, - 1580502, 3546548, 2771971, 3927086, 1339779, 3156204, 1730998, 1172522, 2433024, - 4533449, 479930, 2010695, 672994, 3542039, 3176455, 26352, 2137735, 866910, - 4410835, 2623982, 3603159, 2555625, 2765653, 267865, 2015523, 1009052, 4713994, - 1600667, 2176195, 3179631, 4570390, 2018424, 3356384, 1784287, 894861, 3622099, - 1647273, 3044136, 950354, 1491760, 3416929, 3757300, 2244912, 4129215, 1600848, - 3867343, 72329, 919189, 992521, 3445975, 4712557, 4680974, 188419, 2612093, - 1991268, 3566207, 2281468, 3859078, 2492806, 3398628, 763441, 2679107, 2554420, - 2130132, 4664374, 1182901, 3890770, 4714667, 4209303, 4013060, 3617653, 2040022, - 3296519, 4190671, 1693353, 2678411, 3788834, 
2781815, 191965, 1083926, 503974, 3529226, - 1650522, 1900976, 542080, 3423929, 3418905, 878165, 4701703, 3022790, 4316365, 76365, - 4053672, 1358185, 3830478, 4445661, 3210024, 1895915, 4541133, 2938808, 562788, - 3920065, 1458776, 4052046, 2967475, 1092809, 3203538, 159626, 3399464, 214467, - 3343982, 1811854, 3189045, 4272117, 4701563, 424807, 4341116, 760545, 4674683, - 1538018, 386762, 194237, 2162719, 1694433, 943728, 2389036, 2196653, 3085571, - 1513424, 3689413, 3278747, 4197291, 3324063, 3651090, 1737936, 2768803, 2768889, - 3108096, 4311775, 3569480, 886705, 733256, 2477493, 1735412, 2960895, 1983781, - 1861797, 3566460, 4537673, 1164093, 3499764, 4553071, 3518985, 847658, 918948, - 2922351, 1056144, 652895, 1013195, 780505, 1702928, 3562838, 1432719, 2405207, - 1054920, 641647, 2240939, 3617702, 383165, 652641, 879593, 1810739, 2096385, - 4497865, 4768530, 1743968, 3582014, 1025009, 3002122, 2422190, 527647, 1251821, - 2571153, 4095874, 3705333, 3637407, 1385567, 4043855, 4041930, 2433139, 1710383, - 1127734, 4362316, 711588, 817839, 3214775, 910077, 1313768, 2382229, 16864, 2081770, - 3095420, 3195272, 548711, 2259860, 1167323, 2435974, 425238, 2085179, 2630042, - 2632881, 2867923, 3703565, 1037695, 226617, 4379130, 1541468, 3581937, 605965, - 1137674, 4655221, 4769963, 1394370, 4425315, 2990132, 2364485, 1561137, 2713384, - 481509, 2900382, 934766, 2986774, 1767669, 298593, 2502539, 139296, 3794229, - 4002180, 4718138, 2909238, 423691, 3023810, 2784924, 2760160, 1971980, 316683, - 3828090, 3253691, 4839313, 1203624, 584938, 3901482, 1747543, 1572737, 3533226, - 774708, 1691195, 1037110, 1557763, 225120, 4424243, 3524086, 1717663, 4332507, - 3513592, 4274932, 1232118, 873498, 1416042, 2488925, 111391, 4704545, 4492545, - 445317, 1584812, 2187737, 2471948, 3731678, 219255, 2282627, 2589971, 2372185, - 4609096, 3673961, 2524410, 12823, 2437155, 3015974, 4188352, 3184084, 3690756, - 1222341, 1278376, 3652030, 4162647, 326548, 3930062, 3926100, 1551222, 2722165, - 4526695, 3997534, 4815513, 3139056, 2547644, 3028915, 4149092, 3656554, 2691582, - 2676699, 1878842, 260174, 3129900, 4379993, 182347, 2189338, 3783616, 2616666, - 2596952, 243007, 4179282, 2730, 1939894, 2332032, 3335636, 182332, 3112260, - 2174584, 587481, 4527368, 3154106, 3403059, 673206, 2150292, 446521, 1600204, - 4819428, 2591357, 48490, 2917012, 2285923, 1072926, 2824281, 4364250, 956033, 311938, - 37251, 3729300, 2726300, 644966, 1623020, 1419070, 4646747, 2417222, 2680238, - 2561083, 1793801, 2349366, 339747, 611366, 4684147, 4356907, 1277161, 4510381, - 3218352, 4161658, 3200733, 1172372, 3997786, 3169266, 3353418, 2248955, 2875885, - 2365369, 498208, 2968066, 2681505, 2059048, 2097106, 3607540, 1121504, 2016789, - 1762605, 3138431, 866081, 3705757, 3833066, 2599788, 760816, 4046672, 1544367, - 2983906, 4842911, 209599, 1250954, 3333704, 561212, 4674336, 2831841, 3690724, - 2929360, 4830834, 1177524, 2487687, 3525137, 875283, 651241, 2110742, 1296646, - 1543739, 4349417, 2384725, 1931751, 1519208, 1520034, 3385008, 3219962, 734912, 170230, - 1741419, 729913, 2860117, 2362381, 1199807, 2424230, 177824, 125948, 2722701, - 4687548, 1140771, 3232742, 4522020, 4376360, 1125603, 590312, 2481884, 138951, - 4086775, 615155, 3395781, 4587272, 283209, 568470, 4296185, 4344150, 2454321, - 2672602, 838828, 4051647, 1709120, 3074610, 693235, 4356087, 3018806, 239410, - 2431497, 691186, 766276, 4462126, 859155, 2370304, 1571808, 1938673, 1694955, - 3871296, 4245059, 3987376, 301524, 2512461, 3410437, 3300380, 684922, 
4581995, - 3599557, 683515, 1850634, 3704678, 1937490, 2035591, 3718533, 2065879, 3160765, - 1467884, 1912241, 2501509, 3668572, 3390469, 2501150, 612319, 713633, 1976262, 135946, - 3641535, 632083, 13414, 4217765, 4137712, 2550250, 3281035, 4179598, 961045, - 2020694, 4380006, 1345936, 289162, 1359035, 770872, 4509911, 3947317, 4719693, - 248568, 2625660, 1237232, 2153208, 4814282, 1259954, 3677369, 861222, 2883506, - 3339149, 3998335, 491017, 1609022, 2648112, 742132, 649609, 4206953, 3131106, - 3504814, 3344486, 611721, 3215620, 2856233, 4447505, 1949222, 1868345, 712710, 6966, - 4730666, 3181872, 2972889, 3038521, 3525444, 4385208, 1845613, 1124187, 2030476, - 4468651, 2478792, 3473580, 3783357, 1852991, 1648485, 871319, 1670723, 4458328, - 3218600, 1811100, 3443356, 2233873, 3035207, 2548692, 3337891, 3773674, 1552957, - 4782811, 3144712, 3523466, 1491315, 3955852, 1838410, 3164028, 1092543, 776459, - 2959379, 2541744, 4064418, 3908320, 2854145, 3960709, 1348188, 977678, 853619, - 1304291, 2848702, 1657913, 1319826, 3322665, 788037, 2913686, 4471279, 1766285, 348304, - 56570, 1892118, 4017244, 401006, 3524539, 4310134, 1624693, 4081113, 957511, 849400, - 129975, 2616130, 378537, 1556787, 3916162, 1039980, 4407778, 2027690, 4213675, - 839863, 683134, 75805, 2493150, 4215796, 81587, 751845, 1255588, 1947964, - 1950470, 859401, 3077088, 3931110, 2316256, 1523761, 4527477, 4237511, 1123513, - 4209796, 3584772, 4250563, 2091754, 1618766, 2139944, 4525352, 382159, 2955887, 41760, - 2313998, 496912, 3791570, 3904792, 3613654, 873959, 127076, 2537797, 2458107, - 4543265, 3661909, 26828, 271816, 17854, 2461269, 1776042, 1573899, 3409957, - 4335712, 4534313, 3392751, 1230124, 2159031, 4444015, 3373087, 3848014, 2026600, - 1382747, 3537242, 4536743, 4714155, 3788371, 3570849, 173741, 211962, 4377778, - 119369, 2856973, 2945854, 1508054, 4503932, 3141566, 1842177, 3448683, 3384614, - 2886508, 1573965, 990618, 3053734, 2918742, 4508753, 1032149, 60943, 4291620, - 722607, 2883224, 169359, 4356585, 3725543, 3678729, 341673, 3592828, 4077251, - 3382936, 3885685, 4630994, 1286698, 4449616, 1138430, 3113385, 4660578, 2539973, - 4562286, 4085089, 494737, 3967610, 2130702, 1823755, 1369324, 3796951, 956299, 141730, - 935144, 4381893, 4412545, 1382250, 3024476, 2364546, 3396164, 3573511, 314081, 577688, - 4154135, 1567018, 4047761, 2446220, 1148833, 4842497, 3967186, 1175290, 3749667, - 1209593, 3295627, 3169065, 2460328, 1838486, 1436923, 2843887, 3676426, 2079145, - 2975635, 535071, 4287509, 3281107, 39606, 3115500, 3204573, 722131, 3124073}, -2, -false)));*/ + simple_test, + Tests_InducedEgo, + ::testing::Values( + InducedEgo_Usecase("test/datasets/soc-LiveJournal1.mtx", std::vector{0}, 1, false), + InducedEgo_Usecase("test/datasets/soc-LiveJournal1.mtx", std::vector{0}, 2, false), + InducedEgo_Usecase("test/datasets/soc-LiveJournal1.mtx", std::vector{0}, 3, false), + InducedEgo_Usecase("test/datasets/soc-LiveJournal1.mtx", std::vector{0}, 4, false), + InducedEgo_Usecase("test/datasets/soc-LiveJournal1.mtx", std::vector{0}, 5, false), + InducedEgo_Usecase( + "test/datasets/soc-LiveJournal1.mtx", std::vector{363617}, 2, false), + InducedEgo_Usecase( + "test/datasets/soc-LiveJournal1.mtx", + std::vector{ + 363617, 722214, 2337449, 2510183, 2513389, 225853, 2035807, 3836330, 1865496, 28755}, + 2, + false), + InducedEgo_Usecase( + "test/datasets/soc-LiveJournal1.mtx", + std::vector{ + 363617, 722214, 2337449, 2510183, 2513389, 225853, 2035807, 3836330, 1865496, 28755, + 2536834, 3070144, 3888415, 3131712, 
2382526, 1040771, 2631543, 4607218, 4465829, 3341686, + 2772973, 2611175, 4526129, 2624421, 1220593, 2593137, 3270705, 1503899, 1213033, 4840102, + 4529036, 3421116, 4264831, 4089751, 4272322, 3486998, 2830318, 320953, 2388331, 520808, + 3023094, 1600294, 3631119, 1716614, 4829213, 1175844, 960680, 847662, 3277365, 3957318, + 3455123, 2454259, 670953, 4465677, 1027332, 2560721, 89061, 1163406, 3109528, 3221856, + 4714426, 2382774, 37828, 4433616, 3283229, 591911, 4200188, 442522, 872207, 2437601, + 741003, 266241, 914618, 3626195, 2021080, 4679624, 777476, 2527796, 1114017, 640142, + 49259, 4069879, 3869098, 1105040, 4707804, 3208582, 3325885, 1450601, 4072548, 2037062, + 2029646, 4575891, 1488598, 79105, 4827273, 3795434, 4647518, 4733397, 3980718, 1184627}, + 2, + false), + InducedEgo_Usecase( + "test/datasets/soc-LiveJournal1.mtx", + std::vector{ + 363617, 722214, 2337449, 2510183, 2513389, 225853, 2035807, 3836330, 1865496, 28755, + 2536834, 3070144, 3888415, 3131712, 2382526, 1040771, 2631543, 4607218, 4465829, 3341686, + 2772973, 2611175, 4526129, 2624421, 1220593, 2593137, 3270705, 1503899, 1213033, 4840102, + 4529036, 3421116, 4264831, 4089751, 4272322, 3486998, 2830318, 320953, 2388331, 520808, + 3023094, 1600294, 3631119, 1716614, 4829213, 1175844, 960680, 847662, 3277365, 3957318, + 3455123, 2454259, 670953, 4465677, 1027332, 2560721, 89061, 1163406, 3109528, 3221856, + 4714426, 2382774, 37828, 4433616, 3283229, 591911, 4200188, 442522, 872207, 2437601, + 741003, 266241, 914618, 3626195, 2021080, 4679624, 777476, 2527796, 1114017, 640142, + 49259, 4069879, 3869098, 1105040, 4707804, 3208582, 3325885, 1450601, 4072548, 2037062, + 2029646, 4575891, 1488598, 79105, 4827273, 3795434, 4647518, 4733397, 3980718, 1184627, + 984983, 3114832, 1967741, 1599818, 144593, 2698770, 2889449, 2495550, 1053813, 1193622, + 686026, 3989015, 2040719, 4693428, 3190376, 2926728, 3399030, 1664419, 662429, 4526841, + 2186957, 3752558, 2440046, 2930226, 3633006, 4058166, 3137060, 3499296, 2126343, 148971, + 2199672, 275811, 2813976, 2274536, 1189239, 1335942, 2465624, 2596042, 829684, 193400, + 2682845, 3691697, 4022437, 4051170, 4195175, 2876420, 3984220, 2174475, 326134, 2606530, + 2493046, 4706121, 1498980, 4576225, 1271339, 44832, 1875673, 4664940, 134931, 736397, + 4333554, 2751031, 2163610, 2879676, 3174153, 3317403, 2052464, 1881883, 4757859, 3596257, + 2358088, 2578758, 447504, 590720, 1717038, 1869795, 1133885, 3027521, 840312, 2818881, + 3654321, 2730947, 353585, 1134903, 2223378, 1508824, 3662521, 1363776, 2712071, 288441, + 1204581, 3502242, 4645567, 2767267, 1514366, 3956099, 1422145, 1216608, 2253360, 189132, + 4238225, 1345783, 451571, 1599442, 3237284, 4711405, 929446, 1857675, 150759, 1277633, + 761210, 138628, 1026833, 2599544, 2464737, 989203, 3399615, 2144292, 216142, 637312, + 2044964, 716256, 1660632, 1762919, 4784357, 2213415, 2764769, 291806, 609772, 3264819, + 1870953, 1516385, 235647, 1045474, 2664957, 819095, 1824119, 4045271, 4448109, 1676788, + 4285177, 1580502, 3546548, 2771971, 3927086, 1339779, 3156204, 1730998, 1172522, 2433024, + 4533449, 479930, 2010695, 672994, 3542039, 3176455, 26352, 2137735, 866910, 4410835, + 2623982, 3603159, 2555625, 2765653, 267865, 2015523, 1009052, 4713994, 1600667, 2176195, + 3179631, 4570390, 2018424, 3356384, 1784287, 894861, 3622099, 1647273, 3044136, 950354, + 1491760, 3416929, 3757300, 2244912, 4129215, 1600848, 3867343, 72329, 919189, 992521, + 3445975, 4712557, 4680974, 188419, 2612093, 1991268, 3566207, 2281468, 3859078, 2492806, + 
3398628, 763441, 2679107, 2554420, 2130132, 4664374, 1182901, 3890770, 4714667, 4209303, + 4013060, 3617653, 2040022, 3296519, 4190671, 1693353, 2678411, 3788834, 2781815, 191965, + 1083926, 503974, 3529226, 1650522, 1900976, 542080, 3423929, 3418905, 878165, 4701703, + 3022790, 4316365, 76365, 4053672, 1358185, 3830478, 4445661, 3210024, 1895915, 4541133, + 2938808, 562788, 3920065, 1458776, 4052046, 2967475, 1092809, 3203538, 159626, 3399464, + 214467, 3343982, 1811854, 3189045, 4272117, 4701563, 424807, 4341116, 760545, 4674683, + 1538018, 386762, 194237, 2162719, 1694433, 943728, 2389036, 2196653, 3085571, 1513424, + 3689413, 3278747, 4197291, 3324063, 3651090, 1737936, 2768803, 2768889, 3108096, 4311775, + 3569480, 886705, 733256, 2477493, 1735412, 2960895, 1983781, 1861797, 3566460, 4537673, + 1164093, 3499764, 4553071, 3518985, 847658, 918948, 2922351, 1056144, 652895, 1013195, + 780505, 1702928, 3562838, 1432719, 2405207, 1054920, 641647, 2240939, 3617702, 383165, + 652641, 879593, 1810739, 2096385, 4497865, 4768530, 1743968, 3582014, 1025009, 3002122, + 2422190, 527647, 1251821, 2571153, 4095874, 3705333, 3637407, 1385567, 4043855, 4041930, + 2433139, 1710383, 1127734, 4362316, 711588, 817839, 3214775, 910077, 1313768, 2382229, + 16864, 2081770, 3095420, 3195272, 548711, 2259860, 1167323, 2435974, 425238, 2085179, + 2630042, 2632881, 2867923, 3703565, 1037695, 226617, 4379130, 1541468, 3581937, 605965, + 1137674, 4655221, 4769963, 1394370, 4425315, 2990132, 2364485, 1561137, 2713384, 481509, + 2900382, 934766, 2986774, 1767669, 298593, 2502539, 139296, 3794229, 4002180, 4718138, + 2909238, 423691, 3023810, 2784924, 2760160, 1971980, 316683, 3828090, 3253691, 4839313, + 1203624, 584938, 3901482, 1747543, 1572737, 3533226, 774708, 1691195, 1037110, 1557763, + 225120, 4424243, 3524086, 1717663, 4332507, 3513592, 4274932, 1232118, 873498, 1416042, + 2488925, 111391, 4704545, 4492545, 445317, 1584812, 2187737, 2471948, 3731678, 219255, + 2282627, 2589971, 2372185, 4609096, 3673961, 2524410, 12823, 2437155, 3015974, 4188352, + 3184084, 3690756, 1222341, 1278376, 3652030, 4162647, 326548, 3930062, 3926100, 1551222, + 2722165, 4526695, 3997534, 4815513, 3139056, 2547644, 3028915, 4149092, 3656554, 2691582, + 2676699, 1878842, 260174, 3129900, 4379993, 182347, 2189338, 3783616, 2616666, 2596952, + 243007, 4179282, 2730, 1939894, 2332032, 3335636, 182332, 3112260, 2174584, 587481, + 4527368, 3154106, 3403059, 673206, 2150292, 446521, 1600204, 4819428, 2591357, 48490, + 2917012, 2285923, 1072926, 2824281, 4364250, 956033, 311938, 37251, 3729300, 2726300, + 644966, 1623020, 1419070, 4646747, 2417222, 2680238, 2561083, 1793801, 2349366, 339747, + 611366, 4684147, 4356907, 1277161, 4510381, 3218352, 4161658, 3200733, 1172372, 3997786, + 3169266, 3353418, 2248955, 2875885, 2365369, 498208, 2968066, 2681505, 2059048, 2097106, + 3607540, 1121504, 2016789, 1762605, 3138431, 866081, 3705757, 3833066, 2599788, 760816, + 4046672, 1544367, 2983906, 4842911, 209599, 1250954, 3333704, 561212, 4674336, 2831841, + 3690724, 2929360, 4830834, 1177524, 2487687, 3525137, 875283, 651241, 2110742, 1296646, + 1543739, 4349417, 2384725, 1931751, 1519208, 1520034, 3385008, 3219962, 734912, 170230, + 1741419, 729913, 2860117, 2362381, 1199807, 2424230, 177824, 125948, 2722701, 4687548, + 1140771, 3232742, 4522020, 4376360, 1125603, 590312, 2481884, 138951, 4086775, 615155, + 3395781, 4587272, 283209, 568470, 4296185, 4344150, 2454321, 2672602, 838828, 4051647, + 1709120, 3074610, 693235, 4356087, 3018806, 239410, 
2431497, 691186, 766276, 4462126, + 859155, 2370304, 1571808, 1938673, 1694955, 3871296, 4245059, 3987376, 301524, 2512461, + 3410437, 3300380, 684922, 4581995, 3599557, 683515, 1850634, 3704678, 1937490, 2035591, + 3718533, 2065879, 3160765, 1467884, 1912241, 2501509, 3668572, 3390469, 2501150, 612319, + 713633, 1976262, 135946, 3641535, 632083, 13414, 4217765, 4137712, 2550250, 3281035, + 4179598, 961045, 2020694, 4380006, 1345936, 289162, 1359035, 770872, 4509911, 3947317, + 4719693, 248568, 2625660, 1237232, 2153208, 4814282, 1259954, 3677369, 861222, 2883506, + 3339149, 3998335, 491017, 1609022, 2648112, 742132, 649609, 4206953, 3131106, 3504814, + 3344486, 611721, 3215620, 2856233, 4447505, 1949222, 1868345, 712710, 6966, 4730666, + 3181872, 2972889, 3038521, 3525444, 4385208, 1845613, 1124187, 2030476, 4468651, 2478792, + 3473580, 3783357, 1852991, 1648485, 871319, 1670723, 4458328, 3218600, 1811100, 3443356, + 2233873, 3035207, 2548692, 3337891, 3773674, 1552957, 4782811, 3144712, 3523466, 1491315, + 3955852, 1838410, 3164028, 1092543, 776459, 2959379, 2541744, 4064418, 3908320, 2854145, + 3960709, 1348188, 977678, 853619, 1304291, 2848702, 1657913, 1319826, 3322665, 788037, + 2913686, 4471279, 1766285, 348304, 56570, 1892118, 4017244, 401006, 3524539, 4310134, + 1624693, 4081113, 957511, 849400, 129975, 2616130, 378537, 1556787, 3916162, 1039980, + 4407778, 2027690, 4213675, 839863, 683134, 75805, 2493150, 4215796, 81587, 751845, + 1255588, 1947964, 1950470, 859401, 3077088, 3931110, 2316256, 1523761, 4527477, 4237511, + 1123513, 4209796, 3584772, 4250563, 2091754, 1618766, 2139944, 4525352, 382159, 2955887, + 41760, 2313998, 496912, 3791570, 3904792, 3613654, 873959, 127076, 2537797, 2458107, + 4543265, 3661909, 26828, 271816, 17854, 2461269, 1776042, 1573899, 3409957, 4335712, + 4534313, 3392751, 1230124, 2159031, 4444015, 3373087, 3848014, 2026600, 1382747, 3537242, + 4536743, 4714155, 3788371, 3570849, 173741, 211962, 4377778, 119369, 2856973, 2945854, + 1508054, 4503932, 3141566, 1842177, 3448683, 3384614, 2886508, 1573965, 990618, 3053734, + 2918742, 4508753, 1032149, 60943, 4291620, 722607, 2883224, 169359, 4356585, 3725543, + 3678729, 341673, 3592828, 4077251, 3382936, 3885685, 4630994, 1286698, 4449616, 1138430, + 3113385, 4660578, 2539973, 4562286, 4085089, 494737, 3967610, 2130702, 1823755, 1369324, + 3796951, 956299, 141730, 935144, 4381893, 4412545, 1382250, 3024476, 2364546, 3396164, + 3573511, 314081, 577688, 4154135, 1567018, 4047761, 2446220, 1148833, 4842497, 3967186, + 1175290, 3749667, 1209593, 3295627, 3169065, 2460328, 1838486, 1436923, 2843887, 3676426, + 2079145, 2975635, 535071, 4287509, 3281107, 39606, 3115500, 3204573, 722131, 3124073}, + 2, + false))); +*/ CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/ms_bfs_test.cpp b/cpp/tests/experimental/ms_bfs_test.cpp new file mode 100644 index 00000000000..264382c22a3 --- /dev/null +++ b/cpp/tests/experimental/ms_bfs_test.cpp @@ -0,0 +1,301 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef struct MsBfs_Usecase_t { + std::string graph_file_full_path{}; + std::vector sources{}; + int32_t radius; + bool test_weighted{false}; + + MsBfs_Usecase_t(std::string const& graph_file_path, + std::vector const& sources, + int32_t radius, + bool test_weighted) + : sources(sources), radius(radius), test_weighted(test_weighted) + { + if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { + graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; + } else { + graph_file_full_path = graph_file_path; + } + }; +} MsBfs_Usecase; + +class Tests_MsBfs : public ::testing::TestWithParam { + public: + Tests_MsBfs() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + template + void run_current_test(MsBfs_Usecase const& configuration) + { + auto n_seeds = configuration.sources.size(); + int n_streams = std::min(n_seeds, static_cast(128)); + raft::handle_t handle(n_streams); + + cugraph::experimental::graph_t graph( + handle); + std::tie(graph, std::ignore) = cugraph::test:: + read_graph_from_matrix_market_file( + handle, configuration.graph_file_full_path, configuration.test_weighted, false); + auto graph_view = graph.view(); + // Streams will allocate concurrently later + std::vector> d_distances{}; + std::vector> d_predecessors{}; + + d_distances.reserve(n_seeds); + d_predecessors.reserve(n_seeds); + for (vertex_t i = 0; i < n_seeds; i++) { + // Allocations and operations are attached to the worker stream + rmm::device_uvector tmp_distances(graph_view.get_number_of_vertices(), + handle.get_internal_stream_view(i)); + rmm::device_uvector tmp_predecessors(graph_view.get_number_of_vertices(), + handle.get_internal_stream_view(i)); + + d_distances.push_back(std::move(tmp_distances)); + d_predecessors.push_back(std::move(tmp_predecessors)); + } + + std::vector radius(n_seeds); + std::generate(radius.begin(), radius.end(), [n = 0]() mutable { return (n++ % 12 + 1); }); + + // warm up + cugraph::experimental::bfs(handle, + graph_view, + d_distances[0].begin(), + d_predecessors[0].begin(), + static_cast(configuration.sources[0]), + false, + radius[0]); + + // one by one + HighResTimer hr_timer; + hr_timer.start("bfs"); + cudaProfilerStart(); + for (vertex_t i = 0; i < n_seeds; i++) { + cugraph::experimental::bfs(handle, + graph_view, + d_distances[i].begin(), + d_predecessors[i].begin(), + static_cast(configuration.sources[i]), + false, + radius[i]); + } + cudaProfilerStop(); + hr_timer.stop(); + hr_timer.display(std::cout); + + // concurrent + hr_timer.start("bfs"); + cudaProfilerStart(); +#pragma omp parallel for + for (vertex_t i = 0; i < n_seeds; i++) { + raft::handle_t light_handle(handle, i); + auto worker_stream_view = light_handle.get_stream_view(); + cugraph::experimental::bfs(light_handle, + graph_view, + d_distances[i].begin(), + d_predecessors[i].begin(), + static_cast(configuration.sources[i]), + false, + radius[i]); + } + + cudaProfilerStop(); + hr_timer.stop(); + hr_timer.display(std::cout); + } +}; + +TEST_P(Tests_MsBfs, DISABLED_CheckInt32Int32FloatUntransposed) +{ + run_current_test(GetParam()); +} +/* +INSTANTIATE_TEST_CASE_P( + simple_test, + 
Tests_MsBfs, + ::testing::Values( + MsBfs_Usecase("test/datasets/karate.mtx", std::vector{0}, 1, false), + MsBfs_Usecase("test/datasets/karate.mtx", std::vector{0}, 2, false), + MsBfs_Usecase("test/datasets/karate.mtx", std::vector{1}, 3, false), + MsBfs_Usecase("test/datasets/karate.mtx", std::vector{10, 0, 5}, 2, false), + MsBfs_Usecase("test/datasets/karate.mtx", std::vector{9, 3, 10}, 2, false), + MsBfs_Usecase( + "test/datasets/karate.mtx", std::vector{5, 9, 3, 10, 12, 13}, 2, true))); +*/ +// For perf analysis + +INSTANTIATE_TEST_CASE_P( + simple_test, + Tests_MsBfs, + ::testing::Values( + MsBfs_Usecase("test/datasets/soc-LiveJournal1.mtx", std::vector{363617}, 2, false), + MsBfs_Usecase( + "test/datasets/soc-LiveJournal1.mtx", + std::vector{ + 363617, 722214, 2337449, 2510183, 2513389, 225853, 2035807, 3836330, 1865496, 28755}, + 2, + false), + MsBfs_Usecase( + "test/datasets/soc-LiveJournal1.mtx", + std::vector{ + 363617, 722214, 2337449, 2510183, 2513389, 225853, 2035807, 3836330, 1865496, 28755, + 2536834, 3070144, 3888415, 3131712, 2382526, 1040771, 2631543, 4607218, 4465829, 3341686, + 2772973, 2611175, 4526129, 2624421, 1220593, 2593137, 3270705, 1503899, 1213033, 4840102, + 4529036, 3421116, 4264831, 4089751, 4272322, 3486998, 2830318, 320953, 2388331, 520808, + 3023094, 1600294, 3631119, 1716614, 4829213, 1175844, 960680, 847662, 3277365, 3957318, + 3455123, 2454259, 670953, 4465677, 1027332, 2560721, 89061, 1163406, 3109528, 3221856, + 4714426, 2382774, 37828, 4433616, 3283229, 591911, 4200188, 442522, 872207, 2437601, + 741003, 266241, 914618, 3626195, 2021080, 4679624, 777476, 2527796, 1114017, 640142, + 49259, 4069879, 3869098, 1105040, 4707804, 3208582, 3325885, 1450601, 4072548, 2037062, + 2029646, 4575891, 1488598, 79105, 4827273, 3795434, 4647518, 4733397, 3980718, 1184627}, + 2, + false), + MsBfs_Usecase( + "test/datasets/soc-LiveJournal1.mtx", + std::vector{ + 363617, 722214, 2337449, 2510183, 2513389, 225853, 2035807, 3836330, 1865496, 28755, + 2536834, 3070144, 3888415, 3131712, 2382526, 1040771, 2631543, 4607218, 4465829, 3341686, + 2772973, 2611175, 4526129, 2624421, 1220593, 2593137, 3270705, 1503899, 1213033, 4840102, + 4529036, 3421116, 4264831, 4089751, 4272322, 3486998, 2830318, 320953, 2388331, 520808, + 3023094, 1600294, 3631119, 1716614, 4829213, 1175844, 960680, 847662, 3277365, 3957318, + 3455123, 2454259, 670953, 4465677, 1027332, 2560721, 89061, 1163406, 3109528, 3221856, + 4714426, 2382774, 37828, 4433616, 3283229, 591911, 4200188, 442522, 872207, 2437601, + 741003, 266241, 914618, 3626195, 2021080, 4679624, 777476, 2527796, 1114017, 640142, + 49259, 4069879, 3869098, 1105040, 4707804, 3208582, 3325885, 1450601, 4072548, 2037062, + 2029646, 4575891, 1488598, 79105, 4827273, 3795434, 4647518, 4733397, 3980718, 1184627, + 984983, 3114832, 1967741, 1599818, 144593, 2698770, 2889449, 2495550, 1053813, 1193622, + 686026, 3989015, 2040719, 4693428, 3190376, 2926728, 3399030, 1664419, 662429, 4526841, + 2186957, 3752558, 2440046, 2930226, 3633006, 4058166, 3137060, 3499296, 2126343, 148971, + 2199672, 275811, 2813976, 2274536, 1189239, 1335942, 2465624, 2596042, 829684, 193400, + 2682845, 3691697, 4022437, 4051170, 4195175, 2876420, 3984220, 2174475, 326134, 2606530, + 2493046, 4706121, 1498980, 4576225, 1271339, 44832, 1875673, 4664940, 134931, 736397, + 4333554, 2751031, 2163610, 2879676, 3174153, 3317403, 2052464, 1881883, 4757859, 3596257, + 2358088, 2578758, 447504, 590720, 1717038, 1869795, 1133885, 3027521, 840312, 2818881, + 3654321, 2730947, 
353585, 1134903, 2223378, 1508824, 3662521, 1363776, 2712071, 288441, + 1204581, 3502242, 4645567, 2767267, 1514366, 3956099, 1422145, 1216608, 2253360, 189132, + 4238225, 1345783, 451571, 1599442, 3237284, 4711405, 929446, 1857675, 150759, 1277633, + 761210, 138628, 1026833, 2599544, 2464737, 989203, 3399615, 2144292, 216142, 637312, + 2044964, 716256, 1660632, 1762919, 4784357, 2213415, 2764769, 291806, 609772, 3264819, + 1870953, 1516385, 235647, 1045474, 2664957, 819095, 1824119, 4045271, 4448109, 1676788, + 4285177, 1580502, 3546548, 2771971, 3927086, 1339779, 3156204, 1730998, 1172522, 2433024, + 4533449, 479930, 2010695, 672994, 3542039, 3176455, 26352, 2137735, 866910, 4410835, + 2623982, 3603159, 2555625, 2765653, 267865, 2015523, 1009052, 4713994, 1600667, 2176195, + 3179631, 4570390, 2018424, 3356384, 1784287, 894861, 3622099, 1647273, 3044136, 950354, + 1491760, 3416929, 3757300, 2244912, 4129215, 1600848, 3867343, 72329, 919189, 992521, + 3445975, 4712557, 4680974, 188419, 2612093, 1991268, 3566207, 2281468, 3859078, 2492806, + 3398628, 763441, 2679107, 2554420, 2130132, 4664374, 1182901, 3890770, 4714667, 4209303, + 4013060, 3617653, 2040022, 3296519, 4190671, 1693353, 2678411, 3788834, 2781815, 191965, + 1083926, 503974, 3529226, 1650522, 1900976, 542080, 3423929, 3418905, 878165, 4701703, + 3022790, 4316365, 76365, 4053672, 1358185, 3830478, 4445661, 3210024, 1895915, 4541133, + 2938808, 562788, 3920065, 1458776, 4052046, 2967475, 1092809, 3203538, 159626, 3399464, + 214467, 3343982, 1811854, 3189045, 4272117, 4701563, 424807, 4341116, 760545, 4674683, + 1538018, 386762, 194237, 2162719, 1694433, 943728, 2389036, 2196653, 3085571, 1513424, + 3689413, 3278747, 4197291, 3324063, 3651090, 1737936, 2768803, 2768889, 3108096, 4311775, + 3569480, 886705, 733256, 2477493, 1735412, 2960895, 1983781, 1861797, 3566460, 4537673, + 1164093, 3499764, 4553071, 3518985, 847658, 918948, 2922351, 1056144, 652895, 1013195, + 780505, 1702928, 3562838, 1432719, 2405207, 1054920, 641647, 2240939, 3617702, 383165, + 652641, 879593, 1810739, 2096385, 4497865, 4768530, 1743968, 3582014, 1025009, 3002122, + 2422190, 527647, 1251821, 2571153, 4095874, 3705333, 3637407, 1385567, 4043855, 4041930, + 2433139, 1710383, 1127734, 4362316, 711588, 817839, 3214775, 910077, 1313768, 2382229, + 16864, 2081770, 3095420, 3195272, 548711, 2259860, 1167323, 2435974, 425238, 2085179, + 2630042, 2632881, 2867923, 3703565, 1037695, 226617, 4379130, 1541468, 3581937, 605965, + 1137674, 4655221, 4769963, 1394370, 4425315, 2990132, 2364485, 1561137, 2713384, 481509, + 2900382, 934766, 2986774, 1767669, 298593, 2502539, 139296, 3794229, 4002180, 4718138, + 2909238, 423691, 3023810, 2784924, 2760160, 1971980, 316683, 3828090, 3253691, 4839313, + 1203624, 584938, 3901482, 1747543, 1572737, 3533226, 774708, 1691195, 1037110, 1557763, + 225120, 4424243, 3524086, 1717663, 4332507, 3513592, 4274932, 1232118, 873498, 1416042, + 2488925, 111391, 4704545, 4492545, 445317, 1584812, 2187737, 2471948, 3731678, 219255, + 2282627, 2589971, 2372185, 4609096, 3673961, 2524410, 12823, 2437155, 3015974, 4188352, + 3184084, 3690756, 1222341, 1278376, 3652030, 4162647, 326548, 3930062, 3926100, 1551222, + 2722165, 4526695, 3997534, 4815513, 3139056, 2547644, 3028915, 4149092, 3656554, 2691582, + 2676699, 1878842, 260174, 3129900, 4379993, 182347, 2189338, 3783616, 2616666, 2596952, + 243007, 4179282, 2730, 1939894, 2332032, 3335636, 182332, 3112260, 2174584, 587481, + 4527368, 3154106, 3403059, 673206, 2150292, 446521, 1600204, 4819428, 
2591357, 48490, + 2917012, 2285923, 1072926, 2824281, 4364250, 956033, 311938, 37251, 3729300, 2726300, + 644966, 1623020, 1419070, 4646747, 2417222, 2680238, 2561083, 1793801, 2349366, 339747, + 611366, 4684147, 4356907, 1277161, 4510381, 3218352, 4161658, 3200733, 1172372, 3997786, + 3169266, 3353418, 2248955, 2875885, 2365369, 498208, 2968066, 2681505, 2059048, 2097106, + 3607540, 1121504, 2016789, 1762605, 3138431, 866081, 3705757, 3833066, 2599788, 760816, + 4046672, 1544367, 2983906, 4842911, 209599, 1250954, 3333704, 561212, 4674336, 2831841, + 3690724, 2929360, 4830834, 1177524, 2487687, 3525137, 875283, 651241, 2110742, 1296646, + 1543739, 4349417, 2384725, 1931751, 1519208, 1520034, 3385008, 3219962, 734912, 170230, + 1741419, 729913, 2860117, 2362381, 1199807, 2424230, 177824, 125948, 2722701, 4687548, + 1140771, 3232742, 4522020, 4376360, 1125603, 590312, 2481884, 138951, 4086775, 615155, + 3395781, 4587272, 283209, 568470, 4296185, 4344150, 2454321, 2672602, 838828, 4051647, + 1709120, 3074610, 693235, 4356087, 3018806, 239410, 2431497, 691186, 766276, 4462126, + 859155, 2370304, 1571808, 1938673, 1694955, 3871296, 4245059, 3987376, 301524, 2512461, + 3410437, 3300380, 684922, 4581995, 3599557, 683515, 1850634, 3704678, 1937490, 2035591, + 3718533, 2065879, 3160765, 1467884, 1912241, 2501509, 3668572, 3390469, 2501150, 612319, + 713633, 1976262, 135946, 3641535, 632083, 13414, 4217765, 4137712, 2550250, 3281035, + 4179598, 961045, 2020694, 4380006, 1345936, 289162, 1359035, 770872, 4509911, 3947317, + 4719693, 248568, 2625660, 1237232, 2153208, 4814282, 1259954, 3677369, 861222, 2883506, + 3339149, 3998335, 491017, 1609022, 2648112, 742132, 649609, 4206953, 3131106, 3504814, + 3344486, 611721, 3215620, 2856233, 4447505, 1949222, 1868345, 712710, 6966, 4730666, + 3181872, 2972889, 3038521, 3525444, 4385208, 1845613, 1124187, 2030476, 4468651, 2478792, + 3473580, 3783357, 1852991, 1648485, 871319, 1670723, 4458328, 3218600, 1811100, 3443356, + 2233873, 3035207, 2548692, 3337891, 3773674, 1552957, 4782811, 3144712, 3523466, 1491315, + 3955852, 1838410, 3164028, 1092543, 776459, 2959379, 2541744, 4064418, 3908320, 2854145, + 3960709, 1348188, 977678, 853619, 1304291, 2848702, 1657913, 1319826, 3322665, 788037, + 2913686, 4471279, 1766285, 348304, 56570, 1892118, 4017244, 401006, 3524539, 4310134, + 1624693, 4081113, 957511, 849400, 129975, 2616130, 378537, 1556787, 3916162, 1039980, + 4407778, 2027690, 4213675, 839863, 683134, 75805, 2493150, 4215796, 81587, 751845, + 1255588, 1947964, 1950470, 859401, 3077088, 3931110, 2316256, 1523761, 4527477, 4237511, + 1123513, 4209796, 3584772, 4250563, 2091754, 1618766, 2139944, 4525352, 382159, 2955887, + 41760, 2313998, 496912, 3791570, 3904792, 3613654, 873959, 127076, 2537797, 2458107, + 4543265, 3661909, 26828, 271816, 17854, 2461269, 1776042, 1573899, 3409957, 4335712, + 4534313, 3392751, 1230124, 2159031, 4444015, 3373087, 3848014, 2026600, 1382747, 3537242, + 4536743, 4714155, 3788371, 3570849, 173741, 211962, 4377778, 119369, 2856973, 2945854, + 1508054, 4503932, 3141566, 1842177, 3448683, 3384614, 2886508, 1573965, 990618, 3053734, + 2918742, 4508753, 1032149, 60943, 4291620, 722607, 2883224, 169359, 4356585, 3725543, + 3678729, 341673, 3592828, 4077251, 3382936, 3885685, 4630994, 1286698, 4449616, 1138430, + 3113385, 4660578, 2539973, 4562286, 4085089, 494737, 3967610, 2130702, 1823755, 1369324, + 3796951, 956299, 141730, 935144, 4381893, 4412545, 1382250, 3024476, 2364546, 3396164, + 3573511, 314081, 577688, 4154135, 1567018, 
4047761, 2446220, 1148833, 4842497, 3967186, + 1175290, 3749667, 1209593, 3295627, 3169065, 2460328, 1838486, 1436923, 2843887, 3676426, + 2079145, 2975635, 535071, 4287509, 3281107, 39606, 3115500, 3204573, 722131, 3124073}, + 2, + false))); +CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/streams.cu b/cpp/tests/experimental/streams.cu new file mode 100644 index 00000000000..c89ffe1e532 --- /dev/null +++ b/cpp/tests/experimental/streams.cu @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governin_from_mtxg permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include "gtest/gtest.h" +struct StreamTest : public ::testing::Test { +}; +TEST_F(StreamTest, basic_test) +{ + int n_streams = 4; + raft::handle_t handle(n_streams); + + const size_t intput_size = 4096; + +#pragma omp parallel for + for (int i = 0; i < n_streams; i++) { + rmm::device_uvector u(intput_size, handle.get_internal_stream_view(i)), + v(intput_size, handle.get_internal_stream_view(i)); + thrust::transform(rmm::exec_policy(handle.get_internal_stream_view(i)), + u.begin(), + u.end(), + v.begin(), + v.begin(), + 2 * thrust::placeholders::_1 + thrust::placeholders::_2); + } +} \ No newline at end of file diff --git a/python/cugraph/__init__.py b/python/cugraph/__init__.py index 8a847d1f1d4..11ba2d6ef96 100644 --- a/python/cugraph/__init__.py +++ b/python/cugraph/__init__.py @@ -82,7 +82,9 @@ shortest_path, filter_unreachable, shortest_path_length, - traveling_salesperson + traveling_salesperson, + concurrent_bfs, + multi_source_bfs, ) from cugraph.tree import minimum_spanning_tree, maximum_spanning_tree diff --git a/python/cugraph/tests/test_egonet.py b/python/cugraph/tests/test_egonet.py index 009fd1252f1..b259c2567dc 100644 --- a/python/cugraph/tests/test_egonet.py +++ b/python/cugraph/tests/test_egonet.py @@ -58,29 +58,6 @@ def test_ego_graph_nx(graph_file, seed, radius): @pytest.mark.parametrize("seeds", [[0, 5, 13]]) @pytest.mark.parametrize("radius", [1, 2, 3]) def test_batched_ego_graphs(graph_file, seeds, radius): - """ - Compute the induced subgraph of neighbors for each node in seeds - within a given radius. - Parameters - ---------- - G : cugraph.Graph, networkx.Graph, CuPy or SciPy sparse matrix - Graph or matrix object, which should contain the connectivity - information. Edge weights, if present, should be single or double - precision floating point values. - seeds : cudf.Series - Specifies the seeds of the induced egonet subgraphs - radius: integer, optional - Include all neighbors of distance<=radius from n. - - Returns - ------- - ego_edge_lists : cudf.DataFrame - GPU data frame containing all induced sources identifiers, - destination identifiers, edge weights - seeds_offsets: cudf.Series - Series containing the starting offset in the returned edge list - for each seed. 
- """ gc.collect() # Nx @@ -93,9 +70,8 @@ def test_batched_ego_graphs(graph_file, seeds, radius): df, offsets = cugraph.batched_ego_graphs(Gnx, seeds, radius=radius) for i in range(len(seeds)): ego_nx = nx.ego_graph(Gnx, seeds[i], radius=radius) - ego_df = df[offsets[i]:offsets[i+1]] - ego_cugraph = nx.from_pandas_edgelist(ego_df, - source="src", - target="dst", - edge_attr="weight") + ego_df = df[offsets[i]:offsets[i + 1]] + ego_cugraph = nx.from_pandas_edgelist( + ego_df, source="src", target="dst", edge_attr="weight" + ) assert nx.is_isomorphic(ego_nx, ego_cugraph) diff --git a/python/cugraph/traversal/__init__.py b/python/cugraph/traversal/__init__.py index 5944ebe0865..e74266d29fc 100644 --- a/python/cugraph/traversal/__init__.py +++ b/python/cugraph/traversal/__init__.py @@ -17,6 +17,8 @@ sssp, shortest_path, filter_unreachable, - shortest_path_length + shortest_path_length, ) from cugraph.traversal.traveling_salesperson import traveling_salesperson + +from cugraph.traversal.ms_bfs import concurrent_bfs, multi_source_bfs diff --git a/python/cugraph/traversal/ms_bfs.py b/python/cugraph/traversal/ms_bfs.py new file mode 100644 index 00000000000..e4b799e30e4 --- /dev/null +++ b/python/cugraph/traversal/ms_bfs.py @@ -0,0 +1,282 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import cudf + +# from cugraph.structure.graph import Graph, DiGraph +# from cugraph.utilities.utils import get_device_memory_info +import warnings + + +def _get_feasibility(G, sources, components=None, depth_limit=None): + """ + Evaluate the feasibility for breadth first traversal from multiple sources + in a graph. + + Parameters + ---------- + G : cugraph.Graph or cugraph.DiGraph + The adjacency list will be computed if not already present. + + sources : cudf.Series + Subset of vertices from which the traversals start. A BFS is run for + each source in the Series. + The size of the series should be at least one and cannot exceed + the size of the graph. + + depth_limit : Integer, optional, default=None + Limit the depth of the search. Terminates if no more vertices are + reachable within the distance of depth_limit + + components : cudf.DataFrame, optional, default=None + GPU Dataframe containing the component information. + Passing this information may impact the return type. + When no component information is passed BFS uses one component + behavior settings. + + components['vertex'] : cudf.Series + vertex IDs + components['color'] : cudf.Series + component IDs/color for vertices. 
+ + Returns + ------- + mem_footprint : integer + Estimated memory foot print size in Bytes + """ + + # Fixme not implemented in RMM yet + # using 96GB upper bound for now + # mem = get_device_memory_info() + mem = 9.6e10 + n_sources = sources.size + V = G.number_of_vertices() + E = G.number_of_edges() + mean_component_sz = V + n_components = 1 + + # Retreive types + size_of_v = 4 + size_of_e = 4 + size_of_w = 0 + if G.adjlist.weights is not None: + if G.adjlist.weights.dtype is np.float64: + size_of_w = 8 + else: + size_of_w = 4 + if G.adjlist.offsets.dtype is np.float64: + size_of_v = 8 + if G.adjlist.indices.dtype is np.float64: + size_of_e = 8 + + # Graph size + G_sz = E * size_of_e + E * size_of_w + V * size_of_v + + # The impact of depth limit depends on the sparsity + # pattern and diameter. We cannot leverage it without + # traversing the full dataset a the moment. + + # dense output + output_sz = n_sources * 2 * V * size_of_v + + # sparse output + if components is not None: + tmp = components["color"].value_counts() + n_components = tmp.size + if n_sources / n_components > 100: + warnings.warn( + "High number of seeds per component result in large output." + ) + mean_component_sz = tmp.mean() + output_sz = mean_component_sz * n_sources * 2 * size_of_e + + # counting 10% for context, handle and temporary allocations + mem_footprint = (G_sz + output_sz) * 1.1 + if mem_footprint > mem: + warnings.warn(f"Cannot execute in-memory :{mem_footprint} Bytes") + + return mem_footprint + + +def concurrent_bfs(Graphs, sources, depth_limit=None, offload=False): + """ + Find the breadth first traversals of multiple graphs with multiple sources + in each graph. + + Parameters + ---------- + Graphs : list of cugraph.Graph or cugraph.DiGraph + The adjacency lists will be computed if not already present. + + sources : list of cudf.Series + For each graph, subset of vertices from which the traversals start. + A BFS is run in Graphs[i] for each source in the Series at sources[i]. + The size of this list must match the size of the graph list. + The size of each Series (ie. the number of sources per graph) + is flexible, but cannot exceed the size of the corresponding graph. + + + depth_limit : Integer, optional, default=None + Limit the depth of the search. Terminates if no more vertices are + reachable within the distance of depth_limit + + offload : boolean, optional, default=False + Indicates if output should be written to the disk. + When not provided, the algorithms decides if offloading is needed + based on the input parameters. + + Returns + ------- + Return type is decided based on the input parameters (size of + sources, size of the graph, number of graphs and offload setting) + + If G is a cugraph.Graph and output fits in memory: + BFS_edge_lists : cudf.DataFrame + GPU data frame containing all BFS edges + source_offsets: cudf.Series + Series containing the starting offset in the returned edge list + for each source. + + If offload is True, or if the output does not fit in memory : + Writes csv files containing BFS output to the disk. + """ + raise NotImplementedError( + "concurrent_bfs is coming soon! 
Please up vote the github issue 1465\ + to help us prioritize" + ) + if not isinstance(Graphs, list): + raise TypeError( + "Graphs should be a list of cugraph.Graph or cugraph.DiGraph" + ) + if not isinstance(sources, list): + raise TypeError("sources should be a list of cudf.Series") + if len(Graphs) != len(sources): + raise ValueError( + "The size of the sources list must match\ + the size of the graph list." + ) + if offload is True: + raise NotImplementedError( + "Offloading is coming soon! Please up vote the github issue 1461\ + to help us prioritize" + ) + + # Consolidate graphs in a single graph and record components + + # Renumber and concatenate sources in a single df + + # Call multi_source_bfs + # multi_source_bfs( + # G, + # sources, + # components=components, + # depth_limit=depth_limit, + # offload=offload, + # ) + + +def multi_source_bfs( + G, sources, components=None, depth_limit=None, offload=False +): + """ + Find the breadth first traversal from multiple sources in a graph. + + Parameters + ---------- + G : cugraph.Graph or cugraph.DiGraph + The adjacency list will be computed if not already present. + + sources : cudf.Series + Subset of vertices from which the traversals start. A BFS is run for + each source in the Series. + The size of the series should be at least one and cannot exceed the + size of the graph. + + depth_limit : Integer, optional, default=None + Limit the depth of the search. Terminates if no more vertices are + reachable within the distance of depth_limit + + components : cudf.DataFrame, optional, default=None + GPU Dataframe containing the component information. + Passing this information may impact the return type. + When no component information is passed BFS uses one component + behavior settings. + + components['vertex'] : cudf.Series + vertex IDs + components['color'] : cudf.Series + component IDs/color for vertices. + + offload : boolean, optional, default=False + Indicates if output should be written to the disk. + When not provided, the algorithms decides if offloading is needed + based on the input parameters. + + Returns + ------- + Return value type is decided based on the input parameters (size of + sources, size of the graph, number of components and offload setting) + If G is a cugraph.Graph, returns : + cudf.DataFrame + df['vertex'] vertex IDs + + df['distance_'] path distance for each vertex from the + starting vertex. One column per source. + + df['predecessor_'] for each i'th position in the column, + the vertex ID immediately preceding the vertex at position i in + the 'vertex' column. One column per source. + + If G is a cugraph.Graph and component information is present returns : + BFS_edge_lists : cudf.DataFrame + GPU data frame containing all BFS edges + source_offsets: cudf.Series + Series containing the starting offset in the returned edge list + for each source. + + If offload is True, or if the output does not fit in memory : + Writes csv files containing BFS output to the disk. + """ + raise NotImplementedError( + "concurrent_bfs is coming soon! Please up vote the github issue 1465\ + to help us prioritize" + ) + # if components is not None: + # null_check(components["vertex"]) + # null_check(components["colors"]) + # + # if depth_limit is not None: + # raise NotImplementedError( + # "depth limit implementation of BFS is not currently supported" + # ) + + # if offload is True: + # raise NotImplementedError( + # "Offloading is coming soon! 
Please up vote the github issue 1461 + # to help us prioritize" + # ) + if isinstance(sources, list): + sources = cudf.Series(sources) + if G.renumbered is True: + sources = G.lookup_internal_vertex_id(cudf.Series(sources)) + if not G.adjlist: + G.view_adj_list() + # Memory footprint check + footprint = _get_feasibility( + G, sources, components=components, depth_limit=depth_limit + ) + print(footprint) + # Call multi_source_bfs + # FIXME remove when implemented + # raise NotImplementedError("Commming soon") diff --git a/python/cugraph/utilities/utils.py b/python/cugraph/utilities/utils.py index 39b789d7f79..adaec0f9e44 100644 --- a/python/cugraph/utilities/utils.py +++ b/python/cugraph/utilities/utils.py @@ -26,6 +26,7 @@ from cupyx.scipy.sparse.coo import coo_matrix as cp_coo_matrix from cupyx.scipy.sparse.csr import csr_matrix as cp_csr_matrix from cupyx.scipy.sparse.csc import csc_matrix as cp_csc_matrix + CP_MATRIX_TYPES = [cp_coo_matrix, cp_csr_matrix, cp_csc_matrix] CP_COMPRESSED_MATRIX_TYPES = [cp_csr_matrix, cp_csc_matrix] except ModuleNotFoundError: @@ -38,6 +39,7 @@ from scipy.sparse.coo import coo_matrix as sp_coo_matrix from scipy.sparse.csr import csr_matrix as sp_csr_matrix from scipy.sparse.csc import csc_matrix as sp_csc_matrix + SP_MATRIX_TYPES = [sp_coo_matrix, sp_csr_matrix, sp_csc_matrix] SP_COMPRESSED_MATRIX_TYPES = [sp_csr_matrix, sp_csc_matrix] except ModuleNotFoundError: @@ -80,15 +82,21 @@ def get_traversed_path(df, id): >>> path = cugraph.utils.get_traversed_path(sssp_df, 32) """ - if 'vertex' not in df.columns: - raise ValueError("DataFrame does not appear to be a BFS or " - "SSP result - 'vertex' column missing") - if 'distance' not in df.columns: - raise ValueError("DataFrame does not appear to be a BFS or " - "SSP result - 'distance' column missing") - if 'predecessor' not in df.columns: - raise ValueError("DataFrame does not appear to be a BFS or " - "SSP result - 'predecessor' column missing") + if "vertex" not in df.columns: + raise ValueError( + "DataFrame does not appear to be a BFS or " + "SSP result - 'vertex' column missing" + ) + if "distance" not in df.columns: + raise ValueError( + "DataFrame does not appear to be a BFS or " + "SSP result - 'distance' column missing" + ) + if "predecessor" not in df.columns: + raise ValueError( + "DataFrame does not appear to be a BFS or " + "SSP result - 'predecessor' column missing" + ) if type(id) != int: raise ValueError("The vertex 'id' needs to be an integer") @@ -96,17 +104,17 @@ def get_traversed_path(df, id): # or edited. 
Therefore we cannot assume that using the vertex ID # as an index will work - ddf = df[df['vertex'] == id] + ddf = df[df["vertex"] == id] if len(ddf) == 0: raise ValueError("The vertex (", id, " is not in the result set") - pred = ddf['predecessor'].iloc[0] + pred = ddf["predecessor"].iloc[0] answer = [] answer.append(ddf) while pred != -1: - ddf = df[df['vertex'] == pred] - pred = ddf['predecessor'].iloc[0] + ddf = df[df["vertex"] == pred] + pred = ddf["predecessor"].iloc[0] answer.append(ddf) return cudf.concat(answer) @@ -138,15 +146,21 @@ def get_traversed_path_list(df, id): >>> path = cugraph.utils.get_traversed_path_list(sssp_df, 32) """ - if 'vertex' not in df.columns: - raise ValueError("DataFrame does not appear to be a BFS or " - "SSP result - 'vertex' column missing") - if 'distance' not in df.columns: - raise ValueError("DataFrame does not appear to be a BFS or " - "SSP result - 'distance' column missing") - if 'predecessor' not in df.columns: - raise ValueError("DataFrame does not appear to be a BFS or " - "SSP result - 'predecessor' column missing") + if "vertex" not in df.columns: + raise ValueError( + "DataFrame does not appear to be a BFS or " + "SSP result - 'vertex' column missing" + ) + if "distance" not in df.columns: + raise ValueError( + "DataFrame does not appear to be a BFS or " + "SSP result - 'distance' column missing" + ) + if "predecessor" not in df.columns: + raise ValueError( + "DataFrame does not appear to be a BFS or " + "SSP result - 'predecessor' column missing" + ) if type(id) != int: raise ValueError("The vertex 'id' needs to be an integer") @@ -158,17 +172,17 @@ def get_traversed_path_list(df, id): answer = [] answer.append(id) - ddf = df[df['vertex'] == id] + ddf = df[df["vertex"] == id] if len(ddf) == 0: raise ValueError("The vertex (", id, " is not in the result set") - pred = ddf['predecessor'].iloc[0] + pred = ddf["predecessor"].iloc[0] while pred != -1: answer.append(pred) - ddf = df[df['vertex'] == pred] - pred = ddf['predecessor'].iloc[0] + ddf = df[df["vertex"] == pred] + pred = ddf["predecessor"].iloc[0] return answer @@ -206,6 +220,14 @@ def is_device_version_less_than(min_version=(7, 0)): return False +def get_device_memory_info(): + """ + Returns the total amount of global memory on the device in bytes + """ + meminfo = cuda.current_context().get_memory_info() + return meminfo[1] + + # FIXME: if G is a Nx type, the weight attribute is assumed to be "weight", if # set. An additional optional parameter for the weight attr name when accepting # Nx graphs may be needed. 
From the Nx docs: @@ -229,29 +251,35 @@ def ensure_cugraph_obj(obj, nx_weight_attr=None, matrix_graph_type=None): elif (nx is not None) and (input_type in [nx.Graph, nx.DiGraph]): return (convert_from_nx(obj, weight=nx_weight_attr), input_type) - elif (input_type in CP_MATRIX_TYPES) or \ - (input_type in SP_MATRIX_TYPES): + elif (input_type in CP_MATRIX_TYPES) or (input_type in SP_MATRIX_TYPES): if matrix_graph_type is None: matrix_graph_type = Graph elif matrix_graph_type not in [Graph, DiGraph]: - raise TypeError(f"matrix_graph_type must be either a cugraph " - f"Graph or DiGraph, got: {matrix_graph_type}") - - if input_type in (CP_COMPRESSED_MATRIX_TYPES + - SP_COMPRESSED_MATRIX_TYPES): + raise TypeError( + f"matrix_graph_type must be either a cugraph " + f"Graph or DiGraph, got: {matrix_graph_type}" + ) + + if input_type in ( + CP_COMPRESSED_MATRIX_TYPES + SP_COMPRESSED_MATRIX_TYPES + ): coo = obj.tocoo(copy=False) else: coo = obj if input_type in CP_MATRIX_TYPES: - df = cudf.DataFrame({"source": cp.ascontiguousarray(coo.row), - "destination": cp.ascontiguousarray(coo.col), - "weight": cp.ascontiguousarray(coo.data)}) + df = cudf.DataFrame( + { + "source": cp.ascontiguousarray(coo.row), + "destination": cp.ascontiguousarray(coo.col), + "weight": cp.ascontiguousarray(coo.data), + } + ) else: - df = cudf.DataFrame({"source": coo.row, - "destination": coo.col, - "weight": coo.data}) + df = cudf.DataFrame( + {"source": coo.row, "destination": coo.col, "weight": coo.data} + ) # FIXME: # * do a quick check that symmetry is stored explicitly in the cupy # data for sym matrices (ie. for each uv, check vu is there) From ce807985c6ebed409485ba46a61291d92eb0ed9b Mon Sep 17 00:00:00 2001 From: Mike Wendt <1915404+mike-wendt@users.noreply.github.com> Date: Mon, 29 Mar 2021 12:26:34 -0400 Subject: [PATCH 205/343] ENH Update conda recipes pinning of repo dependencies (#1485) Ensure all conda packages created in this repo that depend on other packages are all version pinned to the same build number. This way it prevents a conda solve from picking mismatched versions of `cugraph` and `libcugraph` that can break this repo during builds and testing. Authors: - Mike Wendt (@mike-wendt) Approvers: - Brad Rees (@BradReesWork) - Ray Douglass (@raydouglass) URL: https://github.com/rapidsai/cugraph/pull/1485 --- conda/recipes/cugraph/meta.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conda/recipes/cugraph/meta.yaml b/conda/recipes/cugraph/meta.yaml index 1ef64ddbe72..4b845583181 100644 --- a/conda/recipes/cugraph/meta.yaml +++ b/conda/recipes/cugraph/meta.yaml @@ -25,13 +25,13 @@ requirements: build: - python x.x - cython>=0.29,<0.30 - - libcugraph={{ version }} + - libcugraph={{ version }}=*_{{ GIT_DESCRIBE_NUMBER }} - cudf={{ minor_version }} - ucx-py {{ minor_version }} - ucx-proc=*=gpu run: - python x.x - - libcugraph={{ version }} + - libcugraph={{ version }}=*_{{ GIT_DESCRIBE_NUMBER }} - cudf={{ minor_version }} - dask-cudf {{ minor_version }} - dask-cuda {{ minor_version }} From e60d9f7744e7c4cef70b7cddb3392c0e6f83936d Mon Sep 17 00:00:00 2001 From: Chuck Hastings <45364586+ChuckHastings@users.noreply.github.com> Date: Tue, 30 Mar 2021 10:19:56 -0400 Subject: [PATCH 206/343] Update Louvain to use new graph primitives and pattern accelerators (#1423) Implement the `update_by_delta_modularity` method using the new graph primitives and pattern accelerators. 
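For readers who have not worked with the new pattern accelerators: the heart of `update_by_delta_modularity` can be pictured as a two-level reduction in which, for every vertex, outgoing edge weights are first aggregated by the cluster key of the destination vertex. The toy, single-GPU Thrust sketch below illustrates only that aggregation idea; it is not the cugraph primitive itself, and its names and data are invented for illustration.

  // Hypothetical toy example -- NOT the cugraph API. For each source vertex,
  // sum the outgoing edge weights grouped by the cluster of the destination.
  #include <thrust/device_vector.h>
  #include <thrust/distance.h>
  #include <thrust/iterator/zip_iterator.h>
  #include <thrust/reduce.h>
  #include <thrust/sort.h>
  #include <thrust/tuple.h>

  #include <cstdio>
  #include <vector>

  int main()
  {
    // Toy edge list: edge i goes from vertex src[i] to a neighbor whose
    // current cluster is cluster[i], with weight w[i].
    std::vector<int> h_src{0, 0, 0, 1, 1};
    std::vector<int> h_cluster{2, 2, 3, 3, 3};
    std::vector<float> h_w{1.0f, 2.0f, 1.0f, 0.5f, 0.5f};

    thrust::device_vector<int> src(h_src.begin(), h_src.end());
    thrust::device_vector<int> cluster(h_cluster.begin(), h_cluster.end());
    thrust::device_vector<float> w(h_w.begin(), h_w.end());

    auto keys_first =
      thrust::make_zip_iterator(thrust::make_tuple(src.begin(), cluster.begin()));
    auto keys_last =
      thrust::make_zip_iterator(thrust::make_tuple(src.end(), cluster.end()));

    // Group identical (source vertex, neighbor cluster) keys together,
    // carrying the edge weights along with them.
    thrust::sort_by_key(keys_first, keys_last, w.begin());

    thrust::device_vector<int> out_src(src.size());
    thrust::device_vector<int> out_cluster(cluster.size());
    thrust::device_vector<float> out_w(w.size());

    // One output row per (source vertex, neighbor cluster) pair: the total
    // edge weight from that vertex into that cluster (the "key aggregation").
    auto ends = thrust::reduce_by_key(
      keys_first,
      keys_last,
      w.begin(),
      thrust::make_zip_iterator(thrust::make_tuple(out_src.begin(), out_cluster.begin())),
      out_w.begin());

    auto n_pairs = thrust::distance(out_w.begin(), ends.second);
    for (decltype(n_pairs) i = 0; i < n_pairs; ++i) {
      std::printf("vertex %d -> cluster %d : aggregated weight %f\n",
                  static_cast<int>(out_src[i]),
                  static_cast<int>(out_cluster[i]),
                  static_cast<float>(out_w[i]));
    }
    return 0;
  }

Roughly speaking, the real copy_v_transform_reduce_key_aggregated_out_nbr primitive layers the multi-GPU partitioning, the gathering of destination cluster keys, and the (key, value) hash-map lookups on top of this basic reduce-by-key step.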
This eliminates all of the custom MNMG implementation originally created for MNMG Louvain a few releases ago and replaces it with the new pattern accelerator and graph primitives that have been added in the last couple of releases. This depends on the following PRs and should not be merged until after them: * #1394 * #1399 closes #1220 Authors: - Chuck Hastings (@ChuckHastings) Approvers: - Andrei Schaffer (@aschaffer) - Seunghwa Kang (@seunghwak) - Rick Ratzel (@rlratzel) - Alex Fender (@afender) URL: https://github.com/rapidsai/cugraph/pull/1423 --- cpp/include/algorithms.hpp | 79 +- .../dendrogram.cuh => include/dendrogram.hpp} | 39 +- .../experimental/include_cuco_static_map.cuh | 0 cpp/include/graph.hpp | 6 +- ...ransform_reduce_key_aggregated_out_nbr.cuh | 2 +- ...orm_reduce_by_adj_matrix_row_col_key_e.cuh | 2 - cpp/include/utilities/collect_comm.cuh | 2 +- cpp/include/utilities/device_comm.cuh | 18 +- cpp/src/community/ecg.cu | 2 +- cpp/src/community/flatten_dendrogram.cuh | 39 +- cpp/src/community/leiden.cuh | 2 +- cpp/src/community/louvain.cu | 191 ++- cpp/src/community/louvain.cuh | 10 +- cpp/src/experimental/louvain.cuh | 1382 ++++------------- cpp/src/experimental/shuffle.cuh | 226 --- cpp/tests/CMakeLists.txt | 19 +- cpp/tests/community/louvain_test.cpp | 176 ++- cpp/tests/community/mg_louvain_helper.cu | 353 +++++ cpp/tests/community/mg_louvain_helper.hpp | 53 + cpp/tests/community/mg_louvain_test.cpp | 233 +++ cpp/tests/experimental/louvain_test.cu | 133 -- cpp/tests/utilities/base_fixture.hpp | 5 + 22 files changed, 1415 insertions(+), 1557 deletions(-) rename cpp/{src/community/dendrogram.cuh => include/dendrogram.hpp} (55%) rename cpp/{src => include}/experimental/include_cuco_static_map.cuh (100%) delete mode 100644 cpp/src/experimental/shuffle.cuh create mode 100644 cpp/tests/community/mg_louvain_helper.cu create mode 100644 cpp/tests/community/mg_louvain_helper.hpp create mode 100644 cpp/tests/community/mg_louvain_test.cpp delete mode 100644 cpp/tests/experimental/louvain_test.cu diff --git a/cpp/include/algorithms.hpp b/cpp/include/algorithms.hpp index 8a5474b389c..b8706d81e21 100644 --- a/cpp/include/algorithms.hpp +++ b/cpp/include/algorithms.hpp @@ -14,10 +14,14 @@ * limitations under the License. */ #pragma once + +#include #include #include + #include #include + #include namespace cugraph { @@ -612,7 +616,7 @@ weight_t hungarian(raft::handle_t const &handle, * * @throws cugraph::logic_error when an error occurs. * - * @tparam graph_t Type of graph + * @tparam graph_view_t Type of graph * * @param[in] handle Library handle (RAFT). 
If a communicator is set in the handle, * @param[in] graph input graph object (CSR) @@ -629,13 +633,74 @@ weight_t hungarian(raft::handle_t const &handle, * 2) modularity of the returned clustering * */ -template -std::pair louvain( +template +std::pair louvain( raft::handle_t const &handle, - graph_t const &graph, - typename graph_t::vertex_type *clustering, - size_t max_level = 100, - typename graph_t::weight_type resolution = typename graph_t::weight_type{1}); + graph_view_t const &graph_view, + typename graph_view_t::vertex_type *clustering, + size_t max_level = 100, + typename graph_view_t::weight_type resolution = typename graph_view_t::weight_type{1}); + +/** + * @brief Louvain implementation, returning dendrogram + * + * Compute a clustering of the graph by maximizing modularity + * + * Computed using the Louvain method described in: + * + * VD Blondel, J-L Guillaume, R Lambiotte and E Lefebvre: Fast unfolding of + * community hierarchies in large networks, J Stat Mech P10008 (2008), + * http://arxiv.org/abs/0803.0476 + * + * @throws cugraph::logic_error when an error occurs. + * + * @tparam graph_view_t Type of graph + * + * @param[in] handle Library handle (RAFT) + * @param[in] graph_view Input graph view object (CSR) + * @param[in] max_level (optional) maximum number of levels to run (default 100) + * @param[in] resolution (optional) The value of the resolution parameter to use. + * Called gamma in the modularity formula, this changes the size + * of the communities. Higher resolutions lead to more smaller + * communities, lower resolutions lead to fewer larger + * communities. (default 1) + * + * @return a pair containing: + * 1) unique pointer to dendrogram + * 2) modularity of the returned clustering + * + */ +template +std::pair>, + typename graph_view_t::weight_type> +louvain(raft::handle_t const &handle, + graph_view_t const &graph_view, + size_t max_level = 100, + typename graph_view_t::weight_type resolution = typename graph_view_t::weight_type{1}); + +/** + * @brief Flatten a Dendrogram at a particular level + * + * A Dendrogram represents a hierarchical clustering/partitioning of + * a graph. This function will flatten the hierarchical clustering into + * a label for each vertex representing the final cluster/partition to + * which it is assigned + * + * @throws cugraph::logic_error when an error occurs. + * + * @tparam graph_view_t Type of graph + * + * @param[in] handle Library handle (RAFT). 
If a communicator is set in the handle, + * @param[in] graph input graph object + * @param[in] dendrogram input dendrogram object + * @param[out] clustering Pointer to device array where the clustering should be stored + * + */ +template +void flatten_dendrogram(raft::handle_t const &handle, + graph_view_t const &graph_view, + Dendrogram const &dendrogram, + typename graph_view_t::vertex_type *clustering); /** * @brief Leiden implementation diff --git a/cpp/src/community/dendrogram.cuh b/cpp/include/dendrogram.hpp similarity index 55% rename from cpp/src/community/dendrogram.cuh rename to cpp/include/dendrogram.hpp index 414f5f3854d..bb9ba470a52 100644 --- a/cpp/src/community/dendrogram.cuh +++ b/cpp/include/dendrogram.hpp @@ -15,7 +15,7 @@ */ #pragma once -#include +#include #include #include @@ -25,30 +25,26 @@ namespace cugraph { template class Dendrogram { public: - void add_level(vertex_t num_verts, + void add_level(vertex_t first_index, + vertex_t num_verts, cudaStream_t stream = 0, rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) { - level_ptr_.push_back( - std::make_unique(num_verts * sizeof(vertex_t), stream, mr)); - level_size_.push_back(num_verts); + level_ptr_.push_back(std::make_unique>(num_verts, stream, mr)); + level_first_index_.push_back(first_index); } - size_t current_level() const { return level_size_.size() - 1; } + size_t current_level() const { return level_ptr_.size() - 1; } - size_t num_levels() const { return level_size_.size(); } + size_t num_levels() const { return level_ptr_.size(); } - vertex_t const *get_level_ptr_nocheck(size_t level) const - { - return static_cast(level_ptr_[level]->data()); - } + vertex_t const *get_level_ptr_nocheck(size_t level) const { return level_ptr_[level]->data(); } - vertex_t *get_level_ptr_nocheck(size_t level) - { - return static_cast(level_ptr_[level]->data()); - } + vertex_t *get_level_ptr_nocheck(size_t level) { return level_ptr_[level]->data(); } - vertex_t get_level_size_nocheck(size_t level) const { return level_size_[level]; } + size_t get_level_size_nocheck(size_t level) const { return level_ptr_[level]->size(); } + + vertex_t get_level_first_index_nocheck(size_t level) const { return level_first_index_[level]; } vertex_t const *current_level_begin() const { return get_level_ptr_nocheck(current_level()); } @@ -58,11 +54,16 @@ class Dendrogram { vertex_t *current_level_end() { return current_level_begin() + current_level_size(); } - vertex_t current_level_size() const { return get_level_size_nocheck(current_level()); } + size_t current_level_size() const { return get_level_size_nocheck(current_level()); } + + vertex_t current_level_first_index() const + { + return get_level_first_index_nocheck(current_level()); + } private: - std::vector level_size_; - std::vector> level_ptr_; + std::vector level_first_index_; + std::vector>> level_ptr_; }; } // namespace cugraph diff --git a/cpp/src/experimental/include_cuco_static_map.cuh b/cpp/include/experimental/include_cuco_static_map.cuh similarity index 100% rename from cpp/src/experimental/include_cuco_static_map.cuh rename to cpp/include/experimental/include_cuco_static_map.cuh diff --git a/cpp/include/graph.hpp b/cpp/include/graph.hpp index b30159566b5..8ea58546ce1 100644 --- a/cpp/include/graph.hpp +++ b/cpp/include/graph.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -69,6 +69,10 @@ class GraphViewBase { edge_t *local_edges; vertex_t *local_offsets; + vertex_t get_number_of_vertices() const { return number_of_vertices; } + + vertex_t get_local_vertex_first() const { return vertex_t{0}; } + /** * @brief Fill the identifiers array with the vertex identifiers. * diff --git a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh index 11cf2cb1137..19a5f67c9de 100644 --- a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh +++ b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh @@ -27,7 +27,7 @@ #include -#include +#include #include diff --git a/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh b/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh index 0b3588bc8c5..e621ed91ddb 100644 --- a/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh +++ b/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh @@ -25,8 +25,6 @@ #include -#include - #include namespace cugraph { diff --git a/cpp/include/utilities/collect_comm.cuh b/cpp/include/utilities/collect_comm.cuh index 5ca58ebeb17..8d2227c0f60 100644 --- a/cpp/include/utilities/collect_comm.cuh +++ b/cpp/include/utilities/collect_comm.cuh @@ -16,6 +16,7 @@ #pragma once #include +#include #include #include @@ -24,7 +25,6 @@ #include #include -#include #include #include diff --git a/cpp/include/utilities/device_comm.cuh b/cpp/include/utilities/device_comm.cuh index 7b9956902cc..53711f21a6c 100644 --- a/cpp/include/utilities/device_comm.cuh +++ b/cpp/include/utilities/device_comm.cuh @@ -238,10 +238,12 @@ template struct device_sendrecv_tuple_iterator_element_impl { void run(raft::comms::comms_t const& comm, InputIterator input_first, - size_t count, + size_t tx_count, int dst, - int base_tag, - raft::comms::request_t* requests) const + OutputIterator output_first, + size_t rx_count, + int src, + cudaStream_t stream) const { } }; @@ -460,7 +462,7 @@ struct device_reduce_tuple_iterator_element_impl { op, root, stream); - device_reduce_tuple_iterator_element_impl( + device_reduce_tuple_iterator_element_impl().run( comm, input_first, output_first, count, op, root, stream); } }; @@ -889,9 +891,11 @@ device_reduce(raft::comms::comms_t const& comm, size_t constexpr tuple_size = thrust::tuple_size::value_type>::value; - detail:: - device_reduce_tuple_iterator_element_impl( - comm, input_first, output_first, count, op, root, stream); + detail::device_reduce_tuple_iterator_element_impl() + .run(comm, input_first, output_first, count, op, root, stream); } template diff --git a/cpp/src/community/ecg.cu b/cpp/src/community/ecg.cu index 994204ecd32..45f7d723191 100644 --- a/cpp/src/community/ecg.cu +++ b/cpp/src/community/ecg.cu @@ -117,7 +117,7 @@ class EcgLouvain : public cugraph::Louvain { void initialize_dendrogram_level(vertex_t num_vertices) override { - this->dendrogram_->add_level(num_vertices); + this->dendrogram_->add_level(0, num_vertices); get_permutation_vector( num_vertices, seed_, this->dendrogram_->current_level_begin(), this->stream_); diff --git a/cpp/src/community/flatten_dendrogram.cuh b/cpp/src/community/flatten_dendrogram.cuh index 892fe2d1c51..6d455a68192 100644 --- a/cpp/src/community/flatten_dendrogram.cuh +++ b/cpp/src/community/flatten_dendrogram.cuh @@ -15,7 
+15,7 @@ */ #pragma once -#include +#include #include #include @@ -31,23 +31,28 @@ void partition_at_level(raft::handle_t const &handle, size_t level) { vertex_t local_num_verts = dendrogram.get_level_size_nocheck(0); + rmm::device_uvector local_vertex_ids_v(local_num_verts, handle.get_stream()); - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - d_vertex_ids, - d_vertex_ids + local_num_verts, - d_partition); - - std::for_each(thrust::make_counting_iterator(0), - thrust::make_counting_iterator(level), - [&handle, &dendrogram, d_vertex_ids, &d_partition, local_num_verts](size_t l) { - cugraph::experimental::relabel( - handle, - std::tuple( - d_vertex_ids, dendrogram.get_level_ptr_nocheck(l)), - dendrogram.get_level_size_nocheck(l), - d_partition, - local_num_verts); - }); + raft::copy(d_partition, d_vertex_ids, local_num_verts, handle.get_stream()); + + std::for_each( + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(level), + [&handle, &dendrogram, &local_vertex_ids_v, d_vertex_ids, &d_partition, local_num_verts]( + size_t l) { + thrust::sequence(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + local_vertex_ids_v.begin(), + local_vertex_ids_v.begin() + dendrogram.get_level_size_nocheck(l), + dendrogram.get_level_first_index_nocheck(l)); + + cugraph::experimental::relabel( + handle, + std::tuple(local_vertex_ids_v.data(), + dendrogram.get_level_ptr_nocheck(l)), + dendrogram.get_level_size_nocheck(l), + d_partition, + local_num_verts); + }); } } // namespace cugraph diff --git a/cpp/src/community/leiden.cuh b/cpp/src/community/leiden.cuh index 141f8beac40..aae2d3712b5 100644 --- a/cpp/src/community/leiden.cuh +++ b/cpp/src/community/leiden.cuh @@ -132,7 +132,7 @@ class Leiden : public Louvain { // // Initialize every cluster to reference each vertex to itself // - this->dendrogram_->add_level(current_graph.number_of_vertices); + this->dendrogram_->add_level(0, current_graph.number_of_vertices); thrust::sequence(rmm::exec_policy(this->stream_)->on(this->stream_), this->dendrogram_->current_level_begin(), diff --git a/cpp/src/community/louvain.cu b/cpp/src/community/louvain.cu index a851777ad93..2affcf29805 100644 --- a/cpp/src/community/louvain.cu +++ b/cpp/src/community/louvain.cu @@ -26,50 +26,28 @@ namespace cugraph { namespace detail { template -std::pair louvain(raft::handle_t const &handle, - GraphCSRView const &graph_view, - vertex_t *clustering, - size_t max_level, - weight_t resolution) +std::pair>, weight_t> louvain( + raft::handle_t const &handle, + GraphCSRView const &graph_view, + size_t max_level, + weight_t resolution) { CUGRAPH_EXPECTS(graph_view.edge_data != nullptr, "Invalid input argument: louvain expects a weighted graph"); - CUGRAPH_EXPECTS(clustering != nullptr, - "Invalid input argument: clustering is null, should be a device pointer to " - "memory for storing the result"); Louvain> runner(handle, graph_view); weight_t wt = runner(max_level, resolution); - rmm::device_uvector vertex_ids_v(graph_view.number_of_vertices, handle.get_stream()); - - thrust::sequence(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - vertex_ids_v.begin(), - vertex_ids_v.end(), - vertex_t{0}); - - partition_at_level(handle, - runner.get_dendrogram(), - vertex_ids_v.data(), - clustering, - runner.get_dendrogram().num_levels()); - - // FIXME: Consider returning the Dendrogram at some point - return std::make_pair(runner.get_dendrogram().num_levels(), wt); + return std::make_pair(runner.move_dendrogram(), wt); } 
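For reference, a minimal sketch of how the two new public entry points declared in algorithms.hpp (louvain returning a dendrogram, plus flatten_dendrogram) are intended to be combined, matching the detail::louvain refactoring above. The helper name cluster_and_flatten, the chosen max_level/resolution values, and the include paths are illustrative assumptions, not taken from this patch.

#include <algorithms.hpp>
#include <dendrogram.hpp>

#include <raft/handle.hpp>

#include <memory>
#include <utility>

// Illustrative only: run Louvain keeping the dendrogram, then flatten it.
template <typename graph_view_t>
void cluster_and_flatten(raft::handle_t const &handle,
                         graph_view_t const &graph_view,
                         typename graph_view_t::vertex_type *d_clustering)
{
  using vertex_t = typename graph_view_t::vertex_type;
  using weight_t = typename graph_view_t::weight_type;

  std::unique_ptr<cugraph::Dendrogram<vertex_t>> dendrogram{};
  weight_t modularity{};

  // Step 1: run Louvain and keep the full dendrogram (one level per pass).
  std::tie(dendrogram, modularity) =
    cugraph::louvain(handle, graph_view, size_t{100}, weight_t{1});

  // Step 2: collapse the dendrogram into a single cluster label per vertex.
  cugraph::flatten_dendrogram(handle, graph_view, *dendrogram, d_clustering);
}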
template -std::pair louvain( +std::pair>, weight_t> louvain( raft::handle_t const &handle, experimental::graph_view_t const &graph_view, - vertex_t *clustering, size_t max_level, weight_t resolution) { - CUGRAPH_EXPECTS(clustering != nullptr, - "Invalid input argument: clustering is null, should be a device pointer to " - "memory for storing the result"); - // "FIXME": remove this check and the guards below // // Disable louvain(experimental::graph_view_t,...) @@ -87,40 +65,153 @@ std::pair louvain( weight_t wt = runner(max_level, resolution); - rmm::device_uvector vertex_ids_v(graph_view.get_number_of_vertices(), - handle.get_stream()); + return std::make_pair(runner.move_dendrogram(), wt); + } +} - thrust::sequence(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - vertex_ids_v.begin(), - vertex_ids_v.end(), - graph_view.get_local_vertex_first()); +template +void flatten_dendrogram(raft::handle_t const &handle, + GraphCSRView const &graph_view, + Dendrogram const &dendrogram, + vertex_t *clustering) +{ + rmm::device_uvector vertex_ids_v(graph_view.number_of_vertices, handle.get_stream()); - partition_at_level(handle, - runner.get_dendrogram(), - vertex_ids_v.data(), - clustering, - runner.get_dendrogram().num_levels()); + thrust::sequence(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertex_ids_v.begin(), + vertex_ids_v.end(), + vertex_t{0}); - // FIXME: Consider returning the Dendrogram at some point - return std::make_pair(runner.get_dendrogram().num_levels(), wt); - } + partition_at_level( + handle, dendrogram, vertex_ids_v.data(), clustering, dendrogram.num_levels()); +} + +template +void flatten_dendrogram( + raft::handle_t const &handle, + experimental::graph_view_t const &graph_view, + Dendrogram const &dendrogram, + vertex_t *clustering) +{ + rmm::device_uvector vertex_ids_v(graph_view.get_number_of_vertices(), + handle.get_stream()); + + thrust::sequence(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertex_ids_v.begin(), + vertex_ids_v.end(), + graph_view.get_local_vertex_first()); + + partition_at_level( + handle, dendrogram, vertex_ids_v.data(), clustering, dendrogram.num_levels()); } } // namespace detail -template -std::pair louvain(raft::handle_t const &handle, - graph_t const &graph, - typename graph_t::vertex_type *clustering, - size_t max_level, - typename graph_t::weight_type resolution) +template +std::pair>, + typename graph_view_t::weight_type> +louvain(raft::handle_t const &handle, + graph_view_t const &graph_view, + size_t max_level, + typename graph_view_t::weight_type resolution) +{ + return detail::louvain(handle, graph_view, max_level, resolution); +} + +template +void flatten_dendrogram(raft::handle_t const &handle, + graph_view_t const &graph_view, + Dendrogram const &dendrogram, + typename graph_view_t::vertex_type *clustering) { + detail::flatten_dendrogram(handle, graph_view, dendrogram, clustering); +} + +template +std::pair louvain( + raft::handle_t const &handle, + graph_view_t const &graph_view, + typename graph_view_t::vertex_type *clustering, + size_t max_level, + typename graph_view_t::weight_type resolution) +{ + using vertex_t = typename graph_view_t::vertex_type; + using weight_t = typename graph_view_t::weight_type; + CUGRAPH_EXPECTS(clustering != nullptr, "Invalid input argument: clustering is null"); - return detail::louvain(handle, graph, clustering, max_level, resolution); + std::unique_ptr> dendrogram; + weight_t modularity; + + std::tie(dendrogram, modularity) = louvain(handle, 
graph_view, max_level, resolution); + + flatten_dendrogram(handle, graph_view, *dendrogram, clustering); + + return std::make_pair(dendrogram->num_levels(), modularity); } // Explicit template instantations +template std::pair>, float> louvain( + raft::handle_t const &, + experimental::graph_view_t const &, + size_t, + float); +template std::pair>, float> louvain( + raft::handle_t const &, + experimental::graph_view_t const &, + size_t, + float); +template std::pair>, float> louvain( + raft::handle_t const &, + experimental::graph_view_t const &, + size_t, + float); +template std::pair>, double> louvain( + raft::handle_t const &, + experimental::graph_view_t const &, + size_t, + double); +template std::pair>, double> louvain( + raft::handle_t const &, + experimental::graph_view_t const &, + size_t, + double); +template std::pair>, double> louvain( + raft::handle_t const &, + experimental::graph_view_t const &, + size_t, + double); +template std::pair>, float> louvain( + raft::handle_t const &, + experimental::graph_view_t const &, + size_t, + float); +template std::pair>, float> louvain( + raft::handle_t const &, + experimental::graph_view_t const &, + size_t, + float); +template std::pair>, float> louvain( + raft::handle_t const &, + experimental::graph_view_t const &, + size_t, + float); +template std::pair>, double> louvain( + raft::handle_t const &, + experimental::graph_view_t const &, + size_t, + double); +template std::pair>, double> louvain( + raft::handle_t const &, + experimental::graph_view_t const &, + size_t, + double); +template std::pair>, double> louvain( + raft::handle_t const &, + experimental::graph_view_t const &, + size_t, + double); + template std::pair louvain( raft::handle_t const &, GraphCSRView const &, int32_t *, size_t, float); template std::pair louvain(raft::handle_t const &, diff --git a/cpp/src/community/louvain.cuh b/cpp/src/community/louvain.cuh index e28f0f1746d..0862bbc62a9 100644 --- a/cpp/src/community/louvain.cuh +++ b/cpp/src/community/louvain.cuh @@ -20,7 +20,7 @@ #include #include -#include +#include #include @@ -138,9 +138,11 @@ class Louvain { return Q; } - Dendrogram &get_dendrogram() const { return *dendrogram_; } + Dendrogram const &get_dendrogram() const { return *dendrogram_; } - std::unique_ptr> move_dendrogram() { return dendrogram_; } + Dendrogram &get_dendrogram() { return *dendrogram_; } + + std::unique_ptr> move_dendrogram() { return std::move(dendrogram_); } virtual weight_t operator()(size_t max_level, weight_t resolution) { @@ -208,7 +210,7 @@ class Louvain { virtual void initialize_dendrogram_level(vertex_t num_vertices) { - dendrogram_->add_level(num_vertices); + dendrogram_->add_level(0, num_vertices); thrust::sequence(rmm::exec_policy(stream_)->on(stream_), dendrogram_->current_level_begin(), diff --git a/cpp/src/experimental/louvain.cuh b/cpp/src/experimental/louvain.cuh index fe8310a62ca..3136515faa6 100644 --- a/cpp/src/experimental/louvain.cuh +++ b/cpp/src/experimental/louvain.cuh @@ -15,28 +15,21 @@ */ #pragma once -#include +#include #include - -#include -#include -#include -#include - -#include - #include + #include #include +#include +#include #include #include +#include -#include - -#include - -#include +#include +#include //#define TIMING @@ -47,343 +40,6 @@ namespace cugraph { namespace experimental { -namespace detail { - -#ifdef CUCO_STATIC_MAP_DEFINED -template -struct create_cuco_pair_t { - cuco::pair_type __device__ operator()(data_t data) - { - cuco::pair_type tmp; - tmp.first = data; - tmp.second = data_t{0}; 
- return tmp; - } -}; -#endif - -// -// These classes should allow cuco::static_map to generate hash tables of -// different configurations. -// - -// -// Compare edges based on src[e] and dst[e] matching -// -template -class src_dst_equality_comparator_t { - public: - src_dst_equality_comparator_t(rmm::device_vector const &src, - rmm::device_vector const &dst, - sentinel_t sentinel_value) - : d_src_{src.data().get()}, d_dst_{dst.data().get()}, sentinel_value_(sentinel_value) - { - } - - src_dst_equality_comparator_t(data_t const *d_src, data_t const *d_dst, sentinel_t sentinel_value) - : d_src_{d_src}, d_dst_{d_dst}, sentinel_value_(sentinel_value) - { - } - - template - __device__ bool operator()(idx_type lhs_index, idx_type rhs_index) const noexcept - { - return (lhs_index != sentinel_value_) && (rhs_index != sentinel_value_) && - (d_src_[lhs_index] == d_src_[rhs_index]) && (d_dst_[lhs_index] == d_dst_[rhs_index]); - } - - private: - data_t const *d_src_; - data_t const *d_dst_; - sentinel_t sentinel_value_; -}; - -// -// Hash edges based src[e] and dst[e] -// -template -class src_dst_hasher_t { - public: - src_dst_hasher_t(rmm::device_vector const &src, rmm::device_vector const &dst) - : d_src_{src.data().get()}, d_dst_{dst.data().get()} - { - } - - src_dst_hasher_t(data_t const *d_src, data_t const *d_dst) : d_src_{d_src}, d_dst_{d_dst} {} - - template - __device__ auto operator()(idx_type index) const - { - cuco::detail::MurmurHash3_32 hasher; - - auto h_src = hasher(d_src_[index]); - auto h_dst = hasher(d_dst_[index]); - - /* - * Combine the source hash and the dest hash into a single hash value - * - * Taken from the Boost hash_combine function - * https://www.boost.org/doc/libs/1_35_0/doc/html/boost/hash_combine_id241013.html - */ - h_src ^= h_dst + 0x9e3779b9 + (h_src << 6) + (h_src >> 2); - - return h_src; - } - - private: - data_t const *d_src_; - data_t const *d_dst_; -}; - -// -// Compare edges based on src[e] and cluster[dst[e]] matching -// -template -class src_cluster_equality_comparator_t { - public: - src_cluster_equality_comparator_t(rmm::device_vector const &src, - rmm::device_vector const &dst, - rmm::device_vector const &dst_cluster_cache, - data_t base_dst_id, - sentinel_t sentinel_value) - : d_src_{src.data().get()}, - d_dst_{dst.data().get()}, - d_dst_cluster_{dst_cluster_cache.data().get()}, - base_dst_id_(base_dst_id), - sentinel_value_(sentinel_value) - { - } - - src_cluster_equality_comparator_t(data_t const *d_src, - data_t const *d_dst, - data_t const *d_dst_cluster_cache, - data_t base_dst_id, - sentinel_t sentinel_value) - : d_src_{d_src}, - d_dst_{d_dst}, - d_dst_cluster_{d_dst_cluster_cache}, - base_dst_id_(base_dst_id), - sentinel_value_(sentinel_value) - { - } - - __device__ bool operator()(sentinel_t lhs_index, sentinel_t rhs_index) const noexcept - { - return (lhs_index != sentinel_value_) && (rhs_index != sentinel_value_) && - (d_src_[lhs_index] == d_src_[rhs_index]) && - (d_dst_cluster_[d_dst_[lhs_index] - base_dst_id_] == - d_dst_cluster_[d_dst_[rhs_index] - base_dst_id_]); - } - - private: - data_t const *d_src_; - data_t const *d_dst_; - data_t const *d_dst_cluster_; - data_t base_dst_id_; - sentinel_t sentinel_value_; -}; - -// -// Hash edges based src[e] and cluster[dst[e]] -// -template -class src_cluster_hasher_t { - public: - src_cluster_hasher_t(rmm::device_vector const &src, - rmm::device_vector const &dst, - rmm::device_vector const &dst_cluster_cache, - data_t base_dst_id) - : d_src_{src.data().get()}, - d_dst_{dst.data().get()}, - 
d_dst_cluster_{dst_cluster_cache.data().get()}, - base_dst_id_(base_dst_id) - { - } - - src_cluster_hasher_t(data_t const *d_src, - data_t const *d_dst, - data_t const *d_dst_cluster_cache, - data_t base_dst_id) - : d_src_{d_src}, d_dst_{d_dst}, d_dst_cluster_{d_dst_cluster_cache}, base_dst_id_(base_dst_id) - { - } - - template - __device__ auto operator()(idx_type index) const - { - cuco::detail::MurmurHash3_32 hasher; - - auto h_src = hasher(d_src_[index]); - auto h_cluster = hasher(d_dst_cluster_[d_dst_[index] - base_dst_id_]); - - /* - * Combine the source hash and the cluster hash into a single hash value - * - * Taken from the Boost hash_combine function - * https://www.boost.org/doc/libs/1_35_0/doc/html/boost/hash_combine_id241013.html - */ - h_src ^= h_cluster + 0x9e3779b9 + (h_src << 6) + (h_src >> 2); - - return h_src; - } - - private: - data_t const *d_src_; - data_t const *d_dst_; - data_t const *d_dst_cluster_; - data_t base_dst_id_; -}; - -// -// Skip edges where src[e] == dst[e] -// -template -class skip_edge_t { - public: - skip_edge_t(rmm::device_vector const &src, rmm::device_vector const &dst) - : d_src_{src.data().get()}, d_dst_{dst.data().get()} - { - } - - skip_edge_t(data_t const *src, data_t const *dst) : d_src_{src}, d_dst_{dst} {} - - template - __device__ auto operator()(idx_type index) const - { - return d_src_[index] == d_dst_[index]; - } - - private: - data_t const *d_src_; - data_t const *d_dst_; -}; - -template -struct lookup_by_vertex_id { - public: - lookup_by_vertex_id(data_t const *d_array, vertex_t const *d_vertices, vertex_t base_vertex_id) - : d_array_(d_array), d_vertices_(d_vertices), base_vertex_id_(base_vertex_id) - { - } - - template - data_t operator() __device__(edge_t edge_id) const - { - return d_array_[d_vertices_[edge_id] - base_vertex_id_]; - } - - private: - data_t const *d_array_; - vertex_t const *d_vertices_; - vertex_t base_vertex_id_; -}; - -template -vector_t remove_elements_from_vector(vector_t const &input_v, - iterator_t iterator_begin, - iterator_t iterator_end, - function_t function, - cudaStream_t stream) -{ - vector_t temp_v(input_v.size()); - - auto last = thrust::copy_if( - rmm::exec_policy(stream)->on(stream), iterator_begin, iterator_end, temp_v.begin(), function); - - temp_v.resize(thrust::distance(temp_v.begin(), last)); - - return temp_v; -} - -template -vector_t remove_elements_from_vector(vector_t const &input_v, - function_t function, - cudaStream_t stream) -{ - return remove_elements_from_vector(input_v, input_v.begin(), input_v.end(), function, stream); -} - -// FIXME: This should be a generic utility. 
The one in cython.cu -// is very close to this -template * = nullptr> -std::unique_ptr> -create_graph(raft::handle_t const &handle, - rmm::device_vector const &src_v, - rmm::device_vector const &dst_v, - rmm::device_vector const &weight_v, - std::size_t num_local_verts, - experimental::graph_properties_t graph_props, - view_t const &view) -{ - std::vector> edgelist( - {{src_v.data().get(), - dst_v.data().get(), - weight_v.data().get(), - static_cast(src_v.size())}}); - - return std::make_unique>( - handle, - edgelist, - view.get_partition(), - num_local_verts, - src_v.size(), - graph_props, - false, - false); -} - -template * = nullptr> -std::unique_ptr> -create_graph(raft::handle_t const &handle, - rmm::device_vector const &src_v, - rmm::device_vector const &dst_v, - rmm::device_vector const &weight_v, - std::size_t num_local_verts, - experimental::graph_properties_t graph_props, - view_t const &view) -{ - experimental::edgelist_t edgelist{ - src_v.data().get(), - dst_v.data().get(), - weight_v.data().get(), - static_cast(src_v.size())}; - - return std::make_unique>( - handle, edgelist, num_local_verts, graph_props, false, false); -} - -} // namespace detail - -// -// FIXME: Ultimately, this would be cleaner and more efficient if we did the following: -// -// 1) Create an object that does a single level Louvain computation on an input graph -// (no graph contraction) -// 2) Create an object that does graph contraction -// 3) Create Louvain to use these objects in sequence to compute the aggregate result. -// -// In MNMG-world, the graph contraction step is going to create another graph that likely -// fits efficiently in a smaller number of GPUs (eventually one). Decomposing the algorithm -// as above would allow us to eventually run the single GPU version of single level Louvain -// on the contracted graphs - which should be more efficient. -// -// FIXME: We should return the dendrogram and let the python layer clean it up (or have a -// separate C++ function to flatten the dendrogram). 
There are customers that might -// like the dendrogram and the implementation would be a bit cleaner if we did the -// collapsing as a separate step -// template class Louvain { public: @@ -405,67 +61,31 @@ class Louvain { handle_(handle), dendrogram_(std::make_unique>()), current_graph_view_(graph_view), - compute_partition_(handle, graph_view), - local_num_vertices_(graph_view.get_number_of_local_vertices()), - local_num_rows_(graph_view.get_number_of_local_adj_matrix_partition_rows()), - local_num_cols_(graph_view.get_number_of_local_adj_matrix_partition_cols()), - local_num_edges_(graph_view.get_number_of_edges()), - vertex_weights_v_(graph_view.get_number_of_local_vertices()), - cluster_weights_v_(graph_view.get_number_of_local_vertices()), - number_of_vertices_(graph_view.get_number_of_local_vertices()), - stream_(handle.get_stream()) + cluster_keys_v_(graph_view.get_number_of_local_vertices(), handle.get_stream()), + cluster_weights_v_(graph_view.get_number_of_local_vertices(), handle.get_stream()), + vertex_weights_v_(graph_view.get_number_of_local_vertices(), handle.get_stream()), + src_vertex_weights_cache_v_(0, handle.get_stream()), + src_cluster_cache_v_(0, handle.get_stream()), + dst_cluster_cache_v_(0, handle.get_stream()) { - if (graph_view_t::is_multi_gpu) { - rank_ = handle.get_comms().get_rank(); - base_vertex_id_ = graph_view.get_local_vertex_first(); - base_src_vertex_id_ = graph_view.get_local_adj_matrix_partition_row_first(0); - base_dst_vertex_id_ = graph_view.get_local_adj_matrix_partition_col_first(0); - - local_num_edges_ = thrust::transform_reduce( - thrust::host, - thrust::make_counting_iterator(0), - thrust::make_counting_iterator( - graph_view.get_number_of_local_adj_matrix_partitions()), - [&graph_view](auto indx) { - return graph_view.get_number_of_local_adj_matrix_partition_edges(indx); - }, - size_t{0}, - thrust::plus()); - - CUDA_TRY(cudaStreamSynchronize(stream_)); - } - - src_indices_v_.resize(local_num_edges_); - - cugraph::detail::offsets_to_indices( - current_graph_view_.offsets(), local_num_rows_, src_indices_v_.data().get()); - - if (base_src_vertex_id_ > 0) { - thrust::transform(rmm::exec_policy(stream_)->on(stream_), - src_indices_v_.begin(), - src_indices_v_.end(), - thrust::make_constant_iterator(base_src_vertex_id_), - src_indices_v_.begin(), - thrust::plus()); - } } - Dendrogram &get_dendrogram() const { return *dendrogram_; } + Dendrogram const &get_dendrogram() const { return *dendrogram_; } + + Dendrogram &get_dendrogram() { return *dendrogram_; } - std::unique_ptr> move_dendrogram() { return dendrogram_; } + std::unique_ptr> move_dendrogram() { return std::move(dendrogram_); } virtual weight_t operator()(size_t max_level, weight_t resolution) { weight_t best_modularity = weight_t{-1}; -#ifdef CUCO_STATIC_MAP_DEFINED - weight_t total_edge_weight; - total_edge_weight = experimental::transform_reduce_e( + weight_t total_edge_weight = experimental::transform_reduce_e( handle_, current_graph_view_, thrust::make_constant_iterator(0), thrust::make_constant_iterator(0), - [] __device__(auto, auto, weight_t wt, auto, auto) { return wt; }, + [] __device__(auto src, auto dst, weight_t wt, auto, auto) { return wt; }, weight_t{0}); while (dendrogram_->num_levels() < max_level) { @@ -486,7 +106,6 @@ class Louvain { } timer_display(std::cout); -#endif return best_modularity; } @@ -495,14 +114,23 @@ class Louvain { void timer_start(std::string const ®ion) { #ifdef TIMING - if (rank_ == 0) hr_timer_.start(region); + if (graph_view_t::is_multi_gpu) { + if 
(handle.get_comms().get_rank() == 0) hr_timer_.start(region); + } else { + hr_timer_.start(region); + } #endif } void timer_stop(cudaStream_t stream) { #ifdef TIMING - if (rank_ == 0) { + if (graph_view_t::is_multi_gpu) { + if (handle.get_comms().get_rank() == 0) { + CUDA_TRY(cudaStreamSynchronize(stream)); + hr_timer_.stop(); + } + } else { CUDA_TRY(cudaStreamSynchronize(stream)); hr_timer_.stop(); } @@ -512,36 +140,46 @@ class Louvain { void timer_display(std::ostream &os) { #ifdef TIMING - if (rank_ == 0) hr_timer_.display(os); + if (graph_view_t::is_multi_gpu) { + if (handle.get_comms().get_rank() == 0) hr_timer_.display(os); + } else { + hr_timer_.display(os); + } #endif } protected: void initialize_dendrogram_level(vertex_t num_vertices) { - dendrogram_->add_level(num_vertices); + dendrogram_->add_level(current_graph_view_.get_local_vertex_first(), num_vertices); - thrust::sequence(rmm::exec_policy(stream_)->on(stream_), + thrust::sequence(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), dendrogram_->current_level_begin(), dendrogram_->current_level_end(), - base_vertex_id_); + current_graph_view_.get_local_vertex_first()); } public: weight_t modularity(weight_t total_edge_weight, weight_t resolution) { - weight_t sum_degree_squared = experimental::transform_reduce_v( - handle_, - current_graph_view_, + weight_t sum_degree_squared = thrust::transform_reduce( + rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), cluster_weights_v_.begin(), + cluster_weights_v_.end(), [] __device__(weight_t p) { return p * p; }, - weight_t{0}); + weight_t{0}, + thrust::plus()); + + if (graph_t::is_multi_gpu) { + sum_degree_squared = + host_scalar_allreduce(handle_.get_comms(), sum_degree_squared, handle_.get_stream()); + } weight_t sum_internal = experimental::transform_reduce_e( handle_, current_graph_view_, - src_cluster_cache_v_.begin(), - dst_cluster_cache_v_.begin(), + d_src_cluster_cache_, + d_dst_cluster_cache_, [] __device__(auto src, auto dst, weight_t wt, auto src_cluster, auto nbr_cluster) { if (src_cluster == nbr_cluster) { return wt; @@ -561,58 +199,86 @@ class Louvain { { timer_start("compute_vertex_and_cluster_weights"); - experimental::copy_v_transform_reduce_out_nbr( - handle_, - current_graph_view_, - thrust::make_constant_iterator(0), - thrust::make_constant_iterator(0), - [] __device__(auto src, auto, auto wt, auto, auto) { return wt; }, - weight_t{0}, - vertex_weights_v_.begin()); + vertex_weights_v_ = current_graph_view_.compute_out_weight_sums(handle_); - thrust::copy(rmm::exec_policy(stream_)->on(stream_), - vertex_weights_v_.begin(), - vertex_weights_v_.end(), - cluster_weights_v_.begin()); + thrust::sequence(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + cluster_keys_v_.begin(), + cluster_keys_v_.end(), + current_graph_view_.get_local_vertex_first()); - cache_vertex_properties( - vertex_weights_v_.begin(), src_vertex_weights_cache_v_, dst_vertex_weights_cache_v_); + raft::copy(cluster_weights_v_.begin(), + vertex_weights_v_.begin(), + vertex_weights_v_.size(), + handle_.get_stream()); - cache_vertex_properties( - cluster_weights_v_.begin(), src_cluster_weights_cache_v_, dst_cluster_weights_cache_v_); + d_src_vertex_weights_cache_ = + cache_src_vertex_properties(vertex_weights_v_, src_vertex_weights_cache_v_); + + if (graph_view_t::is_multi_gpu) { + auto const comm_size = handle_.get_comms().get_size(); + rmm::device_uvector rx_keys_v(0, handle_.get_stream()); + rmm::device_uvector rx_weights_v(0, handle_.get_stream()); + + 
auto pair_first = thrust::make_zip_iterator( + thrust::make_tuple(cluster_keys_v_.begin(), cluster_weights_v_.begin())); + + std::forward_as_tuple(std::tie(rx_keys_v, rx_weights_v), std::ignore) = + groupby_gpuid_and_shuffle_values( + handle_.get_comms(), + pair_first, + pair_first + current_graph_view_.get_number_of_local_vertices(), + [key_func = + cugraph::experimental::detail::compute_gpu_id_from_vertex_t{ + comm_size}] __device__(auto val) { return key_func(thrust::get<0>(val)); }, + handle_.get_stream()); + + cluster_keys_v_ = std::move(rx_keys_v); + cluster_weights_v_ = std::move(rx_weights_v); + } - timer_stop(stream_); + timer_stop(handle_.get_stream()); } - template - void cache_vertex_properties(iterator_t const &local_input_iterator, - rmm::device_vector &src_cache_v, - rmm::device_vector &dst_cache_v, - bool src = true, - bool dst = true) + template + T *cache_src_vertex_properties(rmm::device_uvector &input, rmm::device_uvector &src_cache_v) { - if (src) { - src_cache_v.resize(current_graph_view_.get_number_of_local_adj_matrix_partition_rows()); - copy_to_adj_matrix_row( - handle_, current_graph_view_, local_input_iterator, src_cache_v.begin()); + if (graph_view_t::is_multi_gpu) { + src_cache_v.resize(current_graph_view_.get_number_of_local_adj_matrix_partition_rows(), + handle_.get_stream()); + copy_to_adj_matrix_row(handle_, current_graph_view_, input.begin(), src_cache_v.begin()); + return src_cache_v.begin(); + } else { + return input.begin(); } + } - if (dst) { - dst_cache_v.resize(current_graph_view_.get_number_of_local_adj_matrix_partition_cols()); - copy_to_adj_matrix_col( - handle_, current_graph_view_, local_input_iterator, dst_cache_v.begin()); + template + T *cache_dst_vertex_properties(rmm::device_uvector &input, rmm::device_uvector &dst_cache_v) + { + if (graph_view_t::is_multi_gpu) { + dst_cache_v.resize(current_graph_view_.get_number_of_local_adj_matrix_partition_cols(), + handle_.get_stream()); + copy_to_adj_matrix_col(handle_, current_graph_view_, input.begin(), dst_cache_v.begin()); + return dst_cache_v.begin(); + } else { + return input.begin(); } } -#ifdef CUCO_STATIC_MAP_DEFINED virtual weight_t update_clustering(weight_t total_edge_weight, weight_t resolution) { timer_start("update_clustering"); - rmm::device_vector next_cluster_v(dendrogram_->current_level_begin(), - dendrogram_->current_level_end()); + rmm::device_uvector next_cluster_v(dendrogram_->current_level_size(), + handle_.get_stream()); - cache_vertex_properties(next_cluster_v.begin(), src_cluster_cache_v_, dst_cluster_cache_v_); + raft::copy(next_cluster_v.begin(), + dendrogram_->current_level_begin(), + dendrogram_->current_level_size(), + handle_.get_stream()); + + d_src_cluster_cache_ = cache_src_vertex_properties(next_cluster_v, src_cluster_cache_v_); + d_dst_cluster_cache_ = cache_dst_vertex_properties(next_cluster_v, dst_cluster_cache_v_); weight_t new_Q = modularity(total_edge_weight, resolution); weight_t cur_Q = new_Q - 1; @@ -629,691 +295,271 @@ class Louvain { up_down = !up_down; - cache_vertex_properties(next_cluster_v.begin(), src_cluster_cache_v_, dst_cluster_cache_v_); - new_Q = modularity(total_edge_weight, resolution); if (new_Q > cur_Q) { - thrust::copy(rmm::exec_policy(stream_)->on(stream_), - next_cluster_v.begin(), - next_cluster_v.end(), - dendrogram_->current_level_begin()); + raft::copy(dendrogram_->current_level_begin(), + next_cluster_v.begin(), + next_cluster_v.size(), + handle_.get_stream()); } } - // cache the final clustering locally on each cpu - 
cache_vertex_properties( - dendrogram_->current_level_begin(), src_cluster_cache_v_, dst_cluster_cache_v_); - - timer_stop(stream_); + timer_stop(handle_.get_stream()); return cur_Q; } - void update_by_delta_modularity(weight_t total_edge_weight, - weight_t resolution, - rmm::device_vector &next_cluster_v, - bool up_down) + void compute_cluster_sum_and_subtract(rmm::device_uvector &old_cluster_sum_v, + rmm::device_uvector &cluster_subtract_v) { - rmm::device_vector old_cluster_sum_v(local_num_vertices_); - rmm::device_vector src_old_cluster_sum_cache_v; + auto output_buffer = + cugraph::experimental::allocate_dataframe_buffer>( + current_graph_view_.get_number_of_local_vertices(), handle_.get_stream()); experimental::copy_v_transform_reduce_out_nbr( handle_, current_graph_view_, - src_cluster_cache_v_.begin(), - dst_cluster_cache_v_.begin(), + d_src_cluster_cache_, + d_dst_cluster_cache_, [] __device__(auto src, auto dst, auto wt, auto src_cluster, auto nbr_cluster) { - if ((src != dst) && (src_cluster == nbr_cluster)) { - return wt; - } else - return weight_t{0}; - }, - weight_t{0}, - old_cluster_sum_v.begin()); - - cache_vertex_properties( - old_cluster_sum_v.begin(), src_old_cluster_sum_cache_v, empty_cache_weight_v_, true, false); - - detail::src_cluster_equality_comparator_t compare( - src_indices_v_.data().get(), - current_graph_view_.indices(), - dst_cluster_cache_v_.data().get(), - base_dst_vertex_id_, - std::numeric_limits::max()); - detail::src_cluster_hasher_t hasher(src_indices_v_.data().get(), - current_graph_view_.indices(), - dst_cluster_cache_v_.data().get(), - base_dst_vertex_id_); - detail::skip_edge_t skip_edge(src_indices_v_.data().get(), - current_graph_view_.indices()); - - // - // Group edges that lead from same source to same neighboring cluster together - // local_cluster_edge_ids_v will contain edge ids of unique pairs of (src,nbr_cluster). - // If multiple edges exist, one edge id will be chosen (by a parallel race). - // nbr_weights_v will contain the combined weight of all of the edges that connect - // that pair. - // - rmm::device_vector local_cluster_edge_ids_v; - rmm::device_vector nbr_weights_v; - - // - // Perform this combining on the local edges - // - std::tie(local_cluster_edge_ids_v, nbr_weights_v) = combine_local_src_nbr_cluster_weights( - hasher, compare, skip_edge, current_graph_view_.weights(), local_num_edges_); - - // - // In order to compute delta_Q for a given src/nbr_cluster pair, I need the following - // information: - // src - // old_cluster - the cluster that src is currently assigned to - // nbr_cluster - // sum of edges going to new cluster - // vertex weight of the src vertex - // sum of edges going to old cluster - // cluster_weights of old cluster - // cluster_weights of nbr_cluster - // - // Each GPU has locally cached: - // The sum of edges going to the old cluster (computed from - // experimental::copy_v_transform_reduce_out_nbr call above. - // old_cluster - // nbr_cluster - // vertex weight of src vertex - // partial sum of edges going to the new cluster (in nbr_weights) - // - // So the plan is to take the tuple: - // (src, old_cluster, src_vertex_weight, old_cluster_sum, nbr_cluster, nbr_weights) - // and shuffle it around the cluster so that they arrive at the GPU where the pair - // (old_cluster, new_cluster) would be assigned. Then we can aggregate this information - // and compute the delta_Q values. 
- // - - // - // Define the communication pattern, we're going to send detail - // for edge i to the GPU that is responsible for the vertex - // pair (cluster[src[i]], cluster[dst[i]]) - // - auto communication_schedule = thrust::make_transform_iterator( - local_cluster_edge_ids_v.begin(), - [d_edge_device_view = compute_partition_.edge_device_view(), - d_src_indices = src_indices_v_.data().get(), - d_src_cluster = src_cluster_cache_v_.data().get(), - d_dst_indices = current_graph_view_.indices(), - d_dst_cluster = dst_cluster_cache_v_.data().get(), - base_src_vertex_id = base_src_vertex_id_, - base_dst_vertex_id = base_dst_vertex_id_] __device__(edge_t edge_id) { - return d_edge_device_view(d_src_cluster[d_src_indices[edge_id] - base_src_vertex_id], - d_dst_cluster[d_dst_indices[edge_id] - base_dst_vertex_id]); - }); + weight_t subtract{0}; + weight_t sum{0}; - // FIXME: This should really be a variable_shuffle of a tuple, for time - // reasons I'm just doing 6 independent shuffles. - // - rmm::device_vector ocs_v = variable_shuffle( - handle_, - local_cluster_edge_ids_v.size(), - thrust::make_transform_iterator( - local_cluster_edge_ids_v.begin(), - detail::lookup_by_vertex_id(src_old_cluster_sum_cache_v.data().get(), - src_indices_v_.data().get(), - base_src_vertex_id_)), - communication_schedule); - - rmm::device_vector src_cluster_v = - variable_shuffle( - handle_, - local_cluster_edge_ids_v.size(), - thrust::make_transform_iterator( - local_cluster_edge_ids_v.begin(), - detail::lookup_by_vertex_id( - src_cluster_cache_v_.data().get(), src_indices_v_.data().get(), base_src_vertex_id_)), - communication_schedule); - - rmm::device_vector src_vertex_weight_v = - variable_shuffle( - handle_, - local_cluster_edge_ids_v.size(), - thrust::make_transform_iterator( - local_cluster_edge_ids_v.begin(), - detail::lookup_by_vertex_id(src_vertex_weights_cache_v_.data().get(), - src_indices_v_.data().get(), - base_src_vertex_id_)), - communication_schedule); - - rmm::device_vector src_v = variable_shuffle( - handle_, - local_cluster_edge_ids_v.size(), - thrust::make_permutation_iterator(src_indices_v_.begin(), local_cluster_edge_ids_v.begin()), - communication_schedule); + if (src == dst) + subtract = wt; + else if (src_cluster == nbr_cluster) + sum = wt; - rmm::device_vector nbr_cluster_v = - variable_shuffle( - handle_, - local_cluster_edge_ids_v.size(), - thrust::make_transform_iterator( - local_cluster_edge_ids_v.begin(), - detail::lookup_by_vertex_id( - dst_cluster_cache_v_.data().get(), current_graph_view_.indices(), base_dst_vertex_id_)), - communication_schedule); - - nbr_weights_v = variable_shuffle( - handle_, nbr_weights_v.size(), nbr_weights_v.begin(), communication_schedule); - - // - // At this point, src_v, nbr_cluster_v and nbr_weights_v have been - // shuffled to the correct GPU. We can now compute the final - // value of delta_Q for each neigboring cluster - // - // Again, we'll combine edges that connect the same source to the same - // neighboring cluster and sum their weights. 
- // - detail::src_dst_equality_comparator_t compare2( - src_v, nbr_cluster_v, std::numeric_limits::max()); - detail::src_dst_hasher_t hasher2(src_v, nbr_cluster_v); - - auto skip_edge2 = [] __device__(auto) { return false; }; - - std::tie(local_cluster_edge_ids_v, nbr_weights_v) = combine_local_src_nbr_cluster_weights( - hasher2, compare2, skip_edge2, nbr_weights_v.data().get(), src_v.size()); - - // - // Now local_cluster_edge_ids_v contains the edge ids of the src id/dest - // cluster id pairs, and nbr_weights_v contains the weight of edges - // going to that cluster id - // - // Now we can compute (locally) each delta_Q value - // - auto iter = thrust::make_zip_iterator( - thrust::make_tuple(local_cluster_edge_ids_v.begin(), nbr_weights_v.begin())); - - thrust::transform(rmm::exec_policy(stream_)->on(stream_), - iter, - iter + local_cluster_edge_ids_v.size(), - nbr_weights_v.begin(), - [total_edge_weight, - resolution, - d_src = src_v.data().get(), - d_src_cluster = src_cluster_v.data().get(), - d_nbr_cluster = nbr_cluster_v.data().get(), - d_src_vertex_weights = src_vertex_weight_v.data().get(), - d_src_cluster_weights = src_cluster_weights_cache_v_.data().get(), - d_dst_cluster_weights = dst_cluster_weights_cache_v_.data().get(), - d_ocs = ocs_v.data().get(), - base_src_vertex_id = base_src_vertex_id_, - base_dst_vertex_id = base_dst_vertex_id_] __device__(auto tuple) { - edge_t edge_id = thrust::get<0>(tuple); - vertex_t nbr_cluster = d_nbr_cluster[edge_id]; - weight_t new_cluster_sum = thrust::get<1>(tuple); - vertex_t old_cluster = d_src_cluster[edge_id]; - weight_t k_k = d_src_vertex_weights[edge_id]; - weight_t old_cluster_sum = d_ocs[edge_id]; - - weight_t a_old = d_src_cluster_weights[old_cluster - base_src_vertex_id]; - weight_t a_new = d_dst_cluster_weights[nbr_cluster - base_dst_vertex_id]; - - return 2 * (((new_cluster_sum - old_cluster_sum) / total_edge_weight) - - resolution * (a_new * k_k - a_old * k_k + k_k * k_k) / - (total_edge_weight * total_edge_weight)); - }); - - // - // Pick the largest delta_Q value for each vertex on this gpu. 
- // Then we will shuffle back to the gpu by vertex id - // - rmm::device_vector final_src_v(local_cluster_edge_ids_v.size()); - rmm::device_vector final_nbr_cluster_v(local_cluster_edge_ids_v.size()); - rmm::device_vector final_nbr_weights_v(local_cluster_edge_ids_v.size()); - - auto final_input_iter = thrust::make_zip_iterator(thrust::make_tuple( - thrust::make_permutation_iterator(src_v.begin(), local_cluster_edge_ids_v.begin()), - thrust::make_permutation_iterator(nbr_cluster_v.begin(), local_cluster_edge_ids_v.begin()), - nbr_weights_v.begin())); - - auto final_output_iter = thrust::make_zip_iterator(thrust::make_tuple( - final_src_v.begin(), final_nbr_cluster_v.begin(), final_nbr_weights_v.begin())); - - auto final_output_pos = - thrust::copy_if(rmm::exec_policy(stream_)->on(stream_), - final_input_iter, - final_input_iter + local_cluster_edge_ids_v.size(), - final_output_iter, - [] __device__(auto p) { return (thrust::get<2>(p) > weight_t{0}); }); - - final_src_v.resize(thrust::distance(final_output_iter, final_output_pos)); - final_nbr_cluster_v.resize(thrust::distance(final_output_iter, final_output_pos)); - final_nbr_weights_v.resize(thrust::distance(final_output_iter, final_output_pos)); - - // - // Sort the results, pick the largest version - // - thrust::sort(rmm::exec_policy(stream_)->on(stream_), - thrust::make_zip_iterator(thrust::make_tuple( - final_src_v.begin(), final_nbr_weights_v.begin(), final_nbr_cluster_v.begin())), - thrust::make_zip_iterator(thrust::make_tuple( - final_src_v.end(), final_nbr_weights_v.end(), final_nbr_cluster_v.begin())), - [] __device__(auto left, auto right) { - if (thrust::get<0>(left) < thrust::get<0>(right)) return true; - if (thrust::get<0>(left) > thrust::get<0>(right)) return false; - if (thrust::get<1>(left) > thrust::get<1>(right)) return true; - if (thrust::get<1>(left) < thrust::get<1>(right)) return false; - return (thrust::get<2>(left) < thrust::get<2>(right)); - }); - - // - // Now that we're sorted the first entry for each src value is the largest. 
- // - local_cluster_edge_ids_v.resize(final_src_v.size()); - - thrust::transform(rmm::exec_policy(stream_)->on(stream_), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(final_src_v.size()), - local_cluster_edge_ids_v.begin(), - [sentinel = std::numeric_limits::max(), - d_src = final_src_v.data().get()] __device__(edge_t edge_id) { - if (edge_id == 0) { return edge_id; } - - if (d_src[edge_id - 1] != d_src[edge_id]) { return edge_id; } - - return sentinel; - }); - - local_cluster_edge_ids_v = detail::remove_elements_from_vector( - local_cluster_edge_ids_v, - [sentinel = std::numeric_limits::max()] __device__(auto edge_id) { - return (edge_id != sentinel); + return thrust::make_tuple(subtract, sum); }, - stream_); + thrust::make_tuple(weight_t{0}, weight_t{0}), + cugraph::experimental::get_dataframe_buffer_begin>( + output_buffer)); + + thrust::transform( + rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + cugraph::experimental::get_dataframe_buffer_begin>( + output_buffer), + cugraph::experimental::get_dataframe_buffer_begin>( + output_buffer) + + current_graph_view_.get_number_of_local_vertices(), + old_cluster_sum_v.begin(), + [] __device__(auto p) { return thrust::get<1>(p); }); + + thrust::transform( + rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + cugraph::experimental::get_dataframe_buffer_begin>( + output_buffer), + cugraph::experimental::get_dataframe_buffer_begin>( + output_buffer) + + current_graph_view_.get_number_of_local_vertices(), + cluster_subtract_v.begin(), + [] __device__(auto p) { return thrust::get<0>(p); }); + } - final_nbr_cluster_v = variable_shuffle( - handle_, - local_cluster_edge_ids_v.size(), - thrust::make_permutation_iterator(final_nbr_cluster_v.begin(), - local_cluster_edge_ids_v.begin()), - thrust::make_transform_iterator( - thrust::make_permutation_iterator(final_src_v.begin(), local_cluster_edge_ids_v.begin()), - [d_vertex_device_view = compute_partition_.vertex_device_view()] __device__(vertex_t v) { - return d_vertex_device_view(v); - })); - - final_nbr_weights_v = variable_shuffle( - handle_, - local_cluster_edge_ids_v.size(), - thrust::make_permutation_iterator(final_nbr_weights_v.begin(), - local_cluster_edge_ids_v.begin()), - thrust::make_transform_iterator( - thrust::make_permutation_iterator(final_src_v.begin(), local_cluster_edge_ids_v.begin()), - [d_vertex_device_view = compute_partition_.vertex_device_view()] __device__(vertex_t v) { - return d_vertex_device_view(v); - })); - - final_src_v = variable_shuffle( + void update_by_delta_modularity(weight_t total_edge_weight, + weight_t resolution, + rmm::device_uvector &next_cluster_v, + bool up_down) + { +#ifdef CUCO_STATIC_MAP_DEFINED + rmm::device_uvector old_cluster_sum_v( + current_graph_view_.get_number_of_local_vertices(), handle_.get_stream()); + rmm::device_uvector cluster_subtract_v( + current_graph_view_.get_number_of_local_vertices(), handle_.get_stream()); + rmm::device_uvector src_cluster_weights_v(next_cluster_v.size(), + handle_.get_stream()); + rmm::device_uvector dst_cluster_weights_v(next_cluster_v.size(), + handle_.get_stream()); + + compute_cluster_sum_and_subtract(old_cluster_sum_v, cluster_subtract_v); + + auto output_buffer = + cugraph::experimental::allocate_dataframe_buffer>( + current_graph_view_.get_number_of_local_vertices(), handle_.get_stream()); + + vertex_t *map_key_first; + vertex_t *map_key_last; + weight_t *map_value_first; + + if (graph_t::is_multi_gpu) { + 
cugraph::experimental::detail::compute_gpu_id_from_vertex_t vertex_to_gpu_id_op{ + handle_.get_comms().get_size()}; + + src_cluster_weights_v = cugraph::experimental::collect_values_for_keys( + handle_.get_comms(), + cluster_keys_v_.begin(), + cluster_keys_v_.end(), + cluster_weights_v_.data(), + d_src_cluster_cache_, + d_src_cluster_cache_ + src_cluster_cache_v_.size(), + vertex_to_gpu_id_op, + handle_.get_stream()); + + dst_cluster_weights_v = cugraph::experimental::collect_values_for_keys( + handle_.get_comms(), + cluster_keys_v_.begin(), + cluster_keys_v_.end(), + cluster_weights_v_.data(), + d_dst_cluster_cache_, + d_dst_cluster_cache_ + dst_cluster_cache_v_.size(), + vertex_to_gpu_id_op, + handle_.get_stream()); + + map_key_first = d_dst_cluster_cache_; + map_key_last = d_dst_cluster_cache_ + dst_cluster_cache_v_.size(); + map_value_first = dst_cluster_weights_v.begin(); + } else { + thrust::sort_by_key(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + cluster_keys_v_.begin(), + cluster_keys_v_.end(), + cluster_weights_v_.begin()); + + thrust::transform(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + next_cluster_v.begin(), + next_cluster_v.end(), + src_cluster_weights_v.begin(), + [d_cluster_weights = cluster_weights_v_.data(), + d_cluster_keys = cluster_keys_v_.data(), + num_clusters = cluster_keys_v_.size()] __device__(vertex_t cluster) { + auto pos = thrust::lower_bound( + thrust::seq, d_cluster_keys, d_cluster_keys + num_clusters, cluster); + return d_cluster_weights[pos - d_cluster_keys]; + }); + + map_key_first = d_src_cluster_cache_; + map_key_last = d_src_cluster_cache_ + src_cluster_weights_v.size(); + map_value_first = src_cluster_weights_v.begin(); + } + + copy_v_transform_reduce_key_aggregated_out_nbr( handle_, - local_cluster_edge_ids_v.size(), - thrust::make_permutation_iterator(final_src_v.begin(), local_cluster_edge_ids_v.begin()), - thrust::make_transform_iterator( - thrust::make_permutation_iterator(final_src_v.begin(), local_cluster_edge_ids_v.begin()), - [d_vertex_device_view = compute_partition_.vertex_device_view()] __device__(vertex_t v) { - return d_vertex_device_view(v); - })); - - // - // At this point... - // final_src_v contains the source indices - // final_nbr_cluster_v contains the neighboring clusters - // final_nbr_weights_v contains delta_Q for moving src to the neighboring - // - // They have been shuffled to the gpus responsible for their source vertex - // - // FIXME: Think about how this should work. - // I think Leiden is broken. I don't think that the code we have - // actually does anything. For now I'm going to ignore Leiden in - // MNMG, we can reconsider this later. - // - // If we ignore Leiden, I'd like to think about whether the reduction - // should occur now... 
- // - - // - // Sort the results, pick the largest version - // - thrust::sort(rmm::exec_policy(stream_)->on(stream_), - thrust::make_zip_iterator(thrust::make_tuple( - final_src_v.begin(), final_nbr_weights_v.begin(), final_nbr_cluster_v.begin())), - thrust::make_zip_iterator(thrust::make_tuple( - final_src_v.end(), final_nbr_weights_v.end(), final_nbr_cluster_v.begin())), - [] __device__(auto left, auto right) { - if (thrust::get<0>(left) < thrust::get<0>(right)) return true; - if (thrust::get<0>(left) > thrust::get<0>(right)) return false; - if (thrust::get<1>(left) > thrust::get<1>(right)) return true; - if (thrust::get<1>(left) < thrust::get<1>(right)) return false; - return (thrust::get<2>(left) < thrust::get<2>(right)); - }); - - // - // Now that we're sorted (ascending), the last entry for each src value is the largest. - // - local_cluster_edge_ids_v.resize(final_src_v.size()); - - thrust::transform(rmm::exec_policy(stream_)->on(stream_), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(final_src_v.size()), - local_cluster_edge_ids_v.begin(), - [sentinel = std::numeric_limits::max(), - d_src = final_src_v.data().get()] __device__(edge_t edge_id) { - if (edge_id == 0) { return edge_id; } - - if (d_src[edge_id - 1] != d_src[edge_id]) { return edge_id; } - - return sentinel; - }); - - local_cluster_edge_ids_v = detail::remove_elements_from_vector( - local_cluster_edge_ids_v, - [sentinel = std::numeric_limits::max()] __device__(auto edge_id) { - return (edge_id != sentinel); - }, - stream_); - - rmm::device_vector cluster_increase_v(final_src_v.size()); - rmm::device_vector cluster_decrease_v(final_src_v.size()); - rmm::device_vector old_cluster_v(final_src_v.size()); - - // - // Then we can, on each gpu, do a local assignment for all of the - // vertices assigned to that gpu using the up_down logic - // - local_cluster_edge_ids_v = detail::remove_elements_from_vector( - local_cluster_edge_ids_v, - local_cluster_edge_ids_v.begin(), - local_cluster_edge_ids_v.end(), - [d_final_src = final_src_v.data().get(), - d_final_nbr_cluster = final_nbr_cluster_v.data().get(), - d_final_nbr_weights = final_nbr_weights_v.data().get(), - d_cluster_increase = cluster_increase_v.data().get(), - d_cluster_decrease = cluster_decrease_v.data().get(), - d_vertex_weights = src_vertex_weights_cache_v_.data().get(), - d_next_cluster = next_cluster_v.data().get(), - d_old_cluster = old_cluster_v.data().get(), - base_vertex_id = base_vertex_id_, - base_src_vertex_id = base_src_vertex_id_, - up_down] __device__(edge_t idx) { - vertex_t src = d_final_src[idx]; - vertex_t new_cluster = d_final_nbr_cluster[idx]; - vertex_t old_cluster = d_next_cluster[src - base_vertex_id]; - weight_t src_weight = d_vertex_weights[src - base_src_vertex_id]; - - if (d_final_nbr_weights[idx] <= weight_t{0}) return false; - if (new_cluster == old_cluster) return false; - if ((new_cluster > old_cluster) != up_down) return false; - - d_next_cluster[src - base_vertex_id] = new_cluster; - d_cluster_increase[idx] = src_weight; - d_cluster_decrease[idx] = src_weight; - d_old_cluster[idx] = old_cluster; - return true; + current_graph_view_, + thrust::make_zip_iterator(thrust::make_tuple(old_cluster_sum_v.begin(), + d_src_vertex_weights_cache_, + cluster_subtract_v.begin(), + d_src_cluster_cache_, + src_cluster_weights_v.begin())), + + d_dst_cluster_cache_, + map_key_first, + map_key_last, + map_value_first, + [total_edge_weight, resolution] __device__( + auto src, auto neighbor_cluster, auto new_cluster_sum, auto 
src_info, auto a_new) { + auto old_cluster_sum = thrust::get<0>(src_info); + auto k_k = thrust::get<1>(src_info); + auto cluster_subtract = thrust::get<2>(src_info); + auto src_cluster = thrust::get<3>(src_info); + auto a_old = thrust::get<4>(src_info); + + if (src_cluster == neighbor_cluster) new_cluster_sum -= cluster_subtract; + + weight_t delta_modularity = 2 * (((new_cluster_sum - old_cluster_sum) / total_edge_weight) - + resolution * (a_new * k_k - a_old * k_k + k_k * k_k) / + (total_edge_weight * total_edge_weight)); + + return thrust::make_tuple(neighbor_cluster, delta_modularity); }, - stream_); + [] __device__(auto p1, auto p2) { + auto id1 = thrust::get<0>(p1); + auto id2 = thrust::get<0>(p2); + auto wt1 = thrust::get<1>(p1); + auto wt2 = thrust::get<1>(p2); - cluster_increase_v = variable_shuffle( - handle_, - local_cluster_edge_ids_v.size(), - thrust::make_permutation_iterator(cluster_increase_v.begin(), - local_cluster_edge_ids_v.begin()), - thrust::make_transform_iterator( - thrust::make_permutation_iterator(final_nbr_cluster_v.begin(), - local_cluster_edge_ids_v.begin()), - [d_vertex_device_view = compute_partition_.vertex_device_view()] __device__(vertex_t v) { - return d_vertex_device_view(v); - })); - - final_nbr_cluster_v = variable_shuffle( - handle_, - local_cluster_edge_ids_v.size(), - thrust::make_permutation_iterator(final_nbr_cluster_v.begin(), - local_cluster_edge_ids_v.begin()), - thrust::make_transform_iterator( - thrust::make_permutation_iterator(final_nbr_cluster_v.begin(), - local_cluster_edge_ids_v.begin()), - [d_vertex_device_view = compute_partition_.vertex_device_view()] __device__(vertex_t v) { - return d_vertex_device_view(v); - })); - - cluster_decrease_v = variable_shuffle( - handle_, - local_cluster_edge_ids_v.size(), - thrust::make_permutation_iterator(cluster_decrease_v.begin(), - local_cluster_edge_ids_v.begin()), - thrust::make_transform_iterator( - thrust::make_permutation_iterator(old_cluster_v.begin(), local_cluster_edge_ids_v.begin()), - [d_vertex_device_view = compute_partition_.vertex_device_view()] __device__(vertex_t v) { - return d_vertex_device_view(v); - })); - - old_cluster_v = variable_shuffle( - handle_, - local_cluster_edge_ids_v.size(), - thrust::make_permutation_iterator(old_cluster_v.begin(), local_cluster_edge_ids_v.begin()), - thrust::make_transform_iterator( - thrust::make_permutation_iterator(old_cluster_v.begin(), local_cluster_edge_ids_v.begin()), - [d_vertex_device_view = compute_partition_.vertex_device_view()] __device__(vertex_t v) { - return d_vertex_device_view(v); - })); - - thrust::for_each(rmm::exec_policy(stream_)->on(stream_), - thrust::make_zip_iterator( - thrust::make_tuple(final_nbr_cluster_v.begin(), cluster_increase_v.begin())), - thrust::make_zip_iterator( - thrust::make_tuple(final_nbr_cluster_v.end(), cluster_increase_v.end())), - [d_cluster_weights = cluster_weights_v_.data().get(), - base_vertex_id = base_vertex_id_] __device__(auto p) { - vertex_t cluster_id = thrust::get<0>(p); - weight_t weight = thrust::get<1>(p); - - atomicAdd(d_cluster_weights + cluster_id - base_vertex_id, weight); - }); - - thrust::for_each( - rmm::exec_policy(stream_)->on(stream_), - thrust::make_zip_iterator( - thrust::make_tuple(old_cluster_v.begin(), cluster_decrease_v.begin())), - thrust::make_zip_iterator(thrust::make_tuple(old_cluster_v.end(), cluster_decrease_v.end())), - [d_cluster_weights = cluster_weights_v_.data().get(), - base_vertex_id = base_vertex_id_] __device__(auto p) { - vertex_t cluster_id = 
thrust::get<0>(p); - weight_t weight = thrust::get<1>(p); - - atomicAdd(d_cluster_weights + cluster_id - base_vertex_id, -weight); + return (wt1 < wt2) ? p2 : ((wt1 > wt2) ? p1 : ((id1 < id2) ? p1 : p2)); + }, + thrust::make_tuple(vertex_t{-1}, weight_t{0}), + cugraph::experimental::get_dataframe_buffer_begin>( + output_buffer)); + + thrust::transform( + rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + next_cluster_v.begin(), + next_cluster_v.end(), + cugraph::experimental::get_dataframe_buffer_begin>( + output_buffer), + next_cluster_v.begin(), + [up_down] __device__(vertex_t old_cluster, auto p) { + vertex_t new_cluster = thrust::get<0>(p); + weight_t delta_modularity = thrust::get<1>(p); + + return (delta_modularity > weight_t{0}) + ? (((new_cluster > old_cluster) != up_down) ? old_cluster : new_cluster) + : old_cluster; }); - cache_vertex_properties( - cluster_weights_v_.begin(), src_cluster_weights_cache_v_, dst_cluster_weights_cache_v_); - } - - template - std::pair, rmm::device_vector> - combine_local_src_nbr_cluster_weights(hash_t hasher, - compare_t compare, - skip_edge_t skip_edge, - weight_t const *d_weights, - count_t num_weights) - { - rmm::device_vector relevant_edges_v; - rmm::device_vector relevant_edge_weights_v; - - if (num_weights > 0) { - std::size_t capacity{static_cast(num_weights / 0.7)}; - - cuco::static_map hash_map( - capacity, std::numeric_limits::max(), count_t{0}); - detail::create_cuco_pair_t create_cuco_pair; - - CUDA_TRY(cudaStreamSynchronize(stream_)); - - hash_map.insert(thrust::make_transform_iterator(thrust::make_counting_iterator(0), - create_cuco_pair), - thrust::make_transform_iterator( - thrust::make_counting_iterator(num_weights), create_cuco_pair), - hasher, - compare); - - CUDA_TRY(cudaStreamSynchronize(stream_)); - - relevant_edges_v.resize(num_weights); - - relevant_edges_v = detail::remove_elements_from_vector( - relevant_edges_v, - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(num_weights), - [d_hash_map = hash_map.get_device_view(), hasher, compare] __device__(count_t idx) { - auto pos = d_hash_map.find(idx, hasher, compare); - return (pos->first == idx); - }, - stream_); - - thrust::for_each_n( - rmm::exec_policy(stream_)->on(stream_), - thrust::make_counting_iterator(0), - relevant_edges_v.size(), - [d_hash_map = hash_map.get_device_view(), - hasher, - compare, - d_relevant_edges = relevant_edges_v.data().get()] __device__(count_t idx) mutable { - count_t edge_id = d_relevant_edges[idx]; - auto pos = d_hash_map.find(edge_id, hasher, compare); - pos->second.store(idx); - }); - - relevant_edge_weights_v.resize(relevant_edges_v.size()); - thrust::fill(rmm::exec_policy(stream_)->on(stream_), - relevant_edge_weights_v.begin(), - relevant_edge_weights_v.end(), - weight_t{0}); - - thrust::for_each_n( - rmm::exec_policy(stream_)->on(stream_), - thrust::make_counting_iterator(0), - num_weights, - [d_hash_map = hash_map.get_device_view(), - hasher, - compare, - skip_edge, - d_relevant_edge_weights = relevant_edge_weights_v.data().get(), - d_weights] __device__(count_t idx) { - if (!skip_edge(idx)) { - auto pos = d_hash_map.find(idx, hasher, compare); - if (pos != d_hash_map.end()) { - atomicAdd(d_relevant_edge_weights + pos->second.load(cuda::std::memory_order_relaxed), - d_weights[idx]); - } - } - }); - } + d_src_cluster_cache_ = cache_src_vertex_properties(next_cluster_v, src_cluster_cache_v_); + d_dst_cluster_cache_ = cache_dst_vertex_properties(next_cluster_v, dst_cluster_cache_v_); - return 
std::make_pair(relevant_edges_v, relevant_edge_weights_v); - } + std::tie(cluster_keys_v_, cluster_weights_v_) = + cugraph::experimental::transform_reduce_by_adj_matrix_row_key_e( + handle_, + current_graph_view_, + thrust::make_constant_iterator(0), + thrust::make_constant_iterator(0), + d_src_cluster_cache_, + [] __device__(auto src, auto dst, auto wt, auto x, auto y) { return wt; }, + weight_t{0}); #endif + } void shrink_graph() { timer_start("shrinking graph"); - rmm::device_uvector numbering_map(0, stream_); + rmm::device_uvector numbering_map(0, handle_.get_stream()); std::tie(current_graph_, numbering_map) = coarsen_graph(handle_, current_graph_view_, dendrogram_->current_level_begin()); current_graph_view_ = current_graph_->view(); - local_num_vertices_ = current_graph_view_.get_number_of_local_vertices(); - local_num_rows_ = current_graph_view_.get_number_of_local_adj_matrix_partition_rows(); - local_num_cols_ = current_graph_view_.get_number_of_local_adj_matrix_partition_cols(); - base_vertex_id_ = current_graph_view_.get_local_vertex_first(); - - local_num_edges_ = thrust::transform_reduce( - thrust::host, - thrust::make_counting_iterator(0), - thrust::make_counting_iterator( - current_graph_view_.get_number_of_local_adj_matrix_partitions()), - [this](auto indx) { - return current_graph_view_.get_number_of_local_adj_matrix_partition_edges(indx); - }, - size_t{0}, - thrust::plus()); - - src_indices_v_.resize(local_num_edges_); - - cugraph::detail::offsets_to_indices( - current_graph_view_.offsets(), local_num_rows_, src_indices_v_.data().get()); - - rmm::device_uvector numbering_indices(numbering_map.size(), stream_); - thrust::sequence(rmm::exec_policy(stream_)->on(stream_), + rmm::device_uvector numbering_indices(numbering_map.size(), handle_.get_stream()); + thrust::sequence(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), numbering_indices.begin(), numbering_indices.end(), - base_vertex_id_); + current_graph_view_.get_local_vertex_first()); relabel( handle_, std::make_tuple(static_cast(numbering_map.begin()), static_cast(numbering_indices.begin())), - local_num_vertices_, + current_graph_view_.get_number_of_local_vertices(), dendrogram_->current_level_begin(), dendrogram_->current_level_size()); - timer_stop(stream_); + timer_stop(handle_.get_stream()); } protected: raft::handle_t const &handle_; - cudaStream_t stream_; std::unique_ptr> dendrogram_; - vertex_t number_of_vertices_; - vertex_t base_vertex_id_{0}; - vertex_t base_src_vertex_id_{0}; - vertex_t base_dst_vertex_id_{0}; - int rank_{0}; - - vertex_t local_num_vertices_; - vertex_t local_num_rows_; - vertex_t local_num_cols_; - edge_t local_num_edges_; - // - // Copy of graph + // Initially we run on the input graph view, + // but as we shrink the graph we'll keep the + // current graph here // std::unique_ptr current_graph_{}; graph_view_t current_graph_view_; - // - // For partitioning - // - detail::compute_partition_t compute_partition_; + rmm::device_uvector vertex_weights_v_; + rmm::device_uvector src_vertex_weights_cache_v_; + rmm::device_uvector src_cluster_cache_v_; + rmm::device_uvector dst_cluster_cache_v_; + rmm::device_uvector cluster_keys_v_; + rmm::device_uvector cluster_weights_v_; - rmm::device_vector src_indices_v_; - - // - // Weights and clustering across iterations of algorithm - // - rmm::device_vector vertex_weights_v_; - rmm::device_vector src_vertex_weights_cache_v_{}; - rmm::device_vector dst_vertex_weights_cache_v_{}; - - rmm::device_vector cluster_weights_v_; - 
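The quinary e_op passed to copy_v_transform_reduce_key_aggregated_out_nbr above computes, for each (vertex, neighboring cluster) pair, the modularity gain of moving that vertex into the neighboring cluster; the binary reduction keeps the pair with the largest gain (ties broken toward the smaller cluster id), and the thrust::transform that follows accepts the move only when the gain is positive and the move direction matches up_down. A standalone sketch of the gain arithmetic, transcribed from the lambda (the free function and its parameter names are illustrative; the interpretation in the comments follows the surrounding variable names):

// Modularity gain of moving a vertex with weighted degree k_k out of its current
// cluster (aggregate weight a_old, edge weight from the vertex into it old_cluster_sum)
// and into a neighboring cluster (aggregate weight a_new, edge weight into it
// new_cluster_sum), scaled by the resolution parameter. Transcribed from the
// key-aggregated e_op above; this free function exists only for illustration.
template <typename weight_t>
weight_t delta_modularity_sketch(weight_t new_cluster_sum,
                                 weight_t old_cluster_sum,
                                 weight_t k_k,
                                 weight_t a_new,
                                 weight_t a_old,
                                 weight_t total_edge_weight,
                                 weight_t resolution)
{
  return 2 * (((new_cluster_sum - old_cluster_sum) / total_edge_weight) -
              resolution * (a_new * k_k - a_old * k_k + k_k * k_k) /
                (total_edge_weight * total_edge_weight));
}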
rmm::device_vector src_cluster_weights_cache_v_{}; - rmm::device_vector dst_cluster_weights_cache_v_{}; - - rmm::device_vector src_cluster_cache_v_{}; - rmm::device_vector dst_cluster_cache_v_{}; - - rmm::device_vector empty_cache_weight_v_{}; + weight_t *d_src_vertex_weights_cache_; + vertex_t *d_src_cluster_cache_; + vertex_t *d_dst_cluster_cache_; #ifdef TIMING HighResTimer hr_timer_; #endif -}; // namespace experimental +}; } // namespace experimental } // namespace cugraph diff --git a/cpp/src/experimental/shuffle.cuh b/cpp/src/experimental/shuffle.cuh deleted file mode 100644 index 40f3b510b10..00000000000 --- a/cpp/src/experimental/shuffle.cuh +++ /dev/null @@ -1,226 +0,0 @@ -/* - * Copyright (c) 2020, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include -#include - -namespace cugraph { -namespace experimental { - -namespace detail { - -// -// FIXME: This implementation of variable_shuffle stages the data for transfer -// in host memory. It would be more efficient, I believe, to stage the -// data in device memory, but it would require actually instantiating -// the data in device memory which is already precious in the Louvain -// implementation. We should explore if it's actually more efficient -// through device memory and whether the improvement is worth the extra -// memory required. 
-// -template -rmm::device_vector variable_shuffle(raft::handle_t const &handle, - std::size_t n_elements, - iterator_t data_iter, - partition_iter_t partition_iter) -{ - // - // We need to compute the size of data movement - // - raft::comms::comms_t const &comms = handle.get_comms(); - - cudaStream_t stream = handle.get_stream(); - int num_gpus = comms.get_size(); - int my_gpu = comms.get_rank(); - - rmm::device_vector local_sizes_v(num_gpus, size_t{0}); - - thrust::for_each(rmm::exec_policy(stream)->on(stream), - partition_iter, - partition_iter + n_elements, - [num_gpus, d_local_sizes = local_sizes_v.data().get()] __device__(auto p) { - atomicAdd(d_local_sizes + p, size_t{1}); - }); - - std::vector h_local_sizes_v(num_gpus); - std::vector h_global_sizes_v(num_gpus); - std::vector h_input_v(n_elements); - std::vector h_partitions_v(n_elements); - - thrust::copy(local_sizes_v.begin(), local_sizes_v.end(), h_local_sizes_v.begin()); - thrust::copy(partition_iter, partition_iter + n_elements, h_partitions_v.begin()); - - std::vector requests(2 * num_gpus); - - int request_pos = 0; - - for (int gpu = 0; gpu < num_gpus; ++gpu) { - if (gpu != my_gpu) { - comms.irecv(&h_global_sizes_v[gpu], 1, gpu, 0, &requests[request_pos]); - ++request_pos; - comms.isend(&h_local_sizes_v[gpu], 1, gpu, 0, &requests[request_pos]); - ++request_pos; - } else { - h_global_sizes_v[gpu] = h_local_sizes_v[gpu]; - } - } - - if (request_pos > 0) { comms.waitall(request_pos, requests.data()); } - - comms.barrier(); - - // - // Now global_sizes contains all of the counts, we need to - // allocate an array of the appropriate size - // - int64_t receive_size = - thrust::reduce(thrust::host, h_global_sizes_v.begin(), h_global_sizes_v.end()); - - std::vector temp_data; - - if (receive_size > 0) temp_data.resize(receive_size); - - rmm::device_vector input_v(n_elements); - - auto input_start = input_v.begin(); - - for (int gpu = 0; gpu < num_gpus; ++gpu) { - input_start = thrust::copy_if(rmm::exec_policy(stream)->on(stream), - data_iter, - data_iter + n_elements, - partition_iter, - input_start, - [gpu] __device__(int32_t p) { return p == gpu; }); - } - - thrust::copy(input_v.begin(), input_v.end(), h_input_v.begin()); - - std::vector temp_v(num_gpus + 1); - - thrust::exclusive_scan( - thrust::host, h_global_sizes_v.begin(), h_global_sizes_v.end(), temp_v.begin()); - - temp_v[num_gpus] = temp_v[num_gpus - 1] + h_global_sizes_v[num_gpus - 1]; - h_global_sizes_v = temp_v; - - thrust::exclusive_scan( - thrust::host, h_local_sizes_v.begin(), h_local_sizes_v.end(), temp_v.begin()); - - temp_v[num_gpus] = temp_v[num_gpus - 1] + h_local_sizes_v[num_gpus - 1]; - h_local_sizes_v = temp_v; - - CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); - comms.barrier(); - - request_pos = 0; - - for (int gpu = 0; gpu < num_gpus; ++gpu) { - size_t to_receive = h_global_sizes_v[gpu + 1] - h_global_sizes_v[gpu]; - size_t to_send = h_local_sizes_v[gpu + 1] - h_local_sizes_v[gpu]; - - if (gpu != my_gpu) { - if (to_receive > 0) { - comms.irecv( - temp_data.data() + h_global_sizes_v[gpu], to_receive, gpu, 0, &requests[request_pos]); - ++request_pos; - } - - if (to_send > 0) { - comms.isend( - h_input_v.data() + h_local_sizes_v[gpu], to_send, gpu, 0, &requests[request_pos]); - ++request_pos; - } - } else if (to_receive > 0) { - std::copy(h_input_v.begin() + h_local_sizes_v[gpu], - h_input_v.begin() + h_local_sizes_v[gpu + 1], - temp_data.begin() + h_global_sizes_v[gpu]); - } - } - - comms.barrier(); - - if (request_pos > 0) { 
comms.waitall(request_pos, requests.data()); } - - comms.barrier(); - - return rmm::device_vector(temp_data); -} - -} // namespace detail - -/** - * @brief shuffle data to the desired partition - * - * MNMG algorithms require shuffling data between partitions - * to get the data to the right location for computation. - * - * This function operates dynamically, there is no - * a priori knowledge about where the data will need - * to be transferred. - * - * This function will be executed on each GPU. Each gpu - * has a portion of the data (specified by begin_data and - * end_data iterators) and an iterator that identifies - * (for each corresponding element) which GPU the data - * should be shuffled to. - * - * The return value will be a device vector containing - * the data received by this GPU. - * - * Note that this function accepts iterators as input. - * `partition_iterator` will be traversed multiple times. - * - * @tparam is_multi_gpu If true, multi-gpu - shuffle will occur - * If false, single GPU - simple copy will occur - * @tparam data_t Type of the data being shuffled - * @tparam iterator_t Iterator referencing data to be shuffled - * @tparam partition_iter_t Iterator identifying the destination partition - * - * @param handle Library handle (RAFT) - * @param n_elements Number of elements to transfer - * @param data_iter Iterator that returns the elements to be transfered - * @param partition_iter Iterator that returns the partition where elements - * should be transfered. - */ -template * = nullptr> -rmm::device_vector variable_shuffle(raft::handle_t const &handle, - std::size_t n_elements, - iterator_t data_iter, - partition_iter_t partition_iter) -{ - return detail::variable_shuffle(handle, n_elements, data_iter, partition_iter); -} - -template * = nullptr> -rmm::device_vector variable_shuffle(raft::handle_t const &handle, - std::size_t n_elements, - iterator_t data_iter, - partition_iter_t partition_iter) -{ - return rmm::device_vector(data_iter, data_iter + n_elements); -} - -} // namespace experimental -} // namespace cugraph diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 1dc4a5d3eaa..5292f9f9997 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -407,14 +407,6 @@ set(EXPERIMENTAL_PAGERANK_TEST_SRCS ConfigureTest(EXPERIMENTAL_PAGERANK_TEST "${EXPERIMENTAL_PAGERANK_TEST_SRCS}") -################################################################################################### -# - Experimental LOUVAIN tests ------------------------------------------------------------------- - -set(EXPERIMENTAL_LOUVAIN_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/experimental/louvain_test.cu") - -ConfigureTest(EXPERIMENTAL_LOUVAIN_TEST "${EXPERIMENTAL_LOUVAIN_TEST_SRCS}") - ################################################################################################### # - Experimental KATZ_CENTRALITY tests ------------------------------------------------------------ @@ -438,6 +430,17 @@ if(BUILD_CUGRAPH_MG_TESTS) ConfigureTest(MG_PAGERANK_TEST "${MG_PAGERANK_TEST_SRCS}") target_link_libraries(MG_PAGERANK_TEST PRIVATE MPI::MPI_C MPI::MPI_CXX) + ########################################################################################### + # - MG LOUVAIN tests --------------------------------------------------------------------- + + set(MG_LOUVAIN_TEST_SRCS + "${CMAKE_CURRENT_SOURCE_DIR}/community/mg_louvain_helper.cu" + "${CMAKE_CURRENT_SOURCE_DIR}/community/mg_louvain_test.cpp") + + ConfigureTest(MG_LOUVAIN_TEST "${MG_LOUVAIN_TEST_SRCS}") + 
target_link_libraries(MG_LOUVAIN_TEST PRIVATE MPI::MPI_C MPI::MPI_CXX) + target_link_libraries(MG_LOUVAIN_TEST PRIVATE cugraph) + else(MPI_CXX_FOUND) message(FATAL_ERROR "OpenMPI NOT found, cannot build MG tests.") endif(MPI_CXX_FOUND) diff --git a/cpp/tests/community/louvain_test.cpp b/cpp/tests/community/louvain_test.cpp index d3024282be3..2ebf9a85902 100644 --- a/cpp/tests/community/louvain_test.cpp +++ b/cpp/tests/community/louvain_test.cpp @@ -9,15 +9,157 @@ * */ #include +#include + +#include +#include +#include +#include + +#include #include -#include -#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +struct Louvain_Usecase { + std::string graph_file_full_path_{}; + bool test_weighted_{false}; + int expected_level_{0}; + float expected_modularity_{0}; + + Louvain_Usecase(std::string const& graph_file_path, + bool test_weighted, + int expected_level, + float expected_modularity) + : test_weighted_(test_weighted), + expected_level_(expected_level), + expected_modularity_(expected_modularity) + { + if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { + graph_file_full_path_ = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; + } else { + graph_file_full_path_ = graph_file_path; + } + }; +}; + +class Tests_Louvain : public ::testing::TestWithParam { + public: + Tests_Louvain() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + template + void run_legacy_test(Louvain_Usecase const& configuration) + { + raft::handle_t handle{}; + + bool directed{false}; + + auto graph = cugraph::test::generate_graph_csr_from_mm( + directed, configuration.graph_file_full_path_); + auto graph_view = graph->view(); + + // "FIXME": remove this check once we drop support for Pascal + // + // Calling louvain on Pascal will throw an exception, we'll check that + // this is the behavior while we still support Pascal (device_prop.major < 7) + // + cudaDeviceProp device_prop; + CUDA_CHECK(cudaGetDeviceProperties(&device_prop, 0)); + + if (device_prop.major < 7) { + EXPECT_THROW(louvain(graph_view, + graph_view.get_number_of_vertices(), + configuration.expected_level_, + configuration.expected_modularity_), + cugraph::logic_error); + } else { + louvain(graph_view, + graph_view.get_number_of_vertices(), + configuration.expected_level_, + configuration.expected_modularity_); + } + } -#include + template + void run_current_test(Louvain_Usecase const& configuration) + { + raft::handle_t handle{}; + + cugraph::experimental::graph_t graph(handle); + std::tie(graph, std::ignore) = + cugraph::test::read_graph_from_matrix_market_file( + handle, configuration.graph_file_full_path_, configuration.test_weighted_, false); + + auto graph_view = graph.view(); + + // "FIXME": remove this check once we drop support for Pascal + // + // Calling louvain on Pascal will throw an exception, we'll check that + // this is the behavior while we still support Pascal (device_prop.major < 7) + // + cudaDeviceProp device_prop; + CUDA_CHECK(cudaGetDeviceProperties(&device_prop, 0)); + + if (device_prop.major < 7) { + EXPECT_THROW(louvain(graph_view, + graph_view.get_number_of_local_vertices(), + configuration.expected_level_, + configuration.expected_modularity_), + cugraph::logic_error); + } else { + louvain(graph_view, + graph_view.get_number_of_local_vertices(), + configuration.expected_level_, + configuration.expected_modularity_); + } + } + + template + void 
louvain(graph_t const& graph_view, + typename graph_t::vertex_type num_vertices, + int expected_level, + float expected_modularity) + { + using vertex_t = typename graph_t::vertex_type; + using weight_t = typename graph_t::weight_type; + + raft::handle_t handle{}; + + rmm::device_uvector clustering_v(num_vertices, handle.get_stream()); + size_t level; + weight_t modularity; + + std::tie(level, modularity) = + cugraph::louvain(handle, graph_view, clustering_v.data(), size_t{100}, weight_t{1}); + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + float compare_modularity = static_cast(modularity); -TEST(louvain, success) + ASSERT_FLOAT_EQ(compare_modularity, expected_modularity); + ASSERT_EQ(level, expected_level); + } +}; + +// FIXME: add tests for type combinations + +TEST(louvain_legacy, success) { raft::handle_t handle; @@ -84,15 +226,13 @@ TEST(louvain, success) int min = *min_element(cluster_id.begin(), cluster_id.end()); - std::cout << "modularity = " << modularity << std::endl; - ASSERT_GE(min, 0); - ASSERT_GE(modularity, 0.402777 * 0.95); + ASSERT_FLOAT_EQ(modularity, 0.408695); ASSERT_EQ(cluster_id, result_h); } } -TEST(louvain_renumbered, success) +TEST(louvain_legacy_renumbered, success) { raft::handle_t handle; @@ -157,11 +297,25 @@ TEST(louvain_renumbered, success) int min = *min_element(cluster_id.begin(), cluster_id.end()); - std::cout << "modularity = " << modularity << std::endl; - ASSERT_GE(min, 0); - ASSERT_GE(modularity, 0.402777 * 0.95); + ASSERT_FLOAT_EQ(modularity, 0.41880345); } } +TEST_P(Tests_Louvain, CheckInt32Int32FloatFloatLegacy) +{ + run_legacy_test(GetParam()); +} + +TEST_P(Tests_Louvain, CheckInt32Int32FloatFloat) +{ + run_current_test(GetParam()); +} + +// FIXME: Expand testing once we evaluate RMM memory use +INSTANTIATE_TEST_CASE_P( + simple_test, + Tests_Louvain, + ::testing::Values(Louvain_Usecase("test/datasets/karate.mtx", true, 3, 0.408695))); + CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/community/mg_louvain_helper.cu b/cpp/tests/community/mg_louvain_helper.cu new file mode 100644 index 00000000000..a7f95e6d718 --- /dev/null +++ b/cpp/tests/community/mg_louvain_helper.cu @@ -0,0 +1,353 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "mg_louvain_helper.hpp" + +#include + +#include +#include +#include + +#include + +#include +#include +#include + +namespace cugraph { +namespace test { + +template +rmm::device_uvector gather_distributed_vector(raft::handle_t const &handle, + T const *d_input, + size_t size) +{ + auto rx_sizes = + cugraph::experimental::host_scalar_gather(handle.get_comms(), size, 0, handle.get_stream()); + std::vector rx_displs(static_cast(handle.get_comms().get_rank()) == 0 + ? 
handle.get_comms().get_size() + : int{0}, + size_t{0}); + if (static_cast(handle.get_comms().get_rank()) == 0) { + std::partial_sum(rx_sizes.begin(), rx_sizes.end() - 1, rx_displs.begin() + 1); + } + + auto total_size = thrust::reduce(thrust::host, rx_sizes.begin(), rx_sizes.end()); + rmm::device_uvector gathered_v(total_size, handle.get_stream()); + + cugraph::experimental::device_gatherv(handle.get_comms(), + d_input, + gathered_v.data(), + size, + rx_sizes, + rx_displs, + 0, + handle.get_stream()); + + return gathered_v; +} + +template +bool compare_renumbered_vectors(raft::handle_t const &handle, + rmm::device_uvector const &v1, + rmm::device_uvector const &v2) +{ + vertex_t max = 1 + thrust::reduce(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + v1.begin(), + v1.end(), + vertex_t{0}); + + rmm::device_uvector map(max, size_t{0}); + + auto iter = thrust::make_zip_iterator(thrust::make_tuple(v1.begin(), v2.begin())); + + thrust::for_each(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + iter, + iter + v1.size(), + [d_map = map.data()] __device__(auto pair) { + vertex_t e1 = thrust::get<0>(pair); + vertex_t e2 = thrust::get<1>(pair); + + d_map[e1] = e2; + }); + + auto error_count = + thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + iter, + iter + v1.size(), + [d_map = map.data()] __device__(auto pair) { + vertex_t e1 = thrust::get<0>(pair); + vertex_t e2 = thrust::get<1>(pair); + + return (d_map[e1] != e2); + }); + + return (error_count == 0); +} + +template +void single_gpu_renumber_edgelist_given_number_map(raft::handle_t const &handle, + rmm::device_uvector &edgelist_rows_v, + rmm::device_uvector &edgelist_cols_v, + rmm::device_uvector &renumber_map_gathered_v) +{ + rmm::device_uvector index_v(renumber_map_gathered_v.size(), handle.get_stream()); + + thrust::for_each( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(renumber_map_gathered_v.size()), + [d_renumber_map_gathered = renumber_map_gathered_v.data(), d_index = index_v.data()] __device__( + auto idx) { d_index[d_renumber_map_gathered[idx]] = idx; }); + + thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edgelist_rows_v.begin(), + edgelist_rows_v.end(), + edgelist_rows_v.begin(), + [d_index = index_v.data()] __device__(auto v) { return d_index[v]; }); + + thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edgelist_cols_v.begin(), + edgelist_cols_v.end(), + edgelist_cols_v.begin(), + [d_index = index_v.data()] __device__(auto v) { return d_index[v]; }); +} + +template +std:: + tuple, rmm::device_uvector, rmm::device_uvector> + compressed_sparse_to_edgelist(edge_t const *compressed_sparse_offsets, + vertex_t const *compressed_sparse_indices, + weight_t const *compressed_sparse_weights, + vertex_t major_first, + vertex_t major_last, + cudaStream_t stream) +{ + edge_t number_of_edges{0}; + raft::update_host( + &number_of_edges, compressed_sparse_offsets + (major_last - major_first), 1, stream); + CUDA_TRY(cudaStreamSynchronize(stream)); + rmm::device_uvector edgelist_major_vertices(number_of_edges, stream); + rmm::device_uvector edgelist_minor_vertices(number_of_edges, stream); + rmm::device_uvector edgelist_weights( + compressed_sparse_weights != nullptr ? 
number_of_edges : 0, stream); + + // FIXME: this is highly inefficient for very high-degree vertices, for better performance, we can + // fill high-degree vertices using one CUDA block per vertex, mid-degree vertices using one CUDA + // warp per vertex, and low-degree vertices using one CUDA thread per block + thrust::for_each(rmm::exec_policy(stream)->on(stream), + thrust::make_counting_iterator(major_first), + thrust::make_counting_iterator(major_last), + [compressed_sparse_offsets, + major_first, + p_majors = edgelist_major_vertices.begin()] __device__(auto v) { + auto first = compressed_sparse_offsets[v - major_first]; + auto last = compressed_sparse_offsets[v - major_first + 1]; + thrust::fill(thrust::seq, p_majors + first, p_majors + last, v); + }); + thrust::copy(rmm::exec_policy(stream)->on(stream), + compressed_sparse_indices, + compressed_sparse_indices + number_of_edges, + edgelist_minor_vertices.begin()); + if (compressed_sparse_weights != nullptr) { + thrust::copy(rmm::exec_policy(stream)->on(stream), + compressed_sparse_weights, + compressed_sparse_weights + number_of_edges, + edgelist_weights.data()); + } + + return std::make_tuple(std::move(edgelist_major_vertices), + std::move(edgelist_minor_vertices), + std::move(edgelist_weights)); +} + +template +void sort_and_coarsen_edgelist(rmm::device_uvector &edgelist_major_vertices /* [INOUT] */, + rmm::device_uvector &edgelist_minor_vertices /* [INOUT] */, + rmm::device_uvector &edgelist_weights /* [INOUT] */, + cudaStream_t stream) +{ + auto pair_first = thrust::make_zip_iterator( + thrust::make_tuple(edgelist_major_vertices.begin(), edgelist_minor_vertices.begin())); + + size_t number_of_edges{0}; + if (edgelist_weights.size() > 0) { + thrust::sort_by_key(rmm::exec_policy(stream)->on(stream), + pair_first, + pair_first + edgelist_major_vertices.size(), + edgelist_weights.begin()); + + rmm::device_uvector tmp_edgelist_major_vertices(edgelist_major_vertices.size(), + stream); + rmm::device_uvector tmp_edgelist_minor_vertices(tmp_edgelist_major_vertices.size(), + stream); + rmm::device_uvector tmp_edgelist_weights(tmp_edgelist_major_vertices.size(), stream); + auto it = thrust::reduce_by_key( + rmm::exec_policy(stream)->on(stream), + pair_first, + pair_first + edgelist_major_vertices.size(), + edgelist_weights.begin(), + thrust::make_zip_iterator(thrust::make_tuple(tmp_edgelist_major_vertices.begin(), + tmp_edgelist_minor_vertices.begin())), + tmp_edgelist_weights.begin()); + number_of_edges = thrust::distance(tmp_edgelist_weights.begin(), thrust::get<1>(it)); + + edgelist_major_vertices = std::move(tmp_edgelist_major_vertices); + edgelist_minor_vertices = std::move(tmp_edgelist_minor_vertices); + edgelist_weights = std::move(tmp_edgelist_weights); + } else { + thrust::sort(rmm::exec_policy(stream)->on(stream), + pair_first, + pair_first + edgelist_major_vertices.size()); + auto it = thrust::unique(rmm::exec_policy(stream)->on(stream), + pair_first, + pair_first + edgelist_major_vertices.size()); + number_of_edges = thrust::distance(pair_first, it); + } + + edgelist_major_vertices.resize(number_of_edges, stream); + edgelist_minor_vertices.resize(number_of_edges, stream); + edgelist_weights.resize(number_of_edges, stream); + edgelist_major_vertices.shrink_to_fit(stream); + edgelist_minor_vertices.shrink_to_fit(stream); + edgelist_weights.shrink_to_fit(stream); +} + +template +std:: + tuple, rmm::device_uvector, rmm::device_uvector> + compressed_sparse_to_relabeled_and_sorted_and_coarsened_edgelist( + edge_t const 
*compressed_sparse_offsets, + vertex_t const *compressed_sparse_indices, + weight_t const *compressed_sparse_weights, + vertex_t const *p_major_labels, + vertex_t const *p_minor_labels, + vertex_t major_first, + vertex_t major_last, + vertex_t minor_first, + vertex_t minor_last, + cudaStream_t stream) +{ + // FIXME: it might be possible to directly create relabled & coarsened edgelist from the + // compressed sparse format to save memory + + rmm::device_uvector edgelist_major_vertices(0, stream); + rmm::device_uvector edgelist_minor_vertices(0, stream); + rmm::device_uvector edgelist_weights(0, stream); + std::tie(edgelist_major_vertices, edgelist_minor_vertices, edgelist_weights) = + compressed_sparse_to_edgelist(compressed_sparse_offsets, + compressed_sparse_indices, + compressed_sparse_weights, + major_first, + major_last, + stream); + + auto pair_first = thrust::make_zip_iterator( + thrust::make_tuple(edgelist_major_vertices.begin(), edgelist_minor_vertices.begin())); + thrust::transform( + rmm::exec_policy(stream)->on(stream), + pair_first, + pair_first + edgelist_major_vertices.size(), + pair_first, + [p_major_labels, p_minor_labels, major_first, minor_first] __device__(auto val) { + return thrust::make_tuple(p_major_labels[thrust::get<0>(val) - major_first], + p_minor_labels[thrust::get<1>(val) - minor_first]); + }); + + sort_and_coarsen_edgelist( + edgelist_major_vertices, edgelist_minor_vertices, edgelist_weights, stream); + + return std::make_tuple(std::move(edgelist_major_vertices), + std::move(edgelist_minor_vertices), + std::move(edgelist_weights)); +} + +// single-GPU version +template +std::unique_ptr> +coarsen_graph( + raft::handle_t const &handle, + cugraph::experimental::graph_view_t const + &graph_view, + vertex_t const *labels) +{ + rmm::device_uvector coarsened_edgelist_major_vertices(0, handle.get_stream()); + rmm::device_uvector coarsened_edgelist_minor_vertices(0, handle.get_stream()); + rmm::device_uvector coarsened_edgelist_weights(0, handle.get_stream()); + std::tie(coarsened_edgelist_major_vertices, + coarsened_edgelist_minor_vertices, + coarsened_edgelist_weights) = + compressed_sparse_to_relabeled_and_sorted_and_coarsened_edgelist( + graph_view.offsets(), + graph_view.indices(), + graph_view.weights(), + labels, + labels, + vertex_t{0}, + graph_view.get_number_of_vertices(), + vertex_t{0}, + graph_view.get_number_of_vertices(), + handle.get_stream()); + + cugraph::experimental::edgelist_t edgelist{}; + edgelist.p_src_vertices = store_transposed ? coarsened_edgelist_minor_vertices.data() + : coarsened_edgelist_major_vertices.data(); + edgelist.p_dst_vertices = store_transposed ? 
coarsened_edgelist_major_vertices.data() + : coarsened_edgelist_minor_vertices.data(); + edgelist.p_edge_weights = coarsened_edgelist_weights.data(); + edgelist.number_of_edges = static_cast(coarsened_edgelist_major_vertices.size()); + + vertex_t new_number_of_vertices = + 1 + thrust::reduce(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + labels, + labels + graph_view.get_number_of_vertices(), + vertex_t{0}, + thrust::maximum()); + + return std::make_unique< + cugraph::experimental::graph_t>( + handle, + edgelist, + new_number_of_vertices, + cugraph::experimental::graph_properties_t{graph_view.is_symmetric(), false}, + true); +} + +// explicit instantiation + +template void single_gpu_renumber_edgelist_given_number_map( + raft::handle_t const &handle, + rmm::device_uvector &d_edgelist_rows, + rmm::device_uvector &d_edgelist_cols, + rmm::device_uvector &d_renumber_map_gathered_v); + +template rmm::device_uvector gather_distributed_vector(raft::handle_t const &handle, + int const *d_input, + size_t size); + +template bool compare_renumbered_vectors(raft::handle_t const &handle, + rmm::device_uvector const &v1, + rmm::device_uvector const &v2); + +template std::unique_ptr> +coarsen_graph( + raft::handle_t const &handle, + cugraph::experimental::graph_view_t const &graph_view, + int32_t const *labels); + +} // namespace test +} // namespace cugraph diff --git a/cpp/tests/community/mg_louvain_helper.hpp b/cpp/tests/community/mg_louvain_helper.hpp new file mode 100644 index 00000000000..43eb294cd13 --- /dev/null +++ b/cpp/tests/community/mg_louvain_helper.hpp @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include +#include + +namespace cugraph { +namespace test { + +template +rmm::device_uvector gather_distributed_vector(raft::handle_t const &handle, + T const *d_input, + size_t size); + +template +bool compare_renumbered_vectors(raft::handle_t const &handle, + rmm::device_uvector const &v1, + rmm::device_uvector const &v2); + +template +void single_gpu_renumber_edgelist_given_number_map( + raft::handle_t const &handle, + rmm::device_uvector &d_edgelist_rows, + rmm::device_uvector &d_edgelist_cols, + rmm::device_uvector &d_renumber_map_gathered_v); + +template +std::unique_ptr> +coarsen_graph( + raft::handle_t const &handle, + cugraph::experimental::graph_view_t const + &graph_view, + vertex_t const *labels); + +} // namespace test +} // namespace cugraph diff --git a/cpp/tests/community/mg_louvain_test.cpp b/cpp/tests/community/mg_louvain_test.cpp new file mode 100644 index 00000000000..f6596a6b59a --- /dev/null +++ b/cpp/tests/community/mg_louvain_test.cpp @@ -0,0 +1,233 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "mg_louvain_helper.hpp" + +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include + +#include + +void compare(float modularity, float sg_modularity) { ASSERT_FLOAT_EQ(modularity, sg_modularity); } +void compare(double modularity, double sg_modularity) +{ + ASSERT_DOUBLE_EQ(modularity, sg_modularity); +} + +//////////////////////////////////////////////////////////////////////////////// +// Test param object. This defines the input and expected output for a test, and +// will be instantiated as the parameter to the tests defined below using +// INSTANTIATE_TEST_CASE_P() +// +struct Louvain_Usecase { + std::string graph_file_full_path{}; + bool weighted{false}; + size_t max_level; + double resolution; + + // FIXME: We really should have a Graph_Testparms_Base class or something + // like that which can handle this graph_full_path thing. + // + Louvain_Usecase(std::string const& graph_file_path, + bool weighted, + size_t max_level, + double resolution) + : weighted(weighted), max_level(max_level), resolution(resolution) + { + if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { + graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; + } else { + graph_file_full_path = graph_file_path; + } + }; +}; + +//////////////////////////////////////////////////////////////////////////////// +// Parameterized test fixture, to be used with TEST_P(). This defines common +// setup and teardown steps as well as common utilities used by each E2E MG +// test. In this case, each test is identical except for the inputs and +// expected outputs, so the entire test is defined in the run_test() method. +// +class Louvain_MG_Testfixture : public ::testing::TestWithParam { + public: + static void SetUpTestCase() {} + static void TearDownTestCase() {} + + // Run once for each test instance + virtual void SetUp() {} + virtual void TearDown() {} + + // Compare the results of MNMG Louvain with the results of running + // each step of SG Louvain, renumbering the coarsened graphs based + // on the MNMG renumbering. 
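compare_sg_results (declared just below) leans on compare_renumbered_vectors from mg_louvain_helper.cu, which treats two cluster assignments as equal when one is a consistent relabeling of the other. A host-side sketch of that check, with std::unordered_map standing in for the device-side scatter-and-count (illustrative only; requires C++17):

#include <cstddef>
#include <cstdint>
#include <unordered_map>
#include <vector>

// Two label vectors are considered equivalent here if every occurrence of a label in
// v1 is paired with the same label in v2, i.e. v1 -> v2 is a well-defined mapping,
// which appears to be what the device-side compare_renumbered_vectors verifies.
bool labels_match_up_to_relabeling(std::vector<int32_t> const& v1, std::vector<int32_t> const& v2)
{
  if (v1.size() != v2.size()) return false;
  std::unordered_map<int32_t, int32_t> map;  // label in v1 -> label in v2
  for (std::size_t i = 0; i < v1.size(); ++i) {
    auto [it, inserted] = map.emplace(v1[i], v2[i]);
    if (!inserted && it->second != v2[i]) return false;
  }
  return true;
}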
+ template + void compare_sg_results(raft::handle_t const& handle, + std::string const& graph_filename, + rmm::device_uvector& d_renumber_map_gathered_v, + cugraph::Dendrogram const& dendrogram, + weight_t resolution, + int rank, + weight_t modularity) + { + auto sg_graph = + std::make_unique>( + handle); + rmm::device_uvector d_clustering_v(0, handle.get_stream()); + weight_t sg_modularity; + + if (rank == 0) { + // Create initial SG graph, renumbered according to the MNMG renumber map + rmm::device_uvector d_edgelist_rows(0, handle.get_stream()); + rmm::device_uvector d_edgelist_cols(0, handle.get_stream()); + rmm::device_uvector d_edgelist_weights(0, handle.get_stream()); + vertex_t number_of_vertices{}; + bool is_symmetric{}; + + std::tie( + d_edgelist_rows, d_edgelist_cols, d_edgelist_weights, number_of_vertices, is_symmetric) = + cugraph::test::read_edgelist_from_matrix_market_file( + handle, graph_filename, true); + + rmm::device_uvector d_vertices(number_of_vertices, handle.get_stream()); + std::vector h_vertices(number_of_vertices); + + d_clustering_v.resize(d_vertices.size(), handle.get_stream()); + + thrust::sequence(thrust::host, h_vertices.begin(), h_vertices.end(), vertex_t{0}); + raft::update_device( + d_vertices.data(), h_vertices.data(), d_vertices.size(), handle.get_stream()); + + // renumber using d_renumber_map_gathered_v + cugraph::test::single_gpu_renumber_edgelist_given_number_map( + handle, d_edgelist_rows, d_edgelist_cols, d_renumber_map_gathered_v); + + std::tie(*sg_graph, std::ignore) = + cugraph::test::generate_graph_from_edgelist( + handle, + std::move(d_vertices), + std::move(d_edgelist_rows), + std::move(d_edgelist_cols), + std::move(d_edgelist_weights), + is_symmetric, + true, + false); + } + + std::for_each( + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(dendrogram.num_levels()), + [&dendrogram, &sg_graph, &d_clustering_v, &sg_modularity, &handle, resolution, rank]( + size_t i) { + auto d_dendrogram_gathered_v = cugraph::test::gather_distributed_vector( + handle, dendrogram.get_level_ptr_nocheck(i), dendrogram.get_level_size_nocheck(i)); + + if (rank == 0) { + auto graph_view = sg_graph->view(); + + d_clustering_v.resize(graph_view.get_number_of_vertices(), handle.get_stream()); + + std::tie(std::ignore, sg_modularity) = + cugraph::louvain(handle, graph_view, d_clustering_v.data(), size_t{1}, resolution); + + EXPECT_TRUE(cugraph::test::compare_renumbered_vectors( + handle, d_clustering_v, d_dendrogram_gathered_v)); + + sg_graph = + cugraph::test::coarsen_graph(handle, graph_view, d_dendrogram_gathered_v.data()); + } + }); + + if (rank == 0) compare(modularity, sg_modularity); + } + + // Compare the results of running louvain on multiple GPUs to that of a + // single-GPU run for the configuration in param. Note that MNMG Louvain + // and single GPU Louvain are ONLY deterministic through a single + // iteration of the outer loop. Renumbering of the partitions when coarsening + // the graph is a function of the number of GPUs in the GPU cluster. 
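run_test (declared just below) splits the communicator into a 2D grid by taking the largest divisor of the GPU count that does not exceed its square root. A small sketch of that selection with a few worked values (the helper name is illustrative):

#include <cmath>

// Mirror of the row_comm_size selection in run_test below: start at floor(sqrt(n))
// and walk down to the nearest divisor, so n GPUs form a row_size x col_size grid.
// e.g. n = 8 -> 2 x 4, n = 9 -> 3 x 3, n = 6 -> 2 x 3.
inline int pick_row_comm_size(int comm_size)
{
  int row_comm_size = static_cast<int>(std::sqrt(static_cast<double>(comm_size)));
  while (comm_size % row_comm_size != 0) { --row_comm_size; }
  return row_comm_size;  // column size is comm_size / row_comm_size
}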
+ template + void run_test(const Louvain_Usecase& param) + { + raft::handle_t handle; + + raft::comms::initialize_mpi_comms(&handle, MPI_COMM_WORLD); + const auto& comm = handle.get_comms(); + + auto const comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + + auto row_comm_size = static_cast(sqrt(static_cast(comm_size))); + while (comm_size % row_comm_size != 0) { --row_comm_size; } + cugraph::partition_2d::subcomm_factory_t + subcomm_factory(handle, row_comm_size); + + cudaStream_t stream = handle.get_stream(); + + cugraph::experimental::graph_t mg_graph(handle); + + rmm::device_uvector d_renumber_map_labels(0, handle.get_stream()); + + std::tie(mg_graph, d_renumber_map_labels) = + cugraph::test::read_graph_from_matrix_market_file( + handle, param.graph_file_full_path, true, true); + + auto mg_graph_view = mg_graph.view(); + + std::unique_ptr> dendrogram; + weight_t modularity; + + std::tie(dendrogram, modularity) = + cugraph::louvain(handle, mg_graph_view, param.max_level, param.resolution); + + SCOPED_TRACE("compare modularity input: " + param.graph_file_full_path); + + auto d_renumber_map_gathered_v = cugraph::test::gather_distributed_vector( + handle, d_renumber_map_labels.data(), d_renumber_map_labels.size()); + + compare_sg_results(handle, + param.graph_file_full_path, + d_renumber_map_gathered_v, + *dendrogram, + param.resolution, + comm_rank, + modularity); + } +}; + +//////////////////////////////////////////////////////////////////////////////// +TEST_P(Louvain_MG_Testfixture, CheckInt32Int32Float) +{ + run_test(GetParam()); +} + +INSTANTIATE_TEST_CASE_P( + simple_test, + Louvain_MG_Testfixture, + ::testing::Values(Louvain_Usecase("test/datasets/karate.mtx", true, 100, 1) + //,Louvain_Usecase("test/datasets/smallworld.mtx", true, 100, 1) + )); + +CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/louvain_test.cu b/cpp/tests/experimental/louvain_test.cu deleted file mode 100644 index 56fb2c109bf..00000000000 --- a/cpp/tests/experimental/louvain_test.cu +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governin_from_mtxg permissions and - * limitations under the License. 
- */ - -#include -#include - -#include -#include -#include - -#include -#include - -#include - -#include -#include -#include - -#include - -#include -#include -#include -#include -#include - -typedef struct Louvain_Usecase_t { - std::string graph_file_full_path{}; - bool test_weighted{false}; - - Louvain_Usecase_t(std::string const& graph_file_path, bool test_weighted) - : test_weighted(test_weighted) - { - if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { - graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; - } else { - graph_file_full_path = graph_file_path; - } - }; -} Louvain_Usecase; - -class Tests_Louvain : public ::testing::TestWithParam { - public: - Tests_Louvain() {} - static void SetupTestCase() {} - static void TearDownTestCase() {} - - virtual void SetUp() {} - virtual void TearDown() {} - - template - void run_current_test(Louvain_Usecase const& configuration) - { - raft::handle_t handle{}; - - std::cout << "read graph file: " << configuration.graph_file_full_path << std::endl; - - cugraph::experimental::graph_t graph(handle); - std::tie(graph, std::ignore) = - cugraph::test::read_graph_from_matrix_market_file( - handle, configuration.graph_file_full_path, configuration.test_weighted, false); - - auto graph_view = graph.view(); - - // "FIXME": remove this check once we drop support for Pascal - // - // Calling louvain on Pascal will throw an exception, we'll check that - // this is the behavior while we still support Pascal (device_prop.major < 7) - // - cudaDeviceProp device_prop; - CUDA_CHECK(cudaGetDeviceProperties(&device_prop, 0)); - - if (device_prop.major < 7) { - EXPECT_THROW(louvain(graph_view), cugraph::logic_error); - } else { - louvain(graph_view); - } - } - - template - void louvain(graph_t const& graph_view) - { - using vertex_t = typename graph_t::vertex_type; - using weight_t = typename graph_t::weight_type; - - raft::handle_t handle{}; - - rmm::device_vector clustering_v(graph_view.get_number_of_local_vertices()); - size_t level; - weight_t modularity; - - std::tie(level, modularity) = - cugraph::louvain(handle, graph_view, clustering_v.data().get(), size_t{100}, weight_t{1}); - - CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement - - std::cout << "level = " << level << std::endl; - std::cout << "modularity = " << modularity << std::endl; - } -}; - -// FIXME: add tests for type combinations -TEST_P(Tests_Louvain, CheckInt32Int32FloatFloat) -{ - run_current_test(GetParam()); -} - -INSTANTIATE_TEST_CASE_P(simple_test, - Tests_Louvain, - ::testing::Values(Louvain_Usecase("test/datasets/karate.mtx", true) -#if 0 - , - Louvain_Usecase("test/datasets/web-Google.mtx", true), - Louvain_Usecase("test/datasets/ljournal-2008.mtx", true), - Louvain_Usecase("test/datasets/webbase-1M.mtx", true) -#endif - )); - -CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/utilities/base_fixture.hpp b/cpp/tests/utilities/base_fixture.hpp index e8f11acfbf4..79a86e1fc95 100644 --- a/cpp/tests/utilities/base_fixture.hpp +++ b/cpp/tests/utilities/base_fixture.hpp @@ -160,6 +160,11 @@ inline auto parse_test_options(int argc, char **argv) auto const cmd_opts = parse_test_options(argc, argv); \ auto const rmm_mode = cmd_opts["rmm_mode"].as(); \ auto resource = cugraph::test::create_memory_resource(rmm_mode); \ + \ + if (comm_rank != 0) { \ + auto &listeners = ::testing::UnitTest::GetInstance()->listeners(); \ + delete listeners.Release(listeners.default_result_printer()); \ + } \ 
rmm::mr::set_current_device_resource(resource.get()); \ auto ret = RUN_ALL_TESTS(); \ MPI_TRY(MPI_Finalize()); \ From 22e9e2bb6b7fb6599f3d7c5b1cc35683591fd6c6 Mon Sep 17 00:00:00 2001 From: Iroy30 <41401566+Iroy30@users.noreply.github.com> Date: Tue, 30 Mar 2021 09:23:52 -0500 Subject: [PATCH 207/343] Add indirection and replace algorithms with new renumbering (#1484) Authors: - @Iroy30 Approvers: - Chuck Hastings (@ChuckHastings) - Alex Fender (@afender) - Seunghwa Kang (@seunghwak) URL: https://github.com/rapidsai/cugraph/pull/1484 --- python/cugraph/community/egonet_wrapper.pyx | 3 +- .../community/ktruss_subgraph_wrapper.pyx | 4 + .../community/subgraph_extraction_wrapper.pyx | 1 + python/cugraph/cores/k_core_wrapper.pyx | 4 + .../dask/centrality/katz_centrality.py | 12 +- python/cugraph/dask/common/input_utils.py | 14 +- python/cugraph/dask/community/louvain.py | 13 +- python/cugraph/dask/link_analysis/pagerank.py | 13 +- python/cugraph/dask/traversal/bfs.py | 13 +- python/cugraph/dask/traversal/sssp.py | 12 +- python/cugraph/structure/new_number_map.py | 317 -------- python/cugraph/structure/number_map.py | 686 ++++++------------ python/cugraph/structure/renumber_wrapper.pyx | 127 ++-- python/cugraph/tests/test_renumber.py | 140 ++-- 14 files changed, 417 insertions(+), 942 deletions(-) delete mode 100644 python/cugraph/structure/new_number_map.py diff --git a/python/cugraph/community/egonet_wrapper.pyx b/python/cugraph/community/egonet_wrapper.pyx index ff9f2b8b3de..ead41705628 100644 --- a/python/cugraph/community/egonet_wrapper.pyx +++ b/python/cugraph/community/egonet_wrapper.pyx @@ -33,7 +33,7 @@ def egonet(input_graph, vertices, radius=1): np.dtype("float32") : numberTypeEnum.floatType, np.dtype("double") : numberTypeEnum.doubleType} - [src, dst] = [input_graph.edgelist.edgelist_df['src'], input_graph.edgelist.edgelist_df['dst']] + [src, dst] = graph_primtypes_wrapper.datatype_cast([input_graph.edgelist.edgelist_df['src'], input_graph.edgelist.edgelist_df['dst']], [np.int32]) vertex_t = src.dtype edge_t = np.dtype("int32") weights = None @@ -54,6 +54,7 @@ def egonet(input_graph, vertices, radius=1): weight_t = np.dtype("float32") # Pointers for egonet + vertices = vertices.astype('int32') cdef uintptr_t c_source_vertex_ptr = vertices.__cuda_array_interface__['data'][0] n_subgraphs = vertices.size n_streams = 1 diff --git a/python/cugraph/community/ktruss_subgraph_wrapper.pyx b/python/cugraph/community/ktruss_subgraph_wrapper.pyx index 9f38b33d774..d3b7a38ba41 100644 --- a/python/cugraph/community/ktruss_subgraph_wrapper.pyx +++ b/python/cugraph/community/ktruss_subgraph_wrapper.pyx @@ -33,6 +33,10 @@ def ktruss_subgraph_double(input_graph, k, use_weights): def ktruss_subgraph(input_graph, k, use_weights): + [input_graph.edgelist.edgelist_df['src'], + input_graph.edgelist.edgelist_df['dst']] = graph_primtypes_wrapper.datatype_cast([input_graph.edgelist.edgelist_df['src'], + input_graph.edgelist.edgelist_df['dst']], + [np.int32]) if graph_primtypes_wrapper.weight_type(input_graph) == np.float64 and use_weights: return ktruss_subgraph_double(input_graph, k, use_weights) else: diff --git a/python/cugraph/community/subgraph_extraction_wrapper.pyx b/python/cugraph/community/subgraph_extraction_wrapper.pyx index 31c5d2372f0..46dc5c07eaf 100644 --- a/python/cugraph/community/subgraph_extraction_wrapper.pyx +++ b/python/cugraph/community/subgraph_extraction_wrapper.pyx @@ -59,6 +59,7 @@ def subgraph(input_graph, vertices): if weights is not None: c_weights = 
weights.__cuda_array_interface__['data'][0] + [vertices] = graph_primtypes_wrapper.datatype_cast([vertices], [np.int32]) cdef uintptr_t c_vertices = vertices.__cuda_array_interface__['data'][0] if use_float: diff --git a/python/cugraph/cores/k_core_wrapper.pyx b/python/cugraph/cores/k_core_wrapper.pyx index a0ef99a8e8b..28bb191f4f4 100644 --- a/python/cugraph/cores/k_core_wrapper.pyx +++ b/python/cugraph/cores/k_core_wrapper.pyx @@ -49,6 +49,10 @@ def k_core(input_graph, k, core_number): """ Call k_core """ + [input_graph.edgelist.edgelist_df['src'], + input_graph.edgelist.edgelist_df['dst']] = graph_primtypes_wrapper.datatype_cast([input_graph.edgelist.edgelist_df['src'], + input_graph.edgelist.edgelist_df['dst']], + [np.int32]) if graph_primtypes_wrapper.weight_type(input_graph) == np.float64: return k_core_double(input_graph, k, core_number) else: diff --git a/python/cugraph/dask/centrality/katz_centrality.py b/python/cugraph/dask/centrality/katz_centrality.py index e690e291928..a2f83a0b2a8 100644 --- a/python/cugraph/dask/centrality/katz_centrality.py +++ b/python/cugraph/dask/centrality/katz_centrality.py @@ -14,8 +14,8 @@ # from dask.distributed import wait, default_client -from cugraph.dask.common.input_utils import get_distributed_data -from cugraph.structure.shuffle import shuffle +from cugraph.dask.common.input_utils import (get_distributed_data, + get_vertex_partition_offsets) from cugraph.dask.centrality import\ mg_katz_centrality_wrapper as mg_katz_centrality import cugraph.comms.comms as Comms @@ -133,11 +133,9 @@ def katz_centrality(input_graph, client = default_client() input_graph.compute_renumber_edge_list(transposed=True) - (ddf, - num_verts, - partition_row_size, - partition_col_size, - vertex_partition_offsets) = shuffle(input_graph, transposed=True) + ddf = input_graph.edgelist.edgelist_df + vertex_partition_offsets = get_vertex_partition_offsets(input_graph) + num_verts = vertex_partition_offsets.iloc[-1] num_edges = len(ddf) data = get_distributed_data(ddf) diff --git a/python/cugraph/dask/common/input_utils.py b/python/cugraph/dask/common/input_utils.py index bbc914da502..0248f429a09 100644 --- a/python/cugraph/dask/common/input_utils.py +++ b/python/cugraph/dask/common/input_utils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
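get_vertex_partition_offsets (added just below) turns the per-partition renumber-map lengths into cumulative vertex partition offsets, and the callers above take the last entry as the total vertex count. The same computation as a language-neutral sketch (shown here in C++; the helper name is illustrative, not part of the patch):

#include <cstdint>
#include <numeric>
#include <vector>

// Per-partition vertex counts -> [0, n0, n0+n1, ...]; the final entry is the total
// number of vertices, matching vertex_partition_offsets.iloc[-1] in the Python code.
std::vector<int64_t> vertex_partition_offsets_from_counts(std::vector<int64_t> const& counts)
{
  std::vector<int64_t> offsets(counts.size() + 1, 0);
  std::partial_sum(counts.begin(), counts.end(), offsets.begin() + 1);
  return offsets;
}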
@@ -217,3 +217,15 @@ def get_distributed_data(input_ddf): if data.worker_info is None and comms is not None: data.calculate_worker_and_rank_info(comms) return data + + +def get_vertex_partition_offsets(input_graph): + import cudf + renumber_vertex_count = input_graph.renumber_map.implementation.ddf.\ + map_partitions(len).compute() + renumber_vertex_cumsum = renumber_vertex_count.cumsum() + vertex_dtype = input_graph.edgelist.edgelist_df['src'].dtype + vertex_partition_offsets = cudf.Series([0], dtype=vertex_dtype) + vertex_partition_offsets = vertex_partition_offsets.append(cudf.Series( + renumber_vertex_cumsum, dtype=vertex_dtype)) + return vertex_partition_offsets diff --git a/python/cugraph/dask/community/louvain.py b/python/cugraph/dask/community/louvain.py index 495061c0f81..c9af0f526c9 100644 --- a/python/cugraph/dask/community/louvain.py +++ b/python/cugraph/dask/community/louvain.py @@ -16,8 +16,8 @@ from dask.distributed import wait, default_client import cugraph.comms.comms as Comms -from cugraph.dask.common.input_utils import get_distributed_data -from cugraph.structure.shuffle import shuffle +from cugraph.dask.common.input_utils import (get_distributed_data, + get_vertex_partition_offsets) from cugraph.dask.community import louvain_wrapper as c_mg_louvain from cugraph.utilities.utils import is_cuda_version_less_than @@ -86,12 +86,9 @@ def louvain(input_graph, max_iter=100, resolution=1.0): input_graph.compute_renumber_edge_list(transposed=False) sorted_by_degree = True - (ddf, - num_verts, - partition_row_size, - partition_col_size, - vertex_partition_offsets) = shuffle(input_graph, transposed=False) - + ddf = input_graph.edgelist.edgelist_df + vertex_partition_offsets = get_vertex_partition_offsets(input_graph) + num_verts = vertex_partition_offsets.iloc[-1] num_edges = len(ddf) data = get_distributed_data(ddf) diff --git a/python/cugraph/dask/link_analysis/pagerank.py b/python/cugraph/dask/link_analysis/pagerank.py index d8a76f1231e..bfaada85a6f 100644 --- a/python/cugraph/dask/link_analysis/pagerank.py +++ b/python/cugraph/dask/link_analysis/pagerank.py @@ -14,8 +14,8 @@ # from dask.distributed import wait, default_client -from cugraph.dask.common.input_utils import get_distributed_data -from cugraph.structure.shuffle import shuffle +from cugraph.dask.common.input_utils import (get_distributed_data, + get_vertex_partition_offsets) from cugraph.dask.link_analysis import mg_pagerank_wrapper as mg_pagerank import cugraph.comms.comms as Comms import dask_cudf @@ -124,11 +124,10 @@ def pagerank(input_graph, client = default_client() input_graph.compute_renumber_edge_list(transposed=True) - (ddf, - num_verts, - partition_row_size, - partition_col_size, - vertex_partition_offsets) = shuffle(input_graph, transposed=True) + + ddf = input_graph.edgelist.edgelist_df + vertex_partition_offsets = get_vertex_partition_offsets(input_graph) + num_verts = vertex_partition_offsets.iloc[-1] num_edges = len(ddf) data = get_distributed_data(ddf) diff --git a/python/cugraph/dask/traversal/bfs.py b/python/cugraph/dask/traversal/bfs.py index 51e0dc0de5d..d108730f665 100644 --- a/python/cugraph/dask/traversal/bfs.py +++ b/python/cugraph/dask/traversal/bfs.py @@ -14,8 +14,8 @@ # from dask.distributed import wait, default_client -from cugraph.dask.common.input_utils import get_distributed_data -from cugraph.structure.shuffle import shuffle +from cugraph.dask.common.input_utils import (get_distributed_data, + get_vertex_partition_offsets) from cugraph.dask.traversal import mg_bfs_wrapper as mg_bfs 
import cugraph.comms.comms as Comms import cudf @@ -91,11 +91,10 @@ def bfs(graph, client = default_client() graph.compute_renumber_edge_list(transposed=False) - (ddf, - num_verts, - partition_row_size, - partition_col_size, - vertex_partition_offsets) = shuffle(graph, transposed=False) + ddf = graph.edgelist.edgelist_df + vertex_partition_offsets = get_vertex_partition_offsets(graph) + num_verts = vertex_partition_offsets.iloc[-1] + num_edges = len(ddf) data = get_distributed_data(ddf) diff --git a/python/cugraph/dask/traversal/sssp.py b/python/cugraph/dask/traversal/sssp.py index 52f2b9b256c..32e7401023a 100644 --- a/python/cugraph/dask/traversal/sssp.py +++ b/python/cugraph/dask/traversal/sssp.py @@ -14,8 +14,8 @@ # from dask.distributed import wait, default_client -from cugraph.dask.common.input_utils import get_distributed_data -from cugraph.structure.shuffle import shuffle +from cugraph.dask.common.input_utils import (get_distributed_data, + get_vertex_partition_offsets) from cugraph.dask.traversal import mg_sssp_wrapper as mg_sssp import cugraph.comms.comms as Comms import cudf @@ -91,11 +91,9 @@ def sssp(graph, client = default_client() graph.compute_renumber_edge_list(transposed=False) - (ddf, - num_verts, - partition_row_size, - partition_col_size, - vertex_partition_offsets) = shuffle(graph, transposed=False) + ddf = graph.edgelist.edgelist_df + vertex_partition_offsets = get_vertex_partition_offsets(graph) + num_verts = vertex_partition_offsets.iloc[-1] num_edges = len(ddf) data = get_distributed_data(ddf) diff --git a/python/cugraph/structure/new_number_map.py b/python/cugraph/structure/new_number_map.py deleted file mode 100644 index f8a2164d2c4..00000000000 --- a/python/cugraph/structure/new_number_map.py +++ /dev/null @@ -1,317 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -from dask.distributed import wait, default_client -from cugraph.dask.common.input_utils import get_distributed_data -from cugraph.structure import renumber_wrapper as c_renumber -import cugraph.comms as Comms -import dask_cudf -import numpy as np -import cudf -import cugraph.structure.number_map as legacy_number_map - - -def call_renumber(sID, - data, - num_edges, - is_mnmg, - store_transposed): - wid = Comms.get_worker_id(sID) - handle = Comms.get_handle(sID) - return c_renumber.renumber(data[0], - num_edges, - wid, - handle, - is_mnmg, - store_transposed) - - -class NumberMap: - - class SingleGPU: - def __init__(self, df, src_col_names, dst_col_names, id_type, - store_transposed): - self.col_names = NumberMap.compute_vals(src_col_names) - self.df = cudf.DataFrame() - self.id_type = id_type - self.store_transposed = store_transposed - self.numbered = False - - def to_internal_vertex_id(self, df, col_names): - tmp_df = df[col_names].rename( - columns=dict(zip(col_names, self.col_names)), copy=False - ) - index_name = NumberMap.generate_unused_column_name(df.columns) - tmp_df[index_name] = tmp_df.index - return ( - self.df.merge(tmp_df, on=self.col_names, how="right") - .sort_values(index_name) - .drop(columns=[index_name]) - .reset_index()["id"] - ) - - def from_internal_vertex_id( - self, df, internal_column_name, external_column_names - ): - tmp_df = self.df.merge( - df, - right_on=internal_column_name, - left_on="id", - how="right", - ) - if internal_column_name != "id": - tmp_df = tmp_df.drop(columns=["id"]) - if external_column_names is None: - return tmp_df - else: - return tmp_df.rename( - columns=dict(zip(self.col_names, external_column_names)), - copy=False, - ) - - class MultiGPU: - def __init__( - self, ddf, src_col_names, dst_col_names, id_type, store_transposed - ): - self.col_names = NumberMap.compute_vals(src_col_names) - self.val_types = NumberMap.compute_vals_types(ddf, src_col_names) - self.val_types["count"] = np.int32 - self.id_type = id_type - self.store_transposed = store_transposed - self.numbered = False - - def to_internal_vertex_id(self, ddf, col_names): - return self.ddf.merge( - ddf, - right_on=col_names, - left_on=self.col_names, - how="right", - )["global_id"] - - def from_internal_vertex_id( - self, df, internal_column_name, external_column_names - ): - tmp_df = self.ddf.merge( - df, - right_on=internal_column_name, - left_on="global_id", - how="right" - ).map_partitions(lambda df: df.drop(columns="global_id")) - - if external_column_names is None: - return tmp_df - else: - return tmp_df.map_partitions( - lambda df: - df.rename( - columns=dict( - zip(self.col_names, external_column_names) - ), - copy=False - ) - ) - - def __init__(self, id_type=np.int32): - self.implementation = None - self.id_type = id_type - - def compute_vals_types(df, column_names): - """ - Helper function to compute internal column names and types - """ - return { - str(i): df[column_names[i]].dtype for i in range(len(column_names)) - } - - def generate_unused_column_name(column_names): - """ - Helper function to generate an unused column name - """ - name = 'x' - while name in column_names: - name = name + "x" - - return name - - def compute_vals(column_names): - """ - Helper function to compute internal column names based on external - column names - """ - return [str(i) for i in range(len(column_names))] - - def renumber(df, src_col_names, dst_col_names, preserve_order=False, - store_transposed=False): - - if isinstance(src_col_names, list): - renumber_type = 'legacy' - # elif 
isinstance(df[src_col_names].dtype, string): - # renumber_type = 'legacy' - else: - renumber_type = 'experimental' - - if renumber_type == 'legacy': - renumber_map, renumbered_df = legacy_number_map.renumber( - df, - src_col_names, - dst_col_names, - preserve_order, - store_transposed) - # Add shuffling once algorithms are switched to new renumber - # (ddf, - # num_verts, - # partition_row_size, - # partition_col_size, - # vertex_partition_offsets) = shuffle(input_graph, transposed=True) - return renumber_map, renumbered_df - - renumber_map = NumberMap() - if not isinstance(src_col_names, list): - src_col_names = [src_col_names] - dst_col_names = [dst_col_names] - if type(df) is cudf.DataFrame: - renumber_map.implementation = NumberMap.SingleGPU( - df, src_col_names, dst_col_names, renumber_map.id_type, - store_transposed - ) - elif type(df) is dask_cudf.DataFrame: - renumber_map.implementation = NumberMap.MultiGPU( - df, src_col_names, dst_col_names, renumber_map.id_type, - store_transposed - ) - else: - raise Exception("df must be cudf.DataFrame or dask_cudf.DataFrame") - - num_edges = len(df) - - if isinstance(df, dask_cudf.DataFrame): - is_mnmg = True - else: - is_mnmg = False - - if is_mnmg: - client = default_client() - data = get_distributed_data(df) - result = [(client.submit(call_renumber, - Comms.get_session_id(), - wf[1], - num_edges, - is_mnmg, - store_transposed, - workers=[wf[0]]), wf[0]) - for idx, wf in enumerate(data.worker_to_parts.items())] - wait(result) - - def get_renumber_map(data): - return data[0] - - def get_renumbered_df(data): - return data[1] - - renumbering_map = dask_cudf.from_delayed( - [client.submit(get_renumber_map, - data, - workers=[wf]) - for (data, wf) in result]) - renumbered_df = dask_cudf.from_delayed( - [client.submit(get_renumbered_df, - data, - workers=[wf]) - for (data, wf) in result]) - - renumber_map.implementation.ddf = renumbering_map - renumber_map.implementation.numbered = True - - return renumbered_df, renumber_map - else: - renumbering_map, renumbered_df = c_renumber.renumber( - df, - num_edges, - 0, - Comms.get_default_handle(), - is_mnmg, - store_transposed) - renumber_map.implementation.df = renumbering_map - renumber_map.implementation.numbered = True - return renumbered_df, renumber_map - - def unrenumber(self, df, column_name, preserve_order=False): - """ - Given a DataFrame containing internal vertex ids in the identified - column, replace this with external vertex ids. If the renumbering - is from a single column, the output dataframe will use the same - name for the external vertex identifiers. If the renumbering is from - a multi-column input, the output columns will be labeled 0 through - n-1 with a suffix of _column_name. - Note that this function does not guarantee order or partitioning in - multi-GPU mode. - Parameters - ---------- - df: cudf.DataFrame or dask_cudf.DataFrame - A DataFrame containing internal vertex identifiers that will be - converted into external vertex identifiers. - column_name: string - Name of the column containing the internal vertex id. - preserve_order: (optional) bool - If True, preserve the ourder of the rows in the output - DataFrame to match the input DataFrame - Returns - --------- - df : cudf.DataFrame or dask_cudf.DataFrame - The original DataFrame columns exist unmodified. The external - vertex identifiers are added to the DataFrame, the internal - vertex identifier column is removed from the dataframe. 
- Examples - -------- - >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', - >>> dtype=['int32', 'int32', 'float32'], header=None) - >>> - >>> df, number_map = NumberMap.renumber(df, '0', '1') - >>> - >>> G = cugraph.Graph() - >>> G.from_cudf_edgelist(df, 'src', 'dst') - >>> - >>> pr = cugraph.pagerank(G, alpha = 0.85, max_iter = 500, - >>> tol = 1.0e-05) - >>> - >>> pr = number_map.unrenumber(pr, 'vertex') - >>> - """ - if len(self.col_names) == 1: - # Output will be renamed to match input - mapping = {"0": column_name} - else: - # Output will be renamed to ${i}_${column_name} - mapping = {} - for nm in self.col_names: - mapping[nm] = nm + "_" + column_name - - if preserve_order: - index_name = NumberMap.generate_unused_column_name(df) - df[index_name] = df.index - - df = self.from_internal_vertex_id(df, column_name, drop=True) - - if preserve_order: - df = df.sort_values( - index_name - ).drop(columns=index_name).reset_index(drop=True) - - if type(df) is dask_cudf.DataFrame: - return df.map_partitions( - lambda df: df.rename(columns=mapping, copy=False) - ) - else: - return df.rename(columns=mapping, copy=False) diff --git a/python/cugraph/structure/number_map.py b/python/cugraph/structure/number_map.py index deb2b9f4114..5f801eb0d90 100644 --- a/python/cugraph/structure/number_map.py +++ b/python/cugraph/structure/number_map.py @@ -1,4 +1,5 @@ # Copyright (c) 2020-2021, NVIDIA CORPORATION. +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -10,100 +11,45 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# -import cudf +from dask.distributed import wait, default_client +from cugraph.dask.common.input_utils import get_distributed_data +from cugraph.structure import renumber_wrapper as c_renumber +import cugraph.comms.comms as Comms import dask_cudf import numpy as np -import bisect +import cudf + + +def call_renumber(sID, + data, + num_edges, + is_mnmg, + store_transposed): + wid = Comms.get_worker_id(sID) + handle = Comms.get_handle(sID) + return c_renumber.renumber(data[0], + num_edges, + wid, + handle, + is_mnmg, + store_transposed) class NumberMap: - """ - Class used to translate external vertex ids to internal vertex ids - in the cuGraph framework. - - Internal vertex ids are assigned by hashing the external vertex ids - into a structure to eliminate duplicates, and the resulting list - of unique vertices are assigned integers from [0, V) where V is - the number of unique vertices. - - In Single GPU mode, internal vertex ids are constructed using - cudf functions, with a cudf.DataFrame containing the mapping - from external vertex identifiers and internal vertex identifiers - allowing for mapping vertex identifiers in either direction. In - this mode, the order of the output from the mapping functions is - non-deterministic. cudf makes no guarantees about order. If - matching the input order is required set the preserve_order - to True. - - In Multi GPU mode, internal vertex ids are constucted using - dask_cudf functions, with a dask_cudf.DataFrame containing - the mapping from external vertex identifiers and internal - vertex identifiers allowing for mapping vertex identifiers - in either direction. 
In this mode, the partitioning of - the number_map and the output from any of the mapping functions - are non-deterministic. dask_cudf makes no guarantees about the - partitioning or order of the output. As of this release, - there is no mechanism for controlling that, this will be - addressed at some point. - """ class SingleGPU: def __init__(self, df, src_col_names, dst_col_names, id_type, store_transposed): self.col_names = NumberMap.compute_vals(src_col_names) - self.df = cudf.DataFrame() + self.src_col_names = src_col_names + self.dst_col_names = dst_col_names + self.df = df self.id_type = id_type self.store_transposed = store_transposed - - source_count = 0 - dest_count = 0 - - if store_transposed: - dest_count = 1 - else: - source_count = 1 - - tmp = ( - df[src_col_names] - .assign(count=source_count) - .groupby(src_col_names) - .sum() - .reset_index() - .rename( - columns=dict(zip(src_col_names, self.col_names)), - copy=False, - ) - ) - - if dst_col_names is not None: - tmp_dst = ( - df[dst_col_names] - .assign(count=dest_count) - .groupby(dst_col_names) - .sum() - .reset_index() - ) - for newname, oldname in zip(self.col_names, dst_col_names): - self.df[newname] = tmp[newname].append(tmp_dst[oldname]) - self.df['count'] = tmp['count'].append(tmp_dst['count']) - else: - for newname in self.col_names: - self.df[newname] = tmp[newname] - self.df['count'] = tmp['count'] - self.numbered = False - def compute(self): - if not self.numbered: - tmp = self.df.groupby(self.col_names).sum().sort_values( - 'count', ascending=False - ).reset_index().drop(columns='count') - - tmp["id"] = tmp.index.astype(self.id_type) - self.df = tmp - self.numbered = True - def to_internal_vertex_id(self, df, col_names): tmp_df = df[col_names].rename( columns=dict(zip(col_names, self.col_names)), copy=False @@ -117,6 +63,25 @@ def to_internal_vertex_id(self, df, col_names): .reset_index()["id"] ) + def from_internal_vertex_id( + self, df, internal_column_name, external_column_names + ): + tmp_df = self.df.merge( + df, + right_on=internal_column_name, + left_on="id", + how="right", + ) + if internal_column_name != "id": + tmp_df = tmp_df.drop(columns=["id"]) + if external_column_names is None: + return tmp_df + else: + return tmp_df.rename( + columns=dict(zip(self.col_names, external_column_names)), + copy=False, + ) + def add_internal_vertex_id(self, df, id_column_name, col_names, drop, preserve_order): ret = None @@ -162,76 +127,39 @@ def add_internal_vertex_id(self, df, id_column_name, col_names, return ret - def from_internal_vertex_id( - self, df, internal_column_name, external_column_names - ): - tmp_df = self.df.merge( - df, - right_on=internal_column_name, - left_on="id", - how="right", - ) - if internal_column_name != "id": - tmp_df = tmp_df.drop(columns=["id"]) - if external_column_names is None: - return tmp_df - else: - return tmp_df.rename( - columns=dict(zip(self.col_names, external_column_names)), - copy=False, - ) - - class MultiGPU: - def extract_vertices( - df, src_col_names, dst_col_names, - internal_col_names, store_transposed - ): - source_count = 0 - dest_count = 0 - - if store_transposed: - dest_count = 1 - else: - source_count = 1 + def indirection_map(self, df, src_col_names, dst_col_names): + tmp_df = cudf.DataFrame() - s = ( + tmp = ( df[src_col_names] - .assign(count=source_count) .groupby(src_col_names) - .sum() + .count() .reset_index() .rename( - columns=dict(zip(src_col_names, internal_col_names)), + columns=dict(zip(src_col_names, self.col_names)), copy=False, ) ) - d = None if 
dst_col_names is not None: - d = ( + tmp_dst = ( df[dst_col_names] - .assign(count=dest_count) .groupby(dst_col_names) - .sum() + .count() .reset_index() - .rename( - columns=dict(zip(dst_col_names, internal_col_names)), - copy=False, - ) ) + for newname, oldname in zip(self.col_names, dst_col_names): + tmp_df[newname] = tmp[newname].append(tmp_dst[oldname]) + else: + for newname in self.col_names: + tmp_df[newname] = tmp[newname] - reply = cudf.DataFrame() - - for i in internal_col_names: - if d is None: - reply[i] = s[i] - else: - reply[i] = s[i].append(d[i]) - - reply['count'] = s['count'].append(d['count']) - - return reply + tmp_df = tmp_df.groupby(self.col_names).count().reset_index() + tmp_df["id"] = tmp_df.index.astype(self.id_type) + self.df = tmp_df + return tmp_df + class MultiGPU: def __init__( self, ddf, src_col_names, dst_col_names, id_type, store_transposed ): @@ -239,110 +167,10 @@ def __init__( self.val_types = NumberMap.compute_vals_types(ddf, src_col_names) self.val_types["count"] = np.int32 self.id_type = id_type + self.ddf = ddf self.store_transposed = store_transposed - self.ddf = ddf.map_partitions( - NumberMap.MultiGPU.extract_vertices, - src_col_names, - dst_col_names, - self.col_names, - store_transposed, - meta=self.val_types, - ) self.numbered = False - # Function to compute partitions based on known divisions of the - # hash value - def compute_partition(df, divisions): - sample = df.index[0] - partition_id = bisect.bisect_right(divisions, sample) - 1 - return df.assign(partition=partition_id) - - def assign_internal_identifiers_kernel( - local_id, partition, global_id, base_addresses - ): - for i in range(len(local_id)): - global_id[i] = local_id[i] + base_addresses[partition[i]] - - def assign_internal_identifiers(df, base_addresses, id_type): - df = df.assign(local_id=df.index.astype(np.int64)) - df = df.apply_rows( - NumberMap.MultiGPU.assign_internal_identifiers_kernel, - incols=["local_id", "partition"], - outcols={"global_id": id_type}, - kwargs={"base_addresses": base_addresses}, - ) - - return df.drop(columns=["local_id", "hash", "partition"]) - - def assign_global_id(self, ddf, base_addresses, val_types): - val_types["global_id"] = self.id_type - del val_types["hash"] - del val_types["partition"] - - ddf = ddf.map_partitions( - lambda df: NumberMap.MultiGPU.assign_internal_identifiers( - df, base_addresses, self.id_type - ), - meta=val_types, - ) - return ddf - - def compute(self): - if not self.numbered: - val_types = self.val_types - val_types["hash"] = np.int32 - - vertices = self.ddf.map_partitions( - lambda df: df.assign(hash=df.hash_columns(self.col_names)), - meta=val_types, - ) - - # Redistribute the ddf based on the hash values - rehashed = vertices.set_index("hash", drop=False) - - # Compute the local partition id (obsolete once - # https://github.com/dask/dask/issues/3707 is completed) - val_types["partition"] = np.int32 - - rehashed_with_partition_id = rehashed.map_partitions( - NumberMap.MultiGPU.compute_partition, - rehashed.divisions, - meta=val_types, - ) - - val_types.pop('count') - - numbering_map = rehashed_with_partition_id.map_partitions( - lambda df: df.groupby( - self.col_names + ["hash", "partition"] - ).sum() - .sort_values('count', ascending=False) - .reset_index() - .drop(columns='count'), - meta=val_types - ) - - # - # Compute base address for each partition - # - counts = numbering_map.map_partitions( - lambda df: df.groupby("partition").count() - ).compute()["hash"].to_pandas() - base_addresses = np.zeros(len(counts) + 1, 
self.id_type) - - for i in range(len(counts)): - base_addresses[i + 1] = base_addresses[i] + counts[i] - - # - # Update each partition with the base address - # - numbering_map = self.assign_global_id( - numbering_map, cudf.Series(base_addresses), val_types - ) - - self.ddf = numbering_map - self.numbered = True - def to_internal_vertex_id(self, ddf, col_names): return self.ddf.merge( ddf, @@ -351,6 +179,29 @@ def to_internal_vertex_id(self, ddf, col_names): how="right", )["global_id"] + def from_internal_vertex_id( + self, df, internal_column_name, external_column_names + ): + tmp_df = self.ddf.merge( + df, + right_on=internal_column_name, + left_on="global_id", + how="right" + ).map_partitions(lambda df: df.drop(columns="global_id")) + + if external_column_names is None: + return tmp_df + else: + return tmp_df.map_partitions( + lambda df: + df.rename( + columns=dict( + zip(self.col_names, external_column_names) + ), + copy=False + ) + ) + def add_internal_vertex_id(self, ddf, id_column_name, col_names, drop, preserve_order): # At the moment, preserve_order cannot be done on @@ -385,39 +236,50 @@ def add_internal_vertex_id(self, ddf, id_column_name, col_names, drop, return ret - def from_internal_vertex_id( - self, df, internal_column_name, external_column_names - ): - tmp_df = self.ddf.merge( - df, - right_on=internal_column_name, - left_on="global_id", - how="right" - ).map_partitions(lambda df: df.drop(columns="global_id")) + def indirection_map(self, ddf, src_col_names, dst_col_names): - if external_column_names is None: - return tmp_df - else: - return tmp_df.map_partitions( - lambda df: - df.rename( - columns=dict( - zip(self.col_names, external_column_names) - ), - copy=False - ) + tmp = ( + ddf[src_col_names] + .groupby(src_col_names) + .count() + .reset_index() + .rename( + columns=dict(zip(src_col_names, self.col_names)), ) + ) + + if dst_col_names is not None: + tmp_dst = ( + ddf[dst_col_names] + .groupby(dst_col_names) + .count() + .reset_index() + ) + for i, (newname, oldname) in enumerate(zip(self.col_names, + dst_col_names)): + if i == 0: + tmp_df = tmp[newname].append(tmp_dst[oldname]).\ + to_frame(name=newname) + else: + tmp_df[newname] = tmp[newname].append(tmp_dst[oldname]) + print(tmp_df.columns) + else: + for newname in self.col_names: + tmp_df[newname] = tmp[newname] + tmp_ddf = tmp_df.groupby(self.col_names).count().reset_index() + + # Set global index + tmp_ddf = tmp_ddf.assign(idx=1) + tmp_ddf['global_id'] = tmp_ddf.idx.cumsum() - 1 + tmp_ddf = tmp_ddf.drop(columns='idx') + + self.ddf = tmp_ddf + return tmp_ddf def __init__(self, id_type=np.int32): self.implementation = None self.id_type = id_type - def aggregate_count_and_partition(df): - d = {} - d['count'] = df['count'].sum() - d['partition'] = df['partition'].min() - return cudf.Series(d, index=['count', 'partition']) - def compute_vals_types(df, column_names): """ Helper function to compute internal column names and types @@ -443,125 +305,19 @@ def compute_vals(column_names): """ return [str(i) for i in range(len(column_names))] - def from_dataframe( - self, df, src_col_names, dst_col_names=None, store_transposed=False - ): - """ - Populate the numbering map with vertices from the specified - columns of the provided DataFrame. - - Parameters - ---------- - df : cudf.DataFrame or dask_cudf.DataFrame - Contains a list of external vertex identifiers that will be - numbered by the NumberMap class. 
- src_col_names: list of strings - This list of 1 or more strings contain the names - of the columns that uniquely identify an external - vertex identifier for source vertices - dst_col_names: list of strings - This list of 1 or more strings contain the names - of the columns that uniquely identify an external - vertex identifier for destination vertices - store_transposed : bool - Identify how the graph adjacency will be used. - If True, the graph will be organized by destination. - If False, the graph will be organized by source - - """ - if self.implementation is not None: - raise Exception("NumberMap is already populated") - - if dst_col_names is not None and len(src_col_names) != len( - dst_col_names - ): - raise Exception( - "src_col_names must have same length as dst_col_names" - ) - - if type(df) is cudf.DataFrame: - self.implementation = NumberMap.SingleGPU( - df, src_col_names, dst_col_names, self.id_type, - store_transposed - ) - elif type(df) is dask_cudf.DataFrame: - self.implementation = NumberMap.MultiGPU( - df, src_col_names, dst_col_names, self.id_type, - store_transposed - ) - else: - raise Exception("df must be cudf.DataFrame or dask_cudf.DataFrame") - - self.implementation.compute() - - def from_series(self, src_series, dst_series=None, store_transposed=False): - """ - Populate the numbering map with vertices from the specified - pair of series objects, one for the source and one for - the destination - - Parameters - ---------- - src_series: cudf.Series or dask_cudf.Series - Contains a list of external vertex identifiers that will be - numbered by the NumberMap class. - dst_series: cudf.Series or dask_cudf.Series - Contains a list of external vertex identifiers that will be - numbered by the NumberMap class. - store_transposed : bool - Identify how the graph adjacency will be used. - If True, the graph will be organized by destination. 
- If False, the graph will be organized by source - """ - if self.implementation is not None: - raise Exception("NumberMap is already populated") - - if dst_series is not None and type(src_series) != type(dst_series): - raise Exception("src_series and dst_series must have same type") - - if type(src_series) is cudf.Series: - dst_series_list = None - df = cudf.DataFrame() - df["s"] = src_series - if dst_series is not None: - df["d"] = dst_series - dst_series_list = ["d"] - self.implementation = NumberMap.SingleGPU( - df, ["s"], dst_series_list, self.id_type, store_transposed - ) - elif type(src_series) is dask_cudf.Series: - dst_series_list = None - df = dask_cudf.DataFrame() - df["s"] = src_series - if dst_series is not None: - df["d"] = dst_series - dst_series_list = ["d"] - self.implementation = NumberMap.MultiGPU( - df, ["s"], dst_series_list, self.id_type, store_transposed - ) - else: - raise Exception( - "src_series must be cudf.Series or " "dask_cudf.Series" - ) - - self.implementation.compute() - def to_internal_vertex_id(self, df, col_names=None): """ Given a collection of external vertex ids, return the internal vertex ids - Parameters ---------- df: cudf.DataFrame, cudf.Series, dask_cudf.DataFrame, dask_cudf.Series Contains a list of external vertex identifiers that will be converted into internal vertex identifiers - col_names: (optional) list of strings This list of 1 or more strings contain the names of the columns that uniquely identify an external vertex identifier - Returns --------- vertex_ids : cudf.Series or dask_cudf.Series @@ -569,7 +325,6 @@ def to_internal_vertex_id(self, df, col_names=None): does not guarantee order or partitioning (in the case of dask_cudf) of vertex ids. If order matters use add_internal_vertex_id - """ tmp_df = None tmp_col_names = None @@ -600,34 +355,27 @@ def add_internal_vertex_id( """ Given a collection of external vertex ids, return the internal vertex ids combined with the input data. - If a series-type input is provided then the series will be in a column named '0'. Otherwise the input column names in the DataFrame will be preserved. - Parameters ---------- df: cudf.DataFrame, cudf.Series, dask_cudf.DataFrame, dask_cudf.Series Contains a list of external vertex identifiers that will be converted into internal vertex identifiers - id_column_name: (optional) string The name to be applied to the column containing the id (defaults to 'id') - col_names: (optional) list of strings This list of 1 or more strings contain the names of the columns that uniquely identify an external vertex identifier - drop: (optional) boolean If True, drop the column names specified in col_names from the returned DataFrame. Defaults to False. - preserve_order: (optional) boolean If True, do extra sorting work to preserve the order of the input DataFrame. Defaults to False. - Returns --------- df : cudf.DataFrame or dask_cudf.DataFrame @@ -635,7 +383,6 @@ def add_internal_vertex_id( with an additional column containing the internal vertex id. Note that there is no guarantee of the order or partitioning of elements in the returned DataFrame. - """ tmp_df = None tmp_col_names = None @@ -671,7 +418,6 @@ def from_internal_vertex_id( """ Given a collection of internal vertex ids, return a DataFrame of the external vertex ids - Parameters ---------- df: cudf.DataFrame, cudf.Series, dask_cudf.DataFrame, dask_cudf.Series @@ -681,20 +427,16 @@ def from_internal_vertex_id( in a column labeled 'id'. 
If df is a dataframe type object then internal_column_name should identify which column corresponds the the internal vertex id that should be converted - internal_column_name: (optional) string Name of the column containing the internal vertex id. If df is a series then this parameter is ignored. If df is a DataFrame this parameter is required. - external_column_names: (optional) string or list of strings Name of the columns that define an external vertex id. If not specified, columns will be labeled '0', '1,', ..., 'n-1' - drop: (optional) boolean If True the internal column name will be dropped from the DataFrame. Defaults to False. - Returns --------- df : cudf.DataFrame or dask_cudf.DataFrame @@ -727,107 +469,117 @@ def from_internal_vertex_id( return output_df - def column_names(self): - """ - Return the list of internal column names - - Returns - ---------- - List of column names ('0', '1', ..., 'n-1') - """ - return self.implementation.col_names - def renumber(df, src_col_names, dst_col_names, preserve_order=False, store_transposed=False): - """ - Given a single GPU or distributed DataFrame, use src_col_names and - dst_col_names to identify the source vertex identifiers and destination - vertex identifiers, respectively. - - Internal vertex identifiers will be created, numbering vertices as - integers starting from 0. - - The function will return a DataFrame containing the original dataframe - contents with a new column labeled 'src' containing the renumbered - source vertices and a new column labeled 'dst' containing the - renumbered dest vertices, along with a NumberMap object that contains - the number map for the numbering that was used. - - Note that this function does not guarantee order in single GPU mode, - and does not guarantee order or partitioning in multi-GPU mode. If you - wish to preserve ordering, add an index column to df and sort the - return by that index column. - - Parameters - ---------- - df: cudf.DataFrame or dask_cudf.DataFrame - Contains a list of external vertex identifiers that will be - numbered by the NumberMap class. - src_col_names: string or list of strings - This list of 1 or more strings contain the names - of the columns that uniquely identify an external - vertex identifier for source vertices - dst_col_names: string or list of strings - This list of 1 or more strings contain the names - of the columns that uniquely identify an external - vertex identifier for destination vertices - store_transposed : bool - Identify how the graph adjacency will be used. - If True, the graph will be organized by destination. - If False, the graph will be organized by source - - Returns - --------- - df : cudf.DataFrame or dask_cudf.DataFrame - The original DataFrame columns exist unmodified. Columns - are added to the DataFrame to identify the external vertex - identifiers. If external_columns is specified, these names - are used as the names of the output columns. If external_columns - is not specifed the columns are labeled '0', ... 'n-1' based on - the number of columns identifying the external vertex identifiers. - - number_map : NumberMap - The number map object object that retains the mapping between - internal vertex identifiers and external vertex identifiers. 
+ if isinstance(src_col_names, list): + renumber_type = 'legacy' + elif not (df[src_col_names].dtype == np.int32 or + df[src_col_names].dtype == np.int64): + renumber_type = 'legacy' + else: + renumber_type = 'experimental' + df = df.rename(columns={src_col_names: "src", + dst_col_names: "dst"}) - Examples - -------- - >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', - >>> dtype=['int32', 'int32', 'float32'], header=None) - >>> - >>> df, number_map = NumberMap.renumber(df, '0', '1') - >>> - >>> G = cugraph.Graph() - >>> G.from_cudf_edgelist(df, 'src', 'dst') - """ renumber_map = NumberMap() - - if isinstance(src_col_names, list): - renumber_map.from_dataframe(df, src_col_names, dst_col_names) - df = renumber_map.add_internal_vertex_id( - df, "src", src_col_names, drop=True, - preserve_order=preserve_order + if not isinstance(src_col_names, list): + src_col_names = [src_col_names] + dst_col_names = [dst_col_names] + if type(df) is cudf.DataFrame: + renumber_map.implementation = NumberMap.SingleGPU( + df, src_col_names, dst_col_names, renumber_map.id_type, + store_transposed ) - df = renumber_map.add_internal_vertex_id( - df, "dst", dst_col_names, drop=True, - preserve_order=preserve_order + elif type(df) is dask_cudf.DataFrame: + renumber_map.implementation = NumberMap.MultiGPU( + df, src_col_names, dst_col_names, renumber_map.id_type, + store_transposed ) else: - renumber_map.from_dataframe(df, [src_col_names], [dst_col_names]) + raise Exception("df must be cudf.DataFrame or dask_cudf.DataFrame") + + if renumber_type == 'legacy': + indirection_map = renumber_map.implementation.\ + indirection_map(df, + src_col_names, + dst_col_names) df = renumber_map.add_internal_vertex_id( df, "src", src_col_names, drop=True, preserve_order=preserve_order ) - df = renumber_map.add_internal_vertex_id( df, "dst", dst_col_names, drop=True, preserve_order=preserve_order ) - if type(df) is dask_cudf.DataFrame: - df = df.persist() + num_edges = len(df) + + if isinstance(df, dask_cudf.DataFrame): + is_mnmg = True + else: + is_mnmg = False + + if is_mnmg: + client = default_client() + data = get_distributed_data(df) + result = [(client.submit(call_renumber, + Comms.get_session_id(), + wf[1], + num_edges, + is_mnmg, + store_transposed, + workers=[wf[0]]), wf[0]) + for idx, wf in enumerate(data.worker_to_parts.items())] + wait(result) + + def get_renumber_map(data): + return data[0] + + def get_renumbered_df(data): + return data[1] + + renumbering_map = dask_cudf.from_delayed( + [client.submit(get_renumber_map, + data, + workers=[wf]) + for (data, wf) in result]) + renumbered_df = dask_cudf.from_delayed( + [client.submit(get_renumbered_df, + data, + workers=[wf]) + for (data, wf) in result]) + if renumber_type == 'legacy': + renumber_map.implementation.ddf = indirection_map.merge( + renumbering_map, + right_on='original_ids', left_on='global_id', + how='right').\ + drop(columns=['global_id', 'original_ids'])\ + .rename(columns={'new_ids': 'global_id'}) + else: + renumber_map.implementation.ddf = renumbering_map.rename( + columns={'original_ids': '0', 'new_ids': 'global_id'}) + renumber_map.implementation.numbered = True + return renumbered_df, renumber_map - return df, renumber_map + else: + renumbering_map, renumbered_df = c_renumber.renumber( + df, + num_edges, + 0, + Comms.get_default_handle(), + is_mnmg, + store_transposed) + if renumber_type == 'legacy': + renumber_map.implementation.df = indirection_map.\ + merge(renumbering_map, + right_on='original_ids', left_on='id').\ + drop(columns=['id', 
'original_ids'])\ + .rename(columns={'new_ids': 'id'}, copy=False) + else: + renumber_map.implementation.df = renumbering_map.rename( + columns={'original_ids': '0', 'new_ids': 'id'}, copy=False) + renumber_map.implementation.numbered = True + return renumbered_df, renumber_map def unrenumber(self, df, column_name, preserve_order=False): """ @@ -837,30 +589,24 @@ def unrenumber(self, df, column_name, preserve_order=False): name for the external vertex identifiers. If the renumbering is from a multi-column input, the output columns will be labeled 0 through n-1 with a suffix of _column_name. - Note that this function does not guarantee order or partitioning in multi-GPU mode. - Parameters ---------- df: cudf.DataFrame or dask_cudf.DataFrame A DataFrame containing internal vertex identifiers that will be converted into external vertex identifiers. - column_name: string Name of the column containing the internal vertex id. - preserve_order: (optional) bool If True, preserve the ourder of the rows in the output DataFrame to match the input DataFrame - Returns --------- df : cudf.DataFrame or dask_cudf.DataFrame The original DataFrame columns exist unmodified. The external vertex identifiers are added to the DataFrame, the internal vertex identifier column is removed from the dataframe. - Examples -------- >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', diff --git a/python/cugraph/structure/renumber_wrapper.pyx b/python/cugraph/structure/renumber_wrapper.pyx index 302fcfe583b..682c6b32a0f 100644 --- a/python/cugraph/structure/renumber_wrapper.pyx +++ b/python/cugraph/structure/renumber_wrapper.pyx @@ -43,8 +43,8 @@ cdef renumber_helper(shuffled_vertices_t* ptr_maj_min_w, vertex_t, weights): shuffled_minor_series = cudf.Series(data=shuffled_minor_buffer, dtype=vertex_t) shuffled_df = cudf.DataFrame() - shuffled_df['src']=shuffled_major_series - shuffled_df['dst']=shuffled_minor_series + shuffled_df['major_vertices']=shuffled_major_series + shuffled_df['minor_vertices']=shuffled_minor_series if weights is not None: weight_t = weights.dtype @@ -53,7 +53,7 @@ cdef renumber_helper(shuffled_vertices_t* ptr_maj_min_w, vertex_t, weights): shuffled_weights_series = cudf.Series(data=shuffled_weights_buffer, dtype=weight_t) - shuffled_df['weights']= shuffled_weights_series + shuffled_df['value']= shuffled_weights_series return shuffled_df @@ -84,7 +84,7 @@ def renumber(input_df, # maybe use cpdef ? if num_global_edges > (2**31 - 1): edge_t = np.dtype("int64") else: - edge_t = np.dtype("int32") + edge_t = vertex_t if "value" in input_df.columns: weights = input_df['value'] weight_t = weights.dtype @@ -150,15 +150,19 @@ def renumber(input_df, # maybe use cpdef ? 
num_partition_edges, is_hyper_partitioned).release()) shuffled_df = renumber_helper(ptr_shuffled_32_32.get(), vertex_t, weights) + major_vertices = shuffled_df['major_vertices'] + minor_vertices = shuffled_df['minor_vertices'] + num_partition_edges = len(shuffled_df) + if not transposed: + major = 'src'; minor = 'dst' + else: + major = 'dst'; minor = 'src' + shuffled_df = shuffled_df.rename(columns={'major_vertices':major, 'minor_vertices':minor}, copy=False) else: - shuffled_df = input_df - - shuffled_src = shuffled_df['src'] - shuffled_dst = shuffled_df['dst'] - num_partition_edges = len(shuffled_df) + shuffled_df = input_df - shuffled_major = shuffled_src.__cuda_array_interface__['data'][0] - shuffled_minor = shuffled_dst.__cuda_array_interface__['data'][0] + shuffled_major = major_vertices.__cuda_array_interface__['data'][0] + shuffled_minor = minor_vertices.__cuda_array_interface__['data'][0] ptr_renum_quad_32_32.reset(call_renumber[int, int](deref(handle_ptr), shuffled_major, shuffled_minor, @@ -209,15 +213,19 @@ def renumber(input_df, # maybe use cpdef ? is_hyper_partitioned).release()) shuffled_df = renumber_helper(ptr_shuffled_32_64.get(), vertex_t, weights) + major_vertices = shuffled_df['major_vertices'] + minor_vertices = shuffled_df['minor_vertices'] + num_partition_edges = len(shuffled_df) + if not transposed: + major = 'src'; minor = 'dst' + else: + major = 'dst'; minor = 'src' + shuffled_df = shuffled_df.rename(columns={'major_vertices':major, 'minor_vertices':minor}, copy=False) else: shuffled_df = input_df - - shuffled_src = shuffled_df['src'] - shuffled_dst = shuffled_df['dst'] - num_partition_edges = len(shuffled_df) - - shuffled_major = shuffled_src.__cuda_array_interface__['data'][0] - shuffled_minor = shuffled_dst.__cuda_array_interface__['data'][0] + + shuffled_major = major_vertices.__cuda_array_interface__['data'][0] + shuffled_minor = minor_vertices.__cuda_array_interface__['data'][0] ptr_renum_quad_32_32.reset(call_renumber[int, int](deref(handle_ptr), shuffled_major, @@ -259,6 +267,7 @@ def renumber(input_df, # maybe use cpdef ? renumbered_map['new_ids'] = new_series return renumbered_map, shuffled_df + elif ( edge_t == np.dtype("int64")): if( weight_t == np.dtype("float32")): if(is_multi_gpu): @@ -270,15 +279,19 @@ def renumber(input_df, # maybe use cpdef ? is_hyper_partitioned).release()) shuffled_df = renumber_helper(ptr_shuffled_32_32.get(), vertex_t, weights) + major_vertices = shuffled_df['major_vertices'] + minor_vertices = shuffled_df['minor_vertices'] + num_partition_edges = len(shuffled_df) + if not transposed: + major = 'src'; minor = 'dst' + else: + major = 'dst'; minor = 'src' + shuffled_df = shuffled_df.rename(columns={'major_vertices':major, 'minor_vertices':minor}, copy=False) else: shuffled_df = input_df - - shuffled_src = shuffled_df['src'] - shuffled_dst = shuffled_df['dst'] - num_partition_edges = len(shuffled_df) - - shuffled_major = shuffled_src.__cuda_array_interface__['data'][0] - shuffled_minor = shuffled_dst.__cuda_array_interface__['data'][0] + + shuffled_major = major_vertices.__cuda_array_interface__['data'][0] + shuffled_minor = minor_vertices.__cuda_array_interface__['data'][0] ptr_renum_quad_32_64.reset(call_renumber[int, long](deref(handle_ptr), shuffled_major, @@ -330,15 +343,19 @@ def renumber(input_df, # maybe use cpdef ? 
is_hyper_partitioned).release()) shuffled_df = renumber_helper(ptr_shuffled_32_64.get(), vertex_t, weights) + major_vertices = shuffled_df['major_vertices'] + minor_vertices = shuffled_df['minor_vertices'] + num_partition_edges = len(shuffled_df) + if not transposed: + major = 'src'; minor = 'dst' + else: + major = 'dst'; minor = 'src' + shuffled_df = shuffled_df.rename(columns={'major_vertices':major, 'minor_vertices':minor}, copy=False) else: shuffled_df = input_df - - shuffled_src = shuffled_df['src'] - shuffled_dst = shuffled_df['dst'] - num_partition_edges = len(shuffled_df) - - shuffled_major = shuffled_src.__cuda_array_interface__['data'][0] - shuffled_minor = shuffled_dst.__cuda_array_interface__['data'][0] + + shuffled_major = major_vertices.__cuda_array_interface__['data'][0] + shuffled_minor = minor_vertices.__cuda_array_interface__['data'][0] ptr_renum_quad_32_64.reset(call_renumber[int, long](deref(handle_ptr), shuffled_major, @@ -379,6 +396,7 @@ def renumber(input_df, # maybe use cpdef ? renumbered_map['new_ids'] = new_series return renumbered_map, shuffled_df + elif (vertex_t == np.dtype("int64")): if ( edge_t == np.dtype("int64")): if( weight_t == np.dtype("float32")): @@ -391,15 +409,19 @@ def renumber(input_df, # maybe use cpdef ? is_hyper_partitioned).release()) shuffled_df = renumber_helper(ptr_shuffled_64_32.get(), vertex_t, weights) + major_vertices = shuffled_df['major_vertices'] + minor_vertices = shuffled_df['minor_vertices'] + num_partition_edges = len(shuffled_df) + if not transposed: + major = 'src'; minor = 'dst' + else: + major = 'dst'; minor = 'src' + shuffled_df = shuffled_df.rename(columns={'major_vertices':major, 'minor_vertices':minor}, copy=False) else: shuffled_df = input_df - - shuffled_src = shuffled_df['src'] - shuffled_dst = shuffled_df['dst'] - num_partition_edges = len(shuffled_df) - - shuffled_major = shuffled_src.__cuda_array_interface__['data'][0] - shuffled_minor = shuffled_dst.__cuda_array_interface__['data'][0] + + shuffled_major = major_vertices.__cuda_array_interface__['data'][0] + shuffled_minor = minor_vertices.__cuda_array_interface__['data'][0] ptr_renum_quad_64_64.reset(call_renumber[long, long](deref(handle_ptr), shuffled_major, @@ -428,8 +450,8 @@ def renumber(input_df, # maybe use cpdef ? uniq_partition_vector_64.get()[0].at(rank_indx+1)), dtype=vertex_t) else: - new_series = cudf.Series(np.arange(uniq_partition_vector_32.get()[0].at(0), - uniq_partition_vector_32.get()[0].at(1)), + new_series = cudf.Series(np.arange(uniq_partition_vector_64.get()[0].at(0), + uniq_partition_vector_64.get()[0].at(1)), dtype=vertex_t) # create new cudf df @@ -441,6 +463,7 @@ def renumber(input_df, # maybe use cpdef ? renumbered_map['new_ids'] = new_series return renumbered_map, shuffled_df + elif( weight_t == np.dtype("float64")): if(is_multi_gpu): ptr_shuffled_64_64.reset(call_shuffle[long, long, double](deref(handle_ptr), @@ -451,15 +474,19 @@ def renumber(input_df, # maybe use cpdef ? 
is_hyper_partitioned).release()) shuffled_df = renumber_helper(ptr_shuffled_64_64.get(), vertex_t, weights) + major_vertices = shuffled_df['major_vertices'] + minor_vertices = shuffled_df['minor_vertices'] + num_partition_edges = len(shuffled_df) + if not transposed: + major = 'src'; minor = 'dst' + else: + major = 'dst'; minor = 'src' + shuffled_df = shuffled_df.rename(columns={'major_vertices':major, 'minor_vertices':minor}, copy=False) else: shuffled_df = input_df - - shuffled_src = shuffled_df['src'] - shuffled_dst = shuffled_df['dst'] - num_partition_edges = len(shuffled_df) - - shuffled_major = shuffled_src.__cuda_array_interface__['data'][0] - shuffled_minor = shuffled_dst.__cuda_array_interface__['data'][0] + + shuffled_major = major_vertices.__cuda_array_interface__['data'][0] + shuffled_minor = minor_vertices.__cuda_array_interface__['data'][0] ptr_renum_quad_64_64.reset(call_renumber[long, long](deref(handle_ptr), shuffled_major, @@ -488,8 +515,8 @@ def renumber(input_df, # maybe use cpdef ? uniq_partition_vector_64.get()[0].at(rank_indx+1)), dtype=vertex_t) else: - new_series = cudf.Series(np.arange(uniq_partition_vector_32.get()[0].at(0), - uniq_partition_vector_32.get()[0].at(1)), + new_series = cudf.Series(np.arange(uniq_partition_vector_64.get()[0].at(0), + uniq_partition_vector_64.get()[0].at(1)), dtype=vertex_t) # create new cudf df diff --git a/python/cugraph/tests/test_renumber.py b/python/cugraph/tests/test_renumber.py index 6f88d5f85c4..5362d3f5804 100644 --- a/python/cugraph/tests/test_renumber.py +++ b/python/cugraph/tests/test_renumber.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -44,13 +44,14 @@ def test_renumber_ips(): gdf["source_as_int"] = gdf["source_list"].str.ip2int() gdf["dest_as_int"] = gdf["dest_list"].str.ip2int() - numbering = NumberMap() - numbering.from_series(gdf["source_as_int"], gdf["dest_as_int"]) - src = numbering.to_internal_vertex_id(gdf["source_as_int"]) - dst = numbering.to_internal_vertex_id(gdf["dest_as_int"]) + renumbered_gdf, renumber_map = NumberMap.renumber(gdf, + "source_as_int", + "dest_as_int") - check_src = numbering.from_internal_vertex_id(src)["0"] - check_dst = numbering.from_internal_vertex_id(dst)["0"] + check_src = renumber_map.from_internal_vertex_id(renumbered_gdf['src'] + )["0"] + check_dst = renumber_map.from_internal_vertex_id(renumbered_gdf['dst'] + )["0"] assert check_src.equals(gdf["source_as_int"]) assert check_dst.equals(gdf["dest_as_int"]) @@ -78,13 +79,14 @@ def test_renumber_ips_cols(): gdf["source_as_int"] = gdf["source_list"].str.ip2int() gdf["dest_as_int"] = gdf["dest_list"].str.ip2int() - numbering = NumberMap() - numbering.from_dataframe(gdf, ["source_as_int"], ["dest_as_int"]) - src = numbering.to_internal_vertex_id(gdf["source_as_int"]) - dst = numbering.to_internal_vertex_id(gdf["dest_as_int"]) + renumbered_gdf, renumber_map = NumberMap.renumber(gdf, + ["source_as_int"], + ["dest_as_int"]) - check_src = numbering.from_internal_vertex_id(src)["0"] - check_dst = numbering.from_internal_vertex_id(dst)["0"] + check_src = renumber_map.from_internal_vertex_id(renumbered_gdf['src'] + )["0"] + check_dst = renumber_map.from_internal_vertex_id(renumbered_gdf['dst'] + )["0"] assert check_src.equals(gdf["source_as_int"]) assert check_dst.equals(gdf["dest_as_int"]) @@ -110,13 +112,14 @@ def test_renumber_ips_str_cols(): gdf = cudf.from_pandas(pdf) - numbering = NumberMap() - numbering.from_dataframe(gdf, ["source_list"], ["dest_list"]) - src = numbering.to_internal_vertex_id(gdf["source_list"]) - dst = numbering.to_internal_vertex_id(gdf["dest_list"]) + renumbered_gdf, renumber_map = NumberMap.renumber(gdf, + ["source_as_int"], + ["dest_as_int"]) - check_src = numbering.from_internal_vertex_id(src)["0"] - check_dst = numbering.from_internal_vertex_id(dst)["0"] + check_src = renumber_map.from_internal_vertex_id(renumbered_gdf['src'] + )["0"] + check_dst = renumber_map.from_internal_vertex_id(renumbered_gdf['dst'] + )["0"] assert check_src.equals(gdf["source_list"]) assert check_dst.equals(gdf["dest_list"]) @@ -130,13 +133,14 @@ def test_renumber_negative(): gdf = cudf.DataFrame.from_pandas(df[["source_list", "dest_list"]]) - numbering = NumberMap() - numbering.from_dataframe(gdf, ["source_list"], ["dest_list"]) - src = numbering.to_internal_vertex_id(gdf["source_list"]) - dst = numbering.to_internal_vertex_id(gdf["dest_list"]) + renumbered_gdf, renumber_map = NumberMap.renumber(gdf, + "source_list", + "dest_list") - check_src = numbering.from_internal_vertex_id(src)["0"] - check_dst = numbering.from_internal_vertex_id(dst)["0"] + check_src = renumber_map.from_internal_vertex_id(renumbered_gdf['src'] + )["0"] + check_dst = renumber_map.from_internal_vertex_id(renumbered_gdf['dst'] + )["0"] assert check_src.equals(gdf["source_list"]) assert check_dst.equals(gdf["dest_list"]) @@ -150,19 +154,21 @@ def test_renumber_negative_col(): gdf = cudf.DataFrame.from_pandas(df[["source_list", "dest_list"]]) - numbering = NumberMap() - numbering.from_dataframe(gdf, ["source_list"], ["dest_list"]) - src = numbering.to_internal_vertex_id(gdf["source_list"]) - dst = 
numbering.to_internal_vertex_id(gdf["dest_list"]) + renumbered_gdf, renumber_map = NumberMap.renumber(gdf, + "source_list", + "dest_list") - check_src = numbering.from_internal_vertex_id(src)["0"] - check_dst = numbering.from_internal_vertex_id(dst)["0"] + check_src = renumber_map.from_internal_vertex_id(renumbered_gdf['src'] + )["0"] + check_dst = renumber_map.from_internal_vertex_id(renumbered_gdf['dst'] + )["0"] assert check_src.equals(gdf["source_list"]) assert check_dst.equals(gdf["dest_list"]) # Test all combinations of default/managed and pooled/non-pooled allocation +@pytest.mark.skip(reason="dropped renumbering from series support") @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_renumber_series(graph_file): gc.collect() @@ -215,19 +221,21 @@ def test_renumber_files(graph_file): df["dst"] = cudf.Series([x + translate for x in destinations. values_host]) - numbering = NumberMap() - numbering.from_series(df["src"], df["dst"]) + exp_src = cudf.Series([x + translate for x in sources. + values_host]) + exp_dst = cudf.Series([x + translate for x in destinations. + values_host]) - renumbered_df = numbering.add_internal_vertex_id( - numbering.add_internal_vertex_id(df, "src_id", ["src"]), - "dst_id", ["dst"] - ) + renumbered_df, renumber_map = NumberMap.renumber(df, "src", "dst", + preserve_order=True) - check_src = numbering.from_internal_vertex_id(renumbered_df, "src_id") - check_dst = numbering.from_internal_vertex_id(renumbered_df, "dst_id") + unrenumbered_df = renumber_map.unrenumber(renumbered_df, "src", + preserve_order=True) + unrenumbered_df = renumber_map.unrenumber(unrenumbered_df, "dst", + preserve_order=True) - assert check_src["src"].equals(check_src["0"]) - assert check_dst["dst"].equals(check_dst["0"]) + assert exp_src.equals(unrenumbered_df["src"]) + assert exp_dst.equals(unrenumbered_df["dst"]) # Test all combinations of default/managed and pooled/non-pooled allocation @@ -246,19 +254,21 @@ def test_renumber_files_col(graph_file): gdf['dst'] = cudf.Series([x + translate for x in destinations. values_host]) - numbering = NumberMap() - numbering.from_dataframe(gdf, ["src"], ["dst"]) + exp_src = cudf.Series([x + translate for x in sources. + values_host]) + exp_dst = cudf.Series([x + translate for x in destinations. 
+ values_host]) - renumbered_df = numbering.add_internal_vertex_id( - numbering.add_internal_vertex_id(gdf, "src_id", ["src"]), - "dst_id", ["dst"] - ) + renumbered_df, renumber_map = NumberMap.renumber(gdf, ["src"], ["dst"], + preserve_order=True) - check_src = numbering.from_internal_vertex_id(renumbered_df, "src_id") - check_dst = numbering.from_internal_vertex_id(renumbered_df, "dst_id") + unrenumbered_df = renumber_map.unrenumber(renumbered_df, "src", + preserve_order=True) + unrenumbered_df = renumber_map.unrenumber(unrenumbered_df, "dst", + preserve_order=True) - assert check_src["src"].equals(check_src["0"]) - assert check_dst["dst"].equals(check_dst["0"]) + assert exp_src.equals(unrenumbered_df["src"]) + assert exp_dst.equals(unrenumbered_df["dst"]) # Test all combinations of default/managed and pooled/non-pooled allocation @@ -278,21 +288,17 @@ def test_renumber_files_multi_col(graph_file): gdf["src"] = sources + translate gdf["dst"] = destinations + translate - numbering = NumberMap() - numbering.from_dataframe(gdf, ["src", "src_old"], ["dst", "dst_old"]) + renumbered_df, renumber_map = NumberMap.renumber(gdf, + ["src", "src_old"], + ["dst", "dst_old"], + preserve_order=True) - renumbered_df = numbering.add_internal_vertex_id( - numbering.add_internal_vertex_id( - gdf, "src_id", ["src", "src_old"] - ), - "dst_id", - ["dst", "dst_old"], - ) + unrenumbered_df = renumber_map.unrenumber(renumbered_df, "src", + preserve_order=True) + unrenumbered_df = renumber_map.unrenumber(unrenumbered_df, "dst", + preserve_order=True) - check_src = numbering.from_internal_vertex_id(renumbered_df, "src_id") - check_dst = numbering.from_internal_vertex_id(renumbered_df, "dst_id") - - assert check_src["src"].equals(check_src["0"]) - assert check_src["src_old"].equals(check_src["1"]) - assert check_dst["dst"].equals(check_dst["0"]) - assert check_dst["dst_old"].equals(check_dst["1"]) + assert gdf["src"].equals(unrenumbered_df["0_src"]) + assert gdf["src_old"].equals(unrenumbered_df["1_src"]) + assert gdf["dst"].equals(unrenumbered_df["0_dst"]) + assert gdf["dst_old"].equals(unrenumbered_df["1_dst"]) From f2e5a8755e18aadfc151ca65787dd4a3775efb85 Mon Sep 17 00:00:00 2001 From: Andrei Schaffer <37386037+aschaffer@users.noreply.github.com> Date: Tue, 30 Mar 2021 13:09:33 -0500 Subject: [PATCH 208/343] Implement C/CUDA RandomWalks functionality (#1439) This PR tracks work on issue: https://github.com/rapidsai/cugraph/issues/1380. 
Authors: - Andrei Schaffer (@aschaffer) Approvers: - Brad Rees (@BradReesWork) - Chuck Hastings (@ChuckHastings) - Seunghwa Kang (@seunghwak) URL: https://github.com/rapidsai/cugraph/pull/1439 --- cpp/CMakeLists.txt | 1 + cpp/include/algorithms.hpp | 28 + cpp/include/utilities/cython.hpp | 24 + cpp/include/utilities/graph_traits.hpp | 61 ++ cpp/src/experimental/random_walks.cuh | 887 ++++++++++++++++++ cpp/src/sampling/random_walks.cu | 78 ++ cpp/src/utilities/cython.cu | 76 ++ cpp/tests/CMakeLists.txt | 14 + cpp/tests/experimental/random_walks_test.cu | 152 +++ cpp/tests/experimental/random_walks_utils.cuh | 152 +++ cpp/tests/experimental/rw_low_level_test.cu | 783 ++++++++++++++++ 11 files changed, 2256 insertions(+) create mode 100644 cpp/include/utilities/graph_traits.hpp create mode 100644 cpp/src/experimental/random_walks.cuh create mode 100644 cpp/src/sampling/random_walks.cu create mode 100644 cpp/tests/experimental/random_walks_test.cu create mode 100644 cpp/tests/experimental/random_walks_utils.cuh create mode 100644 cpp/tests/experimental/rw_low_level_test.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 34ea935e31d..57f324a60a9 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -420,6 +420,7 @@ add_library(cugraph SHARED src/community/triangles_counting.cu src/community/extract_subgraph_by_vertex.cu src/community/egonet.cu + src/sampling/random_walks.cu src/cores/core_number.cu src/traversal/two_hop_neighbors.cu src/components/connectivity.cu diff --git a/cpp/include/algorithms.hpp b/cpp/include/algorithms.hpp index b8706d81e21..0b45b799357 100644 --- a/cpp/include/algorithms.hpp +++ b/cpp/include/algorithms.hpp @@ -1252,5 +1252,33 @@ extract_ego(raft::handle_t const &handle, vertex_t *source_vertex, vertex_t n_subgraphs, vertex_t radius); + +/** + * @brief returns random walks (RW) from starting sources, where each path is of given maximum + * length. Uniform distribution is assumed for the random engine. + * + * @tparam graph_t Type of graph/view (typically, graph_view_t). + * @tparam index_t Type used to store indexing and sizes. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph Graph (view )object to generate RW on. + * @param ptr_d_start Device pointer to set of starting vertex indices for the RW. + * @param num_paths = number(paths). + * @param max_depth maximum length of RWs. + * @return std::tuple, device_vec_t, + * device_vec_t> Triplet of coalesced RW paths, with corresponding edge weights for + * each, and corresponding path sizes. This is meant to minimize the number of DF's to be passed to + * the Python layer. The meaning of "coalesced" here is that a 2D array of paths of different sizes + * is represented as a 1D array. 
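+ * Illustrative example of the coalesced layout: two walks {2, 5, 3} and {4, 1} are
+ * returned as paths = [2, 5, 3, 4, 1], weights = [w(2,5), w(5,3), w(4,1)], and
+ * sizes = [3, 2].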
+ */ +template +std::tuple, + rmm::device_uvector, + rmm::device_uvector> +random_walks(raft::handle_t const &handle, + graph_t const &graph, + typename graph_t::vertex_type const *ptr_d_start, + index_t num_paths, + index_t max_depth); } // namespace experimental } // namespace cugraph diff --git a/cpp/include/utilities/cython.hpp b/cpp/include/utilities/cython.hpp index 98e850abbf0..a58331d465a 100644 --- a/cpp/include/utilities/cython.hpp +++ b/cpp/include/utilities/cython.hpp @@ -19,6 +19,7 @@ #include #include #include +#include namespace cugraph { namespace cython { @@ -185,6 +186,19 @@ struct major_minor_weights_t { rmm::device_uvector shuffled_weights_; }; +// aggregate for random_walks() return type +// to be exposed to cython: +// +struct random_walk_ret_t { + size_t coalesced_sz_v_; + size_t coalesced_sz_w_; + size_t num_paths_; + size_t max_depth_; + std::unique_ptr d_coalesced_v_; + std::unique_ptr d_coalesced_w_; + std::unique_ptr d_sizes_; +}; + // wrapper for renumber_edgelist() return // (unrenumbering maps, etc.) // @@ -442,6 +456,16 @@ std::unique_ptr call_egonet(raft::handle_t const& handle, vertex_t* source_vertex, vertex_t n_subgraphs, vertex_t radius); +// wrapper for random_walks. +// +template +std::enable_if_t::value, + std::unique_ptr> +call_random_walks(raft::handle_t const& handle, + graph_container_t const& graph_container, + vertex_t const* ptr_start_set, + edge_t num_paths, + edge_t max_depth); // wrapper for shuffling: // diff --git a/cpp/include/utilities/graph_traits.hpp b/cpp/include/utilities/graph_traits.hpp new file mode 100644 index 00000000000..363a13190be --- /dev/null +++ b/cpp/include/utilities/graph_traits.hpp @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace cugraph { +namespace experimental { + +// primary template: +// +template +struct is_one_of; // purposely empty + +// partial specializations: +// +template +struct is_one_of { + static constexpr bool value = std::is_same::value || is_one_of::value; +}; + +template +struct is_one_of { + static constexpr bool value = false; +}; + +// meta-function that constrains +// vertex_t and edge_t template param candidates: +// +template +struct is_vertex_edge_combo { + static constexpr bool value = is_one_of::value && + is_one_of::value && + (sizeof(vertex_t) <= sizeof(edge_t)); +}; + +// meta-function that constrains +// all 3 template param candidates: +// +template +struct is_candidate { + static constexpr bool value = + is_vertex_edge_combo::value && is_one_of::value; +}; + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/src/experimental/random_walks.cuh b/cpp/src/experimental/random_walks.cuh new file mode 100644 index 00000000000..aea8f3d8420 --- /dev/null +++ b/cpp/src/experimental/random_walks.cuh @@ -0,0 +1,887 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Andrei Schaffer, aschaffer@nvidia.com +// +#pragma once + +#include + +#include + +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace cugraph { +namespace experimental { + +namespace detail { + +template +using device_vec_t = rmm::device_uvector; + +template +using device_v_it = typename device_vec_t::iterator; + +template +value_t* raw_ptr(device_vec_t& dv) +{ + return dv.data(); +} + +template +value_t const* raw_const_ptr(device_vec_t const& dv) +{ + return dv.data(); +} + +template +struct device_const_vector_view { + device_const_vector_view(value_t const* d_buffer, index_t size) : d_buffer_(d_buffer), size_(size) + { + } + + device_const_vector_view(device_const_vector_view const& other) = delete; + device_const_vector_view& operator=(device_const_vector_view const& other) = delete; + + device_const_vector_view(device_const_vector_view&& other) + { + d_buffer_ = other.d_buffer_; + size_ = other.size_; + } + device_const_vector_view& operator=(device_const_vector_view&& other) + { + d_buffer_ = other.d_buffer_; + size_ = other.size_; + + return *this; + } + + value_t const* begin(void) const { return d_buffer_; } + + value_t const* end() const { return d_buffer_ + size_; } + + index_t size(void) const { return size_; } + + private: + value_t const* d_buffer_{nullptr}; + index_t size_; +}; + +// raft random generator: +// (using upper-bound cached "map" +// giving out_deg(v) for each v in [0, |V|); +// and a pre-generated vector of float random values +// in [0,1] to be brought into [0, d_ub[v])) +// +template +struct rrandom_gen_t { + using seed_type = seed_t; + using real_type = real_t; + + rrandom_gen_t(raft::handle_t const& handle, + index_t num_paths, + device_vec_t& d_random, // scratch-pad, non-coalesced + device_vec_t const& d_crt_out_deg, // non-coalesced + seed_t seed = seed_t{}) + : handle_(handle), + seed_(seed), + num_paths_(num_paths), + d_ptr_out_degs_(raw_const_ptr(d_crt_out_deg)), + d_ptr_random_(raw_ptr(d_random)) + { + auto rnd_sz = d_random.size(); + + CUGRAPH_EXPECTS(rnd_sz >= static_cast(num_paths), + "Un-allocated random buffer."); + + // done in constructor; + // this must be done at each step, + // but this object is constructed at each step; + // + raft::random::Rng rng(seed_); + rng.uniform( + d_ptr_random_, num_paths, real_t{0.0}, real_t{1.0}, handle.get_stream()); + } + + // in place: + // for each v in [0, num_paths) { + // if out_deg(v) > 0 + // d_col_indx[v] = random index in [0, out_deg(v)) + //} + void generate_col_indices(device_vec_t& d_col_indx) const + { + thrust::transform_if( + rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + d_ptr_random_, + d_ptr_random_ + num_paths_, // input1 + d_ptr_out_degs_, // input2 + d_ptr_out_degs_, // also stencil + d_col_indx.begin(), + [] __device__(real_t 
rnd_vindx, edge_t crt_out_deg) { + real_t max_ub = static_cast(crt_out_deg - 1); + auto interp_vindx = rnd_vindx * max_ub + real_t{.5}; + vertex_t v_indx = static_cast(interp_vindx); + return (v_indx >= crt_out_deg ? crt_out_deg - 1 : v_indx); + }, + [] __device__(auto crt_out_deg) { return crt_out_deg > 0; }); + } + + private: + raft::handle_t const& handle_; + index_t num_paths_; + edge_t const* d_ptr_out_degs_; // device buffer with out-deg of current set of vertices (most + // recent vertex in each path); size = num_paths_ + real_t* d_ptr_random_; // device buffer with real random values; size = num_paths_ + seed_t seed_; // seed to be used for current batch +}; + +// seeding policy: time (clock) dependent, +// to avoid RW calls repeating same random data: +// +template +struct clock_seeding_t { + clock_seeding_t(void) = default; + + seed_t operator()(void) { return static_cast(std::time(nullptr)); } +}; + +// seeding policy: fixed for debug/testing repro +// +template +struct fixed_seeding_t { + // purposely no default cnstr. + + fixed_seeding_t(seed_t seed) : seed_(seed) {} + seed_t operator()(void) { return seed_; } + + private: + seed_t seed_; +}; + +// classes abstracting the next vertex extraction mechanism: +// +// primary template, purposely undefined +template +struct col_indx_extract_t; + +// specialization for single-gpu functionality: +// +template +struct col_indx_extract_t> { + using vertex_t = typename graph_t::vertex_type; + using edge_t = typename graph_t::edge_type; + using weight_t = typename graph_t::weight_type; + + col_indx_extract_t(raft::handle_t const& handle, + device_vec_t const& d_indices, + device_vec_t const& d_offsets, + device_vec_t const& d_values, + device_vec_t const& d_crt_out_degs, + device_vec_t const& d_sizes, + index_t num_paths, + index_t max_depth) + : handle_(handle), + col_indices_(raw_const_ptr(d_indices)), + row_offsets_(raw_const_ptr(d_offsets)), + values_(raw_const_ptr(d_values)), + out_degs_(raw_const_ptr(d_crt_out_degs)), + sizes_(raw_const_ptr(d_sizes)), + num_paths_(num_paths), + max_depth_(max_depth) + { + } + + col_indx_extract_t(raft::handle_t const& handle, + vertex_t const* p_d_indices, + edge_t const* p_d_offsets, + weight_t const* p_d_values, + edge_t const* p_d_crt_out_degs, + index_t const* p_d_sizes, + index_t num_paths, + index_t max_depth) + : handle_(handle), + col_indices_(p_d_indices), + row_offsets_(p_d_offsets), + values_(p_d_values), + out_degs_(p_d_crt_out_degs), + sizes_(p_d_sizes), + num_paths_(num_paths), + max_depth_(max_depth) + { + } + + col_indx_extract_t(raft::handle_t const& handle, + graph_t const& graph, + edge_t const* p_d_crt_out_degs, + index_t const* p_d_sizes, + index_t num_paths, + index_t max_depth) + : handle_(handle), + col_indices_(graph.indices()), + row_offsets_(graph.offsets()), + values_(graph.weights()), + out_degs_(p_d_crt_out_degs), + sizes_(p_d_sizes), + num_paths_(num_paths), + max_depth_(max_depth) + { + } + + // in-place extractor of next set of vertices and weights, + // (d_v_next_vertices, d_v_next_weights), + // given start set of vertices. 
d_v_src_vertices, + // and corresponding column index set, d_v_col_indx: + // + // for each indx in [0, num_paths){ + // v_indx = d_v_src_vertices[indx*max_depth + d_sizes[indx] - 1]; + // if( out_degs_[v_indx] > 0 ) { + // start_row = row_offsets_[v_indx]; + // delta = d_v_col_indx[indx]; + // d_v_next_vertices[indx] = col_indices_[start_row + delta]; + // } + // (use tranform_if() with transform iterator) + // + void operator()( + device_vec_t const& d_coalesced_src_v, // in: coalesced vector of vertices + device_vec_t const& + d_v_col_indx, // in: column indices, given by stepper's random engine + device_vec_t& d_v_next_vertices, // out: set of destination vertices, for next step + device_vec_t& + d_v_next_weights) // out: set of weights between src and destination vertices, for next step + const + { + thrust::transform_if( + rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_paths_), // input1 + d_v_col_indx.begin(), // input2 + out_degs_, // stencil + thrust::make_zip_iterator( + thrust::make_tuple(d_v_next_vertices.begin(), d_v_next_weights.begin())), // output + [max_depth = max_depth_, + ptr_d_sizes = sizes_, + ptr_d_coalesced_v = raw_const_ptr(d_coalesced_src_v), + row_offsets = row_offsets_, + col_indices = col_indices_, + values = values_] __device__(auto indx, auto col_indx) { + auto delta = ptr_d_sizes[indx] - 1; + auto v_indx = ptr_d_coalesced_v[indx * max_depth + delta]; + auto start_row = row_offsets[v_indx]; + return thrust::make_tuple(col_indices[start_row + col_indx], values[start_row + col_indx]); + }, + [] __device__(auto crt_out_deg) { return crt_out_deg > 0; }); + } + + private: + raft::handle_t const& handle_; + vertex_t const* col_indices_; + edge_t const* row_offsets_; + weight_t const* values_; + + edge_t const* out_degs_; + index_t const* sizes_; + index_t num_paths_; + index_t max_depth_; +}; + +/** + * @brief Class abstracting the RW initialization, stepping, and stopping functionality + * The outline of the algorithm is as follows: + * + * (1) vertex sets are coalesced into d_coalesced_v, + * weight sets are coalesced into d_coalesced_w; + * i.e., the 2 coalesced vectors are allocated to + * num_paths * max_depth, and num_paths * (max_depth -1), respectively + * (since each path has a number of edges equal one + * less than the number of vertices); + * d_coalesced_v is initialized for each i*max_depth entry + * (i=0,,,,num_paths-1) to the corresponding starting vertices; + * (2) d_sizes maintains the current size is for each path; + * Note that a path may end prematurely if it reaches a sink vertex; + * (3) d_crt_out_degs maintains the out-degree of each of the latest + * vertices in the path; i.e., if N(v) := set of destination + * vertices from v, then this vector stores |N(v)| + * for last v in each path; i.e., + * d_crt_out_degs[i] = + * out-degree( d_coalesced_v[i*max_depth + d_sizes[i]-1] ), + * for i in {0,..., num_paths-1}; + * (4) a set of num_paths floating point numbers between [0,1] + * are generated at each step; then they get translated into + * _indices_ k in {0,...d_crt_out_degs[i]-1}; + * (5) the next vertex v is then picked as the k-th out-neighbor: + * next(v) = N(v)[k]; + * (6) d_sizes are incremented accordingly; i.e., for those paths whose + * corresponding last vertex has out-degree > 0; + * (7) then next(v) and corresponding weight of (v, next(v)) are stored + * at appropriate location in their corresponding coalesced vectors; + * (8) the client of 
this class (the random_walks() function) then repeats + * this process max_depth times or until all paths + * have reached sinks; i.e., d_crt_out_degs = {0, 0,...,0}, + * whichever comes first; + * (9) in the end some post-processing is done (stop()) to remove + * unused entries from the 2 coalesced vectors; + * (10) the triplet made of the 2 coalesced vectors and d_sizes is then returned; + * + */ +template , + typename index_t = typename graph_t::edge_type> +struct random_walker_t { + using vertex_t = typename graph_t::vertex_type; + using edge_t = typename graph_t::edge_type; + using weight_t = typename graph_t::weight_type; + using seed_t = typename random_engine_t::seed_type; + using real_t = typename random_engine_t::real_type; + + random_walker_t(raft::handle_t const& handle, + graph_t const& graph, + index_t num_paths, + index_t max_depth) + : handle_(handle), + num_paths_(num_paths), + max_depth_(max_depth), + d_cached_out_degs_(graph.compute_out_degrees(handle_)) + { + } + + // for each i in [0..num_paths_) { + // d_paths_v_set[i*max_depth] = d_src_init_v[i]; + // + void start(device_const_vector_view& d_src_init_v, // in: start set + device_vec_t& d_paths_v_set, // out: coalesced v + device_vec_t& d_sizes) const // out: init sizes to {1,...} + { + // intialize path sizes to 1, as they contain at least one vertex each: + // the initial set: d_src_init_v; + // + thrust::copy_n(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + thrust::make_constant_iterator(1), + num_paths_, + d_sizes.begin()); + + // scatter d_src_init_v to coalesced vertex vector: + // + auto dlambda = [stride = max_depth_] __device__(auto indx) { return indx * stride; }; + + // use the transform iterator as map: + // + auto map_it_begin = + thrust::make_transform_iterator(thrust::make_counting_iterator(0), dlambda); + + thrust::scatter(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + d_src_init_v.begin(), + d_src_init_v.end(), + map_it_begin, + d_paths_v_set.begin()); + } + + // overload for start() with device_uvector d_v_start + // (handy for testing) + // + void start(device_vec_t const& d_start, // in: start set + device_vec_t& d_paths_v_set, // out: coalesced v + device_vec_t& d_sizes) const // out: init sizes to {1,...} + { + device_const_vector_view d_start_cview{d_start.data(), + static_cast(d_start.size())}; + + start(d_start_cview, d_paths_v_set, d_sizes); + } + + // in-place updates its arguments from one step to next + // (to avoid copying); all "crt" arguments are updated at each step() + // and passed as scratchpad space to avoid copying them + // from one step to another + // + // take one step in sync for all paths that have not reached sinks: + // + void step( + graph_t const& graph, + seed_t seed, + device_vec_t& d_coalesced_v, // crt coalesced vertex set + device_vec_t& d_coalesced_w, // crt coalesced weight set + device_vec_t& d_paths_sz, // crt paths sizes + device_vec_t& d_crt_out_degs, // crt out-degs for current set of vertices + device_vec_t& d_random, // crt set of random real values + device_vec_t& d_col_indx, // crt col col indices to be used for retrieving next step + device_vec_t& d_next_v, // crt set of destination vertices, for next step + device_vec_t& d_next_w) + const // set of weights between src and destination vertices, for next step + { + // update crt snapshot of out-degs, + // from cached out degs, using + // latest vertex in each path as source: + // + gather_from_coalesced( + d_coalesced_v, d_cached_out_degs_, d_paths_sz, 
d_crt_out_degs, max_depth_, num_paths_); + + // generate random destination indices: + // + random_engine_t rgen(handle_, num_paths_, d_random, d_crt_out_degs, seed); + + rgen.generate_col_indices(d_col_indx); + + // dst extraction from dst indices: + // + col_indx_extract_t col_extractor(handle_, + graph, + raw_const_ptr(d_crt_out_degs), + raw_const_ptr(d_paths_sz), + num_paths_, + max_depth_); + + // The following steps update the next entry in each path, + // except the paths that reached sinks; + // + // for each indx in [0..num_paths) { + // v_indx = d_v_rnd_n_indx[indx]; + // + // -- get the `v_indx`-th out-vertex of d_v_paths_v_set[indx] vertex: + // -- also, note the size deltas increased by 1 in dst (d_sizes[]): + // + // d_coalesced_v[indx*num_paths + d_sizes[indx]] = + // get_out_vertex(graph, d_coalesced_v[indx*num_paths + d_sizes[indx] -1)], v_indx); + // d_coalesced_w[indx*(num_paths-1) + d_sizes[indx] - 1] = + // get_out_edge_weight(graph, d_coalesced_v[indx*num_paths + d_sizes[indx]-2], v_indx); + // + // (1) generate actual vertex destinations: + // + col_extractor(d_coalesced_v, d_col_indx, d_next_v, d_next_w); + + // (2) update path sizes: + // + update_path_sizes(d_crt_out_degs, d_paths_sz); + + // (3) actual coalesced updates: + // + scatter_vertices(d_next_v, d_coalesced_v, d_crt_out_degs, d_paths_sz); + scatter_weights(d_next_w, d_coalesced_w, d_crt_out_degs, d_paths_sz); + } + + // returns true if all paths reached sinks: + // + bool all_paths_stopped(device_vec_t const& d_crt_out_degs) const + { + auto how_many_stopped = + thrust::count_if(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + d_crt_out_degs.begin(), + d_crt_out_degs.end(), + [] __device__(auto crt_out_deg) { return crt_out_deg == 0; }); + return (static_cast(how_many_stopped) == d_crt_out_degs.size()); + } + + // wrap-up, post-process: + // truncate v_set, w_set to actual space used + // + void stop(device_vec_t& d_coalesced_v, // coalesced vertex set + device_vec_t& d_coalesced_w, // coalesced weight set + device_vec_t const& d_sizes) const // paths sizes + { + assert(max_depth_ > 1); // else, no need to step; and no edges + + index_t const* ptr_d_sizes = d_sizes.data(); + + auto predicate_v = [max_depth = max_depth_, ptr_d_sizes] __device__(auto indx) { + auto row_indx = indx / max_depth; + auto col_indx = indx % max_depth; + + return (col_indx >= ptr_d_sizes[row_indx]); + }; + + auto predicate_w = [max_depth = max_depth_, ptr_d_sizes] __device__(auto indx) { + auto row_indx = indx / (max_depth - 1); + auto col_indx = indx % (max_depth - 1); + + return (col_indx >= ptr_d_sizes[row_indx] - 1); + }; + + auto new_end_v = + thrust::remove_if(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + d_coalesced_v.begin(), + d_coalesced_v.end(), + thrust::make_counting_iterator(0), + predicate_v); + + auto new_end_w = + thrust::remove_if(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + d_coalesced_w.begin(), + d_coalesced_w.end(), + thrust::make_counting_iterator(0), + predicate_w); + + CUDA_TRY(cudaStreamSynchronize(handle_.get_stream())); + + d_coalesced_v.resize(thrust::distance(d_coalesced_v.begin(), new_end_v), handle_.get_stream()); + d_coalesced_w.resize(thrust::distance(d_coalesced_w.begin(), new_end_w), handle_.get_stream()); + } + + // in-place non-static (needs handle_): + // for indx in [0, nelems): + // gather d_result[indx] = d_src[d_coalesced[indx*stride + d_sizes[indx] -1]] + // + template + void gather_from_coalesced( + device_vec_t const& 
d_coalesced, // |gather map| = stride*nelems + device_vec_t const& d_src, // |gather input| = nelems + device_vec_t const& d_sizes, // |paths sizes| = nelems, elems in [1, stride] + device_vec_t& d_result, // |output| = nelems + index_t stride, // stride = coalesce block size (typically max_depth) + index_t nelems) const // nelems = number of elements to gather (typically num_paths_) + { + vertex_t const* ptr_d_coalesced = raw_const_ptr(d_coalesced); + index_t const* ptr_d_sizes = raw_const_ptr(d_sizes); + + // delta = ptr_d_sizes[indx] - 1 + // + auto dlambda = [stride, ptr_d_sizes, ptr_d_coalesced] __device__(auto indx) { + auto delta = ptr_d_sizes[indx] - 1; + return ptr_d_coalesced[indx * stride + delta]; + }; + + // use the transform iterator as map: + // + auto map_it_begin = + thrust::make_transform_iterator(thrust::make_counting_iterator(0), dlambda); + + thrust::gather(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + map_it_begin, + map_it_begin + nelems, + d_src.begin(), + d_result.begin()); + } + + // in-place non-static (needs handle_); + // pre-condition: path sizes are assumed updated + // to reflect new vertex additions; + // + // for indx in [0, nelems): + // if ( d_crt_out_degs[indx] > 0 ) + // d_coalesced[indx*stride + (d_sizes[indx] - adjust)- 1] = d_src[indx] + // + // adjust := 0 for coalesced vertices; 1 for weights + // (because |edges| = |vertices| - 1, in each path); + // + template + void scatter_to_coalesced( + device_vec_t const& d_src, // |scatter input| = nelems + device_vec_t& d_coalesced, // |scatter input| = stride*nelems + device_vec_t const& d_crt_out_degs, // |current set of vertex out degrees| = nelems, + // to be used as stencil (don't scatter if 0) + device_vec_t const& + d_sizes, // paths sizes used to provide delta in coalesced paths; + // pre-condition: assumed as updated to reflect new vertex additions; + // also, this is the number of _vertices_ in each path; + // hence for scattering weights this needs to be adjusted; hence the `adjust` parameter + index_t + stride, // stride = coalesce block size (max_depth for vertices; max_depth-1 for weights) + index_t nelems, // nelems = number of elements to gather (typically num_paths_) + index_t adjust = 0) + const // adjusting parameter for scattering vertices (0) or weights (1); see above for more; + { + index_t const* ptr_d_sizes = raw_const_ptr(d_sizes); + + auto dlambda = [stride, adjust, ptr_d_sizes] __device__(auto indx) { + auto delta = ptr_d_sizes[indx] - adjust - 1; + return indx * stride + delta; + }; + + // use the transform iterator as map: + // + auto map_it_begin = + thrust::make_transform_iterator(thrust::make_counting_iterator(0), dlambda); + + thrust::scatter_if(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + d_src.begin(), + d_src.end(), + map_it_begin, + d_crt_out_degs.begin(), + d_coalesced.begin(), + [] __device__(auto crt_out_deg) { + return crt_out_deg > 0; // predicate + }); + } + + // updates the entries in the corresponding coalesced vector, + // for which out_deg > 0 + // + void scatter_vertices(device_vec_t const& d_src, + device_vec_t& d_coalesced, + device_vec_t const& d_crt_out_degs, + device_vec_t const& d_sizes) const + { + scatter_to_coalesced(d_src, d_coalesced, d_crt_out_degs, d_sizes, max_depth_, num_paths_); + } + // + void scatter_weights(device_vec_t const& d_src, + device_vec_t& d_coalesced, + device_vec_t const& d_crt_out_degs, + device_vec_t const& d_sizes) const + { + scatter_to_coalesced( + d_src, d_coalesced, 
d_crt_out_degs, d_sizes, max_depth_ - 1, num_paths_, 1); + } + + // in-place update (increment) path sizes for paths + // that have not reached a sink; i.e., for which + // d_crt_out_degs[indx]>0: + // + void update_path_sizes(device_vec_t const& d_crt_out_degs, + device_vec_t& d_sizes) const + { + thrust::transform_if( + rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + d_sizes.begin(), + d_sizes.end(), // input + d_crt_out_degs.begin(), // stencil + d_sizes.begin(), // output: in-place + [] __device__(auto crt_sz) { return crt_sz + 1; }, + [] __device__(auto crt_out_deg) { return crt_out_deg > 0; }); + } + + device_vec_t const& get_out_degs(void) const { return d_cached_out_degs_; } + + private: + raft::handle_t const& handle_; + index_t num_paths_; + index_t max_depth_; + device_vec_t d_cached_out_degs_; +}; + +/** + * @brief returns random walks (RW) from starting sources, where each path is of given maximum + * length. Single-GPU specialization. + * + * @tparam graph_t Type of graph (view). + * @tparam random_engine_t Type of random engine used to generate RW. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph Graph object to generate RW on. + * @param d_v_start Device (view) set of starting vertex indices for the RW. + * number(paths) == d_v_start.size(). + * @param max_depth maximum length of RWs. + * @return std::tuple, device_vec_t, + * device_vec_t, seed> Quadruplet of coalesced RW paths, with corresponding edge weights + * for each, and corresponding path sizes. This is meant to minimize the number of DF's to be passed + * to the Python layer. Also returning seed for testing / debugging repro. The meaning of + * "coalesced" here is that a 2D array of paths of different sizes is represented as a 1D array. 
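+ * @note The seed element of the returned quadruplet is exposed only for testing /
+ * debugging reproducibility; the public random_walks() wrapper discards it.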
+ */ +template , + typename seeding_policy_t = clock_seeding_t, + typename index_t = typename graph_t::edge_type> +std::enable_if_t, + device_vec_t, + device_vec_t, + typename random_engine_t::seed_type>> +random_walks_impl(raft::handle_t const& handle, + graph_t const& graph, + device_const_vector_view& d_v_start, + index_t max_depth, + seeding_policy_t seeder = clock_seeding_t{}) +{ + using vertex_t = typename graph_t::vertex_type; + using edge_t = typename graph_t::edge_type; + using weight_t = typename graph_t::weight_type; + using seed_t = typename random_engine_t::seed_type; + using real_t = typename random_engine_t::real_type; + + vertex_t num_vertices = graph.get_number_of_vertices(); + + auto how_many_valid = + thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + d_v_start.begin(), + d_v_start.end(), + [num_vertices] __device__(auto crt_vertex) { + return (crt_vertex >= 0) && (crt_vertex < num_vertices); + }); + + CUGRAPH_EXPECTS(static_cast(how_many_valid) == d_v_start.size(), + "Invalid set of starting vertices."); + + auto num_paths = d_v_start.size(); + auto stream = handle.get_stream(); + + random_walker_t rand_walker{ + handle, graph, static_cast(num_paths), static_cast(max_depth)}; + + // pre-allocate num_paths * max_depth; + // + auto coalesced_sz = num_paths * max_depth; + device_vec_t d_coalesced_v(coalesced_sz, stream); // coalesced vertex set + device_vec_t d_coalesced_w(coalesced_sz, stream); // coalesced weight set + device_vec_t d_paths_sz(num_paths, stream); // paths sizes + device_vec_t d_crt_out_degs(num_paths, stream); // out-degs for current set of vertices + device_vec_t d_random(num_paths, stream); + device_vec_t d_col_indx(num_paths, stream); + device_vec_t d_next_v(num_paths, stream); + device_vec_t d_next_w(num_paths, stream); + + // abstracted out seed initialization: + // + seed_t seed0 = static_cast(seeder()); + + // very first vertex, for each path: + // + rand_walker.start(d_v_start, d_coalesced_v, d_paths_sz); + + // start from 1, as 0-th was initialized above: + // + for (decltype(max_depth) step_indx = 1; step_indx < max_depth; ++step_indx) { + // take one-step in-sync for each path in parallel: + // + rand_walker.step(graph, + seed0 + static_cast(step_indx), + d_coalesced_v, + d_coalesced_w, + d_paths_sz, + d_crt_out_degs, + d_random, + d_col_indx, + d_next_v, + d_next_w); + + // early exit: all paths have reached sinks: + // + if (rand_walker.all_paths_stopped(d_crt_out_degs)) break; + } + + // wrap-up, post-process: + // truncate v_set, w_set to actual space used + // + rand_walker.stop(d_coalesced_v, d_coalesced_w, d_paths_sz); + + // because device_uvector is not copy-cnstr-able: + // + return std::make_tuple(std::move(d_coalesced_v), + std::move(d_coalesced_w), + std::move(d_paths_sz), + seed0); // also return seed for repro +} + +/** + * @brief returns random walks (RW) from starting sources, where each path is of given maximum + * length. Multi-GPU specialization. + * + * @tparam graph_t Type of graph (view). + * @tparam random_engine_t Type of random engine used to generate RW. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph Graph object to generate RW on. + * @param d_v_start Device (view) set of starting vertex indices for the RW. number(RW) == + * d_v_start.size(). + * @param max_depth maximum length of RWs. 
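+ * @param seeder Seeding policy functor used to generate the base seed of the random
+ * engine (defaults to clock-based seeding; a fixed-seed policy exists for repro).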
+ * @return std::tuple, device_vec_t, + * device_vec_t, seed> Quadruplet of coalesced RW paths, with corresponding edge weights + * for each, and coresponding path sizes. This is meant to minimize the number of DF's to be passed + * to the Python layer. Also returning seed for testing / debugging repro. The meaning of + * "coalesced" here is that a 2D array of paths of different sizes is represented as a 1D array. + */ +template , + typename seeding_policy_t = clock_seeding_t, + typename index_t = typename graph_t::edge_type> +std::enable_if_t, + device_vec_t, + device_vec_t, + typename random_engine_t::seed_type>> +random_walks_impl(raft::handle_t const& handle, + graph_t const& graph, + device_const_vector_view& d_v_start, + index_t max_depth, + seeding_policy_t seeder = clock_seeding_t{}) +{ + CUGRAPH_FAIL("Not implemented yet."); +} + +} // namespace detail + +/** + * @brief returns random walks (RW) from starting sources, where each path is of given maximum + * length. Uniform distribution is assumed for the random engine. + * + * @tparam graph_t Type of graph (view). + * @tparam index_t Type used to store indexing and sizes. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph Graph object to generate RW on. + * @param ptr_d_start Device pointer to set of starting vertex indices for the RW. + * @param num_paths = number(paths). + * @param max_depth maximum length of RWs. + * @return std::tuple, device_vec_t, + * device_vec_t> Triplet of coalesced RW paths, with corresponding edge weights for + * each, and coresponding path sizes. This is meant to minimize the number of DF's to be passed to + * the Python layer. + */ +template +std::tuple, + rmm::device_uvector, + rmm::device_uvector> +random_walks(raft::handle_t const& handle, + graph_t const& graph, + typename graph_t::vertex_type const* ptr_d_start, + index_t num_paths, + index_t max_depth) +{ + using vertex_t = typename graph_t::vertex_type; + + // 0-copy const device view: + // + detail::device_const_vector_view d_v_start{ptr_d_start, num_paths}; + + auto quad_tuple = detail::random_walks_impl(handle, graph, d_v_start, max_depth); + // ignore last element of the quad, seed, + // since it's meant for testing / debugging, only: + // + return std::make_tuple(std::move(std::get<0>(quad_tuple)), + std::move(std::get<1>(quad_tuple)), + std::move(std::get<2>(quad_tuple))); +} +} // namespace experimental +} // namespace cugraph diff --git a/cpp/src/sampling/random_walks.cu b/cpp/src/sampling/random_walks.cu new file mode 100644 index 00000000000..88d5d9ed5c8 --- /dev/null +++ b/cpp/src/sampling/random_walks.cu @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// Andrei Schaffer, aschaffer@nvidia.com +// +#include +#include + +namespace cugraph { +namespace experimental { +// template explicit instantiation directives (EIDir's): +// +// SG FP32{ +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + random_walks(raft::handle_t const& handle, + graph_view_t const& gview, + int32_t const* ptr_d_start, + int32_t num_paths, + int32_t max_depth); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + random_walks(raft::handle_t const& handle, + graph_view_t const& gview, + int32_t const* ptr_d_start, + int64_t num_paths, + int64_t max_depth); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + random_walks(raft::handle_t const& handle, + graph_view_t const& gview, + int64_t const* ptr_d_start, + int64_t num_paths, + int64_t max_depth); +//} +// +// SG FP64{ +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + random_walks(raft::handle_t const& handle, + graph_view_t const& gview, + int32_t const* ptr_d_start, + int32_t num_paths, + int32_t max_depth); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + random_walks(raft::handle_t const& handle, + graph_view_t const& gview, + int32_t const* ptr_d_start, + int64_t num_paths, + int64_t max_depth); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + random_walks(raft::handle_t const& handle, + graph_view_t const& gview, + int64_t const* ptr_d_start, + int64_t num_paths, + int64_t max_depth); +//} +} // namespace experimental +} // namespace cugraph diff --git a/cpp/src/utilities/cython.cu b/cpp/src/utilities/cython.cu index 5382b4856f3..a9e3146bbcd 100644 --- a/cpp/src/utilities/cython.cu +++ b/cpp/src/utilities/cython.cu @@ -696,6 +696,61 @@ std::unique_ptr call_egonet(raft::handle_t const& handle, } } +// Wrapper for random_walks() through a graph container +// to expose the API to cython. 
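+// The device_uvector results are released into the type-erased buffers held by
+// random_walk_ret_t, so the Cython layer can take ownership of the device memory
+// without knowing the concrete vertex/weight types.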
+// +template +std::enable_if_t::value, + std::unique_ptr> +call_random_walks(raft::handle_t const& handle, + graph_container_t const& graph_container, + vertex_t const* ptr_start_set, + edge_t num_paths, + edge_t max_depth) +{ + if (graph_container.weightType == numberTypeEnum::floatType) { + using weight_t = float; + + auto graph = + detail::create_graph(handle, graph_container); + + auto triplet = cugraph::experimental::random_walks( + handle, graph->view(), ptr_start_set, num_paths, max_depth); + + random_walk_ret_t rw_tri{std::get<0>(triplet).size(), + std::get<1>(triplet).size(), + static_cast(num_paths), + static_cast(max_depth), + std::make_unique(std::get<0>(triplet).release()), + std::make_unique(std::get<1>(triplet).release()), + std::make_unique(std::get<2>(triplet).release())}; + + return std::make_unique(std::move(rw_tri)); + + } else if (graph_container.weightType == numberTypeEnum::doubleType) { + using weight_t = double; + + auto graph = + detail::create_graph(handle, graph_container); + + auto triplet = cugraph::experimental::random_walks( + handle, graph->view(), ptr_start_set, num_paths, max_depth); + + random_walk_ret_t rw_tri{std::get<0>(triplet).size(), + std::get<1>(triplet).size(), + static_cast(num_paths), + static_cast(max_depth), + std::make_unique(std::get<0>(triplet).release()), + std::make_unique(std::get<1>(triplet).release()), + std::make_unique(std::get<2>(triplet).release())}; + + return std::make_unique(std::move(rw_tri)); + + } else { + CUGRAPH_FAIL("Unsupported weight type."); + } +} + // Wrapper for calling SSSP through a graph container template void call_sssp(raft::handle_t const& handle, @@ -1038,6 +1093,27 @@ template std::unique_ptr call_egonet( int64_t n_subgraphs, int64_t radius); +template std::unique_ptr call_random_walks( + raft::handle_t const& handle, + graph_container_t const& graph_container, + int32_t const* ptr_start_set, + int32_t num_paths, + int32_t max_depth); + +template std::unique_ptr call_random_walks( + raft::handle_t const& handle, + graph_container_t const& graph_container, + int32_t const* ptr_start_set, + int64_t num_paths, + int64_t max_depth); + +template std::unique_ptr call_random_walks( + raft::handle_t const& handle, + graph_container_t const& graph_container, + int64_t const* ptr_start_set, + int64_t num_paths, + int64_t max_depth); + template void call_sssp(raft::handle_t const& handle, graph_container_t const& graph_container, int32_t* identifiers, diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 5292f9f9997..3b65b0edb29 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -415,6 +415,20 @@ set(EXPERIMENTAL_KATZ_CENTRALITY_TEST_SRCS ConfigureTest(EXPERIMENTAL_KATZ_CENTRALITY_TEST "${EXPERIMENTAL_KATZ_CENTRALITY_TEST_SRCS}") +################################################################################################### +# - Experimental RANDOM_WALKS tests ------------------------------------------------------------ + +set(EXPERIMENTAL_RANDOM_WALKS_TEST_SRCS + "${CMAKE_CURRENT_SOURCE_DIR}/experimental/random_walks_test.cu") + +ConfigureTest(EXPERIMENTAL_RANDOM_WALKS_TEST "${EXPERIMENTAL_RANDOM_WALKS_TEST_SRCS}") + +################################################################################################### +set(EXPERIMENTAL_RANDOM_WALKS_LOW_LEVEL_SRCS + "${CMAKE_CURRENT_SOURCE_DIR}/experimental/rw_low_level_test.cu") + +ConfigureTest(EXPERIMENTAL_RANDOM_WALKS_LOW_LEVEL_TEST "${EXPERIMENTAL_RANDOM_WALKS_LOW_LEVEL_SRCS}") + 
################################################################################################### # - MG tests -------------------------------------------------------------------------------------- diff --git a/cpp/tests/experimental/random_walks_test.cu b/cpp/tests/experimental/random_walks_test.cu new file mode 100644 index 00000000000..9fb1716f62b --- /dev/null +++ b/cpp/tests/experimental/random_walks_test.cu @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "cuda_profiler_api.h" +#include "gtest/gtest.h" + +#include +#include + +#include +#include + +#include +#include +#include + +#include +#include + +#include "random_walks_utils.cuh" + +#include +#include +#include +#include +#include +#include + +namespace { // anonym. +template +void fill_start(raft::handle_t const& handle, + rmm::device_uvector& d_start, + index_t num_vertices) +{ + index_t num_paths = d_start.size(); + + thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_paths), + + d_start.begin(), + [num_vertices] __device__(auto indx) { return indx % num_vertices; }); +} +} // namespace + +struct RandomWalks_Usecase { + std::string graph_file_full_path{}; + bool test_weighted{false}; + + RandomWalks_Usecase(std::string const& graph_file_path, bool test_weighted) + : test_weighted(test_weighted) + { + if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { + graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; + } else { + graph_file_full_path = graph_file_path; + } + }; +}; + +class Tests_RandomWalks : public ::testing::TestWithParam { + public: + Tests_RandomWalks() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + template + void run_current_test(RandomWalks_Usecase const& configuration) + { + raft::handle_t handle{}; + + // debuf info: + // + // std::cout << "read graph file: " << configuration.graph_file_full_path << std::endl; + + cugraph::experimental::graph_t graph(handle); + std::tie(graph, std::ignore) = + cugraph::test::read_graph_from_matrix_market_file( + handle, configuration.graph_file_full_path, configuration.test_weighted, false); + + auto graph_view = graph.view(); + + // call random_walks: + start_random_walks(graph_view); + } + + template + void start_random_walks(graph_vt const& graph_view) + { + using vertex_t = typename graph_vt::vertex_type; + using edge_t = typename graph_vt::edge_type; + using weight_t = typename graph_vt::weight_type; + + raft::handle_t handle{}; + edge_t num_paths = 10; + rmm::device_uvector d_start(num_paths, handle.get_stream()); + + vertex_t num_vertices = graph_view.get_number_of_vertices(); + fill_start(handle, d_start, num_vertices); + + // 0-copy const device view: + // + cugraph::experimental::detail::device_const_vector_view d_start_view{ + 
d_start.data(), num_paths}; + + edge_t max_depth{10}; + + auto ret_tuple = + cugraph::experimental::detail::random_walks_impl(handle, graph_view, d_start_view, max_depth); + + // check results: + // + bool test_all_paths = cugraph::test::host_check_rw_paths( + handle, graph_view, std::get<0>(ret_tuple), std::get<1>(ret_tuple), std::get<2>(ret_tuple)); + + if (!test_all_paths) + std::cout << "starting seed on failure: " << std::get<3>(ret_tuple) << '\n'; + + ASSERT_TRUE(test_all_paths); + } +}; + +TEST_P(Tests_RandomWalks, Initialize_i32_i32_f) +{ + run_current_test(GetParam()); +} + +INSTANTIATE_TEST_CASE_P( + simple_test, + Tests_RandomWalks, + ::testing::Values(RandomWalks_Usecase("test/datasets/karate.mtx", true), + RandomWalks_Usecase("test/datasets/web-Google.mtx", true), + RandomWalks_Usecase("test/datasets/ljournal-2008.mtx", true), + RandomWalks_Usecase("test/datasets/webbase-1M.mtx", true))); + +CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/random_walks_utils.cuh b/cpp/tests/experimental/random_walks_utils.cuh new file mode 100644 index 00000000000..863094dc310 --- /dev/null +++ b/cpp/tests/experimental/random_walks_utils.cuh @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + +// utilities for testing / verification of Random Walks functionality: +// +namespace cugraph { +namespace test { + +template +using vector_test_t = cugraph::experimental::detail::device_vec_t; // for debug purposes + +// host side utility to check a if a sequence of vertices is connected: +// +template +bool host_check_path(std::vector const& row_offsets, + std::vector const& col_inds, + std::vector const& values, + typename std::vector::const_iterator v_path_begin, + typename std::vector::const_iterator v_path_end, + typename std::vector::const_iterator w_path_begin) +{ + bool assert1 = (row_offsets.size() > 0); + bool assert2 = (col_inds.size() == values.size()); + + vertex_t num_rows = row_offsets.size() - 1; + edge_t nnz = row_offsets.back(); + + bool assert3 = (nnz == static_cast(col_inds.size())); + if (assert1 == false || assert2 == false || assert3 == false) { + std::cout << "CSR inconsistency\n"; + return false; + } + + auto it_w = w_path_begin; + for (auto it_v = v_path_begin; it_v != v_path_end - 1; ++it_v, ++it_w) { + auto crt_vertex = *it_v; + auto next_vertex = *(it_v + 1); + + auto begin = col_inds.begin() + row_offsets[crt_vertex]; + auto end = col_inds.begin() + row_offsets[crt_vertex + 1]; + auto found_next = std::find_if( + begin, end, [next_vertex](auto dst_vertex) { return dst_vertex == next_vertex; }); + if (found_next == end) { + std::cout << "vertex not found: " << next_vertex << " as neighbor of " << crt_vertex << '\n'; + return false; + } + + auto delta = row_offsets[crt_vertex] + std::distance(begin, found_next); + + // std::cout << "delta in ci: " << delta << '\n'; + auto found_edge = values.begin() + delta; + if (*found_edge != *it_w) { + std::cout << "weight not found: " << *found_edge << " between " << crt_vertex << " and " + << next_vertex << '\n'; + return false; + } + } + return true; +} + +template +bool host_check_rw_paths( + raft::handle_t const& handle, + cugraph::experimental::graph_view_t const& graph_view, + vector_test_t const& d_coalesced_v, + vector_test_t const& d_coalesced_w, + vector_test_t const& d_sizes) +{ + edge_t num_edges = graph_view.get_number_of_edges(); + vertex_t num_vertices = graph_view.get_number_of_vertices(); + + edge_t const* offsets = graph_view.offsets(); + vertex_t const* indices = graph_view.indices(); + weight_t const* values = graph_view.weights(); + + std::vector v_ro(num_vertices + 1); + std::vector v_ci(num_edges); + std::vector v_vals(num_edges); + + raft::update_host(v_ro.data(), offsets, v_ro.size(), handle.get_stream()); + raft::update_host(v_ci.data(), indices, v_ci.size(), handle.get_stream()); + raft::update_host(v_vals.data(), values, v_vals.size(), handle.get_stream()); + + std::vector v_coalesced(d_coalesced_v.size()); + std::vector w_coalesced(d_coalesced_w.size()); + std::vector v_sizes(d_sizes.size()); + + raft::update_host(v_coalesced.data(), + cugraph::experimental::detail::raw_const_ptr(d_coalesced_v), + d_coalesced_v.size(), + handle.get_stream()); + raft::update_host(w_coalesced.data(), + cugraph::experimental::detail::raw_const_ptr(d_coalesced_w), + d_coalesced_w.size(), + handle.get_stream()); + raft::update_host(v_sizes.data(), + cugraph::experimental::detail::raw_const_ptr(d_sizes), + d_sizes.size(), + handle.get_stream()); + + auto it_v_begin = v_coalesced.begin(); + auto it_w_begin = w_coalesced.begin(); + for (auto&& crt_sz : v_sizes) { + auto it_v_end = it_v_begin + 
crt_sz; + + bool test_path = host_check_path(v_ro, v_ci, v_vals, it_v_begin, it_v_end, it_w_begin); + + it_v_begin = it_v_end; + it_w_begin += crt_sz - 1; + + if (!test_path) { // something went wrong; print to debug (since it's random) + raft::print_host_vector("sizes", v_sizes.data(), v_sizes.size(), std::cout); + + raft::print_host_vector("coalesced v", v_coalesced.data(), v_coalesced.size(), std::cout); + + raft::print_host_vector("coalesced w", w_coalesced.data(), w_coalesced.size(), std::cout); + + return false; + } + } + return true; +} + +} // namespace test +} // namespace cugraph diff --git a/cpp/tests/experimental/rw_low_level_test.cu b/cpp/tests/experimental/rw_low_level_test.cu new file mode 100644 index 00000000000..a32e258d366 --- /dev/null +++ b/cpp/tests/experimental/rw_low_level_test.cu @@ -0,0 +1,783 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include "cuda_profiler_api.h" + +#include +#include + +#include +#include + +#include +#include +#include + +#include +#include + +#include "random_walks_utils.cuh" + +#include +#include +#include +#include +#include +#include +#include + +using namespace cugraph::experimental; + +template +using vector_test_t = detail::device_vec_t; // for debug purposes + +namespace { // anonym. 
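+// Test-local helpers: make_graph() builds a small single-GPU graph_t from host-side
+// COO arrays, and check_col_indices() verifies that every generated column index lies
+// within [0, out_degree) of the corresponding path's current vertex.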
+ +template +graph_t make_graph(raft::handle_t const& handle, + std::vector const& v_src, + std::vector const& v_dst, + std::vector const& v_w, + vertex_t num_vertices, + edge_t num_edges) +{ + vector_test_t d_src(num_edges, handle.get_stream()); + vector_test_t d_dst(num_edges, handle.get_stream()); + vector_test_t d_weights(num_edges, handle.get_stream()); + + raft::update_device(d_src.data(), v_src.data(), d_src.size(), handle.get_stream()); + raft::update_device(d_dst.data(), v_dst.data(), d_dst.size(), handle.get_stream()); + raft::update_device(d_weights.data(), v_w.data(), d_weights.size(), handle.get_stream()); + + edgelist_t edgelist{ + d_src.data(), d_dst.data(), d_weights.data(), num_edges}; + + graph_t graph( + handle, edgelist, num_vertices, graph_properties_t{}, false); + + return graph; +} + +template +bool check_col_indices(raft::handle_t const& handle, + vector_test_t const& d_crt_out_degs, + vector_test_t const& d_col_indx, + index_t num_paths) +{ + bool all_indices_within_degs = thrust::all_of( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_paths), + [p_d_col_indx = detail::raw_const_ptr(d_col_indx), + p_d_crt_out_degs = detail::raw_const_ptr(d_crt_out_degs)] __device__(auto indx) { + if (p_d_crt_out_degs[indx] > 0) + return ((p_d_col_indx[indx] >= 0) && (p_d_col_indx[indx] < p_d_crt_out_degs[indx])); + else + return true; + }); + return all_indices_within_degs; +} + +} // namespace + +// FIXME (per rlratzel request): +// This test may be considered an e2e test +// which could be moved to a different test suite: +// +struct RandomWalksPrimsTest : public ::testing::Test { +}; + +TEST_F(RandomWalksPrimsTest, SimpleGraphRWStart) +{ + using namespace cugraph::experimental::detail; + + using vertex_t = int32_t; + using edge_t = vertex_t; + using weight_t = float; + using index_t = vertex_t; + + raft::handle_t handle{}; + + edge_t num_edges = 8; + vertex_t num_vertices = 6; + + std::vector v_src{0, 1, 1, 2, 2, 2, 3, 4}; + std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; + std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; + + auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges); + + auto graph_view = graph.view(); + + edge_t const* offsets = graph_view.offsets(); + vertex_t const* indices = graph_view.indices(); + weight_t const* values = graph_view.weights(); + + std::vector v_ro(num_vertices + 1); + std::vector v_ci(num_edges); + std::vector v_vs(num_edges); + + raft::update_host(v_ro.data(), offsets, num_vertices + 1, handle.get_stream()); + raft::update_host(v_ci.data(), indices, num_edges, handle.get_stream()); + raft::update_host(v_vs.data(), values, num_edges, handle.get_stream()); + + std::vector v_ro_expected{0, 1, 3, 6, 7, 8, 8}; + std::vector v_ci_expected{1, 3, 4, 0, 1, 3, 5, 5}; + std::vector v_vs_expected{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; + + EXPECT_EQ(v_ro, v_ro_expected); + EXPECT_EQ(v_ci, v_ci_expected); + EXPECT_EQ(v_vs, v_vs_expected); + + index_t num_paths = 4; + index_t max_depth = 3; + index_t total_sz = num_paths * max_depth; + + std::vector v_coalesced(total_sz, -1); + std::vector w_coalesced(total_sz - num_paths, -1); + + vector_test_t d_coalesced_v(total_sz, handle.get_stream()); + vector_test_t d_coalesced_w(total_sz - num_paths, handle.get_stream()); + + raft::update_device( + d_coalesced_v.data(), v_coalesced.data(), d_coalesced_v.size(), handle.get_stream()); + raft::update_device( + d_coalesced_w.data(), w_coalesced.data(), 
d_coalesced_w.size(), handle.get_stream()); + + std::vector v_start{1, 0, 4, 2}; + vector_test_t d_start(num_paths, handle.get_stream()); + + raft::update_device(d_start.data(), v_start.data(), d_start.size(), handle.get_stream()); + + vector_test_t d_sizes(num_paths, handle.get_stream()); + + random_walker_t rand_walker{handle, graph_view, num_paths, max_depth}; + + rand_walker.start(d_start, d_coalesced_v, d_sizes); + + std::vector v_coalesced_exp{1, -1, -1, 0, -1, -1, 4, -1, -1, 2, -1, -1}; + raft::update_host( + v_coalesced.data(), raw_const_ptr(d_coalesced_v), total_sz, handle.get_stream()); + EXPECT_EQ(v_coalesced, v_coalesced_exp); + + std::vector v_sizes{1, 1, 1, 1}; + std::vector v_sz_exp(num_paths); + raft::update_host(v_sz_exp.data(), raw_const_ptr(d_sizes), num_paths, handle.get_stream()); + + EXPECT_EQ(v_sizes, v_sz_exp); +} + +TEST_F(RandomWalksPrimsTest, SimpleGraphCoalesceExperiments) +{ + using namespace cugraph::experimental::detail; + + using vertex_t = int32_t; + using edge_t = vertex_t; + using weight_t = float; + using index_t = vertex_t; + + raft::handle_t handle{}; + + edge_t num_edges = 8; + vertex_t num_vertices = 6; + + std::vector v_src{0, 1, 1, 2, 2, 2, 3, 4}; + std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; + std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; + + auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges); + + auto graph_view = graph.view(); + + edge_t const* offsets = graph_view.offsets(); + vertex_t const* indices = graph_view.indices(); + weight_t const* values = graph_view.weights(); + + index_t num_paths = 4; + index_t max_depth = 3; + index_t total_sz = num_paths * max_depth; + + std::vector v_coalesced(total_sz, -1); + std::vector w_coalesced(total_sz - num_paths, -1); + + vector_test_t d_coalesced_v(total_sz, handle.get_stream()); + vector_test_t d_coalesced_w(total_sz - num_paths, handle.get_stream()); + + raft::update_device( + d_coalesced_v.data(), v_coalesced.data(), d_coalesced_v.size(), handle.get_stream()); + raft::update_device( + d_coalesced_w.data(), w_coalesced.data(), d_coalesced_w.size(), handle.get_stream()); + + std::vector v_start{1, 0, 4, 2}; + vector_test_t d_start(num_paths, handle.get_stream()); + + raft::update_device(d_start.data(), v_start.data(), d_start.size(), handle.get_stream()); + + vector_test_t d_sizes(num_paths, handle.get_stream()); + + random_walker_t rand_walker{handle, graph_view, num_paths, max_depth}; + + auto const& d_out_degs = rand_walker.get_out_degs(); + EXPECT_EQ(static_cast(num_vertices), d_out_degs.size()); + + std::vector v_out_degs(num_vertices); + raft::update_host( + v_out_degs.data(), raw_const_ptr(d_out_degs), num_vertices, handle.get_stream()); + + std::vector v_out_degs_exp{1, 2, 3, 1, 1, 0}; + EXPECT_EQ(v_out_degs, v_out_degs_exp); + + rand_walker.start(d_start, d_coalesced_v, d_sizes); + + // update crt_out_degs: + // + vector_test_t d_crt_out_degs(num_paths, handle.get_stream()); + rand_walker.gather_from_coalesced( + d_coalesced_v, d_out_degs, d_sizes, d_crt_out_degs, max_depth, num_paths); + + std::vector v_crt_out_degs(num_paths); + raft::update_host( + v_crt_out_degs.data(), raw_const_ptr(d_crt_out_degs), num_paths, handle.get_stream()); + + std::vector v_crt_out_degs_exp{2, 1, 1, 3}; + EXPECT_EQ(v_crt_out_degs, v_crt_out_degs_exp); +} + +TEST_F(RandomWalksPrimsTest, SimpleGraphColExtraction) +{ + using namespace cugraph::experimental::detail; + + using vertex_t = int32_t; + using edge_t = vertex_t; + using weight_t = float; + using index_t = vertex_t; + + 
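+ // With fixed (non-random) column choices per path, col_indx_extract_t must return
+ // the matching neighbor vertex and edge weight of each path's current vertex.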
raft::handle_t handle{}; + + edge_t num_edges = 8; + vertex_t num_vertices = 6; + + std::vector v_src{0, 1, 1, 2, 2, 2, 3, 4}; + std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; + std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; + + auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges); + + auto graph_view = graph.view(); + + edge_t const* offsets = graph_view.offsets(); + vertex_t const* indices = graph_view.indices(); + weight_t const* values = graph_view.weights(); + + index_t num_paths = 4; + index_t max_depth = 3; + index_t total_sz = num_paths * max_depth; + + std::vector v_coalesced(total_sz, -1); + std::vector w_coalesced(total_sz - num_paths, -1); + + vector_test_t d_coalesced_v(total_sz, handle.get_stream()); + vector_test_t d_coalesced_w(total_sz - num_paths, handle.get_stream()); + + raft::update_device( + d_coalesced_v.data(), v_coalesced.data(), d_coalesced_v.size(), handle.get_stream()); + raft::update_device( + d_coalesced_w.data(), w_coalesced.data(), d_coalesced_w.size(), handle.get_stream()); + + std::vector v_start{1, 0, 4, 2}; + vector_test_t d_start(num_paths, handle.get_stream()); + + raft::update_device(d_start.data(), v_start.data(), d_start.size(), handle.get_stream()); + + vector_test_t d_sizes(num_paths, handle.get_stream()); + + random_walker_t rand_walker{handle, graph_view, num_paths, max_depth}; + + auto const& d_out_degs = rand_walker.get_out_degs(); + + rand_walker.start(d_start, d_coalesced_v, d_sizes); + + // update crt_out_degs: + // + vector_test_t d_crt_out_degs(num_paths, handle.get_stream()); + rand_walker.gather_from_coalesced( + d_coalesced_v, d_out_degs, d_sizes, d_crt_out_degs, max_depth, num_paths); + + col_indx_extract_t col_extractor{handle, + graph_view, + raw_const_ptr(d_crt_out_degs), + raw_const_ptr(d_sizes), + num_paths, + max_depth}; + + // typically given by random engine: + // + std::vector v_col_indx{1, 0, 0, 2}; + vector_test_t d_col_indx(num_paths, handle.get_stream()); + + raft::update_device(d_col_indx.data(), v_col_indx.data(), d_col_indx.size(), handle.get_stream()); + + vector_test_t d_next_v(num_paths, handle.get_stream()); + vector_test_t d_next_w(num_paths, handle.get_stream()); + + col_extractor(d_coalesced_v, d_col_indx, d_next_v, d_next_w); + + std::vector v_next_v(num_paths); + std::vector v_next_w(num_paths); + + raft::update_host(v_next_v.data(), raw_const_ptr(d_next_v), num_paths, handle.get_stream()); + raft::update_host(v_next_w.data(), raw_const_ptr(d_next_w), num_paths, handle.get_stream()); + + std::vector v_next_v_exp{4, 1, 5, 3}; + std::vector v_next_w_exp{2.1f, 0.1f, 7.1f, 5.1f}; + + EXPECT_EQ(v_next_v, v_next_v_exp); + EXPECT_EQ(v_next_w, v_next_w_exp); +} + +TEST_F(RandomWalksPrimsTest, SimpleGraphRndGenColIndx) +{ + using namespace cugraph::experimental::detail; + + using vertex_t = int32_t; + using edge_t = vertex_t; + using weight_t = float; + using index_t = vertex_t; + using real_t = float; + using seed_t = long; + + using random_engine_t = rrandom_gen_t; + + raft::handle_t handle{}; + + edge_t num_edges = 8; + vertex_t num_vertices = 6; + + std::vector v_src{0, 1, 1, 2, 2, 2, 3, 4}; + std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; + std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; + + auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges); + + auto graph_view = graph.view(); + + edge_t const* offsets = graph_view.offsets(); + vertex_t const* indices = graph_view.indices(); + weight_t const* values = graph_view.weights(); + + index_t num_paths = 4; + 
index_t max_depth = 3; + index_t total_sz = num_paths * max_depth; + + std::vector v_coalesced(total_sz, -1); + std::vector w_coalesced(total_sz - num_paths, -1); + + vector_test_t d_coalesced_v(total_sz, handle.get_stream()); + vector_test_t d_coalesced_w(total_sz - num_paths, handle.get_stream()); + + raft::update_device( + d_coalesced_v.data(), v_coalesced.data(), d_coalesced_v.size(), handle.get_stream()); + raft::update_device( + d_coalesced_w.data(), w_coalesced.data(), d_coalesced_w.size(), handle.get_stream()); + + std::vector v_start{1, 0, 4, 2}; + vector_test_t d_start(num_paths, handle.get_stream()); + + raft::update_device(d_start.data(), v_start.data(), d_start.size(), handle.get_stream()); + + vector_test_t d_sizes(num_paths, handle.get_stream()); + + random_walker_t rand_walker{handle, graph_view, num_paths, max_depth}; + + auto const& d_out_degs = rand_walker.get_out_degs(); + + rand_walker.start(d_start, d_coalesced_v, d_sizes); + + // update crt_out_degs: + // + vector_test_t d_crt_out_degs(num_paths, handle.get_stream()); + rand_walker.gather_from_coalesced( + d_coalesced_v, d_out_degs, d_sizes, d_crt_out_degs, max_depth, num_paths); + + // random engine generated: + // + vector_test_t d_col_indx(num_paths, handle.get_stream()); + vector_test_t d_random(num_paths, handle.get_stream()); + + seed_t seed = static_cast(std::time(nullptr)); + random_engine_t rgen(handle, num_paths, d_random, d_crt_out_degs, seed); + rgen.generate_col_indices(d_col_indx); + + bool all_indices_within_degs = check_col_indices(handle, d_crt_out_degs, d_col_indx, num_paths); + + ASSERT_TRUE(all_indices_within_degs); +} + +TEST_F(RandomWalksPrimsTest, SimpleGraphUpdatePathSizes) +{ + using namespace cugraph::experimental::detail; + + using vertex_t = int32_t; + using edge_t = vertex_t; + using weight_t = float; + using index_t = vertex_t; + using real_t = float; + using seed_t = long; + + using random_engine_t = rrandom_gen_t; + + raft::handle_t handle{}; + + edge_t num_edges = 8; + vertex_t num_vertices = 6; + + std::vector v_src{0, 1, 1, 2, 2, 2, 3, 4}; + std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; + std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; + + auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges); + + auto graph_view = graph.view(); + + edge_t const* offsets = graph_view.offsets(); + vertex_t const* indices = graph_view.indices(); + weight_t const* values = graph_view.weights(); + + index_t num_paths = 4; + index_t max_depth = 3; + index_t total_sz = num_paths * max_depth; + + std::vector v_coalesced(total_sz, -1); + std::vector w_coalesced(total_sz - num_paths, -1); + + vector_test_t d_coalesced_v(total_sz, handle.get_stream()); + vector_test_t d_coalesced_w(total_sz - num_paths, handle.get_stream()); + + raft::update_device( + d_coalesced_v.data(), v_coalesced.data(), d_coalesced_v.size(), handle.get_stream()); + raft::update_device( + d_coalesced_w.data(), w_coalesced.data(), d_coalesced_w.size(), handle.get_stream()); + + std::vector v_start{1, 0, 4, 2}; + vector_test_t d_start(num_paths, handle.get_stream()); + + raft::update_device(d_start.data(), v_start.data(), d_start.size(), handle.get_stream()); + + vector_test_t d_sizes(num_paths, handle.get_stream()); + + random_walker_t rand_walker{handle, graph_view, num_paths, max_depth}; + + auto const& d_out_degs = rand_walker.get_out_degs(); + + rand_walker.start(d_start, d_coalesced_v, d_sizes); + + // Fixed set of out-degs, as opposed to have them generated by the algorithm. 
+ // That's because I want to test a certain functionality in isolation + // + std::vector v_crt_out_degs{2, 0, 1, 0}; + vector_test_t d_crt_out_degs(num_paths, handle.get_stream()); + raft::update_device( + d_crt_out_degs.data(), v_crt_out_degs.data(), d_crt_out_degs.size(), handle.get_stream()); + + rand_walker.update_path_sizes(d_crt_out_degs, d_sizes); + + std::vector v_sizes(num_paths); + raft::update_host(v_sizes.data(), raw_const_ptr(d_sizes), num_paths, handle.get_stream()); + std::vector v_sizes_exp{2, 1, 2, 1}; + // i.e., corresponding 0-entries in crt-out-degs, don't get updated; + + EXPECT_EQ(v_sizes, v_sizes_exp); +} + +TEST_F(RandomWalksPrimsTest, SimpleGraphScatterUpdate) +{ + using namespace cugraph::experimental::detail; + + using vertex_t = int32_t; + using edge_t = vertex_t; + using weight_t = float; + using index_t = vertex_t; + + raft::handle_t handle{}; + + edge_t num_edges = 8; + vertex_t num_vertices = 6; + + std::vector v_src{0, 1, 1, 2, 2, 2, 3, 4}; + std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; + std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; + + auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges); + + auto graph_view = graph.view(); + + edge_t const* offsets = graph_view.offsets(); + vertex_t const* indices = graph_view.indices(); + weight_t const* values = graph_view.weights(); + + index_t num_paths = 4; + index_t max_depth = 3; + index_t total_sz = num_paths * max_depth; + + std::vector v_coalesced(total_sz, -1); + std::vector w_coalesced(total_sz - num_paths, -1); + + vector_test_t d_coalesced_v(total_sz, handle.get_stream()); + vector_test_t d_coalesced_w(total_sz - num_paths, handle.get_stream()); + + raft::update_device( + d_coalesced_v.data(), v_coalesced.data(), d_coalesced_v.size(), handle.get_stream()); + raft::update_device( + d_coalesced_w.data(), w_coalesced.data(), d_coalesced_w.size(), handle.get_stream()); + + std::vector v_start{1, 0, 4, 2}; + vector_test_t d_start(num_paths, handle.get_stream()); + + raft::update_device(d_start.data(), v_start.data(), d_start.size(), handle.get_stream()); + + vector_test_t d_sizes(num_paths, handle.get_stream()); + + random_walker_t rand_walker{handle, graph_view, num_paths, max_depth}; + + auto const& d_out_degs = rand_walker.get_out_degs(); + + rand_walker.start(d_start, d_coalesced_v, d_sizes); + + // update crt_out_degs: + // + vector_test_t d_crt_out_degs(num_paths, handle.get_stream()); + rand_walker.gather_from_coalesced( + d_coalesced_v, d_out_degs, d_sizes, d_crt_out_degs, max_depth, num_paths); + + col_indx_extract_t col_extractor{handle, + graph_view, + raw_const_ptr(d_crt_out_degs), + raw_const_ptr(d_sizes), + num_paths, + max_depth}; + + // typically given by random engine: + // + std::vector v_col_indx{1, 0, 0, 2}; + vector_test_t d_col_indx(num_paths, handle.get_stream()); + + raft::update_device(d_col_indx.data(), v_col_indx.data(), d_col_indx.size(), handle.get_stream()); + + vector_test_t d_next_v(num_paths, handle.get_stream()); + vector_test_t d_next_w(num_paths, handle.get_stream()); + + col_extractor(d_coalesced_v, d_col_indx, d_next_v, d_next_w); + + rand_walker.update_path_sizes(d_crt_out_degs, d_sizes); + + // check start(): + // + { + std::vector v_coalesced_exp{1, -1, -1, 0, -1, -1, 4, -1, -1, 2, -1, -1}; + raft::update_host( + v_coalesced.data(), raw_const_ptr(d_coalesced_v), total_sz, handle.get_stream()); + EXPECT_EQ(v_coalesced, v_coalesced_exp); + } + + // check crt_out_degs: + // + { + std::vector v_crt_out_degs(num_paths); + raft::update_host( + 
v_crt_out_degs.data(), raw_const_ptr(d_crt_out_degs), num_paths, handle.get_stream()); + std::vector v_crt_out_degs_exp{2, 1, 1, 3}; + EXPECT_EQ(v_crt_out_degs, v_crt_out_degs_exp); + } + + // check paths sizes update: + // + { + std::vector v_sizes(num_paths); + raft::update_host(v_sizes.data(), raw_const_ptr(d_sizes), num_paths, handle.get_stream()); + std::vector v_sizes_exp{2, 2, 2, 2}; + // i.e., corresponding 0-entries in crt-out-degs, don't get updated; + EXPECT_EQ(v_sizes, v_sizes_exp); + } + + // check next step: + // + { + std::vector v_next_v(num_paths); + std::vector v_next_w(num_paths); + + raft::update_host(v_next_v.data(), raw_const_ptr(d_next_v), num_paths, handle.get_stream()); + raft::update_host(v_next_w.data(), raw_const_ptr(d_next_w), num_paths, handle.get_stream()); + + std::vector v_next_v_exp{4, 1, 5, 3}; + std::vector v_next_w_exp{2.1f, 0.1f, 7.1f, 5.1f}; + + EXPECT_EQ(v_next_v, v_next_v_exp); + EXPECT_EQ(v_next_w, v_next_w_exp); + } + + rand_walker.scatter_vertices(d_next_v, d_coalesced_v, d_crt_out_degs, d_sizes); + rand_walker.scatter_weights(d_next_w, d_coalesced_w, d_crt_out_degs, d_sizes); + + // check vertex/weight scatter: + // + { + raft::update_host( + v_coalesced.data(), raw_const_ptr(d_coalesced_v), total_sz, handle.get_stream()); + raft::update_host( + w_coalesced.data(), raw_const_ptr(d_coalesced_w), total_sz - num_paths, handle.get_stream()); + + std::vector v_coalesced_exp{1, 4, -1, 0, 1, -1, 4, 5, -1, 2, 3, -1}; + std::vector w_coalesced_exp{2.1, -1, 0.1, -1, 7.1, -1, 5.1, -1}; + + EXPECT_EQ(v_coalesced, v_coalesced_exp); + EXPECT_EQ(w_coalesced, w_coalesced_exp); + } +} + +TEST_F(RandomWalksPrimsTest, SimpleGraphCoalesceDefragment) +{ + using namespace cugraph::experimental::detail; + + using vertex_t = int32_t; + using edge_t = vertex_t; + using weight_t = float; + using index_t = vertex_t; + + raft::handle_t handle{}; + + edge_t num_edges = 8; + vertex_t num_vertices = 6; + + std::vector v_src{0, 1, 1, 2, 2, 2, 3, 4}; + std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; + std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; + + auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges); + + auto graph_view = graph.view(); + + edge_t const* offsets = graph_view.offsets(); + vertex_t const* indices = graph_view.indices(); + weight_t const* values = graph_view.weights(); + + index_t num_paths = 4; + index_t max_depth = 3; + index_t total_sz = num_paths * max_depth; + + std::vector v_sizes{1, 2, 2, 1}; + vector_test_t d_sizes(num_paths, handle.get_stream()); + raft::update_device(d_sizes.data(), v_sizes.data(), d_sizes.size(), handle.get_stream()); + + std::vector v_coalesced(total_sz, -1); + v_coalesced[0] = 3; + v_coalesced[max_depth] = 5; + v_coalesced[max_depth + 1] = 2; + v_coalesced[2 * max_depth] = 4; + v_coalesced[2 * max_depth + 1] = 0; + v_coalesced[3 * max_depth] = 1; + + std::vector w_coalesced(total_sz - num_paths, -1); + w_coalesced[max_depth - 1] = 10.1; + w_coalesced[2 * max_depth - 2] = 11.2; + + vector_test_t d_coalesced_v(total_sz, handle.get_stream()); + vector_test_t d_coalesced_w(total_sz - num_paths, handle.get_stream()); + + raft::update_device( + d_coalesced_v.data(), v_coalesced.data(), d_coalesced_v.size(), handle.get_stream()); + raft::update_device( + d_coalesced_w.data(), w_coalesced.data(), d_coalesced_w.size(), handle.get_stream()); + + random_walker_t rand_walker{handle, graph_view, num_paths, max_depth}; + + rand_walker.stop(d_coalesced_v, d_coalesced_w, d_sizes); + + // check vertex/weight defragment: 
+ // + { + v_coalesced.resize(d_coalesced_v.size()); + w_coalesced.resize(d_coalesced_w.size()); + + raft::update_host( + v_coalesced.data(), raw_const_ptr(d_coalesced_v), d_coalesced_v.size(), handle.get_stream()); + raft::update_host( + w_coalesced.data(), raw_const_ptr(d_coalesced_w), d_coalesced_w.size(), handle.get_stream()); + + std::vector v_coalesced_exp{3, 5, 2, 4, 0, 1}; + std::vector w_coalesced_exp{10.1, 11.2}; + + EXPECT_EQ(v_coalesced, v_coalesced_exp); + EXPECT_EQ(w_coalesced, w_coalesced_exp); + } +} + +TEST_F(RandomWalksPrimsTest, SimpleGraphRandomWalk) +{ + using vertex_t = int32_t; + using edge_t = vertex_t; + using weight_t = float; + using index_t = vertex_t; + + raft::handle_t handle{}; + + edge_t num_edges = 8; + vertex_t num_vertices = 6; + + std::vector v_src{0, 1, 1, 2, 2, 2, 3, 4}; + std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; + std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; + + auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges); + + auto graph_view = graph.view(); + + edge_t const* offsets = graph_view.offsets(); + vertex_t const* indices = graph_view.indices(); + weight_t const* values = graph_view.weights(); + + std::vector v_ro(num_vertices + 1); + std::vector v_ci(num_edges); + std::vector v_vals(num_edges); + + raft::update_host(v_ro.data(), offsets, v_ro.size(), handle.get_stream()); + raft::update_host(v_ci.data(), indices, v_ci.size(), handle.get_stream()); + raft::update_host(v_vals.data(), values, v_vals.size(), handle.get_stream()); + + std::vector v_start{1, 0, 4, 2}; + vector_test_t d_v_start(v_start.size(), handle.get_stream()); + raft::update_device(d_v_start.data(), v_start.data(), d_v_start.size(), handle.get_stream()); + + index_t num_paths = v_start.size(); + index_t max_depth = 5; + + // 0-copy const device view: + // + detail::device_const_vector_view d_start_view{d_v_start.data(), num_paths}; + auto quad = detail::random_walks_impl(handle, graph_view, d_start_view, max_depth); + + auto& d_coalesced_v = std::get<0>(quad); + auto& d_coalesced_w = std::get<1>(quad); + auto& d_sizes = std::get<2>(quad); + auto seed0 = std::get<3>(quad); + + bool test_all_paths = + cugraph::test::host_check_rw_paths(handle, graph_view, d_coalesced_v, d_coalesced_w, d_sizes); + + if (!test_all_paths) std::cout << "starting seed on failure: " << seed0 << '\n'; + + ASSERT_TRUE(test_all_paths); +} From 7a2b02b4a867d7b18b5323c31c4dfb8030f095ab Mon Sep 17 00:00:00 2001 From: Alex Fender Date: Wed, 31 Mar 2021 14:14:23 -0500 Subject: [PATCH 209/343] Multiple graph generator with power law distribution on sizes (#1483) Add a function that leverages the RMAT generator and creates k graphs with a power-law or uniform distribution of graph sizes. 
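A rough usage sketch (illustrative only; include paths are indicative, the exact signature and defaults are in the header diff below, and building graphs from the returned edge lists is omitted):

```cpp
#include <experimental/graph_generator.hpp>

#include <raft/handle.hpp>

raft::handle_t handle;

// 8 edge lists, scales drawn between min_scale 4 and max_scale 10
// (i.e. 2^4 to 2^10 vertices), ~16 edges per vertex, power-law
// distributions for both the graph sizes and the edges
auto edgelists = cugraph::experimental::generate_rmat_edgelists<int32_t>(
  handle,
  8,   // n_edgelists
  4,   // min_scale
  10,  // max_scale
  16,  // edge_factor
  cugraph::experimental::generator_distribution_t::POWER_LAW,
  cugraph::experimental::generator_distribution_t::POWER_LAW,
  uint64_t{0} /* seed */);

for (auto& [srcs, dsts] : edgelists) {
  // srcs/dsts are rmm::device_uvector columns holding one generated edge list;
  // a graph can be constructed from them here if desired
  auto num_edges = srcs.size();
}
```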
Closes #1458
Consider adding bindings for this as part of #1473

Authors:
  - Alex Fender (https://github.com/afender)

Approvers:
  - Seunghwa Kang (https://github.com/seunghwak)
  - Brad Rees (https://github.com/BradReesWork)

URL: https://github.com/rapidsai/cugraph/pull/1483
---
 cpp/include/experimental/graph_generator.hpp  | 55 +++++++++++-
 .../experimental/generate_rmat_edgelist.cu    | 78 ++++++++++++++++-
 cpp/tests/experimental/generate_rmat_test.cpp | 86 +++++++++++++++++++
 3 files changed, 217 insertions(+), 2 deletions(-)

diff --git a/cpp/include/experimental/graph_generator.hpp b/cpp/include/experimental/graph_generator.hpp
index b8495ed7581..bc7337944f3 100644
--- a/cpp/include/experimental/graph_generator.hpp
+++ b/cpp/include/experimental/graph_generator.hpp
@@ -72,7 +72,7 @@ template
 std::tuple, rmm::device_uvector> generate_rmat_edgelist(
   raft::handle_t const& handle,
   size_t scale,
-  size_t edge_factor = 16,
+  size_t num_edges,
   double a = 0.57,
   double b = 0.19,
   double c = 0.19,
@@ -80,5 +80,58 @@ std::tuple, rmm::device_uvector> generat
   bool clip_and_flip = false,
   bool scramble_vertex_ids = false);
 
+enum class generator_distribution_t { POWER_LAW = 0, UNIFORM };
+
+/**
+ * @brief Generate multiple edge lists using the R-mat graph generator.
+ *
+ * This function allows multi-edges and self-loops similar to the Graph 500 reference
+ * implementation.
+ *
+ * @p scramble_vertex_ids needs to be set to `true` to generate a graph conforming to the Graph 500
+ * specification (note that scrambling does not affect cuGraph's graph construction performance, so
+ * this is generally unnecessary). If `edge_factor` is given (e.g. Graph 500), set @p num_edges to
+ * (size_t{1} << @p scale) * `edge_factor`. To generate an undirected graph, set @p b == @p c and @p
+ * clip_and_flip = true. All the resulting edges will be placed in the lower triangular part
+ * (including the diagonal) of the graph adjacency matrix.
+ *
+ *
+ * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
+ * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
+ * handles to various CUDA libraries) to run graph algorithms.
+ * @param n_edgelists Number of edge lists (graphs) to generate.
+ * @param min_scale Scale factor to set the minimum number of vertices in the graph.
+ * @param max_scale Scale factor to set the maximum number of vertices in the graph.
+ * @param edge_factor Average number of edges per vertex to generate.
+ * @param size_distribution Distribution of the graph sizes, impacts the scale parameter of the
+ * R-MAT generator.
+ * @param edge_distribution Edge distribution for each graph, impacts how the R-MAT parameters
+ * a, b, c, d are set.
+ * @param seed Seed value for the random number generator.
+ * @param clip_and_flip Flag controlling whether to generate edges only in the lower triangular part
+ * (including the diagonal) of the graph adjacency matrix (if set to `true`) or not (if set to
+ * `false`).
+ * @param scramble_vertex_ids Flag controlling whether to scramble vertex ID bits (if set to `true`)
+ * or not (if set to `false`); scrambling vertex ID bits breaks correlation between vertex ID values
+ * and vertex degrees. The scramble code here follows the algorithm in the Graph 500 reference
+ * implementation version 3.0.0.
+ * @return A vector of std::tuple, rmm::device_uvector> of
+ * size @p n_edgelists, each vector element being a tuple of rmm::device_uvector objects for edge
+ * source vertex IDs and edge destination vertex IDs.
+ */ +template +std::vector, rmm::device_uvector>> +generate_rmat_edgelists( + raft::handle_t const& handle, + size_t n_edgelists, + size_t min_scale, + size_t max_scale, + size_t edge_factor = 16, + generator_distribution_t size_distribution = generator_distribution_t::POWER_LAW, + generator_distribution_t edge_distribution = generator_distribution_t::POWER_LAW, + uint64_t seed = 0, + bool clip_and_flip = false, + bool scramble_vertex_ids = false); + } // namespace experimental } // namespace cugraph diff --git a/cpp/src/experimental/generate_rmat_edgelist.cu b/cpp/src/experimental/generate_rmat_edgelist.cu index 0a6d666432f..185fa837a70 100644 --- a/cpp/src/experimental/generate_rmat_edgelist.cu +++ b/cpp/src/experimental/generate_rmat_edgelist.cu @@ -27,7 +27,9 @@ #include #include +#include #include +#include "rmm/detail/error.hpp" namespace cugraph { namespace experimental { @@ -121,7 +123,57 @@ std::tuple, rmm::device_uvector> generat return std::make_tuple(std::move(srcs), std::move(dsts)); } -// explicit instantiation +template +std::vector, rmm::device_uvector>> +generate_rmat_edgelists(raft::handle_t const& handle, + size_t n_edgelists, + size_t min_scale, + size_t max_scale, + size_t edge_factor, + generator_distribution_t component_distribution, + generator_distribution_t edge_distribution, + uint64_t seed, + bool clip_and_flip, + bool scramble_vertex_ids) +{ + CUGRAPH_EXPECTS(min_scale > 0, "minimum graph scale is 1."); + CUGRAPH_EXPECTS(size_t{1} << max_scale <= std::numeric_limits::max(), + "Invalid input argument: scale too large for vertex_t."); + + std::vector, rmm::device_uvector>> output{}; + output.reserve(n_edgelists); + std::vector scale(n_edgelists); + + std::default_random_engine eng; + eng.seed(seed); + if (component_distribution == generator_distribution_t::UNIFORM) { + std::uniform_int_distribution dist(min_scale, max_scale); + std::generate(scale.begin(), scale.end(), [&dist, &eng]() { return dist(eng); }); + } else { + // May expose this as a parameter in the future + std::exponential_distribution dist(4); + // The modulo is here to protect the range because exponential distribution is defined on + // [0,infinity). 
With exponent 4 most values are between 0 and 1 + auto range = max_scale - min_scale; + std::generate(scale.begin(), scale.end(), [&dist, &eng, &min_scale, &range]() { + return min_scale + static_cast(static_cast(range) * dist(eng)) % range; + }); + } + + // intialized to standard powerlaw values + double a = 0.57, b = 0.19, c = 0.19; + if (edge_distribution == generator_distribution_t::UNIFORM) { + a = 0.25; + b = a; + c = a; + } + + for (size_t i = 0; i < n_edgelists; i++) { + output.push_back(generate_rmat_edgelist( + handle, scale[i], scale[i] * edge_factor, a, b, c, i, clip_and_flip, scramble_vertex_ids)); + } + return output; +} template std::tuple, rmm::device_uvector> generate_rmat_edgelist(raft::handle_t const& handle, @@ -145,5 +197,29 @@ generate_rmat_edgelist(raft::handle_t const& handle, bool clip_and_flip, bool scramble_vertex_ids); +template std::vector, rmm::device_uvector>> +generate_rmat_edgelists(raft::handle_t const& handle, + size_t n_edgelists, + size_t min_scale, + size_t max_scale, + size_t edge_factor, + generator_distribution_t component_distribution, + generator_distribution_t edge_distribution, + uint64_t seed, + bool clip_and_flip, + bool scramble_vertex_ids); + +template std::vector, rmm::device_uvector>> +generate_rmat_edgelists(raft::handle_t const& handle, + size_t n_edgelists, + size_t min_scale, + size_t max_scale, + size_t edge_factor, + generator_distribution_t component_distribution, + generator_distribution_t edge_distribution, + uint64_t seed, + bool clip_and_flip, + bool scramble_vertex_ids); + } // namespace experimental } // namespace cugraph diff --git a/cpp/tests/experimental/generate_rmat_test.cpp b/cpp/tests/experimental/generate_rmat_test.cpp index 249a1a3c6c8..666106d62ca 100644 --- a/cpp/tests/experimental/generate_rmat_test.cpp +++ b/cpp/tests/experimental/generate_rmat_test.cpp @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +#include #include #include @@ -281,5 +282,90 @@ INSTANTIATE_TEST_CASE_P(simple_test, GenerateRmat_Usecase(20, 16, 0.57, 0.19, 0.19, false), GenerateRmat_Usecase(20, 16, 0.45, 0.22, 0.22, true), GenerateRmat_Usecase(20, 16, 0.45, 0.22, 0.22, false))); +typedef struct GenerateRmats_Usecase_t { + size_t n_edgelists{0}; + size_t min_scale{0}; + size_t max_scale{0}; + size_t edge_factor{0}; + cugraph::experimental::generator_distribution_t component_distribution; + cugraph::experimental::generator_distribution_t edge_distribution; + + GenerateRmats_Usecase_t(size_t n_edgelists, + size_t min_scale, + size_t max_scale, + size_t edge_factor, + cugraph::experimental::generator_distribution_t component_distribution, + cugraph::experimental::generator_distribution_t edge_distribution) + : n_edgelists(n_edgelists), + min_scale(min_scale), + max_scale(max_scale), + component_distribution(component_distribution), + edge_distribution(edge_distribution), + edge_factor(edge_factor){}; +} GenerateRmats_Usecase; +class Tests_GenerateRmats : public ::testing::TestWithParam { + public: + Tests_GenerateRmats() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + template + void run_current_test(GenerateRmats_Usecase const& configuration) + { + raft::handle_t handle{}; + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + auto outputs = + cugraph::experimental::generate_rmat_edgelists(handle, + configuration.n_edgelists, + configuration.min_scale, + configuration.max_scale, + configuration.edge_factor, + configuration.component_distribution, + configuration.edge_distribution, + uint64_t{0}); + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + ASSERT_EQ(configuration.n_edgelists, outputs.size()); + for (auto i = outputs.begin(); i != outputs.end(); ++i) { + ASSERT_EQ(std::get<0>(*i).size(), std::get<1>(*i).size()); + ASSERT_TRUE((configuration.min_scale * configuration.edge_factor) <= std::get<0>(*i).size()); + ASSERT_TRUE((configuration.max_scale * configuration.edge_factor) >= std::get<0>(*i).size()); + } + } +}; +TEST_P(Tests_GenerateRmats, CheckInt32) { run_current_test(GetParam()); } + +INSTANTIATE_TEST_CASE_P( + simple_test, + Tests_GenerateRmats, + ::testing::Values( + GenerateRmats_Usecase(8, + 1, + 16, + 32, + cugraph::experimental::generator_distribution_t::UNIFORM, + cugraph::experimental::generator_distribution_t::UNIFORM), + GenerateRmats_Usecase(8, + 1, + 16, + 32, + cugraph::experimental::generator_distribution_t::UNIFORM, + cugraph::experimental::generator_distribution_t::POWER_LAW), + GenerateRmats_Usecase(8, + 3, + 16, + 32, + cugraph::experimental::generator_distribution_t::POWER_LAW, + cugraph::experimental::generator_distribution_t::UNIFORM), + GenerateRmats_Usecase(8, + 3, + 16, + 32, + cugraph::experimental::generator_distribution_t::POWER_LAW, + cugraph::experimental::generator_distribution_t::POWER_LAW))); CUGRAPH_TEST_PROGRAM_MAIN() From daa96221a9b32728897b9ed9785bf91195665e46 Mon Sep 17 00:00:00 2001 From: Chuck Hastings <45364586+ChuckHastings@users.noreply.github.com> Date: Wed, 31 Mar 2021 16:10:03 -0400 Subject: [PATCH 210/343] Create C++ documentation (#1489) Improve the C++ documentation for the following algorithms: Pagerank, SSSP, BFS, and Louvain. We need, generally, to improve the C++ documentation for cuGraph. 
This is intended to improve the documentation for the algorithms that have been adapted to use the new graph primitives and will hopefully provide a construct that we can use for updating other C++ documentation. As we migrate new algorithms to use the new graph primitives over the next several releases we will create documentation for those algorithms as well. Closes #1490 Authors: - Chuck Hastings (https://github.com/ChuckHastings) Approvers: - Rick Ratzel (https://github.com/rlratzel) - Brad Rees (https://github.com/BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1489 --- README.md | 10 ++--- cpp/src/centrality/README.md | 81 ++++++++++++++++++++++++++++++++++++ cpp/src/community/README.md | 79 +++++++++++++++++++++++++++++++++++ cpp/src/traversal/README.md | 56 +++++++++++++++++++++++++ 4 files changed, 221 insertions(+), 5 deletions(-) create mode 100644 cpp/src/centrality/README.md create mode 100644 cpp/src/community/README.md create mode 100644 cpp/src/traversal/README.md diff --git a/README.md b/README.md index 77377fe2bbc..4bdbcd00280 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,7 @@ As of Release 0.18 - including 0.18 nightly | Community | | | | | | EgoNet | Single-GPU | | | | Leiden | Single-GPU | | -| | Louvain | Multi-GPU | | +| | Louvain | Multi-GPU | [C++ README](cpp/src/community/README.md#Louvain) | | | Ensemble Clustering for Graphs | Single-GPU | | | | Spectral-Clustering - Balanced Cut | Single-GPU | | | | Spectral-Clustering - Modularity | Single-GPU | | @@ -71,16 +71,16 @@ As of Release 0.18 - including 0.18 nightly | Linear Assignment| | | | | | Hungarian | Single-GPU | [README](cpp/src/linear_assignment/README-hungarian.md) | | Link Analysis| | | | -| | Pagerank | Multi-GPU | | -| | Personal Pagerank | Multi-GPU | | +| | Pagerank | Multi-GPU | [C++ README](cpp/src/centrality/README.md#Pagerank) | +| | Personal Pagerank | Multi-GPU | [C++ README](cpp/src/centrality/README.md#Personalized-Pagerank) | | | HITS | Single-GPU | leverages Gunrock | | Link Prediction | | | | | | Jaccard Similarity | Single-GPU | | | | Weighted Jaccard Similarity | Single-GPU | | | | Overlap Similarity | Single-GPU | | | Traversal | | | | -| | Breadth First Search (BFS) | Multi-GPU | with cutoff support | -| | Single Source Shortest Path (SSSP) | Multi-GPU | | +| | Breadth First Search (BFS) | Multi-GPU | with cutoff support
[C++ README](cpp/src/traversal/README.md#BFS) | +| | Single Source Shortest Path (SSSP) | Multi-GPU | [C++ README](cpp/src/traversal/README.md#SSSP) | | | Traveling Salesperson Problem (TSP) | Single-GPU | | | Structure | | | | | | Renumbering | Single-GPU | multiple columns, any data type | diff --git a/cpp/src/centrality/README.md b/cpp/src/centrality/README.md new file mode 100644 index 00000000000..db7838fb0cc --- /dev/null +++ b/cpp/src/centrality/README.md @@ -0,0 +1,81 @@ +# Centrality algorithms +cuGraph Pagerank is implemented using our graph primitive library + +## Pagerank + +The unit test code is the best place to search for examples on calling pagerank. + + * [SG Implementation](../../tests/experimental/pagerank_test.cpp) + * [MG Implementation](../../tests/pagerank/mg_pagerank_test.cpp) + +## Simple pagerank + +The example assumes that you create an SG or MG graph somehow. The caller must create the pageranks vector in device memory and pass in the raw pointer to that vector into the pagerank function. + +```cpp +#include +... +using vertex_t = int32_t; // or int64_t, whichever is appropriate +using weight_t = float; // or double, whichever is appropriate +using result_t = weight_t; // could specify float or double also +raft::handle_t handle; // Must be configured if MG +auto graph_view = graph.view(); // assumes you have created a graph somehow + +result_t constexpr alpha{0.85}; +result_t constexpr epsilon{1e-6}; + +rmm::device_uvector pageranks_v(graph_view.get_number_of_vertices(), handle.get_stream()); + +// pagerank optionally supports three additional parameters: +// max_iterations - maximum number of iterations, if pagerank doesn't coverge by +// then we abort +// has_initial_guess - if true, values in the pagerank array when the call is initiated +// will be used as the initial pagerank values. These values will +// be normalized before use. If false (the default), the values +// in the pagerank array will be set to 1/num_vertices before +// starting the computation. +// do_expensive_check - perform extensive validation of the input data before +// executing algorithm. Off by default. Note: turning this on +// is expensive +cugraph::experimental::pagerank(handle, graph_view, nullptr, nullptr, nullptr, vertex_t{0}, + pageranks_v.data(), alpha, epsilon); +``` + +## Personalized Pagerank + +The example assumes that you create an SG or MG graph somehow. The caller must create the pageranks vector in device memory and pass in the raw pointer to that vector into the pagerank function. Additionally, the caller must create personalization_vertices and personalized_values vectors in device memory, populate them and pass in the raw pointers to those vectors. + +```cpp +#include +... 
+using vertex_t = int32_t; // or int64_t, whichever is appropriate +using weight_t = float; // or double, whichever is appropriate +using result_t = weight_t; // could specify float or double also +raft::handle_t handle; // Must be configured if MG +auto graph_view = graph.view(); // assumes you have created a graph somehow +vertex_t number_of_personalization_vertices; // Provided by caller + +result_t constexpr alpha{0.85}; +result_t constexpr epsilon{1e-6}; + +rmm::device_uvector pageranks_v(graph_view.get_number_of_vertices(), handle.get_stream()); +rmm::device_uvector personalization_vertices(number_of_personalization_vertices, handle.get_stream()); +rmm::device_uvector personalization_values(number_of_personalization_vertices, handle.get_stream()); + +// Populate personalization_vertices, personalization_values with user provided data + +// pagerank optionally supports three additional parameters: +// max_iterations - maximum number of iterations, if pagerank doesn't coverge by +// then we abort +// has_initial_guess - if true, values in the pagerank array when the call is initiated +// will be used as the initial pagerank values. These values will +// be normalized before use. If false (the default), the values +// in the pagerank array will be set to 1/num_vertices before +// starting the computation. +// do_expensive_check - perform extensive validation of the input data before +// executing algorithm. Off by default. Note: turning this on +// is expensive +cugraph::experimental::pagerank(handle, graph_view, nullptr, personalization_vertices.data(), + personalization_values.data(), number_of_personalization_vertices, + pageranks_v.data(), alpha, epsilon); +``` diff --git a/cpp/src/community/README.md b/cpp/src/community/README.md new file mode 100644 index 00000000000..4bff0a6e77e --- /dev/null +++ b/cpp/src/community/README.md @@ -0,0 +1,79 @@ +# Louvain and Related Clustering Algorithms +cuGraph contains a GPU implementation of the Louvain algorithm and several related clustering algorithms (Leiden and ECG). + +## Louvain + +The Louvain implementation is designed to assign clusters attempting to optimize modularity. The algorithm is derived from the serial implementation described in the following paper: + + * VD Blondel, J-L Guillaume, R Lambiotte and E Lefebvre: Fast unfolding of community hierarchies in large networks, J Stat Mech P10008 (2008), http://arxiv.org/abs/0803.0476 + +It leverages some parallelism ideas from the following paper: + * Hao Lu, Mahantesh Halappanavar, Ananth Kalyanaraman: Parallel heuristics for scalable community detection, Elsevier Parallel Computing (2015), https://www.sciencedirect.com/science/article/pii/S0167819115000472 + + +The challenge in parallelizing Louvain lies in the primary loop which visits the vertices in serial. For each vertex v the change in modularity is computed for moving the vertex from its currently assigned cluster to each of the clusters to which v's neighbors are assigned. The largest positive delta modularity is used to select a new cluster (if there are no positive delta modularities then the vertex is not moved). If the vertex v is moved to a new cluster then the statistics of the vertex v's old cluster and new cluster change. This change in cluster statistics may affect the delta modularity computations of all vertices that follow vertex v in the serial iteration, creating a dependency between the different iterations of the loop. 
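+
+A rough sketch of that serial sweep (pseudo-code only; `clusters_of_neighbors`, `delta_modularity` and `move_vertex` are illustrative stand-ins, not functions in the implementation) makes the dependency concrete:
+
+```cpp
+for (vertex_t v = 0; v < num_vertices; ++v) {
+  weight_t best_delta{0};
+  auto best_cluster = cluster[v];
+  // candidate clusters are those currently assigned to v's neighbors
+  for (auto c : clusters_of_neighbors(v)) {
+    auto delta = delta_modularity(v, cluster[v], c);
+    if (delta > best_delta) { best_delta = delta; best_cluster = c; }
+  }
+  // moving v updates the statistics of two clusters, which changes the
+  // delta modularities seen by every vertex visited after v
+  if (best_cluster != cluster[v]) { move_vertex(v, best_cluster); }
+}
+```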
+ +In order to make efficient use of the GPU parallelism, the cuGraph implementation computes the delta modularity for *all* vertex/neighbor pairs using the *current* vertex assignment. Decisions on moving vertices will be made based upon these delta modularities. This will potentially make choices that the serial version would not make. In order to minimize some of the negative effects of this (as described in the Lu paper), the cuGraph implementation uses an Up/Down technique. In even numbered iterations a vertex can only move from cluster i to cluster j if i > j; in odd numbered iterations a vertex can only move from cluster i to cluster j if i < j. This prevents two vertices from swapping clusters in the same iteration of the loop. We have had great success in converging on high modularity clustering using this technique. + +## Calling Louvain + +The unit test code is the best place to search for examples on calling louvain. + + * [SG Implementation](../../tests/community/louvain_test.cpp) + * [MG Implementation](../../tests/community/mg_louvain_test.cpp) + +The API itself is very simple. There are two variations: + * Return a flat clustering + * Return a Dendrogram + +### Return a flat clustering + +The example assumes that you create an SG or MG graph somehow. The caller must create the clustering vector in device memory and pass in the raw pointer to that vector into the louvain function. + +```cpp +#include +... +using vertex_t = int32_t; // or int64_t, whichever is appropriate +using weight_t = float; // or double, whichever is appropriate +raft::handle_t handle; // Must be configured if MG +auto graph_view = graph.view(); // assumes you have created a graph somehow + +size_t level; +weight_t modularity; + +rmm::device_uvector clustering_v(graph_view.get_number_of_vertices(), handle.get_stream()); + +// louvain optionally supports two additional parameters: +// max_level - maximum level of the Dendrogram +// resolution - constant in the modularity computation +std::tie(level, modularity) = cugraph::louvain(handle, graph_view, clustering_v.data()); +``` + +### Return a Dendrogram + +The Dendrogram represents the levels of hierarchical clustering that the Louvain algorithm computes. There is a separate function that will flatten the clustering into the same result as above. Returning the Dendrogram, however, provides a finer level of detail on the intermediate results which can be helpful in more fully understanding the data. + +```cpp +#include +... 
+using vertex_t = int32_t; // or int64_t, whichever is appropriate +using weight_t = float; // or double, whichever is appropriate +raft::handle_t handle; // Must be configured if MG +auto graph_view = graph.view(); // assumes you have created a graph somehow + +cugraph::Dendrogram dendrogram; +weight_t modularity; + +// louvain optionally supports two additional parameters: +// max_level - maximum level of the Dendrogram +// resolution - constant in the modularity computation +std::tie(dendrogram, modularity) = cugraph::louvain(handle, graph_view); + +// This will get the equivalent result to the earlier example +rmm::device_uvector clustering_v(graph_view.get_number_of_vertices(), handle.get_stream()); +cugraph::flatten_dendrogram(handle, graph_view, dendrogram, clustering.data()); +``` + +## Leiden + +## ECG diff --git a/cpp/src/traversal/README.md b/cpp/src/traversal/README.md new file mode 100644 index 00000000000..7f436926de8 --- /dev/null +++ b/cpp/src/traversal/README.md @@ -0,0 +1,56 @@ +# Traversal +cuGraph traversal algorithms are contained in this directory + +## SSSP + +The unit test code is the best place to search for examples on calling SSSP. + + * [SG Implementation](../../tests/experimental/sssp_test.cpp) + * MG Implementation - TBD + +## Simple SSSP + +The example assumes that you create an SG or MG graph somehow. The caller must create the distances and predecessors vectors in device memory and pass in the raw pointers to those vectors into the SSSP function. + +```cpp +#include +... +using vertex_t = int32_t; // or int64_t, whichever is appropriate +using weight_t = float; // or double, whichever is appropriate +using result_t = weight_t; // could specify float or double also +raft::handle_t handle; // Must be configured if MG +auto graph_view = graph.view(); // assumes you have created a graph somehow +vertex_t source; // Initialized by user + +rmm::device_uvector distances_v(graph_view.get_number_of_vertices(), handle.get_stream()); +rmm::device_uvector predecessors_v(graph_view.get_number_of_vertices(), handle.get_stream()); + +cugraph::experimental::sssp(handle, graph_view, distances_v.begin(), predecessors_v.begin(), source, std::numeric_limits::max(), false); +``` + +## BFS + +The unit test code is the best place to search for examples on calling BFS. + + * [SG Implementation](../../tests/experimental/bfs_test.cpp) + * MG Implementation - TBD + +## Simple BFS + +The example assumes that you create an SG or MG graph somehow. The caller must create the distances and predecessors vectors in device memory and pass in the raw pointers to those vectors into the BFS function. + +```cpp +#include +... 
+using vertex_t = int32_t; // or int64_t, whichever is appropriate +using weight_t = float; // or double, whichever is appropriate +using result_t = weight_t; // could specify float or double also +raft::handle_t handle; // Must be configured if MG +auto graph_view = graph.view(); // assumes you have created a graph somehow +vertex_t source; // Initialized by user + +rmm::device_uvector distances_v(graph_view.get_number_of_vertices(), handle.get_stream()); +rmm::device_uvector predecessors_v(graph_view.get_number_of_vertices(), handle.get_stream()); + +cugraph::experimental::bfs(handle, graph_view, d_distances.begin(), d_predecessors.begin(), source, false, std::numeric_limits::max(), false); +``` From 59f6df62badb2f85c236d207ba0fbaab236079d4 Mon Sep 17 00:00:00 2001 From: Ray Douglass <3107146+raydouglass@users.noreply.github.com> Date: Wed, 31 Mar 2021 18:02:37 -0400 Subject: [PATCH 211/343] Revert "Update conda recipes pinning of repo dependencies" (#1493) Reverts rapidsai/cugraph#1485 Authors: - Ray Douglass (https://github.com/raydouglass) Approvers: - Mike Wendt (https://github.com/mike-wendt) URL: https://github.com/rapidsai/cugraph/pull/1493 --- conda/recipes/cugraph/meta.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conda/recipes/cugraph/meta.yaml b/conda/recipes/cugraph/meta.yaml index 4b845583181..1ef64ddbe72 100644 --- a/conda/recipes/cugraph/meta.yaml +++ b/conda/recipes/cugraph/meta.yaml @@ -25,13 +25,13 @@ requirements: build: - python x.x - cython>=0.29,<0.30 - - libcugraph={{ version }}=*_{{ GIT_DESCRIBE_NUMBER }} + - libcugraph={{ version }} - cudf={{ minor_version }} - ucx-py {{ minor_version }} - ucx-proc=*=gpu run: - python x.x - - libcugraph={{ version }}=*_{{ GIT_DESCRIBE_NUMBER }} + - libcugraph={{ version }} - cudf={{ minor_version }} - dask-cudf {{ minor_version }} - dask-cuda {{ minor_version }} From a7b634286a6bdb711e03ca1eefb90e88db513ab6 Mon Sep 17 00:00:00 2001 From: Joseph Nke <76006812+jnke2016@users.noreply.github.com> Date: Fri, 2 Apr 2021 12:31:25 -0500 Subject: [PATCH 212/343] Fix MNMG test failures and skip tests that are not supported on Pascal (#1498) * Made various fixes to `test_mg_betweenness_centrality.py` to address recent updates that were breaking these tests. * @afender updated egonet.cu to address an occasional test failure related to mem usage in certain environments. * @rlratzel updated python and notebook tests that contain code not supported on Pascal to be skipped when running on that arch. Also added a script to detect Pascal and used it to skip **all** C++ tests. 
_Note: a better way would be to edit each C++ test to use GTEST_SKIP to conditionally skip on Pascal, but since there are now so many to skip, and we get coverage from Python, this was the faster solution without sacrificing much/any coverage for a single platform._ Authors: - Joseph Nke (https://github.com/jnke2016) - Alex Fender (https://github.com/afender) - Rick Ratzel (https://github.com/rlratzel) Approvers: - Alex Fender (https://github.com/afender) - Rick Ratzel (https://github.com/rlratzel) - AJ Schmidt (https://github.com/ajschmidt8) URL: https://github.com/rapidsai/cugraph/pull/1498 --- benchmarks/bench_algos.py | 32 +++++ ci/test.sh | 18 ++- ci/utils/is_pascal.py | 39 ++++++ cpp/src/community/egonet.cu | 6 +- notebooks/centrality/Betweenness.ipynb | 1 + notebooks/centrality/Katz.ipynb | 1 + notebooks/community/Spectral-Clustering.ipynb | 1 + notebooks/community/Triangle-Counting.ipynb | 1 + .../components/ConnectedComponents.ipynb | 1 + notebooks/cores/core-number.ipynb | 1 + notebooks/cores/kcore.ipynb | 1 + notebooks/cores/ktruss.ipynb | 1 + notebooks/link_analysis/HITS.ipynb | 1 + .../link_prediction/Jaccard-Similarity.ipynb | 1 + .../link_prediction/Overlap-Similarity.ipynb | 1 + notebooks/structure/Renumber-2.ipynb | 1 + notebooks/structure/Renumber.ipynb | 1 + notebooks/structure/Symmetrize.ipynb | 1 + notebooks/traversal/SSSP.ipynb | 1 + .../test_mg_batch_betweenness_centrality.py | 5 +- python/cugraph/tests/test_balanced_cut.py | 12 +- .../tests/test_betweenness_centrality.py | 124 ++++++++++-------- python/cugraph/tests/test_bfs.py | 15 ++- python/cugraph/tests/test_connectivity.py | 24 +++- python/cugraph/tests/test_convert_matrix.py | 15 ++- python/cugraph/tests/test_core_number.py | 9 +- python/cugraph/tests/test_ecg.py | 3 + .../tests/test_edge_betweenness_centrality.py | 21 ++- python/cugraph/tests/test_egonet.py | 7 + .../cugraph/tests/test_filter_unreachable.py | 6 +- python/cugraph/tests/test_graph.py | 60 ++++++++- python/cugraph/tests/test_hits.py | 6 +- python/cugraph/tests/test_hypergraph.py | 68 +++++++++- python/cugraph/tests/test_jaccard.py | 16 +++ python/cugraph/tests/test_k_core.py | 9 +- python/cugraph/tests/test_k_truss_subgraph.py | 7 + python/cugraph/tests/test_katz_centrality.py | 9 +- .../tests/test_maximum_spanning_tree.py | 9 +- .../tests/test_minimum_spanning_tree.py | 9 +- python/cugraph/tests/test_modularity.py | 6 +- python/cugraph/tests/test_multigraph.py | 23 ++++ python/cugraph/tests/test_nx_convert.py | 13 +- python/cugraph/tests/test_overlap.py | 9 +- python/cugraph/tests/test_pagerank.py | 7 + python/cugraph/tests/test_paths.py | 41 +++++- python/cugraph/tests/test_renumber.py | 30 ++++- python/cugraph/tests/test_sssp.py | 21 ++- .../cugraph/tests/test_subgraph_extraction.py | 14 +- python/cugraph/tests/test_triangle_count.py | 12 +- python/cugraph/tests/test_utils.py | 7 + python/cugraph/tests/test_wjaccard.py | 6 +- python/cugraph/tests/test_woverlap.py | 7 +- 52 files changed, 633 insertions(+), 107 deletions(-) create mode 100644 ci/utils/is_pascal.py diff --git a/benchmarks/bench_algos.py b/benchmarks/bench_algos.py index f9f8bf9cf53..14c15ebc08c 100644 --- a/benchmarks/bench_algos.py +++ b/benchmarks/bench_algos.py @@ -162,6 +162,8 @@ def anyGraphWithTransposedAdjListComputed(request): ############################################################################### # Benchmarks @pytest.mark.ETL +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_create_graph(gpubenchmark, 
edgelistCreated): gpubenchmark(cugraph.from_cudf_edgelist, edgelistCreated, @@ -179,6 +181,8 @@ def bench_create_graph(gpubenchmark, edgelistCreated): warmup_iterations=10, max_time=0.005 ) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_create_digraph(gpubenchmark, edgelistCreated): gpubenchmark(cugraph.from_cudf_edgelist, edgelistCreated, @@ -188,27 +192,39 @@ def bench_create_digraph(gpubenchmark, edgelistCreated): @pytest.mark.ETL +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_renumber(gpubenchmark, edgelistCreated): gpubenchmark(NumberMap.renumber, edgelistCreated, "0", "1") +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_pagerank(gpubenchmark, anyGraphWithTransposedAdjListComputed): gpubenchmark(cugraph.pagerank, anyGraphWithTransposedAdjListComputed) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_bfs(gpubenchmark, anyGraphWithAdjListComputed): gpubenchmark(cugraph.bfs, anyGraphWithAdjListComputed, 0) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_force_atlas2(gpubenchmark, anyGraphWithAdjListComputed): gpubenchmark(cugraph.force_atlas2, anyGraphWithAdjListComputed, max_iter=50) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_sssp(gpubenchmark, anyGraphWithAdjListComputed): gpubenchmark(cugraph.sssp, anyGraphWithAdjListComputed, 0) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_jaccard(gpubenchmark, graphWithAdjListComputed): gpubenchmark(cugraph.jaccard, graphWithAdjListComputed) @@ -219,20 +235,28 @@ def bench_louvain(gpubenchmark, graphWithAdjListComputed): gpubenchmark(cugraph.louvain, graphWithAdjListComputed) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_weakly_connected_components(gpubenchmark, anyGraphWithAdjListComputed): gpubenchmark(cugraph.weakly_connected_components, anyGraphWithAdjListComputed) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_overlap(gpubenchmark, anyGraphWithAdjListComputed): gpubenchmark(cugraph.overlap, anyGraphWithAdjListComputed) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_triangles(gpubenchmark, graphWithAdjListComputed): gpubenchmark(cugraph.triangles, graphWithAdjListComputed) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_spectralBalancedCutClustering(gpubenchmark, graphWithAdjListComputed): gpubenchmark(cugraph.spectralBalancedCutClustering, @@ -247,19 +271,27 @@ def bench_spectralModularityMaximizationClustering( anyGraphWithAdjListComputed, 2) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_graph_degree(gpubenchmark, anyGraphWithAdjListComputed): gpubenchmark(anyGraphWithAdjListComputed.degree) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_graph_degrees(gpubenchmark, anyGraphWithAdjListComputed): gpubenchmark(anyGraphWithAdjListComputed.degrees) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_betweenness_centrality(gpubenchmark, 
anyGraphWithAdjListComputed): gpubenchmark(cugraph.betweenness_centrality, anyGraphWithAdjListComputed, k=10, seed=123) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_edge_betweenness_centrality(gpubenchmark, anyGraphWithAdjListComputed): gpubenchmark(cugraph.edge_betweenness_centrality, diff --git a/ci/test.sh b/ci/test.sh index 58cbb950f73..31660cd15ec 100755 --- a/ci/test.sh +++ b/ci/test.sh @@ -66,13 +66,17 @@ fi # EXITCODE for the script. set +e -echo "C++ gtests for cuGraph..." -for gt in tests/*_TEST; do - test_name=$(basename $gt) - echo "Running gtest $test_name" - ${gt} ${GTEST_FILTER} ${GTEST_ARGS} - echo "Ran gtest $test_name : return code was: $?, test script exit code is now: $EXITCODE" -done +if (python ${CUGRAPH_ROOT}/ci/utils/is_pascal.py); then + echo "WARNING: skipping C++ tests on Pascal GPU arch." +else + echo "C++ gtests for cuGraph..." + for gt in tests/*_TEST; do + test_name=$(basename $gt) + echo "Running gtest $test_name" + ${gt} ${GTEST_FILTER} ${GTEST_ARGS} + echo "Ran gtest $test_name : return code was: $?, test script exit code is now: $EXITCODE" + done +fi echo "Python pytest for cuGraph..." cd ${CUGRAPH_ROOT}/python diff --git a/ci/utils/is_pascal.py b/ci/utils/is_pascal.py new file mode 100644 index 00000000000..e55a3153a12 --- /dev/null +++ b/ci/utils/is_pascal.py @@ -0,0 +1,39 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re +import sys +import glob + +from numba import cuda + +# FIXME: consolidate this code with ci/gpu/notebook_list.py + +# +# Not strictly true... 
however what we mean is +# Pascal or earlier +# +pascal = False + +device = cuda.get_current_device() +# check for the attribute using both pre and post numba 0.53 names +cc = getattr(device, 'COMPUTE_CAPABILITY', None) or \ + getattr(device, 'compute_capability') +if (cc[0] < 7): + pascal = True + +# Return zero (success) if pascal is True +if pascal: + sys.exit(0) +else: + sys.exit(1) diff --git a/cpp/src/community/egonet.cu b/cpp/src/community/egonet.cu index 336a5c939b8..85ee327edb2 100644 --- a/cpp/src/community/egonet.cu +++ b/cpp/src/community/egonet.cu @@ -93,7 +93,6 @@ extract( hr_timer.start("ego_neighbors"); #endif -#pragma omp parallel for for (vertex_t i = 0; i < n_subgraphs; i++) { // get light handle from worker pool raft::handle_t light_handle(handle, i); @@ -152,8 +151,7 @@ extract( neighbors.resize(h_neighbors_offsets[n_subgraphs]); user_stream_view.synchronize(); -// Construct the neighboors list concurrently -#pragma omp parallel for + // Construct the neighboors list concurrently for (vertex_t i = 0; i < n_subgraphs; i++) { auto worker_stream_view = handle.get_internal_stream_view(i); thrust::copy(rmm::exec_policy(worker_stream_view), @@ -268,4 +266,4 @@ extract_ego(raft::handle_t const &, int64_t, int64_t); } // namespace experimental -} // namespace cugraph \ No newline at end of file +} // namespace cugraph diff --git a/notebooks/centrality/Betweenness.ipynb b/notebooks/centrality/Betweenness.ipynb index e4e33ef91e5..d748defe74c 100644 --- a/notebooks/centrality/Betweenness.ipynb +++ b/notebooks/centrality/Betweenness.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# Betweenness Centrality\n", + "# Does not run on Pascal\n", "\n", "In this notebook, we will compute the Betweenness centrality for both vertices and edges in our test datase using cuGraph and NetworkX. The NetworkX and cuGraph processes will be interleaved so that each step can be compared.\n", "\n", diff --git a/notebooks/centrality/Katz.ipynb b/notebooks/centrality/Katz.ipynb index 2330fc08de8..cdf8828b80a 100755 --- a/notebooks/centrality/Katz.ipynb +++ b/notebooks/centrality/Katz.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# Katz Centrality\n", + "# Does not run on Pascal\n", "\n", "In this notebook, we will compute the Katz centrality of each vertex in our test datase using both cuGraph and NetworkX. Additionally, NetworkX also contains a Numpy implementation that will used. The NetworkX and cuGraph processes will be interleaved so that each step can be compared.\n", "\n", diff --git a/notebooks/community/Spectral-Clustering.ipynb b/notebooks/community/Spectral-Clustering.ipynb index fcefae5eb60..a3aa538b062 100755 --- a/notebooks/community/Spectral-Clustering.ipynb +++ b/notebooks/community/Spectral-Clustering.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# Spectral Clustering \n", + "# Does not run on Pascal\n", "\n", "In this notebook, we will use cuGraph to identify the cluster in a test graph using Spectral Clustering with both the (A) Balance Cut metric, and (B) the Modularity Maximization metric\n", "\n", diff --git a/notebooks/community/Triangle-Counting.ipynb b/notebooks/community/Triangle-Counting.ipynb index 19d3f838fc6..796f6e59fe6 100755 --- a/notebooks/community/Triangle-Counting.ipynb +++ b/notebooks/community/Triangle-Counting.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# Triangle Counting\n", + "# Does not run on Pascal\n", "\n", "In this notebook, we will count the numner of trianges in our test dataset. 
The NetworkX and cuGraph processes will be interleaved so that each step can be compared.\n", "\n", diff --git a/notebooks/components/ConnectedComponents.ipynb b/notebooks/components/ConnectedComponents.ipynb index a9c82e6669f..e9669d75b38 100755 --- a/notebooks/components/ConnectedComponents.ipynb +++ b/notebooks/components/ConnectedComponents.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# Connected Components\n", + "# Does not run on Pascal\n", "\n", "In this notebook, we will use cuGraph to compute weakly and strongly connected components of a graph and display some useful information about the resulting components.\n", "\n", diff --git a/notebooks/cores/core-number.ipynb b/notebooks/cores/core-number.ipynb index 6190f653020..127898fb094 100755 --- a/notebooks/cores/core-number.ipynb +++ b/notebooks/cores/core-number.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# Core Number\n", + "# Does not run on Pascal\n", "\n", "\n", "In this notebook, we will use cuGraph to compute the core number of every vertex in our test graph \n", diff --git a/notebooks/cores/kcore.ipynb b/notebooks/cores/kcore.ipynb index 342f4ecd5f7..250a1ea2aa5 100755 --- a/notebooks/cores/kcore.ipynb +++ b/notebooks/cores/kcore.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# K-Cores\n", + "# Does not run on Pascal\n", "\n", "\n", "In this notebook, we will use cuGraph to identify the K-Cores clusters in a test graph \n", diff --git a/notebooks/cores/ktruss.ipynb b/notebooks/cores/ktruss.ipynb index e6470110666..2fe93247d67 100644 --- a/notebooks/cores/ktruss.ipynb +++ b/notebooks/cores/ktruss.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# K-Truss\n", + "# Does not run on Pascal\n", "\n", "\n", "In this notebook, we will use cuGraph to identify the K-Truss clusters in a test graph \n", diff --git a/notebooks/link_analysis/HITS.ipynb b/notebooks/link_analysis/HITS.ipynb index 01fd22929d5..891133a277c 100755 --- a/notebooks/link_analysis/HITS.ipynb +++ b/notebooks/link_analysis/HITS.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# HITS\n", + "# Does not run on Pascal\n", "\n", "In this notebook, we will use both NetworkX and cuGraph to compute HITS. \n", "The NetworkX and cuGraph processes will be interleaved so that each step can be compared.\n", diff --git a/notebooks/link_prediction/Jaccard-Similarity.ipynb b/notebooks/link_prediction/Jaccard-Similarity.ipynb index 21835da1cce..9a53e559323 100755 --- a/notebooks/link_prediction/Jaccard-Similarity.ipynb +++ b/notebooks/link_prediction/Jaccard-Similarity.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# Jaccard Similarity\n", + "# Does not run on Pascal\n", "----\n", "\n", "In this notebook we will explore the Jaccard vertex similarity metrics available in cuGraph. cuGraph supports:\n", diff --git a/notebooks/link_prediction/Overlap-Similarity.ipynb b/notebooks/link_prediction/Overlap-Similarity.ipynb index b8733ce4d80..ec02a8ebbea 100755 --- a/notebooks/link_prediction/Overlap-Similarity.ipynb +++ b/notebooks/link_prediction/Overlap-Similarity.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# Overlap Similarity\n", + "# Does not run on Pascal\n", "----\n", "\n", "In this notebook we will explore the Overlap Coefficient and compare it again Jaccard. 
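The "# Does not run on Pascal" banners added to the notebooks here and the (7, 0) threshold used throughout the test changes below refer to the same cutoff: compute capability 6.x is Pascal and 7.0 is Volta, so any device reporting a major version below 7 is skipped. A small illustrative mapping (not part of the patch) makes the comparison explicit.

# Illustrative only: how the (7, 0) cutoff relates to NVIDIA architecture names.
ARCH_BY_CC_MAJOR = {6: "Pascal", 7: "Volta / Turing", 8: "Ampere"}

def below_volta(compute_capability):
    """True when a (major, minor) compute capability falls below 7.0."""
    return tuple(compute_capability) < (7, 0)

assert below_volta((6, 1))        # e.g. GTX 1080 (Pascal): notebooks/tests skipped
assert not below_volta((7, 0))    # e.g. V100 (Volta): supported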
Similarity can be between neighboring vertices (default) or second hop neighbors\n", diff --git a/notebooks/structure/Renumber-2.ipynb b/notebooks/structure/Renumber-2.ipynb index d17c2b32191..aa923ba003f 100755 --- a/notebooks/structure/Renumber-2.ipynb +++ b/notebooks/structure/Renumber-2.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# Renumber\n", + "# Does not run on Pascal\n", "\n", "In this notebook, we will use the _renumber_ function to compute new vertex IDs.\n", "\n", diff --git a/notebooks/structure/Renumber.ipynb b/notebooks/structure/Renumber.ipynb index 047b53d62df..2a2dab0a1a7 100755 --- a/notebooks/structure/Renumber.ipynb +++ b/notebooks/structure/Renumber.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# Renumbering Test\n", + "# Does not run on Pascal\n", "\n", "In this notebook, we will use the _renumber_ function to compute new vertex IDs.\n", "\n", diff --git a/notebooks/structure/Symmetrize.ipynb b/notebooks/structure/Symmetrize.ipynb index 3cb84317742..5ba692b4696 100755 --- a/notebooks/structure/Symmetrize.ipynb +++ b/notebooks/structure/Symmetrize.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# Symmetrize\n", + "# Does not run on Pascal\n", "\n", "In this notebook, we will use the _symmetrize_ function to create bi-directional edges in an undirected graph\n", "\n", diff --git a/notebooks/traversal/SSSP.ipynb b/notebooks/traversal/SSSP.ipynb index d2baeb12e74..abea30eba15 100755 --- a/notebooks/traversal/SSSP.ipynb +++ b/notebooks/traversal/SSSP.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# Single Source Shortest Path (SSSP)\n", + "# Does not run on Pascal\n", "\n", "In this notebook, we will use cuGraph to compute the shortest path from a starting vertex to everyother vertex in our training dataset.\n", "\n", diff --git a/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py b/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py index 4b0f6629bc3..6e1e5ea380a 100644 --- a/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py +++ b/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -16,6 +16,7 @@ from cugraph.tests.dask.mg_context import MGContext, skip_if_not_enough_devices from cugraph.dask.common.mg_utils import is_single_gpu +from cugraph.tests import utils # Get parameters from standard betwenness_centrality_test from cugraph.tests.test_betweenness_centrality import ( @@ -36,7 +37,7 @@ # ============================================================================= # Parameters # ============================================================================= -DATASETS = ["../datasets/karate.csv"] +DATASETS = [utils.DATASETS_UNDIRECTED[0]] MG_DEVICE_COUNT_OPTIONS = [pytest.param(1, marks=pytest.mark.preset_gpu_count), pytest.param(2, marks=pytest.mark.preset_gpu_count), pytest.param(3, marks=pytest.mark.preset_gpu_count), diff --git a/python/cugraph/tests/test_balanced_cut.py b/python/cugraph/tests/test_balanced_cut.py index f0fc7152e56..4a609e1ef13 100644 --- a/python/cugraph/tests/test_balanced_cut.py +++ b/python/cugraph/tests/test_balanced_cut.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. 
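The Python test changes that follow all guard on cugraph.utilities.utils.is_device_version_less_than((7, 0)). That helper's implementation is not part of this diff; a plausible sketch, assuming it relies on the same numba compute-capability probe as ci/utils/is_pascal.py (an assumption, not the library's actual code), is shown below.

from numba import cuda

def is_device_version_less_than(min_version):
    # Sketch only: mirrors the probe in ci/utils/is_pascal.py, covering both
    # the pre and post numba 0.53 attribute names.
    device = cuda.get_current_device()
    cc = getattr(device, 'COMPUTE_CAPABILITY', None) or \
        getattr(device, 'compute_capability')
    return tuple(cc) < tuple(min_version)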
# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -20,6 +20,7 @@ import cudf import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than def cugraph_call(G, partitions): @@ -59,6 +60,9 @@ def random_call(G, partitions): # Test all combinations of default/managed and pooled/non-pooled allocation +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("partitions", PARTITIONS) def test_edge_cut_clustering(graph_file, partitions): @@ -81,6 +85,9 @@ def test_edge_cut_clustering(graph_file, partitions): assert cu_score < rand_score +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("partitions", PARTITIONS) def test_edge_cut_clustering_with_edgevals(graph_file, partitions): @@ -123,6 +130,9 @@ def test_digraph_rejected(): cugraph_call(G, 2) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("partitions", PARTITIONS) def test_edge_cut_clustering_with_edgevals_nx(graph_file, partitions): diff --git a/python/cugraph/tests/test_betweenness_centrality.py b/python/cugraph/tests/test_betweenness_centrality.py index f338e5aa633..3177b78de47 100755 --- a/python/cugraph/tests/test_betweenness_centrality.py +++ b/python/cugraph/tests/test_betweenness_centrality.py @@ -17,6 +17,7 @@ import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than import random import numpy as np import cudf @@ -55,7 +56,7 @@ # Comparison functions # ============================================================================= def calc_betweenness_centrality( - graph_obj_tuple, + graph_file, directed=True, k=None, normalized=False, @@ -68,49 +69,36 @@ def calc_betweenness_centrality( edgevals=False, ): """ Generate both cugraph and networkx betweenness centrality - Parameters ---------- graph_file : string Path to COO Graph representation in .csv format - directed : bool, optional, default=True - k : int or None, optional, default=None int: Number of sources to sample from None: All sources are used to compute - normalized : bool True: Normalize Betweenness Centrality scores False: Scores are left unnormalized - weight : cudf.DataFrame: Not supported as of 06/2020 - endpoints : bool True: Endpoints are included when computing scores False: Endpoints are not considered - seed : int or None, optional, default=None Seed for random sampling of the starting point - result_dtype : numpy.dtype Expected type of the result, either np.float32 or np.float64 - use_k_full : bool When True, if k is None replaces k by the number of sources of the Graph - multi_gpu_batch : bool When True, enable mg batch after constructing the graph - edgevals: bool When True, enable tests with weighted graph, should be ignored during computation. - Returns ------- - sorted_df : cudf.DataFrame Contains 'vertex' and 'cu_bc' 'ref_bc' columns, where 'cu_bc' and 'ref_bc' are the two betweenness centrality scores to compare. 
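Because the same three-line skipif decorator is repeated on nearly every test in the remainder of this patch, it is worth noting, purely as an aside and not a change this patch makes, that pytest allows the marker to be bound once and reused, either per test or for a whole module.

import pytest
from cugraph.utilities.utils import is_device_version_less_than

# The name requires_volta and the test below are illustrative; the patch
# spells the decorator out at each use site instead.
requires_volta = pytest.mark.skipif(
    is_device_version_less_than((7, 0)), reason="Not supported on Pascal"
)

@requires_volta
def test_example():
    assert True

# Or, to guard every test in a module:
# pytestmark = requires_volta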
@@ -120,7 +108,8 @@ def calc_betweenness_centrality( G = None Gnx = None - G, Gnx = graph_obj_tuple + G, Gnx = utils.build_cu_and_nx_graphs(graph_file, directed=directed, + edgevals=edgevals) assert G is not None and Gnx is not None if multi_gpu_batch: @@ -298,67 +287,52 @@ def prepare_test(): gc.collect() -# ============================================================================= -# Pytest Fixtures -# ============================================================================= -DIRECTED = [pytest.param(d) for d in DIRECTED_GRAPH_OPTIONS] -DATASETS_SMALL = [pytest.param(d) for d in utils.DATASETS_SMALL] -DATASETS_UNRENUMBERED = [pytest.param(d) for d in utils.DATASETS_UNRENUMBERED] -WEIGHTED_GRAPH_OPTIONS = [pytest.param(w) for w in WEIGHTED_GRAPH_OPTIONS] - - -small_graph_fixture_params = utils.genFixtureParamsProduct( - (DATASETS_SMALL, "grph"), - (DIRECTED, "dirctd"), - (WEIGHTED_GRAPH_OPTIONS, "wgtd_gph_opts")) - -unrenumbered_graph_fixture_params = utils.genFixtureParamsProduct( - (DATASETS_UNRENUMBERED, "grph"), - (DIRECTED, "dirctd"), - (WEIGHTED_GRAPH_OPTIONS, "wgtd_gph_opts")) - - -@pytest.fixture(scope="module", params=small_graph_fixture_params) -def get_cu_nx_graph_datasets_small(request): - return utils.build_cu_and_nx_graphs(*request.param) - - -@pytest.fixture(scope="module", params=unrenumbered_graph_fixture_params) -def get_cu_nx_graph_datasets_unrenumbered(request): - return utils.build_cu_and_nx_graphs(*request.param) - - # ============================================================================= # Tests # ============================================================================= +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) +@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) +@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) @pytest.mark.parametrize("weight", [None]) @pytest.mark.parametrize("endpoints", ENDPOINTS_OPTIONS) @pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS) @pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) +@pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS) def test_betweenness_centrality( - get_cu_nx_graph_datasets_small, + graph_file, + directed, subset_size, normalized, weight, endpoints, subset_seed, result_dtype, + edgevals ): prepare_test() sorted_df = calc_betweenness_centrality( - get_cu_nx_graph_datasets_small, + graph_file, + directed=directed, normalized=normalized, k=subset_size, weight=weight, endpoints=endpoints, seed=subset_seed, result_dtype=result_dtype, + edgevals=edgevals, ) compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) +@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) +@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", [None]) @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) @pytest.mark.parametrize("weight", [None]) @@ -366,8 +340,10 @@ def test_betweenness_centrality( @pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS) @pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) @pytest.mark.parametrize("use_k_full", [True]) +@pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS) def test_betweenness_centrality_k_full( - get_cu_nx_graph_datasets_small, + graph_file, + directed, subset_size, 
normalized, weight, @@ -375,12 +351,14 @@ def test_betweenness_centrality_k_full( subset_seed, result_dtype, use_k_full, + edgevals ): """Tests full betweenness centrality by using k = G.number_of_vertices() instead of k=None, checks that k scales properly""" prepare_test() sorted_df = calc_betweenness_centrality( - get_cu_nx_graph_datasets_small, + graph_file, + directed=directed, normalized=normalized, k=subset_size, weight=weight, @@ -388,6 +366,7 @@ def test_betweenness_centrality_k_full( seed=subset_seed, result_dtype=result_dtype, use_k_full=use_k_full, + edgevals=edgevals ) compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") @@ -396,103 +375,134 @@ def test_betweenness_centrality_k_full( # the function operating the comparison inside is first proceeding # to a random sampling over the number of vertices (thus direct offsets) # in the graph structure instead of actual vertices identifiers +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) +@pytest.mark.parametrize("graph_file", utils.DATASETS_UNRENUMBERED) +@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) @pytest.mark.parametrize("weight", [None]) @pytest.mark.parametrize("endpoints", ENDPOINTS_OPTIONS) @pytest.mark.parametrize("subset_seed", [None]) @pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) +@pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS) def test_betweenness_centrality_fixed_sample( - get_cu_nx_graph_datasets_unrenumbered, + graph_file, + directed, subset_size, normalized, weight, endpoints, subset_seed, result_dtype, + edgevals ): """Test Betweenness Centrality using a subset - Only k sources are considered for an approximate Betweenness Centrality """ prepare_test() sorted_df = calc_betweenness_centrality( - get_cu_nx_graph_datasets_unrenumbered, + graph_file, + directed=directed, k=subset_size, normalized=normalized, weight=weight, endpoints=endpoints, seed=subset_seed, result_dtype=result_dtype, + edgevals=edgevals ) compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) +@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) +@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) @pytest.mark.parametrize("weight", [[]]) @pytest.mark.parametrize("endpoints", ENDPOINTS_OPTIONS) @pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS) @pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) +@pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS) def test_betweenness_centrality_weight_except( - get_cu_nx_graph_datasets_small, + graph_file, + directed, subset_size, normalized, weight, endpoints, subset_seed, result_dtype, + edgevals ): """Calls betwenness_centrality with weight - As of 05/28/2020, weight is not supported and should raise a NotImplementedError """ prepare_test() with pytest.raises(NotImplementedError): sorted_df = calc_betweenness_centrality( - get_cu_nx_graph_datasets_small, + graph_file, + directed=directed, k=subset_size, normalized=normalized, weight=weight, endpoints=endpoints, seed=subset_seed, result_dtype=result_dtype, + edgevals=edgevals ) compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") +@pytest.mark.skipif( + 
is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) +@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) +@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @pytest.mark.parametrize("weight", [None]) @pytest.mark.parametrize("endpoints", ENDPOINTS_OPTIONS) @pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS) @pytest.mark.parametrize("result_dtype", [str]) +@pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS) def test_betweenness_invalid_dtype( - get_cu_nx_graph_datasets_small, + graph_file, + directed, subset_size, normalized, weight, endpoints, subset_seed, result_dtype, + edgevals ): """Test calls edge_betwenness_centrality an invalid type""" prepare_test() with pytest.raises(TypeError): sorted_df = calc_betweenness_centrality( - get_cu_nx_graph_datasets_small, + graph_file, + directed=directed, k=subset_size, normalized=normalized, weight=weight, endpoints=endpoints, seed=subset_seed, result_dtype=result_dtype, + edgevals=edgevals ) compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS) diff --git a/python/cugraph/tests/test_bfs.py b/python/cugraph/tests/test_bfs.py index 0070a34248c..00996fd3bb3 100644 --- a/python/cugraph/tests/test_bfs.py +++ b/python/cugraph/tests/test_bfs.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -19,6 +19,7 @@ import pytest import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than import random # Temporarily suppress warnings till networkX fixes deprecation warnings @@ -440,6 +441,9 @@ def dataset_nxresults_allstartvertices_spc(small_dataset_nx_graph): # ============================================================================= # Tests # ============================================================================= +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("cugraph_input_type", utils.CUGRAPH_INPUT_TYPES) def test_bfs(gpubenchmark, dataset_nxresults_startvertex_spc, cugraph_input_type): @@ -467,6 +471,9 @@ def test_bfs(gpubenchmark, dataset_nxresults_startvertex_spc, ) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("cugraph_input_type", utils.NX_INPUT_TYPES + utils.MATRIX_INPUT_TYPES) def test_bfs_nonnative_inputs(gpubenchmark, @@ -477,6 +484,9 @@ def test_bfs_nonnative_inputs(gpubenchmark, cugraph_input_type) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("cugraph_input_type", utils.CUGRAPH_INPUT_TYPES) def test_bfs_spc_full(gpubenchmark, dataset_nxresults_allstartvertices_spc, cugraph_input_type): @@ -507,6 +517,9 @@ def test_bfs_spc_full(gpubenchmark, dataset_nxresults_allstartvertices_spc, ) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_scipy_api_compat(): graph_file = utils.DATASETS[0] diff --git a/python/cugraph/tests/test_connectivity.py b/python/cugraph/tests/test_connectivity.py index f957c4b417b..14572ab748d 100644 --- a/python/cugraph/tests/test_connectivity.py +++ b/python/cugraph/tests/test_connectivity.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -28,6 +28,7 @@ import cudf import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -290,6 +291,9 @@ def single_dataset_nxresults_strong(request): # ============================================================================= # Tests # ============================================================================= +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("cugraph_input_type", utils.CUGRAPH_DIR_INPUT_TYPES) def test_weak_cc(gpubenchmark, dataset_nxresults_weak, cugraph_input_type): (graph_file, netx_labels, @@ -329,6 +333,9 @@ def test_weak_cc(gpubenchmark, dataset_nxresults_weak, cugraph_input_type): assert nx_vertices == cg_vertices +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("cugraph_input_type", utils.NX_DIR_INPUT_TYPES + utils.MATRIX_INPUT_TYPES) def test_weak_cc_nonnative_inputs(gpubenchmark, @@ -339,6 +346,9 @@ def test_weak_cc_nonnative_inputs(gpubenchmark, cugraph_input_type) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("cugraph_input_type", utils.CUGRAPH_DIR_INPUT_TYPES) def test_strong_cc(gpubenchmark, dataset_nxresults_strong, cugraph_input_type): @@ -382,6 +392,9 @@ def test_strong_cc(gpubenchmark, dataset_nxresults_strong, assert nx_vertices == cg_vertices +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("cugraph_input_type", utils.NX_DIR_INPUT_TYPES + utils.MATRIX_INPUT_TYPES) def test_strong_cc_nonnative_inputs(gpubenchmark, @@ -392,16 +405,25 @@ def test_strong_cc_nonnative_inputs(gpubenchmark, cugraph_input_type) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_scipy_api_compat_weak(single_dataset_nxresults_weak): (graph_file, _, _, _, api_type) = single_dataset_nxresults_weak assert_scipy_api_compat(graph_file, api_type) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_scipy_api_compat_strong(single_dataset_nxresults_strong): (graph_file, _, _, _, api_type) = single_dataset_nxresults_strong assert_scipy_api_compat(graph_file, api_type) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("connection_type", ["strong", "weak"]) def test_scipy_api_compat(connection_type): if connection_type == "strong": diff --git a/python/cugraph/tests/test_convert_matrix.py b/python/cugraph/tests/test_convert_matrix.py index d418dd7ce2e..4d6c90364d8 100644 --- a/python/cugraph/tests/test_convert_matrix.py +++ b/python/cugraph/tests/test_convert_matrix.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -15,6 +15,7 @@ import pytest import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than import numpy as np # Temporarily suppress warnings till networkX fixes deprecation warnings @@ -36,6 +37,9 @@ def setup_function(): gc.collect() +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_to_from_pandas(graph_file): # Read in the graph @@ -82,6 +86,9 @@ def test_to_from_pandas(graph_file): assert exp_pdf.equals(res_pdf) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_from_to_numpy(graph_file): # Read in the graph @@ -150,6 +157,9 @@ def test_from_to_numpy(graph_file): assert exp_pdf.equals(res_pdf) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_from_edgelist(graph_file): """ @@ -165,6 +175,9 @@ def test_from_edgelist(graph_file): assert G1.EdgeList == G2.EdgeList +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_from_adjlist(graph_file): """ diff --git a/python/cugraph/tests/test_core_number.py b/python/cugraph/tests/test_core_number.py index edbc7b0597b..c2394cdf735 100644 --- a/python/cugraph/tests/test_core_number.py +++ b/python/cugraph/tests/test_core_number.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -15,6 +15,7 @@ import pytest import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than from cugraph.utilities import df_score_to_dictionary # Temporarily suppress warnings till networkX fixes deprecation warnings @@ -78,6 +79,9 @@ def calc_core_number(graph_file): # https://github.com/rapidsai/cugraph/issues/1045 # # @pytest.mark.parametrize("graph_file", utils.DATASETS) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_core_number(graph_file): gc.collect() @@ -91,6 +95,9 @@ def test_core_number(graph_file): assert cg_num_dic == nx_num +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_core_number_nx(graph_file): gc.collect() diff --git a/python/cugraph/tests/test_ecg.py b/python/cugraph/tests/test_ecg.py index ba705a787ee..60f97715efa 100644 --- a/python/cugraph/tests/test_ecg.py +++ b/python/cugraph/tests/test_ecg.py @@ -55,6 +55,9 @@ def golden_call(graph_file): ENSEMBLE_SIZES = [16, 32] +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", DATASETS) @pytest.mark.parametrize("min_weight", MIN_WEIGHTS) @pytest.mark.parametrize("ensemble_size", ENSEMBLE_SIZES) diff --git a/python/cugraph/tests/test_edge_betweenness_centrality.py b/python/cugraph/tests/test_edge_betweenness_centrality.py index 529b0b9de9c..224998df48c 100644 --- a/python/cugraph/tests/test_edge_betweenness_centrality.py +++ b/python/cugraph/tests/test_edge_betweenness_centrality.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION.: +# Copyright (c) 2019-2021, NVIDIA CORPORATION.: # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -17,6 +17,7 @@ import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than import random import numpy as np import cupy @@ -300,6 +301,9 @@ def prepare_test(): gc.collect() +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @@ -332,6 +336,9 @@ def test_edge_betweenness_centrality( compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", [None]) @@ -373,6 +380,9 @@ def test_edge_betweenness_centrality_k_full( # the function operating the comparison inside is first proceeding # to a random sampling over the number of vertices (thus direct offsets) # in the graph structure instead of actual vertices identifiers +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNRENUMBERED) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @@ -409,6 +419,9 @@ def test_edge_betweenness_centrality_fixed_sample( compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @@ -447,6 +460,9 @@ def test_edge_betweenness_centrality_weight_except( compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) @@ -482,6 +498,9 @@ def test_edge_betweenness_invalid_dtype( compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS) diff --git a/python/cugraph/tests/test_egonet.py b/python/cugraph/tests/test_egonet.py index b259c2567dc..fb04674a52b 100644 --- a/python/cugraph/tests/test_egonet.py +++ b/python/cugraph/tests/test_egonet.py @@ -17,6 +17,7 @@ import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -35,6 +36,9 @@ RADIUS = [1, 2, 3] +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("seed", SEEDS) @pytest.mark.parametrize("radius", RADIUS) @@ -54,6 +58,9 @@ def test_ego_graph_nx(graph_file, seed, radius): 
assert nx.is_isomorphic(ego_nx, ego_cugraph) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("seeds", [[0, 5, 13]]) @pytest.mark.parametrize("radius", [1, 2, 3]) diff --git a/python/cugraph/tests/test_filter_unreachable.py b/python/cugraph/tests/test_filter_unreachable.py index 29b862f0285..f89dbba4e30 100644 --- a/python/cugraph/tests/test_filter_unreachable.py +++ b/python/cugraph/tests/test_filter_unreachable.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -18,6 +18,7 @@ import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -36,6 +37,9 @@ SOURCES = [1] +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("source", SOURCES) def test_filter_unreachable(graph_file, source): diff --git a/python/cugraph/tests/test_graph.py b/python/cugraph/tests/test_graph.py index d8d5a504070..1a032bdaf17 100644 --- a/python/cugraph/tests/test_graph.py +++ b/python/cugraph/tests/test_graph.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -23,6 +23,7 @@ from cudf.tests.utils import assert_eq import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than # MG import cugraph.dask as dcg @@ -162,6 +163,9 @@ def test_version(): # Test +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_add_edge_list_to_adj_list(graph_file): cu_M = utils.read_csv_file(graph_file) @@ -182,6 +186,9 @@ def test_add_edge_list_to_adj_list(graph_file): # Test +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_add_adj_list_to_edge_list(graph_file): Mnx = utils.read_csv_for_nx(graph_file) @@ -208,6 +215,9 @@ def test_add_adj_list_to_edge_list(graph_file): # Test +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_view_edge_list_from_adj_list(graph_file): Mnx = utils.read_csv_for_nx(graph_file) @@ -229,6 +239,9 @@ def test_view_edge_list_from_adj_list(graph_file): # Test +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_delete_edge_list_delete_adj_list(graph_file): Mnx = utils.read_csv_for_nx(graph_file) @@ -257,6 +270,9 @@ def test_delete_edge_list_delete_adj_list(graph_file): # Test +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_add_edge_or_adj_list_after_add_edge_or_adj_list(graph_file): Mnx = utils.read_csv_for_nx(graph_file) @@ -296,6 +312,9 @@ def test_add_edge_or_adj_list_after_add_edge_or_adj_list(graph_file): # Test +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_edges_for_Graph(graph_file): cu_M = utils.read_csv_file(graph_file) @@ -334,6 +353,9 @@ def test_edges_for_Graph(graph_file): # Test +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_view_edge_list_for_Graph(graph_file): cu_M = utils.read_csv_file(graph_file) @@ -377,6 +399,9 @@ def test_view_edge_list_for_Graph(graph_file): # Test +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize('graph_file', utils.DATASETS) def test_consolidation(graph_file): cluster = LocalCUDACluster() @@ -411,6 +436,9 @@ def test_consolidation(graph_file): # Test +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize('graph_file', utils.DATASETS_SMALL) def test_two_hop_neighbors(graph_file): cu_M = utils.read_csv_file(graph_file) @@ -430,6 +458,9 @@ def test_two_hop_neighbors(graph_file): # Test +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_degree_functionality(graph_file): M = utils.read_csv_for_nx(graph_file) @@ -468,6 +499,9 @@ def test_degree_functionality(graph_file): # Test +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) 
@pytest.mark.parametrize("graph_file", utils.DATASETS) def test_degrees_functionality(graph_file): M = utils.read_csv_for_nx(graph_file) @@ -499,6 +533,9 @@ def test_degrees_functionality(graph_file): # Test +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_number_of_vertices(graph_file): cu_M = utils.read_csv_file(graph_file) @@ -517,6 +554,9 @@ def test_number_of_vertices(graph_file): # Test +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) def test_to_directed(graph_file): cu_M = utils.read_csv_file(graph_file) @@ -544,6 +584,9 @@ def test_to_directed(graph_file): # Test +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) def test_to_undirected(graph_file): # Read data and then convert to directed by dropped some edges @@ -578,6 +621,9 @@ def test_to_undirected(graph_file): # Test +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_has_edge(graph_file): cu_M = utils.read_csv_file(graph_file) @@ -593,6 +639,9 @@ def test_has_edge(graph_file): # Test +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_has_node(graph_file): cu_M = utils.read_csv_file(graph_file) @@ -606,6 +655,9 @@ def test_has_node(graph_file): assert G.has_node(n) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_invalid_has_node(): df = cudf.DataFrame([[1, 2]], columns=["src", "dst"]) G = cugraph.Graph() @@ -615,6 +667,9 @@ def test_invalid_has_node(): assert not G.has_node(G.number_of_nodes() + 1) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize('graph_file', utils.DATASETS) def test_bipartite_api(graph_file): # This test only tests the functionality of adding set of nodes and @@ -648,6 +703,9 @@ def test_bipartite_api(graph_file): # Test +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_neighbors(graph_file): cu_M = utils.read_csv_file(graph_file) diff --git a/python/cugraph/tests/test_hits.py b/python/cugraph/tests/test_hits.py index 6b6f54937a6..58c03bebd88 100644 --- a/python/cugraph/tests/test_hits.py +++ b/python/cugraph/tests/test_hits.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -20,6 +20,7 @@ import cudf import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -79,6 +80,9 @@ def networkx_call(M, max_iter, tol): TOLERANCE = [1.0e-06] +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) @pytest.mark.parametrize("max_iter", MAX_ITERATIONS) @pytest.mark.parametrize("tol", TOLERANCE) diff --git a/python/cugraph/tests/test_hypergraph.py b/python/cugraph/tests/test_hypergraph.py index dbce89905cd..9027fdcffd6 100644 --- a/python/cugraph/tests/test_hypergraph.py +++ b/python/cugraph/tests/test_hypergraph.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -34,12 +34,15 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -import cudf -from cudf.tests.utils import assert_eq -import cugraph import datetime as dt + import pandas as pd import pytest +import cudf +from cudf.tests.utils import assert_eq + +import cugraph +from cugraph.utilities.utils import is_device_version_less_than simple_df = cudf.DataFrame.from_pandas(pd.DataFrame({ @@ -56,6 +59,9 @@ })) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_complex_df(): complex_df = pd.DataFrame({ "src": [0, 1, 2, 3], @@ -101,6 +107,9 @@ def test_complex_df(): cugraph.hypergraph(complex_df) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("categorical_metadata", [False, True]) def test_hyperedges(categorical_metadata): @@ -171,6 +180,9 @@ def test_hyperedges(categorical_metadata): assert_eq(len(h[k]), v) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_hyperedges_direct(): h = cugraph.hypergraph(hyper_df, direct=True) @@ -179,6 +191,9 @@ def test_hyperedges_direct(): assert_eq(len(h["nodes"]), 9) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_hyperedges_direct_categories(): h = cugraph.hypergraph( @@ -195,6 +210,9 @@ def test_hyperedges_direct_categories(): assert_eq(len(h["nodes"]), 6) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_hyperedges_direct_manual_shaping(): h1 = cugraph.hypergraph( @@ -212,6 +230,9 @@ def test_hyperedges_direct_manual_shaping(): assert_eq(len(h2["edges"]), 12) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("categorical_metadata", [False, True]) def test_drop_edge_attrs(categorical_metadata): @@ -263,6 +284,9 @@ def test_drop_edge_attrs(categorical_metadata): assert_eq(len(h[k]), v) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("categorical_metadata", [False, True]) def test_drop_edge_attrs_direct(categorical_metadata): @@ -304,6 +328,9 @@ def 
test_drop_edge_attrs_direct(categorical_metadata): assert_eq(len(h[k]), v) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_skip_hyper(): df = cudf.DataFrame.from_pandas(pd.DataFrame({ @@ -318,6 +345,9 @@ def test_skip_hyper(): assert len(hg["graph"].edges()) == 6 +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_skip_drop_na_hyper(): df = cudf.DataFrame.from_pandas(pd.DataFrame({ @@ -332,6 +362,9 @@ def test_skip_drop_na_hyper(): assert len(hg["graph"].edges()) == 5 +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_skip_direct(): df = cudf.DataFrame.from_pandas(pd.DataFrame({ @@ -346,6 +379,9 @@ def test_skip_direct(): assert len(hg["graph"].edges()) == 3 +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_skip_drop_na_direct(): df = cudf.DataFrame.from_pandas(pd.DataFrame({ @@ -360,6 +396,9 @@ def test_skip_drop_na_direct(): assert len(hg["graph"].edges()) == 2 +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_drop_na_hyper(): df = cudf.DataFrame.from_pandas( @@ -372,6 +411,9 @@ def test_drop_na_hyper(): assert len(hg["graph"].edges()) == 4 +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_drop_na_direct(): df = cudf.DataFrame.from_pandas( @@ -384,6 +426,9 @@ def test_drop_na_direct(): assert len(hg["graph"].edges()) == 1 +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_skip_na_hyperedge(): nans_df = cudf.DataFrame.from_pandas(pd.DataFrame({ @@ -403,6 +448,9 @@ def test_skip_na_hyperedge(): assert_eq(len(default_h_edges), len(expected_hits)) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_hyper_to_pa_vanilla(): df = cudf.DataFrame.from_pandas(pd.DataFrame({ @@ -417,6 +465,9 @@ def test_hyper_to_pa_vanilla(): assert len(edges_err) == 6 +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_hyper_to_pa_mixed(): df = cudf.DataFrame.from_pandas(pd.DataFrame({ @@ -431,6 +482,9 @@ def test_hyper_to_pa_mixed(): assert len(edges_err) == 6 +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_hyper_to_pa_na(): df = cudf.DataFrame.from_pandas(pd.DataFrame({ @@ -448,6 +502,9 @@ def test_hyper_to_pa_na(): assert len(edges_err) == 6 +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_hyper_to_pa_all(): hg = cugraph.hypergraph(simple_df, ["id", "a1", "🙈"]) nodes_arr = hg["graph"].nodes().to_arrow() @@ -458,6 +515,9 @@ def test_hyper_to_pa_all(): assert len(edges_err) == 9 +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_hyper_to_pa_all_direct(): hg = cugraph.hypergraph(simple_df, ["id", "a1", "🙈"], direct=True) nodes_arr = hg["graph"].nodes().to_arrow() diff --git a/python/cugraph/tests/test_jaccard.py b/python/cugraph/tests/test_jaccard.py index b61101ef1d0..87e9a5a3cc7 100644 --- a/python/cugraph/tests/test_jaccard.py +++ b/python/cugraph/tests/test_jaccard.py @@ -17,6 +17,7 @@ import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than from pathlib import PurePath # Temporarily suppress warnings 
till networkX fixes deprecation warnings @@ -91,6 +92,9 @@ def networkx_call(M): return src, dst, coeff +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_jaccard(graph_file): gc.collect() @@ -113,6 +117,9 @@ def test_jaccard(graph_file): assert err == 0 +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", [PurePath( utils.RAPIDS_DATASET_ROOT_DIR)/"netscience.csv"] ) @@ -137,6 +144,9 @@ def test_jaccard_edgevals(graph_file): assert err == 0 +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_jaccard_two_hop(graph_file): gc.collect() @@ -169,6 +179,9 @@ def test_jaccard_two_hop(graph_file): assert diff < 1.0e-6 +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_jaccard_two_hop_edge_vals(graph_file): gc.collect() @@ -203,6 +216,9 @@ def test_jaccard_two_hop_edge_vals(graph_file): assert diff < 1.0e-6 +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_jaccard_nx(graph_file): gc.collect() diff --git a/python/cugraph/tests/test_k_core.py b/python/cugraph/tests/test_k_core.py index 5e3220dcfb1..4d3e4903d33 100644 --- a/python/cugraph/tests/test_k_core.py +++ b/python/cugraph/tests/test_k_core.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -17,6 +17,7 @@ import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -65,6 +66,9 @@ def compare_edges(cg, nxg): return True +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_core_number_Graph(graph_file): gc.collect() @@ -74,6 +78,9 @@ def test_core_number_Graph(graph_file): assert compare_edges(cu_kcore, nx_kcore) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_core_number_Graph_nx(graph_file): gc.collect() diff --git a/python/cugraph/tests/test_k_truss_subgraph.py b/python/cugraph/tests/test_k_truss_subgraph.py index a86490fb561..02b95f01a01 100644 --- a/python/cugraph/tests/test_k_truss_subgraph.py +++ b/python/cugraph/tests/test_k_truss_subgraph.py @@ -17,6 +17,7 @@ import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than import numpy as np @@ -73,6 +74,9 @@ def compare_k_truss(k_truss_cugraph, k, ground_truth_file): return True +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file, nx_ground_truth", utils.DATASETS_KTRUSS) def test_ktruss_subgraph_Graph(graph_file, nx_ground_truth): gc.collect() @@ -86,6 +90,9 @@ def test_ktruss_subgraph_Graph(graph_file, nx_ground_truth): compare_k_truss(k_subgraph, k, nx_ground_truth) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file, nx_ground_truth", utils.DATASETS_KTRUSS) def test_ktruss_subgraph_Graph_nx(graph_file, nx_ground_truth): gc.collect() diff --git a/python/cugraph/tests/test_katz_centrality.py b/python/cugraph/tests/test_katz_centrality.py index a2a03c1518b..864b2974117 100644 --- a/python/cugraph/tests/test_katz_centrality.py +++ b/python/cugraph/tests/test_katz_centrality.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -17,6 +17,7 @@ import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -70,6 +71,9 @@ def calc_katz(graph_file): # https://github.com/rapidsai/cugraph/issues/1042 # # @pytest.mark.parametrize("graph_file", utils.DATASETS) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_katz_centrality(graph_file): gc.collect() @@ -82,6 +86,9 @@ def test_katz_centrality(graph_file): assert topKNX.equals(topKCU) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_katz_centrality_nx(graph_file): gc.collect() diff --git a/python/cugraph/tests/test_maximum_spanning_tree.py b/python/cugraph/tests/test_maximum_spanning_tree.py index e20e2f72267..0e55c7f15d7 100644 --- a/python/cugraph/tests/test_maximum_spanning_tree.py +++ b/python/cugraph/tests/test_maximum_spanning_tree.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -17,6 +17,7 @@ import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than import rmm import cudf import time @@ -36,6 +37,9 @@ print("Networkx version : {} ".format(nx.__version__)) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED_WEIGHTS) def test_maximum_spanning_tree_nx(graph_file): gc.collect() @@ -71,6 +75,9 @@ def test_maximum_spanning_tree_nx(graph_file): ] +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.skip(reason="Skipping large tests") @pytest.mark.parametrize("graph_size", DATASETS_SIZES) def test_random_maximum_spanning_tree_nx(graph_size): diff --git a/python/cugraph/tests/test_minimum_spanning_tree.py b/python/cugraph/tests/test_minimum_spanning_tree.py index 55ebdcfda08..15404bc8acf 100644 --- a/python/cugraph/tests/test_minimum_spanning_tree.py +++ b/python/cugraph/tests/test_minimum_spanning_tree.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -17,6 +17,7 @@ import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than import rmm import cudf import time @@ -36,6 +37,9 @@ print("Networkx version : {} ".format(nx.__version__)) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED_WEIGHTS) def test_minimum_spanning_tree_nx(graph_file): gc.collect() @@ -71,6 +75,9 @@ def test_minimum_spanning_tree_nx(graph_file): ] +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.skip(reason="Skipping large tests") @pytest.mark.parametrize("graph_size", DATASETS_SIZES) def test_random_minimum_spanning_tree_nx(graph_size): diff --git a/python/cugraph/tests/test_modularity.py b/python/cugraph/tests/test_modularity.py index 7a7d42d1592..2956d8f1913 100644 --- a/python/cugraph/tests/test_modularity.py +++ b/python/cugraph/tests/test_modularity.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -19,6 +19,7 @@ import cudf import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than def cugraph_call(G, partitions): @@ -52,6 +53,9 @@ def random_call(G, partitions): # Test all combinations of default/managed and pooled/non-pooled allocation +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("partitions", PARTITIONS) def test_modularity_clustering(graph_file, partitions): diff --git a/python/cugraph/tests/test_multigraph.py b/python/cugraph/tests/test_multigraph.py index cb659bc7e24..62245bcf65d 100644 --- a/python/cugraph/tests/test_multigraph.py +++ b/python/cugraph/tests/test_multigraph.py @@ -1,6 +1,20 @@ +# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
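With these guards in place, a Pascal machine should report the affected tests as skips rather than failures. One way to confirm the attached skip reasons programmatically is sketched below; the module path is taken from this patch and -rs is the standard pytest flag for reporting skip reasons.

import pytest

# Runs one guarded module and prints the reason recorded for each skip.
# An exit code of 0 means nothing failed; skipped tests do not fail the run.
exit_code = pytest.main(["-rs", "python/cugraph/tests/test_multigraph.py"])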
+ import cugraph import networkx as nx from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than import pytest import gc import numpy as np @@ -13,6 +27,9 @@ def setup_function(): gc.collect() +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_multigraph(graph_file): # FIXME: Migrate to new test fixtures for Graph setup once available @@ -46,6 +63,9 @@ def test_multigraph(graph_file): assert nxedges.equals(cuedges[["source", "target", "weight"]]) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_Graph_from_MultiGraph(graph_file): # FIXME: Migrate to new test fixtures for Graph setup once available @@ -79,6 +99,9 @@ def test_Graph_from_MultiGraph(graph_file): assert Gnxd.number_of_edges() == Gd.number_of_edges() +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_multigraph_sssp(graph_file): # FIXME: Migrate to new test fixtures for Graph setup once available diff --git a/python/cugraph/tests/test_nx_convert.py b/python/cugraph/tests/test_nx_convert.py index 08a96a801e2..5799b88157e 100644 --- a/python/cugraph/tests/test_nx_convert.py +++ b/python/cugraph/tests/test_nx_convert.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -16,6 +16,7 @@ import cudf import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -48,6 +49,9 @@ def _compare_graphs(nxG, cuG, has_wt=True): assert cu_df.to_dict() == nx_df.to_dict() +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_networkx_compatibility(graph_file): # test to make sure cuGraph and Nx build similar Graphs @@ -77,7 +81,9 @@ def test_networkx_compatibility(graph_file): _compare_graphs(nxG, cuG) -# Test +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_nx_convert(graph_file): gc.collect() @@ -91,6 +97,9 @@ def test_nx_convert(graph_file): _compare_graphs(nxG, cuG, has_wt=False) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_nx_convert_multicol(graph_file): gc.collect() diff --git a/python/cugraph/tests/test_overlap.py b/python/cugraph/tests/test_overlap.py index 53d279478f7..96e510c0294 100644 --- a/python/cugraph/tests/test_overlap.py +++ b/python/cugraph/tests/test_overlap.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -19,6 +19,7 @@ import scipy import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than def cugraph_call(cu_M, pairs, edgevals=False): @@ -82,6 +83,9 @@ def cpu_call(M, first, second): # Test +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_overlap(graph_file): gc.collect() @@ -116,6 +120,9 @@ def test_overlap(graph_file): # Test +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_overlap_edge_vals(graph_file): gc.collect() diff --git a/python/cugraph/tests/test_pagerank.py b/python/cugraph/tests/test_pagerank.py index 3ce8dd4ffe9..48ab1b39caa 100644 --- a/python/cugraph/tests/test_pagerank.py +++ b/python/cugraph/tests/test_pagerank.py @@ -20,6 +20,7 @@ import cudf import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -144,6 +145,9 @@ def networkx_call(Gnx, max_iter, tol, alpha, personalization_perc, nnz_vtx): # https://github.com/rapidsai/cugraph/issues/533 # +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("max_iter", MAX_ITERATIONS) @pytest.mark.parametrize("tol", TOLERANCE) @@ -194,6 +198,9 @@ def test_pagerank( assert err < (0.01 * len(cugraph_pr)) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("max_iter", MAX_ITERATIONS) @pytest.mark.parametrize("tol", TOLERANCE) diff --git a/python/cugraph/tests/test_paths.py b/python/cugraph/tests/test_paths.py index 7467d024051..f58195570b8 100644 --- a/python/cugraph/tests/test_paths.py +++ b/python/cugraph/tests/test_paths.py @@ -1,11 +1,28 @@ +# Copyright (c) 2019-2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
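The implementation of is_device_version_less_than itself is not shown in this diff. A minimal sketch of how such a check can be written, assuming Numba is available and numba.cuda.get_current_device().compute_capability reports the active GPU as a (major, minor) tuple:

from numba import cuda


def is_device_version_less_than(min_version=(7, 0)):
    # Tuple comparison: Pascal reports (6, x), and (6, x) < (7, 0) is
    # True, so the caller knows the device predates Volta.
    return cuda.get_current_device().compute_capability < min_version
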
+ +import sys +from tempfile import NamedTemporaryFile + import cudf -import cugraph from cupy.sparse import coo_matrix as cupy_coo_matrix import cupy import networkx as nx import pytest -import sys -from tempfile import NamedTemporaryFile + +import cugraph +from cugraph.utilities.utils import is_device_version_less_than + CONNECTED_GRAPH = """1,5,3 1,4,1 @@ -58,6 +75,9 @@ def graphs(request): yield cugraph_G, nx_G, cupy_df +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graphs", [CONNECTED_GRAPH], indirect=True) def test_connected_graph_shortest_path_length(graphs): cugraph_G, nx_G, cupy_df = graphs @@ -91,6 +111,9 @@ def test_connected_graph_shortest_path_length(graphs): assert path_1_to_6_length == cugraph.shortest_path_length(cupy_df, 1, 6) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graphs", [CONNECTED_GRAPH], indirect=True) def test_shortest_path_length_invalid_source(graphs): cugraph_G, nx_G, cupy_df = graphs @@ -105,6 +128,9 @@ def test_shortest_path_length_invalid_source(graphs): cugraph.shortest_path_length(cupy_df, -1, 1) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graphs", [DISCONNECTED_GRAPH], indirect=True) def test_shortest_path_length_invalid_target(graphs): cugraph_G, nx_G, cupy_df = graphs @@ -119,6 +145,9 @@ def test_shortest_path_length_invalid_target(graphs): cugraph.shortest_path_length(cupy_df, 1, 10) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graphs", [CONNECTED_GRAPH], indirect=True) def test_shortest_path_length_invalid_vertexes(graphs): cugraph_G, nx_G, cupy_df = graphs @@ -133,6 +162,9 @@ def test_shortest_path_length_invalid_vertexes(graphs): cugraph.shortest_path_length(cupy_df, 0, 42) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graphs", [DISCONNECTED_GRAPH], indirect=True) def test_shortest_path_length_no_path(graphs): cugraph_G, nx_G, cupy_df = graphs @@ -143,6 +175,9 @@ def test_shortest_path_length_no_path(graphs): assert path_1_to_8 == cugraph.shortest_path_length(cupy_df, 1, 8) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graphs", [DISCONNECTED_GRAPH], indirect=True) def test_shortest_path_length_no_target(graphs): cugraph_G, nx_G, cupy_df = graphs diff --git a/python/cugraph/tests/test_renumber.py b/python/cugraph/tests/test_renumber.py index 5362d3f5804..57912150b12 100644 --- a/python/cugraph/tests/test_renumber.py +++ b/python/cugraph/tests/test_renumber.py @@ -17,12 +17,16 @@ import pandas as pd import pytest - import cudf + from cugraph.structure.number_map import NumberMap from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_renumber_ips(): source_list = [ "192.168.1.1", @@ -57,6 +61,9 @@ def test_renumber_ips(): assert check_dst.equals(gdf["dest_as_int"]) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_renumber_ips_cols(): source_list = [ @@ -125,6 +132,9 @@ def test_renumber_ips_str_cols(): assert check_dst.equals(gdf["dest_list"]) +@pytest.mark.skipif( + 
is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_renumber_negative(): source_list = [4, 6, 8, -20, 1] dest_list = [1, 29, 35, 0, 77] @@ -146,6 +156,9 @@ def test_renumber_negative(): assert check_dst.equals(gdf["dest_list"]) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_renumber_negative_col(): source_list = [4, 6, 8, -20, 1] dest_list = [1, 29, 35, 0, 77] @@ -167,7 +180,9 @@ def test_renumber_negative_col(): assert check_dst.equals(gdf["dest_list"]) -# Test all combinations of default/managed and pooled/non-pooled allocation +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.skip(reason="dropped renumbering from series support") @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_renumber_series(graph_file): @@ -205,6 +220,9 @@ def test_renumber_series(graph_file): assert check_dst["0_y"].equals(check_dst["0_x"]) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_renumber_files(graph_file): gc.collect() @@ -238,7 +256,9 @@ def test_renumber_files(graph_file): assert exp_dst.equals(unrenumbered_df["dst"]) -# Test all combinations of default/managed and pooled/non-pooled allocation +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_renumber_files_col(graph_file): gc.collect() @@ -271,7 +291,9 @@ def test_renumber_files_col(graph_file): assert exp_dst.equals(unrenumbered_df["dst"]) -# Test all combinations of default/managed and pooled/non-pooled allocation +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_renumber_files_multi_col(graph_file): gc.collect() diff --git a/python/cugraph/tests/test_sssp.py b/python/cugraph/tests/test_sssp.py index 0a5347a6290..9e866c84f07 100644 --- a/python/cugraph/tests/test_sssp.py +++ b/python/cugraph/tests/test_sssp.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -28,6 +28,7 @@ import cudf import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -203,6 +204,9 @@ def single_dataset_source_nxresults_weighted(request): # ============================================================================= # Tests # ============================================================================= +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("cugraph_input_type", utils.CUGRAPH_DIR_INPUT_TYPES) def test_sssp(gpubenchmark, dataset_source_nxresults, cugraph_input_type): # Extract the params generated from the fixture @@ -232,6 +236,9 @@ def test_sssp(gpubenchmark, dataset_source_nxresults, cugraph_input_type): assert err == 0 +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("cugraph_input_type", utils.NX_DIR_INPUT_TYPES + utils.MATRIX_INPUT_TYPES) def test_sssp_nonnative_inputs(gpubenchmark, @@ -242,6 +249,9 @@ def test_sssp_nonnative_inputs(gpubenchmark, cugraph_input_type) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("cugraph_input_type", utils.CUGRAPH_DIR_INPUT_TYPES) def test_sssp_edgevals(gpubenchmark, dataset_source_nxresults_weighted, cugraph_input_type): @@ -276,6 +286,9 @@ def test_sssp_edgevals(gpubenchmark, dataset_source_nxresults_weighted, assert err == 0 +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("cugraph_input_type", utils.NX_DIR_INPUT_TYPES + utils.MATRIX_INPUT_TYPES) def test_sssp_edgevals_nonnative_inputs( @@ -287,6 +300,9 @@ def test_sssp_edgevals_nonnative_inputs( cugraph_input_type) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("source", SOURCES) def test_sssp_data_type_conversion(graph_file, source): @@ -341,6 +357,9 @@ def test_sssp_data_type_conversion(graph_file, source): assert err == 0 +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_scipy_api_compat(): graph_file = utils.DATASETS[0] diff --git a/python/cugraph/tests/test_subgraph_extraction.py b/python/cugraph/tests/test_subgraph_extraction.py index 9e9eccc4347..5be80f341b5 100644 --- a/python/cugraph/tests/test_subgraph_extraction.py +++ b/python/cugraph/tests/test_subgraph_extraction.py @@ -19,6 +19,7 @@ import cudf import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -70,7 +71,9 @@ def nx_call(M, verts, directed=True): return nx.subgraph(G, verts) -# Test all combinations of default/managed and pooled/non-pooled allocation +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_subgraph_extraction_DiGraph(graph_file): gc.collect() @@ -85,9 +88,9 @@ def test_subgraph_extraction_DiGraph(graph_file): assert compare_edges(cu_sg, nx_sg) -# Test all combinations of 
default/managed and pooled/non-pooled allocation - - +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_subgraph_extraction_Graph(graph_file): gc.collect() @@ -102,6 +105,9 @@ def test_subgraph_extraction_Graph(graph_file): assert compare_edges(cu_sg, nx_sg) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_subgraph_extraction_Graph_nx(graph_file): gc.collect() diff --git a/python/cugraph/tests/test_triangle_count.py b/python/cugraph/tests/test_triangle_count.py index ff28f55838d..d768183e4ad 100644 --- a/python/cugraph/tests/test_triangle_count.py +++ b/python/cugraph/tests/test_triangle_count.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -18,6 +18,7 @@ import cudf import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -66,6 +67,9 @@ def networkx_call(M): # https://github.com/rapidsai/cugraph/issues/1043 # # @pytest.mark.parametrize("graph_file", utils.DATASETS) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_triangles(graph_file): gc.collect() @@ -76,6 +80,9 @@ def test_triangles(graph_file): assert cu_count == nx_count +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_triangles_edge_vals(graph_file): gc.collect() @@ -86,6 +93,9 @@ def test_triangles_edge_vals(graph_file): assert cu_count == nx_count +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_triangles_nx(graph_file): gc.collect() diff --git a/python/cugraph/tests/test_utils.py b/python/cugraph/tests/test_utils.py index 55410817f90..b350ef27efd 100644 --- a/python/cugraph/tests/test_utils.py +++ b/python/cugraph/tests/test_utils.py @@ -15,9 +15,13 @@ import pytest import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than from pathlib import PurePath +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_bfs_paths(): with pytest.raises(ValueError) as ErrorMsg: gc.collect() @@ -43,6 +47,9 @@ def test_bfs_paths(): assert "not in the result set" in str(ErrorMsg) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_bfs_paths_array(): with pytest.raises(ValueError) as ErrorMsg: gc.collect() diff --git a/python/cugraph/tests/test_wjaccard.py b/python/cugraph/tests/test_wjaccard.py index c5cab18484c..f31d65de652 100644 --- a/python/cugraph/tests/test_wjaccard.py +++ b/python/cugraph/tests/test_wjaccard.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. 
# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -20,6 +20,7 @@ import cudf import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -86,6 +87,9 @@ def networkx_call(M): return coeff +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_wjaccard(graph_file): gc.collect() diff --git a/python/cugraph/tests/test_woverlap.py b/python/cugraph/tests/test_woverlap.py index e7da21014ba..50d7b0ecf84 100644 --- a/python/cugraph/tests/test_woverlap.py +++ b/python/cugraph/tests/test_woverlap.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -19,6 +19,7 @@ import cudf import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than import numpy as np @@ -83,7 +84,9 @@ def cpu_call(M, first, second): return result -# Test +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_woverlap(graph_file): gc.collect() From d1ec7ac873da74f34fae7a82513d107077cc945c Mon Sep 17 00:00:00 2001 From: Hugo Linsenmaier Date: Fri, 2 Apr 2021 22:41:35 +0200 Subject: [PATCH 213/343] Clean up nullptr cuda_stream_view arguments (#1504) Cugraph fix for: https://github.com/rapidsai/cugraph/issues/1499 Authors: - Hugo Linsenmaier (https://github.com/hlinsen) Approvers: - Brad Rees (https://github.com/BradReesWork) - Alex Fender (https://github.com/afender) URL: https://github.com/rapidsai/cugraph/pull/1504 --- cpp/CMakeLists.txt | 2 +- cpp/tests/traversal/tsp_test.cu | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 57f324a60a9..3b1e93d0781 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -275,7 +275,7 @@ message("set LIBCUDACXX_INCLUDE_DIR to: ${LIBCUDACXX_INCLUDE_DIR}") FetchContent_Declare( cuhornet GIT_REPOSITORY https://github.com/rapidsai/cuhornet.git - GIT_TAG 9cb8e8803852bd895a9c95c0fe778ad6eeefa7ad + GIT_TAG e58d0ecdbc270fc28867d66c965787a62a7a882c GIT_SHALLOW true SOURCE_SUBDIR hornet ) diff --git a/cpp/tests/traversal/tsp_test.cu b/cpp/tests/traversal/tsp_test.cu index 9ebf464ae3e..d4e9ff90f35 100644 --- a/cpp/tests/traversal/tsp_test.cu +++ b/cpp/tests/traversal/tsp_test.cu @@ -133,10 +133,11 @@ class Tests_Tsp : public ::testing::TestWithParam { // Device alloc raft::handle_t const handle; - rmm::device_uvector vertices(static_cast(nodes), nullptr); - rmm::device_uvector route(static_cast(nodes), nullptr); - rmm::device_uvector x_pos(static_cast(nodes), nullptr); - rmm::device_uvector y_pos(static_cast(nodes), nullptr); + auto stream = handle.get_stream(); + rmm::device_uvector vertices(static_cast(nodes), stream); + rmm::device_uvector route(static_cast(nodes), stream); + rmm::device_uvector x_pos(static_cast(nodes), stream); + rmm::device_uvector y_pos(static_cast(nodes), stream); int* vtx_ptr = 
vertices.data(); int* d_route = route.data(); From 8222f315dfdc25d89bc958235dc60d56f291751d Mon Sep 17 00:00:00 2001 From: Iroy30 <41401566+Iroy30@users.noreply.github.com> Date: Fri, 2 Apr 2021 18:38:55 -0500 Subject: [PATCH 214/343] pascal renumbering fix (#1505) Authors: - https://github.com/Iroy30 - Rick Ratzel (https://github.com/rlratzel) Approvers: - Seunghwa Kang (https://github.com/seunghwak) - Alex Fender (https://github.com/afender) URL: https://github.com/rapidsai/cugraph/pull/1505 --- benchmarks/bench_algos.py | 32 ---------- notebooks/centrality/Betweenness.ipynb | 1 - notebooks/centrality/Katz.ipynb | 1 - notebooks/community/Spectral-Clustering.ipynb | 1 - notebooks/community/Triangle-Counting.ipynb | 1 - .../components/ConnectedComponents.ipynb | 1 - notebooks/cores/core-number.ipynb | 1 - notebooks/cores/kcore.ipynb | 1 - notebooks/cores/ktruss.ipynb | 1 - notebooks/link_analysis/HITS.ipynb | 1 - .../link_prediction/Jaccard-Similarity.ipynb | 1 - .../link_prediction/Overlap-Similarity.ipynb | 1 - notebooks/structure/Renumber-2.ipynb | 1 - notebooks/structure/Renumber.ipynb | 1 - notebooks/structure/Symmetrize.ipynb | 1 - notebooks/traversal/SSSP.ipynb | 1 - python/cugraph/structure/number_map.py | 10 ++++ python/cugraph/tests/test_balanced_cut.py | 10 ---- .../tests/test_betweenness_centrality.py | 19 ------ python/cugraph/tests/test_bfs.py | 13 ----- python/cugraph/tests/test_connectivity.py | 22 ------- python/cugraph/tests/test_convert_matrix.py | 13 ----- python/cugraph/tests/test_core_number.py | 7 --- python/cugraph/tests/test_ecg.py | 3 - .../tests/test_edge_betweenness_centrality.py | 19 ------ python/cugraph/tests/test_egonet.py | 7 --- .../cugraph/tests/test_filter_unreachable.py | 4 -- python/cugraph/tests/test_graph.py | 58 ------------------- python/cugraph/tests/test_hits.py | 4 -- python/cugraph/tests/test_hypergraph.py | 58 ------------------- python/cugraph/tests/test_jaccard.py | 16 ----- python/cugraph/tests/test_k_core.py | 7 --- python/cugraph/tests/test_k_truss_subgraph.py | 7 --- python/cugraph/tests/test_katz_centrality.py | 7 --- .../tests/test_maximum_spanning_tree.py | 16 ++--- .../tests/test_minimum_spanning_tree.py | 16 ++--- python/cugraph/tests/test_modularity.py | 4 -- python/cugraph/tests/test_multigraph.py | 20 ++----- python/cugraph/tests/test_nx_convert.py | 13 +---- python/cugraph/tests/test_overlap.py | 8 +-- python/cugraph/tests/test_pagerank.py | 8 +-- python/cugraph/tests/test_paths.py | 19 ------ python/cugraph/tests/test_renumber.py | 25 -------- python/cugraph/tests/test_sssp.py | 20 +------ .../cugraph/tests/test_subgraph_extraction.py | 11 +--- python/cugraph/tests/test_triangle_count.py | 11 +--- python/cugraph/tests/test_utils.py | 11 +--- python/cugraph/tests/test_wjaccard.py | 5 +- python/cugraph/tests/test_woverlap.py | 7 +-- 49 files changed, 40 insertions(+), 485 deletions(-) diff --git a/benchmarks/bench_algos.py b/benchmarks/bench_algos.py index 14c15ebc08c..f9f8bf9cf53 100644 --- a/benchmarks/bench_algos.py +++ b/benchmarks/bench_algos.py @@ -162,8 +162,6 @@ def anyGraphWithTransposedAdjListComputed(request): ############################################################################### # Benchmarks @pytest.mark.ETL -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_create_graph(gpubenchmark, edgelistCreated): gpubenchmark(cugraph.from_cudf_edgelist, edgelistCreated, @@ -181,8 +179,6 @@ def bench_create_graph(gpubenchmark, edgelistCreated): warmup_iterations=10, 
max_time=0.005 ) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_create_digraph(gpubenchmark, edgelistCreated): gpubenchmark(cugraph.from_cudf_edgelist, edgelistCreated, @@ -192,39 +188,27 @@ def bench_create_digraph(gpubenchmark, edgelistCreated): @pytest.mark.ETL -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_renumber(gpubenchmark, edgelistCreated): gpubenchmark(NumberMap.renumber, edgelistCreated, "0", "1") -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_pagerank(gpubenchmark, anyGraphWithTransposedAdjListComputed): gpubenchmark(cugraph.pagerank, anyGraphWithTransposedAdjListComputed) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_bfs(gpubenchmark, anyGraphWithAdjListComputed): gpubenchmark(cugraph.bfs, anyGraphWithAdjListComputed, 0) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_force_atlas2(gpubenchmark, anyGraphWithAdjListComputed): gpubenchmark(cugraph.force_atlas2, anyGraphWithAdjListComputed, max_iter=50) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_sssp(gpubenchmark, anyGraphWithAdjListComputed): gpubenchmark(cugraph.sssp, anyGraphWithAdjListComputed, 0) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_jaccard(gpubenchmark, graphWithAdjListComputed): gpubenchmark(cugraph.jaccard, graphWithAdjListComputed) @@ -235,28 +219,20 @@ def bench_louvain(gpubenchmark, graphWithAdjListComputed): gpubenchmark(cugraph.louvain, graphWithAdjListComputed) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_weakly_connected_components(gpubenchmark, anyGraphWithAdjListComputed): gpubenchmark(cugraph.weakly_connected_components, anyGraphWithAdjListComputed) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_overlap(gpubenchmark, anyGraphWithAdjListComputed): gpubenchmark(cugraph.overlap, anyGraphWithAdjListComputed) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_triangles(gpubenchmark, graphWithAdjListComputed): gpubenchmark(cugraph.triangles, graphWithAdjListComputed) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_spectralBalancedCutClustering(gpubenchmark, graphWithAdjListComputed): gpubenchmark(cugraph.spectralBalancedCutClustering, @@ -271,27 +247,19 @@ def bench_spectralModularityMaximizationClustering( anyGraphWithAdjListComputed, 2) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_graph_degree(gpubenchmark, anyGraphWithAdjListComputed): gpubenchmark(anyGraphWithAdjListComputed.degree) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_graph_degrees(gpubenchmark, anyGraphWithAdjListComputed): gpubenchmark(anyGraphWithAdjListComputed.degrees) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_betweenness_centrality(gpubenchmark, anyGraphWithAdjListComputed): gpubenchmark(cugraph.betweenness_centrality, anyGraphWithAdjListComputed, k=10, seed=123) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), 
reason="Not supported on Pascal") def bench_edge_betweenness_centrality(gpubenchmark, anyGraphWithAdjListComputed): gpubenchmark(cugraph.edge_betweenness_centrality, diff --git a/notebooks/centrality/Betweenness.ipynb b/notebooks/centrality/Betweenness.ipynb index d748defe74c..e4e33ef91e5 100644 --- a/notebooks/centrality/Betweenness.ipynb +++ b/notebooks/centrality/Betweenness.ipynb @@ -5,7 +5,6 @@ "metadata": {}, "source": [ "# Betweenness Centrality\n", - "# Does not run on Pascal\n", "\n", "In this notebook, we will compute the Betweenness centrality for both vertices and edges in our test datase using cuGraph and NetworkX. The NetworkX and cuGraph processes will be interleaved so that each step can be compared.\n", "\n", diff --git a/notebooks/centrality/Katz.ipynb b/notebooks/centrality/Katz.ipynb index cdf8828b80a..2330fc08de8 100755 --- a/notebooks/centrality/Katz.ipynb +++ b/notebooks/centrality/Katz.ipynb @@ -5,7 +5,6 @@ "metadata": {}, "source": [ "# Katz Centrality\n", - "# Does not run on Pascal\n", "\n", "In this notebook, we will compute the Katz centrality of each vertex in our test datase using both cuGraph and NetworkX. Additionally, NetworkX also contains a Numpy implementation that will used. The NetworkX and cuGraph processes will be interleaved so that each step can be compared.\n", "\n", diff --git a/notebooks/community/Spectral-Clustering.ipynb b/notebooks/community/Spectral-Clustering.ipynb index a3aa538b062..fcefae5eb60 100755 --- a/notebooks/community/Spectral-Clustering.ipynb +++ b/notebooks/community/Spectral-Clustering.ipynb @@ -5,7 +5,6 @@ "metadata": {}, "source": [ "# Spectral Clustering \n", - "# Does not run on Pascal\n", "\n", "In this notebook, we will use cuGraph to identify the cluster in a test graph using Spectral Clustering with both the (A) Balance Cut metric, and (B) the Modularity Maximization metric\n", "\n", diff --git a/notebooks/community/Triangle-Counting.ipynb b/notebooks/community/Triangle-Counting.ipynb index 796f6e59fe6..19d3f838fc6 100755 --- a/notebooks/community/Triangle-Counting.ipynb +++ b/notebooks/community/Triangle-Counting.ipynb @@ -5,7 +5,6 @@ "metadata": {}, "source": [ "# Triangle Counting\n", - "# Does not run on Pascal\n", "\n", "In this notebook, we will count the numner of trianges in our test dataset. 
The NetworkX and cuGraph processes will be interleaved so that each step can be compared.\n", "\n", diff --git a/notebooks/components/ConnectedComponents.ipynb b/notebooks/components/ConnectedComponents.ipynb index e9669d75b38..a9c82e6669f 100755 --- a/notebooks/components/ConnectedComponents.ipynb +++ b/notebooks/components/ConnectedComponents.ipynb @@ -5,7 +5,6 @@ "metadata": {}, "source": [ "# Connected Components\n", - "# Does not run on Pascal\n", "\n", "In this notebook, we will use cuGraph to compute weakly and strongly connected components of a graph and display some useful information about the resulting components.\n", "\n", diff --git a/notebooks/cores/core-number.ipynb b/notebooks/cores/core-number.ipynb index 127898fb094..6190f653020 100755 --- a/notebooks/cores/core-number.ipynb +++ b/notebooks/cores/core-number.ipynb @@ -5,7 +5,6 @@ "metadata": {}, "source": [ "# Core Number\n", - "# Does not run on Pascal\n", "\n", "\n", "In this notebook, we will use cuGraph to compute the core number of every vertex in our test graph \n", diff --git a/notebooks/cores/kcore.ipynb b/notebooks/cores/kcore.ipynb index 250a1ea2aa5..342f4ecd5f7 100755 --- a/notebooks/cores/kcore.ipynb +++ b/notebooks/cores/kcore.ipynb @@ -5,7 +5,6 @@ "metadata": {}, "source": [ "# K-Cores\n", - "# Does not run on Pascal\n", "\n", "\n", "In this notebook, we will use cuGraph to identify the K-Cores clusters in a test graph \n", diff --git a/notebooks/cores/ktruss.ipynb b/notebooks/cores/ktruss.ipynb index 2fe93247d67..e6470110666 100644 --- a/notebooks/cores/ktruss.ipynb +++ b/notebooks/cores/ktruss.ipynb @@ -5,7 +5,6 @@ "metadata": {}, "source": [ "# K-Truss\n", - "# Does not run on Pascal\n", "\n", "\n", "In this notebook, we will use cuGraph to identify the K-Truss clusters in a test graph \n", diff --git a/notebooks/link_analysis/HITS.ipynb b/notebooks/link_analysis/HITS.ipynb index 891133a277c..01fd22929d5 100755 --- a/notebooks/link_analysis/HITS.ipynb +++ b/notebooks/link_analysis/HITS.ipynb @@ -5,7 +5,6 @@ "metadata": {}, "source": [ "# HITS\n", - "# Does not run on Pascal\n", "\n", "In this notebook, we will use both NetworkX and cuGraph to compute HITS. \n", "The NetworkX and cuGraph processes will be interleaved so that each step can be compared.\n", diff --git a/notebooks/link_prediction/Jaccard-Similarity.ipynb b/notebooks/link_prediction/Jaccard-Similarity.ipynb index 9a53e559323..21835da1cce 100755 --- a/notebooks/link_prediction/Jaccard-Similarity.ipynb +++ b/notebooks/link_prediction/Jaccard-Similarity.ipynb @@ -5,7 +5,6 @@ "metadata": {}, "source": [ "# Jaccard Similarity\n", - "# Does not run on Pascal\n", "----\n", "\n", "In this notebook we will explore the Jaccard vertex similarity metrics available in cuGraph. cuGraph supports:\n", diff --git a/notebooks/link_prediction/Overlap-Similarity.ipynb b/notebooks/link_prediction/Overlap-Similarity.ipynb index ec02a8ebbea..b8733ce4d80 100755 --- a/notebooks/link_prediction/Overlap-Similarity.ipynb +++ b/notebooks/link_prediction/Overlap-Similarity.ipynb @@ -5,7 +5,6 @@ "metadata": {}, "source": [ "# Overlap Similarity\n", - "# Does not run on Pascal\n", "----\n", "\n", "In this notebook we will explore the Overlap Coefficient and compare it again Jaccard. 
Similarity can be between neighboring vertices (default) or second hop neighbors\n", diff --git a/notebooks/structure/Renumber-2.ipynb b/notebooks/structure/Renumber-2.ipynb index aa923ba003f..d17c2b32191 100755 --- a/notebooks/structure/Renumber-2.ipynb +++ b/notebooks/structure/Renumber-2.ipynb @@ -5,7 +5,6 @@ "metadata": {}, "source": [ "# Renumber\n", - "# Does not run on Pascal\n", "\n", "In this notebook, we will use the _renumber_ function to compute new vertex IDs.\n", "\n", diff --git a/notebooks/structure/Renumber.ipynb b/notebooks/structure/Renumber.ipynb index 2a2dab0a1a7..047b53d62df 100755 --- a/notebooks/structure/Renumber.ipynb +++ b/notebooks/structure/Renumber.ipynb @@ -5,7 +5,6 @@ "metadata": {}, "source": [ "# Renumbering Test\n", - "# Does not run on Pascal\n", "\n", "In this notebook, we will use the _renumber_ function to compute new vertex IDs.\n", "\n", diff --git a/notebooks/structure/Symmetrize.ipynb b/notebooks/structure/Symmetrize.ipynb index 5ba692b4696..3cb84317742 100755 --- a/notebooks/structure/Symmetrize.ipynb +++ b/notebooks/structure/Symmetrize.ipynb @@ -5,7 +5,6 @@ "metadata": {}, "source": [ "# Symmetrize\n", - "# Does not run on Pascal\n", "\n", "In this notebook, we will use the _symmetrize_ function to create bi-directional edges in an undirected graph\n", "\n", diff --git a/notebooks/traversal/SSSP.ipynb b/notebooks/traversal/SSSP.ipynb index abea30eba15..d2baeb12e74 100755 --- a/notebooks/traversal/SSSP.ipynb +++ b/notebooks/traversal/SSSP.ipynb @@ -5,7 +5,6 @@ "metadata": {}, "source": [ "# Single Source Shortest Path (SSSP)\n", - "# Does not run on Pascal\n", "\n", "In this notebook, we will use cuGraph to compute the shortest path from a starting vertex to everyother vertex in our training dataset.\n", "\n", diff --git a/python/cugraph/structure/number_map.py b/python/cugraph/structure/number_map.py index 5f801eb0d90..e45a50d6dbe 100644 --- a/python/cugraph/structure/number_map.py +++ b/python/cugraph/structure/number_map.py @@ -16,6 +16,7 @@ from dask.distributed import wait, default_client from cugraph.dask.common.input_utils import get_distributed_data from cugraph.structure import renumber_wrapper as c_renumber +from cugraph.utilities.utils import is_device_version_less_than import cugraph.comms.comms as Comms import dask_cudf import numpy as np @@ -476,6 +477,8 @@ def renumber(df, src_col_names, dst_col_names, preserve_order=False, elif not (df[src_col_names].dtype == np.int32 or df[src_col_names].dtype == np.int64): renumber_type = 'legacy' + elif is_device_version_less_than((7, 0)): + renumber_type = 'legacy' else: renumber_type = 'experimental' df = df.rename(columns={src_col_names: "src", @@ -562,6 +565,12 @@ def get_renumbered_df(data): return renumbered_df, renumber_map else: + if is_device_version_less_than((7, 0)): + renumbered_df = df + renumber_map.implementation.df = indirection_map + renumber_map.implementation.numbered = True + return renumbered_df, renumber_map + renumbering_map, renumbered_df = c_renumber.renumber( df, num_edges, @@ -578,6 +587,7 @@ def get_renumbered_df(data): else: renumber_map.implementation.df = renumbering_map.rename( columns={'original_ids': '0', 'new_ids': 'id'}, copy=False) + renumber_map.implementation.numbered = True return renumbered_df, renumber_map diff --git a/python/cugraph/tests/test_balanced_cut.py b/python/cugraph/tests/test_balanced_cut.py index 4a609e1ef13..2492017511a 100644 --- a/python/cugraph/tests/test_balanced_cut.py +++ b/python/cugraph/tests/test_balanced_cut.py @@ -20,7 +20,6 @@ 
import cudf import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than def cugraph_call(G, partitions): @@ -60,9 +59,6 @@ def random_call(G, partitions): # Test all combinations of default/managed and pooled/non-pooled allocation -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("partitions", PARTITIONS) def test_edge_cut_clustering(graph_file, partitions): @@ -85,9 +81,6 @@ def test_edge_cut_clustering(graph_file, partitions): assert cu_score < rand_score -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("partitions", PARTITIONS) def test_edge_cut_clustering_with_edgevals(graph_file, partitions): @@ -130,9 +123,6 @@ def test_digraph_rejected(): cugraph_call(G, 2) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("partitions", PARTITIONS) def test_edge_cut_clustering_with_edgevals_nx(graph_file, partitions): diff --git a/python/cugraph/tests/test_betweenness_centrality.py b/python/cugraph/tests/test_betweenness_centrality.py index 3177b78de47..29c012e95a2 100755 --- a/python/cugraph/tests/test_betweenness_centrality.py +++ b/python/cugraph/tests/test_betweenness_centrality.py @@ -17,7 +17,6 @@ import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than import random import numpy as np import cudf @@ -290,9 +289,6 @@ def prepare_test(): # ============================================================================= # Tests # ============================================================================= -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @@ -328,9 +324,6 @@ def test_betweenness_centrality( compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", [None]) @@ -375,9 +368,6 @@ def test_betweenness_centrality_k_full( # the function operating the comparison inside is first proceeding # to a random sampling over the number of vertices (thus direct offsets) # in the graph structure instead of actual vertices identifiers -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNRENUMBERED) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @@ -416,9 +406,6 @@ def test_betweenness_centrality_fixed_sample( compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @@ -459,9 
+446,6 @@ def test_betweenness_centrality_weight_except( compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) @@ -500,9 +484,6 @@ def test_betweenness_invalid_dtype( compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS) diff --git a/python/cugraph/tests/test_bfs.py b/python/cugraph/tests/test_bfs.py index 00996fd3bb3..d04ef957104 100644 --- a/python/cugraph/tests/test_bfs.py +++ b/python/cugraph/tests/test_bfs.py @@ -19,7 +19,6 @@ import pytest import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than import random # Temporarily suppress warnings till networkX fixes deprecation warnings @@ -441,9 +440,6 @@ def dataset_nxresults_allstartvertices_spc(small_dataset_nx_graph): # ============================================================================= # Tests # ============================================================================= -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("cugraph_input_type", utils.CUGRAPH_INPUT_TYPES) def test_bfs(gpubenchmark, dataset_nxresults_startvertex_spc, cugraph_input_type): @@ -471,9 +467,6 @@ def test_bfs(gpubenchmark, dataset_nxresults_startvertex_spc, ) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("cugraph_input_type", utils.NX_INPUT_TYPES + utils.MATRIX_INPUT_TYPES) def test_bfs_nonnative_inputs(gpubenchmark, @@ -484,9 +477,6 @@ def test_bfs_nonnative_inputs(gpubenchmark, cugraph_input_type) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("cugraph_input_type", utils.CUGRAPH_INPUT_TYPES) def test_bfs_spc_full(gpubenchmark, dataset_nxresults_allstartvertices_spc, cugraph_input_type): @@ -517,9 +507,6 @@ def test_bfs_spc_full(gpubenchmark, dataset_nxresults_allstartvertices_spc, ) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_scipy_api_compat(): graph_file = utils.DATASETS[0] diff --git a/python/cugraph/tests/test_connectivity.py b/python/cugraph/tests/test_connectivity.py index 14572ab748d..194147ab620 100644 --- a/python/cugraph/tests/test_connectivity.py +++ b/python/cugraph/tests/test_connectivity.py @@ -28,7 +28,6 @@ import cudf import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -291,9 +290,6 @@ def single_dataset_nxresults_strong(request): # ============================================================================= # Tests # ============================================================================= -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) 
@pytest.mark.parametrize("cugraph_input_type", utils.CUGRAPH_DIR_INPUT_TYPES) def test_weak_cc(gpubenchmark, dataset_nxresults_weak, cugraph_input_type): (graph_file, netx_labels, @@ -333,9 +329,6 @@ def test_weak_cc(gpubenchmark, dataset_nxresults_weak, cugraph_input_type): assert nx_vertices == cg_vertices -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("cugraph_input_type", utils.NX_DIR_INPUT_TYPES + utils.MATRIX_INPUT_TYPES) def test_weak_cc_nonnative_inputs(gpubenchmark, @@ -346,9 +339,6 @@ def test_weak_cc_nonnative_inputs(gpubenchmark, cugraph_input_type) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("cugraph_input_type", utils.CUGRAPH_DIR_INPUT_TYPES) def test_strong_cc(gpubenchmark, dataset_nxresults_strong, cugraph_input_type): @@ -392,9 +382,6 @@ def test_strong_cc(gpubenchmark, dataset_nxresults_strong, assert nx_vertices == cg_vertices -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("cugraph_input_type", utils.NX_DIR_INPUT_TYPES + utils.MATRIX_INPUT_TYPES) def test_strong_cc_nonnative_inputs(gpubenchmark, @@ -405,25 +392,16 @@ def test_strong_cc_nonnative_inputs(gpubenchmark, cugraph_input_type) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_scipy_api_compat_weak(single_dataset_nxresults_weak): (graph_file, _, _, _, api_type) = single_dataset_nxresults_weak assert_scipy_api_compat(graph_file, api_type) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_scipy_api_compat_strong(single_dataset_nxresults_strong): (graph_file, _, _, _, api_type) = single_dataset_nxresults_strong assert_scipy_api_compat(graph_file, api_type) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("connection_type", ["strong", "weak"]) def test_scipy_api_compat(connection_type): if connection_type == "strong": diff --git a/python/cugraph/tests/test_convert_matrix.py b/python/cugraph/tests/test_convert_matrix.py index 4d6c90364d8..1dbf51910ea 100644 --- a/python/cugraph/tests/test_convert_matrix.py +++ b/python/cugraph/tests/test_convert_matrix.py @@ -15,7 +15,6 @@ import pytest import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than import numpy as np # Temporarily suppress warnings till networkX fixes deprecation warnings @@ -37,9 +36,6 @@ def setup_function(): gc.collect() -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_to_from_pandas(graph_file): # Read in the graph @@ -86,9 +82,6 @@ def test_to_from_pandas(graph_file): assert exp_pdf.equals(res_pdf) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_from_to_numpy(graph_file): # Read in the graph @@ -157,9 +150,6 @@ def test_from_to_numpy(graph_file): assert exp_pdf.equals(res_pdf) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_from_edgelist(graph_file): """ @@ -175,9 +165,6 @@ def test_from_edgelist(graph_file): assert G1.EdgeList == G2.EdgeList 
-@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_from_adjlist(graph_file): """ diff --git a/python/cugraph/tests/test_core_number.py b/python/cugraph/tests/test_core_number.py index c2394cdf735..9cfc37ba1c5 100644 --- a/python/cugraph/tests/test_core_number.py +++ b/python/cugraph/tests/test_core_number.py @@ -15,7 +15,6 @@ import pytest import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than from cugraph.utilities import df_score_to_dictionary # Temporarily suppress warnings till networkX fixes deprecation warnings @@ -79,9 +78,6 @@ def calc_core_number(graph_file): # https://github.com/rapidsai/cugraph/issues/1045 # # @pytest.mark.parametrize("graph_file", utils.DATASETS) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_core_number(graph_file): gc.collect() @@ -95,9 +91,6 @@ def test_core_number(graph_file): assert cg_num_dic == nx_num -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_core_number_nx(graph_file): gc.collect() diff --git a/python/cugraph/tests/test_ecg.py b/python/cugraph/tests/test_ecg.py index 60f97715efa..ba705a787ee 100644 --- a/python/cugraph/tests/test_ecg.py +++ b/python/cugraph/tests/test_ecg.py @@ -55,9 +55,6 @@ def golden_call(graph_file): ENSEMBLE_SIZES = [16, 32] -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", DATASETS) @pytest.mark.parametrize("min_weight", MIN_WEIGHTS) @pytest.mark.parametrize("ensemble_size", ENSEMBLE_SIZES) diff --git a/python/cugraph/tests/test_edge_betweenness_centrality.py b/python/cugraph/tests/test_edge_betweenness_centrality.py index 224998df48c..8c5aad7dc61 100644 --- a/python/cugraph/tests/test_edge_betweenness_centrality.py +++ b/python/cugraph/tests/test_edge_betweenness_centrality.py @@ -17,7 +17,6 @@ import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than import random import numpy as np import cupy @@ -301,9 +300,6 @@ def prepare_test(): gc.collect() -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @@ -336,9 +332,6 @@ def test_edge_betweenness_centrality( compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", [None]) @@ -380,9 +373,6 @@ def test_edge_betweenness_centrality_k_full( # the function operating the comparison inside is first proceeding # to a random sampling over the number of vertices (thus direct offsets) # in the graph structure instead of actual vertices identifiers -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNRENUMBERED) @pytest.mark.parametrize("directed", 
DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @@ -419,9 +409,6 @@ def test_edge_betweenness_centrality_fixed_sample( compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @@ -460,9 +447,6 @@ def test_edge_betweenness_centrality_weight_except( compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) @@ -498,9 +482,6 @@ def test_edge_betweenness_invalid_dtype( compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS) diff --git a/python/cugraph/tests/test_egonet.py b/python/cugraph/tests/test_egonet.py index fb04674a52b..b259c2567dc 100644 --- a/python/cugraph/tests/test_egonet.py +++ b/python/cugraph/tests/test_egonet.py @@ -17,7 +17,6 @@ import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -36,9 +35,6 @@ RADIUS = [1, 2, 3] -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("seed", SEEDS) @pytest.mark.parametrize("radius", RADIUS) @@ -58,9 +54,6 @@ def test_ego_graph_nx(graph_file, seed, radius): assert nx.is_isomorphic(ego_nx, ego_cugraph) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("seeds", [[0, 5, 13]]) @pytest.mark.parametrize("radius", [1, 2, 3]) diff --git a/python/cugraph/tests/test_filter_unreachable.py b/python/cugraph/tests/test_filter_unreachable.py index f89dbba4e30..6c00461d234 100644 --- a/python/cugraph/tests/test_filter_unreachable.py +++ b/python/cugraph/tests/test_filter_unreachable.py @@ -18,7 +18,6 @@ import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -37,9 +36,6 @@ SOURCES = [1] -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("source", SOURCES) def test_filter_unreachable(graph_file, source): diff --git a/python/cugraph/tests/test_graph.py b/python/cugraph/tests/test_graph.py index 1a032bdaf17..348f7e2e130 100644 --- a/python/cugraph/tests/test_graph.py +++ b/python/cugraph/tests/test_graph.py @@ -23,7 +23,6 @@ from cudf.tests.utils import assert_eq import cugraph from cugraph.tests import utils -from 
cugraph.utilities.utils import is_device_version_less_than # MG import cugraph.dask as dcg @@ -163,9 +162,6 @@ def test_version(): # Test -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_add_edge_list_to_adj_list(graph_file): cu_M = utils.read_csv_file(graph_file) @@ -186,9 +182,6 @@ def test_add_edge_list_to_adj_list(graph_file): # Test -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_add_adj_list_to_edge_list(graph_file): Mnx = utils.read_csv_for_nx(graph_file) @@ -215,9 +208,6 @@ def test_add_adj_list_to_edge_list(graph_file): # Test -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_view_edge_list_from_adj_list(graph_file): Mnx = utils.read_csv_for_nx(graph_file) @@ -239,9 +229,6 @@ def test_view_edge_list_from_adj_list(graph_file): # Test -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_delete_edge_list_delete_adj_list(graph_file): Mnx = utils.read_csv_for_nx(graph_file) @@ -270,9 +257,6 @@ def test_delete_edge_list_delete_adj_list(graph_file): # Test -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_add_edge_or_adj_list_after_add_edge_or_adj_list(graph_file): Mnx = utils.read_csv_for_nx(graph_file) @@ -312,9 +296,6 @@ def test_add_edge_or_adj_list_after_add_edge_or_adj_list(graph_file): # Test -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_edges_for_Graph(graph_file): cu_M = utils.read_csv_file(graph_file) @@ -353,9 +334,6 @@ def test_edges_for_Graph(graph_file): # Test -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_view_edge_list_for_Graph(graph_file): cu_M = utils.read_csv_file(graph_file) @@ -399,9 +377,6 @@ def test_view_edge_list_for_Graph(graph_file): # Test -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize('graph_file', utils.DATASETS) def test_consolidation(graph_file): cluster = LocalCUDACluster() @@ -436,9 +411,6 @@ def test_consolidation(graph_file): # Test -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize('graph_file', utils.DATASETS_SMALL) def test_two_hop_neighbors(graph_file): cu_M = utils.read_csv_file(graph_file) @@ -458,9 +430,6 @@ def test_two_hop_neighbors(graph_file): # Test -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_degree_functionality(graph_file): M = utils.read_csv_for_nx(graph_file) @@ -499,9 +468,6 @@ def test_degree_functionality(graph_file): # Test -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_degrees_functionality(graph_file): M = utils.read_csv_for_nx(graph_file) @@ -533,9 +499,6 @@ def 
test_degrees_functionality(graph_file): # Test -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_number_of_vertices(graph_file): cu_M = utils.read_csv_file(graph_file) @@ -554,9 +517,6 @@ def test_number_of_vertices(graph_file): # Test -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) def test_to_directed(graph_file): cu_M = utils.read_csv_file(graph_file) @@ -584,9 +544,6 @@ def test_to_directed(graph_file): # Test -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) def test_to_undirected(graph_file): # Read data and then convert to directed by dropped some edges @@ -621,9 +578,6 @@ def test_to_undirected(graph_file): # Test -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_has_edge(graph_file): cu_M = utils.read_csv_file(graph_file) @@ -639,9 +593,6 @@ def test_has_edge(graph_file): # Test -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_has_node(graph_file): cu_M = utils.read_csv_file(graph_file) @@ -655,9 +606,6 @@ def test_has_node(graph_file): assert G.has_node(n) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_invalid_has_node(): df = cudf.DataFrame([[1, 2]], columns=["src", "dst"]) G = cugraph.Graph() @@ -667,9 +615,6 @@ def test_invalid_has_node(): assert not G.has_node(G.number_of_nodes() + 1) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize('graph_file', utils.DATASETS) def test_bipartite_api(graph_file): # This test only tests the functionality of adding set of nodes and @@ -703,9 +648,6 @@ def test_bipartite_api(graph_file): # Test -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_neighbors(graph_file): cu_M = utils.read_csv_file(graph_file) diff --git a/python/cugraph/tests/test_hits.py b/python/cugraph/tests/test_hits.py index 58c03bebd88..9229f3734f8 100644 --- a/python/cugraph/tests/test_hits.py +++ b/python/cugraph/tests/test_hits.py @@ -20,7 +20,6 @@ import cudf import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -80,9 +79,6 @@ def networkx_call(M, max_iter, tol): TOLERANCE = [1.0e-06] -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) @pytest.mark.parametrize("max_iter", MAX_ITERATIONS) @pytest.mark.parametrize("tol", TOLERANCE) diff --git a/python/cugraph/tests/test_hypergraph.py b/python/cugraph/tests/test_hypergraph.py index 9027fdcffd6..43801be9fdc 100644 --- a/python/cugraph/tests/test_hypergraph.py +++ b/python/cugraph/tests/test_hypergraph.py @@ -42,7 +42,6 @@ from cudf.tests.utils import assert_eq import cugraph -from cugraph.utilities.utils import is_device_version_less_than 
simple_df = cudf.DataFrame.from_pandas(pd.DataFrame({ @@ -59,9 +58,6 @@ })) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_complex_df(): complex_df = pd.DataFrame({ "src": [0, 1, 2, 3], @@ -107,9 +103,6 @@ def test_complex_df(): cugraph.hypergraph(complex_df) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("categorical_metadata", [False, True]) def test_hyperedges(categorical_metadata): @@ -180,9 +173,6 @@ def test_hyperedges(categorical_metadata): assert_eq(len(h[k]), v) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_hyperedges_direct(): h = cugraph.hypergraph(hyper_df, direct=True) @@ -191,9 +181,6 @@ def test_hyperedges_direct(): assert_eq(len(h["nodes"]), 9) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_hyperedges_direct_categories(): h = cugraph.hypergraph( @@ -210,9 +197,6 @@ def test_hyperedges_direct_categories(): assert_eq(len(h["nodes"]), 6) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_hyperedges_direct_manual_shaping(): h1 = cugraph.hypergraph( @@ -230,9 +214,6 @@ def test_hyperedges_direct_manual_shaping(): assert_eq(len(h2["edges"]), 12) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("categorical_metadata", [False, True]) def test_drop_edge_attrs(categorical_metadata): @@ -284,9 +265,6 @@ def test_drop_edge_attrs(categorical_metadata): assert_eq(len(h[k]), v) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("categorical_metadata", [False, True]) def test_drop_edge_attrs_direct(categorical_metadata): @@ -328,9 +306,6 @@ def test_drop_edge_attrs_direct(categorical_metadata): assert_eq(len(h[k]), v) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_skip_hyper(): df = cudf.DataFrame.from_pandas(pd.DataFrame({ @@ -345,9 +320,6 @@ def test_skip_hyper(): assert len(hg["graph"].edges()) == 6 -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_skip_drop_na_hyper(): df = cudf.DataFrame.from_pandas(pd.DataFrame({ @@ -362,9 +334,6 @@ def test_skip_drop_na_hyper(): assert len(hg["graph"].edges()) == 5 -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_skip_direct(): df = cudf.DataFrame.from_pandas(pd.DataFrame({ @@ -379,9 +348,6 @@ def test_skip_direct(): assert len(hg["graph"].edges()) == 3 -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_skip_drop_na_direct(): df = cudf.DataFrame.from_pandas(pd.DataFrame({ @@ -396,9 +362,6 @@ def test_skip_drop_na_direct(): assert len(hg["graph"].edges()) == 2 -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_drop_na_hyper(): df = cudf.DataFrame.from_pandas( @@ -411,9 +374,6 @@ def test_drop_na_hyper(): assert len(hg["graph"].edges()) == 4 -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_drop_na_direct(): df = cudf.DataFrame.from_pandas( @@ -426,9 +386,6 @@ def test_drop_na_direct(): assert len(hg["graph"].edges()) == 1 -@pytest.mark.skipif( - 
is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_skip_na_hyperedge(): nans_df = cudf.DataFrame.from_pandas(pd.DataFrame({ @@ -448,9 +405,6 @@ def test_skip_na_hyperedge(): assert_eq(len(default_h_edges), len(expected_hits)) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_hyper_to_pa_vanilla(): df = cudf.DataFrame.from_pandas(pd.DataFrame({ @@ -465,9 +419,6 @@ def test_hyper_to_pa_vanilla(): assert len(edges_err) == 6 -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_hyper_to_pa_mixed(): df = cudf.DataFrame.from_pandas(pd.DataFrame({ @@ -482,9 +433,6 @@ def test_hyper_to_pa_mixed(): assert len(edges_err) == 6 -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_hyper_to_pa_na(): df = cudf.DataFrame.from_pandas(pd.DataFrame({ @@ -502,9 +450,6 @@ def test_hyper_to_pa_na(): assert len(edges_err) == 6 -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_hyper_to_pa_all(): hg = cugraph.hypergraph(simple_df, ["id", "a1", "🙈"]) nodes_arr = hg["graph"].nodes().to_arrow() @@ -515,9 +460,6 @@ def test_hyper_to_pa_all(): assert len(edges_err) == 9 -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_hyper_to_pa_all_direct(): hg = cugraph.hypergraph(simple_df, ["id", "a1", "🙈"], direct=True) nodes_arr = hg["graph"].nodes().to_arrow() diff --git a/python/cugraph/tests/test_jaccard.py b/python/cugraph/tests/test_jaccard.py index 87e9a5a3cc7..b61101ef1d0 100644 --- a/python/cugraph/tests/test_jaccard.py +++ b/python/cugraph/tests/test_jaccard.py @@ -17,7 +17,6 @@ import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than from pathlib import PurePath # Temporarily suppress warnings till networkX fixes deprecation warnings @@ -92,9 +91,6 @@ def networkx_call(M): return src, dst, coeff -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_jaccard(graph_file): gc.collect() @@ -117,9 +113,6 @@ def test_jaccard(graph_file): assert err == 0 -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", [PurePath( utils.RAPIDS_DATASET_ROOT_DIR)/"netscience.csv"] ) @@ -144,9 +137,6 @@ def test_jaccard_edgevals(graph_file): assert err == 0 -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_jaccard_two_hop(graph_file): gc.collect() @@ -179,9 +169,6 @@ def test_jaccard_two_hop(graph_file): assert diff < 1.0e-6 -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_jaccard_two_hop_edge_vals(graph_file): gc.collect() @@ -216,9 +203,6 @@ def test_jaccard_two_hop_edge_vals(graph_file): assert diff < 1.0e-6 -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_jaccard_nx(graph_file): gc.collect() diff --git a/python/cugraph/tests/test_k_core.py b/python/cugraph/tests/test_k_core.py index 4d3e4903d33..33d403ee27b 100644 --- 
a/python/cugraph/tests/test_k_core.py +++ b/python/cugraph/tests/test_k_core.py @@ -17,7 +17,6 @@ import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -66,9 +65,6 @@ def compare_edges(cg, nxg): return True -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_core_number_Graph(graph_file): gc.collect() @@ -78,9 +74,6 @@ def test_core_number_Graph(graph_file): assert compare_edges(cu_kcore, nx_kcore) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_core_number_Graph_nx(graph_file): gc.collect() diff --git a/python/cugraph/tests/test_k_truss_subgraph.py b/python/cugraph/tests/test_k_truss_subgraph.py index 02b95f01a01..a86490fb561 100644 --- a/python/cugraph/tests/test_k_truss_subgraph.py +++ b/python/cugraph/tests/test_k_truss_subgraph.py @@ -17,7 +17,6 @@ import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than import numpy as np @@ -74,9 +73,6 @@ def compare_k_truss(k_truss_cugraph, k, ground_truth_file): return True -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file, nx_ground_truth", utils.DATASETS_KTRUSS) def test_ktruss_subgraph_Graph(graph_file, nx_ground_truth): gc.collect() @@ -90,9 +86,6 @@ def test_ktruss_subgraph_Graph(graph_file, nx_ground_truth): compare_k_truss(k_subgraph, k, nx_ground_truth) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file, nx_ground_truth", utils.DATASETS_KTRUSS) def test_ktruss_subgraph_Graph_nx(graph_file, nx_ground_truth): gc.collect() diff --git a/python/cugraph/tests/test_katz_centrality.py b/python/cugraph/tests/test_katz_centrality.py index 864b2974117..1fef6b05d59 100644 --- a/python/cugraph/tests/test_katz_centrality.py +++ b/python/cugraph/tests/test_katz_centrality.py @@ -17,7 +17,6 @@ import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -71,9 +70,6 @@ def calc_katz(graph_file): # https://github.com/rapidsai/cugraph/issues/1042 # # @pytest.mark.parametrize("graph_file", utils.DATASETS) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_katz_centrality(graph_file): gc.collect() @@ -86,9 +82,6 @@ def test_katz_centrality(graph_file): assert topKNX.equals(topKCU) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_katz_centrality_nx(graph_file): gc.collect() diff --git a/python/cugraph/tests/test_maximum_spanning_tree.py b/python/cugraph/tests/test_maximum_spanning_tree.py index 0e55c7f15d7..311f28bd6f8 100644 --- a/python/cugraph/tests/test_maximum_spanning_tree.py +++ b/python/cugraph/tests/test_maximum_spanning_tree.py @@ -11,17 +11,17 @@ # See the License for 
the specific language governing permissions and # limitations under the License. +import time import gc import pytest +import numpy as np +import rmm +import cudf import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than -import rmm -import cudf -import time -import numpy as np + # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -37,9 +37,6 @@ print("Networkx version : {} ".format(nx.__version__)) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED_WEIGHTS) def test_maximum_spanning_tree_nx(graph_file): gc.collect() @@ -75,9 +72,6 @@ def test_maximum_spanning_tree_nx(graph_file): ] -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.skip(reason="Skipping large tests") @pytest.mark.parametrize("graph_size", DATASETS_SIZES) def test_random_maximum_spanning_tree_nx(graph_size): diff --git a/python/cugraph/tests/test_minimum_spanning_tree.py b/python/cugraph/tests/test_minimum_spanning_tree.py index 15404bc8acf..d1588507bce 100644 --- a/python/cugraph/tests/test_minimum_spanning_tree.py +++ b/python/cugraph/tests/test_minimum_spanning_tree.py @@ -11,17 +11,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import time import gc import pytest +import numpy as np +import rmm +import cudf import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than -import rmm -import cudf -import time -import numpy as np + # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -37,9 +37,6 @@ print("Networkx version : {} ".format(nx.__version__)) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED_WEIGHTS) def test_minimum_spanning_tree_nx(graph_file): gc.collect() @@ -75,9 +72,6 @@ def test_minimum_spanning_tree_nx(graph_file): ] -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.skip(reason="Skipping large tests") @pytest.mark.parametrize("graph_size", DATASETS_SIZES) def test_random_minimum_spanning_tree_nx(graph_size): diff --git a/python/cugraph/tests/test_modularity.py b/python/cugraph/tests/test_modularity.py index 2956d8f1913..c1ff95042ed 100644 --- a/python/cugraph/tests/test_modularity.py +++ b/python/cugraph/tests/test_modularity.py @@ -19,7 +19,6 @@ import cudf import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than def cugraph_call(G, partitions): @@ -53,9 +52,6 @@ def random_call(G, partitions): # Test all combinations of default/managed and pooled/non-pooled allocation -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("partitions", PARTITIONS) def test_modularity_clustering(graph_file, partitions): diff --git a/python/cugraph/tests/test_multigraph.py b/python/cugraph/tests/test_multigraph.py index 62245bcf65d..57be3eb34e8 100644 --- a/python/cugraph/tests/test_multigraph.py +++ b/python/cugraph/tests/test_multigraph.py @@ -11,14 +11,15 @@ # See the 
License for the specific language governing permissions and # limitations under the License. -import cugraph -import networkx as nx -from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than -import pytest import gc + +import pytest +import networkx as nx import numpy as np +import cugraph +from cugraph.tests import utils + # ============================================================================= # Pytest Setup / Teardown - called for each test function @@ -27,9 +28,6 @@ def setup_function(): gc.collect() -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_multigraph(graph_file): # FIXME: Migrate to new test fixtures for Graph setup once available @@ -63,9 +61,6 @@ def test_multigraph(graph_file): assert nxedges.equals(cuedges[["source", "target", "weight"]]) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_Graph_from_MultiGraph(graph_file): # FIXME: Migrate to new test fixtures for Graph setup once available @@ -99,9 +94,6 @@ def test_Graph_from_MultiGraph(graph_file): assert Gnxd.number_of_edges() == Gd.number_of_edges() -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_multigraph_sssp(graph_file): # FIXME: Migrate to new test fixtures for Graph setup once available diff --git a/python/cugraph/tests/test_nx_convert.py b/python/cugraph/tests/test_nx_convert.py index 5799b88157e..98cc8a11dc7 100644 --- a/python/cugraph/tests/test_nx_convert.py +++ b/python/cugraph/tests/test_nx_convert.py @@ -12,11 +12,13 @@ # limitations under the License. 
import gc + import pytest import cudf + import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than + # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -49,9 +51,6 @@ def _compare_graphs(nxG, cuG, has_wt=True): assert cu_df.to_dict() == nx_df.to_dict() -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_networkx_compatibility(graph_file): # test to make sure cuGraph and Nx build similar Graphs @@ -81,9 +80,6 @@ def test_networkx_compatibility(graph_file): _compare_graphs(nxG, cuG) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_nx_convert(graph_file): gc.collect() @@ -97,9 +93,6 @@ def test_nx_convert(graph_file): _compare_graphs(nxG, cuG, has_wt=False) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_nx_convert_multicol(graph_file): gc.collect() diff --git a/python/cugraph/tests/test_overlap.py b/python/cugraph/tests/test_overlap.py index 96e510c0294..a0c336c3f16 100644 --- a/python/cugraph/tests/test_overlap.py +++ b/python/cugraph/tests/test_overlap.py @@ -17,9 +17,9 @@ import pytest import numpy as np import scipy + import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than def cugraph_call(cu_M, pairs, edgevals=False): @@ -83,9 +83,6 @@ def cpu_call(M, first, second): # Test -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_overlap(graph_file): gc.collect() @@ -120,9 +117,6 @@ def test_overlap(graph_file): # Test -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_overlap_edge_vals(graph_file): gc.collect() diff --git a/python/cugraph/tests/test_pagerank.py b/python/cugraph/tests/test_pagerank.py index 48ab1b39caa..163b2adb967 100644 --- a/python/cugraph/tests/test_pagerank.py +++ b/python/cugraph/tests/test_pagerank.py @@ -20,7 +20,7 @@ import cudf import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than + # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -145,9 +145,6 @@ def networkx_call(Gnx, max_iter, tol, alpha, personalization_perc, nnz_vtx): # https://github.com/rapidsai/cugraph/issues/533 # -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("max_iter", MAX_ITERATIONS) @pytest.mark.parametrize("tol", TOLERANCE) @@ -198,9 +195,6 @@ def test_pagerank( assert err < (0.01 * len(cugraph_pr)) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("max_iter", MAX_ITERATIONS) @pytest.mark.parametrize("tol", TOLERANCE) diff --git a/python/cugraph/tests/test_paths.py b/python/cugraph/tests/test_paths.py index f58195570b8..56cc9b3cd50 100644 
--- a/python/cugraph/tests/test_paths.py +++ b/python/cugraph/tests/test_paths.py @@ -21,7 +21,6 @@ import pytest import cugraph -from cugraph.utilities.utils import is_device_version_less_than CONNECTED_GRAPH = """1,5,3 @@ -75,9 +74,6 @@ def graphs(request): yield cugraph_G, nx_G, cupy_df -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graphs", [CONNECTED_GRAPH], indirect=True) def test_connected_graph_shortest_path_length(graphs): cugraph_G, nx_G, cupy_df = graphs @@ -111,9 +107,6 @@ def test_connected_graph_shortest_path_length(graphs): assert path_1_to_6_length == cugraph.shortest_path_length(cupy_df, 1, 6) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graphs", [CONNECTED_GRAPH], indirect=True) def test_shortest_path_length_invalid_source(graphs): cugraph_G, nx_G, cupy_df = graphs @@ -128,9 +121,6 @@ def test_shortest_path_length_invalid_source(graphs): cugraph.shortest_path_length(cupy_df, -1, 1) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graphs", [DISCONNECTED_GRAPH], indirect=True) def test_shortest_path_length_invalid_target(graphs): cugraph_G, nx_G, cupy_df = graphs @@ -145,9 +135,6 @@ def test_shortest_path_length_invalid_target(graphs): cugraph.shortest_path_length(cupy_df, 1, 10) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graphs", [CONNECTED_GRAPH], indirect=True) def test_shortest_path_length_invalid_vertexes(graphs): cugraph_G, nx_G, cupy_df = graphs @@ -162,9 +149,6 @@ def test_shortest_path_length_invalid_vertexes(graphs): cugraph.shortest_path_length(cupy_df, 0, 42) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graphs", [DISCONNECTED_GRAPH], indirect=True) def test_shortest_path_length_no_path(graphs): cugraph_G, nx_G, cupy_df = graphs @@ -175,9 +159,6 @@ def test_shortest_path_length_no_path(graphs): assert path_1_to_8 == cugraph.shortest_path_length(cupy_df, 1, 8) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graphs", [DISCONNECTED_GRAPH], indirect=True) def test_shortest_path_length_no_target(graphs): cugraph_G, nx_G, cupy_df = graphs diff --git a/python/cugraph/tests/test_renumber.py b/python/cugraph/tests/test_renumber.py index 57912150b12..129bd667621 100644 --- a/python/cugraph/tests/test_renumber.py +++ b/python/cugraph/tests/test_renumber.py @@ -21,12 +21,8 @@ from cugraph.structure.number_map import NumberMap from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_renumber_ips(): source_list = [ "192.168.1.1", @@ -61,9 +57,6 @@ def test_renumber_ips(): assert check_dst.equals(gdf["dest_as_int"]) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_renumber_ips_cols(): source_list = [ @@ -132,9 +125,6 @@ def test_renumber_ips_str_cols(): assert check_dst.equals(gdf["dest_list"]) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_renumber_negative(): source_list = [4, 6, 8, -20, 1] dest_list = [1, 29, 35, 0, 77] @@ -156,9 +146,6 @@ def 
test_renumber_negative(): assert check_dst.equals(gdf["dest_list"]) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_renumber_negative_col(): source_list = [4, 6, 8, -20, 1] dest_list = [1, 29, 35, 0, 77] @@ -180,9 +167,6 @@ def test_renumber_negative_col(): assert check_dst.equals(gdf["dest_list"]) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.skip(reason="dropped renumbering from series support") @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_renumber_series(graph_file): @@ -220,9 +204,6 @@ def test_renumber_series(graph_file): assert check_dst["0_y"].equals(check_dst["0_x"]) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_renumber_files(graph_file): gc.collect() @@ -256,9 +237,6 @@ def test_renumber_files(graph_file): assert exp_dst.equals(unrenumbered_df["dst"]) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_renumber_files_col(graph_file): gc.collect() @@ -291,9 +269,6 @@ def test_renumber_files_col(graph_file): assert exp_dst.equals(unrenumbered_df["dst"]) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_renumber_files_multi_col(graph_file): gc.collect() diff --git a/python/cugraph/tests/test_sssp.py b/python/cugraph/tests/test_sssp.py index 9e866c84f07..9230b7a7b96 100644 --- a/python/cugraph/tests/test_sssp.py +++ b/python/cugraph/tests/test_sssp.py @@ -28,7 +28,7 @@ import cudf import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than + # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -204,9 +204,6 @@ def single_dataset_source_nxresults_weighted(request): # ============================================================================= # Tests # ============================================================================= -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("cugraph_input_type", utils.CUGRAPH_DIR_INPUT_TYPES) def test_sssp(gpubenchmark, dataset_source_nxresults, cugraph_input_type): # Extract the params generated from the fixture @@ -236,9 +233,6 @@ def test_sssp(gpubenchmark, dataset_source_nxresults, cugraph_input_type): assert err == 0 -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("cugraph_input_type", utils.NX_DIR_INPUT_TYPES + utils.MATRIX_INPUT_TYPES) def test_sssp_nonnative_inputs(gpubenchmark, @@ -249,9 +243,6 @@ def test_sssp_nonnative_inputs(gpubenchmark, cugraph_input_type) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("cugraph_input_type", utils.CUGRAPH_DIR_INPUT_TYPES) def test_sssp_edgevals(gpubenchmark, dataset_source_nxresults_weighted, cugraph_input_type): @@ -286,9 +277,6 @@ def test_sssp_edgevals(gpubenchmark, dataset_source_nxresults_weighted, assert err == 0 -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("cugraph_input_type", 
utils.NX_DIR_INPUT_TYPES + utils.MATRIX_INPUT_TYPES) def test_sssp_edgevals_nonnative_inputs( @@ -300,9 +288,6 @@ def test_sssp_edgevals_nonnative_inputs( cugraph_input_type) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("source", SOURCES) def test_sssp_data_type_conversion(graph_file, source): @@ -357,9 +342,6 @@ def test_sssp_data_type_conversion(graph_file, source): assert err == 0 -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_scipy_api_compat(): graph_file = utils.DATASETS[0] diff --git a/python/cugraph/tests/test_subgraph_extraction.py b/python/cugraph/tests/test_subgraph_extraction.py index 5be80f341b5..56c1c23e0ea 100644 --- a/python/cugraph/tests/test_subgraph_extraction.py +++ b/python/cugraph/tests/test_subgraph_extraction.py @@ -19,7 +19,7 @@ import cudf import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than + # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -71,9 +71,6 @@ def nx_call(M, verts, directed=True): return nx.subgraph(G, verts) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_subgraph_extraction_DiGraph(graph_file): gc.collect() @@ -88,9 +85,6 @@ def test_subgraph_extraction_DiGraph(graph_file): assert compare_edges(cu_sg, nx_sg) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_subgraph_extraction_Graph(graph_file): gc.collect() @@ -105,9 +99,6 @@ def test_subgraph_extraction_Graph(graph_file): assert compare_edges(cu_sg, nx_sg) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_subgraph_extraction_Graph_nx(graph_file): gc.collect() diff --git a/python/cugraph/tests/test_triangle_count.py b/python/cugraph/tests/test_triangle_count.py index d768183e4ad..917a4f320a7 100644 --- a/python/cugraph/tests/test_triangle_count.py +++ b/python/cugraph/tests/test_triangle_count.py @@ -18,7 +18,7 @@ import cudf import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than + # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -67,9 +67,6 @@ def networkx_call(M): # https://github.com/rapidsai/cugraph/issues/1043 # # @pytest.mark.parametrize("graph_file", utils.DATASETS) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_triangles(graph_file): gc.collect() @@ -80,9 +77,6 @@ def test_triangles(graph_file): assert cu_count == nx_count -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_triangles_edge_vals(graph_file): gc.collect() @@ -93,9 +87,6 @@ def test_triangles_edge_vals(graph_file): assert cu_count == nx_count -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", 
utils.DATASETS_UNDIRECTED) def test_triangles_nx(graph_file): gc.collect() diff --git a/python/cugraph/tests/test_utils.py b/python/cugraph/tests/test_utils.py index b350ef27efd..2ca820271c0 100644 --- a/python/cugraph/tests/test_utils.py +++ b/python/cugraph/tests/test_utils.py @@ -12,16 +12,14 @@ # limitations under the License. import gc +from pathlib import PurePath + import pytest + import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than -from pathlib import PurePath -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_bfs_paths(): with pytest.raises(ValueError) as ErrorMsg: gc.collect() @@ -47,9 +45,6 @@ def test_bfs_paths(): assert "not in the result set" in str(ErrorMsg) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_bfs_paths_array(): with pytest.raises(ValueError) as ErrorMsg: gc.collect() diff --git a/python/cugraph/tests/test_wjaccard.py b/python/cugraph/tests/test_wjaccard.py index f31d65de652..9f82857a8d7 100644 --- a/python/cugraph/tests/test_wjaccard.py +++ b/python/cugraph/tests/test_wjaccard.py @@ -20,7 +20,7 @@ import cudf import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than + # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -87,9 +87,6 @@ def networkx_call(M): return coeff -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_wjaccard(graph_file): gc.collect() diff --git a/python/cugraph/tests/test_woverlap.py b/python/cugraph/tests/test_woverlap.py index 50d7b0ecf84..b6ceda40116 100644 --- a/python/cugraph/tests/test_woverlap.py +++ b/python/cugraph/tests/test_woverlap.py @@ -16,11 +16,11 @@ import pytest import scipy +import numpy as np import cudf + import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than -import numpy as np def cugraph_call(cu_M, pairs): @@ -84,9 +84,6 @@ def cpu_call(M, first, second): return result -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_woverlap(graph_file): gc.collect() From 48bf0588aceb31ae9e2cf56ee747a121af357e35 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Fri, 2 Apr 2021 19:39:29 -0400 Subject: [PATCH 215/343] Reduce the size of the cugraph libraries (#1503) By explicitly telling nvcc's fatbin pass to always compress device code we can ensure that our binaries are the smallest possible size. See https://github.com/rapidsai/cudf/pull/7583 for additional context. 
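For illustration, the whole change boils down to forwarding one option to nvcc's fatbin pass. A minimal CMake sketch of the idea (the actual one-line addition to cpp/CMakeLists.txt appears in the diff below):

```cmake
# Minimal sketch: forward -compress-all to nvcc's fatbin pass so that the
# device code embedded for every target GPU architecture is stored compressed.
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xfatbin=-compress-all")
```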
Authors: - Robert Maynard (https://github.com/robertmaynard) Approvers: - Rick Ratzel (https://github.com/rlratzel) - Brad Rees (https://github.com/BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1503 --- cpp/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 3b1e93d0781..50a5d80d685 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -116,6 +116,7 @@ set(FAISS_GPU_ARCHS "${FAISS_GPU_ARCHS} -gencode arch=compute_${ptx},code=comput set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda --expt-relaxed-constexpr") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Werror=cross-execution-space-call -Wno-deprecated-declarations -Xptxas --disable-warnings") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -Wall,-Wno-error=sign-compare,-Wno-error=unused-but-set-variable") +set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xfatbin=-compress-all") # Option to enable line info in CUDA device compilation to allow introspection when profiling / # memchecking From a6edf62469c44f5b50c3a4558a2167f7a89f46fe Mon Sep 17 00:00:00 2001 From: Alex Fender Date: Mon, 5 Apr 2021 18:14:03 -0500 Subject: [PATCH 216/343] Updating RAFT tag (#1509) There was a PR merged into RAFT today, updating to the most recent commit hash : f0cd81fb49638eaddc9bf18998cc894f292bc293. Authors: - Alex Fender (https://github.com/afender) Approvers: - Seunghwa Kang (https://github.com/seunghwak) - Brad Rees (https://github.com/BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1509 --- cpp/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 50a5d80d685..1997fd75dab 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -302,7 +302,7 @@ else(DEFINED ENV{RAFT_PATH}) FetchContent_Declare( raft GIT_REPOSITORY https://github.com/rapidsai/raft.git - GIT_TAG 6455e05b3889db2b495cf3189b33c2b07bfbebf2 + GIT_TAG f0cd81fb49638eaddc9bf18998cc894f292bc293 SOURCE_SUBDIR raft ) From 9a1ab0921972b7c1c7f6ace78dd01e16551d7b41 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Tue, 6 Apr 2021 10:00:34 -0400 Subject: [PATCH 217/343] Update graph partitioning scheme (#1443) Partially addresses Issue #1442 Update graph partitioning scheme to better control memory footprint vs concurrency trade-offs for large-scale graph processing in large clusters. This new partitioning scheme also simplifies communication patterns among GPUs which can potentially improve scalability. 
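For illustration, the arithmetic behind the new scheme is captured by the two functors this change adds to include/experimental/detail/graph_utils.cuh (compute_gpu_id_from_edge_t and compute_partition_id_from_edge_t). Below is a minimal host-side sketch, with free functions in place of the device functors, std::hash standing in for the MurmurHash3_32 hash used in the real code, and an assumed 2 x 4 process grid; names and the main() driver are illustrative only:

```cpp
// Host-side sketch of the new edge placement arithmetic (illustrative only).
// comm_size = row_comm_size * col_comm_size; each GPU ends up owning
// col_comm_size rectangular adjacency matrix partitions.
#include <cstdint>
#include <functional>
#include <iostream>

int gpu_id_from_edge(int64_t major, int64_t minor, int comm_size, int row_comm_size)
{
  std::hash<int64_t> h{};
  auto major_comm_rank = static_cast<int>(h(major) % comm_size);
  auto minor_comm_rank = static_cast<int>(h(minor) % comm_size);
  // The minor vertex picks the column-communicator rank, the major vertex picks the
  // row-communicator rank (GPU rank = col_comm_rank * row_comm_size + row_comm_rank).
  return (minor_comm_rank / row_comm_size) * row_comm_size + (major_comm_rank % row_comm_size);
}

int partition_id_from_edge(
  int64_t major, int64_t minor, int comm_size, int row_comm_size, int col_comm_size)
{
  std::hash<int64_t> h{};
  auto major_comm_rank = static_cast<int>(h(major) % comm_size);
  auto minor_comm_rank = static_cast<int>(h(minor) % comm_size);
  // Partition IDs range over comm_size * col_comm_size matrix partitions in total.
  return major_comm_rank * col_comm_size + minor_comm_rank / row_comm_size;
}

int main()
{
  int const comm_size = 8, row_comm_size = 4, col_comm_size = 2;  // assumed 2 x 4 GPU grid
  std::cout << gpu_id_from_edge(3, 7, comm_size, row_comm_size) << ' '
            << partition_id_from_edge(3, 7, comm_size, row_comm_size, col_comm_size) << '\n';
  return 0;
}
```

Under this scheme each GPU is responsible for col_comm_size rectangular partitions of the 2D adjacency matrix, as described in the updated graph_view.hpp documentation further down in this patch.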
Authors: - Seunghwa Kang (https://github.com/seunghwak) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - Alex Fender (https://github.com/afender) - Andrei Schaffer (https://github.com/aschaffer) URL: https://github.com/rapidsai/cugraph/pull/1443 --- cpp/CMakeLists.txt | 1 + cpp/include/dendrogram.hpp | 2 +- .../experimental/detail/graph_utils.cuh | 80 +- cpp/include/experimental/graph.hpp | 14 + cpp/include/experimental/graph_functions.hpp | 170 +++- cpp/include/experimental/graph_view.hpp | 102 +-- cpp/include/matrix_partition_device.cuh | 2 +- .../patterns/copy_to_adj_matrix_row_col.cuh | 436 ++++----- .../copy_v_transform_reduce_in_out_nbr.cuh | 130 +-- ...ransform_reduce_key_aggregated_out_nbr.cuh | 246 +++-- ...orm_reduce_by_adj_matrix_row_col_key_e.cuh | 31 +- .../update_frontier_v_push_if_out_nbr.cuh | 256 ++---- cpp/include/patterns/vertex_frontier.cuh | 109 +-- cpp/include/utilities/collect_comm.cuh | 179 +++- cpp/include/utilities/cython.hpp | 28 +- cpp/include/utilities/dataframe_buffer.cuh | 29 +- cpp/include/utilities/device_comm.cuh | 100 ++ cpp/include/utilities/shuffle_comm.cuh | 175 ++-- cpp/include/utilities/thrust_tuple_utils.cuh | 17 - cpp/src/community/ecg.cu | 2 +- cpp/src/community/leiden.cuh | 2 +- cpp/src/community/louvain.cuh | 2 +- cpp/src/experimental/bfs.cu | 16 +- cpp/src/experimental/coarsen_graph.cu | 422 +++++---- .../experimental/generate_rmat_edgelist.cu | 4 +- cpp/src/experimental/graph.cu | 82 +- cpp/src/experimental/graph_view.cu | 186 +++- cpp/src/experimental/louvain.cuh | 34 +- cpp/src/experimental/relabel.cu | 60 +- cpp/src/experimental/renumber_edgelist.cu | 851 ++++++++++-------- cpp/src/experimental/renumber_utils.cu | 477 ++++++++++ cpp/src/experimental/sssp.cu | 16 +- cpp/src/utilities/cython.cu | 265 ++++-- cpp/tests/CMakeLists.txt | 33 +- cpp/tests/community/egonet_test.cu | 7 +- cpp/tests/community/mg_louvain_helper.cu | 3 +- cpp/tests/community/mg_louvain_test.cpp | 21 +- cpp/tests/experimental/bfs_test.cpp | 245 +++-- cpp/tests/experimental/coarsen_graph_test.cpp | 5 +- cpp/tests/experimental/generate_rmat_test.cpp | 23 +- cpp/tests/experimental/graph_test.cpp | 2 +- .../experimental/katz_centrality_test.cpp | 245 +++-- cpp/tests/experimental/mg_bfs_test.cpp | 303 +++++++ .../experimental/mg_katz_centrality_test.cpp | 268 ++++++ cpp/tests/experimental/mg_sssp_test.cpp | 314 +++++++ cpp/tests/experimental/pagerank_test.cpp | 270 ++++-- cpp/tests/experimental/rw_low_level_test.cu | 21 +- cpp/tests/experimental/sssp_test.cpp | 267 ++++-- cpp/tests/pagerank/mg_pagerank_test.cpp | 400 ++++---- .../utilities/generate_graph_from_edgelist.cu | 199 ++-- .../utilities/matrix_market_file_utilities.cu | 69 ++ cpp/tests/utilities/rmat_utilities.cu | 733 +++++++++------ cpp/tests/utilities/test_utilities.hpp | 50 +- cpp/tests/utilities/thrust_wrapper.cu | 81 ++ cpp/tests/utilities/thrust_wrapper.hpp | 30 + python/cugraph/community/egonet_wrapper.pyx | 7 +- .../centrality/mg_katz_centrality_wrapper.pyx | 11 +- .../dask/community/louvain_wrapper.pyx | 7 +- .../link_analysis/mg_pagerank_wrapper.pyx | 11 +- .../cugraph/dask/traversal/mg_bfs_wrapper.pyx | 5 +- .../dask/traversal/mg_sssp_wrapper.pyx | 7 +- .../link_analysis/pagerank_wrapper.pyx | 8 +- python/cugraph/structure/graph_utilities.pxd | 24 +- python/cugraph/structure/renumber_wrapper.pyx | 127 +-- 64 files changed, 5481 insertions(+), 2841 deletions(-) create mode 100644 cpp/src/experimental/renumber_utils.cu create mode 100644 cpp/tests/experimental/mg_bfs_test.cpp create mode 
100644 cpp/tests/experimental/mg_katz_centrality_test.cpp create mode 100644 cpp/tests/experimental/mg_sssp_test.cpp create mode 100644 cpp/tests/utilities/thrust_wrapper.cu create mode 100644 cpp/tests/utilities/thrust_wrapper.hpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 1997fd75dab..5a3cb65caa5 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -432,6 +432,7 @@ add_library(cugraph SHARED src/experimental/graph_view.cu src/experimental/coarsen_graph.cu src/experimental/renumber_edgelist.cu + src/experimental/renumber_utils.cu src/experimental/relabel.cu src/experimental/induced_subgraph.cu src/experimental/bfs.cu diff --git a/cpp/include/dendrogram.hpp b/cpp/include/dendrogram.hpp index bb9ba470a52..aa0802e80b3 100644 --- a/cpp/include/dendrogram.hpp +++ b/cpp/include/dendrogram.hpp @@ -27,7 +27,7 @@ class Dendrogram { public: void add_level(vertex_t first_index, vertex_t num_verts, - cudaStream_t stream = 0, + cudaStream_t stream, rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) { level_ptr_.push_back(std::make_unique>(num_verts, stream, mr)); diff --git a/cpp/include/experimental/detail/graph_utils.cuh b/cpp/include/experimental/detail/graph_utils.cuh index 084d68b8ba4..d79788e59ce 100644 --- a/cpp/include/experimental/detail/graph_utils.cuh +++ b/cpp/include/experimental/detail/graph_utils.cuh @@ -56,65 +56,32 @@ rmm::device_uvector compute_major_degrees( rmm::device_uvector degrees(0, handle.get_stream()); vertex_t max_num_local_degrees{0}; - for (int i = 0; i < (partition.is_hypergraph_partitioned() ? col_comm_size : row_comm_size); - ++i) { - auto vertex_partition_idx = partition.is_hypergraph_partitioned() - ? static_cast(i * row_comm_size + row_comm_rank) - : static_cast(col_comm_rank * row_comm_size + i); + for (int i = 0; i < col_comm_size; ++i) { + auto vertex_partition_idx = static_cast(i * row_comm_size + row_comm_rank); auto vertex_partition_size = partition.get_vertex_partition_size(vertex_partition_idx); max_num_local_degrees = std::max(max_num_local_degrees, vertex_partition_size); - if (i == (partition.is_hypergraph_partitioned() ? col_comm_rank : row_comm_rank)) { - degrees.resize(vertex_partition_size, handle.get_stream()); - } + if (i == col_comm_rank) { degrees.resize(vertex_partition_size, handle.get_stream()); } } local_degrees.resize(max_num_local_degrees, handle.get_stream()); - for (int i = 0; i < (partition.is_hypergraph_partitioned() ? col_comm_size : row_comm_size); - ++i) { - auto vertex_partition_idx = partition.is_hypergraph_partitioned() - ? static_cast(i * row_comm_size + row_comm_rank) - : static_cast(col_comm_rank * row_comm_size + i); + for (int i = 0; i < col_comm_size; ++i) { + auto vertex_partition_idx = static_cast(i * row_comm_size + row_comm_rank); vertex_t major_first{}; vertex_t major_last{}; std::tie(major_first, major_last) = partition.get_vertex_partition_range(vertex_partition_idx); - auto p_offsets = - partition.is_hypergraph_partitioned() - ? 
adj_matrix_partition_offsets[i] - : adj_matrix_partition_offsets[0] + - (major_first - partition.get_vertex_partition_first(col_comm_rank * row_comm_size)); + auto p_offsets = adj_matrix_partition_offsets[i]; thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), thrust::make_counting_iterator(vertex_t{0}), thrust::make_counting_iterator(major_last - major_first), local_degrees.data(), [p_offsets] __device__(auto i) { return p_offsets[i + 1] - p_offsets[i]; }); - if (partition.is_hypergraph_partitioned()) { - col_comm.reduce(local_degrees.data(), - i == col_comm_rank ? degrees.data() : static_cast(nullptr), - static_cast(major_last - major_first), - raft::comms::op_t::SUM, - i, - handle.get_stream()); - } else { - row_comm.reduce(local_degrees.data(), - i == row_comm_rank ? degrees.data() : static_cast(nullptr), - static_cast(major_last - major_first), - raft::comms::op_t::SUM, - i, - handle.get_stream()); - } + col_comm.reduce(local_degrees.data(), + i == col_comm_rank ? degrees.data() : static_cast(nullptr), + static_cast(major_last - major_first), + raft::comms::op_t::SUM, + i, + handle.get_stream()); } - raft::comms::status_t status{}; - if (partition.is_hypergraph_partitioned()) { - status = - col_comm.sync_stream(handle.get_stream()); // this is neessary as local_degrees will become - // out-of-scope once this function returns. - } else { - status = - row_comm.sync_stream(handle.get_stream()); // this is neessary as local_degrees will become - // out-of-scope once this function returns. - } - CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); - return degrees; } @@ -170,7 +137,6 @@ struct compute_gpu_id_from_vertex_t { template struct compute_gpu_id_from_edge_t { - bool hypergraph_partitioned{false}; int comm_size{0}; int row_comm_size{0}; int col_comm_size{0}; @@ -180,12 +146,22 @@ struct compute_gpu_id_from_edge_t { cuco::detail::MurmurHash3_32 hash_func{}; auto major_comm_rank = static_cast(hash_func(major) % comm_size); auto minor_comm_rank = static_cast(hash_func(minor) % comm_size); - if (hypergraph_partitioned) { - return (minor_comm_rank / col_comm_size) * row_comm_size + (major_comm_rank % row_comm_size); - } else { - return (major_comm_rank - (major_comm_rank % row_comm_size)) + - (minor_comm_rank / col_comm_size); - } + return (minor_comm_rank / row_comm_size) * row_comm_size + (major_comm_rank % row_comm_size); + } +}; + +template +struct compute_partition_id_from_edge_t { + int comm_size{0}; + int row_comm_size{0}; + int col_comm_size{0}; + + __device__ int operator()(vertex_t major, vertex_t minor) const + { + cuco::detail::MurmurHash3_32 hash_func{}; + auto major_comm_rank = static_cast(hash_func(major) % comm_size); + auto minor_comm_rank = static_cast(hash_func(minor) % comm_size); + return major_comm_rank * col_comm_size + minor_comm_rank / row_comm_size; } }; diff --git a/cpp/include/experimental/graph.hpp b/cpp/include/experimental/graph.hpp index 6a10256e6f4..a380200ea1f 100644 --- a/cpp/include/experimental/graph.hpp +++ b/cpp/include/experimental/graph.hpp @@ -188,6 +188,20 @@ template struct invalid_edge_id : invalid_idx { }; +template +__host__ __device__ std::enable_if_t::value, bool> is_valid_vertex( + vertex_t num_vertices, vertex_t v) +{ + return (v >= 0) && (v < num_vertices); +} + +template +__host__ __device__ std::enable_if_t::value, bool> is_valid_vertex( + vertex_t num_vertices, vertex_t v) +{ + return v < num_vertices; +} + } // namespace experimental } // namespace cugraph diff --git 
a/cpp/include/experimental/graph_functions.hpp b/cpp/include/experimental/graph_functions.hpp index 7b4bb466b97..100742adccd 100644 --- a/cpp/include/experimental/graph_functions.hpp +++ b/cpp/include/experimental/graph_functions.hpp @@ -17,13 +17,13 @@ #include #include -#include #include #include #include #include +#include namespace cugraph { namespace experimental { @@ -40,19 +40,24 @@ namespace experimental { * or multi-GPU (true). * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. - * @param edgelist_major_vertices Edge source vertex IDs (if the graph adjacency matrix is stored as + * @param edgelist_major_vertices Pointers (one pointer per local graph adjacency matrix partition + * assigned to this process) to edge source vertex IDs (if the graph adjacency matrix is stored as * is) or edge destination vertex IDs (if the transposed graph adjacency matrix is stored). Vertex - * IDs are updated in-place ([INOUT] parameter). Applying the compute_gpu_id_from_edge_t functor to - * every (major, minor) pair should return the local GPU ID for this function to work (edges should - * be pre-shuffled). - * @param edgelist_minor_vertices Edge destination vertex IDs (if the graph adjacency matrix is - * stored as is) or edge source vertex IDs (if the transposed graph adjacency matrix is stored). - * Vertex IDs are updated in-place ([INOUT] parameter). Applying the compute_gpu_id_from_edge_t - * functor to every (major, minor) pair should return the local GPU ID for this function to work - * (edges should be pre-shuffled). - * @param num_edgelist_edges Number of edges in the edgelist. - * @param is_hypergraph_partitioned Flag indicating whether we are assuming hypergraph partitioning - * (this flag will be removed in the future). + * IDs are updated in-place ([INOUT] parameter). Edges should be pre-shuffled to their final target + * process & matrix partition; i.e. applying the compute_gpu_id_from_edge_t functor to every (major, + * minor) pair should return the GPU ID of this process and applying the + * compute_partition_id_from_edge_t fuctor to every (major, minor) pair for a local matrix partition + * should return the partition ID of the corresponding matrix partition. + * @param edgelist_minor_vertices Pointers (one pointer per local graph adjacency matrix partition + * assigned to this process) to edge destination vertex IDs (if the graph adjacency matrix is stored + * as is) or edge source vertex IDs (if the transposed graph adjacency matrix is stored). Vertex IDs + * are updated in-place ([INOUT] parameter). Edges should be pre-shuffled to their final target + * process & matrix partition; i.e. applying the compute_gpu_id_from_edge_t functor to every (major, + * minor) pair should return the GPU ID of this process and applying the + * compute_partition_id_from_edge_t fuctor to every (major, minor) pair for a local matrix partition + * should return the partition ID of the corresponding matrix partition. + * @param edgelist_edge_counts Edge counts (one count per local graph adjacency matrix partition + * assigned to this process). * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). 
* @return std::tuple, partition_t, vertex_t, edge_t> * Quadruplet of labels (vertex IDs before renumbering) for the entire set of vertices (assigned to @@ -63,10 +68,9 @@ template std::enable_if_t, partition_t, vertex_t, edge_t>> renumber_edgelist(raft::handle_t const& handle, - vertex_t* edgelist_major_vertices /* [INOUT] */, - vertex_t* edgelist_minor_vertices /* [INOUT] */, - edge_t num_edgelist_edges, - bool is_hypergraph_partitioned, + std::vector const& edgelist_major_vertices /* [INOUT] */, + std::vector const& edgelist_minor_vertices /* [INOUT] */, + std::vector const& edgelist_edge_counts, bool do_expensive_check = false); /** @@ -115,19 +119,24 @@ std::enable_if_t> renumber_edgelist( * the compute_gpu_id_from_vertex_t to every vertex should return the local GPU ID for this function * to work (vertices should be pre-shuffled). * @param num_local_vertices Number of local vertices. - * @param edgelist_major_vertices Edge source vertex IDs (if the graph adjacency matrix is stored as + * @param edgelist_major_vertices Pointers (one pointer per local graph adjacency matrix partition + * assigned to this process) to edge source vertex IDs (if the graph adjacency matrix is stored as * is) or edge destination vertex IDs (if the transposed graph adjacency matrix is stored). Vertex - * IDs are updated in-place ([INOUT] parameter). Applying the compute_gpu_id_from_edge_t functor to - * every (major, minor) pair should return the local GPU ID for this function to work (edges should - * be pre-shuffled). - * @param edgelist_minor_vertices Edge destination vertex IDs (if the graph adjacency matrix is - * stored as is) or edge source vertex IDs (if the transposed graph adjacency matrix is stored). - * Vertex IDs are updated in-place ([INOUT] parameter). Applying the compute_gpu_id_from_edge_t - * functor to every (major, minor) pair should return the local GPU ID for this function to work - * (edges should be pre-shuffled). - * @param num_edgelist_edges Number of edges in the edgelist. - * @param is_hypergraph_partitioned Flag indicating whether we are assuming hypergraph partitioning - * (this flag will be removed in the future). + * IDs are updated in-place ([INOUT] parameter). Edges should be pre-shuffled to their final target + * process & matrix partition; i.e. applying the compute_gpu_id_from_edge_t functor to every (major, + * minor) pair should return the GPU ID of this process and applying the + * compute_partition_id_from_edge_t fuctor to every (major, minor) pair for a local matrix partition + * should return the partition ID of the corresponding matrix partition. + * @param edgelist_minor_vertices Pointers (one pointer per local graph adjacency matrix partition + * assigned to this process) to edge destination vertex IDs (if the graph adjacency matrix is stored + * as is) or edge source vertex IDs (if the transposed graph adjacency matrix is stored). Vertex IDs + * are updated in-place ([INOUT] parameter). Edges should be pre-shuffled to their final target + * process & matrix partition; i.e. applying the compute_gpu_id_from_edge_t functor to every (major, + * minor) pair should return the GPU ID of this process and applying the + * compute_partition_id_from_edge_t fuctor to every (major, minor) pair for a local matrix partition + * should return the partition ID of the corresponding matrix partition. + * @param edgelist_edge_counts Edge counts (one count per local graph adjacency matrix partition + * assigned to this process). 
* @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). * @return std::tuple, partition_t, vertex_t, edge_t> * Quadruplet of labels (vertex IDs before renumbering) for the entire set of vertices (assigned to @@ -140,10 +149,9 @@ std::enable_if_t const& edgelist_major_vertices /* [INOUT] */, + std::vector const& edgelist_minor_vertices /* [INOUT] */, + std::vector const& edgelist_edge_counts, bool do_expensive_check = false); /** @@ -181,6 +189,102 @@ std::enable_if_t> renumber_edgelist( edge_t num_edgelist_edges, bool do_expensive_check = false); +/** + * @brief Renumber external vertices to internal vertices based on the provoided @p + * renumber_map_labels. + * + * Note cugraph::experimental::invalid_id::value remains unchanged. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) + * or multi-GPU (true). + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param vertices Pointer to the vertices to be renumbered. The input external vertices are + * renumbered to internal vertices in-place. + * @param num_vertices Number of vertices to be renumbered. + * @param renumber_map_labels Pointer to the external vertices corresponding to the internal + * vertices in the range [@p local_int_vertex_first, @p local_int_vertex_last). + * @param local_int_vertex_first The first local internal vertex (inclusive, assigned to this + * process in multi-GPU). + * @param local_int_vertex_last The last local internal vertex (exclusive, assigned to this process + * in multi-GPU). + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). + */ +template +void renumber_ext_vertices(raft::handle_t const& handle, + vertex_t* vertices /* [INOUT] */, + size_t num_vertices, + vertex_t const* renumber_map_labels, + vertex_t local_int_vertex_first, + vertex_t local_int_vertex_last, + bool do_expensive_check = false); + +/** + * @brief Unrenumber local internal vertices to external vertices based on the providied @p + * renumber_map_labels. + * + * Note cugraph::experimental::invalid_id::value remains unchanged. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param vertices Pointer to the local internal vertices to be unrenumbered. Each input element + * should be in [@p local_int_vertex_first, @p local_int_vertex_last). The input internal vertices + * are renumbered to external vertices in-place. + * @param num_vertices Number of vertices to be unrenumbered. + * @param renumber_map_labels Pointer to the external vertices corresponding to the internal + * vertices in the range [@p local_int_vertex_first, @p local_int_vertex_last). + * @param local_int_vertex_first The first local internal vertex (inclusive, assigned to this + * process in multi-GPU). + * @param local_int_vertex_last The last local internal vertex (exclusive, assigned to this process + * in multi-GPU). + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). 
+ */
+template
+void unrenumber_local_int_vertices(
+ raft::handle_t const& handle,
+ vertex_t* vertices /* [INOUT] */,
+ size_t num_vertices,
+ vertex_t const* renumber_map_labels /* size = local_int_vertex_last - local_int_vertex_first */,
+ vertex_t local_int_vertex_first,
+ vertex_t local_int_vertex_last,
+ bool do_expensive_check = false);
+
+/**
+ * @brief Unrenumber (possibly non-local) internal vertices to external vertices based on the
+ * provided @p renumber_map_labels.
+ *
+ * Note cugraph::experimental::invalid_id::value remains unchanged.
+ *
+ * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
+ * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false)
+ * or multi-GPU (true).
+ * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
+ * handles to various CUDA libraries) to run graph algorithms.
+ * @param vertices Pointer to the internal vertices to be unrenumbered. The input internal vertices
+ * are renumbered to external vertices in-place.
+ * @param num_vertices Number of vertices to be unrenumbered.
+ * @param renumber_map_labels Pointer to the external vertices corresponding to the internal
+ * vertices in the range [@p local_int_vertex_first, @p local_int_vertex_last).
+ * @param local_int_vertex_first The first local internal vertex (inclusive, assigned to this
+ * process in multi-GPU).
+ * @param local_int_vertex_last The last local internal vertex (exclusive, assigned to this process
+ * in multi-GPU).
+ * @param vertex_partition_lasts Last local internal vertices (exclusive, assigned to each process
+ * in multi-GPU).
+ * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`).
+ */
+template
+void unrenumber_int_vertices(raft::handle_t const& handle,
+ vertex_t* vertices /* [INOUT] */,
+ size_t num_vertices,
+ vertex_t const* renumber_map_labels,
+ vertex_t local_int_vertex_first,
+ vertex_t local_int_vertex_last,
+ std::vector& vertex_partition_lasts,
+ bool do_expensive_check = false);
+
/**
* @brief Compute the coarsened graph.
*
diff --git a/cpp/include/experimental/graph_view.hpp b/cpp/include/experimental/graph_view.hpp
index 5d3d09bb087..47c93b42ca9 100644
--- a/cpp/include/experimental/graph_view.hpp
+++ b/cpp/include/experimental/graph_view.hpp
@@ -40,32 +40,11 @@ namespace experimental {
*
* We need to partition 1D vertex arrays (storing per vertex values) and the 2D graph adjacency
* matrix (or transposed 2D graph adjacency matrix) of G. An 1D vertex array of size V is divided to
- * P linear partitions; each partition has the size close to V / P. We consider two different
- * strategies to partition the 2D matrix: the default strategy and the hypergraph partitioning based
- * strategy (the latter is for future extension).
- * FIXME: in the future we may use the latter for both as this leads to simpler communication
- * patterns and better control over parallelism vs memory footprint trade-off.
+ * P linear partitions; each partition has the size close to V / P.
*
- * In the default case, one GPU will be responsible for 1 rectangular partition. The matrix will be
- * horizontally partitioned first to P_row slabs. Each slab will be further vertically partitioned
- * to P_col rectangles. Each rectangular partition will have the size close to V / P_row by V /
- * P_col.
- * - * To be more specific, a GPU with (col_comm_rank, row_comm_rank) will be responsible for one - * rectangular partition [a,b) by [c,d) where a = vertex_partition_offsets[row_comm_size * - * col_comm_rank], b = vertex_partition_offsets[row_comm_size * (col_comm_rank + 1)], c = - * vertex_partition_offsets[col_comm_size * row_comm_rank], and d = - * vertex_partition_offsets[col_comm_size * (row_comm_rank + 1)]. - * - * In the future, we may apply hyper-graph partitioning to divide V vertices to P groups minimizing - * edge cuts across groups while balancing the number of vertices in each group. We will also - * renumber vertices so the vertices in each group are mapped to consecutive integers. Then, there - * will be more non-zeros in the diagonal partitions of the 2D graph adjacency matrix (or the - * transposed 2D graph adjacency matrix) than the off-diagonal partitions. The default strategy does - * not balance the number of nonzeros if hyper-graph partitioning is applied. To solve this problem, - * the matrix is first horizontally partitioned to P slabs, then each slab will be further - * vertically partitioned to P_row (instead of P_col in the default case) rectangles. One GPU will - * be responsible col_comm_size rectangular partitions in this case. + * The 2D graph adjacency matrix is first horizontally partitioned to P slabs, then each slab will + * be further vertically partitioned to P_row (instead of P_col in the default case) rectangles. One + * GPU will be responsible col_comm_size rectangular partitions. * * To be more specific, a GPU with (col_comm_rank, row_comm_rank) will be responsible for * col_comm_size rectangular partitions [a_i,b_i) by [c,d) where a_i = @@ -85,13 +64,11 @@ class partition_t { partition_t() = default; partition_t(std::vector const& vertex_partition_offsets, - bool hypergraph_partitioned, int row_comm_size, int col_comm_size, int row_comm_rank, int col_comm_rank) : vertex_partition_offsets_(vertex_partition_offsets), - hypergraph_partitioned_(hypergraph_partitioned), comm_rank_(col_comm_rank * row_comm_size + row_comm_rank), row_comm_size_(row_comm_size), col_comm_size_(col_comm_size), @@ -159,10 +136,7 @@ class partition_t { get_vertex_partition_first(vertex_partition_idx); } - size_t get_number_of_matrix_partitions() const - { - return hypergraph_partitioned_ ? col_comm_size_ : 1; - } + size_t get_number_of_matrix_partitions() const { return col_comm_size_; } // major: row of the graph adjacency matrix (if the graph adjacency matrix is stored as is) or // column of the graph adjacency matrix (if the transposed graph adjacency matrix is stored). @@ -175,16 +149,18 @@ class partition_t { vertex_t get_matrix_partition_major_first(size_t partition_idx) const { - return hypergraph_partitioned_ - ? vertex_partition_offsets_[row_comm_size_ * partition_idx + row_comm_rank_] - : vertex_partition_offsets_[col_comm_rank_ * row_comm_size_]; + return vertex_partition_offsets_[row_comm_size_ * partition_idx + row_comm_rank_]; } vertex_t get_matrix_partition_major_last(size_t partition_idx) const { - return hypergraph_partitioned_ - ? 
vertex_partition_offsets_[row_comm_size_ * partition_idx + row_comm_rank_ + 1] - : vertex_partition_offsets_[(col_comm_rank_ + 1) * row_comm_size_]; + return vertex_partition_offsets_[row_comm_size_ * partition_idx + row_comm_rank_ + 1]; + } + + vertex_t get_matrix_partition_major_size(size_t partition_idx) const + { + return get_matrix_partition_major_last(partition_idx) - + get_matrix_partition_major_first(partition_idx); } vertex_t get_matrix_partition_major_value_start_offset(size_t partition_idx) const @@ -204,24 +180,21 @@ class partition_t { vertex_t get_matrix_partition_minor_first() const { - return hypergraph_partitioned_ ? vertex_partition_offsets_[col_comm_rank_ * row_comm_size_] - : vertex_partition_offsets_[row_comm_rank_ * col_comm_size_]; + return vertex_partition_offsets_[col_comm_rank_ * row_comm_size_]; } vertex_t get_matrix_partition_minor_last() const { - return hypergraph_partitioned_ - ? vertex_partition_offsets_[(col_comm_rank_ + 1) * row_comm_size_] - : vertex_partition_offsets_[(row_comm_rank_ + 1) * col_comm_size_]; + return vertex_partition_offsets_[(col_comm_rank_ + 1) * row_comm_size_]; } - // FIXME: this function may be removed if we use the same partitioning strategy whether hypergraph - // partitioning is applied or not - bool is_hypergraph_partitioned() const { return hypergraph_partitioned_; } + vertex_t get_matrix_partition_minor_size() const + { + return get_matrix_partition_minor_last() - get_matrix_partition_minor_first(); + } private: std::vector vertex_partition_offsets_{}; // size = P + 1 - bool hypergraph_partitioned_{false}; int comm_rank_{0}; int row_comm_size_{0}; @@ -236,6 +209,7 @@ class partition_t { struct graph_properties_t { bool is_symmetric{false}; bool is_multigraph{false}; + bool is_weighted{false}; }; namespace detail { @@ -277,6 +251,7 @@ class graph_base_t { bool is_symmetric() const { return properties_.is_symmetric; } bool is_multigraph() const { return properties_.is_multigraph; } + bool is_weighted() const { return properties_.is_weighted; } protected: raft::handle_t const* get_handle_ptr() const { return handle_ptr_; }; @@ -334,11 +309,6 @@ class graph_view_t 0; } - - // FIXME: this should be removed once MNMG Louvain is updated to use graph primitives - partition_t get_partition() const { return partition_; } - vertex_t get_number_of_local_vertices() const { return partition_.get_local_vertex_last() - partition_.get_local_vertex_first(); @@ -421,6 +391,12 @@ class graph_view_t compute_in_weight_sums(raft::handle_t const& handle) const; rmm::device_uvector compute_out_weight_sums(raft::handle_t const& handle) const; + edge_t compute_max_in_degree(raft::handle_t const& handle) const; + edge_t compute_max_out_degree(raft::handle_t const& handle) const; + + weight_t compute_max_in_weight_sum(raft::handle_t const& handle) const; + weight_t compute_max_out_weight_sum(raft::handle_t const& handle) const; + private: std::vector adj_matrix_partition_offsets_{}; std::vector adj_matrix_partition_indices_{}; @@ -549,8 +535,6 @@ class graph_view_tget_number_of_vertices(); } constexpr vertex_t get_local_vertex_first() const { return vertex_t{0}; } @@ -628,8 +612,6 @@ class graph_view_t compute_in_weight_sums(raft::handle_t const& handle) const; rmm::device_uvector compute_out_weight_sums(raft::handle_t const& handle) const; + edge_t compute_max_in_degree(raft::handle_t const& handle) const; + edge_t compute_max_out_degree(raft::handle_t const& handle) const; + + weight_t compute_max_in_weight_sum(raft::handle_t const& handle) const; + 
weight_t compute_max_out_weight_sum(raft::handle_t const& handle) const; + private: edge_t const* offsets_{nullptr}; vertex_t const* indices_{nullptr}; diff --git a/cpp/include/matrix_partition_device.cuh b/cpp/include/matrix_partition_device.cuh index b41119e7be6..30d6540bcfe 100644 --- a/cpp/include/matrix_partition_device.cuh +++ b/cpp/include/matrix_partition_device.cuh @@ -192,7 +192,7 @@ class matrix_partition_device_t rx_counts(row_comm_size, size_t{0}); - std::vector displacements(row_comm_size, size_t{0}); - for (int i = 0; i < row_comm_size; ++i) { - rx_counts[i] = graph_view.get_vertex_partition_size(col_comm_rank * row_comm_size + i); - displacements[i] = (i == 0) ? 0 : displacements[i - 1] + rx_counts[i - 1]; - } - device_allgatherv(row_comm, - vertex_value_input_first, - matrix_major_value_output_first, - rx_counts, - displacements, - handle.get_stream()); + auto& comm = handle.get_comms(); + auto const comm_rank = comm.get_rank(); + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_rank = row_comm.get_rank(); + auto const row_comm_size = row_comm.get_size(); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_rank = col_comm.get_rank(); + auto const col_comm_size = col_comm.get_size(); + + std::vector rx_counts(col_comm_size, size_t{0}); + std::vector displacements(col_comm_size, size_t{0}); + for (int i = 0; i < col_comm_size; ++i) { + rx_counts[i] = graph_view.get_vertex_partition_size(i * row_comm_size + row_comm_rank); + displacements[i] = (i == 0) ? 0 : displacements[i - 1] + rx_counts[i - 1]; } + device_allgatherv(col_comm, + vertex_value_input_first, + matrix_major_value_output_first, + rx_counts, + displacements, + handle.get_stream()); } else { assert(graph_view.get_number_of_local_vertices() == GraphViewType::is_adj_matrix_transposed ? graph_view.get_number_of_local_adj_matrix_partition_cols() @@ -101,80 +97,78 @@ void copy_to_matrix_major(raft::handle_t const& handle, using vertex_t = typename GraphViewType::vertex_type; if (GraphViewType::is_multi_gpu) { - if (graph_view.is_hypergraph_partitioned()) { - CUGRAPH_FAIL("unimplemented."); - } else { - auto& comm = handle.get_comms(); - auto const comm_rank = comm.get_rank(); - auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto const row_comm_rank = row_comm.get_rank(); - auto const row_comm_size = row_comm.get_size(); - auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - auto const col_comm_rank = col_comm.get_rank(); - auto const col_comm_size = col_comm.get_size(); - - auto rx_counts = - host_scalar_allgather(row_comm, - static_cast(thrust::distance(vertex_first, vertex_last)), - handle.get_stream()); - - matrix_partition_device_t matrix_partition(graph_view, 0); - for (int i = 0; i < row_comm_size; ++i) { - rmm::device_uvector rx_vertices(row_comm_rank == i ? 
size_t{0} : rx_counts[i], - handle.get_stream()); - auto rx_tmp_buffer = allocate_dataframe_buffer< - typename std::iterator_traits::value_type>(rx_counts[i], - handle.get_stream()); - auto rx_value_first = get_dataframe_buffer_begin< - typename std::iterator_traits::value_type>(rx_tmp_buffer); + auto& comm = handle.get_comms(); + auto const comm_rank = comm.get_rank(); + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_rank = row_comm.get_rank(); + auto const row_comm_size = row_comm.get_size(); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_rank = col_comm.get_rank(); + auto const col_comm_size = col_comm.get_size(); + + auto rx_counts = + host_scalar_allgather(col_comm, + static_cast(thrust::distance(vertex_first, vertex_last)), + handle.get_stream()); + + for (int i = 0; i < col_comm_size; ++i) { + matrix_partition_device_t matrix_partition(graph_view, i); + + rmm::device_uvector rx_vertices(col_comm_rank == i ? size_t{0} : rx_counts[i], + handle.get_stream()); + auto rx_tmp_buffer = allocate_dataframe_buffer< + typename std::iterator_traits::value_type>(rx_counts[i], + handle.get_stream()); + auto rx_value_first = get_dataframe_buffer_begin< + typename std::iterator_traits::value_type>(rx_tmp_buffer); - if (row_comm_rank == i) { - vertex_partition_device_t vertex_partition(graph_view); - auto map_first = - thrust::make_transform_iterator(vertex_first, [vertex_partition] __device__(auto v) { - return vertex_partition.get_local_vertex_offset_from_vertex_nocheck(v); - }); - // FIXME: this gather (and temporary buffer) is unnecessary if NCCL directly takes a - // permutation iterator (and directly gathers to the internal buffer) - thrust::gather(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - map_first, - map_first + thrust::distance(vertex_first, vertex_last), - vertex_value_input_first, - rx_value_first); - } + if (col_comm_rank == i) { + vertex_partition_device_t vertex_partition(graph_view); + auto map_first = + thrust::make_transform_iterator(vertex_first, [vertex_partition] __device__(auto v) { + return vertex_partition.get_local_vertex_offset_from_vertex_nocheck(v); + }); + // FIXME: this gather (and temporary buffer) is unnecessary if NCCL directly takes a + // permutation iterator (and directly gathers to the internal buffer) + thrust::gather(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + map_first, + map_first + thrust::distance(vertex_first, vertex_last), + vertex_value_input_first, + rx_value_first); + } - // FIXME: these broadcast operations can be placed between ncclGroupStart() and - // ncclGroupEnd() - device_bcast( - row_comm, vertex_first, rx_vertices.begin(), rx_counts[i], i, handle.get_stream()); - device_bcast( - row_comm, rx_value_first, rx_value_first, rx_counts[i], i, handle.get_stream()); + // FIXME: these broadcast operations can be placed between ncclGroupStart() and + // ncclGroupEnd() + device_bcast( + col_comm, vertex_first, rx_vertices.begin(), rx_counts[i], i, handle.get_stream()); + device_bcast(col_comm, rx_value_first, rx_value_first, rx_counts[i], i, handle.get_stream()); - if (row_comm_rank == i) { - auto map_first = - thrust::make_transform_iterator(vertex_first, [matrix_partition] __device__(auto v) { - return matrix_partition.get_major_offset_from_major_nocheck(v); - }); - // FIXME: this scatter is unnecessary if NCCL directly takes a permutation iterator (and - // directly scatters from the 
internal buffer) - thrust::scatter(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - rx_value_first, - rx_value_first + rx_counts[i], - map_first, - matrix_major_value_output_first); - } else { - auto map_first = thrust::make_transform_iterator( - rx_vertices.begin(), [matrix_partition] __device__(auto v) { - return matrix_partition.get_major_offset_from_major_nocheck(v); - }); - // FIXME: this scatter is unnecessary if NCCL directly takes a permutation iterator (and - // directly scatters from the internal buffer) - thrust::scatter(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - rx_value_first, - rx_value_first + rx_counts[i], - map_first, - matrix_major_value_output_first); - } + if (col_comm_rank == i) { + auto map_first = + thrust::make_transform_iterator(vertex_first, [matrix_partition] __device__(auto v) { + return matrix_partition.get_major_offset_from_major_nocheck(v); + }); + // FIXME: this scatter is unnecessary if NCCL directly takes a permutation iterator (and + // directly scatters from the internal buffer) + thrust::scatter( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rx_value_first, + rx_value_first + rx_counts[i], + map_first, + matrix_major_value_output_first + matrix_partition.get_major_value_start_offset()); + } else { + auto map_first = thrust::make_transform_iterator( + rx_vertices.begin(), [matrix_partition] __device__(auto v) { + return matrix_partition.get_major_offset_from_major_nocheck(v); + }); + // FIXME: this scatter is unnecessary if NCCL directly takes a permutation iterator (and + // directly scatters from the internal buffer) + thrust::scatter( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rx_value_first, + rx_value_first + rx_counts[i], + map_first, + matrix_major_value_output_first + matrix_partition.get_major_value_start_offset()); } } } else { @@ -199,59 +193,27 @@ void copy_to_matrix_minor(raft::handle_t const& handle, MatrixMinorValueOutputIterator matrix_minor_value_output_first) { if (GraphViewType::is_multi_gpu) { - if (graph_view.is_hypergraph_partitioned()) { - CUGRAPH_FAIL("unimplemented."); - } else { - auto& comm = handle.get_comms(); - auto const comm_rank = comm.get_rank(); - auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto const row_comm_rank = row_comm.get_rank(); - auto const row_comm_size = row_comm.get_size(); - auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - auto const col_comm_rank = col_comm.get_rank(); - auto const col_comm_size = col_comm.get_size(); - - // FIXME: this P2P is unnecessary if we apply the partitioning scheme used with hypergraph - // partitioning - auto comm_src_rank = row_comm_rank * col_comm_size + col_comm_rank; - auto comm_dst_rank = (comm_rank % col_comm_size) * row_comm_size + comm_rank / col_comm_size; - // FIXME: this branch may be no longer necessary with NCCL backend - if (comm_src_rank == comm_rank) { - assert(comm_dst_rank == comm_rank); - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - vertex_value_input_first, - vertex_value_input_first + graph_view.get_number_of_local_vertices(), - matrix_minor_value_output_first + - (graph_view.get_vertex_partition_first(comm_src_rank) - - graph_view.get_vertex_partition_first(row_comm_rank * col_comm_size))); - } else { - device_sendrecv( - comm, - vertex_value_input_first, - static_cast(graph_view.get_number_of_local_vertices()), - comm_dst_rank, - 
matrix_minor_value_output_first + - (graph_view.get_vertex_partition_first(comm_src_rank) - - graph_view.get_vertex_partition_first(row_comm_rank * col_comm_size)), - static_cast(graph_view.get_vertex_partition_size(comm_src_rank)), - comm_src_rank, - handle.get_stream()); - } - - // FIXME: these broadcast operations can be placed between ncclGroupStart() and - // ncclGroupEnd() - for (int i = 0; i < col_comm_size; ++i) { - auto offset = graph_view.get_vertex_partition_first(row_comm_rank * col_comm_size + i) - - graph_view.get_vertex_partition_first(row_comm_rank * col_comm_size); - auto count = graph_view.get_vertex_partition_size(row_comm_rank * col_comm_size + i); - device_bcast(col_comm, - matrix_minor_value_output_first + offset, - matrix_minor_value_output_first + offset, - count, - i, - handle.get_stream()); - } + auto& comm = handle.get_comms(); + auto const comm_rank = comm.get_rank(); + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_rank = row_comm.get_rank(); + auto const row_comm_size = row_comm.get_size(); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_rank = col_comm.get_rank(); + auto const col_comm_size = col_comm.get_size(); + + std::vector rx_counts(row_comm_size, size_t{0}); + std::vector displacements(row_comm_size, size_t{0}); + for (int i = 0; i < row_comm_size; ++i) { + rx_counts[i] = graph_view.get_vertex_partition_size(col_comm_rank * row_comm_size + i); + displacements[i] = (i == 0) ? 0 : displacements[i - 1] + rx_counts[i - 1]; } + device_allgatherv(row_comm, + vertex_value_input_first, + matrix_minor_value_output_first, + rx_counts, + displacements, + handle.get_stream()); } else { assert(graph_view.get_number_of_local_vertices() == GraphViewType::is_adj_matrix_transposed ? graph_view.get_number_of_local_adj_matrix_partition_rows() @@ -277,143 +239,75 @@ void copy_to_matrix_minor(raft::handle_t const& handle, using vertex_t = typename GraphViewType::vertex_type; if (GraphViewType::is_multi_gpu) { - if (graph_view.is_hypergraph_partitioned()) { - CUGRAPH_FAIL("unimplemented."); - } else { - auto& comm = handle.get_comms(); - auto const comm_rank = comm.get_rank(); - auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto const row_comm_rank = row_comm.get_rank(); - auto const row_comm_size = row_comm.get_size(); - auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - auto const col_comm_rank = col_comm.get_rank(); - auto const col_comm_size = col_comm.get_size(); - - // FIXME: this P2P is unnecessary if apply the same partitioning scheme regardless of - // hypergraph partitioning is applied or not - auto comm_src_rank = row_comm_rank * col_comm_size + col_comm_rank; - auto comm_dst_rank = (comm_rank % col_comm_size) * row_comm_size + comm_rank / col_comm_size; - size_t tx_count = thrust::distance(vertex_first, vertex_last); - size_t rx_count{}; - // FIXME: it seems like raft::isend and raft::irecv do not properly handle the destination (or - // source) == self case. Need to double check and fix this if this is indeed the case (or RAFT - // may use ncclSend/ncclRecv instead of UCX for device data). 
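Both new multi-GPU paths above (allgatherv over col_comm for the major copy, over row_comm for the minor copy) derive the receive counts and displacements the same way, from the vertex partition sizes of the ranks in the sub-communicator. A standalone sketch of that computation with illustrative names; the real code queries graph_view.get_vertex_partition_size():

  // Illustrative sketch (not part of this patch): per-rank counts and displacements
  // for device_allgatherv, in sub-communicator rank order.
  #include <cstddef>
  #include <utility>
  #include <vector>

  std::pair<std::vector<size_t>, std::vector<size_t>> make_allgatherv_counts(
    std::vector<size_t> const& partition_sizes)
  {
    std::vector<size_t> rx_counts(partition_sizes.size(), size_t{0});
    std::vector<size_t> displacements(partition_sizes.size(), size_t{0});
    for (size_t i = 0; i < partition_sizes.size(); ++i) {
      rx_counts[i]     = partition_sizes[i];
      displacements[i] = (i == 0) ? size_t{0} : displacements[i - 1] + rx_counts[i - 1];
    }
    return {rx_counts, displacements};
  }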
- if (comm_src_rank == comm_rank) { - assert(comm_dst_rank == comm_rank); - rx_count = tx_count; - } else { - std::vector count_requests(2); - comm.isend(&tx_count, 1, comm_dst_rank, 0 /* tag */, count_requests.data()); - comm.irecv(&rx_count, 1, comm_src_rank, 0 /* tag */, count_requests.data() + 1); - comm.waitall(count_requests.size(), count_requests.data()); - } - - vertex_partition_device_t vertex_partition(graph_view); - rmm::device_uvector dst_vertices(rx_count, handle.get_stream()); - auto dst_tmp_buffer = allocate_dataframe_buffer< - typename std::iterator_traits::value_type>(rx_count, + auto& comm = handle.get_comms(); + auto const comm_rank = comm.get_rank(); + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_rank = row_comm.get_rank(); + auto const row_comm_size = row_comm.get_size(); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_rank = col_comm.get_rank(); + auto const col_comm_size = col_comm.get_size(); + + auto rx_counts = + host_scalar_allgather(row_comm, + static_cast(thrust::distance(vertex_first, vertex_last)), + handle.get_stream()); + + matrix_partition_device_t matrix_partition(graph_view, 0); + for (int i = 0; i < row_comm_size; ++i) { + rmm::device_uvector rx_vertices(row_comm_rank == i ? size_t{0} : rx_counts[i], + handle.get_stream()); + auto rx_tmp_buffer = allocate_dataframe_buffer< + typename std::iterator_traits::value_type>(rx_counts[i], handle.get_stream()); - auto dst_value_first = get_dataframe_buffer_begin< - typename std::iterator_traits::value_type>(dst_tmp_buffer); - if (comm_src_rank == comm_rank) { - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - vertex_first, - vertex_last, - dst_vertices.begin()); - auto map_first = - thrust::make_transform_iterator(vertex_first, [vertex_partition] __device__(auto v) { - return vertex_partition.get_local_vertex_offset_from_vertex_nocheck(v); - }); - thrust::gather(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - map_first, - map_first + thrust::distance(vertex_first, vertex_last), - vertex_value_input_first, - dst_value_first); - } else { - auto src_tmp_buffer = allocate_dataframe_buffer< - typename std::iterator_traits::value_type>(tx_count, - handle.get_stream()); - auto src_value_first = get_dataframe_buffer_begin< - typename std::iterator_traits::value_type>(src_tmp_buffer); + auto rx_value_first = get_dataframe_buffer_begin< + typename std::iterator_traits::value_type>(rx_tmp_buffer); + if (row_comm_rank == i) { + vertex_partition_device_t vertex_partition(graph_view); auto map_first = thrust::make_transform_iterator(vertex_first, [vertex_partition] __device__(auto v) { return vertex_partition.get_local_vertex_offset_from_vertex_nocheck(v); }); + // FIXME: this gather (and temporary buffer) is unnecessary if NCCL directly takes a + // permutation iterator (and directly gathers to the internal buffer) thrust::gather(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), map_first, map_first + thrust::distance(vertex_first, vertex_last), vertex_value_input_first, - src_value_first); - - device_sendrecv( - comm, - vertex_first, - tx_count, - comm_dst_rank, - dst_vertices.begin(), - rx_count, - comm_src_rank, - handle.get_stream()); - - device_sendrecv(comm, - src_value_first, - tx_count, - comm_dst_rank, - dst_value_first, - rx_count, - comm_src_rank, - handle.get_stream()); + rx_value_first); } - // FIXME: now we can clear 
tx_tmp_buffer - - auto rx_counts = host_scalar_allgather(col_comm, rx_count, handle.get_stream()); - - matrix_partition_device_t matrix_partition(graph_view, 0); - for (int i = 0; i < col_comm_size; ++i) { - rmm::device_uvector rx_vertices(col_comm_rank == i ? size_t{0} : rx_counts[i], - handle.get_stream()); - auto rx_tmp_buffer = allocate_dataframe_buffer< - typename std::iterator_traits::value_type>(rx_counts[i], - handle.get_stream()); - auto rx_value_first = get_dataframe_buffer_begin< - typename std::iterator_traits::value_type>(rx_tmp_buffer); - - // FIXME: these broadcast operations can be placed between ncclGroupStart() and - // ncclGroupEnd() - device_bcast(col_comm, - dst_vertices.begin(), - rx_vertices.begin(), - rx_counts[i], - i, - handle.get_stream()); - device_bcast( - col_comm, dst_value_first, rx_value_first, rx_counts[i], i, handle.get_stream()); - - if (col_comm_rank == i) { - auto map_first = thrust::make_transform_iterator( - dst_vertices.begin(), [matrix_partition] __device__(auto v) { - return matrix_partition.get_minor_offset_from_minor_nocheck(v); - }); - - thrust::scatter(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - dst_value_first, - dst_value_first + rx_counts[i], - map_first, - matrix_minor_value_output_first); - } else { - auto map_first = thrust::make_transform_iterator( - rx_vertices.begin(), [matrix_partition] __device__(auto v) { - return matrix_partition.get_minor_offset_from_minor_nocheck(v); - }); + // FIXME: these broadcast operations can be placed between ncclGroupStart() and + // ncclGroupEnd() + device_bcast( + row_comm, vertex_first, rx_vertices.begin(), rx_counts[i], i, handle.get_stream()); + device_bcast(row_comm, rx_value_first, rx_value_first, rx_counts[i], i, handle.get_stream()); - thrust::scatter(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - rx_value_first, - rx_value_first + rx_counts[i], - map_first, - matrix_minor_value_output_first); - } + if (row_comm_rank == i) { + auto map_first = + thrust::make_transform_iterator(vertex_first, [matrix_partition] __device__(auto v) { + return matrix_partition.get_minor_offset_from_minor_nocheck(v); + }); + // FIXME: this scatter is unnecessary if NCCL directly takes a permutation iterator (and + // directly scatters from the internal buffer) + thrust::scatter(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rx_value_first, + rx_value_first + rx_counts[i], + map_first, + matrix_minor_value_output_first); + } else { + auto map_first = thrust::make_transform_iterator( + rx_vertices.begin(), [matrix_partition] __device__(auto v) { + return matrix_partition.get_minor_offset_from_minor_nocheck(v); + }); + // FIXME: this scatter is unnecessary if NCCL directly takes a permutation iterator (and + // directly scatters from the internal buffer) + thrust::scatter(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rx_value_first, + rx_value_first + rx_counts[i], + map_first, + matrix_minor_value_output_first); } } } else { diff --git a/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh b/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh index 3059cf95852..e6a73a874ae 100644 --- a/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh +++ b/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh @@ -362,16 +362,6 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, static_assert(is_arithmetic_or_thrust_tuple_of_arithmetic::value); - auto loop_count = size_t{1}; - if (GraphViewType::is_multi_gpu) { - 
auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto const row_comm_size = row_comm.get_size(); - loop_count = graph_view.is_hypergraph_partitioned() - ? graph_view.get_number_of_local_adj_matrix_partitions() - : static_cast(row_comm_size); - } - auto comm_rank = handle.comms_initialized() ? handle.get_comms().get_rank() : int{0}; - auto minor_tmp_buffer_size = (GraphViewType::is_multi_gpu && (in != GraphViewType::is_adj_matrix_transposed)) ? GraphViewType::is_adj_matrix_transposed @@ -386,10 +376,7 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, if (GraphViewType::is_multi_gpu) { auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); auto const row_comm_rank = row_comm.get_rank(); - auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - auto const col_comm_rank = col_comm.get_rank(); - minor_init = graph_view.is_hypergraph_partitioned() ? (row_comm_rank == 0) ? init : T{} - : (col_comm_rank == 0) ? init : T{}; + minor_init = (row_comm_rank == 0) ? init : T{}; } if (GraphViewType::is_multi_gpu) { @@ -407,24 +394,13 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, assert(minor_tmp_buffer_size == 0); } - for (size_t i = 0; i < loop_count; ++i) { - matrix_partition_device_t matrix_partition( - graph_view, (GraphViewType::is_multi_gpu && !graph_view.is_hypergraph_partitioned()) ? 0 : i); + for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { + matrix_partition_device_t matrix_partition(graph_view, i); - auto major_tmp_buffer_size = vertex_t{0}; - if (GraphViewType::is_multi_gpu) { - auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto const row_comm_size = row_comm.get_size(); - auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - auto const col_comm_rank = col_comm.get_rank(); - - major_tmp_buffer_size = - (in == GraphViewType::is_adj_matrix_transposed) - ? graph_view.is_hypergraph_partitioned() - ? matrix_partition.get_major_size() - : graph_view.get_vertex_partition_size(col_comm_rank * row_comm_size + i) - : vertex_t{0}; - } + auto major_tmp_buffer_size = + GraphViewType::is_multi_gpu && (in == GraphViewType::is_adj_matrix_transposed) + ? matrix_partition.get_major_size() + : vertex_t{0}; auto major_tmp_buffer = allocate_dataframe_buffer(major_tmp_buffer_size, handle.get_stream()); auto major_buffer_first = get_dataframe_buffer_begin(major_tmp_buffer); @@ -432,12 +408,9 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, auto major_init = T{}; if (in == GraphViewType::is_adj_matrix_transposed) { if (GraphViewType::is_multi_gpu) { - auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto const row_comm_rank = row_comm.get_rank(); auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); auto const col_comm_rank = col_comm.get_rank(); - major_init = graph_view.is_hypergraph_partitioned() ? (col_comm_rank == 0) ? init : T{} - : (row_comm_rank == 0) ? init : T{}; + major_init = (col_comm_rank == 0) ? init : T{}; } else { major_init = init; } @@ -450,8 +423,7 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, auto const row_comm_size = row_comm.get_size(); auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); auto const col_comm_rank = col_comm.get_rank(); - comm_root_rank = graph_view.is_hypergraph_partitioned() ? 
i * row_comm_size + row_comm_rank - : col_comm_rank * row_comm_size + i; + comm_root_rank = i * row_comm_size + row_comm_rank; } if (graph_view.get_vertex_partition_size(comm_root_rank) > 0) { @@ -505,25 +477,13 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, auto const col_comm_rank = col_comm.get_rank(); auto const col_comm_size = col_comm.get_size(); - if (graph_view.is_hypergraph_partitioned()) { - device_reduce( - col_comm, - major_buffer_first, - vertex_value_output_first, - static_cast(graph_view.get_vertex_partition_size(i * row_comm_size + i)), - raft::comms::op_t::SUM, - i, - handle.get_stream()); - } else { - device_reduce(row_comm, - major_buffer_first, - vertex_value_output_first, - static_cast( - graph_view.get_vertex_partition_size(col_comm_rank * row_comm_size + i)), - raft::comms::op_t::SUM, - i, - handle.get_stream()); - } + device_reduce(col_comm, + major_buffer_first, + vertex_value_output_first, + matrix_partition.get_major_size(), + raft::comms::op_t::SUM, + i, + handle.get_stream()); } } @@ -537,53 +497,17 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, auto const col_comm_rank = col_comm.get_rank(); auto const col_comm_size = col_comm.get_size(); - if (graph_view.is_hypergraph_partitioned()) { - CUGRAPH_FAIL("unimplemented."); - } else { - for (int i = 0; i < col_comm_size; ++i) { - auto offset = (graph_view.get_vertex_partition_first(row_comm_rank * col_comm_size + i) - - graph_view.get_vertex_partition_first(row_comm_rank * col_comm_size)); - auto size = static_cast( - graph_view.get_vertex_partition_size(row_comm_rank * col_comm_size + i)); - device_reduce(col_comm, - minor_buffer_first + offset, - minor_buffer_first + offset, - size, - raft::comms::op_t::SUM, - i, - handle.get_stream()); - } - - // FIXME: this P2P is unnecessary if we apply the partitioning scheme used with hypergraph - // partitioning - auto comm_src_rank = (comm_rank % col_comm_size) * row_comm_size + comm_rank / col_comm_size; - auto comm_dst_rank = row_comm_rank * col_comm_size + col_comm_rank; - // FIXME: this branch may no longer necessary with NCCL backend - if (comm_src_rank == comm_rank) { - assert(comm_dst_rank == comm_rank); - auto offset = - graph_view.get_vertex_partition_first(row_comm_rank * col_comm_size + col_comm_rank) - - graph_view.get_vertex_partition_first(row_comm_rank * col_comm_size); - auto size = static_cast( - graph_view.get_vertex_partition_size(row_comm_rank * col_comm_size + col_comm_rank)); - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - minor_buffer_first + offset, - minor_buffer_first + offset + size, - vertex_value_output_first); - } else { - device_sendrecv( - comm, - minor_buffer_first + - (graph_view.get_vertex_partition_first(row_comm_rank * col_comm_size + col_comm_rank) - - graph_view.get_vertex_partition_first(row_comm_rank * col_comm_size)), - static_cast( - graph_view.get_vertex_partition_size(row_comm_rank * col_comm_size + col_comm_rank)), - comm_dst_rank, - vertex_value_output_first, - static_cast(graph_view.get_vertex_partition_size(comm_rank)), - comm_src_rank, - handle.get_stream()); - } + for (int i = 0; i < row_comm_size; ++i) { + auto offset = (graph_view.get_vertex_partition_first(col_comm_rank * row_comm_size + i) - + graph_view.get_vertex_partition_first(col_comm_rank * row_comm_size)); + device_reduce(row_comm, + minor_buffer_first + offset, + vertex_value_output_first, + static_cast( + graph_view.get_vertex_partition_size(col_comm_rank * row_comm_size + i)), + 
raft::comms::op_t::SUM, + i, + handle.get_stream()); } } } diff --git a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh index 19a5f67c9de..22dc2041793 100644 --- a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh +++ b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -170,8 +171,8 @@ __global__ void for_all_major_for_all_nbr_low_degree( */ template ::value_type, + static_assert(std::is_same::value_type, typename GraphViewType::vertex_type>::value); + static_assert(std::is_same::value_type, + typename std::iterator_traits::value_type>::value); static_assert(is_arithmetic_or_thrust_tuple_of_arithmetic::value); using vertex_t = typename GraphViewType::vertex_type; @@ -206,64 +209,113 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( // 1. build a cuco::static_map object for the k, v pairs. auto kv_map_ptr = std::make_unique>( - static_cast(static_cast(thrust::distance(map_key_first, map_key_last)) / - load_factor), - invalid_vertex_id::value, - invalid_vertex_id::value); - auto pair_first = thrust::make_transform_iterator( - thrust::make_zip_iterator(thrust::make_tuple(map_key_first, map_value_first)), - [] __device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); - }); - kv_map_ptr->insert(pair_first, pair_first + thrust::distance(map_key_first, map_key_last)); - - // 2. aggregate each vertex out-going edges based on keys and transform-reduce. - - auto loop_count = size_t{1}; + size_t{0}, invalid_vertex_id::value, invalid_vertex_id::value); if (GraphViewType::is_multi_gpu) { auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_rank = row_comm.get_rank(); auto const row_comm_size = row_comm.get_size(); - loop_count = graph_view.is_hypergraph_partitioned() - ? 
graph_view.get_number_of_local_adj_matrix_partitions() - : static_cast(row_comm_size); + + auto map_counts = + host_scalar_allgather(row_comm, + static_cast(thrust::distance(map_key_first, map_key_last)), + handle.get_stream()); + std::vector map_displacements(row_comm_size, size_t{0}); + std::partial_sum(map_counts.begin(), map_counts.end() - 1, map_displacements.begin() + 1); + rmm::device_uvector map_keys(map_displacements.back() + map_counts.back(), + handle.get_stream()); + auto map_value_buffer = + allocate_dataframe_buffer(map_keys.size(), handle.get_stream()); + for (int i = 0; i < row_comm_size; ++i) { + device_bcast(row_comm, + map_key_first, + map_keys.begin() + map_displacements[i], + map_counts[i], + i, + handle.get_stream()); + device_bcast(row_comm, + map_value_first, + get_dataframe_buffer_begin(map_value_buffer) + map_displacements[i], + map_counts[i], + i, + handle.get_stream()); + } + // FIXME: these copies are unnecessary, better fix RAFT comm's bcast to take separate input & + // output pointers + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + map_key_first, + map_key_last, + map_keys.begin() + map_displacements[row_comm_rank]); + thrust::copy( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + map_value_first, + map_value_first + thrust::distance(map_key_first, map_key_last), + get_dataframe_buffer_begin(map_value_buffer) + map_displacements[row_comm_rank]); + + handle.get_stream_view().synchronize(); // cuco::static_map currently does not take stream + + kv_map_ptr.reset(); + + kv_map_ptr = std::make_unique>( + // FIXME: std::max(..., ...) as a temporary workaround for + // https://github.com/NVIDIA/cuCollections/issues/72 and + // https://github.com/NVIDIA/cuCollections/issues/73 + std::max(static_cast(static_cast(map_keys.size()) / load_factor), + static_cast(thrust::distance(map_key_first, map_key_last)) + 1), + invalid_vertex_id::value, + invalid_vertex_id::value); + + auto pair_first = thrust::make_transform_iterator( + thrust::make_zip_iterator(thrust::make_tuple( + map_keys.begin(), get_dataframe_buffer_begin(map_value_buffer))), + [] __device__(auto val) { + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + }); + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. + if (map_keys.size()) { kv_map_ptr->insert(pair_first, pair_first + map_keys.size()); } + } else { + handle.get_stream_view().synchronize(); // cuco::static_map currently does not take stream + + kv_map_ptr.reset(); + + kv_map_ptr = std::make_unique>( + // FIXME: std::max(..., ...) as a temporary workaround for + // https://github.com/NVIDIA/cuCollections/issues/72 and + // https://github.com/NVIDIA/cuCollections/issues/73 + std::max(static_cast( + static_cast(thrust::distance(map_key_first, map_key_last)) / load_factor), + static_cast(thrust::distance(map_key_first, map_key_last)) + 1), + invalid_vertex_id::value, + invalid_vertex_id::value); + + auto pair_first = thrust::make_transform_iterator( + thrust::make_zip_iterator(thrust::make_tuple(map_key_first, map_value_first)), + [] __device__(auto val) { + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + }); + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. 
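The capacity chosen for the cuco::static_map in both branches above can be read as the following helper; a sketch of the workaround for the cited cuCollections issues, with an illustrative function name:

  // Illustrative sketch (not part of this patch): over-allocate the map so that
  // capacity is the larger of size / load_factor and size + 1, working around
  // https://github.com/NVIDIA/cuCollections/issues/72 and /73.
  #include <algorithm>
  #include <cstddef>

  inline size_t compute_static_map_capacity(size_t num_pairs, double load_factor)
  {
    return std::max(static_cast<size_t>(static_cast<double>(num_pairs) / load_factor),
                    num_pairs + size_t{1});
  }

Insertion is then skipped entirely when there are no pairs, so no kernel is launched with a zero-size grid.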
+ if (thrust::distance(map_key_first, map_key_last) > 0) { + kv_map_ptr->insert(pair_first, pair_first + thrust::distance(map_key_first, map_key_last)); + } } + // 2. aggregate each vertex out-going edges based on keys and transform-reduce. + rmm::device_uvector major_vertices(0, handle.get_stream()); auto e_op_result_buffer = allocate_dataframe_buffer(0, handle.get_stream()); - for (size_t i = 0; i < loop_count; ++i) { - matrix_partition_device_t matrix_partition( - graph_view, (GraphViewType::is_multi_gpu && !graph_view.is_hypergraph_partitioned()) ? 0 : i); - - int comm_root_rank = 0; - if (GraphViewType::is_multi_gpu) { - auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto const row_comm_rank = row_comm.get_rank(); - auto const row_comm_size = row_comm.get_size(); - auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - auto const col_comm_rank = col_comm.get_rank(); - comm_root_rank = graph_view.is_hypergraph_partitioned() ? i * row_comm_size + row_comm_rank - : col_comm_rank * row_comm_size + i; - } + for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { + matrix_partition_device_t matrix_partition(graph_view, i); - auto num_edges = thrust::transform_reduce( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - thrust::make_counting_iterator(graph_view.get_vertex_partition_first(comm_root_rank)), - thrust::make_counting_iterator(graph_view.get_vertex_partition_last(comm_root_rank)), - [matrix_partition] __device__(auto row) { - auto row_offset = matrix_partition.get_major_offset_from_major_nocheck(row); - return matrix_partition.get_local_degree(row_offset); - }, - edge_t{0}, - thrust::plus()); - - rmm::device_uvector tmp_major_vertices(num_edges, handle.get_stream()); + rmm::device_uvector tmp_major_vertices(matrix_partition.get_number_of_edges(), + handle.get_stream()); rmm::device_uvector tmp_minor_keys(tmp_major_vertices.size(), handle.get_stream()); rmm::device_uvector tmp_key_aggregated_edge_weights(tmp_major_vertices.size(), handle.get_stream()); - if (graph_view.get_vertex_partition_size(comm_root_rank) > 0) { + if (matrix_partition.get_major_size() > 0) { raft::grid_1d_thread_t update_grid( - graph_view.get_vertex_partition_size(comm_root_rank), + matrix_partition.get_major_size(), detail::copy_v_transform_reduce_key_aggregated_out_nbr_for_all_block_size, handle.get_device_properties().maxGridSize[0]); @@ -277,8 +329,8 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( 0, handle.get_stream()>>>( matrix_partition, - graph_view.get_vertex_partition_first(comm_root_rank), - graph_view.get_vertex_partition_last(comm_root_rank), + matrix_partition.get_major_first(), + matrix_partition.get_major_last(), adj_matrix_col_key_first, tmp_major_vertices.data(), tmp_minor_keys.data(), @@ -300,10 +352,14 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( tmp_key_aggregated_edge_weights.resize(tmp_major_vertices.size(), handle.get_stream()); if (GraphViewType::is_multi_gpu) { - auto& sub_comm = handle.get_subcomm(graph_view.is_hypergraph_partitioned() - ? 
cugraph::partition_2d::key_naming_t().col_name() - : cugraph::partition_2d::key_naming_t().row_name()); - auto const sub_comm_size = sub_comm.get_size(); + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_size = row_comm.get_size(); + + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_size = col_comm.get_size(); triplet_first = thrust::make_zip_iterator(thrust::make_tuple(tmp_major_vertices.begin(), @@ -315,11 +371,13 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( std::forward_as_tuple( std::tie(rx_major_vertices, rx_minor_keys, rx_key_aggregated_edge_weights), std::ignore) = groupby_gpuid_and_shuffle_values( - sub_comm, + col_comm, triplet_first, triplet_first + tmp_major_vertices.size(), - [key_func = detail::compute_gpu_id_from_vertex_t{sub_comm_size}] __device__( - auto val) { return key_func(thrust::get<1>(val)); }, + [key_func = detail::compute_gpu_id_from_vertex_t{comm_size}, + row_comm_size] __device__(auto val) { + return key_func(thrust::get<1>(val)) / row_comm_size; + }, handle.get_stream()); auto pair_first = thrust::make_zip_iterator( @@ -355,56 +413,52 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( triplet_first = thrust::make_zip_iterator(thrust::make_tuple( tmp_major_vertices.begin(), tmp_minor_keys.begin(), tmp_key_aggregated_edge_weights.begin())); - thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - triplet_first, - triplet_first + tmp_major_vertices.size(), - tmp_e_op_result_buffer_first, - [adj_matrix_row_value_input_first, - key_aggregated_e_op, - matrix_partition, - kv_map = kv_map_ptr->get_device_view()] __device__(auto val) { - auto major = thrust::get<0>(val); - auto key = thrust::get<1>(val); - auto w = thrust::get<2>(val); - return key_aggregated_e_op( - major, - key, - w, - *(adj_matrix_row_value_input_first + - matrix_partition.get_major_offset_from_major_nocheck(major)), - kv_map.find(key)->second.load(cuda::std::memory_order_relaxed)); - }); + thrust::transform( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + triplet_first, + triplet_first + tmp_major_vertices.size(), + tmp_e_op_result_buffer_first, + [adj_matrix_row_value_input_first = + adj_matrix_row_value_input_first + matrix_partition.get_major_value_start_offset(), + key_aggregated_e_op, + matrix_partition, + kv_map = kv_map_ptr->get_device_view()] __device__(auto val) { + auto major = thrust::get<0>(val); + auto key = thrust::get<1>(val); + auto w = thrust::get<2>(val); + return key_aggregated_e_op(major, + key, + w, + *(adj_matrix_row_value_input_first + + matrix_partition.get_major_offset_from_major_nocheck(major)), + kv_map.find(key)->second.load(cuda::std::memory_order_relaxed)); + }); tmp_minor_keys.resize(0, handle.get_stream()); tmp_key_aggregated_edge_weights.resize(0, handle.get_stream()); tmp_minor_keys.shrink_to_fit(handle.get_stream()); tmp_key_aggregated_edge_weights.shrink_to_fit(handle.get_stream()); if (GraphViewType::is_multi_gpu) { - auto& sub_comm = handle.get_subcomm(graph_view.is_hypergraph_partitioned() - ? 
cugraph::partition_2d::key_naming_t().col_name() - : cugraph::partition_2d::key_naming_t().row_name()); - auto const sub_comm_rank = sub_comm.get_rank(); - auto const sub_comm_size = sub_comm.get_size(); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_rank = col_comm.get_rank(); + auto const col_comm_size = col_comm.get_size(); // FIXME: additional optimization is possible if reduce_op is a pure function (and reduce_op // can be mapped to ncclRedOp_t). auto rx_sizes = - host_scalar_gather(sub_comm, tmp_major_vertices.size(), i, handle.get_stream()); - std::vector rx_displs( - static_cast(sub_comm_rank) == i ? sub_comm_size : int{0}, size_t{0}); - if (static_cast(sub_comm_rank) == i) { + host_scalar_gather(col_comm, tmp_major_vertices.size(), i, handle.get_stream()); + std::vector rx_displs{}; + rmm::device_uvector rx_major_vertices(0, handle.get_stream()); + if (static_cast(col_comm_rank) == i) { + rx_displs.assign(col_comm_size, size_t{0}); std::partial_sum(rx_sizes.begin(), rx_sizes.end() - 1, rx_displs.begin() + 1); + rx_major_vertices.resize(rx_displs.back() + rx_sizes.back(), handle.get_stream()); } - rmm::device_uvector rx_major_vertices( - static_cast(sub_comm_rank) == i - ? std::accumulate(rx_sizes.begin(), rx_sizes.end(), size_t{0}) - : size_t{0}, - handle.get_stream()); auto rx_tmp_e_op_result_buffer = allocate_dataframe_buffer(rx_major_vertices.size(), handle.get_stream()); - device_gatherv(sub_comm, + device_gatherv(col_comm, tmp_major_vertices.data(), rx_major_vertices.data(), tmp_major_vertices.size(), @@ -412,7 +466,7 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( rx_displs, i, handle.get_stream()); - device_gatherv(sub_comm, + device_gatherv(col_comm, tmp_e_op_result_buffer_first, get_dataframe_buffer_begin(rx_tmp_e_op_result_buffer), tmp_major_vertices.size(), @@ -421,7 +475,7 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( i, handle.get_stream()); - if (static_cast(sub_comm_rank) == i) { + if (static_cast(col_comm_rank) == i) { major_vertices = std::move(rx_major_vertices); e_op_result_buffer = std::move(rx_tmp_e_op_result_buffer); } diff --git a/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh b/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh index e621ed91ddb..34721c75e31 100644 --- a/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh +++ b/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh @@ -179,20 +179,10 @@ transform_reduce_by_adj_matrix_row_col_key_e( using edge_t = typename GraphViewType::edge_type; using weight_t = typename GraphViewType::weight_type; - auto loop_count = size_t{1}; - if (GraphViewType::is_multi_gpu) { - auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto const row_comm_size = row_comm.get_size(); - loop_count = graph_view.is_hypergraph_partitioned() - ? graph_view.get_number_of_local_adj_matrix_partitions() - : static_cast(row_comm_size); - } - rmm::device_uvector keys(0, handle.get_stream()); auto value_buffer = allocate_dataframe_buffer(0, handle.get_stream()); - for (size_t i = 0; i < loop_count; ++i) { - matrix_partition_device_t matrix_partition( - graph_view, (GraphViewType::is_multi_gpu && !graph_view.is_hypergraph_partitioned()) ? 
0 : i); + for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { + matrix_partition_device_t matrix_partition(graph_view, i); int comm_root_rank = 0; if (GraphViewType::is_multi_gpu) { @@ -201,8 +191,7 @@ transform_reduce_by_adj_matrix_row_col_key_e( auto const row_comm_size = row_comm.get_size(); auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); auto const col_comm_rank = col_comm.get_rank(); - comm_root_rank = graph_view.is_hypergraph_partitioned() ? i * row_comm_size + row_comm_rank - : col_comm_rank * row_comm_size + i; + comm_root_rank = i * row_comm_size + row_comm_rank; } auto num_edges = thrust::transform_reduce( @@ -224,6 +213,13 @@ transform_reduce_by_adj_matrix_row_col_key_e( detail::transform_reduce_by_key_e_for_all_block_size, handle.get_device_properties().maxGridSize[0]); + auto row_value_input_offset = GraphViewType::is_adj_matrix_transposed + ? vertex_t{0} + : matrix_partition.get_major_value_start_offset(); + auto col_value_input_offset = GraphViewType::is_adj_matrix_transposed + ? matrix_partition.get_major_value_start_offset() + : vertex_t{0}; + // FIXME: This is highly inefficient for graphs with high-degree vertices. If we renumber // vertices to insure that rows within a partition are sorted by their out-degree in // decreasing order, we will apply this kernel only to low out-degree vertices. @@ -232,9 +228,10 @@ transform_reduce_by_adj_matrix_row_col_key_e( matrix_partition, graph_view.get_vertex_partition_first(comm_root_rank), graph_view.get_vertex_partition_last(comm_root_rank), - adj_matrix_row_value_input_first, - adj_matrix_col_value_input_first, - adj_matrix_row_col_key_first, + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + adj_matrix_row_col_key_first + + (adj_matrix_row_key ? row_value_input_offset : col_value_input_offset), e_op, tmp_keys.data(), get_dataframe_buffer_begin(tmp_value_buffer)); diff --git a/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh b/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh index 4efd32bcac7..4d557b97a30 100644 --- a/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh +++ b/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh @@ -25,12 +25,14 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include @@ -115,12 +117,10 @@ __global__ void for_all_frontier_row_for_all_nbr_low_degree( static_assert(sizeof(unsigned long long int) == sizeof(size_t)); auto buffer_idx = atomicAdd(reinterpret_cast(buffer_idx_ptr), static_cast(1)); - *(buffer_key_output_first + buffer_idx) = col; - *(buffer_payload_output_first + buffer_idx) = - remove_first_thrust_tuple_element()(e_op_result); + *(buffer_key_output_first + buffer_idx) = col; + *(buffer_payload_output_first + buffer_idx) = thrust::get<1>(e_op_result); } } - idx += gridDim.x * blockDim.x; } } @@ -155,8 +155,8 @@ size_t reduce_buffer_elements(raft::handle_t const& handle, // temporary buffer size exceeds the maximum buffer size (may be definied as percentage of the // system HBM size or a function of the maximum number of threads in the system)) // FIXME: actually, we can find how many unique keys are here by now. - // FIXME: if GraphViewType::is_multi_gpu is true, this should be executed on the GPU holding the - // vertex unless reduce_op is a pure function. 
+ // FIXME: if GraphViewType::is_multi_gpu is true, this should be executed on the GPU holding + // the vertex unless reduce_op is a pure function. rmm::device_uvector keys(num_buffer_elements, handle.get_stream()); auto value_buffer = allocate_dataframe_buffer(num_buffer_elements, handle.get_stream()); @@ -234,8 +234,7 @@ __global__ void update_frontier_and_vertex_output_values( auto v_op_result = v_op(v_val, payload); selected_bucket_idx = thrust::get<0>(v_op_result); if (selected_bucket_idx != invalid_bucket_idx) { - *(vertex_value_output_first + key_offset) = - remove_first_thrust_tuple_element()(v_op_result); + *(vertex_value_output_first + key_offset) = thrust::get<1>(v_op_result); bucket_block_local_offsets[selected_bucket_idx] = 1; } } @@ -349,25 +348,18 @@ void update_frontier_v_push_if_out_nbr( static_assert(!GraphViewType::is_adj_matrix_transposed, "GraphViewType should support the push model."); - using vertex_t = typename GraphViewType::vertex_type; - using edge_t = typename GraphViewType::edge_type; + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; + using payload_t = typename ReduceOp::type; // 1. fill the buffer - vertex_frontier.set_buffer_idx_value(0); - - auto loop_count = size_t{1}; - if (GraphViewType::is_multi_gpu) { - auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto const row_comm_size = row_comm.get_size(); - loop_count = graph_view.is_hypergraph_partitioned() - ? graph_view.get_number_of_local_adj_matrix_partitions() - : static_cast(row_comm_size); - } - - for (size_t i = 0; i < loop_count; ++i) { - matrix_partition_device_t matrix_partition( - graph_view, (GraphViewType::is_multi_gpu && !graph_view.is_hypergraph_partitioned()) ? 0 : i); + rmm::device_uvector keys(size_t{0}, handle.get_stream()); + auto payload_buffer = allocate_dataframe_buffer(size_t{0}, handle.get_stream()); + rmm::device_scalar buffer_idx(size_t{0}, handle.get_stream()); + for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { + matrix_partition_device_t matrix_partition(graph_view, i); rmm::device_uvector frontier_rows( 0, handle.get_stream()); // relevant only if GraphViewType::is_multi_gpu is true @@ -380,22 +372,18 @@ void update_frontier_v_push_if_out_nbr( auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); auto const col_comm_rank = col_comm.get_rank(); - auto sub_comm_rank = graph_view.is_hypergraph_partitioned() ? col_comm_rank : row_comm_rank; - frontier_size = host_scalar_bcast( - graph_view.is_hypergraph_partitioned() ? col_comm : row_comm, - (static_cast(sub_comm_rank) == i) ? thrust::distance(vertex_first, vertex_last) - : size_t{0}, - i, - handle.get_stream()); + auto sub_comm_rank = col_comm_rank; + frontier_size = host_scalar_bcast(col_comm, + (static_cast(sub_comm_rank) == i) + ? thrust::distance(vertex_first, vertex_last) + : size_t{0}, + i, + handle.get_stream()); if (static_cast(sub_comm_rank) != i) { frontier_rows.resize(frontier_size, handle.get_stream()); } - device_bcast(graph_view.is_hypergraph_partitioned() ? 
col_comm : row_comm, - vertex_first, - frontier_rows.begin(), - frontier_size, - i, - handle.get_stream()); + device_bcast( + col_comm, vertex_first, frontier_rows.begin(), frontier_size, i, handle.get_stream()); } else { frontier_size = thrust::distance(vertex_first, vertex_last); } @@ -439,10 +427,8 @@ void update_frontier_v_push_if_out_nbr( // locking. // FIXME: if i != 0, this will require costly reallocation if we don't use the new CUDA feature // to reserve address space. - vertex_frontier.resize_buffer(vertex_frontier.get_buffer_idx_value() + max_pushes); - auto buffer_first = vertex_frontier.buffer_begin(); - auto buffer_key_first = std::get<0>(buffer_first); - auto buffer_payload_first = std::get<1>(buffer_first); + keys.resize(buffer_idx.value(handle.get_stream()) + max_pushes, handle.get_stream()); + resize_dataframe_buffer(payload_buffer, keys.size(), handle.get_stream()); auto row_value_input_offset = GraphViewType::is_adj_matrix_transposed ? vertex_t{0} @@ -467,9 +453,9 @@ void update_frontier_v_push_if_out_nbr( frontier_rows.end(), adj_matrix_row_value_input_first + row_value_input_offset, adj_matrix_col_value_input_first, - buffer_key_first, - buffer_payload_first, - vertex_frontier.get_buffer_idx_ptr(), + keys.begin(), + get_dataframe_buffer_begin(payload_buffer), + buffer_idx.data(), e_op); } else { detail::for_all_frontier_row_for_all_nbr_low_degree<<(payload_buffer), + buffer_idx.data(), e_op); } } @@ -491,18 +477,12 @@ void update_frontier_v_push_if_out_nbr( // 2. reduce the buffer - auto num_buffer_offset = edge_t{0}; - - auto buffer_first = vertex_frontier.buffer_begin(); - auto buffer_key_first = std::get<0>(buffer_first) + num_buffer_offset; - auto buffer_payload_first = std::get<1>(buffer_first) + num_buffer_offset; - - auto num_buffer_elements = detail::reduce_buffer_elements(handle, - buffer_key_first, - buffer_payload_first, - vertex_frontier.get_buffer_idx_value(), - reduce_op); - + auto num_buffer_elements = + detail::reduce_buffer_elements(handle, + keys.begin(), + get_dataframe_buffer_begin(payload_buffer), + buffer_idx.value(handle.get_stream()), + reduce_op); if (GraphViewType::is_multi_gpu) { auto& comm = handle.get_comms(); auto const comm_rank = comm.get_rank(); @@ -513,12 +493,9 @@ void update_frontier_v_push_if_out_nbr( auto const col_comm_rank = col_comm.get_rank(); auto const col_comm_size = col_comm.get_size(); - std::vector h_vertex_lasts(graph_view.is_hypergraph_partitioned() ? row_comm_size - : col_comm_size); + std::vector h_vertex_lasts(row_comm_size); for (size_t i = 0; i < h_vertex_lasts.size(); ++i) { - h_vertex_lasts[i] = graph_view.get_vertex_partition_last( - graph_view.is_hypergraph_partitioned() ? 
col_comm_rank * row_comm_size + i - : row_comm_rank * col_comm_size + i); + h_vertex_lasts[i] = graph_view.get_vertex_partition_last(col_comm_rank * row_comm_size + i); } rmm::device_uvector d_vertex_lasts(h_vertex_lasts.size(), handle.get_stream()); @@ -527,8 +504,8 @@ void update_frontier_v_push_if_out_nbr( rmm::device_uvector d_tx_buffer_last_boundaries(d_vertex_lasts.size(), handle.get_stream()); thrust::lower_bound(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - buffer_key_first, - buffer_key_first + num_buffer_elements, + keys.begin(), + keys.begin() + num_buffer_elements, d_vertex_lasts.begin(), d_vertex_lasts.end(), d_tx_buffer_last_boundaries.begin()); @@ -537,122 +514,35 @@ void update_frontier_v_push_if_out_nbr( d_tx_buffer_last_boundaries.data(), d_tx_buffer_last_boundaries.size(), handle.get_stream()); - CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + handle.get_stream_view().synchronize(); std::vector tx_counts(h_tx_buffer_last_boundaries.size()); std::adjacent_difference( h_tx_buffer_last_boundaries.begin(), h_tx_buffer_last_boundaries.end(), tx_counts.begin()); - std::vector rx_counts(graph_view.is_hypergraph_partitioned() ? row_comm_size - : col_comm_size); - std::vector count_requests(tx_counts.size() + rx_counts.size()); - size_t tx_self_i = std::numeric_limits::max(); - for (size_t i = 0; i < tx_counts.size(); ++i) { - auto comm_dst_rank = graph_view.is_hypergraph_partitioned() - ? col_comm_rank * row_comm_size + static_cast(i) - : row_comm_rank * col_comm_size + static_cast(i); - if (comm_dst_rank == comm_rank) { - tx_self_i = i; - // FIXME: better define request_null (similar to MPI_REQUEST_NULL) under raft::comms - count_requests[i] = std::numeric_limits::max(); - } else { - comm.isend(&tx_counts[i], 1, comm_dst_rank, 0 /* tag */, count_requests.data() + i); - } - } - for (size_t i = 0; i < rx_counts.size(); ++i) { - auto comm_src_rank = graph_view.is_hypergraph_partitioned() - ? col_comm_rank * row_comm_size + static_cast(i) - : static_cast(i) * row_comm_size + comm_rank / col_comm_size; - if (comm_src_rank == comm_rank) { - assert(tx_self_i != std::numeric_limits::max()); - rx_counts[i] = tx_counts[tx_self_i]; - // FIXME: better define request_null (similar to MPI_REQUEST_NULL) under raft::comms - count_requests[tx_counts.size() + i] = std::numeric_limits::max(); - } else { - comm.irecv(&rx_counts[i], - 1, - comm_src_rank, - 0 /* tag */, - count_requests.data() + tx_counts.size() + i); - } - } - // FIXME: better define request_null (similar to MPI_REQUEST_NULL) under raft::comms, if - // raft::comms::wait immediately returns on seeing request_null, this remove is unnecessary - count_requests.erase(std::remove(count_requests.begin(), - count_requests.end(), - std::numeric_limits::max()), - count_requests.end()); - comm.waitall(count_requests.size(), count_requests.data()); - - std::vector tx_offsets(tx_counts.size() + 1, edge_t{0}); - std::partial_sum(tx_counts.begin(), tx_counts.end(), tx_offsets.begin() + 1); - std::vector rx_offsets(rx_counts.size() + 1, edge_t{0}); - std::partial_sum(rx_counts.begin(), rx_counts.end(), rx_offsets.begin() + 1); - - // FIXME: this will require costly reallocation if we don't use the new CUDA feature to reserve - // address space. 
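For orientation, the replacement exchange path in this hunk can be summarized by the sketch below; it only restates the lower_bound/adjacent_difference count computation kept above and the shuffle_values() utility (from utilities/shuffle_comm.cuh) that the new code calls, so no names beyond those already in the patch are introduced:

    // per-destination segment ends in the sorted key buffer, one entry per target GPU
    thrust::lower_bound(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
                        keys.begin(), keys.begin() + num_buffer_elements,
                        d_vertex_lasts.begin(), d_vertex_lasts.end(),
                        d_tx_buffer_last_boundaries.begin());
    // boundaries -> per-destination send counts on the host
    std::adjacent_difference(h_tx_buffer_last_boundaries.begin(),
                             h_tx_buffer_last_boundaries.end(),
                             tx_counts.begin());
    // all-to-all exchange of the keys; the payload buffer is shuffled with the same tx_counts
    rmm::device_uvector<vertex_t> rx_keys(size_t{0}, handle.get_stream());
    std::tie(rx_keys, std::ignore) =
      shuffle_values(row_comm, keys.begin(), tx_counts, handle.get_stream());
    keys = std::move(rx_keys);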
- // FIXME: std::max(actual size, 1) as ncclRecv currently hangs if recvuff is nullptr even if - // count is 0 - vertex_frontier.resize_buffer(std::max(num_buffer_elements + rx_offsets.back(), size_t(1))); - - auto buffer_first = vertex_frontier.buffer_begin(); - auto buffer_key_first = std::get<0>(buffer_first) + num_buffer_offset; - auto buffer_payload_first = std::get<1>(buffer_first) + num_buffer_offset; - - std::vector tx_dst_ranks(tx_counts.size()); - std::vector rx_src_ranks(rx_counts.size()); - for (size_t i = 0; i < tx_dst_ranks.size(); ++i) { - tx_dst_ranks[i] = graph_view.is_hypergraph_partitioned() - ? col_comm_rank * row_comm_size + static_cast(i) - : row_comm_rank * col_comm_size + static_cast(i); - } - for (size_t i = 0; i < rx_src_ranks.size(); ++i) { - rx_src_ranks[i] = graph_view.is_hypergraph_partitioned() - ? col_comm_rank * row_comm_size + static_cast(i) - : static_cast(i) * row_comm_size + comm_rank / col_comm_size; - } - - device_multicast_sendrecv( - comm, - buffer_key_first, - tx_counts, - tx_offsets, - tx_dst_ranks, - buffer_key_first + num_buffer_elements, - rx_counts, - rx_offsets, - rx_src_ranks, - handle.get_stream()); - device_multicast_sendrecv( - comm, - buffer_payload_first, - tx_counts, - tx_offsets, - tx_dst_ranks, - buffer_payload_first + num_buffer_elements, - rx_counts, - rx_offsets, - rx_src_ranks, - handle.get_stream()); - - // FIXME: this does not exploit the fact that each segment is sorted. Lost performance - // optimization opportunities. - // FIXME: we can use [vertex_frontier.buffer_begin(), vertex_frontier.buffer_begin() + - // num_buffer_elements) as temporary buffer inside reduce_buffer_elements(). - num_buffer_offset = num_buffer_elements; - num_buffer_elements = detail::reduce_buffer_elements(handle, - buffer_key_first + num_buffer_elements, - buffer_payload_first + num_buffer_elements, - rx_offsets.back(), - reduce_op); + rmm::device_uvector rx_keys(size_t{0}, handle.get_stream()); + std::tie(rx_keys, std::ignore) = + shuffle_values(row_comm, keys.begin(), tx_counts, handle.get_stream()); + keys = std::move(rx_keys); + + auto rx_payload_buffer = allocate_dataframe_buffer(size_t{0}, handle.get_stream()); + std::tie(rx_payload_buffer, std::ignore) = + shuffle_values(row_comm, + get_dataframe_buffer_begin(payload_buffer), + tx_counts, + handle.get_stream()); + payload_buffer = std::move(rx_payload_buffer); + + num_buffer_elements = + detail::reduce_buffer_elements(handle, + keys.begin(), + get_dataframe_buffer_begin(payload_buffer), + keys.size(), + reduce_op); } // 3. 
update vertex properties if (num_buffer_elements > 0) { - auto buffer_first = vertex_frontier.buffer_begin(); - auto buffer_key_first = std::get<0>(buffer_first) + num_buffer_offset; - auto buffer_payload_first = std::get<1>(buffer_first) + num_buffer_offset; - raft::grid_1d_thread_t update_grid(num_buffer_elements, detail::update_frontier_v_push_if_out_nbr_update_block_size, handle.get_device_properties().maxGridSize[0]); @@ -666,8 +556,8 @@ void update_frontier_v_push_if_out_nbr( detail::update_frontier_and_vertex_output_values <<>>( vertex_partition, - buffer_key_first, - buffer_payload_first, + keys.begin(), + get_dataframe_buffer_begin(payload_buffer), num_buffer_elements, vertex_value_input_first, vertex_value_output_first, @@ -690,21 +580,5 @@ void update_frontier_v_push_if_out_nbr( } } -/* - -FIXME: - -iterating over lower triangular (or upper triangular) : triangle counting -LRB might be necessary if the cost of processing an edge (i, j) is a function of degree(i) and -degree(j) : triangle counting -push-pull switching support (e.g. DOBFS), in this case, we need both -CSR & CSC (trade-off execution time vs memory requirement, unless graph is symmetric) -if graph is symmetric, there will be additional optimization opportunities (e.g. in-degree == -out-degree) For BFS, sending a bit vector (for the entire set of dest vertices per partitoin may -work better we can use thrust::set_intersection for triangle counting think about adding thrust -wrappers for reduction functions. Can I pass nullptr for dummy -instead of thrust::make_counting_iterator(0)? -*/ - } // namespace experimental } // namespace cugraph diff --git a/cpp/include/patterns/vertex_frontier.cuh b/cpp/include/patterns/vertex_frontier.cuh index c11142d3cf7..375ec097850 100644 --- a/cpp/include/patterns/vertex_frontier.cuh +++ b/cpp/include/patterns/vertex_frontier.cuh @@ -48,26 +48,6 @@ inline size_t round_up(size_t number_to_round, size_t modulus) return ((number_to_round + (modulus - 1)) / modulus) * modulus; } -template -auto make_buffer_zip_iterator_impl(std::vector& buffer_ptrs, - size_t offset, - std::index_sequence) -{ - auto key_ptr = reinterpret_cast(buffer_ptrs[0]) + offset; - auto payload_it = thrust::make_zip_iterator( - thrust::make_tuple(reinterpret_cast::type*>( - buffer_ptrs[1 + Is])...)); - return std::make_tuple(key_ptr, payload_it); -} - -template -auto make_buffer_zip_iterator(std::vector& buffer_ptrs, size_t offset) -{ - size_t constexpr tuple_size = thrust::tuple_size::value; - return make_buffer_zip_iterator_impl( - buffer_ptrs, offset, std::make_index_sequence()); -} - template __global__ void move_and_invalidate_if(RowIterator row_first, RowIterator row_last, @@ -199,10 +179,7 @@ class Bucket { size_t size_{0}; }; -template +template class VertexFrontier { public: static size_t constexpr kNumBuckets = num_buckets; @@ -211,9 +188,7 @@ class VertexFrontier { VertexFrontier(raft::handle_t const& handle, std::vector bucket_capacities) : handle_ptr_(&handle), tmp_bucket_ptrs_(num_buckets, handle.get_stream()), - tmp_bucket_sizes_(num_buckets, handle.get_stream()), - buffer_ptrs_(kReduceInputTupleSize + 1 /* to store destination column number */, nullptr), - buffer_idx_(0, handle_ptr_->get_stream()) + tmp_bucket_sizes_(num_buckets, handle.get_stream()) { CUGRAPH_EXPECTS(bucket_capacities.size() == num_buckets, "invalid input argument bucket_capacities (size mismatch)"); @@ -228,7 +203,6 @@ class VertexFrontier { for (size_t i = 0; i < num_buckets; ++i) { buckets_.emplace_back(handle, 
bucket_capacities[i]); } - buffer_.set_stream(handle_ptr_->get_stream()); } Bucket& get_bucket(size_t bucket_idx) { return buckets_[bucket_idx]; } @@ -311,90 +285,11 @@ class VertexFrontier { return std::make_tuple(tmp_bucket_ptrs_.data(), tmp_bucket_sizes_.data()); } - void resize_buffer(size_t size) - { - // FIXME: rmm::device_buffer resize incurs copy if memory is reallocated, which is unnecessary - // in this case. - buffer_.resize(compute_aggregate_buffer_size_in_bytes(size), handle_ptr_->get_stream()); - if (size > buffer_capacity_) { - buffer_capacity_ = size; - update_buffer_ptrs(); - } - buffer_size_ = size; - } - - void clear_buffer() { resize_buffer(0); } - - void shrink_to_fit_buffer() - { - if (buffer_size_ != buffer_capacity_) { - // FIXME: rmm::device_buffer shrink_to_fit incurs copy if memory is reallocated, which is - // unnecessary in this case. - buffer_.shrink_to_fit(handle_ptr_->get_stream()); - update_buffer_ptrs(); - buffer_capacity_ = buffer_size_; - } - } - - auto buffer_begin() - { - return detail::make_buffer_zip_iterator(buffer_ptrs_, 0); - } - - auto buffer_end() - { - return detail::make_buffer_zip_iterator(buffer_ptrs_, - buffer_size_); - } - - auto get_buffer_idx_ptr() { return buffer_idx_.data(); } - - size_t get_buffer_idx_value() { return buffer_idx_.value(handle_ptr_->get_stream()); } - - void set_buffer_idx_value(size_t value) - { - buffer_idx_.set_value(value, handle_ptr_->get_stream()); - } - private: - static size_t constexpr kReduceInputTupleSize = thrust::tuple_size::value; - static size_t constexpr kBufferAlignment = 128; - raft::handle_t const* handle_ptr_{nullptr}; std::vector> buckets_{}; rmm::device_uvector tmp_bucket_ptrs_; rmm::device_uvector tmp_bucket_sizes_; - - std::array tuple_element_sizes_ = - compute_thrust_tuple_element_sizes()(); - std::vector buffer_ptrs_{}; - rmm::device_buffer buffer_{}; - size_t buffer_size_{0}; - size_t buffer_capacity_{0}; - rmm::device_scalar buffer_idx_{}; - - // FIXME: better pick between this apporach or the approach used in allocate_comm_buffer - size_t compute_aggregate_buffer_size_in_bytes(size_t size) - { - size_t aggregate_buffer_size_in_bytes = - detail::round_up(sizeof(vertex_t) * size, kBufferAlignment); - for (size_t i = 0; i < kReduceInputTupleSize; ++i) { - aggregate_buffer_size_in_bytes += - detail::round_up(tuple_element_sizes_[i] * size, kBufferAlignment); - } - return aggregate_buffer_size_in_bytes; - } - - void update_buffer_ptrs() - { - uintptr_t ptr = reinterpret_cast(buffer_.data()); - buffer_ptrs_[0] = reinterpret_cast(ptr); - ptr += detail::round_up(sizeof(vertex_t) * buffer_capacity_, kBufferAlignment); - for (size_t i = 0; i < kReduceInputTupleSize; ++i) { - buffer_ptrs_[1 + i] = reinterpret_cast(ptr); - ptr += detail::round_up(tuple_element_sizes_[i] * buffer_capacity_, kBufferAlignment); - } - } }; } // namespace experimental diff --git a/cpp/include/utilities/collect_comm.cuh b/cpp/include/utilities/collect_comm.cuh index 8d2227c0f60..481717d7c38 100644 --- a/cpp/include/utilities/collect_comm.cuh +++ b/cpp/include/utilities/collect_comm.cuh @@ -58,13 +58,18 @@ collect_values_for_keys(raft::comms::comms_t const &comm, double constexpr load_factor = 0.7; // FIXME: we may compare the performance & memory footprint of this hash based approach vs binary - // search based approach + // search based approach (especially when thrust::distance(collect_key_first, collect_key_last) << + // thrust::distance(map_key_first, map_key_last) // 1. 
build a cuco::static_map object for the map k, v pairs. auto kv_map_ptr = std::make_unique>( - static_cast(static_cast(thrust::distance(map_key_first, map_key_last)) / - load_factor), + // FIXME: std::max(..., ...) as a temporary workaround for + // https://github.com/NVIDIA/cuCollections/issues/72 and + // https://github.com/NVIDIA/cuCollections/issues/73 + std::max(static_cast( + static_cast(thrust::distance(map_key_first, map_key_last)) / load_factor), + static_cast(thrust::distance(map_key_first, map_key_last)) + 1), invalid_vertex_id::value, invalid_vertex_id::value); { @@ -73,7 +78,11 @@ collect_values_for_keys(raft::comms::comms_t const &comm, [] __device__(auto val) { return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); }); - kv_map_ptr->insert(pair_first, pair_first + thrust::distance(map_key_first, map_key_last)); + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. + if (thrust::distance(map_key_first, map_key_last) > 0) { + kv_map_ptr->insert(pair_first, pair_first + thrust::distance(map_key_first, map_key_last)); + } } // 2. collect values for the unique keys in [collect_key_first, collect_key_last) @@ -82,9 +91,6 @@ collect_values_for_keys(raft::comms::comms_t const &comm, stream); thrust::copy( rmm::exec_policy(stream)->on(stream), collect_key_first, collect_key_last, unique_keys.begin()); - // FIXME: sort and unique are unnecessary if the keys in [collect_key_first, collect_key_last) are - // already unique, if this cost becomes a performance bottlenec, we may add - // collect_values_for_unique_keys in the future thrust::sort(rmm::exec_policy(stream)->on(stream), unique_keys.begin(), unique_keys.end()); unique_keys.resize( thrust::distance( @@ -107,8 +113,12 @@ collect_values_for_keys(raft::comms::comms_t const &comm, CUDA_TRY(cudaStreamSynchronize(stream)); // cuco::static_map currently does not take stream - kv_map_ptr->find( - rx_unique_keys.begin(), rx_unique_keys.end(), values_for_rx_unique_keys.begin()); + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. + if (rx_unique_keys.size() > 0) { + kv_map_ptr->find( + rx_unique_keys.begin(), rx_unique_keys.end(), values_for_rx_unique_keys.begin()); + } rmm::device_uvector rx_values_for_unique_keys(0, stream); std::tie(rx_values_for_unique_keys, std::ignore) = @@ -125,7 +135,11 @@ collect_values_for_keys(raft::comms::comms_t const &comm, kv_map_ptr.reset(); kv_map_ptr = std::make_unique>( - static_cast(static_cast(unique_keys.size()) / load_factor), + // FIXME: std::max(..., ...) as a temporary workaround for + // https://github.com/NVIDIA/cuCollections/issues/72 and + // https://github.com/NVIDIA/cuCollections/issues/73 + std::max(static_cast(static_cast(unique_keys.size()) / load_factor), + unique_keys.size() + 1), invalid_vertex_id::value, invalid_vertex_id::value); { @@ -136,15 +150,154 @@ collect_values_for_keys(raft::comms::comms_t const &comm, return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); }); - kv_map_ptr->insert(pair_first, pair_first + unique_keys.size()); + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. + if (unique_keys.size() > 0) { kv_map_ptr->insert(pair_first, pair_first + unique_keys.size()); } } // 4. 
find values for [collect_key_first, collect_key_last) auto value_buffer = allocate_dataframe_buffer( thrust::distance(collect_key_first, collect_key_last), stream); - kv_map_ptr->find( - collect_key_first, collect_key_last, get_dataframe_buffer_begin(value_buffer)); + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. + if (thrust::distance(collect_key_first, collect_key_last) > 0) { + kv_map_ptr->find( + collect_key_first, collect_key_last, get_dataframe_buffer_begin(value_buffer)); + } + + return value_buffer; +} + +// for key = [map_key_first, map_key_last), key_to_gpu_id_op(key) should be coincide with +// comm.get_rank() +template +decltype(allocate_dataframe_buffer::value_type>( + 0, cudaStream_t{nullptr})) +collect_values_for_unique_keys(raft::comms::comms_t const &comm, + VertexIterator0 map_key_first, + VertexIterator0 map_key_last, + ValueIterator map_value_first, + VertexIterator1 collect_unique_key_first, + VertexIterator1 collect_unique_key_last, + KeyToGPUIdOp key_to_gpu_id_op, + cudaStream_t stream) +{ + using vertex_t = typename std::iterator_traits::value_type; + static_assert( + std::is_same::value_type, vertex_t>::value); + using value_t = typename std::iterator_traits::value_type; + + double constexpr load_factor = 0.7; + + // FIXME: we may compare the performance & memory footprint of this hash based approach vs binary + // search based approach (especially when thrust::distance(collect_unique_key_first, + // collect_unique_key_last) << thrust::distance(map_key_first, map_key_last) + + // 1. build a cuco::static_map object for the map k, v pairs. + + auto kv_map_ptr = std::make_unique>( + // FIXME: std::max(..., ...) as a temporary workaround for + // https://github.com/NVIDIA/cuCollections/issues/72 and + // https://github.com/NVIDIA/cuCollections/issues/73 + std::max(static_cast( + static_cast(thrust::distance(map_key_first, map_key_last)) / load_factor), + static_cast(thrust::distance(map_key_first, map_key_last)) + 1), + invalid_vertex_id::value, + invalid_vertex_id::value); + { + auto pair_first = thrust::make_transform_iterator( + thrust::make_zip_iterator(thrust::make_tuple(map_key_first, map_value_first)), + [] __device__(auto val) { + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + }); + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. + if (thrust::distance(map_key_first, map_key_last)) { + kv_map_ptr->insert(pair_first, pair_first + thrust::distance(map_key_first, map_key_last)); + } + } + + // 2. 
collect values for the unique keys in [collect_unique_key_first, collect_unique_key_last) + + rmm::device_uvector unique_keys( + thrust::distance(collect_unique_key_first, collect_unique_key_last), stream); + thrust::copy(rmm::exec_policy(stream)->on(stream), + collect_unique_key_first, + collect_unique_key_last, + unique_keys.begin()); + + rmm::device_uvector values_for_unique_keys(0, stream); + { + rmm::device_uvector rx_unique_keys(0, stream); + std::vector rx_value_counts{}; + std::tie(rx_unique_keys, rx_value_counts) = groupby_gpuid_and_shuffle_values( + comm, + unique_keys.begin(), + unique_keys.end(), + [key_to_gpu_id_op] __device__(auto val) { return key_to_gpu_id_op(val); }, + stream); + + rmm::device_uvector values_for_rx_unique_keys(rx_unique_keys.size(), stream); + + CUDA_TRY(cudaStreamSynchronize(stream)); // cuco::static_map currently does not take stream + + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. + if (rx_unique_keys.size() > 0) { + kv_map_ptr->find( + rx_unique_keys.begin(), rx_unique_keys.end(), values_for_rx_unique_keys.begin()); + } + + rmm::device_uvector rx_values_for_unique_keys(0, stream); + std::tie(rx_values_for_unique_keys, std::ignore) = + shuffle_values(comm, values_for_rx_unique_keys.begin(), rx_value_counts, stream); + + values_for_unique_keys = std::move(rx_values_for_unique_keys); + } + + // 3. re-build a cuco::static_map object for the k, v pairs in unique_keys, + // values_for_unique_keys. + + CUDA_TRY(cudaStreamSynchronize(stream)); // cuco::static_map currently does not take stream + + kv_map_ptr.reset(); + + kv_map_ptr = std::make_unique>( + // FIXME: std::max(..., ...) as a temporary workaround for + // https://github.com/NVIDIA/cuCollections/issues/72 and + // https://github.com/NVIDIA/cuCollections/issues/73 + std::max(static_cast(static_cast(unique_keys.size()) / load_factor), + unique_keys.size() + 1), + invalid_vertex_id::value, + invalid_vertex_id::value); + { + auto pair_first = thrust::make_transform_iterator( + thrust::make_zip_iterator( + thrust::make_tuple(unique_keys.begin(), values_for_unique_keys.begin())), + [] __device__(auto val) { + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + }); + + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. + if (unique_keys.size() > 0) { kv_map_ptr->insert(pair_first, pair_first + unique_keys.size()); } + } + + // 4. find values for [collect_unique_key_first, collect_unique_key_last) + + auto value_buffer = allocate_dataframe_buffer( + thrust::distance(collect_unique_key_first, collect_unique_key_last), stream); + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. 
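  // Editorial summary (illustrative sketch, not part of the original change): every insert()/
  // find() in this file is wrapped in the same zero-size guard because cuco::static_map is
  // assumed here to launch a kernel even for an empty input range, and every map is sized with
  // the same capacity padding while the linked cuCollections issues remain open. With num_keys
  // standing in for thrust::distance(map_key_first, map_key_last):
  //
  //   auto capacity = std::max(
  //     static_cast<size_t>(static_cast<double>(num_keys) / load_factor),  // honor load_factor
  //     num_keys + size_t{1});                                             // keep capacity > num_keys
  //   if (num_keys > 0) { kv_map_ptr->insert(pair_first, pair_first + num_keys); }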
+ if (thrust::distance(collect_unique_key_first, collect_unique_key_last)) { + kv_map_ptr->find(collect_unique_key_first, + collect_unique_key_last, + get_dataframe_buffer_begin(value_buffer)); + } return value_buffer; } diff --git a/cpp/include/utilities/cython.hpp b/cpp/include/utilities/cython.hpp index a58331d465a..d8c476760f0 100644 --- a/cpp/include/utilities/cython.hpp +++ b/cpp/include/utilities/cython.hpp @@ -93,7 +93,7 @@ struct graph_container_t { void* weights; void* vertex_partition_offsets; - size_t num_partition_edges; + size_t num_local_edges; size_t num_global_vertices; size_t num_global_edges; numberTypeEnum vertexType; @@ -103,7 +103,6 @@ struct graph_container_t { bool is_multi_gpu; bool sorted_by_degree; bool do_expensive_check; - bool hypergraph_partitioned; int row_comm_size; int col_comm_size; int row_comm_rank; @@ -147,7 +146,7 @@ struct cy_multi_edgelists_t { // replacement for std::tuple<,,>, since std::tuple is not // supported in cython // -template +template struct major_minor_weights_t { explicit major_minor_weights_t(raft::handle_t const& handle) : shuffled_major_vertices_(0, handle.get_stream()), @@ -155,12 +154,15 @@ struct major_minor_weights_t { shuffled_weights_(0, handle.get_stream()) { } + rmm::device_uvector& get_major(void) { return shuffled_major_vertices_; } rmm::device_uvector& get_minor(void) { return shuffled_minor_vertices_; } rmm::device_uvector& get_weights(void) { return shuffled_weights_; } + std::vector& get_edge_counts(void) { return edge_counts_; } + std::pair, size_t> get_major_wrap( void) // const: triggers errors in Cython autogen-ed C++ { @@ -180,10 +182,16 @@ struct major_minor_weights_t { sizeof(weight_t)); } + std::unique_ptr> get_edge_counts_wrap(void) // const + { + return std::make_unique>(edge_counts_); + } + private: rmm::device_uvector shuffled_major_vertices_; rmm::device_uvector shuffled_minor_vertices_; rmm::device_uvector shuffled_weights_; + std::vector edge_counts_{}; }; // aggregate for random_walks() return type @@ -353,6 +361,9 @@ struct renum_quad_t { // The number of vertices and edges respectively in the graph represented by // the above arrays. 
// +// bool is_weighted +// true if the resulting graph object should store edge weights +// // bool transposed // true if the resulting graph object should store a transposed adjacency // matrix @@ -369,10 +380,11 @@ void populate_graph_container(graph_container_t& graph_container, numberTypeEnum vertexType, numberTypeEnum edgeType, numberTypeEnum weightType, - size_t num_partition_edges, + size_t num_local_edges, size_t num_global_vertices, size_t num_global_edges, bool sorted_by_degree, + bool is_weighted, bool transposed, bool multi_gpu); @@ -470,14 +482,13 @@ call_random_walks(raft::handle_t const& handle, // wrapper for shuffling: // template -std::unique_ptr> call_shuffle( +std::unique_ptr> call_shuffle( raft::handle_t const& handle, vertex_t* edgelist_major_vertices, // [IN / OUT]: groupby_gpuid_and_shuffle_values() sorts in-place vertex_t* edgelist_minor_vertices, // [IN / OUT] weight_t* edgelist_weights, // [IN / OUT] - edge_t num_edgelist_edges, - bool is_hypergraph_partitioned); // = false + edge_t num_edgelist_edges); // Wrapper for calling renumber_edeglist() inplace: // @@ -486,8 +497,7 @@ std::unique_ptr> call_renumber( raft::handle_t const& handle, vertex_t* shuffled_edgelist_major_vertices /* [INOUT] */, vertex_t* shuffled_edgelist_minor_vertices /* [INOUT] */, - edge_t num_edgelist_edges, - bool is_hypergraph_partitioned, + std::vector const& edge_counts, bool do_expensive_check, bool multi_gpu); diff --git a/cpp/include/utilities/dataframe_buffer.cuh b/cpp/include/utilities/dataframe_buffer.cuh index 06352b8e217..e59b12f2a80 100644 --- a/cpp/include/utilities/dataframe_buffer.cuh +++ b/cpp/include/utilities/dataframe_buffer.cuh @@ -47,21 +47,19 @@ auto allocate_dataframe_buffer_tuple_impl(std::index_sequence, } template -void resize_dataframe_buffer_tuple_element_impl(BufferType& buffer, - size_t new_buffer_size, - cudaStream_t stream) -{ - std::get(buffer).resize(new_buffer_size, stream); - resize_dataframe_buffer_tuple_element_impl( - buffer, new_buffer_size, stream); -} +struct resize_dataframe_buffer_tuple_iterator_element_impl { + void run(BufferType& buffer, size_t new_buffer_size, cudaStream_t stream) + { + std::get(buffer).resize(new_buffer_size, stream); + resize_dataframe_buffer_tuple_iterator_element_impl().run( + buffer, new_buffer_size, stream); + } +}; template -void resize_dataframe_buffer_tuple_impl(BufferType& buffer, - size_t new_buffer_size, - cudaStream_t stream) -{ -} +struct resize_dataframe_buffer_tuple_iterator_element_impl { + void run(BufferType& buffer, size_t new_buffer_size, cudaStream_t stream) {} +}; template auto get_dataframe_buffer_begin_tuple_element_impl(BufferType& buffer) @@ -108,8 +106,9 @@ template ::value; - detail::resize_dataframe_buffer_tuple_impl( - buffer, new_buffer_size, stream); + detail:: + resize_dataframe_buffer_tuple_iterator_element_impl() + .run(buffer, new_buffer_size, stream); } template +std::enable_if_t::value, void> +device_allreduce_impl(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + size_t count, + raft::comms::op_t op, + cudaStream_t stream) +{ + // no-op +} + +template +std::enable_if_t< + std::is_arithmetic::value_type>::value, + void> +device_allreduce_impl(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + size_t count, + raft::comms::op_t op, + cudaStream_t stream) +{ + static_assert(std::is_same::value_type, + typename std::iterator_traits::value_type>::value); + comm.allreduce(iter_to_raw_ptr(input_first), 
iter_to_raw_ptr(output_first), count, op, stream); +} + +template +struct device_allreduce_tuple_iterator_element_impl { + void run(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + size_t count, + raft::comms::op_t op, + cudaStream_t stream) const + { + device_allreduce_impl(comm, + thrust::get(input_first.get_iterator_tuple()), + thrust::get(output_first.get_iterator_tuple()), + count, + op, + stream); + device_allreduce_tuple_iterator_element_impl( + comm, input_first, output_first, count, op, stream); + } +}; + +template +struct device_allreduce_tuple_iterator_element_impl { + void run(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + size_t count, + raft::comms::op_t op, + cudaStream_t stream) const + { + } +}; + template std::enable_if_t::value, void> device_reduce_impl(raft::comms::comms_t const& comm, @@ -856,6 +916,46 @@ device_bcast(raft::comms::comms_t const& comm, comm, input_first, output_first, count, root, stream); } +template +std::enable_if_t< + std::is_arithmetic::value_type>::value, + void> +device_allreduce(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + size_t count, + raft::comms::op_t op, + cudaStream_t stream) +{ + detail::device_allreduce_impl(comm, input_first, output_first, count, op, stream); +} + +template +std::enable_if_t< + is_thrust_tuple_of_arithmetic::value_type>::value && + is_thrust_tuple::value_type>::value, + void> +device_allreduce(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + size_t count, + raft::comms::op_t op, + cudaStream_t stream) +{ + static_assert( + thrust::tuple_size::value_type>::value == + thrust::tuple_size::value_type>::value); + + size_t constexpr tuple_size = + thrust::tuple_size::value_type>::value; + + detail::device_allreduce_tuple_iterator_element_impl( + comm, input_first, output_first, count, op, stream); +} + template std::enable_if_t< std::is_arithmetic::value_type>::value, diff --git a/cpp/include/utilities/shuffle_comm.cuh b/cpp/include/utilities/shuffle_comm.cuh index 8c363c9a346..b318009d9bf 100644 --- a/cpp/include/utilities/shuffle_comm.cuh +++ b/cpp/include/utilities/shuffle_comm.cuh @@ -22,6 +22,12 @@ #include #include +#include +#include +#include +#include +#include + #include #include #include @@ -31,89 +37,6 @@ namespace experimental { namespace detail { -template -rmm::device_uvector sort_and_count(raft::comms::comms_t const &comm, - ValueIterator tx_value_first /* [INOUT */, - ValueIterator tx_value_last /* [INOUT */, - ValueToGPUIdOp value_to_gpu_id_op, - cudaStream_t stream) -{ - auto const comm_size = comm.get_size(); - - thrust::sort(rmm::exec_policy(stream)->on(stream), - tx_value_first, - tx_value_last, - [value_to_gpu_id_op] __device__(auto lhs, auto rhs) { - return value_to_gpu_id_op(lhs) < value_to_gpu_id_op(rhs); - }); - - auto gpu_id_first = thrust::make_transform_iterator( - tx_value_first, - [value_to_gpu_id_op] __device__(auto value) { return value_to_gpu_id_op(value); }); - rmm::device_uvector d_tx_dst_ranks(comm_size, stream); - rmm::device_uvector d_tx_value_counts(comm_size, stream); - auto last = thrust::reduce_by_key(rmm::exec_policy(stream)->on(stream), - gpu_id_first, - gpu_id_first + thrust::distance(tx_value_first, tx_value_last), - thrust::make_constant_iterator(size_t{1}), - d_tx_dst_ranks.begin(), - d_tx_value_counts.begin()); - if (thrust::distance(d_tx_value_counts.begin(), thrust::get<1>(last)) < 
comm_size) { - rmm::device_uvector d_counts(comm_size, stream); - thrust::fill(rmm::exec_policy(stream)->on(stream), d_counts.begin(), d_counts.end(), size_t{0}); - thrust::scatter(rmm::exec_policy(stream)->on(stream), - d_tx_value_counts.begin(), - thrust::get<1>(last), - d_tx_dst_ranks.begin(), - d_counts.begin()); - d_tx_value_counts = std::move(d_counts); - } - - return d_tx_value_counts; -} - -template -rmm::device_uvector sort_and_count(raft::comms::comms_t const &comm, - VertexIterator tx_key_first /* [INOUT */, - VertexIterator tx_key_last /* [INOUT */, - ValueIterator tx_value_first /* [INOUT */, - KeyToGPUIdOp key_to_gpu_id_op, - cudaStream_t stream) -{ - auto const comm_size = comm.get_size(); - - thrust::sort_by_key(rmm::exec_policy(stream)->on(stream), - tx_key_first, - tx_key_last, - tx_value_first, - [key_to_gpu_id_op] __device__(auto lhs, auto rhs) { - return key_to_gpu_id_op(lhs) < key_to_gpu_id_op(rhs); - }); - - auto gpu_id_first = thrust::make_transform_iterator( - tx_key_first, [key_to_gpu_id_op] __device__(auto key) { return key_to_gpu_id_op(key); }); - rmm::device_uvector d_tx_dst_ranks(comm_size, stream); - rmm::device_uvector d_tx_value_counts(comm_size, stream); - auto last = thrust::reduce_by_key(rmm::exec_policy(stream)->on(stream), - gpu_id_first, - gpu_id_first + thrust::distance(tx_key_first, tx_key_last), - thrust::make_constant_iterator(size_t{1}), - d_tx_dst_ranks.begin(), - d_tx_value_counts.begin()); - if (thrust::distance(d_tx_value_counts.begin(), thrust::get<1>(last)) < comm_size) { - rmm::device_uvector d_counts(comm_size, stream); - thrust::fill(rmm::exec_policy(stream)->on(stream), d_counts.begin(), d_counts.end(), size_t{0}); - thrust::scatter(rmm::exec_policy(stream)->on(stream), - d_tx_value_counts.begin(), - thrust::get<1>(last), - d_tx_dst_ranks.begin(), - d_counts.begin()); - d_tx_value_counts = std::move(d_counts); - } - - return d_tx_value_counts; -} - // inline to suppress a complaint about ODR violation inline std::tuple, std::vector, @@ -187,6 +110,86 @@ compute_tx_rx_counts_offsets_ranks(raft::comms::comms_t const &comm, } // namespace detail +template +rmm::device_uvector groupby_and_count(ValueIterator tx_value_first /* [INOUT */, + ValueIterator tx_value_last /* [INOUT */, + ValueToGPUIdOp value_to_group_id_op, + int num_groups, + cudaStream_t stream) +{ + thrust::sort(rmm::exec_policy(stream)->on(stream), + tx_value_first, + tx_value_last, + [value_to_group_id_op] __device__(auto lhs, auto rhs) { + return value_to_group_id_op(lhs) < value_to_group_id_op(rhs); + }); + + auto group_id_first = thrust::make_transform_iterator( + tx_value_first, + [value_to_group_id_op] __device__(auto value) { return value_to_group_id_op(value); }); + rmm::device_uvector d_tx_dst_ranks(num_groups, stream); + rmm::device_uvector d_tx_value_counts(d_tx_dst_ranks.size(), stream); + auto last = + thrust::reduce_by_key(rmm::exec_policy(stream)->on(stream), + group_id_first, + group_id_first + thrust::distance(tx_value_first, tx_value_last), + thrust::make_constant_iterator(size_t{1}), + d_tx_dst_ranks.begin(), + d_tx_value_counts.begin()); + if (thrust::distance(d_tx_dst_ranks.begin(), thrust::get<0>(last)) < num_groups) { + rmm::device_uvector d_counts(num_groups, stream); + thrust::fill(rmm::exec_policy(stream)->on(stream), d_counts.begin(), d_counts.end(), size_t{0}); + thrust::scatter(rmm::exec_policy(stream)->on(stream), + d_tx_value_counts.begin(), + thrust::get<1>(last), + d_tx_dst_ranks.begin(), + d_counts.begin()); + d_tx_value_counts = 
std::move(d_counts); + } + + return d_tx_value_counts; +} + +template +rmm::device_uvector groupby_and_count(VertexIterator tx_key_first /* [INOUT */, + VertexIterator tx_key_last /* [INOUT */, + ValueIterator tx_value_first /* [INOUT */, + KeyToGPUIdOp key_to_group_id_op, + int num_groups, + cudaStream_t stream) +{ + thrust::sort_by_key(rmm::exec_policy(stream)->on(stream), + tx_key_first, + tx_key_last, + tx_value_first, + [key_to_group_id_op] __device__(auto lhs, auto rhs) { + return key_to_group_id_op(lhs) < key_to_group_id_op(rhs); + }); + + auto group_id_first = thrust::make_transform_iterator( + tx_key_first, [key_to_group_id_op] __device__(auto key) { return key_to_group_id_op(key); }); + rmm::device_uvector d_tx_dst_ranks(num_groups, stream); + rmm::device_uvector d_tx_value_counts(d_tx_dst_ranks.size(), stream); + auto last = thrust::reduce_by_key(rmm::exec_policy(stream)->on(stream), + group_id_first, + group_id_first + thrust::distance(tx_key_first, tx_key_last), + thrust::make_constant_iterator(size_t{1}), + d_tx_dst_ranks.begin(), + d_tx_value_counts.begin()); + if (thrust::distance(d_tx_dst_ranks.begin(), thrust::get<0>(last)) < num_groups) { + rmm::device_uvector d_counts(num_groups, stream); + thrust::fill(rmm::exec_policy(stream)->on(stream), d_counts.begin(), d_counts.end(), size_t{0}); + thrust::scatter(rmm::exec_policy(stream)->on(stream), + d_tx_value_counts.begin(), + thrust::get<1>(last), + d_tx_dst_ranks.begin(), + d_counts.begin()); + d_tx_value_counts = std::move(d_counts); + } + + return d_tx_value_counts; +} + template auto shuffle_values(raft::comms::comms_t const &comm, TxValueIterator tx_value_first, @@ -250,7 +253,7 @@ auto groupby_gpuid_and_shuffle_values(raft::comms::comms_t const &comm, auto const comm_size = comm.get_size(); auto d_tx_value_counts = - detail::sort_and_count(comm, tx_value_first, tx_value_last, value_to_gpu_id_op, stream); + groupby_and_count(tx_value_first, tx_value_last, value_to_gpu_id_op, comm.get_size(), stream); std::vector tx_counts{}; std::vector tx_offsets{}; @@ -301,8 +304,8 @@ auto groupby_gpuid_and_shuffle_kv_pairs(raft::comms::comms_t const &comm, { auto const comm_size = comm.get_size(); - auto d_tx_value_counts = detail::sort_and_count( - comm, tx_key_first, tx_key_last, tx_value_first, key_to_gpu_id_op, stream); + auto d_tx_value_counts = groupby_and_count( + tx_key_first, tx_key_last, tx_value_first, key_to_gpu_id_op, comm.get_size(), stream); std::vector tx_counts{}; std::vector tx_offsets{}; diff --git a/cpp/include/utilities/thrust_tuple_utils.cuh b/cpp/include/utilities/thrust_tuple_utils.cuh index 01843a583eb..d5ce6ff1a29 100644 --- a/cpp/include/utilities/thrust_tuple_utils.cuh +++ b/cpp/include/utilities/thrust_tuple_utils.cuh @@ -61,13 +61,6 @@ struct compute_thrust_tuple_element_sizes_impl { void compute(std::array::value>& arr) const {} }; -template -__device__ constexpr auto remove_first_thrust_tuple_element_impl(TupleType const& tuple, - std::index_sequence) -{ - return thrust::make_tuple(thrust::get<1 + Is>(tuple)...); -} - template struct plus_thrust_tuple_impl { __host__ __device__ constexpr void compute(TupleType& lhs, TupleType const& rhs) const @@ -200,16 +193,6 @@ struct compute_thrust_tuple_element_sizes { } }; -template -struct remove_first_thrust_tuple_element { - __device__ constexpr auto operator()(TupleType const& tuple) const - { - size_t constexpr tuple_size = thrust::tuple_size::value; - return detail::remove_first_thrust_tuple_element_impl( - tuple, std::make_index_sequence()); - } -}; - 
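A brief note on the helper removed above: the kernels updated earlier in this patch now read the payload directly as the second element of the e_op/v_op result tuple, so the tuple-slicing utility is no longer needed. A minimal before/after sketch mirroring those call sites (template arguments elided, as in the diff):

    // before: strip the leading element of the result tuple to obtain the payload
    //   *(buffer_payload_output_first + buffer_idx) =
    //       remove_first_thrust_tuple_element<...>()(e_op_result);
    // after: the payload is carried as the tuple's second element
    *(buffer_payload_output_first + buffer_idx) = thrust::get<1>(e_op_result);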
template struct plus_thrust_tuple { __host__ __device__ constexpr TupleType operator()(TupleType const& lhs, diff --git a/cpp/src/community/ecg.cu b/cpp/src/community/ecg.cu index 45f7d723191..a176dfbd1c8 100644 --- a/cpp/src/community/ecg.cu +++ b/cpp/src/community/ecg.cu @@ -117,7 +117,7 @@ class EcgLouvain : public cugraph::Louvain { void initialize_dendrogram_level(vertex_t num_vertices) override { - this->dendrogram_->add_level(0, num_vertices); + this->dendrogram_->add_level(0, num_vertices, this->stream_); get_permutation_vector( num_vertices, seed_, this->dendrogram_->current_level_begin(), this->stream_); diff --git a/cpp/src/community/leiden.cuh b/cpp/src/community/leiden.cuh index aae2d3712b5..4ffb7c20eb2 100644 --- a/cpp/src/community/leiden.cuh +++ b/cpp/src/community/leiden.cuh @@ -132,7 +132,7 @@ class Leiden : public Louvain { // // Initialize every cluster to reference each vertex to itself // - this->dendrogram_->add_level(0, current_graph.number_of_vertices); + this->dendrogram_->add_level(0, current_graph.number_of_vertices, this->stream_); thrust::sequence(rmm::exec_policy(this->stream_)->on(this->stream_), this->dendrogram_->current_level_begin(), diff --git a/cpp/src/community/louvain.cuh b/cpp/src/community/louvain.cuh index 0862bbc62a9..e3569d4c850 100644 --- a/cpp/src/community/louvain.cuh +++ b/cpp/src/community/louvain.cuh @@ -210,7 +210,7 @@ class Louvain { virtual void initialize_dendrogram_level(vertex_t num_vertices) { - dendrogram_->add_level(0, num_vertices); + dendrogram_->add_level(0, num_vertices, stream_); thrust::sequence(rmm::exec_policy(stream_)->on(stream_), dendrogram_->current_level_begin(), diff --git a/cpp/src/experimental/bfs.cu b/cpp/src/experimental/bfs.cu index 7adfbd7fbd7..9145e3737b6 100644 --- a/cpp/src/experimental/bfs.cu +++ b/cpp/src/experimental/bfs.cu @@ -93,10 +93,7 @@ void bfs(raft::handle_t const &handle, enum class Bucket { cur, num_buckets }; std::vector bucket_sizes(static_cast(Bucket::num_buckets), push_graph_view.get_number_of_local_vertices()); - VertexFrontier, - vertex_t, - GraphViewType::is_multi_gpu, - static_cast(Bucket::num_buckets)> + VertexFrontier(Bucket::num_buckets)> vertex_frontier(handle, bucket_sizes); if (push_graph_view.is_local_vertex_nocheck(source_vertex)) { @@ -133,19 +130,16 @@ void bfs(raft::handle_t const &handle, *(distances + vertex_partition.get_local_vertex_offset_from_vertex_nocheck(dst)); if (distance != invalid_distance) { push = false; } } - // FIXME: need to test this works properly if payload size is 0 (returns a tuple of size - // 1) return thrust::make_tuple(push, src); }, - reduce_op::any>(), + reduce_op::any(), distances, thrust::make_zip_iterator(thrust::make_tuple(distances, predecessor_first)), vertex_frontier, [depth] __device__(auto v_val, auto pushed_val) { - auto idx = (v_val == invalid_distance) - ? static_cast(Bucket::cur) - : VertexFrontier, vertex_t>::kInvalidBucketIdx; - return thrust::make_tuple(idx, depth + 1, thrust::get<0>(pushed_val)); + auto idx = (v_val == invalid_distance) ? 
static_cast(Bucket::cur) + : VertexFrontier::kInvalidBucketIdx; + return thrust::make_tuple(idx, thrust::make_tuple(depth + 1, pushed_val)); }); auto new_vertex_frontier_aggregate_size = diff --git a/cpp/src/experimental/coarsen_graph.cu b/cpp/src/experimental/coarsen_graph.cu index 0cd551b0d73..1eccbd23584 100644 --- a/cpp/src/experimental/coarsen_graph.cu +++ b/cpp/src/experimental/coarsen_graph.cu @@ -28,6 +28,7 @@ #include #include +#include #include #include @@ -49,6 +50,7 @@ std:: weight_t const *compressed_sparse_weights, vertex_t major_first, vertex_t major_last, + bool is_weighted, cudaStream_t stream) { edge_t number_of_edges{0}; @@ -57,8 +59,7 @@ std:: CUDA_TRY(cudaStreamSynchronize(stream)); rmm::device_uvector edgelist_major_vertices(number_of_edges, stream); rmm::device_uvector edgelist_minor_vertices(number_of_edges, stream); - rmm::device_uvector edgelist_weights( - compressed_sparse_weights != nullptr ? number_of_edges : 0, stream); + rmm::device_uvector edgelist_weights(is_weighted ? number_of_edges : 0, stream); // FIXME: this is highly inefficient for very high-degree vertices, for better performance, we can // fill high-degree vertices using one CUDA block per vertex, mid-degree vertices using one CUDA @@ -77,7 +78,7 @@ std:: compressed_sparse_indices, compressed_sparse_indices + number_of_edges, edgelist_minor_vertices.begin()); - if (compressed_sparse_weights != nullptr) { + if (is_weighted) { thrust::copy(rmm::exec_policy(stream)->on(stream), compressed_sparse_weights, compressed_sparse_weights + number_of_edges, @@ -89,62 +90,62 @@ std:: std::move(edgelist_weights)); } -template -void sort_and_coarsen_edgelist(rmm::device_uvector &edgelist_major_vertices /* [INOUT] */, - rmm::device_uvector &edgelist_minor_vertices /* [INOUT] */, - rmm::device_uvector &edgelist_weights /* [INOUT] */, - cudaStream_t stream) +template +edge_t groupby_e_and_coarsen_edgelist(vertex_t *edgelist_major_vertices /* [INOUT] */, + vertex_t *edgelist_minor_vertices /* [INOUT] */, + weight_t *edgelist_weights /* [INOUT] */, + edge_t number_of_edges, + bool is_weighted, + cudaStream_t stream) { - auto pair_first = thrust::make_zip_iterator( - thrust::make_tuple(edgelist_major_vertices.begin(), edgelist_minor_vertices.begin())); + auto pair_first = + thrust::make_zip_iterator(thrust::make_tuple(edgelist_major_vertices, edgelist_minor_vertices)); - size_t number_of_edges{0}; - if (edgelist_weights.size() > 0) { + if (is_weighted) { thrust::sort_by_key(rmm::exec_policy(stream)->on(stream), pair_first, - pair_first + edgelist_major_vertices.size(), - edgelist_weights.begin()); + pair_first + number_of_edges, + edgelist_weights); - rmm::device_uvector tmp_edgelist_major_vertices(edgelist_major_vertices.size(), - stream); + rmm::device_uvector tmp_edgelist_major_vertices(number_of_edges, stream); rmm::device_uvector tmp_edgelist_minor_vertices(tmp_edgelist_major_vertices.size(), stream); rmm::device_uvector tmp_edgelist_weights(tmp_edgelist_major_vertices.size(), stream); auto it = thrust::reduce_by_key( rmm::exec_policy(stream)->on(stream), pair_first, - pair_first + edgelist_major_vertices.size(), - edgelist_weights.begin(), + pair_first + number_of_edges, + edgelist_weights, thrust::make_zip_iterator(thrust::make_tuple(tmp_edgelist_major_vertices.begin(), tmp_edgelist_minor_vertices.begin())), tmp_edgelist_weights.begin()); - number_of_edges = thrust::distance(tmp_edgelist_weights.begin(), thrust::get<1>(it)); + auto ret = + static_cast(thrust::distance(tmp_edgelist_weights.begin(), 
thrust::get<1>(it))); - edgelist_major_vertices = std::move(tmp_edgelist_major_vertices); - edgelist_minor_vertices = std::move(tmp_edgelist_minor_vertices); - edgelist_weights = std::move(tmp_edgelist_weights); + auto edge_first = + thrust::make_zip_iterator(thrust::make_tuple(tmp_edgelist_major_vertices.begin(), + tmp_edgelist_minor_vertices.begin(), + tmp_edgelist_weights.begin())); + thrust::copy(rmm::exec_policy(stream)->on(stream), + edge_first, + edge_first + ret, + thrust::make_zip_iterator(thrust::make_tuple( + edgelist_major_vertices, edgelist_minor_vertices, edgelist_weights))); + + return ret; } else { - thrust::sort(rmm::exec_policy(stream)->on(stream), - pair_first, - pair_first + edgelist_major_vertices.size()); - auto it = thrust::unique(rmm::exec_policy(stream)->on(stream), - pair_first, - pair_first + edgelist_major_vertices.size()); - number_of_edges = thrust::distance(pair_first, it); + thrust::sort(rmm::exec_policy(stream)->on(stream), pair_first, pair_first + number_of_edges); + return static_cast(thrust::distance( + pair_first, + thrust::unique( + rmm::exec_policy(stream)->on(stream), pair_first, pair_first + number_of_edges))); } - - edgelist_major_vertices.resize(number_of_edges, stream); - edgelist_minor_vertices.resize(number_of_edges, stream); - edgelist_weights.resize(number_of_edges, stream); - edgelist_major_vertices.shrink_to_fit(stream); - edgelist_minor_vertices.shrink_to_fit(stream); - edgelist_weights.shrink_to_fit(stream); } template std:: tuple, rmm::device_uvector, rmm::device_uvector> - compressed_sparse_to_relabeled_and_sorted_and_coarsened_edgelist( + compressed_sparse_to_relabeled_and_grouped_and_coarsened_edgelist( edge_t const *compressed_sparse_offsets, vertex_t const *compressed_sparse_indices, weight_t const *compressed_sparse_weights, @@ -154,6 +155,7 @@ std:: vertex_t major_last, vertex_t minor_first, vertex_t minor_last, + bool is_weighted, cudaStream_t stream) { // FIXME: it might be possible to directly create relabled & coarsened edgelist from the @@ -168,6 +170,7 @@ std:: compressed_sparse_weights, major_first, major_last, + is_weighted, stream); auto pair_first = thrust::make_zip_iterator( @@ -182,8 +185,21 @@ std:: p_minor_labels[thrust::get<1>(val) - minor_first]); }); - sort_and_coarsen_edgelist( - edgelist_major_vertices, edgelist_minor_vertices, edgelist_weights, stream); + auto number_of_edges = + groupby_e_and_coarsen_edgelist(edgelist_major_vertices.data(), + edgelist_minor_vertices.data(), + edgelist_weights.data(), + static_cast(edgelist_major_vertices.size()), + is_weighted, + stream); + edgelist_major_vertices.resize(number_of_edges, stream); + edgelist_major_vertices.shrink_to_fit(stream); + edgelist_minor_vertices.resize(number_of_edges, stream); + edgelist_minor_vertices.shrink_to_fit(stream); + if (is_weighted) { + edgelist_weights.resize(number_of_edges, stream); + edgelist_weights.shrink_to_fit(stream); + } return std::make_tuple(std::move(edgelist_major_vertices), std::move(edgelist_minor_vertices), @@ -220,48 +236,66 @@ coarsen_graph( // currently, nothing to do } - // 1. locally construct coarsened edge list + // 1. construct coarsened edge list - // FIXME: we don't need adj_matrix_major_labels if we apply the same partitioning scheme - // regardless of hypergraph partitioning is applied or not - rmm::device_uvector adj_matrix_major_labels( - store_transposed ? 
graph_view.get_number_of_local_adj_matrix_partition_cols() - : graph_view.get_number_of_local_adj_matrix_partition_rows(), - handle.get_stream()); rmm::device_uvector adj_matrix_minor_labels( store_transposed ? graph_view.get_number_of_local_adj_matrix_partition_rows() : graph_view.get_number_of_local_adj_matrix_partition_cols(), handle.get_stream()); if (store_transposed) { - copy_to_adj_matrix_col(handle, graph_view, labels, adj_matrix_major_labels.data()); copy_to_adj_matrix_row(handle, graph_view, labels, adj_matrix_minor_labels.data()); } else { - copy_to_adj_matrix_row(handle, graph_view, labels, adj_matrix_major_labels.data()); copy_to_adj_matrix_col(handle, graph_view, labels, adj_matrix_minor_labels.data()); } - rmm::device_uvector coarsened_edgelist_major_vertices(0, handle.get_stream()); - rmm::device_uvector coarsened_edgelist_minor_vertices(0, handle.get_stream()); - rmm::device_uvector coarsened_edgelist_weights(0, handle.get_stream()); + std::vector> coarsened_edgelist_major_vertices{}; + std::vector> coarsened_edgelist_minor_vertices{}; + std::vector> coarsened_edgelist_weights{}; + coarsened_edgelist_major_vertices.reserve(graph_view.get_number_of_local_adj_matrix_partitions()); + coarsened_edgelist_minor_vertices.reserve(coarsened_edgelist_major_vertices.size()); + coarsened_edgelist_weights.reserve( + graph_view.is_weighted() ? coarsened_edgelist_major_vertices.size() : size_t{0}); + for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { + coarsened_edgelist_major_vertices.emplace_back(0, handle.get_stream()); + coarsened_edgelist_minor_vertices.emplace_back(0, handle.get_stream()); + if (graph_view.is_weighted()) { + coarsened_edgelist_weights.emplace_back(0, handle.get_stream()); + } + } // FIXME: we may compare performance/memory footprint with the hash_based approach especially when // cuco::dynamic_map becomes available (so we don't need to preallocate memory assuming the worst // case). We may be able to limit the memory requirement close to the final coarsened edgelist // with the hash based approach. for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { - // get edge list + // 1-1. locally construct coarsened edge list + + rmm::device_uvector major_labels( + store_transposed ? graph_view.get_number_of_local_adj_matrix_partition_cols(i) + : graph_view.get_number_of_local_adj_matrix_partition_rows(i), + handle.get_stream()); + // FIXME: this copy is unnecessary, beter fix RAFT comm's bcast to take const iterators for + // input + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + labels, + labels + major_labels.size(), + major_labels.begin()); + device_bcast(col_comm, + major_labels.data(), + major_labels.data(), + major_labels.size(), + static_cast(i), + handle.get_stream()); rmm::device_uvector edgelist_major_vertices(0, handle.get_stream()); rmm::device_uvector edgelist_minor_vertices(0, handle.get_stream()); rmm::device_uvector edgelist_weights(0, handle.get_stream()); std::tie(edgelist_major_vertices, edgelist_minor_vertices, edgelist_weights) = - compressed_sparse_to_relabeled_and_sorted_and_coarsened_edgelist( + compressed_sparse_to_relabeled_and_grouped_and_coarsened_edgelist( graph_view.offsets(i), graph_view.indices(i), graph_view.weights(i), - adj_matrix_major_labels.begin() + - (store_transposed ? 
graph_view.get_local_adj_matrix_partition_col_value_start_offset(i) - : graph_view.get_local_adj_matrix_partition_row_value_start_offset(i)), - adj_matrix_minor_labels.begin(), + major_labels.data(), + adj_matrix_minor_labels.data(), store_transposed ? graph_view.get_local_adj_matrix_partition_col_first(i) : graph_view.get_local_adj_matrix_partition_row_first(i), store_transposed ? graph_view.get_local_adj_matrix_partition_col_last(i) @@ -270,86 +304,159 @@ coarsen_graph( : graph_view.get_local_adj_matrix_partition_col_first(i), store_transposed ? graph_view.get_local_adj_matrix_partition_row_last(i) : graph_view.get_local_adj_matrix_partition_col_last(i), + graph_view.is_weighted(), handle.get_stream()); - auto cur_size = coarsened_edgelist_major_vertices.size(); - // FIXME: this can lead to frequent costly reallocation; we may be able to avoid this if we can - // reserve address space to avoid expensive reallocation. - // https://devblogs.nvidia.com/introducing-low-level-gpu-virtual-memory-management - coarsened_edgelist_major_vertices.resize(cur_size + edgelist_major_vertices.size(), - handle.get_stream()); - coarsened_edgelist_minor_vertices.resize(coarsened_edgelist_major_vertices.size(), - handle.get_stream()); - coarsened_edgelist_weights.resize( - graph_view.is_weighted() ? coarsened_edgelist_major_vertices.size() : 0, handle.get_stream()); - - if (graph_view.is_weighted()) { - auto src_edge_first = - thrust::make_zip_iterator(thrust::make_tuple(edgelist_major_vertices.begin(), - edgelist_minor_vertices.begin(), - edgelist_weights.begin())); - auto dst_edge_first = - thrust::make_zip_iterator(thrust::make_tuple(coarsened_edgelist_major_vertices.begin(), - coarsened_edgelist_minor_vertices.begin(), - coarsened_edgelist_weights.begin())) + - cur_size; - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - src_edge_first, - src_edge_first + edgelist_major_vertices.size(), - dst_edge_first); - } else { - auto src_edge_first = thrust::make_zip_iterator( - thrust::make_tuple(edgelist_major_vertices.begin(), edgelist_minor_vertices.begin())); - auto dst_edge_first = - thrust::make_zip_iterator(thrust::make_tuple(coarsened_edgelist_major_vertices.begin(), - coarsened_edgelist_minor_vertices.begin())) + - cur_size; - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - src_edge_first, - src_edge_first + edgelist_major_vertices.size(), - dst_edge_first); + // 1-2. 
globaly shuffle + + { + rmm::device_uvector rx_edgelist_major_vertices(0, handle.get_stream()); + rmm::device_uvector rx_edgelist_minor_vertices(0, handle.get_stream()); + rmm::device_uvector rx_edgelist_weights(0, handle.get_stream()); + if (graph_view.is_weighted()) { + auto edge_first = + thrust::make_zip_iterator(thrust::make_tuple(edgelist_major_vertices.begin(), + edgelist_minor_vertices.begin(), + edgelist_weights.begin())); + std::forward_as_tuple( + std::tie(rx_edgelist_major_vertices, rx_edgelist_minor_vertices, rx_edgelist_weights), + std::ignore) = + groupby_gpuid_and_shuffle_values( + handle.get_comms(), + edge_first, + edge_first + edgelist_major_vertices.size(), + [key_func = + detail::compute_gpu_id_from_edge_t{ + comm_size, row_comm_size, col_comm_size}] __device__(auto val) { + return key_func(thrust::get<0>(val), thrust::get<1>(val)); + }, + handle.get_stream()); + } else { + auto edge_first = thrust::make_zip_iterator( + thrust::make_tuple(edgelist_major_vertices.begin(), edgelist_minor_vertices.begin())); + std::forward_as_tuple(std::tie(rx_edgelist_major_vertices, rx_edgelist_minor_vertices), + std::ignore) = + groupby_gpuid_and_shuffle_values( + handle.get_comms(), + edge_first, + edge_first + edgelist_major_vertices.size(), + [key_func = + detail::compute_gpu_id_from_edge_t{ + comm_size, row_comm_size, col_comm_size}] __device__(auto val) { + return key_func(thrust::get<0>(val), thrust::get<1>(val)); + }, + handle.get_stream()); + } + + edgelist_major_vertices = std::move(rx_edgelist_major_vertices); + edgelist_minor_vertices = std::move(rx_edgelist_minor_vertices); + edgelist_weights = std::move(rx_edgelist_weights); } - } - sort_and_coarsen_edgelist(coarsened_edgelist_major_vertices, - coarsened_edgelist_minor_vertices, - coarsened_edgelist_weights, - handle.get_stream()); - - // 2. globally shuffle edge list and re-coarsen - - { - auto edge_first = - thrust::make_zip_iterator(thrust::make_tuple(coarsened_edgelist_major_vertices.begin(), - coarsened_edgelist_minor_vertices.begin(), - coarsened_edgelist_weights.begin())); - rmm::device_uvector rx_edgelist_major_vertices(0, handle.get_stream()); - rmm::device_uvector rx_edgelist_minor_vertices(0, handle.get_stream()); - rmm::device_uvector rx_edgelist_weights(0, handle.get_stream()); - std::forward_as_tuple( - std::tie(rx_edgelist_major_vertices, rx_edgelist_minor_vertices, rx_edgelist_weights), - std::ignore) = - groupby_gpuid_and_shuffle_values( - handle.get_comms(), - edge_first, - edge_first + coarsened_edgelist_major_vertices.size(), - [key_func = - detail::compute_gpu_id_from_edge_t{graph_view.is_hypergraph_partitioned(), - comm.get_size(), - row_comm.get_size(), - col_comm.get_size()}] __device__(auto val) { - return key_func(thrust::get<0>(val), thrust::get<1>(val)); - }, + // 1-3. append data to local adjacency matrix partitions + + // FIXME: we can skip this if groupby_gpuid_and_shuffle_values is updated to return sorted edge + // list based on the final matrix partition (maybe add + // groupby_adj_matrix_partition_and_shuffle_values). + + auto local_partition_id_op = + [comm_size, + key_func = detail::compute_partition_id_from_edge_t{ + comm_size, row_comm_size, col_comm_size}] __device__(auto pair) { + return key_func(thrust::get<0>(pair), thrust::get<1>(pair)) / + comm_size; // global partition id to local partition id + }; + auto pair_first = thrust::make_zip_iterator( + thrust::make_tuple(edgelist_major_vertices.begin(), edgelist_minor_vertices.begin())); + auto counts = graph_view.is_weighted() + ? 
groupby_and_count(pair_first, + pair_first + edgelist_major_vertices.size(), + edgelist_weights.begin(), + local_partition_id_op, + graph_view.get_number_of_local_adj_matrix_partitions(), + handle.get_stream()) + : groupby_and_count(pair_first, + pair_first + edgelist_major_vertices.size(), + local_partition_id_op, + graph_view.get_number_of_local_adj_matrix_partitions(), + handle.get_stream()); + + std::vector h_counts(counts.size()); + raft::update_host(h_counts.data(), counts.data(), counts.size(), handle.get_stream()); + handle.get_stream_view().synchronize(); + + std::vector h_displacements(h_counts.size(), size_t{0}); + std::partial_sum(h_counts.begin(), h_counts.end() - 1, h_displacements.begin() + 1); + + for (int j = 0; j < col_comm_size; ++j) { + auto number_of_partition_edges = groupby_e_and_coarsen_edgelist( + edgelist_major_vertices.begin() + h_displacements[j], + edgelist_minor_vertices.begin() + h_displacements[j], + graph_view.is_weighted() ? edgelist_weights.begin() + h_displacements[j] + : static_cast(nullptr), + h_counts[j], + graph_view.is_weighted(), handle.get_stream()); - sort_and_coarsen_edgelist(rx_edgelist_major_vertices, - rx_edgelist_minor_vertices, - rx_edgelist_weights, - handle.get_stream()); + auto cur_size = coarsened_edgelist_major_vertices[j].size(); + // FIXME: this can lead to frequent costly reallocation; we may be able to avoid this if we + // can reserve address space to avoid expensive reallocation. + // https://devblogs.nvidia.com/introducing-low-level-gpu-virtual-memory-management + coarsened_edgelist_major_vertices[j].resize(cur_size + number_of_partition_edges, + handle.get_stream()); + coarsened_edgelist_minor_vertices[j].resize(coarsened_edgelist_major_vertices[j].size(), + handle.get_stream()); + if (graph_view.is_weighted()) { + coarsened_edgelist_weights[j].resize(coarsened_edgelist_major_vertices[j].size(), + handle.get_stream()); + + auto src_edge_first = + thrust::make_zip_iterator(thrust::make_tuple(edgelist_major_vertices.begin(), + edgelist_minor_vertices.begin(), + edgelist_weights.begin())) + + h_displacements[j]; + auto dst_edge_first = + thrust::make_zip_iterator(thrust::make_tuple(coarsened_edgelist_major_vertices[j].begin(), + coarsened_edgelist_minor_vertices[j].begin(), + coarsened_edgelist_weights[j].begin())) + + cur_size; + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + src_edge_first, + src_edge_first + number_of_partition_edges, + dst_edge_first); + } else { + auto src_edge_first = thrust::make_zip_iterator(thrust::make_tuple( + edgelist_major_vertices.begin(), edgelist_minor_vertices.begin())) + + h_displacements[j]; + auto dst_edge_first = thrust::make_zip_iterator( + thrust::make_tuple(coarsened_edgelist_major_vertices[j].begin(), + coarsened_edgelist_minor_vertices[j].begin())) + + cur_size; + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + src_edge_first, + src_edge_first + edgelist_major_vertices.size(), + dst_edge_first); + } + } + } - coarsened_edgelist_major_vertices = std::move(rx_edgelist_major_vertices); - coarsened_edgelist_minor_vertices = std::move(rx_edgelist_minor_vertices); - coarsened_edgelist_weights = std::move(rx_edgelist_weights); + for (size_t i = 0; i < coarsened_edgelist_major_vertices.size(); ++i) { + auto number_of_partition_edges = groupby_e_and_coarsen_edgelist( + coarsened_edgelist_major_vertices[i].data(), + coarsened_edgelist_minor_vertices[i].data(), + graph_view.is_weighted() ? 
coarsened_edgelist_weights[i].data() + : static_cast(nullptr), + static_cast(coarsened_edgelist_major_vertices[i].size()), + graph_view.is_weighted(), + handle.get_stream()); + coarsened_edgelist_major_vertices[i].resize(number_of_partition_edges, handle.get_stream()); + coarsened_edgelist_major_vertices[i].shrink_to_fit(handle.get_stream()); + coarsened_edgelist_minor_vertices[i].resize(number_of_partition_edges, handle.get_stream()); + coarsened_edgelist_minor_vertices[i].shrink_to_fit(handle.get_stream()); + if (coarsened_edgelist_weights.size() > 0) { + coarsened_edgelist_weights[i].resize(number_of_partition_edges, handle.get_stream()); + coarsened_edgelist_weights[i].shrink_to_fit(handle.get_stream()); + } } // 3. find unique labels for this GPU @@ -395,37 +502,43 @@ coarsen_graph( rmm::device_uvector renumber_map_labels(0, handle.get_stream()); partition_t partition(std::vector(comm_size + 1, 0), - graph_view.is_hypergraph_partitioned(), row_comm_size, col_comm_size, row_comm_rank, col_comm_rank); vertex_t number_of_vertices{}; edge_t number_of_edges{}; - std::tie(renumber_map_labels, partition, number_of_vertices, number_of_edges) = - renumber_edgelist( - handle, - unique_labels.data(), - static_cast(unique_labels.size()), - coarsened_edgelist_major_vertices.data(), - coarsened_edgelist_minor_vertices.data(), - static_cast(coarsened_edgelist_major_vertices.size()), - graph_view.is_hypergraph_partitioned(), - do_expensive_check); + { + std::vector major_ptrs(coarsened_edgelist_major_vertices.size()); + std::vector minor_ptrs(major_ptrs.size()); + std::vector counts(major_ptrs.size()); + for (size_t i = 0; i < coarsened_edgelist_major_vertices.size(); ++i) { + major_ptrs[i] = coarsened_edgelist_major_vertices[i].data(); + minor_ptrs[i] = coarsened_edgelist_minor_vertices[i].data(); + counts[i] = static_cast(coarsened_edgelist_major_vertices[i].size()); + } + std::tie(renumber_map_labels, partition, number_of_vertices, number_of_edges) = + renumber_edgelist(handle, + unique_labels.data(), + static_cast(unique_labels.size()), + major_ptrs, + minor_ptrs, + counts, + do_expensive_check); + } // 5. build a graph std::vector> edgelists{}; - if (graph_view.is_hypergraph_partitioned()) { - CUGRAPH_FAIL("unimplemented."); - } else { - edgelists.resize(1); - edgelists[0].p_src_vertices = store_transposed ? coarsened_edgelist_minor_vertices.data() - : coarsened_edgelist_major_vertices.data(); - edgelists[0].p_dst_vertices = store_transposed ? coarsened_edgelist_major_vertices.data() - : coarsened_edgelist_minor_vertices.data(); - edgelists[0].p_edge_weights = coarsened_edgelist_weights.data(); - edgelists[0].number_of_edges = static_cast(coarsened_edgelist_major_vertices.size()); + edgelists.resize(graph_view.get_number_of_local_adj_matrix_partitions()); + for (size_t i = 0; i < edgelists.size(); ++i) { + edgelists[i].p_src_vertices = store_transposed ? coarsened_edgelist_minor_vertices[i].data() + : coarsened_edgelist_major_vertices[i].data(); + edgelists[i].p_dst_vertices = store_transposed ? coarsened_edgelist_major_vertices[i].data() + : coarsened_edgelist_minor_vertices[i].data(); + edgelists[i].p_edge_weights = graph_view.is_weighted() ? 
coarsened_edgelist_weights[i].data() + : static_cast(nullptr); + edgelists[i].number_of_edges = static_cast(coarsened_edgelist_major_vertices[i].size()); } return std::make_tuple( @@ -435,7 +548,7 @@ coarsen_graph( partition, number_of_vertices, number_of_edges, - graph_properties_t{graph_view.is_symmetric(), false}, + graph_properties_t{graph_view.is_symmetric(), false, graph_view.is_weighted()}, true), std::move(renumber_map_labels)); } @@ -466,7 +579,7 @@ coarsen_graph( std::tie(coarsened_edgelist_major_vertices, coarsened_edgelist_minor_vertices, coarsened_edgelist_weights) = - compressed_sparse_to_relabeled_and_sorted_and_coarsened_edgelist( + compressed_sparse_to_relabeled_and_grouped_and_coarsened_edgelist( graph_view.offsets(), graph_view.indices(), graph_view.weights(), @@ -476,6 +589,7 @@ coarsen_graph( graph_view.get_number_of_vertices(), vertex_t{0}, graph_view.get_number_of_vertices(), + graph_view.is_weighted(), handle.get_stream()); rmm::device_uvector unique_labels(graph_view.get_number_of_vertices(), @@ -516,7 +630,7 @@ coarsen_graph( handle, edgelist, static_cast(renumber_map_labels.size()), - graph_properties_t{graph_view.is_symmetric(), false}, + graph_properties_t{graph_view.is_symmetric(), false, graph_view.is_weighted()}, true), std::move(renumber_map_labels)); } diff --git a/cpp/src/experimental/generate_rmat_edgelist.cu b/cpp/src/experimental/generate_rmat_edgelist.cu index 185fa837a70..d75a4654a15 100644 --- a/cpp/src/experimental/generate_rmat_edgelist.cu +++ b/cpp/src/experimental/generate_rmat_edgelist.cu @@ -46,13 +46,13 @@ std::tuple, rmm::device_uvector> generat bool clip_and_flip, bool scramble_vertex_ids) { - CUGRAPH_EXPECTS(size_t{1} << scale <= std::numeric_limits::max(), + CUGRAPH_EXPECTS((size_t{1} << scale) <= static_cast(std::numeric_limits::max()), "Invalid input argument: scale too large for vertex_t."); CUGRAPH_EXPECTS((a >= 0.0) && (b >= 0.0) && (c >= 0.0) && (a + b + c <= 1.0), "Invalid input argument: a, b, c should be non-negative and a + b + c should not " "be larger than 1.0."); - raft::random::Rng rng(seed + 10); + raft::random::Rng rng(seed); // to limit memory footprint (1024 is a tuning parameter) auto max_edges_to_generate_per_iteration = static_cast(handle.get_device_properties().multiProcessorCount) * 1024; diff --git a/cpp/src/experimental/graph.cu b/cpp/src/experimental/graph.cu index 5abe141dafd..47c41cb3426 100644 --- a/cpp/src/experimental/graph.cu +++ b/cpp/src/experimental/graph.cu @@ -67,12 +67,12 @@ std:: vertex_t major_last, vertex_t minor_first, vertex_t minor_last, + bool is_weighted, cudaStream_t stream) { rmm::device_uvector offsets((major_last - major_first) + 1, stream); rmm::device_uvector indices(edgelist.number_of_edges, stream); - rmm::device_uvector weights( - edgelist.p_edge_weights != nullptr ? edgelist.number_of_edges : 0, stream); + rmm::device_uvector weights(is_weighted ? edgelist.number_of_edges : 0, stream); thrust::fill(rmm::exec_policy(stream)->on(stream), offsets.begin(), offsets.end(), edge_t{0}); thrust::fill(rmm::exec_policy(stream)->on(stream), indices.begin(), indices.end(), vertex_t{0}); @@ -89,8 +89,7 @@ std:: auto p_offsets = offsets.data(); auto p_indices = indices.data(); - auto p_weights = - edgelist.p_edge_weights != nullptr ? weights.data() : static_cast(nullptr); + auto p_weights = is_weighted ? weights.data() : static_cast(nullptr); thrust::for_each(rmm::exec_policy(stream)->on(stream), store_transposed ? 
edgelist.p_dst_vertices : edgelist.p_src_vertices, @@ -103,7 +102,7 @@ std:: thrust::exclusive_scan( rmm::exec_policy(stream)->on(stream), offsets.begin(), offsets.end(), offsets.begin()); - if (edgelist.p_edge_weights != nullptr) { + if (is_weighted) { auto edge_first = thrust::make_zip_iterator(thrust::make_tuple( edgelist.p_src_vertices, edgelist.p_dst_vertices, edgelist.p_edge_weights)); thrust::for_each(rmm::exec_policy(stream)->on(stream), @@ -191,24 +190,22 @@ graph_t 0, "Invalid input argument: edgelists.size() should be non-zero."); - bool is_weighted = edgelists[0].p_edge_weights != nullptr; - CUGRAPH_EXPECTS( std::any_of(edgelists.begin() + 1, edgelists.end(), - [is_weighted](auto edgelist) { - return (edgelist.p_src_vertices == nullptr) || - (edgelist.p_dst_vertices == nullptr) || - (is_weighted && (edgelist.p_edge_weights == nullptr)) || + [is_weighted = properties.is_weighted](auto edgelist) { + return ((edgelist.number_of_edges > 0) && (edgelist.p_src_vertices == nullptr)) || + ((edgelist.number_of_edges > 0) && (edgelist.p_dst_vertices == nullptr)) || + (is_weighted && (edgelist.number_of_edges > 0) && + (edgelist.p_edge_weights == nullptr)) || (!is_weighted && (edgelist.p_edge_weights != nullptr)); }) == false, "Invalid input argument: edgelists[].p_src_vertices and edgelists[].p_dst_vertices should not " - "be nullptr and edgelists[].p_edge_weights should be nullptr (if edgelists[0].p_edge_weights " - "is nullptr) or should not be nullptr (otherwise)."); + "be nullptr if edgelists[].number_of_edges > 0 and edgelists[].p_edge_weights should be " + "nullptr if unweighted or should not be nullptr if weighted and edgelists[].number_of_edges > " + "0."); - CUGRAPH_EXPECTS((partition.is_hypergraph_partitioned() && - (edgelists.size() == static_cast(col_comm_size))) || - (!(partition.is_hypergraph_partitioned()) && (edgelists.size() == 1)), + CUGRAPH_EXPECTS(edgelists.size() == static_cast(col_comm_size), "Invalid input argument: errneous edgelists.size()."); // optional expensive checks (part 1/3) @@ -251,7 +248,7 @@ graph_tget_handle_ptr()->get_stream()); adj_matrix_partition_offsets_.push_back(std::move(offsets)); adj_matrix_partition_indices_.push_back(std::move(indices)); - if (is_weighted) { adj_matrix_partition_weights_.push_back(std::move(weights)); } + if (properties.is_weighted) { adj_matrix_partition_weights_.push_back(std::move(weights)); } } // update degree-based segment offsets (to be used for graph analytics kernel optimization) @@ -321,22 +319,12 @@ graph_t aggregate_segment_offsets(0, default_stream); - if (partition.is_hypergraph_partitioned()) { - rmm::device_uvector aggregate_segment_offsets( - col_comm_size * segment_offsets.size(), default_stream); - col_comm.allgather(segment_offsets.data(), - aggregate_segment_offsets.data(), - segment_offsets.size(), - default_stream); - } else { - rmm::device_uvector aggregate_segment_offsets( - row_comm_size * segment_offsets.size(), default_stream); - row_comm.allgather(segment_offsets.data(), - aggregate_segment_offsets.data(), - segment_offsets.size(), - default_stream); - } + rmm::device_uvector aggregate_segment_offsets(col_comm_size * segment_offsets.size(), + default_stream); + col_comm.allgather(segment_offsets.data(), + aggregate_segment_offsets.data(), + segment_offsets.size(), + default_stream); vertex_partition_segment_offsets_.resize(aggregate_segment_offsets.size()); raft::update_host(vertex_partition_segment_offsets_.data(), @@ -344,18 +332,10 @@ graph_tget_handle_ptr()->get_stream(); CUGRAPH_EXPECTS( - 
(edgelist.p_src_vertices != nullptr) && (edgelist.p_dst_vertices != nullptr), + ((edgelist.number_of_edges == 0) || (edgelist.p_src_vertices != nullptr)) && + ((edgelist.number_of_edges == 0) || (edgelist.p_dst_vertices != nullptr)) && + ((properties.is_weighted && + ((edgelist.number_of_edges == 0) || (edgelist.p_edge_weights != nullptr))) || + (!properties.is_weighted && (edgelist.p_edge_weights == nullptr))), "Invalid input argument: edgelist.p_src_vertices and edgelist.p_dst_vertices should " - "not be nullptr."); + "not be nullptr if edgelist.number_of_edges > 0 and edgelist.p_edge_weights should be nullptr " + "if unweighted or should not be nullptr if weighted and edgelist.number_of_edges > 0."); // optional expensive checks (part 1/2) @@ -427,6 +412,7 @@ graph_tget_number_of_vertices(), vertex_t{0}, this->get_number_of_vertices(), + properties.is_weighted, this->get_handle_ptr()->get_stream()); // update degree-based segment offsets (to be used for graph analytics kernel optimization) diff --git a/cpp/src/experimental/graph_view.cu b/cpp/src/experimental/graph_view.cu index f443608e424..c6f39a44333 100644 --- a/cpp/src/experimental/graph_view.cu +++ b/cpp/src/experimental/graph_view.cu @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -195,16 +196,12 @@ graph_view_t(row_comm_size))) || - (!(partition.is_hypergraph_partitioned()) && (adj_matrix_partition_offsets.size() == 1)), - "Internal Error: erroneous adj_matrix_partition_offsets.size()."); + CUGRAPH_EXPECTS(adj_matrix_partition_offsets.size() == static_cast(col_comm_size), + "Internal Error: erroneous adj_matrix_partition_offsets.size()."); CUGRAPH_EXPECTS((sorted_by_global_degree_within_vertex_partition && (vertex_partition_segment_offsets.size() == - (partition.is_hypergraph_partitioned() ? 
col_comm_size : row_comm_size) * - (detail::num_segments_per_vertex_partition + 1))) || + col_comm_size * (detail::num_segments_per_vertex_partition + 1))) || (!sorted_by_global_degree_within_vertex_partition && (vertex_partition_segment_offsets.size() == 0)), "Internal Error: vertex_partition_segment_offsets.size() does not match " @@ -267,8 +264,7 @@ graph_view_t graph_view_t< } } +template +edge_t +graph_view_t>:: + compute_max_in_degree(raft::handle_t const& handle) const +{ + auto in_degrees = compute_in_degrees(handle); + auto it = thrust::max_element(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + in_degrees.begin(), + in_degrees.end()); + rmm::device_scalar ret(handle.get_stream()); + device_allreduce( + handle.get_comms(), it, ret.data(), 1, raft::comms::op_t::MAX, handle.get_stream()); + return ret.value(handle.get_stream()); +} + +template +edge_t graph_view_t>::compute_max_in_degree(raft::handle_t const& + handle) const +{ + auto in_degrees = compute_in_degrees(handle); + auto it = thrust::max_element(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + in_degrees.begin(), + in_degrees.end()); + edge_t ret{}; + raft::update_host(&ret, it, 1, handle.get_stream()); + handle.get_stream_view().synchronize(); + return ret; +} + +template +edge_t +graph_view_t>:: + compute_max_out_degree(raft::handle_t const& handle) const +{ + auto out_degrees = compute_out_degrees(handle); + auto it = thrust::max_element(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + out_degrees.begin(), + out_degrees.end()); + rmm::device_scalar ret(handle.get_stream()); + device_allreduce( + handle.get_comms(), it, ret.data(), 1, raft::comms::op_t::MAX, handle.get_stream()); + return ret.value(handle.get_stream()); +} + +template +edge_t graph_view_t>::compute_max_out_degree(raft::handle_t const& + handle) const +{ + auto out_degrees = compute_out_degrees(handle); + auto it = thrust::max_element(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + out_degrees.begin(), + out_degrees.end()); + edge_t ret{}; + raft::update_host(&ret, it, 1, handle.get_stream()); + handle.get_stream_view().synchronize(); + return ret; +} + +template +weight_t +graph_view_t>:: + compute_max_in_weight_sum(raft::handle_t const& handle) const +{ + auto in_weight_sums = compute_in_weight_sums(handle); + auto it = thrust::max_element(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + in_weight_sums.begin(), + in_weight_sums.end()); + rmm::device_scalar ret(handle.get_stream()); + device_allreduce( + handle.get_comms(), it, ret.data(), 1, raft::comms::op_t::MAX, handle.get_stream()); + return ret.value(handle.get_stream()); +} + +template +weight_t graph_view_t>::compute_max_in_weight_sum(raft::handle_t const& + handle) const +{ + auto in_weight_sums = compute_in_weight_sums(handle); + auto it = thrust::max_element(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + in_weight_sums.begin(), + in_weight_sums.end()); + weight_t ret{}; + raft::update_host(&ret, it, 1, handle.get_stream()); + handle.get_stream_view().synchronize(); + return ret; +} + +template +weight_t +graph_view_t>:: + compute_max_out_weight_sum(raft::handle_t const& handle) const +{ + auto out_weight_sums = compute_out_weight_sums(handle); + auto it = thrust::max_element(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + out_weight_sums.begin(), + out_weight_sums.end()); + rmm::device_scalar ret(handle.get_stream()); + device_allreduce( + handle.get_comms(), it, 
ret.data(), 1, raft::comms::op_t::MAX, handle.get_stream()); + return ret.value(handle.get_stream()); +} + +template +weight_t graph_view_t< + vertex_t, + edge_t, + weight_t, + store_transposed, + multi_gpu, + std::enable_if_t>::compute_max_out_weight_sum(raft::handle_t const& handle) const +{ + auto out_weight_sums = compute_out_weight_sums(handle); + auto it = thrust::max_element(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + out_weight_sums.begin(), + out_weight_sums.end()); + weight_t ret{}; + raft::update_host(&ret, it, 1, handle.get_stream()); + handle.get_stream_view().synchronize(); + return ret; +} + // explicit instantiation template class graph_view_t; diff --git a/cpp/src/experimental/louvain.cuh b/cpp/src/experimental/louvain.cuh index 3136515faa6..24914fb028b 100644 --- a/cpp/src/experimental/louvain.cuh +++ b/cpp/src/experimental/louvain.cuh @@ -151,7 +151,8 @@ class Louvain { protected: void initialize_dendrogram_level(vertex_t num_vertices) { - dendrogram_->add_level(current_graph_view_.get_local_vertex_first(), num_vertices); + dendrogram_->add_level( + current_graph_view_.get_local_vertex_first(), num_vertices, handle_.get_stream()); thrust::sequence(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), dendrogram_->current_level_begin(), @@ -369,8 +370,6 @@ class Louvain { current_graph_view_.get_number_of_local_vertices(), handle_.get_stream()); rmm::device_uvector src_cluster_weights_v(next_cluster_v.size(), handle_.get_stream()); - rmm::device_uvector dst_cluster_weights_v(next_cluster_v.size(), - handle_.get_stream()); compute_cluster_sum_and_subtract(old_cluster_sum_v, cluster_subtract_v); @@ -396,19 +395,9 @@ class Louvain { vertex_to_gpu_id_op, handle_.get_stream()); - dst_cluster_weights_v = cugraph::experimental::collect_values_for_keys( - handle_.get_comms(), - cluster_keys_v_.begin(), - cluster_keys_v_.end(), - cluster_weights_v_.data(), - d_dst_cluster_cache_, - d_dst_cluster_cache_ + dst_cluster_cache_v_.size(), - vertex_to_gpu_id_op, - handle_.get_stream()); - - map_key_first = d_dst_cluster_cache_; - map_key_last = d_dst_cluster_cache_ + dst_cluster_cache_v_.size(); - map_value_first = dst_cluster_weights_v.begin(); + map_key_first = cluster_keys_v_.begin(); + map_key_last = cluster_keys_v_.end(); + map_value_first = cluster_weights_v_.begin(); } else { thrust::sort_by_key(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), cluster_keys_v_.begin(), @@ -432,12 +421,21 @@ class Louvain { map_value_first = src_cluster_weights_v.begin(); } + rmm::device_uvector src_old_cluster_sum_v( + current_graph_view_.get_number_of_local_adj_matrix_partition_rows(), handle_.get_stream()); + rmm::device_uvector src_cluster_subtract_v( + current_graph_view_.get_number_of_local_adj_matrix_partition_rows(), handle_.get_stream()); + copy_to_adj_matrix_row( + handle_, current_graph_view_, old_cluster_sum_v.begin(), src_old_cluster_sum_v.begin()); + copy_to_adj_matrix_row( + handle_, current_graph_view_, cluster_subtract_v.begin(), src_cluster_subtract_v.begin()); + copy_v_transform_reduce_key_aggregated_out_nbr( handle_, current_graph_view_, - thrust::make_zip_iterator(thrust::make_tuple(old_cluster_sum_v.begin(), + thrust::make_zip_iterator(thrust::make_tuple(src_old_cluster_sum_v.begin(), d_src_vertex_weights_cache_, - cluster_subtract_v.begin(), + src_cluster_subtract_v.begin(), d_src_cluster_cache_, src_cluster_weights_v.begin())), diff --git a/cpp/src/experimental/relabel.cu b/cpp/src/experimental/relabel.cu index 
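
// The compute_max_in_degree / compute_max_out_degree / compute_max_*_weight_sum
// specializations above all follow one pattern: compute the per-vertex values,
// locate the largest with thrust::max_element, then either copy that single
// element back to the host (single-GPU) or allreduce it with op_t::MAX across
// ranks (multi-GPU). A standalone sketch of the single-GPU half, with made-up
// degree values:
#include <thrust/device_vector.h>
#include <thrust/extrema.h>
#include <cstdio>
#include <vector>

int main()
{
  std::vector<int> h_out_degrees{3, 7, 2, 9, 4};
  thrust::device_vector<int> out_degrees(h_out_degrees.begin(), h_out_degrees.end());
  auto it = thrust::max_element(out_degrees.begin(), out_degrees.end());
  int max_out_degree = *it;  // single device-to-host copy of the maximum
  std::printf("max out-degree = %d\n", max_out_degree);  // 9
  return 0;
}
// In the multi-GPU specializations the local maximum is not copied to the host
// directly; it is reduced across processes with MAX before being returned.
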
62bd6951f71..8d8fb0322a8 100644 --- a/cpp/src/experimental/relabel.cu +++ b/cpp/src/experimental/relabel.cu @@ -42,6 +42,7 @@ namespace cugraph { namespace experimental { +// FIXME: think about requiring old_new_label_pairs to be pre-shuffled template void relabel(raft::handle_t const& handle, std::tuple old_new_label_pairs, @@ -120,7 +121,12 @@ void relabel(raft::handle_t const& handle, handle.get_stream())); // cuco::static_map currently does not take stream cuco::static_map relabel_map{ - static_cast(static_cast(rx_label_pair_old_labels.size()) / load_factor), + // FIXME: std::max(..., ...) as a temporary workaround for + // https://github.com/NVIDIA/cuCollections/issues/72 and + // https://github.com/NVIDIA/cuCollections/issues/73 + std::max( + static_cast(static_cast(rx_label_pair_old_labels.size()) / load_factor), + rx_label_pair_old_labels.size() + 1), invalid_vertex_id::value, invalid_vertex_id::value}; @@ -130,7 +136,11 @@ void relabel(raft::handle_t const& handle, [] __device__(auto val) { return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); }); - relabel_map.insert(pair_first, pair_first + rx_label_pair_old_labels.size()); + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the + // grid size is 0; this leads to cudaErrorInvaildConfiguration. + if (rx_label_pair_old_labels.size() > 0) { + relabel_map.insert(pair_first, pair_first + rx_label_pair_old_labels.size()); + } rx_label_pair_old_labels.resize(0, handle.get_stream()); rx_label_pair_new_labels.resize(0, handle.get_stream()); @@ -152,19 +162,29 @@ void relabel(raft::handle_t const& handle, CUDA_TRY(cudaStreamSynchronize( handle.get_stream())); // cuco::static_map currently does not take stream - relabel_map.find( - rx_unique_old_labels.begin(), - rx_unique_old_labels.end(), - rx_unique_old_labels - .begin()); // now rx_unique_old_lables hold new labels for the corresponding old labels + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the + // grid size is 0; this leads to cudaErrorInvaildConfiguration. + if (rx_unique_old_labels.size() > 0) { + relabel_map.find( + rx_unique_old_labels.begin(), + rx_unique_old_labels.end(), + rx_unique_old_labels.begin()); // now rx_unique_old_lables hold new labels for the + // corresponding old labels + } std::tie(new_labels_for_unique_old_labels, std::ignore) = shuffle_values( handle.get_comms(), rx_unique_old_labels.begin(), rx_value_counts, handle.get_stream()); } } + handle.get_stream_view().synchronize(); // cuco::static_map currently does not take stream + cuco::static_map relabel_map( - static_cast(static_cast(unique_old_labels.size()) / load_factor), + // FIXME: std::max(..., ...) as a temporary workaround for + // https://github.com/NVIDIA/cuCollections/issues/72 and + // https://github.com/NVIDIA/cuCollections/issues/73 + std::max(static_cast(static_cast(unique_old_labels.size()) / load_factor), + unique_old_labels.size() + 1), invalid_vertex_id::value, invalid_vertex_id::value); @@ -175,11 +195,21 @@ void relabel(raft::handle_t const& handle, return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); }); - relabel_map.insert(pair_first, pair_first + unique_old_labels.size()); - relabel_map.find(labels, labels + num_labels, labels); + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. 
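
// The cuco::static_map workarounds above boil down to two rules: size the map
// as max(n / load_factor, n + 1) so the capacity is never zero (or too tight)
// for small n, and skip insert()/find() entirely when the input range is
// empty. A host-only helper capturing the capacity rule (the function name is
// illustrative, not part of the patch; 0.7 matches the load_factor used here):
#include <algorithm>
#include <cstddef>
#include <cstdio>

std::size_t workaround_map_capacity(std::size_t num_pairs, double load_factor = 0.7)
{
  return std::max(static_cast<std::size_t>(static_cast<double>(num_pairs) / load_factor),
                  num_pairs + 1);
}

int main()
{
  // even num_pairs == 0 yields a non-zero capacity, avoiding the zero-capacity case
  std::printf("%zu %zu %zu\n",
              workaround_map_capacity(0),     // 1
              workaround_map_capacity(10),    // 14
              workaround_map_capacity(1000)); // 1428
  return 0;
}
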
+ if (unique_old_labels.size() > 0) { + relabel_map.insert(pair_first, pair_first + unique_old_labels.size()); + } + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. + if (num_labels > 0) { relabel_map.find(labels, labels + num_labels, labels); } } else { cuco::static_map relabel_map( - static_cast(static_cast(num_label_pairs) / load_factor), + // FIXME: std::max(..., ...) as a temporary workaround for + // https://github.com/NVIDIA/cuCollections/issues/72 and + // https://github.com/NVIDIA/cuCollections/issues/73 + std::max(static_cast(static_cast(num_label_pairs) / load_factor), + static_cast(num_label_pairs) + 1), invalid_vertex_id::value, invalid_vertex_id::value); @@ -190,8 +220,12 @@ void relabel(raft::handle_t const& handle, return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); }); - relabel_map.insert(pair_first, pair_first + num_label_pairs); - relabel_map.find(labels, labels + num_labels, labels); + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. + if (num_label_pairs > 0) { relabel_map.insert(pair_first, pair_first + num_label_pairs); } + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. + if (num_labels > 0) { relabel_map.find(labels, labels + num_labels, labels); } } if (do_expensive_check) { diff --git a/cpp/src/experimental/renumber_edgelist.cu b/cpp/src/experimental/renumber_edgelist.cu index a8847167b87..127bd507271 100644 --- a/cpp/src/experimental/renumber_edgelist.cu +++ b/cpp/src/experimental/renumber_edgelist.cu @@ -50,62 +50,153 @@ rmm::device_uvector compute_renumber_map( raft::handle_t const& handle, vertex_t const* vertices, vertex_t num_local_vertices /* relevant only if vertices != nullptr */, - vertex_t const* edgelist_major_vertices, - vertex_t const* edgelist_minor_vertices, - edge_t num_edgelist_edges) + std::vector const& edgelist_major_vertices, + std::vector const& edgelist_minor_vertices, + std::vector const& edgelist_edge_counts) { // FIXME: compare this sort based approach with hash based approach in both speed and memory // footprint // 1. 
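
// The per-partition step below first sorts the copied major labels and then
// counts how many distinct values the sorted sequence contains (a count_if
// over adjacent elements), so that the reduce_by_key outputs can be sized
// exactly. A standalone sketch of that "count runs in a sorted range" idea
// (toy inputs; assumes nvcc is invoked with --extended-lambda so the device
// lambda compiles):
#include <thrust/count.h>
#include <thrust/device_ptr.h>
#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/sort.h>
#include <cstddef>
#include <cstdio>
#include <vector>

int main()
{
  std::vector<int> h_labels{5, 1, 5, 3, 1, 5};
  thrust::device_vector<int> labels(h_labels.begin(), h_labels.end());
  thrust::sort(labels.begin(), labels.end());  // 1 1 3 5 5 5

  auto num_unique = thrust::count_if(
    thrust::device,
    thrust::make_counting_iterator(std::size_t{0}),
    thrust::make_counting_iterator(labels.size()),
    [p = thrust::raw_pointer_cast(labels.data())] __device__(std::size_t i) {
      return (i == 0) || (p[i - 1] != p[i]);  // true at the start of each run
    });
  std::printf("unique labels = %ld\n", static_cast<long>(num_unique));  // 3
  return 0;
}
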
acquire (unique major label, count) pairs - rmm::device_uvector tmp_labels(num_edgelist_edges, handle.get_stream()); - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - edgelist_major_vertices, - edgelist_major_vertices + num_edgelist_edges, - tmp_labels.begin()); - thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - tmp_labels.begin(), - tmp_labels.end()); - rmm::device_uvector major_labels(tmp_labels.size(), handle.get_stream()); - rmm::device_uvector major_counts(major_labels.size(), handle.get_stream()); - auto major_pair_it = - thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - tmp_labels.begin(), - tmp_labels.end(), - thrust::make_constant_iterator(edge_t{1}), - major_labels.begin(), - major_counts.begin()); - tmp_labels.resize(0, handle.get_stream()); - tmp_labels.shrink_to_fit(handle.get_stream()); - major_labels.resize(thrust::distance(major_labels.begin(), thrust::get<0>(major_pair_it)), - handle.get_stream()); - major_counts.resize(major_labels.size(), handle.get_stream()); - major_labels.shrink_to_fit(handle.get_stream()); - major_counts.shrink_to_fit(handle.get_stream()); + rmm::device_uvector major_labels(0, handle.get_stream()); + rmm::device_uvector major_counts(0, handle.get_stream()); + for (size_t i = 0; i < edgelist_major_vertices.size(); ++i) { + rmm::device_uvector tmp_major_labels(0, handle.get_stream()); + rmm::device_uvector tmp_major_counts(0, handle.get_stream()); + { + rmm::device_uvector sorted_major_labels(edgelist_edge_counts[i], + handle.get_stream()); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edgelist_major_vertices[i], + edgelist_major_vertices[i] + edgelist_edge_counts[i], + sorted_major_labels.begin()); + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + sorted_major_labels.begin(), + sorted_major_labels.end()); + auto num_unique_labels = + thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(sorted_major_labels.size()), + [labels = sorted_major_labels.data()] __device__(auto i) { + return (i == 0) || (labels[i - 1] != labels[i]); + }); + tmp_major_labels.resize(num_unique_labels, handle.get_stream()); + tmp_major_counts.resize(tmp_major_labels.size(), handle.get_stream()); + thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + sorted_major_labels.begin(), + sorted_major_labels.end(), + thrust::make_constant_iterator(edge_t{1}), + tmp_major_labels.begin(), + tmp_major_counts.begin()); + } + + if (multi_gpu) { + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_rank = col_comm.get_rank(); + auto const col_comm_size = col_comm.get_size(); + + rmm::device_uvector rx_major_labels(0, handle.get_stream()); + rmm::device_uvector rx_major_counts(0, handle.get_stream()); + auto rx_sizes = host_scalar_gather( + col_comm, tmp_major_labels.size(), static_cast(i), handle.get_stream()); + std::vector rx_displs{}; + if (static_cast(i) == col_comm_rank) { + rx_displs.assign(col_comm_size, size_t{0}); + std::partial_sum(rx_sizes.begin(), rx_sizes.end() - 1, rx_displs.begin() + 1); + rx_major_labels.resize(rx_displs.back() + rx_sizes.back(), handle.get_stream()); + rx_major_counts.resize(rx_major_labels.size(), handle.get_stream()); + } + device_gatherv(col_comm, + thrust::make_zip_iterator( + 
thrust::make_tuple(tmp_major_labels.begin(), tmp_major_counts.begin())), + thrust::make_zip_iterator( + thrust::make_tuple(rx_major_labels.begin(), rx_major_counts.begin())), + tmp_major_labels.size(), + rx_sizes, + rx_displs, + static_cast(i), + handle.get_stream()); + if (static_cast(i) == col_comm_rank) { + thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rx_major_labels.begin(), + rx_major_labels.end(), + rx_major_counts.begin()); + major_labels.resize(rx_major_labels.size(), handle.get_stream()); + major_counts.resize(major_labels.size(), handle.get_stream()); + auto pair_it = + thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rx_major_labels.begin(), + rx_major_labels.end(), + rx_major_counts.begin(), + major_labels.begin(), + major_counts.begin()); + major_labels.resize(thrust::distance(major_labels.begin(), thrust::get<0>(pair_it)), + handle.get_stream()); + major_counts.resize(major_labels.size(), handle.get_stream()); + major_labels.shrink_to_fit(handle.get_stream()); + major_counts.shrink_to_fit(handle.get_stream()); + } + } else { + tmp_major_labels.shrink_to_fit(handle.get_stream()); + tmp_major_counts.shrink_to_fit(handle.get_stream()); + major_labels = std::move(tmp_major_labels); + major_counts = std::move(tmp_major_counts); + } + } // 2. acquire unique minor labels - rmm::device_uvector minor_labels(num_edgelist_edges, handle.get_stream()); - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - edgelist_minor_vertices, - edgelist_minor_vertices + num_edgelist_edges, - minor_labels.begin()); + std::vector minor_displs(edgelist_minor_vertices.size(), edge_t{0}); + std::partial_sum( + edgelist_edge_counts.begin(), edgelist_edge_counts.end() - 1, minor_displs.begin() + 1); + rmm::device_uvector minor_labels(minor_displs.back() + edgelist_edge_counts.back(), + handle.get_stream()); + for (size_t i = 0; i < edgelist_minor_vertices.size(); ++i) { + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edgelist_minor_vertices[i], + edgelist_minor_vertices[i] + edgelist_edge_counts[i], + minor_labels.begin() + minor_displs[i]); + } thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), minor_labels.begin(), minor_labels.end()); - auto minor_label_it = - thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - minor_labels.begin(), - minor_labels.end()); - minor_labels.resize(thrust::distance(minor_labels.begin(), minor_label_it), handle.get_stream()); + minor_labels.resize( + thrust::distance(minor_labels.begin(), + thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + minor_labels.begin(), + minor_labels.end())), + handle.get_stream()); + if (multi_gpu) { + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_size = row_comm.get_size(); + + rmm::device_uvector rx_minor_labels(0, handle.get_stream()); + std::tie(rx_minor_labels, std::ignore) = groupby_gpuid_and_shuffle_values( + row_comm, + minor_labels.begin(), + minor_labels.end(), + [key_func = detail::compute_gpu_id_from_vertex_t{row_comm_size}] __device__( + auto val) { return key_func(val); }, + handle.get_stream()); + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rx_minor_labels.begin(), + rx_minor_labels.end()); + rx_minor_labels.resize( + thrust::distance( + rx_minor_labels.begin(), + 
thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rx_minor_labels.begin(), + rx_minor_labels.end())), + handle.get_stream()); + minor_labels = std::move(rx_minor_labels); + } minor_labels.shrink_to_fit(handle.get_stream()); // 3. merge major and minor labels and vertex labels rmm::device_uvector merged_labels(major_labels.size() + minor_labels.size(), handle.get_stream()); - rmm::device_uvector merged_counts(merged_labels.size(), handle.get_stream()); thrust::merge_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), major_labels.begin(), @@ -142,47 +233,7 @@ rmm::device_uvector compute_renumber_map( labels.shrink_to_fit(handle.get_stream()); counts.shrink_to_fit(handle.get_stream()); - // 4. if multi-GPU, shuffle and reduce (label, count) pairs - - if (multi_gpu) { - auto& comm = handle.get_comms(); - auto const comm_size = comm.get_size(); - - auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(labels.begin(), counts.begin())); - rmm::device_uvector rx_labels(0, handle.get_stream()); - rmm::device_uvector rx_counts(0, handle.get_stream()); - std::forward_as_tuple(std::tie(rx_labels, rx_counts), std::ignore) = - groupby_gpuid_and_shuffle_values( - comm, - pair_first, - pair_first + labels.size(), - [key_func = detail::compute_gpu_id_from_vertex_t{comm_size}] __device__( - auto val) { return key_func(thrust::get<0>(val)); }, - handle.get_stream()); - - labels.resize(rx_labels.size(), handle.get_stream()); - counts.resize(labels.size(), handle.get_stream()); - thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - rx_labels.begin(), - rx_labels.end(), - rx_counts.begin()); - pair_it = thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - rx_labels.begin(), - rx_labels.end(), - rx_counts.begin(), - labels.begin(), - counts.begin()); - rx_labels.resize(0, handle.get_stream()); - rx_counts.resize(0, handle.get_stream()); - rx_labels.shrink_to_fit(handle.get_stream()); - rx_counts.shrink_to_fit(handle.get_stream()); - labels.resize(thrust::distance(labels.begin(), thrust::get<0>(pair_it)), handle.get_stream()); - counts.resize(labels.size(), handle.get_stream()); - labels.shrink_to_fit(handle.get_stream()); - labels.shrink_to_fit(handle.get_stream()); - } - - // 5. if vertices != nullptr, add isolated vertices + // 4. if vertices != nullptr, add isolated vertices rmm::device_uvector isolated_vertices(0, handle.get_stream()); if (vertices != nullptr) { @@ -232,10 +283,9 @@ void expensive_check_edgelist( raft::handle_t const& handle, vertex_t const* local_vertices, vertex_t num_local_vertices /* relevant only if local_vertices != nullptr */, - vertex_t const* edgelist_major_vertices, - vertex_t const* edgelist_minor_vertices, - edge_t num_edgelist_edges, - bool is_hypergraph_partitioned /* relevant only if multi_gpu == true */) + std::vector const& edgelist_major_vertices, + std::vector const& edgelist_minor_vertices, + std::vector const& edgelist_edge_counts) { rmm::device_uvector sorted_local_vertices( local_vertices != nullptr ? 
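
// Step 3 of compute_renumber_map above merges the sorted unique major labels
// (which carry occurrence counts) with the sorted unique minor labels into one
// sorted sequence using thrust::merge_by_key. The essential operation is a
// merge of two sorted ranges followed by deduplication/reduction; the counts
// and the subsequent reduce_by_key are omitted in this toy sketch.
#include <thrust/device_vector.h>
#include <thrust/distance.h>
#include <thrust/merge.h>
#include <thrust/unique.h>
#include <cstdio>
#include <vector>

int main()
{
  std::vector<int> h_major{1, 3, 5};     // sorted unique major labels
  std::vector<int> h_minor{2, 3, 4, 5};  // sorted unique minor labels
  thrust::device_vector<int> major(h_major.begin(), h_major.end());
  thrust::device_vector<int> minor(h_minor.begin(), h_minor.end());

  thrust::device_vector<int> merged(major.size() + minor.size());
  thrust::merge(major.begin(), major.end(), minor.begin(), minor.end(), merged.begin());
  auto end = thrust::unique(merged.begin(), merged.end());  // drop labels seen on both sides
  merged.resize(thrust::distance(merged.begin(), end));
  std::printf("distinct labels = %zu\n", merged.size());  // 5: 1 2 3 4 5
  return 0;
}
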
num_local_vertices : vertex_t{0}, handle.get_stream()); @@ -246,6 +296,12 @@ void expensive_check_edgelist( thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), sorted_local_vertices.begin(), sorted_local_vertices.end()); + CUGRAPH_EXPECTS(static_cast(thrust::distance( + sorted_local_vertices.begin(), + thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + sorted_local_vertices.begin(), + sorted_local_vertices.end()))) == sorted_local_vertices.size(), + "Invalid input argument: local_vertices should not have duplicates."); if (multi_gpu) { auto& comm = handle.get_comms(); @@ -253,8 +309,15 @@ void expensive_check_edgelist( auto const comm_rank = comm.get_rank(); auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); auto const row_comm_size = row_comm.get_size(); + auto const row_comm_rank = row_comm.get_rank(); auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); auto const col_comm_size = col_comm.get_size(); + auto const col_comm_rank = col_comm.get_rank(); + + CUGRAPH_EXPECTS((edgelist_major_vertices.size() == edgelist_minor_vertices.size()) && + (edgelist_major_vertices.size() == static_cast(col_comm_size)), + "Invalid input argument: both edgelist_major_vertices.size() & " + "edgelist_minor_vertices.size() should coincide with col_comm_size."); CUGRAPH_EXPECTS( thrust::count_if( @@ -268,95 +331,127 @@ void expensive_check_edgelist( }) == 0, "Invalid input argument: local_vertices should be pre-shuffled."); - auto edge_first = thrust::make_zip_iterator( - thrust::make_tuple(edgelist_major_vertices, edgelist_minor_vertices)); - CUGRAPH_EXPECTS( - thrust::count_if( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - edge_first, - edge_first + num_edgelist_edges, - [comm_rank, - key_func = - detail::compute_gpu_id_from_edge_t{is_hypergraph_partitioned, - comm_size, - row_comm_size, - col_comm_size}] __device__(auto edge) { - return key_func(thrust::get<0>(edge), thrust::get<1>(edge)) != comm_rank; - }) == 0, - "Invalid input argument: edgelist_major_vertices & edgelist_minor_vertices should be " - "pre-shuffled."); - - if (local_vertices != nullptr) { - rmm::device_uvector unique_edge_vertices(num_edgelist_edges * 2, - handle.get_stream()); - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - edgelist_major_vertices, - edgelist_major_vertices + num_edgelist_edges, - unique_edge_vertices.begin()); - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - edgelist_minor_vertices, - edgelist_minor_vertices + num_edgelist_edges, - unique_edge_vertices.begin() + num_edgelist_edges); - thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - unique_edge_vertices.begin(), - unique_edge_vertices.end()); - unique_edge_vertices.resize( - thrust::distance( - unique_edge_vertices.begin(), - thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - unique_edge_vertices.begin(), - unique_edge_vertices.end())), - handle.get_stream()); - - rmm::device_uvector rx_unique_edge_vertices(0, handle.get_stream()); - std::tie(rx_unique_edge_vertices, std::ignore) = groupby_gpuid_and_shuffle_values( - handle.get_comms(), - unique_edge_vertices.begin(), - unique_edge_vertices.end(), - [key_func = detail::compute_gpu_id_from_vertex_t{comm_size}] __device__( - auto val) { return key_func(val); }, - handle.get_stream()); - - unique_edge_vertices = std::move(rx_unique_edge_vertices); - + for 
(size_t i = 0; i < edgelist_major_vertices.size(); ++i) { + auto edge_first = thrust::make_zip_iterator( + thrust::make_tuple(edgelist_major_vertices[i], edgelist_minor_vertices[i])); CUGRAPH_EXPECTS( thrust::count_if( rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - unique_edge_vertices.begin(), - unique_edge_vertices.end(), - [num_local_vertices, - sorted_local_vertices = sorted_local_vertices.data()] __device__(auto v) { - return !thrust::binary_search( - thrust::seq, sorted_local_vertices, sorted_local_vertices + num_local_vertices, v); + edge_first, + edge_first + edgelist_edge_counts[i], + [comm_size, + comm_rank, + row_comm_rank, + col_comm_size, + col_comm_rank, + i, + gpu_id_key_func = + detail::compute_gpu_id_from_edge_t{comm_size, row_comm_size, col_comm_size}, + partition_id_key_func = + detail::compute_partition_id_from_edge_t{ + comm_size, row_comm_size, col_comm_size}] __device__(auto edge) { + return (gpu_id_key_func(thrust::get<0>(edge), thrust::get<1>(edge)) != comm_rank) || + (partition_id_key_func(thrust::get<0>(edge), thrust::get<1>(edge)) != + row_comm_rank * col_comm_size + col_comm_rank + i * comm_size); }) == 0, - "Invalid input argument: edgelist_major_vertices and/or edgelist_minor_vertices have " - "invalid vertex ID(s)."); + "Invalid input argument: edgelist_major_vertices & edgelist_minor_vertices should be " + "pre-shuffled."); + + auto aggregate_vertexlist_size = host_scalar_allreduce( + comm, + local_vertices != nullptr ? num_local_vertices : vertex_t{0}, + handle.get_stream()); // local_vertices != nullptr is insufficient in multi-GPU as only a + // subset of GPUs may have a non-zero vertices + if (aggregate_vertexlist_size > 0) { + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + + rmm::device_uvector sorted_major_vertices(0, handle.get_stream()); + { + auto recvcounts = + host_scalar_allgather(col_comm, sorted_local_vertices.size(), handle.get_stream()); + std::vector displacements(recvcounts.size(), size_t{0}); + std::partial_sum(recvcounts.begin(), recvcounts.end() - 1, displacements.begin() + 1); + sorted_major_vertices.resize(displacements.back() + recvcounts.back(), + handle.get_stream()); + device_allgatherv(col_comm, + sorted_local_vertices.data(), + sorted_major_vertices.data(), + recvcounts, + displacements, + handle.get_stream()); + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + sorted_major_vertices.begin(), + sorted_major_vertices.end()); + } + + rmm::device_uvector sorted_minor_vertices(0, handle.get_stream()); + { + auto recvcounts = + host_scalar_allgather(row_comm, sorted_local_vertices.size(), handle.get_stream()); + std::vector displacements(recvcounts.size(), size_t{0}); + std::partial_sum(recvcounts.begin(), recvcounts.end() - 1, displacements.begin() + 1); + sorted_minor_vertices.resize(displacements.back() + recvcounts.back(), + handle.get_stream()); + device_allgatherv(row_comm, + sorted_local_vertices.data(), + sorted_minor_vertices.data(), + recvcounts, + displacements, + handle.get_stream()); + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + sorted_minor_vertices.begin(), + sorted_minor_vertices.end()); + } + + auto edge_first = thrust::make_zip_iterator( + thrust::make_tuple(edgelist_major_vertices[i], edgelist_minor_vertices[i])); + CUGRAPH_EXPECTS( + thrust::count_if( + 
rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edge_first, + edge_first + edgelist_edge_counts[i], + [num_major_vertices = static_cast(sorted_major_vertices.size()), + sorted_major_vertices = sorted_major_vertices.data(), + num_minor_vertices = static_cast(sorted_minor_vertices.size()), + sorted_minor_vertices = sorted_minor_vertices.data()] __device__(auto e) { + return !thrust::binary_search(thrust::seq, + sorted_major_vertices, + sorted_major_vertices + num_major_vertices, + thrust::get<0>(e)) || + !thrust::binary_search(thrust::seq, + sorted_minor_vertices, + sorted_minor_vertices + num_minor_vertices, + thrust::get<1>(e)); + }) == 0, + "Invalid input argument: edgelist_major_vertices and/or edgelist_mior_vertices have " + "invalid vertex ID(s)."); + } } } else { - if (local_vertices != nullptr) { - CUGRAPH_EXPECTS( - thrust::count_if( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - edgelist_major_vertices, - edgelist_major_vertices + num_edgelist_edges, - [num_local_vertices, - sorted_local_vertices = sorted_local_vertices.data()] __device__(auto v) { - return !thrust::binary_search( - thrust::seq, sorted_local_vertices, sorted_local_vertices + num_local_vertices, v); - }) == 0, - "Invalid input argument: edgelist_major_vertices has invalid vertex ID(s)."); + assert(edgelist_major_vertices.size() == 1); + assert(edgelist_minor_vertices.size() == 1); + if (local_vertices != nullptr) { + auto edge_first = thrust::make_zip_iterator( + thrust::make_tuple(edgelist_major_vertices[0], edgelist_minor_vertices[0])); CUGRAPH_EXPECTS( - thrust::count_if( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - edgelist_major_vertices, - edgelist_major_vertices + num_edgelist_edges, - [num_local_vertices, - sorted_local_vertices = sorted_local_vertices.data()] __device__(auto v) { - return !thrust::binary_search( - thrust::seq, sorted_local_vertices, sorted_local_vertices + num_local_vertices, v); - }) == 0, - "Invalid input argument: edgelist_major_vertices has invalid vertex ID(s)."); + thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edge_first, + edge_first + edgelist_edge_counts[0], + [num_local_vertices, + sorted_local_vertices = sorted_local_vertices.data()] __device__(auto e) { + return !thrust::binary_search(thrust::seq, + sorted_local_vertices, + sorted_local_vertices + num_local_vertices, + thrust::get<0>(e)) || + !thrust::binary_search(thrust::seq, + sorted_local_vertices, + sorted_local_vertices + num_local_vertices, + thrust::get<1>(e)); + }) == 0, + "Invalid input argument: edgelist_major_vertices and/or edgelist_minor_vertices have " + "invalid vertex ID(s)."); } } } @@ -368,15 +463,15 @@ std::enable_if_t const& edgelist_major_vertices /* [INOUT] */, + std::vector const& edgelist_minor_vertices /* [INOUT] */, + std::vector const& edgelist_edge_counts, bool do_expensive_check) { // FIXME: remove this check once we drop Pascal support - CUGRAPH_EXPECTS(handle.get_device_properties().major >= 7, - "Relabel not supported on Pascal and older architectures."); + CUGRAPH_EXPECTS( + handle.get_device_properties().major >= 7, + "This version of enumber_edgelist not supported on Pascal and older architectures."); #ifdef CUCO_STATIC_MAP_DEFINED auto& comm = handle.get_comms(); @@ -389,14 +484,20 @@ renumber_edgelist(raft::handle_t const& handle, auto const col_comm_size = col_comm.get_size(); auto const col_comm_rank = col_comm.get_rank(); + std::vector 
edgelist_const_major_vertices(edgelist_major_vertices.size()); + std::vector edgelist_const_minor_vertices(edgelist_const_major_vertices.size()); + for (size_t i = 0; i < edgelist_const_major_vertices.size(); ++i) { + edgelist_const_major_vertices[i] = edgelist_major_vertices[i]; + edgelist_const_minor_vertices[i] = edgelist_minor_vertices[i]; + } + if (do_expensive_check) { expensive_check_edgelist(handle, local_vertices, num_local_vertices, - edgelist_major_vertices, - edgelist_minor_vertices, - num_edgelist_edges, - is_hypergraph_partitioned); + edgelist_const_major_vertices, + edgelist_const_minor_vertices, + edgelist_edge_counts); } // 1. compute renumber map @@ -405,142 +506,129 @@ renumber_edgelist(raft::handle_t const& handle, detail::compute_renumber_map(handle, local_vertices, num_local_vertices, - edgelist_major_vertices, - edgelist_minor_vertices, - num_edgelist_edges); + edgelist_const_major_vertices, + edgelist_const_minor_vertices, + edgelist_edge_counts); // 2. initialize partition_t object, number_of_vertices, and number_of_edges for the coarsened // graph - auto vertex_partition_counts = host_scalar_allgather( + auto vertex_counts = host_scalar_allgather( comm, static_cast(renumber_map_labels.size()), handle.get_stream()); std::vector vertex_partition_offsets(comm_size + 1, 0); - std::partial_sum(vertex_partition_counts.begin(), - vertex_partition_counts.end(), - vertex_partition_offsets.begin() + 1); + std::partial_sum( + vertex_counts.begin(), vertex_counts.end(), vertex_partition_offsets.begin() + 1); - partition_t partition(vertex_partition_offsets, - is_hypergraph_partitioned, - row_comm_size, - col_comm_size, - row_comm_rank, - col_comm_rank); + partition_t partition( + vertex_partition_offsets, row_comm_size, col_comm_size, row_comm_rank, col_comm_rank); auto number_of_vertices = vertex_partition_offsets.back(); - auto number_of_edges = host_scalar_allreduce(comm, num_edgelist_edges, handle.get_stream()); + auto number_of_edges = host_scalar_allreduce( + comm, + std::accumulate(edgelist_edge_counts.begin(), edgelist_edge_counts.end(), edge_t{0}), + handle.get_stream()); // 3. 
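
// The vertex_partition_offsets computation above is a plain prefix sum over
// the per-GPU vertex counts gathered with host_scalar_allgather: the partial
// sums are written starting at offsets[1] so offsets[0] stays 0 and
// offsets.back() is the global vertex count. A host-only illustration with
// made-up counts:
#include <cstddef>
#include <cstdio>
#include <numeric>
#include <vector>

int main()
{
  std::vector<long> vertex_counts{4, 3, 5, 2};  // one entry per GPU (toy values)
  std::vector<long> vertex_partition_offsets(vertex_counts.size() + 1, 0);
  std::partial_sum(vertex_counts.begin(), vertex_counts.end(),
                   vertex_partition_offsets.begin() + 1);  // 0 4 7 12 14

  // GPU i owns vertices in [offsets[i], offsets[i + 1]); offsets.back() == 14
  for (std::size_t i = 0; i < vertex_counts.size(); ++i) {
    std::printf("rank %zu: [%ld, %ld)\n",
                i, vertex_partition_offsets[i], vertex_partition_offsets[i + 1]);
  }
  return 0;
}
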
renumber edges - if (is_hypergraph_partitioned) { - CUGRAPH_FAIL("unimplemented."); - } else { - double constexpr load_factor = 0.7; + double constexpr load_factor = 0.7; - // FIXME: compare this hash based approach with a binary search based approach in both memory - // footprint and execution time + // FIXME: compare this hash based approach with a binary search based approach in both memory + // footprint and execution time - { - vertex_t major_first{}; - vertex_t major_last{}; - std::tie(major_first, major_last) = partition.get_matrix_partition_major_range(0); - rmm::device_uvector renumber_map_major_labels(major_last - major_first, - handle.get_stream()); - std::vector recvcounts(row_comm_size); - for (int i = 0; i < row_comm_size; ++i) { - recvcounts[i] = partition.get_vertex_partition_size(col_comm_rank * row_comm_size + i); - } - std::vector displacements(row_comm_size, 0); - std::partial_sum(recvcounts.begin(), recvcounts.end() - 1, displacements.begin() + 1); - device_allgatherv(row_comm, - renumber_map_labels.begin(), - renumber_map_major_labels.begin(), - recvcounts, - displacements, - handle.get_stream()); - - CUDA_TRY(cudaStreamSynchronize( - handle.get_stream())); // cuco::static_map currently does not take stream - - cuco::static_map renumber_map{ - static_cast(static_cast(renumber_map_major_labels.size()) / load_factor), - invalid_vertex_id::value, - invalid_vertex_id::value}; - auto pair_first = thrust::make_transform_iterator( - thrust::make_zip_iterator(thrust::make_tuple(renumber_map_major_labels.begin(), - thrust::make_counting_iterator(major_first))), - [] __device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); - }); - renumber_map.insert(pair_first, pair_first + renumber_map_major_labels.size()); - renumber_map.find(edgelist_major_vertices, - edgelist_major_vertices + num_edgelist_edges, - edgelist_major_vertices); + for (size_t i = 0; i < edgelist_major_vertices.size(); ++i) { + rmm::device_uvector renumber_map_major_labels( + col_comm_rank == static_cast(i) ? vertex_t{0} + : partition.get_matrix_partition_major_size(i), + handle.get_stream()); + device_bcast(col_comm, + renumber_map_labels.data(), + renumber_map_major_labels.data(), + partition.get_matrix_partition_major_size(i), + i, + handle.get_stream()); + + CUDA_TRY(cudaStreamSynchronize( + handle.get_stream())); // cuco::static_map currently does not take stream + + cuco::static_map renumber_map{ + // FIXME: std::max(..., ...) as a temporary workaround for + // https://github.com/NVIDIA/cuCollections/issues/72 and + // https://github.com/NVIDIA/cuCollections/issues/73 + std::max(static_cast( + static_cast(partition.get_matrix_partition_major_size(i)) / load_factor), + static_cast(partition.get_matrix_partition_major_size(i)) + 1), + invalid_vertex_id::value, + invalid_vertex_id::value}; + auto pair_first = thrust::make_transform_iterator( + thrust::make_zip_iterator(thrust::make_tuple( + col_comm_rank == static_cast(i) ? renumber_map_labels.begin() + : renumber_map_major_labels.begin(), + thrust::make_counting_iterator(partition.get_matrix_partition_major_first(i)))), + [] __device__(auto val) { + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + }); + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. 
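
// Conceptually, each iteration above builds a map from an old vertex ID to its
// new ID -- the label's position in the broadcast renumber map plus the
// partition's first (new) vertex ID -- and then rewrites the edge endpoints in
// place with find(). A host-side analogue using std::unordered_map instead of
// the on-device cuco::static_map (variable names and toy values below are
// illustrative only):
#include <cstddef>
#include <cstdio>
#include <unordered_map>
#include <vector>

int main()
{
  std::vector<int> renumber_map_labels{42, 7, 19};  // old labels, listed in new-ID order
  int major_first = 100;                            // first new ID of this partition (toy value)

  std::unordered_map<int, int> old_to_new;
  for (std::size_t i = 0; i < renumber_map_labels.size(); ++i) {
    old_to_new[renumber_map_labels[i]] = major_first + static_cast<int>(i);
  }

  std::vector<int> edgelist_major_vertices{7, 42, 19, 7};  // old IDs
  for (auto& v : edgelist_major_vertices) { v = old_to_new.at(v); }  // in-place renumbering

  for (auto v : edgelist_major_vertices) { std::printf("%d ", v); }  // 101 100 102 101
  std::printf("\n");
  return 0;
}
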
+ if (partition.get_matrix_partition_major_size(i) > 0) { + renumber_map.insert(pair_first, pair_first + partition.get_matrix_partition_major_size(i)); } + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. + if (edgelist_edge_counts[i]) { + renumber_map.find(edgelist_major_vertices[i], + edgelist_major_vertices[i] + edgelist_edge_counts[i], + edgelist_major_vertices[i]); + } + } - { - vertex_t minor_first{}; - vertex_t minor_last{}; - std::tie(minor_first, minor_last) = partition.get_matrix_partition_minor_range(); - rmm::device_uvector renumber_map_minor_labels(minor_last - minor_first, - handle.get_stream()); - - // FIXME: this P2P is unnecessary if we apply the partitioning scheme used with hypergraph - // partitioning - auto comm_src_rank = row_comm_rank * col_comm_size + col_comm_rank; - auto comm_dst_rank = (comm_rank % col_comm_size) * row_comm_size + comm_rank / col_comm_size; - // FIXME: this branch may be no longer necessary with NCCL backend - if (comm_src_rank == comm_rank) { - assert(comm_dst_rank == comm_rank); - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - renumber_map_labels.begin(), - renumber_map_labels.end(), - renumber_map_minor_labels.begin() + - (partition.get_vertex_partition_first(comm_src_rank) - - partition.get_vertex_partition_first(row_comm_rank * col_comm_size))); - } else { - device_sendrecv(comm, - renumber_map_labels.begin(), - renumber_map_labels.size(), - comm_dst_rank, - renumber_map_minor_labels.begin() + - (partition.get_vertex_partition_first(comm_src_rank) - - partition.get_vertex_partition_first(row_comm_rank * col_comm_size)), - static_cast(partition.get_vertex_partition_size(comm_src_rank)), - comm_src_rank, - handle.get_stream()); - } - - // FIXME: these broadcast operations can be placed between ncclGroupStart() and - // ncclGroupEnd() - for (int i = 0; i < col_comm_size; ++i) { - auto offset = partition.get_vertex_partition_first(row_comm_rank * col_comm_size + i) - - partition.get_vertex_partition_first(row_comm_rank * col_comm_size); - auto count = partition.get_vertex_partition_size(row_comm_rank * col_comm_size + i); - device_bcast(col_comm, - renumber_map_minor_labels.begin() + offset, - renumber_map_minor_labels.begin() + offset, - count, - i, - handle.get_stream()); - } + { + rmm::device_uvector renumber_map_minor_labels( + partition.get_matrix_partition_minor_size(), handle.get_stream()); + std::vector recvcounts(row_comm_size); + for (int i = 0; i < row_comm_size; ++i) { + recvcounts[i] = partition.get_vertex_partition_size(col_comm_rank * row_comm_size + i); + } + std::vector displacements(recvcounts.size(), 0); + std::partial_sum(recvcounts.begin(), recvcounts.end() - 1, displacements.begin() + 1); + device_allgatherv(row_comm, + renumber_map_labels.begin(), + renumber_map_minor_labels.begin(), + recvcounts, + displacements, + handle.get_stream()); - CUDA_TRY(cudaStreamSynchronize( - handle.get_stream())); // cuco::static_map currently does not take stream + CUDA_TRY(cudaStreamSynchronize( + handle.get_stream())); // cuco::static_map currently does not take stream - cuco::static_map renumber_map{ + cuco::static_map renumber_map{ + // FIXME: std::max(..., ...) 
as a temporary workaround for + // https://github.com/NVIDIA/cuCollections/issues/72 and + // https://github.com/NVIDIA/cuCollections/issues/73 + std::max( static_cast(static_cast(renumber_map_minor_labels.size()) / load_factor), - invalid_vertex_id::value, - invalid_vertex_id::value}; - auto pair_first = thrust::make_transform_iterator( - thrust::make_zip_iterator(thrust::make_tuple(renumber_map_minor_labels.begin(), - thrust::make_counting_iterator(minor_first))), - [] __device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); - }); + renumber_map_minor_labels.size() + 1), + invalid_vertex_id::value, + invalid_vertex_id::value}; + auto pair_first = thrust::make_transform_iterator( + thrust::make_zip_iterator(thrust::make_tuple( + renumber_map_minor_labels.begin(), + thrust::make_counting_iterator(partition.get_matrix_partition_minor_first()))), + [] __device__(auto val) { + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + }); + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. + if (renumber_map_minor_labels.size()) { renumber_map.insert(pair_first, pair_first + renumber_map_minor_labels.size()); - renumber_map.find(edgelist_minor_vertices, - edgelist_minor_vertices + num_edgelist_edges, - edgelist_minor_vertices); + } + for (size_t i = 0; i < edgelist_major_vertices.size(); ++i) { + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the + // grid size is 0; this leads to cudaErrorInvaildConfiguration. + if (edgelist_edge_counts[i]) { + renumber_map.find(edgelist_minor_vertices[i], + edgelist_minor_vertices[i] + edgelist_edge_counts[i], + edgelist_minor_vertices[i]); + } } } @@ -565,27 +653,28 @@ std::enable_if_t> renumber_edgelist( bool do_expensive_check) { // FIXME: remove this check once we drop Pascal support - CUGRAPH_EXPECTS(handle.get_device_properties().major >= 7, - "Relabel not supported on Pascal and older architectures."); + CUGRAPH_EXPECTS( + handle.get_device_properties().major >= 7, + "This version of renumber_edgelist not supported on Pascal and older architectures."); #ifdef CUCO_STATIC_MAP_DEFINED if (do_expensive_check) { - expensive_check_edgelist(handle, - vertices, - num_vertices, - edgelist_major_vertices, - edgelist_minor_vertices, - num_edgelist_edges, - false); + expensive_check_edgelist( + handle, + vertices, + num_vertices, + std::vector{edgelist_major_vertices}, + std::vector{edgelist_minor_vertices}, + std::vector{num_edgelist_edges}); } - auto renumber_map_labels = - detail::compute_renumber_map(handle, - vertices, - num_vertices, - edgelist_major_vertices, - edgelist_minor_vertices, - num_edgelist_edges); + auto renumber_map_labels = detail::compute_renumber_map( + handle, + vertices, + num_vertices, + std::vector{edgelist_major_vertices}, + std::vector{edgelist_minor_vertices}, + std::vector{num_edgelist_edges}); double constexpr load_factor = 0.7; @@ -593,7 +682,11 @@ std::enable_if_t> renumber_edgelist( // footprint and execution time cuco::static_map renumber_map{ - static_cast(static_cast(renumber_map_labels.size()) / load_factor), + // FIXME: std::max(..., ...) 
as a temporary workaround for + // https://github.com/NVIDIA/cuCollections/issues/72 and + // https://github.com/NVIDIA/cuCollections/issues/73 + std::max(static_cast(static_cast(renumber_map_labels.size()) / load_factor), + renumber_map_labels.size() + 1), invalid_vertex_id::value, invalid_vertex_id::value}; auto pair_first = thrust::make_transform_iterator( @@ -602,11 +695,21 @@ std::enable_if_t> renumber_edgelist( [] __device__(auto val) { return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); }); - renumber_map.insert(pair_first, pair_first + renumber_map_labels.size()); - renumber_map.find( - edgelist_major_vertices, edgelist_major_vertices + num_edgelist_edges, edgelist_major_vertices); - renumber_map.find( - edgelist_minor_vertices, edgelist_minor_vertices + num_edgelist_edges, edgelist_minor_vertices); + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. + if (renumber_map_labels.size()) { + renumber_map.insert(pair_first, pair_first + renumber_map_labels.size()); + } + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. + if (num_edgelist_edges > 0) { + renumber_map.find(edgelist_major_vertices, + edgelist_major_vertices + num_edgelist_edges, + edgelist_major_vertices); + renumber_map.find(edgelist_minor_vertices, + edgelist_minor_vertices + num_edgelist_edges, + edgelist_minor_vertices); + } return renumber_map_labels; #else @@ -620,22 +723,21 @@ template std::enable_if_t, partition_t, vertex_t, edge_t>> renumber_edgelist(raft::handle_t const& handle, - vertex_t* edgelist_major_vertices /* [INOUT] */, - vertex_t* edgelist_minor_vertices /* [INOUT] */, - edge_t num_edgelist_edges, - bool is_hypergraph_partitioned, + std::vector const& edgelist_major_vertices /* [INOUT] */, + std::vector const& edgelist_minor_vertices /* [INOUT] */, + std::vector const& edgelist_edge_counts, bool do_expensive_check) { // FIXME: remove this check once we drop Pascal support - CUGRAPH_EXPECTS(handle.get_device_properties().major >= 7, - "Relabel not supported on Pascal and older architectures."); + CUGRAPH_EXPECTS( + handle.get_device_properties().major >= 7, + "This version of renumber_edgelist not supported on Pascal and older architectures."); return detail::renumber_edgelist(handle, static_cast(nullptr), vertex_t{0}, edgelist_major_vertices, edgelist_minor_vertices, - num_edgelist_edges, - is_hypergraph_partitioned, + edgelist_edge_counts, do_expensive_check); } @@ -648,8 +750,9 @@ std::enable_if_t> renumber_edgelist( bool do_expensive_check) { // FIXME: remove this check once we drop Pascal support - CUGRAPH_EXPECTS(handle.get_device_properties().major >= 7, - "Relabel not supported on Pascal and older architectures."); + CUGRAPH_EXPECTS( + handle.get_device_properties().major >= 7, + "This version of renumber_edgelist not supported on Pascal and older architectures."); return detail::renumber_edgelist(handle, static_cast(nullptr), vertex_t{0} /* dummy */, @@ -665,22 +768,21 @@ std::enable_if_t const& edgelist_major_vertices /* [INOUT] */, + std::vector const& edgelist_minor_vertices /* [INOUT] */, + std::vector const& edgelist_edge_counts, bool do_expensive_check) { // FIXME: remove this check once we drop Pascal support - CUGRAPH_EXPECTS(handle.get_device_properties().major >= 7, - "Relabel not supported on Pascal and older architectures."); + CUGRAPH_EXPECTS( + 
handle.get_device_properties().major >= 7, + "This version of renumber_edgelist not supported on Pascal and older architectures."); return detail::renumber_edgelist(handle, local_vertices, num_local_vertices, edgelist_major_vertices, edgelist_minor_vertices, - num_edgelist_edges, - is_hypergraph_partitioned, + edgelist_edge_counts, do_expensive_check); } @@ -695,8 +797,9 @@ std::enable_if_t> renumber_edgelist( bool do_expensive_check) { // FIXME: remove this check once we drop Pascal support - CUGRAPH_EXPECTS(handle.get_device_properties().major >= 7, - "Relabel not supported on Pascal and older architectures."); + CUGRAPH_EXPECTS( + handle.get_device_properties().major >= 7, + "This version of renumber_edgelist not supported on Pascal and older architectures."); return detail::renumber_edgelist(handle, vertices, num_vertices, @@ -711,12 +814,12 @@ std::enable_if_t> renumber_edgelist( // instantiations for // template std::tuple, partition_t, int32_t, int32_t> -renumber_edgelist(raft::handle_t const& handle, - int32_t* edgelist_major_vertices /* [INOUT] */, - int32_t* edgelist_minor_vertices /* [INOUT] */, - int32_t num_edgelist_edges, - bool is_hypergraph_partitioned, - bool do_expensive_check); +renumber_edgelist( + raft::handle_t const& handle, + std::vector const& edgelist_major_vertices /* [INOUT] */, + std::vector const& edgelist_minor_vertices /* [INOUT] */, + std::vector const& edgelist_edge_counts, + bool do_expensive_check); template rmm::device_uvector renumber_edgelist( raft::handle_t const& handle, @@ -726,14 +829,14 @@ template rmm::device_uvector renumber_edgelist bool do_expensive_check); template std::tuple, partition_t, int32_t, int32_t> -renumber_edgelist(raft::handle_t const& handle, - int32_t const* local_vertices, - int32_t num_local_vertices, - int32_t* edgelist_major_vertices /* [INOUT] */, - int32_t* edgelist_minor_vertices /* [INOUT] */, - int32_t num_edgelist_edges, - bool is_hypergraph_partitioned, - bool do_expensive_check); +renumber_edgelist( + raft::handle_t const& handle, + int32_t const* local_vertices, + int32_t num_local_vertices, + std::vector const& edgelist_major_vertices /* [INOUT] */, + std::vector const& edgelist_minor_vertices /* [INOUT] */, + std::vector const& edgelist_edge_counts, + bool do_expensive_check); template rmm::device_uvector renumber_edgelist( raft::handle_t const& handle, @@ -747,12 +850,12 @@ template rmm::device_uvector renumber_edgelist // instantiations for // template std::tuple, partition_t, int32_t, int64_t> -renumber_edgelist(raft::handle_t const& handle, - int32_t* edgelist_major_vertices /* [INOUT] */, - int32_t* edgelist_minor_vertices /* [INOUT] */, - int64_t num_edgelist_edges, - bool is_hypergraph_partitioned, - bool do_expensive_check); +renumber_edgelist( + raft::handle_t const& handle, + std::vector const& edgelist_major_vertices /* [INOUT] */, + std::vector const& edgelist_minor_vertices /* [INOUT] */, + std::vector const& edgelist_edge_counts, + bool do_expensive_check); template rmm::device_uvector renumber_edgelist( raft::handle_t const& handle, @@ -762,14 +865,14 @@ template rmm::device_uvector renumber_edgelist bool do_expensive_check); template std::tuple, partition_t, int32_t, int64_t> -renumber_edgelist(raft::handle_t const& handle, - int32_t const* local_vertices, - int32_t num_local_vertices, - int32_t* edgelist_major_vertices /* [INOUT] */, - int32_t* edgelist_minor_vertices /* [INOUT] */, - int64_t num_edgelist_edges, - bool is_hypergraph_partitioned, - bool do_expensive_check); +renumber_edgelist( + 
raft::handle_t const& handle, + int32_t const* local_vertices, + int32_t num_local_vertices, + std::vector const& edgelist_major_vertices /* [INOUT] */, + std::vector const& edgelist_minor_vertices /* [INOUT] */, + std::vector const& edgelist_edge_counts, + bool do_expensive_check); template rmm::device_uvector renumber_edgelist( raft::handle_t const& handle, @@ -783,12 +886,12 @@ template rmm::device_uvector renumber_edgelist // instantiations for // template std::tuple, partition_t, int64_t, int64_t> -renumber_edgelist(raft::handle_t const& handle, - int64_t* edgelist_major_vertices /* [INOUT] */, - int64_t* edgelist_minor_vertices /* [INOUT] */, - int64_t num_edgelist_edges, - bool is_hypergraph_partitioned, - bool do_expensive_check); +renumber_edgelist( + raft::handle_t const& handle, + std::vector const& edgelist_major_vertices /* [INOUT] */, + std::vector const& edgelist_minor_vertices /* [INOUT] */, + std::vector const& edgelist_edge_counts, + bool do_expensive_check); template rmm::device_uvector renumber_edgelist( raft::handle_t const& handle, @@ -798,14 +901,14 @@ template rmm::device_uvector renumber_edgelist bool do_expensive_check); template std::tuple, partition_t, int64_t, int64_t> -renumber_edgelist(raft::handle_t const& handle, - int64_t const* local_vertices, - int64_t num_local_vertices, - int64_t* edgelist_major_vertices /* [INOUT] */, - int64_t* edgelist_minor_vertices /* [INOUT] */, - int64_t num_edgelist_edges, - bool is_hypergraph_partitioned, - bool do_expensive_check); +renumber_edgelist( + raft::handle_t const& handle, + int64_t const* local_vertices, + int64_t num_local_vertices, + std::vector const& edgelist_major_vertices /* [INOUT] */, + std::vector const& edgelist_minor_vertices /* [INOUT] */, + std::vector const& edgelist_edge_counts, + bool do_expensive_check); template rmm::device_uvector renumber_edgelist( raft::handle_t const& handle, diff --git a/cpp/src/experimental/renumber_utils.cu b/cpp/src/experimental/renumber_utils.cu new file mode 100644 index 00000000000..8f59683d9d6 --- /dev/null +++ b/cpp/src/experimental/renumber_utils.cu @@ -0,0 +1,477 @@ +/* + * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace cugraph { +namespace experimental { + +template +void renumber_ext_vertices(raft::handle_t const& handle, + vertex_t* vertices /* [INOUT] */, + size_t num_vertices, + vertex_t const* renumber_map_labels, + vertex_t local_int_vertex_first, + vertex_t local_int_vertex_last, + bool do_expensive_check) +{ + double constexpr load_factor = 0.7; + + // FIXME: remove this check once we drop Pascal support + CUGRAPH_EXPECTS(handle.get_device_properties().major >= 7, + "renumber_vertices() not supported on Pascal and older architectures."); + +#ifdef CUCO_STATIC_MAP_DEFINED + if (do_expensive_check) { + rmm::device_uvector labels(local_int_vertex_last - local_int_vertex_first, + handle.get_stream()); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + renumber_map_labels, + renumber_map_labels + labels.size(), + labels.begin()); + thrust::sort( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), labels.begin(), labels.end()); + CUGRAPH_EXPECTS(thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + labels.begin(), + labels.end()) == labels.end(), + "Invalid input arguments: renumber_map_labels have duplicate elements."); + } + + auto renumber_map_ptr = std::make_unique>( + size_t{0}, invalid_vertex_id::value, invalid_vertex_id::value); + if (multi_gpu) { + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + + rmm::device_uvector sorted_unique_ext_vertices(num_vertices, handle.get_stream()); + sorted_unique_ext_vertices.resize( + thrust::distance( + sorted_unique_ext_vertices.begin(), + thrust::copy_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertices, + vertices + num_vertices, + sorted_unique_ext_vertices.begin(), + [] __device__(auto v) { return v != invalid_vertex_id::value; })), + handle.get_stream()); + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + sorted_unique_ext_vertices.begin(), + sorted_unique_ext_vertices.end()); + sorted_unique_ext_vertices.resize( + thrust::distance( + sorted_unique_ext_vertices.begin(), + thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + sorted_unique_ext_vertices.begin(), + sorted_unique_ext_vertices.end())), + handle.get_stream()); + + auto int_vertices_for_sorted_unique_ext_vertices = collect_values_for_unique_keys( + comm, + renumber_map_labels, + renumber_map_labels + (local_int_vertex_last - local_int_vertex_first), + thrust::make_counting_iterator(local_int_vertex_first), + sorted_unique_ext_vertices.begin(), + sorted_unique_ext_vertices.end(), + detail::compute_gpu_id_from_vertex_t{comm_size}, + handle.get_stream()); + + handle.get_stream_view().synchronize(); // cuco::static_map currently does not take stream + + renumber_map_ptr.reset(); + + renumber_map_ptr = std::make_unique>( + // FIXME: std::max(..., ...) 
as a temporary workaround for + // https://github.com/NVIDIA/cuCollections/issues/72 and + // https://github.com/NVIDIA/cuCollections/issues/73 + std::max( + static_cast(static_cast(sorted_unique_ext_vertices.size()) / load_factor), + sorted_unique_ext_vertices.size() + 1), + invalid_vertex_id::value, + invalid_vertex_id::value); + + auto kv_pair_first = thrust::make_transform_iterator( + thrust::make_zip_iterator(thrust::make_tuple( + sorted_unique_ext_vertices.begin(), int_vertices_for_sorted_unique_ext_vertices.begin())), + [] __device__(auto val) { + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + }); + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. + if (sorted_unique_ext_vertices.size()) { + renumber_map_ptr->insert(kv_pair_first, kv_pair_first + sorted_unique_ext_vertices.size()); + } + } else { + handle.get_stream_view().synchronize(); // cuco::static_map currently does not take stream + + renumber_map_ptr.reset(); + + renumber_map_ptr = std::make_unique>( + // FIXME: std::max(..., ...) as a temporary workaround for + // https://github.com/NVIDIA/cuCollections/issues/72 and + // https://github.com/NVIDIA/cuCollections/issues/73 + std::max(static_cast( + static_cast(local_int_vertex_last - local_int_vertex_first) / load_factor), + static_cast(local_int_vertex_last - local_int_vertex_first) + 1), + invalid_vertex_id::value, + invalid_vertex_id::value); + + auto pair_first = thrust::make_transform_iterator( + thrust::make_zip_iterator( + thrust::make_tuple(renumber_map_labels, thrust::make_counting_iterator(vertex_t{0}))), + [] __device__(auto val) { + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + }); + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. + if ((local_int_vertex_last - local_int_vertex_first) > 0) { + renumber_map_ptr->insert(pair_first, + pair_first + (local_int_vertex_last - local_int_vertex_first)); + } + } + + if (do_expensive_check) { + rmm::device_uvector contains(num_vertices, handle.get_stream()); + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. + if (num_vertices > 0) { + renumber_map_ptr->contains(vertices, vertices + num_vertices, contains.begin()); + } + auto vc_pair_first = thrust::make_zip_iterator(thrust::make_tuple(vertices, contains.begin())); + CUGRAPH_EXPECTS(thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vc_pair_first, + vc_pair_first + num_vertices, + [] __device__(auto pair) { + auto v = thrust::get<0>(pair); + auto c = thrust::get<1>(pair); + return v == invalid_vertex_id::value + ? (c == true) + : (c == false); + }) == 0, + "Invalid input arguments: vertices have elements that are missing in " + "(aggregate) renumber_map_labels."); + } + + // FIXME: a temporary workaround for https://github.com/NVIDIA/cuCollections/issues/74 +#if 1 + thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertices, + vertices + num_vertices, + vertices, + [view = renumber_map_ptr->get_device_view()] __device__(auto v) { + return v != invalid_vertex_id::value + ? view.find(v)->second.load(cuda::std::memory_order_relaxed) + : invalid_vertex_id::value; + }); +#else + // FIXME: a temporary workaround. 
cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. + if (num_vertices > 0) { renumber_map_ptr->find(vertices, vertices + num_vertices, vertices); } +#endif +#endif +} + +template +void unrenumber_local_int_vertices( + raft::handle_t const& handle, + vertex_t* vertices /* [INOUT] */, + size_t num_vertices, + vertex_t const* renumber_map_labels /* size = local_int_vertex_last - local_int_vertex_first */, + vertex_t local_int_vertex_first, + vertex_t local_int_vertex_last, + bool do_expensive_check) +{ + // FIXME: remove this check once we drop Pascal support + CUGRAPH_EXPECTS(handle.get_device_properties().major >= 7, + "unrenumber_local_vertices() not supported on Pascal and older architectures."); + +#ifdef CUCO_STATIC_MAP_DEFINED + if (do_expensive_check) { + CUGRAPH_EXPECTS( + thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertices, + vertices + num_vertices, + [local_int_vertex_first, local_int_vertex_last] __device__(auto v) { + return v != invalid_vertex_id::value && + (v < local_int_vertex_first || v >= local_int_vertex_last); + }) == 0, + "Invalid input arguments: there are non-local vertices in [vertices, vertices " + "+ num_vertices)."); + } + + thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertices, + vertices + num_vertices, + vertices, + [renumber_map_labels, local_int_vertex_first] __device__(auto v) { + return v == invalid_vertex_id::value + ? v + : renumber_map_labels[v - local_int_vertex_first]; + }); +#endif +} + +template +void unrenumber_int_vertices(raft::handle_t const& handle, + vertex_t* vertices /* [INOUT] */, + size_t num_vertices, + vertex_t const* renumber_map_labels, + vertex_t local_int_vertex_first, + vertex_t local_int_vertex_last, + std::vector& vertex_partition_lasts, + bool do_expensive_check) +{ + double constexpr load_factor = 0.7; + + // FIXME: remove this check once we drop Pascal support + CUGRAPH_EXPECTS(handle.get_device_properties().major >= 7, + "unrenumber_vertices() not supported on Pascal and older architectures."); + +#ifdef CUCO_STATIC_MAP_DEFINED + if (do_expensive_check) { + CUGRAPH_EXPECTS( + thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertices, + vertices + num_vertices, + [int_vertex_last = vertex_partition_lasts.back()] __device__(auto v) { + return v != invalid_vertex_id::value && + !is_valid_vertex(int_vertex_last, v); + }) == 0, + "Invalid input arguments: there are out-of-range vertices in [vertices, vertices " + "+ num_vertices)."); + } + + if (multi_gpu) { + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + + rmm::device_uvector sorted_unique_int_vertices(num_vertices, handle.get_stream()); + sorted_unique_int_vertices.resize( + thrust::distance( + sorted_unique_int_vertices.begin(), + thrust::copy_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertices, + vertices + num_vertices, + sorted_unique_int_vertices.begin(), + [] __device__(auto v) { return v != invalid_vertex_id::value; })), + handle.get_stream()); + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + sorted_unique_int_vertices.begin(), + sorted_unique_int_vertices.end()); + sorted_unique_int_vertices.resize( + thrust::distance( + sorted_unique_int_vertices.begin(), + thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + sorted_unique_int_vertices.begin(), + 
sorted_unique_int_vertices.end())), + handle.get_stream()); + + rmm::device_uvector d_vertex_partition_lasts(vertex_partition_lasts.size(), + handle.get_stream()); + raft::update_device(d_vertex_partition_lasts.data(), + vertex_partition_lasts.data(), + vertex_partition_lasts.size(), + handle.get_stream()); + rmm::device_uvector d_tx_int_vertex_offsets(d_vertex_partition_lasts.size(), + handle.get_stream()); + thrust::lower_bound(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + sorted_unique_int_vertices.begin(), + sorted_unique_int_vertices.end(), + d_vertex_partition_lasts.begin(), + d_vertex_partition_lasts.end(), + d_tx_int_vertex_offsets.begin()); + std::vector h_tx_int_vertex_counts(d_tx_int_vertex_offsets.size()); + raft::update_host(h_tx_int_vertex_counts.data(), + d_tx_int_vertex_offsets.data(), + d_tx_int_vertex_offsets.size(), + handle.get_stream()); + handle.get_stream_view().synchronize(); + std::adjacent_difference( + h_tx_int_vertex_counts.begin(), h_tx_int_vertex_counts.end(), h_tx_int_vertex_counts.begin()); + + rmm::device_uvector rx_int_vertices(0, handle.get_stream()); + std::vector rx_int_vertex_counts{}; + std::tie(rx_int_vertices, rx_int_vertex_counts) = shuffle_values( + comm, sorted_unique_int_vertices.begin(), h_tx_int_vertex_counts, handle.get_stream()); + + auto tx_ext_vertices = std::move(rx_int_vertices); + thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + tx_ext_vertices.begin(), + tx_ext_vertices.end(), + tx_ext_vertices.begin(), + [renumber_map_labels, local_int_vertex_first] __device__(auto v) { + return renumber_map_labels[v - local_int_vertex_first]; + }); + + rmm::device_uvector rx_ext_vertices_for_sorted_unique_int_vertices( + 0, handle.get_stream()); + std::tie(rx_ext_vertices_for_sorted_unique_int_vertices, std::ignore) = + shuffle_values(comm, tx_ext_vertices.begin(), rx_int_vertex_counts, handle.get_stream()); + + handle.get_stream_view().synchronize(); // cuco::static_map currently does not take stream + + cuco::static_map unrenumber_map( + // FIXME: std::max(..., ...) as a temporary workaround for + // https://github.com/NVIDIA/cuCollections/issues/72 and + // https://github.com/NVIDIA/cuCollections/issues/73 + std::max( + static_cast(static_cast(sorted_unique_int_vertices.size()) / load_factor), + sorted_unique_int_vertices.size() + 1), + invalid_vertex_id::value, + invalid_vertex_id::value); + + auto pair_first = thrust::make_transform_iterator( + thrust::make_zip_iterator( + thrust::make_tuple(sorted_unique_int_vertices.begin(), + rx_ext_vertices_for_sorted_unique_int_vertices.begin())), + [] __device__(auto val) { + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + }); + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. + if (sorted_unique_int_vertices.size()) { + unrenumber_map.insert(pair_first, pair_first + sorted_unique_int_vertices.size()); + } + // FIXME: a temporary workaround for https://github.com/NVIDIA/cuCollections/issues/74 +#if 1 + thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertices, + vertices + num_vertices, + vertices, + [view = unrenumber_map.get_device_view()] __device__(auto v) { + return v != invalid_vertex_id::value + ? view.find(v)->second.load(cuda::std::memory_order_relaxed) + : invalid_vertex_id::value; + }); +#else + // FIXME: a temporary workaround. 
cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. + if (num_vertices > 0) { unrenumber_map.find(vertices, vertices + num_vertices, vertices); } +#endif + } else { + unrenumber_local_int_vertices(handle, + vertices, + num_vertices, + renumber_map_labels, + local_int_vertex_first, + local_int_vertex_last, + do_expensive_check); + } +#endif +} + +// explicit instantiation + +template void renumber_ext_vertices(raft::handle_t const& handle, + int32_t* vertices, + size_t num_vertices, + int32_t const* renumber_map_labels, + int32_t local_int_vertex_first, + int32_t local_int_vertex_last, + bool do_expensive_check); + +template void renumber_ext_vertices(raft::handle_t const& handle, + int32_t* vertices, + size_t num_vertices, + int32_t const* renumber_map_labels, + int32_t local_int_vertex_first, + int32_t local_int_vertex_last, + bool do_expensive_check); + +template void renumber_ext_vertices(raft::handle_t const& handle, + int64_t* vertices, + size_t num_vertices, + int64_t const* renumber_map_labels, + int64_t local_int_vertex_first, + int64_t local_int_vertex_last, + bool do_expensive_check); + +template void renumber_ext_vertices(raft::handle_t const& handle, + int64_t* vertices, + size_t num_vertices, + int64_t const* renumber_map_labels, + int64_t local_int_vertex_first, + int64_t local_int_vertex_last, + bool do_expensive_check); + +template void unrenumber_local_int_vertices(raft::handle_t const& handle, + int32_t* vertices, + size_t num_vertices, + int32_t const* renumber_map_labels, + int32_t local_int_vertex_first, + int32_t local_int_vertex_last, + bool do_expensive_check); + +template void unrenumber_local_int_vertices(raft::handle_t const& handle, + int64_t* vertices, + size_t num_vertices, + int64_t const* renumber_map_labels, + int64_t local_int_vertex_first, + int64_t local_int_vertex_last, + bool do_expensive_check); + +template void unrenumber_int_vertices(raft::handle_t const& handle, + int32_t* vertices, + size_t num_vertices, + int32_t const* renumber_map_labels, + int32_t local_int_vertex_first, + int32_t local_int_vertex_last, + std::vector& vertex_partition_lasts, + bool do_expensive_check); + +template void unrenumber_int_vertices(raft::handle_t const& handle, + int32_t* vertices, + size_t num_vertices, + int32_t const* renumber_map_labels, + int32_t local_int_vertex_first, + int32_t local_int_vertex_last, + std::vector& vertex_partition_lasts, + bool do_expensive_check); + +template void unrenumber_int_vertices(raft::handle_t const& handle, + int64_t* vertices, + size_t num_vertices, + int64_t const* renumber_map_labels, + int64_t local_int_vertex_first, + int64_t local_int_vertex_last, + std::vector& vertex_partition_lasts, + bool do_expensive_check); + +template void unrenumber_int_vertices(raft::handle_t const& handle, + int64_t* vertices, + size_t num_vertices, + int64_t const* renumber_map_labels, + int64_t local_int_vertex_first, + int64_t local_int_vertex_last, + std::vector& vertex_partition_lasts, + bool do_expensive_check); + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/src/experimental/sssp.cu b/cpp/src/experimental/sssp.cu index 4996b3734cb..373444cb0a2 100644 --- a/cpp/src/experimental/sssp.cu +++ b/cpp/src/experimental/sssp.cu @@ -70,6 +70,9 @@ void sssp(raft::handle_t const &handle, CUGRAPH_EXPECTS(push_graph_view.is_valid_vertex(source_vertex), "Invalid input argument: source vertex out-of-range."); + CUGRAPH_EXPECTS(push_graph_view.is_weighted(), + 
"Invalid input argument: an unweighted graph is passed to SSSP, BFS is more " + "efficient for unweighted graphs."); if (do_expensive_check) { auto num_negative_edge_weights = @@ -126,10 +129,7 @@ void sssp(raft::handle_t const &handle, // FIXME: need to double check the bucket sizes are sufficient std::vector bucket_sizes(static_cast(Bucket::num_buckets), push_graph_view.get_number_of_local_vertices()); - VertexFrontier, - vertex_t, - GraphViewType::is_multi_gpu, - static_cast(Bucket::num_buckets)> + VertexFrontier(Bucket::num_buckets)> vertex_frontier(handle, bucket_sizes); // 5. SSSP iteration @@ -188,7 +188,7 @@ void sssp(raft::handle_t const &handle, threshold = old_distance < threshold ? old_distance : threshold; } if (new_distance >= threshold) { push = false; } - return thrust::make_tuple(push, new_distance, src); + return thrust::make_tuple(push, thrust::make_tuple(new_distance, src)); }, reduce_op::min>(), distances, @@ -199,8 +199,8 @@ void sssp(raft::handle_t const &handle, auto idx = new_dist < v_val ? (new_dist < near_far_threshold ? static_cast(Bucket::new_near) : static_cast(Bucket::far)) - : VertexFrontier, vertex_t>::kInvalidBucketIdx; - return thrust::make_tuple(idx, thrust::get<0>(pushed_val), thrust::get<1>(pushed_val)); + : VertexFrontier::kInvalidBucketIdx; + return thrust::make_tuple(idx, pushed_val); }); vertex_frontier.get_bucket(static_cast(Bucket::cur_near)).clear(); @@ -222,7 +222,7 @@ void sssp(raft::handle_t const &handle, auto dist = *(distances + vertex_partition.get_local_vertex_offset_from_vertex_nocheck(v)); if (dist < old_near_far_threshold) { - return VertexFrontier, vertex_t>::kInvalidBucketIdx; + return VertexFrontier::kInvalidBucketIdx; } else if (dist < near_far_threshold) { return static_cast(Bucket::cur_near); } else { diff --git a/cpp/src/utilities/cython.cu b/cpp/src/utilities/cython.cu index a9e3146bbcd..4a2b98ea815 100644 --- a/cpp/src/utilities/cython.cu +++ b/cpp/src/utilities/cython.cu @@ -20,22 +20,101 @@ #include #include #include -#include #include #include #include #include +#include +#include + #include +#include +#include #include +#include #include +#include +#include + +#include +#include namespace cugraph { namespace cython { namespace detail { -// FIXME: Add description of this function +// workaround for CUDA extended lambda restrictions +template +struct compute_local_partition_id_t { + vertex_t const* lasts{nullptr}; + size_t num_local_partitions{0}; + + __device__ size_t operator()(vertex_t v) + { + for (size_t i = 0; i < num_local_partitions; ++i) { + if (v < lasts[i]) { return i; } + } + return num_local_partitions; + } +}; + +// FIXME: this is unnecessary if edge_counts_ in the major_minor_weights_t object returned by +// call_shuffle() is passed back, better be fixed. this code assumes that the entire set of edges +// for each partition are consecutively stored. 
+template +std::vector compute_edge_counts(raft::handle_t const& handle, + graph_container_t const& graph_container) +{ + auto num_local_partitions = static_cast(graph_container.col_comm_size); + + std::vector partition_offsets_vector( + reinterpret_cast(graph_container.vertex_partition_offsets), + reinterpret_cast(graph_container.vertex_partition_offsets) + + (graph_container.row_comm_size * graph_container.col_comm_size) + 1); + + std::vector h_lasts(num_local_partitions); + for (size_t i = 0; i < h_lasts.size(); ++i) { + h_lasts[i] = partition_offsets_vector[graph_container.row_comm_size * (i + 1)]; + } + rmm::device_uvector d_lasts(h_lasts.size(), handle.get_stream()); + raft::update_device(d_lasts.data(), h_lasts.data(), h_lasts.size(), handle.get_stream()); + auto major_vertices = transposed + ? reinterpret_cast(graph_container.dst_vertices) + : reinterpret_cast(graph_container.src_vertices); + auto key_first = thrust::make_transform_iterator( + major_vertices, compute_local_partition_id_t{d_lasts.data(), num_local_partitions}); + rmm::device_uvector d_local_partition_ids(num_local_partitions, handle.get_stream()); + rmm::device_uvector d_edge_counts(d_local_partition_ids.size(), handle.get_stream()); + auto it = thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + key_first, + key_first + graph_container.num_local_edges, + thrust::make_constant_iterator(edge_t{1}), + d_local_partition_ids.begin(), + d_edge_counts.begin()); + if (static_cast(thrust::distance(d_local_partition_ids.begin(), thrust::get<0>(it))) < + num_local_partitions) { + rmm::device_uvector d_counts(num_local_partitions, handle.get_stream()); + thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + d_counts.begin(), + d_counts.end(), + edge_t{0}); + thrust::scatter(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + d_edge_counts.begin(), + thrust::get<1>(it), + d_local_partition_ids.begin(), + d_counts.begin()); + d_edge_counts = std::move(d_counts); + } + std::vector h_edge_counts(num_local_partitions, 0); + raft::update_host( + h_edge_counts.data(), d_edge_counts.data(), d_edge_counts.size(), handle.get_stream()); + handle.get_stream_view().synchronize(); + + return h_edge_counts; +} + template > create_graph(raft::handle_t const& handle, graph_container_t const& graph_container) { - std::vector> edgelist( - {{reinterpret_cast(graph_container.src_vertices), - reinterpret_cast(graph_container.dst_vertices), - reinterpret_cast(graph_container.weights), - static_cast(graph_container.num_partition_edges)}}); + auto num_local_partitions = static_cast(graph_container.col_comm_size); std::vector partition_offsets_vector( reinterpret_cast(graph_container.vertex_partition_offsets), reinterpret_cast(graph_container.vertex_partition_offsets) + (graph_container.row_comm_size * graph_container.col_comm_size) + 1); + auto edge_counts = compute_edge_counts(handle, graph_container); + + std::vector displacements(edge_counts.size(), 0); + std::partial_sum(edge_counts.begin(), edge_counts.end() - 1, displacements.begin() + 1); + + std::vector> edgelists( + num_local_partitions); + for (size_t i = 0; i < edgelists.size(); ++i) { + edgelists[i] = cugraph::experimental::edgelist_t{ + reinterpret_cast(graph_container.src_vertices) + displacements[i], + reinterpret_cast(graph_container.dst_vertices) + displacements[i], + graph_container.graph_props.is_weighted + ? 
reinterpret_cast(graph_container.weights) + displacements[i] + : static_cast(nullptr), + edge_counts[i]}; + } + experimental::partition_t partition(partition_offsets_vector, - graph_container.hypergraph_partitioned, graph_container.row_comm_size, graph_container.col_comm_size, graph_container.row_comm_rank, @@ -65,14 +156,12 @@ create_graph(raft::handle_t const& handle, graph_container_t const& graph_contai return std::make_unique>( handle, - edgelist, + edgelists, partition, static_cast(graph_container.num_global_vertices), static_cast(graph_container.num_global_edges), graph_container.graph_props, - // FIXME: This currently fails if sorted_by_degree is true... - // graph_container.sorted_by_degree, - false, + true, graph_container.do_expensive_check); } @@ -89,7 +178,7 @@ create_graph(raft::handle_t const& handle, graph_container_t const& graph_contai reinterpret_cast(graph_container.src_vertices), reinterpret_cast(graph_container.dst_vertices), reinterpret_cast(graph_container.weights), - static_cast(graph_container.num_partition_edges)}; + static_cast(graph_container.num_local_edges)}; return std::make_unique>( handle, edgelist, @@ -113,10 +202,11 @@ void populate_graph_container(graph_container_t& graph_container, numberTypeEnum vertexType, numberTypeEnum edgeType, numberTypeEnum weightType, - size_t num_partition_edges, + size_t num_local_edges, size_t num_global_vertices, size_t num_global_edges, bool sorted_by_degree, + bool is_weighted, bool transposed, bool multi_gpu) { @@ -124,7 +214,6 @@ void populate_graph_container(graph_container_t& graph_container, "populate_graph_container() can only be called on an empty container."); bool do_expensive_check{true}; - bool hypergraph_partitioned{false}; if (multi_gpu) { auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); @@ -143,7 +232,7 @@ void populate_graph_container(graph_container_t& graph_container, graph_container.src_vertices = src_vertices; graph_container.dst_vertices = dst_vertices; graph_container.weights = weights; - graph_container.num_partition_edges = num_partition_edges; + graph_container.num_local_edges = num_local_edges; graph_container.num_global_vertices = num_global_vertices; graph_container.num_global_edges = num_global_edges; graph_container.vertexType = vertexType; @@ -151,11 +240,11 @@ void populate_graph_container(graph_container_t& graph_container, graph_container.weightType = weightType; graph_container.transposed = transposed; graph_container.is_multi_gpu = multi_gpu; - graph_container.hypergraph_partitioned = hypergraph_partitioned; graph_container.sorted_by_degree = sorted_by_degree; graph_container.do_expensive_check = do_expensive_check; - experimental::graph_properties_t graph_props{.is_symmetric = false, .is_multigraph = false}; + experimental::graph_properties_t graph_props{ + .is_symmetric = false, .is_multigraph = false, .is_weighted = is_weighted}; graph_container.graph_props = graph_props; graph_container.graph_type = graphTypeEnum::graph_t; @@ -177,7 +266,7 @@ void populate_graph_container_legacy(graph_container_t& graph_container, int* local_offsets) { CUGRAPH_EXPECTS(graph_container.graph_type == graphTypeEnum::null, - "populate_graph_container() can only be called on an empty container."); + "populate_graph_container_legacy() can only be called on an empty container."); // FIXME: This is soon-to-be legacy code left in place until the new graph_t // class is supported everywhere else. 
Remove everything down to the comment @@ -802,23 +891,23 @@ void call_sssp(raft::handle_t const& handle, // wrapper for shuffling: // template -std::unique_ptr> call_shuffle( +std::unique_ptr> call_shuffle( raft::handle_t const& handle, vertex_t* edgelist_major_vertices, // [IN / OUT]: groupby_gpuid_and_shuffle_values() sorts in-place vertex_t* edgelist_minor_vertices, // [IN / OUT] weight_t* edgelist_weights, // [IN / OUT] - edge_t num_edgelist_edges, - bool is_hypergraph_partitioned) // = false + edge_t num_edgelist_edges) { - auto& comm = handle.get_comms(); + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_size = row_comm.get_size(); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_size = col_comm.get_size(); - auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - - auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - - std::unique_ptr> ptr_ret = - std::make_unique>(handle); + std::unique_ptr> ptr_ret = + std::make_unique>(handle); if (edgelist_weights != nullptr) { auto zip_edge = thrust::make_zip_iterator( @@ -833,10 +922,7 @@ std::unique_ptr> call_shuffle( zip_edge + num_edgelist_edges, [key_func = cugraph::experimental::detail::compute_gpu_id_from_edge_t{ - is_hypergraph_partitioned, - comm.get_size(), - row_comm.get_size(), - col_comm.get_size()}] __device__(auto val) { + comm.get_size(), row_comm.get_size(), col_comm.get_size()}] __device__(auto val) { return key_func(thrust::get<0>(val), thrust::get<1>(val)); }, handle.get_stream()); @@ -852,15 +938,46 @@ std::unique_ptr> call_shuffle( zip_edge + num_edgelist_edges, [key_func = cugraph::experimental::detail::compute_gpu_id_from_edge_t{ - is_hypergraph_partitioned, - comm.get_size(), - row_comm.get_size(), - col_comm.get_size()}] __device__(auto val) { + comm.get_size(), row_comm.get_size(), col_comm.get_size()}] __device__(auto val) { return key_func(thrust::get<0>(val), thrust::get<1>(val)); }, handle.get_stream()); } + auto local_partition_id_op = + [comm_size, + key_func = cugraph::experimental::detail::compute_partition_id_from_edge_t{ + comm_size, row_comm_size, col_comm_size}] __device__(auto pair) { + return key_func(thrust::get<0>(pair), thrust::get<1>(pair)) / + comm_size; // global partition id to local partition id + }; + auto pair_first = thrust::make_zip_iterator( + thrust::make_tuple(ptr_ret->get_major().data(), ptr_ret->get_minor().data())); + + auto edge_counts = + (edgelist_weights != nullptr) + ? 
cugraph::experimental::groupby_and_count(pair_first, + pair_first + ptr_ret->get_major().size(), + ptr_ret->get_weights().data(), + local_partition_id_op, + col_comm_size, + handle.get_stream()) + : cugraph::experimental::groupby_and_count(pair_first, + pair_first + ptr_ret->get_major().size(), + local_partition_id_op, + col_comm_size, + handle.get_stream()); + + std::vector h_edge_counts(edge_counts.size()); + raft::update_host( + h_edge_counts.data(), edge_counts.data(), edge_counts.size(), handle.get_stream()); + handle.get_stream_view().synchronize(); + + ptr_ret->get_edge_counts().resize(h_edge_counts.size()); + for (size_t i = 0; i < h_edge_counts.size(); ++i) { + ptr_ret->get_edge_counts()[i] = static_cast(h_edge_counts[i]); + } + return ptr_ret; // RVO-ed } @@ -872,8 +989,7 @@ std::unique_ptr> call_renumber( raft::handle_t const& handle, vertex_t* shuffled_edgelist_major_vertices /* [INOUT] */, vertex_t* shuffled_edgelist_minor_vertices /* [INOUT] */, - edge_t num_edgelist_edges, - bool is_hypergraph_partitioned, + std::vector const& edge_counts, bool do_expensive_check, bool multi_gpu) // bc. cython cannot take non-type template params { @@ -883,33 +999,31 @@ std::unique_ptr> call_renumber( std::make_unique>(handle); if (multi_gpu) { + std::vector displacements(edge_counts.size(), edge_t{0}); + std::partial_sum(edge_counts.begin(), edge_counts.end() - 1, displacements.begin() + 1); + std::vector major_ptrs(edge_counts.size()); + std::vector minor_ptrs(major_ptrs.size()); + for (size_t i = 0; i < edge_counts.size(); ++i) { + major_ptrs[i] = shuffled_edgelist_major_vertices + displacements[i]; + minor_ptrs[i] = shuffled_edgelist_minor_vertices + displacements[i]; + } + std::tie( p_ret->get_dv(), p_ret->get_partition(), p_ret->get_num_vertices(), p_ret->get_num_edges()) = cugraph::experimental::renumber_edgelist( - handle, - shuffled_edgelist_major_vertices, - shuffled_edgelist_minor_vertices, - num_edgelist_edges, - is_hypergraph_partitioned, - do_expensive_check); + handle, major_ptrs, minor_ptrs, edge_counts, do_expensive_check); } else { - auto ret_f = cugraph::experimental::renumber_edgelist( + p_ret->get_dv() = cugraph::experimental::renumber_edgelist( handle, shuffled_edgelist_major_vertices, shuffled_edgelist_minor_vertices, - num_edgelist_edges, + edge_counts[0], do_expensive_check); - auto tot_vertices = static_cast(ret_f.size()); - - p_ret->get_dv() = std::move(ret_f); - cugraph::experimental::partition_t part_sg( - std::vector{0, tot_vertices}, false, 1, 1, 0, 0); - - p_ret->get_partition() = std::move(part_sg); + p_ret->get_partition() = cugraph::experimental::partition_t{}; // dummy - p_ret->get_num_vertices() = tot_vertices; - p_ret->get_num_edges() = num_edgelist_edges; + p_ret->get_num_vertices() = static_cast(p_ret->get_dv().size()); + p_ret->get_num_edges() = edge_counts[0]; } return p_ret; // RVO-ed (copy ellision) @@ -1142,53 +1256,47 @@ template void call_sssp(raft::handle_t const& handle, int64_t* predecessors, const int64_t source_vertex); -template std::unique_ptr> call_shuffle( +template std::unique_ptr> call_shuffle( raft::handle_t const& handle, int32_t* edgelist_major_vertices, int32_t* edgelist_minor_vertices, float* edgelist_weights, - int32_t num_edgelist_edges, - bool is_hypergraph_partitioned); + int32_t num_edgelist_edges); -template std::unique_ptr> call_shuffle( +template std::unique_ptr> call_shuffle( raft::handle_t const& handle, int32_t* edgelist_major_vertices, int32_t* edgelist_minor_vertices, float* edgelist_weights, - int64_t 
num_edgelist_edges, - bool is_hypergraph_partitioned); + int64_t num_edgelist_edges); -template std::unique_ptr> call_shuffle( +template std::unique_ptr> call_shuffle( raft::handle_t const& handle, int32_t* edgelist_major_vertices, int32_t* edgelist_minor_vertices, double* edgelist_weights, - int32_t num_edgelist_edges, - bool is_hypergraph_partitioned); + int32_t num_edgelist_edges); -template std::unique_ptr> call_shuffle( +template std::unique_ptr> call_shuffle( raft::handle_t const& handle, int32_t* edgelist_major_vertices, int32_t* edgelist_minor_vertices, double* edgelist_weights, - int64_t num_edgelist_edges, - bool is_hypergraph_partitioned); + int64_t num_edgelist_edges); -template std::unique_ptr> call_shuffle( +template std::unique_ptr> call_shuffle( raft::handle_t const& handle, int64_t* edgelist_major_vertices, int64_t* edgelist_minor_vertices, float* edgelist_weights, - int64_t num_edgelist_edges, - bool is_hypergraph_partitioned); + int64_t num_edgelist_edges); -template std::unique_ptr> call_shuffle( +template std::unique_ptr> call_shuffle( raft::handle_t const& handle, int64_t* edgelist_major_vertices, int64_t* edgelist_minor_vertices, double* edgelist_weights, - int64_t num_edgelist_edges, - bool is_hypergraph_partitioned); + int64_t num_edgelist_edges); // TODO: add the remaining relevant EIDIr's: // @@ -1196,8 +1304,7 @@ template std::unique_ptr> call_renumber( raft::handle_t const& handle, int32_t* shuffled_edgelist_major_vertices /* [INOUT] */, int32_t* shuffled_edgelist_minor_vertices /* [INOUT] */, - int32_t num_edgelist_edges, - bool is_hypergraph_partitioned, + std::vector const& edge_counts, bool do_expensive_check, bool multi_gpu); @@ -1205,8 +1312,7 @@ template std::unique_ptr> call_renumber( raft::handle_t const& handle, int32_t* shuffled_edgelist_major_vertices /* [INOUT] */, int32_t* shuffled_edgelist_minor_vertices /* [INOUT] */, - int64_t num_edgelist_edges, - bool is_hypergraph_partitioned, + std::vector const& edge_counts, bool do_expensive_check, bool multi_gpu); @@ -1214,8 +1320,7 @@ template std::unique_ptr> call_renumber( raft::handle_t const& handle, int64_t* shuffled_edgelist_major_vertices /* [INOUT] */, int64_t* shuffled_edgelist_minor_vertices /* [INOUT] */, - int64_t num_edgelist_edges, - bool is_hypergraph_partitioned, + std::vector const& edge_counts, bool do_expensive_check, bool multi_gpu); diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 3b65b0edb29..89975f673ae 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -20,9 +20,10 @@ # - common test utils ----------------------------------------------------------------------------- add_library(cugraphtestutil STATIC - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/generate_graph_from_edgelist.cu" "${CMAKE_CURRENT_SOURCE_DIR}/utilities/matrix_market_file_utilities.cu" "${CMAKE_CURRENT_SOURCE_DIR}/utilities/rmat_utilities.cu" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/generate_graph_from_edgelist.cu" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/thrust_wrapper.cu" "${CMAKE_CURRENT_SOURCE_DIR}/utilities/misc_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c") @@ -445,7 +446,34 @@ if(BUILD_CUGRAPH_MG_TESTS) target_link_libraries(MG_PAGERANK_TEST PRIVATE MPI::MPI_C MPI::MPI_CXX) ########################################################################################### - # - MG LOUVAIN tests --------------------------------------------------------------------- + # - MG KATZ CENTRALITY tests 
-------------------------------------------------------------- + + set(MG_KATZ_CENTRALITY_TEST_SRCS + "${CMAKE_CURRENT_SOURCE_DIR}/experimental/mg_katz_centrality_test.cpp") + + ConfigureTest(MG_KATZ_CENTRALITY_TEST "${MG_KATZ_CENTRALITY_TEST_SRCS}") + target_link_libraries(MG_KATZ_CENTRALITY_TEST PRIVATE MPI::MPI_C MPI::MPI_CXX) + + ########################################################################################### + # - MG BFS tests -------------------------------------------------------------------------- + + set(MG_BFS_TEST_SRCS + "${CMAKE_CURRENT_SOURCE_DIR}/experimental/mg_bfs_test.cpp") + + ConfigureTest(MG_BFS_TEST "${MG_BFS_TEST_SRCS}") + target_link_libraries(MG_BFS_TEST PRIVATE MPI::MPI_C MPI::MPI_CXX) + + ########################################################################################### + # - MG SSSP tests ------------------------------------------------------------------------- + + set(MG_SSSP_TEST_SRCS + "${CMAKE_CURRENT_SOURCE_DIR}/experimental/mg_sssp_test.cpp") + + ConfigureTest(MG_SSSP_TEST "${MG_SSSP_TEST_SRCS}") + target_link_libraries(MG_SSSP_TEST PRIVATE MPI::MPI_C MPI::MPI_CXX) + + ########################################################################################### + # - MG LOUVAIN tests ---------------------------------------------------------------------- set(MG_LOUVAIN_TEST_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/community/mg_louvain_helper.cu" @@ -453,7 +481,6 @@ if(BUILD_CUGRAPH_MG_TESTS) ConfigureTest(MG_LOUVAIN_TEST "${MG_LOUVAIN_TEST_SRCS}") target_link_libraries(MG_LOUVAIN_TEST PRIVATE MPI::MPI_C MPI::MPI_CXX) - target_link_libraries(MG_LOUVAIN_TEST PRIVATE cugraph) else(MPI_CXX_FOUND) message(FATAL_ERROR "OpenMPI NOT found, cannot build MG tests.") diff --git a/cpp/tests/community/egonet_test.cu b/cpp/tests/community/egonet_test.cu index e7fea43be42..d61080c685e 100644 --- a/cpp/tests/community/egonet_test.cu +++ b/cpp/tests/community/egonet_test.cu @@ -21,7 +21,6 @@ #include #include #include -#include #include #include @@ -129,8 +128,10 @@ class Tests_InducedEgo : public ::testing::TestWithParam { ASSERT_TRUE(h_cugraph_ego_edge_offsets[i] <= h_cugraph_ego_edge_offsets[i + 1]); auto n_vertices = graph_view.get_number_of_vertices(); for (size_t i = 0; i < d_ego_edgelist_src.size(); i++) { - ASSERT_TRUE(cugraph::test::is_valid_vertex(n_vertices, h_cugraph_ego_edgelist_src[i])); - ASSERT_TRUE(cugraph::test::is_valid_vertex(n_vertices, h_cugraph_ego_edgelist_dst[i])); + ASSERT_TRUE( + cugraph::experimental::is_valid_vertex(n_vertices, h_cugraph_ego_edgelist_src[i])); + ASSERT_TRUE( + cugraph::experimental::is_valid_vertex(n_vertices, h_cugraph_ego_edgelist_dst[i])); } /* diff --git a/cpp/tests/community/mg_louvain_helper.cu b/cpp/tests/community/mg_louvain_helper.cu index a7f95e6d718..661065ca65b 100644 --- a/cpp/tests/community/mg_louvain_helper.cu +++ b/cpp/tests/community/mg_louvain_helper.cu @@ -323,7 +323,8 @@ coarsen_graph( handle, edgelist, new_number_of_vertices, - cugraph::experimental::graph_properties_t{graph_view.is_symmetric(), false}, + cugraph::experimental::graph_properties_t{ + graph_view.is_symmetric(), false, graph_view.is_weighted()}, true); } diff --git a/cpp/tests/community/mg_louvain_test.cpp b/cpp/tests/community/mg_louvain_test.cpp index f6596a6b59a..8a1a3010a6f 100644 --- a/cpp/tests/community/mg_louvain_test.cpp +++ b/cpp/tests/community/mg_louvain_test.cpp @@ -31,10 +31,13 @@ #include -void compare(float modularity, float sg_modularity) { ASSERT_FLOAT_EQ(modularity, sg_modularity); } -void compare(double 
modularity, double sg_modularity) +void compare(float mg_modularity, float sg_modularity) { - ASSERT_DOUBLE_EQ(modularity, sg_modularity); + ASSERT_FLOAT_EQ(mg_modularity, sg_modularity); +} +void compare(double mg_modularity, double sg_modularity) +{ + ASSERT_DOUBLE_EQ(mg_modularity, sg_modularity); } //////////////////////////////////////////////////////////////////////////////// @@ -90,13 +93,13 @@ class Louvain_MG_Testfixture : public ::testing::TestWithParam cugraph::Dendrogram const& dendrogram, weight_t resolution, int rank, - weight_t modularity) + weight_t mg_modularity) { auto sg_graph = std::make_unique>( handle); rmm::device_uvector d_clustering_v(0, handle.get_stream()); - weight_t sg_modularity; + weight_t sg_modularity{-1.0}; if (rank == 0) { // Create initial SG graph, renumbered according to the MNMG renumber map @@ -160,7 +163,7 @@ class Louvain_MG_Testfixture : public ::testing::TestWithParam } }); - if (rank == 0) compare(modularity, sg_modularity); + if (rank == 0) compare(mg_modularity, sg_modularity); } // Compare the results of running louvain on multiple GPUs to that of a @@ -197,9 +200,9 @@ class Louvain_MG_Testfixture : public ::testing::TestWithParam auto mg_graph_view = mg_graph.view(); std::unique_ptr> dendrogram; - weight_t modularity; + weight_t mg_modularity; - std::tie(dendrogram, modularity) = + std::tie(dendrogram, mg_modularity) = cugraph::louvain(handle, mg_graph_view, param.max_level, param.resolution); SCOPED_TRACE("compare modularity input: " + param.graph_file_full_path); @@ -213,7 +216,7 @@ class Louvain_MG_Testfixture : public ::testing::TestWithParam *dendrogram, param.resolution, comm_rank, - modularity); + mg_modularity); } }; diff --git a/cpp/tests/experimental/bfs_test.cpp b/cpp/tests/experimental/bfs_test.cpp index ad9ece99ef9..8fce9488d8a 100644 --- a/cpp/tests/experimental/bfs_test.cpp +++ b/cpp/tests/experimental/bfs_test.cpp @@ -16,9 +16,11 @@ #include #include +#include #include #include +#include #include #include @@ -28,10 +30,16 @@ #include +#include #include #include #include +// do the perf measurements +// enabled by command line parameter s'--perf' +// +static int PERF = 0; + template void bfs_reference(edge_t const* offsets, vertex_t const* indices, @@ -74,9 +82,12 @@ void bfs_reference(edge_t const* offsets, typedef struct BFS_Usecase_t { cugraph::test::input_graph_specifier_t input_graph_specifier{}; - size_t source{false}; - BFS_Usecase_t(std::string const& graph_file_path, size_t source) : source(source) + size_t source{0}; + bool check_correctness{false}; + + BFS_Usecase_t(std::string const& graph_file_path, size_t source, bool check_correctness = true) + : source(source), check_correctness(check_correctness) { std::string graph_file_full_path{}; if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { @@ -88,13 +99,43 @@ typedef struct BFS_Usecase_t { input_graph_specifier.graph_file_full_path = graph_file_full_path; }; - BFS_Usecase_t(cugraph::test::rmat_params_t rmat_params, size_t source) : source(source) + BFS_Usecase_t(cugraph::test::rmat_params_t rmat_params, + size_t source, + bool check_correctness = true) + : source(source), check_correctness(check_correctness) { input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::RMAT_PARAMS; input_graph_specifier.rmat_params = rmat_params; } } BFS_Usecase; +template +std::tuple, + rmm::device_uvector> +read_graph(raft::handle_t const& handle, BFS_Usecase const& configuration, bool renumber) +{ + return configuration.input_graph_specifier.tag == + 
cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH + ? cugraph::test:: + read_graph_from_matrix_market_file( + handle, configuration.input_graph_specifier.graph_file_full_path, false, renumber) + : cugraph::test:: + generate_graph_from_rmat_params( + handle, + configuration.input_graph_specifier.rmat_params.scale, + configuration.input_graph_specifier.rmat_params.edge_factor, + configuration.input_graph_specifier.rmat_params.a, + configuration.input_graph_specifier.rmat_params.b, + configuration.input_graph_specifier.rmat_params.c, + configuration.input_graph_specifier.rmat_params.seed, + configuration.input_graph_specifier.rmat_params.undirected, + configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, + false, + renumber, + std::vector{0}, + size_t{1}); +} + class Tests_BFS : public ::testing::TestWithParam { public: Tests_BFS() {} @@ -107,58 +148,21 @@ class Tests_BFS : public ::testing::TestWithParam { template void run_current_test(BFS_Usecase const& configuration) { + constexpr bool renumber = true; + using weight_t = float; raft::handle_t handle{}; cugraph::experimental::graph_t graph(handle); - std::tie(graph, std::ignore) = - configuration.input_graph_specifier.tag == - cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH - ? cugraph::test:: - read_graph_from_matrix_market_file( - handle, configuration.input_graph_specifier.graph_file_full_path, false, false) - : cugraph::test::generate_graph_from_rmat_params( - handle, - configuration.input_graph_specifier.rmat_params.scale, - configuration.input_graph_specifier.rmat_params.edge_factor, - configuration.input_graph_specifier.rmat_params.a, - configuration.input_graph_specifier.rmat_params.b, - configuration.input_graph_specifier.rmat_params.c, - configuration.input_graph_specifier.rmat_params.seed, - configuration.input_graph_specifier.rmat_params.undirected, - configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, - false, - false); + rmm::device_uvector d_renumber_map_labels(0, handle.get_stream()); + std::tie(graph, d_renumber_map_labels) = + read_graph(handle, configuration, renumber); auto graph_view = graph.view(); - std::vector h_offsets(graph_view.get_number_of_vertices() + 1); - std::vector h_indices(graph_view.get_number_of_edges()); - raft::update_host(h_offsets.data(), - graph_view.offsets(), - graph_view.get_number_of_vertices() + 1, - handle.get_stream()); - raft::update_host(h_indices.data(), - graph_view.indices(), - graph_view.get_number_of_edges(), - handle.get_stream()); - CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); - - ASSERT_TRUE(configuration.source >= 0 && - configuration.source <= graph_view.get_number_of_vertices()) - << "Starting sources should be >= 0 and" - << " less than the number of vertices in the graph."; - - std::vector h_reference_distances(graph_view.get_number_of_vertices()); - std::vector h_reference_predecessors(graph_view.get_number_of_vertices()); - - bfs_reference(h_offsets.data(), - h_indices.data(), - h_reference_distances.data(), - h_reference_predecessors.data(), - graph_view.get_number_of_vertices(), - static_cast(configuration.source), - std::numeric_limits::max()); + ASSERT_TRUE(static_cast(configuration.source) >= 0 && + static_cast(configuration.source) < graph_view.get_number_of_vertices()) + << "Invalid starting source."; rmm::device_uvector d_distances(graph_view.get_number_of_vertices(), handle.get_stream()); @@ -169,46 +173,120 @@ class Tests_BFS : public ::testing::TestWithParam { 
cugraph::experimental::bfs(handle, graph_view, - d_distances.begin(), - d_predecessors.begin(), + d_distances.data(), + d_predecessors.data(), static_cast(configuration.source), false, - std::numeric_limits::max(), - false); + std::numeric_limits::max()); CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement - std::vector h_cugraph_distances(graph_view.get_number_of_vertices()); - std::vector h_cugraph_predecessors(graph_view.get_number_of_vertices()); - - raft::update_host( - h_cugraph_distances.data(), d_distances.data(), d_distances.size(), handle.get_stream()); - raft::update_host(h_cugraph_predecessors.data(), - d_predecessors.data(), - d_predecessors.size(), - handle.get_stream()); - CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); - - ASSERT_TRUE(std::equal( - h_reference_distances.begin(), h_reference_distances.end(), h_cugraph_distances.begin())) - << "distances do not match with the reference values."; - - for (auto it = h_cugraph_predecessors.begin(); it != h_cugraph_predecessors.end(); ++it) { - auto i = std::distance(h_cugraph_predecessors.begin(), it); - if (*it == cugraph::invalid_vertex_id::value) { - ASSERT_TRUE(h_reference_predecessors[i] == *it) - << "vertex reachability do not match with the reference."; + if (configuration.check_correctness) { + cugraph::experimental::graph_t unrenumbered_graph( + handle); + if (renumber) { + std::tie(unrenumbered_graph, std::ignore) = + read_graph(handle, configuration, false); + } + auto unrenumbered_graph_view = renumber ? unrenumbered_graph.view() : graph_view; + + std::vector h_offsets(unrenumbered_graph_view.get_number_of_vertices() + 1); + std::vector h_indices(unrenumbered_graph_view.get_number_of_edges()); + raft::update_host(h_offsets.data(), + unrenumbered_graph_view.offsets(), + unrenumbered_graph_view.get_number_of_vertices() + 1, + handle.get_stream()); + raft::update_host(h_indices.data(), + unrenumbered_graph_view.indices(), + unrenumbered_graph_view.get_number_of_edges(), + handle.get_stream()); + + handle.get_stream_view().synchronize(); + + auto unrenumbered_source = static_cast(configuration.source); + if (renumber) { + std::vector h_renumber_map_labels(d_renumber_map_labels.size()); + raft::update_host(h_renumber_map_labels.data(), + d_renumber_map_labels.data(), + d_renumber_map_labels.size(), + handle.get_stream()); + + handle.get_stream_view().synchronize(); + + unrenumbered_source = h_renumber_map_labels[configuration.source]; + } + + std::vector h_reference_distances(unrenumbered_graph_view.get_number_of_vertices()); + std::vector h_reference_predecessors( + unrenumbered_graph_view.get_number_of_vertices()); + + bfs_reference(h_offsets.data(), + h_indices.data(), + h_reference_distances.data(), + h_reference_predecessors.data(), + unrenumbered_graph_view.get_number_of_vertices(), + unrenumbered_source, + std::numeric_limits::max()); + + std::vector h_cugraph_distances(graph_view.get_number_of_vertices()); + std::vector h_cugraph_predecessors(graph_view.get_number_of_vertices()); + if (renumber) { + cugraph::experimental::unrenumber_local_int_vertices(handle, + d_predecessors.data(), + d_predecessors.size(), + d_renumber_map_labels.data(), + vertex_t{0}, + graph_view.get_number_of_vertices(), + true); + + auto d_unrenumbered_distances = cugraph::test::sort_by_key( + handle, d_renumber_map_labels.data(), d_distances.data(), d_renumber_map_labels.size()); + auto d_unrenumbered_predecessors = cugraph::test::sort_by_key(handle, + d_renumber_map_labels.data(), + d_predecessors.data(), + 
d_renumber_map_labels.size()); + raft::update_host(h_cugraph_distances.data(), + d_unrenumbered_distances.data(), + d_unrenumbered_distances.size(), + handle.get_stream()); + raft::update_host(h_cugraph_predecessors.data(), + d_unrenumbered_predecessors.data(), + d_unrenumbered_predecessors.size(), + handle.get_stream()); + + handle.get_stream_view().synchronize(); } else { - ASSERT_TRUE(h_reference_distances[*it] + 1 == h_reference_distances[i]) - << "distance to this vertex != distance to the predecessor vertex + 1."; - bool found{false}; - for (auto j = h_offsets[*it]; j < h_offsets[*it + 1]; ++j) { - if (h_indices[j] == i) { - found = true; - break; + raft::update_host( + h_cugraph_distances.data(), d_distances.data(), d_distances.size(), handle.get_stream()); + raft::update_host(h_cugraph_predecessors.data(), + d_predecessors.data(), + d_predecessors.size(), + handle.get_stream()); + + handle.get_stream_view().synchronize(); + } + + ASSERT_TRUE(std::equal( + h_reference_distances.begin(), h_reference_distances.end(), h_cugraph_distances.begin())) + << "distances do not match with the reference values."; + + for (auto it = h_cugraph_predecessors.begin(); it != h_cugraph_predecessors.end(); ++it) { + auto i = std::distance(h_cugraph_predecessors.begin(), it); + if (*it == cugraph::invalid_vertex_id::value) { + ASSERT_TRUE(h_reference_predecessors[i] == *it) + << "vertex reachability does not match with the reference."; + } else { + ASSERT_TRUE(h_reference_distances[*it] + 1 == h_reference_distances[i]) + << "distance to this vertex != distance to the predecessor vertex + 1."; + bool found{false}; + for (auto j = h_offsets[*it]; j < h_offsets[*it + 1]; ++j) { + if (h_indices[j] == i) { + found = true; + break; + } } + ASSERT_TRUE(found) << "no edge from the predecessor vertex to this vertex."; } - ASSERT_TRUE(found) << "no edge from the predecessor vertex to this vertex."; } } } @@ -221,12 +299,17 @@ INSTANTIATE_TEST_CASE_P( simple_test, Tests_BFS, ::testing::Values( + // enable correctness checks BFS_Usecase("test/datasets/karate.mtx", 0), BFS_Usecase("test/datasets/polbooks.mtx", 0), BFS_Usecase("test/datasets/netscience.mtx", 0), BFS_Usecase("test/datasets/netscience.mtx", 100), BFS_Usecase("test/datasets/wiki2003.mtx", 1000), BFS_Usecase("test/datasets/wiki-Talk.mtx", 1000), - BFS_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, 0))); + BFS_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, 0), + // disable correctness checks for large graphs + BFS_Usecase(cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, + 0, + false))); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/coarsen_graph_test.cpp b/cpp/tests/experimental/coarsen_graph_test.cpp index 789619f2cd9..0fc0634bbbc 100644 --- a/cpp/tests/experimental/coarsen_graph_test.cpp +++ b/cpp/tests/experimental/coarsen_graph_test.cpp @@ -54,13 +54,14 @@ void check_coarsened_graph_results(edge_t* org_offsets, ASSERT_TRUE(std::count_if(org_indices, org_indices + org_offsets[num_org_vertices], [num_org_vertices](auto nbr) { - return !cugraph::test::is_valid_vertex(num_org_vertices, nbr); + return !cugraph::experimental::is_valid_vertex(num_org_vertices, nbr); }) == 0); ASSERT_TRUE(std::is_sorted(coarse_offsets, coarse_offsets + num_coarse_vertices)); ASSERT_TRUE(std::count_if(coarse_indices, coarse_indices + coarse_offsets[num_coarse_vertices], [num_coarse_vertices](auto nbr) { - return !cugraph::test::is_valid_vertex(num_coarse_vertices, 
nbr); + return !cugraph::experimental::is_valid_vertex(num_coarse_vertices, + nbr); }) == 0); ASSERT_TRUE(num_coarse_vertices <= num_org_vertices); diff --git a/cpp/tests/experimental/generate_rmat_test.cpp b/cpp/tests/experimental/generate_rmat_test.cpp index 666106d62ca..221accea4f7 100644 --- a/cpp/tests/experimental/generate_rmat_test.cpp +++ b/cpp/tests/experimental/generate_rmat_test.cpp @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -201,17 +202,19 @@ class Tests_GenerateRmat : public ::testing::TestWithParam (h_cugraph_srcs.size() == (size_t{1} << configuration.scale) * configuration.edge_factor) && (h_cugraph_dsts.size() == (size_t{1} << configuration.scale) * configuration.edge_factor)) << "Returned an invalid number of R-mat graph edges."; - ASSERT_TRUE( - std::count_if(h_cugraph_srcs.begin(), - h_cugraph_srcs.end(), - [num_vertices = static_cast(size_t{1} << configuration.scale)]( - auto v) { return !cugraph::test::is_valid_vertex(num_vertices, v); }) == 0) + ASSERT_TRUE(std::count_if(h_cugraph_srcs.begin(), + h_cugraph_srcs.end(), + [num_vertices = static_cast( + size_t{1} << configuration.scale)](auto v) { + return !cugraph::experimental::is_valid_vertex(num_vertices, v); + }) == 0) << "Returned R-mat graph edges have invalid source vertex IDs."; - ASSERT_TRUE( - std::count_if(h_cugraph_dsts.begin(), - h_cugraph_dsts.end(), - [num_vertices = static_cast(size_t{1} << configuration.scale)]( - auto v) { return !cugraph::test::is_valid_vertex(num_vertices, v); }) == 0) + ASSERT_TRUE(std::count_if(h_cugraph_dsts.begin(), + h_cugraph_dsts.end(), + [num_vertices = static_cast( + size_t{1} << configuration.scale)](auto v) { + return !cugraph::experimental::is_valid_vertex(num_vertices, v); + }) == 0) << "Returned R-mat graph edges have invalid destination vertex IDs."; if (!scramble) { diff --git a/cpp/tests/experimental/graph_test.cpp b/cpp/tests/experimental/graph_test.cpp index 949f6d2e08e..6ce32e0c836 100644 --- a/cpp/tests/experimental/graph_test.cpp +++ b/cpp/tests/experimental/graph_test.cpp @@ -139,7 +139,7 @@ class Tests_Graph : public ::testing::TestWithParam { handle, edgelist, number_of_vertices, - cugraph::experimental::graph_properties_t{is_symmetric, false}, + cugraph::experimental::graph_properties_t{is_symmetric, false, configuration.test_weighted}, false, true); diff --git a/cpp/tests/experimental/katz_centrality_test.cpp b/cpp/tests/experimental/katz_centrality_test.cpp index 776bb60716c..71011f3d018 100644 --- a/cpp/tests/experimental/katz_centrality_test.cpp +++ b/cpp/tests/experimental/katz_centrality_test.cpp @@ -16,9 +16,11 @@ #include #include +#include #include #include +#include #include #include @@ -34,6 +36,11 @@ #include #include +// do the perf measurements +// enabled by command line parameter s'--perf' +// +static int PERF = 0; + template void katz_centrality_reference(edge_t const* offsets, vertex_t const* indices, @@ -92,9 +99,12 @@ typedef struct KatzCentrality_Usecase_t { cugraph::test::input_graph_specifier_t input_graph_specifier{}; bool test_weighted{false}; + bool check_correctness{false}; - KatzCentrality_Usecase_t(std::string const& graph_file_path, bool test_weighted) - : test_weighted(test_weighted) + KatzCentrality_Usecase_t(std::string const& graph_file_path, + bool test_weighted, + bool check_correctness = true) + : test_weighted(test_weighted), check_correctness(check_correctness) { std::string graph_file_full_path{}; if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { @@ -107,15 +117,45 @@ typedef 
struct KatzCentrality_Usecase_t { }; KatzCentrality_Usecase_t(cugraph::test::rmat_params_t rmat_params, - double personalization_ratio, - bool test_weighted) - : test_weighted(test_weighted) + bool test_weighted, + bool check_correctness = true) + : test_weighted(test_weighted), check_correctness(check_correctness) { input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::RMAT_PARAMS; input_graph_specifier.rmat_params = rmat_params; } } KatzCentrality_Usecase; +template +std::tuple, + rmm::device_uvector> +read_graph(raft::handle_t const& handle, KatzCentrality_Usecase const& configuration, bool renumber) +{ + return configuration.input_graph_specifier.tag == + cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH + ? cugraph::test:: + read_graph_from_matrix_market_file( + handle, + configuration.input_graph_specifier.graph_file_full_path, + configuration.test_weighted, + renumber) + : cugraph::test:: + generate_graph_from_rmat_params( + handle, + configuration.input_graph_specifier.rmat_params.scale, + configuration.input_graph_specifier.rmat_params.edge_factor, + configuration.input_graph_specifier.rmat_params.a, + configuration.input_graph_specifier.rmat_params.b, + configuration.input_graph_specifier.rmat_params.c, + configuration.input_graph_specifier.rmat_params.seed, + configuration.input_graph_specifier.rmat_params.undirected, + configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, + configuration.test_weighted, + renumber, + std::vector{0}, + size_t{1}); +} + class Tests_KatzCentrality : public ::testing::TestWithParam { public: Tests_KatzCentrality() {} @@ -128,76 +168,26 @@ class Tests_KatzCentrality : public ::testing::TestWithParam void run_current_test(KatzCentrality_Usecase const& configuration) { + constexpr bool renumber = true; + raft::handle_t handle{}; cugraph::experimental::graph_t graph(handle); - std::tie(graph, std::ignore) = - configuration.input_graph_specifier.tag == - cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH - ? 
cugraph::test:: - read_graph_from_matrix_market_file( - handle, - configuration.input_graph_specifier.graph_file_full_path, - configuration.test_weighted, - false) - : cugraph::test::generate_graph_from_rmat_params( - handle, - configuration.input_graph_specifier.rmat_params.scale, - configuration.input_graph_specifier.rmat_params.edge_factor, - configuration.input_graph_specifier.rmat_params.a, - configuration.input_graph_specifier.rmat_params.b, - configuration.input_graph_specifier.rmat_params.c, - configuration.input_graph_specifier.rmat_params.seed, - configuration.input_graph_specifier.rmat_params.undirected, - configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, - configuration.test_weighted, - false); + rmm::device_uvector d_renumber_map_labels(0, handle.get_stream()); + std::tie(graph, d_renumber_map_labels) = + read_graph(handle, configuration, renumber); auto graph_view = graph.view(); - std::vector h_offsets(graph_view.get_number_of_vertices() + 1); - std::vector h_indices(graph_view.get_number_of_edges()); - std::vector h_weights{}; - raft::update_host(h_offsets.data(), - graph_view.offsets(), - graph_view.get_number_of_vertices() + 1, - handle.get_stream()); - raft::update_host(h_indices.data(), - graph_view.indices(), - graph_view.get_number_of_edges(), - handle.get_stream()); - if (graph_view.is_weighted()) { - h_weights.assign(graph_view.get_number_of_edges(), weight_t{0.0}); - raft::update_host(h_weights.data(), - graph_view.weights(), - graph_view.get_number_of_edges(), - handle.get_stream()); - } - CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); - - std::vector h_reference_katz_centralities(graph_view.get_number_of_vertices()); - - std::vector tmps(h_offsets.size()); - std::adjacent_difference(h_offsets.begin(), h_offsets.end(), tmps.begin()); - auto max_it = std::max_element(tmps.begin(), tmps.end()); + auto degrees = graph_view.compute_in_degrees(handle); + std::vector h_degrees(degrees.size()); + raft::update_host(h_degrees.data(), degrees.data(), degrees.size(), handle.get_stream()); + handle.get_stream_view().synchronize(); + auto max_it = std::max_element(h_degrees.begin(), h_degrees.end()); result_t const alpha = result_t{1.0} / static_cast(*max_it + 1); result_t constexpr beta{1.0}; result_t constexpr epsilon{1e-6}; - katz_centrality_reference( - h_offsets.data(), - h_indices.data(), - h_weights.size() > 0 ? 
h_weights.data() : static_cast(nullptr), - static_cast(nullptr), - h_reference_katz_centralities.data(), - graph_view.get_number_of_vertices(), - alpha, - beta, - epsilon, - std::numeric_limits::max(), - false, - true); - rmm::device_uvector d_katz_centralities(graph_view.get_number_of_vertices(), handle.get_stream()); @@ -206,39 +196,98 @@ class Tests_KatzCentrality : public ::testing::TestWithParam(nullptr), - d_katz_centralities.begin(), + d_katz_centralities.data(), alpha, beta, epsilon, std::numeric_limits::max(), false, - true, - false); + true); CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement - std::vector h_cugraph_katz_centralities(graph_view.get_number_of_vertices()); - - raft::update_host(h_cugraph_katz_centralities.data(), - d_katz_centralities.data(), - d_katz_centralities.size(), - handle.get_stream()); - CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); - - auto threshold_ratio = 1e-3; - auto threshold_magnitude = - (1.0 / static_cast(graph_view.get_number_of_vertices())) * - threshold_ratio; // skip comparison for low Katz Centrality verties (lowly ranked vertices) - auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { - return std::abs(lhs - rhs) < - std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude); - }; - - ASSERT_TRUE(std::equal(h_reference_katz_centralities.begin(), - h_reference_katz_centralities.end(), - h_cugraph_katz_centralities.begin(), - nearly_equal)) - << "Katz centrality values do not match with the reference values."; + if (configuration.check_correctness) { + cugraph::experimental::graph_t unrenumbered_graph( + handle); + if (renumber) { + std::tie(unrenumbered_graph, std::ignore) = + read_graph(handle, configuration, false); + } + auto unrenumbered_graph_view = renumber ? unrenumbered_graph.view() : graph_view; + + std::vector h_offsets(unrenumbered_graph_view.get_number_of_vertices() + 1); + std::vector h_indices(unrenumbered_graph_view.get_number_of_edges()); + std::vector h_weights{}; + raft::update_host(h_offsets.data(), + unrenumbered_graph_view.offsets(), + unrenumbered_graph_view.get_number_of_vertices() + 1, + handle.get_stream()); + raft::update_host(h_indices.data(), + unrenumbered_graph_view.indices(), + unrenumbered_graph_view.get_number_of_edges(), + handle.get_stream()); + if (unrenumbered_graph_view.is_weighted()) { + h_weights.assign(unrenumbered_graph_view.get_number_of_edges(), weight_t{0.0}); + raft::update_host(h_weights.data(), + unrenumbered_graph_view.weights(), + unrenumbered_graph_view.get_number_of_edges(), + handle.get_stream()); + } + + handle.get_stream_view().synchronize(); + + std::vector h_reference_katz_centralities( + unrenumbered_graph_view.get_number_of_vertices()); + + katz_centrality_reference( + h_offsets.data(), + h_indices.data(), + h_weights.size() > 0 ? 
h_weights.data() : static_cast(nullptr), + static_cast(nullptr), + h_reference_katz_centralities.data(), + unrenumbered_graph_view.get_number_of_vertices(), + alpha, + beta, + epsilon, + std::numeric_limits::max(), + false, + true); + + std::vector h_cugraph_katz_centralities(graph_view.get_number_of_vertices()); + if (renumber) { + auto d_unrenumbered_katz_centralities = + cugraph::test::sort_by_key(handle, + d_renumber_map_labels.data(), + d_katz_centralities.data(), + d_renumber_map_labels.size()); + raft::update_host(h_cugraph_katz_centralities.data(), + d_unrenumbered_katz_centralities.data(), + d_unrenumbered_katz_centralities.size(), + handle.get_stream()); + } else { + raft::update_host(h_cugraph_katz_centralities.data(), + d_katz_centralities.data(), + d_katz_centralities.size(), + handle.get_stream()); + } + + handle.get_stream_view().synchronize(); + + auto threshold_ratio = 1e-3; + auto threshold_magnitude = + (1.0 / static_cast(graph_view.get_number_of_vertices())) * + threshold_ratio; // skip comparison for low Katz Centrality verties (lowly ranked vertices) + auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { + return std::abs(lhs - rhs) < + std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude); + }; + + ASSERT_TRUE(std::equal(h_reference_katz_centralities.begin(), + h_reference_katz_centralities.end(), + h_cugraph_katz_centralities.begin(), + nearly_equal)) + << "Katz centrality values do not match with the reference values."; + } } }; @@ -252,6 +301,7 @@ INSTANTIATE_TEST_CASE_P( simple_test, Tests_KatzCentrality, ::testing::Values( + // enable correctness checks KatzCentrality_Usecase("test/datasets/karate.mtx", false), KatzCentrality_Usecase("test/datasets/karate.mtx", true), KatzCentrality_Usecase("test/datasets/web-Google.mtx", false), @@ -261,16 +311,15 @@ INSTANTIATE_TEST_CASE_P( KatzCentrality_Usecase("test/datasets/webbase-1M.mtx", false), KatzCentrality_Usecase("test/datasets/webbase-1M.mtx", true), KatzCentrality_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, - 0.0, - false), - KatzCentrality_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, - 0.5, false), KatzCentrality_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, - 0.0, true), - KatzCentrality_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, - 0.5, - true))); + // disable correctness checks for large graphs + KatzCentrality_Usecase(cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, + false, + false), + KatzCentrality_Usecase(cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, + true, + false))); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/mg_bfs_test.cpp b/cpp/tests/experimental/mg_bfs_test.cpp new file mode 100644 index 00000000000..76ccb5d9de3 --- /dev/null +++ b/cpp/tests/experimental/mg_bfs_test.cpp @@ -0,0 +1,303 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include + +typedef struct BFS_Usecase_t { + cugraph::test::input_graph_specifier_t input_graph_specifier{}; + + size_t source{0}; + bool check_correctness{false}; + + BFS_Usecase_t(std::string const& graph_file_path, size_t source, bool check_correctness = true) + : source(source), check_correctness(check_correctness) + { + std::string graph_file_full_path{}; + if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { + graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; + } else { + graph_file_full_path = graph_file_path; + } + input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH; + input_graph_specifier.graph_file_full_path = graph_file_full_path; + }; + + BFS_Usecase_t(cugraph::test::rmat_params_t rmat_params, + size_t source, + bool check_correctness = true) + : source(source), check_correctness(check_correctness) + { + input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::RMAT_PARAMS; + input_graph_specifier.rmat_params = rmat_params; + } +} BFS_Usecase; + +template +std::tuple, + rmm::device_uvector> +read_graph(raft::handle_t const& handle, BFS_Usecase const& configuration, bool renumber) +{ + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + + std::vector partition_ids(multi_gpu ? size_t{1} : static_cast(comm_size)); + std::iota(partition_ids.begin(), + partition_ids.end(), + multi_gpu ? static_cast(comm_rank) : size_t{0}); + + return configuration.input_graph_specifier.tag == + cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH + ? cugraph::test:: + read_graph_from_matrix_market_file( + handle, configuration.input_graph_specifier.graph_file_full_path, false, renumber) + : cugraph::test:: + generate_graph_from_rmat_params( + handle, + configuration.input_graph_specifier.rmat_params.scale, + configuration.input_graph_specifier.rmat_params.edge_factor, + configuration.input_graph_specifier.rmat_params.a, + configuration.input_graph_specifier.rmat_params.b, + configuration.input_graph_specifier.rmat_params.c, + configuration.input_graph_specifier.rmat_params.seed, + configuration.input_graph_specifier.rmat_params.undirected, + configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, + false, + renumber, + partition_ids, + static_cast(comm_size)); +} + +class Tests_MGBFS : public ::testing::TestWithParam { + public: + Tests_MGBFS() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + // Compare the results of running BFS on multiple GPUs to that of a single-GPU run + template + void run_current_test(BFS_Usecase const& configuration) + { + using weight_t = float; + + // 1. initialize handle + + raft::handle_t handle{}; + + raft::comms::initialize_mpi_comms(&handle, MPI_COMM_WORLD); + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + + auto row_comm_size = static_cast(sqrt(static_cast(comm_size))); + while (comm_size % row_comm_size != 0) { --row_comm_size; } + cugraph::partition_2d::subcomm_factory_t + subcomm_factory(handle, row_comm_size); + + // 2. 
create MG graph
+
+    cugraph::experimental::graph_t mg_graph(handle);
+    rmm::device_uvector d_mg_renumber_map_labels(0, handle.get_stream());
+    std::tie(mg_graph, d_mg_renumber_map_labels) =
+      read_graph(handle, configuration, true);
+
+    auto mg_graph_view = mg_graph.view();
+
+    ASSERT_TRUE(static_cast(configuration.source) >= 0 &&
+                static_cast(configuration.source) <
+                  mg_graph_view.get_number_of_vertices())
+      << "Invalid starting source.";
+
+    // 3. run MG BFS
+
+    rmm::device_uvector d_mg_distances(mg_graph_view.get_number_of_local_vertices(),
+                                       handle.get_stream());
+    rmm::device_uvector d_mg_predecessors(mg_graph_view.get_number_of_local_vertices(),
+                                          handle.get_stream());
+
+    CUDA_TRY(cudaDeviceSynchronize());  // for consistent performance measurement
+
+    cugraph::experimental::bfs(handle,
+                               mg_graph_view,
+                               d_mg_distances.data(),
+                               d_mg_predecessors.data(),
+                               static_cast(configuration.source),
+                               false,
+                               std::numeric_limits::max(),
+                               true);
+
+    CUDA_TRY(cudaDeviceSynchronize());  // for consistent performance measurement
+
+    // 5. compare SG & MG results
+
+    if (configuration.check_correctness) {
+      // 5-1. create SG graph
+
+      cugraph::experimental::graph_t sg_graph(handle);
+      std::tie(sg_graph, std::ignore) =
+        read_graph(handle, configuration, false);
+
+      auto sg_graph_view = sg_graph.view();
+
+      std::vector vertex_partition_lasts(comm_size);
+      for (size_t i = 0; i < vertex_partition_lasts.size(); ++i) {
+        vertex_partition_lasts[i] = mg_graph_view.get_vertex_partition_last(i);
+      }
+
+      rmm::device_scalar d_source(static_cast(configuration.source),
+                                  handle.get_stream());
+      cugraph::experimental::unrenumber_int_vertices(
+        handle,
+        d_source.data(),
+        size_t{1},
+        d_mg_renumber_map_labels.data(),
+        mg_graph_view.get_local_vertex_first(),
+        mg_graph_view.get_local_vertex_last(),
+        vertex_partition_lasts,
+        true);
+      auto unrenumbered_source = d_source.value(handle.get_stream());
+
+      // 5-2. run SG BFS
+
+      rmm::device_uvector d_sg_distances(sg_graph_view.get_number_of_local_vertices(),
+                                         handle.get_stream());
+      rmm::device_uvector d_sg_predecessors(sg_graph_view.get_number_of_local_vertices(),
+                                            handle.get_stream());
+
+      cugraph::experimental::bfs(handle,
+                                 sg_graph_view,
+                                 d_sg_distances.data(),
+                                 d_sg_predecessors.data(),
+                                 unrenumbered_source,
+                                 false,
+                                 std::numeric_limits::max(),
+                                 true);
+
+      // 5-3. 
compare + + std::vector h_sg_offsets(sg_graph_view.get_number_of_vertices() + 1); + std::vector h_sg_indices(sg_graph_view.get_number_of_edges()); + raft::update_host(h_sg_offsets.data(), + sg_graph_view.offsets(), + sg_graph_view.get_number_of_vertices() + 1, + handle.get_stream()); + raft::update_host(h_sg_indices.data(), + sg_graph_view.indices(), + sg_graph_view.get_number_of_edges(), + handle.get_stream()); + + std::vector h_sg_distances(sg_graph_view.get_number_of_vertices()); + std::vector h_sg_predecessors(sg_graph_view.get_number_of_vertices()); + raft::update_host( + h_sg_distances.data(), d_sg_distances.data(), d_sg_distances.size(), handle.get_stream()); + raft::update_host(h_sg_predecessors.data(), + d_sg_predecessors.data(), + d_sg_predecessors.size(), + handle.get_stream()); + + std::vector h_mg_distances(mg_graph_view.get_number_of_local_vertices()); + std::vector h_mg_predecessors(mg_graph_view.get_number_of_local_vertices()); + raft::update_host( + h_mg_distances.data(), d_mg_distances.data(), d_mg_distances.size(), handle.get_stream()); + cugraph::experimental::unrenumber_int_vertices( + handle, + d_mg_predecessors.data(), + d_mg_predecessors.size(), + d_mg_renumber_map_labels.data(), + mg_graph_view.get_local_vertex_first(), + mg_graph_view.get_local_vertex_last(), + vertex_partition_lasts, + true); + raft::update_host(h_mg_predecessors.data(), + d_mg_predecessors.data(), + d_mg_predecessors.size(), + handle.get_stream()); + + std::vector h_mg_renumber_map_labels(d_mg_renumber_map_labels.size()); + raft::update_host(h_mg_renumber_map_labels.data(), + d_mg_renumber_map_labels.data(), + d_mg_renumber_map_labels.size(), + handle.get_stream()); + + handle.get_stream_view().synchronize(); + + for (vertex_t i = 0; i < mg_graph_view.get_number_of_local_vertices(); ++i) { + auto mapped_vertex = h_mg_renumber_map_labels[i]; + ASSERT_TRUE(h_mg_distances[i] == h_sg_distances[mapped_vertex]) + << "MG BFS distance for vertex: " << mapped_vertex << " in rank: " << comm_rank + << " has value: " << h_mg_distances[i] + << " different from the corresponding SG value: " << h_sg_distances[mapped_vertex]; + if (h_mg_predecessors[i] == cugraph::invalid_vertex_id::value) { + ASSERT_TRUE(h_sg_predecessors[mapped_vertex] == h_mg_predecessors[i]) + << "vertex reachability does not match with the SG result."; + } else { + ASSERT_TRUE(h_sg_distances[h_mg_predecessors[i]] + 1 == h_sg_distances[mapped_vertex]) + << "distances to this vertex != distances to the predecessor vertex + 1."; + bool found{false}; + for (auto j = h_sg_offsets[h_mg_predecessors[i]]; + j < h_sg_offsets[h_mg_predecessors[i] + 1]; + ++j) { + if (h_sg_indices[j] == mapped_vertex) { + found = true; + break; + } + } + ASSERT_TRUE(found) << "no edge from the predecessor vertex to this vertex."; + } + } + } + } +}; + +TEST_P(Tests_MGBFS, CheckInt32Int32) { run_current_test(GetParam()); } + +INSTANTIATE_TEST_CASE_P( + simple_test, + Tests_MGBFS, + ::testing::Values( + // enable correctness checks + BFS_Usecase("test/datasets/karate.mtx", 0), + BFS_Usecase("test/datasets/web-Google.mtx", 0), + BFS_Usecase("test/datasets/ljournal-2008.mtx", 0), + BFS_Usecase("test/datasets/webbase-1M.mtx", 0), + BFS_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, 0), + // disable correctness checks for large graphs + BFS_Usecase(cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, + 0, + false))); + +CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/mg_katz_centrality_test.cpp 
b/cpp/tests/experimental/mg_katz_centrality_test.cpp new file mode 100644 index 00000000000..e3033af3771 --- /dev/null +++ b/cpp/tests/experimental/mg_katz_centrality_test.cpp @@ -0,0 +1,268 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include + +typedef struct KatzCentrality_Usecase_t { + cugraph::test::input_graph_specifier_t input_graph_specifier{}; + + bool test_weighted{false}; + bool check_correctness{false}; + + KatzCentrality_Usecase_t(std::string const& graph_file_path, + bool test_weighted, + bool check_correctness = true) + : test_weighted(test_weighted), check_correctness(check_correctness) + { + std::string graph_file_full_path{}; + if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { + graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; + } else { + graph_file_full_path = graph_file_path; + } + input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH; + input_graph_specifier.graph_file_full_path = graph_file_full_path; + }; + + KatzCentrality_Usecase_t(cugraph::test::rmat_params_t rmat_params, + bool test_weighted, + bool check_correctness = true) + : test_weighted(test_weighted), check_correctness(check_correctness) + { + input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::RMAT_PARAMS; + input_graph_specifier.rmat_params = rmat_params; + } +} KatzCentrality_Usecase; + +template +std::tuple, + rmm::device_uvector> +read_graph(raft::handle_t const& handle, KatzCentrality_Usecase const& configuration, bool renumber) +{ + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + + std::vector partition_ids(multi_gpu ? size_t{1} : static_cast(comm_size)); + std::iota(partition_ids.begin(), + partition_ids.end(), + multi_gpu ? static_cast(comm_rank) : size_t{0}); + + return configuration.input_graph_specifier.tag == + cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH + ? 
cugraph::test::
+          read_graph_from_matrix_market_file(
+            handle,
+            configuration.input_graph_specifier.graph_file_full_path,
+            configuration.test_weighted,
+            renumber)
+        : cugraph::test::
+            generate_graph_from_rmat_params(
+              handle,
+              configuration.input_graph_specifier.rmat_params.scale,
+              configuration.input_graph_specifier.rmat_params.edge_factor,
+              configuration.input_graph_specifier.rmat_params.a,
+              configuration.input_graph_specifier.rmat_params.b,
+              configuration.input_graph_specifier.rmat_params.c,
+              configuration.input_graph_specifier.rmat_params.seed,
+              configuration.input_graph_specifier.rmat_params.undirected,
+              configuration.input_graph_specifier.rmat_params.scramble_vertex_ids,
+              configuration.test_weighted,
+              renumber,
+              partition_ids,
+              static_cast(comm_size));
+}
+
+class Tests_MGKatzCentrality : public ::testing::TestWithParam {
+ public:
+  Tests_MGKatzCentrality() {}
+  static void SetupTestCase() {}
+  static void TearDownTestCase() {}
+
+  virtual void SetUp() {}
+  virtual void TearDown() {}
+
+  // Compare the results of running Katz Centrality on multiple GPUs to that of a single-GPU run
+  template
+  void run_current_test(KatzCentrality_Usecase const& configuration)
+  {
+    // 1. initialize handle
+
+    raft::handle_t handle{};
+
+    raft::comms::initialize_mpi_comms(&handle, MPI_COMM_WORLD);
+    auto& comm = handle.get_comms();
+    auto const comm_size = comm.get_size();
+    auto const comm_rank = comm.get_rank();
+
+    auto row_comm_size = static_cast(sqrt(static_cast(comm_size)));
+    while (comm_size % row_comm_size != 0) { --row_comm_size; }
+    cugraph::partition_2d::subcomm_factory_t
+      subcomm_factory(handle, row_comm_size);
+
+    // 2. create MG graph
+
+    cugraph::experimental::graph_t mg_graph(handle);
+    rmm::device_uvector d_mg_renumber_map_labels(0, handle.get_stream());
+    std::tie(mg_graph, d_mg_renumber_map_labels) =
+      read_graph(handle, configuration, true);
+
+    auto mg_graph_view = mg_graph.view();
+
+    // 3. compute max in-degree
+
+    auto max_in_degree = mg_graph_view.compute_max_in_degree(handle);
+
+    // 4. run MG Katz Centrality
+
+    result_t const alpha = result_t{1.0} / static_cast(max_in_degree + 1);
+    result_t constexpr beta{1.0};
+    result_t constexpr epsilon{1e-6};
+
+    rmm::device_uvector d_mg_katz_centralities(
+      mg_graph_view.get_number_of_local_vertices(), handle.get_stream());
+
+    CUDA_TRY(cudaDeviceSynchronize());  // for consistent performance measurement
+
+    cugraph::experimental::katz_centrality(handle,
+                                           mg_graph_view,
+                                           static_cast(nullptr),
+                                           d_mg_katz_centralities.data(),
+                                           alpha,
+                                           beta,
+                                           epsilon,
+                                           std::numeric_limits::max(),
+                                           false,
+                                           true);
+
+    CUDA_TRY(cudaDeviceSynchronize());  // for consistent performance measurement
+
+    // 5. compare SG & MG results
+
+    if (configuration.check_correctness) {
+      // 5-1. create SG graph
+
+      cugraph::experimental::graph_t sg_graph(handle);
+      std::tie(sg_graph, std::ignore) =
+        read_graph(handle, configuration, false);
+
+      auto sg_graph_view = sg_graph.view();
+
+      // 5-2. run SG Katz Centrality
+
+      rmm::device_uvector d_sg_katz_centralities(sg_graph_view.get_number_of_vertices(),
+                                                 handle.get_stream());
+
+      cugraph::experimental::katz_centrality(handle,
+                                             sg_graph_view,
+                                             static_cast(nullptr),
+                                             d_sg_katz_centralities.data(),
+                                             alpha,
+                                             beta,
+                                             epsilon,
+                                             std::numeric_limits::max(),  // max_iterations
+                                             false,
+                                             true);
+
+      // 5-3. 
compare
+
+      std::vector h_sg_katz_centralities(sg_graph_view.get_number_of_vertices());
+      raft::update_host(h_sg_katz_centralities.data(),
+                        d_sg_katz_centralities.data(),
+                        d_sg_katz_centralities.size(),
+                        handle.get_stream());
+
+      std::vector h_mg_katz_centralities(mg_graph_view.get_number_of_local_vertices());
+      raft::update_host(h_mg_katz_centralities.data(),
+                        d_mg_katz_centralities.data(),
+                        d_mg_katz_centralities.size(),
+                        handle.get_stream());
+
+      std::vector h_mg_renumber_map_labels(d_mg_renumber_map_labels.size());
+      raft::update_host(h_mg_renumber_map_labels.data(),
+                        d_mg_renumber_map_labels.data(),
+                        d_mg_renumber_map_labels.size(),
+                        handle.get_stream());
+
+      handle.get_stream_view().synchronize();
+
+      auto threshold_ratio = 1e-3;
+      auto threshold_magnitude =
+        (1.0 / static_cast(mg_graph_view.get_number_of_vertices())) *
+        threshold_ratio;  // skip comparison for low Katz Centrality vertices (lowly ranked vertices)
+      auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) {
+        return std::abs(lhs - rhs) <
+               std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude);
+      };
+
+      for (vertex_t i = 0; i < mg_graph_view.get_number_of_local_vertices(); ++i) {
+        auto mapped_vertex = h_mg_renumber_map_labels[i];
+        ASSERT_TRUE(nearly_equal(h_mg_katz_centralities[i], h_sg_katz_centralities[mapped_vertex]))
+          << "MG KatzCentrality value for vertex: " << mapped_vertex << " in rank: " << comm_rank
+          << " has value: " << h_mg_katz_centralities[i]
+          << " which exceeds the error margin for comparing to SG value: "
+          << h_sg_katz_centralities[mapped_vertex];
+      }
+    }
+  }
+};
+
+TEST_P(Tests_MGKatzCentrality, CheckInt32Int32FloatFloat)
+{
+  run_current_test(GetParam());
+}
+
+INSTANTIATE_TEST_CASE_P(
+  simple_test,
+  Tests_MGKatzCentrality,
+  ::testing::Values(
+    // enable correctness checks
+    KatzCentrality_Usecase("test/datasets/karate.mtx", false),
+    KatzCentrality_Usecase("test/datasets/karate.mtx", true),
+    KatzCentrality_Usecase("test/datasets/web-Google.mtx", false),
+    KatzCentrality_Usecase("test/datasets/web-Google.mtx", true),
+    KatzCentrality_Usecase("test/datasets/ljournal-2008.mtx", false),
+    KatzCentrality_Usecase("test/datasets/ljournal-2008.mtx", true),
+    KatzCentrality_Usecase("test/datasets/webbase-1M.mtx", false),
+    KatzCentrality_Usecase("test/datasets/webbase-1M.mtx", true),
+    KatzCentrality_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false},
+                           false),
+    KatzCentrality_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false},
+                           true),
+    // disable correctness checks for large graphs
+    KatzCentrality_Usecase(cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false},
+                           false,
+                           false),
+    KatzCentrality_Usecase(cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false},
+                           true,
+                           false)));
+
+CUGRAPH_MG_TEST_PROGRAM_MAIN()
diff --git a/cpp/tests/experimental/mg_sssp_test.cpp b/cpp/tests/experimental/mg_sssp_test.cpp
new file mode 100644
index 00000000000..48e4dc869f4
--- /dev/null
+++ b/cpp/tests/experimental/mg_sssp_test.cpp
@@ -0,0 +1,314 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include + +typedef struct SSSP_Usecase_t { + cugraph::test::input_graph_specifier_t input_graph_specifier{}; + + size_t source{0}; + bool check_correctness{false}; + + SSSP_Usecase_t(std::string const& graph_file_path, size_t source, bool check_correctness = true) + : source(source), check_correctness(check_correctness) + { + std::string graph_file_full_path{}; + if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { + graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; + } else { + graph_file_full_path = graph_file_path; + } + input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH; + input_graph_specifier.graph_file_full_path = graph_file_full_path; + }; + + SSSP_Usecase_t(cugraph::test::rmat_params_t rmat_params, + size_t source, + bool check_correctness = true) + : source(source), check_correctness(check_correctness) + { + input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::RMAT_PARAMS; + input_graph_specifier.rmat_params = rmat_params; + } +} SSSP_Usecase; + +template +std::tuple, + rmm::device_uvector> +read_graph(raft::handle_t const& handle, SSSP_Usecase const& configuration, bool renumber) +{ + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + + std::vector partition_ids(multi_gpu ? size_t{1} : static_cast(comm_size)); + std::iota(partition_ids.begin(), + partition_ids.end(), + multi_gpu ? static_cast(comm_rank) : size_t{0}); + + return configuration.input_graph_specifier.tag == + cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH + ? cugraph::test:: + read_graph_from_matrix_market_file( + handle, configuration.input_graph_specifier.graph_file_full_path, true, renumber) + : cugraph::test:: + generate_graph_from_rmat_params( + handle, + configuration.input_graph_specifier.rmat_params.scale, + configuration.input_graph_specifier.rmat_params.edge_factor, + configuration.input_graph_specifier.rmat_params.a, + configuration.input_graph_specifier.rmat_params.b, + configuration.input_graph_specifier.rmat_params.c, + configuration.input_graph_specifier.rmat_params.seed, + configuration.input_graph_specifier.rmat_params.undirected, + configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, + true, + renumber, + partition_ids, + static_cast(comm_size)); +} + +class Tests_MGSSSP : public ::testing::TestWithParam { + public: + Tests_MGSSSP() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + // Compare the results of running SSSP on multiple GPUs to that of a single-GPU run + template + void run_current_test(SSSP_Usecase const& configuration) + { + // 1. 
initialize handle
+
+    raft::handle_t handle{};
+
+    raft::comms::initialize_mpi_comms(&handle, MPI_COMM_WORLD);
+    auto& comm = handle.get_comms();
+    auto const comm_size = comm.get_size();
+    auto const comm_rank = comm.get_rank();
+
+    auto row_comm_size = static_cast(sqrt(static_cast(comm_size)));
+    while (comm_size % row_comm_size != 0) { --row_comm_size; }
+    cugraph::partition_2d::subcomm_factory_t
+      subcomm_factory(handle, row_comm_size);
+
+    // 2. create MG graph
+
+    cugraph::experimental::graph_t mg_graph(handle);
+    rmm::device_uvector d_mg_renumber_map_labels(0, handle.get_stream());
+    std::tie(mg_graph, d_mg_renumber_map_labels) =
+      read_graph(handle, configuration, true);
+
+    auto mg_graph_view = mg_graph.view();
+
+    ASSERT_TRUE(static_cast(configuration.source) >= 0 &&
+                static_cast(configuration.source) <
+                  mg_graph_view.get_number_of_vertices())
+      << "Invalid starting source.";
+
+    // 3. run MG SSSP
+
+    rmm::device_uvector d_mg_distances(mg_graph_view.get_number_of_local_vertices(),
+                                       handle.get_stream());
+    rmm::device_uvector d_mg_predecessors(mg_graph_view.get_number_of_local_vertices(),
+                                          handle.get_stream());
+
+    CUDA_TRY(cudaDeviceSynchronize());  // for consistent performance measurement
+
+    // FIXME: disable do_expensive_check
+    cugraph::experimental::sssp(handle,
+                                mg_graph_view,
+                                d_mg_distances.data(),
+                                d_mg_predecessors.data(),
+                                static_cast(configuration.source),
+                                std::numeric_limits::max(),
+                                true);
+
+    CUDA_TRY(cudaDeviceSynchronize());  // for consistent performance measurement
+
+    // 5. compare SG & MG results
+
+    if (configuration.check_correctness) {
+      // 5-1. create SG graph
+
+      cugraph::experimental::graph_t sg_graph(handle);
+      std::tie(sg_graph, std::ignore) =
+        read_graph(handle, configuration, false);
+
+      auto sg_graph_view = sg_graph.view();
+
+      std::vector vertex_partition_lasts(comm_size);
+      for (size_t i = 0; i < vertex_partition_lasts.size(); ++i) {
+        vertex_partition_lasts[i] = mg_graph_view.get_vertex_partition_last(i);
+      }
+
+      rmm::device_scalar d_source(static_cast(configuration.source),
+                                  handle.get_stream());
+      cugraph::experimental::unrenumber_int_vertices(
+        handle,
+        d_source.data(),
+        size_t{1},
+        d_mg_renumber_map_labels.data(),
+        mg_graph_view.get_local_vertex_first(),
+        mg_graph_view.get_local_vertex_last(),
+        vertex_partition_lasts,
+        true);
+      auto unrenumbered_source = d_source.value(handle.get_stream());
+
+      // 5-2. run SG SSSP
+
+      rmm::device_uvector d_sg_distances(sg_graph_view.get_number_of_local_vertices(),
+                                         handle.get_stream());
+      rmm::device_uvector d_sg_predecessors(sg_graph_view.get_number_of_local_vertices(),
+                                            handle.get_stream());
+
+      // FIXME: disable do_expensive_check
+      cugraph::experimental::sssp(handle,
+                                  sg_graph_view,
+                                  d_sg_distances.data(),
+                                  d_sg_predecessors.data(),
+                                  unrenumbered_source,
+                                  std::numeric_limits::max(),
+                                  true);
+
+      // 5-3. 
compare + + std::vector h_sg_offsets(sg_graph_view.get_number_of_vertices() + 1); + std::vector h_sg_indices(sg_graph_view.get_number_of_edges()); + std::vector h_sg_weights(sg_graph_view.get_number_of_edges()); + raft::update_host(h_sg_offsets.data(), + sg_graph_view.offsets(), + sg_graph_view.get_number_of_vertices() + 1, + handle.get_stream()); + raft::update_host(h_sg_indices.data(), + sg_graph_view.indices(), + sg_graph_view.get_number_of_edges(), + handle.get_stream()); + raft::update_host(h_sg_weights.data(), + sg_graph_view.weights(), + sg_graph_view.get_number_of_edges(), + handle.get_stream()); + + std::vector h_sg_distances(sg_graph_view.get_number_of_vertices()); + std::vector h_sg_predecessors(sg_graph_view.get_number_of_vertices()); + raft::update_host( + h_sg_distances.data(), d_sg_distances.data(), d_sg_distances.size(), handle.get_stream()); + raft::update_host(h_sg_predecessors.data(), + d_sg_predecessors.data(), + d_sg_predecessors.size(), + handle.get_stream()); + + std::vector h_mg_distances(mg_graph_view.get_number_of_local_vertices()); + std::vector h_mg_predecessors(mg_graph_view.get_number_of_local_vertices()); + raft::update_host( + h_mg_distances.data(), d_mg_distances.data(), d_mg_distances.size(), handle.get_stream()); + cugraph::experimental::unrenumber_int_vertices( + handle, + d_mg_predecessors.data(), + d_mg_predecessors.size(), + d_mg_renumber_map_labels.data(), + mg_graph_view.get_local_vertex_first(), + mg_graph_view.get_local_vertex_last(), + vertex_partition_lasts, + true); + raft::update_host(h_mg_predecessors.data(), + d_mg_predecessors.data(), + d_mg_predecessors.size(), + handle.get_stream()); + + std::vector h_mg_renumber_map_labels(d_mg_renumber_map_labels.size()); + raft::update_host(h_mg_renumber_map_labels.data(), + d_mg_renumber_map_labels.data(), + d_mg_renumber_map_labels.size(), + handle.get_stream()); + + handle.get_stream_view().synchronize(); + + auto max_weight_element = std::max_element(h_sg_weights.begin(), h_sg_weights.end()); + auto epsilon = *max_weight_element * weight_t{1e-6}; + auto nearly_equal = [epsilon](auto lhs, auto rhs) { return std::fabs(lhs - rhs) < epsilon; }; + + for (vertex_t i = 0; i < mg_graph_view.get_number_of_local_vertices(); ++i) { + auto mapped_vertex = h_mg_renumber_map_labels[i]; + ASSERT_TRUE(nearly_equal(h_mg_distances[i], h_sg_distances[mapped_vertex])) + << "MG SSSP distance for vertex: " << mapped_vertex << " in rank: " << comm_rank + << " has value: " << h_mg_distances[i] + << " different from the corresponding SG value: " << h_sg_distances[mapped_vertex]; + if (h_mg_predecessors[i] == cugraph::invalid_vertex_id::value) { + ASSERT_TRUE(h_sg_predecessors[mapped_vertex] == h_mg_predecessors[i]) + << "vertex reachability does not match with the SG result."; + } else { + auto pred_distance = h_sg_distances[h_mg_predecessors[i]]; + bool found{false}; + for (auto j = h_sg_offsets[h_mg_predecessors[i]]; + j < h_sg_offsets[h_mg_predecessors[i] + 1]; + ++j) { + if (h_sg_indices[j] == mapped_vertex) { + if (nearly_equal(pred_distance + h_sg_weights[j], h_sg_distances[mapped_vertex])) { + found = true; + break; + } + } + } + ASSERT_TRUE(found) + << "no edge from the predecessor vertex to this vertex with the matching weight."; + } + } + } + } +}; + +TEST_P(Tests_MGSSSP, CheckInt32Int32Float) +{ + run_current_test(GetParam()); +} + +INSTANTIATE_TEST_CASE_P( + simple_test, + Tests_MGSSSP, + ::testing::Values( + // enable correctness checks + SSSP_Usecase("test/datasets/karate.mtx", 0), + 
SSSP_Usecase("test/datasets/dblp.mtx", 0), + SSSP_Usecase("test/datasets/wiki2003.mtx", 1000), + SSSP_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, 0), + // disable correctness checks for large graphs + SSSP_Usecase(cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, + 0, + false))); + +CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/pagerank_test.cpp b/cpp/tests/experimental/pagerank_test.cpp index ff3b073cbc7..649fe11d805 100644 --- a/cpp/tests/experimental/pagerank_test.cpp +++ b/cpp/tests/experimental/pagerank_test.cpp @@ -16,9 +16,11 @@ #include #include +#include #include #include +#include #include #include @@ -35,6 +37,11 @@ #include #include +// do the perf measurements +// enabled by command line parameter s'--perf' +// +static int PERF = 0; + template void pagerank_reference(edge_t const* offsets, vertex_t const* indices, @@ -128,11 +135,15 @@ typedef struct PageRank_Usecase_t { double personalization_ratio{0.0}; bool test_weighted{false}; + bool check_correctness{false}; PageRank_Usecase_t(std::string const& graph_file_path, double personalization_ratio, - bool test_weighted) - : personalization_ratio(personalization_ratio), test_weighted(test_weighted) + bool test_weighted, + bool check_correctness = true) + : personalization_ratio(personalization_ratio), + test_weighted(test_weighted), + check_correctness(check_correctness) { std::string graph_file_full_path{}; if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { @@ -146,14 +157,47 @@ typedef struct PageRank_Usecase_t { PageRank_Usecase_t(cugraph::test::rmat_params_t rmat_params, double personalization_ratio, - bool test_weighted) - : personalization_ratio(personalization_ratio), test_weighted(test_weighted) + bool test_weighted, + bool check_correctness = true) + : personalization_ratio(personalization_ratio), + test_weighted(test_weighted), + check_correctness(check_correctness) { input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::RMAT_PARAMS; input_graph_specifier.rmat_params = rmat_params; } } PageRank_Usecase; +template +std::tuple, + rmm::device_uvector> +read_graph(raft::handle_t const& handle, PageRank_Usecase const& configuration, bool renumber) +{ + return configuration.input_graph_specifier.tag == + cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH + ? 
cugraph::test:: + read_graph_from_matrix_market_file( + handle, + configuration.input_graph_specifier.graph_file_full_path, + configuration.test_weighted, + renumber) + : cugraph::test:: + generate_graph_from_rmat_params( + handle, + configuration.input_graph_specifier.rmat_params.scale, + configuration.input_graph_specifier.rmat_params.edge_factor, + configuration.input_graph_specifier.rmat_params.a, + configuration.input_graph_specifier.rmat_params.b, + configuration.input_graph_specifier.rmat_params.c, + configuration.input_graph_specifier.rmat_params.seed, + configuration.input_graph_specifier.rmat_params.undirected, + configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, + configuration.test_weighted, + renumber, + std::vector{0}, + size_t{1}); +} + class Tests_PageRank : public ::testing::TestWithParam { public: Tests_PageRank() {} @@ -166,52 +210,16 @@ class Tests_PageRank : public ::testing::TestWithParam { template void run_current_test(PageRank_Usecase const& configuration) { + constexpr bool renumber = true; + raft::handle_t handle{}; cugraph::experimental::graph_t graph(handle); - std::tie(graph, std::ignore) = - configuration.input_graph_specifier.tag == - cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH - ? cugraph::test:: - read_graph_from_matrix_market_file( - handle, - configuration.input_graph_specifier.graph_file_full_path, - configuration.test_weighted, - false) - : cugraph::test::generate_graph_from_rmat_params( - handle, - configuration.input_graph_specifier.rmat_params.scale, - configuration.input_graph_specifier.rmat_params.edge_factor, - configuration.input_graph_specifier.rmat_params.a, - configuration.input_graph_specifier.rmat_params.b, - configuration.input_graph_specifier.rmat_params.c, - configuration.input_graph_specifier.rmat_params.seed, - configuration.input_graph_specifier.rmat_params.undirected, - configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, - configuration.test_weighted, - false); + rmm::device_uvector d_renumber_map_labels(0, handle.get_stream()); + std::tie(graph, d_renumber_map_labels) = + read_graph(handle, configuration, renumber); auto graph_view = graph.view(); - std::vector h_offsets(graph_view.get_number_of_vertices() + 1); - std::vector h_indices(graph_view.get_number_of_edges()); - std::vector h_weights{}; - raft::update_host(h_offsets.data(), - graph_view.offsets(), - graph_view.get_number_of_vertices() + 1, - handle.get_stream()); - raft::update_host(h_indices.data(), - graph_view.indices(), - graph_view.get_number_of_edges(), - handle.get_stream()); - if (graph_view.is_weighted()) { - h_weights.assign(graph_view.get_number_of_edges(), weight_t{0.0}); - raft::update_host(h_weights.data(), - graph_view.weights(), - graph_view.get_number_of_edges(), - handle.get_stream()); - } - CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); - std::vector h_personalization_vertices{}; std::vector h_personalization_values{}; if (configuration.personalization_ratio > 0.0) { @@ -260,21 +268,6 @@ class Tests_PageRank : public ::testing::TestWithParam { result_t constexpr alpha{0.85}; result_t constexpr epsilon{1e-6}; - std::vector h_reference_pageranks(graph_view.get_number_of_vertices()); - - pagerank_reference(h_offsets.data(), - h_indices.data(), - h_weights.size() > 0 ? 
h_weights.data() : static_cast(nullptr), - h_personalization_vertices.data(), - h_personalization_values.data(), - h_reference_pageranks.data(), - graph_view.get_number_of_vertices(), - static_cast(h_personalization_vertices.size()), - alpha, - epsilon, - std::numeric_limits::max(), - false); - rmm::device_uvector d_pageranks(graph_view.get_number_of_vertices(), handle.get_stream()); @@ -286,7 +279,7 @@ class Tests_PageRank : public ::testing::TestWithParam { d_personalization_vertices.data(), d_personalization_values.data(), static_cast(d_personalization_vertices.size()), - d_pageranks.begin(), + d_pageranks.data(), alpha, epsilon, std::numeric_limits::max(), @@ -295,26 +288,129 @@ class Tests_PageRank : public ::testing::TestWithParam { CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement - std::vector h_cugraph_pageranks(graph_view.get_number_of_vertices()); - - raft::update_host( - h_cugraph_pageranks.data(), d_pageranks.data(), d_pageranks.size(), handle.get_stream()); - CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); - - auto threshold_ratio = 1e-3; - auto threshold_magnitude = - (1.0 / static_cast(graph_view.get_number_of_vertices())) * - threshold_ratio; // skip comparison for low PageRank verties (lowly ranked vertices) - auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { - return std::abs(lhs - rhs) < - std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude); - }; - - ASSERT_TRUE(std::equal(h_reference_pageranks.begin(), - h_reference_pageranks.end(), - h_cugraph_pageranks.begin(), - nearly_equal)) - << "PageRank values do not match with the reference values."; + if (configuration.check_correctness) { + cugraph::experimental::graph_t unrenumbered_graph( + handle); + if (renumber) { + std::tie(unrenumbered_graph, std::ignore) = + read_graph(handle, configuration, false); + } + auto unrenumbered_graph_view = renumber ? 
unrenumbered_graph.view() : graph_view; + + std::vector h_offsets(unrenumbered_graph_view.get_number_of_vertices() + 1); + std::vector h_indices(unrenumbered_graph_view.get_number_of_edges()); + std::vector h_weights{}; + raft::update_host(h_offsets.data(), + unrenumbered_graph_view.offsets(), + unrenumbered_graph_view.get_number_of_vertices() + 1, + handle.get_stream()); + raft::update_host(h_indices.data(), + unrenumbered_graph_view.indices(), + unrenumbered_graph_view.get_number_of_edges(), + handle.get_stream()); + if (unrenumbered_graph_view.is_weighted()) { + h_weights.assign(unrenumbered_graph_view.get_number_of_edges(), weight_t{0.0}); + raft::update_host(h_weights.data(), + unrenumbered_graph_view.weights(), + unrenumbered_graph_view.get_number_of_edges(), + handle.get_stream()); + } + + std::vector h_unrenumbered_personalization_vertices( + d_personalization_vertices.size()); + std::vector h_unrenumbered_personalization_values( + h_unrenumbered_personalization_vertices.size()); + if (renumber) { + rmm::device_uvector d_unrenumbered_personalization_vertices( + d_personalization_vertices.size(), handle.get_stream()); + rmm::device_uvector d_unrenumbered_personalization_values( + d_unrenumbered_personalization_vertices.size(), handle.get_stream()); + raft::copy_async(d_unrenumbered_personalization_vertices.data(), + d_personalization_vertices.data(), + d_personalization_vertices.size(), + handle.get_stream()); + raft::copy_async(d_unrenumbered_personalization_values.data(), + d_personalization_values.data(), + d_personalization_values.size(), + handle.get_stream()); + cugraph::experimental::unrenumber_local_int_vertices( + handle, + d_unrenumbered_personalization_vertices.data(), + d_unrenumbered_personalization_vertices.size(), + d_renumber_map_labels.data(), + vertex_t{0}, + graph_view.get_number_of_vertices()); + cugraph::test::sort_by_key(handle, + d_unrenumbered_personalization_vertices.data(), + d_unrenumbered_personalization_values.data(), + d_unrenumbered_personalization_vertices.size()); + + raft::update_host(h_unrenumbered_personalization_vertices.data(), + d_unrenumbered_personalization_vertices.data(), + d_unrenumbered_personalization_vertices.size(), + handle.get_stream()); + raft::update_host(h_unrenumbered_personalization_values.data(), + d_unrenumbered_personalization_values.data(), + d_unrenumbered_personalization_values.size(), + handle.get_stream()); + } else { + raft::update_host(h_unrenumbered_personalization_vertices.data(), + d_personalization_vertices.data(), + d_personalization_vertices.size(), + handle.get_stream()); + raft::update_host(h_unrenumbered_personalization_values.data(), + d_personalization_values.data(), + d_personalization_values.size(), + handle.get_stream()); + } + + handle.get_stream_view().synchronize(); + + std::vector h_reference_pageranks(unrenumbered_graph_view.get_number_of_vertices()); + + pagerank_reference(h_offsets.data(), + h_indices.data(), + h_weights.size() > 0 ? 
h_weights.data() : static_cast(nullptr), + h_unrenumbered_personalization_vertices.data(), + h_unrenumbered_personalization_values.data(), + h_reference_pageranks.data(), + unrenumbered_graph_view.get_number_of_vertices(), + static_cast(h_personalization_vertices.size()), + alpha, + epsilon, + std::numeric_limits::max(), + false); + + std::vector h_cugraph_pageranks(graph_view.get_number_of_vertices()); + if (renumber) { + auto d_unrenumbered_pageranks = cugraph::test::sort_by_key( + handle, d_renumber_map_labels.data(), d_pageranks.data(), d_renumber_map_labels.size()); + raft::update_host(h_cugraph_pageranks.data(), + d_unrenumbered_pageranks.data(), + d_unrenumbered_pageranks.size(), + handle.get_stream()); + } else { + raft::update_host( + h_cugraph_pageranks.data(), d_pageranks.data(), d_pageranks.size(), handle.get_stream()); + } + + handle.get_stream_view().synchronize(); + + auto threshold_ratio = 1e-3; + auto threshold_magnitude = + (1.0 / static_cast(graph_view.get_number_of_vertices())) * + threshold_ratio; // skip comparison for low PageRank verties (lowly ranked vertices) + auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { + return std::abs(lhs - rhs) < + std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude); + }; + + ASSERT_TRUE(std::equal(h_reference_pageranks.begin(), + h_reference_pageranks.end(), + h_cugraph_pageranks.begin(), + nearly_equal)) + << "PageRank values do not match with the reference values."; + } } }; @@ -328,6 +424,7 @@ INSTANTIATE_TEST_CASE_P( simple_test, Tests_PageRank, ::testing::Values( + // enable correctness checks PageRank_Usecase("test/datasets/karate.mtx", 0.0, false), PageRank_Usecase("test/datasets/karate.mtx", 0.5, false), PageRank_Usecase("test/datasets/karate.mtx", 0.0, true), @@ -355,6 +452,15 @@ INSTANTIATE_TEST_CASE_P( true), PageRank_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, 0.5, - true))); + true), + // disable correctness checks for large graphs + PageRank_Usecase( + cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, 0.0, false, false), + PageRank_Usecase( + cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, 0.5, false, false), + PageRank_Usecase( + cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, 0.0, true, false), + PageRank_Usecase( + cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, 0.5, true, false))); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/rw_low_level_test.cu b/cpp/tests/experimental/rw_low_level_test.cu index a32e258d366..8b562bc41f6 100644 --- a/cpp/tests/experimental/rw_low_level_test.cu +++ b/cpp/tests/experimental/rw_low_level_test.cu @@ -53,7 +53,8 @@ graph_t make_graph(raft::handle_t cons std::vector const& v_dst, std::vector const& v_w, vertex_t num_vertices, - edge_t num_edges) + edge_t num_edges, + bool is_weighted) { vector_test_t d_src(num_edges, handle.get_stream()); vector_test_t d_dst(num_edges, handle.get_stream()); @@ -67,7 +68,7 @@ graph_t make_graph(raft::handle_t cons d_src.data(), d_dst.data(), d_weights.data(), num_edges}; graph_t graph( - handle, edgelist, num_vertices, graph_properties_t{}, false); + handle, edgelist, num_vertices, graph_properties_t{false, false, is_weighted}, false); return graph; } @@ -119,7 +120,7 @@ TEST_F(RandomWalksPrimsTest, SimpleGraphRWStart) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = make_graph(handle, v_src, 
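// The comparison above accepts a cuGraph PageRank value when it is within a relative
// tolerance of the reference value, or when both values fall below an absolute floor
// derived from 1 / number_of_vertices, so very lowly ranked vertices are effectively
// skipped. A standalone sketch of that acceptance rule, with made-up numbers:

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdio>

// Accept lhs as matching rhs when the difference is within a relative tolerance, or when
// both values are below an absolute floor.
bool nearly_equal_pagerank(double lhs, double rhs, double threshold_ratio, double threshold_magnitude)
{
  return std::abs(lhs - rhs) <
         std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude);
}

int main()
{
  double const threshold_ratio     = 1e-3;
  std::size_t const num_vertices   = 1000;  // hypothetical graph size
  double const threshold_magnitude = (1.0 / static_cast<double>(num_vertices)) * threshold_ratio;

  // A mid-sized rank is compared relatively; two tiny ranks both fall under the floor.
  std::printf("%d\n", nearly_equal_pagerank(0.1020, 0.1021, threshold_ratio, threshold_magnitude));
  std::printf("%d\n", nearly_equal_pagerank(1e-9, 5e-9, threshold_ratio, threshold_magnitude));
  return 0;
}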
v_dst, v_w, num_vertices, num_edges); + auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); auto graph_view = graph.view(); @@ -199,7 +200,7 @@ TEST_F(RandomWalksPrimsTest, SimpleGraphCoalesceExperiments) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges); + auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); auto graph_view = graph.view(); @@ -275,7 +276,7 @@ TEST_F(RandomWalksPrimsTest, SimpleGraphColExtraction) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges); + auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); auto graph_view = graph.view(); @@ -371,7 +372,7 @@ TEST_F(RandomWalksPrimsTest, SimpleGraphRndGenColIndx) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges); + auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); auto graph_view = graph.view(); @@ -449,7 +450,7 @@ TEST_F(RandomWalksPrimsTest, SimpleGraphUpdatePathSizes) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges); + auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); auto graph_view = graph.view(); @@ -521,7 +522,7 @@ TEST_F(RandomWalksPrimsTest, SimpleGraphScatterUpdate) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges); + auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); auto graph_view = graph.view(); @@ -666,7 +667,7 @@ TEST_F(RandomWalksPrimsTest, SimpleGraphCoalesceDefragment) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges); + auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); auto graph_view = graph.view(); @@ -741,7 +742,7 @@ TEST_F(RandomWalksPrimsTest, SimpleGraphRandomWalk) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges); + auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); auto graph_view = graph.view(); diff --git a/cpp/tests/experimental/sssp_test.cpp b/cpp/tests/experimental/sssp_test.cpp index 611abcb0d75..9364d261dec 100644 --- a/cpp/tests/experimental/sssp_test.cpp +++ b/cpp/tests/experimental/sssp_test.cpp @@ -16,9 +16,11 @@ #include #include +#include #include #include +#include #include #include @@ -28,12 +30,18 @@ #include +#include #include #include #include #include #include +// do the perf measurements +// enabled by command line parameter s'--perf' +// +static int PERF = 0; + // Dijkstra's algorithm template void sssp_reference(edge_t const* offsets, @@ -80,9 +88,12 @@ void sssp_reference(edge_t const* offsets, typedef struct SSSP_Usecase_t { cugraph::test::input_graph_specifier_t input_graph_specifier{}; - size_t source{false}; - SSSP_Usecase_t(std::string const& 
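// The SSSP test below validates cuGraph distances against the host sssp_reference declared
// above (Dijkstra over the CSR arrays copied to the host). As a generic illustration only,
// here is a minimal CSR Dijkstra using std::priority_queue; the sample graph is the
// 6-vertex / 8-edge toy graph from the random-walk tests above, and the real sssp_reference
// may differ in its details.

#include <cstddef>
#include <cstdio>
#include <functional>
#include <limits>
#include <queue>
#include <utility>
#include <vector>

// Plain CSR Dijkstra with non-negative weights: distances[v] is the shortest-path distance
// from source, or +inf when v is unreachable.
void dijkstra_csr(std::vector<int> const& offsets,     // size V + 1
                  std::vector<int> const& indices,     // size E
                  std::vector<double> const& weights,  // size E
                  int source,
                  std::vector<double>& distances)
{
  distances.assign(offsets.size() - 1, std::numeric_limits<double>::max());
  distances[source] = 0.0;
  using item_t = std::pair<double, int>;  // (tentative distance, vertex)
  std::priority_queue<item_t, std::vector<item_t>, std::greater<item_t>> frontier;
  frontier.push({0.0, source});
  while (!frontier.empty()) {
    auto [d, v] = frontier.top();
    frontier.pop();
    if (d > distances[v]) { continue; }  // stale queue entry
    for (int e = offsets[v]; e < offsets[v + 1]; ++e) {
      auto nbr = indices[e];
      if (distances[v] + weights[e] < distances[nbr]) {
        distances[nbr] = distances[v] + weights[e];
        frontier.push({distances[nbr], nbr});
      }
    }
  }
}

int main()
{
  // The 6-vertex / 8-edge toy graph used by the random-walk tests, in CSR form.
  std::vector<int> offsets{0, 1, 3, 6, 7, 8, 8};
  std::vector<int> indices{1, 3, 4, 0, 1, 3, 5, 5};
  std::vector<double> weights{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1};
  std::vector<double> distances;
  dijkstra_csr(offsets, indices, weights, 0, distances);
  for (std::size_t v = 0; v < distances.size(); ++v) {
    std::printf("dist[%zu] = %f\n", v, distances[v]);
  }
  return 0;
}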
graph_file_path, size_t source) : source(source) + size_t source{0}; + bool check_correctness{false}; + + SSSP_Usecase_t(std::string const& graph_file_path, size_t source, bool check_correctness = true) + : source(source), check_correctness(check_correctness) { std::string graph_file_full_path{}; if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { @@ -94,13 +105,43 @@ typedef struct SSSP_Usecase_t { input_graph_specifier.graph_file_full_path = graph_file_full_path; }; - SSSP_Usecase_t(cugraph::test::rmat_params_t rmat_params, size_t source) : source(source) + SSSP_Usecase_t(cugraph::test::rmat_params_t rmat_params, + size_t source, + bool check_correctness = true) + : source(source), check_correctness(check_correctness) { input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::RMAT_PARAMS; input_graph_specifier.rmat_params = rmat_params; } } SSSP_Usecase; +template +std::tuple, + rmm::device_uvector> +read_graph(raft::handle_t const& handle, SSSP_Usecase const& configuration, bool renumber) +{ + return configuration.input_graph_specifier.tag == + cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH + ? cugraph::test:: + read_graph_from_matrix_market_file( + handle, configuration.input_graph_specifier.graph_file_full_path, true, renumber) + : cugraph::test:: + generate_graph_from_rmat_params( + handle, + configuration.input_graph_specifier.rmat_params.scale, + configuration.input_graph_specifier.rmat_params.edge_factor, + configuration.input_graph_specifier.rmat_params.a, + configuration.input_graph_specifier.rmat_params.b, + configuration.input_graph_specifier.rmat_params.c, + configuration.input_graph_specifier.rmat_params.seed, + configuration.input_graph_specifier.rmat_params.undirected, + configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, + true, + renumber, + std::vector{0}, + size_t{1}); +} + class Tests_SSSP : public ::testing::TestWithParam { public: Tests_SSSP() {} @@ -113,61 +154,18 @@ class Tests_SSSP : public ::testing::TestWithParam { template void run_current_test(SSSP_Usecase const& configuration) { + constexpr bool renumber = true; + raft::handle_t handle{}; cugraph::experimental::graph_t graph(handle); - std::tie(graph, std::ignore) = - configuration.input_graph_specifier.tag == - cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH - ? 
cugraph::test:: - read_graph_from_matrix_market_file( - handle, configuration.input_graph_specifier.graph_file_full_path, true, false) - : cugraph::test::generate_graph_from_rmat_params( - handle, - configuration.input_graph_specifier.rmat_params.scale, - configuration.input_graph_specifier.rmat_params.edge_factor, - configuration.input_graph_specifier.rmat_params.a, - configuration.input_graph_specifier.rmat_params.b, - configuration.input_graph_specifier.rmat_params.c, - configuration.input_graph_specifier.rmat_params.seed, - configuration.input_graph_specifier.rmat_params.undirected, - configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, - true, - false); + rmm::device_uvector d_renumber_map_labels(0, handle.get_stream()); + std::tie(graph, d_renumber_map_labels) = + read_graph(handle, configuration, renumber); auto graph_view = graph.view(); - std::vector h_offsets(graph_view.get_number_of_vertices() + 1); - std::vector h_indices(graph_view.get_number_of_edges()); - std::vector h_weights(graph_view.get_number_of_edges()); - raft::update_host(h_offsets.data(), - graph_view.offsets(), - graph_view.get_number_of_vertices() + 1, - handle.get_stream()); - raft::update_host(h_indices.data(), - graph_view.indices(), - graph_view.get_number_of_edges(), - handle.get_stream()); - raft::update_host(h_weights.data(), - graph_view.weights(), - graph_view.get_number_of_edges(), - handle.get_stream()); - CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); - - ASSERT_TRUE(configuration.source >= 0 && - configuration.source <= graph_view.get_number_of_vertices()) - << "Starting sources should be >= 0 and" - << " less than the number of vertices in the graph."; - - std::vector h_reference_distances(graph_view.get_number_of_vertices()); - std::vector h_reference_predecessors(graph_view.get_number_of_vertices()); - - sssp_reference(h_offsets.data(), - h_indices.data(), - h_weights.data(), - h_reference_distances.data(), - h_reference_predecessors.data(), - graph_view.get_number_of_vertices(), - static_cast(configuration.source)); + ASSERT_TRUE(static_cast(configuration.source) >= 0 && + static_cast(configuration.source) < graph_view.get_number_of_vertices()); rmm::device_uvector d_distances(graph_view.get_number_of_vertices(), handle.get_stream()); @@ -178,53 +176,135 @@ class Tests_SSSP : public ::testing::TestWithParam { cugraph::experimental::sssp(handle, graph_view, - d_distances.begin(), - d_predecessors.begin(), + d_distances.data(), + d_predecessors.data(), static_cast(configuration.source), std::numeric_limits::max(), false); CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement - std::vector h_cugraph_distances(graph_view.get_number_of_vertices()); - std::vector h_cugraph_predecessors(graph_view.get_number_of_vertices()); - - raft::update_host( - h_cugraph_distances.data(), d_distances.data(), d_distances.size(), handle.get_stream()); - raft::update_host(h_cugraph_predecessors.data(), - d_predecessors.data(), - d_predecessors.size(), - handle.get_stream()); - CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); - - auto max_weight_element = std::max_element(h_weights.begin(), h_weights.end()); - auto epsilon = *max_weight_element * weight_t{1e-6}; - auto nearly_equal = [epsilon](auto lhs, auto rhs) { return std::fabs(lhs - rhs) < epsilon; }; - - ASSERT_TRUE(std::equal(h_reference_distances.begin(), - h_reference_distances.end(), - h_cugraph_distances.begin(), - nearly_equal)) - << "distances do not match with the reference values."; - - for (auto it = 
h_cugraph_predecessors.begin(); it != h_cugraph_predecessors.end(); ++it) { - auto i = std::distance(h_cugraph_predecessors.begin(), it); - if (*it == cugraph::invalid_vertex_id::value) { - ASSERT_TRUE(h_reference_predecessors[i] == *it) - << "vertex reachability do not match with the reference."; + if (configuration.check_correctness) { + cugraph::experimental::graph_t unrenumbered_graph( + handle); + if (renumber) { + std::tie(unrenumbered_graph, std::ignore) = + read_graph(handle, configuration, false); + } + auto unrenumbered_graph_view = renumber ? unrenumbered_graph.view() : graph_view; + + std::vector h_offsets(unrenumbered_graph_view.get_number_of_vertices() + 1); + std::vector h_indices(unrenumbered_graph_view.get_number_of_edges()); + std::vector h_weights(unrenumbered_graph_view.get_number_of_edges()); + raft::update_host(h_offsets.data(), + unrenumbered_graph_view.offsets(), + unrenumbered_graph_view.get_number_of_vertices() + 1, + handle.get_stream()); + raft::update_host(h_indices.data(), + unrenumbered_graph_view.indices(), + unrenumbered_graph_view.get_number_of_edges(), + handle.get_stream()); + raft::update_host(h_weights.data(), + unrenumbered_graph_view.weights(), + unrenumbered_graph_view.get_number_of_edges(), + handle.get_stream()); + + handle.get_stream_view().synchronize(); + + auto unrenumbered_source = static_cast(configuration.source); + if (renumber) { + std::vector h_renumber_map_labels(d_renumber_map_labels.size()); + raft::update_host(h_renumber_map_labels.data(), + d_renumber_map_labels.data(), + d_renumber_map_labels.size(), + handle.get_stream()); + + handle.get_stream_view().synchronize(); + + unrenumbered_source = h_renumber_map_labels[configuration.source]; + } + + std::vector h_reference_distances(unrenumbered_graph_view.get_number_of_vertices()); + std::vector h_reference_predecessors( + unrenumbered_graph_view.get_number_of_vertices()); + + sssp_reference(h_offsets.data(), + h_indices.data(), + h_weights.data(), + h_reference_distances.data(), + h_reference_predecessors.data(), + unrenumbered_graph_view.get_number_of_vertices(), + unrenumbered_source, + std::numeric_limits::max()); + + std::vector h_cugraph_distances(graph_view.get_number_of_vertices()); + std::vector h_cugraph_predecessors(graph_view.get_number_of_vertices()); + if (renumber) { + cugraph::experimental::unrenumber_local_int_vertices(handle, + d_predecessors.data(), + d_predecessors.size(), + d_renumber_map_labels.data(), + vertex_t{0}, + graph_view.get_number_of_vertices(), + true); + + auto d_unrenumbered_distances = cugraph::test::sort_by_key( + handle, d_renumber_map_labels.data(), d_distances.data(), d_renumber_map_labels.size()); + auto d_unrenumbered_predecessors = cugraph::test::sort_by_key(handle, + d_renumber_map_labels.data(), + d_predecessors.data(), + d_renumber_map_labels.size()); + + raft::update_host(h_cugraph_distances.data(), + d_unrenumbered_distances.data(), + d_unrenumbered_distances.size(), + handle.get_stream()); + raft::update_host(h_cugraph_predecessors.data(), + d_unrenumbered_predecessors.data(), + d_unrenumbered_predecessors.size(), + handle.get_stream()); + + handle.get_stream_view().synchronize(); } else { - auto pred_distance = h_reference_distances[*it]; - bool found{false}; - for (auto j = h_offsets[*it]; j < h_offsets[*it + 1]; ++j) { - if (h_indices[j] == i) { - if (nearly_equal(pred_distance + h_weights[j], h_reference_distances[i])) { - found = true; - break; + raft::update_host( + h_cugraph_distances.data(), d_distances.data(), 
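// When the graph is renumbered, the device results above are indexed by renumbered vertex
// ids while the reference is computed on the unrenumbered graph. d_renumber_map_labels[new_id]
// holds the original vertex id, so sorting (label, value) pairs by label, which is what
// cugraph::test::sort_by_key does on the device, yields values indexed by original id.
// A host-side sketch of the same idea, with hypothetical data:

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <numeric>
#include <vector>

// Reorder 'values' (indexed by renumbered vertex id) into original-vertex-id order.
// renumber_map[new_id] == original_id, and the map is assumed to be a permutation of
// [0, V); sorting by the map entries therefore lands each value at its original id.
std::vector<double> unrenumber_values(std::vector<int> const& renumber_map,
                                      std::vector<double> const& values)
{
  std::vector<std::size_t> order(values.size());
  std::iota(order.begin(), order.end(), std::size_t{0});
  std::sort(order.begin(), order.end(),
            [&renumber_map](auto l, auto r) { return renumber_map[l] < renumber_map[r]; });
  std::vector<double> out(values.size());
  for (std::size_t i = 0; i < order.size(); ++i) { out[i] = values[order[i]]; }
  return out;
}

int main()
{
  std::vector<int> renumber_map{2, 0, 3, 1};          // new id -> original id (made up)
  std::vector<double> distances{0.5, 0.0, 1.2, 0.3};  // indexed by new id
  auto unrenumbered = unrenumber_values(renumber_map, distances);
  assert(unrenumbered[0] == 0.0);  // original vertex 0 was renumbered to 1
  assert(unrenumbered[2] == 0.5);  // original vertex 2 was renumbered to 0
  return 0;
}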
d_distances.size(), handle.get_stream()); + raft::update_host(h_cugraph_predecessors.data(), + d_predecessors.data(), + d_predecessors.size(), + handle.get_stream()); + + handle.get_stream_view().synchronize(); + } + + auto max_weight_element = std::max_element(h_weights.begin(), h_weights.end()); + auto epsilon = *max_weight_element * weight_t{1e-6}; + auto nearly_equal = [epsilon](auto lhs, auto rhs) { return std::fabs(lhs - rhs) < epsilon; }; + + ASSERT_TRUE(std::equal(h_reference_distances.begin(), + h_reference_distances.end(), + h_cugraph_distances.begin(), + nearly_equal)) + << "distances do not match with the reference values."; + + for (auto it = h_cugraph_predecessors.begin(); it != h_cugraph_predecessors.end(); ++it) { + auto i = std::distance(h_cugraph_predecessors.begin(), it); + if (*it == cugraph::invalid_vertex_id::value) { + ASSERT_TRUE(h_reference_predecessors[i] == *it) + << "vertex reachability do not match with the reference."; + } else { + auto pred_distance = h_reference_distances[*it]; + bool found{false}; + for (auto j = h_offsets[*it]; j < h_offsets[*it + 1]; ++j) { + if (h_indices[j] == i) { + if (nearly_equal(pred_distance + h_weights[j], h_reference_distances[i])) { + found = true; + break; + } } } + ASSERT_TRUE(found) + << "no edge from the predecessor vertex to this vertex with the matching weight."; } - ASSERT_TRUE(found) - << "no edge from the predecessor vertex to this vertex with the matching weight."; } } } @@ -237,9 +317,14 @@ INSTANTIATE_TEST_CASE_P( simple_test, Tests_SSSP, ::testing::Values( + // enable correctness checks SSSP_Usecase("test/datasets/karate.mtx", 0), SSSP_Usecase("test/datasets/dblp.mtx", 0), SSSP_Usecase("test/datasets/wiki2003.mtx", 1000), - SSSP_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, 0))); + SSSP_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, 0), + // disable correctness checks for large graphs + SSSP_Usecase(cugraph::test::rmat_params_t{20, 16, 0.57, 0.19, 0.19, 0, false, false}, + 0, + false))); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/pagerank/mg_pagerank_test.cpp b/cpp/tests/pagerank/mg_pagerank_test.cpp index 85ee9a4243e..f7b1e8dfbb4 100644 --- a/cpp/tests/pagerank/mg_pagerank_test.cpp +++ b/cpp/tests/pagerank/mg_pagerank_test.cpp @@ -16,13 +16,19 @@ #include #include +#include #include +#include +#include +#include #include #include #include #include +#include +#include #include @@ -33,11 +39,15 @@ typedef struct PageRank_Usecase_t { double personalization_ratio{0.0}; bool test_weighted{false}; + bool check_correctness{false}; PageRank_Usecase_t(std::string const& graph_file_path, double personalization_ratio, - bool test_weighted) - : personalization_ratio(personalization_ratio), test_weighted(test_weighted) + bool test_weighted, + bool check_correctness = true) + : personalization_ratio(personalization_ratio), + test_weighted(test_weighted), + check_correctness(check_correctness) { std::string graph_file_full_path{}; if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { @@ -51,14 +61,56 @@ typedef struct PageRank_Usecase_t { PageRank_Usecase_t(cugraph::test::rmat_params_t rmat_params, double personalization_ratio, - bool test_weighted) - : personalization_ratio(personalization_ratio), test_weighted(test_weighted) + bool test_weighted, + bool check_correctness = true) + : personalization_ratio(personalization_ratio), + test_weighted(test_weighted), + check_correctness(check_correctness) { input_graph_specifier.tag = 
cugraph::test::input_graph_specifier_t::RMAT_PARAMS; input_graph_specifier.rmat_params = rmat_params; } } PageRank_Usecase; +template +std::tuple, + rmm::device_uvector> +read_graph(raft::handle_t const& handle, PageRank_Usecase const& configuration, bool renumber) +{ + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + + std::vector partition_ids(multi_gpu ? size_t{1} : static_cast(comm_size)); + std::iota(partition_ids.begin(), + partition_ids.end(), + multi_gpu ? static_cast(comm_rank) : size_t{0}); + + return configuration.input_graph_specifier.tag == + cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH + ? cugraph::test:: + read_graph_from_matrix_market_file( + handle, + configuration.input_graph_specifier.graph_file_full_path, + configuration.test_weighted, + renumber) + : cugraph::test:: + generate_graph_from_rmat_params( + handle, + configuration.input_graph_specifier.rmat_params.scale, + configuration.input_graph_specifier.rmat_params.edge_factor, + configuration.input_graph_specifier.rmat_params.a, + configuration.input_graph_specifier.rmat_params.b, + configuration.input_graph_specifier.rmat_params.c, + configuration.input_graph_specifier.rmat_params.seed, + configuration.input_graph_specifier.rmat_params.undirected, + configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, + configuration.test_weighted, + renumber, + partition_ids, + static_cast(comm_size)); +} + class Tests_MGPageRank : public ::testing::TestWithParam { public: Tests_MGPageRank() {} @@ -68,7 +120,7 @@ class Tests_MGPageRank : public ::testing::TestWithParam { virtual void SetUp() {} virtual void TearDown() {} - // Compare the results of running pagerank on multiple GPUs to that of a single-GPU run + // Compare the results of running PageRank on multiple GPUs to that of a single-GPU run template void run_current_test(PageRank_Usecase const& configuration) { @@ -86,168 +138,40 @@ class Tests_MGPageRank : public ::testing::TestWithParam { cugraph::partition_2d::subcomm_factory_t subcomm_factory(handle, row_comm_size); - // 2. create SG & MG graphs - - cugraph::experimental::graph_t sg_graph(handle); - rmm::device_uvector d_sg_renumber_map_labels(0, handle.get_stream()); - std::tie(sg_graph, d_sg_renumber_map_labels) = - configuration.input_graph_specifier.tag == - cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH - ? cugraph::test:: - read_graph_from_matrix_market_file( - handle, - configuration.input_graph_specifier.graph_file_full_path, - configuration.test_weighted, - true) - : cugraph::test::generate_graph_from_rmat_params( - handle, - configuration.input_graph_specifier.rmat_params.scale, - configuration.input_graph_specifier.rmat_params.edge_factor, - configuration.input_graph_specifier.rmat_params.a, - configuration.input_graph_specifier.rmat_params.b, - configuration.input_graph_specifier.rmat_params.c, - configuration.input_graph_specifier.rmat_params.seed, - configuration.input_graph_specifier.rmat_params.undirected, - configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, - configuration.test_weighted, - true); - - auto sg_graph_view = sg_graph.view(); + // 2. create MG graph cugraph::experimental::graph_t mg_graph(handle); rmm::device_uvector d_mg_renumber_map_labels(0, handle.get_stream()); std::tie(mg_graph, d_mg_renumber_map_labels) = - configuration.input_graph_specifier.tag == - cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH - ? 
cugraph::test::read_graph_from_matrix_market_file( - handle, - configuration.input_graph_specifier.graph_file_full_path, - configuration.test_weighted, - true) - : cugraph::test::generate_graph_from_rmat_params( - handle, - configuration.input_graph_specifier.rmat_params.scale, - configuration.input_graph_specifier.rmat_params.edge_factor, - configuration.input_graph_specifier.rmat_params.a, - configuration.input_graph_specifier.rmat_params.b, - configuration.input_graph_specifier.rmat_params.c, - configuration.input_graph_specifier.rmat_params.seed, - configuration.input_graph_specifier.rmat_params.undirected, - configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, - configuration.test_weighted, - true); + read_graph(handle, configuration, true); auto mg_graph_view = mg_graph.view(); - std::vector h_sg_renumber_map_labels(d_sg_renumber_map_labels.size()); - raft::update_host(h_sg_renumber_map_labels.data(), - d_sg_renumber_map_labels.data(), - d_sg_renumber_map_labels.size(), - handle.get_stream()); - - std::vector h_mg_renumber_map_labels(mg_graph_view.get_number_of_local_vertices()); - raft::update_host(h_mg_renumber_map_labels.data(), - d_mg_renumber_map_labels.data(), - d_mg_renumber_map_labels.size(), - handle.get_stream()); + // 3. generate personalization vertex/value pairs - CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); - - // 2. generate personalization vertex/value pairs - - std::vector h_personalization_vertices{}; - std::vector h_personalization_values{}; + std::vector h_mg_personalization_vertices{}; + std::vector h_mg_personalization_values{}; if (configuration.personalization_ratio > 0.0) { - std::default_random_engine generator{}; + std::default_random_engine generator{ + static_cast(comm.get_rank()) /* seed */}; std::uniform_real_distribution distribution{0.0, 1.0}; - h_personalization_vertices.resize(sg_graph_view.get_number_of_vertices()); - std::iota(h_personalization_vertices.begin(), h_personalization_vertices.end(), vertex_t{0}); - h_personalization_vertices.erase( - std::remove_if(h_personalization_vertices.begin(), - h_personalization_vertices.end(), + h_mg_personalization_vertices.resize(mg_graph_view.get_number_of_local_vertices()); + std::iota(h_mg_personalization_vertices.begin(), + h_mg_personalization_vertices.end(), + mg_graph_view.get_local_vertex_first()); + h_mg_personalization_vertices.erase( + std::remove_if(h_mg_personalization_vertices.begin(), + h_mg_personalization_vertices.end(), [&generator, &distribution, configuration](auto v) { return distribution(generator) >= configuration.personalization_ratio; }), - h_personalization_vertices.end()); - h_personalization_values.resize(h_personalization_vertices.size()); - std::for_each(h_personalization_values.begin(), - h_personalization_values.end(), + h_mg_personalization_vertices.end()); + h_mg_personalization_values.resize(h_mg_personalization_vertices.size()); + std::for_each(h_mg_personalization_values.begin(), + h_mg_personalization_values.end(), [&distribution, &generator](auto& val) { val = distribution(generator); }); } - result_t constexpr alpha{0.85}; - result_t constexpr epsilon{1e-6}; - - // 3. 
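// In the multi-GPU path above, each rank seeds its own random engine with its communicator
// rank and independently picks personalization vertices from its local vertex range, then
// draws one value per kept vertex. A host-only sketch of that select-and-assign pattern,
// with hypothetical parameters:

#include <algorithm>
#include <cstdio>
#include <numeric>
#include <random>
#include <vector>

int main()
{
  // Hypothetical rank, local vertex range, and selection ratio.
  int const rank                     = 1;
  int const local_vertex_first       = 1000;
  int const num_local_vertices       = 8;
  double const personalization_ratio = 0.5;

  std::default_random_engine generator{static_cast<std::default_random_engine::result_type>(rank)};
  std::uniform_real_distribution<double> distribution{0.0, 1.0};

  // Enumerate the local vertices, then drop each one with probability (1 - ratio).
  std::vector<int> personalization_vertices(num_local_vertices);
  std::iota(personalization_vertices.begin(), personalization_vertices.end(), local_vertex_first);
  personalization_vertices.erase(
    std::remove_if(personalization_vertices.begin(),
                   personalization_vertices.end(),
                   [&](auto) { return distribution(generator) >= personalization_ratio; }),
    personalization_vertices.end());

  // Draw one personalization value per kept vertex.
  std::vector<double> personalization_values(personalization_vertices.size());
  std::for_each(personalization_values.begin(),
                personalization_values.end(),
                [&](auto& val) { val = distribution(generator); });

  std::printf("kept %zu of %d local vertices\n", personalization_vertices.size(), num_local_vertices);
  return 0;
}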
run SG pagerank - - std::vector h_sg_personalization_vertices{}; - std::vector h_sg_personalization_values{}; - if (h_personalization_vertices.size() > 0) { - for (vertex_t i = 0; i < sg_graph_view.get_number_of_vertices(); ++i) { - auto it = std::lower_bound(h_personalization_vertices.begin(), - h_personalization_vertices.end(), - h_sg_renumber_map_labels[i]); - if (*it == h_sg_renumber_map_labels[i]) { - h_sg_personalization_vertices.push_back(i); - h_sg_personalization_values.push_back( - h_personalization_values[std::distance(h_personalization_vertices.begin(), it)]); - } - } - } - - rmm::device_uvector d_sg_personalization_vertices( - h_sg_personalization_vertices.size(), handle.get_stream()); - rmm::device_uvector d_sg_personalization_values(d_sg_personalization_vertices.size(), - handle.get_stream()); - if (d_sg_personalization_vertices.size() > 0) { - raft::update_device(d_sg_personalization_vertices.data(), - h_sg_personalization_vertices.data(), - h_sg_personalization_vertices.size(), - handle.get_stream()); - raft::update_device(d_sg_personalization_values.data(), - h_sg_personalization_values.data(), - h_sg_personalization_values.size(), - handle.get_stream()); - } - - rmm::device_uvector d_sg_pageranks(sg_graph_view.get_number_of_vertices(), - handle.get_stream()); - - cugraph::experimental::pagerank(handle, - sg_graph_view, - static_cast(nullptr), - d_sg_personalization_vertices.data(), - d_sg_personalization_values.data(), - static_cast(d_sg_personalization_vertices.size()), - d_sg_pageranks.begin(), - alpha, - epsilon, - std::numeric_limits::max(), // max_iterations - false, - false); - - std::vector h_sg_pageranks(sg_graph_view.get_number_of_vertices()); - raft::update_host( - h_sg_pageranks.data(), d_sg_pageranks.data(), d_sg_pageranks.size(), handle.get_stream()); - CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); - - // 4. run MG pagerank - - std::vector h_mg_personalization_vertices{}; - std::vector h_mg_personalization_values{}; - if (h_personalization_vertices.size() > 0) { - for (vertex_t i = 0; i < mg_graph_view.get_number_of_local_vertices(); ++i) { - auto it = std::lower_bound(h_personalization_vertices.begin(), - h_personalization_vertices.end(), - h_mg_renumber_map_labels[i]); - if (*it == h_mg_renumber_map_labels[i]) { - h_mg_personalization_vertices.push_back(mg_graph_view.get_local_vertex_first() + i); - h_mg_personalization_values.push_back( - h_personalization_values[std::distance(h_personalization_vertices.begin(), it)]); - } - } - } - rmm::device_uvector d_mg_personalization_vertices( h_mg_personalization_vertices.size(), handle.get_stream()); rmm::device_uvector d_mg_personalization_values(d_mg_personalization_vertices.size(), @@ -263,6 +187,11 @@ class Tests_MGPageRank : public ::testing::TestWithParam { handle.get_stream()); } + // 4. 
run MG PageRank + + result_t constexpr alpha{0.85}; + result_t constexpr epsilon{1e-6}; + rmm::device_uvector d_mg_pageranks(mg_graph_view.get_number_of_local_vertices(), handle.get_stream()); @@ -274,44 +203,145 @@ class Tests_MGPageRank : public ::testing::TestWithParam { d_mg_personalization_vertices.data(), d_mg_personalization_values.data(), static_cast(d_mg_personalization_vertices.size()), - d_mg_pageranks.begin(), + d_mg_pageranks.data(), alpha, epsilon, std::numeric_limits::max(), - false, false); CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement - std::vector h_mg_pageranks(mg_graph_view.get_number_of_local_vertices()); - raft::update_host( - h_mg_pageranks.data(), d_mg_pageranks.data(), d_mg_pageranks.size(), handle.get_stream()); - CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); - // 5. copmare SG & MG results - std::vector h_sg_shuffled_pageranks(sg_graph_view.get_number_of_vertices(), - result_t{0.0}); - for (size_t i = 0; i < h_sg_pageranks.size(); ++i) { - h_sg_shuffled_pageranks[h_sg_renumber_map_labels[i]] = h_sg_pageranks[i]; - } + if (configuration.check_correctness) { + // 5-1. create SG graph + + cugraph::experimental::graph_t sg_graph(handle); + std::tie(sg_graph, std::ignore) = + read_graph(handle, configuration, false); + + auto sg_graph_view = sg_graph.view(); + + // 5-2. collect personalization vertex/value pairs + + rmm::device_uvector d_sg_personalization_vertices(0, handle.get_stream()); + rmm::device_uvector d_sg_personalization_values(0, handle.get_stream()); + if (configuration.personalization_ratio > 0.0) { + rmm::device_uvector d_unrenumbered_personalization_vertices( + d_mg_personalization_vertices.size(), handle.get_stream()); + rmm::device_uvector d_unrenumbered_personalization_values( + d_unrenumbered_personalization_vertices.size(), handle.get_stream()); + raft::copy_async(d_unrenumbered_personalization_vertices.data(), + d_mg_personalization_vertices.data(), + d_mg_personalization_vertices.size(), + handle.get_stream()); + raft::copy_async(d_unrenumbered_personalization_values.data(), + d_mg_personalization_values.data(), + d_mg_personalization_values.size(), + handle.get_stream()); + + std::vector vertex_partition_lasts(comm_size); + for (size_t i = 0; i < vertex_partition_lasts.size(); ++i) { + vertex_partition_lasts[i] = mg_graph_view.get_vertex_partition_last(i); + } + cugraph::experimental::unrenumber_int_vertices( + handle, + d_unrenumbered_personalization_vertices.data(), + d_unrenumbered_personalization_vertices.size(), + d_mg_renumber_map_labels.data(), + mg_graph_view.get_local_vertex_first(), + mg_graph_view.get_local_vertex_last(), + vertex_partition_lasts, + handle.get_stream()); + + rmm::device_scalar d_local_personalization_vector_size( + d_unrenumbered_personalization_vertices.size(), handle.get_stream()); + rmm::device_uvector d_recvcounts(comm_size, handle.get_stream()); + comm.allgather( + d_local_personalization_vector_size.data(), d_recvcounts.data(), 1, handle.get_stream()); + std::vector recvcounts(d_recvcounts.size()); + raft::update_host( + recvcounts.data(), d_recvcounts.data(), d_recvcounts.size(), handle.get_stream()); + auto status = comm.sync_stream(handle.get_stream()); + ASSERT_EQ(status, raft::comms::status_t::SUCCESS); + + std::vector displacements(recvcounts.size(), size_t{0}); + std::partial_sum(recvcounts.begin(), recvcounts.end() - 1, displacements.begin() + 1); + + d_sg_personalization_vertices.resize(displacements.back() + recvcounts.back(), + handle.get_stream()); + 
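// The gather above first allgathers one element count per rank and then converts those
// counts into allgatherv displacements with an exclusive prefix sum; the total gathered
// size is the last displacement plus the last count. A host-only sketch with made-up counts:

#include <cstddef>
#include <cstdio>
#include <numeric>
#include <vector>

int main()
{
  // Hypothetical per-rank element counts, as returned by the allgather of sizes.
  std::vector<std::size_t> recvcounts{3, 0, 5, 2};

  // Exclusive prefix sum: displacements[i] is where rank i's data starts in the output.
  std::vector<std::size_t> displacements(recvcounts.size(), std::size_t{0});
  std::partial_sum(recvcounts.begin(), recvcounts.end() - 1, displacements.begin() + 1);

  auto total = displacements.back() + recvcounts.back();  // total gathered size
  for (std::size_t i = 0; i < recvcounts.size(); ++i) {
    std::printf("rank %zu: count=%zu offset=%zu\n", i, recvcounts[i], displacements[i]);
  }
  std::printf("total=%zu\n", total);
  return 0;
}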
d_sg_personalization_values.resize(d_sg_personalization_vertices.size(), + handle.get_stream()); + + comm.allgatherv(d_unrenumbered_personalization_vertices.data(), + d_sg_personalization_vertices.data(), + recvcounts.data(), + displacements.data(), + handle.get_stream()); + comm.allgatherv(d_unrenumbered_personalization_values.data(), + d_sg_personalization_values.data(), + recvcounts.data(), + displacements.data(), + handle.get_stream()); + + cugraph::test::sort_by_key(handle, + d_unrenumbered_personalization_vertices.data(), + d_unrenumbered_personalization_values.data(), + d_unrenumbered_personalization_vertices.size()); + } + + // 5-3. run SG PageRank + + rmm::device_uvector d_sg_pageranks(sg_graph_view.get_number_of_vertices(), + handle.get_stream()); + + cugraph::experimental::pagerank(handle, + sg_graph_view, + static_cast(nullptr), + d_sg_personalization_vertices.data(), + d_sg_personalization_values.data(), + static_cast(d_sg_personalization_vertices.size()), + d_sg_pageranks.data(), + alpha, + epsilon, + std::numeric_limits::max(), // max_iterations + false); + + // 5-4. compare + + std::vector h_sg_pageranks(sg_graph_view.get_number_of_vertices()); + raft::update_host( + h_sg_pageranks.data(), d_sg_pageranks.data(), d_sg_pageranks.size(), handle.get_stream()); + + std::vector h_mg_pageranks(mg_graph_view.get_number_of_local_vertices()); + raft::update_host( + h_mg_pageranks.data(), d_mg_pageranks.data(), d_mg_pageranks.size(), handle.get_stream()); + + std::vector h_mg_renumber_map_labels(d_mg_renumber_map_labels.size()); + raft::update_host(h_mg_renumber_map_labels.data(), + d_mg_renumber_map_labels.data(), + d_mg_renumber_map_labels.size(), + handle.get_stream()); + + handle.get_stream_view().synchronize(); + + auto threshold_ratio = 1e-3; + auto threshold_magnitude = + (1.0 / static_cast(mg_graph_view.get_number_of_vertices())) * + threshold_ratio; // skip comparison for low PageRank verties (lowly ranked vertices) + auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { + return std::abs(lhs - rhs) < + std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude); + }; - auto threshold_ratio = 1e-3; - auto threshold_magnitude = - (1.0 / static_cast(mg_graph_view.get_number_of_vertices())) * - threshold_ratio; // skip comparison for low PageRank verties (lowly ranked vertices) - auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { - return std::abs(lhs - rhs) < - std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude); - }; - - for (vertex_t i = 0; i < mg_graph_view.get_number_of_local_vertices(); ++i) { - auto mapped_vertex = h_mg_renumber_map_labels[i]; - ASSERT_TRUE(nearly_equal(h_mg_pageranks[i], h_sg_shuffled_pageranks[mapped_vertex])) - << "MG PageRank value for vertex: " << i << " in rank: " << comm_rank - << " has value: " << h_mg_pageranks[i] - << " which exceeds the error margin for comparing to SG value: " - << h_sg_shuffled_pageranks[mapped_vertex]; + for (vertex_t i = 0; i < mg_graph_view.get_number_of_local_vertices(); ++i) { + auto mapped_vertex = h_mg_renumber_map_labels[i]; + ASSERT_TRUE(nearly_equal(h_mg_pageranks[i], h_sg_pageranks[mapped_vertex])) + << "MG PageRank value for vertex: " << mapped_vertex << " in rank: " << comm_rank + << " has value: " << h_mg_pageranks[i] + << " which exceeds the error margin for comparing to SG value: " + << h_sg_pageranks[mapped_vertex]; + } } } }; @@ -325,6 +355,7 @@ INSTANTIATE_TEST_CASE_P( simple_test, Tests_MGPageRank, ::testing::Values( + // 
enable correctness checks PageRank_Usecase("test/datasets/karate.mtx", 0.0, false), PageRank_Usecase("test/datasets/karate.mtx", 0.5, false), PageRank_Usecase("test/datasets/karate.mtx", 0.0, true), @@ -352,6 +383,15 @@ INSTANTIATE_TEST_CASE_P( true), PageRank_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, 0.5, - true))); + true), + // disable correctness checks for large graphs + PageRank_Usecase( + cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, 0.0, false, false), + PageRank_Usecase( + cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, 0.5, false, false), + PageRank_Usecase( + cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, 0.0, true, false), + PageRank_Usecase( + cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, 0.5, true, false))); CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/utilities/generate_graph_from_edgelist.cu b/cpp/tests/utilities/generate_graph_from_edgelist.cu index 1b9fe6051f7..a9df392d2fb 100644 --- a/cpp/tests/utilities/generate_graph_from_edgelist.cu +++ b/cpp/tests/utilities/generate_graph_from_edgelist.cu @@ -18,6 +18,7 @@ #include #include #include +#include #include @@ -28,7 +29,7 @@ namespace cugraph { namespace test { -namespace detail { +namespace { template , rmm::device_uvector>> -generate_graph_from_edgelist(raft::handle_t const& handle, - rmm::device_uvector&& vertices, - rmm::device_uvector&& edgelist_rows, - rmm::device_uvector&& edgelist_cols, - rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, - bool renumber) +generate_graph_from_edgelist_impl(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber) { CUGRAPH_EXPECTS(renumber, "renumber should be true if multi_gpu is true."); @@ -59,95 +60,88 @@ generate_graph_from_edgelist(raft::handle_t const& handle, auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); auto const col_comm_size = col_comm.get_size(); - vertex_t number_of_vertices = static_cast(vertices.size()); - - auto vertex_key_func = - cugraph::experimental::detail::compute_gpu_id_from_vertex_t{comm_size}; - vertices.resize(thrust::distance(vertices.begin(), - thrust::remove_if( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - vertices.begin(), - vertices.end(), - [comm_rank, key_func = vertex_key_func] __device__(auto val) { - return key_func(val) != comm_rank; - })), - handle.get_stream()); - vertices.shrink_to_fit(handle.get_stream()); - - auto edge_key_func = cugraph::experimental::detail::compute_gpu_id_from_edge_t{ - false, comm_size, row_comm_size, col_comm_size}; - size_t number_of_local_edges{}; - if (test_weighted) { - auto edge_first = thrust::make_zip_iterator( - thrust::make_tuple(edgelist_rows.begin(), edgelist_cols.begin(), edgelist_weights.begin())); - number_of_local_edges = thrust::distance( - edge_first, - thrust::remove_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - edge_first, - edge_first + edgelist_rows.size(), - [comm_rank, key_func = edge_key_func] __device__(auto e) { - auto major = store_transposed ? thrust::get<1>(e) : thrust::get<0>(e); - auto minor = store_transposed ? 
thrust::get<0>(e) : thrust::get<1>(e); - return key_func(major, minor) != comm_rank; - })); - } else { - auto edge_first = - thrust::make_zip_iterator(thrust::make_tuple(edgelist_rows.begin(), edgelist_cols.begin())); - number_of_local_edges = thrust::distance( - edge_first, - thrust::remove_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - edge_first, - edge_first + edgelist_rows.size(), - [comm_rank, key_func = edge_key_func] __device__(auto e) { - auto major = store_transposed ? thrust::get<1>(e) : thrust::get<0>(e); - auto minor = store_transposed ? thrust::get<0>(e) : thrust::get<1>(e); - return key_func(major, minor) != comm_rank; - })); - } - - edgelist_rows.resize(number_of_local_edges, handle.get_stream()); - edgelist_rows.shrink_to_fit(handle.get_stream()); - edgelist_cols.resize(number_of_local_edges, handle.get_stream()); - edgelist_cols.shrink_to_fit(handle.get_stream()); - if (test_weighted) { - edgelist_weights.resize(number_of_local_edges, handle.get_stream()); - edgelist_weights.shrink_to_fit(handle.get_stream()); - } + auto local_partition_id_op = + [comm_size, + key_func = cugraph::experimental::detail::compute_partition_id_from_edge_t{ + comm_size, row_comm_size, col_comm_size}] __device__(auto pair) { + return key_func(thrust::get<0>(pair), thrust::get<1>(pair)) / + comm_size; // global partition id to local partition id + }; + auto pair_first = + store_transposed + ? thrust::make_zip_iterator(thrust::make_tuple(edgelist_cols.begin(), edgelist_rows.begin())) + : thrust::make_zip_iterator(thrust::make_tuple(edgelist_rows.begin(), edgelist_cols.begin())); + auto edge_counts = test_weighted + ? cugraph::experimental::groupby_and_count(pair_first, + pair_first + edgelist_rows.size(), + edgelist_weights.begin(), + local_partition_id_op, + col_comm_size, + handle.get_stream()) + : cugraph::experimental::groupby_and_count(pair_first, + pair_first + edgelist_rows.size(), + local_partition_id_op, + col_comm_size, + handle.get_stream()); + + std::vector h_edge_counts(edge_counts.size()); + raft::update_host( + h_edge_counts.data(), edge_counts.data(), edge_counts.size(), handle.get_stream()); + handle.get_stream_view().synchronize(); + + std::vector h_displacements(h_edge_counts.size(), size_t{0}); + std::partial_sum(h_edge_counts.begin(), h_edge_counts.end() - 1, h_displacements.begin() + 1); // 3. renumber rmm::device_uvector renumber_map_labels(0, handle.get_stream()); cugraph::experimental::partition_t partition{}; - vertex_t aggregate_number_of_vertices{}; + vertex_t number_of_vertices{}; edge_t number_of_edges{}; - // FIXME: set do_expensive_check to false once validated - std::tie(renumber_map_labels, partition, aggregate_number_of_vertices, number_of_edges) = - cugraph::experimental::renumber_edgelist( - handle, - vertices.data(), - static_cast(vertices.size()), - store_transposed ? edgelist_cols.data() : edgelist_rows.data(), - store_transposed ? edgelist_rows.data() : edgelist_cols.data(), - edgelist_rows.size(), - false, - true); - assert(aggregate_number_of_vertices == number_of_vertices); + { + std::vector major_ptrs(h_edge_counts.size()); + std::vector minor_ptrs(major_ptrs.size()); + std::vector counts(major_ptrs.size()); + for (size_t i = 0; i < h_edge_counts.size(); ++i) { + major_ptrs[i] = + (store_transposed ? edgelist_cols.begin() : edgelist_rows.begin()) + h_displacements[i]; + minor_ptrs[i] = + (store_transposed ? 
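// groupby_and_count above reorders the edge list so that edges belonging to the same local
// adjacency matrix partition become contiguous and returns one count per partition; the
// following partial_sum turns those counts into offsets into the reordered arrays. A
// simplified host-side sketch of that grouping; the partition-id functor here is an
// arbitrary stand-in, not the real hash:

#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <numeric>
#include <vector>

int main()
{
  // Hypothetical (row, col) edge list and number of local partitions.
  std::vector<int> rows{0, 5, 2, 7, 1, 6};
  std::vector<int> cols{3, 1, 4, 0, 2, 5};
  std::size_t const num_local_partitions = 2;

  // Stand-in for the real partition-id functor (the actual mapping is different).
  auto partition_id_op = [num_local_partitions](int row, int col) {
    return static_cast<std::size_t>(row + col) % num_local_partitions;
  };

  // Group edges by partition id; a stable sort keeps the relative order within a group.
  std::vector<std::size_t> order(rows.size());
  std::iota(order.begin(), order.end(), std::size_t{0});
  std::stable_sort(order.begin(), order.end(), [&](auto l, auto r) {
    return partition_id_op(rows[l], cols[l]) < partition_id_op(rows[r], cols[r]);
  });

  // Count edges per partition and derive offsets via an exclusive prefix sum.
  std::vector<std::size_t> counts(num_local_partitions, 0);
  for (auto i : order) { ++counts[partition_id_op(rows[i], cols[i])]; }
  std::vector<std::size_t> offsets(counts.size(), 0);
  std::partial_sum(counts.begin(), counts.end() - 1, offsets.begin() + 1);

  for (std::size_t p = 0; p < num_local_partitions; ++p) {
    std::printf("partition %zu: count=%zu offset=%zu\n", p, counts[p], offsets[p]);
  }
  return 0;
}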
edgelist_rows.begin() : edgelist_cols.begin()) + h_displacements[i]; + counts[i] = static_cast(h_edge_counts[i]); + } + // FIXME: set do_expensive_check to false once validated + std::tie(renumber_map_labels, partition, number_of_vertices, number_of_edges) = + cugraph::experimental::renumber_edgelist( + handle, + vertices.data(), + static_cast(vertices.size()), + major_ptrs, + minor_ptrs, + counts, + true); + } // 4. create a graph + std::vector> edgelists( + h_edge_counts.size()); + for (size_t i = 0; i < h_edge_counts.size(); ++i) { + edgelists[i] = cugraph::experimental::edgelist_t{ + edgelist_rows.data() + h_displacements[i], + edgelist_cols.data() + h_displacements[i], + test_weighted ? edgelist_weights.data() + h_displacements[i] + : static_cast(nullptr), + static_cast(h_edge_counts[i])}; + } + return std::make_tuple( cugraph::experimental::graph_t( handle, - std::vector>{ - cugraph::experimental::edgelist_t{ - edgelist_rows.data(), - edgelist_cols.data(), - test_weighted ? edgelist_weights.data() : nullptr, - static_cast(edgelist_rows.size())}}, + edgelists, partition, number_of_vertices, number_of_edges, - cugraph::experimental::graph_properties_t{is_symmetric, false}, + cugraph::experimental::graph_properties_t{is_symmetric, false, test_weighted}, true, true), std::move(renumber_map_labels)); @@ -163,14 +157,14 @@ std::enable_if_t< std::tuple< cugraph::experimental::graph_t, rmm::device_uvector>> -generate_graph_from_edgelist(raft::handle_t const& handle, - rmm::device_uvector&& vertices, - rmm::device_uvector&& edgelist_rows, - rmm::device_uvector&& edgelist_cols, - rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, - bool renumber) +generate_graph_from_edgelist_impl(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber) { vertex_t number_of_vertices = static_cast(vertices.size()); @@ -196,13 +190,13 @@ generate_graph_from_edgelist(raft::handle_t const& handle, test_weighted ? edgelist_weights.data() : nullptr, static_cast(edgelist_rows.size())}, number_of_vertices, - cugraph::experimental::graph_properties_t{is_symmetric, false}, + cugraph::experimental::graph_properties_t{is_symmetric, false, test_weighted}, renumber ? true : false, true), std::move(renumber_map_labels)); } -} // namespace detail +} // namespace template ( - handle, - std::move(vertices), - std::move(edgelist_rows), - std::move(edgelist_cols), - std::move(edgelist_weights), - is_symmetric, - test_weighted, - renumber); + return generate_graph_from_edgelist_impl( + handle, + std::move(vertices), + std::move(edgelist_rows), + std::move(edgelist_cols), + std::move(edgelist_weights), + is_symmetric, + test_weighted, + renumber); } // explicit instantiations diff --git a/cpp/tests/utilities/matrix_market_file_utilities.cu b/cpp/tests/utilities/matrix_market_file_utilities.cu index ddbbac603ee..bf7539864be 100644 --- a/cpp/tests/utilities/matrix_market_file_utilities.cu +++ b/cpp/tests/utilities/matrix_market_file_utilities.cu @@ -13,9 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #include +#include #include +#include #include #include @@ -339,7 +342,73 @@ read_graph_from_matrix_market_file(raft::handle_t const& handle, d_vertices.begin(), d_vertices.end(), vertex_t{0}); + handle.get_stream_view().synchronize(); + + if (multi_gpu) { + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_size = row_comm.get_size(); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_size = col_comm.get_size(); + + auto vertex_key_func = + cugraph::experimental::detail::compute_gpu_id_from_vertex_t{comm_size}; + d_vertices.resize( + thrust::distance( + d_vertices.begin(), + thrust::remove_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + d_vertices.begin(), + d_vertices.end(), + [comm_rank, key_func = vertex_key_func] __device__(auto val) { + return key_func(val) != comm_rank; + })), + handle.get_stream()); + d_vertices.shrink_to_fit(handle.get_stream()); + + auto edge_key_func = cugraph::experimental::detail::compute_gpu_id_from_edge_t{ + comm_size, row_comm_size, col_comm_size}; + size_t number_of_local_edges{}; + if (test_weighted) { + auto edge_first = thrust::make_zip_iterator(thrust::make_tuple( + d_edgelist_rows.begin(), d_edgelist_cols.begin(), d_edgelist_weights.begin())); + number_of_local_edges = thrust::distance( + edge_first, + thrust::remove_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edge_first, + edge_first + d_edgelist_rows.size(), + [comm_rank, key_func = edge_key_func] __device__(auto e) { + auto major = store_transposed ? thrust::get<1>(e) : thrust::get<0>(e); + auto minor = store_transposed ? thrust::get<0>(e) : thrust::get<1>(e); + return key_func(major, minor) != comm_rank; + })); + } else { + auto edge_first = thrust::make_zip_iterator( + thrust::make_tuple(d_edgelist_rows.begin(), d_edgelist_cols.begin())); + number_of_local_edges = thrust::distance( + edge_first, + thrust::remove_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edge_first, + edge_first + d_edgelist_rows.size(), + [comm_rank, key_func = edge_key_func] __device__(auto e) { + auto major = store_transposed ? thrust::get<1>(e) : thrust::get<0>(e); + auto minor = store_transposed ? thrust::get<0>(e) : thrust::get<1>(e); + return key_func(major, minor) != comm_rank; + })); + } + + d_edgelist_rows.resize(number_of_local_edges, handle.get_stream()); + d_edgelist_rows.shrink_to_fit(handle.get_stream()); + d_edgelist_cols.resize(number_of_local_edges, handle.get_stream()); + d_edgelist_cols.shrink_to_fit(handle.get_stream()); + if (test_weighted) { + d_edgelist_weights.resize(number_of_local_edges, handle.get_stream()); + d_edgelist_weights.shrink_to_fit(handle.get_stream()); + } + } + handle.get_stream_view().synchronize(); return generate_graph_from_edgelist( handle, std::move(d_vertices), diff --git a/cpp/tests/utilities/rmat_utilities.cu b/cpp/tests/utilities/rmat_utilities.cu index 16ea7a486fc..3f0bb0b4a1f 100644 --- a/cpp/tests/utilities/rmat_utilities.cu +++ b/cpp/tests/utilities/rmat_utilities.cu @@ -13,10 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #include +#include #include +#include #include +#include #include #include @@ -41,39 +45,191 @@ generate_graph_from_rmat_params(raft::handle_t const& handle, double a, double b, double c, - uint64_t seed, + uint64_t base_seed, bool undirected, bool scramble_vertex_ids, bool test_weighted, - bool renumber) + bool renumber, + std::vector const& partition_ids, + size_t num_partitions) { + CUGRAPH_EXPECTS(!multi_gpu || renumber, "renumber should be true if multi_gpu is true."); + CUGRAPH_EXPECTS(size_t{1} << scale <= static_cast(std::numeric_limits::max()), + "vertex_t overflow."); + CUGRAPH_EXPECTS( + (size_t{1} << scale) * edge_factor <= static_cast(std::numeric_limits::max()), + " edge_t overflow."); + + vertex_t number_of_vertices = static_cast(size_t{1} << scale); + edge_t number_of_edges = + static_cast(static_cast(number_of_vertices) * edge_factor); + + std::vector partition_edge_counts(partition_ids.size()); + std::vector partition_vertex_firsts(partition_ids.size()); + std::vector partition_vertex_lasts(partition_ids.size()); + for (size_t i = 0; i < partition_ids.size(); ++i) { + auto id = partition_ids[i]; + + partition_edge_counts[i] = number_of_edges / num_partitions + + (id < number_of_edges % num_partitions ? edge_t{1} : edge_t{0}); + + partition_vertex_firsts[i] = (number_of_vertices / num_partitions) * id; + partition_vertex_lasts[i] = (number_of_vertices / num_partitions) * (id + 1); + if (id < number_of_vertices % num_partitions) { + partition_vertex_firsts[i] += id; + partition_vertex_lasts[i] += id + 1; + } else { + partition_vertex_firsts[i] += number_of_vertices % num_partitions; + partition_vertex_lasts[i] += number_of_vertices % num_partitions; + } + } + rmm::device_uvector d_edgelist_rows(0, handle.get_stream()); rmm::device_uvector d_edgelist_cols(0, handle.get_stream()); - std::tie(d_edgelist_rows, d_edgelist_cols) = - cugraph::experimental::generate_rmat_edgelist( - handle, scale, edge_factor, a, b, c, seed, undirected ? true : false, scramble_vertex_ids); + rmm::device_uvector d_edgelist_weights(0, handle.get_stream()); + for (size_t i = 0; i < partition_ids.size(); ++i) { + auto id = partition_ids[i]; + + rmm::device_uvector d_tmp_rows(0, handle.get_stream()); + rmm::device_uvector d_tmp_cols(0, handle.get_stream()); + std::tie(i == 0 ? d_edgelist_rows : d_tmp_rows, i == 0 ? d_edgelist_cols : d_tmp_cols) = + cugraph::experimental::generate_rmat_edgelist(handle, + scale, + partition_edge_counts[i], + a, + b, + c, + base_seed + id, + undirected ? true : false, + scramble_vertex_ids); + + rmm::device_uvector d_tmp_weights(0, handle.get_stream()); + if (test_weighted) { + if (i == 0) { + d_edgelist_weights.resize(d_edgelist_rows.size(), handle.get_stream()); + } else { + d_tmp_weights.resize(d_tmp_rows.size(), handle.get_stream()); + } + + raft::random::Rng rng(base_seed + num_partitions + id); + rng.uniform(i == 0 ? d_edgelist_weights.data() : d_tmp_weights.data(), + i == 0 ? 
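// The bookkeeping above splits number_of_edges and the vertex id range as evenly as possible
// across num_partitions, giving each of the first (total % num_partitions) partitions one
// extra element. A standalone sketch of that split, checking that the per-partition ranges
// and counts add back up; the totals here are made up:

#include <cassert>
#include <cstdio>

int main()
{
  // Hypothetical totals; the same formula is applied to edge counts and vertex ranges.
  long long const total          = 103;
  long long const num_partitions = 4;

  long long sum{0};
  for (long long id = 0; id < num_partitions; ++id) {
    auto count = total / num_partitions + (id < total % num_partitions ? 1 : 0);

    auto first = (total / num_partitions) * id;
    auto last  = (total / num_partitions) * (id + 1);
    if (id < total % num_partitions) {
      first += id;
      last += id + 1;
    } else {
      first += total % num_partitions;
      last += total % num_partitions;
    }

    assert(last - first == count);  // the range split agrees with the count split
    sum += count;
    std::printf("partition %lld: [%lld, %lld) count=%lld\n", id, first, last, count);
  }
  assert(sum == total);  // the partitions tile [0, total) with no gap or overlap
  return 0;
}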
d_edgelist_weights.size() : d_tmp_weights.size(), + weight_t{0.0}, + weight_t{1.0}, + handle.get_stream()); + } + + if (i > 0) { + auto start_offset = d_edgelist_rows.size(); + d_edgelist_rows.resize(start_offset + d_tmp_rows.size(), handle.get_stream()); + d_edgelist_cols.resize(d_edgelist_rows.size(), handle.get_stream()); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + d_tmp_rows.begin(), + d_tmp_rows.end(), + d_edgelist_rows.begin() + start_offset); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + d_tmp_cols.begin(), + d_tmp_cols.end(), + d_edgelist_cols.begin() + start_offset); + if (test_weighted) { + d_edgelist_weights.resize(d_edgelist_rows.size(), handle.get_stream()); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + d_tmp_weights.begin(), + d_tmp_weights.end(), + d_edgelist_weights.begin() + start_offset); + } + } + } + if (undirected) { // FIXME: need to symmetrize CUGRAPH_FAIL("unimplemented."); } - rmm::device_uvector d_edgelist_weights(test_weighted ? d_edgelist_rows.size() : 0, - handle.get_stream()); - if (test_weighted) { - raft::random::Rng rng(seed + 1); - rng.uniform(d_edgelist_weights.data(), - d_edgelist_weights.size(), - weight_t{0.0}, - weight_t{1.0}, - handle.get_stream()); + if (multi_gpu) { + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_size = row_comm.get_size(); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_size = col_comm.get_size(); + + rmm::device_uvector d_rx_edgelist_rows(0, handle.get_stream()); + rmm::device_uvector d_rx_edgelist_cols(0, handle.get_stream()); + rmm::device_uvector d_rx_edgelist_weights(0, handle.get_stream()); + if (test_weighted) { + auto edge_first = thrust::make_zip_iterator( + thrust::make_tuple(store_transposed ? d_edgelist_cols.begin() : d_edgelist_rows.begin(), + store_transposed ? d_edgelist_rows.begin() : d_edgelist_cols.begin(), + d_edgelist_weights.begin())); + + std::forward_as_tuple(std::tie(store_transposed ? d_rx_edgelist_cols : d_rx_edgelist_rows, + store_transposed ? d_rx_edgelist_rows : d_rx_edgelist_cols, + d_rx_edgelist_weights), + std::ignore) = + cugraph::experimental::groupby_gpuid_and_shuffle_values( + comm, // handle.get_comms(), + edge_first, + edge_first + d_edgelist_rows.size(), + [key_func = + cugraph::experimental::detail::compute_gpu_id_from_edge_t{ + comm_size, row_comm_size, col_comm_size}] __device__(auto val) { + return key_func(thrust::get<0>(val), thrust::get<1>(val)); + }, + handle.get_stream()); + } else { + auto edge_first = thrust::make_zip_iterator( + thrust::make_tuple(store_transposed ? d_edgelist_cols.begin() : d_edgelist_rows.begin(), + store_transposed ? d_edgelist_rows.begin() : d_edgelist_cols.begin())); + + std::forward_as_tuple(std::tie(store_transposed ? d_rx_edgelist_cols : d_rx_edgelist_rows, + store_transposed ? 
d_rx_edgelist_rows : d_rx_edgelist_cols), + std::ignore) = + cugraph::experimental::groupby_gpuid_and_shuffle_values( + comm, // handle.get_comms(), + edge_first, + edge_first + d_edgelist_rows.size(), + [key_func = + cugraph::experimental::detail::compute_gpu_id_from_edge_t{ + comm_size, row_comm_size, col_comm_size}] __device__(auto val) { + return key_func(thrust::get<0>(val), thrust::get<1>(val)); + }, + handle.get_stream()); + } + + d_edgelist_rows = std::move(d_rx_edgelist_rows); + d_edgelist_cols = std::move(d_rx_edgelist_cols); + d_edgelist_weights = std::move(d_rx_edgelist_weights); + } + + rmm::device_uvector d_vertices(0, handle.get_stream()); + for (size_t i = 0; i < partition_ids.size(); ++i) { + auto id = partition_ids[i]; + + auto start_offset = d_vertices.size(); + d_vertices.resize(start_offset + (partition_vertex_lasts[i] - partition_vertex_firsts[i]), + handle.get_stream()); + thrust::sequence(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + d_vertices.begin() + start_offset, + d_vertices.end(), + partition_vertex_firsts[i]); } - rmm::device_uvector d_vertices(static_cast(size_t{1} << scale), - handle.get_stream()); - thrust::sequence(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - d_vertices.begin(), - d_vertices.end(), - vertex_t{0}); + if (multi_gpu) { + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + + rmm::device_uvector d_rx_vertices(0, handle.get_stream()); + std::tie(d_rx_vertices, std::ignore) = cugraph::experimental::groupby_gpuid_and_shuffle_values( + comm, // handle.get_comms(), + d_vertices.begin(), + d_vertices.end(), + [key_func = + cugraph::experimental::detail::compute_gpu_id_from_vertex_t{ + comm_size}] __device__(auto val) { return key_func(val); }, + handle.get_stream()); + d_vertices = std::move(d_rx_vertices); + } return generate_graph_from_edgelist( handle, @@ -90,59 +246,71 @@ generate_graph_from_rmat_params(raft::handle_t const& handle, template std::tuple, rmm::device_uvector> -generate_graph_from_rmat_params(raft::handle_t const& handle, - size_t scale, - size_t edge_factor, - double a, - double b, - double c, - uint64_t seed, - bool undirected, - bool scramble_vertex_ids, - bool test_weighted, - bool renumber); +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> -generate_graph_from_rmat_params(raft::handle_t const& handle, - size_t scale, - size_t edge_factor, - double a, - double b, - double c, - uint64_t seed, - bool undirected, - bool scramble_vertex_ids, - bool test_weighted, - bool renumber); +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> -generate_graph_from_rmat_params(raft::handle_t const& handle, - size_t scale, - size_t edge_factor, - double a, - double b, - double c, - uint64_t seed, - bool undirected, - bool scramble_vertex_ids, - bool test_weighted, - bool renumber); +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + 
double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> -generate_graph_from_rmat_params(raft::handle_t const& handle, - size_t scale, - size_t edge_factor, - double a, - double b, - double c, - uint64_t seed, - bool undirected, - bool scramble_vertex_ids, - bool test_weighted, - bool renumber); +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> @@ -157,105 +325,128 @@ generate_graph_from_rmat_params( bool undirected, bool scramble_vertex_ids, bool test_weighted, - bool renumber); + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> -generate_graph_from_rmat_params(raft::handle_t const& handle, - size_t scale, - size_t edge_factor, - double a, - double b, - double c, - uint64_t seed, - bool undirected, - bool scramble_vertex_ids, - bool test_weighted, - bool renumber); +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> -generate_graph_from_rmat_params(raft::handle_t const& handle, - size_t scale, - size_t edge_factor, - double a, - double b, - double c, - uint64_t seed, - bool undirected, - bool scramble_vertex_ids, - bool test_weighted, - bool renumber); +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> -generate_graph_from_rmat_params(raft::handle_t const& handle, - size_t scale, - size_t edge_factor, - double a, - double b, - double c, - uint64_t seed, - bool undirected, - bool scramble_vertex_ids, - bool test_weighted, - bool renumber); +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> -generate_graph_from_rmat_params(raft::handle_t const& handle, - size_t scale, - size_t edge_factor, - double a, - double b, - double c, - uint64_t seed, - bool undirected, - bool scramble_vertex_ids, - bool test_weighted, - bool renumber); +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> -generate_graph_from_rmat_params(raft::handle_t const& handle, - size_t scale, - size_t edge_factor, - double a, - double b, - 
double c, - uint64_t seed, - bool undirected, - bool scramble_vertex_ids, - bool test_weighted, - bool renumber); +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> -generate_graph_from_rmat_params(raft::handle_t const& handle, - size_t scale, - size_t edge_factor, - double a, - double b, - double c, - uint64_t seed, - bool undirected, - bool scramble_vertex_ids, - bool test_weighted, - bool renumber); +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> -generate_graph_from_rmat_params(raft::handle_t const& handle, - size_t scale, - size_t edge_factor, - double a, - double b, - double c, - uint64_t seed, - bool undirected, - bool scramble_vertex_ids, - bool test_weighted, - bool renumber); +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> @@ -270,105 +461,128 @@ generate_graph_from_rmat_params( bool undirected, bool scramble_vertex_ids, bool test_weighted, - bool renumber); + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> -generate_graph_from_rmat_params(raft::handle_t const& handle, - size_t scale, - size_t edge_factor, - double a, - double b, - double c, - uint64_t seed, - bool undirected, - bool scramble_vertex_ids, - bool test_weighted, - bool renumber); +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> -generate_graph_from_rmat_params(raft::handle_t const& handle, - size_t scale, - size_t edge_factor, - double a, - double b, - double c, - uint64_t seed, - bool undirected, - bool scramble_vertex_ids, - bool test_weighted, - bool renumber); +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> -generate_graph_from_rmat_params(raft::handle_t const& handle, - size_t scale, - size_t edge_factor, - double a, - double b, - double c, - uint64_t seed, - bool undirected, - bool scramble_vertex_ids, - bool test_weighted, - bool renumber); +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool 
renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> -generate_graph_from_rmat_params(raft::handle_t const& handle, - size_t scale, - size_t edge_factor, - double a, - double b, - double c, - uint64_t seed, - bool undirected, - bool scramble_vertex_ids, - bool test_weighted, - bool renumber); +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> -generate_graph_from_rmat_params(raft::handle_t const& handle, - size_t scale, - size_t edge_factor, - double a, - double b, - double c, - uint64_t seed, - bool undirected, - bool scramble_vertex_ids, - bool test_weighted, - bool renumber); +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> -generate_graph_from_rmat_params(raft::handle_t const& handle, - size_t scale, - size_t edge_factor, - double a, - double b, - double c, - uint64_t seed, - bool undirected, - bool scramble_vertex_ids, - bool test_weighted, - bool renumber); +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> -generate_graph_from_rmat_params(raft::handle_t const& handle, - size_t scale, - size_t edge_factor, - double a, - double b, - double c, - uint64_t seed, - bool undirected, - bool scramble_vertex_ids, - bool test_weighted, - bool renumber); +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> @@ -383,49 +597,60 @@ generate_graph_from_rmat_params( bool undirected, bool scramble_vertex_ids, bool test_weighted, - bool renumber); + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> -generate_graph_from_rmat_params(raft::handle_t const& handle, - size_t scale, - size_t edge_factor, - double a, - double b, - double c, - uint64_t seed, - bool undirected, - bool scramble_vertex_ids, - bool test_weighted, - bool renumber); +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> -generate_graph_from_rmat_params(raft::handle_t const& handle, - size_t scale, - size_t edge_factor, - double a, - double b, - double c, - uint64_t seed, - bool undirected, - bool scramble_vertex_ids, - bool test_weighted, - bool renumber); 
+generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> -generate_graph_from_rmat_params(raft::handle_t const& handle, - size_t scale, - size_t edge_factor, - double a, - double b, - double c, - uint64_t seed, - bool undirected, - bool scramble_vertex_ids, - bool test_weighted, - bool renumber); +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); } // namespace test } // namespace cugraph diff --git a/cpp/tests/utilities/test_utilities.hpp b/cpp/tests/utilities/test_utilities.hpp index 37e87c62247..e81a76b4163 100644 --- a/cpp/tests/utilities/test_utilities.hpp +++ b/cpp/tests/utilities/test_utilities.hpp @@ -106,6 +106,22 @@ static const std::string& get_rapids_dataset_root_dir() return rdrd; } +template +std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + // returns a tuple of (rows, columns, weights, number_of_vertices, is_symmetric) template std::tuple, @@ -130,22 +146,6 @@ read_graph_from_matrix_market_file(raft::handle_t const& handle, bool test_weighted, bool renumber); -template -std::tuple, - rmm::device_uvector> -generate_graph_from_edgelist(raft::handle_t const& handle, - rmm::device_uvector&& vertices, - rmm::device_uvector&& edgelist_rows, - rmm::device_uvector&& edgelist_cols, - rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, - bool renumber); - template const& partition_ids, + size_t num_partitions); struct rmat_params_t { size_t scale{}; @@ -182,19 +184,5 @@ struct input_graph_specifier_t { rmat_params_t rmat_params{}; }; -template -std::enable_if_t::value, bool> is_valid_vertex(vertex_t num_vertices, - vertex_t v) -{ - return (v >= 0) && (v < num_vertices); -} - -template -std::enable_if_t::value, bool> is_valid_vertex(vertex_t num_vertices, - vertex_t v) -{ - return v < num_vertices; -} - } // namespace test } // namespace cugraph diff --git a/cpp/tests/utilities/thrust_wrapper.cu b/cpp/tests/utilities/thrust_wrapper.cu new file mode 100644 index 00000000000..5d32fb8a5d1 --- /dev/null +++ b/cpp/tests/utilities/thrust_wrapper.cu @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
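The new partition_ids/num_partitions parameters let each process generate only its share of the R-mat edges and vertices. A minimal sketch of the splitting arithmetic used in generate_graph_from_rmat_params above, with partition_share as a hypothetical helper name, is:

#include <cstdint>
#include <tuple>

// Returns {edge_count, vertex_first, vertex_last} for one partition id, mirroring the
// remainder-distribution logic above: counts are divided evenly and the leftover
// edges/vertices go to the lowest partition ids.
std::tuple<int64_t, int64_t, int64_t> partition_share(int64_t number_of_vertices,
                                                      int64_t number_of_edges,
                                                      int64_t id,
                                                      int64_t num_partitions)
{
  auto edge_count =
    number_of_edges / num_partitions + (id < number_of_edges % num_partitions ? 1 : 0);

  auto vertex_first = (number_of_vertices / num_partitions) * id;
  auto vertex_last  = (number_of_vertices / num_partitions) * (id + 1);
  auto remainder    = number_of_vertices % num_partitions;
  if (id < remainder) {
    vertex_first += id;
    vertex_last += id + 1;
  } else {
    vertex_first += remainder;
    vertex_last += remainder;
  }
  return {edge_count, vertex_first, vertex_last};
}

For example, partition_share(10, 25, 0, 4) yields {7, 0, 3}: partition 0 receives one of the leftover edges and one of the leftover vertices.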
+ */ + +#include + +#include +#include + +#include +#include + +namespace cugraph { +namespace test { + +template +rmm::device_uvector sort_by_key(raft::handle_t const& handle, + vertex_t const* keys, + value_t const* values, + size_t num_pairs) +{ + rmm::device_uvector sorted_keys(num_pairs, handle.get_stream_view()); + rmm::device_uvector sorted_values(num_pairs, handle.get_stream_view()); + + thrust::copy( + rmm::exec_policy(handle.get_stream_view()), keys, keys + num_pairs, sorted_keys.begin()); + thrust::copy( + rmm::exec_policy(handle.get_stream_view()), values, values + num_pairs, sorted_values.begin()); + + thrust::sort_by_key(rmm::exec_policy(handle.get_stream_view()), + sorted_keys.begin(), + sorted_keys.end(), + sorted_values.begin()); + + return sorted_values; +} + +template rmm::device_uvector sort_by_key(raft::handle_t const& handle, + int32_t const* keys, + float const* values, + size_t num_pairs); + +template rmm::device_uvector sort_by_key(raft::handle_t const& handle, + int32_t const* keys, + double const* values, + size_t num_pairs); + +template rmm::device_uvector sort_by_key(raft::handle_t const& handle, + int32_t const* keys, + int32_t const* values, + size_t num_pairs); + +template rmm::device_uvector sort_by_key(raft::handle_t const& handle, + int64_t const* keys, + float const* values, + size_t num_pairs); + +template rmm::device_uvector sort_by_key(raft::handle_t const& handle, + int64_t const* keys, + double const* values, + size_t num_pairs); + +template rmm::device_uvector sort_by_key(raft::handle_t const& handle, + int64_t const* keys, + int64_t const* values, + size_t num_pairs); + +} // namespace test +} // namespace cugraph diff --git a/cpp/tests/utilities/thrust_wrapper.hpp b/cpp/tests/utilities/thrust_wrapper.hpp new file mode 100644 index 00000000000..579dc3c550f --- /dev/null +++ b/cpp/tests/utilities/thrust_wrapper.hpp @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
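sort_by_key above is a test-only helper for bringing device results into a canonical key order before comparison. A hypothetical call site (check_results, d_vertices, and d_distances are illustrative names, and the include paths are approximate) might look like:

#include <raft/handle.hpp>
#include <rmm/device_uvector.hpp>
#include <utilities/thrust_wrapper.hpp>

// Inside a test, with d_vertices (int32_t) and d_distances (float) of equal length:
void check_results(raft::handle_t const& handle,
                   rmm::device_uvector<int32_t> const& d_vertices,
                   rmm::device_uvector<float> const& d_distances)
{
  // Reorder the distances by ascending vertex id so they can be compared
  // element-wise against a reference computed on the host.
  auto d_sorted_distances = cugraph::test::sort_by_key(
    handle, d_vertices.data(), d_distances.data(), d_vertices.size());
  // ... copy d_sorted_distances to the host and compare against the reference ...
}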
+ */ + +#include +#include + +namespace cugraph { +namespace test { + +template +rmm::device_uvector sort_by_key(raft::handle_t const& handle, + vertex_t const* keys, + value_t const* values, + size_t num_pairs); + +} // namespace test +} // namespace cugraph diff --git a/python/cugraph/community/egonet_wrapper.pyx b/python/cugraph/community/egonet_wrapper.pyx index ead41705628..23aa159314f 100644 --- a/python/cugraph/community/egonet_wrapper.pyx +++ b/python/cugraph/community/egonet_wrapper.pyx @@ -42,7 +42,7 @@ def egonet(input_graph, vertices, radius=1): num_verts = input_graph.number_of_vertices() num_edges = input_graph.number_of_edges(directed_edges=True) - num_partition_edges = num_edges + num_local_edges = num_edges cdef uintptr_t c_src_vertices = src.__cuda_array_interface__['data'][0] cdef uintptr_t c_dst_vertices = dst.__cuda_array_interface__['data'][0] @@ -50,8 +50,10 @@ def egonet(input_graph, vertices, radius=1): if weights is not None: c_edge_weights = weights.__cuda_array_interface__['data'][0] weight_t = weights.dtype + is_weighted = True else: weight_t = np.dtype("float32") + is_weighted = False # Pointers for egonet vertices = vertices.astype('int32') @@ -72,10 +74,11 @@ def egonet(input_graph, vertices, radius=1): ((numberTypeMap[vertex_t])), ((numberTypeMap[edge_t])), ((numberTypeMap[weight_t])), - num_partition_edges, + num_local_edges, num_verts, num_edges, False, + is_weighted, False, False) if(weight_t==np.dtype("float32")): diff --git a/python/cugraph/dask/centrality/mg_katz_centrality_wrapper.pyx b/python/cugraph/dask/centrality/mg_katz_centrality_wrapper.pyx index ccae26fe7e6..5fb9de788cf 100644 --- a/python/cugraph/dask/centrality/mg_katz_centrality_wrapper.pyx +++ b/python/cugraph/dask/centrality/mg_katz_centrality_wrapper.pyx @@ -52,8 +52,12 @@ def mg_katz_centrality(input_df, if "value" in input_df.columns: weights = input_df['value'] weight_t = weights.dtype + is_weighted = True + raise NotImplementedError # FIXME: c_edge_weights is always set to NULL else: + weights = None weight_t = np.dtype("float32") + is_weighted = False if alpha is None: alpha = 0.1 @@ -67,11 +71,13 @@ def mg_katz_centrality(input_df, np.dtype("double") : numberTypeEnum.doubleType} # FIXME: needs to be edge_t type not int - cdef int num_partition_edges = len(src) + cdef int num_local_edges = len(src) cdef uintptr_t c_src_vertices = src.__cuda_array_interface__['data'][0] cdef uintptr_t c_dst_vertices = dst.__cuda_array_interface__['data'][0] cdef uintptr_t c_edge_weights = NULL + if weights is not None: + c_edge_weights = weights.__cuda_array_interface__['data'][0] # FIXME: data is on device, move to host (to_pandas()), convert to np array and access pointer to pass to C vertex_partition_offsets_host = vertex_partition_offsets.values_host @@ -85,9 +91,10 @@ def mg_katz_centrality(input_df, ((numberTypeMap[vertex_t])), ((numberTypeMap[edge_t])), ((numberTypeMap[weight_t])), - num_partition_edges, + num_local_edges, num_global_verts, num_global_edges, True, + is_weighted, True, True) df = cudf.DataFrame() diff --git a/python/cugraph/dask/community/louvain_wrapper.pyx b/python/cugraph/dask/community/louvain_wrapper.pyx index f58630d07aa..a3cebeac272 100644 --- a/python/cugraph/dask/community/louvain_wrapper.pyx +++ b/python/cugraph/dask/community/louvain_wrapper.pyx @@ -56,12 +56,12 @@ def louvain(input_df, src = input_df['src'] dst = input_df['dst'] - num_partition_edges = len(src) + num_local_edges = len(src) if "value" in input_df.columns: weights = input_df['value'] else: - weights 
= cudf.Series(np.full(num_partition_edges, 1.0, dtype=np.float32)) + weights = cudf.Series(np.full(num_local_edges, 1.0, dtype=np.float32)) vertex_t = src.dtype if num_global_edges > (2**31 - 1): @@ -94,9 +94,10 @@ def louvain(input_df, ((numberTypeMap[vertex_t])), ((numberTypeMap[edge_t])), ((numberTypeMap[weight_t])), - num_partition_edges, + num_local_edges, num_global_verts, num_global_edges, sorted_by_degree, + True, False, True) # store_transposed, multi_gpu # Create the output dataframe, column lengths must be equal to the number of diff --git a/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx b/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx index 12f2342559b..c2f92f0f33b 100644 --- a/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx +++ b/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx @@ -51,8 +51,12 @@ def mg_pagerank(input_df, if "value" in input_df.columns: weights = input_df['value'] weight_t = weights.dtype + is_weighted = True + raise NotImplementedError # FIXME: c_edge_weights is always set to NULL else: + weights = None weight_t = np.dtype("float32") + is_weighted = False # FIXME: Offsets and indices are currently hardcoded to int, but this may # not be acceptable in the future. @@ -62,11 +66,13 @@ def mg_pagerank(input_df, np.dtype("double") : numberTypeEnum.doubleType} # FIXME: needs to be edge_t type not int - cdef int num_partition_edges = len(src) + cdef int num_local_edges = len(src) cdef uintptr_t c_src_vertices = src.__cuda_array_interface__['data'][0] cdef uintptr_t c_dst_vertices = dst.__cuda_array_interface__['data'][0] cdef uintptr_t c_edge_weights = NULL + if weights is not None: + c_edge_weights = weights.__cuda_array_interface__['data'][0] # FIXME: data is on device, move to host (to_pandas()), convert to np array and access pointer to pass to C vertex_partition_offsets_host = vertex_partition_offsets.values_host @@ -81,9 +87,10 @@ def mg_pagerank(input_df, ((numberTypeMap[vertex_t])), ((numberTypeMap[edge_t])), ((numberTypeMap[weight_t])), - num_partition_edges, + num_local_edges, num_global_verts, num_global_edges, True, + is_weighted, True, True) df = cudf.DataFrame() diff --git a/python/cugraph/dask/traversal/mg_bfs_wrapper.pyx b/python/cugraph/dask/traversal/mg_bfs_wrapper.pyx index 527cb2bcf0a..44630ba5fb3 100644 --- a/python/cugraph/dask/traversal/mg_bfs_wrapper.pyx +++ b/python/cugraph/dask/traversal/mg_bfs_wrapper.pyx @@ -58,7 +58,7 @@ def mg_bfs(input_df, np.dtype("double") : numberTypeEnum.doubleType} # FIXME: needs to be edge_t type not int - cdef int num_partition_edges = len(src) + cdef int num_local_edges = len(src) cdef uintptr_t c_src_vertices = src.__cuda_array_interface__['data'][0] cdef uintptr_t c_dst_vertices = dst.__cuda_array_interface__['data'][0] @@ -77,9 +77,10 @@ def mg_bfs(input_df, ((numberTypeMap[vertex_t])), ((numberTypeMap[edge_t])), ((numberTypeMap[weight_t])), - num_partition_edges, + num_local_edges, num_global_verts, num_global_edges, True, + False, # BFS runs on unweighted graphs False, True) # Generate the cudf.DataFrame result diff --git a/python/cugraph/dask/traversal/mg_sssp_wrapper.pyx b/python/cugraph/dask/traversal/mg_sssp_wrapper.pyx index 15d956836b4..82a4ebe04d6 100644 --- a/python/cugraph/dask/traversal/mg_sssp_wrapper.pyx +++ b/python/cugraph/dask/traversal/mg_sssp_wrapper.pyx @@ -46,9 +46,11 @@ def mg_sssp(input_df, if "value" in input_df.columns: weights = input_df['value'] weight_t = weights.dtype + is_weighted = True else: weights = None weight_t = np.dtype("float32") 
+ is_weighted = False # FIXME: Offsets and indices are currently hardcoded to int, but this may # not be acceptable in the future. @@ -58,7 +60,7 @@ def mg_sssp(input_df, np.dtype("double") : numberTypeEnum.doubleType} # FIXME: needs to be edge_t type not int - cdef int num_partition_edges = len(src) + cdef int num_local_edges = len(src) cdef uintptr_t c_src_vertices = src.__cuda_array_interface__['data'][0] cdef uintptr_t c_dst_vertices = dst.__cuda_array_interface__['data'][0] @@ -79,9 +81,10 @@ def mg_sssp(input_df, ((numberTypeMap[vertex_t])), ((numberTypeMap[edge_t])), ((numberTypeMap[weight_t])), - num_partition_edges, + num_local_edges, num_global_verts, num_global_edges, True, + is_weighted, False, True) # Generate the cudf.DataFrame result diff --git a/python/cugraph/link_analysis/pagerank_wrapper.pyx b/python/cugraph/link_analysis/pagerank_wrapper.pyx index 81a68d42360..2c619a052ec 100644 --- a/python/cugraph/link_analysis/pagerank_wrapper.pyx +++ b/python/cugraph/link_analysis/pagerank_wrapper.pyx @@ -42,7 +42,7 @@ def pagerank(input_graph, alpha=0.85, personalization=None, max_iter=100, tol=1. num_verts = input_graph.number_of_vertices() num_edges = input_graph.number_of_edges(directed_edges=True) # FIXME: needs to be edge_t type not int - cdef int num_partition_edges = len(src) + cdef int num_local_edges = len(src) df = cudf.DataFrame() df['vertex'] = cudf.Series(np.arange(num_verts, dtype=np.int32)) @@ -71,8 +71,10 @@ def pagerank(input_graph, alpha=0.85, personalization=None, max_iter=100, tol=1. if weights is not None: c_edge_weights = weights.__cuda_array_interface__['data'][0] weight_t = weights.dtype + is_weighted = True else: weight_t = np.dtype("float32") + is_weighted = False # FIXME: Offsets and indices are currently hardcoded to int, but this may # not be acceptable in the future. @@ -96,10 +98,10 @@ def pagerank(input_graph, alpha=0.85, personalization=None, max_iter=100, tol=1. 
((numberTypeEnum.int32Type)), ((numberTypeEnum.int32Type)), ((numberTypeMap[weight_t])), - #num_verts, num_edges, - num_partition_edges, + num_local_edges, num_verts, num_edges, False, + is_weighted, True, False) diff --git a/python/cugraph/structure/graph_utilities.pxd b/python/cugraph/structure/graph_utilities.pxd index 10c90f44cb8..b169e42ccf8 100644 --- a/python/cugraph/structure/graph_utilities.pxd +++ b/python/cugraph/structure/graph_utilities.pxd @@ -46,10 +46,11 @@ cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": numberTypeEnum vertexType, numberTypeEnum edgeType, numberTypeEnum weightType, - size_t num_partition_edges, + size_t num_local_edges, size_t num_global_vertices, size_t num_global_edges, bool sorted_by_degree, + bool is_weighted, bool transposed, bool multi_gpu) except + @@ -106,18 +107,21 @@ cdef extern from "experimental/graph_view.hpp" namespace "cugraph::experimental" # cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": - cdef cppclass major_minor_weights_t[vertex_t, weight_t]: + cdef cppclass major_minor_weights_t[vertex_t, edge_t, weight_t]: major_minor_weights_t(const handle_t &handle) pair[unique_ptr[device_buffer], size_t] get_major_wrap() pair[unique_ptr[device_buffer], size_t] get_minor_wrap() pair[unique_ptr[device_buffer], size_t] get_weights_wrap() + unique_ptr[vector[edge_t]] get_edge_counts_wrap() ctypedef fused shuffled_vertices_t: - major_minor_weights_t[int, float] - major_minor_weights_t[int, double] - major_minor_weights_t[long, float] - major_minor_weights_t[long, double] + major_minor_weights_t[int, int, float] + major_minor_weights_t[int, int, double] + major_minor_weights_t[int, long, float] + major_minor_weights_t[int, long, double] + major_minor_weights_t[long, long, float] + major_minor_weights_t[long, long, double] # 3. return type for renumber: # @@ -151,13 +155,12 @@ cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": # cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": - cdef unique_ptr[major_minor_weights_t[vertex_t, weight_t]] call_shuffle[vertex_t, edge_t, weight_t]( + cdef unique_ptr[major_minor_weights_t[vertex_t, edge_t, weight_t]] call_shuffle[vertex_t, edge_t, weight_t]( const handle_t &handle, vertex_t *edgelist_major_vertices, vertex_t *edgelist_minor_vertices, weight_t* edgelist_weights, - edge_t num_edges, - bool is_hyper_partitioned) except + + edge_t num_edges) except + # 5. `renumber_edgelist()` wrapper # @@ -167,7 +170,6 @@ cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": const handle_t &handle, vertex_t *edgelist_major_vertices, vertex_t *edgelist_minor_vertices, - edge_t num_edges, - bool is_hyper_partitioned, + const vector[edge_t]& edge_counts, bool do_check, bool multi_gpu) except + diff --git a/python/cugraph/structure/renumber_wrapper.pyx b/python/cugraph/structure/renumber_wrapper.pyx index 682c6b32a0f..99626cdee08 100644 --- a/python/cugraph/structure/renumber_wrapper.pyx +++ b/python/cugraph/structure/renumber_wrapper.pyx @@ -22,6 +22,7 @@ from libc.stdint cimport uintptr_t from cython.operator cimport dereference as deref import numpy as np +from libcpp.memory cimport make_unique from libcpp.utility cimport move from rmm._lib.device_buffer cimport device_buffer, DeviceBuffer @@ -103,13 +104,11 @@ def renumber(input_df, # maybe use cpdef ? 
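call_shuffle and call_renumber now exchange a vector of per-partition edge counts instead of a single edge count plus an is_hyper_partitioned flag, since a rank may hold more than one local adjacency matrix partition. A simplified host-side sketch of producing such counts, assuming the shuffled edges are already tagged with a local partition id (edge_counts_per_local_partition and local_partition_ids are illustrative names), is:

#include <cstddef>
#include <cstdint>
#include <vector>

// local_partition_ids: for every shuffled edge, the local adjacency matrix partition
// it belongs to (0 .. num_local_partitions - 1).
std::vector<int64_t> edge_counts_per_local_partition(
  std::vector<int> const& local_partition_ids, std::size_t num_local_partitions)
{
  std::vector<int64_t> counts(num_local_partitions, 0);
  for (auto p : local_partition_ids) { ++counts[p]; }
  return counts;  // passed in place of the old single num_edges value
}

In the single-GPU fallback the vector degenerates to one entry, which matches the make_unique[vector[int]](1, num_local_edges) branches added to the renumber wrapper below.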
raise Exception("Incompatible vertex_t and edge_t types.") # FIXME: needs to be edge_t type not int - cdef int num_partition_edges = len(major_vertices) + cdef int num_local_edges = len(major_vertices) cdef uintptr_t c_major_vertices = major_vertices.__cuda_array_interface__['data'][0] cdef uintptr_t c_minor_vertices = minor_vertices.__cuda_array_interface__['data'][0] - cdef bool is_hyper_partitioned = False # for now - cdef uintptr_t shuffled_major = NULL cdef uintptr_t shuffled_minor = NULL @@ -119,12 +118,14 @@ def renumber(input_df, # maybe use cpdef ? cdef pair[unique_ptr[device_buffer], size_t] pair_original cdef pair[unique_ptr[device_buffer], size_t] pair_partition - # tparams: vertex_t, weight_t: + # tparams: vertex_t, edge_t, weight_t: # - cdef unique_ptr[major_minor_weights_t[int, float]] ptr_shuffled_32_32 - cdef unique_ptr[major_minor_weights_t[int, double]] ptr_shuffled_32_64 - cdef unique_ptr[major_minor_weights_t[long, float]] ptr_shuffled_64_32 - cdef unique_ptr[major_minor_weights_t[long, double]] ptr_shuffled_64_64 + cdef unique_ptr[major_minor_weights_t[int, int, float]] ptr_shuffled_32_32_32 + cdef unique_ptr[major_minor_weights_t[int, int, double]] ptr_shuffled_32_32_64 + cdef unique_ptr[major_minor_weights_t[int, long, float]] ptr_shuffled_32_64_32 + cdef unique_ptr[major_minor_weights_t[int, long, double]] ptr_shuffled_32_64_64 + cdef unique_ptr[major_minor_weights_t[long, long, float]] ptr_shuffled_64_64_32 + cdef unique_ptr[major_minor_weights_t[long, long, double]] ptr_shuffled_64_64_64 # tparams: vertex_t, edge_t: # @@ -132,6 +133,11 @@ def renumber(input_df, # maybe use cpdef ? cdef unique_ptr[renum_quad_t[int, long]] ptr_renum_quad_32_64 cdef unique_ptr[renum_quad_t[long, long]] ptr_renum_quad_64_64 + # tparam: vertex_t: + # + cdef unique_ptr[vector[int]] edge_counts_32 + cdef unique_ptr[vector[long]] edge_counts_64 + # tparam: vertex_t: # cdef unique_ptr[vector[int]] uniq_partition_vector_32 @@ -143,31 +149,32 @@ def renumber(input_df, # maybe use cpdef ? if ( edge_t == np.dtype("int32")): if( weight_t == np.dtype("float32")): if(is_multi_gpu): - ptr_shuffled_32_32.reset(call_shuffle[int, int, float](deref(handle_ptr), + ptr_shuffled_32_32_32.reset(call_shuffle[int, int, float](deref(handle_ptr), c_major_vertices, c_minor_vertices, c_edge_weights, - num_partition_edges, - is_hyper_partitioned).release()) - shuffled_df = renumber_helper(ptr_shuffled_32_32.get(), vertex_t, weights) + num_local_edges).release()) + shuffled_df = renumber_helper(ptr_shuffled_32_32_32.get(), vertex_t, weights) major_vertices = shuffled_df['major_vertices'] minor_vertices = shuffled_df['minor_vertices'] - num_partition_edges = len(shuffled_df) + num_local_edges = len(shuffled_df) if not transposed: major = 'src'; minor = 'dst' else: major = 'dst'; minor = 'src' shuffled_df = shuffled_df.rename(columns={'major_vertices':major, 'minor_vertices':minor}, copy=False) + edge_counts_32 = move(ptr_shuffled_32_32_32.get().get_edge_counts_wrap()) else: shuffled_df = input_df - + edge_counts_32 = make_unique[vector[int]](1, num_local_edges) + shuffled_major = major_vertices.__cuda_array_interface__['data'][0] shuffled_minor = minor_vertices.__cuda_array_interface__['data'][0] + ptr_renum_quad_32_32.reset(call_renumber[int, int](deref(handle_ptr), shuffled_major, shuffled_minor, - num_partition_edges, - is_hyper_partitioned, + deref(edge_counts_32.get()), 1, mg_flag).release()) @@ -190,8 +197,7 @@ def renumber(input_df, # maybe use cpdef ? 
uniq_partition_vector_32.get()[0].at(rank_indx+1)), dtype=vertex_t) else: - new_series = cudf.Series(np.arange(uniq_partition_vector_32.get()[0].at(0), - uniq_partition_vector_32.get()[0].at(1)), + new_series = cudf.Series(np.arange(0, ptr_renum_quad_32_32.get().get_num_vertices()), dtype=vertex_t) # create new cudf df # @@ -205,24 +211,25 @@ def renumber(input_df, # maybe use cpdef ? elif( weight_t == np.dtype("float64")): if(is_multi_gpu): - ptr_shuffled_32_64.reset(call_shuffle[int, int, double](deref(handle_ptr), + ptr_shuffled_32_32_64.reset(call_shuffle[int, int, double](deref(handle_ptr), c_major_vertices, c_minor_vertices, c_edge_weights, - num_partition_edges, - is_hyper_partitioned).release()) + num_local_edges).release()) - shuffled_df = renumber_helper(ptr_shuffled_32_64.get(), vertex_t, weights) + shuffled_df = renumber_helper(ptr_shuffled_32_32_64.get(), vertex_t, weights) major_vertices = shuffled_df['major_vertices'] minor_vertices = shuffled_df['minor_vertices'] - num_partition_edges = len(shuffled_df) + num_local_edges = len(shuffled_df) if not transposed: major = 'src'; minor = 'dst' else: major = 'dst'; minor = 'src' shuffled_df = shuffled_df.rename(columns={'major_vertices':major, 'minor_vertices':minor}, copy=False) + edge_counts_32 = move(ptr_shuffled_32_32_64.get().get_edge_counts_wrap()) else: shuffled_df = input_df + edge_counts_32 = make_unique[vector[int]](1, num_local_edges) shuffled_major = major_vertices.__cuda_array_interface__['data'][0] shuffled_minor = minor_vertices.__cuda_array_interface__['data'][0] @@ -230,8 +237,7 @@ def renumber(input_df, # maybe use cpdef ? ptr_renum_quad_32_32.reset(call_renumber[int, int](deref(handle_ptr), shuffled_major, shuffled_minor, - num_partition_edges, - is_hyper_partitioned, + deref(edge_counts_32.get()), do_check, mg_flag).release()) @@ -254,8 +260,7 @@ def renumber(input_df, # maybe use cpdef ? uniq_partition_vector_32.get()[0].at(rank_indx+1)), dtype=vertex_t) else: - new_series = cudf.Series(np.arange(uniq_partition_vector_32.get()[0].at(0), - uniq_partition_vector_32.get()[0].at(1)), + new_series = cudf.Series(np.arange(0, ptr_renum_quad_32_32.get().get_num_vertices()), dtype=vertex_t) # create new cudf df @@ -271,24 +276,25 @@ def renumber(input_df, # maybe use cpdef ? 
elif ( edge_t == np.dtype("int64")): if( weight_t == np.dtype("float32")): if(is_multi_gpu): - ptr_shuffled_32_32.reset(call_shuffle[int, long, float](deref(handle_ptr), + ptr_shuffled_32_64_32.reset(call_shuffle[int, long, float](deref(handle_ptr), c_major_vertices, c_minor_vertices, c_edge_weights, - num_partition_edges, - is_hyper_partitioned).release()) + num_local_edges).release()) - shuffled_df = renumber_helper(ptr_shuffled_32_32.get(), vertex_t, weights) + shuffled_df = renumber_helper(ptr_shuffled_32_64_32.get(), vertex_t, weights) major_vertices = shuffled_df['major_vertices'] minor_vertices = shuffled_df['minor_vertices'] - num_partition_edges = len(shuffled_df) + num_local_edges = len(shuffled_df) if not transposed: major = 'src'; minor = 'dst' else: major = 'dst'; minor = 'src' shuffled_df = shuffled_df.rename(columns={'major_vertices':major, 'minor_vertices':minor}, copy=False) + edge_counts_64 = move(ptr_shuffled_32_64_32.get().get_edge_counts_wrap()) else: shuffled_df = input_df + edge_counts_64 = make_unique[vector[long]](1, num_local_edges) shuffled_major = major_vertices.__cuda_array_interface__['data'][0] shuffled_minor = minor_vertices.__cuda_array_interface__['data'][0] @@ -296,8 +302,7 @@ def renumber(input_df, # maybe use cpdef ? ptr_renum_quad_32_64.reset(call_renumber[int, long](deref(handle_ptr), shuffled_major, shuffled_minor, - num_partition_edges, - is_hyper_partitioned, + deref(edge_counts_64.get()), do_check, mg_flag).release()) @@ -320,8 +325,7 @@ def renumber(input_df, # maybe use cpdef ? uniq_partition_vector_32.get()[0].at(rank_indx+1)), dtype=vertex_t) else: - new_series = cudf.Series(np.arange(uniq_partition_vector_32.get()[0].at(0), - uniq_partition_vector_32.get()[0].at(1)), + new_series = cudf.Series(np.arange(0, ptr_renum_quad_32_64.get().get_num_vertices()), dtype=vertex_t) # create new cudf df @@ -335,24 +339,25 @@ def renumber(input_df, # maybe use cpdef ? return renumbered_map, shuffled_df elif( weight_t == np.dtype("float64")): if(is_multi_gpu): - ptr_shuffled_32_64.reset(call_shuffle[int, long, double](deref(handle_ptr), + ptr_shuffled_32_64_64.reset(call_shuffle[int, long, double](deref(handle_ptr), c_major_vertices, c_minor_vertices, c_edge_weights, - num_partition_edges, - is_hyper_partitioned).release()) + num_local_edges).release()) - shuffled_df = renumber_helper(ptr_shuffled_32_64.get(), vertex_t, weights) + shuffled_df = renumber_helper(ptr_shuffled_32_64_64.get(), vertex_t, weights) major_vertices = shuffled_df['major_vertices'] minor_vertices = shuffled_df['minor_vertices'] - num_partition_edges = len(shuffled_df) + num_local_edges = len(shuffled_df) if not transposed: major = 'src'; minor = 'dst' else: major = 'dst'; minor = 'src' shuffled_df = shuffled_df.rename(columns={'major_vertices':major, 'minor_vertices':minor}, copy=False) + edge_counts_64 = move(ptr_shuffled_32_64_64.get().get_edge_counts_wrap()) else: shuffled_df = input_df + edge_counts_64 = make_unique[vector[long]](1, num_local_edges) shuffled_major = major_vertices.__cuda_array_interface__['data'][0] shuffled_minor = minor_vertices.__cuda_array_interface__['data'][0] @@ -360,8 +365,7 @@ def renumber(input_df, # maybe use cpdef ? ptr_renum_quad_32_64.reset(call_renumber[int, long](deref(handle_ptr), shuffled_major, shuffled_minor, - num_partition_edges, - is_hyper_partitioned, + deref(edge_counts_64.get()), do_check, mg_flag).release()) @@ -384,8 +388,7 @@ def renumber(input_df, # maybe use cpdef ? 
uniq_partition_vector_32.get()[0].at(rank_indx+1)), dtype=vertex_t) else: - new_series = cudf.Series(np.arange(uniq_partition_vector_32.get()[0].at(0), - uniq_partition_vector_32.get()[0].at(1)), + new_series = cudf.Series(np.arange(0, ptr_renum_quad_32_64.get().get_num_vertices()), dtype=vertex_t) # create new cudf df # @@ -401,24 +404,25 @@ def renumber(input_df, # maybe use cpdef ? if ( edge_t == np.dtype("int64")): if( weight_t == np.dtype("float32")): if(is_multi_gpu): - ptr_shuffled_64_32.reset(call_shuffle[long, long, float](deref(handle_ptr), + ptr_shuffled_64_64_32.reset(call_shuffle[long, long, float](deref(handle_ptr), c_major_vertices, c_minor_vertices, c_edge_weights, - num_partition_edges, - is_hyper_partitioned).release()) + num_local_edges).release()) - shuffled_df = renumber_helper(ptr_shuffled_64_32.get(), vertex_t, weights) + shuffled_df = renumber_helper(ptr_shuffled_64_64_32.get(), vertex_t, weights) major_vertices = shuffled_df['major_vertices'] minor_vertices = shuffled_df['minor_vertices'] - num_partition_edges = len(shuffled_df) + num_local_edges = len(shuffled_df) if not transposed: major = 'src'; minor = 'dst' else: major = 'dst'; minor = 'src' shuffled_df = shuffled_df.rename(columns={'major_vertices':major, 'minor_vertices':minor}, copy=False) + edge_counts_64 = move(ptr_shuffled_64_64_32.get().get_edge_counts_wrap()) else: shuffled_df = input_df + edge_counts_64 = make_unique[vector[long]](1, num_local_edges) shuffled_major = major_vertices.__cuda_array_interface__['data'][0] shuffled_minor = minor_vertices.__cuda_array_interface__['data'][0] @@ -426,8 +430,7 @@ def renumber(input_df, # maybe use cpdef ? ptr_renum_quad_64_64.reset(call_renumber[long, long](deref(handle_ptr), shuffled_major, shuffled_minor, - num_partition_edges, - is_hyper_partitioned, + deref(edge_counts_64.get()), do_check, mg_flag).release()) @@ -450,8 +453,7 @@ def renumber(input_df, # maybe use cpdef ? uniq_partition_vector_64.get()[0].at(rank_indx+1)), dtype=vertex_t) else: - new_series = cudf.Series(np.arange(uniq_partition_vector_64.get()[0].at(0), - uniq_partition_vector_64.get()[0].at(1)), + new_series = cudf.Series(np.arange(0, ptr_renum_quad_64_64.get().get_num_vertices()), dtype=vertex_t) # create new cudf df @@ -466,24 +468,25 @@ def renumber(input_df, # maybe use cpdef ? 
elif( weight_t == np.dtype("float64")): if(is_multi_gpu): - ptr_shuffled_64_64.reset(call_shuffle[long, long, double](deref(handle_ptr), + ptr_shuffled_64_64_64.reset(call_shuffle[long, long, double](deref(handle_ptr), c_major_vertices, c_minor_vertices, c_edge_weights, - num_partition_edges, - is_hyper_partitioned).release()) + num_local_edges).release()) - shuffled_df = renumber_helper(ptr_shuffled_64_64.get(), vertex_t, weights) + shuffled_df = renumber_helper(ptr_shuffled_64_64_64.get(), vertex_t, weights) major_vertices = shuffled_df['major_vertices'] minor_vertices = shuffled_df['minor_vertices'] - num_partition_edges = len(shuffled_df) + num_local_edges = len(shuffled_df) if not transposed: major = 'src'; minor = 'dst' else: major = 'dst'; minor = 'src' shuffled_df = shuffled_df.rename(columns={'major_vertices':major, 'minor_vertices':minor}, copy=False) + edge_counts_64 = move(ptr_shuffled_64_64_64.get().get_edge_counts_wrap()) else: shuffled_df = input_df + edge_counts_64 = make_unique[vector[long]](1, num_local_edges) shuffled_major = major_vertices.__cuda_array_interface__['data'][0] shuffled_minor = minor_vertices.__cuda_array_interface__['data'][0] @@ -491,8 +494,7 @@ def renumber(input_df, # maybe use cpdef ? ptr_renum_quad_64_64.reset(call_renumber[long, long](deref(handle_ptr), shuffled_major, shuffled_minor, - num_partition_edges, - is_hyper_partitioned, + deref(edge_counts_64.get()), do_check, mg_flag).release()) @@ -515,8 +517,7 @@ def renumber(input_df, # maybe use cpdef ? uniq_partition_vector_64.get()[0].at(rank_indx+1)), dtype=vertex_t) else: - new_series = cudf.Series(np.arange(uniq_partition_vector_64.get()[0].at(0), - uniq_partition_vector_64.get()[0].at(1)), + new_series = cudf.Series(np.arange(0, ptr_renum_quad_64_64.get().get_num_vertices()), dtype=vertex_t) # create new cudf df From b442f3be635b11781ebfad0cc44684554dd0c315 Mon Sep 17 00:00:00 2001 From: Rick Ratzel <3039903+rlratzel@users.noreply.github.com> Date: Tue, 6 Apr 2021 10:46:26 -0500 Subject: [PATCH 218/343] Updated NetworkX version to 2.5.1 (#1510) Updated NetworkX version to latest version, which addresses an incompatibility with the latest `decorator` dependency. 
Tested by running the BC tests which were previously failing with Nx 2.5 Authors: - Rick Ratzel (https://github.com/rlratzel) Approvers: - Seunghwa Kang (https://github.com/seunghwak) - Brad Rees (https://github.com/BradReesWork) - Jordan Jacobelli (https://github.com/Ethyling) - Ray Douglass (https://github.com/raydouglass) URL: https://github.com/rapidsai/cugraph/pull/1510 --- conda/environments/cugraph_dev_cuda10.1.yml | 2 +- conda/environments/cugraph_dev_cuda10.2.yml | 2 +- conda/environments/cugraph_dev_cuda11.0.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/conda/environments/cugraph_dev_cuda10.1.yml b/conda/environments/cugraph_dev_cuda10.1.yml index f26c3dd45d9..a138f5e80df 100644 --- a/conda/environments/cugraph_dev_cuda10.1.yml +++ b/conda/environments/cugraph_dev_cuda10.1.yml @@ -18,7 +18,7 @@ dependencies: - ucx-py=0.19* - ucx-proc=*=gpu - scipy -- networkx +- networkx>=2.5.1 - python-louvain - cudatoolkit=10.1 - clang=8.0.1 diff --git a/conda/environments/cugraph_dev_cuda10.2.yml b/conda/environments/cugraph_dev_cuda10.2.yml index 2848cc49dc7..d53fefc086a 100644 --- a/conda/environments/cugraph_dev_cuda10.2.yml +++ b/conda/environments/cugraph_dev_cuda10.2.yml @@ -18,7 +18,7 @@ dependencies: - ucx-py=0.19* - ucx-proc=*=gpu - scipy -- networkx +- networkx>=2.5.1 - python-louvain - cudatoolkit=10.2 - clang=8.0.1 diff --git a/conda/environments/cugraph_dev_cuda11.0.yml b/conda/environments/cugraph_dev_cuda11.0.yml index 82e8b409d13..771b175aa92 100644 --- a/conda/environments/cugraph_dev_cuda11.0.yml +++ b/conda/environments/cugraph_dev_cuda11.0.yml @@ -18,7 +18,7 @@ dependencies: - ucx-py=0.19* - ucx-proc=*=gpu - scipy -- networkx +- networkx>=2.5.1 - python-louvain - cudatoolkit=11.0 - clang=8.0.1 From 1b34e264cab785db88dab2ea0dea7349ea326674 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Wed, 7 Apr 2021 09:04:48 -0400 Subject: [PATCH 219/343] Improve graph primitives performance on graphs with widely varying vertex degrees (#1447) Partially addresses Issue #1442 Update graph primitives used by PageRank, Katz Centrality, BFS, and SSSP to launch 3 different kernels based on vertex degrees to address thread divergence issue. In addition, cut memory footprint of the VertexFrontier class used by BFS & SSSP. The following highlights performance improvement with this optimization. R-mat 2^25 vertices 2^25 * 32 edges PageRank: 7.66, 7.42, 8.83, 8.83 seconds (the first two unweighted, the last two weighted, first & third without personalization)=> 1.07, 1.08, 1.36, 1.39 seconds Katz: 1.08, 1.94 seconds (unweighted, weighted)=> 0.243, 0.275 BFS: 1.32 seconds=> 0.251 R-mat 2^25 vertices 2^25 * 16 edges SSSP: 1.89 seconds (memory allocation fails with the edge factor of 32)=> 0.317 And now SSSP also works with 2^25 vertices 2^25 * 32 edges with the memory footprint improvement and it took 0.514 sec. Still needs additional optimizations to reach the target performance 1. add BFS & SSSP specific optimizations (the current implementation assumes general reduction operations while BFS can pick any source vertex if a vertex is discovered by multiple source vertices and SSSP picks the one with the minimum edge weight, these pure function reduction operations allow additional optimizations). 2. 
Launch 3 different kernels in multiple streams to recover parallelism when the frontier size is relatively small (currently three kernels are queued in a single stream, and this leads to up to 3x decrease in parallelism) Authors: - Seunghwa Kang (https://github.com/seunghwak) Approvers: - Alex Fender (https://github.com/afender) - Chuck Hastings (https://github.com/ChuckHastings) - Brad Rees (https://github.com/BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1447 --- cpp/include/experimental/graph.hpp | 11 +- cpp/include/experimental/graph_functions.hpp | 2 + cpp/include/experimental/graph_view.hpp | 27 +- .../copy_v_transform_reduce_in_out_nbr.cuh | 256 ++++---- ...ransform_reduce_key_aggregated_out_nbr.cuh | 2 +- cpp/include/patterns/count_if_e.cuh | 179 +----- cpp/include/patterns/edge_op_utils.cuh | 38 +- ...orm_reduce_by_adj_matrix_row_col_key_e.cuh | 2 +- cpp/include/patterns/transform_reduce_e.cuh | 260 ++++++-- .../update_frontier_v_push_if_out_nbr.cuh | 603 +++++++++++------- cpp/include/patterns/vertex_frontier.cuh | 344 +++++----- cpp/include/utilities/dataframe_buffer.cuh | 36 ++ cpp/src/experimental/bfs.cu | 34 +- cpp/src/experimental/graph.cu | 20 +- cpp/src/experimental/graph_view.cu | 28 +- cpp/src/experimental/sssp.cu | 34 +- cpp/tests/experimental/bfs_test.cpp | 24 +- .../experimental/katz_centrality_test.cpp | 24 +- cpp/tests/experimental/mg_bfs_test.cpp | 35 +- .../experimental/mg_katz_centrality_test.cpp | 35 +- cpp/tests/experimental/mg_sssp_test.cpp | 35 +- cpp/tests/experimental/pagerank_test.cpp | 24 +- cpp/tests/experimental/sssp_test.cpp | 26 +- cpp/tests/pagerank/mg_pagerank_test.cpp | 29 +- .../utilities/generate_graph_from_edgelist.cu | 13 +- 25 files changed, 1276 insertions(+), 845 deletions(-) diff --git a/cpp/include/experimental/graph.hpp b/cpp/include/experimental/graph.hpp index a380200ea1f..27f766b8593 100644 --- a/cpp/include/experimental/graph.hpp +++ b/cpp/include/experimental/graph.hpp @@ -88,12 +88,12 @@ class graph_tget_number_of_vertices(), this->get_number_of_edges(), this->get_graph_properties(), - vertex_partition_segment_offsets_.size() > 0, + adj_matrix_partition_segment_offsets_.size() > 0, false); } @@ -105,9 +105,10 @@ class graph_t partition_{}; std::vector - vertex_partition_segment_offsets_{}; // segment offsets within the vertex partition based on - // vertex degree, relevant only if - // sorted_by_global_degree_within_vertex_partition is true + adj_matrix_partition_segment_offsets_{}; // segment offsets within the vertex partition based + // on vertex degree, relevant only if + // sorted_by_global_degree_within_vertex_partition is + // true }; // single-GPU version diff --git a/cpp/include/experimental/graph_functions.hpp b/cpp/include/experimental/graph_functions.hpp index 100742adccd..b48dc6da136 100644 --- a/cpp/include/experimental/graph_functions.hpp +++ b/cpp/include/experimental/graph_functions.hpp @@ -251,6 +251,8 @@ void unrenumber_local_int_vertices( vertex_t local_int_vertex_last, bool do_expensive_check = false); +// FIXME: We may add unrenumber_int_rows(or cols) as this will require communication only within a +// sub-communicator and potentially be more efficient. /** * @brief Unrenumber (possibly non-local) internal vertices to external vertices based on the * providied @p renumber_map_labels. 
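The segment offsets stored per adjacency matrix partition are what allow the updated primitives to dispatch each vertex range to a kernel matched to its degree class. A host-side sketch of deriving such offsets, assuming the vertices of a partition are already sorted by decreasing degree and using illustrative thresholds (compute_segment_offsets is not the actual cuGraph routine), is:

#include <algorithm>
#include <cstdint>
#include <iterator>
#include <vector>

// degrees: vertex degrees of one partition, sorted in descending order.
// Returns offsets {0, end_of_high, end_of_mid, degrees.size()} so that
// [offsets[i], offsets[i + 1]) is the vertex range handled by the i-th kernel flavor.
std::vector<std::size_t> compute_segment_offsets(std::vector<int64_t> const& degrees,
                                                 int64_t high_degree_threshold,  // e.g. block size
                                                 int64_t mid_degree_threshold)   // e.g. warp size
{
  auto first_below = [&degrees](int64_t threshold) {
    return static_cast<std::size_t>(std::distance(
      degrees.begin(),
      std::partition_point(degrees.begin(), degrees.end(), [threshold](int64_t d) {
        return d >= threshold;
      })));
  };
  return {std::size_t{0},
          first_below(high_degree_threshold),
          first_below(mid_degree_threshold),
          degrees.size()};
}

In copy_v_transform_reduce_nbr and the other updated primitives, the high-degree range is then processed one block per vertex, the mid-degree range one warp per vertex, and the low-degree range one thread per vertex, which is what reduces the thread divergence described in the commit message.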
diff --git a/cpp/include/experimental/graph_view.hpp b/cpp/include/experimental/graph_view.hpp index 47c93b42ca9..e9593b70ddb 100644 --- a/cpp/include/experimental/graph_view.hpp +++ b/cpp/include/experimental/graph_view.hpp @@ -301,7 +301,7 @@ class graph_view_t const& adj_matrix_partition_offsets, std::vector const& adj_matrix_partition_indices, std::vector const& adj_matrix_partition_weights, - std::vector const& vertex_partition_segment_offsets, + std::vector const& adj_matrix_partition_segment_offsets, partition_t const& partition, vertex_t number_of_vertices, edge_t number_of_edges, @@ -431,6 +431,17 @@ class graph_view_t get_local_adj_matrix_partition_segment_offsets(size_t partition_idx) const + { + return adj_matrix_partition_segment_offsets_.size() > 0 + ? std::vector( + adj_matrix_partition_segment_offsets_.begin() + + partition_idx * (detail::num_segments_per_vertex_partition + 1), + adj_matrix_partition_segment_offsets_.begin() + + (partition_idx + 1) * (detail::num_segments_per_vertex_partition + 1)) + : std::vector{}; + } + // FIXME: this function is not part of the public stable API. This function is mainly for pattern // accelerator implementation. This function is currently public to support the legacy // implementations directly accessing CSR/CSC data, but this function will eventually become @@ -499,9 +510,10 @@ class graph_view_t partition_{}; std::vector - vertex_partition_segment_offsets_{}; // segment offsets within the vertex partition based on - // vertex degree, relevant only if - // sorted_by_global_degree_within_vertex_partition is true + adj_matrix_partition_segment_offsets_{}; // segment offsets within the vertex partition based + // on vertex degree, relevant only if + // sorted_by_global_degree_within_vertex_partition is + // true }; // single-GPU version @@ -612,6 +624,13 @@ class graph_view_t get_local_adj_matrix_partition_segment_offsets( + size_t adj_matrix_partition_idx) const + { + assert(adj_matrix_partition_idx == 0); + return segment_offsets_.size() > 0 ? segment_offsets_ : std::vector{}; + } + // FIXME: this function is not part of the public stable API.This function is mainly for pattern // accelerator implementation. 
This function is currently public to support the legacy // implementations directly accessing CSR/CSC data, but this function will eventually become diff --git a/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh b/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh index e6a73a874ae..6d828dab513 100644 --- a/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh +++ b/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh @@ -42,23 +42,7 @@ namespace experimental { namespace detail { -// FIXME: block size requires tuning -int32_t constexpr copy_v_transform_reduce_nbr_for_all_block_size = 128; - -#if 0 -// FIXME: delete this once we verify that the thrust replace in for_all_major_for_all_nbr_low_degree is no slower than the original for loop based imoplementation -template -__device__ std::enable_if_t accumulate_edge_op_result(T& lhs, T const& rhs) -{ - lhs = plus_edge_op_result(lhs, rhs); -} - -template -__device__ std::enable_if_t accumulate_edge_op_result(T& lhs, T const& rhs) -{ - atomic_add(&lhs, rhs); -} -#endif +int32_t constexpr copy_v_transform_reduce_nbr_for_all_block_size = 512; template (tid); while (idx < static_cast(major_last - major_first)) { + auto major_offset = major_start_offset + idx; vertex_t const* indices{nullptr}; weight_t const* weights{nullptr}; edge_t local_degree{}; - auto major_offset = major_start_offset + idx; thrust::tie(indices, weights, local_degree) = matrix_partition.get_local_edges(static_cast(major_offset)); -#if 1 auto transform_op = [&matrix_partition, &adj_matrix_row_value_input_first, &adj_matrix_col_value_input_first, @@ -148,44 +131,6 @@ __global__ void for_all_major_for_all_nbr_low_degree( atomic_accumulate_edge_op_result(result_value_output_first + minor_offset, e_op_result); }); } -#else - // FIXME: delete this once we verify that the code above is not slower than this. - e_op_result_t e_op_result_sum{init}; // relevent only if update_major == true - for (edge_t i = 0; i < local_degree; ++i) { - auto minor = indices[i]; - auto weight = weights != nullptr ? weights[i] : weight_t{1.0}; - auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); - auto row = GraphViewType::is_adj_matrix_transposed - ? minor - : matrix_partition.get_major_from_major_offset_nocheck(major_offset); - auto col = GraphViewType::is_adj_matrix_transposed - ? matrix_partition.get_major_from_major_offset_nocheck(major_offset) - : minor; - auto row_offset = GraphViewType::is_adj_matrix_transposed - ? minor_offset - : static_cast(major_offset); - auto col_offset = GraphViewType::is_adj_matrix_transposed - ? 
static_cast(major_offset) - : minor_offset; - auto e_op_result = evaluate_edge_op() - .compute(row, - col, - weight, - *(adj_matrix_row_value_input_first + row_offset), - *(adj_matrix_col_value_input_first + col_offset), - e_op); - if (update_major) { - accumulate_edge_op_result(e_op_result_sum, e_op_result); - } else { - accumulate_edge_op_result(*(result_value_output_first + minor_offset), - e_op_result); - } - } - if (update_major) { *(result_value_output_first + idx) = e_op_result_sum; } -#endif idx += gridDim.x * blockDim.x; } } @@ -219,14 +164,14 @@ __global__ void for_all_major_for_all_nbr_mid_degree( auto idx = static_cast(tid / raft::warp_size()); while (idx < static_cast(major_last - major_first)) { + auto major_offset = major_start_offset + idx; vertex_t const* indices{nullptr}; weight_t const* weights{nullptr}; edge_t local_degree{}; - auto major_offset = major_start_offset + idx; thrust::tie(indices, weights, local_degree) = matrix_partition.get_local_edges(major_offset); auto e_op_result_sum = lane_id == 0 ? init : e_op_result_t{}; // relevent only if update_major == true - for (edge_t i = lane_id; i < local_degree; i += raft::warp_size) { + for (edge_t i = lane_id; i < local_degree; i += raft::warp_size()) { auto minor = indices[i]; auto weight = weights != nullptr ? weights[i] : weight_t{1.0}; auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); @@ -293,10 +238,10 @@ __global__ void for_all_major_for_all_nbr_high_degree( auto idx = static_cast(blockIdx.x); while (idx < static_cast(major_last - major_first)) { + auto major_offset = major_start_offset + idx; vertex_t const* indices{nullptr}; weight_t const* weights{nullptr}; edge_t local_degree{}; - auto major_offset = major_start_offset + idx; thrust::tie(indices, weights, local_degree) = matrix_partition.get_local_edges(major_offset); auto e_op_result_sum = threadIdx.x == 0 ? init : e_op_result_t{}; // relevent only if update_major == true @@ -358,7 +303,8 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, T init, VertexValueOutputIterator vertex_value_output_first) { - using vertex_t = typename GraphViewType::vertex_type; + constexpr auto update_major = (in == GraphViewType::is_adj_matrix_transposed); + using vertex_t = typename GraphViewType::vertex_type; static_assert(is_arithmetic_or_thrust_tuple_of_arithmetic::value); @@ -398,15 +344,13 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, matrix_partition_device_t matrix_partition(graph_view, i); auto major_tmp_buffer_size = - GraphViewType::is_multi_gpu && (in == GraphViewType::is_adj_matrix_transposed) - ? matrix_partition.get_major_size() - : vertex_t{0}; + GraphViewType::is_multi_gpu && update_major ? 
matrix_partition.get_major_size() : vertex_t{0}; auto major_tmp_buffer = allocate_dataframe_buffer(major_tmp_buffer_size, handle.get_stream()); auto major_buffer_first = get_dataframe_buffer_begin(major_tmp_buffer); auto major_init = T{}; - if (in == GraphViewType::is_adj_matrix_transposed) { + if (update_major) { if (GraphViewType::is_multi_gpu) { auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); auto const col_comm_rank = col_comm.get_rank(); @@ -416,60 +360,142 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, } } - int comm_root_rank = 0; - if (GraphViewType::is_multi_gpu) { - auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto const row_comm_rank = row_comm.get_rank(); - auto const row_comm_size = row_comm.get_size(); - auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - auto const col_comm_rank = col_comm.get_rank(); - comm_root_rank = i * row_comm_size + row_comm_rank; - } - - if (graph_view.get_vertex_partition_size(comm_root_rank) > 0) { - raft::grid_1d_thread_t update_grid(graph_view.get_vertex_partition_size(comm_root_rank), + auto row_value_input_offset = GraphViewType::is_adj_matrix_transposed + ? vertex_t{0} + : matrix_partition.get_major_value_start_offset(); + auto col_value_input_offset = GraphViewType::is_adj_matrix_transposed + ? matrix_partition.get_major_value_start_offset() + : vertex_t{0}; + auto segment_offsets = graph_view.get_local_adj_matrix_partition_segment_offsets(i); + if (segment_offsets.size() > 0) { + // FIXME: we may further improve performance by 1) concurrently running kernels on different + // segments; 2) individually tuning block sizes for different segments; and 3) adding one more + // segment for very high degree vertices and running segmented reduction + static_assert(detail::num_segments_per_vertex_partition == 3); + if (segment_offsets[1] > 0) { + raft::grid_1d_block_t update_grid(segment_offsets[1], + detail::copy_v_transform_reduce_nbr_for_all_block_size, + handle.get_device_properties().maxGridSize[0]); + // FIXME: with C++17 we can collapse the if-else statement below with a functor with "if + // constexpr" that returns either a multi-GPU output buffer or a single-GPU output buffer. + if (GraphViewType::is_multi_gpu) { + detail::for_all_major_for_all_nbr_high_degree + <<>>( + matrix_partition, + matrix_partition.get_major_first(), + matrix_partition.get_major_first() + segment_offsets[1], + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + update_major ? 
major_buffer_first : minor_buffer_first, + e_op, + major_init); + } else { + detail::for_all_major_for_all_nbr_high_degree + <<>>( + matrix_partition, + matrix_partition.get_major_first(), + matrix_partition.get_major_first() + segment_offsets[1], + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + vertex_value_output_first, + e_op, + major_init); + } + } + if (segment_offsets[2] - segment_offsets[1] > 0) { + raft::grid_1d_warp_t update_grid(segment_offsets[2] - segment_offsets[1], detail::copy_v_transform_reduce_nbr_for_all_block_size, handle.get_device_properties().maxGridSize[0]); - - if (GraphViewType::is_multi_gpu) { - auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto const row_comm_size = row_comm.get_size(); - auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - auto const col_comm_rank = col_comm.get_rank(); - - auto row_value_input_offset = GraphViewType::is_adj_matrix_transposed - ? vertex_t{0} - : matrix_partition.get_major_value_start_offset(); - auto col_value_input_offset = GraphViewType::is_adj_matrix_transposed - ? matrix_partition.get_major_value_start_offset() - : vertex_t{0}; - - detail::for_all_major_for_all_nbr_low_degree - <<>>( - matrix_partition, - graph_view.get_vertex_partition_first(comm_root_rank), - graph_view.get_vertex_partition_last(comm_root_rank), - adj_matrix_row_value_input_first + row_value_input_offset, - adj_matrix_col_value_input_first + col_value_input_offset, - (in == GraphViewType::is_adj_matrix_transposed) ? major_buffer_first - : minor_buffer_first, - e_op, - major_init); - } else { - detail::for_all_major_for_all_nbr_low_degree - <<>>( - matrix_partition, - graph_view.get_vertex_partition_first(comm_root_rank), - graph_view.get_vertex_partition_last(comm_root_rank), - adj_matrix_row_value_input_first, - adj_matrix_col_value_input_first, - vertex_value_output_first, - e_op, - major_init); + // FIXME: with C++17 we can collapse the if-else statement below with a functor with "if + // constexpr" that returns either a multi-GPU output buffer or a single-GPU output buffer. + if (GraphViewType::is_multi_gpu) { + detail::for_all_major_for_all_nbr_mid_degree + <<>>( + matrix_partition, + matrix_partition.get_major_first() + segment_offsets[1], + matrix_partition.get_major_first() + segment_offsets[2], + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + update_major ? major_buffer_first + segment_offsets[1] : minor_buffer_first, + e_op, + major_init); + } else { + detail::for_all_major_for_all_nbr_mid_degree + <<>>( + matrix_partition, + matrix_partition.get_major_first() + segment_offsets[1], + matrix_partition.get_major_first() + segment_offsets[2], + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + vertex_value_output_first + (update_major ? segment_offsets[1] : vertex_t{0}), + e_op, + major_init); + } + } + if (segment_offsets[3] - segment_offsets[2] > 0) { + raft::grid_1d_thread_t update_grid(segment_offsets[3] - segment_offsets[2], + detail::copy_v_transform_reduce_nbr_for_all_block_size, + handle.get_device_properties().maxGridSize[0]); + // FIXME: with C++17 we can collapse the if-else statement below with a functor with "if + // constexpr" that returns either a multi-GPU output buffer or a single-GPU output buffer. 
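
The FIXME above notes that, with C++17, the duplicated multi-GPU / single-GPU launch branches could be collapsed by selecting the output iterator with "if constexpr". A minimal sketch of such a selection helper, with hypothetical names (not part of the patch), might look like:

  template <bool multi_gpu, typename MajorBufferIterator, typename VertexValueOutputIterator>
  auto select_major_output(MajorBufferIterator major_buffer_first,
                           VertexValueOutputIterator vertex_value_output_first)
  {
    if constexpr (multi_gpu) {
      return major_buffer_first;         // multi-GPU: reduce into the temporary major buffer first
    } else {
      return vertex_value_output_first;  // single-GPU: write directly to the final output
    }
  }

Because the discarded branch does not participate in return type deduction, the two iterator types need not match, and a single kernel launch statement would then suffice.
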
+ if (GraphViewType::is_multi_gpu) { + detail::for_all_major_for_all_nbr_low_degree + <<>>( + matrix_partition, + matrix_partition.get_major_first() + segment_offsets[2], + matrix_partition.get_major_last(), + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + update_major ? major_buffer_first + segment_offsets[2] : minor_buffer_first, + e_op, + major_init); + } else { + detail::for_all_major_for_all_nbr_low_degree + <<>>( + matrix_partition, + matrix_partition.get_major_first() + segment_offsets[2], + matrix_partition.get_major_last(), + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + vertex_value_output_first + (update_major ? segment_offsets[2] : vertex_t{0}), + e_op, + major_init); + } + } + } else { + if (matrix_partition.get_major_size() > 0) { + raft::grid_1d_thread_t update_grid(matrix_partition.get_major_size(), + detail::copy_v_transform_reduce_nbr_for_all_block_size, + handle.get_device_properties().maxGridSize[0]); + // FIXME: with C++17 we can collapse the if-else statement below with a functor with "if + // constexpr" that returns either a multi-GPU output buffer or a single-GPU output buffer. + if (GraphViewType::is_multi_gpu) { + detail::for_all_major_for_all_nbr_low_degree + <<>>( + matrix_partition, + matrix_partition.get_major_first(), + matrix_partition.get_major_last(), + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + update_major ? major_buffer_first : minor_buffer_first, + e_op, + major_init); + } else { + detail::for_all_major_for_all_nbr_low_degree + <<>>( + matrix_partition, + matrix_partition.get_major_first(), + matrix_partition.get_major_last(), + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + vertex_value_output_first, + e_op, + major_init); + } } } - if (GraphViewType::is_multi_gpu && (in == GraphViewType::is_adj_matrix_transposed)) { + if (GraphViewType::is_multi_gpu && update_major) { auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); auto const row_comm_rank = row_comm.get_rank(); auto const row_comm_size = row_comm.get_size(); @@ -487,7 +513,7 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, } } - if (GraphViewType::is_multi_gpu && (in != GraphViewType::is_adj_matrix_transposed)) { + if (GraphViewType::is_multi_gpu && !update_major) { auto& comm = handle.get_comms(); auto const comm_rank = comm.get_rank(); auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); diff --git a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh index 22dc2041793..f904c35ef9e 100644 --- a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh +++ b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh @@ -60,10 +60,10 @@ __global__ void for_all_major_for_all_nbr_low_degree( auto idx = static_cast(tid); while (idx < static_cast(major_last - major_first)) { + auto major_offset = major_start_offset + idx; vertex_t const* indices{nullptr}; weight_t const* weights{nullptr}; edge_t local_degree{}; - auto major_offset = major_start_offset + idx; thrust::tie(indices, weights, local_degree) = matrix_partition.get_local_edges(static_cast(major_offset)); if (local_degree > 0) { diff --git 
a/cpp/include/patterns/count_if_e.cuh b/cpp/include/patterns/count_if_e.cuh index 99bfc80f643..4eb3fea24c4 100644 --- a/cpp/include/patterns/count_if_e.cuh +++ b/cpp/include/patterns/count_if_e.cuh @@ -16,132 +16,16 @@ #pragma once #include -#include #include -#include -#include +#include -#include -#include #include -#include -#include - #include -#include namespace cugraph { namespace experimental { -namespace detail { - -// FIXME: block size requires tuning -int32_t constexpr count_if_e_for_all_block_size = 128; - -// FIXME: function names conflict if included with transform_reduce_e.cuh -template -__global__ void for_all_major_for_all_nbr_low_degree( - matrix_partition_device_t matrix_partition, - AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, - AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, - typename GraphViewType::edge_type* block_counts, - EdgeOp e_op) -{ - using vertex_t = typename GraphViewType::vertex_type; - using edge_t = typename GraphViewType::edge_type; - using weight_t = typename GraphViewType::weight_type; - - auto const tid = threadIdx.x + blockIdx.x * blockDim.x; - auto idx = static_cast(tid); - - edge_t count{0}; - while (idx < static_cast(matrix_partition.get_major_size())) { - vertex_t const* indices{nullptr}; - weight_t const* weights{nullptr}; - edge_t local_degree{}; - thrust::tie(indices, weights, local_degree) = matrix_partition.get_local_edges(idx); -#if 1 - count += thrust::count_if( - thrust::seq, - thrust::make_counting_iterator(edge_t{0}), - thrust::make_counting_iterator(local_degree), - [&matrix_partition, - &adj_matrix_row_value_input_first, - &adj_matrix_col_value_input_first, - &e_op, - idx, - indices, - weights] __device__(auto i) { - auto minor = indices[i]; - auto weight = weights != nullptr ? weights[i] : 1.0; - auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); - auto row = GraphViewType::is_adj_matrix_transposed - ? minor - : matrix_partition.get_major_from_major_offset_nocheck(idx); - auto col = GraphViewType::is_adj_matrix_transposed - ? matrix_partition.get_major_from_major_offset_nocheck(idx) - : minor; - auto row_offset = - GraphViewType::is_adj_matrix_transposed ? minor_offset : static_cast(idx); - auto col_offset = - GraphViewType::is_adj_matrix_transposed ? static_cast(idx) : minor_offset; - auto e_op_result = evaluate_edge_op() - .compute(row, - col, - weight, - *(adj_matrix_row_value_input_first + row_offset), - *(adj_matrix_col_value_input_first + col_offset), - e_op); - - return e_op_result; - }); -#else - // FIXME: delete this once we verify that the code above is not slower than this. - for (vertex_t i = 0; i < local_degree; ++i) { - auto minor = indices[i]; - auto weight = weights != nullptr ? weights[i] : 1.0; - auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); - auto row = GraphViewType::is_adj_matrix_transposed - ? minor - : matrix_partition.get_major_from_major_offset_nocheck(idx); - auto col = GraphViewType::is_adj_matrix_transposed - ? matrix_partition.get_major_from_major_offset_nocheck(idx) - : minor; - auto row_offset = - GraphViewType::is_adj_matrix_transposed ? minor_offset : static_cast(idx); - auto col_offset = - GraphViewType::is_adj_matrix_transposed ? 
static_cast(idx) : minor_offset; - auto e_op_result = evaluate_edge_op() - .compute(row, - col, - weight, - *(adj_matrix_row_value_input_first + row_offset), - *(adj_matrix_col_value_input_first + col_offset), - e_op); - if (e_op_result) { count++; } - } -#endif - idx += gridDim.x * blockDim.x; - } - - using BlockReduce = cub::BlockReduce; - __shared__ typename BlockReduce::TempStorage temp_storage; - count = BlockReduce(temp_storage).Sum(count); - if (threadIdx.x == 0) { *(block_counts + blockIdx.x) = count; } -} - -} // namespace detail - /** * @brief Count the number of edges that satisfies the given predicate. * @@ -182,55 +66,18 @@ typename GraphViewType::edge_type count_if_e( AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, EdgeOp e_op) { - using vertex_t = typename GraphViewType::vertex_type; - using edge_t = typename GraphViewType::edge_type; - - edge_t count{0}; - for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { - matrix_partition_device_t matrix_partition(graph_view, i); - - if (matrix_partition.get_major_size() > 0) { - auto row_value_input_offset = GraphViewType::is_adj_matrix_transposed - ? vertex_t{0} - : matrix_partition.get_major_value_start_offset(); - auto col_value_input_offset = GraphViewType::is_adj_matrix_transposed - ? matrix_partition.get_major_value_start_offset() - : vertex_t{0}; - - raft::grid_1d_thread_t update_grid(matrix_partition.get_major_size(), - detail::count_if_e_for_all_block_size, - handle.get_device_properties().maxGridSize[0]); - - rmm::device_uvector block_counts(update_grid.num_blocks, handle.get_stream()); - - detail::for_all_major_for_all_nbr_low_degree<<>>( - matrix_partition, - adj_matrix_row_value_input_first + row_value_input_offset, - adj_matrix_col_value_input_first + col_value_input_offset, - block_counts.data(), - e_op); - - // FIXME: we have several options to implement this. With cooperative group support - // (https://devblogs.nvidia.com/cooperative-groups/), we can run this synchronization within - // the previous kernel. Using atomics at the end of the previous kernel is another option - // (sequentialization due to atomics may not be bad as different blocks may reach the - // synchronization point in varying timings and the number of SMs is not very big) - count += thrust::reduce(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - block_counts.begin(), - block_counts.end(), - edge_t{0}, - thrust::plus()); - } - } - - if (GraphViewType::is_multi_gpu) { - count = host_scalar_allreduce(handle.get_comms(), count, handle.get_stream()); - } - - return count; + using edge_t = typename GraphViewType::edge_type; + + return transform_reduce_e(handle, + graph_view, + adj_matrix_row_value_input_first, + adj_matrix_col_value_input_first, + cast_edge_op_bool_to_integer{e_op}, + edge_t{0}); } } // namespace experimental diff --git a/cpp/include/patterns/edge_op_utils.cuh b/cpp/include/patterns/edge_op_utils.cuh index 58fb31c7605..198c1880ff4 100644 --- a/cpp/include/patterns/edge_op_utils.cuh +++ b/cpp/include/patterns/edge_op_utils.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
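
The rewritten count_if_e above no longer carries its own kernel; it wraps the user predicate in cast_edge_op_bool_to_integer and forwards to transform_reduce_e with an initial count of zero. The same count-as-a-sum idea, stripped of the graph machinery, can be expressed with the standard library as in the following sketch (illustration only; the helper name is hypothetical):

  #include <cstdint>
  #include <functional>
  #include <numeric>
  #include <vector>

  template <typename T, typename Pred>
  std::int64_t count_if_via_transform_reduce(std::vector<T> const& v, Pred pred)
  {
    // cast the boolean predicate result to 0/1 and sum, exactly the role
    // cast_edge_op_bool_to_integer plays for the edge operator
    return std::transform_reduce(
      v.begin(), v.end(), std::int64_t{0}, std::plus<std::int64_t>{},
      [pred](T const& x) { return pred(x) ? std::int64_t{1} : std::int64_t{0}; });
  }
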
@@ -77,6 +77,42 @@ struct evaluate_edge_op { } }; +template +struct cast_edge_op_bool_to_integer { + static_assert(std::is_integral::value); + using vertex_type = typename GraphViewType::vertex_type; + using weight_type = typename GraphViewType::weight_type; + using row_value_type = typename std::iterator_traits::value_type; + using col_value_type = typename std::iterator_traits::value_type; + + EdgeOp e_op{}; + + template + __device__ std::enable_if_t>::valid, T> + operator()(V r, V c, W w, R rv, C cv) + { + return e_op(r, c, w, rv, cv) ? T{1} : T{0}; + } + + template + __device__ std::enable_if_t>::valid, T> + operator()(V r, V c, R rv, C cv) + { + return e_op(r, c, rv, cv) ? T{1} : T{0}; + } +}; + template __host__ __device__ std::enable_if_t::value, T> plus_edge_op_result( T const& lhs, T const& rhs) diff --git a/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh b/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh index 34721c75e31..9848aa21f88 100644 --- a/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh +++ b/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh @@ -62,10 +62,10 @@ __global__ void for_all_major_for_all_nbr_low_degree( auto idx = static_cast(tid); while (idx < static_cast(major_last - major_first)) { + auto major_offset = major_start_offset + idx; vertex_t const* indices{nullptr}; weight_t const* weights{nullptr}; edge_t local_degree{}; - auto major_offset = major_start_offset + idx; thrust::tie(indices, weights, local_degree) = matrix_partition.get_local_edges(static_cast(major_offset)); if (local_degree > 0) { diff --git a/cpp/include/patterns/transform_reduce_e.cuh b/cpp/include/patterns/transform_reduce_e.cuh index 1f59777bc35..b95e036d460 100644 --- a/cpp/include/patterns/transform_reduce_e.cuh +++ b/cpp/include/patterns/transform_reduce_e.cuh @@ -41,31 +41,34 @@ int32_t constexpr transform_reduce_e_for_all_block_size = 128; template __global__ void for_all_major_for_all_nbr_low_degree( matrix_partition_device_t matrix_partition, + typename GraphViewType::vertex_type major_first, + typename GraphViewType::vertex_type major_last, AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, - BlockResultIterator block_result_first, + ResultIterator result_iter /* size 1 */, EdgeOp e_op) { using vertex_t = typename GraphViewType::vertex_type; using edge_t = typename GraphViewType::edge_type; using weight_t = typename GraphViewType::weight_type; - using e_op_result_t = typename std::iterator_traits::value_type; + using e_op_result_t = typename std::iterator_traits::value_type; - auto const tid = threadIdx.x + blockIdx.x * blockDim.x; - size_t idx = static_cast(tid); + auto const tid = threadIdx.x + blockIdx.x * blockDim.x; + auto major_start_offset = static_cast(major_first - matrix_partition.get_major_first()); + size_t idx = static_cast(tid); e_op_result_t e_op_result_sum{}; - while (idx < static_cast(matrix_partition.get_major_size())) { + while (idx < static_cast(major_last - major_first)) { + auto major_offset = major_start_offset + idx; vertex_t const* indices{nullptr}; weight_t const* weights{nullptr}; edge_t local_degree{}; - thrust::tie(indices, weights, local_degree) = matrix_partition.get_local_edges(idx); -#if 1 - auto sum = thrust::transform_reduce( + thrust::tie(indices, weights, local_degree) = matrix_partition.get_local_edges(major_offset); + auto sum = thrust::transform_reduce( thrust::seq, 
thrust::make_counting_iterator(edge_t{0}), thrust::make_counting_iterator(local_degree), @@ -104,9 +107,112 @@ __global__ void for_all_major_for_all_nbr_low_degree( [] __device__(auto lhs, auto rhs) { return plus_edge_op_result(lhs, rhs); }); e_op_result_sum = plus_edge_op_result(e_op_result_sum, sum); -#else - // FIXME: delete this once we verify that the code above is not slower than this. - for (vertex_t i = 0; i < local_degree; ++i) { + idx += gridDim.x * blockDim.x; + } + + e_op_result_sum = + block_reduce_edge_op_result().compute( + e_op_result_sum); + if (threadIdx.x == 0) { atomic_accumulate_edge_op_result(result_iter, e_op_result_sum); } +} + +template +__global__ void for_all_major_for_all_nbr_mid_degree( + matrix_partition_device_t matrix_partition, + typename GraphViewType::vertex_type major_first, + typename GraphViewType::vertex_type major_last, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, + ResultIterator result_iter /* size 1 */, + EdgeOp e_op) +{ + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; + using e_op_result_t = typename std::iterator_traits::value_type; + + auto const tid = threadIdx.x + blockIdx.x * blockDim.x; + static_assert(transform_reduce_e_for_all_block_size % raft::warp_size() == 0); + auto const lane_id = tid % raft::warp_size(); + auto major_start_offset = static_cast(major_first - matrix_partition.get_major_first()); + size_t idx = static_cast(tid / raft::warp_size()); + + e_op_result_t e_op_result_sum{}; + while (idx < static_cast(major_last - major_first)) { + auto major_offset = major_start_offset + idx; + vertex_t const* indices{nullptr}; + weight_t const* weights{nullptr}; + edge_t local_degree{}; + thrust::tie(indices, weights, local_degree) = matrix_partition.get_local_edges(major_offset); + for (edge_t i = lane_id; i < local_degree; i += raft::warp_size()) { + auto minor = indices[i]; + auto weight = weights != nullptr ? weights[i] : weight_t{1.0}; + auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); + auto row = GraphViewType::is_adj_matrix_transposed + ? minor + : matrix_partition.get_major_from_major_offset_nocheck(idx); + auto col = GraphViewType::is_adj_matrix_transposed + ? matrix_partition.get_major_from_major_offset_nocheck(idx) + : minor; + auto row_offset = + GraphViewType::is_adj_matrix_transposed ? minor_offset : static_cast(idx); + auto col_offset = + GraphViewType::is_adj_matrix_transposed ? 
static_cast(idx) : minor_offset; + auto e_op_result = evaluate_edge_op() + .compute(row, + col, + weight, + *(adj_matrix_row_value_input_first + row_offset), + *(adj_matrix_col_value_input_first + col_offset), + e_op); + e_op_result_sum = plus_edge_op_result(e_op_result_sum, e_op_result); + } + idx += gridDim.x * (blockDim.x / raft::warp_size()); + } + + e_op_result_sum = + block_reduce_edge_op_result().compute( + e_op_result_sum); + if (threadIdx.x == 0) { atomic_accumulate_edge_op_result(result_iter, e_op_result_sum); } +} + +template +__global__ void for_all_major_for_all_nbr_high_degree( + matrix_partition_device_t matrix_partition, + typename GraphViewType::vertex_type major_first, + typename GraphViewType::vertex_type major_last, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, + ResultIterator result_iter /* size 1 */, + EdgeOp e_op) +{ + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; + using e_op_result_t = typename std::iterator_traits::value_type; + + auto major_start_offset = static_cast(major_first - matrix_partition.get_major_first()); + size_t idx = static_cast(blockIdx.x); + + e_op_result_t e_op_result_sum{}; + while (idx < static_cast(major_last - major_first)) { + auto major_offset = major_start_offset + idx; + vertex_t const* indices{nullptr}; + weight_t const* weights{nullptr}; + edge_t local_degree{}; + thrust::tie(indices, weights, local_degree) = matrix_partition.get_local_edges(major_offset); + for (edge_t i = threadIdx.x; i < local_degree; i += blockDim.x) { auto minor = indices[i]; auto weight = weights != nullptr ? weights[i] : weight_t{1.0}; auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); @@ -132,14 +238,13 @@ __global__ void for_all_major_for_all_nbr_low_degree( e_op); e_op_result_sum = plus_edge_op_result(e_op_result_sum, e_op_result); } -#endif - idx += gridDim.x * blockDim.x; + idx += gridDim.x; } e_op_result_sum = block_reduce_edge_op_result().compute( e_op_result_sum); - if (threadIdx.x == 0) { *(block_result_first + blockIdx.x) = e_op_result_sum; } + if (threadIdx.x == 0) { atomic_accumulate_edge_op_result(result_iter, e_op_result_sum); } } } // namespace detail @@ -190,51 +295,106 @@ T transform_reduce_e(raft::handle_t const& handle, using vertex_t = typename GraphViewType::vertex_type; - T result{}; + auto result_buffer = allocate_dataframe_buffer(1, handle.get_stream()); + thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + get_dataframe_buffer_begin(result_buffer), + get_dataframe_buffer_begin(result_buffer) + 1, + T{}); + for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { matrix_partition_device_t matrix_partition(graph_view, i); - if (matrix_partition.get_major_size() > 0) { - auto row_value_input_offset = GraphViewType::is_adj_matrix_transposed - ? vertex_t{0} - : matrix_partition.get_major_value_start_offset(); - auto col_value_input_offset = GraphViewType::is_adj_matrix_transposed - ? matrix_partition.get_major_value_start_offset() - : vertex_t{0}; + auto row_value_input_offset = GraphViewType::is_adj_matrix_transposed + ? vertex_t{0} + : matrix_partition.get_major_value_start_offset(); + auto col_value_input_offset = GraphViewType::is_adj_matrix_transposed + ? 
matrix_partition.get_major_value_start_offset() + : vertex_t{0}; + auto segment_offsets = graph_view.get_local_adj_matrix_partition_segment_offsets(i); + if (segment_offsets.size() > 0) { + // FIXME: we may further improve performance by 1) concurrently running kernels on different + // segments; 2) individually tuning block sizes for different segments; and 3) adding one more + // segment for very high degree vertices and running segmented reduction + static_assert(detail::num_segments_per_vertex_partition == 3); + if (segment_offsets[1] > 0) { + raft::grid_1d_block_t update_grid(segment_offsets[1], + detail::transform_reduce_e_for_all_block_size, + handle.get_device_properties().maxGridSize[0]); - raft::grid_1d_thread_t update_grid(matrix_partition.get_major_size(), + detail::for_all_major_for_all_nbr_high_degree<<>>( + matrix_partition, + matrix_partition.get_major_first(), + matrix_partition.get_major_first() + segment_offsets[1], + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + get_dataframe_buffer_begin(result_buffer), + e_op); + } + if (segment_offsets[2] - segment_offsets[1] > 0) { + raft::grid_1d_warp_t update_grid(segment_offsets[2] - segment_offsets[1], detail::transform_reduce_e_for_all_block_size, handle.get_device_properties().maxGridSize[0]); - auto block_result_buffer = - allocate_dataframe_buffer(update_grid.num_blocks, handle.get_stream()); - - detail::for_all_major_for_all_nbr_low_degree<<>>( - matrix_partition, - adj_matrix_row_value_input_first + row_value_input_offset, - adj_matrix_col_value_input_first + col_value_input_offset, - get_dataframe_buffer_begin(block_result_buffer), - e_op); - - // FIXME: we have several options to implement this. With cooperative group support - // (https://devblogs.nvidia.com/cooperative-groups/), we can run this synchronization within - // the previous kernel. 
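
The updated transform_reduce_e kernels above all end the same way: each block reduces its threads' partial sums and thread 0 atomically accumulates the block total into a single result slot, which is the "atomics" option discussed in the surrounding comment. A stripped-down sketch of that pattern for a plain float sum (hypothetical kernel, not the patch's templated version):

  #include <cub/cub.cuh>

  template <int block_size>
  __global__ void block_reduce_then_atomic(float const* values, int n, float* global_sum)
  {
    using BlockReduce = cub::BlockReduce<float, block_size>;
    __shared__ typename BlockReduce::TempStorage temp_storage;

    float thread_sum{0.0f};
    for (int i = threadIdx.x + blockIdx.x * blockDim.x; i < n; i += gridDim.x * blockDim.x) {
      thread_sum += values[i];
    }

    auto block_sum = BlockReduce(temp_storage).Sum(thread_sum);  // valid in thread 0
    if (threadIdx.x == 0) { atomicAdd(global_sum, block_sum); }  // one atomic per block
  }
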
Using atomics at the end of the previous kernel is another option - // (sequentialization due to atomics may not be bad as different blocks may reach the - // synchronization point in varying timings and the number of SMs is not very big) - auto partial_result = - thrust::reduce(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - get_dataframe_buffer_begin(block_result_buffer), - get_dataframe_buffer_begin(block_result_buffer) + update_grid.num_blocks, - T(), - [] __device__(T lhs, T rhs) { return plus_edge_op_result(lhs, rhs); }); - - result = plus_edge_op_result(result, partial_result); + detail::for_all_major_for_all_nbr_mid_degree<<>>( + matrix_partition, + matrix_partition.get_major_first() + segment_offsets[1], + matrix_partition.get_major_first() + segment_offsets[2], + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + get_dataframe_buffer_begin(result_buffer), + e_op); + } + if (segment_offsets[3] - segment_offsets[2] > 0) { + raft::grid_1d_thread_t update_grid(segment_offsets[3] - segment_offsets[2], + detail::transform_reduce_e_for_all_block_size, + handle.get_device_properties().maxGridSize[0]); + + detail::for_all_major_for_all_nbr_low_degree<<>>( + matrix_partition, + matrix_partition.get_major_first() + segment_offsets[2], + matrix_partition.get_major_last(), + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + get_dataframe_buffer_begin(result_buffer), + e_op); + } + } else { + if (matrix_partition.get_major_size() > 0) { + raft::grid_1d_thread_t update_grid(matrix_partition.get_major_size(), + detail::transform_reduce_e_for_all_block_size, + handle.get_device_properties().maxGridSize[0]); + + detail::for_all_major_for_all_nbr_low_degree<<>>( + matrix_partition, + matrix_partition.get_major_first(), + matrix_partition.get_major_last(), + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + get_dataframe_buffer_begin(result_buffer), + e_op); + } } } + auto result = + thrust::reduce(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + get_dataframe_buffer_begin(result_buffer), + get_dataframe_buffer_begin(result_buffer) + 1, + T{}, + [] __device__(T lhs, T rhs) { return plus_edge_op_result(lhs, rhs); }); + if (GraphViewType::is_multi_gpu) { result = host_scalar_allreduce(handle.get_comms(), result, handle.get_stream()); } diff --git a/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh b/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh index 4d557b97a30..3d87f19969e 100644 --- a/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh +++ b/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh @@ -15,7 +15,6 @@ */ #pragma once -#include #include #include #include @@ -37,13 +36,15 @@ #include #include #include -#include +#include +#include #include #include #include #include #include +#include #include #include #include @@ -55,9 +56,7 @@ namespace experimental { namespace detail { -// FIXME: block size requires tuning -int32_t constexpr update_frontier_v_push_if_out_nbr_for_all_block_size = 128; -int32_t constexpr update_frontier_v_push_if_out_nbr_update_block_size = 128; +int32_t constexpr update_frontier_v_push_if_out_nbr_for_all_block_size = 512; template (thrust::distance(row_first, row_last)); auto const tid = threadIdx.x + blockIdx.x * blockDim.x; - size_t idx = tid; + auto idx = static_cast(tid); - while (idx < 
num_rows) { + while (idx < static_cast(thrust::distance(row_first, row_last))) { vertex_t row = *(row_first + idx); auto row_offset = matrix_partition.get_major_offset_from_major_nocheck(row); vertex_t const* indices{nullptr}; weight_t const* weights{nullptr}; edge_t local_out_degree{}; thrust::tie(indices, weights, local_out_degree) = matrix_partition.get_local_edges(row_offset); - for (vertex_t i = 0; i < local_out_degree; ++i) { + for (edge_t i = 0; i < local_out_degree; ++i) { auto col = indices[i]; auto weight = weights != nullptr ? weights[i] : 1.0; auto col_offset = matrix_partition.get_minor_offset_from_minor_nocheck(col); @@ -125,12 +123,145 @@ __global__ void for_all_frontier_row_for_all_nbr_low_degree( } } +template +__global__ void for_all_frontier_row_for_all_nbr_mid_degree( + matrix_partition_device_t matrix_partition, + RowIterator row_first, + RowIterator row_last, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, + BufferKeyOutputIterator buffer_key_output_first, + BufferPayloadOutputIterator buffer_payload_output_first, + size_t* buffer_idx_ptr, + EdgeOp e_op) +{ + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; + + static_assert(!GraphViewType::is_adj_matrix_transposed, + "GraphViewType should support the push model."); + + auto const tid = threadIdx.x + blockIdx.x * blockDim.x; + static_assert(update_frontier_v_push_if_out_nbr_for_all_block_size % raft::warp_size() == 0); + auto const lane_id = tid % raft::warp_size(); + auto idx = static_cast(tid / raft::warp_size()); + + while (idx < static_cast(thrust::distance(row_first, row_last))) { + vertex_t row = *(row_first + idx); + auto row_offset = matrix_partition.get_major_offset_from_major_nocheck(row); + vertex_t const* indices{nullptr}; + weight_t const* weights{nullptr}; + edge_t local_out_degree{}; + thrust::tie(indices, weights, local_out_degree) = matrix_partition.get_local_edges(row_offset); + for (edge_t i = lane_id; i < local_out_degree; i += raft::warp_size()) { + auto col = indices[i]; + auto weight = weights != nullptr ? weights[i] : 1.0; + auto col_offset = matrix_partition.get_minor_offset_from_minor_nocheck(col); + auto e_op_result = evaluate_edge_op() + .compute(row, + col, + weight, + *(adj_matrix_row_value_input_first + row_offset), + *(adj_matrix_col_value_input_first + col_offset), + e_op); + if (thrust::get<0>(e_op_result) == true) { + // FIXME: This atomicAdd serializes execution. If we renumber vertices to insure that rows + // within a partition are sorted by their out-degree in decreasing order, we can compute + // a tight uppper bound for the maximum number of pushes per warp/block and use shared + // memory buffer to reduce the number of atomicAdd operations. 
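
In the frontier kernels above, each qualifying edge reserves a slot in the output buffer by atomically incrementing a shared index and then writes its key and payload into that slot. Reduced to plain arrays, the pattern looks like the following sketch (the kernel name, predicate, and payload are hypothetical stand-ins):

  __global__ void push_if(int const* input, int n, int* out_keys, float* out_payloads,
                          unsigned long long* out_idx)
  {
    for (int i = threadIdx.x + blockIdx.x * blockDim.x; i < n; i += gridDim.x * blockDim.x) {
      if (input[i] % 2 == 0) {                       // stand-in for the e_op predicate
        auto slot = atomicAdd(out_idx, 1ull);        // serializes, but keeps the buffer dense
        out_keys[slot] = input[i];
        out_payloads[slot] = static_cast<float>(i);  // stand-in for the e_op payload
      }
    }
  }
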
+ static_assert(sizeof(unsigned long long int) == sizeof(size_t)); + auto buffer_idx = atomicAdd(reinterpret_cast(buffer_idx_ptr), + static_cast(1)); + *(buffer_key_output_first + buffer_idx) = col; + *(buffer_payload_output_first + buffer_idx) = thrust::get<1>(e_op_result); + } + } + + idx += gridDim.x * (blockDim.x / raft::warp_size()); + } +} + +template +__global__ void for_all_frontier_row_for_all_nbr_high_degree( + matrix_partition_device_t matrix_partition, + RowIterator row_first, + RowIterator row_last, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, + BufferKeyOutputIterator buffer_key_output_first, + BufferPayloadOutputIterator buffer_payload_output_first, + size_t* buffer_idx_ptr, + EdgeOp e_op) +{ + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; + + static_assert(!GraphViewType::is_adj_matrix_transposed, + "GraphViewType should support the push model."); + + auto idx = static_cast(blockIdx.x); + + while (idx < static_cast(thrust::distance(row_first, row_last))) { + vertex_t row = *(row_first + idx); + auto row_offset = matrix_partition.get_major_offset_from_major_nocheck(row); + vertex_t const* indices{nullptr}; + weight_t const* weights{nullptr}; + edge_t local_out_degree{}; + thrust::tie(indices, weights, local_out_degree) = matrix_partition.get_local_edges(row_offset); + for (edge_t i = threadIdx.x; i < local_out_degree; i += blockDim.x) { + auto col = indices[i]; + auto weight = weights != nullptr ? weights[i] : 1.0; + auto col_offset = matrix_partition.get_minor_offset_from_minor_nocheck(col); + auto e_op_result = evaluate_edge_op() + .compute(row, + col, + weight, + *(adj_matrix_row_value_input_first + row_offset), + *(adj_matrix_col_value_input_first + col_offset), + e_op); + if (thrust::get<0>(e_op_result) == true) { + // FIXME: This atomicAdd serializes execution. If we renumber vertices to insure that rows + // within a partition are sorted by their out-degree in decreasing order, we can compute + // a tight uppper bound for the maximum number of pushes per warp/block and use shared + // memory buffer to reduce the number of atomicAdd operations. 
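
The FIXME above points out that the per-push atomicAdd serializes execution. One common way to cut the number of atomics, independent of the renumbering idea mentioned there, is warp-aggregated allocation: the pushing lanes of a warp reserve a contiguous range with a single atomicAdd and each lane then computes its own slot. A hypothetical sketch (not what the patch implements), assuming a 1D thread block:

  __device__ unsigned long long warp_aggregated_reserve(unsigned long long* counter, bool do_push)
  {
    auto const mask   = __activemask();
    auto const ballot = __ballot_sync(mask, do_push);
    if (ballot == 0) { return 0; }                                // warp-uniform: no lane pushes
    auto const lane   = static_cast<int>(threadIdx.x % warpSize);
    auto const leader = __ffs(static_cast<int>(ballot)) - 1;      // lowest pushing lane
    unsigned long long base{0};
    if (lane == leader) {
      base = atomicAdd(counter, static_cast<unsigned long long>(__popc(ballot)));
    }
    base = __shfl_sync(mask, base, leader);                       // broadcast the reserved base
    return base + __popc(ballot & ((1u << lane) - 1));            // slot (meaningful if do_push)
  }

A caller would use it as: auto slot = warp_aggregated_reserve(buffer_idx_ptr, pushed); if (pushed) { write key and payload at slot; }. This issues one atomicAdd per warp instead of one per pushed edge.
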
+ static_assert(sizeof(unsigned long long int) == sizeof(size_t)); + auto buffer_idx = atomicAdd(reinterpret_cast(buffer_idx_ptr), + static_cast(1)); + *(buffer_key_output_first + buffer_idx) = col; + *(buffer_payload_output_first + buffer_idx) = thrust::get<1>(e_op_result); + } + } + + idx += gridDim.x; + } +} + template -size_t reduce_buffer_elements(raft::handle_t const& handle, - BufferKeyOutputIterator buffer_key_output_first, - BufferPayloadOutputIterator buffer_payload_output_first, - size_t num_buffer_elements, - ReduceOp reduce_op) +size_t sort_and_reduce_buffer_elements(raft::handle_t const& handle, + BufferKeyOutputIterator buffer_key_output_first, + BufferPayloadOutputIterator buffer_payload_output_first, + size_t num_buffer_elements, + ReduceOp reduce_op) { thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), buffer_key_output_first, @@ -182,92 +313,6 @@ size_t reduce_buffer_elements(raft::handle_t const& handle, } } -template -__global__ void update_frontier_and_vertex_output_values( - vertex_partition_device_t vertex_partition, - BufferKeyInputIterator buffer_key_input_first, - BufferPayloadInputIterator buffer_payload_input_first, - size_t num_buffer_elements, - VertexValueInputIterator vertex_value_input_first, - VertexValueOutputIterator vertex_value_output_first, - vertex_t** bucket_ptrs, - size_t* bucket_sizes_ptr, - size_t invalid_bucket_idx, - vertex_t invalid_vertex, - VertexOp v_op) -{ - static_assert(std::is_same::value_type, - vertex_t>::value); - auto const tid = threadIdx.x + blockIdx.x * blockDim.x; - size_t idx = tid; - size_t block_idx = blockIdx.x; - // FIXME: it might be more performant to process more than one element per thread - auto num_blocks = (num_buffer_elements + blockDim.x - 1) / blockDim.x; - - using BlockScan = - cub::BlockScan; - __shared__ typename BlockScan::TempStorage temp_storage; - - __shared__ size_t bucket_block_start_offsets[num_buckets]; - - size_t bucket_block_local_offsets[num_buckets]; - size_t bucket_block_aggregate_sizes[num_buckets]; - - while (block_idx < num_blocks) { - for (size_t i = 0; i < num_buckets; ++i) { bucket_block_local_offsets[i] = 0; } - - size_t selected_bucket_idx{invalid_bucket_idx}; - vertex_t key{invalid_vertex}; - - if (idx < num_buffer_elements) { - key = *(buffer_key_input_first + idx); - auto key_offset = vertex_partition.get_local_vertex_offset_from_vertex_nocheck(key); - auto v_val = *(vertex_value_input_first + key_offset); - auto payload = *(buffer_payload_input_first + idx); - auto v_op_result = v_op(v_val, payload); - selected_bucket_idx = thrust::get<0>(v_op_result); - if (selected_bucket_idx != invalid_bucket_idx) { - *(vertex_value_output_first + key_offset) = thrust::get<1>(v_op_result); - bucket_block_local_offsets[selected_bucket_idx] = 1; - } - } - - for (size_t i = 0; i < num_buckets; ++i) { - BlockScan(temp_storage) - .ExclusiveSum(bucket_block_local_offsets[i], - bucket_block_local_offsets[i], - bucket_block_aggregate_sizes[i]); - } - - if (threadIdx.x == 0) { - for (size_t i = 0; i < num_buckets; ++i) { - static_assert(sizeof(unsigned long long int) == sizeof(size_t)); - bucket_block_start_offsets[i] = - atomicAdd(reinterpret_cast(bucket_sizes_ptr + i), - static_cast(bucket_block_aggregate_sizes[i])); - } - } - - __syncthreads(); - - // FIXME: better use shared memory buffer to aggreaget global memory writes - if (selected_bucket_idx != invalid_bucket_idx) { - bucket_ptrs[selected_bucket_idx][bucket_block_start_offsets[selected_bucket_idx] + - 
bucket_block_local_offsets[selected_bucket_idx]] = key; - } - - idx += gridDim.x * blockDim.x; - block_idx += gridDim.x; - } -} - } // namespace detail /** @@ -289,10 +334,12 @@ __global__ void update_frontier_and_vertex_output_values( * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. * @param graph_view Non-owning graph object. - * @param vertex_first Iterator pointing to the first (inclusive) vertex in the current frontier. v - * in [vertex_first, vertex_last) should be distinct (and should belong to this process in - * multi-GPU), otherwise undefined behavior - * @param vertex_last Iterator pointing to the last (exclusive) vertex in the current frontier. + * @param vertex_frontier VertexFrontier class object for vertex frontier managements. This object + * includes multiple bucket objects. + * @param cur_fontier_bucket_idx Index of the VertexFrontier bucket holding vertices for the current + * iteration. + * @param next_frontier_bucket_indices Indices of the VertexFrontier buckets to store new frontier + * vertices for the next iteration. * @param adj_matrix_row_value_input_first Iterator pointing to the adjacency matrix row input * properties for the first (inclusive) row (assigned to this process in multi-GPU). * `adj_matrix_row_value_input_last` (exclusive) is deduced as @p adj_matrix_row_value_input_first + @@ -314,35 +361,33 @@ __global__ void update_frontier_and_vertex_output_values( * (inclusive) vertex (assigned to tihs process in multi-GPU). `vertex_value_output_last` * (exclusive) is deduced as @p vertex_value_output_first + @p * graph_view.get_number_of_local_vertices(). - * @param vertex_frontier vertex frontier class object for vertex frontier managements. This object - * includes multiple bucket objects. * @param v_op Binary operator takes *(@p vertex_value_input_first + i) (where i is [0, @p * graph_view.get_number_of_local_vertices())) and reduced value of the @p e_op outputs for * this vertex and returns the target bucket index (for frontier update) and new verrtex property - * values (to update *(@p vertex_value_output_first + i)). + * values (to update *(@p vertex_value_output_first + i)). The target bucket index should either be + * VertexFrontier::kInvalidBucketIdx or an index in @p next_frontier_bucket_indices. */ template void update_frontier_v_push_if_out_nbr( raft::handle_t const& handle, GraphViewType const& graph_view, - VertexIterator vertex_first, - VertexIterator vertex_last, + VertexFrontierType& vertex_frontier, + size_t cur_frontier_bucket_idx, + std::vector const& next_frontier_bucket_indices, AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, EdgeOp e_op, ReduceOp reduce_op, VertexValueInputIterator vertex_value_input_first, VertexValueOutputIterator vertex_value_output_first, - VertexFrontierType& vertex_frontier, VertexOp v_op) { static_assert(!GraphViewType::is_adj_matrix_transposed, @@ -353,6 +398,9 @@ void update_frontier_v_push_if_out_nbr( using weight_t = typename GraphViewType::weight_type; using payload_t = typename ReduceOp::type; + auto cur_frontier_vertex_first = vertex_frontier.get_bucket(cur_frontier_bucket_idx).begin(); + auto cur_frontier_vertex_last = vertex_frontier.get_bucket(cur_frontier_bucket_idx).end(); + // 1. 
fill the buffer rmm::device_uvector keys(size_t{0}, handle.get_stream()); @@ -361,57 +409,55 @@ void update_frontier_v_push_if_out_nbr( for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { matrix_partition_device_t matrix_partition(graph_view, i); - rmm::device_uvector frontier_rows( - 0, handle.get_stream()); // relevant only if GraphViewType::is_multi_gpu is true - - size_t frontier_size{}; + rmm::device_uvector frontier_rows(0, handle.get_stream()); if (GraphViewType::is_multi_gpu) { - auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto const row_comm_rank = row_comm.get_rank(); - auto const row_comm_size = row_comm.get_size(); auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); auto const col_comm_rank = col_comm.get_rank(); - auto sub_comm_rank = col_comm_rank; - frontier_size = host_scalar_bcast(col_comm, - (static_cast(sub_comm_rank) == i) - ? thrust::distance(vertex_first, vertex_last) - : size_t{0}, - i, - handle.get_stream()); - if (static_cast(sub_comm_rank) != i) { - frontier_rows.resize(frontier_size, handle.get_stream()); + auto frontier_size = + host_scalar_bcast(col_comm, + (static_cast(col_comm_rank) == i) + ? thrust::distance(cur_frontier_vertex_first, cur_frontier_vertex_last) + : size_t{0} /* dummy */, + i, + handle.get_stream()); + frontier_rows.resize(frontier_size, handle.get_stream()); + + if (static_cast(col_comm_rank) == i) { + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + cur_frontier_vertex_first, + cur_frontier_vertex_last, + frontier_rows.begin()); } - device_bcast( - col_comm, vertex_first, frontier_rows.begin(), frontier_size, i, handle.get_stream()); + + device_bcast(col_comm, + cur_frontier_vertex_first, + frontier_rows.begin(), + frontier_size, + i, + handle.get_stream()); } else { - frontier_size = thrust::distance(vertex_first, vertex_last); + frontier_rows.resize(thrust::distance(cur_frontier_vertex_first, cur_frontier_vertex_last), + handle.get_stream()); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + cur_frontier_vertex_first, + cur_frontier_vertex_last, + frontier_rows.begin()); } - auto max_pushes = - frontier_size > 0 - ? frontier_rows.size() > 0 - ? thrust::transform_reduce( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - frontier_rows.begin(), - frontier_rows.end(), - [matrix_partition] __device__(auto row) { - auto row_offset = matrix_partition.get_major_offset_from_major_nocheck(row); - return matrix_partition.get_local_degree(row_offset); - }, - edge_t{0}, - thrust::plus()) - : thrust::transform_reduce( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - vertex_first, - vertex_last, - [matrix_partition] __device__(auto row) { - auto row_offset = matrix_partition.get_major_offset_from_major_nocheck(row); - return matrix_partition.get_local_degree(row_offset); - }, - edge_t{0}, - thrust::plus()) - : edge_t{0}; + auto max_pushes = frontier_rows.size() > 0 + ? 
thrust::transform_reduce( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + frontier_rows.begin(), + frontier_rows.end(), + [matrix_partition] __device__(auto row) { + auto row_offset = + matrix_partition.get_major_offset_from_major_nocheck(row); + return matrix_partition.get_local_degree(row_offset); + }, + edge_t{0}, + thrust::plus()) + : edge_t{0}; // FIXME: This is highly pessimistic for single GPU (and multi-GPU as well if we maintain // additional per column data for filtering in e_op). If we can pause & resume execution if @@ -433,23 +479,80 @@ void update_frontier_v_push_if_out_nbr( auto row_value_input_offset = GraphViewType::is_adj_matrix_transposed ? vertex_t{0} : matrix_partition.get_major_value_start_offset(); - - // FIXME: This is highly inefficeint for graphs with high-degree vertices. If we renumber - // vertices to insure that rows within a partition are sorted by their out-degree in decreasing - // order, we will apply this kernel only to low out-degree vertices. - if (frontier_size > 0) { - raft::grid_1d_thread_t for_all_low_degree_grid( - frontier_size, - detail::update_frontier_v_push_if_out_nbr_for_all_block_size, - handle.get_device_properties().maxGridSize[0]); - - if (frontier_rows.size() > 0) { - detail::for_all_frontier_row_for_all_nbr_low_degree<< 0) { + static_assert(detail::num_segments_per_vertex_partition == 3); + std::vector h_thresholds(detail::num_segments_per_vertex_partition - 1); + h_thresholds[0] = matrix_partition.get_major_first() + segment_offsets[1]; + h_thresholds[1] = matrix_partition.get_major_first() + segment_offsets[2]; + rmm::device_uvector d_thresholds(h_thresholds.size(), handle.get_stream()); + raft::update_device( + d_thresholds.data(), h_thresholds.data(), h_thresholds.size(), handle.get_stream()); + rmm::device_uvector d_offsets(d_thresholds.size(), handle.get_stream()); + thrust::lower_bound(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + frontier_rows.begin(), + frontier_rows.end(), + d_thresholds.begin(), + d_thresholds.end(), + d_offsets.begin()); + std::vector h_offsets(d_offsets.size()); + raft::update_host(h_offsets.data(), d_offsets.data(), d_offsets.size(), handle.get_stream()); + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + // FIXME: we may further improve performance by 1) concurrently running kernels on different + // segments; 2) individually tuning block sizes for different segments; and 3) adding one more + // segment for very high degree vertices and running segmented reduction + if (h_offsets[0] > 0) { + raft::grid_1d_block_t update_grid( + h_offsets[0], + detail::update_frontier_v_push_if_out_nbr_for_all_block_size, + handle.get_device_properties().maxGridSize[0]); + + detail::for_all_frontier_row_for_all_nbr_high_degree<<>>( + matrix_partition, + frontier_rows.begin(), + frontier_rows.begin() + h_offsets[0], + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first, + keys.begin(), + get_dataframe_buffer_begin(payload_buffer), + buffer_idx.data(), + e_op); + } + if (h_offsets[1] - h_offsets[0] > 0) { + raft::grid_1d_warp_t update_grid( + h_offsets[1] - h_offsets[0], + detail::update_frontier_v_push_if_out_nbr_for_all_block_size, + handle.get_device_properties().maxGridSize[0]); + + detail::for_all_frontier_row_for_all_nbr_mid_degree<<>>( matrix_partition, - frontier_rows.begin(), + frontier_rows.begin() + h_offsets[0], + frontier_rows.begin() + h_offsets[1], + adj_matrix_row_value_input_first + row_value_input_offset, + 
adj_matrix_col_value_input_first, + keys.begin(), + get_dataframe_buffer_begin(payload_buffer), + buffer_idx.data(), + e_op); + } + if (frontier_rows.size() - h_offsets[1] > 0) { + raft::grid_1d_thread_t update_grid( + frontier_rows.size() - h_offsets[1], + detail::update_frontier_v_push_if_out_nbr_for_all_block_size, + handle.get_device_properties().maxGridSize[0]); + + detail::for_all_frontier_row_for_all_nbr_low_degree<<>>( + matrix_partition, + frontier_rows.begin() + h_offsets[1], frontier_rows.end(), adj_matrix_row_value_input_first + row_value_input_offset, adj_matrix_col_value_input_first, @@ -457,14 +560,21 @@ void update_frontier_v_push_if_out_nbr( get_dataframe_buffer_begin(payload_buffer), buffer_idx.data(), e_op); - } else { - detail::for_all_frontier_row_for_all_nbr_low_degree<< 0) { + raft::grid_1d_thread_t update_grid( + frontier_rows.size(), + detail::update_frontier_v_push_if_out_nbr_for_all_block_size, + handle.get_device_properties().maxGridSize[0]); + + detail::for_all_frontier_row_for_all_nbr_low_degree<<>>( matrix_partition, - vertex_first, - vertex_last, + frontier_rows.begin(), + frontier_rows.end(), adj_matrix_row_value_input_first + row_value_input_offset, adj_matrix_col_value_input_first, keys.begin(), @@ -478,12 +588,13 @@ void update_frontier_v_push_if_out_nbr( // 2. reduce the buffer auto num_buffer_elements = - detail::reduce_buffer_elements(handle, - keys.begin(), - get_dataframe_buffer_begin(payload_buffer), - buffer_idx.value(handle.get_stream()), - reduce_op); + detail::sort_and_reduce_buffer_elements(handle, + keys.begin(), + get_dataframe_buffer_begin(payload_buffer), + buffer_idx.value(handle.get_stream()), + reduce_op); if (GraphViewType::is_multi_gpu) { + // FIXME: this step is unnecessary if row_comm_size== 1 auto& comm = handle.get_comms(); auto const comm_rank = comm.get_rank(); auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); @@ -533,49 +644,113 @@ void update_frontier_v_push_if_out_nbr( payload_buffer = std::move(rx_payload_buffer); num_buffer_elements = - detail::reduce_buffer_elements(handle, - keys.begin(), - get_dataframe_buffer_begin(payload_buffer), - keys.size(), - reduce_op); + detail::sort_and_reduce_buffer_elements(handle, + keys.begin(), + get_dataframe_buffer_begin(payload_buffer), + keys.size(), + reduce_op); } // 3. 
update vertex properties if (num_buffer_elements > 0) { - raft::grid_1d_thread_t update_grid(num_buffer_elements, - detail::update_frontier_v_push_if_out_nbr_update_block_size, - handle.get_device_properties().maxGridSize[0]); - - auto constexpr invalid_vertex = invalid_vertex_id::value; + static_assert(VertexFrontierType::kNumBuckets <= std::numeric_limits::max()); + rmm::device_uvector bucket_indices(num_buffer_elements, handle.get_stream()); vertex_partition_device_t vertex_partition(graph_view); - auto bucket_and_bucket_size_device_ptrs = - vertex_frontier.get_bucket_and_bucket_size_device_pointers(); - detail::update_frontier_and_vertex_output_values - <<>>( - vertex_partition, - keys.begin(), - get_dataframe_buffer_begin(payload_buffer), - num_buffer_elements, - vertex_value_input_first, - vertex_value_output_first, - std::get<0>(bucket_and_bucket_size_device_ptrs), - std::get<1>(bucket_and_bucket_size_device_ptrs), - VertexFrontierType::kInvalidBucketIdx, - invalid_vertex, - v_op); - - auto bucket_sizes_device_ptr = std::get<1>(bucket_and_bucket_size_device_ptrs); - std::vector bucket_sizes(VertexFrontierType::kNumBuckets); - raft::update_host(bucket_sizes.data(), - bucket_sizes_device_ptr, - VertexFrontierType::kNumBuckets, - handle.get_stream()); - CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); - for (size_t i = 0; i < VertexFrontierType::kNumBuckets; ++i) { - vertex_frontier.get_bucket(i).set_size(bucket_sizes[i]); + auto key_payload_pair_first = thrust::make_zip_iterator( + thrust::make_tuple(keys.begin(), get_dataframe_buffer_begin(payload_buffer))); + thrust::transform( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + key_payload_pair_first, + key_payload_pair_first + num_buffer_elements, + bucket_indices.begin(), + [vertex_value_input_first, + vertex_value_output_first, + v_op, + vertex_partition, + invalid_bucket_idx = VertexFrontierType::kInvalidBucketIdx] __device__(auto pair) { + auto key = thrust::get<0>(pair); + auto payload = thrust::get<1>(pair); + auto key_offset = vertex_partition.get_local_vertex_offset_from_vertex_nocheck(key); + auto v_val = *(vertex_value_input_first + key_offset); + auto v_op_result = v_op(v_val, payload); + auto bucket_idx = thrust::get<0>(v_op_result); + if (bucket_idx != invalid_bucket_idx) { + *(vertex_value_output_first + key_offset) = thrust::get<1>(v_op_result); + return static_cast(bucket_idx); + } else { + return std::numeric_limits::max(); + } + }); + + resize_dataframe_buffer(payload_buffer, size_t{0}, handle.get_stream()); + shrink_to_fit_dataframe_buffer(payload_buffer, handle.get_stream()); + + auto bucket_key_pair_first = + thrust::make_zip_iterator(thrust::make_tuple(bucket_indices.begin(), keys.begin())); + keys.resize(thrust::distance( + bucket_key_pair_first, + thrust::remove_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + bucket_key_pair_first, + bucket_key_pair_first + num_buffer_elements, + [] __device__(auto pair) { + return thrust::get<0>(pair) == + std::numeric_limits::max(); + })), + handle.get_stream()); + bucket_indices.resize(keys.size(), handle.get_stream()); + keys.shrink_to_fit(handle.get_stream()); + bucket_indices.shrink_to_fit(handle.get_stream()); + + bucket_key_pair_first = + thrust::make_zip_iterator(thrust::make_tuple(bucket_indices.begin(), keys.begin())); + if (next_frontier_bucket_indices.size() == 1) { + vertex_frontier.get_bucket(next_frontier_bucket_indices[0]).insert(keys.begin(), keys.size()); + } else if (next_frontier_bucket_indices.size() == 2) { + 
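+      // Two-target-bucket case: stable_partition moves the keys whose new bucket index equals the
+      // first target bucket ahead of those destined for the second, preserving the sorted order
+      // within each group, so each target bucket receives one contiguous, sorted range of keys.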
auto first_bucket_size = thrust::distance( + bucket_key_pair_first, + thrust::stable_partition( // stalbe_partition to maintain sorted order within each bucket + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + bucket_key_pair_first, + bucket_key_pair_first + bucket_indices.size(), + [first_bucket_idx = static_cast(next_frontier_bucket_indices[0])] __device__( + auto pair) { return thrust::get<0>(pair) == first_bucket_idx; })); + vertex_frontier.get_bucket(next_frontier_bucket_indices[0]) + .insert(keys.begin(), first_bucket_size); + vertex_frontier.get_bucket(next_frontier_bucket_indices[1]) + .insert(keys.begin() + first_bucket_size, + thrust::distance(keys.begin() + first_bucket_size, keys.end())); + } else { + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + bucket_key_pair_first, + bucket_key_pair_first + bucket_indices.size()); + rmm::device_uvector d_indices(next_frontier_bucket_indices.size(), + handle.get_stream()); + rmm::device_uvector d_counts(d_indices.size(), handle.get_stream()); + auto it = + thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + bucket_indices.begin(), + bucket_indices.end(), + thrust::make_constant_iterator(size_t{1}), + d_indices.begin(), + d_counts.begin()); + d_indices.resize(thrust::distance(d_indices.begin(), thrust::get<0>(it)), + handle.get_stream()); + d_counts.resize(d_indices.size(), handle.get_stream()); + std::vector h_indices(d_indices.size()); + std::vector h_counts(h_indices.size()); + raft::update_host(h_indices.data(), d_indices.data(), d_indices.size(), handle.get_stream()); + raft::update_host(h_counts.data(), d_counts.data(), d_counts.size(), handle.get_stream()); + handle.get_stream_view().synchronize(); + std::vector h_offsets(h_indices.size(), 0); + std::partial_sum(h_counts.begin(), h_counts.end() - 1, h_offsets.begin() + 1); + for (size_t i = 0; i < h_indices.size(); ++i) { + if (h_counts[i] > 0) { + vertex_frontier.get_bucket(h_indices[i]).insert(keys.begin() + h_offsets[i], h_counts[i]); + } + } } } } diff --git a/cpp/include/patterns/vertex_frontier.cuh b/cpp/include/patterns/vertex_frontier.cuh index 375ec097850..4758334e9fc 100644 --- a/cpp/include/patterns/vertex_frontier.cuh +++ b/cpp/include/patterns/vertex_frontier.cuh @@ -24,8 +24,7 @@ #include #include -#include -#include +#include #include #include @@ -37,129 +36,80 @@ namespace cugraph { namespace experimental { -namespace detail { - -// FIXME: block size requires tuning -int32_t constexpr move_and_invalidate_if_block_size = 128; - -// FIXME: better move to another file for reusability -inline size_t round_up(size_t number_to_round, size_t modulus) -{ - return ((number_to_round + (modulus - 1)) / modulus) * modulus; -} - -template -__global__ void move_and_invalidate_if(RowIterator row_first, - RowIterator row_last, - vertex_t** bucket_ptrs, - size_t* bucket_sizes_ptr, - size_t this_bucket_idx, - size_t invalid_bucket_idx, - vertex_t invalid_vertex, - SplitOp split_op) -{ - static_assert( - std::is_same::value_type, vertex_t>::value); - auto const tid = threadIdx.x + blockIdx.x * blockDim.x; - size_t idx = tid; - size_t block_idx = blockIdx.x; - auto num_elements = thrust::distance(row_first, row_last); - // FIXME: it might be more performant to process more than one element per thread - auto num_blocks = (num_elements + blockDim.x - 1) / blockDim.x; - - using BlockScan = cub::BlockScan; - __shared__ typename BlockScan::TempStorage temp_storage; - - __shared__ size_t 
bucket_block_start_offsets[num_buckets]; - - size_t bucket_block_local_offsets[num_buckets]; - size_t bucket_block_aggregate_sizes[num_buckets]; - - while (block_idx < num_blocks) { - for (size_t i = 0; i < num_buckets; ++i) { bucket_block_local_offsets[i] = 0; } - - size_t selected_bucket_idx{invalid_bucket_idx}; - vertex_t key{invalid_vertex}; - - if (idx < num_elements) { - key = *(row_first + idx); - selected_bucket_idx = split_op(key); - if (selected_bucket_idx != this_bucket_idx) { - *(row_first + idx) = invalid_vertex; - if (selected_bucket_idx != invalid_bucket_idx) { - bucket_block_local_offsets[selected_bucket_idx] = 1; - } - } - } - - for (size_t i = 0; i < num_buckets; ++i) { - BlockScan(temp_storage) - .ExclusiveSum(bucket_block_local_offsets[i], - bucket_block_local_offsets[i], - bucket_block_aggregate_sizes[i]); - } - - if (threadIdx.x == 0) { - for (size_t i = 0; i < num_buckets; ++i) { - static_assert(sizeof(unsigned long long int) == sizeof(size_t)); - bucket_block_start_offsets[i] = - atomicAdd(reinterpret_cast(bucket_sizes_ptr + i), - static_cast(bucket_block_aggregate_sizes[i])); - } - } - - __syncthreads(); - - // FIXME: better use shared memory buffer to aggreaget global memory writes - if ((selected_bucket_idx != this_bucket_idx) && (selected_bucket_idx != invalid_bucket_idx)) { - bucket_ptrs[selected_bucket_idx][bucket_block_start_offsets[selected_bucket_idx] + - bucket_block_local_offsets[selected_bucket_idx]] = key; - } - - idx += gridDim.x * blockDim.x; - block_idx += gridDim.x; - } -} - -} // namespace detail - template -class Bucket { +class SortedUniqueElementBucket { public: - Bucket(raft::handle_t const& handle, size_t capacity) - : handle_ptr_(&handle), elements_(capacity, handle.get_stream()) + SortedUniqueElementBucket(raft::handle_t const& handle) + : handle_ptr_(&handle), elements_(0, handle.get_stream()) { - thrust::fill(rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), - elements_.begin(), - elements_.end(), - invalid_vertex_id::value); } void insert(vertex_t v) { - raft::update_device(elements_.data() + size_, &v, 1, handle_ptr_->get_stream()); - ++size_; + if (elements_.size() > 0) { + rmm::device_scalar vertex(v, handle_ptr_->get_stream()); + insert(vertex.data(), vertex_t{1}); + } else { + elements_.resize(1, handle_ptr_->get_stream()); + raft::update_device(elements_.data(), &v, size_t{1}, handle_ptr_->get_stream()); + } } - size_t size() const { return size_; } + /** + * @ brief insert a list of vertices to the bucket + * + * @param sorted_unique_vertices Device pointer to the array storing the vertex list. + * @param num_sorted_unique_vertices Size of the vertex list to insert. 
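+   *
+   * A minimal usage sketch (assumes a raft::handle_t named handle and int32_t vertex ids):
+   * @code
+   * SortedUniqueElementBucket<int32_t, false> bucket(handle);
+   * std::vector<int32_t> h_vertices{3, 7, 42};  // must already be sorted and unique
+   * rmm::device_uvector<int32_t> d_vertices(h_vertices.size(), handle.get_stream());
+   * raft::update_device(d_vertices.data(), h_vertices.data(), h_vertices.size(), handle.get_stream());
+   * bucket.insert(d_vertices.data(), static_cast<int32_t>(d_vertices.size()));
+   * @endcode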
+ */ + void insert(vertex_t const* sorted_unique_vertices, vertex_t num_sorted_unique_vertices) + { + if (elements_.size() > 0) { + rmm::device_uvector merged_vertices(elements_.size() + num_sorted_unique_vertices, + handle_ptr_->get_stream()); + thrust::merge(rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + elements_.begin(), + elements_.end(), + sorted_unique_vertices, + sorted_unique_vertices + num_sorted_unique_vertices, + merged_vertices.begin()); + merged_vertices.resize( + thrust::distance( + merged_vertices.begin(), + thrust::unique(rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + merged_vertices.begin(), + merged_vertices.end())), + handle_ptr_->get_stream()); + merged_vertices.shrink_to_fit(handle_ptr_->get_stream()); + elements_ = std::move(merged_vertices); + } else { + elements_.resize(num_sorted_unique_vertices, handle_ptr_->get_stream()); + thrust::copy(rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + sorted_unique_vertices, + sorted_unique_vertices + num_sorted_unique_vertices, + elements_.begin()); + } + } - void set_size(size_t size) { size_ = size; } + size_t size() const { return elements_.size(); } template std::enable_if_t aggregate_size() const { - return host_scalar_allreduce(handle_ptr_->get_comms(), size_, handle_ptr_->get_stream()); + return host_scalar_allreduce( + handle_ptr_->get_comms(), elements_.size(), handle_ptr_->get_stream()); } template std::enable_if_t aggregate_size() const { - return size_; + return elements_.size(); } - void clear() { size_ = 0; } + void resize(size_t size) { elements_.resize(size, handle_ptr_->get_stream()); } + + void clear() { elements_.resize(0, handle_ptr_->get_stream()); } - size_t capacity() const { return elements_.size(); } + void shrink_to_fit() { elements_.shrink_to_fit(handle_ptr_->get_stream()); } auto const data() const { return elements_.data(); } @@ -169,14 +119,13 @@ class Bucket { auto begin() { return elements_.begin(); } - auto const end() const { return elements_.begin() + size_; } + auto const end() const { return elements_.end(); } - auto end() { return elements_.begin() + size_; } + auto end() { return elements_.end(); } private: raft::handle_t const* handle_ptr_{nullptr}; rmm::device_uvector elements_; - size_t size_{0}; }; template @@ -185,29 +134,17 @@ class VertexFrontier { static size_t constexpr kNumBuckets = num_buckets; static size_t constexpr kInvalidBucketIdx{std::numeric_limits::max()}; - VertexFrontier(raft::handle_t const& handle, std::vector bucket_capacities) - : handle_ptr_(&handle), - tmp_bucket_ptrs_(num_buckets, handle.get_stream()), - tmp_bucket_sizes_(num_buckets, handle.get_stream()) + VertexFrontier(raft::handle_t const& handle) : handle_ptr_(&handle) { - CUGRAPH_EXPECTS(bucket_capacities.size() == num_buckets, - "invalid input argument bucket_capacities (size mismatch)"); - thrust::fill(rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), - tmp_bucket_ptrs_.begin(), - tmp_bucket_ptrs_.end(), - static_cast(nullptr)); - thrust::fill(rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), - tmp_bucket_sizes_.begin(), - tmp_bucket_sizes_.end(), - size_t{0}); - for (size_t i = 0; i < num_buckets; ++i) { - buckets_.emplace_back(handle, bucket_capacities[i]); - } + for (size_t i = 0; i < num_buckets; ++i) { buckets_.emplace_back(handle); } } - Bucket& get_bucket(size_t bucket_idx) { return buckets_[bucket_idx]; } + SortedUniqueElementBucket& get_bucket(size_t 
bucket_idx) + { + return buckets_[bucket_idx]; + } - Bucket const& get_bucket(size_t bucket_idx) const + SortedUniqueElementBucket const& get_bucket(size_t bucket_idx) const { return buckets_[bucket_idx]; } @@ -218,78 +155,111 @@ class VertexFrontier { } template - void split_bucket(size_t bucket_idx, SplitOp split_op) + void split_bucket(size_t this_bucket_idx, + std::vector const& move_to_bucket_indices, + SplitOp split_op) { - auto constexpr invalid_vertex = invalid_vertex_id::value; - - auto bucket_and_bucket_size_device_ptrs = get_bucket_and_bucket_size_device_pointers(); - - auto& this_bucket = get_bucket(bucket_idx); + auto& this_bucket = get_bucket(this_bucket_idx); if (this_bucket.size() > 0) { - raft::grid_1d_thread_t move_and_invalidate_if_grid( - this_bucket.size(), - detail::move_and_invalidate_if_block_size, - handle_ptr_->get_device_properties().maxGridSize[0]); - - detail::move_and_invalidate_if - <<get_stream()>>>(this_bucket.begin(), - this_bucket.end(), - std::get<0>(bucket_and_bucket_size_device_ptrs), - std::get<1>(bucket_and_bucket_size_device_ptrs), - bucket_idx, - kInvalidBucketIdx, - invalid_vertex, - split_op); - } + static_assert(kNumBuckets <= std::numeric_limits::max()); + rmm::device_uvector bucket_indices(this_bucket.size(), handle_ptr_->get_stream()); + thrust::transform( + rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + this_bucket.begin(), + this_bucket.end(), + bucket_indices.begin(), + [split_op] __device__(auto v) { return static_cast(split_op(v)); }); + + auto pair_first = + thrust::make_zip_iterator(thrust::make_tuple(bucket_indices.begin(), this_bucket.begin())); + this_bucket.resize(thrust::distance( + pair_first, + thrust::remove_if( + rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + pair_first, + pair_first + bucket_indices.size(), + [invalid_bucket_idx = static_cast(kInvalidBucketIdx)] __device__(auto pair) { + return thrust::get<0>(pair) == invalid_bucket_idx; + }))); + bucket_indices.resize(this_bucket.size(), handle_ptr_->get_stream()); + this_bucket.shrink_to_fit(); + bucket_indices.shrink_to_fit(handle_ptr_->get_stream()); + + pair_first = + thrust::make_zip_iterator(thrust::make_tuple(bucket_indices.begin(), this_bucket.begin())); + auto new_this_bucket_size = thrust::distance( + pair_first, + thrust::stable_partition( // stalbe_partition to maintain sorted order within each bucket + rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + pair_first, + pair_first + bucket_indices.size(), + [this_bucket_idx = static_cast(this_bucket_idx)] __device__(auto pair) { + return thrust::get<0>(pair) == this_bucket_idx; + })); + + if (move_to_bucket_indices.size() == 1) { + get_bucket(move_to_bucket_indices[0]) + .insert(this_bucket.begin() + new_this_bucket_size, + thrust::distance(this_bucket.begin() + new_this_bucket_size, this_bucket.end())); + } else if (move_to_bucket_indices.size() == 2) { + auto next_bucket_size = thrust::distance( + pair_first + new_this_bucket_size, + thrust::stable_partition( // stalbe_partition to maintain sorted order within each bucket + rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + pair_first + new_this_bucket_size, + pair_first + bucket_indices.size(), + [next_bucket_idx = static_cast(move_to_bucket_indices[0])] __device__( + auto pair) { return thrust::get<0>(pair) == next_bucket_idx; })); + get_bucket(move_to_bucket_indices[0]) + .insert(this_bucket.begin() + new_this_bucket_size, next_bucket_size); + 
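+        // keys that matched neither this_bucket_idx nor the first move-to bucket fall through
+        // to the second move-to bucket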
get_bucket(move_to_bucket_indices[1]) + .insert(this_bucket.begin() + new_this_bucket_size + next_bucket_size, + thrust::distance(this_bucket.begin() + new_this_bucket_size + next_bucket_size, + this_bucket.end())); + } else { + thrust::sort(rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + pair_first + new_this_bucket_size, + pair_first + bucket_indices.size()); + rmm::device_uvector d_indices(move_to_bucket_indices.size(), + handle_ptr_->get_stream()); + rmm::device_uvector d_counts(d_indices.size(), handle_ptr_->get_stream()); + auto it = thrust::reduce_by_key( + rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + bucket_indices.begin() + new_this_bucket_size, + bucket_indices.end(), + thrust::make_constant_iterator(size_t{1}), + d_indices.begin(), + d_counts.begin()); + d_indices.resize(thrust::distance(d_indices.begin(), thrust::get<0>(it)), + handle_ptr_->get_stream()); + d_counts.resize(d_indices.size(), handle_ptr_->get_stream()); + std::vector h_indices(d_indices.size()); + std::vector h_counts(h_indices.size()); + raft::update_host( + h_indices.data(), d_indices.data(), d_indices.size(), handle_ptr_->get_stream()); + raft::update_host( + h_counts.data(), d_counts.data(), d_counts.size(), handle_ptr_->get_stream()); + handle_ptr_->get_stream_view().synchronize(); + std::vector h_offsets(h_indices.size(), 0); + std::partial_sum(h_counts.begin(), h_counts.end() - 1, h_offsets.begin() + 1); + for (size_t i = 0; i < h_indices.size(); ++i) { + if (h_counts[i] > 0) { + get_bucket(h_indices[i]) + .insert(this_bucket.begin() + new_this_bucket_size + h_offsets[i], h_counts[i]); + } + } + } - // FIXME: if we adopt CUDA cooperative group https://devblogs.nvidia.com/cooperative-groups - // and global sync(), we can merge this step with the above kernel (and rename the above kernel - // to move_if) - auto it = - thrust::remove_if(rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), - get_bucket(bucket_idx).begin(), - get_bucket(bucket_idx).end(), - [] __device__(auto value) { return value == invalid_vertex; }); - - auto bucket_sizes_device_ptr = std::get<1>(bucket_and_bucket_size_device_ptrs); - std::vector bucket_sizes(kNumBuckets); - raft::update_host( - bucket_sizes.data(), bucket_sizes_device_ptr, kNumBuckets, handle_ptr_->get_stream()); - CUDA_TRY(cudaStreamSynchronize(handle_ptr_->get_stream())); - for (size_t i = 0; i < kNumBuckets; ++i) { - if (i != bucket_idx) { get_bucket(i).set_size(bucket_sizes[i]); } + this_bucket.resize(new_this_bucket_size); + this_bucket.shrink_to_fit(); } - auto size = thrust::distance(get_bucket(bucket_idx).begin(), it); - get_bucket(bucket_idx).set_size(size); - return; } - auto get_bucket_and_bucket_size_device_pointers() - { - std::vector tmp_ptrs(buckets_.size(), nullptr); - std::vector tmp_sizes(buckets_.size(), 0); - for (size_t i = 0; i < buckets_.size(); ++i) { - tmp_ptrs[i] = get_bucket(i).data(); - tmp_sizes[i] = get_bucket(i).size(); - } - raft::update_device( - tmp_bucket_ptrs_.data(), tmp_ptrs.data(), tmp_ptrs.size(), handle_ptr_->get_stream()); - raft::update_device( - tmp_bucket_sizes_.data(), tmp_sizes.data(), tmp_sizes.size(), handle_ptr_->get_stream()); - CUDA_TRY(cudaStreamSynchronize(handle_ptr_->get_stream())); - return std::make_tuple(tmp_bucket_ptrs_.data(), tmp_bucket_sizes_.data()); - } - private: raft::handle_t const* handle_ptr_{nullptr}; - std::vector> buckets_{}; - rmm::device_uvector tmp_bucket_ptrs_; - rmm::device_uvector tmp_bucket_sizes_; + 
std::vector> buckets_{}; }; } // namespace experimental diff --git a/cpp/include/utilities/dataframe_buffer.cuh b/cpp/include/utilities/dataframe_buffer.cuh index e59b12f2a80..b0e9c1ebfec 100644 --- a/cpp/include/utilities/dataframe_buffer.cuh +++ b/cpp/include/utilities/dataframe_buffer.cuh @@ -61,6 +61,21 @@ struct resize_dataframe_buffer_tuple_iterator_element_impl +struct shrink_to_fit_dataframe_buffer_tuple_iterator_element_impl { + void run(BufferType& buffer, cudaStream_t stream) + { + std::get(buffer).shrink_to_fit(stream); + shrink_to_fit_dataframe_buffer_tuple_iterator_element_impl() + .run(buffer, stream); + } +}; + +template +struct shrink_to_fit_dataframe_buffer_tuple_iterator_element_impl { + void run(BufferType& buffer, cudaStream_t stream) {} +}; + template auto get_dataframe_buffer_begin_tuple_element_impl(BufferType& buffer) { @@ -111,6 +126,27 @@ void resize_dataframe_buffer(BufferType& buffer, size_t new_buffer_size, cudaStr .run(buffer, new_buffer_size, stream); } +template ::value>* = nullptr> +void shrink_to_fit_dataframe_buffer(BufferType& buffer, cudaStream_t stream) +{ + buffer.shrink_to_fit(stream); +} + +template ::value>* = nullptr> +void shrink_to_fit_dataframe_buffer(BufferType& buffer, cudaStream_t stream) +{ + size_t constexpr tuple_size = thrust::tuple_size::value; + detail::shrink_to_fit_dataframe_buffer_tuple_iterator_element_impl() + .run(buffer, stream); +} + template ::value>* = nullptr> diff --git a/cpp/src/experimental/bfs.cu b/cpp/src/experimental/bfs.cu index 9145e3737b6..2a703c1c85e 100644 --- a/cpp/src/experimental/bfs.cu +++ b/cpp/src/experimental/bfs.cu @@ -90,11 +90,9 @@ void bfs(raft::handle_t const &handle, // 3. initialize BFS frontier - enum class Bucket { cur, num_buckets }; - std::vector bucket_sizes(static_cast(Bucket::num_buckets), - push_graph_view.get_number_of_local_vertices()); + enum class Bucket { cur, next, num_buckets }; VertexFrontier(Bucket::num_buckets)> - vertex_frontier(handle, bucket_sizes); + vertex_frontier(handle); if (push_graph_view.is_local_vertex_nocheck(source_vertex)) { vertex_frontier.get_bucket(static_cast(Bucket::cur)).insert(source_vertex); @@ -103,23 +101,18 @@ void bfs(raft::handle_t const &handle, // 4. BFS iteration vertex_t depth{0}; - auto cur_local_vertex_frontier_first = - vertex_frontier.get_bucket(static_cast(Bucket::cur)).begin(); - auto cur_vertex_frontier_aggregate_size = - vertex_frontier.get_bucket(static_cast(Bucket::cur)).aggregate_size(); while (true) { if (direction_optimizing) { CUGRAPH_FAIL("unimplemented."); } else { vertex_partition_device_t vertex_partition(push_graph_view); - auto cur_local_vertex_frontier_last = - vertex_frontier.get_bucket(static_cast(Bucket::cur)).end(); update_frontier_v_push_if_out_nbr( handle, push_graph_view, - cur_local_vertex_frontier_first, - cur_local_vertex_frontier_last, + vertex_frontier, + static_cast(Bucket::cur), + std::vector{static_cast(Bucket::next)}, thrust::make_constant_iterator(0) /* dummy */, thrust::make_constant_iterator(0) /* dummy */, [vertex_partition, distances] __device__( @@ -135,20 +128,19 @@ void bfs(raft::handle_t const &handle, reduce_op::any(), distances, thrust::make_zip_iterator(thrust::make_tuple(distances, predecessor_first)), - vertex_frontier, [depth] __device__(auto v_val, auto pushed_val) { - auto idx = (v_val == invalid_distance) ? static_cast(Bucket::cur) + auto idx = (v_val == invalid_distance) ? 
static_cast(Bucket::next) : VertexFrontier::kInvalidBucketIdx; return thrust::make_tuple(idx, thrust::make_tuple(depth + 1, pushed_val)); }); - auto new_vertex_frontier_aggregate_size = - vertex_frontier.get_bucket(static_cast(Bucket::cur)).aggregate_size() - - cur_vertex_frontier_aggregate_size; - if (new_vertex_frontier_aggregate_size == 0) { break; } - - cur_local_vertex_frontier_first = cur_local_vertex_frontier_last; - cur_vertex_frontier_aggregate_size += new_vertex_frontier_aggregate_size; + vertex_frontier.get_bucket(static_cast(Bucket::cur)).clear(); + vertex_frontier.get_bucket(static_cast(Bucket::cur)).shrink_to_fit(); + vertex_frontier.swap_buckets(static_cast(Bucket::cur), + static_cast(Bucket::next)); + if (vertex_frontier.get_bucket(static_cast(Bucket::cur)).aggregate_size() == 0) { + break; + } } depth++; diff --git a/cpp/src/experimental/graph.cu b/cpp/src/experimental/graph.cu index 47c41cb3426..18db57a737f 100644 --- a/cpp/src/experimental/graph.cu +++ b/cpp/src/experimental/graph.cu @@ -295,8 +295,8 @@ graph_t::max())); rmm::device_uvector d_thresholds(detail::num_segments_per_vertex_partition - 1, default_stream); - std::vector h_thresholds = {static_cast(detail::low_degree_threshold), - static_cast(detail::mid_degree_threshold)}; + std::vector h_thresholds = {static_cast(detail::mid_degree_threshold), + static_cast(detail::low_degree_threshold)}; raft::update_device( d_thresholds.data(), h_thresholds.data(), h_thresholds.size(), default_stream); @@ -317,7 +317,8 @@ graph_t{}); rmm::device_uvector aggregate_segment_offsets(col_comm_size * segment_offsets.size(), default_stream); @@ -326,8 +327,8 @@ graph_t::max())); rmm::device_uvector d_thresholds(detail::num_segments_per_vertex_partition - 1, default_stream); - std::vector h_thresholds = {static_cast(detail::low_degree_threshold), - static_cast(detail::mid_degree_threshold)}; + std::vector h_thresholds = {static_cast(detail::mid_degree_threshold), + static_cast(detail::low_degree_threshold)}; raft::update_device( d_thresholds.data(), h_thresholds.data(), h_thresholds.size(), default_stream); @@ -462,7 +463,8 @@ graph_tget_number_of_vertices(), d_thresholds.begin(), d_thresholds.end(), - segment_offsets.begin() + 1); + segment_offsets.begin() + 1, + thrust::greater{}); segment_offsets_.resize(segment_offsets.size()); raft::update_host( diff --git a/cpp/src/experimental/graph_view.cu b/cpp/src/experimental/graph_view.cu index c6f39a44333..67603ae260b 100644 --- a/cpp/src/experimental/graph_view.cu +++ b/cpp/src/experimental/graph_view.cu @@ -161,7 +161,7 @@ graph_view_t const& adj_matrix_partition_offsets, std::vector const& adj_matrix_partition_indices, std::vector const& adj_matrix_partition_weights, - std::vector const& vertex_partition_segment_offsets, + std::vector const& adj_matrix_partition_segment_offsets, partition_t const& partition, vertex_t number_of_vertices, edge_t number_of_edges, @@ -176,7 +176,7 @@ graph_view_t bucket_sizes(static_cast(Bucket::num_buckets), - push_graph_view.get_number_of_local_vertices()); + enum class Bucket { cur_near, next_near, far, num_buckets }; VertexFrontier(Bucket::num_buckets)> - vertex_frontier(handle, bucket_sizes); + vertex_frontier(handle); // 5. 
SSSP iteration @@ -172,8 +169,9 @@ void sssp(raft::handle_t const &handle, update_frontier_v_push_if_out_nbr( handle, push_graph_view, - vertex_frontier.get_bucket(static_cast(Bucket::cur_near)).begin(), - vertex_frontier.get_bucket(static_cast(Bucket::cur_near)).end(), + vertex_frontier, + static_cast(Bucket::cur_near), + std::vector{static_cast(Bucket::next_near), static_cast(Bucket::far)}, row_distances, thrust::make_constant_iterator(0) /* dummy */, [vertex_partition, distances, cutoff] __device__( @@ -193,30 +191,31 @@ void sssp(raft::handle_t const &handle, reduce_op::min>(), distances, thrust::make_zip_iterator(thrust::make_tuple(distances, predecessor_first)), - vertex_frontier, [near_far_threshold] __device__(auto v_val, auto pushed_val) { auto new_dist = thrust::get<0>(pushed_val); auto idx = new_dist < v_val - ? (new_dist < near_far_threshold ? static_cast(Bucket::new_near) + ? (new_dist < near_far_threshold ? static_cast(Bucket::next_near) : static_cast(Bucket::far)) : VertexFrontier::kInvalidBucketIdx; return thrust::make_tuple(idx, pushed_val); }); vertex_frontier.get_bucket(static_cast(Bucket::cur_near)).clear(); - if (vertex_frontier.get_bucket(static_cast(Bucket::new_near)).aggregate_size() > 0) { + vertex_frontier.get_bucket(static_cast(Bucket::cur_near)).shrink_to_fit(); + if (vertex_frontier.get_bucket(static_cast(Bucket::next_near)).aggregate_size() > 0) { vertex_frontier.swap_buckets(static_cast(Bucket::cur_near), - static_cast(Bucket::new_near)); + static_cast(Bucket::next_near)); } else if (vertex_frontier.get_bucket(static_cast(Bucket::far)).aggregate_size() > 0) { // near queue is empty, split the far queue auto old_near_far_threshold = near_far_threshold; near_far_threshold += delta; - size_t new_near_size{0}; - size_t new_far_size{0}; + size_t near_size{0}; + size_t far_size{0}; while (true) { vertex_frontier.split_bucket( static_cast(Bucket::far), + std::vector{static_cast(Bucket::cur_near)}, [vertex_partition, distances, old_near_far_threshold, near_far_threshold] __device__( auto v) { auto dist = @@ -229,17 +228,16 @@ void sssp(raft::handle_t const &handle, return static_cast(Bucket::far); } }); - new_near_size = + near_size = vertex_frontier.get_bucket(static_cast(Bucket::cur_near)).aggregate_size(); - new_far_size = - vertex_frontier.get_bucket(static_cast(Bucket::far)).aggregate_size(); - if ((new_near_size > 0) || (new_far_size == 0)) { + far_size = vertex_frontier.get_bucket(static_cast(Bucket::far)).aggregate_size(); + if ((near_size > 0) || (far_size == 0)) { break; } else { near_far_threshold += delta; } } - if ((new_near_size == 0) && (new_far_size == 0)) { break; } + if ((near_size == 0) && (far_size == 0)) { break; } } else { break; } diff --git a/cpp/tests/experimental/bfs_test.cpp b/cpp/tests/experimental/bfs_test.cpp index 8fce9488d8a..ded57dd1855 100644 --- a/cpp/tests/experimental/bfs_test.cpp +++ b/cpp/tests/experimental/bfs_test.cpp @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +#include #include #include #include @@ -153,11 +154,22 @@ class Tests_BFS : public ::testing::TestWithParam { using weight_t = float; raft::handle_t handle{}; + HighResClock hr_clock{}; + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_clock.start(); + } cugraph::experimental::graph_t graph(handle); rmm::device_uvector d_renumber_map_labels(0, handle.get_stream()); std::tie(graph, d_renumber_map_labels) = read_graph(handle, configuration, renumber); + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "read_graph took " << elapsed_time * 1e-6 << " s.\n"; + } auto graph_view = graph.view(); ASSERT_TRUE(static_cast(configuration.source) >= 0 && @@ -169,7 +181,10 @@ class Tests_BFS : public ::testing::TestWithParam { rmm::device_uvector d_predecessors(graph_view.get_number_of_vertices(), handle.get_stream()); - CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_clock.start(); + } cugraph::experimental::bfs(handle, graph_view, @@ -179,7 +194,12 @@ class Tests_BFS : public ::testing::TestWithParam { false, std::numeric_limits::max()); - CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "BFS took " << elapsed_time * 1e-6 << " s.\n"; + } if (configuration.check_correctness) { cugraph::experimental::graph_t unrenumbered_graph( diff --git a/cpp/tests/experimental/katz_centrality_test.cpp b/cpp/tests/experimental/katz_centrality_test.cpp index 71011f3d018..c7756699acd 100644 --- a/cpp/tests/experimental/katz_centrality_test.cpp +++ b/cpp/tests/experimental/katz_centrality_test.cpp @@ -14,6 +14,7 @@ * limitations under the License. */ +#include #include #include #include @@ -171,11 +172,22 @@ class Tests_KatzCentrality : public ::testing::TestWithParam graph(handle); rmm::device_uvector d_renumber_map_labels(0, handle.get_stream()); std::tie(graph, d_renumber_map_labels) = read_graph(handle, configuration, renumber); + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "read_graph took " << elapsed_time * 1e-6 << " s.\n"; + } auto graph_view = graph.view(); auto degrees = graph_view.compute_in_degrees(handle); @@ -191,7 +203,10 @@ class Tests_KatzCentrality : public ::testing::TestWithParam d_katz_centralities(graph_view.get_number_of_vertices(), handle.get_stream()); - CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_clock.start(); + } cugraph::experimental::katz_centrality(handle, graph_view, @@ -204,7 +219,12 @@ class Tests_KatzCentrality : public ::testing::TestWithParam unrenumbered_graph( diff --git a/cpp/tests/experimental/mg_bfs_test.cpp b/cpp/tests/experimental/mg_bfs_test.cpp index 76ccb5d9de3..64ffedd2492 100644 --- a/cpp/tests/experimental/mg_bfs_test.cpp +++ b/cpp/tests/experimental/mg_bfs_test.cpp @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +#include #include #include #include @@ -34,6 +35,11 @@ #include +// do the perf measurements +// enabled by command line parameter s'--perf' +// +static int PERF = 0; + typedef struct BFS_Usecase_t { cugraph::test::input_graph_specifier_t input_graph_specifier{}; @@ -117,6 +123,7 @@ class Tests_MGBFS : public ::testing::TestWithParam { // 1. initialize handle raft::handle_t handle{}; + HighResClock hr_clock{}; raft::comms::initialize_mpi_comms(&handle, MPI_COMM_WORLD); auto& comm = handle.get_comms(); @@ -130,10 +137,20 @@ class Tests_MGBFS : public ::testing::TestWithParam { // 2. create MG graph + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_clock.start(); + } cugraph::experimental::graph_t mg_graph(handle); rmm::device_uvector d_mg_renumber_map_labels(0, handle.get_stream()); std::tie(mg_graph, d_mg_renumber_map_labels) = read_graph(handle, configuration, true); + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "MG read_graph took " << elapsed_time * 1e-6 << " s.\n"; + } auto mg_graph_view = mg_graph.view(); @@ -149,7 +166,10 @@ class Tests_MGBFS : public ::testing::TestWithParam { rmm::device_uvector d_mg_predecessors(mg_graph_view.get_number_of_local_vertices(), handle.get_stream()); - CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_clock.start(); + } cugraph::experimental::bfs(handle, mg_graph_view, @@ -157,10 +177,14 @@ class Tests_MGBFS : public ::testing::TestWithParam { d_mg_predecessors.data(), static_cast(configuration.source), false, - std::numeric_limits::max(), - true); + std::numeric_limits::max()); - CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "MG BFS took " << elapsed_time * 1e-6 << " s.\n"; + } // 5. copmare SG & MG results @@ -204,8 +228,7 @@ class Tests_MGBFS : public ::testing::TestWithParam { d_sg_predecessors.data(), unrenumbered_source, false, - std::numeric_limits::max(), - true); + std::numeric_limits::max()); // 5-3. compare diff --git a/cpp/tests/experimental/mg_katz_centrality_test.cpp b/cpp/tests/experimental/mg_katz_centrality_test.cpp index e3033af3771..937bd33472b 100644 --- a/cpp/tests/experimental/mg_katz_centrality_test.cpp +++ b/cpp/tests/experimental/mg_katz_centrality_test.cpp @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +#include #include #include #include @@ -31,6 +32,11 @@ #include +// do the perf measurements +// enabled by command line parameter s'--perf' +// +static int PERF = 0; + typedef struct KatzCentrality_Usecase_t { cugraph::test::input_graph_specifier_t input_graph_specifier{}; @@ -117,6 +123,7 @@ class Tests_MGKatzCentrality : public ::testing::TestWithParam mg_graph(handle); rmm::device_uvector d_mg_renumber_map_labels(0, handle.get_stream()); std::tie(mg_graph, d_mg_renumber_map_labels) = read_graph(handle, configuration, true); + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "MG read_graph took " << elapsed_time * 1e-6 << " s.\n"; + } auto mg_graph_view = mg_graph.view(); @@ -150,7 +167,10 @@ class Tests_MGKatzCentrality : public ::testing::TestWithParam d_mg_katz_centralities( mg_graph_view.get_number_of_local_vertices(), handle.get_stream()); - CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_clock.start(); + } cugraph::experimental::katz_centrality(handle, mg_graph_view, @@ -160,10 +180,14 @@ class Tests_MGKatzCentrality : public ::testing::TestWithParam::max(), - false, - true); + false); - CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "MG Katz Centrality took " << elapsed_time * 1e-6 << " s.\n"; + } // 5. copmare SG & MG results @@ -189,8 +213,7 @@ class Tests_MGKatzCentrality : public ::testing::TestWithParam::max(), // max_iterations - false, - true); + false); // 5-4. compare diff --git a/cpp/tests/experimental/mg_sssp_test.cpp b/cpp/tests/experimental/mg_sssp_test.cpp index 48e4dc869f4..de39b8da128 100644 --- a/cpp/tests/experimental/mg_sssp_test.cpp +++ b/cpp/tests/experimental/mg_sssp_test.cpp @@ -14,6 +14,7 @@ * limitations under the License. */ +#include #include #include #include @@ -34,6 +35,11 @@ #include +// do the perf measurements +// enabled by command line parameter s'--perf' +// +static int PERF = 0; + typedef struct SSSP_Usecase_t { cugraph::test::input_graph_specifier_t input_graph_specifier{}; @@ -115,6 +121,7 @@ class Tests_MGSSSP : public ::testing::TestWithParam { // 1. initialize handle raft::handle_t handle{}; + HighResClock hr_clock{}; raft::comms::initialize_mpi_comms(&handle, MPI_COMM_WORLD); auto& comm = handle.get_comms(); @@ -128,10 +135,20 @@ class Tests_MGSSSP : public ::testing::TestWithParam { // 2. 
create MG graph + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_clock.start(); + } cugraph::experimental::graph_t mg_graph(handle); rmm::device_uvector d_mg_renumber_map_labels(0, handle.get_stream()); std::tie(mg_graph, d_mg_renumber_map_labels) = read_graph(handle, configuration, true); + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "MG read_graph took " << elapsed_time * 1e-6 << " s.\n"; + } auto mg_graph_view = mg_graph.view(); @@ -147,7 +164,10 @@ class Tests_MGSSSP : public ::testing::TestWithParam { rmm::device_uvector d_mg_predecessors(mg_graph_view.get_number_of_local_vertices(), handle.get_stream()); - CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_clock.start(); + } // FIXME: disable do_expensive_check cugraph::experimental::sssp(handle, @@ -155,10 +175,14 @@ class Tests_MGSSSP : public ::testing::TestWithParam { d_mg_distances.data(), d_mg_predecessors.data(), static_cast(configuration.source), - std::numeric_limits::max(), - true); + std::numeric_limits::max()); - CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "MG SSSP took " << elapsed_time * 1e-6 << " s.\n"; + } // 5. copmare SG & MG results @@ -202,8 +226,7 @@ class Tests_MGSSSP : public ::testing::TestWithParam { d_sg_distances.data(), d_sg_predecessors.data(), unrenumbered_source, - std::numeric_limits::max(), - true); + std::numeric_limits::max()); // 5-3. compare diff --git a/cpp/tests/experimental/pagerank_test.cpp b/cpp/tests/experimental/pagerank_test.cpp index 649fe11d805..0340140d14b 100644 --- a/cpp/tests/experimental/pagerank_test.cpp +++ b/cpp/tests/experimental/pagerank_test.cpp @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +#include #include #include #include @@ -213,11 +214,22 @@ class Tests_PageRank : public ::testing::TestWithParam { constexpr bool renumber = true; raft::handle_t handle{}; + HighResClock hr_clock{}; + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_clock.start(); + } cugraph::experimental::graph_t graph(handle); rmm::device_uvector d_renumber_map_labels(0, handle.get_stream()); std::tie(graph, d_renumber_map_labels) = read_graph(handle, configuration, renumber); + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "read_graph took " << elapsed_time * 1e-6 << " s.\n"; + } auto graph_view = graph.view(); std::vector h_personalization_vertices{}; @@ -271,7 +283,10 @@ class Tests_PageRank : public ::testing::TestWithParam { rmm::device_uvector d_pageranks(graph_view.get_number_of_vertices(), handle.get_stream()); - CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_clock.start(); + } cugraph::experimental::pagerank(handle, graph_view, @@ -286,7 +301,12 @@ class Tests_PageRank : public ::testing::TestWithParam { false, false); - CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "PageRank took " << elapsed_time * 1e-6 << " s.\n"; + } if (configuration.check_correctness) { cugraph::experimental::graph_t unrenumbered_graph( diff --git a/cpp/tests/experimental/sssp_test.cpp b/cpp/tests/experimental/sssp_test.cpp index 9364d261dec..e8ab3ec5426 100644 --- a/cpp/tests/experimental/sssp_test.cpp +++ b/cpp/tests/experimental/sssp_test.cpp @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +#include #include #include #include @@ -157,11 +158,22 @@ class Tests_SSSP : public ::testing::TestWithParam { constexpr bool renumber = true; raft::handle_t handle{}; + HighResClock hr_clock{}; + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_clock.start(); + } cugraph::experimental::graph_t graph(handle); rmm::device_uvector d_renumber_map_labels(0, handle.get_stream()); std::tie(graph, d_renumber_map_labels) = read_graph(handle, configuration, renumber); + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "read_graph took " << elapsed_time * 1e-6 << " s.\n"; + } auto graph_view = graph.view(); ASSERT_TRUE(static_cast(configuration.source) >= 0 && @@ -172,7 +184,10 @@ class Tests_SSSP : public ::testing::TestWithParam { rmm::device_uvector d_predecessors(graph_view.get_number_of_vertices(), handle.get_stream()); - CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_clock.start(); + } cugraph::experimental::sssp(handle, graph_view, @@ -182,7 +197,12 @@ class Tests_SSSP : public ::testing::TestWithParam { std::numeric_limits::max(), false); - CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "SSSP took " << elapsed_time * 1e-6 << " s.\n"; + } if (configuration.check_correctness) { cugraph::experimental::graph_t unrenumbered_graph( @@ -323,7 +343,7 @@ INSTANTIATE_TEST_CASE_P( SSSP_Usecase("test/datasets/wiki2003.mtx", 1000), SSSP_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, 0), // disable correctness checks for large graphs - SSSP_Usecase(cugraph::test::rmat_params_t{20, 16, 0.57, 0.19, 0.19, 0, false, false}, + SSSP_Usecase(cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, 0, false))); diff --git a/cpp/tests/pagerank/mg_pagerank_test.cpp b/cpp/tests/pagerank/mg_pagerank_test.cpp index f7b1e8dfbb4..bbc80a60a3d 100644 --- a/cpp/tests/pagerank/mg_pagerank_test.cpp +++ b/cpp/tests/pagerank/mg_pagerank_test.cpp @@ -14,6 +14,7 @@ * limitations under the License. */ +#include #include #include #include @@ -34,6 +35,11 @@ #include +// do the perf measurements +// enabled by command line parameter s'--perf' +// +static int PERF = 0; + typedef struct PageRank_Usecase_t { cugraph::test::input_graph_specifier_t input_graph_specifier{}; @@ -127,6 +133,7 @@ class Tests_MGPageRank : public ::testing::TestWithParam { // 1. initialize handle raft::handle_t handle{}; + HighResClock hr_clock{}; raft::comms::initialize_mpi_comms(&handle, MPI_COMM_WORLD); auto& comm = handle.get_comms(); @@ -140,10 +147,20 @@ class Tests_MGPageRank : public ::testing::TestWithParam { // 2. 
create MG graph + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_clock.start(); + } cugraph::experimental::graph_t mg_graph(handle); rmm::device_uvector d_mg_renumber_map_labels(0, handle.get_stream()); std::tie(mg_graph, d_mg_renumber_map_labels) = read_graph(handle, configuration, true); + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "MG read_graph took " << elapsed_time * 1e-6 << " s.\n"; + } auto mg_graph_view = mg_graph.view(); @@ -195,7 +212,10 @@ class Tests_MGPageRank : public ::testing::TestWithParam { rmm::device_uvector d_mg_pageranks(mg_graph_view.get_number_of_local_vertices(), handle.get_stream()); - CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_clock.start(); + } cugraph::experimental::pagerank(handle, mg_graph_view, @@ -209,7 +229,12 @@ class Tests_MGPageRank : public ::testing::TestWithParam { std::numeric_limits::max(), false); - CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "MG PageRank took " << elapsed_time * 1e-6 << " s.\n"; + } // 5. copmare SG & MG results diff --git a/cpp/tests/utilities/generate_graph_from_edgelist.cu b/cpp/tests/utilities/generate_graph_from_edgelist.cu index a9df392d2fb..5f41e0e5ce0 100644 --- a/cpp/tests/utilities/generate_graph_from_edgelist.cu +++ b/cpp/tests/utilities/generate_graph_from_edgelist.cu @@ -109,7 +109,6 @@ generate_graph_from_edgelist_impl(raft::handle_t const& handle, (store_transposed ? edgelist_rows.begin() : edgelist_cols.begin()) + h_displacements[i]; counts[i] = static_cast(h_edge_counts[i]); } - // FIXME: set do_expensive_check to false once validated std::tie(renumber_map_labels, partition, number_of_vertices, number_of_edges) = cugraph::experimental::renumber_edgelist( handle, @@ -117,8 +116,7 @@ generate_graph_from_edgelist_impl(raft::handle_t const& handle, static_cast(vertices.size()), major_ptrs, minor_ptrs, - counts, - true); + counts); } // 4. create a graph @@ -142,7 +140,6 @@ generate_graph_from_edgelist_impl(raft::handle_t const& handle, number_of_vertices, number_of_edges, cugraph::experimental::graph_properties_t{is_symmetric, false, test_weighted}, - true, true), std::move(renumber_map_labels)); } @@ -168,7 +165,6 @@ generate_graph_from_edgelist_impl(raft::handle_t const& handle, { vertex_t number_of_vertices = static_cast(vertices.size()); - // FIXME: set do_expensive_check to false once validated auto renumber_map_labels = renumber ? cugraph::experimental::renumber_edgelist( handle, @@ -176,11 +172,9 @@ generate_graph_from_edgelist_impl(raft::handle_t const& handle, static_cast(vertices.size()), store_transposed ? edgelist_cols.data() : edgelist_rows.data(), store_transposed ? 
edgelist_rows.data() : edgelist_cols.data(), - static_cast(edgelist_rows.size()), - true) + static_cast(edgelist_rows.size())) : rmm::device_uvector(0, handle.get_stream()); - // FIXME: set do_expensive_check to false once validated return std::make_tuple( cugraph::experimental::graph_t( handle, @@ -191,8 +185,7 @@ generate_graph_from_edgelist_impl(raft::handle_t const& handle, static_cast(edgelist_rows.size())}, number_of_vertices, cugraph::experimental::graph_properties_t{is_symmetric, false, test_weighted}, - renumber ? true : false, - true), + renumber ? true : false), std::move(renumber_map_labels)); } From 79c3ba059d2cc4f2bfafece0e92671013a584175 Mon Sep 17 00:00:00 2001 From: Hugo Linsenmaier Date: Wed, 7 Apr 2021 17:05:09 +0200 Subject: [PATCH 220/343] Add utility function for computing a secondary cost for BFS and SSSP output (#1376) Solves: https://github.com/rapidsai/cugraph/issues/1373 Authors: - Hugo Linsenmaier (https://github.com/hlinsen) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - Andrei Schaffer (https://github.com/aschaffer) - Alex Fender (https://github.com/afender) URL: https://github.com/rapidsai/cugraph/pull/1376 --- cpp/CMakeLists.txt | 1 + cpp/include/utilities/path_retrieval.hpp | 45 ++++++ cpp/src/utilities/path_retrieval.cu | 133 ++++++++++++++++++ python/cugraph/tests/test_utils.py | 28 ++++ python/cugraph/utilities/__init__.py | 3 +- python/cugraph/utilities/path_retrieval.pxd | 30 ++++ python/cugraph/utilities/path_retrieval.py | 100 +++++++++++++ .../utilities/path_retrieval_wrapper.pyx | 72 ++++++++++ 8 files changed, 411 insertions(+), 1 deletion(-) create mode 100644 cpp/include/utilities/path_retrieval.hpp create mode 100644 cpp/src/utilities/path_retrieval.cu create mode 100644 python/cugraph/utilities/path_retrieval.pxd create mode 100644 python/cugraph/utilities/path_retrieval.py create mode 100644 python/cugraph/utilities/path_retrieval_wrapper.pyx diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 5a3cb65caa5..0388a76d729 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -402,6 +402,7 @@ endif(BUILD_STATIC_FAISS) add_library(cugraph SHARED src/utilities/spmv_1D.cu src/utilities/cython.cu + src/utilities/path_retrieval.cu src/structure/graph.cu src/linear_assignment/hungarian.cu src/link_analysis/gunrock_hits.cpp diff --git a/cpp/include/utilities/path_retrieval.hpp b/cpp/include/utilities/path_retrieval.hpp new file mode 100644 index 00000000000..e626d6af1ab --- /dev/null +++ b/cpp/include/utilities/path_retrieval.hpp @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace cugraph { + +/** + * @brief Takes the results of BFS or SSSP function call and sums the given + * weights along the path to the starting vertex. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam weight_t Type of edge weights. Needs to be a floating point type. 
+ * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. Must have at least one worker stream. + * @param vertices Pointer to vertex ids. + * @param preds Pointer to predecessors. + * @param info_weights Secondary weights along the edge from predecessor to vertex. + * @param out Contains for each index the sum of weights along the path unfolding. + * @param num_vertices Number of vertices. + **/ +template +void get_traversed_cost(raft::handle_t const &handle, + vertex_t const *vertices, + vertex_t const *preds, + weight_t const *info_weights, + weight_t *out, + vertex_t stop_vertex, + vertex_t num_vertices); +} // namespace cugraph diff --git a/cpp/src/utilities/path_retrieval.cu b/cpp/src/utilities/path_retrieval.cu new file mode 100644 index 00000000000..93ead5898f8 --- /dev/null +++ b/cpp/src/utilities/path_retrieval.cu @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include + +#include +#include + +namespace cugraph { +namespace detail { + +template +__global__ void get_traversed_cost_kernel(vertex_t const *vertices, + vertex_t const *preds, + vertex_t const *vtx_map, + weight_t const *info_weights, + weight_t *out, + vertex_t stop_vertex, + vertex_t num_vertices) +{ + for (vertex_t i = threadIdx.x + blockIdx.x * blockDim.x; i < num_vertices; + i += gridDim.x * blockDim.x) { + weight_t sum = info_weights[i]; + vertex_t pred = preds[i]; + while (pred != stop_vertex) { + vertex_t pos = vtx_map[pred]; + sum += info_weights[pos]; + pred = preds[pos]; + } + out[i] = sum; + } +} + +template +void get_traversed_cost_impl(raft::handle_t const &handle, + vertex_t const *vertices, + vertex_t const *preds, + weight_t const *info_weights, + weight_t *out, + vertex_t stop_vertex, + vertex_t num_vertices) +{ + auto stream = handle.get_stream(); + vertex_t max_blocks = handle.get_device_properties().maxGridSize[0]; + vertex_t max_threads = handle.get_device_properties().maxThreadsPerBlock; + + dim3 nthreads, nblocks; + nthreads.x = std::min(num_vertices, max_threads); + nthreads.y = 1; + nthreads.z = 1; + nblocks.x = std::min((num_vertices + nthreads.x - 1) / nthreads.x, max_blocks); + nblocks.y = 1; + nblocks.z = 1; + + rmm::device_uvector vtx_map_v(num_vertices, stream); + rmm::device_uvector vtx_keys_v(num_vertices, stream); + vertex_t *vtx_map = vtx_map_v.data(); + vertex_t *vtx_keys = vtx_keys_v.data(); + raft::copy(vtx_keys, vertices, num_vertices, stream); + + thrust::sequence(rmm::exec_policy(stream)->on(stream), vtx_map, vtx_map + num_vertices); + + thrust::stable_sort_by_key( + rmm::exec_policy(stream)->on(stream), vtx_keys, vtx_keys + num_vertices, vtx_map); + + get_traversed_cost_kernel<<>>( + vertices, preds, vtx_map, info_weights, out, stop_vertex, num_vertices); +} +} // namespace detail + +template +void get_traversed_cost(raft::handle_t const &handle, + vertex_t const 
*vertices, + vertex_t const *preds, + weight_t const *info_weights, + weight_t *out, + vertex_t stop_vertex, + vertex_t num_vertices) +{ + CUGRAPH_EXPECTS(num_vertices > 0, "num_vertices should be strictly positive"); + CUGRAPH_EXPECTS(out != nullptr, "out should be of size num_vertices"); + cugraph::detail::get_traversed_cost_impl( + handle, vertices, preds, info_weights, out, stop_vertex, num_vertices); +} + +template void get_traversed_cost(raft::handle_t const &handle, + int32_t const *vertices, + int32_t const *preds, + float const *info_weights, + float *out, + int32_t stop_vertex, + int32_t num_vertices); + +template void get_traversed_cost(raft::handle_t const &handle, + int32_t const *vertices, + int32_t const *preds, + double const *info_weights, + double *out, + int32_t stop_vertex, + int32_t num_vertices); + +template void get_traversed_cost(raft::handle_t const &handle, + int64_t const *vertices, + int64_t const *preds, + float const *info_weights, + float *out, + int64_t stop_vertex, + int64_t num_vertices); + +template void get_traversed_cost(raft::handle_t const &handle, + int64_t const *vertices, + int64_t const *preds, + double const *info_weights, + double *out, + int64_t stop_vertex, + int64_t num_vertices); +} // namespace cugraph diff --git a/python/cugraph/tests/test_utils.py b/python/cugraph/tests/test_utils.py index 2ca820271c0..55256d6b74e 100644 --- a/python/cugraph/tests/test_utils.py +++ b/python/cugraph/tests/test_utils.py @@ -17,7 +17,9 @@ import pytest import cugraph +import cudf from cugraph.tests import utils +import numpy as np def test_bfs_paths(): @@ -68,3 +70,29 @@ def test_bfs_paths_array(): answer = cugraph.utils.get_traversed_path_list(df, 100) assert "not in the result set" in str(ErrorMsg) + + +@pytest.mark.parametrize("graph_file", utils.DATASETS) +def test_get_traversed_cost(graph_file): + cu_M = utils.read_csv_file(graph_file) + + noise = cudf.Series(np.random.randint(10, size=(cu_M.shape[0]))) + cu_M['info'] = cu_M['2'] + noise + + G = cugraph.Graph() + G.from_cudf_edgelist(cu_M, source='0', destination='1', edge_attr='info') + + # run SSSP starting at vertex 17 + df = cugraph.sssp(G, 16) + + answer = cugraph.utilities.path_retrieval.get_traversed_cost(df, 16, + cu_M['0'], + cu_M['1'], + cu_M['info'] + ) + + df = df.sort_values(by='vertex').reset_index() + answer = answer.sort_values(by='vertex').reset_index() + + assert df.shape[0] == answer.shape[0] + assert np.allclose(df['distance'], answer['info']) diff --git a/python/cugraph/utilities/__init__.py b/python/cugraph/utilities/__init__.py index 61f5596eee6..38b46b0fe87 100644 --- a/python/cugraph/utilities/__init__.py +++ b/python/cugraph/utilities/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -25,3 +25,4 @@ is_cp_matrix_type, is_sp_matrix_type, ) +from cugraph.utilities.path_retrieval import get_traversed_cost diff --git a/python/cugraph/utilities/path_retrieval.pxd b/python/cugraph/utilities/path_retrieval.pxd new file mode 100644 index 00000000000..88f1da8f213 --- /dev/null +++ b/python/cugraph/utilities/path_retrieval.pxd @@ -0,0 +1,30 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# cython: profile=False +# distutils: language = c++ +# cython: embedsignature = True +# cython: language_level = 3 + +from cugraph.structure.graph_primtypes cimport * + +cdef extern from "utilities/path_retrieval.hpp" namespace "cugraph": + + cdef void get_traversed_cost[vertex_t, weight_t](const handle_t &handle, + const vertex_t *vertices, + const vertex_t *preds, + const weight_t *info_weights, + weight_t *out, + vertex_t stop_vertex, + vertex_t num_vertices) except + + diff --git a/python/cugraph/utilities/path_retrieval.py b/python/cugraph/utilities/path_retrieval.py new file mode 100644 index 00000000000..b9baadc2f21 --- /dev/null +++ b/python/cugraph/utilities/path_retrieval.py @@ -0,0 +1,100 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import cudf + +from cugraph.structure.symmetrize import symmetrize +from cugraph.structure.number_map import NumberMap +from cugraph.utilities import path_retrieval_wrapper + + +def get_traversed_cost(df, source, source_col, dest_col, value_col): + """ + Take the DataFrame result from a BFS or SSSP function call and sums + the given weights along the path to the starting vertex. + The source_col, dest_col identifiers need to match with the vertex and + predecessor columns of df. + + Input Parameters + ---------- + df : cudf.DataFrame + The dataframe containing the results of a BFS or SSSP call + source: int + Index of the source vertex. + source_col : cudf.DataFrame + This cudf.Series wraps a gdf_column of size E (E: number of edges). + The gdf column contains the source index for each edge. + Source indices must be an integer type. + dest_col : cudf.Series + This cudf.Series wraps a gdf_column of size E (E: number of edges). + The gdf column contains the destination index for each edge. + Destination indices must be an integer type. + value_col : cudf.Series + This cudf.Series wraps a gdf_column of size E (E: number of edges). + The gdf column contains values associated with this edge. + Weight should be a floating type. + + Returns + --------- + df : cudf.DataFrame + DataFrame containing two columns 'vertex' and 'info'. + Unreachable vertices will have value the max value of the weight type. 
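+
+    Examples
+    --------
+    A hypothetical usage sketch (the graph G, the edgelist columns and the
+    start vertex below are illustrative placeholders, not part of this
+    change; the call mirrors the accompanying test):
+
+    >>> df = cugraph.sssp(G, 0)
+    >>> cost_df = get_traversed_cost(df, 0,
+    ...                              edgelist['src'], edgelist['dst'],
+    ...                              edgelist['weight'])
+    >>> # cost_df has 'vertex' and 'info' columns with the accumulated weights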
+    """
+
+    if 'vertex' not in df.columns:
+        raise ValueError("DataFrame does not appear to be a BFS or "
+                         "SSSP result - 'vertex' column missing")
+    if 'distance' not in df.columns:
+        raise ValueError("DataFrame does not appear to be a BFS or "
+                         "SSSP result - 'distance' column missing")
+    if 'predecessor' not in df.columns:
+        raise ValueError("DataFrame does not appear to be a BFS or "
+                         "SSSP result - 'predecessor' column missing")
+
+    src, dst, val = symmetrize(source_col,
+                               dest_col,
+                               value_col)
+
+    symmetrized_df = cudf.DataFrame()
+    symmetrized_df['source'] = src
+    symmetrized_df['destination'] = dst
+    symmetrized_df['weights'] = val
+
+    input_df = df.merge(symmetrized_df,
+                        left_on=['vertex', 'predecessor'],
+                        right_on=['source', 'destination'],
+                        how="left"
+                        )
+
+    # Set unreachable vertex weights to max float and source vertex weight to 0
+    max_val = np.finfo(val.dtype).max
+    input_df[['weights']] = input_df[['weights']].fillna(max_val)
+    input_df.loc[input_df['vertex'] == source, 'weights'] = 0
+
+    # Renumber
+    renumbered_gdf, renumber_map = NumberMap.renumber(input_df,
+                                                      ["vertex"],
+                                                      ["predecessor"],
+                                                      preserve_order=True)
+    renumbered_gdf = renumbered_gdf.rename(columns={'src': 'vertex',
+                                                    'dst': 'predecessor'})
+    stop_vertex = renumber_map.to_internal_vertex_id(cudf.Series(-1)).values[0]
+
+    out_df = path_retrieval_wrapper.get_traversed_cost(renumbered_gdf,
+                                                       stop_vertex)
+
+    # Unrenumber
+    out_df['vertex'] = renumber_map.unrenumber(renumbered_gdf, 'vertex',
+                                               preserve_order=True)["vertex"]
+    return out_df
diff --git a/python/cugraph/utilities/path_retrieval_wrapper.pyx b/python/cugraph/utilities/path_retrieval_wrapper.pyx
new file mode 100644
index 00000000000..98d11ad07df
--- /dev/null
+++ b/python/cugraph/utilities/path_retrieval_wrapper.pyx
@@ -0,0 +1,72 @@
+# Copyright (c) 2021, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +# cython: profile=False +# distutils: language = c++ +# cython: embedsignature = True +# cython: language_level = 3 + +from cugraph.utilities.path_retrieval cimport get_traversed_cost as c_get_traversed_cost +from cugraph.structure.graph_primtypes cimport * +from libc.stdint cimport uintptr_t +from numba import cuda +import cudf +import numpy as np + + +def get_traversed_cost(input_df, stop_vertex): + """ + Call get_traversed_cost + """ + num_verts = input_df.shape[0] + vertex_t = input_df.vertex.dtype + weight_t = input_df.weights.dtype + + df = cudf.DataFrame() + df['vertex'] = input_df['vertex'] + df['info'] = cudf.Series(np.zeros(num_verts, dtype=weight_t)) + + cdef unique_ptr[handle_t] handle_ptr + handle_ptr.reset(new handle_t()) + handle_ = handle_ptr.get(); + + cdef uintptr_t vertices = NULL + cdef uintptr_t preds = NULL + cdef uintptr_t out = NULL + cdef uintptr_t info_weights = NULL + + vertices = input_df['vertex'].__cuda_array_interface__['data'][0] + preds = input_df['predecessor'].__cuda_array_interface__['data'][0] + info_weights = input_df['weights'].__cuda_array_interface__['data'][0] + out = df['info'].__cuda_array_interface__['data'][0] + + if weight_t == np.float32: + c_get_traversed_cost(handle_[0], + vertices, + preds, + info_weights, + out, + stop_vertex, + num_verts) + elif weight_t == np.float64: + c_get_traversed_cost(handle_[0], + vertices, + preds, + info_weights, + out, + stop_vertex, + num_verts) + else: + raise NotImplementedError + + return df From 63e69fcf32742fdee7e14267ba6accd94fd19c4c Mon Sep 17 00:00:00 2001 From: Joseph Nke <76006812+jnke2016@users.noreply.github.com> Date: Wed, 7 Apr 2021 18:51:32 -0500 Subject: [PATCH 221/343] Random Walks - Python Bindings (#1516) Python bindings for random walks closes #1488 check the rendering after the PR is merged to make sure everything render as expected Authors: - Joseph Nke (https://github.com/jnke2016) Approvers: - Brad Rees (https://github.com/BradReesWork) - Andrei Schaffer (https://github.com/aschaffer) - Alex Fender (https://github.com/afender) URL: https://github.com/rapidsai/cugraph/pull/1516 --- README.md | 1 + docs/source/api.rst | 11 ++ python/cugraph/__init__.py | 2 + python/cugraph/sampling/__init__.py | 14 ++ python/cugraph/sampling/random_walks.pxd | 22 +++ python/cugraph/sampling/random_walks.py | 95 +++++++++++ .../cugraph/sampling/random_walks_wrapper.pyx | 116 +++++++++++++ python/cugraph/structure/graph_utilities.pxd | 9 + python/cugraph/tests/test_random_walks.py | 154 ++++++++++++++++++ 9 files changed, 424 insertions(+) create mode 100644 python/cugraph/sampling/__init__.py create mode 100644 python/cugraph/sampling/random_walks.pxd create mode 100644 python/cugraph/sampling/random_walks.py create mode 100644 python/cugraph/sampling/random_walks_wrapper.pyx create mode 100644 python/cugraph/tests/test_random_walks.py diff --git a/README.md b/README.md index 4bdbcd00280..ccc91bfe225 100644 --- a/README.md +++ b/README.md @@ -82,6 +82,7 @@ As of Release 0.18 - including 0.18 nightly | | Breadth First Search (BFS) | Multi-GPU | with cutoff support
[C++ README](cpp/src/traversal/README.md#BFS) | | | Single Source Shortest Path (SSSP) | Multi-GPU | [C++ README](cpp/src/traversal/README.md#SSSP) | | | Traveling Salesperson Problem (TSP) | Single-GPU | | +| Sampling | Random Walks (RW) | Single-GPU | | | Structure | | | | | | Renumbering | Single-GPU | multiple columns, any data type | | | Symmetrize | Multi-GPU | | diff --git a/docs/source/api.rst b/docs/source/api.rst index b02f8f488c5..b9b8ea4859c 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -225,6 +225,17 @@ Overlap Coefficient :undoc-members: +Sampling +======== + +Random Walks +------------ + +.. automodule:: cugraph.sampling.random_walks + :members: + :undoc-members: + + Traversal ========= diff --git a/python/cugraph/__init__.py b/python/cugraph/__init__.py index 11ba2d6ef96..d4632708591 100644 --- a/python/cugraph/__init__.py +++ b/python/cugraph/__init__.py @@ -101,6 +101,8 @@ from cugraph.raft import raft_include_test from cugraph.comms import comms +from cugraph.sampling import random_walks + # Versioneer from ._version import get_versions diff --git a/python/cugraph/sampling/__init__.py b/python/cugraph/sampling/__init__.py new file mode 100644 index 00000000000..fd9d072d4f8 --- /dev/null +++ b/python/cugraph/sampling/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from cugraph.sampling.random_walks import random_walks diff --git a/python/cugraph/sampling/random_walks.pxd b/python/cugraph/sampling/random_walks.pxd new file mode 100644 index 00000000000..3e0e24b4e98 --- /dev/null +++ b/python/cugraph/sampling/random_walks.pxd @@ -0,0 +1,22 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * + +cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": + cdef unique_ptr[random_walk_ret_t] call_random_walks[vertex_t, edge_t]( + const handle_t &handle, + const graph_container_t &g, + const vertex_t *ptr_d_start, + edge_t num_paths, + edge_t max_depth) except + diff --git a/python/cugraph/sampling/random_walks.py b/python/cugraph/sampling/random_walks.py new file mode 100644 index 00000000000..7ab3191a07c --- /dev/null +++ b/python/cugraph/sampling/random_walks.py @@ -0,0 +1,95 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import cudf
+from cugraph.sampling import random_walks_wrapper
+import cugraph
+from collections import defaultdict
+
+# FIXME might be more efficient to return either (df + offset) or 3 cudf.Series
+
+
+def random_walks(
+    G,
+    start_vertices,
+    max_depth=None
+):
+    """
+    Compute random walks for each node in 'start_vertices'.
+
+    Parameters
+    ----------
+    G : cuGraph.Graph or networkx.Graph
+        The graph can be either directed (DiGraph) or undirected (Graph).
+        Weights in the graph are ignored.
+        Use weight parameter if weights need to be considered
+        (currently not supported)
+
+    start_vertices : int or list or cudf.Series
+        A single node or a list or a cudf.Series of nodes from which to run
+        the random walks
+
+    max_depth : int
+        The maximum depth of the random walks
+
+
+    Returns
+    -------
+    random_walks_edge_lists : cudf.DataFrame
+        GPU data frame containing all random walks sources identifiers,
+        destination identifiers, edge weights
+
+    seeds_offsets: cudf.Series
+        Series containing the starting offset in the returned edge list
+        for each vertex in start_vertices.
+    """
+    if max_depth is None:
+        raise TypeError("must specify a 'max_depth'")
+
+    G, _ = cugraph.utilities.check_nx_graph(G)
+
+    if isinstance(start_vertices, int):
+        start_vertices = [start_vertices]
+
+    if not isinstance(start_vertices, cudf.Series):
+        start_vertices = cudf.Series(start_vertices)
+
+    if G.renumbered is True:
+        start_vertices = G.lookup_internal_vertex_id(start_vertices)
+    vertex_set, edge_set, sizes = random_walks_wrapper.random_walks(
+        G, start_vertices, max_depth)
+
+    if G.renumbered:
+        df_ = cudf.DataFrame()
+        df_['vertex_set'] = vertex_set
+        df_ = G.unrenumber(df_, 'vertex_set', preserve_order=True)
+        vertex_set = cudf.Series(df_['vertex_set'])
+
+    edge_list = defaultdict(list)
+    next_path_idx = 0
+    offsets = [0]
+
+    df = cudf.DataFrame()
+    for s in sizes.values_host:
+        for i in range(next_path_idx, s+next_path_idx-1):
+            edge_list['src'].append(vertex_set.values_host[i])
+            edge_list['dst'].append(vertex_set.values_host[i+1])
+        next_path_idx += s
+        df = df.append(edge_list, ignore_index=True)
+        offsets.append(df.index[-1]+1)
+        edge_list['src'].clear()
+        edge_list['dst'].clear()
+    df['weight'] = edge_set
+    offsets = cudf.Series(offsets)
+
+    return df, offsets
diff --git a/python/cugraph/sampling/random_walks_wrapper.pyx b/python/cugraph/sampling/random_walks_wrapper.pyx
new file mode 100644
index 00000000000..7b16ff14018
--- /dev/null
+++ b/python/cugraph/sampling/random_walks_wrapper.pyx
@@ -0,0 +1,116 @@
+# Copyright (c) 2021, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from cugraph.sampling.random_walks cimport call_random_walks +#from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * +from libcpp cimport bool +from libcpp.utility cimport move +from libc.stdint cimport uintptr_t +from cugraph.structure import graph_primtypes_wrapper +import cudf +import rmm +import numpy as np +import numpy.ctypeslib as ctypeslib +from rmm._lib.device_buffer cimport DeviceBuffer +from cudf.core.buffer import Buffer +from cython.operator cimport dereference as deref +def random_walks(input_graph, start_vertices, max_depth): + """ + Call random_walks + """ + # FIXME: Offsets and indices are currently hardcoded to int, but this may + # not be acceptable in the future. + numberTypeMap = {np.dtype("int32") : numberTypeEnum.int32Type, + np.dtype("int64") : numberTypeEnum.int64Type, + np.dtype("float32") : numberTypeEnum.floatType, + np.dtype("double") : numberTypeEnum.doubleType} + [src, dst] = [input_graph.edgelist.edgelist_df['src'], input_graph.edgelist.edgelist_df['dst']] + vertex_t = src.dtype + edge_t = np.dtype("int32") + weights = None + if input_graph.edgelist.weights: + weights = input_graph.edgelist.edgelist_df['weights'] + num_verts = input_graph.number_of_vertices() + num_edges = input_graph.number_of_edges(directed_edges=True) + num_partition_edges = num_edges + + if num_edges > (2**31 - 1): + edge_t = np.dtype("int64") + cdef unique_ptr[random_walk_ret_t] rw_ret_ptr + + cdef uintptr_t c_src_vertices = src.__cuda_array_interface__['data'][0] + cdef uintptr_t c_dst_vertices = dst.__cuda_array_interface__['data'][0] + cdef uintptr_t c_edge_weights = NULL + if weights is not None: + c_edge_weights = weights.__cuda_array_interface__['data'][0] + weight_t = weights.dtype + is_weighted = True + else: + weight_t = np.dtype("float32") + is_weighted = False + # Pointers for random_walks + start_vertices = start_vertices.astype('int32') + cdef uintptr_t c_start_vertex_ptr = start_vertices.__cuda_array_interface__['data'][0] + num_paths = start_vertices.size + cdef unique_ptr[handle_t] handle_ptr + handle_ptr.reset(new handle_t()) + handle_ = handle_ptr.get() + cdef graph_container_t graph_container + populate_graph_container(graph_container, + handle_[0], + c_src_vertices, c_dst_vertices, c_edge_weights, + NULL, + ((numberTypeMap[vertex_t])), + ((numberTypeMap[edge_t])), + ((numberTypeMap[weight_t])), + num_partition_edges, + num_verts, + num_edges, + False, + is_weighted, + False, False) + if(vertex_t == np.dtype("int32")): + if(edge_t == np.dtype("int32")): + rw_ret_ptr = move(call_random_walks[int, int]( deref(handle_), + graph_container, + c_start_vertex_ptr, + num_paths, + max_depth)) + else: # (edge_t == np.dtype("int64")): + rw_ret_ptr = move(call_random_walks[int, long]( deref(handle_), + graph_container, + c_start_vertex_ptr, + num_paths, + max_depth)) + else: # (vertex_t == edge_t == np.dtype("int64")): + rw_ret_ptr = move(call_random_walks[long, long]( deref(handle_), + graph_container, + c_start_vertex_ptr, + num_paths, + max_depth)) + + + rw_ret= move(rw_ret_ptr.get()[0]) + vertex_set = 
DeviceBuffer.c_from_unique_ptr(move(rw_ret.d_coalesced_v_)) + edge_set = DeviceBuffer.c_from_unique_ptr(move(rw_ret.d_coalesced_w_)) + sizes = DeviceBuffer.c_from_unique_ptr(move(rw_ret.d_sizes_)) + vertex_set = Buffer(vertex_set) + edge_set = Buffer(edge_set) + sizes = Buffer(sizes) + + set_vertex = cudf.Series(data=vertex_set, dtype=vertex_t) + set_edge = cudf.Series(data=edge_set, dtype=weight_t) + set_sizes = cudf.Series(data=sizes, dtype=edge_t) + + return set_vertex, set_edge, set_sizes + \ No newline at end of file diff --git a/python/cugraph/structure/graph_utilities.pxd b/python/cugraph/structure/graph_utilities.pxd index b169e42ccf8..c9cf1748bfe 100644 --- a/python/cugraph/structure/graph_utilities.pxd +++ b/python/cugraph/structure/graph_utilities.pxd @@ -83,6 +83,15 @@ cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": unique_ptr[device_buffer] dst_indices unique_ptr[device_buffer] edge_data unique_ptr[device_buffer] subgraph_offsets + + cdef cppclass random_walk_ret_t: + size_t coalesced_sz_v_ + size_t coalesced_sz_w_ + size_t num_paths_ + size_t max_depth_ + unique_ptr[device_buffer] d_coalesced_v_ + unique_ptr[device_buffer] d_coalesced_w_ + unique_ptr[device_buffer] d_sizes_ cdef extern from "" namespace "std" nogil: cdef device_buffer move(device_buffer) diff --git a/python/cugraph/tests/test_random_walks.py b/python/cugraph/tests/test_random_walks.py new file mode 100644 index 00000000000..9767e81ba1f --- /dev/null +++ b/python/cugraph/tests/test_random_walks.py @@ -0,0 +1,154 @@ +# Copyright (c) 2020-2021, NVIDIA CORPORATION.: +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import gc + +import pytest + +from cugraph.tests import utils +import cugraph +import random + + +# ============================================================================= +# Parameters +# ============================================================================= +DIRECTED_GRAPH_OPTIONS = [False, True] +WEIGHTED_GRAPH_OPTIONS = [False, True] +DATASETS = [pytest.param(d) for d in utils.DATASETS] +DATASETS_SMALL = [pytest.param(d) for d in utils.DATASETS_SMALL] + + +def calc_random_walks( + graph_file, + directed=False, + max_depth=None +): + """ + compute random walks for each nodes in 'start_vertices' + + parameters + ---------- + G : cuGraph.Graph or networkx.Graph + The graph can be either directed (DiGraph) or undirected (Graph). + Weights in the graph are ignored. + Use weight parameter if weights need to be considered + (currently not supported) + + start_vertices : int or list or cudf.Series + A single node or a list or a cudf.Series of nodes from which to run + the random walks + + max_depth : int + The maximum depth of the random walks + + + Returns + ------- + random_walks_edge_lists : cudf.DataFrame + GPU data frame containing all random walks sources identifiers, + destination identifiers, edge weights + + seeds_offsets: cudf.Series + Series containing the starting offset in the returned edge list + for each vertex in start_vertices. 
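+
+    Examples
+    --------
+    A rough sketch of the API exercised by this helper (the graph G and the
+    seed list below are illustrative only):
+
+    >>> df, offsets = cugraph.random_walks(G, [0, 1, 2], max_depth=4)
+    >>> # df holds one row per walk edge ('src', 'dst', 'weight');
+    >>> # offsets gives the starting row of each seed's walk in df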
+ """ + G = utils.generate_cugraph_graph_from_file( + graph_file, directed=directed, edgevals=True) + assert G is not None + + k = random.randint(1, 10) + start_vertices = random.sample(range(G.number_of_vertices()), k) + df, offsets = cugraph.random_walks(G, start_vertices, max_depth) + + return df, offsets, start_vertices + + +def check_random_walks(df, offsets, seeds, df_G=None): + invalid_edge = 0 + invalid_seeds = 0 + invalid_weight = 0 + offsets_idx = 0 + for i in range(len(df.index)): + src, dst, weight = df.iloc[i].to_array() + if i == offsets[offsets_idx]: + if df['src'].iloc[i] != seeds[offsets_idx]: + invalid_seeds += 1 + print( + "[ERR] Invalid seed: " + " src {} != src {}" + .format(df['src'].iloc[i], offsets[offsets_idx]) + ) + offsets_idx += 1 + + edge = df.loc[(df['src'] == (src)) & (df['dst'] == (dst))].reset_index( + drop=True) + exp_edge = df_G.loc[ + (df_G['src'] == (src)) & ( + df_G['dst'] == (dst))].reset_index(drop=True) + + if not exp_edge.equals(edge[:1]): + print( + "[ERR] Invalid edge: " + "There is no edge src {} dst {} weight {}" + .format(src, dst, weight) + ) + invalid_weight += 1 + + assert invalid_edge == 0 + assert invalid_seeds == 0 + assert invalid_weight == 0 + +# ============================================================================= +# Pytest Setup / Teardown - called for each test function +# ============================================================================= + + +def prepare_test(): + gc.collect() + + +@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) +@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) +@pytest.mark.parametrize("max_depth", [None]) +def test_random_walks_invalid_max_dept( + graph_file, + directed, + max_depth +): + """Test calls random_walks an invalid type""" + prepare_test() + with pytest.raises(TypeError): + df, offsets, seeds = calc_random_walks( + graph_file, + directed=directed, + max_depth=max_depth + ) + + +@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) +@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) +def test_random_walks( + graph_file, + directed +): + max_depth = random.randint(2, 10) + df_G = utils.read_csv_file(graph_file) + df_G.rename( + columns={"0": "src", "1": "dst", "2": "weight"}, inplace=True) + df, offsets, seeds = calc_random_walks( + graph_file, + directed, + max_depth=max_depth + ) + check_random_walks(df, offsets, seeds, df_G) From 9fd4f3c92135108f67f986b3f8d8633f4de47f0f Mon Sep 17 00:00:00 2001 From: Ayush Dattagupta Date: Thu, 8 Apr 2021 09:53:50 -0700 Subject: [PATCH 222/343] Update docs and remove all warnings (#1521) This pr fixes the following - Add traveling salesperson problem to the docs - Update docs to address all build warnings To remove some warnings. updated the use of `NOTE:` in cases like the one shown below. 
| Old | New | | ------------- | ------------- | | ![image](https://user-images.githubusercontent.com/19949207/113936070-283a2380-97ac-11eb-9705-9f261c965fa9.png) | ![image](https://user-images.githubusercontent.com/19949207/113935703-b06bf900-97ab-11eb-93a4-7df2f711c1aa.png) | Authors: - Ayush Dattagupta (https://github.com/ayushdg) Approvers: - Brad Rees (https://github.com/BradReesWork) - Rick Ratzel (https://github.com/rlratzel) - AJ Schmidt (https://github.com/ajschmidt8) URL: https://github.com/rapidsai/cugraph/pull/1521 --- docs/source/api.rst | 29 ++++++++------- python/cugraph/centrality/katz_centrality.py | 18 +++++----- python/cugraph/components/connectivity.py | 36 ++++++++++++------- .../dask/centrality/katz_centrality.py | 19 +++++----- python/cugraph/dask/link_analysis/pagerank.py | 2 ++ python/cugraph/link_analysis/pagerank.py | 1 - python/cugraph/structure/symmetrize.py | 1 + python/cugraph/traversal/bfs.py | 6 ++-- .../traversal/traveling_salesperson.py | 1 + 9 files changed, 70 insertions(+), 43 deletions(-) diff --git a/docs/source/api.rst b/docs/source/api.rst index b9b8ea4859c..e2c2c19cf02 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -192,7 +192,7 @@ Pagerank :undoc-members: Pagerank (MG) ---------- +------------- .. automodule:: cugraph.dask.link_analysis.pagerank :members: pagerank @@ -247,7 +247,7 @@ Breadth-first-search :undoc-members: Breadth-first-search (MG) --------------------- +------------------------- .. automodule:: cugraph.dask.traversal.bfs :members: @@ -261,12 +261,19 @@ Single-source-shortest-path :undoc-members: Single-source-shortest-path (MG) ---------------------------- +-------------------------------- .. automodule:: cugraph.dask.traversal.sssp :members: :undoc-members: +Traveling-salesperson-problem +----------------------------- + +.. automodule:: cugraph.traversal.traveling_salesperson + :members: + :undoc-members: + Tree ========= @@ -275,27 +282,25 @@ Minimum Spanning Tree --------------------- .. automodule:: cugraph.tree.minimum_spanning_tree - :members: + :members: minimum_spanning_tree :undoc-members: Maximum Spanning Tree --------------------- -.. automodule:: cugraph.tree.maximum_spanning_tree - :members: +.. automodule:: cugraph.tree.minimum_spanning_tree + :members: maximum_spanning_tree :undoc-members: + :noindex: -DASK MG Helper functions +DASK MG Helper functions =========================== .. automodule:: cugraph.comms.comms - :members: initialize - :undoc-members: - -.. automodule:: cugraph.comms.comms - :members: destroy + :members: initialize, destroy :undoc-members: + :member-order: bysource .. automodule:: cugraph.dask.common.read_utils :members: get_chunksize diff --git a/python/cugraph/centrality/katz_centrality.py b/python/cugraph/centrality/katz_centrality.py index 3e2680a196f..ce52d15f5db 100644 --- a/python/cugraph/centrality/katz_centrality.py +++ b/python/cugraph/centrality/katz_centrality.py @@ -39,14 +39,16 @@ def katz_centrality( Attenuation factor defaulted to None. If alpha is not specified then it is internally calculated as 1/(degree_max) where degree_max is the maximum out degree. - NOTE : The maximum acceptable value of alpha for convergence - alpha_max = 1/(lambda_max) where lambda_max is the largest eigenvalue - of the graph. - Since lambda_max is always lesser than or equal to degree_max for a - graph, alpha_max will always be greater than or equal to - (1/degree_max). 
Therefore, setting alpha to (1/degree_max) will - guarantee that it will never exceed alpha_max thus in turn fulfilling - the requirement for convergence. + + NOTE + The maximum acceptable value of alpha for convergence + alpha_max = 1/(lambda_max) where lambda_max is the largest eigenvalue + of the graph. + Since lambda_max is always lesser than or equal to degree_max for a + graph, alpha_max will always be greater than or equal to + (1/degree_max). Therefore, setting alpha to (1/degree_max) will + guarantee that it will never exceed alpha_max thus in turn fulfilling + the requirement for convergence. beta : None A weight scalar - currently Not Supported max_iter : int diff --git a/python/cugraph/components/connectivity.py b/python/cugraph/components/connectivity.py index 72f33ebfcbb..df33f8b8e03 100644 --- a/python/cugraph/components/connectivity.py +++ b/python/cugraph/components/connectivity.py @@ -138,8 +138,10 @@ def weakly_connected_components(G, directed : bool, optional - NOTE: For non-Graph-type (eg. sparse matrix) values of G only. Raises - TypeError if used with a Graph object. + NOTE + For non-Graph-type (eg. sparse matrix) values of G only. + Raises TypeError if used with a Graph object. + If True (default), then convert the input matrix to a cugraph.DiGraph and only move from point i to point j along paths csgraph[i, j]. If False, then find the shortest path on an undirected graph: the @@ -154,8 +156,10 @@ def weakly_connected_components(G, return_labels : bool, optional - NOTE: For non-Graph-type (eg. sparse matrix) values of G only. Raises - TypeError if used with a Graph object. + NOTE + For non-Graph-type (eg. sparse matrix) values of G only. Raises + TypeError if used with a Graph object. + If True (default), then return the labels for each of the connected components. @@ -231,8 +235,10 @@ def strongly_connected_components(G, directed : bool, optional - NOTE: For non-Graph-type (eg. sparse matrix) values of G only. Raises - TypeError if used with a Graph object. + NOTE + For non-Graph-type (eg. sparse matrix) values of G only. + Raises TypeError if used with a Graph object. + If True (default), then convert the input matrix to a cugraph.DiGraph and only move from point i to point j along paths csgraph[i, j]. If False, then find the shortest path on an undirected graph: the @@ -247,8 +253,10 @@ def strongly_connected_components(G, return_labels : bool, optional - NOTE: For non-Graph-type (eg. sparse matrix) values of G only. Raises - TypeError if used with a Graph object. + NOTE + For non-Graph-type (eg. sparse matrix) values of G only. Raises + TypeError if used with a Graph object. + If True (default), then return the labels for each of the connected components. @@ -325,8 +333,10 @@ def connected_components(G, directed : bool, optional - NOTE: For non-Graph-type (eg. sparse matrix) values of G only. Raises - TypeError if used with a Graph object. + NOTE + For non-Graph-type (eg. sparse matrix) values of G only. Raises + TypeError if used with a Graph object. + If True (default), then convert the input matrix to a cugraph.DiGraph and only move from point i to point j along paths csgraph[i, j]. If False, then find the shortest path on an undirected graph: the @@ -340,8 +350,10 @@ def connected_components(G, return_labels : bool, optional - NOTE: For non-Graph-type (eg. sparse matrix) values of G only. Raises - TypeError if used with a Graph object. + NOTE + For non-Graph-type (eg. sparse matrix) values of G only. Raises + TypeError if used with a Graph object. 
+ If True (default), then return the labels for each of the connected components. diff --git a/python/cugraph/dask/centrality/katz_centrality.py b/python/cugraph/dask/centrality/katz_centrality.py index a2f83a0b2a8..45deda8b7ae 100644 --- a/python/cugraph/dask/centrality/katz_centrality.py +++ b/python/cugraph/dask/centrality/katz_centrality.py @@ -68,14 +68,16 @@ def katz_centrality(input_graph, Attenuation factor defaulted to None. If alpha is not specified then it is internally calculated as 1/(degree_max) where degree_max is the maximum out degree. - NOTE : The maximum acceptable value of alpha for convergence - alpha_max = 1/(lambda_max) where lambda_max is the largest eigenvalue - of the graph. - Since lambda_max is always lesser than or equal to degree_max for a - graph, alpha_max will always be greater than or equal to - (1/degree_max). Therefore, setting alpha to (1/degree_max) will - guarantee that it will never exceed alpha_max thus in turn fulfilling - the requirement for convergence. + + NOTE + The maximum acceptable value of alpha for convergence + alpha_max = 1/(lambda_max) where lambda_max is the largest eigenvalue + of the graph. + Since lambda_max is always lesser than or equal to degree_max for a + graph, alpha_max will always be greater than or equal to + (1/degree_max). Therefore, setting alpha to (1/degree_max) will + guarantee that it will never exceed alpha_max thus in turn fulfilling + the requirement for convergence. beta : None A weight scalar - currently Not Supported max_iter : int @@ -94,6 +96,7 @@ def katz_centrality(input_graph, acceptable. nstart : dask_cudf.Dataframe GPU Dataframe containing the initial guess for katz centrality + nstart['vertex'] : dask_cudf.Series Contains the vertex identifiers nstart['values'] : dask_cudf.Series diff --git a/python/cugraph/dask/link_analysis/pagerank.py b/python/cugraph/dask/link_analysis/pagerank.py index bfaada85a6f..fb9f4ad3a25 100644 --- a/python/cugraph/dask/link_analysis/pagerank.py +++ b/python/cugraph/dask/link_analysis/pagerank.py @@ -73,6 +73,7 @@ def pagerank(input_graph, personalization : cudf.Dataframe GPU Dataframe containing the personalization information. Currently not supported. + personalization['vertex'] : cudf.Series Subset of vertices of graph for personalization personalization['values'] : cudf.Series @@ -91,6 +92,7 @@ def pagerank(input_graph, acceptable. nstart : not supported initial guess for pagerank + Returns ------- PageRank : dask_cudf.DataFrame diff --git a/python/cugraph/link_analysis/pagerank.py b/python/cugraph/link_analysis/pagerank.py index 0bb89195e01..8a03ee077f6 100644 --- a/python/cugraph/link_analysis/pagerank.py +++ b/python/cugraph/link_analysis/pagerank.py @@ -46,7 +46,6 @@ def pagerank( Subset of vertices of graph for personalization personalization['values'] : cudf.Series Personalization values for vertices - max_iter : int The maximum number of iterations before an answer is returned. This can be used to limit the execution time and do an early exit before the diff --git a/python/cugraph/structure/symmetrize.py b/python/cugraph/structure/symmetrize.py index 0f4ca90a97c..8720f7ad343 100644 --- a/python/cugraph/structure/symmetrize.py +++ b/python/cugraph/structure/symmetrize.py @@ -32,6 +32,7 @@ def symmetrize_df(df, src_name, dst_name, multi=False, symmetrize=True): != data2 then this code will arbitrarily pick the smaller data element to keep, if this is not desired then the caller should should correct the data prior to calling symmetrize. 
+ Parameters ---------- df : cudf.DataFrame diff --git a/python/cugraph/traversal/bfs.py b/python/cugraph/traversal/bfs.py index efbae095676..a483b96850b 100644 --- a/python/cugraph/traversal/bfs.py +++ b/python/cugraph/traversal/bfs.py @@ -136,8 +136,10 @@ def bfs(G, can be set, not both. directed : bool, optional - NOTE: For non-Graph-type (eg. sparse matrix) values of G only. Raises - TypeError if used with a Graph object. + NOTE + For non-Graph-type (eg. sparse matrix) values of G only. Raises + TypeError if used with a Graph object. + If True (default), then convert the input matrix to a cugraph.DiGraph, otherwise a cugraph.Graph object will be used. diff --git a/python/cugraph/traversal/traveling_salesperson.py b/python/cugraph/traversal/traveling_salesperson.py index ae17555e4ea..7aea7ae603f 100644 --- a/python/cugraph/traversal/traveling_salesperson.py +++ b/python/cugraph/traversal/traveling_salesperson.py @@ -29,6 +29,7 @@ def traveling_salesperson(pos_list, optimization. The current implementation does not support a weighted graph. + Parameters ---------- pos_list: cudf.DataFrame From e9d09eeb11414c2e12c46b4a188186e1ceee032d Mon Sep 17 00:00:00 2001 From: Iroy30 <41401566+Iroy30@users.noreply.github.com> Date: Thu, 8 Apr 2021 21:26:13 -0500 Subject: [PATCH 223/343] fix mg_renumber non-deterministic errors (#1523) * @Iroy30 added missing dask `persist()` call to ensure deterministic indirection map state prior to merging renumbering results. * @rlratzel updated MG renumbering test for latest API changes, removed redundant test, and updated test IDs to include the dataset name. Authors: - https://github.com/Iroy30 - Rick Ratzel (https://github.com/rlratzel) Approvers: - Brad Rees (https://github.com/BradReesWork) - Joseph Nke (https://github.com/jnke2016) URL: https://github.com/rapidsai/cugraph/pull/1523 --- python/cugraph/structure/number_map.py | 8 +- .../test_mg_batch_betweenness_centrality.py | 3 +- ...st_mg_batch_edge_betweenness_centrality.py | 5 +- python/cugraph/tests/dask/test_mg_bfs.py | 5 +- python/cugraph/tests/dask/test_mg_comms.py | 6 +- python/cugraph/tests/dask/test_mg_degree.py | 5 +- .../tests/dask/test_mg_katz_centrality.py | 5 +- python/cugraph/tests/dask/test_mg_louvain.py | 7 +- python/cugraph/tests/dask/test_mg_pagerank.py | 5 +- python/cugraph/tests/dask/test_mg_renumber.py | 93 ++++++------------- .../cugraph/tests/dask/test_mg_replication.py | 46 ++++++--- python/cugraph/tests/dask/test_mg_sssp.py | 5 +- python/cugraph/tests/dask/test_mg_utility.py | 5 +- 13 files changed, 107 insertions(+), 91 deletions(-) diff --git a/python/cugraph/structure/number_map.py b/python/cugraph/structure/number_map.py index e45a50d6dbe..cd24dfc0434 100644 --- a/python/cugraph/structure/number_map.py +++ b/python/cugraph/structure/number_map.py @@ -263,7 +263,6 @@ def indirection_map(self, ddf, src_col_names, dst_col_names): to_frame(name=newname) else: tmp_df[newname] = tmp[newname].append(tmp_dst[oldname]) - print(tmp_df.columns) else: for newname in self.col_names: tmp_df[newname] = tmp[newname] @@ -273,7 +272,7 @@ def indirection_map(self, ddf, src_col_names, dst_col_names): tmp_ddf = tmp_ddf.assign(idx=1) tmp_ddf['global_id'] = tmp_ddf.idx.cumsum() - 1 tmp_ddf = tmp_ddf.drop(columns='idx') - + tmp_ddf = tmp_ddf.persist() self.ddf = tmp_ddf return tmp_ddf @@ -481,8 +480,6 @@ def renumber(df, src_col_names, dst_col_names, preserve_order=False, renumber_type = 'legacy' else: renumber_type = 'experimental' - df = df.rename(columns={src_col_names: "src", - dst_col_names: 
"dst"}) renumber_map = NumberMap() if not isinstance(src_col_names, list): @@ -514,6 +511,9 @@ def renumber(df, src_col_names, dst_col_names, preserve_order=False, df, "dst", dst_col_names, drop=True, preserve_order=preserve_order ) + else: + df = df.rename(columns={src_col_names[0]: "src", + dst_col_names[0]: "dst"}) num_edges = len(df) diff --git a/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py b/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py index 6e1e5ea380a..02696f589e3 100644 --- a/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py +++ b/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py @@ -51,7 +51,8 @@ @pytest.mark.skipif( is_single_gpu(), reason="skipping MG testing on Single GPU system" ) -@pytest.mark.parametrize("graph_file", DATASETS) +@pytest.mark.parametrize("graph_file", DATASETS, + ids=[f"dataset={d.as_posix()}" for d in DATASETS]) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) diff --git a/python/cugraph/tests/dask/test_mg_batch_edge_betweenness_centrality.py b/python/cugraph/tests/dask/test_mg_batch_edge_betweenness_centrality.py index 54b58c340aa..89844797807 100644 --- a/python/cugraph/tests/dask/test_mg_batch_edge_betweenness_centrality.py +++ b/python/cugraph/tests/dask/test_mg_batch_edge_betweenness_centrality.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -48,7 +48,8 @@ @pytest.mark.skipif( is_single_gpu(), reason="skipping MG testing on Single GPU system" ) -@pytest.mark.parametrize("graph_file", DATASETS) +@pytest.mark.parametrize("graph_file", DATASETS, + ids=[f"dataset={d}" for d in DATASETS]) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) diff --git a/python/cugraph/tests/dask/test_mg_bfs.py b/python/cugraph/tests/dask/test_mg_bfs.py index 63580461b17..36d1f436b52 100644 --- a/python/cugraph/tests/dask/test_mg_bfs.py +++ b/python/cugraph/tests/dask/test_mg_bfs.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -35,7 +35,10 @@ def client_connection(): def test_dask_bfs(client_connection): gc.collect() + # FIXME: update this to allow dataset to be parameterized and have dataset + # part of test param id (see other tests) input_data_path = r"../datasets/netscience.csv" + print(f"dataset={input_data_path}") chunksize = dcg.get_chunksize(input_data_path) ddf = dask_cudf.read_csv( diff --git a/python/cugraph/tests/dask/test_mg_comms.py b/python/cugraph/tests/dask/test_mg_comms.py index 61a4944b5f1..03a0a5d73d2 100644 --- a/python/cugraph/tests/dask/test_mg_comms.py +++ b/python/cugraph/tests/dask/test_mg_comms.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -38,10 +38,14 @@ def test_dask_pagerank(client_connection): # Initialize and run pagerank on two distributed graphs # with same communicator + # FIXME: update this to allow dataset to be parameterized and have dataset + # part of test param id (see other tests) input_data_path1 = r"../datasets/karate.csv" + print(f"dataset1={input_data_path1}") chunksize1 = dcg.get_chunksize(input_data_path1) input_data_path2 = r"../datasets/dolphins.csv" + print(f"dataset2={input_data_path2}") chunksize2 = dcg.get_chunksize(input_data_path2) ddf1 = dask_cudf.read_csv( diff --git a/python/cugraph/tests/dask/test_mg_degree.py b/python/cugraph/tests/dask/test_mg_degree.py index 9f4c0d94319..93e8a365dea 100644 --- a/python/cugraph/tests/dask/test_mg_degree.py +++ b/python/cugraph/tests/dask/test_mg_degree.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2020, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -34,7 +34,10 @@ def client_connection(): def test_dask_mg_degree(client_connection): gc.collect() + # FIXME: update this to allow dataset to be parameterized and have dataset + # part of test param id (see other tests) input_data_path = r"../datasets/karate.csv" + print(f"dataset={input_data_path}") chunksize = cugraph.dask.get_chunksize(input_data_path) diff --git a/python/cugraph/tests/dask/test_mg_katz_centrality.py b/python/cugraph/tests/dask/test_mg_katz_centrality.py index 631457f7558..eadf0f662d4 100644 --- a/python/cugraph/tests/dask/test_mg_katz_centrality.py +++ b/python/cugraph/tests/dask/test_mg_katz_centrality.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -36,7 +36,10 @@ def client_connection(): def test_dask_katz_centrality(client_connection): gc.collect() + # FIXME: update this to allow dataset to be parameterized and have dataset + # part of test param id (see other tests) input_data_path = r"../datasets/karate.csv" + print(f"dataset={input_data_path}") chunksize = dcg.get_chunksize(input_data_path) ddf = dask_cudf.read_csv( diff --git a/python/cugraph/tests/dask/test_mg_louvain.py b/python/cugraph/tests/dask/test_mg_louvain.py index a07eede8cb9..bd7374fb75e 100644 --- a/python/cugraph/tests/dask/test_mg_louvain.py +++ b/python/cugraph/tests/dask/test_mg_louvain.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -52,7 +52,10 @@ def client_connection(): @pytest.mark.skipif( is_single_gpu(), reason="skipping MG testing on Single GPU system" ) -@pytest.fixture(scope="module", params=utils.DATASETS_UNDIRECTED) +@pytest.fixture(scope="module", + params=utils.DATASETS_UNDIRECTED, + ids=[f"dataset={d.as_posix()}" + for d in utils.DATASETS_UNDIRECTED]) def daskGraphFromDataset(request, client_connection): """ Returns a new dask dataframe created from the dataset file param. 
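
The `ids=` arguments added to `@pytest.mark.parametrize` in the test files above
make every generated test id name its dataset. A minimal sketch of the effect,
assuming standard pytest behavior (the test name and dataset paths here are
hypothetical):

```python
import pytest
from pathlib import PurePosixPath

DATASETS = [PurePosixPath("../datasets/karate.csv"),
            PurePosixPath("../datasets/dolphins.csv")]


# Path params would otherwise render as opaque ids (graph_file0, graph_file1);
# the ids= list makes them show up as test_degree[dataset=../datasets/karate.csv]
@pytest.mark.parametrize("graph_file", DATASETS,
                         ids=[f"dataset={d.as_posix()}" for d in DATASETS])
def test_degree(graph_file):
    assert graph_file.suffix == ".csv"
```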
diff --git a/python/cugraph/tests/dask/test_mg_pagerank.py b/python/cugraph/tests/dask/test_mg_pagerank.py index 4f0b45242dd..9cb00010311 100644 --- a/python/cugraph/tests/dask/test_mg_pagerank.py +++ b/python/cugraph/tests/dask/test_mg_pagerank.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -65,7 +65,10 @@ def client_connection(): def test_dask_pagerank(client_connection, personalization_perc): gc.collect() + # FIXME: update this to allow dataset to be parameterized and have dataset + # part of test param id (see other tests) input_data_path = r"../datasets/karate.csv" + print(f"dataset={input_data_path}") chunksize = dcg.get_chunksize(input_data_path) ddf = dask_cudf.read_csv( diff --git a/python/cugraph/tests/dask/test_mg_renumber.py b/python/cugraph/tests/dask/test_mg_renumber.py index 7f5cf6f08bc..68ec3de35f8 100644 --- a/python/cugraph/tests/dask/test_mg_renumber.py +++ b/python/cugraph/tests/dask/test_mg_renumber.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -38,11 +38,12 @@ def client_connection(): teardown_local_dask_cluster(cluster, client) -# Test all combinations of default/managed and pooled/non-pooled allocation @pytest.mark.skipif( is_single_gpu(), reason="skipping MG testing on Single GPU system" ) -@pytest.mark.parametrize("graph_file", utils.DATASETS_UNRENUMBERED) +@pytest.mark.parametrize("graph_file", utils.DATASETS_UNRENUMBERED, + ids=[f"dataset={d.as_posix()}" + for d in utils.DATASETS_UNRENUMBERED]) def test_mg_renumber(graph_file, client_connection): gc.collect() @@ -60,71 +61,37 @@ def test_mg_renumber(graph_file, client_connection): ddf = dask.dataframe.from_pandas(gdf, npartitions=2) - numbering = NumberMap() - numbering.from_dataframe(ddf, ["src", "src_old"], ["dst", "dst_old"]) - renumbered_df = numbering.add_internal_vertex_id( - numbering.add_internal_vertex_id(ddf, "src_id", ["src", "src_old"]), - "dst_id", - ["dst", "dst_old"], - ) - - check_src = numbering.from_internal_vertex_id( - renumbered_df, "src_id" - ).compute() - check_dst = numbering.from_internal_vertex_id( - renumbered_df, "dst_id" - ).compute() - - assert check_src["0"].to_pandas().equals(check_src["src"].to_pandas()) - assert check_src["1"].to_pandas().equals(check_src["src_old"].to_pandas()) - assert check_dst["0"].to_pandas().equals(check_dst["dst"].to_pandas()) - assert check_dst["1"].to_pandas().equals(check_dst["dst_old"].to_pandas()) - - -# Test all combinations of default/managed and pooled/non-pooled allocation -@pytest.mark.skipif( - is_single_gpu(), reason="skipping MG testing on Single GPU system" -) -@pytest.mark.parametrize("graph_file", utils.DATASETS_UNRENUMBERED) -def test_mg_renumber2(graph_file, client_connection): - gc.collect() - - M = utils.read_csv_for_nx(graph_file) - sources = cudf.Series(M["0"]) - destinations = cudf.Series(M["1"]) - - translate = 1000 - - gdf = cudf.DataFrame() - gdf["src_old"] = sources - gdf["dst_old"] = destinations - gdf["src"] = sources + translate - gdf["dst"] = destinations + translate - gdf["weight"] = gdf.index.astype(np.float) - - ddf = 
dask.dataframe.from_pandas(gdf, npartitions=2) - - ren2, num2 = NumberMap.renumber( - ddf, ["src", "src_old"], ["dst", "dst_old"] - ) - - check_src = num2.from_internal_vertex_id(ren2, "src").compute() - check_src = check_src.sort_values("weight").reset_index(drop=True) - check_dst = num2.from_internal_vertex_id(ren2, "dst").compute() - check_dst = check_dst.sort_values("weight").reset_index(drop=True) - - assert check_src["0"].to_pandas().equals(gdf["src"].to_pandas()) - assert check_src["1"].to_pandas().equals(gdf["src_old"].to_pandas()) - assert check_dst["0"].to_pandas().equals(gdf["dst"].to_pandas()) - assert check_dst["1"].to_pandas().equals(gdf["dst_old"].to_pandas()) + # preserve_order is not supported for MG + renumbered_df, renumber_map = NumberMap.renumber(ddf, + ["src", "src_old"], + ["dst", "dst_old"], + preserve_order=False) + unrenumbered_df = renumber_map.unrenumber(renumbered_df, "src", + preserve_order=False) + unrenumbered_df = renumber_map.unrenumber(unrenumbered_df, "dst", + preserve_order=False) + + # sort needed only for comparisons, since preserve_order is False + gdf = gdf.sort_values(by=["src", "src_old", "dst", "dst_old"]) + gdf = gdf.reset_index() + unrenumbered_df = unrenumbered_df.compute() + unrenumbered_df = unrenumbered_df.sort_values(by=["0_src", "1_src", + "0_dst", "1_dst"]) + unrenumbered_df = unrenumbered_df.reset_index() + + assert gdf["src"].equals(unrenumbered_df["0_src"]) + assert gdf["src_old"].equals(unrenumbered_df["1_src"]) + assert gdf["dst"].equals(unrenumbered_df["0_dst"]) + assert gdf["dst_old"].equals(unrenumbered_df["1_dst"]) -# Test all combinations of default/managed and pooled/non-pooled allocation @pytest.mark.skipif( is_single_gpu(), reason="skipping MG testing on Single GPU system" ) -@pytest.mark.parametrize("graph_file", utils.DATASETS_UNRENUMBERED) -def test_mg_renumber3(graph_file, client_connection): +@pytest.mark.parametrize("graph_file", utils.DATASETS_UNRENUMBERED, + ids=[f"dataset={d.as_posix()}" + for d in utils.DATASETS_UNRENUMBERED]) +def test_mg_renumber_add_internal_vertex_id(graph_file, client_connection): gc.collect() M = utils.read_csv_for_nx(graph_file) diff --git a/python/cugraph/tests/dask/test_mg_replication.py b/python/cugraph/tests/dask/test_mg_replication.py index bb43d6c0f7a..3974cf9ed82 100644 --- a/python/cugraph/tests/dask/test_mg_replication.py +++ b/python/cugraph/tests/dask/test_mg_replication.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -34,7 +34,9 @@ @pytest.mark.skipif( is_single_gpu(), reason="skipping MG testing on Single GPU system" ) -@pytest.mark.parametrize("input_data_path", DATASETS_OPTIONS) +@pytest.mark.parametrize("input_data_path", DATASETS_OPTIONS, + ids=[f"dataset={d.as_posix()}" + for d in DATASETS_OPTIONS]) @pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS) def test_replicate_cudf_dataframe_with_weights( input_data_path, mg_device_count @@ -60,7 +62,9 @@ def test_replicate_cudf_dataframe_with_weights( @pytest.mark.skipif( is_single_gpu(), reason="skipping MG testing on Single GPU system" ) -@pytest.mark.parametrize("input_data_path", DATASETS_OPTIONS) +@pytest.mark.parametrize("input_data_path", DATASETS_OPTIONS, + ids=[f"dataset={d.as_posix()}" + for d in DATASETS_OPTIONS]) @pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS) def test_replicate_cudf_dataframe_no_weights(input_data_path, mg_device_count): gc.collect() @@ -84,7 +88,9 @@ def test_replicate_cudf_dataframe_no_weights(input_data_path, mg_device_count): @pytest.mark.skipif( is_single_gpu(), reason="skipping MG testing on Single GPU system" ) -@pytest.mark.parametrize("input_data_path", DATASETS_OPTIONS) +@pytest.mark.parametrize("input_data_path", DATASETS_OPTIONS, + ids=[f"dataset={d.as_posix()}" + for d in DATASETS_OPTIONS]) @pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS) def test_replicate_cudf_series(input_data_path, mg_device_count): gc.collect() @@ -114,7 +120,9 @@ def test_replicate_cudf_series(input_data_path, mg_device_count): @pytest.mark.skipif( is_single_gpu(), reason="skipping MG testing on Single GPU system" ) -@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS) +@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS, + ids=[f"dataset={d.as_posix()}" + for d in DATASETS_OPTIONS]) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS) def test_enable_batch_no_context(graph_file, directed, mg_device_count): @@ -129,7 +137,9 @@ def test_enable_batch_no_context(graph_file, directed, mg_device_count): @pytest.mark.skipif( is_single_gpu(), reason="skipping MG testing on Single GPU system" ) -@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS) +@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS, + ids=[f"dataset={d.as_posix()}" + for d in DATASETS_OPTIONS]) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS) def test_enable_batch_no_context_view_adj( @@ -145,7 +155,9 @@ def test_enable_batch_no_context_view_adj( @pytest.mark.skipif( is_single_gpu(), reason="skipping MG testing on Single GPU system" ) -@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS) +@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS, + ids=[f"dataset={d.as_posix()}" + for d in DATASETS_OPTIONS]) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS) def test_enable_batch_context_then_views( @@ -174,7 +186,9 @@ def test_enable_batch_context_then_views( @pytest.mark.skipif( is_single_gpu(), reason="skipping MG testing on Single GPU system" ) -@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS) +@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS, + ids=[f"dataset={d.as_posix()}" + for d in DATASETS_OPTIONS]) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("mg_device_count", 
MG_DEVICE_COUNT_OPTIONS) def test_enable_batch_view_then_context(graph_file, directed, mg_device_count): @@ -205,7 +219,9 @@ def test_enable_batch_view_then_context(graph_file, directed, mg_device_count): @pytest.mark.skipif( is_single_gpu(), reason="skipping MG testing on Single GPU system" ) -@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS) +@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS, + ids=[f"dataset={d.as_posix()}" + for d in DATASETS_OPTIONS]) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS) def test_enable_batch_context_no_context_views( @@ -230,7 +246,9 @@ def test_enable_batch_context_no_context_views( @pytest.mark.skipif( is_single_gpu(), reason="skipping MG testing on Single GPU system" ) -@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS) +@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS, + ids=[f"dataset={d.as_posix()}" + for d in DATASETS_OPTIONS]) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS) def test_enable_batch_edgelist_replication( @@ -251,7 +269,9 @@ def test_enable_batch_edgelist_replication( @pytest.mark.skipif( is_single_gpu(), reason="skipping MG testing on Single GPU system" ) -@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS) +@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS, + ids=[f"dataset={d.as_posix()}" + for d in DATASETS_OPTIONS]) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS) def test_enable_batch_adjlist_replication_weights( @@ -293,7 +313,9 @@ def test_enable_batch_adjlist_replication_weights( @pytest.mark.skipif( is_single_gpu(), reason="skipping MG testing on Single GPU system" ) -@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS) +@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS, + ids=[f"dataset={d.as_posix()}" + for d in DATASETS_OPTIONS]) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS) def test_enable_batch_adjlist_replication_no_weights( diff --git a/python/cugraph/tests/dask/test_mg_sssp.py b/python/cugraph/tests/dask/test_mg_sssp.py index d75d76d7fd4..9e1fd1ec82f 100644 --- a/python/cugraph/tests/dask/test_mg_sssp.py +++ b/python/cugraph/tests/dask/test_mg_sssp.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -35,7 +35,10 @@ def client_connection(): def test_dask_sssp(client_connection): gc.collect() + # FIXME: update this to allow dataset to be parameterized and have dataset + # part of test param id (see other tests) input_data_path = r"../datasets/netscience.csv" + print(f"dataset={input_data_path}") chunksize = dcg.get_chunksize(input_data_path) ddf = dask_cudf.read_csv( diff --git a/python/cugraph/tests/dask/test_mg_utility.py b/python/cugraph/tests/dask/test_mg_utility.py index 3217c1bef1a..150fa0137f5 100644 --- a/python/cugraph/tests/dask/test_mg_utility.py +++ b/python/cugraph/tests/dask/test_mg_utility.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2020, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. 
# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -46,7 +46,10 @@ def client_connection(): is_single_gpu(), reason="skipping MG testing on Single GPU system" ) def test_from_edgelist(client_connection): + # FIXME: update this to allow dataset to be parameterized and have dataset + # part of test param id (see other tests) input_data_path = r"../datasets/karate.csv" + print(f"dataset={input_data_path}") chunksize = dcg.get_chunksize(input_data_path) ddf = dask_cudf.read_csv( input_data_path, From 62c1c6824ab9f4249ed227cb4954076d282d3b57 Mon Sep 17 00:00:00 2001 From: Rick Ratzel <3039903+rlratzel@users.noreply.github.com> Date: Mon, 12 Apr 2021 08:39:03 -0500 Subject: [PATCH 224/343] Fixed copyright date and format. (#1526) Update copyright data and format Authors: - Rick Ratzel (https://github.com/rlratzel) Approvers: - Brad Rees (https://github.com/BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1526 --- docs/source/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index eb4745a61f0..3422428c96b 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- # -# Copyright (c) 2018-2020 NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. # # pygdf documentation build configuration file, created by # sphinx-quickstart on Wed May 3 10:59:22 2017. From db17426458d267df1b8f9f8ee9045a1281660ff2 Mon Sep 17 00:00:00 2001 From: Alex Fender Date: Wed, 14 Apr 2021 15:09:18 -0500 Subject: [PATCH 225/343] disabling shallow fetch for cuhornet (#1535) Got rid of the GIT_SHALLOW setting since it should not be enabled by default. Authors: - Alex Fender (https://github.com/afender) --- cpp/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 1b15d04bbfd..034de2b85c1 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -273,7 +273,6 @@ FetchContent_Declare( cuhornet GIT_REPOSITORY https://github.com/rapidsai/cuhornet.git GIT_TAG 9cb8e8803852bd895a9c95c0fe778ad6eeefa7ad - GIT_SHALLOW true SOURCE_SUBDIR hornet ) From f5ba9e7a33fea5a69949283a5de33a4eb1233c36 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Thu, 15 Apr 2021 15:40:37 -0400 Subject: [PATCH 226/343] Undo cuco::static_map bug workarounds (#1520) Undo workarounds due to cuco::static_map bugs (Issues 71, 72, 73, 74, 75 in https://github.com/NVIDIA/cuCollections/issues) as those get fixed and update the cuCollection git tag to pull the version after the bug fixes.
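(Illustrative sketch only, not part of the patch below: with the updated cuco tag the call sites reduce to plain insert()/find() calls with no zero-size guard. Here vertex_t, num_pairs, num_keys, pair_first, key_first, value_first and load_factor are placeholder names standing in for the types and variables at each call site.)

    // cuco::static_map requires at least one empty slot
    cuco::static_map<vertex_t, vertex_t> map(
      std::max(static_cast<size_t>(static_cast<double>(num_pairs) / load_factor),
               static_cast<size_t>(num_pairs) + 1),
      invalid_vertex_id<vertex_t>::value,
      invalid_vertex_id<vertex_t>::value);
    // safe even when the input ranges are empty, so the former
    // "if (num_pairs > 0) { ... }" guards are simply dropped:
    map.insert(pair_first, pair_first + num_pairs);
    map.find(key_first, key_first + num_keys, value_first);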
Authors: - Seunghwa Kang (https://github.com/seunghwak) Approvers: - Andrei Schaffer (https://github.com/aschaffer) - Chuck Hastings (https://github.com/ChuckHastings) - Brad Rees (https://github.com/BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1520 --- cpp/CMakeLists.txt | 2 +- ...ransform_reduce_key_aggregated_out_nbr.cuh | 18 ++--- cpp/include/utilities/collect_comm.cuh | 70 +++++------------- cpp/src/experimental/relabel.cu | 50 ++++--------- cpp/src/experimental/renumber_edgelist.cu | 64 +++++------------ cpp/src/experimental/renumber_utils.cu | 72 +++---------------- python/cugraph/centrality/katz_centrality.py | 10 +-- python/cugraph/components/connectivity.py | 2 +- .../dask/centrality/katz_centrality.py | 8 +-- 9 files changed, 71 insertions(+), 225 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 9394f7b38d1..fefd66423db 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -242,7 +242,7 @@ message("Fetching cuco") FetchContent_Declare( cuco GIT_REPOSITORY https://github.com/NVIDIA/cuCollections.git - GIT_TAG 2196040f0562a0280292eebef5295d914f615e63 + GIT_TAG 7678a5ecaa192b8983b02a0191a140097171713e ) FetchContent_GetProperties(cuco) diff --git a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh index f904c35ef9e..f6eac67e4e7 100644 --- a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh +++ b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh @@ -256,9 +256,7 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( kv_map_ptr.reset(); kv_map_ptr = std::make_unique>( - // FIXME: std::max(..., ...) as a temporary workaround for - // https://github.com/NVIDIA/cuCollections/issues/72 and - // https://github.com/NVIDIA/cuCollections/issues/73 + // cuco::static_map requires at least one empty slot std::max(static_cast(static_cast(map_keys.size()) / load_factor), static_cast(thrust::distance(map_key_first, map_key_last)) + 1), invalid_vertex_id::value, @@ -270,18 +268,14 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( [] __device__(auto val) { return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); }); - // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid - // size is 0; this leads to cudaErrorInvaildConfiguration. - if (map_keys.size()) { kv_map_ptr->insert(pair_first, pair_first + map_keys.size()); } + kv_map_ptr->insert(pair_first, pair_first + map_keys.size()); } else { handle.get_stream_view().synchronize(); // cuco::static_map currently does not take stream kv_map_ptr.reset(); kv_map_ptr = std::make_unique>( - // FIXME: std::max(..., ...) as a temporary workaround for - // https://github.com/NVIDIA/cuCollections/issues/72 and - // https://github.com/NVIDIA/cuCollections/issues/73 + // cuco::static_map requires at least one empty slot std::max(static_cast( static_cast(thrust::distance(map_key_first, map_key_last)) / load_factor), static_cast(thrust::distance(map_key_first, map_key_last)) + 1), @@ -293,11 +287,7 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( [] __device__(auto val) { return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); }); - // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid - // size is 0; this leads to cudaErrorInvaildConfiguration. 
- if (thrust::distance(map_key_first, map_key_last) > 0) { - kv_map_ptr->insert(pair_first, pair_first + thrust::distance(map_key_first, map_key_last)); - } + kv_map_ptr->insert(pair_first, pair_first + thrust::distance(map_key_first, map_key_last)); } // 2. aggregate each vertex out-going edges based on keys and transform-reduce. diff --git a/cpp/include/utilities/collect_comm.cuh b/cpp/include/utilities/collect_comm.cuh index 481717d7c38..f5a904ad875 100644 --- a/cpp/include/utilities/collect_comm.cuh +++ b/cpp/include/utilities/collect_comm.cuh @@ -64,9 +64,7 @@ collect_values_for_keys(raft::comms::comms_t const &comm, // 1. build a cuco::static_map object for the map k, v pairs. auto kv_map_ptr = std::make_unique>( - // FIXME: std::max(..., ...) as a temporary workaround for - // https://github.com/NVIDIA/cuCollections/issues/72 and - // https://github.com/NVIDIA/cuCollections/issues/73 + // cuco::static_map requires at least one empty slot std::max(static_cast( static_cast(thrust::distance(map_key_first, map_key_last)) / load_factor), static_cast(thrust::distance(map_key_first, map_key_last)) + 1), @@ -78,11 +76,7 @@ collect_values_for_keys(raft::comms::comms_t const &comm, [] __device__(auto val) { return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); }); - // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid - // size is 0; this leads to cudaErrorInvaildConfiguration. - if (thrust::distance(map_key_first, map_key_last) > 0) { - kv_map_ptr->insert(pair_first, pair_first + thrust::distance(map_key_first, map_key_last)); - } + kv_map_ptr->insert(pair_first, pair_first + thrust::distance(map_key_first, map_key_last)); } // 2. collect values for the unique keys in [collect_key_first, collect_key_last) @@ -113,12 +107,8 @@ collect_values_for_keys(raft::comms::comms_t const &comm, CUDA_TRY(cudaStreamSynchronize(stream)); // cuco::static_map currently does not take stream - // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid - // size is 0; this leads to cudaErrorInvaildConfiguration. - if (rx_unique_keys.size() > 0) { - kv_map_ptr->find( - rx_unique_keys.begin(), rx_unique_keys.end(), values_for_rx_unique_keys.begin()); - } + kv_map_ptr->find( + rx_unique_keys.begin(), rx_unique_keys.end(), values_for_rx_unique_keys.begin()); rmm::device_uvector rx_values_for_unique_keys(0, stream); std::tie(rx_values_for_unique_keys, std::ignore) = @@ -135,9 +125,7 @@ collect_values_for_keys(raft::comms::comms_t const &comm, kv_map_ptr.reset(); kv_map_ptr = std::make_unique>( - // FIXME: std::max(..., ...) as a temporary workaround for - // https://github.com/NVIDIA/cuCollections/issues/72 and - // https://github.com/NVIDIA/cuCollections/issues/73 + // cuco::static_map requires at least one empty slot std::max(static_cast(static_cast(unique_keys.size()) / load_factor), unique_keys.size() + 1), invalid_vertex_id::value, @@ -150,21 +138,15 @@ collect_values_for_keys(raft::comms::comms_t const &comm, return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); }); - // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid - // size is 0; this leads to cudaErrorInvaildConfiguration. - if (unique_keys.size() > 0) { kv_map_ptr->insert(pair_first, pair_first + unique_keys.size()); } + kv_map_ptr->insert(pair_first, pair_first + unique_keys.size()); } // 4. 
find values for [collect_key_first, collect_key_last) auto value_buffer = allocate_dataframe_buffer( thrust::distance(collect_key_first, collect_key_last), stream); - // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid - // size is 0; this leads to cudaErrorInvaildConfiguration. - if (thrust::distance(collect_key_first, collect_key_last) > 0) { - kv_map_ptr->find( - collect_key_first, collect_key_last, get_dataframe_buffer_begin(value_buffer)); - } + kv_map_ptr->find( + collect_key_first, collect_key_last, get_dataframe_buffer_begin(value_buffer)); return value_buffer; } @@ -200,9 +182,7 @@ collect_values_for_unique_keys(raft::comms::comms_t const &comm, // 1. build a cuco::static_map object for the map k, v pairs. auto kv_map_ptr = std::make_unique>( - // FIXME: std::max(..., ...) as a temporary workaround for - // https://github.com/NVIDIA/cuCollections/issues/72 and - // https://github.com/NVIDIA/cuCollections/issues/73 + // cuco::static_map requires at least one empty slot std::max(static_cast( static_cast(thrust::distance(map_key_first, map_key_last)) / load_factor), static_cast(thrust::distance(map_key_first, map_key_last)) + 1), @@ -214,11 +194,7 @@ collect_values_for_unique_keys(raft::comms::comms_t const &comm, [] __device__(auto val) { return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); }); - // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid - // size is 0; this leads to cudaErrorInvaildConfiguration. - if (thrust::distance(map_key_first, map_key_last)) { - kv_map_ptr->insert(pair_first, pair_first + thrust::distance(map_key_first, map_key_last)); - } + kv_map_ptr->insert(pair_first, pair_first + thrust::distance(map_key_first, map_key_last)); } // 2. collect values for the unique keys in [collect_unique_key_first, collect_unique_key_last) @@ -245,12 +221,8 @@ collect_values_for_unique_keys(raft::comms::comms_t const &comm, CUDA_TRY(cudaStreamSynchronize(stream)); // cuco::static_map currently does not take stream - // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid - // size is 0; this leads to cudaErrorInvaildConfiguration. - if (rx_unique_keys.size() > 0) { - kv_map_ptr->find( - rx_unique_keys.begin(), rx_unique_keys.end(), values_for_rx_unique_keys.begin()); - } + kv_map_ptr->find( + rx_unique_keys.begin(), rx_unique_keys.end(), values_for_rx_unique_keys.begin()); rmm::device_uvector rx_values_for_unique_keys(0, stream); std::tie(rx_values_for_unique_keys, std::ignore) = @@ -267,9 +239,7 @@ collect_values_for_unique_keys(raft::comms::comms_t const &comm, kv_map_ptr.reset(); kv_map_ptr = std::make_unique>( - // FIXME: std::max(..., ...) as a temporary workaround for - // https://github.com/NVIDIA/cuCollections/issues/72 and - // https://github.com/NVIDIA/cuCollections/issues/73 + // cuco::static_map requires at least one empty slot std::max(static_cast(static_cast(unique_keys.size()) / load_factor), unique_keys.size() + 1), invalid_vertex_id::value, @@ -282,22 +252,16 @@ collect_values_for_unique_keys(raft::comms::comms_t const &comm, return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); }); - // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid - // size is 0; this leads to cudaErrorInvaildConfiguration. 
- if (unique_keys.size() > 0) { kv_map_ptr->insert(pair_first, pair_first + unique_keys.size()); } + kv_map_ptr->insert(pair_first, pair_first + unique_keys.size()); } // 4. find values for [collect_unique_key_first, collect_unique_key_last) auto value_buffer = allocate_dataframe_buffer( thrust::distance(collect_unique_key_first, collect_unique_key_last), stream); - // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid - // size is 0; this leads to cudaErrorInvaildConfiguration. - if (thrust::distance(collect_unique_key_first, collect_unique_key_last)) { - kv_map_ptr->find(collect_unique_key_first, - collect_unique_key_last, - get_dataframe_buffer_begin(value_buffer)); - } + kv_map_ptr->find(collect_unique_key_first, + collect_unique_key_last, + get_dataframe_buffer_begin(value_buffer)); return value_buffer; } diff --git a/cpp/src/experimental/relabel.cu b/cpp/src/experimental/relabel.cu index 8d8fb0322a8..918feeb7a10 100644 --- a/cpp/src/experimental/relabel.cu +++ b/cpp/src/experimental/relabel.cu @@ -121,9 +121,7 @@ void relabel(raft::handle_t const& handle, handle.get_stream())); // cuco::static_map currently does not take stream cuco::static_map relabel_map{ - // FIXME: std::max(..., ...) as a temporary workaround for - // https://github.com/NVIDIA/cuCollections/issues/72 and - // https://github.com/NVIDIA/cuCollections/issues/73 + // cuco::static_map requires at least one empty slot std::max( static_cast(static_cast(rx_label_pair_old_labels.size()) / load_factor), rx_label_pair_old_labels.size() + 1), @@ -136,11 +134,7 @@ void relabel(raft::handle_t const& handle, [] __device__(auto val) { return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); }); - // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the - // grid size is 0; this leads to cudaErrorInvaildConfiguration. - if (rx_label_pair_old_labels.size() > 0) { - relabel_map.insert(pair_first, pair_first + rx_label_pair_old_labels.size()); - } + relabel_map.insert(pair_first, pair_first + rx_label_pair_old_labels.size()); rx_label_pair_old_labels.resize(0, handle.get_stream()); rx_label_pair_new_labels.resize(0, handle.get_stream()); @@ -162,15 +156,11 @@ void relabel(raft::handle_t const& handle, CUDA_TRY(cudaStreamSynchronize( handle.get_stream())); // cuco::static_map currently does not take stream - // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the - // grid size is 0; this leads to cudaErrorInvaildConfiguration. - if (rx_unique_old_labels.size() > 0) { - relabel_map.find( - rx_unique_old_labels.begin(), - rx_unique_old_labels.end(), - rx_unique_old_labels.begin()); // now rx_unique_old_lables hold new labels for the - // corresponding old labels - } + relabel_map.find( + rx_unique_old_labels.begin(), + rx_unique_old_labels.end(), + rx_unique_old_labels + .begin()); // now rx_unique_old_lables hold new labels for the corresponding old labels std::tie(new_labels_for_unique_old_labels, std::ignore) = shuffle_values( handle.get_comms(), rx_unique_old_labels.begin(), rx_value_counts, handle.get_stream()); @@ -180,9 +170,7 @@ void relabel(raft::handle_t const& handle, handle.get_stream_view().synchronize(); // cuco::static_map currently does not take stream cuco::static_map relabel_map( - // FIXME: std::max(..., ...) 
as a temporary workaround for - // https://github.com/NVIDIA/cuCollections/issues/72 and - // https://github.com/NVIDIA/cuCollections/issues/73 + // cuco::static_map requires at least one empty slot std::max(static_cast(static_cast(unique_old_labels.size()) / load_factor), unique_old_labels.size() + 1), invalid_vertex_id::value, @@ -195,19 +183,11 @@ void relabel(raft::handle_t const& handle, return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); }); - // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid - // size is 0; this leads to cudaErrorInvaildConfiguration. - if (unique_old_labels.size() > 0) { - relabel_map.insert(pair_first, pair_first + unique_old_labels.size()); - } - // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid - // size is 0; this leads to cudaErrorInvaildConfiguration. - if (num_labels > 0) { relabel_map.find(labels, labels + num_labels, labels); } + relabel_map.insert(pair_first, pair_first + unique_old_labels.size()); + relabel_map.find(labels, labels + num_labels, labels); } else { cuco::static_map relabel_map( - // FIXME: std::max(..., ...) as a temporary workaround for - // https://github.com/NVIDIA/cuCollections/issues/72 and - // https://github.com/NVIDIA/cuCollections/issues/73 + // cuco::static_map requires at least one empty slot std::max(static_cast(static_cast(num_label_pairs) / load_factor), static_cast(num_label_pairs) + 1), invalid_vertex_id::value, @@ -220,12 +200,8 @@ void relabel(raft::handle_t const& handle, return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); }); - // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid - // size is 0; this leads to cudaErrorInvaildConfiguration. - if (num_label_pairs > 0) { relabel_map.insert(pair_first, pair_first + num_label_pairs); } - // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid - // size is 0; this leads to cudaErrorInvaildConfiguration. - if (num_labels > 0) { relabel_map.find(labels, labels + num_labels, labels); } + relabel_map.insert(pair_first, pair_first + num_label_pairs); + relabel_map.find(labels, labels + num_labels, labels); } if (do_expensive_check) { diff --git a/cpp/src/experimental/renumber_edgelist.cu b/cpp/src/experimental/renumber_edgelist.cu index 127bd507271..dbf0250b88a 100644 --- a/cpp/src/experimental/renumber_edgelist.cu +++ b/cpp/src/experimental/renumber_edgelist.cu @@ -551,9 +551,7 @@ renumber_edgelist(raft::handle_t const& handle, handle.get_stream())); // cuco::static_map currently does not take stream cuco::static_map renumber_map{ - // FIXME: std::max(..., ...) as a temporary workaround for - // https://github.com/NVIDIA/cuCollections/issues/72 and - // https://github.com/NVIDIA/cuCollections/issues/73 + // cuco::static_map requires at least one empty slot std::max(static_cast( static_cast(partition.get_matrix_partition_major_size(i)) / load_factor), static_cast(partition.get_matrix_partition_major_size(i)) + 1), @@ -567,18 +565,10 @@ renumber_edgelist(raft::handle_t const& handle, [] __device__(auto val) { return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); }); - // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid - // size is 0; this leads to cudaErrorInvaildConfiguration. 
- if (partition.get_matrix_partition_major_size(i) > 0) { - renumber_map.insert(pair_first, pair_first + partition.get_matrix_partition_major_size(i)); - } - // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid - // size is 0; this leads to cudaErrorInvaildConfiguration. - if (edgelist_edge_counts[i]) { - renumber_map.find(edgelist_major_vertices[i], - edgelist_major_vertices[i] + edgelist_edge_counts[i], - edgelist_major_vertices[i]); - } + renumber_map.insert(pair_first, pair_first + partition.get_matrix_partition_major_size(i)); + renumber_map.find(edgelist_major_vertices[i], + edgelist_major_vertices[i] + edgelist_edge_counts[i], + edgelist_major_vertices[i]); } { @@ -601,9 +591,7 @@ renumber_edgelist(raft::handle_t const& handle, handle.get_stream())); // cuco::static_map currently does not take stream cuco::static_map renumber_map{ - // FIXME: std::max(..., ...) as a temporary workaround for - // https://github.com/NVIDIA/cuCollections/issues/72 and - // https://github.com/NVIDIA/cuCollections/issues/73 + // cuco::static_map requires at least one empty slot std::max( static_cast(static_cast(renumber_map_minor_labels.size()) / load_factor), renumber_map_minor_labels.size() + 1), @@ -616,19 +604,11 @@ renumber_edgelist(raft::handle_t const& handle, [] __device__(auto val) { return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); }); - // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid - // size is 0; this leads to cudaErrorInvaildConfiguration. - if (renumber_map_minor_labels.size()) { - renumber_map.insert(pair_first, pair_first + renumber_map_minor_labels.size()); - } + renumber_map.insert(pair_first, pair_first + renumber_map_minor_labels.size()); for (size_t i = 0; i < edgelist_major_vertices.size(); ++i) { - // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the - // grid size is 0; this leads to cudaErrorInvaildConfiguration. - if (edgelist_edge_counts[i]) { - renumber_map.find(edgelist_minor_vertices[i], - edgelist_minor_vertices[i] + edgelist_edge_counts[i], - edgelist_minor_vertices[i]); - } + renumber_map.find(edgelist_minor_vertices[i], + edgelist_minor_vertices[i] + edgelist_edge_counts[i], + edgelist_minor_vertices[i]); } } @@ -682,9 +662,7 @@ std::enable_if_t> renumber_edgelist( // footprint and execution time cuco::static_map renumber_map{ - // FIXME: std::max(..., ...) as a temporary workaround for - // https://github.com/NVIDIA/cuCollections/issues/72 and - // https://github.com/NVIDIA/cuCollections/issues/73 + // cuco::static_map requires at least one empty slot std::max(static_cast(static_cast(renumber_map_labels.size()) / load_factor), renumber_map_labels.size() + 1), invalid_vertex_id::value, @@ -695,21 +673,11 @@ std::enable_if_t> renumber_edgelist( [] __device__(auto val) { return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); }); - // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid - // size is 0; this leads to cudaErrorInvaildConfiguration. - if (renumber_map_labels.size()) { - renumber_map.insert(pair_first, pair_first + renumber_map_labels.size()); - } - // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid - // size is 0; this leads to cudaErrorInvaildConfiguration. 
- if (num_edgelist_edges > 0) { - renumber_map.find(edgelist_major_vertices, - edgelist_major_vertices + num_edgelist_edges, - edgelist_major_vertices); - renumber_map.find(edgelist_minor_vertices, - edgelist_minor_vertices + num_edgelist_edges, - edgelist_minor_vertices); - } + renumber_map.insert(pair_first, pair_first + renumber_map_labels.size()); + renumber_map.find( + edgelist_major_vertices, edgelist_major_vertices + num_edgelist_edges, edgelist_major_vertices); + renumber_map.find( + edgelist_minor_vertices, edgelist_minor_vertices + num_edgelist_edges, edgelist_minor_vertices); return renumber_map_labels; #else diff --git a/cpp/src/experimental/renumber_utils.cu b/cpp/src/experimental/renumber_utils.cu index 8f59683d9d6..eef6ca88b3c 100644 --- a/cpp/src/experimental/renumber_utils.cu +++ b/cpp/src/experimental/renumber_utils.cu @@ -108,9 +108,7 @@ void renumber_ext_vertices(raft::handle_t const& handle, renumber_map_ptr.reset(); renumber_map_ptr = std::make_unique>( - // FIXME: std::max(..., ...) as a temporary workaround for - // https://github.com/NVIDIA/cuCollections/issues/72 and - // https://github.com/NVIDIA/cuCollections/issues/73 + // cuco::static_map requires at least one empty slot std::max( static_cast(static_cast(sorted_unique_ext_vertices.size()) / load_factor), sorted_unique_ext_vertices.size() + 1), @@ -123,20 +121,14 @@ void renumber_ext_vertices(raft::handle_t const& handle, [] __device__(auto val) { return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); }); - // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid - // size is 0; this leads to cudaErrorInvaildConfiguration. - if (sorted_unique_ext_vertices.size()) { - renumber_map_ptr->insert(kv_pair_first, kv_pair_first + sorted_unique_ext_vertices.size()); - } + renumber_map_ptr->insert(kv_pair_first, kv_pair_first + sorted_unique_ext_vertices.size()); } else { handle.get_stream_view().synchronize(); // cuco::static_map currently does not take stream renumber_map_ptr.reset(); renumber_map_ptr = std::make_unique>( - // FIXME: std::max(..., ...) as a temporary workaround for - // https://github.com/NVIDIA/cuCollections/issues/72 and - // https://github.com/NVIDIA/cuCollections/issues/73 + // cuco::static_map requires at least one empty slot std::max(static_cast( static_cast(local_int_vertex_last - local_int_vertex_first) / load_factor), static_cast(local_int_vertex_last - local_int_vertex_first) + 1), @@ -149,21 +141,13 @@ void renumber_ext_vertices(raft::handle_t const& handle, [] __device__(auto val) { return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); }); - // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid - // size is 0; this leads to cudaErrorInvaildConfiguration. - if ((local_int_vertex_last - local_int_vertex_first) > 0) { - renumber_map_ptr->insert(pair_first, - pair_first + (local_int_vertex_last - local_int_vertex_first)); - } + renumber_map_ptr->insert(pair_first, + pair_first + (local_int_vertex_last - local_int_vertex_first)); } if (do_expensive_check) { rmm::device_uvector contains(num_vertices, handle.get_stream()); - // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid - // size is 0; this leads to cudaErrorInvaildConfiguration. 
- if (num_vertices > 0) { - renumber_map_ptr->contains(vertices, vertices + num_vertices, contains.begin()); - } + renumber_map_ptr->contains(vertices, vertices + num_vertices, contains.begin()); auto vc_pair_first = thrust::make_zip_iterator(thrust::make_tuple(vertices, contains.begin())); CUGRAPH_EXPECTS(thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), vc_pair_first, @@ -179,22 +163,7 @@ void renumber_ext_vertices(raft::handle_t const& handle, "(aggregate) renumber_map_labels."); } - // FIXME: a temporary workaround for https://github.com/NVIDIA/cuCollections/issues/74 -#if 1 - thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - vertices, - vertices + num_vertices, - vertices, - [view = renumber_map_ptr->get_device_view()] __device__(auto v) { - return v != invalid_vertex_id::value - ? view.find(v)->second.load(cuda::std::memory_order_relaxed) - : invalid_vertex_id::value; - }); -#else - // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid - // size is 0; this leads to cudaErrorInvaildConfiguration. - if (num_vertices > 0) { renumber_map_ptr->find(vertices, vertices + num_vertices, vertices); } -#endif + renumber_map_ptr->find(vertices, vertices + num_vertices, vertices); #endif } @@ -338,9 +307,7 @@ void unrenumber_int_vertices(raft::handle_t const& handle, handle.get_stream_view().synchronize(); // cuco::static_map currently does not take stream cuco::static_map unrenumber_map( - // FIXME: std::max(..., ...) as a temporary workaround for - // https://github.com/NVIDIA/cuCollections/issues/72 and - // https://github.com/NVIDIA/cuCollections/issues/73 + // cuco::static_map requires at least one empty slot std::max( static_cast(static_cast(sorted_unique_int_vertices.size()) / load_factor), sorted_unique_int_vertices.size() + 1), @@ -354,27 +321,8 @@ void unrenumber_int_vertices(raft::handle_t const& handle, [] __device__(auto val) { return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); }); - // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid - // size is 0; this leads to cudaErrorInvaildConfiguration. - if (sorted_unique_int_vertices.size()) { - unrenumber_map.insert(pair_first, pair_first + sorted_unique_int_vertices.size()); - } - // FIXME: a temporary workaround for https://github.com/NVIDIA/cuCollections/issues/74 -#if 1 - thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - vertices, - vertices + num_vertices, - vertices, - [view = unrenumber_map.get_device_view()] __device__(auto v) { - return v != invalid_vertex_id::value - ? view.find(v)->second.load(cuda::std::memory_order_relaxed) - : invalid_vertex_id::value; - }); -#else - // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid - // size is 0; this leads to cudaErrorInvaildConfiguration. 
- if (num_vertices > 0) { unrenumber_map.find(vertices, vertices + num_vertices, vertices); } -#endif + unrenumber_map.insert(pair_first, pair_first + sorted_unique_int_vertices.size()); + unrenumber_map.find(vertices, vertices + num_vertices, vertices); } else { unrenumber_local_int_vertices(handle, vertices, diff --git a/python/cugraph/centrality/katz_centrality.py b/python/cugraph/centrality/katz_centrality.py index ce52d15f5db..4a2b41cfe59 100644 --- a/python/cugraph/centrality/katz_centrality.py +++ b/python/cugraph/centrality/katz_centrality.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -42,13 +42,13 @@ def katz_centrality( NOTE The maximum acceptable value of alpha for convergence - alpha_max = 1/(lambda_max) where lambda_max is the largest eigenvalue - of the graph. + alpha_max = 1/(lambda_max) where lambda_max is the largest + eigenvalue of the graph. Since lambda_max is always lesser than or equal to degree_max for a graph, alpha_max will always be greater than or equal to (1/degree_max). Therefore, setting alpha to (1/degree_max) will - guarantee that it will never exceed alpha_max thus in turn fulfilling - the requirement for convergence. + guarantee that it will never exceed alpha_max thus in turn + fulfilling the requirement for convergence. beta : None A weight scalar - currently Not Supported max_iter : int diff --git a/python/cugraph/components/connectivity.py b/python/cugraph/components/connectivity.py index df33f8b8e03..94eea312fb9 100644 --- a/python/cugraph/components/connectivity.py +++ b/python/cugraph/components/connectivity.py @@ -139,7 +139,7 @@ def weakly_connected_components(G, directed : bool, optional NOTE - For non-Graph-type (eg. sparse matrix) values of G only. + For non-Graph-type (eg. sparse matrix) values of G only. Raises TypeError if used with a Graph object. If True (default), then convert the input matrix to a cugraph.DiGraph diff --git a/python/cugraph/dask/centrality/katz_centrality.py b/python/cugraph/dask/centrality/katz_centrality.py index 45deda8b7ae..cd6af8e7906 100644 --- a/python/cugraph/dask/centrality/katz_centrality.py +++ b/python/cugraph/dask/centrality/katz_centrality.py @@ -71,13 +71,13 @@ def katz_centrality(input_graph, NOTE The maximum acceptable value of alpha for convergence - alpha_max = 1/(lambda_max) where lambda_max is the largest eigenvalue - of the graph. + alpha_max = 1/(lambda_max) where lambda_max is the largest + eigenvalue of the graph. Since lambda_max is always lesser than or equal to degree_max for a graph, alpha_max will always be greater than or equal to (1/degree_max). Therefore, setting alpha to (1/degree_max) will - guarantee that it will never exceed alpha_max thus in turn fulfilling - the requirement for convergence. + guarantee that it will never exceed alpha_max thus in turn + fulfilling the requirement for convergence. 
beta : None A weight scalar - currently Not Supported max_iter : int From aa0442135c7206a4bde05b62b0ecd7378fbc702b Mon Sep 17 00:00:00 2001 From: Brad Rees <34135411+BradReesWork@users.noreply.github.com> Date: Fri, 16 Apr 2021 09:15:49 -0400 Subject: [PATCH 227/343] Reorganized docs and environment files (#1537) * reorganized docs so that information is under ./docs/cugraph that better matches the rest of rapids * Updated yml files * removed unneeded packages in the yml files * drop old notebook * marked FA2 notebook as skip Authors: - Brad Rees (https://github.com/BradReesWork) Approvers: - AJ Schmidt (https://github.com/ajschmidt8) - Rick Ratzel (https://github.com/rlratzel) - https://github.com/Iroy30 URL: https://github.com/rapidsai/cugraph/pull/1537 --- build.sh | 2 +- conda/environments/cugraph_dev_cuda10.1.yml | 8 +- conda/environments/cugraph_dev_cuda10.2.yml | 8 +- conda/environments/cugraph_dev_cuda11.0.yml | 8 +- conda/environments/cugraph_dev_cuda11.1.yml | 47 ++ conda/environments/cugraph_dev_cuda11.2.yml | 47 ++ docs/{ => cugraph}/Makefile | 0 docs/{ => cugraph}/README.md | 0 docs/{ => cugraph}/make.bat | 0 docs/{ => cugraph}/requirement.txt | 0 docs/{ => cugraph}/source/_static/EMPTY | 0 .../source/_static/copybutton.css | 0 .../source/_static/copybutton_pydocs.js | 65 +++ .../source/_static/example_mod.js | 0 docs/{ => cugraph}/source/_static/params.css | 0 .../source/_static/references.css | 0 docs/{ => cugraph}/source/api.rst | 0 docs/{ => cugraph}/source/conf.py | 10 +- docs/{ => cugraph}/source/cugraph_blogs.rst | 0 docs/{ => cugraph}/source/cugraph_intro.md | 0 docs/{ => cugraph}/source/cugraph_ref.rst | 30 +- docs/{ => cugraph}/source/dask-cugraph.rst | 0 docs/{ => cugraph}/source/images/Nx_Cg_1.png | Bin docs/{ => cugraph}/source/images/Nx_Cg_2.png | Bin docs/{ => cugraph}/source/index.rst | 0 docs/{ => cugraph}/source/nx_transition.rst | 0 .../source/sphinxext/github_link.py | 14 + notebooks/layout/Force-Atlas2.ipynb | 5 +- python/utils/ECG_Golden.ipynb | 487 ------------------ 29 files changed, 214 insertions(+), 517 deletions(-) create mode 100644 conda/environments/cugraph_dev_cuda11.1.yml create mode 100644 conda/environments/cugraph_dev_cuda11.2.yml rename docs/{ => cugraph}/Makefile (100%) rename docs/{ => cugraph}/README.md (100%) rename docs/{ => cugraph}/make.bat (100%) rename docs/{ => cugraph}/requirement.txt (100%) rename docs/{ => cugraph}/source/_static/EMPTY (100%) rename docs/{ => cugraph}/source/_static/copybutton.css (100%) create mode 100644 docs/cugraph/source/_static/copybutton_pydocs.js rename docs/{ => cugraph}/source/_static/example_mod.js (100%) rename docs/{ => cugraph}/source/_static/params.css (100%) rename docs/{ => cugraph}/source/_static/references.css (100%) rename docs/{ => cugraph}/source/api.rst (100%) rename docs/{ => cugraph}/source/conf.py (98%) rename docs/{ => cugraph}/source/cugraph_blogs.rst (100%) rename docs/{ => cugraph}/source/cugraph_intro.md (100%) rename docs/{ => cugraph}/source/cugraph_ref.rst (61%) rename docs/{ => cugraph}/source/dask-cugraph.rst (100%) rename docs/{ => cugraph}/source/images/Nx_Cg_1.png (100%) rename docs/{ => cugraph}/source/images/Nx_Cg_2.png (100%) rename docs/{ => cugraph}/source/index.rst (100%) rename docs/{ => cugraph}/source/nx_transition.rst (100%) rename docs/{ => cugraph}/source/sphinxext/github_link.py (88%) delete mode 100644 python/utils/ECG_Golden.ipynb diff --git a/build.sh b/build.sh index 54634e2ca6e..7c99b27f632 100755 --- a/build.sh +++ b/build.sh @@ -170,6 +170,6 @@ if 
buildAll || hasArg docs; then fi cd ${LIBCUGRAPH_BUILD_DIR} cmake --build "${LIBCUGRAPH_BUILD_DIR}" -j${PARALLEL_LEVEL} --target docs_cugraph ${VERBOSE_FLAG} - cd ${REPODIR}/docs + cd ${REPODIR}/docs/cugraph make html fi diff --git a/conda/environments/cugraph_dev_cuda10.1.yml b/conda/environments/cugraph_dev_cuda10.1.yml index 8d717c205c7..9108f642c20 100644 --- a/conda/environments/cugraph_dev_cuda10.1.yml +++ b/conda/environments/cugraph_dev_cuda10.1.yml @@ -5,10 +5,10 @@ channels: - rapidsai-nightly - conda-forge dependencies: +- cudatoolkit=10.1 - cudf=0.20.* - libcudf=0.20.* - rmm=0.20.* -- cuxfilter=0.20.* - librmm=0.20.* - dask>=2.12.0 - distributed>=2.12.0 @@ -19,8 +19,6 @@ dependencies: - ucx-proc=*=gpu - scipy - networkx>=2.5.1 -- python-louvain -- cudatoolkit=10.1 - clang=8.0.1 - clang-tools=8.0.1 - cmake>=3.18 @@ -32,18 +30,16 @@ dependencies: - libfaiss=1.7.0 - faiss-proc=*=cuda - scikit-learn>=0.23.1 -- colorcet -- holoviews - sphinx - sphinx_rtd_theme - sphinxcontrib-websupport - sphinx-markdown-tables +- sphinx-copybutton - nbsphinx - numpydoc - ipython - recommonmark - pip -- libcypher-parser - rapids-pytest-benchmark - doxygen - pytest-cov diff --git a/conda/environments/cugraph_dev_cuda10.2.yml b/conda/environments/cugraph_dev_cuda10.2.yml index 771f6141a68..593af0a99bd 100644 --- a/conda/environments/cugraph_dev_cuda10.2.yml +++ b/conda/environments/cugraph_dev_cuda10.2.yml @@ -5,10 +5,10 @@ channels: - rapidsai-nightly - conda-forge dependencies: +- cudatoolkit=10.2 - cudf=0.20.* - libcudf=0.20.* - rmm=0.20.* -- cuxfilter=0.20.* - librmm=0.20.* - dask>=2.12.0 - distributed>=2.12.0 @@ -19,8 +19,6 @@ dependencies: - ucx-proc=*=gpu - scipy - networkx>=2.5.1 -- python-louvain -- cudatoolkit=10.2 - clang=8.0.1 - clang-tools=8.0.1 - cmake>=3.18 @@ -32,18 +30,16 @@ dependencies: - libfaiss=1.7.0 - faiss-proc=*=cuda - scikit-learn>=0.23.1 -- colorcet -- holoviews - sphinx - sphinx_rtd_theme - sphinxcontrib-websupport - sphinx-markdown-tables +- sphinx-copybutton - nbsphinx - numpydoc - ipython - recommonmark - pip -- libcypher-parser - rapids-pytest-benchmark - doxygen - pytest-cov diff --git a/conda/environments/cugraph_dev_cuda11.0.yml b/conda/environments/cugraph_dev_cuda11.0.yml index 84c07524a00..20d56b281d2 100644 --- a/conda/environments/cugraph_dev_cuda11.0.yml +++ b/conda/environments/cugraph_dev_cuda11.0.yml @@ -5,10 +5,10 @@ channels: - rapidsai-nightly - conda-forge dependencies: +- cudatoolkit=11.0 - cudf=0.20.* - libcudf=0.20.* - rmm=0.20.* -- cuxfilter=0.20.* - librmm=0.20.* - dask>=2.12.0 - distributed>=2.12.0 @@ -19,8 +19,6 @@ dependencies: - ucx-proc=*=gpu - scipy - networkx>=2.5.1 -- python-louvain -- cudatoolkit=11.0 - clang=8.0.1 - clang-tools=8.0.1 - cmake>=3.18 @@ -32,18 +30,16 @@ dependencies: - libfaiss=1.7.0 - faiss-proc=*=cuda - scikit-learn>=0.23.1 -- colorcet -- holoviews - sphinx - sphinx_rtd_theme - sphinxcontrib-websupport - sphinx-markdown-tables +- sphinx-copybutton - nbsphinx - numpydoc - ipython - recommonmark - pip -- libcypher-parser - rapids-pytest-benchmark - doxygen - pytest-cov diff --git a/conda/environments/cugraph_dev_cuda11.1.yml b/conda/environments/cugraph_dev_cuda11.1.yml new file mode 100644 index 00000000000..0eba2baccaa --- /dev/null +++ b/conda/environments/cugraph_dev_cuda11.1.yml @@ -0,0 +1,47 @@ +name: cugraph_dev +channels: +- rapidsai +- nvidia +- rapidsai-nightly +- conda-forge +dependencies: +- cudatoolkit=11.1 +- cudf=0.20.* +- libcudf=0.20.* +- rmm=0.20.* +- librmm=0.20.* +- dask>=2.12.0 +- distributed>=2.12.0 +- 
dask-cuda=0.20* +- dask-cudf=0.20* +- nccl>=2.8.4 +- ucx-py=0.20* +- ucx-proc=*=gpu +- scipy +- networkx>=2.5.1 +- clang=8.0.1 +- clang-tools=8.0.1 +- cmake>=3.18 +- python>=3.6,<3.9 +- notebook>=0.5.0 +- boost +- cython>=0.29,<0.30 +- pytest +- libfaiss=1.7.0 +- faiss-proc=*=cuda +- scikit-learn>=0.23.1 +- sphinx +- sphinx_rtd_theme +- sphinxcontrib-websupport +- sphinx-markdown-tables +- sphinx-copybutton +- nbsphinx +- numpydoc +- ipython +- recommonmark +- pip +- rapids-pytest-benchmark +- doxygen +- pytest-cov +- gtest +- gmock diff --git a/conda/environments/cugraph_dev_cuda11.2.yml b/conda/environments/cugraph_dev_cuda11.2.yml new file mode 100644 index 00000000000..55f6ad75cec --- /dev/null +++ b/conda/environments/cugraph_dev_cuda11.2.yml @@ -0,0 +1,47 @@ +name: cugraph_dev +channels: +- rapidsai +- nvidia +- rapidsai-nightly +- conda-forge +dependencies: +- cudatoolkit=11.2 +- cudf=0.20.* +- libcudf=0.20.* +- rmm=0.20.* +- librmm=0.20.* +- dask>=2.12.0 +- distributed>=2.12.0 +- dask-cuda=0.20* +- dask-cudf=0.20* +- nccl>=2.8.4 +- ucx-py=0.20* +- ucx-proc=*=gpu +- scipy +- networkx>=2.5.1 +- clang=8.0.1 +- clang-tools=8.0.1 +- cmake>=3.18 +- python>=3.6,<3.9 +- notebook>=0.5.0 +- boost +- cython>=0.29,<0.30 +- pytest +- libfaiss=1.7.0 +- faiss-proc=*=cuda +- scikit-learn>=0.23.1 +- sphinx +- sphinx_rtd_theme +- sphinxcontrib-websupport +- sphinx-markdown-tables +- sphinx-copybutton +- nbsphinx +- numpydoc +- ipython +- recommonmark +- pip +- rapids-pytest-benchmark +- doxygen +- pytest-cov +- gtest +- gmock diff --git a/docs/Makefile b/docs/cugraph/Makefile similarity index 100% rename from docs/Makefile rename to docs/cugraph/Makefile diff --git a/docs/README.md b/docs/cugraph/README.md similarity index 100% rename from docs/README.md rename to docs/cugraph/README.md diff --git a/docs/make.bat b/docs/cugraph/make.bat similarity index 100% rename from docs/make.bat rename to docs/cugraph/make.bat diff --git a/docs/requirement.txt b/docs/cugraph/requirement.txt similarity index 100% rename from docs/requirement.txt rename to docs/cugraph/requirement.txt diff --git a/docs/source/_static/EMPTY b/docs/cugraph/source/_static/EMPTY similarity index 100% rename from docs/source/_static/EMPTY rename to docs/cugraph/source/_static/EMPTY diff --git a/docs/source/_static/copybutton.css b/docs/cugraph/source/_static/copybutton.css similarity index 100% rename from docs/source/_static/copybutton.css rename to docs/cugraph/source/_static/copybutton.css diff --git a/docs/cugraph/source/_static/copybutton_pydocs.js b/docs/cugraph/source/_static/copybutton_pydocs.js new file mode 100644 index 00000000000..cec05777e6b --- /dev/null +++ b/docs/cugraph/source/_static/copybutton_pydocs.js @@ -0,0 +1,65 @@ +$(document).ready(function() { + /* Add a [>>>] button on the top-right corner of code samples to hide + * the >>> and ... prompts and the output and thus make the code + * copyable. 
*/ + var div = $('.highlight-python .highlight,' + + '.highlight-python3 .highlight,' + + '.highlight-pycon .highlight,' + + '.highlight-default .highlight'); + var pre = div.find('pre'); + + // get the styles from the current theme + pre.parent().parent().css('position', 'relative'); + var hide_text = 'Hide the prompts and output'; + var show_text = 'Show the prompts and output'; + var border_width = pre.css('border-top-width'); + var border_style = pre.css('border-top-style'); + var border_color = pre.css('border-top-color'); + var button_styles = { + 'cursor':'pointer', 'position': 'absolute', 'top': '0', 'right': '0', + 'border-color': border_color, 'border-style': border_style, + 'border-width': border_width, 'text-size': '75%', + 'font-family': 'monospace', 'padding-left': '0.2em', 'padding-right': '1.5em', + 'border-radius': '0 3px 0 0', + 'transition': "0.5s" + } + + // create and add the button to all the code blocks that contain >>> + div.each(function(index) { + var jthis = $(this); + if (jthis.find('.gp').length > 0) { + var button = $('>>>'); + button.css(button_styles) + button.attr('title', hide_text); + button.data('hidden', 'false'); + jthis.prepend(button); + } + // tracebacks (.gt) contain bare text elements that need to be + // wrapped in a span to work with .nextUntil() (see later) + jthis.find('pre:has(.gt)').contents().filter(function() { + return ((this.nodeType == 3) && (this.data.trim().length > 0)); + }).wrap(''); + }); + + // define the behavior of the button when it's clicked + $('.copybutton').click(function(e){ + e.preventDefault(); + var button = $(this); + if (button.data('hidden') === 'false') { + // hide the code output + button.parent().find('.go, .gp, .gt').hide(); + button.next('pre').find('.gt').nextUntil('.gp, .go').css('visibility', 'hidden'); + button.css('text-decoration', 'line-through'); + button.attr('title', show_text); + button.data('hidden', 'true'); + } else { + // show the code output + button.parent().find('.go, .gp, .gt').show(); + button.next('pre').find('.gt').nextUntil('.gp, .go').css('visibility', 'visible'); + button.css('text-decoration', 'none'); + button.attr('title', hide_text); + button.data('hidden', 'false'); + } + }); +}); + diff --git a/docs/source/_static/example_mod.js b/docs/cugraph/source/_static/example_mod.js similarity index 100% rename from docs/source/_static/example_mod.js rename to docs/cugraph/source/_static/example_mod.js diff --git a/docs/source/_static/params.css b/docs/cugraph/source/_static/params.css similarity index 100% rename from docs/source/_static/params.css rename to docs/cugraph/source/_static/params.css diff --git a/docs/source/_static/references.css b/docs/cugraph/source/_static/references.css similarity index 100% rename from docs/source/_static/references.css rename to docs/cugraph/source/_static/references.css diff --git a/docs/source/api.rst b/docs/cugraph/source/api.rst similarity index 100% rename from docs/source/api.rst rename to docs/cugraph/source/api.rst diff --git a/docs/source/conf.py b/docs/cugraph/source/conf.py similarity index 98% rename from docs/source/conf.py rename to docs/cugraph/source/conf.py index 5e87622bd09..a4633d04f8d 100644 --- a/docs/source/conf.py +++ b/docs/cugraph/source/conf.py @@ -42,17 +42,17 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. 
extensions = [ - 'numpydoc', - 'sphinx.ext.autodoc', - 'sphinx.ext.autosummary', + "sphinx.ext.intersphinx", + "sphinx.ext.autodoc", + "sphinx.ext.autosummary", + "numpydoc", + "sphinx_markdown_tables", 'sphinx.ext.doctest', - 'sphinx.ext.intersphinx', 'sphinx.ext.linkcode', "IPython.sphinxext.ipython_console_highlighting", "IPython.sphinxext.ipython_directive", "nbsphinx", "recommonmark", - "sphinx_markdown_tables", ] diff --git a/docs/source/cugraph_blogs.rst b/docs/cugraph/source/cugraph_blogs.rst similarity index 100% rename from docs/source/cugraph_blogs.rst rename to docs/cugraph/source/cugraph_blogs.rst diff --git a/docs/source/cugraph_intro.md b/docs/cugraph/source/cugraph_intro.md similarity index 100% rename from docs/source/cugraph_intro.md rename to docs/cugraph/source/cugraph_intro.md diff --git a/docs/source/cugraph_ref.rst b/docs/cugraph/source/cugraph_ref.rst similarity index 61% rename from docs/source/cugraph_ref.rst rename to docs/cugraph/source/cugraph_ref.rst index 591619fb338..e0f113eaba4 100644 --- a/docs/source/cugraph_ref.rst +++ b/docs/cugraph/source/cugraph_ref.rst @@ -2,22 +2,35 @@ References ########## +************ +Architecture +************ + +2-D Data Partitioning + +- Kang, S., Fender, A., Eaton, J., & Rees, B. (2020, September) *Computing PageRank Scores of Web Crawl Data Using DGX A100 Clusters*. In 2020 IEEE High Performance Extreme Computing Conference (HPEC) (pp. 1-4). IEEE. + + +| + +| + ********** Algorithms ********** Betweenness Centrality -- Brandes, U. (2001). A faster algorithm for betweenness centrality. Journal of mathematical sociology, 25(2), 163-177. -- Brandes, U. (2008). On variants of shortest-path betweenness centrality and their generic computation. Social Networks, 30(2), 136-145. -- McLaughlin, A., & Bader, D. A. (2018). Accelerating GPU betweenness centrality. Communications of the ACM, 61(8), 85-92. +- Brandes, U. (2001). *A faster algorithm for betweenness centrality*. Journal of mathematical sociology, 25(2), 163-177. +- Brandes, U. (2008). *On variants of shortest-path betweenness centrality and their generic computation*. Social Networks, 30(2), 136-145. +- McLaughlin, A., & Bader, D. A. (2018). *Accelerating GPU betweenness centrality*. Communications of the ACM, 61(8), 85-92. Katz - J. Cohen, *Trusses: Cohesive subgraphs for social network analysis* National security agency technical report, 2008 - O. Green, J. Fox, E. Kim, F. Busato, et al. *Quickly Finding a Truss in a Haystack* IEEE High Performance Extreme Computing Conference (HPEC), 2017 https://doi.org/10.1109/HPEC.2017.8091038 -- O. Green, P. Yalamanchili, L.M. Munguia, “*ast Triangle Counting on GPU* Irregular Applications: Architectures and Algorithms (IA3), 2014 +- O. Green, P. Yalamanchili, L.M. Munguia, *Fast Triangle Counting on GPU* Irregular Applications: Architectures and Algorithms (IA3), 2014 Hungarian Algorithm @@ -27,6 +40,15 @@ Hungarian Algorithm | +************* +Other Papers +************* +- Hricik, T., Bader, D., & Green, O. (2020, September). *Using RAPIDS AI to Accelerate Graph Data Science Workflows*. In 2020 IEEE High Performance Extreme Computing Conference (HPEC) (pp. 1-4). IEEE. 
+ +| + +| + ********** Data Sets ********** diff --git a/docs/source/dask-cugraph.rst b/docs/cugraph/source/dask-cugraph.rst similarity index 100% rename from docs/source/dask-cugraph.rst rename to docs/cugraph/source/dask-cugraph.rst diff --git a/docs/source/images/Nx_Cg_1.png b/docs/cugraph/source/images/Nx_Cg_1.png similarity index 100% rename from docs/source/images/Nx_Cg_1.png rename to docs/cugraph/source/images/Nx_Cg_1.png diff --git a/docs/source/images/Nx_Cg_2.png b/docs/cugraph/source/images/Nx_Cg_2.png similarity index 100% rename from docs/source/images/Nx_Cg_2.png rename to docs/cugraph/source/images/Nx_Cg_2.png diff --git a/docs/source/index.rst b/docs/cugraph/source/index.rst similarity index 100% rename from docs/source/index.rst rename to docs/cugraph/source/index.rst diff --git a/docs/source/nx_transition.rst b/docs/cugraph/source/nx_transition.rst similarity index 100% rename from docs/source/nx_transition.rst rename to docs/cugraph/source/nx_transition.rst diff --git a/docs/source/sphinxext/github_link.py b/docs/cugraph/source/sphinxext/github_link.py similarity index 88% rename from docs/source/sphinxext/github_link.py rename to docs/cugraph/source/sphinxext/github_link.py index a7a46fdd9df..fa8fe3f5fe3 100644 --- a/docs/source/sphinxext/github_link.py +++ b/docs/cugraph/source/sphinxext/github_link.py @@ -1,3 +1,17 @@ +# Copyright (c) 2019-2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# NOTE: # This contains code with copyright by the scikit-learn project, subject to the # license in /thirdparty/LICENSES/LICENSE.scikit_learn diff --git a/notebooks/layout/Force-Atlas2.ipynb b/notebooks/layout/Force-Atlas2.ipynb index fa9ec0fd180..456af3c62de 100644 --- a/notebooks/layout/Force-Atlas2.ipynb +++ b/notebooks/layout/Force-Atlas2.ipynb @@ -4,7 +4,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Force Atlas 2" + "# Force Atlas 2\n", + "# Skip notebook test" ] }, { @@ -521,4 +522,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file diff --git a/python/utils/ECG_Golden.ipynb b/python/utils/ECG_Golden.ipynb deleted file mode 100644 index 0da04869d78..00000000000 --- a/python/utils/ECG_Golden.ipynb +++ /dev/null @@ -1,487 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This notebook was used to generate the golden data results for ECG. It requires that the python-igraph package be installed to run. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from scipy.io import mmread\n", - "import networkx as nx\n", - "#mmFile='/datasets/kron_g500-logn21/kron_g500-logn21.mtx'\n", - "mmFile='/datasets/golden_data/graphs/dblp.mtx'\n", - "#mmFile='/datasets/networks/karate.mtx'\n", - "#mmFile='/home/jwyles/code/mycugraph/datasets/dolphins.mtx'\n", - "#mmFile='/home/jwyles/code/mycugraph/datasets/netscience.mtx'\n", - "M = mmread(mmFile).asfptype()\n", - "import cugraph\n", - "import cudf\n", - "import numpy as np\n", - "rows = cudf.Series(M.row)\n", - "cols = cudf.Series(M.col)\n", - "values = cudf.Series(M.data)\n", - "G = cugraph.Graph()\n", - "G.add_edge_list(rows, cols, values)" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 326 ms, sys: 400 ms, total: 726 ms\n", - "Wall time: 796 ms\n" - ] - } - ], - "source": [ - "%%time\n", - "parts = cugraph.ecg(G, .05, 16)" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "49204" - ] - }, - "execution_count": 47, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "numParts = parts['partition'].max() + 1\n", - "numParts" - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.850147008895874" - ] - }, - "execution_count": 48, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "mod = cugraph.analyzeClustering_modularity(G, numParts, parts['partition'])\n", - "mod" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.7506256512679915" - ] - }, - "execution_count": 49, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "parts2, mod2 = cugraph.louvain(G)\n", - "mod2" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "metadata": {}, - "outputs": [], - "source": [ - "import igraph as ig\n", - "import numpy as np\n", - "\n", - "def community_ecg(self, weights=None, ens_size=16, min_weight=0.05):\n", - " W = [0]*self.ecount()\n", - " ## Ensemble of level-1 Louvain \n", - " for i in range(ens_size):\n", - " p = np.random.permutation(self.vcount()).tolist()\n", - " g = self.permute_vertices(p)\n", - " l = g.community_multilevel(weights=weights, return_levels=True)[0].membership\n", - " b = [l[p[x.tuple[0]]]==l[p[x.tuple[1]]] for x in self.es]\n", - " W = [W[i]+b[i] for i in range(len(W))]\n", - " W = [min_weight + (1-min_weight)*W[i]/ens_size for i in range(len(W))]\n", - " part = self.community_multilevel(weights=W)\n", - " ## Force min_weight outside 2-core\n", - " core = self.shell_index()\n", - " ecore = [min(core[x.tuple[0]],core[x.tuple[1]]) for x in self.es]\n", - " part.W = [W[i] if ecore[i]>1 else min_weight for i in range(len(ecore))]\n", - " return part\n", - "\n", - "ig.Graph.community_ecg = community_ecg" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "metadata": {}, - "outputs": [], - "source": [ - "Gi = ig.Graph.Read_Edgelist('./dblp2.txt', directed=False)" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 3min 49s, sys: 1.67 s, total: 3min 51s\n", - "Wall time: 3min 50s\n" - ] - } - ], - 
"source": [ - "%%time\n", - "ec = Gi.community_ecg()" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": {}, - "outputs": [], - "source": [ - "ecg = np.zeros(len(Gi.vs), dtype=np.int32)" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([0, 0, 0, ..., 0, 0, 0], dtype=int32)" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ecg" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "metadata": {}, - "outputs": [], - "source": [ - "for i in range(len(ec)):\n", - " for j in ec[i]:\n", - " ecg[j] = i" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([275, 275, 0, ..., 435, 435, 107], dtype=int32)" - ] - }, - "execution_count": 39, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ecg" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "metadata": {}, - "outputs": [], - "source": [ - "ecg_col = cudf.Series(ecg)" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": {}, - "outputs": [], - "source": [ - "numParts = ecg_col.max() + 1" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": {}, - "outputs": [], - "source": [ - "mod4 = cugraph.analyzeClustering_modularity(G, numParts, ecg_col)" - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.9279554486274719" - ] - }, - "execution_count": 43, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "mod4" - ] - }, - { - "cell_type": "code", - "execution_count": 94, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "34" - ] - }, - "execution_count": 94, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "maxId = 0\n", - "for i in range(len(ec)):\n", - " for j in ec[i]:\n", - " if j > maxId:\n", - " maxId = j\n", - "maxId" - ] - }, - { - "cell_type": "code", - "execution_count": 79, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "156" - ] - }, - "execution_count": 79, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(Gi.es)" - ] - }, - { - "cell_type": "code", - "execution_count": 80, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "156" - ] - }, - "execution_count": 80, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(M.row)" - ] - }, - { - "cell_type": "code", - "execution_count": 81, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "156" - ] - }, - "execution_count": 81, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "78 *2" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "metadata": {}, - "outputs": [], - "source": [ - "filename = \"dblp2.txt\"\n", - "f = open(filename, 'w')\n", - "for i in range(len(M.row)):\n", - " f.write(str(M.row[i]) + ' ' + str(M.col[i]) + '\\n')\n", - "f.close()" - ] - }, - { - "cell_type": "code", - "execution_count": 82, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "igraph.Edge(, 1, {})" - ] - }, - "execution_count": 82, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Gi.es[1]" - ] - }, - { - "cell_type": "code", - "execution_count": 84, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - 
"igraph.Edge(, 1, {})" - ] - }, - "execution_count": 84, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Gi.es.select()[1]" - ] - }, - { - "cell_type": "code", - "execution_count": 85, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0" - ] - }, - "execution_count": 85, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Gi.es[0].source" - ] - }, - { - "cell_type": "code", - "execution_count": 88, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "1" - ] - }, - "execution_count": 88, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Gi.es[0].target" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} From e37af35ad4caee4a357aca37dc760b46d7dc0ba6 Mon Sep 17 00:00:00 2001 From: Chuck Hastings <45364586+ChuckHastings@users.noreply.github.com> Date: Mon, 19 Apr 2021 09:55:25 -0400 Subject: [PATCH 228/343] New C++ testing structure (#1529) This PR reorganizes the tests for the new graph object and graph primitives to simplify testing and support an expansion of the number of test data sources. The principal idea is to segregate the testing of the algorithm from how the graph is actually constructed. This allows for the composition of tests by using different test graph generators or input sources with the same test graph algorithm implementation. 
Closes #1534 Closes #1536 Supports #1467 Should be merged after PR #1520 which fixes python format issues Authors: - Chuck Hastings (https://github.com/ChuckHastings) Approvers: - Rick Ratzel (https://github.com/rlratzel) - Alex Fender (https://github.com/afender) URL: https://github.com/rapidsai/cugraph/pull/1529 --- cpp/tests/CMakeLists.txt | 144 +++++++++++++- .../centrality/betweenness_centrality_test.cu | 16 +- .../edge_betweenness_centrality_test.cu | 16 +- cpp/tests/centrality/katz_centrality_test.cu | 4 +- cpp/tests/community/egonet_test.cu | 4 +- cpp/tests/community/louvain_test.cpp | 2 +- cpp/tests/community/mg_louvain_test.cpp | 4 +- cpp/tests/components/con_comp_test.cu | 14 +- cpp/tests/components/scc_test.cu | 2 +- cpp/tests/components/wcc_graphs.cu | 87 ++++++++ cpp/tests/components/wcc_graphs.hpp | 40 ++++ cpp/tests/components/wcc_test.cpp | 97 +++++++++ cpp/tests/experimental/bfs_test.cpp | 134 ++++++------- cpp/tests/experimental/coarsen_graph_test.cpp | 2 +- cpp/tests/experimental/degree_test.cpp | 12 +- cpp/tests/experimental/generate_rmat_test.cpp | 14 +- cpp/tests/experimental/graph_test.cpp | 20 +- .../experimental/induced_subgraph_test.cpp | 2 +- .../experimental/katz_centrality_test.cpp | 140 +++++-------- cpp/tests/experimental/mg_bfs_test.cpp | 140 +++++-------- .../experimental/mg_katz_centrality_test.cpp | 157 ++++++--------- cpp/tests/experimental/mg_sssp_test.cpp | 136 +++++-------- cpp/tests/experimental/ms_bfs_test.cpp | 4 +- cpp/tests/experimental/pagerank_test.cpp | 177 ++++++----------- cpp/tests/experimental/random_walks_test.cu | 2 +- cpp/tests/experimental/sssp_test.cpp | 130 +++++------- cpp/tests/experimental/weight_sum_test.cpp | 12 +- cpp/tests/layout/force_atlas2_test.cu | 16 +- cpp/tests/pagerank/mg_pagerank_test.cpp | 186 ++++++------------ cpp/tests/traversal/bfs_test.cu | 18 +- cpp/tests/traversal/sssp_test.cu | 12 +- cpp/tests/traversal/tsp_test.cu | 2 +- cpp/tests/tree/mst_test.cu | 8 +- cpp/tests/utilities/test_utilities.hpp | 126 ++++++++++-- 34 files changed, 1007 insertions(+), 873 deletions(-) create mode 100644 cpp/tests/components/wcc_graphs.cu create mode 100644 cpp/tests/components/wcc_graphs.hpp create mode 100644 cpp/tests/components/wcc_test.cpp diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 89975f673ae..7a544fd75fb 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -25,6 +25,7 @@ add_library(cugraphtestutil STATIC "${CMAKE_CURRENT_SOURCE_DIR}/utilities/generate_graph_from_edgelist.cu" "${CMAKE_CURRENT_SOURCE_DIR}/utilities/thrust_wrapper.cu" "${CMAKE_CURRENT_SOURCE_DIR}/utilities/misc_utilities.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/components/wcc_graphs.cu" "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c") set_property(TARGET cugraphtestutil PROPERTY POSITION_INDEPENDENT_CODE ON) @@ -68,8 +69,8 @@ function(ConfigureTest CMAKE_TEST_NAME CMAKE_TEST_SRC) PRIVATE "${CUB_INCLUDE_DIR}" "${THRUST_INCLUDE_DIR}" - "${CUCO_INCLUDE_DIR}" - "${LIBCUDACXX_INCLUDE_DIR}" + "${CUCO_INCLUDE_DIR}" + "${LIBCUDACXX_INCLUDE_DIR}" "${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}" "${RMM_INCLUDE}" "${NCCL_INCLUDE_DIRS}" @@ -165,6 +166,117 @@ function(ConfigureTest CMAKE_TEST_NAME CMAKE_TEST_SRC) add_test(NAME ${CMAKE_TEST_NAME} COMMAND ${CMAKE_TEST_NAME}) endfunction() +function(ConfigureTestMG CMAKE_TEST_NAME CMAKE_TEST_SRC) + add_executable(${CMAKE_TEST_NAME} + ${CMAKE_TEST_SRC}) + + target_include_directories(${CMAKE_TEST_NAME} + PRIVATE + "${CUB_INCLUDE_DIR}" + "${THRUST_INCLUDE_DIR}" + 
"${CUCO_INCLUDE_DIR}" + "${LIBCUDACXX_INCLUDE_DIR}" + "${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}" + "${RMM_INCLUDE}" + "${NCCL_INCLUDE_DIRS}" + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio" + "${CMAKE_CURRENT_SOURCE_DIR}/../include" + "${CMAKE_CURRENT_SOURCE_DIR}/../src" + "${CMAKE_CURRENT_SOURCE_DIR}" + "${RAFT_DIR}/cpp/include" + ) + + target_link_directories(${CMAKE_TEST_NAME} + PRIVATE + # CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES is an undocumented/unsupported + # variable containing the link directories for nvcc. + "${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}") + + target_link_libraries(${CMAKE_TEST_NAME} + PRIVATE + cugraphtestutil + cugraph + GTest::GTest + GTest::Main + ${NCCL_LIBRARIES} + cudart + cuda + cublas + cusparse + cusolver + curand) + + if(OpenMP_CXX_FOUND) + target_link_libraries(${CMAKE_TEST_NAME} PRIVATE +################################################################################################### +### Use ${OpenMP_CXX_LIB_NAMES} instead of OpenMP::OpenMP_CXX to avoid the following warnings. +### +### Cannot generate a safe runtime search path for target TARGET_NAME +### because files in some directories may conflict with libraries in implicit +### directories: +### ... +### +### libgomp.so is included in the conda base environment and copied to every new conda +### environment. If a full file path is provided (e.g ${CUDF_LIBRARY}), cmake +### extracts the directory path and adds the directory path to BUILD_RPATH (if BUILD_RPATH is not +### disabled). +### +### cmake maintains a system specific implicit directories (e.g. /lib, /lib/x86_64-linux-gnu, +### /lib32, /lib32/x86_64-linux-gnu, /lib64, /lib64/x86_64-linux-gnu, /usr/lib, +### /usr/lib/gcc/x86_64-linux-gnu/7, /usr/lib/x86_64-linux-gnu, /usr/lib32, +### /usr/lib32/x86_64-linux-gnu, /usr/lib64, /usr/lib64/x86_64-linux-gnu, +### /usr/local/cuda-10.0/lib64", /usr/local/cuda-10.0/lib64/stubs). +### +### If a full path to libgomp.so is provided (which is the case with OpenMP::OpenMP_CXX), cmake +### checks whether there is any other libgomp.so with the different full path (after resolving +### soft links) in the search paths (implicit directoires + BUILD_RAPTH). There is one in the +### path included in BUILD_RPATH when ${CUDF_LIBRARY} is added; this one can +### potentially hide the one in the provided full path and cmake generates a warning (and RPATH +### is searched before the directories in /etc/ld.so/conf; ld.so.conf does not coincide but +### overlaps with implicit directories). +### +### If we provide just the library names (gomp;pthread), cmake does not generate warnings (we +### did not specify which libgomp.so should be loaded in runtime), and the one first found in +### the search order is loaded (we can change the loaded library by setting LD_LIBRARY_PATH or +### manually editing BUILD_RPATH). +### +### Manually editing BUILD_RPATH: +### set(TARGET_BUILD_RPATH "") +### foreach(TMP_VAR_FULLPATH IN LISTS OpenMP_CXX_LIBRARIES) +### get_filename_component(TMP_VAR_DIR ${TMP_VAR_FULLPATH} DIRECTORY) +### string(APPEND TARGET_BUILD_RPATH "${TMP_VAR_DIR};") +### get_filename_component(TMP_VAR_REALPATH ${TMP_VAR_FULLPATH} REALPATH) +### get_filename_component(TMP_VAR_DIR ${TMP_VAR_REALPATH} DIRECTORY) +### # cmake automatically removes duplicates, so skip checking. 
+### string(APPEND TARGET_BUILD_RPATH "${TMP_VAR_DIR};") +### endforeach() +### string(APPEND TARGET_BUILD_RPATH "${CONDA_PREFIX}/lib") +### message(STATUS "TARGET_BUILD_RPATH=${TARGET_BUILD_RPATH}") +### set_target_properties(target PROPERTIES +### BUILD_RPATH "${TARGET_BUILD_RPATH}") + ${OpenMP_CXX_LIB_NAMES}) + endif(OpenMP_CXX_FOUND) + + # CUDA_ARCHITECTURES=OFF implies cmake will not pass arch flags to the + # compiler. CUDA_ARCHITECTURES must be set to a non-empty value to prevent + # cmake warnings about policy CMP0104. With this setting, arch flags must be + # manually set! ("evaluate_gpu_archs(GPU_ARCHS)" is the current mechanism + # used in cpp/CMakeLists.txt for setting arch options). + # Run "cmake --help-policy CMP0104" for policy details. + # NOTE: the CUDA_ARCHITECTURES=OFF setting may be removed after migrating to + # the findcudatoolkit features in cmake 3.17+ + set_target_properties(${CMAKE_TEST_NAME} PROPERTIES + CUDA_ARCHITECTURES OFF) + + add_test(NAME ${CMAKE_TEST_NAME} + COMMAND ${MPIEXEC_EXECUTABLE} + ${MPIEXEC_NUMPROC_FLAG} + ${GPU_COUNT} + ${MPIEXEC_PREFLAGS} + ${CMAKE_TEST_NAME} + ${MPIEXEC_POSTFLAGS}) +endfunction() + ################################################################################################### # - set rapids dataset path ---------------------------------------------------------------------- @@ -303,6 +415,14 @@ set(SCC_TEST_SRC ConfigureTest(SCC_TEST "${SCC_TEST_SRC}") +################################################################################################### +# - WEAKLY CONNECTED COMPONENTS tests ---------------------------------------------------------- + +set(WCC_TEST_SRC + "${CMAKE_CURRENT_SOURCE_DIR}/components/wcc_test.cpp") + +ConfigureTest(WCC_TEST "${WCC_TEST_SRC}") + ################################################################################################### #-Hungarian (Linear Assignment Problem) tests --------------------------------------------------------------------- @@ -435,6 +555,14 @@ ConfigureTest(EXPERIMENTAL_RANDOM_WALKS_LOW_LEVEL_TEST "${EXPERIMENTAL_RANDOM_WA # - MG tests -------------------------------------------------------------------------------------- if(BUILD_CUGRAPH_MG_TESTS) + execute_process( + COMMAND nvidia-smi -L + COMMAND wc -l + OUTPUT_VARIABLE GPU_COUNT) + + string(REGEX REPLACE "\n$" "" GPU_COUNT ${GPU_COUNT}) + MESSAGE(STATUS "GPU_COUNT: " ${GPU_COUNT}) + if(MPI_CXX_FOUND) ########################################################################################### # - MG PAGERANK tests --------------------------------------------------------------------- @@ -442,7 +570,7 @@ if(BUILD_CUGRAPH_MG_TESTS) set(MG_PAGERANK_TEST_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/pagerank/mg_pagerank_test.cpp") - ConfigureTest(MG_PAGERANK_TEST "${MG_PAGERANK_TEST_SRCS}") + ConfigureTestMG(MG_PAGERANK_TEST "${MG_PAGERANK_TEST_SRCS}") target_link_libraries(MG_PAGERANK_TEST PRIVATE MPI::MPI_C MPI::MPI_CXX) ########################################################################################### @@ -451,7 +579,7 @@ if(BUILD_CUGRAPH_MG_TESTS) set(MG_KATZ_CENTRALITY_TEST_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/experimental/mg_katz_centrality_test.cpp") - ConfigureTest(MG_KATZ_CENTRALITY_TEST "${MG_KATZ_CENTRALITY_TEST_SRCS}") + ConfigureTestMG(MG_KATZ_CENTRALITY_TEST "${MG_KATZ_CENTRALITY_TEST_SRCS}") target_link_libraries(MG_KATZ_CENTRALITY_TEST PRIVATE MPI::MPI_C MPI::MPI_CXX) ########################################################################################### @@ -460,7 +588,7 @@ if(BUILD_CUGRAPH_MG_TESTS) 
set(MG_BFS_TEST_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/experimental/mg_bfs_test.cpp") - ConfigureTest(MG_BFS_TEST "${MG_BFS_TEST_SRCS}") + ConfigureTestMG(MG_BFS_TEST "${MG_BFS_TEST_SRCS}") target_link_libraries(MG_BFS_TEST PRIVATE MPI::MPI_C MPI::MPI_CXX) ########################################################################################### @@ -469,17 +597,17 @@ if(BUILD_CUGRAPH_MG_TESTS) set(MG_SSSP_TEST_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/experimental/mg_sssp_test.cpp") - ConfigureTest(MG_SSSP_TEST "${MG_SSSP_TEST_SRCS}") + ConfigureTestMG(MG_SSSP_TEST "${MG_SSSP_TEST_SRCS}") target_link_libraries(MG_SSSP_TEST PRIVATE MPI::MPI_C MPI::MPI_CXX) ########################################################################################### # - MG LOUVAIN tests ---------------------------------------------------------------------- set(MG_LOUVAIN_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/community/mg_louvain_helper.cu" + "${CMAKE_CURRENT_SOURCE_DIR}/community/mg_louvain_helper.cu" "${CMAKE_CURRENT_SOURCE_DIR}/community/mg_louvain_test.cpp") - ConfigureTest(MG_LOUVAIN_TEST "${MG_LOUVAIN_TEST_SRCS}") + ConfigureTestMG(MG_LOUVAIN_TEST "${MG_LOUVAIN_TEST_SRCS}") target_link_libraries(MG_LOUVAIN_TEST PRIVATE MPI::MPI_C MPI::MPI_CXX) else(MPI_CXX_FOUND) diff --git a/cpp/tests/centrality/betweenness_centrality_test.cu b/cpp/tests/centrality/betweenness_centrality_test.cu index d680574e10b..7ff6ab31213 100644 --- a/cpp/tests/centrality/betweenness_centrality_test.cu +++ b/cpp/tests/centrality/betweenness_centrality_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -399,12 +399,12 @@ TEST_P(Tests_BC, CheckFP64_NORMALIZE_ENDPOINTS) run_current_test(GetParam()); } -INSTANTIATE_TEST_CASE_P(simple_test, - Tests_BC, - ::testing::Values(BC_Usecase("test/datasets/karate.mtx", 0), - BC_Usecase("test/datasets/netscience.mtx", 0), - BC_Usecase("test/datasets/netscience.mtx", 4), - BC_Usecase("test/datasets/wiki2003.mtx", 4), - BC_Usecase("test/datasets/wiki-Talk.mtx", 4))); +INSTANTIATE_TEST_SUITE_P(simple_test, + Tests_BC, + ::testing::Values(BC_Usecase("test/datasets/karate.mtx", 0), + BC_Usecase("test/datasets/netscience.mtx", 0), + BC_Usecase("test/datasets/netscience.mtx", 4), + BC_Usecase("test/datasets/wiki2003.mtx", 4), + BC_Usecase("test/datasets/wiki-Talk.mtx", 4))); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/centrality/edge_betweenness_centrality_test.cu b/cpp/tests/centrality/edge_betweenness_centrality_test.cu index b6cce8684e8..2432943504c 100644 --- a/cpp/tests/centrality/edge_betweenness_centrality_test.cu +++ b/cpp/tests/centrality/edge_betweenness_centrality_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -312,12 +312,12 @@ TEST_P(Tests_EdgeBC, CheckFP64_NORMALIZE) run_current_test(GetParam()); } -INSTANTIATE_TEST_CASE_P(simple_test, - Tests_EdgeBC, - ::testing::Values(EdgeBC_Usecase("test/datasets/karate.mtx", 0), - EdgeBC_Usecase("test/datasets/netscience.mtx", 0), - EdgeBC_Usecase("test/datasets/netscience.mtx", 4), - EdgeBC_Usecase("test/datasets/wiki2003.mtx", 4), - EdgeBC_Usecase("test/datasets/wiki-Talk.mtx", 4))); +INSTANTIATE_TEST_SUITE_P(simple_test, + Tests_EdgeBC, + ::testing::Values(EdgeBC_Usecase("test/datasets/karate.mtx", 0), + EdgeBC_Usecase("test/datasets/netscience.mtx", 0), + EdgeBC_Usecase("test/datasets/netscience.mtx", 4), + EdgeBC_Usecase("test/datasets/wiki2003.mtx", 4), + EdgeBC_Usecase("test/datasets/wiki-Talk.mtx", 4))); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/centrality/katz_centrality_test.cu b/cpp/tests/centrality/katz_centrality_test.cu index c4f17192955..114a89858b8 100644 --- a/cpp/tests/centrality/katz_centrality_test.cu +++ b/cpp/tests/centrality/katz_centrality_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -156,7 +156,7 @@ class Tests_Katz : public ::testing::TestWithParam { } }; -INSTANTIATE_TEST_CASE_P( +INSTANTIATE_TEST_SUITE_P( simple_test, Tests_Katz, ::testing::Values(Katz_Usecase("test/datasets/karate.mtx", "ref/katz/karate.csv"), diff --git a/cpp/tests/community/egonet_test.cu b/cpp/tests/community/egonet_test.cu index d61080c685e..27a235ee15b 100644 --- a/cpp/tests/community/egonet_test.cu +++ b/cpp/tests/community/egonet_test.cu @@ -168,7 +168,7 @@ TEST_P(Tests_InducedEgo, CheckInt32Int32FloatUntransposed) run_current_test(GetParam()); } -INSTANTIATE_TEST_CASE_P( +INSTANTIATE_TEST_SUITE_P( simple_test, Tests_InducedEgo, ::testing::Values( @@ -182,7 +182,7 @@ INSTANTIATE_TEST_CASE_P( // For perf analysis /* -INSTANTIATE_TEST_CASE_P( +INSTANTIATE_TEST_SUITE_P( simple_test, Tests_InducedEgo, ::testing::Values( diff --git a/cpp/tests/community/louvain_test.cpp b/cpp/tests/community/louvain_test.cpp index 2ebf9a85902..43d274e6723 100644 --- a/cpp/tests/community/louvain_test.cpp +++ b/cpp/tests/community/louvain_test.cpp @@ -313,7 +313,7 @@ TEST_P(Tests_Louvain, CheckInt32Int32FloatFloat) } // FIXME: Expand testing once we evaluate RMM memory use -INSTANTIATE_TEST_CASE_P( +INSTANTIATE_TEST_SUITE_P( simple_test, Tests_Louvain, ::testing::Values(Louvain_Usecase("test/datasets/karate.mtx", true, 3, 0.408695))); diff --git a/cpp/tests/community/mg_louvain_test.cpp b/cpp/tests/community/mg_louvain_test.cpp index 8a1a3010a6f..4b398f0a4aa 100644 --- a/cpp/tests/community/mg_louvain_test.cpp +++ b/cpp/tests/community/mg_louvain_test.cpp @@ -43,7 +43,7 @@ void compare(double mg_modularity, double sg_modularity) //////////////////////////////////////////////////////////////////////////////// // Test param object. 
This defines the input and expected output for a test, and // will be instantiated as the parameter to the tests defined below using -// INSTANTIATE_TEST_CASE_P() +// INSTANTIATE_TEST_SUITE_P() // struct Louvain_Usecase { std::string graph_file_full_path{}; @@ -226,7 +226,7 @@ TEST_P(Louvain_MG_Testfixture, CheckInt32Int32Float) run_test(GetParam()); } -INSTANTIATE_TEST_CASE_P( +INSTANTIATE_TEST_SUITE_P( simple_test, Louvain_MG_Testfixture, ::testing::Values(Louvain_Usecase("test/datasets/karate.mtx", true, 100, 1) diff --git a/cpp/tests/components/con_comp_test.cu b/cpp/tests/components/con_comp_test.cu index 15d60867753..fdae77f2384 100644 --- a/cpp/tests/components/con_comp_test.cu +++ b/cpp/tests/components/con_comp_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. * * NVIDIA CORPORATION and its licensors retain all intellectual property * and proprietary rights in and to this software, related documentation @@ -141,11 +141,11 @@ std::vector Tests_Weakly_CC::weakly_cc_time; TEST_P(Tests_Weakly_CC, Weakly_CC) { run_current_test(GetParam()); } // --gtest_filter=*simple_test* -INSTANTIATE_TEST_CASE_P(simple_test, - Tests_Weakly_CC, - ::testing::Values(Usecase("test/datasets/dolphins.mtx"), - Usecase("test/datasets/coPapersDBLP.mtx"), - Usecase("test/datasets/coPapersCiteseer.mtx"), - Usecase("test/datasets/hollywood.mtx"))); +INSTANTIATE_TEST_SUITE_P(simple_test, + Tests_Weakly_CC, + ::testing::Values(Usecase("test/datasets/dolphins.mtx"), + Usecase("test/datasets/coPapersDBLP.mtx"), + Usecase("test/datasets/coPapersCiteseer.mtx"), + Usecase("test/datasets/hollywood.mtx"))); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/components/scc_test.cu b/cpp/tests/components/scc_test.cu index a74b5a0ad27..b875a459bd0 100644 --- a/cpp/tests/components/scc_test.cu +++ b/cpp/tests/components/scc_test.cu @@ -211,7 +211,7 @@ std::vector Tests_Strongly_CC::strongly_cc_counts; TEST_P(Tests_Strongly_CC, Strongly_CC) { run_current_test(GetParam()); } // --gtest_filter=*simple_test* -INSTANTIATE_TEST_CASE_P( +INSTANTIATE_TEST_SUITE_P( simple_test, Tests_Strongly_CC, ::testing::Values( diff --git a/cpp/tests/components/wcc_graphs.cu b/cpp/tests/components/wcc_graphs.cu new file mode 100644 index 00000000000..fb11f872fb8 --- /dev/null +++ b/cpp/tests/components/wcc_graphs.cu @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. + * + * NVIDIA CORPORATION and its licensors retain all intellectual property + * and proprietary rights in and to this software, related documentation + * and any modifications thereto. Any use, reproduction, disclosure or + * distribution of this software and related documentation without an express + * license agreement from NVIDIA CORPORATION is strictly prohibited. 
+ * + */ + +#include +#include + +#include + +#include + +#include + +namespace cugraph { +namespace test { + +template +std::tuple, + rmm::device_uvector> +LineGraph_Usecase::construct_graph(raft::handle_t const& handle, + bool test_weighted, + bool renumber) const +{ + uint64_t seed{0}; + raft::random::Rng rng(seed); + + edge_t num_edges = 2 * (num_vertices_ - 1); + + rmm::device_uvector vertices_v(num_vertices_, handle.get_stream()); + rmm::device_uvector src_v(num_edges, handle.get_stream()); + rmm::device_uvector dst_v(num_edges, handle.get_stream()); + rmm::device_uvector order_v(num_vertices_, handle.get_stream()); + rmm::device_uvector weights_v(edge_t{0}, handle.get_stream()); + + thrust::sequence( + rmm::exec_policy(handle.get_stream()), vertices_v.begin(), vertices_v.end(), vertex_t{0}); + + rng.uniform(order_v.data(), num_vertices_, 0.0f, 1.0f, handle.get_stream()); + + thrust::sort_by_key( + rmm::exec_policy(handle.get_stream()), order_v.begin(), order_v.end(), vertices_v.begin()); + + raft::copy(src_v.begin(), vertices_v.begin(), (num_vertices_ - 1), handle.get_stream()); + raft::copy(dst_v.begin(), vertices_v.begin() + 1, (num_vertices_ - 1), handle.get_stream()); + + raft::copy(src_v.begin() + (num_vertices_ - 1), + vertices_v.begin() + 1, + (num_vertices_ - 1), + handle.get_stream()); + raft::copy(dst_v.begin() + (num_vertices_ - 1), + vertices_v.begin(), + (num_vertices_ - 1), + handle.get_stream()); + + thrust::sequence( + rmm::exec_policy(handle.get_stream()), vertices_v.begin(), vertices_v.end(), vertex_t{0}); + + handle.get_stream_view().synchronize(); + + return generate_graph_from_edgelist( + handle, + std::move(vertices_v), + std::move(src_v), + std::move(dst_v), + std::move(weights_v), + true, + false, + false); +} + +template std::tuple, + rmm::device_uvector> +LineGraph_Usecase::construct_graph(raft::handle_t const&, bool, bool) const; + +} // namespace test +} // namespace cugraph diff --git a/cpp/tests/components/wcc_graphs.hpp b/cpp/tests/components/wcc_graphs.hpp new file mode 100644 index 00000000000..2b5955c2b78 --- /dev/null +++ b/cpp/tests/components/wcc_graphs.hpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. + * + * NVIDIA CORPORATION and its licensors retain all intellectual property + * and proprietary rights in and to this software, related documentation + * and any modifications thereto. Any use, reproduction, disclosure or + * distribution of this software and related documentation without an express + * license agreement from NVIDIA CORPORATION is strictly prohibited. + * + */ + +#include + +#include + +namespace cugraph { +namespace test { + +class LineGraph_Usecase { + public: + LineGraph_Usecase() = delete; + + LineGraph_Usecase(size_t num_vertices) : num_vertices_(num_vertices) {} + + template + std::tuple< + cugraph::experimental::graph_t, + rmm::device_uvector> + construct_graph(raft::handle_t const& handle, bool test_weighted, bool renumber = true) const; + + private: + size_t num_vertices_{0}; +}; + +} // namespace test +} // namespace cugraph diff --git a/cpp/tests/components/wcc_test.cpp b/cpp/tests/components/wcc_test.cpp new file mode 100644 index 00000000000..962ecefe8f3 --- /dev/null +++ b/cpp/tests/components/wcc_test.cpp @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. + * + * NVIDIA CORPORATION and its licensors retain all intellectual property + * and proprietary rights in and to this software, related documentation + * and any modifications thereto. 
Any use, reproduction, disclosure or + * distribution of this software and related documentation without an express + * license agreement from NVIDIA CORPORATION is strictly prohibited. + * + */ + +#include +#include +#include + +#include +#include + +#include +#include + +struct WCC_Usecase { + bool validate_results{true}; +}; + +template +class Tests_WCC : public ::testing::TestWithParam> { + public: + Tests_WCC() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + static std::vector weakly_cc_time; + + template + void run_current_test(WCC_Usecase const& param, input_usecase_t const& input_usecase) + { + raft::handle_t handle{}; + + cugraph::experimental::graph_t graph(handle); + + std::tie(graph, std::ignore) = + input_usecase.template construct_graph( + handle, false, false); + + auto graph_view = graph.view(); + + rmm::device_uvector component_labels_v(graph_view.get_number_of_vertices(), + handle.get_stream()); + + // cugraph::weakly_connected_components(handle, graph_view, component_labels_v.begin()); + + // TODO: validate result + } +}; + +using Tests_WCC_File = Tests_WCC; +using Tests_WCC_Rmat = Tests_WCC; +using Tests_WCC_LineGraph = Tests_WCC; + +TEST_P(Tests_WCC_File, WCC) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} +TEST_P(Tests_WCC_Rmat, WCC) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} +TEST_P(Tests_WCC_LineGraph, WCC) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + +// --gtest_filter=*simple_test* +INSTANTIATE_TEST_SUITE_P( + file_test, + Tests_WCC_File, + ::testing::Values( + std::make_tuple(WCC_Usecase{}, cugraph::test::File_Usecase("test/datasets/dolphins.mtx")), + std::make_tuple(WCC_Usecase{}, cugraph::test::File_Usecase("test/datasets/coPapersDBLP.mtx")), + std::make_tuple(WCC_Usecase{}, + cugraph::test::File_Usecase("test/datasets/coPapersCiteseer.mtx")), + std::make_tuple(WCC_Usecase{}, cugraph::test::File_Usecase("test/datasets/hollywood.mtx")))); + +INSTANTIATE_TEST_SUITE_P( + line_graph_test, + Tests_WCC_LineGraph, + ::testing::Values(std::make_tuple(WCC_Usecase{}, cugraph::test::LineGraph_Usecase(1000)), + std::make_tuple(WCC_Usecase{}, cugraph::test::LineGraph_Usecase(100000)))); + +CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/bfs_test.cpp b/cpp/tests/experimental/bfs_test.cpp index ded57dd1855..1de439e1430 100644 --- a/cpp/tests/experimental/bfs_test.cpp +++ b/cpp/tests/experimental/bfs_test.cpp @@ -81,63 +81,13 @@ void bfs_reference(edge_t const* offsets, return; } -typedef struct BFS_Usecase_t { - cugraph::test::input_graph_specifier_t input_graph_specifier{}; - +struct BFS_Usecase { size_t source{0}; bool check_correctness{false}; +}; - BFS_Usecase_t(std::string const& graph_file_path, size_t source, bool check_correctness = true) - : source(source), check_correctness(check_correctness) - { - std::string graph_file_full_path{}; - if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { - graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; - } else { - graph_file_full_path = graph_file_path; - } - input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH; - input_graph_specifier.graph_file_full_path = graph_file_full_path; - }; - - BFS_Usecase_t(cugraph::test::rmat_params_t rmat_params, - size_t source, - bool check_correctness = 
true) - : source(source), check_correctness(check_correctness) - { - input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::RMAT_PARAMS; - input_graph_specifier.rmat_params = rmat_params; - } -} BFS_Usecase; - -template -std::tuple, - rmm::device_uvector> -read_graph(raft::handle_t const& handle, BFS_Usecase const& configuration, bool renumber) -{ - return configuration.input_graph_specifier.tag == - cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH - ? cugraph::test:: - read_graph_from_matrix_market_file( - handle, configuration.input_graph_specifier.graph_file_full_path, false, renumber) - : cugraph::test:: - generate_graph_from_rmat_params( - handle, - configuration.input_graph_specifier.rmat_params.scale, - configuration.input_graph_specifier.rmat_params.edge_factor, - configuration.input_graph_specifier.rmat_params.a, - configuration.input_graph_specifier.rmat_params.b, - configuration.input_graph_specifier.rmat_params.c, - configuration.input_graph_specifier.rmat_params.seed, - configuration.input_graph_specifier.rmat_params.undirected, - configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, - false, - renumber, - std::vector{0}, - size_t{1}); -} - -class Tests_BFS : public ::testing::TestWithParam { +template +class Tests_BFS : public ::testing::TestWithParam> { public: Tests_BFS() {} static void SetupTestCase() {} @@ -147,7 +97,7 @@ class Tests_BFS : public ::testing::TestWithParam { virtual void TearDown() {} template - void run_current_test(BFS_Usecase const& configuration) + void run_current_test(BFS_Usecase const& bfs_usecase, input_usecase_t const& input_usecase) { constexpr bool renumber = true; @@ -163,17 +113,19 @@ class Tests_BFS : public ::testing::TestWithParam { cugraph::experimental::graph_t graph(handle); rmm::device_uvector d_renumber_map_labels(0, handle.get_stream()); std::tie(graph, d_renumber_map_labels) = - read_graph(handle, configuration, renumber); + input_usecase.template construct_graph( + handle, true, renumber); + if (PERF) { CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement double elapsed_time{0.0}; hr_clock.stop(&elapsed_time); - std::cout << "read_graph took " << elapsed_time * 1e-6 << " s.\n"; + std::cout << "construct_graph took " << elapsed_time * 1e-6 << " s.\n"; } auto graph_view = graph.view(); - ASSERT_TRUE(static_cast(configuration.source) >= 0 && - static_cast(configuration.source) < graph_view.get_number_of_vertices()) + ASSERT_TRUE(static_cast(bfs_usecase.source) >= 0 && + static_cast(bfs_usecase.source) < graph_view.get_number_of_vertices()) << "Invalid starting source."; rmm::device_uvector d_distances(graph_view.get_number_of_vertices(), @@ -190,7 +142,7 @@ class Tests_BFS : public ::testing::TestWithParam { graph_view, d_distances.data(), d_predecessors.data(), - static_cast(configuration.source), + static_cast(bfs_usecase.source), false, std::numeric_limits::max()); @@ -201,12 +153,13 @@ class Tests_BFS : public ::testing::TestWithParam { std::cout << "BFS took " << elapsed_time * 1e-6 << " s.\n"; } - if (configuration.check_correctness) { + if (bfs_usecase.check_correctness) { cugraph::experimental::graph_t unrenumbered_graph( handle); if (renumber) { std::tie(unrenumbered_graph, std::ignore) = - read_graph(handle, configuration, false); + input_usecase.template construct_graph( + handle, true, false); } auto unrenumbered_graph_view = renumber ? 
unrenumbered_graph.view() : graph_view; @@ -223,7 +176,7 @@ class Tests_BFS : public ::testing::TestWithParam { handle.get_stream_view().synchronize(); - auto unrenumbered_source = static_cast(configuration.source); + auto unrenumbered_source = static_cast(bfs_usecase.source); if (renumber) { std::vector h_renumber_map_labels(d_renumber_map_labels.size()); raft::update_host(h_renumber_map_labels.data(), @@ -233,7 +186,7 @@ class Tests_BFS : public ::testing::TestWithParam { handle.get_stream_view().synchronize(); - unrenumbered_source = h_renumber_map_labels[configuration.source]; + unrenumbered_source = h_renumber_map_labels[bfs_usecase.source]; } std::vector h_reference_distances(unrenumbered_graph_view.get_number_of_vertices()); @@ -312,24 +265,49 @@ class Tests_BFS : public ::testing::TestWithParam { } }; +using Tests_BFS_File = Tests_BFS; +using Tests_BFS_Rmat = Tests_BFS; + // FIXME: add tests for type combinations -TEST_P(Tests_BFS, CheckInt32Int32) { run_current_test(GetParam()); } +TEST_P(Tests_BFS_File, CheckInt32Int32) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + +TEST_P(Tests_BFS_Rmat, CheckInt32Int32) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} -INSTANTIATE_TEST_CASE_P( - simple_test, - Tests_BFS, +INSTANTIATE_TEST_SUITE_P( + file_test, + Tests_BFS_File, + ::testing::Values( + // enable correctness checks + std::make_tuple(BFS_Usecase{0}, cugraph::test::File_Usecase("test/datasets/karate.mtx")), + std::make_tuple(BFS_Usecase{0}, cugraph::test::File_Usecase("test/datasets/polbooks.mtx")), + std::make_tuple(BFS_Usecase{0}, cugraph::test::File_Usecase("test/datasets/netscience.mtx")), + std::make_tuple(BFS_Usecase{100}, cugraph::test::File_Usecase("test/datasets/netscience.mtx")), + std::make_tuple(BFS_Usecase{1000}, cugraph::test::File_Usecase("test/datasets/wiki2003.mtx")), + std::make_tuple(BFS_Usecase{1000}, + cugraph::test::File_Usecase("test/datasets/wiki-Talk.mtx")))); + +INSTANTIATE_TEST_SUITE_P( + rmat_small_test, + Tests_BFS_Rmat, ::testing::Values( // enable correctness checks - BFS_Usecase("test/datasets/karate.mtx", 0), - BFS_Usecase("test/datasets/polbooks.mtx", 0), - BFS_Usecase("test/datasets/netscience.mtx", 0), - BFS_Usecase("test/datasets/netscience.mtx", 100), - BFS_Usecase("test/datasets/wiki2003.mtx", 1000), - BFS_Usecase("test/datasets/wiki-Talk.mtx", 1000), - BFS_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, 0), + std::make_tuple(BFS_Usecase{0}, + cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, false, false)))); + +INSTANTIATE_TEST_SUITE_P( + rmat_large_test, + Tests_BFS_Rmat, + ::testing::Values( // disable correctness checks for large graphs - BFS_Usecase(cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, - 0, - false))); + std::make_pair(BFS_Usecase{0, false}, + cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false)))); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/coarsen_graph_test.cpp b/cpp/tests/experimental/coarsen_graph_test.cpp index 0fc0634bbbc..5943a5cd286 100644 --- a/cpp/tests/experimental/coarsen_graph_test.cpp +++ b/cpp/tests/experimental/coarsen_graph_test.cpp @@ -370,7 +370,7 @@ TEST_P(Tests_CoarsenGraph, CheckInt32Int32FloatUntransposed) run_current_test(GetParam()); } -INSTANTIATE_TEST_CASE_P( +INSTANTIATE_TEST_SUITE_P( simple_test, Tests_CoarsenGraph, ::testing::Values(CoarsenGraph_Usecase("test/datasets/karate.mtx", 0.2, false), diff --git 
a/cpp/tests/experimental/degree_test.cpp b/cpp/tests/experimental/degree_test.cpp index 581b6b29f64..ea7cc246df0 100644 --- a/cpp/tests/experimental/degree_test.cpp +++ b/cpp/tests/experimental/degree_test.cpp @@ -157,11 +157,11 @@ TEST_P(Tests_Degree, CheckInt32Int32FloatUntransposed) run_current_test(GetParam()); } -INSTANTIATE_TEST_CASE_P(simple_test, - Tests_Degree, - ::testing::Values(Degree_Usecase("test/datasets/karate.mtx"), - Degree_Usecase("test/datasets/web-Google.mtx"), - Degree_Usecase("test/datasets/ljournal-2008.mtx"), - Degree_Usecase("test/datasets/webbase-1M.mtx"))); +INSTANTIATE_TEST_SUITE_P(simple_test, + Tests_Degree, + ::testing::Values(Degree_Usecase("test/datasets/karate.mtx"), + Degree_Usecase("test/datasets/web-Google.mtx"), + Degree_Usecase("test/datasets/ljournal-2008.mtx"), + Degree_Usecase("test/datasets/webbase-1M.mtx"))); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/generate_rmat_test.cpp b/cpp/tests/experimental/generate_rmat_test.cpp index 221accea4f7..60c3a322725 100644 --- a/cpp/tests/experimental/generate_rmat_test.cpp +++ b/cpp/tests/experimental/generate_rmat_test.cpp @@ -279,12 +279,12 @@ class Tests_GenerateRmat : public ::testing::TestWithParam TEST_P(Tests_GenerateRmat, CheckInt32) { run_current_test(GetParam()); } -INSTANTIATE_TEST_CASE_P(simple_test, - Tests_GenerateRmat, - ::testing::Values(GenerateRmat_Usecase(20, 16, 0.57, 0.19, 0.19, true), - GenerateRmat_Usecase(20, 16, 0.57, 0.19, 0.19, false), - GenerateRmat_Usecase(20, 16, 0.45, 0.22, 0.22, true), - GenerateRmat_Usecase(20, 16, 0.45, 0.22, 0.22, false))); +INSTANTIATE_TEST_SUITE_P(simple_test, + Tests_GenerateRmat, + ::testing::Values(GenerateRmat_Usecase(20, 16, 0.57, 0.19, 0.19, true), + GenerateRmat_Usecase(20, 16, 0.57, 0.19, 0.19, false), + GenerateRmat_Usecase(20, 16, 0.45, 0.22, 0.22, true), + GenerateRmat_Usecase(20, 16, 0.45, 0.22, 0.22, false))); typedef struct GenerateRmats_Usecase_t { size_t n_edgelists{0}; size_t min_scale{0}; @@ -343,7 +343,7 @@ class Tests_GenerateRmats : public ::testing::TestWithParam(GetParam()); } -INSTANTIATE_TEST_CASE_P( +INSTANTIATE_TEST_SUITE_P( simple_test, Tests_GenerateRmats, ::testing::Values( diff --git a/cpp/tests/experimental/graph_test.cpp b/cpp/tests/experimental/graph_test.cpp index 6ce32e0c836..bdf56ae7aff 100644 --- a/cpp/tests/experimental/graph_test.cpp +++ b/cpp/tests/experimental/graph_test.cpp @@ -230,15 +230,15 @@ TEST_P(Tests_Graph, CheckStoreTransposedTrue) run_current_test(GetParam()); } -INSTANTIATE_TEST_CASE_P(simple_test, - Tests_Graph, - ::testing::Values(Graph_Usecase("test/datasets/karate.mtx", false), - Graph_Usecase("test/datasets/karate.mtx", true), - Graph_Usecase("test/datasets/web-Google.mtx", false), - Graph_Usecase("test/datasets/web-Google.mtx", true), - Graph_Usecase("test/datasets/ljournal-2008.mtx", false), - Graph_Usecase("test/datasets/ljournal-2008.mtx", true), - Graph_Usecase("test/datasets/webbase-1M.mtx", false), - Graph_Usecase("test/datasets/webbase-1M.mtx", true))); +INSTANTIATE_TEST_SUITE_P(simple_test, + Tests_Graph, + ::testing::Values(Graph_Usecase("test/datasets/karate.mtx", false), + Graph_Usecase("test/datasets/karate.mtx", true), + Graph_Usecase("test/datasets/web-Google.mtx", false), + Graph_Usecase("test/datasets/web-Google.mtx", true), + Graph_Usecase("test/datasets/ljournal-2008.mtx", false), + Graph_Usecase("test/datasets/ljournal-2008.mtx", true), + Graph_Usecase("test/datasets/webbase-1M.mtx", false), + Graph_Usecase("test/datasets/webbase-1M.mtx", true))); 
CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/induced_subgraph_test.cpp b/cpp/tests/experimental/induced_subgraph_test.cpp index 4e0ca9e7d92..2d49c174d7e 100644 --- a/cpp/tests/experimental/induced_subgraph_test.cpp +++ b/cpp/tests/experimental/induced_subgraph_test.cpp @@ -295,7 +295,7 @@ TEST_P(Tests_InducedSubgraph, CheckInt32Int32FloatUntransposed) run_current_test(GetParam()); } -INSTANTIATE_TEST_CASE_P( +INSTANTIATE_TEST_SUITE_P( simple_test, Tests_InducedSubgraph, ::testing::Values( diff --git a/cpp/tests/experimental/katz_centrality_test.cpp b/cpp/tests/experimental/katz_centrality_test.cpp index c7756699acd..af70b90dd02 100644 --- a/cpp/tests/experimental/katz_centrality_test.cpp +++ b/cpp/tests/experimental/katz_centrality_test.cpp @@ -96,68 +96,14 @@ void katz_centrality_reference(edge_t const* offsets, return; } -typedef struct KatzCentrality_Usecase_t { - cugraph::test::input_graph_specifier_t input_graph_specifier{}; - +struct KatzCentrality_Usecase { bool test_weighted{false}; bool check_correctness{false}; +}; - KatzCentrality_Usecase_t(std::string const& graph_file_path, - bool test_weighted, - bool check_correctness = true) - : test_weighted(test_weighted), check_correctness(check_correctness) - { - std::string graph_file_full_path{}; - if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { - graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; - } else { - graph_file_full_path = graph_file_path; - } - input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH; - input_graph_specifier.graph_file_full_path = graph_file_full_path; - }; - - KatzCentrality_Usecase_t(cugraph::test::rmat_params_t rmat_params, - bool test_weighted, - bool check_correctness = true) - : test_weighted(test_weighted), check_correctness(check_correctness) - { - input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::RMAT_PARAMS; - input_graph_specifier.rmat_params = rmat_params; - } -} KatzCentrality_Usecase; - -template -std::tuple, - rmm::device_uvector> -read_graph(raft::handle_t const& handle, KatzCentrality_Usecase const& configuration, bool renumber) -{ - return configuration.input_graph_specifier.tag == - cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH - ? 
cugraph::test:: - read_graph_from_matrix_market_file( - handle, - configuration.input_graph_specifier.graph_file_full_path, - configuration.test_weighted, - renumber) - : cugraph::test:: - generate_graph_from_rmat_params( - handle, - configuration.input_graph_specifier.rmat_params.scale, - configuration.input_graph_specifier.rmat_params.edge_factor, - configuration.input_graph_specifier.rmat_params.a, - configuration.input_graph_specifier.rmat_params.b, - configuration.input_graph_specifier.rmat_params.c, - configuration.input_graph_specifier.rmat_params.seed, - configuration.input_graph_specifier.rmat_params.undirected, - configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, - configuration.test_weighted, - renumber, - std::vector{0}, - size_t{1}); -} - -class Tests_KatzCentrality : public ::testing::TestWithParam { +template +class Tests_KatzCentrality + : public ::testing::TestWithParam> { public: Tests_KatzCentrality() {} static void SetupTestCase() {} @@ -167,7 +113,8 @@ class Tests_KatzCentrality : public ::testing::TestWithParam - void run_current_test(KatzCentrality_Usecase const& configuration) + void run_current_test(KatzCentrality_Usecase const& katz_usecase, + input_usecase_t const& input_usecase) { constexpr bool renumber = true; @@ -181,12 +128,14 @@ class Tests_KatzCentrality : public ::testing::TestWithParam graph(handle); rmm::device_uvector d_renumber_map_labels(0, handle.get_stream()); std::tie(graph, d_renumber_map_labels) = - read_graph(handle, configuration, renumber); + input_usecase.template construct_graph( + handle, true, renumber); + if (PERF) { CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement double elapsed_time{0.0}; hr_clock.stop(&elapsed_time); - std::cout << "read_graph took " << elapsed_time * 1e-6 << " s.\n"; + std::cout << "construct_graph took " << elapsed_time * 1e-6 << " s.\n"; } auto graph_view = graph.view(); @@ -226,12 +175,13 @@ class Tests_KatzCentrality : public ::testing::TestWithParam unrenumbered_graph( handle); if (renumber) { std::tie(unrenumbered_graph, std::ignore) = - read_graph(handle, configuration, false); + input_usecase.template construct_graph( + handle, true, false); } auto unrenumbered_graph_view = renumber ? 
unrenumbered_graph.view() : graph_view; @@ -311,35 +261,47 @@ class Tests_KatzCentrality : public ::testing::TestWithParam; +using Tests_KatzCentrality_Rmat = Tests_KatzCentrality; + // FIXME: add tests for type combinations -TEST_P(Tests_KatzCentrality, CheckInt32Int32FloatFloat) +TEST_P(Tests_KatzCentrality_File, CheckInt32Int32FloatFloat) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + +TEST_P(Tests_KatzCentrality_Rmat, CheckInt32Int32FloatFloat) { - run_current_test(GetParam()); + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); } -INSTANTIATE_TEST_CASE_P( - simple_test, - Tests_KatzCentrality, - ::testing::Values( +INSTANTIATE_TEST_SUITE_P( + file_test, + Tests_KatzCentrality_File, + ::testing::Combine( // enable correctness checks - KatzCentrality_Usecase("test/datasets/karate.mtx", false), - KatzCentrality_Usecase("test/datasets/karate.mtx", true), - KatzCentrality_Usecase("test/datasets/web-Google.mtx", false), - KatzCentrality_Usecase("test/datasets/web-Google.mtx", true), - KatzCentrality_Usecase("test/datasets/ljournal-2008.mtx", false), - KatzCentrality_Usecase("test/datasets/ljournal-2008.mtx", true), - KatzCentrality_Usecase("test/datasets/webbase-1M.mtx", false), - KatzCentrality_Usecase("test/datasets/webbase-1M.mtx", true), - KatzCentrality_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, - false), - KatzCentrality_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, - true), - // disable correctness checks for large graphs - KatzCentrality_Usecase(cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, - false, - false), - KatzCentrality_Usecase(cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, - true, - false))); + ::testing::Values(KatzCentrality_Usecase{false}, KatzCentrality_Usecase{true}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), + cugraph::test::File_Usecase("test/datasets/web-Google.mtx"), + cugraph::test::File_Usecase("test/datasets/ljournal-2008.mtx"), + cugraph::test::File_Usecase("test/datasets/webbase-1M.mtx")))); + +INSTANTIATE_TEST_SUITE_P(rmat_small_test, + Tests_KatzCentrality_Rmat, + // enable correctness checks + ::testing::Combine(::testing::Values(KatzCentrality_Usecase{false}, + KatzCentrality_Usecase{true}), + ::testing::Values(cugraph::test::Rmat_Usecase( + 10, 16, 0.57, 0.19, 0.19, 0, false, false)))); + +INSTANTIATE_TEST_SUITE_P(rmat_large_test, + Tests_KatzCentrality_Rmat, + // disable correctness checks for large graphs + ::testing::Combine(::testing::Values(KatzCentrality_Usecase{false, false}, + KatzCentrality_Usecase{true, false}), + ::testing::Values(cugraph::test::Rmat_Usecase( + 20, 32, 0.57, 0.19, 0.19, 0, false, false)))); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/mg_bfs_test.cpp b/cpp/tests/experimental/mg_bfs_test.cpp index 64ffedd2492..ebb2824fb87 100644 --- a/cpp/tests/experimental/mg_bfs_test.cpp +++ b/cpp/tests/experimental/mg_bfs_test.cpp @@ -40,72 +40,13 @@ // static int PERF = 0; -typedef struct BFS_Usecase_t { - cugraph::test::input_graph_specifier_t input_graph_specifier{}; - +struct BFS_Usecase { size_t source{0}; bool check_correctness{false}; +}; - BFS_Usecase_t(std::string const& graph_file_path, size_t source, bool check_correctness = true) - : source(source), check_correctness(check_correctness) - { - std::string graph_file_full_path{}; - if ((graph_file_path.length() > 0) && 
(graph_file_path[0] != '/')) { - graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; - } else { - graph_file_full_path = graph_file_path; - } - input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH; - input_graph_specifier.graph_file_full_path = graph_file_full_path; - }; - - BFS_Usecase_t(cugraph::test::rmat_params_t rmat_params, - size_t source, - bool check_correctness = true) - : source(source), check_correctness(check_correctness) - { - input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::RMAT_PARAMS; - input_graph_specifier.rmat_params = rmat_params; - } -} BFS_Usecase; - -template -std::tuple, - rmm::device_uvector> -read_graph(raft::handle_t const& handle, BFS_Usecase const& configuration, bool renumber) -{ - auto& comm = handle.get_comms(); - auto const comm_size = comm.get_size(); - auto const comm_rank = comm.get_rank(); - - std::vector partition_ids(multi_gpu ? size_t{1} : static_cast(comm_size)); - std::iota(partition_ids.begin(), - partition_ids.end(), - multi_gpu ? static_cast(comm_rank) : size_t{0}); - - return configuration.input_graph_specifier.tag == - cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH - ? cugraph::test:: - read_graph_from_matrix_market_file( - handle, configuration.input_graph_specifier.graph_file_full_path, false, renumber) - : cugraph::test:: - generate_graph_from_rmat_params( - handle, - configuration.input_graph_specifier.rmat_params.scale, - configuration.input_graph_specifier.rmat_params.edge_factor, - configuration.input_graph_specifier.rmat_params.a, - configuration.input_graph_specifier.rmat_params.b, - configuration.input_graph_specifier.rmat_params.c, - configuration.input_graph_specifier.rmat_params.seed, - configuration.input_graph_specifier.rmat_params.undirected, - configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, - false, - renumber, - partition_ids, - static_cast(comm_size)); -} - -class Tests_MGBFS : public ::testing::TestWithParam { +template +class Tests_MGBFS : public ::testing::TestWithParam> { public: Tests_MGBFS() {} static void SetupTestCase() {} @@ -116,7 +57,7 @@ class Tests_MGBFS : public ::testing::TestWithParam { // Compare the results of running BFS on multiple GPUs to that of a single-GPU run template - void run_current_test(BFS_Usecase const& configuration) + void run_current_test(BFS_Usecase const& bfs_usecase, input_usecase_t const& input_usecase) { using weight_t = float; @@ -144,19 +85,20 @@ class Tests_MGBFS : public ::testing::TestWithParam { cugraph::experimental::graph_t mg_graph(handle); rmm::device_uvector d_mg_renumber_map_labels(0, handle.get_stream()); std::tie(mg_graph, d_mg_renumber_map_labels) = - read_graph(handle, configuration, true); + input_usecase.template construct_graph( + handle, false, true); + if (PERF) { CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement double elapsed_time{0.0}; hr_clock.stop(&elapsed_time); - std::cout << "MG read_graph took " << elapsed_time * 1e-6 << " s.\n"; + std::cout << "MG construct_graph took " << elapsed_time * 1e-6 << " s.\n"; } auto mg_graph_view = mg_graph.view(); - ASSERT_TRUE(static_cast(configuration.source) >= 0 && - static_cast(configuration.source) < - mg_graph_view.get_number_of_vertices()) + ASSERT_TRUE(static_cast(bfs_usecase.source) >= 0 && + static_cast(bfs_usecase.source) < mg_graph_view.get_number_of_vertices()) << "Invalid starting source."; // 3. 
run MG BFS @@ -175,7 +117,7 @@ class Tests_MGBFS : public ::testing::TestWithParam { mg_graph_view, d_mg_distances.data(), d_mg_predecessors.data(), - static_cast(configuration.source), + static_cast(bfs_usecase.source), false, std::numeric_limits::max()); @@ -186,14 +128,15 @@ class Tests_MGBFS : public ::testing::TestWithParam { std::cout << "MG BFS took " << elapsed_time * 1e-6 << " s.\n"; } - // 5. copmare SG & MG results + // 5. compare SG & MG results - if (configuration.check_correctness) { + if (bfs_usecase.check_correctness) { // 5-1. create SG graph cugraph::experimental::graph_t sg_graph(handle); std::tie(sg_graph, std::ignore) = - read_graph(handle, configuration, false); + input_usecase.template construct_graph( + handle, false, false); auto sg_graph_view = sg_graph.view(); @@ -202,7 +145,7 @@ class Tests_MGBFS : public ::testing::TestWithParam { vertex_partition_lasts[i] = mg_graph_view.get_vertex_partition_last(i); } - rmm::device_scalar d_source(static_cast(configuration.source), + rmm::device_scalar d_source(static_cast(bfs_usecase.source), handle.get_stream()); cugraph::experimental::unrenumber_int_vertices( handle, @@ -306,21 +249,46 @@ class Tests_MGBFS : public ::testing::TestWithParam { } }; -TEST_P(Tests_MGBFS, CheckInt32Int32) { run_current_test(GetParam()); } +using Tests_MGBFS_File = Tests_MGBFS; +using Tests_MGBFS_Rmat = Tests_MGBFS; + +TEST_P(Tests_MGBFS_File, CheckInt32Int32) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + +TEST_P(Tests_MGBFS_Rmat, CheckInt32Int32) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} -INSTANTIATE_TEST_CASE_P( - simple_test, - Tests_MGBFS, +INSTANTIATE_TEST_SUITE_P( + file_test, + Tests_MGBFS_File, + ::testing::Combine( + // enable correctness checks + ::testing::Values(BFS_Usecase{0}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), + cugraph::test::File_Usecase("test/datasets/web-Google.mtx"), + cugraph::test::File_Usecase("test/datasets/ljournal-2008.mtx"), + cugraph::test::File_Usecase("test/datasets/webbase-1M.mtx")))); + +INSTANTIATE_TEST_SUITE_P( + rmat_small_test, + Tests_MGBFS_Rmat, ::testing::Values( // enable correctness checks - BFS_Usecase("test/datasets/karate.mtx", 0), - BFS_Usecase("test/datasets/web-Google.mtx", 0), - BFS_Usecase("test/datasets/ljournal-2008.mtx", 0), - BFS_Usecase("test/datasets/webbase-1M.mtx", 0), - BFS_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, 0), + std::make_tuple(BFS_Usecase{0}, + cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, false, false, true)))); + +INSTANTIATE_TEST_SUITE_P( + rmat_large_test, + Tests_MGBFS_Rmat, + ::testing::Values( // disable correctness checks for large graphs - BFS_Usecase(cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, - 0, - false))); + std::make_tuple(BFS_Usecase{0, false}, + cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false, true)))); CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/mg_katz_centrality_test.cpp b/cpp/tests/experimental/mg_katz_centrality_test.cpp index 937bd33472b..b4a7968e955 100644 --- a/cpp/tests/experimental/mg_katz_centrality_test.cpp +++ b/cpp/tests/experimental/mg_katz_centrality_test.cpp @@ -37,77 +37,14 @@ // static int PERF = 0; -typedef struct KatzCentrality_Usecase_t { - cugraph::test::input_graph_specifier_t input_graph_specifier{}; - +struct KatzCentrality_Usecase { bool test_weighted{false}; bool 
check_correctness{false}; +}; - KatzCentrality_Usecase_t(std::string const& graph_file_path, - bool test_weighted, - bool check_correctness = true) - : test_weighted(test_weighted), check_correctness(check_correctness) - { - std::string graph_file_full_path{}; - if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { - graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; - } else { - graph_file_full_path = graph_file_path; - } - input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH; - input_graph_specifier.graph_file_full_path = graph_file_full_path; - }; - - KatzCentrality_Usecase_t(cugraph::test::rmat_params_t rmat_params, - bool test_weighted, - bool check_correctness = true) - : test_weighted(test_weighted), check_correctness(check_correctness) - { - input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::RMAT_PARAMS; - input_graph_specifier.rmat_params = rmat_params; - } -} KatzCentrality_Usecase; - -template -std::tuple, - rmm::device_uvector> -read_graph(raft::handle_t const& handle, KatzCentrality_Usecase const& configuration, bool renumber) -{ - auto& comm = handle.get_comms(); - auto const comm_size = comm.get_size(); - auto const comm_rank = comm.get_rank(); - - std::vector partition_ids(multi_gpu ? size_t{1} : static_cast(comm_size)); - std::iota(partition_ids.begin(), - partition_ids.end(), - multi_gpu ? static_cast(comm_rank) : size_t{0}); - - return configuration.input_graph_specifier.tag == - cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH - ? cugraph::test:: - read_graph_from_matrix_market_file( - handle, - configuration.input_graph_specifier.graph_file_full_path, - configuration.test_weighted, - renumber) - : cugraph::test:: - generate_graph_from_rmat_params( - handle, - configuration.input_graph_specifier.rmat_params.scale, - configuration.input_graph_specifier.rmat_params.edge_factor, - configuration.input_graph_specifier.rmat_params.a, - configuration.input_graph_specifier.rmat_params.b, - configuration.input_graph_specifier.rmat_params.c, - configuration.input_graph_specifier.rmat_params.seed, - configuration.input_graph_specifier.rmat_params.undirected, - configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, - configuration.test_weighted, - renumber, - partition_ids, - static_cast(comm_size)); -} - -class Tests_MGKatzCentrality : public ::testing::TestWithParam { +template +class Tests_MGKatzCentrality + : public ::testing::TestWithParam> { public: Tests_MGKatzCentrality() {} static void SetupTestCase() {} @@ -118,7 +55,8 @@ class Tests_MGKatzCentrality : public ::testing::TestWithParam - void run_current_test(KatzCentrality_Usecase const& configuration) + void run_current_test(KatzCentrality_Usecase const &katz_usecase, + input_usecase_t const &input_usecase) { // 1. 
initialize handle @@ -126,7 +64,7 @@ class Tests_MGKatzCentrality : public ::testing::TestWithParam mg_graph(handle); rmm::device_uvector d_mg_renumber_map_labels(0, handle.get_stream()); std::tie(mg_graph, d_mg_renumber_map_labels) = - read_graph(handle, configuration, true); + input_usecase.template construct_graph( + handle, true, true); + if (PERF) { CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement double elapsed_time{0.0}; hr_clock.stop(&elapsed_time); - std::cout << "MG read_graph took " << elapsed_time * 1e-6 << " s.\n"; + std::cout << "MG construct_graph took " << elapsed_time * 1e-6 << " s.\n"; } auto mg_graph_view = mg_graph.view(); @@ -174,7 +114,7 @@ class Tests_MGKatzCentrality : public ::testing::TestWithParam(nullptr), + static_cast(nullptr), d_mg_katz_centralities.data(), alpha, beta, @@ -191,12 +131,13 @@ class Tests_MGKatzCentrality : public ::testing::TestWithParam sg_graph(handle); std::tie(sg_graph, std::ignore) = - read_graph(handle, configuration, false); + input_usecase.template construct_graph( + handle, true, false); auto sg_graph_view = sg_graph.view(); @@ -207,7 +148,7 @@ class Tests_MGKatzCentrality : public ::testing::TestWithParam(nullptr), + static_cast(nullptr), d_sg_katz_centralities.data(), alpha, beta, @@ -258,34 +199,48 @@ class Tests_MGKatzCentrality : public ::testing::TestWithParam; +using Tests_MGKatzCentrality_Rmat = Tests_MGKatzCentrality; + +TEST_P(Tests_MGKatzCentrality_File, CheckInt32Int32FloatFloat) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + +TEST_P(Tests_MGKatzCentrality_Rmat, CheckInt32Int32FloatFloat) { - run_current_test(GetParam()); + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); } -INSTANTIATE_TEST_CASE_P( - simple_test, - Tests_MGKatzCentrality, - ::testing::Values( +INSTANTIATE_TEST_SUITE_P( + file_test, + Tests_MGKatzCentrality_File, + ::testing::Combine( // enable correctness checks - KatzCentrality_Usecase("test/datasets/karate.mtx", false), - KatzCentrality_Usecase("test/datasets/karate.mtx", true), - KatzCentrality_Usecase("test/datasets/web-Google.mtx", false), - KatzCentrality_Usecase("test/datasets/web-Google.mtx", true), - KatzCentrality_Usecase("test/datasets/ljournal-2008.mtx", false), - KatzCentrality_Usecase("test/datasets/ljournal-2008.mtx", true), - KatzCentrality_Usecase("test/datasets/webbase-1M.mtx", false), - KatzCentrality_Usecase("test/datasets/webbase-1M.mtx", true), - KatzCentrality_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, - false), - KatzCentrality_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, - true), - // disable correctness checks for large graphs - KatzCentrality_Usecase(cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, - false, - false), - KatzCentrality_Usecase(cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, - true, - false))); + ::testing::Values(KatzCentrality_Usecase{false}, KatzCentrality_Usecase{true}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), + cugraph::test::File_Usecase("test/datasets/web-Google.mtx"), + cugraph::test::File_Usecase("test/datasets/ljournal-2008.mtx"), + cugraph::test::File_Usecase("test/datasets/webbase-1M.mtx")))); + +INSTANTIATE_TEST_SUITE_P(rmat_small_test, + Tests_MGKatzCentrality_Rmat, + ::testing::Combine( + // enable correctness checks + ::testing::Values(KatzCentrality_Usecase{false}, + 
KatzCentrality_Usecase{true}), + ::testing::Values(cugraph::test::Rmat_Usecase( + 10, 16, 0.57, 0.19, 0.19, 0, false, false, true)))); + +INSTANTIATE_TEST_SUITE_P(rmat_large_test, + Tests_MGKatzCentrality_Rmat, + ::testing::Combine( + // disable correctness checks for large graphs + ::testing::Values(KatzCentrality_Usecase{false, false}, + KatzCentrality_Usecase{true, false}), + ::testing::Values(cugraph::test::Rmat_Usecase( + 20, 32, 0.57, 0.19, 0.19, 0, false, false, true)))); CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/mg_sssp_test.cpp b/cpp/tests/experimental/mg_sssp_test.cpp index de39b8da128..c49efefacd5 100644 --- a/cpp/tests/experimental/mg_sssp_test.cpp +++ b/cpp/tests/experimental/mg_sssp_test.cpp @@ -40,72 +40,13 @@ // static int PERF = 0; -typedef struct SSSP_Usecase_t { - cugraph::test::input_graph_specifier_t input_graph_specifier{}; - +struct SSSP_Usecase { size_t source{0}; bool check_correctness{false}; +}; - SSSP_Usecase_t(std::string const& graph_file_path, size_t source, bool check_correctness = true) - : source(source), check_correctness(check_correctness) - { - std::string graph_file_full_path{}; - if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { - graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; - } else { - graph_file_full_path = graph_file_path; - } - input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH; - input_graph_specifier.graph_file_full_path = graph_file_full_path; - }; - - SSSP_Usecase_t(cugraph::test::rmat_params_t rmat_params, - size_t source, - bool check_correctness = true) - : source(source), check_correctness(check_correctness) - { - input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::RMAT_PARAMS; - input_graph_specifier.rmat_params = rmat_params; - } -} SSSP_Usecase; - -template -std::tuple, - rmm::device_uvector> -read_graph(raft::handle_t const& handle, SSSP_Usecase const& configuration, bool renumber) -{ - auto& comm = handle.get_comms(); - auto const comm_size = comm.get_size(); - auto const comm_rank = comm.get_rank(); - - std::vector partition_ids(multi_gpu ? size_t{1} : static_cast(comm_size)); - std::iota(partition_ids.begin(), - partition_ids.end(), - multi_gpu ? static_cast(comm_rank) : size_t{0}); - - return configuration.input_graph_specifier.tag == - cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH - ? 
cugraph::test:: - read_graph_from_matrix_market_file( - handle, configuration.input_graph_specifier.graph_file_full_path, true, renumber) - : cugraph::test:: - generate_graph_from_rmat_params( - handle, - configuration.input_graph_specifier.rmat_params.scale, - configuration.input_graph_specifier.rmat_params.edge_factor, - configuration.input_graph_specifier.rmat_params.a, - configuration.input_graph_specifier.rmat_params.b, - configuration.input_graph_specifier.rmat_params.c, - configuration.input_graph_specifier.rmat_params.seed, - configuration.input_graph_specifier.rmat_params.undirected, - configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, - true, - renumber, - partition_ids, - static_cast(comm_size)); -} - -class Tests_MGSSSP : public ::testing::TestWithParam { +template +class Tests_MGSSSP : public ::testing::TestWithParam> { public: Tests_MGSSSP() {} static void SetupTestCase() {} @@ -116,10 +57,9 @@ class Tests_MGSSSP : public ::testing::TestWithParam { // Compare the results of running SSSP on multiple GPUs to that of a single-GPU run template - void run_current_test(SSSP_Usecase const& configuration) + void run_current_test(SSSP_Usecase const& sssp_usecase, input_usecase_t const& input_usecase) { // 1. initialize handle - raft::handle_t handle{}; HighResClock hr_clock{}; @@ -142,19 +82,20 @@ class Tests_MGSSSP : public ::testing::TestWithParam { cugraph::experimental::graph_t mg_graph(handle); rmm::device_uvector d_mg_renumber_map_labels(0, handle.get_stream()); std::tie(mg_graph, d_mg_renumber_map_labels) = - read_graph(handle, configuration, true); + input_usecase.template construct_graph( + handle, true, true); + if (PERF) { CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement double elapsed_time{0.0}; hr_clock.stop(&elapsed_time); - std::cout << "MG read_graph took " << elapsed_time * 1e-6 << " s.\n"; + std::cout << "MG construct_graph took " << elapsed_time * 1e-6 << " s.\n"; } auto mg_graph_view = mg_graph.view(); - ASSERT_TRUE(static_cast(configuration.source) >= 0 && - static_cast(configuration.source) < - mg_graph_view.get_number_of_vertices()) + ASSERT_TRUE(static_cast(sssp_usecase.source) >= 0 && + static_cast(sssp_usecase.source) < mg_graph_view.get_number_of_vertices()) << "Invalid starting source."; // 3. run MG SSSP @@ -174,7 +115,7 @@ class Tests_MGSSSP : public ::testing::TestWithParam { mg_graph_view, d_mg_distances.data(), d_mg_predecessors.data(), - static_cast(configuration.source), + static_cast(sssp_usecase.source), std::numeric_limits::max()); if (PERF) { @@ -186,12 +127,13 @@ class Tests_MGSSSP : public ::testing::TestWithParam { // 5. copmare SG & MG results - if (configuration.check_correctness) { + if (sssp_usecase.check_correctness) { // 5-1. 
create SG graph cugraph::experimental::graph_t sg_graph(handle); std::tie(sg_graph, std::ignore) = - read_graph(handle, configuration, false); + input_usecase.template construct_graph( + handle, true, false); auto sg_graph_view = sg_graph.view(); @@ -200,7 +142,7 @@ class Tests_MGSSSP : public ::testing::TestWithParam { vertex_partition_lasts[i] = mg_graph_view.get_vertex_partition_last(i); } - rmm::device_scalar d_source(static_cast(configuration.source), + rmm::device_scalar d_source(static_cast(sssp_usecase.source), handle.get_stream()); cugraph::experimental::unrenumber_int_vertices( handle, @@ -315,23 +257,45 @@ class Tests_MGSSSP : public ::testing::TestWithParam { } }; -TEST_P(Tests_MGSSSP, CheckInt32Int32Float) +using Tests_MGSSSP_File = Tests_MGSSSP; +using Tests_MGSSSP_Rmat = Tests_MGSSSP; + +TEST_P(Tests_MGSSSP_File, CheckInt32Int32Float) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + +TEST_P(Tests_MGSSSP_Rmat, CheckInt32Int32Float) { - run_current_test(GetParam()); + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); } -INSTANTIATE_TEST_CASE_P( - simple_test, - Tests_MGSSSP, +INSTANTIATE_TEST_SUITE_P( + file_test, + Tests_MGSSSP_File, + ::testing::Values( + // enable correctness checks + std::make_tuple(SSSP_Usecase{0}, cugraph::test::File_Usecase("test/datasets/karate.mtx")), + std::make_tuple(SSSP_Usecase{0}, cugraph::test::File_Usecase("test/datasets/dblp.mtx")), + std::make_tuple(SSSP_Usecase{1000}, + cugraph::test::File_Usecase("test/datasets/wiki2003.mtx")))); + +INSTANTIATE_TEST_SUITE_P( + rmat_small_test, + Tests_MGSSSP_Rmat, ::testing::Values( // enable correctness checks - SSSP_Usecase("test/datasets/karate.mtx", 0), - SSSP_Usecase("test/datasets/dblp.mtx", 0), - SSSP_Usecase("test/datasets/wiki2003.mtx", 1000), - SSSP_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, 0), + std::make_tuple(SSSP_Usecase{0}, + cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, false, false)))); + +INSTANTIATE_TEST_SUITE_P( + rmat_large_test, + Tests_MGSSSP_Rmat, + ::testing::Values( // disable correctness checks for large graphs - SSSP_Usecase(cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, - 0, - false))); + std::make_tuple(SSSP_Usecase{0, false}, + cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false)))); CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/ms_bfs_test.cpp b/cpp/tests/experimental/ms_bfs_test.cpp index 264382c22a3..eec51f105ab 100644 --- a/cpp/tests/experimental/ms_bfs_test.cpp +++ b/cpp/tests/experimental/ms_bfs_test.cpp @@ -153,7 +153,7 @@ TEST_P(Tests_MsBfs, DISABLED_CheckInt32Int32FloatUntransposed) run_current_test(GetParam()); } /* -INSTANTIATE_TEST_CASE_P( +INSTANTIATE_TEST_SUITE_P( simple_test, Tests_MsBfs, ::testing::Values( @@ -167,7 +167,7 @@ INSTANTIATE_TEST_CASE_P( */ // For perf analysis -INSTANTIATE_TEST_CASE_P( +INSTANTIATE_TEST_SUITE_P( simple_test, Tests_MsBfs, ::testing::Values( diff --git a/cpp/tests/experimental/pagerank_test.cpp b/cpp/tests/experimental/pagerank_test.cpp index 0340140d14b..27739cee01b 100644 --- a/cpp/tests/experimental/pagerank_test.cpp +++ b/cpp/tests/experimental/pagerank_test.cpp @@ -131,75 +131,15 @@ void pagerank_reference(edge_t const* offsets, return; } -typedef struct PageRank_Usecase_t { - cugraph::test::input_graph_specifier_t input_graph_specifier{}; - +struct PageRank_Usecase { double personalization_ratio{0.0}; bool test_weighted{false}; bool 
check_correctness{false}; +}; - PageRank_Usecase_t(std::string const& graph_file_path, - double personalization_ratio, - bool test_weighted, - bool check_correctness = true) - : personalization_ratio(personalization_ratio), - test_weighted(test_weighted), - check_correctness(check_correctness) - { - std::string graph_file_full_path{}; - if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { - graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; - } else { - graph_file_full_path = graph_file_path; - } - input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH; - input_graph_specifier.graph_file_full_path = graph_file_full_path; - }; - - PageRank_Usecase_t(cugraph::test::rmat_params_t rmat_params, - double personalization_ratio, - bool test_weighted, - bool check_correctness = true) - : personalization_ratio(personalization_ratio), - test_weighted(test_weighted), - check_correctness(check_correctness) - { - input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::RMAT_PARAMS; - input_graph_specifier.rmat_params = rmat_params; - } -} PageRank_Usecase; - -template -std::tuple, - rmm::device_uvector> -read_graph(raft::handle_t const& handle, PageRank_Usecase const& configuration, bool renumber) -{ - return configuration.input_graph_specifier.tag == - cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH - ? cugraph::test:: - read_graph_from_matrix_market_file( - handle, - configuration.input_graph_specifier.graph_file_full_path, - configuration.test_weighted, - renumber) - : cugraph::test:: - generate_graph_from_rmat_params( - handle, - configuration.input_graph_specifier.rmat_params.scale, - configuration.input_graph_specifier.rmat_params.edge_factor, - configuration.input_graph_specifier.rmat_params.a, - configuration.input_graph_specifier.rmat_params.b, - configuration.input_graph_specifier.rmat_params.c, - configuration.input_graph_specifier.rmat_params.seed, - configuration.input_graph_specifier.rmat_params.undirected, - configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, - configuration.test_weighted, - renumber, - std::vector{0}, - size_t{1}); -} - -class Tests_PageRank : public ::testing::TestWithParam { +template +class Tests_PageRank + : public ::testing::TestWithParam> { public: Tests_PageRank() {} static void SetupTestCase() {} @@ -209,7 +149,8 @@ class Tests_PageRank : public ::testing::TestWithParam { virtual void TearDown() {} template - void run_current_test(PageRank_Usecase const& configuration) + void run_current_test(PageRank_Usecase const& pagerank_usecase, + input_usecase_t const& input_usecase) { constexpr bool renumber = true; @@ -223,18 +164,19 @@ class Tests_PageRank : public ::testing::TestWithParam { cugraph::experimental::graph_t graph(handle); rmm::device_uvector d_renumber_map_labels(0, handle.get_stream()); std::tie(graph, d_renumber_map_labels) = - read_graph(handle, configuration, renumber); + input_usecase.template construct_graph( + handle, true, renumber); if (PERF) { CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement double elapsed_time{0.0}; hr_clock.stop(&elapsed_time); - std::cout << "read_graph took " << elapsed_time * 1e-6 << " s.\n"; + std::cout << "construct_graph took " << elapsed_time * 1e-6 << " s.\n"; } auto graph_view = graph.view(); std::vector h_personalization_vertices{}; std::vector h_personalization_values{}; - if (configuration.personalization_ratio > 0.0) { + if 
(pagerank_usecase.personalization_ratio > 0.0) { std::default_random_engine generator{}; std::uniform_real_distribution distribution{0.0, 1.0}; h_personalization_vertices.resize(graph_view.get_number_of_local_vertices()); @@ -244,8 +186,8 @@ class Tests_PageRank : public ::testing::TestWithParam { h_personalization_vertices.erase( std::remove_if(h_personalization_vertices.begin(), h_personalization_vertices.end(), - [&generator, &distribution, configuration](auto v) { - return distribution(generator) >= configuration.personalization_ratio; + [&generator, &distribution, pagerank_usecase](auto v) { + return distribution(generator) >= pagerank_usecase.personalization_ratio; }), h_personalization_vertices.end()); h_personalization_values.resize(h_personalization_vertices.size()); @@ -308,12 +250,13 @@ class Tests_PageRank : public ::testing::TestWithParam { std::cout << "PageRank took " << elapsed_time * 1e-6 << " s.\n"; } - if (configuration.check_correctness) { + if (pagerank_usecase.check_correctness) { cugraph::experimental::graph_t unrenumbered_graph( handle); if (renumber) { std::tie(unrenumbered_graph, std::ignore) = - read_graph(handle, configuration, false); + input_usecase.template construct_graph( + handle, true, false); } auto unrenumbered_graph_view = renumber ? unrenumbered_graph.view() : graph_view; @@ -434,53 +377,57 @@ class Tests_PageRank : public ::testing::TestWithParam { } }; +using Tests_PageRank_File = Tests_PageRank; +using Tests_PageRank_Rmat = Tests_PageRank; + +// FIXME: add tests for type combinations +TEST_P(Tests_PageRank_File, CheckInt32Int32FloatFloat) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + // FIXME: add tests for type combinations -TEST_P(Tests_PageRank, CheckInt32Int32FloatFloat) +TEST_P(Tests_PageRank_Rmat, CheckInt32Int32FloatFloat) { - run_current_test(GetParam()); + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); } -INSTANTIATE_TEST_CASE_P( - simple_test, - Tests_PageRank, - ::testing::Values( +INSTANTIATE_TEST_SUITE_P( + file_test, + Tests_PageRank_File, + ::testing::Combine( + // enable correctness checks + ::testing::Values(PageRank_Usecase{0.0, false}, + PageRank_Usecase{0.5, false}, + PageRank_Usecase{0.0, true}, + PageRank_Usecase{0.5, true}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), + cugraph::test::File_Usecase("test/datasets/web-Google.mtx"), + cugraph::test::File_Usecase("test/datasets/ljournal-2008.mtx"), + cugraph::test::File_Usecase("test/datasets/webbase-1M.mtx")))); + +INSTANTIATE_TEST_SUITE_P( + rmat_small_tests, + Tests_PageRank_Rmat, + ::testing::Combine( // enable correctness checks - PageRank_Usecase("test/datasets/karate.mtx", 0.0, false), - PageRank_Usecase("test/datasets/karate.mtx", 0.5, false), - PageRank_Usecase("test/datasets/karate.mtx", 0.0, true), - PageRank_Usecase("test/datasets/karate.mtx", 0.5, true), - PageRank_Usecase("test/datasets/web-Google.mtx", 0.0, false), - PageRank_Usecase("test/datasets/web-Google.mtx", 0.5, false), - PageRank_Usecase("test/datasets/web-Google.mtx", 0.0, true), - PageRank_Usecase("test/datasets/web-Google.mtx", 0.5, true), - PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.0, false), - PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.5, false), - PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.0, true), - PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.5, true), - PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.0, false), - 
PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.5, false), - PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.0, true), - PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.5, true), - PageRank_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, - 0.0, - false), - PageRank_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, - 0.5, - false), - PageRank_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, - 0.0, - true), - PageRank_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, - 0.5, - true), + ::testing::Values(PageRank_Usecase{0.0, false}, + PageRank_Usecase{0.5, false}, + PageRank_Usecase{0.0, true}, + PageRank_Usecase{0.5, true}), + ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, false, false)))); + +INSTANTIATE_TEST_SUITE_P( + rmat_large_tests, + Tests_PageRank_Rmat, + ::testing::Combine( // disable correctness checks for large graphs - PageRank_Usecase( - cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, 0.0, false, false), - PageRank_Usecase( - cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, 0.5, false, false), - PageRank_Usecase( - cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, 0.0, true, false), - PageRank_Usecase( - cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, 0.5, true, false))); + ::testing::Values(PageRank_Usecase{0.0, false, false}, + PageRank_Usecase{0.5, false, false}, + PageRank_Usecase{0.0, true, false}, + PageRank_Usecase{0.5, true, false}), + ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false)))); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/random_walks_test.cu b/cpp/tests/experimental/random_walks_test.cu index 9fb1716f62b..d692f6a7592 100644 --- a/cpp/tests/experimental/random_walks_test.cu +++ b/cpp/tests/experimental/random_walks_test.cu @@ -141,7 +141,7 @@ TEST_P(Tests_RandomWalks, Initialize_i32_i32_f) run_current_test(GetParam()); } -INSTANTIATE_TEST_CASE_P( +INSTANTIATE_TEST_SUITE_P( simple_test, Tests_RandomWalks, ::testing::Values(RandomWalks_Usecase("test/datasets/karate.mtx", true), diff --git a/cpp/tests/experimental/sssp_test.cpp b/cpp/tests/experimental/sssp_test.cpp index e8ab3ec5426..a9c12043a7f 100644 --- a/cpp/tests/experimental/sssp_test.cpp +++ b/cpp/tests/experimental/sssp_test.cpp @@ -87,63 +87,13 @@ void sssp_reference(edge_t const* offsets, return; } -typedef struct SSSP_Usecase_t { - cugraph::test::input_graph_specifier_t input_graph_specifier{}; - +struct SSSP_Usecase { size_t source{0}; bool check_correctness{false}; +}; - SSSP_Usecase_t(std::string const& graph_file_path, size_t source, bool check_correctness = true) - : source(source), check_correctness(check_correctness) - { - std::string graph_file_full_path{}; - if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { - graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; - } else { - graph_file_full_path = graph_file_path; - } - input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH; - input_graph_specifier.graph_file_full_path = graph_file_full_path; - }; - - SSSP_Usecase_t(cugraph::test::rmat_params_t rmat_params, - size_t source, - bool check_correctness = true) - : source(source), check_correctness(check_correctness) - { - input_graph_specifier.tag = 
cugraph::test::input_graph_specifier_t::RMAT_PARAMS; - input_graph_specifier.rmat_params = rmat_params; - } -} SSSP_Usecase; - -template -std::tuple, - rmm::device_uvector> -read_graph(raft::handle_t const& handle, SSSP_Usecase const& configuration, bool renumber) -{ - return configuration.input_graph_specifier.tag == - cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH - ? cugraph::test:: - read_graph_from_matrix_market_file( - handle, configuration.input_graph_specifier.graph_file_full_path, true, renumber) - : cugraph::test:: - generate_graph_from_rmat_params( - handle, - configuration.input_graph_specifier.rmat_params.scale, - configuration.input_graph_specifier.rmat_params.edge_factor, - configuration.input_graph_specifier.rmat_params.a, - configuration.input_graph_specifier.rmat_params.b, - configuration.input_graph_specifier.rmat_params.c, - configuration.input_graph_specifier.rmat_params.seed, - configuration.input_graph_specifier.rmat_params.undirected, - configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, - true, - renumber, - std::vector{0}, - size_t{1}); -} - -class Tests_SSSP : public ::testing::TestWithParam { +template +class Tests_SSSP : public ::testing::TestWithParam> { public: Tests_SSSP() {} static void SetupTestCase() {} @@ -153,7 +103,7 @@ class Tests_SSSP : public ::testing::TestWithParam { virtual void TearDown() {} template - void run_current_test(SSSP_Usecase const& configuration) + void run_current_test(SSSP_Usecase const& sssp_usecase, input_usecase_t const& input_usecase) { constexpr bool renumber = true; @@ -167,17 +117,19 @@ class Tests_SSSP : public ::testing::TestWithParam { cugraph::experimental::graph_t graph(handle); rmm::device_uvector d_renumber_map_labels(0, handle.get_stream()); std::tie(graph, d_renumber_map_labels) = - read_graph(handle, configuration, renumber); + input_usecase.template construct_graph( + handle, true, renumber); if (PERF) { CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement double elapsed_time{0.0}; hr_clock.stop(&elapsed_time); - std::cout << "read_graph took " << elapsed_time * 1e-6 << " s.\n"; + std::cout << "construct_graph took " << elapsed_time * 1e-6 << " s.\n"; } + auto graph_view = graph.view(); - ASSERT_TRUE(static_cast(configuration.source) >= 0 && - static_cast(configuration.source) < graph_view.get_number_of_vertices()); + ASSERT_TRUE(static_cast(sssp_usecase.source) >= 0 && + static_cast(sssp_usecase.source) < graph_view.get_number_of_vertices()); rmm::device_uvector d_distances(graph_view.get_number_of_vertices(), handle.get_stream()); @@ -193,7 +145,7 @@ class Tests_SSSP : public ::testing::TestWithParam { graph_view, d_distances.data(), d_predecessors.data(), - static_cast(configuration.source), + static_cast(sssp_usecase.source), std::numeric_limits::max(), false); @@ -204,12 +156,13 @@ class Tests_SSSP : public ::testing::TestWithParam { std::cout << "SSSP took " << elapsed_time * 1e-6 << " s.\n"; } - if (configuration.check_correctness) { + if (sssp_usecase.check_correctness) { cugraph::experimental::graph_t unrenumbered_graph( handle); if (renumber) { std::tie(unrenumbered_graph, std::ignore) = - read_graph(handle, configuration, false); + input_usecase.template construct_graph( + handle, true, false); } auto unrenumbered_graph_view = renumber ? 
unrenumbered_graph.view() : graph_view; @@ -231,7 +184,7 @@ class Tests_SSSP : public ::testing::TestWithParam { handle.get_stream_view().synchronize(); - auto unrenumbered_source = static_cast(configuration.source); + auto unrenumbered_source = static_cast(sssp_usecase.source); if (renumber) { std::vector h_renumber_map_labels(d_renumber_map_labels.size()); raft::update_host(h_renumber_map_labels.data(), @@ -241,7 +194,7 @@ class Tests_SSSP : public ::testing::TestWithParam { handle.get_stream_view().synchronize(); - unrenumbered_source = h_renumber_map_labels[configuration.source]; + unrenumbered_source = h_renumber_map_labels[sssp_usecase.source]; } std::vector h_reference_distances(unrenumbered_graph_view.get_number_of_vertices()); @@ -330,21 +283,44 @@ class Tests_SSSP : public ::testing::TestWithParam { } }; +using Tests_SSSP_File = Tests_SSSP; +using Tests_SSSP_Rmat = Tests_SSSP; + // FIXME: add tests for type combinations -TEST_P(Tests_SSSP, CheckInt32Int32Float) { run_current_test(GetParam()); } +TEST_P(Tests_SSSP_File, CheckInt32Int32Float) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} +TEST_P(Tests_SSSP_Rmat, CheckInt32Int32Float) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} -INSTANTIATE_TEST_CASE_P( - simple_test, - Tests_SSSP, +INSTANTIATE_TEST_SUITE_P( + file_test, + Tests_SSSP_File, + // enable correctness checks + ::testing::Values( + std::make_tuple(SSSP_Usecase{0}, cugraph::test::File_Usecase("test/datasets/karate.mtx")), + std::make_tuple(SSSP_Usecase{0}, cugraph::test::File_Usecase("test/datasets/dblp.mtx")), + std::make_tuple(SSSP_Usecase{1000}, + cugraph::test::File_Usecase("test/datasets/wiki2003.mtx")))); + +INSTANTIATE_TEST_SUITE_P( + rmat_small_test, + Tests_SSSP_Rmat, + // enable correctness checks + ::testing::Values(std::make_tuple( + SSSP_Usecase{0}, cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, false, false)))); + +INSTANTIATE_TEST_SUITE_P( + rmat_large_test, + Tests_SSSP_Rmat, + // disable correctness checks for large graphs ::testing::Values( - // enable correctness checks - SSSP_Usecase("test/datasets/karate.mtx", 0), - SSSP_Usecase("test/datasets/dblp.mtx", 0), - SSSP_Usecase("test/datasets/wiki2003.mtx", 1000), - SSSP_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, 0), - // disable correctness checks for large graphs - SSSP_Usecase(cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, - 0, - false))); + std::make_tuple(SSSP_Usecase{0, false}, + cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false)))); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/weight_sum_test.cpp b/cpp/tests/experimental/weight_sum_test.cpp index 9ab47b69baa..d04cba2d132 100644 --- a/cpp/tests/experimental/weight_sum_test.cpp +++ b/cpp/tests/experimental/weight_sum_test.cpp @@ -178,11 +178,11 @@ TEST_P(Tests_WeightSum, CheckInt32Int32FloatUntransposed) run_current_test(GetParam()); } -INSTANTIATE_TEST_CASE_P(simple_test, - Tests_WeightSum, - ::testing::Values(WeightSum_Usecase("test/datasets/karate.mtx"), - WeightSum_Usecase("test/datasets/web-Google.mtx"), - WeightSum_Usecase("test/datasets/ljournal-2008.mtx"), - WeightSum_Usecase("test/datasets/webbase-1M.mtx"))); +INSTANTIATE_TEST_SUITE_P(simple_test, + Tests_WeightSum, + ::testing::Values(WeightSum_Usecase("test/datasets/karate.mtx"), + WeightSum_Usecase("test/datasets/web-Google.mtx"), + WeightSum_Usecase("test/datasets/ljournal-2008.mtx"), 
+ WeightSum_Usecase("test/datasets/webbase-1M.mtx"))); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/layout/force_atlas2_test.cu b/cpp/tests/layout/force_atlas2_test.cu index d564765d0df..c6067407b70 100644 --- a/cpp/tests/layout/force_atlas2_test.cu +++ b/cpp/tests/layout/force_atlas2_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. * * NVIDIA CORPORATION and its licensors retain all intellectual property * and proprietary rights in and to this software, related documentation @@ -229,12 +229,12 @@ TEST_P(Tests_Force_Atlas2, CheckFP32_T) { run_current_test(GetParam()); } TEST_P(Tests_Force_Atlas2, CheckFP64_T) { run_current_test(GetParam()); } // --gtest_filter=*simple_test* -INSTANTIATE_TEST_CASE_P(simple_test, - Tests_Force_Atlas2, - ::testing::Values(Force_Atlas2_Usecase("test/datasets/karate.mtx", 0.73), - Force_Atlas2_Usecase("test/datasets/dolphins.mtx", 0.69), - Force_Atlas2_Usecase("test/datasets/polbooks.mtx", 0.76), - Force_Atlas2_Usecase("test/datasets/netscience.mtx", - 0.80))); +INSTANTIATE_TEST_SUITE_P(simple_test, + Tests_Force_Atlas2, + ::testing::Values(Force_Atlas2_Usecase("test/datasets/karate.mtx", 0.73), + Force_Atlas2_Usecase("test/datasets/dolphins.mtx", 0.69), + Force_Atlas2_Usecase("test/datasets/polbooks.mtx", 0.76), + Force_Atlas2_Usecase("test/datasets/netscience.mtx", + 0.80))); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/pagerank/mg_pagerank_test.cpp b/cpp/tests/pagerank/mg_pagerank_test.cpp index bbc80a60a3d..0eae6a62f31 100644 --- a/cpp/tests/pagerank/mg_pagerank_test.cpp +++ b/cpp/tests/pagerank/mg_pagerank_test.cpp @@ -40,84 +40,15 @@ // static int PERF = 0; -typedef struct PageRank_Usecase_t { - cugraph::test::input_graph_specifier_t input_graph_specifier{}; - +struct PageRank_Usecase { double personalization_ratio{0.0}; bool test_weighted{false}; bool check_correctness{false}; +}; - PageRank_Usecase_t(std::string const& graph_file_path, - double personalization_ratio, - bool test_weighted, - bool check_correctness = true) - : personalization_ratio(personalization_ratio), - test_weighted(test_weighted), - check_correctness(check_correctness) - { - std::string graph_file_full_path{}; - if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { - graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; - } else { - graph_file_full_path = graph_file_path; - } - input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH; - input_graph_specifier.graph_file_full_path = graph_file_full_path; - }; - - PageRank_Usecase_t(cugraph::test::rmat_params_t rmat_params, - double personalization_ratio, - bool test_weighted, - bool check_correctness = true) - : personalization_ratio(personalization_ratio), - test_weighted(test_weighted), - check_correctness(check_correctness) - { - input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::RMAT_PARAMS; - input_graph_specifier.rmat_params = rmat_params; - } -} PageRank_Usecase; - -template -std::tuple, - rmm::device_uvector> -read_graph(raft::handle_t const& handle, PageRank_Usecase const& configuration, bool renumber) -{ - auto& comm = handle.get_comms(); - auto const comm_size = comm.get_size(); - auto const comm_rank = comm.get_rank(); - - std::vector partition_ids(multi_gpu ? size_t{1} : static_cast(comm_size)); - std::iota(partition_ids.begin(), - partition_ids.end(), - multi_gpu ? 
static_cast(comm_rank) : size_t{0}); - - return configuration.input_graph_specifier.tag == - cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH - ? cugraph::test:: - read_graph_from_matrix_market_file( - handle, - configuration.input_graph_specifier.graph_file_full_path, - configuration.test_weighted, - renumber) - : cugraph::test:: - generate_graph_from_rmat_params( - handle, - configuration.input_graph_specifier.rmat_params.scale, - configuration.input_graph_specifier.rmat_params.edge_factor, - configuration.input_graph_specifier.rmat_params.a, - configuration.input_graph_specifier.rmat_params.b, - configuration.input_graph_specifier.rmat_params.c, - configuration.input_graph_specifier.rmat_params.seed, - configuration.input_graph_specifier.rmat_params.undirected, - configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, - configuration.test_weighted, - renumber, - partition_ids, - static_cast(comm_size)); -} - -class Tests_MGPageRank : public ::testing::TestWithParam { +template +class Tests_MGPageRank + : public ::testing::TestWithParam> { public: Tests_MGPageRank() {} static void SetupTestCase() {} @@ -128,10 +59,10 @@ class Tests_MGPageRank : public ::testing::TestWithParam { // Compare the results of running PageRank on multiple GPUs to that of a single-GPU run template - void run_current_test(PageRank_Usecase const& configuration) + void run_current_test(PageRank_Usecase const& pagerank_usecase, + input_usecase_t const& input_usecase) { // 1. initialize handle - raft::handle_t handle{}; HighResClock hr_clock{}; @@ -154,12 +85,13 @@ class Tests_MGPageRank : public ::testing::TestWithParam { cugraph::experimental::graph_t mg_graph(handle); rmm::device_uvector d_mg_renumber_map_labels(0, handle.get_stream()); std::tie(mg_graph, d_mg_renumber_map_labels) = - read_graph(handle, configuration, true); + input_usecase.template construct_graph(handle, true); + if (PERF) { CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement double elapsed_time{0.0}; hr_clock.stop(&elapsed_time); - std::cout << "MG read_graph took " << elapsed_time * 1e-6 << " s.\n"; + std::cout << "MG construct_graph took " << elapsed_time * 1e-6 << " s.\n"; } auto mg_graph_view = mg_graph.view(); @@ -168,7 +100,7 @@ class Tests_MGPageRank : public ::testing::TestWithParam { std::vector h_mg_personalization_vertices{}; std::vector h_mg_personalization_values{}; - if (configuration.personalization_ratio > 0.0) { + if (pagerank_usecase.personalization_ratio > 0.0) { std::default_random_engine generator{ static_cast(comm.get_rank()) /* seed */}; std::uniform_real_distribution distribution{0.0, 1.0}; @@ -179,8 +111,8 @@ class Tests_MGPageRank : public ::testing::TestWithParam { h_mg_personalization_vertices.erase( std::remove_if(h_mg_personalization_vertices.begin(), h_mg_personalization_vertices.end(), - [&generator, &distribution, configuration](auto v) { - return distribution(generator) >= configuration.personalization_ratio; + [&generator, &distribution, pagerank_usecase](auto v) { + return distribution(generator) >= pagerank_usecase.personalization_ratio; }), h_mg_personalization_vertices.end()); h_mg_personalization_values.resize(h_mg_personalization_vertices.size()); @@ -238,12 +170,13 @@ class Tests_MGPageRank : public ::testing::TestWithParam { // 5. copmare SG & MG results - if (configuration.check_correctness) { + if (pagerank_usecase.check_correctness) { // 5-1. 
create SG graph cugraph::experimental::graph_t sg_graph(handle); std::tie(sg_graph, std::ignore) = - read_graph(handle, configuration, false); + input_usecase.template construct_graph( + handle, true, false); auto sg_graph_view = sg_graph.view(); @@ -251,7 +184,7 @@ class Tests_MGPageRank : public ::testing::TestWithParam { rmm::device_uvector d_sg_personalization_vertices(0, handle.get_stream()); rmm::device_uvector d_sg_personalization_values(0, handle.get_stream()); - if (configuration.personalization_ratio > 0.0) { + if (pagerank_usecase.personalization_ratio > 0.0) { rmm::device_uvector d_unrenumbered_personalization_vertices( d_mg_personalization_vertices.size(), handle.get_stream()); rmm::device_uvector d_unrenumbered_personalization_values( @@ -371,52 +304,51 @@ class Tests_MGPageRank : public ::testing::TestWithParam { } }; -TEST_P(Tests_MGPageRank, CheckInt32Int32FloatFloat) +using Tests_MGPageRank_File = Tests_MGPageRank; +using Tests_MGPageRank_Rmat = Tests_MGPageRank; + +TEST_P(Tests_MGPageRank_File, CheckInt32Int32FloatFloat) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + +TEST_P(Tests_MGPageRank_Rmat, CheckInt32Int32FloatFloat) { - run_current_test(GetParam()); + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); } -INSTANTIATE_TEST_CASE_P( - simple_test, - Tests_MGPageRank, - ::testing::Values( +INSTANTIATE_TEST_SUITE_P( + file_tests, + Tests_MGPageRank_File, + ::testing::Combine( // enable correctness checks - PageRank_Usecase("test/datasets/karate.mtx", 0.0, false), - PageRank_Usecase("test/datasets/karate.mtx", 0.5, false), - PageRank_Usecase("test/datasets/karate.mtx", 0.0, true), - PageRank_Usecase("test/datasets/karate.mtx", 0.5, true), - PageRank_Usecase("test/datasets/web-Google.mtx", 0.0, false), - PageRank_Usecase("test/datasets/web-Google.mtx", 0.5, false), - PageRank_Usecase("test/datasets/web-Google.mtx", 0.0, true), - PageRank_Usecase("test/datasets/web-Google.mtx", 0.5, true), - PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.0, false), - PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.5, false), - PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.0, true), - PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.5, true), - PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.0, false), - PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.5, false), - PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.0, true), - PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.5, true), - PageRank_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, - 0.0, - false), - PageRank_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, - 0.5, - false), - PageRank_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, - 0.0, - true), - PageRank_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, - 0.5, - true), - // disable correctness checks for large graphs - PageRank_Usecase( - cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, 0.0, false, false), - PageRank_Usecase( - cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, 0.5, false, false), - PageRank_Usecase( - cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, 0.0, true, false), - PageRank_Usecase( - cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, 0.5, true, false))); + ::testing::Values(PageRank_Usecase{0.0, false}, + 
PageRank_Usecase{0.5, false}, + PageRank_Usecase{0.0, true}, + PageRank_Usecase{0.5, true}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), + cugraph::test::File_Usecase("test/datasets/web-Google.mtx"), + cugraph::test::File_Usecase("test/datasets/ljournal-2008.mtx"), + cugraph::test::File_Usecase("test/datasets/webbase-1M.mtx")))); + +INSTANTIATE_TEST_SUITE_P(rmat_small_tests, + Tests_MGPageRank_Rmat, + ::testing::Combine(::testing::Values(PageRank_Usecase{0.0, false}, + PageRank_Usecase{0.5, false}, + PageRank_Usecase{0.0, true}, + PageRank_Usecase{0.5, true}), + ::testing::Values(cugraph::test::Rmat_Usecase( + 10, 16, 0.57, 0.19, 0.19, 0, false, false, true)))); + +INSTANTIATE_TEST_SUITE_P(rmat_large_tests, + Tests_MGPageRank_Rmat, + ::testing::Combine(::testing::Values(PageRank_Usecase{0.0, false, false}, + PageRank_Usecase{0.5, false, false}, + PageRank_Usecase{0.0, true, false}, + PageRank_Usecase{0.5, true, false}), + ::testing::Values(cugraph::test::Rmat_Usecase( + 20, 32, 0.57, 0.19, 0.19, 0, false, false, true)))); CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/traversal/bfs_test.cu b/cpp/tests/traversal/bfs_test.cu index d90da4367a0..9027d73b83e 100644 --- a/cpp/tests/traversal/bfs_test.cu +++ b/cpp/tests/traversal/bfs_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -224,13 +224,13 @@ TEST_P(Tests_BFS, CheckInt64_SP_COUNTER) run_current_test(GetParam()); } -INSTANTIATE_TEST_CASE_P(simple_test, - Tests_BFS, - ::testing::Values(BFS_Usecase("test/datasets/karate.mtx", 0), - BFS_Usecase("test/datasets/polbooks.mtx", 0), - BFS_Usecase("test/datasets/netscience.mtx", 0), - BFS_Usecase("test/datasets/netscience.mtx", 100), - BFS_Usecase("test/datasets/wiki2003.mtx", 1000), - BFS_Usecase("test/datasets/wiki-Talk.mtx", 1000))); +INSTANTIATE_TEST_SUITE_P(simple_test, + Tests_BFS, + ::testing::Values(BFS_Usecase("test/datasets/karate.mtx", 0), + BFS_Usecase("test/datasets/polbooks.mtx", 0), + BFS_Usecase("test/datasets/netscience.mtx", 0), + BFS_Usecase("test/datasets/netscience.mtx", 100), + BFS_Usecase("test/datasets/wiki2003.mtx", 1000), + BFS_Usecase("test/datasets/wiki-Talk.mtx", 1000))); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/traversal/sssp_test.cu b/cpp/tests/traversal/sssp_test.cu index ea56d1d79cb..e151ab64e68 100644 --- a/cpp/tests/traversal/sssp_test.cu +++ b/cpp/tests/traversal/sssp_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. 
* * NVIDIA CORPORATION and its licensors retain all intellectual property * and proprietary rights in and to this software, related documentation @@ -425,10 +425,10 @@ TEST_P(Tests_SSSP, CheckFP64_RANDOM_DIST_PREDS) // --gtest_filter=*simple_test* -INSTANTIATE_TEST_CASE_P(simple_test, - Tests_SSSP, - ::testing::Values(SSSP_Usecase(MTX, "test/datasets/dblp.mtx", 100), - SSSP_Usecase(MTX, "test/datasets/wiki2003.mtx", 100000), - SSSP_Usecase(MTX, "test/datasets/karate.mtx", 1))); +INSTANTIATE_TEST_SUITE_P(simple_test, + Tests_SSSP, + ::testing::Values(SSSP_Usecase(MTX, "test/datasets/dblp.mtx", 100), + SSSP_Usecase(MTX, "test/datasets/wiki2003.mtx", 100000), + SSSP_Usecase(MTX, "test/datasets/karate.mtx", 1))); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/traversal/tsp_test.cu b/cpp/tests/traversal/tsp_test.cu index d4e9ff90f35..47a72757bd8 100644 --- a/cpp/tests/traversal/tsp_test.cu +++ b/cpp/tests/traversal/tsp_test.cu @@ -242,5 +242,5 @@ class Tests_Tsp : public ::testing::TestWithParam { TEST_P(Tests_Tsp, CheckFP32_T) { run_current_test(GetParam()); } -INSTANTIATE_TEST_CASE_P(simple_test, Tests_Tsp, ::testing::ValuesIn(euc_2d)); +INSTANTIATE_TEST_SUITE_P(simple_test, Tests_Tsp, ::testing::ValuesIn(euc_2d)); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/tree/mst_test.cu b/cpp/tests/tree/mst_test.cu index 949d6bae59b..e3d7b70d51e 100644 --- a/cpp/tests/tree/mst_test.cu +++ b/cpp/tests/tree/mst_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -144,8 +144,8 @@ TEST_P(Tests_Mst, CheckFP32_T) { run_current_test(GetParam()); } TEST_P(Tests_Mst, CheckFP64_T) { run_current_test(GetParam()); } -INSTANTIATE_TEST_CASE_P(simple_test, - Tests_Mst, - ::testing::Values(Mst_Usecase("test/datasets/netscience.mtx"))); +INSTANTIATE_TEST_SUITE_P(simple_test, + Tests_Mst, + ::testing::Values(Mst_Usecase("test/datasets/netscience.mtx"))); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/utilities/test_utilities.hpp b/cpp/tests/utilities/test_utilities.hpp index e81a76b4163..196128e37c0 100644 --- a/cpp/tests/utilities/test_utilities.hpp +++ b/cpp/tests/utilities/test_utilities.hpp @@ -21,6 +21,7 @@ #include #include +#include #include #include @@ -167,21 +168,120 @@ generate_graph_from_rmat_params(raft::handle_t const& handle, std::vector const& partition_ids, size_t num_partitions); -struct rmat_params_t { - size_t scale{}; - size_t edge_factor{}; - double a{}; - double b{}; - double c{}; - uint64_t seed{}; - bool undirected{}; - bool scramble_vertex_ids{}; +class File_Usecase { + public: + File_Usecase() = delete; + + File_Usecase(std::string const& graph_file_path) + { + if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { + graph_file_full_path_ = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; + } else { + graph_file_full_path_ = graph_file_path; + } + } + + template + std::tuple< + cugraph::experimental::graph_t, + rmm::device_uvector> + construct_graph(raft::handle_t const& handle, bool test_weighted, bool renumber = true) const + { + return read_graph_from_matrix_market_file( + handle, graph_file_full_path_, test_weighted, renumber); + } + + private: + std::string graph_file_full_path_{}; }; -struct input_graph_specifier_t { - enum { MATRIX_MARKET_FILE_PATH, RMAT_PARAMS } tag{}; - std::string graph_file_full_path{}; - rmat_params_t rmat_params{}; 
+class Rmat_Usecase { + public: + Rmat_Usecase() = delete; + + Rmat_Usecase(size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool multi_gpu_usecase = false) + : scale_(scale), + edge_factor_(edge_factor), + a_(a), + b_(b), + c_(c), + seed_(seed), + undirected_(undirected), + scramble_vertex_ids_(scramble_vertex_ids), + multi_gpu_usecase_(multi_gpu_usecase) + { + } + + template + std::tuple< + cugraph::experimental::graph_t, + rmm::device_uvector> + construct_graph(raft::handle_t const& handle, bool test_weighted, bool renumber = true) const + { + std::vector partition_ids(1); + size_t comm_size; + + if (multi_gpu_usecase_) { + auto& comm = handle.get_comms(); + comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + + partition_ids.resize(multi_gpu ? size_t{1} : static_cast(comm_size)); + + std::iota(partition_ids.begin(), + partition_ids.end(), + multi_gpu ? static_cast(comm_rank) : size_t{0}); + } else { + comm_size = 1; + partition_ids[0] = size_t{0}; + } + + return generate_graph_from_rmat_params( + handle, + scale_, + edge_factor_, + a_, + b_, + c_, + seed_, + undirected_, + scramble_vertex_ids_, + test_weighted, + renumber, + partition_ids, + comm_size); + } + + private: + size_t scale_{}; + size_t edge_factor_{}; + double a_{}; + double b_{}; + double c_{}; + uint64_t seed_{}; + bool undirected_{}; + bool scramble_vertex_ids_{}; + bool multi_gpu_usecase_{}; }; } // namespace test From 36371e98ba06800cfb3b341d68a38c06114086d5 Mon Sep 17 00:00:00 2001 From: AJ Schmidt Date: Mon, 19 Apr 2021 10:41:20 -0400 Subject: [PATCH 229/343] Update CUDA version in build scripts (#1541) This PR updates the CUDA version used in the build scripts. Authors: - AJ Schmidt (https://github.com/ajschmidt8) Approvers: - Ray Douglass (https://github.com/raydouglass) URL: https://github.com/rapidsai/cugraph/pull/1541 --- ci/cpu/prebuild.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/cpu/prebuild.sh b/ci/cpu/prebuild.sh index ee471329b35..6665757181d 100644 --- a/ci/cpu/prebuild.sh +++ b/ci/cpu/prebuild.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Copyright (c) 2018-2020, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -18,7 +18,7 @@ if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then export BUILD_LIBCUGRAPH=1 fi -if [[ "$CUDA" == "10.1" ]]; then +if [[ "$CUDA" == "11.0" ]]; then export UPLOAD_CUGRAPH=1 else export UPLOAD_CUGRAPH=0 From 7fb335a4177cb34316b1327658b43cdca56da7ed Mon Sep 17 00:00:00 2001 From: Iroy30 <41401566+Iroy30@users.noreply.github.com> Date: Tue, 20 Apr 2021 13:47:07 -0500 Subject: [PATCH 230/343] update bfs backend, tests and expose depth limit (#1532) - Transitioned SG bfs backend - Exposed depth_limit parameter, removed spc(shortest path counter) from bfs API - Added/updates depth_limit tests - Updated bfs to accept multi-column start vertex Authors: - https://github.com/Iroy30 Approvers: - Brad Rees (https://github.com/BradReesWork) - Alex Fender (https://github.com/afender) - Seunghwa Kang (https://github.com/seunghwak) - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/1532 --- cpp/include/utilities/cython.hpp | 4 +- cpp/src/utilities/cython.cu | 75 +++++----- python/cugraph/dask/traversal/bfs.py | 24 ++- python/cugraph/dask/traversal/mg_bfs.pxd | 7 +- .../cugraph/dask/traversal/mg_bfs_wrapper.pyx | 44 ++++-- python/cugraph/structure/number_map.py | 21 +-- python/cugraph/tests/dask/test_mg_bfs.py | 65 +++++++- python/cugraph/tests/test_bfs.py | 139 +++--------------- python/cugraph/traversal/bfs.pxd | 6 +- python/cugraph/traversal/bfs.py | 75 +++------- python/cugraph/traversal/bfs_wrapper.pyx | 67 ++++----- python/cugraph/traversal/sssp_wrapper.pyx | 44 +++--- 12 files changed, 257 insertions(+), 314 deletions(-) diff --git a/cpp/include/utilities/cython.hpp b/cpp/include/utilities/cython.hpp index d8c476760f0..c1d0c836225 100644 --- a/cpp/include/utilities/cython.hpp +++ b/cpp/include/utilities/cython.hpp @@ -448,9 +448,9 @@ void call_bfs(raft::handle_t const& handle, vertex_t* identifiers, vertex_t* distances, vertex_t* predecessors, - double* sp_counters, + vertex_t depth_limit, const vertex_t start_vertex, - bool directed); + bool direction_optimizing); // Wrapper for calling SSSP through a graph container template diff --git a/cpp/src/utilities/cython.cu b/cpp/src/utilities/cython.cu index 4a2b98ea815..9729039fd48 100644 --- a/cpp/src/utilities/cython.cu +++ b/cpp/src/utilities/cython.cu @@ -689,31 +689,11 @@ void call_bfs(raft::handle_t const& handle, vertex_t* identifiers, vertex_t* distances, vertex_t* predecessors, - double* sp_counters, + vertex_t depth_limit, const vertex_t start_vertex, - bool directed) + bool direction_optimizing) { - if (graph_container.graph_type == graphTypeEnum::GraphCSRViewFloat) { - graph_container.graph_ptr_union.GraphCSRViewFloatPtr->get_vertex_identifiers( - reinterpret_cast(identifiers)); - bfs(handle, - *(graph_container.graph_ptr_union.GraphCSRViewFloatPtr), - reinterpret_cast(distances), - reinterpret_cast(predecessors), - sp_counters, - static_cast(start_vertex), - directed); - } else if (graph_container.graph_type == graphTypeEnum::GraphCSRViewDouble) { - graph_container.graph_ptr_union.GraphCSRViewDoublePtr->get_vertex_identifiers( - reinterpret_cast(identifiers)); - bfs(handle, - *(graph_container.graph_ptr_union.GraphCSRViewDoublePtr), - reinterpret_cast(distances), - reinterpret_cast(predecessors), - sp_counters, - static_cast(start_vertex), - directed); - } else if (graph_container.graph_type == graphTypeEnum::graph_t) { + if (graph_container.is_multi_gpu) { if (graph_container.edgeType == numberTypeEnum::int32Type) { auto graph = 
detail::create_graph(handle, graph_container); @@ -721,7 +701,9 @@ void call_bfs(raft::handle_t const& handle, graph->view(), reinterpret_cast(distances), reinterpret_cast(predecessors), - static_cast(start_vertex)); + static_cast(start_vertex), + direction_optimizing, + static_cast(depth_limit)); } else if (graph_container.edgeType == numberTypeEnum::int64Type) { auto graph = detail::create_graph(handle, graph_container); @@ -729,9 +711,31 @@ void call_bfs(raft::handle_t const& handle, graph->view(), reinterpret_cast(distances), reinterpret_cast(predecessors), - static_cast(start_vertex)); - } else { - CUGRAPH_FAIL("vertexType/edgeType combination unsupported"); + static_cast(start_vertex), + direction_optimizing, + static_cast(depth_limit)); + } + } else { + if (graph_container.edgeType == numberTypeEnum::int32Type) { + auto graph = + detail::create_graph(handle, graph_container); + cugraph::experimental::bfs(handle, + graph->view(), + reinterpret_cast(distances), + reinterpret_cast(predecessors), + static_cast(start_vertex), + direction_optimizing, + static_cast(depth_limit)); + } else if (graph_container.edgeType == numberTypeEnum::int64Type) { + auto graph = + detail::create_graph(handle, graph_container); + cugraph::experimental::bfs(handle, + graph->view(), + reinterpret_cast(distances), + reinterpret_cast(predecessors), + static_cast(start_vertex), + direction_optimizing, + static_cast(depth_limit)); } } } @@ -1149,36 +1153,37 @@ template void call_bfs(raft::handle_t const& handle, int32_t* identifiers, int32_t* distances, int32_t* predecessors, - double* sp_counters, + int32_t depth_limit, const int32_t start_vertex, - bool directed); + bool direction_optimizing); template void call_bfs(raft::handle_t const& handle, graph_container_t const& graph_container, int32_t* identifiers, int32_t* distances, int32_t* predecessors, - double* sp_counters, + int32_t depth_limit, const int32_t start_vertex, - bool directed); + bool direction_optimizing); template void call_bfs(raft::handle_t const& handle, graph_container_t const& graph_container, int64_t* identifiers, int64_t* distances, int64_t* predecessors, - double* sp_counters, + int64_t depth_limit, const int64_t start_vertex, - bool directed); + bool direction_optimizing); template void call_bfs(raft::handle_t const& handle, graph_container_t const& graph_container, int64_t* identifiers, int64_t* distances, int64_t* predecessors, - double* sp_counters, + int64_t depth_limit, const int64_t start_vertex, - bool directed); + bool direction_optimizing); + template std::unique_ptr call_egonet( raft::handle_t const& handle, graph_container_t const& graph_container, diff --git a/python/cugraph/dask/traversal/bfs.py b/python/cugraph/dask/traversal/bfs.py index d108730f665..03b9844bf6c 100644 --- a/python/cugraph/dask/traversal/bfs.py +++ b/python/cugraph/dask/traversal/bfs.py @@ -28,6 +28,7 @@ def call_bfs(sID, num_edges, vertex_partition_offsets, start, + depth_limit, return_distances): wid = Comms.get_worker_id(sID) handle = Comms.get_handle(sID) @@ -38,12 +39,14 @@ def call_bfs(sID, wid, handle, start, + depth_limit, return_distances) def bfs(graph, start, - return_distances=False): + depth_limit=None, + return_distances=True): """ Find the distances and predecessors for a breadth first traversal of a graph. @@ -59,7 +62,9 @@ def bfs(graph, start : Integer Specify starting vertex for breadth-first search; this function iterates over edges in the component reachable from this node. 
- return_distances : bool, optional, default=False + depth_limit : Integer or None + Limit the depth of the search + return_distances : bool, optional, default=True Indicates if distances should be returned Returns @@ -99,9 +104,15 @@ def bfs(graph, data = get_distributed_data(ddf) if graph.renumbered: - start = graph.lookup_internal_vertex_id(cudf.Series([start], - dtype='int32')).compute() - start = start.iloc[0] + if isinstance(start, dask_cudf.DataFrame)\ + or isinstance(start, cudf.DataFrame): + start = graph.lookup_internal_vertex_id(start, start.columns).\ + compute() + start = start.iloc[0] + else: + start = graph.lookup_internal_vertex_id(cudf.Series([start], + dtype='int32')).compute() + start = start.iloc[0] result = [client.submit( call_bfs, @@ -111,6 +122,7 @@ def bfs(graph, num_edges, vertex_partition_offsets, start, + depth_limit, return_distances, workers=[wf[0]]) for idx, wf in enumerate(data.worker_to_parts.items())] @@ -120,5 +132,5 @@ def bfs(graph, if graph.renumbered: ddf = graph.unrenumber(ddf, 'vertex') ddf = graph.unrenumber(ddf, 'predecessor') - ddf["predecessor"] = ddf["predecessor"].fillna(-1) + ddf = ddf.fillna(-1) return ddf diff --git a/python/cugraph/dask/traversal/mg_bfs.pxd b/python/cugraph/dask/traversal/mg_bfs.pxd index afd209158c4..6a0277f8713 100644 --- a/python/cugraph/dask/traversal/mg_bfs.pxd +++ b/python/cugraph/dask/traversal/mg_bfs.pxd @@ -17,6 +17,9 @@ from cugraph.structure.graph_utilities cimport * from libcpp cimport bool +cdef extern from "limits.h": + cdef int INT_MAX + cdef long LONG_MAX cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": @@ -26,6 +29,6 @@ cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": vertex_t *identifiers, vertex_t *distances, vertex_t *predecessors, - double *sp_counters, + vertex_t depth_limit, const vertex_t start_vertex, - bool directed) except + + bool direction_optimizing) except + diff --git a/python/cugraph/dask/traversal/mg_bfs_wrapper.pyx b/python/cugraph/dask/traversal/mg_bfs_wrapper.pyx index 44630ba5fb3..e2f44ada32c 100644 --- a/python/cugraph/dask/traversal/mg_bfs_wrapper.pyx +++ b/python/cugraph/dask/traversal/mg_bfs_wrapper.pyx @@ -28,11 +28,11 @@ def mg_bfs(input_df, rank, handle, start, + depth_limit, return_distances=False): """ - Call pagerank + Call BFS """ - cdef size_t handle_size_t = handle.getHandle() handle_ = handle_size_t @@ -43,7 +43,7 @@ def mg_bfs(input_df, if num_global_edges > (2**31 - 1): edge_t = np.dtype("int64") else: - edge_t = np.dtype("int32") + edge_t = vertex_t if "value" in input_df.columns: weights = input_df['value'] weight_t = weights.dtype @@ -86,9 +86,9 @@ def mg_bfs(input_df, # Generate the cudf.DataFrame result df = cudf.DataFrame() df['vertex'] = cudf.Series(np.arange(vertex_partition_offsets.iloc[rank], vertex_partition_offsets.iloc[rank+1]), dtype=vertex_t) - df['predecessor'] = cudf.Series(np.zeros(len(df['vertex']), dtype=np.int32)) + df['predecessor'] = cudf.Series(np.zeros(len(df['vertex']), dtype=vertex_t)) if (return_distances): - df['distance'] = cudf.Series(np.zeros(len(df['vertex']), dtype=np.int32)) + df['distance'] = cudf.Series(np.zeros(len(df['vertex']), dtype=vertex_t)) # Associate to cudf Series cdef uintptr_t c_distance_ptr = NULL # Pointer to the DataFrame 'distance' Series @@ -96,14 +96,28 @@ def mg_bfs(input_df, if (return_distances): c_distance_ptr = df['distance'].__cuda_array_interface__['data'][0] - cdef bool direction = 1 - # MG BFS path assumes directed is true - c_bfs.call_bfs[int, float](handle_[0], - 
graph_container, - NULL, - c_distance_ptr, - c_predecessor_ptr, - NULL, - start, - direction) + cdef bool direction_optimizing = 0 + + if vertex_t == np.int32: + if depth_limit is None: + depth_limit = c_bfs.INT_MAX + c_bfs.call_bfs[int, float](handle_[0], + graph_container, + NULL, + c_distance_ptr, + c_predecessor_ptr, + depth_limit, + start, + direction_optimizing) + else: + if depth_limit is None: + depth_limit = c_bfs.LONG_MAX + c_bfs.call_bfs[long, float](handle_[0], + graph_container, + NULL, + c_distance_ptr, + c_predecessor_ptr, + depth_limit, + start, + direction_optimizing) return df diff --git a/python/cugraph/structure/number_map.py b/python/cugraph/structure/number_map.py index cd24dfc0434..73316756ef2 100644 --- a/python/cugraph/structure/number_map.py +++ b/python/cugraph/structure/number_map.py @@ -173,12 +173,17 @@ def __init__( self.numbered = False def to_internal_vertex_id(self, ddf, col_names): - return self.ddf.merge( - ddf, - right_on=col_names, - left_on=self.col_names, + tmp_ddf = ddf[col_names].rename( + columns=dict(zip(col_names, self.col_names))) + for name in self.col_names: + tmp_ddf[name] = tmp_ddf[name].astype(self.ddf[name].dtype) + x = self.ddf.merge( + tmp_ddf, + on=self.col_names, how="right", - )["global_id"] + ) + print(x.compute()) + return x['global_id'] def from_internal_vertex_id( self, df, internal_column_name, external_column_names @@ -342,11 +347,7 @@ def to_internal_vertex_id(self, df, col_names=None): reply = self.implementation.to_internal_vertex_id(tmp_df, tmp_col_names) - - if type(df) in [cudf.DataFrame, dask_cudf.DataFrame]: - return reply["0"] - else: - return reply + return reply def add_internal_vertex_id( self, df, id_column_name="id", col_names=None, drop=False, diff --git a/python/cugraph/tests/dask/test_mg_bfs.py b/python/cugraph/tests/dask/test_mg_bfs.py index 36d1f436b52..3e83491c87a 100644 --- a/python/cugraph/tests/dask/test_mg_bfs.py +++ b/python/cugraph/tests/dask/test_mg_bfs.py @@ -63,9 +63,8 @@ def test_dask_bfs(client_connection): dg.from_dask_cudf_edgelist(ddf, "src", "dst") expected_dist = cugraph.bfs(g, 0) - result_dist = dcg.bfs(dg, 0, True) + result_dist = dcg.bfs(dg, 0, depth_limit=2) result_dist = result_dist.compute() - compare_dist = expected_dist.merge( result_dist, on="vertex", suffixes=["_local", "_dask"] ) @@ -79,3 +78,65 @@ def test_dask_bfs(client_connection): ): err = err + 1 assert err == 0 + + +@pytest.mark.skipif( + is_single_gpu(), reason="skipping MG testing on Single GPU system" +) +def test_dask_bfs_multi_column_depthlimit(client_connection): + gc.collect() + + # FIXME: update this to allow dataset to be parameterized and have dataset + # part of test param id (see other tests) + input_data_path = r"../datasets/netscience.csv" + print(f"dataset={input_data_path}") + chunksize = dcg.get_chunksize(input_data_path) + + ddf = dask_cudf.read_csv( + input_data_path, + chunksize=chunksize, + delimiter=" ", + names=["src_a", "dst_a", "value"], + dtype=["int32", "int32", "float32"], + ) + ddf['src_b'] = ddf['src_a'] + 1000 + ddf['dst_b'] = ddf['dst_a'] + 1000 + + df = cudf.read_csv( + input_data_path, + delimiter=" ", + names=["src_a", "dst_a", "value"], + dtype=["int32", "int32", "float32"], + ) + df['src_b'] = df['src_a'] + 1000 + df['dst_b'] = df['dst_a'] + 1000 + + g = cugraph.DiGraph() + g.from_cudf_edgelist(df, ["src_a", "src_b"], ["dst_a", "dst_b"]) + + dg = cugraph.DiGraph() + dg.from_dask_cudf_edgelist(ddf, ["src_a", "src_b"], ["dst_a", "dst_b"]) + + start = cudf.DataFrame() + start['a'] = [0] + 
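+    # columns 'a' and 'b' together form a single multi-column external vertex id;
+    # renumbering maps the (a, b) pair to one internal vertex id before BFS runs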
start['b'] = [1000] + + depth_limit = 18 + expected_dist = cugraph.bfs(g, start, depth_limit=depth_limit) + result_dist = dcg.bfs(dg, start, depth_limit=depth_limit) + result_dist = result_dist.compute() + + compare_dist = expected_dist.merge( + result_dist, on=["0_vertex", "1_vertex"], suffixes=["_local", "_dask"] + ) + + err = 0 + for i in range(len(compare_dist)): + if ( + compare_dist["distance_local"].iloc[i] <= depth_limit and + compare_dist["distance_dask"].iloc[i] <= depth_limit and + compare_dist["distance_local"].iloc[i] + != compare_dist["distance_dask"].iloc[i] + ): + err = err + 1 + assert err == 0 diff --git a/python/cugraph/tests/test_bfs.py b/python/cugraph/tests/test_bfs.py index d04ef957104..a8547d692c2 100644 --- a/python/cugraph/tests/test_bfs.py +++ b/python/cugraph/tests/test_bfs.py @@ -51,6 +51,7 @@ DEFAULT_EPSILON = 1e-6 +DEPTH_LIMITS = [None, 1, 5, 18] # Map of cuGraph input types to the expected output type for cuGraph # connected_components calls. @@ -148,28 +149,14 @@ def compare_single_sp_counter(result, expected, epsilon=DEFAULT_EPSILON): return np.isclose(result, expected, rtol=epsilon) -def compare_bfs(benchmark_callable, G, nx_values, start_vertex, - return_sp_counter=False): +def compare_bfs(benchmark_callable, G, nx_values, start_vertex, depth_limit): """ Genereate both cugraph and reference bfs traversal. """ if isinstance(start_vertex, int): - result = benchmark_callable(cugraph.bfs_edges, G, start_vertex, - return_sp_counter=return_sp_counter) + result = benchmark_callable(cugraph.bfs_edges, G, start_vertex) cugraph_df = convert_output_to_cudf(G, result) - - if return_sp_counter: - # This call should only contain 3 columns: - # 'vertex', 'distance', 'predecessor', 'sp_counter' - assert len(cugraph_df.columns) == 4, ( - "The result of the BFS has an invalid " "number of columns" - ) - - if return_sp_counter: - compare_func = _compare_bfs_spc - - else: - compare_func = _compare_bfs + compare_func = _compare_bfs # NOTE: We need to take 2 different path for verification as the nx # functions used as reference return dictionaries that might @@ -185,18 +172,15 @@ def compare_bfs(benchmark_callable, G, nx_values, start_vertex, def func_to_benchmark(): for sv in start_vertex: cugraph_df = cugraph.bfs_edges( - G, sv, return_sp_counter=return_sp_counter) + G, sv, depth_limit=depth_limit) all_cugraph_distances.append(cugraph_df) benchmark_callable(func_to_benchmark) - compare_func = _compare_bfs_spc if return_sp_counter else _compare_bfs + compare_func = _compare_bfs for (i, sv) in enumerate(start_vertex): cugraph_df = convert_output_to_cudf(G, all_cugraph_distances[i]) - if return_sp_counter: - assert len(cugraph_df.columns) == 4, ( - "The result of the BFS has an invalid " "number of columns" - ) + compare_func(cugraph_df, all_nx_values[i], sv) else: # Unknown type given to seed @@ -272,55 +256,6 @@ def _compare_bfs(cugraph_df, nx_distances, source): assert invalid_predecessor_error == 0, "There are invalid predecessors" -def _compare_bfs_spc(cugraph_df, nx_sp_counter, unused): - """ - Compare BFS with shortest path counters. 
- """ - sorted_nx = [nx_sp_counter[key] for key in sorted(nx_sp_counter.keys())] - # We are not checking for distances / predecessors here as we assume - # that these have been checked in the _compare_bfs tests - # We focus solely on shortest path counting - - # cugraph return a dataframe that should contain exactly one time each - # vertex - # We could us isin to filter only vertices that are common to both - # But it would slow down the comparison, and in this specific case - # nxacb._single_source_shortest_path_basic is a dictionary containing all - # the vertices. - # There is no guarantee when we get `df` that the vertices are sorted - # thus we enforce the order so that we can leverage faster comparison after - sorted_df = cugraph_df.sort_values("vertex").rename( - columns={"sp_counter": "cu_spc"}, copy=False - ) - - # This allows to detect vertices identifier that could have been - # wrongly present multiple times - cu_vertices = set(sorted_df['vertex'].values_host) - nx_vertices = nx_sp_counter.keys() - assert len(cu_vertices.intersection(nx_vertices)) == len( - nx_vertices - ), "There are missing vertices" - - # We add the nx shortest path counter in the cudf.DataFrame, both the - # the DataFrame and `sorted_nx` are sorted base on vertices identifiers - sorted_df["nx_spc"] = sorted_nx - - # We could use numpy.isclose or cupy.isclose, we can then get the entries - # in the cudf.DataFrame where there are is a mismatch. - # numpy / cupy allclose would get only a boolean and we might want the - # extra information about the discrepancies - shortest_path_counter_errors = sorted_df[ - ~cupy.isclose( - sorted_df["cu_spc"], sorted_df["nx_spc"], rtol=DEFAULT_EPSILON - ) - ] - if len(shortest_path_counter_errors) > 0: - print(shortest_path_counter_errors) - assert len(shortest_path_counter_errors) == 0, ( - "Shortest path counters " "are too different" - ) - - def get_nx_graph_and_params(dataset, directed): """ Helper for fixtures returning a Nx graph obj and params. @@ -329,21 +264,17 @@ def get_nx_graph_and_params(dataset, directed): utils.generate_nx_graph_from_file(dataset, directed)) -def get_nx_results_and_params(seed, use_spc, dataset, directed, Gnx): +def get_nx_results_and_params(seed, depth_limit, dataset, directed, Gnx): """ Helper for fixtures returning Nx results and params. """ random.seed(seed) start_vertex = random.sample(Gnx.nodes(), 1)[0] - if use_spc: - _, _, nx_sp_counter = \ - nxacb._single_source_shortest_path_basic(Gnx, start_vertex) - nx_values = nx_sp_counter - else: - nx_values = nx.single_source_shortest_path_length(Gnx, start_vertex) + nx_values = nx.single_source_shortest_path_length(Gnx, start_vertex, + cutoff=depth_limit) - return (dataset, directed, nx_values, start_vertex, use_spc) + return (dataset, directed, nx_values, start_vertex, depth_limit) # ============================================================================= @@ -353,7 +284,7 @@ def get_nx_results_and_params(seed, use_spc, dataset, directed, Gnx): DIRECTED = [pytest.param(d) for d in DIRECTED_GRAPH_OPTIONS] DATASETS = [pytest.param(d) for d in utils.DATASETS] DATASETS_SMALL = [pytest.param(d) for d in utils.DATASETS_SMALL] -USE_SHORTEST_PATH_COUNTER = [pytest.param(False), pytest.param(True)] +DEPTH_LIMIT = [pytest.param(d) for d in DEPTH_LIMITS] # Call genFixtureParamsProduct() to caluculate the cartesian product of # multiple lists of params. 
This is required since parameterized fixtures do @@ -362,7 +293,7 @@ def get_nx_results_and_params(seed, use_spc, dataset, directed, Gnx): # full test name. algo_test_fixture_params = utils.genFixtureParamsProduct( (SEEDS, "seed"), - (USE_SHORTEST_PATH_COUNTER, "spc")) + (DEPTH_LIMIT, "depth_limit")) graph_fixture_params = utils.genFixtureParamsProduct( (DATASETS, "ds"), @@ -377,7 +308,7 @@ def get_nx_results_and_params(seed, use_spc, dataset, directed, Gnx): # was covered elsewhere). single_algo_test_fixture_params = utils.genFixtureParamsProduct( ([SEEDS[0]], "seed"), - ([USE_SHORTEST_PATH_COUNTER[0]], "spc")) + ([DEPTH_LIMIT[0]], "depth_limit")) single_small_graph_fixture_params = utils.genFixtureParamsProduct( ([DATASETS_SMALL[0]], "ds"), @@ -446,7 +377,7 @@ def test_bfs(gpubenchmark, dataset_nxresults_startvertex_spc, """ Test BFS traversal on random source with distance and predecessors """ - (dataset, directed, nx_values, start_vertex, use_spc) = \ + (dataset, directed, nx_values, start_vertex, depth_limit) = \ dataset_nxresults_startvertex_spc # special case: ensure cugraph and Nx Graph types are DiGraphs if @@ -463,8 +394,7 @@ def test_bfs(gpubenchmark, dataset_nxresults_startvertex_spc, compare_bfs( gpubenchmark, - G_or_matrix, nx_values, start_vertex, return_sp_counter=use_spc - ) + G_or_matrix, nx_values, start_vertex, depth_limit) @pytest.mark.parametrize("cugraph_input_type", @@ -477,36 +407,6 @@ def test_bfs_nonnative_inputs(gpubenchmark, cugraph_input_type) -@pytest.mark.parametrize("cugraph_input_type", utils.CUGRAPH_INPUT_TYPES) -def test_bfs_spc_full(gpubenchmark, dataset_nxresults_allstartvertices_spc, - cugraph_input_type): - """ - Test BFS traversal on every vertex with shortest path counting - """ - (dataset, directed, all_nx_values, start_vertices, use_spc) = \ - dataset_nxresults_allstartvertices_spc - - # use_spc is currently always True - - # special case: ensure cugraph and Nx Graph types are DiGraphs if - # "directed" is set, since the graph type parameterization is currently - # independent of the directed parameter. Unfortunately this does not - # change the "id" in the pytest output. 
- if directed: - if cugraph_input_type is cugraph.Graph: - cugraph_input_type = cugraph.DiGraph - elif cugraph_input_type is nx.Graph: - cugraph_input_type = nx.DiGraph - - G_or_matrix = utils.create_obj_from_csv(dataset, cugraph_input_type) - - compare_bfs( - gpubenchmark, - G_or_matrix, all_nx_values, start_vertex=start_vertices, - return_sp_counter=use_spc - ) - - def test_scipy_api_compat(): graph_file = utils.DATASETS[0] @@ -522,7 +422,7 @@ def test_scipy_api_compat(): # Ensure cugraph-compatible options work as expected cugraph.bfs(input_cugraph_graph, i_start=0) - cugraph.bfs(input_cugraph_graph, i_start=0, return_sp_counter=True) + cugraph.bfs(input_cugraph_graph, i_start=0) # cannot have start and i_start with pytest.raises(TypeError): cugraph.bfs(input_cugraph_graph, start=0, i_start=0) @@ -531,7 +431,6 @@ def test_scipy_api_compat(): cugraph.bfs(input_coo_matrix, i_start=0) cugraph.bfs(input_coo_matrix, i_start=0, directed=True) cugraph.bfs(input_coo_matrix, i_start=0, directed=False) - result = cugraph.bfs(input_coo_matrix, i_start=0, - return_sp_counter=True) + result = cugraph.bfs(input_coo_matrix, i_start=0) assert type(result) is tuple - assert len(result) == 3 + assert len(result) == 2 diff --git a/python/cugraph/traversal/bfs.pxd b/python/cugraph/traversal/bfs.pxd index 0467bf05090..b6465a6698c 100644 --- a/python/cugraph/traversal/bfs.pxd +++ b/python/cugraph/traversal/bfs.pxd @@ -19,6 +19,8 @@ from cugraph.structure.graph_utilities cimport * from libcpp cimport bool +cdef extern from "limits.h": + cdef int INT_MAX cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": cdef void call_bfs[vertex_t, weight_t]( @@ -27,6 +29,6 @@ cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": vertex_t *identifiers, vertex_t *distances, vertex_t *predecessors, - double *sp_counters, + vertex_t depth_limit, const vertex_t start_vertex, - bool directed) except + + bool direction_optimizing) except + diff --git a/python/cugraph/traversal/bfs.py b/python/cugraph/traversal/bfs.py index a483b96850b..1e6cc42b760 100644 --- a/python/cugraph/traversal/bfs.py +++ b/python/cugraph/traversal/bfs.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -41,7 +41,7 @@ import_from="scipy.sparse.csc") -def _ensure_args(G, start, return_sp_counter, i_start, directed): +def _ensure_args(G, start, i_start, directed): """ Ensures the args passed in are usable for the API api_name and returns the args with proper defaults if not specified, or raises TypeError or @@ -52,9 +52,6 @@ def _ensure_args(G, start, return_sp_counter, i_start, directed): raise TypeError("cannot specify both 'start' and 'i_start'") if (start is None) and (i_start is None): raise TypeError("must specify 'start' or 'i_start', but not both") - if (return_sp_counter is not None) and \ - (return_sp_counter not in [True, False]): - raise ValueError("'return_sp_counter' must be a bool") G_type = type(G) # Check for Graph-type inputs @@ -67,10 +64,8 @@ def _ensure_args(G, start, return_sp_counter, i_start, directed): start = start if start is not None else i_start if directed is None: directed = True - if return_sp_counter is None: - return_sp_counter = False - return (start, return_sp_counter, directed) + return (start, directed) def _convert_df_to_output_type(df, input_type): @@ -92,30 +87,23 @@ def _convert_df_to_output_type(df, input_type): if is_cp_matrix_type(input_type): distances = cp.fromDlpack(sorted_df["distance"].to_dlpack()) preds = cp.fromDlpack(sorted_df["predecessor"].to_dlpack()) - if "sp_counter" in df.columns: - return (distances, preds, - cp.fromDlpack(sorted_df["sp_counter"].to_dlpack())) - else: - return (distances, preds) + return (distances, preds) else: distances = sorted_df["distance"].to_array() preds = sorted_df["predecessor"].to_array() - if "sp_counter" in df.columns: - return (distances, preds, - sorted_df["sp_counter"].to_array()) - else: - return (distances, preds) + return (distances, preds) else: raise TypeError(f"input type {input_type} is not a supported type.") def bfs(G, start=None, - return_sp_counter=None, + depth_limit=None, i_start=None, directed=None, return_predecessors=None): - """Find the distances and predecessors for a breadth first traversal of a + """ + Find the distances and predecessors for a breadth first traversal of a graph. Parameters @@ -128,13 +116,13 @@ def bfs(G, start : Integer The index of the graph vertex from which the traversal begins - return_sp_counter : bool, optional, default=False - Indicates if shortest path counters should be returned - i_start : Integer, optional Identical to start, added for API compatibility. Only start or i_start can be set, not both. + depth_limit : Integer or None + Limit the depth of the search + directed : bool, optional NOTE For non-Graph-type (eg. sparse matrix) values of G only. 
Raises @@ -156,10 +144,6 @@ def bfs(G, df['predecessor'] for each i'th position in the column, the vertex ID immediately preceding the vertex at position i in the 'vertex' column - df['sp_counter'] for each i'th position in the column, the number of - shortest paths leading to the vertex at position i in the 'vertex' - column (Only if retrun_sp_counter is True) - If G is a networkx.Graph, returns: pandas.DataFrame with contents equivalent to the cudf.DataFrame @@ -191,34 +175,30 @@ def bfs(G, >>> df = cugraph.bfs(G, 0) """ - (start, return_sp_counter, directed) = \ - _ensure_args(G, start, return_sp_counter, i_start, directed) + (start, directed) = \ + _ensure_args(G, start, i_start, directed) # FIXME: allow nx_weight_attr to be specified (G, input_type) = ensure_cugraph_obj( G, nx_weight_attr="weight", matrix_graph_type=DiGraph if directed else Graph) - if type(G) is Graph: - is_directed = False - else: - is_directed = True - if G.renumbered is True: - start = G.lookup_internal_vertex_id(cudf.Series([start]))[0] - - df = bfs_wrapper.bfs(G, start, is_directed, return_sp_counter) + if isinstance(start, cudf.DataFrame): + start = G.lookup_internal_vertex_id(start, start.columns).iloc[0] + else: + start = G.lookup_internal_vertex_id(cudf.Series([start]))[0] + df = bfs_wrapper.bfs(G, start, depth_limit) if G.renumbered: df = G.unrenumber(df, "vertex") df = G.unrenumber(df, "predecessor") - df["predecessor"].fillna(-1, inplace=True) + df.fillna(-1, inplace=True) return _convert_df_to_output_type(df, input_type) -def bfs_edges(G, source, reverse=False, depth_limit=None, sort_neighbors=None, - return_sp_counter=False): +def bfs_edges(G, source, reverse=False, depth_limit=None, sort_neighbors=None): """ Find the distances and predecessors for a breadth first traversal of a graph. @@ -239,14 +219,10 @@ def bfs_edges(G, source, reverse=False, depth_limit=None, sort_neighbors=None, depth_limit : Int or None Limit the depth of the search - Currently not implemented sort_neighbors : None or Function Currently not implemented - return_sp_counter : bool, optional, default=False - Indicates if shortest path counters should be returned - Returns ------- Return value type is based on the input type. 
If G is a cugraph.Graph, @@ -260,10 +236,6 @@ def bfs_edges(G, source, reverse=False, depth_limit=None, sort_neighbors=None, df['predecessor'] for each i'th position in the column, the vertex ID immediately preceding the vertex at position i in the 'vertex' column - df['sp_counter'] for each i'th position in the column, the number of - shortest paths leading to the vertex at position i in the 'vertex' - column (Only if retrun_sp_counter is True) - If G is a networkx.Graph, returns: pandas.DataFrame with contents equivalent to the cudf.DataFrame @@ -300,9 +272,4 @@ def bfs_edges(G, source, reverse=False, depth_limit=None, sort_neighbors=None, "reverse processing of graph is currently not supported" ) - if depth_limit is not None: - raise NotImplementedError( - "depth limit implementation of BFS is not currently supported" - ) - - return bfs(G, source, return_sp_counter) + return bfs(G, source, depth_limit) diff --git a/python/cugraph/traversal/bfs_wrapper.pyx b/python/cugraph/traversal/bfs_wrapper.pyx index f475842a7bf..f524b133d02 100644 --- a/python/cugraph/traversal/bfs_wrapper.pyx +++ b/python/cugraph/traversal/bfs_wrapper.pyx @@ -24,54 +24,44 @@ from libc.stdint cimport uintptr_t import cudf import numpy as np -def bfs(input_graph, start, directed=True, - return_sp_counter=False): +def bfs(input_graph, start, depth_limit, direction_optimizing=False): """ Call bfs """ # Step 1: Declare the different varibales cdef graph_container_t graph_container - # FIXME: Offsets and indices are currently hardcoded to int, but this may - # not be acceptable in the future. + numberTypeMap = {np.dtype("int32") : numberTypeEnum.int32Type, np.dtype("int64") : numberTypeEnum.int64Type, np.dtype("float32") : numberTypeEnum.floatType, np.dtype("double") : numberTypeEnum.doubleType} - # Pointers required for CSR Graph - cdef uintptr_t c_offsets_ptr = NULL # Pointer to the CSR offsets - cdef uintptr_t c_indices_ptr = NULL # Pointer to the CSR indices - cdef uintptr_t c_weights = NULL - cdef uintptr_t c_local_verts = NULL; - cdef uintptr_t c_local_edges = NULL; - cdef uintptr_t c_local_offsets = NULL; weight_t = np.dtype("float32") + [src, dst] = graph_primtypes_wrapper.datatype_cast([input_graph.edgelist.edgelist_df['src'], input_graph.edgelist.edgelist_df['dst']], [np.int32]) + weights = None # Pointers for SSSP / BFS cdef uintptr_t c_identifier_ptr = NULL # Pointer to the DataFrame 'vertex' Series cdef uintptr_t c_distance_ptr = NULL # Pointer to the DataFrame 'distance' Series cdef uintptr_t c_predecessor_ptr = NULL # Pointer to the DataFrame 'predecessor' Series - cdef uintptr_t c_sp_counter_ptr = NULL # Pointer to the DataFrame 'sp_counter' Series + if depth_limit is None: + depth_limit = c_bfs.INT_MAX # Step 2: Verifiy input_graph has the expected format - if input_graph.adjlist is None: - input_graph.view_adj_list() cdef unique_ptr[handle_t] handle_ptr handle_ptr.reset(new handle_t()) handle_ = handle_ptr.get(); - # Step 3: Extract CSR offsets, indices, weights are not expected - # - offsets: int (signed, 32-bit) - # - indices: int (signed, 32-bit) - [offsets, indices] = graph_primtypes_wrapper.datatype_cast([input_graph.adjlist.offsets, input_graph.adjlist.indices], [np.int32]) - c_offsets_ptr = offsets.__cuda_array_interface__['data'][0] - c_indices_ptr = indices.__cuda_array_interface__['data'][0] - - # Step 4: Setup number of vertices and edges + # Step 3: Setup number of vertices and edges num_verts = input_graph.number_of_vertices() num_edges = input_graph.number_of_edges(directed_edges=True) + # 
Step 4: Extract COO + cdef uintptr_t c_src_vertices = src.__cuda_array_interface__['data'][0] + cdef uintptr_t c_dst_vertices = dst.__cuda_array_interface__['data'][0] + cdef uintptr_t c_edge_weights = NULL + # Step 5: Check if source index is valid if not 0 <= start < num_verts: raise ValueError("Starting vertex should be between 0 to number of vertices") @@ -79,30 +69,29 @@ def bfs(input_graph, start, directed=True, # Step 6: Generate the cudf.DataFrame result # Current implementation expects int (signed 32-bit) for distance df = cudf.DataFrame() - df['vertex'] = cudf.Series(np.zeros(num_verts, dtype=np.int32)) + df['vertex'] = cudf.Series(np.arange(num_verts), dtype=np.int32) df['distance'] = cudf.Series(np.zeros(num_verts, dtype=np.int32)) df['predecessor'] = cudf.Series(np.zeros(num_verts, dtype=np.int32)) - if (return_sp_counter): - df['sp_counter'] = cudf.Series(np.zeros(num_verts, dtype=np.double)) # Step 7: Associate to cudf Series c_identifier_ptr = df['vertex'].__cuda_array_interface__['data'][0] c_distance_ptr = df['distance'].__cuda_array_interface__['data'][0] c_predecessor_ptr = df['predecessor'].__cuda_array_interface__['data'][0] - if return_sp_counter: - c_sp_counter_ptr = df['sp_counter'].__cuda_array_interface__['data'][0] # Step 8: Proceed to BFS - # FIXME: [int, int, float] or may add an explicit [int, int, int] in graph.cu? - populate_graph_container_legacy(graph_container, - ((graphTypeEnum.LegacyCSR)), - handle_[0], - c_offsets_ptr, c_indices_ptr, c_weights, - ((numberTypeEnum.int32Type)), - ((numberTypeEnum.int32Type)), - ((numberTypeMap[weight_t])), - num_verts, num_edges, - c_local_verts, c_local_edges, c_local_offsets) + populate_graph_container(graph_container, + handle_[0], + c_src_vertices, c_dst_vertices, c_edge_weights, + NULL, + ((numberTypeEnum.int32Type)), + ((numberTypeEnum.int32Type)), + ((numberTypeMap[weight_t])), + num_edges, + num_verts, num_edges, + False, + False, + False, + False) # Different pathing wether shortest_path_counting is required or not c_bfs.call_bfs[int, float](handle_ptr.get()[0], @@ -110,8 +99,8 @@ def bfs(input_graph, start, directed=True, c_identifier_ptr, c_distance_ptr, c_predecessor_ptr, - c_sp_counter_ptr, + depth_limit, start, - directed) + direction_optimizing) return df diff --git a/python/cugraph/traversal/sssp_wrapper.pyx b/python/cugraph/traversal/sssp_wrapper.pyx index 36e4797e0c8..46966cd3e99 100644 --- a/python/cugraph/traversal/sssp_wrapper.pyx +++ b/python/cugraph/traversal/sssp_wrapper.pyx @@ -46,7 +46,7 @@ def sssp(input_graph, source): cdef uintptr_t c_local_verts = NULL; cdef uintptr_t c_local_edges = NULL; cdef uintptr_t c_local_offsets = NULL; - weight_t = np.dtype("int32") + weight_t = np.dtype("float32") # Pointers for SSSP / BFS cdef uintptr_t c_identifier_ptr = NULL # Pointer to the DataFrame 'vertex' Series @@ -110,31 +110,21 @@ def sssp(input_graph, source): num_verts, num_edges, c_local_verts, c_local_edges, c_local_offsets) - if weights is not None: - if weight_t == np.float32: - c_sssp.call_sssp[int, float](handle_[0], - graph_container, - c_identifier_ptr, - c_distance_ptr, - c_predecessor_ptr, - source) - elif weight_t == np.float64: - c_sssp.call_sssp[int, double](handle_[0], - graph_container, - c_identifier_ptr, - c_distance_ptr, - c_predecessor_ptr, - source) - else: # This case should not happen - raise NotImplementedError - else: - c_bfs.call_bfs[int, float](handle_[0], - graph_container, - c_identifier_ptr, - c_distance_ptr, - c_predecessor_ptr, - NULL, - source, - 1) + if weight_t == 
np.float32: + c_sssp.call_sssp[int, float](handle_[0], + graph_container, + c_identifier_ptr, + c_distance_ptr, + c_predecessor_ptr, + source) + elif weight_t == np.float64: + c_sssp.call_sssp[int, double](handle_[0], + graph_container, + c_identifier_ptr, + c_distance_ptr, + c_predecessor_ptr, + source) + else: # This case should not happen + raise NotImplementedError return df From 4a26dd772ded0fc0596d643d5cf268dfa2e0f53a Mon Sep 17 00:00:00 2001 From: Raymond Douglass Date: Wed, 21 Apr 2021 12:12:18 -0400 Subject: [PATCH 231/343] update changelog --- CHANGELOG.md | 67 +++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 64 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0011b99fbf3..5c72b095a04 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,67 @@ -# cuGraph 0.19.0 (Date TBD) - -Please see https://github.com/rapidsai/cugraph/releases/tag/v0.19.0a for the latest changes to this development branch. +# cuGraph 0.19.0 (21 Apr 2021) + +## 🐛 Bug Fixes + +- Fixed copyright date and format ([#1526](https://github.com//rapidsai/cugraph/pull/1526)) [@rlratzel](https://github.com/rlratzel) +- fix mg_renumber non-deterministic errors ([#1523](https://github.com//rapidsai/cugraph/pull/1523)) [@Iroy30](https://github.com/Iroy30) +- Updated NetworkX version to 2.5.1 ([#1510](https://github.com//rapidsai/cugraph/pull/1510)) [@rlratzel](https://github.com/rlratzel) +- pascal renumbering fix ([#1505](https://github.com//rapidsai/cugraph/pull/1505)) [@Iroy30](https://github.com/Iroy30) +- Fix MNMG test failures and skip tests that are not supported on Pascal ([#1498](https://github.com//rapidsai/cugraph/pull/1498)) [@jnke2016](https://github.com/jnke2016) +- Revert "Update conda recipes pinning of repo dependencies" ([#1493](https://github.com//rapidsai/cugraph/pull/1493)) [@raydouglass](https://github.com/raydouglass) +- Update conda recipes pinning of repo dependencies ([#1485](https://github.com//rapidsai/cugraph/pull/1485)) [@mike-wendt](https://github.com/mike-wendt) +- Update to make notebook_list.py compatible with numba 0.53 ([#1455](https://github.com//rapidsai/cugraph/pull/1455)) [@rlratzel](https://github.com/rlratzel) +- Fix bugs in copy_v_transform_reduce_key_aggregated_out_nbr & groupby_gpuid_and_shuffle ([#1434](https://github.com//rapidsai/cugraph/pull/1434)) [@seunghwak](https://github.com/seunghwak) +- update default path of setup to use the new directory paths in build … ([#1425](https://github.com//rapidsai/cugraph/pull/1425)) [@ChuckHastings](https://github.com/ChuckHastings) + +## 📖 Documentation + +- Create C++ documentation ([#1489](https://github.com//rapidsai/cugraph/pull/1489)) [@ChuckHastings](https://github.com/ChuckHastings) +- Create cuGraph developers guide ([#1431](https://github.com//rapidsai/cugraph/pull/1431)) [@ChuckHastings](https://github.com/ChuckHastings) +- Add boost 1.0 license file. 
([#1401](https://github.com//rapidsai/cugraph/pull/1401)) [@seunghwak](https://github.com/seunghwak) + +## 🚀 New Features + +- Implement C/CUDA RandomWalks functionality ([#1439](https://github.com//rapidsai/cugraph/pull/1439)) [@aschaffer](https://github.com/aschaffer) +- Add R-mat generator ([#1411](https://github.com//rapidsai/cugraph/pull/1411)) [@seunghwak](https://github.com/seunghwak) + +## 🛠️ Improvements + +- Random Walks - Python Bindings ([#1516](https://github.com//rapidsai/cugraph/pull/1516)) [@jnke2016](https://github.com/jnke2016) +- Updating RAFT tag ([#1509](https://github.com//rapidsai/cugraph/pull/1509)) [@afender](https://github.com/afender) +- Clean up nullptr cuda_stream_view arguments ([#1504](https://github.com//rapidsai/cugraph/pull/1504)) [@hlinsen](https://github.com/hlinsen) +- Reduce the size of the cugraph libraries ([#1503](https://github.com//rapidsai/cugraph/pull/1503)) [@robertmaynard](https://github.com/robertmaynard) +- Add indirection and replace algorithms with new renumbering ([#1484](https://github.com//rapidsai/cugraph/pull/1484)) [@Iroy30](https://github.com/Iroy30) +- Multiple graph generator with power law distribution on sizes ([#1483](https://github.com//rapidsai/cugraph/pull/1483)) [@afender](https://github.com/afender) +- TSP solver bug fix ([#1480](https://github.com//rapidsai/cugraph/pull/1480)) [@hlinsen](https://github.com/hlinsen) +- Added cmake function and .hpp template for generating version_config.hpp file. ([#1476](https://github.com//rapidsai/cugraph/pull/1476)) [@rlratzel](https://github.com/rlratzel) +- Fix for bug in SCC on self-loops ([#1475](https://github.com//rapidsai/cugraph/pull/1475)) [@aschaffer](https://github.com/aschaffer) +- MS BFS python APIs + EgoNet updates ([#1469](https://github.com//rapidsai/cugraph/pull/1469)) [@afender](https://github.com/afender) +- Removed unused dependencies from libcugraph recipe, moved non-test script code from test script to gpu build script ([#1468](https://github.com//rapidsai/cugraph/pull/1468)) [@rlratzel](https://github.com/rlratzel) +- Remove literals passed to `device_uvector::set_element_async` ([#1453](https://github.com//rapidsai/cugraph/pull/1453)) [@harrism](https://github.com/harrism) +- ENH Change conda build directories to work with ccache ([#1452](https://github.com//rapidsai/cugraph/pull/1452)) [@dillon-cullinan](https://github.com/dillon-cullinan) +- Updating docs ([#1448](https://github.com//rapidsai/cugraph/pull/1448)) [@BradReesWork](https://github.com/BradReesWork) +- Improve graph primitives performance on graphs with widely varying vertex degrees ([#1447](https://github.com//rapidsai/cugraph/pull/1447)) [@seunghwak](https://github.com/seunghwak) +- Update Changelog Link ([#1446](https://github.com//rapidsai/cugraph/pull/1446)) [@ajschmidt8](https://github.com/ajschmidt8) +- Updated NCCL to version 2.8.4 ([#1445](https://github.com//rapidsai/cugraph/pull/1445)) [@BradReesWork](https://github.com/BradReesWork) +- Update FAISS to 1.7.0 ([#1444](https://github.com//rapidsai/cugraph/pull/1444)) [@BradReesWork](https://github.com/BradReesWork) +- Update graph partitioning scheme ([#1443](https://github.com//rapidsai/cugraph/pull/1443)) [@seunghwak](https://github.com/seunghwak) +- Add additional datasets to improve coverage ([#1441](https://github.com//rapidsai/cugraph/pull/1441)) [@jnke2016](https://github.com/jnke2016) +- Update C++ MG PageRank and SG PageRank, Katz Centrality, BFS, and SSSP to use the new R-mat graph generator 
([#1438](https://github.com//rapidsai/cugraph/pull/1438)) [@seunghwak](https://github.com/seunghwak) +- Remove raft handle duplication ([#1436](https://github.com//rapidsai/cugraph/pull/1436)) [@Iroy30](https://github.com/Iroy30) +- Streams infra + support in egonet ([#1435](https://github.com//rapidsai/cugraph/pull/1435)) [@afender](https://github.com/afender) +- Prepare Changelog for Automation ([#1433](https://github.com//rapidsai/cugraph/pull/1433)) [@ajschmidt8](https://github.com/ajschmidt8) +- Update 0.18 changelog entry ([#1429](https://github.com//rapidsai/cugraph/pull/1429)) [@ajschmidt8](https://github.com/ajschmidt8) +- Update and Test Renumber bindings ([#1427](https://github.com//rapidsai/cugraph/pull/1427)) [@Iroy30](https://github.com/Iroy30) +- Update Louvain to use new graph primitives and pattern accelerators ([#1423](https://github.com//rapidsai/cugraph/pull/1423)) [@ChuckHastings](https://github.com/ChuckHastings) +- Replace rmm::device_vector & thrust::host_vector with rmm::device_uvector & std::vector, respectively. ([#1421](https://github.com//rapidsai/cugraph/pull/1421)) [@seunghwak](https://github.com/seunghwak) +- Update C++ MG PageRank test ([#1419](https://github.com//rapidsai/cugraph/pull/1419)) [@seunghwak](https://github.com/seunghwak) +- ENH Build with `cmake --build` & Pass ccache variables to conda recipe & use Ninja in CI ([#1415](https://github.com//rapidsai/cugraph/pull/1415)) [@Ethyling](https://github.com/Ethyling) +- Adding new primitives: copy_v_transform_reduce_key_aggregated_out_nbr & transform_reduce_by_adj_matrix_row|col_key_e bug fixes ([#1399](https://github.com//rapidsai/cugraph/pull/1399)) [@seunghwak](https://github.com/seunghwak) +- Add new primitives: compute_in|out_degrees, compute_in|out_weight_sums to graph_view_t ([#1394](https://github.com//rapidsai/cugraph/pull/1394)) [@seunghwak](https://github.com/seunghwak) +- Rename sort_and_shuffle to groupby_gpuid_and_shuffle ([#1392](https://github.com//rapidsai/cugraph/pull/1392)) [@seunghwak](https://github.com/seunghwak) +- Matching updates for RAFT comms updates (device_sendrecv, device_multicast_sendrecv, gather, gatherv) ([#1391](https://github.com//rapidsai/cugraph/pull/1391)) [@seunghwak](https://github.com/seunghwak) +- Fix forward-merge conflicts for #1370 ([#1377](https://github.com//rapidsai/cugraph/pull/1377)) [@ajschmidt8](https://github.com/ajschmidt8) +- Add utility function for computing a secondary cost for BFS and SSSP output ([#1376](https://github.com//rapidsai/cugraph/pull/1376)) [@hlinsen](https://github.com/hlinsen) # cuGraph 0.18.0 (24 Feb 2021) From 9c0f7d1c9910bb3afa3615a33be75b3757ecb0ce Mon Sep 17 00:00:00 2001 From: AJ Schmidt Date: Thu, 22 Apr 2021 16:59:11 -0400 Subject: [PATCH 232/343] Fix doc build paths for CI (#1554) This PR updates the doc build paths in CI to reflect the changes in #1537. 
Authors: - AJ Schmidt (https://github.com/ajschmidt8) Approvers: - Dillon Cullinan (https://github.com/dillon-cullinan) URL: https://github.com/rapidsai/cugraph/pull/1554 --- ci/docs/build.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ci/docs/build.sh b/ci/docs/build.sh index 6ce223d8b2b..5d81e5521a8 100644 --- a/ci/docs/build.sh +++ b/ci/docs/build.sh @@ -47,10 +47,10 @@ conda list --show-channel-urls gpuci_logger "Build Doxygen docs" cd $PROJECT_WORKSPACE/cpp/build make docs_cugraph - + # Build Python docs gpuci_logger "Build Sphinx docs" -cd $PROJECT_WORKSPACE/docs +cd $PROJECT_WORKSPACE/docs/cugraph make html #Commit to Website @@ -60,10 +60,10 @@ for PROJECT in ${PROJECTS[@]}; do if [ ! -d "api/$PROJECT/$BRANCH_VERSION" ]; then mkdir -p api/$PROJECT/$BRANCH_VERSION fi - rm -rf $DOCS_WORKSPACE/api/$PROJECT/$BRANCH_VERSION/* + rm -rf $DOCS_WORKSPACE/api/$PROJECT/$BRANCH_VERSION/* done mv $PROJECT_WORKSPACE/cpp/doxygen/html/* $DOCS_WORKSPACE/api/libcugraph/$BRANCH_VERSION -mv $PROJECT_WORKSPACE/docs/build/html/* $DOCS_WORKSPACE/api/cugraph/$BRANCH_VERSION +mv $PROJECT_WORKSPACE/docs/cugraph/build/html/* $DOCS_WORKSPACE/api/cugraph/$BRANCH_VERSION From 53be1f92a97be755df7582f6ea2d30505c421520 Mon Sep 17 00:00:00 2001 From: Chuck Hastings <45364586+ChuckHastings@users.noreply.github.com> Date: Fri, 23 Apr 2021 12:36:52 -0400 Subject: [PATCH 233/343] Update our thrust version to 1.12 (#1553) cuDF and RMM have updated to version 1.12, we have been requested to update as well. This PR is dependent on: * https://github.com/rapidsai/raft/pull/211 * https://github.com/rapidsai/cuhornet/pull/49 Also disabled some unit tests to reduce our CI time, since we've started seeing timeouts. Issue #1555 will fully address this issue. Authors: - Chuck Hastings (https://github.com/ChuckHastings) Approvers: - Brad Rees (https://github.com/BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1553 --- cpp/CMakeLists.txt | 8 +++----- cpp/src/traversal/mg/common_utils.cuh | 3 ++- cpp/src/traversal/mg/vertex_binning.cuh | 4 +++- .../centrality/betweenness_centrality_test.cu | 19 +++++++++++++++++++ .../edge_betweenness_centrality_test.cu | 15 +++++++++++++++ 5 files changed, 42 insertions(+), 7 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index fefd66423db..794b516f818 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -225,8 +225,7 @@ message("Fetching Thrust") FetchContent_Declare( thrust GIT_REPOSITORY https://github.com/thrust/thrust.git - # August 28, 2020 - GIT_TAG 52a8bda46c5c2128414d1d47f546b486ff0be2f0 + GIT_TAG 1.12.0 ) FetchContent_GetProperties(thrust) @@ -276,7 +275,7 @@ message("set LIBCUDACXX_INCLUDE_DIR to: ${LIBCUDACXX_INCLUDE_DIR}") FetchContent_Declare( cuhornet GIT_REPOSITORY https://github.com/rapidsai/cuhornet.git - GIT_TAG e58d0ecdbc270fc28867d66c965787a62a7a882c + GIT_TAG 6d2fc894cc56dd2ca8fc9d1523a18a6ec444b663 GIT_SHALLOW true SOURCE_SUBDIR hornet ) @@ -302,8 +301,7 @@ else(DEFINED ENV{RAFT_PATH}) FetchContent_Declare( raft GIT_REPOSITORY https://github.com/rapidsai/raft.git - GIT_TAG f0cd81fb49638eaddc9bf18998cc894f292bc293 - + GIT_TAG 66f82b4e79a3e268d0da3cc864ec7ce4ad065296 SOURCE_SUBDIR raft ) diff --git a/cpp/src/traversal/mg/common_utils.cuh b/cpp/src/traversal/mg/common_utils.cuh index 2cda827b471..d922636e740 100644 --- a/cpp/src/traversal/mg/common_utils.cuh +++ b/cpp/src/traversal/mg/common_utils.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ #include #include +#include #include #include "../traversal_common.cuh" diff --git a/cpp/src/traversal/mg/vertex_binning.cuh b/cpp/src/traversal/mg/vertex_binning.cuh index 3d8c963c466..b4ed881a06e 100644 --- a/cpp/src/traversal/mg/vertex_binning.cuh +++ b/cpp/src/traversal/mg/vertex_binning.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,8 @@ #include "common_utils.cuh" #include "vertex_binning_kernels.cuh" +#include + namespace cugraph { namespace mg { diff --git a/cpp/tests/centrality/betweenness_centrality_test.cu b/cpp/tests/centrality/betweenness_centrality_test.cu index 7ff6ab31213..89168618b9c 100644 --- a/cpp/tests/centrality/betweenness_centrality_test.cu +++ b/cpp/tests/centrality/betweenness_centrality_test.cu @@ -25,6 +25,7 @@ #include #include +#include #include @@ -363,6 +364,9 @@ TEST_P(Tests_BC, CheckFP32_NO_NORMALIZE_NO_ENDPOINTS) run_current_test(GetParam()); } +#if 0 +// Temporarily disable some of the test combinations +// Full solution will be explored for issue #1555 TEST_P(Tests_BC, CheckFP64_NO_NORMALIZE_NO_ENDPOINTS) { run_current_test(GetParam()); @@ -372,6 +376,7 @@ TEST_P(Tests_BC, CheckFP32_NO_NORMALIZE_ENDPOINTS) { run_current_test(GetParam()); } +#endif TEST_P(Tests_BC, CheckFP64_NO_NORMALIZE_ENDPOINTS) { @@ -384,6 +389,9 @@ TEST_P(Tests_BC, CheckFP32_NORMALIZE_NO_ENDPOINTS) run_current_test(GetParam()); } +#if 0 +// Temporarily disable some of the test combinations +// Full solution will be explored for issue #1555 TEST_P(Tests_BC, CheckFP64_NORMALIZE_NO_ENDPOINTS) { run_current_test(GetParam()); @@ -393,12 +401,16 @@ TEST_P(Tests_BC, CheckFP32_NORMALIZE_ENDPOINTS) { run_current_test(GetParam()); } +#endif TEST_P(Tests_BC, CheckFP64_NORMALIZE_ENDPOINTS) { run_current_test(GetParam()); } +#if 0 +// Temporarily disable some of the test combinations +// Full solution will be explored for issue #1555 INSTANTIATE_TEST_SUITE_P(simple_test, Tests_BC, ::testing::Values(BC_Usecase("test/datasets/karate.mtx", 0), @@ -406,5 +418,12 @@ INSTANTIATE_TEST_SUITE_P(simple_test, BC_Usecase("test/datasets/netscience.mtx", 4), BC_Usecase("test/datasets/wiki2003.mtx", 4), BC_Usecase("test/datasets/wiki-Talk.mtx", 4))); +#else +INSTANTIATE_TEST_SUITE_P(simple_test, + Tests_BC, + ::testing::Values(BC_Usecase("test/datasets/karate.mtx", 0), + BC_Usecase("test/datasets/netscience.mtx", 0), + BC_Usecase("test/datasets/netscience.mtx", 4))); +#endif CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/centrality/edge_betweenness_centrality_test.cu b/cpp/tests/centrality/edge_betweenness_centrality_test.cu index 2432943504c..50cbef86e11 100644 --- a/cpp/tests/centrality/edge_betweenness_centrality_test.cu +++ b/cpp/tests/centrality/edge_betweenness_centrality_test.cu @@ -22,6 +22,7 @@ #include #include +#include #include @@ -296,6 +297,9 @@ TEST_P(Tests_EdgeBC, CheckFP32_NO_NORMALIZE) run_current_test(GetParam()); } +#if 0 +// Temporarily disable some of the test combinations +// Full solution will be explored for issue #1555 TEST_P(Tests_EdgeBC, CheckFP64_NO_NORMALIZE) { run_current_test(GetParam()); @@ -306,12 +310,16 @@ TEST_P(Tests_EdgeBC, CheckFP32_NORMALIZE) { run_current_test(GetParam()); } +#endif 
TEST_P(Tests_EdgeBC, CheckFP64_NORMALIZE) { run_current_test(GetParam()); } +#if 0 +// Temporarily disable some of the test combinations +// Full solution will be explored for issue #1555 INSTANTIATE_TEST_SUITE_P(simple_test, Tests_EdgeBC, ::testing::Values(EdgeBC_Usecase("test/datasets/karate.mtx", 0), @@ -319,5 +327,12 @@ INSTANTIATE_TEST_SUITE_P(simple_test, EdgeBC_Usecase("test/datasets/netscience.mtx", 4), EdgeBC_Usecase("test/datasets/wiki2003.mtx", 4), EdgeBC_Usecase("test/datasets/wiki-Talk.mtx", 4))); +#else +INSTANTIATE_TEST_SUITE_P(simple_test, + Tests_EdgeBC, + ::testing::Values(EdgeBC_Usecase("test/datasets/karate.mtx", 0), + EdgeBC_Usecase("test/datasets/netscience.mtx", 0), + EdgeBC_Usecase("test/datasets/netscience.mtx", 4))); +#endif CUGRAPH_TEST_PROGRAM_MAIN() From e40570ab42d71a2835b86e9a99c200f11f81541c Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Mon, 26 Apr 2021 09:12:37 -0400 Subject: [PATCH 234/343] Update build system and docs to new minimum cugraph-0.20 requirements (#1552) Close #1528 Following cuDF (https://github.com/rapidsai/cudf/pull/7780) cugraph-0.20 increase the minimum requirements in the following way: GCC version 9.3+ is required CUDA and C++ code now is compiled with -std=c++17 We require CUDA Toolkit version 11.0 or greater This updates the build-system and the README with these new requirements Authors: - Seunghwa Kang (https://github.com/seunghwak) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - AJ Schmidt (https://github.com/ajschmidt8) - Brad Rees (https://github.com/BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1552 --- README.md | 12 +++--- SOURCEBUILD.md | 38 ++++++++--------- conda/environments/cugraph_dev_cuda10.1.yml | 47 --------------------- conda/environments/cugraph_dev_cuda10.2.yml | 47 --------------------- cpp/CMakeLists.txt | 4 +- python/setup.py | 4 +- 6 files changed, 28 insertions(+), 124 deletions(-) delete mode 100644 conda/environments/cugraph_dev_cuda10.1.yml delete mode 100644 conda/environments/cugraph_dev_cuda10.2.yml diff --git a/README.md b/README.md index ccc91bfe225..c5785e6cb08 100644 --- a/README.md +++ b/README.md @@ -151,14 +151,14 @@ Install and update cuGraph using the conda command: ```bash -# CUDA 10.1 -conda install -c nvidia -c rapidsai -c numba -c conda-forge -c defaults cugraph cudatoolkit=10.1 - -# CUDA 10.2 -conda install -c nvidia -c rapidsai -c numba -c conda-forge -c defaults cugraph cudatoolkit=10.2 - # CUDA 11.0 conda install -c nvidia -c rapidsai -c numba -c conda-forge -c defaults cugraph cudatoolkit=11.0 + +# CUDA 11.1 +conda install -c nvidia -c rapidsai -c numba -c conda-forge -c defaults cugraph cudatoolkit=11.1 + +# CUDA 11.2 +conda install -c nvidia -c rapidsai -c numba -c conda-forge -c defaults cugraph cudatoolkit=11.2 ``` Note: This conda installation only applies to Linux and Python versions 3.7/3.8. diff --git a/SOURCEBUILD.md b/SOURCEBUILD.md index 0cbf6ccdaa3..0c825197cee 100644 --- a/SOURCEBUILD.md +++ b/SOURCEBUILD.md @@ -7,13 +7,13 @@ The cuGraph package include both a C/C++ CUDA portion and a python portion. 
Bot ## Prerequisites __Compiler__: -* `gcc` version 5.4+ -* `nvcc` version 10.0+ +* `gcc` version 9.3+ +* `nvcc` version 11.0+ * `cmake` version 3.18+ __CUDA:__ -* CUDA 10.1+ -* NVIDIA driver 396.44+ +* CUDA 11.0+ +* NVIDIA driver 450.80.02+ * Pascal architecture or better __Other__ @@ -47,16 +47,14 @@ __Create the conda development environment__ ```bash # create the conda environment (assuming in base `cugraph` directory) +# for CUDA 11.0 +conda env create --name cugraph_dev --file conda/environments/cugraph_dev_cuda11.0.yml +# for CUDA 11.1 +conda env create --name cugraph_dev --file conda/environments/cugraph_dev_cuda11.1.yml -# for CUDA 10.1 -conda env create --name cugraph_dev --file conda/environments/cugraph_dev_cuda10.1.yml - -# for CUDA 10.2 -conda env create --name cugraph_dev --file conda/environments/cugraph_dev_cuda10.2.yml - -# for CUDA 11 -conda env create --name cugraph_dev --file conda/environments/cugraph_dev_cuda11.0.yml +# for CUDA 11.2 +conda env create --name cugraph_dev --file conda/environments/cugraph_dev_cuda11.2.yml # activate the environment conda activate cugraph_dev @@ -70,14 +68,14 @@ conda deactivate ```bash -# for CUDA 10.1 -conda env update --name cugraph_dev --file conda/environments/cugraph_dev_cuda10.1.yml +# for CUDA 11.0 +conda env update --name cugraph_dev --file conda/environments/cugraph_dev_cuda11.0.yml -# for CUDA 10.2 -conda env update --name cugraph_dev --file conda/environments/cugraph_dev_cuda10.2.yml +# for CUDA 11.1 +conda env update --name cugraph_dev --file conda/environments/cugraph_dev_cuda11.1.yml -# for CUDA 11 -conda env update --name cugraph_dev --file conda/environments/cugraph_dev_cuda11.0.yml +# for CUDA 11.2 +conda env update --name cugraph_dev --file conda/environments/cugraph_dev_cuda11.2.yml conda activate cugraph_dev ``` @@ -232,8 +230,8 @@ Next the env_vars.sh file needs to be edited vi ./etc/conda/activate.d/env_vars.sh #!/bin/bash -export PATH=/usr/local/cuda-10.1/bin:$PATH # or cuda-10.2 if using CUDA 10.2 -export LD_LIBRARY_PATH=/usr/local/cuda-10.1/lib64:$LD_LIBRARY_PATH # or cuda-10.2 if using CUDA 10.2 +export PATH=/usr/local/cuda-11.0/bin:$PATH # or cuda-11.1 if using CUDA 11.1 and cuda-11.2 if using CUDA 11.2, respectively +export LD_LIBRARY_PATH=/usr/local/cuda-11.0/lib64:$LD_LIBRARY_PATH # or cuda-11.1 if using CUDA 11.1 and cuda-11.2 if using CUDA 11.2, respectively ``` ``` diff --git a/conda/environments/cugraph_dev_cuda10.1.yml b/conda/environments/cugraph_dev_cuda10.1.yml deleted file mode 100644 index 9108f642c20..00000000000 --- a/conda/environments/cugraph_dev_cuda10.1.yml +++ /dev/null @@ -1,47 +0,0 @@ -name: cugraph_dev -channels: -- rapidsai -- nvidia -- rapidsai-nightly -- conda-forge -dependencies: -- cudatoolkit=10.1 -- cudf=0.20.* -- libcudf=0.20.* -- rmm=0.20.* -- librmm=0.20.* -- dask>=2.12.0 -- distributed>=2.12.0 -- dask-cuda=0.20* -- dask-cudf=0.20* -- nccl>=2.8.4 -- ucx-py=0.20* -- ucx-proc=*=gpu -- scipy -- networkx>=2.5.1 -- clang=8.0.1 -- clang-tools=8.0.1 -- cmake>=3.18 -- python>=3.6,<3.9 -- notebook>=0.5.0 -- boost -- cython>=0.29,<0.30 -- pytest -- libfaiss=1.7.0 -- faiss-proc=*=cuda -- scikit-learn>=0.23.1 -- sphinx -- sphinx_rtd_theme -- sphinxcontrib-websupport -- sphinx-markdown-tables -- sphinx-copybutton -- nbsphinx -- numpydoc -- ipython -- recommonmark -- pip -- rapids-pytest-benchmark -- doxygen -- pytest-cov -- gtest -- gmock diff --git a/conda/environments/cugraph_dev_cuda10.2.yml b/conda/environments/cugraph_dev_cuda10.2.yml deleted file mode 100644 index 
593af0a99bd..00000000000 --- a/conda/environments/cugraph_dev_cuda10.2.yml +++ /dev/null @@ -1,47 +0,0 @@ -name: cugraph_dev -channels: -- rapidsai -- nvidia -- rapidsai-nightly -- conda-forge -dependencies: -- cudatoolkit=10.2 -- cudf=0.20.* -- libcudf=0.20.* -- rmm=0.20.* -- librmm=0.20.* -- dask>=2.12.0 -- distributed>=2.12.0 -- dask-cuda=0.20* -- dask-cudf=0.20* -- nccl>=2.8.4 -- ucx-py=0.20* -- ucx-proc=*=gpu -- scipy -- networkx>=2.5.1 -- clang=8.0.1 -- clang-tools=8.0.1 -- cmake>=3.18 -- python>=3.6,<3.9 -- notebook>=0.5.0 -- boost -- cython>=0.29,<0.30 -- pytest -- libfaiss=1.7.0 -- faiss-proc=*=cuda -- scikit-learn>=0.23.1 -- sphinx -- sphinx_rtd_theme -- sphinxcontrib-websupport -- sphinx-markdown-tables -- sphinx-copybutton -- nbsphinx -- numpydoc -- ipython -- recommonmark -- pip -- rapids-pytest-benchmark -- doxygen -- pytest-cov -- gtest -- gmock diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 794b516f818..3f421da5e19 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -52,12 +52,12 @@ option(BUILD_STATIC_FAISS "Build the FAISS library for nearest neighbors search ################################################################################################### # - compiler options ------------------------------------------------------------------------------ -set(CMAKE_CXX_STANDARD 14) +set(CMAKE_CXX_STANDARD 17) set(CMAKE_C_COMPILER $ENV{CC}) set(CMAKE_CXX_COMPILER $ENV{CXX}) set(CMAKE_CXX_STANDARD_REQUIRED ON) -set(CMAKE_CUDA_STANDARD 14) +set(CMAKE_CUDA_STANDARD 17) set(CMAKE_CUDA_STANDARD_REQUIRED ON) if(CMAKE_COMPILER_IS_GNUCXX) diff --git a/python/setup.py b/python/setup.py index 59292f32032..799cb805afa 100644 --- a/python/setup.py +++ b/python/setup.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2020, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -111,7 +111,7 @@ def run(self): runtime_library_dirs=[conda_lib_dir], libraries=['cugraph', 'nccl'], language='c++', - extra_compile_args=['-std=c++14']) + extra_compile_args=['-std=c++17']) ] for e in EXTENSIONS: From f6cecf51ce83081ddfc2ab9a6495368cdbf4460c Mon Sep 17 00:00:00 2001 From: Brad Rees <34135411+BradReesWork@users.noreply.github.com> Date: Wed, 28 Apr 2021 11:25:23 -0400 Subject: [PATCH 235/343] Add centrality and random walk notebook (#1546) This PR adds three new notebooks: - A general Centrality notebook that summarizes all the centrality metrics - A example notebook for Random Walk - A performance benchmarking notebook on Random Walk Authors: - Brad Rees (https://github.com/BradReesWork) Approvers: - Rick Ratzel (https://github.com/rlratzel) - Chuck Hastings (https://github.com/ChuckHastings) URL: https://github.com/rapidsai/cugraph/pull/1546 --- notebooks/README.md | 14 +- notebooks/centrality/Centrality.ipynb | 443 +++++++++++++ .../random_walk_benchmark.ipynb | 544 +++++++++++++++ .../cugraph_benchmarks/random_walk_perf.ipynb | 621 ++++++++++++++++++ notebooks/cugraph_benchmarks/release.ipynb | 3 +- notebooks/sampling/RandomWalk.ipynb | 313 +++++++++ .../tests/test_betweenness_centrality.py | 3 + .../tests/test_edge_betweenness_centrality.py | 3 + python/cugraph/tests/test_utils.py | 1 + 9 files changed, 1938 insertions(+), 7 deletions(-) create mode 100644 notebooks/centrality/Centrality.ipynb create mode 100644 notebooks/cugraph_benchmarks/random_walk_benchmark.ipynb create mode 100644 notebooks/cugraph_benchmarks/random_walk_perf.ipynb create mode 100644 notebooks/sampling/RandomWalk.ipynb diff --git a/notebooks/README.md b/notebooks/README.md index a5706720235..3769ceb6957 100644 --- a/notebooks/README.md +++ b/notebooks/README.md @@ -10,6 +10,7 @@ This repository contains a collection of Jupyter Notebooks that outline how to r | Folder | Notebook | Description | | --------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | | Centrality | | | +| | [Centrality](centrality/Centrality.ipynb) | Compute and compare multiple centrality scores | | | [Katz](centrality/Katz.ipynb) | Compute the Katz centrality for every vertex | | | [Betweenness](centrality/Betweenness.ipynb) | Compute both Edge and Vertex Betweenness centrality | | Community | | | @@ -33,6 +34,8 @@ This repository contains a collection of Jupyter Notebooks that outline how to r | Traversal | | | | | [BFS](traversal/BFS.ipynb) | Compute the Breadth First Search path from a starting vertex to every other vertex in a graph | | | [SSSP](traversal/SSSP.ipynb) | Single Source Shortest Path - compute the shortest path from a starting vertex to every other vertex | +| Sampling | +| | [Random Walk](sampling/RandomWalk.ipynb) | Compute Random Walk for a various number of seeds and path lengths | | Structure | | | | | [Renumbering](structure/Renumber.ipynb)
[Renumbering 2](structure/Renumber-2.ipynb) | Renumber the vertex IDs in a graph (two sample notebooks) | | | [Symmetrize](structure/Symmetrize.ipynb) | Symmetrize the edges in a graph | @@ -49,22 +52,21 @@ Running the example in these notebooks requires: * Download via Docker, Conda (See [__Getting Started__](https://rapids.ai/start.html)) * cuGraph is dependent on the latest version of cuDF. Please install all components of RAPIDS -* Python 3.6+ +* Python 3.7+ * A system with an NVIDIA GPU: Pascal architecture or better -* CUDA 9.2+ -* NVIDIA driver 396.44+ +* CUDA 11.0+ +* NVIDIA driver 450.51+ #### Notebook Credits - Original Authors: Bradley Rees -- Last Edit: 04/24/2020 +- Last Edit: 04/19/2021 -RAPIDS Versions: 0.14 +RAPIDS Versions: 0.19 Test Hardware - - GV100 32G, CUDA 9,2 diff --git a/notebooks/centrality/Centrality.ipynb b/notebooks/centrality/Centrality.ipynb new file mode 100644 index 00000000000..591c27419ba --- /dev/null +++ b/notebooks/centrality/Centrality.ipynb @@ -0,0 +1,443 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Centrality" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this notebook, we will compute vertex centrality scores using the various cuGraph algorithms. We will then compare the similarities and differences.\n", + "\n", + "| Author Credit | Date | Update | cuGraph Version | Test Hardware |\n", + "| --------------|------------|--------------|-----------------|----------------|\n", + "| Brad Rees | 04/16/2021 | created | 0.19 | GV100, CUDA 11.0\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Centrality is measure of how important, or central, a node or edge is within a graph. It is useful for identifying influencer in social networks, key routing nodes in communication/computer network infrastructures, \n", + "\n", + "The seminal paper on centrality is: Freeman, L. C. (1978). Centrality in social networks conceptual clarification. Social networks, 1(3), 215-239.\n", + "\n", + "\n", + "__Degree centrality – done but need new API__
\n", + "Degree centrality is based on the notion that whoever has the most connects must be important. \n", + "\n", + "
\n", + " Cd(v) = degree(v)\n", + "
\n", + "\n", + "cuGraph currently does not have a Degree Centrality function call. However, since Degree Centrality is just the degree of a node, we can use _G.degree()_ function.\n", + "Degree Centrality for a Directed graph can be further divided in _indegree centrality_ and _outdegree centrality_ and can be obtained using _G.degrees()_\n", + "\n", + "\n", + "__Closeness centrality – coming soon__
\n", + "Closeness is a measure of the shortest path to every other node in the graph. A node that is close to every other node, can reach over other node in the fewest number of hops, means that it has greater influence on the network versus a node that is not close.\n", + "\n", + "__Betweenness Centrality__
\n", + "Betweenness is a measure of the number of shortest paths that cross through a node, or over an edge. A node with high betweenness means that it had a greater influence on the flow of information. \n", + "\n", + "Betweenness centrality of a node 𝑣 is the sum of the fraction of all-pairs shortest paths that pass through 𝑣\n", + "\n", + "
\n", + " \n", + "
\n", + "\n", + "To speedup runtime of betweenness centrailty, the metric can be computed on a limited number of nodes (randomly selected) and then used to estimate the other scores. For this example, the graphs are relatively smalled (under 5,000 nodes) so betweenness on every node will be computed.\n", + "\n", + "__Eigenvector Centrality - coming soon__
\n", + "Eigenvectors can be thought of as the balancing points of a graph, or center of gravity of a 3D object. High centrality means that more of the graph is balanced around that node.\n", + "\n", + "__Katz Centrality__
\n", + "Katz is a variant of degree centrality and of eigenvector centrality. \n", + "Katz centrality is a measure of the relative importance of a node within the graph based on measuring the influence across the total number of walks between vertex pairs. \n", + "\n", + "
\n", + " \n", + "
\n", + "\n", + "See:\n", + "* [Katz on Wikipedia](https://en.wikipedia.org/wiki/Katz_centrality) for more details on the algorithm.\n", + "* https://www.sci.unich.it/~francesc/teaching/network/katz.html\n", + "\n", + "__PageRank Centrality__
\n", + "PageRank is classified as both a Link Analysis tool and a centrality measure. PageRank is based on the assumption that important nodes point (directed edge) to other important nodes. From a social network perspective, the question is who do you seek for an answer and then who does that person seek. PageRank is good when there is implied importance in the data, for example a citation network, web page linkages, or trust networks. \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Test Data\n", + "We will be using the Zachary Karate club dataset \n", + "*W. W. Zachary, An information flow model for conflict and fission in small groups, Journal of\n", + "Anthropological Research 33, 452-473 (1977).*\n", + "\n", + "\n", + "![Karate Club](../img/zachary_black_lines.png)\n", + "\n", + "\n", + "Because the test data has vertex IDs starting at 1, the auto-renumber feature of cuGraph (mentioned above) will be used so the starting vertex ID is zero for maximum efficiency. The resulting data will then be auto-unrenumbered, making the entire renumbering process transparent to users." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Import the modules\n", + "import cugraph\n", + "import cudf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd \n", + "from IPython.display import display_html " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Functions\n", + "using underscore variable names to avoid collisions. \n", + "non-underscore names are expected to be global names" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Compute Centrality\n", + "# the centrality calls are very straight forward with the graph being the primary argument\n", + "# we are using the default argument values for all centrality functions\n", + "\n", + "def compute_centrality(_graph) :\n", + " # Compute Degree Centrality\n", + " _d = _graph.degree()\n", + " \n", + " # Compute the Betweenness Centrality\n", + " _b = cugraph.betweenness_centrality(_graph)\n", + "\n", + " # Compute Katz Centrality\n", + " _k = cugraph.katz_centrality(_graph)\n", + " \n", + " # Compute PageRank Centrality\n", + " _p = cugraph.pagerank(_graph)\n", + " \n", + " return _d, _b, _k, _p" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Print function\n", + "# being lazy and requiring that the dataframe names are not changed versus passing them in\n", + "def print_centrality(_n):\n", + " dc_top = dc.sort_values(by='degree', ascending=False).head(_n).to_pandas()\n", + " bc_top = bc.sort_values(by='betweenness_centrality', ascending=False).head(_n).to_pandas()\n", + " katz_top = katz.sort_values(by='katz_centrality', ascending=False).head(_n).to_pandas()\n", + " pr_top = pr.sort_values(by='pagerank', ascending=False).head(_n).to_pandas()\n", + " \n", + " df1_styler = dc_top.style.set_table_attributes(\"style='display:inline'\").set_caption('Degree').hide_index()\n", + " df2_styler = bc_top.style.set_table_attributes(\"style='display:inline'\").set_caption('Betweenness').hide_index()\n", + " df3_styler = katz_top.style.set_table_attributes(\"style='display:inline'\").set_caption('Katz').hide_index()\n", + " df4_styler = 
pr_top.style.set_table_attributes(\"style='display:inline'\").set_caption('PageRank').hide_index()\n", + "\n", + " display_html(df1_styler._repr_html_()+df2_styler._repr_html_()+df3_styler._repr_html_()+df4_styler._repr_html_(), raw=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Read the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Define the path to the test data \n", + "datafile='../data/karate-data.csv'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "cuGraph does not do any data reading or writing and is dependent on other tools for that, with cuDF being the preferred solution. \n", + "\n", + "The data file contains an edge list, which represents the connection of a vertex to another. The `source` to `destination` pairs is in what is known as Coordinate Format (COO). In this test case, the data is just two columns. However a third, `weight`, column is also possible" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gdf = cudf.read_csv(datafile, delimiter='\\t', names=['src', 'dst'], dtype=['int32', 'int32'] )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "it was that easy to load data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create a Graph" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# create a Graph using the source (src) and destination (dst) vertex pairs from the Dataframe \n", + "G = cugraph.Graph()\n", + "G.from_cudf_edgelist(gdf, source='src', destination='dst')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Compute Centrality" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dc, bc, katz, pr = compute_centrality(G)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Results\n", + "Typically, analyst look just at the top 10% of results. Basically just those vertices that are the most central or important. \n", + "The karate data has 32 vertices, so let's round a little and look at the top 5 vertices" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print_centrality(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### A Different Dataset\n", + "The Karate dataset is not that large or complex, which makes it a perfect test dataset since it is easy to visually verify results. 
Let's look at a larger dataset with a lot more edges" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Define the path to the test data \n", + "datafile='../data/netscience.csv'\n", + "\n", + "gdf = cudf.read_csv(datafile, delimiter=' ', names=['src', 'dst', 'wt'], dtype=['int32', 'int32', 'float'] )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# create a Graph using the source (src) and destination (dst) vertex pairs from the Dataframe \n", + "G = cugraph.Graph()\n", + "G.from_cudf_edgelist(gdf, source='src', destination='dst')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "(G.number_of_nodes(), G.number_of_edges())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dc, bc, katz, pr = compute_centrality(G)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print_centrality(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can now see a larger discrepancy between the centrality scores and which nodes rank highest.\n", + "Which centrality measure to use is left to the analyst to decide and does require insight into the difference algorithms and graph structure." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### And One More Dataset\n", + "Let's look at a Cyber dataset. The vertex ID are IP addresses" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Define the path to the test data \n", + "datafile='../data/cyber.csv'\n", + "\n", + "gdf = cudf.read_csv(datafile, delimiter=',', names=['idx', 'src', 'dst'], dtype=['int32', 'str', 'str'] )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# create a Graph using the source (src) and destination (dst) vertex pairs from the Dataframe \n", + "G = cugraph.Graph()\n", + "G.from_cudf_edgelist(gdf, source='src', destination='dst')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "(G.number_of_nodes(), G.number_of_edges())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dc, bc, katz, pr = compute_centrality(G)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print_centrality(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There are differences in how each centrality measure ranks the nodes. In some cases, every algorithm returns similar results, and in others, the results are different. Understanding how the centrality measure is computed and what edge represent is key to selecting the right centrality metric." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "----\n", + "Copyright (c) 2019-2021, NVIDIA CORPORATION.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0\n", + "\n", + "Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "cugraph_dev", + "language": "python", + "name": "cugraph_dev" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/cugraph_benchmarks/random_walk_benchmark.ipynb b/notebooks/cugraph_benchmarks/random_walk_benchmark.ipynb new file mode 100644 index 00000000000..be50c075455 --- /dev/null +++ b/notebooks/cugraph_benchmarks/random_walk_benchmark.ipynb @@ -0,0 +1,544 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Random Walk Performance\n", + "# Skip notebook test¶ \n", + "\n", + "Random walk performance is governed by the length of the paths to find, the number of seeds, and the size or structure of the graph.\n", + "This benchmark will use several test graphs of increasingly larger sizes. While not even multiples in scale, the four test graphs should give an indication of how well Random Walk performs as data size increases. \n", + "\n", + "### Test Data\n", + "Users must run the _dataPrep.sh_ script before running this notebook so that the test files are downloaded\n", + "\n", + "| File Name | Num of Vertices | Num of Edges |\n", + "| ---------------------- | --------------: | -----------: |\n", + "| preferentialAttachment | 100,000 | 999,970 |\n", + "| dblp-2010 | 326,186 | 1,615,400 |\n", + "| coPapersCiteseer | 434,102 | 32,073,440 |\n", + "| as-Skitter | 1,696,415 | 22,190,596 |" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# Import the modules\n", + "import cugraph\n", + "import cudf" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# system and other\n", + "import gc\n", + "import os\n", + "import time\n", + "import random\n", + "\n", + "# MTX file reader\n", + "from scipy.io import mmread" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "try: \n", + " import matplotlib\n", + "except ModuleNotFoundError:\n", + " os.system('pip install matplotlib')\n", + "\n", + "import matplotlib.pyplot as plt; plt.rcdefaults()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# Test File\n", + "data = {\n", + " 'preferentialAttachment' : './data/preferentialAttachment.mtx',\n", + " 'dblp' : './data/dblp-2010.mtx',\n", + " 'coPapersCiteseer' : './data/coPapersCiteseer.mtx',\n", + " 'as-Skitter' : './data/as-Skitter.mtx'\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Read the data and create a graph" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# Data reader - the file format is MTX, so we will use the reader from SciPy\n", + "def read_and_create(datafile):\n", + " 
print('Reading ' + str(datafile) + '...')\n", + " M = mmread(datafile).asfptype()\n", + "\n", + " _gdf = cudf.DataFrame()\n", + " _gdf['src'] = M.row\n", + " _gdf['dst'] = M.col\n", + " _gdf['wt'] = 1.0\n", + " \n", + " _g = cugraph.Graph()\n", + " _g.from_cudf_edgelist(_gdf, source='src', destination='dst', edge_attr='wt', renumber=False)\n", + " \n", + " print(\"\\t{:,} nodes, {:,} edges\".format(_g.number_of_nodes(), _g.number_of_edges() ))\n", + " \n", + " return _g" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Define the call to RandomWalk\n", + "We are only interested in the runtime, so throw away the results" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "def run_rw(_G, _seeds, _depth):\n", + " t1 = time.time()\n", + " _, _ = cugraph.random_walks(_G, _seeds, _depth)\n", + " t2 = time.time() - t1\n", + " return t2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test 1: Runtime versus path depth" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Reading ./data/preferentialAttachment.mtx...\n", + "\t100,000 nodes, 499,985 edges\n", + "update i\n", + "Reading ./data/dblp-2010.mtx...\n", + "\t326,183 nodes, 807,700 edges\n", + "update i\n", + "Reading ./data/coPapersCiteseer.mtx...\n", + "\t434,102 nodes, 16,036,720 edges\n", + "update i\n", + "Reading ./data/as-Skitter.mtx...\n", + "\t1,696,415 nodes, 11,095,298 edges\n", + "update i\n" + ] + } + ], + "source": [ + "# some parameters\n", + "max_depth = 6\n", + "num_seeds = 500\n", + "\n", + "# arrays to capture performance gains\n", + "names = []\n", + "\n", + "# Two dimension data\n", + "time_algo_cu = [] # will be two dimensional\n", + "\n", + "i = 0\n", + "for k,v in data.items():\n", + " time_algo_cu.append([])\n", + " \n", + " # Saved the file Name\n", + " names.append(k)\n", + "\n", + " # read data\n", + " G = read_and_create(v)\n", + " \n", + " num_nodes = G.number_of_nodes()\n", + " nodes = G.nodes().to_array().tolist()\n", + "\n", + " seeds = random.sample(nodes, num_seeds)\n", + "\n", + " for j in range (2, max_depth+1) :\n", + " t = run_rw(G, seeds, j)\n", + " time_algo_cu[i].append(t)\n", + "\n", + " # update i\n", + " i = i + 1\n", + " print(\"update i\")\n", + " \n", + " del G\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "list" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(nodes)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAmEAAAFNCAYAAABIc7ibAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Z1A+gAAAACXBIWXMAAAsTAAALEwEAmpwYAACCeUlEQVR4nOzdd3hUZfbA8e876b2HHkINIXRCb6F3kd6Lil1XdF3brmtZXd11f7trXZcmCZCELlVBUKQLAQFJoQdCDek9mfL+/piQDRAENJMAns/z8JiZ+957z0xG5vC+556rtNYIIYQQQoiqZajuAIQQQgghfoskCRNCCCGEqAaShAkhhBBCVANJwoQQQgghqoEkYUIIIYQQ1UCSMCGEEEKIaiBJmBD3EaVUkFIqTyllV92x2JpSaoZSakd1x3EnlFLBSimtlLK/g30GKqW+tGFY1ep23xOl1HCl1JKqikuIqiBJmBA2ppRKVkoVliZHl5RSC5RS7pV47H5XH2utz2qt3bXW5so4fmVTSkUopSyl70WuUuqoUuqh29jvjpOXCo5xzXtVFSrpnO8C7193zKufpzyl1Kbrzvl86ecsRyk1XynlVG5bsFLqO6VUgVIq6ediU0rVVUqtUEqlKaWylVJHlFIzfuVr+cW01muBMKVUq+qKQYjKJkmYEFVjuNbaHWgDtAVerd5wqtWF0vfCE3gZmKOUal7NMd2VlFIdAC+t9Z7rNg0vTbbdtdYDyo0fCLwC9AXqAw2Bt8rtFwP8CPgBfwSWK6UCbnL6hUBK6XH8gKnA5V//qn6VGOCxao5BiEojSZgQVUhrfQnYiDUZuzozdK78mPKzJ0qpN5VSS5VSUaUzR/FKqfDSbQuBIGBt6YzIS9fPGCmltiql3lFK7Sods1Yp5aeUWlw6U7JPKRVc7tzNlFLfKKUySmepxlX0OpRS45VScdc997xSak3pz0OUUgmlMZ9XSr1YwXuhtdZfAplAc6XUUKXUj6VxpSil3iw3fFvpf7NKX0eXcuf9h1IqUyl1Wik1+Fa/gwpei0Ep9YpS6qRSKr30/fYt3Xb1/ZyulDpbOiv0x3L7uiilIkvPn1j6OzhXuu2G30+5006u6HgVGAx8fwcvZzowT2sdr7XOBP4CzCiNpynQDnhDa12otV4B/ASMvsmxOgALtNb5WmuT1vpHrfVX5V5759LPVZZS6pBSKqLcNi+l1Dyl1MXS3/87qnSJXCllV/o7S1NKnQKGlj+psi4znyr97JxWSk0ut3nr9eOFuJdJEiZEFVJK1cX6xXriDnZ7AIgFvIE1wCcAWuupwFn+Nyvy95vsPwHrLEYdoBGwG/gC8AUSgTdKY3MDvgGigcDS/T67ySzVWiBEKdWk3HOTSvcFmAc8rrX2AFoA315/gNLkZ2Tp6/oJyAemlT4eCjyplHqwdHjP0v96l77W3aWPOwFHAX/g78A8pZS6yftwM88CDwK9gNpYk8JPrxvTHQjBOsP0Z6VUaOnzbwDBWGec+gNTru5wi9/PzY53vZalr+96i5VSV5RSm5RSrcs9HwYcKvf4EFBDKeVXuu2U1jr3uu1hNzn3HuBTpdQEpVRQ+Q1KqTrAeuAdrJ+jF4EV5WbVFgAmoDHWmd8BwMzSbY8Cw0qfDwfGlDuuG/ARMLj0s9MVOFju1IlAsFLK8yYxC3FPkSRMiKrxpVIqF+vyTiqlic9t2qG13lBa57UQaH2rHa7zhdb6pNY6G/gKOKm13qy1NgHLsH4ZgvWLMVlr/cXVmQ9gBTD2+gNqrQuA1cBEgNJkrBnWJBHAiHV2y1Nrnam1PlBu99pKqSwgDev7MFVrfVRrvVVr/ZPW2qK1Pox16anXLV7bGa31nNL3JhKoBdS4o3cHngD+qLU+p7UuBt4Exqhr68/eKp09OoQ1cbn6OxgH/LX0NZ7DmkDcjpsd73reQO51z03GmvjVB74DNiqlvEu3uQPZ5cZe/dmjgm1Xt3vc5Nxjge3A68BppdRBZV0eBWuyuaH0c2nRWn8DxAFDlFI1gCHArNJZtFTgX1iTerC+Z//WWqdorTOA9647rwVooZRy0Vpf1FrHl9t29b3wRoj7gCRhQlSNB0v/ZR+BNVnxv4N9L5X7uQBwVndWoF6+jqewgsdXLxKoD3QqXV7KKk2UJgM1b3LcaEqTMKyzYF+WJmdgXeIaApxRSn1ffvkQa02Yt9baV2vdRmsdC6CU6qSsReNXlFLZWJOjW71PZe9NuXPf6UUP9YFV5V5zImDm2mTu+t/B1XPUxppYX1X+559zs+NdL5PrkiSt9c7SBK5Aa/0ekAX0KN2ch7XW7qqrP+dWsO3q9uuTvKvnydRav6K1DsP6XhzE+o8JhfU9G3vdZ6U71iS4PuAAXCy37b9YZ1fhxvfsTLlz5gPjsf7uLyql1iulmpUbe/W9yKooZiHuNZKECVGFtNbfY12q+UfpU/mA69XtpXUzNyuUrvCQlRac9Yvx+9IE6eofd631kzcZ/w0QoJRqgzUZu7oUidZ6n9Z6BNYv3i+Bpbdx/misM2n1tNZewOfA1aXFynyd10vBuvxV/nU7a63P38a+F4G65R7Xu277r437MND0FmM0/3uf4rl2Vq01cFlrnV66raFSyuO67eVnmio+gdZpWD+ztbEuP6YAC697z9y01u+XbisG/Mtt8yxN5sD6npV/n65Z6tRab9Ra98ea0CUBc8ptDsU6W5tzq5iFuBdIEiZE1fs30L+0lucY1pmtoUopB+BPgNPP7Xydy1jrkSrDOqCpUmqqUsqh9E+Hm9Uraa2NWJczP8D6xfwNgFLKUSk1WSnlVTomB+sS0614ABla6yKlVEess2tXXSk9xq99rQ5KKedyf+yxJnvvKqXql8YfoJQacZvHWwq8qpTyKa2Teua67b/297OBckuyytoHrlvpe+yslPoD1tnCnaVDooBHlFLNS5co/4Q16UdrfQzrbNYbpfuOBFphXXK+gVLqb0qpFkop+9LE7UngRGlCtwgYrqw9zOxKjxehlKqrtb4IbAL+TynlWVr710gpdfV1LAV+p6wtMHywXs159Zw1lFIjSmvDirHO3pX/7PTCuqQuxH1BkjAhqpjW+grWL8s/l9ZpPQXMBc5jnRk79zO7X+894E+lyz43XIF4h3HlYi2gngBcwLpk9jd+PimMBvoBy0przK6aCiQrpXKwLi1Nrmjn6zwFvF1aO/dnys2elS41vgvsLH2tnW/7hV1rA9Yl2Kt/3gQ+xDoDt6n03HuwFvzfjrex/r5OA5uB5ViTh6t+1e+ntJYuWyl1NR4P4D9YlynPA4OwzuKll47/GusFCt9hvSjgDNfWH07AWgyfibX32JjSz2NFXIFVWJf+TmFdZnyg9DwpwAjgNawJcgrwB/73nTINcAQSSs+1HOvMFlhntjZirYU7AKwsd04D8ALWz18G1qSr/EzsRKxLm0LcF5TWtpzlF0KI3w6l1JPABK31rS4ouJNjDgCe0lo/WFnHvBcppYZjvYijwrYpQtyLJAkTQohfSClVC+ty426gCda2DZ9orf9dnXEJIe4Nv/gWIEIIIX
DEujzWAOuyXSzwWXUGJIS4d8hMmBBCCCFENZDCfCGEEEKIaiBJmBBCCCFENbjnasL8/f11cHBwdYchhBBCCHFL+/fvT9NaV9iE+55LwoKDg4mLi6vuMIQQQgghbkkpdeZm22Q5UgghhBCiGkgSJoQQQghRDSQJE0IIIYSoBvdcTVhFjEYj586do6ioqLpDEfchZ2dn6tati4ODQ3WHIoQQ4j5yXyRh586dw8PDg+DgYJRS1R2OuI9orUlPT+fcuXM0aNCgusMRQghxH7kvliOLiorw8/OTBExUOqUUfn5+MssqhBCi0t0XSRggCZiwGflsCSGEsIX7Jgm7123fvp2wsDDatGlDYWGhzc6zYMECLly4UPZ45syZJCQk/Ow+ERER1/RmO3jwIEopvv7667LnkpOTiY6OvmbMhg0bfnGcwcHBpKWl/eL979SvjVcIIYS4U5KEVSGz2XzTbYsXL+bVV1/l4MGDuLi43PJYWmssFssdx3B9EjZ37lyaN29+R8eIiYmhe/fuxMTElD1X2UlYVbvX4hVCCHHvkySskiQnJ9OsWTMmT55MaGgoY8aMoaCggODgYF5++WXatWvHsmXL2LRpE126dKFdu3aMHTuWvLw85s6dy9KlS3n99deZPHkyAB988AEdOnSgVatWvPHGG2XnCAkJYdq0abRo0YKUlJSbjgsNDeXRRx8lLCyMAQMGUFhYyPLly4mLi2Py5MllM27lZ7mefPJJwsPDCQsLKzvW9bTWLFu2jAULFvDNN9+U1Uq98sorbN++nTZt2vC3v/2NP//5zyxZsoQ2bdqwZMkS9u7dS5cuXWjbti1du3bl6NGjgDUxffHFF2nRogWtWrXi448/LjvXxx9/TLt27WjZsiVJSUkAvPnmm0yfPp0ePXpQv359Vq5cyUsvvUTLli0ZNGgQRqMRgP3799OrVy/at2/PwIEDuXjxImCd1Xv55Zfp2LEjTZs2Zfv27ZSUlNwQrxBCiPuX2WJm85nNHLpyqHoD0VrfU3/at2+vr5eQkHDDc1Xt9OnTGtA7duzQWmv90EMP6Q8++EDXr19f/+1vf9Naa33lyhXdo0cPnZeXp7XW+v3339dvvfWW1lrr6dOn62XLlmmttd64caN+9NFHtcVi0WazWQ8dOlR///33+vTp01oppXfv3n3LcXZ2dvrHH3/UWms9duxYvXDhQq211r169dL79u0ri7v84/T0dK211iaTSffq1UsfOnTohjE7duzQffr00VprPXHiRL18+XKttdbfffedHjp0aNlxv/jiC/3000+XPc7OztZGo1FrrfU333yjR40apbXW+rPPPtOjR48u23Y1hvr16+uPPvpIa631p59+qh955BGttdZvvPGG7tatmy4pKdEHDx7ULi4uesOGDVprrR988EG9atUqXVJSort06aJTU1O11lrHxsbqhx56qOy1vPDCC1prrdevX6/79u1bYbzXuxs+Y0IIIX6dvJI8vTB+oR64fKBusaCFfm37azY/JxCnb5LT3BctKsp7a208CRdyKvWYzWt78sbwsFuOq1evHt26dQNgypQpfPTRRwCMHz8egD179pCQkFA2pqSkhC5dutxwnE2bNrFp0ybatm0LQF5eHsePHycoKIj69evTuXPnW45r0KABbdq0AaB9+/YkJyffMv6lS5cye/ZsTCYTFy9eJCEhgVatWl0zJiYmhgkTJgAwYcIEoqKiGD169C2PnZ2dzfTp0zl+/DhKqbIZq82bN/PEE09gb2/9KPr6+pbtM2rUqLL4V65cWfb84MGDcXBwoGXLlpjNZgYNGgRAy5YtSU5O5ujRoxw5coT+/fsD1tm2WrVqVXjc23lfhBBC3Nsu5F0gOjGaFcdXkGfMo11gO14Mf5He9XpXa1z3XRJWna6/iu7qYzc3N8A669i/f/9raqkqorXm1Vdf5fHHH7/m+eTk5LJj3Wqck5NT2WM7O7tbFvufPn2af/zjH+zbtw8fHx9mzJhxQ1sGs9nMihUrWL16Ne+++25ZD63c3NyfPTbA66+/Tu/evVm1ahXJyclERETccp+rr8HOzg6TyXTD8waDAQcHh7L32WAwYDKZ0FoTFhbG7t277+i4Qggh7i+HrxwmKiGKzWc2AzCg/gCmNp9Ky4CWaJMJXVQMrq7VFt99l4TdzoyVrZw9e5bdu3fTpUsXoqOj6d69Oz/++GPZ9s6dO/P0009z4sQJGjduTH5+PufPn6dp06bXHGfgwIFl9WHu7u6cP3++wm7ttzuuPA8PjwqTppycHNzc3PDy8uLy5ct89dVXNyRKW7ZsoVWrVmzcuLHsuenTp7Nq1SrCwsKuOe7158nOzqZOnTqA9eKAq/r3789///tfevfujb29PRkZGdfMhv0SISEhXLlypex3YTQaOXbsGGFhN/9s3Ox9EUIIcW8xWUxsObuFhQkLOXTlEB4OHkxrPo1JoZOo6VYTS1ERGdHRZMybj+eQwQT+/vfVFqvNCvOVUvOVUqlKqSM32d5MKbVbKVWslHrRVnFUpZCQED799FNCQ0PJzMzkySefvGZ7QEAACxYsYOLEibRq1YouXbqUFZyXN2DAACZNmkSXLl1o2bIlY8aMqTBBuN1x5c2YMYMnnnjihlYYrVu3pm3btjRr1oxJkyaVLZmWFxMTw8iRI695bvTo0cTExNCqVSvs7Oxo3bo1//rXv+jduzcJCQllhe4vvfQSr776Km3btr1m9mnmzJkEBQXRqlUrWrdufc0Vlr+Uo6Mjy5cv5+WXX6Z169a0adOGXbt2/ew+18crhBDi3pJXkkdUfBRDVw7lxe9fJKMog1c6vsLmsZt5IfwFAixupM2ew4l+/bn89l+w9/fHtUOHao1ZWWvGbHBgpXoCeUCU1rpFBdsDgfrAg0Cm1voft3Pc8PBwXb5nFUBiYiKhoaG/OuZfIzk5mWHDhnHkSIU5p7jH3Q2fMSGEEDc6n3eexYmLWXl8JfnGfNoFtmNa2DQi6kZgZ7DDlJZGRmQUmTExWPLycOveHb/HHsW1Q4cqacatlNqvtQ6vaJvNliO11tuUUsE/sz0VSFVKDbVVDEIIIYS4Px1MPUhUQhRbzm7BgIEBwQOY1nwaYf7W0pOSc+e4Mn8+WStWoktK8Bg4EL9HZ+LyM6UpVe2+qwmrLsHBwTILJoQQQtiQyWJi89nNLIxfyOG0w3g4ejAjbAYTm02kpltNAIqOHSN9zlxyNmwAgwHvB0fg98gjOAYHV2/wFbgnkjCl1GPAYwBBQUHVHI0QQgghqlJuSS4rj69kceJiLuZfJMgjiNc6vcaIRiNwdbBe3Vhw4EfSZ88mb+tWlKsrvtOm4TtjOg41alRz9Dd3TyRhWuvZwGyw1oRVczhCCCGEqAIpuSlEJ0az8vhKCkwFdKjZgVc7vkqver0wKANaa/K2bSN99hwK4uKw8/bG/9ln8J08GTtv7+oO/5buiSRMCCGEEL8NWmsOXjlIVHwU36Z8iwEDgxoMYmrzqTT3s97rWJvN5GzcQNqcuRQnJmJfsyY1XnsV7zFjMFRj3687ZbMkTCkVA0QA/kqpc
8AbgAOA1vpzpVRNIA7wBCxKqVlAc6115ba7F0IIIcRdz2gx8k3yNyxMWMiR9CN4OnrycIuHmRAygRpu1iVFS0kJ2au+JH3ePIxnz+LYsCG1/vpXvIYNRTk6VvMruHO2vDpy4i22XwLq2ur81e3NN9/E3d2ddevW8Y9//IPw8GuvTl2wYAFxcXF88skn1RShEEIIUf1ySnJYcWwFixMXc7ngMsGewfyp058Y3mh4Wb2XOS+frCVLyFiwANOVKzi3aEHgRx/i0a8fymCzlqc2J8uRQgghhKhyKTkpLEpcxKoTqyg0FdKxZkde7/w6Per2wKCsiZUpI4OMhQvJXByNJScH1y6dqf2393Ht0qVKenzZmiRhlejdd98lMjKSwMBA6tWrR/v27QFYuHAhM2fOxGQyMX/+fDp27HjNfjNmzMDZ2Zm4uDhycnL45z//ybBhw6rjJQghhBA2o7XmQOoBouKj+C7lO+wMdgxpMISpzafSzLdZ2TjjhQukf7GArGXL0EVFePTvh9+jj+LSqlU1Rl/5JAmrJPv37yc2NpaDBw9iMplo165dWRJWUFDAwYMH2bZtGw8//HCF/cSSk5PZu3cvJ0+epHfv3pw4cQJnZ+eqfhlCCCFEpTNajGxK3kRUQhQJ6Ql4OXkxs+VMJjabSIBrQNm44pMnSZ8zl+x16wDwGj4cv5mP4NSoUXWFblP3XxL21Stw6afKPWbNljD4/Z8dsn37dkaOHIlr6VUZDzzwQNm2iROt5XE9e/YkJyeHrKysG/YfN24cBoOBJk2a0LBhQ5KSkmjTpk2lvQQhhBCiqmUXZ7P82HKik6JJLUgl2DOY1zu/zvBGw3GxdykbV3j4MOlz5pC7eQvKyQmfiRPxe2gGDrVrV2P0tnf/JWF3oevXrStax76dMUIIIcS94EzOGRYlLGL1ydUUmgrpVKsTb3R5g+51upfVe2mtKdi9m7TZcyjYsweDpyf+Tz6Bz9Sp2Pv4VPMrqBr3XxJ2ixkrW+nZsyczZszg1VdfxWQysXbtWh5//HEAlixZQu/evdmxYwdeXl54eXndsP+yZcuYPn06p0+f5tSpU4SEhFT1SxBCCCF+Ma01cZfjiEqI4vuU77E32JfVe4X4/u87TVss5H6zmfQ5cyg6cgT7gAACX3oJ73HjsHN3q8ZXUPXuvySsmrRr147x48fTunVrAgMD6dChQ9k2Z2dn2rZti9FoZP78+RXuHxQURMeOHcnJyeHzzz+XejAhhBD3BKPZyNfJX7MwYSGJGYn4OPnwWKvHmNBsAv4u/mXjdEkJ2WvXkT53LiWnT+NQP4iab7+F14MPYrgHe3xVBqX1vXUXoPDwcB0XF3fNc4mJiYSGhlZTRL/ejBkzGDZsGGPGjKnuUMRN3OufMSGEqGzZxdksO7aM6MRorhReoaFXQ6Y2n8qwhsNwtv/fRIKloICsZctI/2IBpkuXcAoNxf+xR/EYMABlZ1eNr6BqKKX2a63DK9omM2FCCCGEuG3J2cksSlzE6hOrKTIX0aVWF97u9jZda3ctq/cCMGdlkbFoMZkLF2LOzsa1Qwdq/eVt3Lp3vzvqnrUGsxHsq28WTpKwu8CCBQuqOwQhhBDiprTW7Lu0z1rvde57HAwODGs4jCnNp9DUp+k1Y42XL5PxxQIyly5FFxTg3rs3fo89imvbttUU/XWMhfDTcvjhvxA6DCJeqbZQJAkTQgghRIWMZiNfJX9FVHwURzOP4uvsy5Otn2RcyLhr6r0Aik+fJn3ePLJXrwGLBc+hQ/CbORPnpk1vcvQqln0O9s2D/QugMINLzg1JzvGlczWGJEmYEEIIIa6RWZTJsmPLiEmKIa0wjUZejXir61sMbTgUJzuna8YWxseTPnsOuZs2oRwd8Rk7Ft+HH8Kx7l1we2it4ewe+OFzdOJa0Jq9Tp35V0kfDpjCeMatiSRhQgghhKh+p7JPsShhEWtOrqHYXEy32t14t9u7dKl97b0atdYU7N1H+uzZ5O/cicHdHb9HH8V32lTs/f1/5gxVxFgE8Svhh8/h4iGK7DxYyjBmF/fB4FqfKYOC+E/7evi4Ve9VmZKECSGEEL9hWmv2XNzDwoSFbD+/HUeDI8MbDWdK6BQa+zS+dqzFQt5335E2ezZFhw5j5+9PwO9fwGfCBOw8PKrpFZSTcwH2zUPvX4AqSOO8QzCfGh/hy+JudAkJ4p0u9enZJACD4S64MABJwqrFm2++yZw5cwgICMBkMvHXv/71mtscVZVjx44xa9Ysjh8/joeHB40bN+bjjz8mJSWFqKgoPvroI7Zu3YqjoyNdu3at8viEEELYTom5hPWn1rMwcSHHM4/j6+zLU22eYlzTcfi5+F0zVhuNZK9fb+3xdeIkDnXrUvONP+M1ciSG6u5rqTWk7C1dclwDFjM77TryWcljJBnaMq57EBs7BVHP17V646yAJGHV5Pnnn+fFF18kMTGRHj16kJqaisFguPWOd8hkMmFvf+OvuaioiKFDh/LPf/6T4cOHA7B161auXLlCeHg44eHhZc+5u7tXSxJmNpux+w30kBFCiKqUUZTB0qNLiU2KJb0onSY+TXi769sMaTjkhnovS2EhWStWkjF/PsYLF3Bq2pTaH3yA5+BBqAq+W6qUqRiOXF1yPEihwZ0Y0yC+MPbFv14I07rUZ3CLWjg73L3fI5X/rf8bFhUVRatWrWjdujVTp04lOTmZPn360KpVK/r27cvZs2dv2Cc0NBR7e3vS0tJ48MEHad++PWFhYcyePbtsjLu7O88//zxhYWH07duXK1euAHDy5EkGDRpE+/bt6dGjB0lJSYC1+esTTzxBp06deOmll/j+++9p06YNbdq0oW3btuTm5hIdHU2XLl3KEjCAiIgIWrRowdatWxk2bBjJycl8/vnn/Otf/6JNmzZs376dK1euMHr0aDp06ECHDh3YuXMnQIXnAPjggw/o0KEDrVq14o033ig716JFi+jYsSNt2rTh8ccfx2w2l73W3//+97Ru3Zrdu3dX8m9ICCF+u05mneTNXW8yYPkAPj34KaF+oczuP5sVw1cwssnIaxIwc04OaZ9/zom+/bj8zjvY16xJ3c//Q4PVX+I1fFj1JmA5F+Hbd9H/CoMvnyAlNZ0/Gh+mm/ETjrZ6mf88M5pVT3VjZNu6d3UCBljXgu+lP+3bt9fXS0hIuOG5qnbkyBHdpEkTfeXKFa211unp6XrYsGF6wYIFWmut582bp0eMGKG11vqNN97QH3zwgdZa6z179uhatWppi8Wi09PTtdZaFxQU6LCwMJ2Wlqa11hrQixYt0lpr/dZbb+mnn35aa611nz599LFjx8qO07t3b6211tOnT9dDhw7VJpNJa631sGHD9I4dO7TWWufm5mqj0aiff/55/e9//7vC1/Ldd9/poUOH3hCr1lpPnDhRb9++XWut9ZkzZ3SzZs1ueo6NGzfqRx99VFssFm02m/XQoUP1999/rxMSEvSwYcN0SUmJ1lrrJ598UkdGRpa91iVLltzx+29rd8NnTAgh7pTFYtE7z+/Uj3/zuG6xoIVuv7C9fnPX
m/pk5skKx5dcvqwv/f3vOqlde50Q0kyfeewxnb9vXxVHXQGLReuze7Ve9rC2vOWrLW946a1v9NGTXn1PR/z9Wz13+ymdlV9S3VFWCIjTN8lp7rvlyL/t/RtJGUmVesxmvs14uePLPzvm22+/ZezYsfiXXhXi6+vL7t27WblyJQBTp07lpZdeKhv/r3/9i0WLFuHh4cGSJUtQSvHRRx+xatUqAFJSUjh+/Dh+fn4YDAbGjx8PwJQpUxg1ahR5eXns2rWLsWPHlh2zuLi47OexY8eWLeV169aNF154gcmTJzNq1Cjq/orLhjdv3kxCQkLZ45ycHPLy8io8x6ZNm9i0aRNtSxv05eXlcfz4cQ4fPsz+/fvL7q9ZWFhIYGAgAHZ2dowePfoXxyeEEAKKzcVsOLWBqIQoTmSdwM/Zj2faPMO4kHH4OPvcML7k7FnS580ne9UqtMmE56BB+D32KM7NmlVD9OWYiiH+S/QPn6MuHKBAuRJt7M8i8wCahrbiiS716dbI/64ptL9T910Sdq+4WhN21datW9m8eTO7d+/G1dWViIgIioqKKtxXKYXFYsHb25uDBw9WOMbN7X93on/llVcYOnQoGzZsoFu3bmzcuJGwsDC+//77O47bYrGwZ8+eG24wXtE5tNa8+uqrPP7449eM/fjjj5k+fTrvvffeDcd3dnaWOjAhhPiF0gvTrfVeR2PJKMqgqU9T3un2DoMbDMbR7sZ2DEVJSaTPnkPO11+j7OzwGjUKv0cexjEoqBqiLyf3EsR9gSVuHob8K5xVdZhjfIjvnfswokcI0Z2CqO3tUr0xVoL7Lgm71YyVrfTp04eRI0fywgsv4OfnR0ZGBl27diU2NpapU6eyePFievTocdP9s7Oz8fHxwdXVlaSkJPbs2VO2zWKxsHz5ciZMmEB0dDTdu3fH09OTBg0asGzZMsaOHYvWmsOHD9O6desbjn3y5ElatmxJy5Yt2bdvH0lJSUyaNIn33nuP9evXM3ToUAC2bduGr6/vNft6eHiQk5NT9njAgAF8/PHH/OEPfwDg4MGDtGnTpsJzDBw4kNdff53Jkyfj7u7O+fPncXBwoG/fvowYMYLnn3+ewMBAMjIyyM3NpX79+r/qdyCEEL9VxzOPsyhxEetOrqPEUkLPuj2Z1nwaHWt2rPA+jQVxcaTNmUP+99swuLri+9AMfKdPx6F0VaLanNsPP3yOJX4VBouR7y1tmW96hOJ6PZjcpQF/blELR/v7p5z9vkvCqktYWBh//OMf6dWrF3Z2drRt25aPP/6Yhx56iA8++ICAgAC++OKLm+4/aNAgPv/8c0JDQwkJCaFz5//18HVzc2Pv3r288847BAYGsmTJEgAWL17Mk08+yTvvvIPRaGTChAkVJmH//ve/+e677zAYDISFhTF48GCcnJxYt24ds2bNYtasWTg4ONCqVSs+/PBD0tLSyvYdPnw4Y8aMYfXq1Xz88cd89NFHPP3007Rq1QqTyUTPnj35/PPPb3qOxMREunTpAliL7hctWkTz5s155513GDBgABaLBQcHBz799FNJwoQQ4g5ordl5YScLExay68IunO2cebDxg0xuPpmGXg0rHJ/3/fekz55D4YED2Pn4EDDrOXwmTsTOy6saXkEpUwkkrMay5z8YLuynQLkQa+zLMsMg2rYN57XO9Qmt5Vl98dmQstaM3TvCw8N1XFzcNc8lJiYSGhpaTRHZnru7O3l5edUdxm/a/f4ZE0LcO4pMRaw7tY5FCYs4mX2SAJcAJjabyNimY/F29r5hvDaZyPnqa9LnzKH42DHsa9fC76GH8R4zGoNLNS7p5aVC3BeY987FriCVZGox3ziAAz6DGNMllFHt6+Lp7FB98VUSpdR+rXV4RdtkJkwIIYS4B6QVprHk6BKWJC0hsziTZr7N+Gv3vzIoeBAOdjcmK5biYrJXriR93nyM587h2KgRtd5/D6+hQ1EO1ZjcnD+A5YfP4chKDBYj28ytibI8hEuz/kzp2oC3GvpVuIR6P5Ik7B4gs2BCCPHbdSzzGAsTFrL+1HpMFhO96vZiWtg0wmuEV5ismPPyyIyJISMyCnNaGs6tWlHjlZdx79MHZYOm4LfFbISE1Rh3f47DhX0U4cwSUx/WOw+jW7fOvNcxiJpe1dx5vxpIEiaEEELcZSzaws7zO4lKiGLPxT242LswqskopoROIdgruMJ9TOnpZERGkRkTgyU3F7euXfH7xz9w7VRxcX6VyLuC3v8Fxh/m4lhwmfO6JgtM0zhTbwRjuoYRE1YDB7v7p9D+TkkSJoQQQtwlikxFrD21loUJCzmdfZpAl0Cea/ccY5uOxcup4uL5knPnyZg/n6wVK9AlJXgMGIDfo4/i0iKsiqMv58KPmHb/FxW/HDuLkd3mVsQaHiKw7VAmd2lA0xp3wc2+7wKShAkhhBDV7ErBFWKPxrL06FKyirMI9Q3lvR7vMbD+wArrvQCKjx8nbc4cctZvAIMBrxEP4PfwIzg1bFDF0ZcyGyFxLUU7P8P54j5KcGKZKYJt3iOJ6N6dD9rWwd1J0o7y5N0QQgghqsnRjKNEJUSx4fQGzBYzEfUimNZ8Gu1rtL/pEmLhwYOkzZ5D3rffolxd8Z0yBd+HZuBQs2YVR18qPw1L3BeU7JmDc+FlLlsCWWiZSlbIOMZ2C2NaA9/fTKH9nZIk7C7w7rvvEh0djZ2dHQaDgf/+97906tSJ4OBg4uLiym6FdFXXrl3ZtWsXycnJ7Nq1i0mTJgHWxqkXLlxgyJAh1fEyhBBC3AaLtrD93HYWJizkh0s/4GLvwtimY5kSOoUgz4o71Wutyd+xk/TZsynYtw87Ly/8n3kGn8mTsPe58TZEVeLiIYp2fIZ9wkrsdQl7zS1Z7fQIwZ1H8FinYAI9fnuF9ndKkrBqtnv3btatW8eBAwdwcnIiLS2NkpKSn91n165dACQnJxMdHX1NEhYXF3dHSZjJZMLeXj4GQghha4WmQtacWMOixEUk5yQT6BrI8+2fZ3ST0Tet99JmM7mbNpE2Zw7FCYnY16hB4Csv4zN2LIZyt6erMmYTOnEteds+wSM1Dot2Isbck5/qjKNPj178LTQQ+99wof2dkm/fSvTggw+SkpJCUVERzz33HI888giPPPIIcXFxKKV4+OGHef7556/Z5+LFi/j7++Pk5ARww6wXWG9wPWrUKEaNGsWjjz5a1rz1lVdeITExkTZt2jBx4kQ+/fRTCgsL2bFjB6+++irDhg3j2Wef5ciRIxiNRt58801GjBjBggULWLlyJXl5eZjN5l90D0khhBC3J7UgldikWJYeW0p2cTZhfmH8rcff6B/cHwdDxfVelpISslevJmPuPErOnMExOJha776D1/DhKMcb7wFpc/nplOydj/GHObgVXSbTEsDnhmmY20xmTLcWTA10r/qY7gOShFWi+fPn4+vrS2FhIR06dKB9+/acP3+eI0eOAJCVlXXDPgMGDODtt9+madOm9OvXj/Hjx9OrV6+y7Xl5eUyYMIFp06Yxbdq0a/Z9//33+cc//sG6desAqFGjBnFxcXz
yyScAvPbaa/Tp04f58+eTlZVFx44d6devHwAHDhzg8OHDN9wrUgghROVITE9kYcJCvkr+CrPFTJ+gPkxrPo22gW1vWiNlzssna+lSMhYswJSainNYGHU+/BCPfn1RdnZV/AqAi4fJ+f5TXI+uxFGXsNccxmbPx2jWczRPt62Hq6OkEb/GfffuXfrrXylOTKrUYzqFNqPma6/dctxHH33EqlWrAEhJSaGkpIRTp07x7LPPMnToUAYMGHDDPu7u7uzfv5/t27fz3XffMX78eN5//31mzJgBwIgRI3jppZeYPHnyHce9adMm1qxZwz/+8Q8AioqKOHv2LAD9+/eXBEwIISqZRVvYdm4bUQlR7Lu0Dxd7F8aHjGdys8nU86x30/1MmZlkLlxIxuJoLNnZuHbuTK33/opb165VX9RuNmFOXEf21o/xTYvDQTuyzNKTM40n0z8igjeCfKTQvpLcd0lYddm6dSubN29m9+7duLq6EhERQXFxMYcOHWLjxo18/vnnLF26lLfeeovhw4cD8MQTT/DEE09gZ2dHREQEERERtGzZksjIyLIkrFu3bnz99ddMmjTpjj/0WmtWrFhBSEjINc//8MMPuFVHLYEQQtynCowFrD65msWJizmTc4aabjX5ffvfM6rpKDwdb37zaePFi6R/8QVZy5ajCwtx79cX/0cfxaV16yqMvlRBBnm75qH3zcGj+DL5lgAWO87ApeN0Huwahr+7U9XHdJ+775Kw25mxsoXs7Gx8fHxwdXUlKSmJPXv2kJaWhsViYfTo0YSEhDBlyhTq1avHwYMHy/Y7evQoBoOBJk2aANbi+vr165dtf/vtt3n77bd5+umn+eyzz645p4eHB7m5uTd9PHDgQD7++GM+/vhjlFL8+OOPtG3b1kbvgBBC/PZcyr9ETFIMy48tJ6ckh5b+Lfmg5wf0rd/3pvVeAMWnTpE+Zy7Za9cC4DVsGH6PzsSpUaOqCr2MvvQTqZs/xufkKtx1CTvNYeyr8RRhEeN4qnkt7Awy62Ur910SVl0GDRrE559/TmhoKCEhIXTu3Jnz588TERGBxWIB4L333rthv7y8PJ599lmysrKwt7encePGzJ49+5oxH374IQ8//DAvvfQSf//738ueb9WqFXZ2drRu3ZoZM2Ywffp03n//fdq0acOrr77K66+/zqxZs2jVqhUWi4UGDRqU1Y8JIYT45eLT41mYsJCNpzdiwULfoL5Maz6N1gGtf3bVovCnn0ifPYfczZtRTk74TJiA30MzcKhTpwqjByxmCo+sJfu7j6mZGYendmSN6kl68+kM6NOXWf6yWlIVlNa6umO4I+Hh4TouLu6a5xITEwkNDa2miMRvgXzGhBBmi5mt57ayMGEh+y/vx9XelVFNRjE5dDJ1PeredD+tNQV79pA2ezYFu/dg8PTEZ/IkfKdOxb6qa3MLMkjdNheH/fPwMV7inPZnk9sD+HZ/hIHhobg4VkPx/31OKbVfax1e0TaZCRNCCCF+RoGxgFUnVrE4cTEpuSnUcqvFi+EvMqrJKDwcb34PRG2xkLt5M+lz5lL000/YBwQQ+Ic/4D1+HHbuVdvSwXjhCBc2fUjN5NUEUsweS3MSg56mbf9JPBTkJ4X21USSMCGEEKICl/IvEZ0UzfJjy8ktyaVVQCuea/ccfYP6Ym+4+denLikhe+060ufOpeT0aRyCgqj51lt4PTgCg1MVFrdbzGQcXEPe958QlB1HDe3AJvteFLWbSd+IvnR2q4Z+Y+IakoQJIYQQ5RxJO0JUQhSbkjeh0fQL6sfU5lNpE9jmZ/ezFBSQtXw56V8swHTxIk7NmlHnn/+Hx8CBVdrjSxdkcmbzf3E//AX+pksUaj+W+c6kZsRjDGnZVArt7yI2S8KUUvOBYUCq1rpFBdsV8CEwBCgAZmitD/zS82mtZTpV2MS9VjcphLhzZouZrSlbiUqI4kDqAdwc3JgcOplJoZOo4/7zRfPmrCwyFi8mc+EizFlZuIaHU+vtt3Dr3r1Kv5fyzh3h3Nf/pv65NQRTzH5C2dbkd4QPmMrYgJu3yRDVx5YzYQuAT4Com2wfDDQp/dMJ+E/pf++Ys7Mz6enp+PnJuraoXFpr0tPTcXaWG9EKcT/KN+bz5YkvWZSwiHN556jjXoeXOrzEyMYjcXf8+bot4+XLZCyIJGvJEiwFBbhHROD32GO4tqvCVkAWM2d/+JLinZ/RJC+OBtqB7S69oeNjdO/Rh/YOUmh/N7NZEqa13qaUCv6ZISOAKG2dZtijlPJWStXSWl+803PVrVuXc+fOceXKlV8arhA35ezsTN26N7/ySQhx77mcf5nFSYtZfnQ5ucZc2gS04fn2z9MnqM/P1nsBlCQnkz5vHtlfrkZbLHgOGYLfzJk4hzStouihOC+DY19/TkBiJEHmS1zSvnxV8zHq93+Kfo0bVFkc4tepzpqwOkBKucfnSp+74yTMwcGBBg3kQyeEEOLnHc04SmR8JF+d/goLFvoF9WNamLW/160UJSSQNnsOuRs3ohwc8BozGr+HH8ax3s1vR1TZLp86xIWNHxJyeT0tKeKwIZT4Fi/QfuAUBntIb697zT1RmK+Uegx4DCAoKKiaoxFCCHEv0Vqz88JOIuMj2XNxDy72LkxoNuGW/b2u7luwdx/pc+aQv2MHBnd3/GbOxHf6NOz9/askfovZTMK25ai9/yWscD/e2p44j764dH+SNh0jMEih/T2rOpOw80D5fz7ULX3uBlrr2cBssDZrtX1oQggh7nUl5hLWn1pPVEIUJ7JOEOASwKx2sxjTdAxeTl4/u6+2WMjbupX0/86m8NAh7Pz8CHj+eXwmTcTO4+a9wSpTdmY6CRs+I+jEIlroS6Tiy7Z6T9B40NN0qyMTEveD6kzC1gDPKKVisRbkZ/+SejAhhBCivOzibJYeXUp0UjRphWk08WnCu93fZXDwYBzsbn4/RwBtNJKzYQPpc+dSfPwEDnXqUOPPr+M9ahSGKrpA51j8fq5s+Zg26RvooopJcmjOvtZ/oFX/KfR0kouE7ie2bFERA0QA/kqpc8AbgAOA1vpzYAPW9hQnsLaoeMhWsQghhLj/peSksDBxIV+e+JJCUyFda3fl3e7v0qVWl1teOW8pKiJrxQoy5s3HeOECTk2aUPuDv+M5eDDK3vbzFUUlRvZvWYbLgbm0M+4nWNtzxLcfXhFP06x1T5ufX1QPW14dOfEW2zXwtK3OL4QQ4rfhYOpBohKi2HxmM3YGO4Y0GMK05tMI8Q255b7mnBwyo2PIiIrCnJGBS5s21PjTn3CP6IUyGGwe+7mLl0n8+j+EnImhG5dIUz4caPQUTYY8Qzu/Kr6pt6hy90RhvhBCCFGe2WLmu5TviIyP5OCVg3g4evBwi4eZFDqJQNfAW+5fdPQomYujyV67Fl1YiFuPHvg/9igu4eE27zdpsWj27t9L3rbP6JzzNf1VEaecmnM0/GWa9p6Mv30V3tpIVCtJwoQQQtwzCowFrD65moUJC0nJTaGOex1e6fgKIxuPxNXB9Wf31SUl5G7eTEZ0NIVx+1FOTngOG4
rvlCk4h4baPPbMvCJ2b1qK75Ev6Gw5gBF7jgf2J7DfszQM6Wbz84u7jyRhQggh7npphWlEJ0az9NhSsouzaeV/ezfTBmtn+6wlS8lcthTzlTQc6tUj8A9/wHv0KOy8vW0e++GT5zjxzWzaXFzKEHWRLIMPR5s9Q4NBz9Dcu5bNzy/uXpKECSGEuGudyDxBVEIU606tw2Qx0bteb2a0mEGbgDY/u2yotaZg3z4yF0eTu3kzWCy49eyB76RJuPXoYfN6ryKjmW937cG0+7/0LtxEK1VIilsoFzq/Su2uE/G2d7Tp+cW9QZIwIYQQdxWtNT9c+oHI+Eh2nN+Bs50zo5qMYmrzqdT3rP+z+5rz8slZu4bM6GiKj5/A4OWF7/Tp+EwYj2MVNPtOvpLHrm+WUfdYFIP0j5iVHSm1B2Do/xz1Gna2+fnFvUWSMCGEEHcFo8XI16e/JiohiqSMJHydfXm6zdOMDxmPj7PPz+5bfOqUtdD+yy+x5Ofj3Lw5td59B88hQzC4uNg0brNFs+2n05zdOp9u6SuYZLhAjp0PF8KepU6/p2joKUuOomKShAkhhKhWuSW5LD+2nMWJi7lccJmGXg15q+tbDG04FCe7m18pqE0mcr/7jszoaAp270E5OOAxeBC+kybh3Lq1za9yTM8r5qvtu7CPm8cQ02Z6q0Iue4aS3f01vMLH4SlXOYpbkCRMCCFEtbiQd4FFiYtYcWwFBaYCOtbsyJ+7/JnudbpjUDev2TKlp5O1bBmZS5ZiungR+1q1CJg1C++xY7D387NpzFprDpzJ5IctKwg5E8MkdQCLMpBabyCu/Z+jRlAnsHHyJ+4fkoQJIYSoUvFp8UTGR7LpzCYABgYPZHrYdJr7Nb/pPlprCg8eJDM6hpyvvwajEbeuXaj5x9dwj4iweVf7ghIT6+JOkLojkoF5q3nKcJ58R2+yWv8O315PUNuztk3PL+5PkoQJIYSwOYu2sO3cNiLjI4m7HIebgxtTQqcwOXQytdxvXjNlKSwkZ/16MqKjKU5IxODujs/48fhMmohTw4Y2j/vklTzWfb8L7yORjNTf4qkKSPcOpbjna7i1HoObg9zLUfxykoQJIYSwmSJTEWtPrSUqPorknGRqutXkxfAXGd1kNO6O7jfdr+TsWTJjYslauRJLdjZOTZpQ88038Bo+HIObm01jNpktbE64zI/fryb88lKeNRxAK0V2gyHoPs/iV0+WHEXlkCRMCCFEpcsoymBJ0hJij8aSUZRBqG8o7/d4nwHBA3AwOFS4j7ZYyNu2jczoaPK37wA7Ozz69cN38qQquZ1Qam4RK3YfI2fvIh4sWc8gwzkKnb0pbDcLt66P4usl93IUlUuSMCGEEJXmdPZpFiYsZM3JNRSbi+lZtyczwmYQXuPmSZQ5K4usFSvJjI3FmJKCXYA//k89hfe4cTjUuPV9IH8NrTX7kjNZv30P9Y4vZpLhW7xUATm+oZh7fopLyzEgS47CRiQJE0II8atordl/eT+RCZFsTdmKo8GR4Y2GM635NBp637xuq/BIPJnR0eSsX48uLsYlvD2Bz8/Co18/lKNtO8obzRbWHTrP7m/X0Cd7FX+2i0PZGShoNBh6PoNnUGdZchQ2J0mYEEKIX8RkMbH5zGYi4yM5kn4EbydvHm/1OBOaTcDfxb/CfSwlJeR+9RUZ0dEUHTqMcnHB68EH8Zk0CeeQpjaPObfIyNI9pzi/YyGjS9Yw0nCGYmdvLB1m4dBpJu5edW0egxBXSRImhBDijuQb81l5fCWLEhZxIf8C9T3r83rn1xneaDgu9hV3pzdeuEBm7BKyli/HnJGBY3AwNV57Da+RD2Ln4WHzmC9mF7Jk60EMBxYwga8JVFnkezfC0vNDnFqPBwfbdtUXoiKShAkhhLgtl/MvszhpMcuPLifXmEu7wHa83PFlIupFVNhcVWtNwe7dZCyOJu+77wBw790bn0kTcevSxeY30QZIvJjD6m++pf7xKJ4wbMNZGcmt0xMinsOtcV9ZchTVSpIwIYQQP+toxlEi4yP56vRXWLDQL6gf08Om0yqgVYXjzbm5ZK/6ksyYGEpOn8bOxwe/mTPxGT8Ohzq2v8JQa82O41fY9c1yOl2K5RW7Q5jsHSlqPhZ6/Q6PwFCbxyDE7ZAkTAghxA201uy8sJPI+Ej2XNyDi70LE5pNYHLoZOp6VFw3VXT0GJnR0WSvXYsuKMC5dStq/+19PAYNwuBk+/soGs0WNvx4mlPfLmBw3ipeNqRQ4OxHUadXcO7yKO5uFdepCVFdJAkTQghRpsRcwvpT64lKiOJE1gkCXAKY1W4WY5qOwcvJ64bx2mgkd/NmMhdHUxAXh3JywnPoUHwmTcKlRViVxJxbZGT1joMU7Z7Ng6avGaFyyPJqijHiE1xbjwO5kba4S0kSJoQQguzibJYeXUp0UjRphWk08WnCu93fZXDwYBzsbmyuarycStbSpWQtXYrpyhUc6tYl8A8v4jVqFPY+PlUS88XsQtZ/sxm/n+Yylh04KRNpdSKw9Hke70a9pN5L3PUkCRNCiN+wlJwUFiYu5MsTX1JoKqRr7a682/1dutTqckNzVa01hXFxZERHk/vNZjCZcOvZg5p/eRv3Hj1QdnZVEnPihSx2fBVL6JmFzDQcocTgRE6zCTj1nYW/f5MqiUGIyiBJmBBC/AYdTD1IVEIUW85uwaAMDGkwhGnNpxHiG3LDWEt+Ptlr15IZHUPxsWMYPD3xnTIFn4kTcKxfv0ri1VqzO+kcRzfNpmf6Mh41XCTHyZ+s8Ffx7vEY/q6+VRKHEJVJkjAhhPiNMFvMfJfyHZHxkRy8chAPRw8eCnuISaGTCHS98fZAxadOkxkTQ/aqVVjy8nBqHkqtd/6C59ChGFyqpq+W0Wxh8w8Hyd72GQMLv6KryiPVM5SCXn/Gs+0YsLdtZ30hbEmSMCGEuM8VGAtYfXI1CxMWkpKbQh33OrzS8RVGNh6Jq4PrNWO1yUTe1q3Wm2jv2g0ODngOHIjP5Em4tGlj85toX5VbZOSbLZtw2f9f+pp3YK8sXKjZG7eBvyewQTep9xL3BUnChBDiPpVWmEZ0YjRLjy0luzibVv6tmNVuFn2D+mJnuLZ+y5SRQday5WQuicV04SL2NWsSMOs5vMeMwd6/6lo7XMzMY+eGxQQf+4JRKpFC5cLlkMnUGfA8df1vfh9KIe5FkoQJIcR95kTmCaISolh3ah0mi4ne9Xozo8UM2gRcO5Oltabo0CFrof1XX6ONRly7dKbGq6/i0bs3yr7qviKSzl4kYf1ntL+0hDHqMhkONbjQ7o/U7v0Y9Vy8qywOIaqSJGFCCHEf0Frzw6UfiIyPZMf5HTjbOTOqySimNp9Kfc9ri+ctRUXkrF9P5uJoihISMLi54T1uHD6TJuLUqFGVxrzv0E9c2fIRPXLW00wVkOIeRlqPt/HvMAbs5CtK3N/kEy6EEPcwo8XI16e/JiohiqSMJHydfXm6zdOMDxmPj/O1/bpKUlLIjIkle8UKzNnZODVpTM03/ozn8Aewc3erupjNFnZu/Rq15zO6l
exEKTgd2AfDgN9Tr0nXKotDiOomSZgQQtyDcktyWX5sOYsTF3O54DINvRryVte3GNpwKE52/+sQry0W8nfsIGPxYvK3bQeDAY9+/fCZNAnXjh2qrNAeILegkB82RFEjfh4R+ij5uHKy0VSCh7xAY//gKotDiLuFJGFCCHEPuZB3gUWJi1h5fCX5xnw61uzIn7v8me51umNQhrJx5qwsslauIjM2FuPZs9gF+OP/5JN4jx+HQ40aVRrzpdTLHFn7CaFnY+inrnDZribHWv+Jxv0fJ8TFs0pjEeJuIkmYEELcA+LT4omMj2TTmU0ADAweyPSw6TT3a37NuML4eDKjo8lZtx5dXIxL+/YEznoOj379UI5V21PrxNEjnN/4b9qnr6OfKuSEayuSu71DcNex1DBUTXd9Ie5mkoQJIcRdyqItbDu3jcj4SOIux+Hm4MaU0ClMDp1MLfda/xtXUkLuxo1kLo6m8OBBlIsLXiNG4DNpIs7NmlVpzNpi4afdmyje8THtCnZSHwOJfn0JHPACjZt1qdJYhLjbSRImhBB3mSJTEWtPrSUqPorknGRqutXkxfAXGd1kNO6O7mXjjBcvkhm7hKxlyzBnZOBYvz41Xn0Fr5EjsfOs2mU+Y0kxBzcuwOvgHFqZj5ONOweDptN42Au0qlE1tzYS4l4jSZgQQtwlMooyWJK0hNijsWQUZRDqG8r7Pd5nQPAAHAwOgLWtQ8GePWRGR5O75VsA3CMi8Jk0CbeuXVAGw8+dotLlZl0hce1H1D+5mA6kc85Qm/0t/kSLIY/T3lXqvYT4OZKECSFENTudfZqFCQtZc3INxeZietbtyYywGYTXCC+7etGcl0f2qi/JjImh5NQp7Ly98XvkEbzHj8exbp0qjzk1+QgpG/5J6OV1dFTFHHFsw+VOf6VFxFjq2km9lxC3Q5IwIYSoBlpr9l/eT2RCJN+nfI+DwYHhjYYzrfk0Gnr/7/Y8xcePkxEdTfbqNeiCApxbtaLW++/hOXgwBiennzmDTYLmzP6N5G79kOa5u/HCjoNeffHpO4sWraW/lxB3SpIwIYSoQiaLic1nNxN5JJIj6UfwdvLmsVaPMaHZBPxdrPdo1EYjuVu2kLk4moJ9+1COjngOGWK9iXbLllUeszYVc3xLJE5xn1PfeJIM7cH22jNoPGQWneoFV3k8QtwvJAkTQogqkG/MZ+XxlSxKWMSF/AvU96zP651fZ3ij4bjYuwBgTE0la+kyspYuxZSaikOdOgS++Hu8Ro/G3sfnFmeofMbcKxxf/xE1jy6kqc7kFHX5tumfaDf0MXp5eVV5PELcbyQJE0IIG7qcf5nFSYtZfnQ5ucZc2gW24+WOLxNRLwKDMlgL7ePirL29Nn0DJhNuPXpQ8603ce/ZE1UN9VX55+I5u+H/aHhhLc0pYZ9dW35q/z6d+4+hoYN8bQhRWeT/JiGEsIGjGUeJjI/kq9NfYcFCv6B+TA+bTquAVgBY8vPJXLuOzJgYio8exeDpie/kyfhMnIBjcHDVB6w16T99TdaWf9Moew8NtAM73Prh1utZOnboisFQdbc3EuK3wqZJmFJqEPAhYAfM1Vq/f932+sB8IADIAKZorc/ZMiYhhLAVrTU7L+wkMj6SPRf34GLvwoRmE5gcOpm6HnUBKD59msyYGLJXfYklNxen0FBq/uVtvIYOxeDqWvVBG4u4sCMKteczahWfxqK9WOf/EA0GPUvfJo2qPh4hfkPuKAlTSrlqrQtuc6wd8CnQHzgH7FNKrdFaJ5Qb9g8gSmsdqZTqA7wHTL2TmIQQorqVmEtYf2o9UQlRnMg6QYBLALPazWJM0zF4OXmhzeayQvv8XbvAwQHPAQOshfZt21bpTbSv0rmXSdn0Md7xC6ltySJJ1+eHBn+i/ZCZDAus+vozIX6LbisJU0p1BeYC7kCQUqo18LjW+qmf2a0jcEJrfar0GLHACKB8EtYceKH05++AL+8oeiGEqEbZxdksPbqU6KRo0grTaOLThHe7v8vg4ME42DlgysggLXIOmbExmC5cxL5GDQKe+x3eY8ZgHxBQLTEbL/zEha//j9pn1xKEie2qPemtZhIxcDTN3Kq45YUQv3G3OxP2L2AgsAZAa31IKdXzFvvUAVLKPT4HdLpuzCFgFNYly5GAh1LKT2udfptxCSFElUvJSWFh4kK+PPElhaZCutbuyrvd36VLrS4opSg8fJgrixeTs+ErtNGIa6dO1HjlFTz69EHZV0MprsVCQcJXZG75N3Uy9xKgndjgOACHrk/Rt0dXnOyluaoQ1eG2/zbQWqdcN2VuroTzvwh8opSaAWwDzld0XKXUY8BjAEFBQZVwWiGEuHOHrhwiMj6SLWe3YFAGhjQYwrTm0wjxDcFSVGTtaB8dTdGRIxhcXfEeOxafSRNxaty4egIuKSB7z0JMuz7Fr+gM2dqXaM+HqNvvSYa3bCrF9kJUs9tNwlJKlyS1UsoBeA5IvMU+54F65R7XLX2ujNb6AtaZMJRS7sBorXXW9QfSWs8GZgOEh4fr24xZCCF+NbPFzNaUrSyIX8DBKwfxcPTgobCHmBQ6iUDXQErOnePyvA/IXr4Cc3Y2jo0aUeP1P+E1YgR27u63PL5N5Fwg7btPcDm8EC9zDoctDVlb+4+0G/IQk4KqZxlUCHGj203CnsC6ZFgHayK1CXj6FvvsA5oopRqU7jMBmFR+gFLKH8jQWluAV7FeKSmEENWuwFjA6pOrWZiwkJTcFOq41+GVjq8wsvFIXOycyd+5k5RFfyZv2zYwGPDo2xefSZNw7dSxWgrtAfSFH7my6d/4Jq/FV1vYQjgpIQ/Tf+AIZvi5VUtMQoibu60kTGudBky+kwNrrU1KqWeAjVhbVMzXWscrpd4G4rTWa4AI4D2llMa6HHmrxE4IIWwqrTCN6MRolh5bSnZxNq38WzGr3Sz6BvWF3DyyFi3lQmwMxjNnsfP3x//JJ/AeNw6HmjWrJ2CLGVPiBrK+/Tf+6XG4ameW2Q3CFP4YwyO60t/VsXriEkLcktL61qt7pbNZzwLBlEvctNYP2CyymwgPD9dxcXFVfVohxH3uROYJohKiWHdqHSaLid71ejOjxQzaBLShOCmJzOhosteuQxcV4dKuHT6TJuE5oD/KsZqSnOI8ivZFUbzzM7wKUzin/VnnPJwavR5jSMcQKbYX4i6hlNqvtQ6vaNvtLkd+CcwD1gKWSopLCCGqldaaHy79QGR8JDvO78DZzplRTUYxtflUgpxrkbNxE2eiP6Dwxx9Rzs54DR+Oz6SJOIeGVl/QWSnkb/8Mu4NROJvziLc0YZvfa7TpP4XHmtWWYnsh7iG3m4QVaa0/smkkQghRRYwWI1+f/pqohCiSMpLwdfbl6TZPMz5kPO6ZRWTOX8LxZcsxp6fjUD+IwFdexnvkSOyq86bV5+LI+e7fuJ3cgJPWfG3pSFLwVAYMHMbzdb2rLy4hxC92u0nYh0qpN7AW5BdffVJrfcAmUQkhhA3klOSw4tgKFiUuIrUglYZeDXmr61sMaTAEc9xBMv/wBpe+/RYsFtwjIvCZ
NAm3bl1RBkP1BGw2oRPXkrv1QzzTfgTtSiRDyG75EGP6dGGYbzXc5kgIUWluNwlrifV2Qn3433KkLn0shBB3tfN551mUsIiVx1dSYCqgU81OvNHlDbrV6Eze15u48OIEio8exc7bG7+HZuA9YQKOdetWX8BF2Zj3R1G84zNcCy+QaQnkv/YP49VlBuO6heItxfZC3BduNwkbCzTUWpfYMhghhKhMR9KOEBkfyTdnvkGhGNhgINObTyfENZislSs5Pf9tjOfP49ioEbXefRfPYUMxOFXjrXsyTlOy6z/w4yIczfn8ZGnGercZtOw9nt+1qyfF9kLcZ243CTsCeAOptgtFCCF+PYu28H3K90QmRLL/8n7cHdyZ1nwak0InEWByITM6mhMLF2HOyMClTRtq/PE13CMiqm/JUWs4u4ei7R/jeOIrlFastXRhX43x9O83kDebBkqxvRD3qdtNwryBJKXUPq6tCavyFhVCCFGRIlMRa06uYWHCQpJzkqnlVos/hP+BUU1G4ZSZT8YnkZxYsgRLQQFuvXri/+ijuLRvX22NVTEbIf5LCrd/hMuVwxRqd+abh3G+yRTG9+3EKCm2F+K+d7tJ2Bs2jUIIIX6h9MJ0Yo/GsiRpCZnFmYT5hfH3nn+nf/3+mJPPkv7We2SvWQsWC55DhuA38xGcQ0KqL+DCTHTcAkp2fY5T4SUuWGqxiJnYt5vEtJ6h1JNieyF+M263Y/73tg5ECCHuxKnsU0TFR7H25FpKLCVE1I1geth02tdoT9FPP3HpuefJ3bwF5eiIz7hx+D70EI5161RfwGknMO/5DP1jNPbmQvaZw1jm8DBNe47kuc7BUmwvxG/QzyZhSqkdWuvuSqlcrFdDlm0CtNba06bRCSFEOVpr4i7HsSB+AdvObcPJzokRjUcwtflUgj2Dyd+xk7MvP0TBDz9g8PTE74nH8Z0yBXs/v+oKGJK3Y9r5CXYnNmHGjtWmrnzjNZp+EX35e9vaUmwvxG/YzyZhWuvupf/1qJpwhBDiRkaLkU3Jm4iMjyQxIxFfZ1+eav0U45uNx8fek5yNGzk99/cUJyZiX6MGgS+/jPfYsdi5V9NNq03FcGQFxp2f4nDlCNl4ssg0kvjaY5jQJ5zPpdheCMFtLkcqpRZqrafe6jkhhKhMeSV5rDhuba56Kf8SwZ7B/LnLnxnecDiOJsj+8ktOzpuPMSUFxwYNqPXuu3gNH1Z993PMT4e4+Zh+mI19QSqndV3mmx6lKHQMD/VqxnP1vKsnLiHEXel2C/PDyj9QStkD7Ss/HCGEgIt5F1mcuJjlx5eTb8wnvEY4f+r0J3rU7YHOzSNzXhQZUVGY09NxbtWKwJf+gEffvtXXZiI1Cb3nMyyHYrEzF7PD3JqFzKRe+FCe7tFQiu2FEBW6VU3Yq8BrgItSKufq00AJMNvGsQkhfmPi0+OJjI9kU/ImAAYED2B62HTC/MIwXk7lyj/+j6zYJVjy83Hr3h2/Rx/FtWOH6mkzoTWc/BbL7k8xnNxCCY6sMHVnldMDRPTqwf91CpJieyHEz7pVTdh7wHtKqfe01q9WUUxCiN8Qi7aw/dx2IhMi2XdpH24ObkwOncyU0CnUcq9F8enTXPz362R/uRptNuM5aJC1zUTz5tUTsLEIDi/BvPsz7NKSyMCbBcax7PIZwYRebVgkxfZCiNt0uy0qXlVK1QHql99Ha73NVoEJIe5vxeZi1p5cS1RCFKezT1PDtQa/b/97RjcdjYejB4U/HeHc3L+Ru2kTysEBrzGj8XvoIRyDgqon4LxU2DcX89652BWmc5z6zCl5gktBQ3gkohkvSLG9EOIO3W5h/vvABCABMJc+rQFJwoQQdySjKIMlR5cQmxRLRlEGob6hvN/jfQYED8Be2VOwezdn5syhYPceDB4e+D32GL5Tp2Dv7189AV86Ans+w3J4GViMbLW0ZZ7pCXzD+vJoz0a0lmJ7IcQvdLuF+SOBEK118S1HCiFEBU5nn2ZhwkLWnFxDsbmYnnV7Mr35dDrU7AAWC7mbviF99hyKEhKwDwgg8A9/wHv8OOzc3as+WIsFTnyD3v0p6vT3FCtnYo29iFFD6dyhI3/r3kCK7YUQv9rtJmGnAAfK3TdSCCFuRWvN/sv7iUyI5PuU73EwODC80XCmNZ9GQ++GWIqLyVq6jPT58zCeOYtjcDA1//I2XiNGYKiONhMl+XAoBr3nc1T6cdKVH3ONE9jkPIjRfVsSK8X2QohKdLtJWAFwUCm1hWtv4P07m0QlhLinmSwmNp/ZTGR8JEfSj+Dt5M3jrR9nfMh4/F38MefmkjZnjrXNxJU0nFu0IPDDD/Ho1xdlVw1F7TkXYe9sdNwXqKJMklQj/lPyDEm+vXlkSFM2tKmDs4MU2wshKtftJmFrSv8IIcRN5RvzWXFsBYsTF3Mh/wL1PevzeufXGd5oOC72LpiuXCH1s3+SGRODJS8Pt65d8fv733Ht3Ll62kxc+BF2f4aOX4m2WNhCOP8tHoRd/S48HtGICCm2F0LY0O1eHRlp60CEEPeuS/mXiE6MZvmx5eQac2kX2I6XO75MRL0IDMpAyZkzXJz/BdmrVqFNJjwGDsDvkZm4tAi79cErm8UMRzdY673O7qbQ4EqMsT+RpgG0bNmG13s0lGJ7IUSVuN2rI09z7Q28AdBaN6z0iIQQ94ykjCQi4yP5+vTXWLDQv35/pjefTsuAlgAUxseTPncuuRs3oezs8Bo1Cr+HH8Kxfv2qD7Y4F35chGXP5xiykrlsqMEc4xS+cujHsC7NWNS5vhTbCyGq1O0uR4aX+9kZGAv4Vn44Qoi7ndaaHed3EBkfyQ+XfsDV3pUJzSYwpfkU6rjXQWtN/p49pM+eQ/6uXRjc3fF75GF8pk7FITCw6gPOOgs//BfL/kgMJbkcJoTPS2aR7N+LaQMbs7ltbVwdb/evQiGEqDy3uxyZft1T/1ZK7Qf+XPkhCSHuRsXmYtafWk9UfBQns08S6BrI8+2fZ0zTMXg6eqLNZnI2biJ9zhyKjhzBLsCfgN+/gM+ECdh5eFR9wCl70bs/hcQ1WDRsMHdinnkwgc26MaNrMF0a+VVPHZoQQpS63eXIduUeGrDOjMk/HYX4DcgqymLJ0SXEJMWQXpROiE8If+3+VwYFD8LBzgFLSQlZy5eTPnceJcnJOAQFUfOtt/B6cAQGJ6eqDdZsgsTVWHZ9iuHCfvKVG4uMQ1lhP5jeXdrxsSw5CiHuIrebSP1fuZ9NQDLWJUkhxH3qbM5ZohKiWH1iNUXmIrrV6caMsBl0qtkJpRTmvDzSlywkIzISU2oqzs2bU+df/8RjwICqbzNRmAUHIjHv+S92uec5R03mGGdwyG8wE7o1Z7UsOQoh7kK3uxzZu/xjpZQd1tsYHbNFUEKI6qG15uCVgyw4soDvUr7D3mDPsIbDmNZ8Go19GgNgSksjY+Eia5uJnBxcu3Sm1nt/xa1r16pf3ks/id7zHyw/LsbOVMBeS3PmmSdg13QQ07s15G1ZchRC3MV+NglTSnkCTwN1gNXA5tLHvwcOA4ttHaAQwvZ
MFhNbzm4hKj6Kw2mH8XLyYmbLmUwKnYS/i/WejSUpKaTPn0/2ylXokhI8+vfH79GZuLRsWbXBag1ndmLe9SmGY19hwo415i7E2g2nbeeevCFLjkKIe8StZsIWApnAbuBR4I+AAkZqrQ/aNjQhhK0VGAtYdWIVCxMWcj7vPPU86vFap9cY0WgErg7WRKYoMZH0OXPJ+fpra5uJB0fg+/DDODVoULXBmkogfiXGnZ/gkPoTOXiw0DSCXT4P8kD39kTKkqMQ4h5zq7+xGmqtWwIopeYCF4EgrXWRzSMTQthMakEq0YnRLD22lNySXNoGtuUP4X8gol4EdgY7a5uJH/aSPncu+du3Y3B1xfehGfhOm45DjSpuM5Gfjo6bj3HPbBwLU0nWdZhvmklOk1FM7h7Cs7LkKIS4R90qCTNe/UFrbVZKnZMETIh719GMo0QlRLHh9AYs2kLfoL5MD5tO64DWAGiLhdzNm0mbM4eiQ4ex8/MjYNYsfCZOwM7Lq2qDvXIU065PUYdisbMUs9vcili7mdTrOJynugTLkqMQ4p53qySstVIqp/RnBbiUPlaA1lp72jQ6IcSvprVm14VdRMZHsvviblzsXRjXdBxTmk+hnkc965iSErLXriN93jxKTp3CoV49ar7xZ7xGjsTg7FyVwcKp7yja/jHOyd9ixoGVpu586z2a3j168X+y5CiEuI/87N9mWusqvs5cCFFZSswlbDi9gaiEKI5nHifAJYDn2j3H2KZj8XKyzmqZ8/LJWraMjAULMF2+jFOzZtT+v3/gOXAgyr4Kkx1jEfrwEgq3f4pr1lFytRefmcdwrtFExvRow2xZchRC3Ifkn5RC3Geyi7NZdmwZ0YnRXCm8QhOfJrzT7R2GNBiCg50DAKaMDDIWLiQzOgZLdjauHTtS6513cOverWqTnbxUTHtmY9o7D+eSDJIt9Yk2PIVHhwlM6tpElhyFEPc1ScKEuE+k5KSwMHEhX574kkJTIV1rd+Wdbu/QpXaXssSq5Nx5Mr74gqwVK9DFxXj064vfzJm4tG5dtcFeOkLBto9wTFyJQZvYbm7LJs8XaNvjAV5rV0eWHIUQvwnyN50Q97iDqQeJSohiy9ktGJSBIQ2GMK35NEJ8Q8rGFB09SvrceeRs2AAGA14PDMfvkUdwatiw6gK1WNDHN5K79SM8L+4C7US0JYJjwVMY0qs7f5clRyHEb4wkYULcg8wWM9+lfMeC+AUcunIID0cPHm7xMBObTSTQ1dpCQmtN4f79pM2ZQ/7321CurvhOnYrvjOk41KxZdcGW5GM8EE3R9k/wyE8mX/syT01Ct5/O2O6tmCZLjkKI3yhJwoS4hxQYC/jyxJcsSlxESm4Kddzr8ErHVxjZeGRZc1VtsZC3dSvpc+ZS+OOP2Pn4EPDc7/CZOBE7b++qCzbnAnnbP8PuQCQu5hziLQ3Z4PZ7GvSczOPt68uSoxDiN0/+FhTiHnCl4AoxSTEsObqEnJIcWgW0Yla7WfQN6oudwXoRszYayV63nvR5cyk5cRKHOnWo8fqf8B41CoOLS5XFqs8fIGPLv/E+tQ4XbWGTpQOH602mR++hvNrYX5YchRCilCRhQtzFjmceJyohivWn1mOymMqaq7YJbFM2xlJQQNby5aR/sQDTxYs4NW1K7Q8+wHPwoKprM2ExUxK/luzvPiQg4wCO2oXFahC5bR7hgV5dGCxLjkIIcQOb/g2tlBoEfAjYAXO11u9ftz0IiAS8S8e8orXeYMuYhLjbaa3Zc3EPkfGR7LywExd7F0Y3Gc3U5lMJ8gwqG2fKzCRz0WIyFy3CnJ2Na3g4td58A7eePatutqk4l+xdX6D3/Afv4gsUWQL43PURfLvPZEzHprLkKIQQP8Nmf0MqpeyAT4H+wDlgn1JqjdY6odywPwFLtdb/UUo1BzYAwbaKSYi7mdFs5Kvkr4iMj+RY5jH8nP14tu2zjGs6Dm9n7/+Nu3CB9C8WkLV8ObqwEPc+ffCbORPXdm2rLFadmczlzR/hlRiLlyWfOEtT4mq+Qat+k3m8SaAsOQohxG2w5T9TOwIntNanAJRSscAIoHwSpoGrtz7yAi7YMB4h7ko5JTksO2ptrppamEpj78a83fVthjYciqOdY9m44uPHSZ87j+z16wHwGjYMv5mP4NS4cdUEqjXFybtJ3fQval/cjJ9WfKM6c7n5I/TrN5gnZMlRCCHuiC2TsDpASrnH54BO1415E9iklHoWcAP62TAeIe4q53LPsThxMSuOr6DQVEjnWp15q9tbdKt9bdf6ggMHSJ8zl7zvvkO5uOA7eRK+06fjULt21QRqNpK5fzmF2z6mdl48ntqVZc4jceryBAO7tpMlRyGE+IWq+2/PicACrfX/KaW6AAuVUi201pbyg5RSjwGPAQQFBVVwGCHuHYevHCYyPpLNZzdjwMDgBoOZFjaNZr7NysZorcn7/ntrm4n9+7Hz9sb/2WfwmTQJex+fKolTF2Rybst/cD80Hx/TFbIsNVkc8Dsa9X+U8SH1ZMlRCCF+JVsmYeeBeuUe1y19rrxHgEEAWuvdSilnwB9ILT9Iaz0bmA0QHh6ubRWwELZitpjZem4rUfFRHEg9gIeDB9PDpjOp2SRquv2vcao2Gsn56ivS58yl+Phx7GvXosYf/4j36FEYXKtmua/48jFSNvyTOmdWUY8i9hJGcpOX6TJoIpP93KskBiGE+C2wZRK2D2iilGqANfmaAEy6bsxZoC+wQCkVCjgDV2wYkxBVqtBUyJoTa1iYuJAzOWeo7Vablzu8zMgmI3FzcCsbZyksJGv5CtK/mI/pwkWcmjSm9t/ex3PIEJSDg+0D1Zr0+C1kbPk3jTJ3UE/b8b1jT0wdnySiVx86ypKjEEJUOpv9zaq1NimlngE2Ym0/MV9rHa+UehuI01qvAX4PzFFKPY+1SH+G1lpmusQ9L60wjdikWJYcXUJWcRYt/FrwQa8P6BfUD3vD//63M2dlkbF4MZmLFmPOzMSlXTtqvv467r16oQwGm8epTcWc3hqFw77PqVd8ArQHG3wmU6PvM/Rv0UyWHIUQwobUvZbzhIeH67i4uOoOQ4gKncw6SVRCFOtOrsNoMRJRL4LpYdNpF9jumoTGePEiGQsiyVy2DF1QgHtEBH6PzsS1ffsqibMoO5UTGz6i9rFF+OpMTlCXo8FTaT3kUeoG+lVJDEII8VuglNqvtQ6vaJusMQjxK2mt2XtpL5HxkWw/vx0nOycebPwgU5tPJdgr+JqxxSdPWttMrF0LWuM1bCi+jzyCc9OmVRLrlVMHufD1PwlJ/YoWlLDPvh0H27xP5/6jaexUBcueQgghykgSJsQvZLQY2Zi8kaj4KBIzEvF19uXpNk8zPmQ8Ps7XXsFYePAgaXPmkrdlC8rZGZ+JE/GbMR2HOnVsHqe2WDi2ew3mnZ/QvGAfHtqBvZ4D8Oj1LOHtO8uSoxBCVBNJwoS4Q7kluSw/tpzFiYu5XHCZhl4NebPLmwxrNAwnO6eycVpr8rdvJ332HAri4rDz8sL/qafwmTqlStpMFBXkceTrOfjHzy
fEfJYrePN9ncdoPOR39KxT79YHEEIIYVOShAlxmy7kXWBR4iJWHl9JvjGfjjU78ucuf6Z7ne4Y1P+K6LXJRM5XX5M+dy7FR49iX7MmNV59Be8xYzC4uf3MGSrH5QtnOLn+34SeX044OZwwNGB3q3dpPegherna/vxCCCFujyRhQtxCfFo8kfGRbDqzCYBBDQYxrfk0mvs1v2acpaiIrJUryZg3H+P58zg2akSt997Da+gQlKNjRYeuNFpr4n/cSd7Wj2ibvYXOmDns1oXzXZ8mrOsQGlfBlZZCCCHujCRhQlTAoi1sO7eNBfEL2H95P+4O7kxtPpXJoZOvaa4KYM7OJjMmhoyohZgzMnBp3Zoar72Ke+/eNm8zUVRiJO6bWDx+nE1r02EKcOKnGg9SZ9DztGnYwqbnFkII8etIEiZEOUWmItacXMPChIUk5yRTy60WL4a/yOgmo3F3vLZbvPHyZTIWRJK1ZAmWggLcevXEf+ZMXMLDbV7sfiktjZ/Wf06T04vpzgWuKH9+DHmekCHPEO7lb9NzCyGEqByShAkBXMq/xNKjS1lxfAUZRRk092vO33r8jf7B/XEwXNu6ofjUKdLnzSN7zVqwWPAcPBi/mY/g3KzZTY5eObTWHEpIIHXzx3TMWEN/lc9pp2Yc7fASTSMmE2Bv2yVPIYQQlUuSMPGbpbVm36V9xB6N5duz32LRFnrV68W05tMIr3HjbFbh4cOkz5lD7uYtKEdHfMaOxffhh3CsW9emcRYZzez4fiP2ez+nW/EOWioLx3x7U9xvFg2a9wRpMSGEEPckScLEb06BsYC1J9cSkxTDyeyTeDl5MT1sOuNCxlHH/dq+XVpr8nfuIn3OHAp++AGDpyd+TzyO75Qp2PvZtrP8xcw89n4VRdCxSPqRRD6unGgwmeAhzxMa2NCm5xZCCGF7koSJ34zT2adZcnQJq0+sJs+YR6hvKH/p9hcGBQ/C2d75mrHaZCJ30ybS5s6lOCER+8BAAl96Ce9x47Bzt12bB601B46f5fQ3n9MpdRkj1BWu2NfidJs/EdzvMUKdvWx2biGEEFVLkjBxXzNbzGw7t43Yo7HsurALe4M9g4IHMaHZBFr5t7phydFSXEz2qlWkz5uPMSUFxwYNqPXuO3gOH47Bhm0mioxmtuzeR8nOz+hXtIn2qpAUj9ak9XyPgPBRBBjsbHZuIYQQ1UOSMHFfyirKYuWJlSxJWsKF/AsEugbybNtnGdVkFP4uN149aExNJWvpMjJjYzGnpeHcqhWBL/0Bj759bdpm4mJWAd99s5bA+PkM0j+glYFztQfiMOgF6tXvYLPzCiGEqH6ShIn7Snx6PLFJsXx1+iuKzcV0qNmBFzu8SO96vbE3XPtx11pTeOAAmYsXk7PpGzCZcOvZA79HZuLasYPN2kxordl/OpXDm6JofyGGSYaT5Bs8uBj6OHUGPEuwl20L/YUQQtwdJAkT97wScwmbzmwiJimGw1cO42LvwoONH2R8yHia+DS5YbyloIDsdevIXBxN8dGjGDw98Z0yBZ+JE3CsX99mcRYZzXy9L5HM7XMYWLCGcJVBumsQGZ3fx7frNNwc5ZZCQgjxWyJJmLhnXcq/xLJjy1h+bDkZRRkEewbzSsdXeKDRA3g4etwwvuTMGTJjYslauRJLTg5OISHUfPstvIYNw+DqarM4L2YXsu677Xgemsdwy3e4qmIu+XeiqO8s/JoNArmlkBBC/CZJEibuKVpr4i7HEZMUc01vr4khE+lcu/M1N9IG0BYL+du3k7F4Mfnbd4CdHZ4D+uMzeTIu7drZdMkxLjmDnVu+pMXZRTyifsSs7Mls9AAu/WdRs1Yrm5xXCCHEvUOSMHFPKDAWsO7UOmKSYjiRdQIvJy+mhU1jXNNx1PW4sYbKnJ1N1spVZMbEYDx7FrsAf/yfegrvceNwqBFosziLjGbWH0jmzLaFDMxdySzDGfIdfcht9zxePZ4g0KOGzc4thBDi3iJJmLirVdTb6+2ubzO4weAbensBFCUlkbl4Mdlr16GLinBp357AWc/h0a8fyoYtJi5mF7Ji+0EM+79gjGUjo1UWWZ6NKe75IW5tJ4DDjbEKIYT4bZMkTNx1zBYz289vJyYppqy318DggUxsNrHC3l66pIScb74hMzqGwv37Uc7OeA0fjs/kSTa9n6PWmrgzmWz87juanFrITMMOnJWRzDq90H1m4d2ot9xSSAghxE1JEibuGllFWaw6sYolR5dwPu88ga6BPNPmGUY3HV1xb6/LqWQtXUrm0iWYr6ThUK8egS+/jPeokdh52a6zfJHRzNqD5zn8/Ur6Za/gT3aHMdo7URw2AXo9i09AiM3OLYQQ4v4hSZiodgnpCcQkxVzT2+v34b8nol4EDgaHa8ZqrSncv5+MxYvJ/WYzmM249eyB7+TJuHXvbtvGqtmFxO48St6+aMab1zHWcJ4C1wBKOr+GY8eZOLjZ9l6SQggh7i+ShIlqYTQby3p7HbpyCBd7F0Y0GsGEZhPuqt5eV5ccV247QJ3ji5hm2IyfyiXPLwzd63VcW4wGe9vVmgkhhLh/SRImqtTV3l4rjq0gvSid+p71b93bKzqGrFWrrL29mjWj5l/etvb2cnGxWZxFRjNrD11g27Zvichcxtt2u7C3s1DYcAD0/B3u9btJvZcQQohfRZIwYXMV9vaq24uJzW6jt9e27WBvj+eAAfhMnmTT3l5gXXJctPs05/euZrxxDR/bJWB0dIG2D6O6PImrXyObnVsIIcRviyRhwmYq7O3VfBrjQm7S2ysr63+9vVJSsA8IwP+ZZ/AeNxaHQNv19jJbNNuOXWHFnqP4nVjBDMNXNDBcotijFrrr2zi0nw4u3jY7vxBCiN8mScJEpUvOTmbJ0SV8eeLL2+vtlZhIxuLF5Kxbb+3tFd6ewBeet/b2cnCo4AyV43xWIUv3pbB5308MLFjDO/ab8bbPo7hGO+jxV5xCR4Cd/C8ihBDCNuQbRlQKs8XMjvM7iEmKYeeFndgb7BlQfwATm02kdUDrm/f2WhxN4YEDVdbby2i28G1SKrF7z3Lm+GFm2q1nld12HOxN6JAh0O05nII62ez8QgghxFWShIlf5Rf19lqyhMxlS629vYKCCHzlZbxH2ra315n0fJbsS2HZ/nPUy/uJ3zlvoJfjPrBzRLWZDF2eRfk3ttn5hRBCiOtJEiZ+kcT0RGKSYthwegPF5mLCa4TzQvsX6B3U+67p7VVsMrMp/jKx+86y68QVBtjtJ8Z9E42d4tFO3qiOL0LHx8DddvVmQgghxM1IEiZum9Fs5Jsz3xCTFMPBKwdxsXfhgUYPMKHZBJr6NL1hvKWggOy168iMLtfba+pUa2+voCCbxXkiNY/YvWdZ+eN58vPzeNj9Bz7xWY9P4VlwCYI+f0e1nQKObjaLQQghhLgVScLELV3Ov8yyY8tYfmx5WW+vlzu8zAONH8DT0fOG8WW9vVauxJKbi1OzZtR65y94Dh1qs95eRUYz6w9fJHbfWfYlZ+JnyOeNmrsZ7LAax6J0CGgDQ98EKbYXQghxl5BvI
[... base64 PNG data omitted: 'Runtime vs. Path Length' figure for this cell ...]\n",
      "text/plain": [
       "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "%matplotlib inline\n", + "seed_idx = list(range(2,max_depth +1))\n", + "\n", + "plt.figure(figsize=(10,5))\n", + "\n", + "for i in range(len(data)):\n", + " plt.plot(seed_idx, time_algo_cu[i], label = names[i])\n", + "\n", + "\n", + "plt.title(f'Runtime vs. Path Length ({num_seeds} Seeds)')\n", + "plt.xlabel('Path length')\n", + "plt.ylabel('Runtime')\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "12979" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "del time_algo_cu\n", + "gc.collect()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test 2: Runtime versus number of seeds\n", + "The number of seeds will be increased over a range in increments of 10. \n", + "The runtime will be the sum of runtime per increment. Increaing number of seeds by 1 would make for very long execution times " + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Reading ./data/preferentialAttachment.mtx...\n", + "\t100,000 nodes, 499,985 edges\n", + "\t.................................................................................................... \n", + "Reading ./data/dblp-2010.mtx...\n", + "\t326,183 nodes, 807,700 edges\n", + "\t.................................................................................................... \n", + "Reading ./data/coPapersCiteseer.mtx...\n", + "\t434,102 nodes, 16,036,720 edges\n", + "\t.................................................................................................... \n", + "Reading ./data/as-Skitter.mtx...\n", + "\t1,696,415 nodes, 11,095,298 edges\n", + "\t.................................................................................................... 
\n" + ] + } + ], + "source": [ + "# some parameters\n", + "rw_depth = 4\n", + "max_seeds = 1000\n", + "\n", + "# arrays to capture performance gains\n", + "names = []\n", + "\n", + "# Two dimension data\n", + "time_algo_cu = [] # will be two dimensional\n", + "\n", + "i = 0\n", + "for k,v in data.items():\n", + " time_algo_cu.append([])\n", + " \n", + " # Saved the file Name\n", + " names.append(k)\n", + "\n", + " # read data\n", + " G = read_and_create(v)\n", + " \n", + " num_nodes = G.number_of_nodes()\n", + " nodes = G.nodes().to_array().tolist()\n", + " \n", + " print('\\t', end='')\n", + " for j in range (10, max_seeds +1, 10) :\n", + " print('.', end='')\n", + " seeds = random.sample(nodes, j+1)\n", + " t = run_rw(G, seeds, rw_depth)\n", + " time_algo_cu[i].append(t)\n", + "\n", + " # update i\n", + " i = i + 1\n", + " print(\" \")\n", + " \n", + " del G\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAmcAAAFNCAYAAABFbcjcAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Z1A+gAAAACXBIWXMAAAsTAAALEwEAmpwYAAByJUlEQVR4nO3dd3gU1dvG8e/ZFFJJp/cOoQQIHaRI71WQInbAiqgodn3t+rNhQUTEBgoICkjvvQQIvYQSIKETCAmpu/u8f2SJCR1N2BCez3VxsTtzzsw5O9lwM3PmjBERlFJKKaVU3mBxdgOUUkoppdQ/NJwppZRSSuUhGs6UUkoppfIQDWdKKaWUUnmIhjOllFJKqTxEw5lSSimlVB6i4UwpddOMMaWMMYnGGBdntyUvMsaUMcaIMcbVSftvYoyJchyj7s5og6Md9xtjVjpr/0rdrjScKZVPGGOijTHJjn+QjxtjJhhjfHJw260vvheRwyLiIyK2nNh+bnOEBDHGjLxkeYwxpoVzWpWr3gK+dByjPy9daYxpaoxZbYyJN8bEGWNWGWPq3fpmKqWuRMOZUvlLFxHxAcKA2sAo5zYnT4kDRhpjfJ3dkJvxL8++lQZ2XGV7BYFZwGggECgOvAmk/ts2KqVyloYzpfIhETkOzCMjpGGMaWGMiclaJuvZMGPMG8aYycaYn4wxCcaYHcaYcMe6n4FSwEzHWbmRl162M8YsNca87Tgbk2iMmWmMCTLG/GqMOW+M2WCMKZNl31WMMQscZ232GGPuuVI/jDF9jTERlyx7xhgzw/G6ozFmp6PNscaY567xsewC1gAjrrKvCcaYt7O8z/aZOT6v540xW40xF4wx3xtjChtj5jj2v9AYE3DJZh80xhw1xhzL2jZjjMUY86IxZr8x5ozjsw90rLv42T5kjDkMLL5Kex8xxuxzfIYzjDHFHMv3A+X453gVuKRqJQARmSQiNhFJFpH5IrI1y7YfNMbsMsacNcbMM8aUzrLuqsfOccxnOI75eqB8lnXGGPOpMeakY/02Y0z1K/VNqTudhjOl8iFjTAmgA7DvJqp1BX4D/IEZwJcAIjIIOIzjrJyIfHiV+v2AQWSciSlPRhD6gYyzM7uA1x1t8wYWABOBQo56Xxtjql1hmzOBysaYilmW9XfUBfgeGCIivkB1rhJksngVGH4xCP0LvYA2ZAScLsAc4CUghIzfp09dUr4lUBFoC7yQ5dLwk0B3oDlQDDgLfHVJ3eZAVaDdpY0wxrQC3gPuAYoCh8g4dohIebIfr0vPiO0FbMaYH40xHS4NlMaYbo4+9XT0awUwybHuesfuKyDF0aYHHX8uagvc5fjs/BxtP3Np35RSGs6Uym/+NMYkAEeAkzgC0Q1aKSKzHePIfgZq3eS+fxCR/SIST0Zo2S8iC0XECkwh4zIrQGcgWkR+EBGriGwG/gD6XLpBEUkC/gLuBXCEtCpkhEeAdKCaMaagiJwVkU3XaqCIRJIRLl64yb5dNFpETohILBmhZZ2IbBaRFGB6lj5e9KaIXBCRbWQE1Xsdy4cCL4tIjCM8vQH0vuQS5huOuslXaMcAYLyIbHLUHwU0ynp28mpE5DzQFBDgO+CU42xX4Sxte09EdjmO3btAmOPs2VWPncm4OaQX8Jqj3duBH7PsOh3wJeP4Gcf2j12vvUrdiTScKZW/dHecRWpBxj+CwTdR93iW10mAx02OdzqR5XXyFd5fvDmhNNDAGHPu4h8ywkaRq2x3Iv+Emv7An47QBhlhoCNwyBizzBjT6Aba+RowLEsYuRk32seLjmR5fYiMs2SQ8RlMz9L/XYANKHyVupcq5tgeACKSSMZZqOLX7wI4gtH9IlKCjDOOxYDPsrTt8yxtiwOMY9vXOnYhgOsV+nxxn4vJOBv7FXDSGDPWZIx/U0pdQsOZUvmQiCwDJgAfOxZdALwurnec5Qi5mU3mWOMy/vFeJiL+Wf74iMiwq5RfAIQYY8LICGkXL2kiIhtEpBsZl9j+BCZfb+cishuYBrx8yapsnxFXD4s3o2SW16WAo47XR4AOl3wGHo4zcplNvcZ2j5IRlIDMy41BQOxVa1yF4/OYQEZIu9i2IZe0zVNEVnPtY3cKsF6hz1n39YWI1AWqkXF58/mbba9SdwINZ0rlX58BbYwxtcgYZ+RhjOlkjHEDXgEuHSh+LSfIGGSeE2YBlYwxg4wxbo4/9YwxVa9UWETSybgs+hEZ49cWABhj3I0xA4wxfo4y5wH7DbbhTeABMsbXXRQJdDTGBBpjigDDb75rl3nVGONljAl17O93x/IxwDsXB9obY0IcY71u1CTgAWNMmGPA/7tkXGKNvl5Fx4D+Zx3jEjHGlCQj9K7N0rZRjjZjjPEzxly85HzVY+e4HD4NeMPR52rA4Cz7rWeMaeD4+btAxti0Gz1eSt1RNJwplU+JyCngJzLGAMUDjwHjyDi7cgGIuUb1S70HvOK4lHWtOyJvpF0JZAwO70fGGaDjwAdcOyxOBFoDUxzjoC4aBEQbY86TMVZqwA224SAZ4+q8syz+GdgCRAPz+SdI/
RfLyLgpYxHwsYjMdyz/nIxxc/MdYwTXAg1udKMispCMmxv+AI6RcQNGvxusnuDY1zpjzAXHvrcDzzq2PZ2M4/Gb43PdTsbNJTdy7J4g49LucTLOxv2QZb8FyRjjdpaMy51nyAjcSqlLGJGcvFqhlFJKKaX+Cz1zppRSSimVh2g4U0oppZTKQzScKaWUUkrlIRrOlFJKKaXyEA1nSimllFJ5yM3M/p3nBQcHS5kyZZzdDKWUUkqp69q4ceNpEblsQvB8Fc7KlClDRESEs5uhlFJKKXVdxphDV1qulzWVUkoppfIQDWdKKaWUUnmIhjOllFJKqTwkX405u5L09HRiYmJISUlxdlNUPuPh4UGJEiVwc3NzdlOUUkrlI/k+nMXExODr60uZMmUwxji7OSqfEBHOnDlDTEwMZcuWdXZzlFJK5SP5/rJmSkoKQUFBGsxUjjLGEBQUpGdklVJK5bhcO3NmjBkPdAZOikj1K6x/HhiQpR1VgRARiTPGRAMJgA2wikj4f2zLf6mu1BXpz5VSSqnckJtnziYA7a+2UkQ+EpEwEQkDRgHLRCQuS5GWjvX/KZjd7lasWEFoaChhYWEkJyfn2n4mTJjA0aNHM98//PDD7Ny585p1WrRokW1eucjISIwxzJ07N3NZdHQ0EydOzFZm9uzZ/7qdZcqU4fTp0/+6/s36r+1VSimlblauhTMRWQ7EXbdghnuBSbnVlrzOZrNddd2vv/7KqFGjiIyMxNPT87rbEhHsdvtNt+HScDZu3DiqVat2U9uYNGkSTZs2ZdKkfw5lToezW+12a69SSqnbn9PHnBljvMg4w/ZHlsUCzDfGbDTGPOqcluWM6OhoqlSpwoABA6hatSq9e/cmKSmJMmXK8MILL1CnTh2mTJnC/PnzadSoEXXq1KFPnz4kJiYybtw4Jk+ezKuvvsqAARlXgD/66CPq1atHzZo1ef311zP3UblyZe677z6qV6/OkSNHrlquatWqPPLII4SGhtK2bVuSk5OZOnUqERERDBgwIPMMXdazYsOGDSM8PJzQ0NDMbV1KRJgyZQoTJkxgwYIFmWOxXnzxRVasWEFYWBgffPABr732Gr///jthYWH8/vvvrF+/nkaNGlG7dm0aN27Mnj17gIzA+txzz1G9enVq1qzJ6NGjM/c1evRo6tSpQ40aNdi9ezcAb7zxBoMHD6ZZs2aULl2aadOmMXLkSGrUqEH79u1JT08HYOPGjTRv3py6devSrl07jh07BmScBXzhhReoX78+lSpVYsWKFaSlpV3WXqWUUvnbwfiD/LHDyb/vRSTX/gBlgO3XKdMXmHnJsuKOvwsBW4C7rlH/USACiChVqpRcaufOnZctu5UOHjwogKxcuVJERB544AH56KOPpHTp0vLBBx+IiMipU6ekWbNmkpiYKCIi77//vrz55psiIjJ48GCZMmWKiIjMmzdPHnnkEbHb7WKz2aRTp06ybNkyOXjwoBhjZM2aNdct5+LiIps3bxYRkT59+sjPP/8sIiLNmzeXDRs2ZLY76/szZ86IiIjVapXmzZvLli1bLiuzcuVKadWqlYiI3HvvvTJ16lQREVmyZIl06tQpc7s//PCDPP7445nv4+PjJT09XUREFixYID179hQRka+//lp69eqVue5iG0qXLi1ffPGFiIh89dVX8tBDD4mIyOuvvy5NmjSRtLQ0iYyMFE9PT5k9e7aIiHTv3l2mT58uaWlp0qhRIzl58qSIiPz222/ywAMPZPZlxIgRIiLy999/y913333F9l7K2T9fSimlckbkyUh5buYw+V//ajKvWTWJT4zL9X0CEXKFbJMXptLoxyWXNEUk1vH3SWPMdKA+sPxKlUVkLDAWIDw8XK61ozdn7mDn0fM50eZM1YoV5PUuodcsU7JkSZo0aQLAwIED+eKLLwDo27cvAGvXrmXnzp2ZZdLS0mjUqNFl25k/fz7z58+ndu3aACQmJhIVFUWpUqUoXbo0DRs2vG65smXLEhYWBkDdunWJjo6+bh8nT57M2LFjsVqtHDt2jJ07d1KzZs1sZSZNmkS/fv0A6NevHz/99BO9evW67rbj4+MZPHgwUVFRGGMyz3AtXLiQoUOH4uqa8SMaGBiYWadnz56Z7Z82bVrm8g4dOuDm5kaNGjWw2Wy0b58x5LFGjRpER0ezZ88etm/fTps2bYCMs3NFixa94nZv5HNRSil1exMRVsSu4Iet3+O3IIL+ywSfZPDq0wMf3J3WLqeGM2OMH9AcGJhlmTdgEZEEx+u2wFtOamKOuPSuvovvvb29gYwfjjZt2mQbq3UlIsKoUaMYMmRItuXR0dGZ27peuQIFCmS+d3Fxue5NBgcPHuTjjz9mw4YNBAQEcP/99182fYTNZuOPP/7gr7/+4p133smcAywhIeGa2wZ49dVXadmyJdOnTyc6OpoWLVpct87FPri4uGC1Wi9bbrFYcHNzy/ycLRYLVqsVESE0NJQ1a9bc1HaVUkrlLyLC8pjlfBn5Jfatu3h0kYVSR+0UqFObYq++ikfVqk5tX25OpTEJaAEEG2NigNcBNwARGeMo1gOYLyIXslQtDEx3/MPqCkwUkbnkgOud4cothw8fZs2aNTRq1IiJEyfStGlTNm/enLm+YcOGPP744+zbt48KFSpw4cIFYmNjqVSpUrbttGvXLnP8mY+PD7GxsVecnf5Gy2Xl6+t7xTB1/vx5vL298fPz48SJE8yZM+eyALVo0SJq1qzJvHnzMpcNHjyY6dOnExoamm27l+4nPj6e4sWLAxk3JVzUpk0bvv32W1q2bImrqytxcXHZzp79G5UrV+bUqVOZxyI9PZ29e/cSGnr1n4urfS5KKaVuPyLC2mNr+XLzl5zas4X713pQe6sNl8JBFP54JAU7dcwT0yTl5t2a94pIURFxE5ESIvK9iIzJEswQkQki0u+SegdEpJbjT6iIvJNbbbxVKleuzFdffUXVqlU5e/Ysw4YNy7Y+JCSECRMmcO+991KzZk0aNWqUOdA9q7Zt29K/f38aNWpEjRo16N279xWDw42Wy+r+++9n6NChl03ZUatWLWrXrk2VKlXo379/5qXXrCZNmkSPHj2yLevVqxeTJk2iZs2auLi4UKtWLT799FNatmzJzp07MwfYjxw5klGjRlG7du1sZ6sefvhhSpUqRc2aNalVq1a2Oz7/LXd3d6ZOncoLL7xArVq1CAsLY/Xq1desc2l7lVJK3Z42ntjIg/Me5NXfH6Htz7v5bJydOlE2goYOocLs2fh17pQnghmAyRiPlj+Eh4dL1nm3AHbt2kVVJ56ejI6OpnPnzmzfvt1pbVC5x9k/X0oppa5ORFhzbA1jt47l8J4I+q9zp3FkChY3dwL69yfo4YdwDQrKLJ9mtTNj6Wri96zgwcdezPWwZozZKFeYzzUv3BCglFJKKZVjRISlR5by3bbvOLtrK303uFN/mx3jYiVgwECCHnkYt0KFspVftmYtyYs/pHv6UqwWd87HP4qff9DVd5KLNJzlsjJlyuhZM6WUUuoWSLYm8/eBv/l116+YHVH0W+9Ojd02
jKcQMGgQgQ88gFuRItnqbI9cx5k579IsZRlW48axyoMo0ekFPPycE8xAw5lSSimlbnOxibH8vvt3/to1lWpb43l0uwdlDtqwFHQj8PEHCRg4ANeAgH8q2O0ciphN3NKvqHVhDSmmAFHlB1Ox+yhKFixy9R3dIhrOlFJKKXXbsdqtrIpdxR9Rf7B/8xLujhQ+22nB44Idt5JBBLzwJAH39MGSZaopks9xasUP2DeMo3R6DL74srHUA4T2epEq/oWd15lLaDhTSiml1G3jyPkjTN83nbk7plNh80nabrNQ/ogV3Nwo2KY1/n364NWgAcaSZUKKhOPEz38Pj+2/ESIpbJZKbKz0Jk26PEy9gj7O68xVaDhTSimlVJ639+xePlz/Aec2rKPVVuG93Qb3NDtu5coQ8EIf/Lp3y37pEiD5LGfnf4R35Di87FZmyF3E1xxM9w6dqO3tvCcAXI+Gs1vsjTfewMfHh1mzZvHxxx8THp79DtoJEyYQERHBl19+6aQWKqWUUnmH1W7lh+0/MG/2lzz2t42ip2zg5Yl/t0749+qFR61al095kXaB0ws/xyviS/xsScySJsSEDadPm7sI8S1w5R3lIRrOlFJKKZUn7T+3n5dXvESJ+dt5Y7HgVrgIhd99koLt22Hx8rq8QuIpji0cjc+2Hwm2nWOx1OVAzeF0b9eOYJ+8H8ou0nB2C7zzzjv8+OOPFCpUiJIlS1K3bl0Afv75Zx5++GGsVivjx4+nfv362erdf//9eHh4EBERwfnz5/nkk0/o3LmzM7qglFJK3TJptjR+2fUL360bzdA5dupvt+PTogXFPngfFz+/y8pbj+3g6Nz/UeTQDIqSzlLqEBM6jI4du9MqD1++vBoNZ7ls48aN/Pbbb0RGRmK1WqlTp05mOEtKSiIyMpLly5fz4IMPXnE+tOjoaNavX8/+/ftp2bIl+/btw8PD41Z3QymllMp1p5NPM2XPFH7f8zseMaf530wPAk6mEfLMMwQ98nD2Qf5AwsGNnJ31GqXOrCRE3Jnjdje2+kNp27wZLQrcvhHn9m35vzHnRTi+LWe3WaQGdHj/qqtXrFhBjx498HKcfu3atWvmunvvvReAu+66i/Pnz3Pu3LnL6t9zzz1YLBYqVqxIuXLl2L17N2FhYTnaBaWUUsqZdp3ZxS+7fmHu/tlUjE7jiUOFqbbJBVdvD4qP/wrvhg2yld+9ZzeJc16nztl5WPHmN9/7KNxqGJ3DquJiyRvPx/wv7qxwlsdcOoDxSs/wupEySiml1O3mbMpZZh+czcx9M0jdtoPmuy18t9cVz3N2jFc8Bdu1J2TEs7gVznjMktVmZ+7GKFKW/o/OF6ZhgBWF7qVwx5foV7akczuTw+6scHaNM1y55a677uL+++9n1KhRWK1WZs6cyZAhQwD4/fffadmyJStXrsTPzw+/K1xHnzJlCoMHD+bgwYMcOHCAypUr3+ouKKWUUjki3ZbOsphlzNg/g8g9y2myLZ3HtrsRctIG7i74Nm9CwU6d8GneHIunJ5Dx3MuFkfuInvMFPVOnE2QS2Fe0I4W6v0PzIuWc3KPccWeFMyeoU6cOffv2pVatWhQqVIh69eplrvPw8KB27dqkp6czfvz4K9YvVaoU9evX5/z584wZM0bHmymllLrtiAiLDi/isw2f4LftEB23uzFkdzoWmx3P2tXwf7oXvm3b4uLrm63e+p0H2Dfrf3S8MJ025gKnizTB3uVtKpSo46Se3BpGRJzdhhwTHh4uERER2Zbt2rWLqlWrOqlF/839999P586d6d27t7Oboq7idv75UkqpW2Hrsc1MnfwmQWv30ijKgm+iDYu/P/7du+PfuxcFKlTIVl5EiNwdxZE5/6Nl/F/4mmRiCrekSOdXcC0ZfpW93J6MMRtF5LJO6ZkzpZRSSuW4wxFL2fzNexTZdJh7k8FewI2CLVri16EDPq1aYXHPPsWFzS6sXL+e5GWf0SJpAbWMlYOFW+Pe9VVKlKjlpF44h4azPGzChAnOboJSSil1U84ln2XxB09TcfIGSrpCfL2KlOvzKEHN784cR5ZVqtXGokXz8dwwmrvSV2MzLhwq0ZUSnUdSvuideWVCw5lSSiml/rNUWyp/rBmH+7tjCD1g5XDd4tT8eAx1ila4Ynmx21m7dBYuK/9HR3skF4wX0ZUfokyn56joV/QWtz5v0XCmlFJKqZsWnxrP0cSjxCbGcuj8IbbO/JF+U0/hk27BvPAYbe9/4srTP4mwf82fpC35kEbpOzlr/DkY9hxl2j1JeU//W96PvEjDmVJKKaVuyNmUs7y2+jU2Ht9IQnoCAMHxQo81doZuFmxli1Pxi28oULHi5ZVt6ZxaP5mUpZ9SPjWK4wSzMXQUYV2fIqDAFZ6TeQfTcKaUUkqp6zoYf5DHFz3OiQsn6F6hO5XPeVL+7214LdkIxkLAoP4UenYElkunfEqKI3bR13hF/kCI7TQHpShzK7xK016PUfdKDy9XGs7ykjfeeIPvvvuOkJAQrFYr7777brbHPd0qe/fuZfjw4URFReHr60uFChUYPXo0R44c4aeffuKLL75g6dKluLu707hx41vePqWUUrfWhuMbGL5kOG64ML7wcwT+sorERYswnp4EDBxI4P2DcSuafZyY7cRuYud8TOHovyhOGmuoydEqo2jWoT/t/TSUXYuGszzmmWee4bnnnmPXrl00a9aMkydPYrnkQa85wWq14up6+eFPSUmhU6dOfPLJJ3Tp0gWApUuXcurUKcLDwwkPD89c5uPj45RwZrPZcHFxueX7VUqpO9Gfu6Yy/fe3ePigF42iBIl7i2Q/P4KfeIKAAf1xDQjIVv74rtUkLPiQ8nFLKSSuzHNridQfQpsWLWjkrrHjRuT8v/rqMj/99BM1a9akVq1aDBo0iOjoaFq1akXNmjW5++67OXz48GV1qlatiqurK6dPn6Z79+7UrVuX0NBQxo4dm1nGx8eHZ555htDQUO6++25OnToFwP79+2nfvj1169alWbNm7N69G8iY1Hbo0KE0aNCAkSNHsmzZMsLCwggLC6N27dokJCQwceJEGjVqlBnMAFq0aEH16tVZunQpnTt3Jjo6mjFjxvDpp58SFhbGihUrOHXqFL169aJevXrUq1ePVatWAVxxHwAfffQR9erVo2bNmrz++uuZ+/rll1+oX78+YWFhDBkyBJvNltnXZ599llq1arFmzZocPkJKKaUudXbPdhY80o0S/V7lxd/SabQ1DZ/6DSj28cdUWLyIkCcezwxmqelWVi/8k23vtaTI7x0odGYdMwr2Z1XX5XR6aTLd2rbGS4PZjRORfPOnbt26cqmdO3detuxW2r59u1SsWFFOnTolIiJnzpyRzp07y4QJE0RE5Pvvv5du3bqJiMjrr78uH330kYiIrF27VooWLSp2u13OnDkjIiJJSUkSGhoqp0+fFhERQH755RcREXnzzTfl8ccfFxGRVq1ayd69ezO307JlSxERGTx4sHTq1EmsVquIiHTu3FlWrlwpIiIJCQmSnp4uzzzzjHz22WdX7MuSJUukU6d
Ol7VVROTee++VFStWiIjIoUOHpEqVKlfdx7x58+SRRx4Ru90uNptNOnXqJMuWLZOdO3dK586dJS0tTUREhg0bJj/++GNmX3///feb/vxzm7N/vpRSKqcd2bVB5j/YSbZXqSKbqleRvx9sL2cXzhdbcvJlZdPTrbJ01q8S+UY9kdcLyunXS8nyH16R2OMnnNDy2w8QIVfIM3dUjP1g/Qfsjtudo9usEliFF+q/cNX1ixcvpk+fPgQHBwMQGBjImjVrmDZtGgCDBg1i5MiRmeU//fRTfvnlF3x9ffn9998xxvDFF18wffp0AI4cOUJUVBRBQUFYLBb69u0LwMCBA+nZsyeJiYmsXr2aPn36ZG4zNTU183WfPn0yLwk2adKEESNGMGDAAHr27EmJEiX+9eewcOFCdu7cmfn+/PnzJCYmXnEf8+fPZ/78+dSuXRuAxMREoqKi2Lp1Kxs3bsx8/mhycjKFChUCwMXFhV69ev3r9imllLq27duXsO/T96iw+giFXGBn63JUe+plOla8fPiK2O1ELp6M5+qPaW6P4qQlhL1136B826E0c798oll1c3ItnBljxgOdgZMiUv0K61sAfwEHHYumichbjnXtgc8BF2CciLyfW+3May6OObto6dKlLFy4kDVr1uDl5UWLFi1ISUm5Yl1jDHa7HX9/fyIjI69YxtvbO/P1iy++SKdOnZg9ezZNmjRh3rx5hIaGsmzZsptut91uZ+3atZc9mP1K+xARRo0axZAhQ7KVHT16NIMHD+a99967bPseHh46zkwppXJYuj2dJTtmEvvVF9RdcYJywJE21ag54k1qlbnsn26wprJ/5WTMyk+pbd3PUVOYbXX+j+odh1DItcAtb39+lZtnziYAXwI/XaPMChHpnHWBMcYF+ApoA8QAG4wxM0Rk55U2cDOudYYrt7Rq1YoePXowYsQIgoKCiIuLo3Hjxvz2228MGjSIX3/9lWbNml21fnx8PAEBAXh5ebF7927Wrl2buc5utzN16lT69evHxIkTadq0KQULFqRs2bJMmTKFPn36ICJs3bqVWrUufy7Z/v37qVGjBjVq1GDDhg3s3r2b/v3789577/H333/TqVMnAJYvX05gYGC2ur6+vpw/fz7zfdu2bRk9ejTPP/88AJGRkYSFhV1xH+3atePVV19lwIAB+Pj4EBsbi5ubG3fffTfdunXjmWeeoVChQsTFxZGQkEDp0qX/0zFQSimV3bmUc/yx63dif/mB9oviKZ4KZ1rVouYL71CzVPnshUWwHVpL7PIfCDj4N+UlkcMUYVX1/6N+1yEUc9dQltNyLZyJyHJjTJl/UbU+sE9EDgAYY34DugH/OZw5Q2hoKC+//DLNmzfHxcWF2rVrM3r0aB544AE++ugjQkJC+OGHH65av3379owZM4aqVatSuXJlGjZsmLnO29ub9evX8/bbb1OoUCF+//13AH799VeGDRvG22+/TXp6Ov369btiOPvss89YsmQJFouF0NBQOnToQIECBZg1axbDhw9n+PDhuLm5UbNmTT7//HNOnz6dWbdLly707t2bv/76i9GjR/PFF1/w+OOPU7NmTaxWK3fddRdjxoy56j527dpFo0aNgIzB/r/88gvVqlXj7bffpm3bttjtdtzc3Pjqq680nCmlVA4QEbad3saUPZM5MW8WfRel0jgOUmtXpuyr7xBaLTR7hbQLpK0cTeqGn/FNjiFYCrDStQFSsx9N2/ailKfHlXek/jOTMR4tlzaeEc5mXeOy5h9knB07CjwnIjuMMb2B9iLysKPcIKCBiDxxlX08CjwKUKpUqbqHDh3Ktn7Xrl1UrZo/H5zq4+NDYmKis5txR8vPP19KqfzhQvoF/j7wN1P2TkG27GLQMqh0xAalS1Bi1Mv4NG+e/TFLIpyO+AO3BS/jl3acFbbqbPJvR5WW99I6rAIulis8kkn9K8aYjSISfulyZ94QsAkoLSKJxpiOwJ/AFZ73cG0iMhYYCxAeHp57SVMppZS6jaTb0/lxx4+M2zaOwNhEHlntSZVdNiwhwRR643H8e/XCuLlllhcRNm+JxG3+C9RIWsdue0nGlfqc5q278nSZwGvsSeU0p4UzETmf5fVsY8zXxphgIBYomaVoCccydQk9a6aUUupKtp3axhtr3uD8/j28uLkQlTacx8XHEDRiBIGDBmLxzH5HZcS+Y+ya+n/ckzwZq3FhcenhVOn2HM8G+jqpB3c2p4UzY0wR4ISIiDGmPhkT4p4BzgEVjTFlyQhl/YD+zmqnUkopdbtISk9i9ObRrF38C33WuxK2y47FPY6ABx8g+JFHcPH3z1Y+ITmNGZO/p+n+Twi3nORw8Q4U6v0xrQL//dRK6r/Lzak0JgEtgGBjTAzwOuAGICJjgN7AMGOMFUgG+jkmZLMaY54A5pExlcZ4EdmRW+1USimlbndWu5X5B+cxb/IHNF16mq6HBOPrQeAj9xE4aCCuISGX1Vm7YT32OS8wwL6Jk15lSe4xnVKVWzmh9epSuXm35r3XWf8lGVNtXGndbGB2brRLKaWUyi9SbanM2D6FXRPH0GD1GYaeAgkOoNDzD+Pf9x5cfHyylRcR9kbt4fDcz7nrzGSsxp0jDV6lZNunwcXtKntRt9od9YQApZRSKj+IT41n1srvifv1VxpuSqJ6CqRVKEGR4UPx69IFi7t7tvKHo/dzYPlEgg79TQ3bLioDOwp3okL/jynpX8w5nVBXpeEsD3vnnXeYOHEiLi4uWCwWvv32Wxo0aECZMmWIiIjIfCTURY0bN2b16tVER0ezevVq+vfPGKoXGRnJ0aNH6dixozO6oZRS6j+yi53dcbtZEbOCLdsXU+eP7TTaaUcsBnvz+pR6+Cm86tTJNiWGNTWJzXPG47l9EtXSd1DKCNGuZYms+ARlmg8ktIROA5RXaTjLo9asWcOsWbPYtGkTBQoU4PTp06SlpV2zzurVqwGIjo5m4sSJ2cJZRETETYUzq9WKq6v+eCillDMlpSfxzZZvmLl/JucTTtN1nTB0rWDBgmVQTyo8/BRuhQtnq5N2Opp9sz+n2IEp1COBI5bibCo3hFJ3DaBM2ZpO6om6Gfqv7y3QvXt3jhw5QkpKCk8//TQPPfQQDz30EBERERhjePDBB3nmmWey1Tl27BjBwcEUKJDxWIxLz5JBxoPBe/bsSc+ePXnkkUcyJ6V98cUX2bVrF2FhYdx777189dVXJCcns3LlSkaNGkXnzp158skn2b59O+np6bzxxht069aNCRMmMG3aNBITE7HZbP/qGZtKKaVyxoqYFfzf2v/jeOIxHo6rQYu/0nA7cRbfdu0oPPJ53IoX/6ewCGn7lnJ8wecUP7mMyiKsL9AQ90ZDqdO8KyUtFud1RN00DWe3wPjx4wkMDCQ5OZl69epRt25dYmNj2b59OwDnzp27rE7btm156623qFSpEq1bt6Zv3740b948c31iYiL9+vXjvvvu47777stW9/333+fjjz9m1qxZABQuXJiIiAi+/DLj/ouXXnqJVq1aMX78eM6dO0f9+vVp3bo1AJs2bWLr1q
2XPUtTKaXUrRGXEscny97m8Mp59DruR9OjxbDs30yBSpUo/MGneDds8E/h9BSSNv1G0vIvCb4Qhbf48qdPH4q3fpyGYTWzz/yvbht3VDg7/u67pO7anaPbLFC1CkVeeumaZb744gumT58OwJEjR0hLS+PAgQM8+eSTdOrUibZt215Wx8fHh40bN7JixQqWLFlC3759ef/997n//vsB6NatGyNHjmTAgAE33eb58+czY8YMPv74YwBSUlI4fPgwAG3atNFgppRSTnDy1CG2j/mAlKUr6HfUiouAcb+AZ1hFCg56AP/evTEXh5tcOEP8sq9w2TQeH+tZDtlL8lfIs4S2e5ieFYtqKLvN3VHhzBmWLl3KwoULWbNmDV5eXrRo0YLU1FS2bNnCvHnzGDNmDJMnT+bNN9+kS5cuAAwdOpShQ4fi4uJCixYtaNGiBTVq1ODHH3/MDGdNmjRh7ty59O/f/6a/hCLCH3/8QeXKlbMtX7duHd7e3jnSb6WUUtdmFzs7z+xkxcElpEz9k8bzj1I0GWLKeOM2uBclWrTHMywMi0eWB4zb7ZxcPhbv5W/jZ09gib02e8q8RssOvXmoaEHndUblqDsqnF3vDFduiI+PJyAgAC8vL3bv3s3atWs5ffo0drudXr16UblyZQYOHEjJkiWJjIzMrLdnzx4sFgsVK2Y8bjQyMpLSpUtnrn/rrbd46623ePzxx/n666+z7dPX15eEhISrvm/Xrh2jR49m9OjRGGPYvHkztWvXzqVPQCml1KXWH1vP6ytfpey6GPqusFMoHs5VLwnPPEXrxp2u+J/ugzvWYZ/5DOVTdrBeqrG5xst0bXM3Lf08r7AHdTu7o8KZM7Rv354xY8ZQtWpVKleuTMOGDYmNjaVFixbY7XYA3nvvvcvqJSYm8uSTT3Lu3DlcXV2pUKECY8eOzVbm888/58EHH2TkyJF8+OGHmctr1qyJi4sLtWrV4v7772fw4MG8//77hIWFMWrUKF599VWGDx9OzZo1sdvtlC1bNnN8mlJKqdyTbktn9ObRrJo3nmcXulD8mB3XyhUp+r8XqNKk8RVD2Y7oY8T8+Qatzk4hES/+rvAaDbs/Tn1fjyvsQeUHJuOJSflDeHi4REREZFu2a9cuqlbVuVxU7tCfL6XUjTpw7gBvzH+eutN30SZScA0pROHnn6dgp46YS+6mFBE27Iji8PzRNI//ixATz/ZCXSjZ92P8goo4qQcqpxljNopI+KXL9cyZUkoplYvSbelM3TuFVT99yGPz0yiYDEH33Ufwk0/h4pN9nK/NLqxas5oLK0bTMnkh9U060UFN8OzwEtUrNnVSD9StpuFMKaWUygWHzx/mj6g/WLd6Kt1mn+Xxg4Jr1cqUePsdPENDs5UVETasXIB16YfcZdtAKm4cLtWVUh2fo0zRak7qgXIWDWdKKaVUDrHZbSw8vJApe6ewJ2otfVcIr26xg6cHhV56hsABAzAuLtnq7N+0hIR5b1M/NYJ4fNlT5QkqdHqair6FnNQL5Wx3RDgTEZ3zReW4/DReUyn134gIK2JX8OnGT4k5EUX/zd4MX21wsRkCBg0geNgwXAMCslbg9M6lnJn9NpUvRHAWXzZWfIqaPZ6jspef8zqi8oR8H848PDw4c+YMQUFBGtBUjhERzpw5g4eH3i2l1J1u55mdfBLxCUd2rKXLHh9abPHE5dx5CnbsQMjw4biXKvVP4dQEzm+YSNKq7yiSHIWRgiwu9QThfZ6jbsGAq+9E3VHyfTgrUaIEMTExnDp1ytlNUfmMh4cHJUqUcHYzlFJOciThCONWfUbi7Ln03GGhbKwNXBLxadaM4MeG4Vkzy0PGT+wgefVYLNsmU9CeRIy9NKuKP0uD7o/RqvDlz05Wd7Z8H87c3NwoW7ass5uhlFIqnzh0/hA/rPsan5//ptdGG242cK1UjsAXeuHXuROuISH/FL5whuQ5r+C5fSJG3Jhpb0Rs+Xvp1qkrvUJ8nNcJlafl+3CmlFJK5YQD8Qf4bstYEmbOYsBiO35JgkfXjhS7/2E8Lp3v0G7n7KrvKbD0LdysF/jW1oXD1R7loTZ16K2hTF2HhjOllFLqGqLORvHd1u/YvXYODy0UKh2x4VqjGiVeexPPGtUvKx+zcy22mc9QOnkn6+xVWVFpFL3at6ZssD67WN0YDWdKKaXUFeyO283YTd9wZulCWm+3cP8eGxZ/P4q88zx+Pbpnm9VfRNi2bjFpKz6nduJyzlKQaWVepUH3x3guwMuJvVC3Iw1nSimllINd7Gw8sZFZ87/Ge+E67tkBfhcES4AfAQ/1JOjRR3EpWDCzfEpaOuvnTSQg8ltq2naQgBcRxQdRvscr9Awp7MSeqNuZhjOllFJ3vL1n9/L3vpnEzJlO02Vn6BcLdhcLXi3uIrhXH3yaNcO4uWWWP3c+gQ0zvqHCvh+4i6OcMIXYXO0FqnZ8jAY+/s7riMoXNJwppZS6I9nFzh9RfzB120SKL99L5w1C+zghvUggASMfILh7T1wDA7PVOXXqFNtmfEr1wxNpY85yyL0iUQ2+oEKL/hR2cbvKnpS6ORrOlFJK3XGizkbxfytep+ScLTy/3uB9wY5raFUKv/Yovm3aXPaIpSPR+zgw+1PqnPiDViaZPd51SW31PKXrtged4FzlMA1nSiml7hiptlS+3fItq2d/zyPzbBQ7Zce7WVOCHnkEr3r1sj1JRux2tq+aSdqasdS6sJriCNv9WxDS/gUqV23kxF6o/E7DmVJKqXwv3Z7OqthVfL3sA1rMOMTrWwVL0SIU++Y1fFu2zFY2+fxZdsz+ikJ7J1LDHss5fNhcYgDl2j1BzVJVnNQDdSfJtXBmjBkPdAZOishlE8EYYwYALwAGSACGicgWx7poxzIbYBWR8Nxqp1JKqfwpIS2BlbErWXJ4CREHVhAWeZ6RK8Ar1ULQIw8QPGwYFq9/prmwp15gx58fUWrXd4STyG7Xymyo+R412w+mnofOUaZundw8czYB+BL46SrrDwLNReSsMaYDMBZokGV9SxE5nYvtU0oplQ/tjtvNl5u/ZFXsSsofttJ+hxsDd6bjlmqnQO0wir3xJh6VK/1TwZbOoQXf4LP+U2rY44hwD6dA61eoXq9FtsucSt0quRbORGS5MabMNdavzvJ2LaBPkFZKKfWvnU4+zejNo5m9Yxpdtrrz3bYCeB9PxXgVwK9rD/x69sQzLOyfwGWzcnrtRGTpe5ROP8oWU4UdTT+jaauuWCwaypTz5JUxZw8Bc7K8F2C+MUaAb0VkrHOapZRSKq9Lsabw886f+SHyOxpvSmbsGjc84pPwDK+L/9O9KdiubbbLl2JLJ3rpj3it/ZTC6THsllKsCv2Utt0G4VVAp8NQzuf0cGaMaUlGOGuaZXFTEYk1xhQCFhhjdovI8qvUfxR4FKBUqVK53l6llFJ5Q0JaAtOjpvPzzp8os/EY/1vtgf9JK551a1Ho2WfxqlM7W/n09DS2zR1H4cjRlLUdZQ+lWVnxPZp0up/u/vqIJZV3ODWcGWNqAuOADiJy5uJyEYl1/H3SGDMdq
A9cMZw5zqqNBQgPD5dcb7RSSimnOnL+CBN3T+SvvdMI3Z7IqI2eFD1ip0DFYoS8OQKfFtnHiqUkXyBy5jeU2PUddeQ4UZayLAn7hPrtB1HZw92JPVHqypwWzowxpYBpwCAR2ZtluTdgEZEEx+u2wFtOaqZSSqk84kD8AT7f+DlroxbTeit8HumGzxk7biUDCX7vFfy6dsk2eWxywlm2/fUp5fb9SEPOEeVaicgGr1KzVX8quliusSelnCs3p9KYBLQAgo0xMcDrgBuAiIwBXgOCgK8d/8O5OGVGYWC6Y5krMFFE5uZWO5VSSuVtqbZUxm0bx9TV39F9HTy81eCanI5XeBiBbw7Gp2XLbKHswunD7JnxCRUP/059ktjqXpvjd40gtHFnjEVDmcr7jEj+uRIYHh4uERERzm6GUkqpHLLu2Do+WvoGYQsO0TXCgpsdCnboSODgwXhWD81WNj5qNUfnfUqF04twETsRXk3wbvU8ofVaOKfxSl2HMWbjleZydfoNAUoppVRWIsK+c/v4cfM4rH/M4sU14J0kFOzYjpCnn8K9dOl/CtusxEVMIWn5aEpc2IERT5b5d6d42+HUD63pvE4o9R9oOFNKKeV0IsLOMztZcGgBSw4soOzqaHqushNyHjwbN6Lws8/iGfrPmbLUlAtEzRtL4W1jCLEe57wUYXrRp6nZeRitSxR1Yk+U+u80nCmllHKaA+cO8Nf+v5hzcA6n449y91bDS+tdKHjWjmtoVYo9+xzejRtnlt+6P4aji76i7tGJVOcc201FllccQYMOA+gR6OPEniiVczScKaWUuqXiU+OZc3AOM/bPYNvpbXilW3gwujSNF3vjGncez9o1CP7wMbybNsmcEuPAsdNETH6ftnETqWkusMerDscaPkNok85U1zsvVT6j4UwppdQtEZsYy5gtY/j7wN/YrGm0O12Mx6IqE7R+P6RE4VWvHsGfPIZXgwaZoSw+KZVFU76h/oHR3GNOczioCa6dX6dyuQbX2ZtSty8NZ0oppXLVyaSTjN06lj+i/qDEaXgjugwVNxyHuCNYChakYNeu+HXvnm1G/3SbnaXzplNs/Tv0ZD9HPStyrvMYSlVv48SeKHVraDhTSimVK+JS4hi/bTy/7fmN4sfS+XBLYUpsjAHX/fi2aE7BLl3wadECi/s/s/THJaayct5kim4fSxvZymlLMDHNPqFE8wdA5yhTdwgNZ0oppXLU6eTT/LTjJ37b8xvFYpJ5d3MIJSOPYfGOI+DRRwkcfB+ugYHZ6uyKjWPznAmEHfmRriaas5ZAomqOpEKn4Rh3byf1RCnn0HCmlFIqR5xKOsUPO35gyp4pBJ9M5e11QZTckoCl4AUCH3+cwPsG4eLnl63O4ZPnWDb5U5qf/JX+llOc9CjN8UYfU6TpfQS4FnBST5RyLg1nSiml/pP41Hi+3fotk/dMxvt8Oq9sKU6llYeweCYS9PRTBAwciIuvb7Y6ScnJrJjyBaH7xzLInOaYXw0SW/+PQjW66OVLdcfTcKaUUupfsdqtTN4zma+3fE3qhfM8t68SteYdgLQjBPS7l+DHH7vs8qVY09jy9xgKbR5NO04S7VmVuA5fUrRme3DcoanUnU7DmVJKqZu2OnY1H274kLiYfTwYVYxGG2wQtwPfNq0JGTGCAmXLZiufELub6IXfUjx6OmFylj0uFdnb8gMqNemhoUypS2g4U0opdUNsdhurjq5i0q6JnFy/kj5bPKi9QzD2GHzuuougRx7GK/yfZzhLWhIHV0zCvvEnKiRFUlUsbHQPJ7X2AzRp1w8XnTxWqSvScKaUUuqajiUeY/q+6czd9geVNhyn2zYLpY/ZsPha8B84iIAB/XEvVSqzvDXhFPv+/pSie36inCRwWAozr+gQSrV8iAaVKzuxJ0rdHjScKaWUuqKdZ3by9aYvObdyOS232nknClytgnuVSgQO6Ytfly5YvP+Z5iL11AEOzvyAMoenUYU01rjWI7nOEBq06kY7D/dr7EkplZWGM6WUUtkcv3CcMcs/wvrXXO7dBIHn7Rg/X/z7dcO/Zw88qlXLVj7h+D6OTnuVCifnUk4MKzxa4tF8OI0aNsVi0fFkSt0sDWdKKaUAuJB+gd9mf0jqb9Povs1KASsUaFCP4Hv749OqVbaZ/AHiTh1n/7Q3qXV0MqUxzCvYkyJtn6FV9dDMZ2MqpW6ehjOllLpDpdvT2Ru3l8hTkRxdv4xyv6+l8YF0rG4WPLt0pMQDQ/CoVOmyesfPnGPbtI+oH/MDdUlirV87Aju9ScfKVZzQC6XyHw1nSil1h1l6ZCk/bP+BHWd24HUuhf5L7XTeLiQVdMc2pD9V738C14CAy+pF79/D3nnfUOPEX7QxcezybYB3p7dpXLX+re+EUvmYhjOllLpDHL9wnPfXv8+iw4uo4FmKl/ZUpvLM7VhsFgIfGUylIUNx8bnkOZa2dA6smsqFteMJvbCBUsD+gvU42fo5qtZq55R+KJXfaThTSql8zma3MWn3JEZvHo3XBRsfxDWj4oI9WGM34dumNYVGjsS9ZMlsdewpCeyfM5qgbd9Rzh7HSQJYV/IBKrcfRsUSl1/qVErlHA1nSimVT4kIa46tYfSGz3DfsINXogKpsOMcWJfgWqMGxd55B++GDbPVSU44y+4Z/6Nc1AQqksB6U5OztV6nSft+NPL0cE5HlLrDaDhTSql8Jik9iRn7ZzB5+69UWXyAp9eDX6Idl0A7fgMG4NezJx6Vs5/9OnYshoN/f0r1mEnU5gIb3MJJaTSChs074KYz+St1S2k4U0qpfOJY4jF+2vkTf0ZNp+rOBJ5d6krgGTuejRsR1L8/PnfdhckyHUaa1c6aDRtIX/UlTRLm0tiksdm7Ce4tXyC87l06HYZSTqLhTCmlbnMXx5R9sfkLihxL5Z0VBSm6x457+RIUfn8UPs2aZit/MiGFOXNmUmLXOFra12E1Luwt3JGgNiOoXbG2k3qhlLpIw5lSSt3Gos5G8cbqNzhycAvPbypE9bUXcPFNJ/jVVwjo2xfj+s+v+dQL51g74zuCdk9ksDnABYsPR6oNoUT74VT3K+rEXiilstJwppRSt6E0Wxpjt45l0oZx9Fpv4aX1BoucInDQQIIfewwXP7/MsnJ0M7ELxxB44E+ak0KMe1nONHqboCYP4F3Ax4m9UEpdSa6GM2PMeKAzcFJEql9hvQE+BzoCScD9IrLJsW4w8Iqj6Nsi8mNutlUppW4H6bZ0/tz/Jz9uGkf1FTF8tdaFAhdSKdilCyFPP4V7iRL/FD4bzYU/R+B9aBHB4sYyt2YUajmU2o3bgo4nUyrPyu0zZxOAL4GfrrK+A1DR8acB8A3QwBgTCLwOhAMCbDTGzBCRs7ncXqWUypNSbalMi5rG5LXfUWvVCV7bYsH3vB3vJo0o9OyI7A8jt6aSuOQTCqz+BLFb+NT0J7jFUPrdVUPvvFTqNnBT4cwY4yUiSTdaXkSWG2PKXKNIN+AnERFgrTHG3xhTFGgBLBCROMd+FwDtgUk3016llLrdnUw6yawDs1i66AcarjzDWzvB
1SZ4N2tM0IMP4t2oUbbyF/YsJmX6cIJSDjHH3oDdtV5icPvGBHq7X2UPSqm85obCmTGmMTAO8AFKGWNqAUNE5LH/uP/iwJEs72Mcy662XCml8r0L6RdYdHgRiyKn4b4sgsY7bIyKBfFwJ+CeXgQOGkiBcuX+qWC3cW7rHM4t/4YycSs5bS/E5JIf0KnnfXQI8nJeR5RS/8qNnjn7FGgHzAAQkS3GmLtyrVU3wRjzKPAoQKlSpZzcGqWU+veOJBzh+zVfcH7+AupvT+PRQ4JFwJQrTcjIvvj37oVLwYKZ5a3xx4leOAb/XRMJtp4gTfyZ6jeYKj1fZliZwk7siVLqv7jhy5oicuSSCQltObD/WCDrA91KOJbFknFpM+vypVdp11hgLEB4eLjkQJuUUuqWOpdyjm8jvyFu4kT6LrXimQb2YoUIebQ7BTt1wqNS9tn8E/evI3bux5Q/tYgK2FhnarCy4tPUaj2A3oX9ndMJpVSOudFwdsRxaVOMMW7A08CuHNj/DOAJY8xvZNwQEC8ix4wx84B3jTEBjnJtgVE5sD+llMozUqwp/LLrF+YvHMvAmYlUPCq4NQyn+DPP4VGzZvYZ+u02EiKnE7/kc0okbKWoeLKwYHe8mzxMw3oNdaC/UvnIjYazoWRMeVGcjLNa84HHr1fJGDOJjDNgwcaYGDLuwHQDEJExwGwyptHYR8ZUGg841sUZY/4P2ODY1FsXbw5QSqnbXVJ6ElP3TmXilgncteA4r60Dl4K+FPvoFQp27pw9lKUlkbD6e6yrvyYg7ShnJYQphR6nZucnaF+6mPM6oZTKNSbjRsn8ITw8XCIiIpzdDKWUuqIzyWeYtOVHtiz8jYp7E7kryg2/s2n49ehBoZHP4xoQ8E/h1ETiV4zBZe2X+FjPstFeia0lB9Gsy2AqFPG7+k6UUrcNY8xGEQm/dPmN3q1ZFngSKJO1joh0zakGKqVUfnXszCGWfPMKZu1mmkbbuNsK4uaKd3gdgh99NPt0GCnxnFv2FW7rx+Bni2elvTrby79Fh869eCDI23mdUErdMjd6WfNP4HtgJmDPtdYopVQ+Ep8azx/T3qXM6JnUjhMSCvvg1aMlRe/uiHf9+li8skxzYUvn2MLR+K37BH97AkvstdlTeQidO3ajaYBOh6HUneRGw1mKiHyRqy1RSql8IsWawu9bfuLs6K9pszaVpCAvvL56nap3X36xwWq1sWXRbxRd/w7FbLGslhpsr/oMXTt0oqWfhxNar5RythsNZ58bY14n40aA1IsLLz4HUymlFByIP8C8g/PYtHAi/f44Tf2zQI/21Hn5bVx8sl+SvJBqZfbCBZSNeIdw2Ua0Kc7cWl/QqO29NNbZ/JW6o91oOKsBDAJa8c9lTXG8V0qpO9aR80eYGz2XNZEzCV6/n/p7hWeOCFI4mFITPsK7YcNs5dNSU1k9+2c8t06gj2wjwfiyu/arVOz4FGXcNJQppW48nPUByolIWm42Rimlbhenk0/z+YI34O8l1N9j5/njGctdypcl4PFOBD1wPxbvf86W2c/FEDXnS4L2/kYLOcspl0IcrfUcxdo8QRXPgCvvRCl1R7rRcLYd8AdO5l5TlFIq7xMR5mycxL4vP6J3RAruVnAJrULggI74tm5NgbJls5c/e4ijM96iyMFpVBQhwq0uRxs/Qo3mvTEuN/yQFqXUHeRGfzP4A7uNMRvIPuZMp9JQSt0xjsbsZvH7T1F96RFK28C1Y2vKPvUc7qVLX1bWHn+UIzP+j2L7JxMsMN2tIwVbPEXrxvWxWMwVtq6UUhluNJy9nqutUEqpPMpqt7JtzwqOff8tRedtobYV4u6qTvgLH+BZrtxl5W0JJ4me8R4lon6hmNiY69YalxbP071RXVz1EUtKqRtwQ+FMRJbldkOUUiqvOJN8htVHV7N581xCpq+icWQqpe2wr25hwl54h9AaTS6rE3f8EEdmvU/lmKmUlXQWuTVHWrxIh0YNNJQppW7KNcOZMWaliDQ1xiSQcXdm5ipARKRgrrZOKaVugaT0JDad3MTao2tZe2wtSXt2032tnZ67BCwWkto2ouTjzxJaITRbPRFh+85txC/4mPpn/yYUO6u9WiFNR9CqUWNc9PKlUupfuGY4E5Gmjr99b01zlFLq1olNjOWD9R+wInYFVls6tQ+58OBmL8rutiGeBQgcfC9B9z+AW+FC2eqJCBu2bOXc3HdpmbwAgC0hnQluN5K7KlZ3RleUUvnIjT5b82cRGXS9ZUopdTuwi51Juyfx+abPcbXBC2caELpgP64HYnAJKUjg8OEE9OuLi79/tnoiwvptuzgz513uTpqDMbC/VG9KdX2Z8JDLbwpQSql/40ZvCMh2Lt8Y4wrUzfnmKKVU7jpw7gCvr36dyJObGXyyMp3nnEGOLaNAxQoEvvMOBbt0xuKefTLYjFC2h5NzP6TNhRm4GjsHSvagdI/XqBKkoUwplbOuN+ZsFPAS4GmMOX9xMZAGjM3ltimlVI5JSEvgl12/8N3W76h0xp0fV5XGc9tO3CtXptCbb+HdrBnGZB8jZrcLa1YuJHXVNzRJWYarsXOweGdKdn+DSoXKO6knSqn87npjzt4D3jPGvCcio25Rm5RSKsecSjrFL7t+YfKeyZj4BF7ZXJyqK2NwKWgIeeMN/Pv0xri4ZKuTlprKxrk/4rvle5rYd5OEB4fK9KF0h2eoUKSyk3qilLpT3OhUGqOMMcWB0lnriMjy3GqYUkr9F9Hx0UzYMYEZ+2cQHGdlxJ7ihK5NxaTGEjBwACGPP46Ln1+2Ohfiz7B95heU3fcTjYgj1lKU7TVfomqHoVTy9LvKnpRSKmfd6A0B7wP9gJ2AzbFYAA1nSqk85UjCEb6J/Ia/D8yixmELH+0IptiWo+AaS8EO7Ql+9FEKVKiQrU780X3sn/kxlY9NpwEpbHcP40SjD6jevBfFLS5X2ZNSSuWOG70hoAdQWURSr1tSKaWc4PiF44zd8i2bVk6j3j5h7AEfCsacxSUgiYBhQ/Hv1w+3QtmnxDi+YxWnF/6PqnGLqYGFjb4t8bt7ONVrN3NSL5RS6sbD2QHAjSzP1VRKqbzgeOJx/p78HomLFtMyykqf84AxeIaVwX/YsxTs3BlLgQKZ5cVmZf/KKciar6iYsg0v8WJZcF/KdhxBw/I6nkwp5Xw3Gs6SgEhjzCKyP/j8qVxplVJKXcfes3uZvvBLyo9bRONDdqzuLng0akxI2074tGiOa1BQtvLW5AR2zR1D8LbvqWA/RiwhLCr9DNU7P0GrkGAn9UIppS53o+FshuOPUko5jYiw7vg6fo4cT5GpK+m2VpAC7hR4YQiV730Qi4fHZXVSzh4jatYnlNo/iRoksMNSmb1hz1Kv/SDuvkJ5pZRythu9W/PH3G6IUkpdy8H4g7y37j0SV69iyHxDoTjBo2M7Sr70Cq7Bl5/5SozdyeFZH1L+2CxCxcr6Ag2h8ZPUv6sjFn3mpVIqD7vRuzUPkv3B5wCISLkcb5FSSmWRlJ7Et1u/ZdWCCfRZaafmPjtupUtR9OPX8W7c+LLyFw6
s5djf71PuzFLKiysrfdoS0Go4DerUu2ySWaWUyotu9LJmeJbXHkAfIDDnm6OUUhlEhPmH5jP1j7dptfAM/3dQMP5+BI94iMDB92Ub5I8IKbvnc2behxQ/F0GIeDHbvz9lO4/g7ooVrr4TpZTKg270suaZSxZ9ZozZCLyW801SSt3J7GJnyZElzP/rc+r8vY8R0YL4+VLouUcJuPdeLN7e/xQWIXX7TBLmvU1w4h4sEsikwCFU7/wUncuXcF4nlFLqP7jRy5p1sry1kHEm7UbPuiml1HWl29OZc2A2y/78kkbzY7j/MFj9vAl+bihB/ftj8fL6p7AISTvnkDjnLQol7iLWXoQ/gkZQr8sQ7i1XxHmdUEqpHHCjAet/WV5bgWgyLm1ekzGmPfA54AKME5H3L1n/KdDS8dYLKCQi/o51NmCbY91hEel6g21VSt1GktKTmB41jQ1/jqXFolM8HAvWoIIEjxpG0D19sXh6/lNYhMTdizg/+w2KJWzjjD2EscHPEdZpCEPKF7r6TpRS6jZyo5c1W2Z9b4xxIeNxTnuvVsdR5iugDRADbDDGzBCRnVm2+0yW8k8CtbNsIllEwm6kfUqp28+ppFNM3Pkre2b+QodlFxhyHGyFAin82hP49+r5z5iy5HNwYCnx2+bA/sX4pZ8kQQL5JWQ4tbo8zqOlNZQppfKXa4YzY0xB4HGgOPAXsNDx/llgK/DrNarXB/aJyAHHtn4DupHxfM4ruRd4/WYar5S6/ew7u4+ftk/g5OwZdF9lpe1JQYoVosj/PYF/t24Yd/eMgrtnY1/5GcRGYBEbRjxZLTU4V/x+and6hIElNJQppfKn6505+xk4C6wBHgFeBgzQQ0Qir1O3OHAky/sYoMGVChpjSgNlgcVZFnsYYyLIuIz6voj8eZ39KaXysG2ntvHT+m9IX7iMLuuh2Bk7pnQJin7wBAU7dcK4On4dJZ8jacZzeO2awhGKMcPahV1e9Qhr1Jpe9coS5FPg2jtSSqnb3PXCWTkRqQFgjBkHHANKiUhKDrejHzBVRGxZlpUWkVhjTDlgsTFmm4jsv7SiMeZR4FGAUqVK5XCzlFL/hYiwIXYdC6Z+TKHlOxm4V3C3gmulChR+5TF827bFuLhklt254k+KLnuegtYzjLZ1Z0u5RxnQuCKPVQrBRSeOVUrdIa4XztIvvhARmzEm5iaCWSxQMsv7Eo5lV9KPjMulmUQk1vH3AWPMUjLGo10WzkRkLDAWIDw8/LKJcpVSt1a6LZ3IU5Gs27sYy++zqLXuND0TwerjgX+vLgT36o1HjRqZE8JabXb+Wr8X18Vv0C19Dgcozozq4+h+dweeDPS6zt6UUir/uV44q2WMOe94bQBPx3sDiIgUvEbdDUBFY0xZMkJZP6D/pYWMMVWAADIunV5cFgAkiUiqMSYYaAJ8eIN9UkrdYmm2NGYdmMWSw0uIPLKO5msv0H2tHa9USAivTOEBj+LfqjWWi+PJyDhTtixyD4fmfkbXlJn4mQvsLX8/pXq9w/1ePk7sjVJKOdc1w5mIuPzbDYuI1RjzBDCPjKk0xovIDmPMW0CEiFx8kHo/4DcRyXrWqyrwrTHGTsa8au9nvctTKZU3pNvTmbFvBt9u/ZaT54/Sc48/ny+z4nnOjsddTSk64lk8qlS5rN72nTs4NOsjWl6YTQuTyolirTCdXqZSifAr7EUppe4sJnsmur2Fh4dLRESEs5uhVL5ns9v4++DffBP5DbEJR+gbU5JuS5JwiT2JZ506FBrxDF7hlwQtu42DEXM4uXw8dRKWYgwcKtaRUl1G4VY01Cn9UEopZzLGbBSRy/5XqrP8K6VumIiwInYFH0d8zMFzB+h8uiQfLCuOW1Q0BSpWJOSbN/Bp0SL7A8bP7OfYsu9x3zGZsrZTBOHN9uJ9qNz9RcoXKuu8ziilVB6l4UwpdUP2nd3HRxEfsfroapqdK8Kbq8vhsSUKt+LFCfnwg4zpMFz+GQkhRzYQ//er+B9fQyExrDFhXKg6gkadBlHbx9eJPVFKqbxNw5lS6priUuL4OvJrZm2dTMs9bvywOwTv/TG4BAYS/PLL+Pe9J/tA/1N7OfXXKxSKmUe6FORLlwEENRlMt2Z18XLXXzlKKXU9+ptSKXVFCWkJ/LrjF9bMHU+jTUl8t8fgmpZEgcol8X/5Qfx69MDFxzuzvD3+GDF/vU7xA1PwEnfGufXDp8VwHm5QCQ+3f31vkVJK3XE0nCmlsklIjufvPz/m1JyZ1N6VSvMEwMsT/x5d8e/dG4/qof+MKbPbSdq7hBNLv6X48UUUEeEv9w64thjJ4IY1cXOxOLUvSil1O9JwppQC4OyhKDb/71W8Vm2l1gXB6mrBpVE9inXpjW/r1li8skwIm3CckyvG4xL5M0FpRwkQb+Z5dsSj2WN0a9RQZ/NXSqn/QMOZUne4w/GHWTHuLar8tIogGxyuUQjf7gOo1mlAtsuWAHHHD3Nq9juUOzyVQlhZZ6/G3JIPUaP1QLqULeKkHiilVP6i4UypO5CIsOnkJqasHUfV75dRf69wokIQhd/9P7rUbJmt7PmUdJZs3oNlzWhax0+jHDbmurcmqe4w2jZrQgNv96vsRSml1L+h4UypO8jFecrGbh2L26rNDJ0j+KQZvIYPpfkjj2ebCiMl3ca4RdtIX/U1D1lm4mOS2RXSjgKtX6Jz5ZrZ5zJTSimVYzScKXUHsIudRYcXMS5yLJ4bdtJ7sxuV99txr1KZ4h9+iEelStnKL9l+mK1/fUr/tKmEuJznXKnWmI5vEFqkhpN6oJRSdw4NZ0rlY1a7lTkH5zBx3VjKrjjAU5EWAs/acSnsR+CzAwkaPBiTZY6yI6fiWfz7Z7Q5NYGWJo5zRRtBp7fwL1nfib1QSqk7i4YzpfKhFGsKf+77k6nrvqfx/Fhe3Aru6YJneB0CBw7Et/XdGNd/vv7Hz5xjw4wx1Ij+gcHmOCf8qpPeZTz+FVteYy9KKaVyg4YzpfKRhLQEJu+ZzJRNP9J02WlejTC4WQ1+3bsRNHgwHpUrZyt/JOYwu2d+Ru3jU+li4onxrEBc258oXLsr6JgypZRyCg1nSt3mrHYra4+tZcb+GazYv4iW61N4d50FzyTBt0N7Qp56kgJlsz9gPGrnZk7O/x91z86lpElnj18j7HePoETNNhrKlFLKyTScKXWbOhB/gOlR0/n7wN8knj1J163ufL1B8Dhvx7tZEwo9MxyPatUyy1ttdtavXIDL6s+pl7KaUriys1BHSnV8jsplazqxJ0oppbLScKbUbebig8in7J2C/wXDo7uLErbCDUtSMt5NmhA8dAhe9epllo+/kMbKeb9RdPu3NLZvJwFvtpR5iPKdR1A7pLgTe6KUUupKNJwpdZtIt6UzcfdEvt3yLf7HE3l3X1nKrYqGtEP4tm1L0COP4Fk9NLP86fNJrJz5A5X3jqWTieaMJZjdtV6kYvvHqe1Z0HkdUUopdU0azpTK41JtqSw7sozRGz8ncHM0r233o/TudIzbQQp27ULQQw9ToNw/Y8pOnE1g7Z9jqBE9nu7mKCfcSx
Lb+GOKNxtMkKvO5q+UUnmdhjOl8qD41HiWxyxn8eHFbN+7gvAtyYzc4kLwGTuuhdwIGP40/n364BoUlFnnVNxZ1k/7nLAjP9PNnCbWsyLHm4+hSIN7wOJyjb0ppZTKSzScKZWHRJ2N4uOIj9l8eC119lhpvcuNh/enYbELHrVrEvTyIHzbtMG4uWXWSTx3mi3TPqLqoV/pZBKI9q7OqdafUbx2Z73zUimlbkMazpTKA9Jt6YzbPo7py8fQZ7WFJ3YLrsl2XIsG4PdwV/y6daVA+fLZ6qTFxbD3rw8oe2gyTUhhm3dDktu9QJlarZzUC6WUUjlBw5lSTrbj9A7eWzCKsNn7+GQTuLi549epC35du+FVLxxjsWQrnxC7h5hZ71Hh2EyqiJ01Xs0Jaf8CNWo1clIPlFJK5SQNZ0o5yfm084yPGEPcTz8xYo0dj3QI6NWL4CeewK1w4cvKH965nnPz3yf07GLK4cpi7/b43T2CpnXqYPTypVJK5RsazpS6xY4lHuOXbT9yaurvdF2eQlACeLRoRrHnRlKgQoVsZUWEHRuWYF3yAWHJawkUT5aF3Evx9s/S7pKySiml8gcNZ0rdIrvjdjNh2w+cnz2b3sttFD0rEFqJ0i++km3SWAC7Xdi4egEuyz+gTloE5/BhTamhVOo6glbBl59VU0oplX9oOFMqlx2IP8AXGz/n7OKF9F8OpU7asVQoR7F3nsOnZYtslyST02ysWz4bn7WfUM+6iXP4sqniU1Tr9iyNfPyd1gellFK3joYzpXLJyaSTfLPlG1atn8bD8+3U2G/HpWQJCn88nIIdO2QO9LfZhTVRJ9m34ndqHfmZFmYv50xBtlcbQeUuz1BHZ/NXSqk7Sq6GM2NMe+BzwAUYJyLvX7L+fuAjINax6EsRGedYNxh4xbH8bRH5MTfbqlROSUxLZPz28Uzc9hNt16byyUrB1c2dQi+NJODefplzlJ08n8KEpduxRP7KPdaZNLWc4ox7UQ7WeoXSrYdR3cPHyT1RSinlDLkWzowxLsBXQBsgBthgjJkhIjsvKfq7iDxxSd1A4HUgHBBgo6Pu2dxqr1L/VZotjcl7JvPt1m8ptP8snyzyIuCoFd82bSj88ku4FSkCQFKalZ8WbULWfsMQ5uFnLhAXUpu0Fh8TFNqFIJ3NXyml7mi5eeasPrBPRA4AGGN+A7oBl4azK2kHLBCROEfdBUB7YFIutVWpf80uduYcnMPoTV9QcFcsz233p9IWG65FfSjy9fv4tsqYFNZmF2at2kjiks8YZJuPp0kjuXwHaDGCwJL1rrMXpZRSd4rcDGfFgSNZ3scADa5Qrpcx5i5gL/CMiBy5St3iudVQpf6NdFs6S2OWMn7TtwSt3MXzke4Ui7Vh8bMSMGQIwY8+gsXbG4AtO3Zy5K+3aJ+6AFdj51yFrni3G4V3oSpO7oVSSqm8xtk3BMwEJolIqjFmCPAjcFPPnjHGPAo8ClCqVKmcb6FSl9gdt5s/9/3Jmk0zqLf+HM9sMfhesONevjiBb96HX9cuWDw9AUhKPMeGX96g/rFfqWbsHC3bk1JdXyIosKyTe6GUUiqvys1wFguUzPK+BP8M/AdARM5keTsO+DBL3RaX1F16pZ2IyFhgLEB4eLj8lwYrdTVptjT+2v8XU3f+jmfELtpuhq4H7IDBp3kzgu67D69Gjf6ZFsNuY9/8bwhY+xHNOce2gNaUv/dDShcuf839KKWUUrkZzjYAFY0xZckIW/2A/lkLGGOKisgxx9uuwC7H63nAu8aYAMf7tsCoXGyrUleUlJ7E1L1TmbXie6pHnOKZba74xduxhAQTMLQ3Ab1741Y8yxV3ES5s+5uE2a9SIeUA2yxVONZuPDUa3O28TiillLqt5Fo4ExGrMeYJMoKWCzBeRHYYY94CIkRkBvCUMaYrYAXigPsddeOMMf9HRsADeOvizQFK3QrxqfFM2fIzUX/8RL3NCbx5GMQYfBrXx79fX3xbtMicEgMAEc5unUPy/P+j2IWdnJLCTC3/Dp36DsWzgLNHDyillLqdGJH8cyUwPDxcIiIinN0MdRtKtiaz+eRm1h9bz4Ety6k8bw+Nd2Y8jNxeogiFe/fDr1tX3IoWzV5RhKOb55K28G3KJG0nRoJZXuQBwroMo1qJIOd0Riml1G3BGLNRRMIvXa7/pVd3tKizUfxv4/9Yd2wd5Q6n032d0G6vHZubCy7tW1P63gfwrF072yOWABBh75q/MMs/pmLKNo5LIH+VfI7aXZ+kfyF/p/RFKaVU/qDhTN2RktKT+GbLN/yy/ScaHSrAlxv9CNx7AoufH4GPDSBg4ABcAwMvq2ez2YhcOAm/DZ9RyRrFcYJYWOZZanV7mm4Bfk7oiVJKqfxGw5m6o4gICw8vZPTS96i+7gRjt3riczoB12I+BL30Ev69e2Hx8rqsXsyxE+xb9isl90ygrhwi1hRmVdXXCOsyjNZXKK+UUkr9WxrO1B1jT9wefp7xfxSZs4m3d4CbVfAKr0rAKwPwvfvu7AP8gSOnzrF9+TR89kyjXupaSph0jriUJLLOh9Ro9yDFXd2usiellFLq39NwpvK96Phovl/5GYV/ms+ALYKtgBsBPboRNHAgHpUrZysrImzYsJq4pd/Q4MISOphE4k1B9pXoQXCjQZQMbUbJS8efKaWUUjlIw5nKt45fOM6Yzd9wbvo0+i+24Z1m8L6/P8WHPYmLX/bxYWJNY/viSbDhO+qnbyMVNw4Xvhtrw4GE1GqPn4ueJVNKKXVraDhT+c6OMzuYuGsi29f9zQNz0qkcY8c1rAYl33wbj8qVspW1JZ5h/5zPCdz5MzUkjmMmhMjKz1Ct0+NULBjipB4opZS6k2k4U/lCuj2dRdELmbvse7zX76L+PsOAGBuWggUp8s5I/Hr0wFgsmeVTTh/iwMyPKHtoCpVIYb1LGLvrvEWDtv0o6qZnyZRSSjmPhjN1W4tJiGHOul9InTiVarsuMORsxnK3KpXwG9aagEEDcQ0IyCx/9tA2Yv9+n8on51BJhBUeLbA0fZomje/C1cVylb0opZRSt46GM3XbSbWlsvjwYqbv/oOAWWvou8KOu81gq12NQh16UrBlS9yKFctW58zBrRyb8SbV4hbhgRvL/LoS3GYELarXuHyCWaWUUsqJNJyp24Zd7EzcNZExW8cQcuAcjy2wUOKYHdfG9Sn95tu4lyx5WZ0z0ds5OuNNQs8swAN3loQMoFzXF2hdqpQTeqCUUkpdn4YzdVs4lniMV1e9yvaDaxkeUYiaq+y4hQRR+POX8W3b5rKzXyejIjg25yOqn5mHJ+4sDbmXit1HcXcJDWVKKaXyNg1nKk8TEWYdmMX7a9+lwZYUvlvmgWvCCQLvG0Twk0/i4uPzT2GblZi1U0ld9TXlk7bgK+6sDL6Hct1fplXJ0s7rhFJKKXUTNJypPCsuJY63177N3vXzeWOxJyWiU/CsXZsir76CR7VqmeUk6SzR87/Gd9sPlLCdIlZCmF/iCUI7P0bzosWd2AOllFLq5mk4U3lOXEocE3ZMYObmSXRbmszDG+24B
rhT6N138eveLXNKjOTTh4ie9TGlo6dQlmTWmxqsD32RJh0G0tbHw8m9UEoppf4dDWcqzziTfIYfd/zIktUTuXttMp9vN7hZhYD+/Ql56p9Z/Y/ti+Tk3A+pdnouFUVYUaAZaQ2epPldrfBwc3FyL5RSSqn/RsOZcrrjF47z846f2D7/N9qsTeHDfQJurvh16Urg/YPxqJQxq//5syfZ+/NwwuP+xl/cWeXfFf/WOh2GUkqp/EXDmXKaA+cOMH77ePYvnUn/xel0PioQ4Efw4wMJuLcfrsHBGQVF2DZvPMXXvkmYJLCqcH8q9HiJFkUvnzpDKaWUut1pOFO3XOTJSL7f/j1Rm5cwaCkMiLJhCodQ+P+exK9rVywFCmSWPRMTxfGJj1MjaR17XCpysstvNAlr7LzGK6WUUrlMw5m6JexiZ0XMCsZvH8+BAxsZuMqVYZvtWLy9CB4xhMD7BmHx+GcQf3L8GXb/9RFVDoynjMDy8s/SsN8o3N31uZdKKaXyNw1nKlel29KZfXA2E3ZMIPFAFH03ejByC1jERsCggQQPG5bt2ZfnT8US9df7VImZTG1SWF+gMYXu+ZS7yldxYi+UUkqpW0fDmcoVF9IvMHXvVH7e+TPe+48zYKMX1bfbsbil49ejF0EPP4R7lkconY7ZR/SM96h+4i/CsLLBuzned4+kft0mTuyFUkopdetpOFM56mTSSX7d9Sszt04mdPt5Ru72puR+GxZfCHjkEQIHDcQ1JCSz/PF9mzn29/tUj1tALSDCvx0h7V+kYdVazuuEUkop5UQazlSOOBh/kAlbvufQohk03Wbj8yhwTbfjViqAgOeH4t/3nmyPWjq6fTlx8z6gesJKCkoB1of0pHTn52lUprITe6GUUko5n4Yz9Z/sPLOTH9d/g9tfi+kYIQQkChT0xb93J/y6dsUzLOyfOcisqRxa+Rvp68ZRIXkrXuLN0qIPUqXbszQpWsK5HVFKKaXyCA1n6qaJCBEnIpi46msKzVzHPZsFr1RwaxBOoYH34dO8ORZ398zyttMHODT/K4KjJlNaznNECrOw1FPU6j6cFkFBTuyJUkoplfdoOFM3zC52lhxewp9LvqbivF08sE1wtRm82ram8CND8awemq18yqEITs94jRJnVlFKLKx0qU9y2GCatu1Fa88CV9mLUkopdWfL1XBmjGkPfA64AONE5P1L1o8AHgaswCngQRE55FhnA7Y5ih4Wka652VZ1dem2dGbtn8ni2d8QvjiWoVECri749ehOoYcfwb106Wzlk47u5Nj0Vyl/aiFe4sNvPgMp1PwRmofXwsWij1lSSimlriXXwpkxxgX4CmgDxAAbjDEzRGRnlmKbgXARSTLGDAM+BPo61iWLSFhutU9dm4iwO243cw/MJmbWH9y14iyPHQWbrxfBQwcRNGDgP49Xckg4cZAj016j8omZFJYCTPcbSMlOI+lXufRV9qKUUkqpS+XmmbP6wD4ROQBgjPkN6AZkhjMRWZKl/FpgYC62R92AvWf3MvfgXBbvm0uZ1Yfotk5of1awFStE4VeH4N+zBxZPz2x1zhzezZFZ71Ht5CzKCywu2IPCnV+iR+WKTuqFUkopdfvKzXBWHDiS5X0M0OAa5R8C5mR572GMiSDjkuf7IvJnjrdQARljyZbHLOf7bd+z58hm2kXCyxst+Jy341atKoVeH4Jvm9YYF5ds9Q7vXEfcvA+ocW4xPriwzq8DIR1G0aZq6JV3pJRSSqnryhM3BBhjBgLhQPMsi0uLSKwxphyw2BizTUT2X6Huo8CjAKWyzDivri/dns6cg3MYv2088Yf30XurFyM3ueKalIp34wYEPfoIXg0a/DMVhsPu9fNJXfIxtZLXESQerC58L2U7P0+zUuWc1BOllFIq/8jNcBYLlMzyvoRjWTbGmNbAy0BzEUm9uFxEYh1/HzDGLAVqA5eFMxEZC4wFCA8Plxxsf74VnxrP9Kjp/LrrFwruPU7/SC+q77BjTBIF27Ul8KGH8AzNfvZL7DZ2L5+KWf05VdJ2cBZfVpcaQpUuI2gWUsRJPVFKKaXyn9wMZxuAisaYsmSEsn5A/6wFjDG1gW+B9iJyMsvyACBJRFKNMcFAEzJuFlD/wc4zO/lt92/MOfA3tXam8OJGT4odtmEpaAh48EEC+vfHrVixbHWsKRfYs/hnfDd+TVXbIY4SwupKI6nV5Qka+/o5qSdKKaVU/pVr4UxErMaYJ4B5ZEylMV5Edhhj3gIiRGQG8BHgA0xxXDq7OGVGVeBbY4wdsJAx5mznFXekrmt5zHLGbh3L1hOR3BXlxuh1BfCLTcK9dDABr43Av3t3LF5emeVjTp1l/5oZeO79i9DEVYSSwn5TipU13yW840MU8/BwYm+UUkqp/M2I5J8rgeHh4RIREeHsZuQZcSlxvL/+febun02Xg4H0Wm3HM+YM7uXLEzxsGAU7tM8c5J9mtbNg3kxcN/9Ao/S1FDTJxOPD7oCWUL0XdZp3xc3V5Tp7VEoppdSNMsZsFJHwS5fniRsCVM4SEeZFz+Ojle8QtukcP2z2wev4KQpUrEDwJy/h265dZigTu511i6fjvvpTOtm3ccF4E1usDQl1+lCsdnsauLpfZ29KKaWUykkazvKZk0kn+WjR63jOXM77myz4JFrxqF6SoBffxLdtG4zFklHQbmfv8t9h5Sc0tO7ljAlkX+1RlG//OJUK+Dq3E0oppdQdTMNZPnEg/gBTV32L22+z6b/Zikc6eDVrTPDDj+BVv17mdBhiS2f/kp/wWPc5ldIPEUNhImq8Tu0ujxHkrmPJlFJKKWfTcHYbExE2HN/AtBVjKDp9Le23Ci5icG/XmpJDn8SjcqXMsva0FHbP+5bAzV9TwX6c/ZRkYbV3aNrtEUoU0IeQK6WUUnmFhrPbkF3sLDq8iGmLv6LG7L0M3CEYiwXvHt0oNvRx3EuUyCwrtnR2zPiCIlu/pJrEsdNUYFftz2jUYSDl3d2c2AullFJKXYmGs9vIxRn9Z83/ivrzj/DEbgE3N/z796HQI4/iVrhwtvKxm+dh+/sFqlsPstUllN31P6Th3T2ppnddKqWUUnmWhrPbQLI1mRn7ZrB4zhiaLjrBM/sEu2cBgh4aSPD99+MaHJyt/IWT0Rz6bQTV4hYRSwhLwz6hWZcHcHGxOKkHSimllLpRGs7ysNPJp/lt1yS2zvmF1ssTGH5IsPt6E/zkgwQOHICLX/YZ+q0Jp4ma+RFl946nrMC8wg8Rfu9rtAjwd04HlFJKKXXTNJzlQVFno/hl+0+cmDODLqvTuPs42IP8CRn5CIF9+2Lx9s5WPuXMYQ7O+IAyh6ZQlVSWuzcjsPt7tKtWw0k9UEoppdS/peEsj0ixprDg0AKm75hMwcWb6LZOKHJWMKWKU+TtoRTs2hWLe/YJYRNjd3Jk5ntUOD6bimJnhUcLCrR4lqYNmmCxGCf1RCmllFL/hYYzJ9sTt4c/ov5gyY4ZNFqbwJBNhoKJdtyqVaXQG0PxbX135mz+F53auYK4BR9RMW45ZXFliW8ngto8S4uaNTPnM1NK
KaXU7UnDmRMcSTjCvOh5zDk4h7MH99A1wvC/rYJbqh3vZk0JeughvBo0yB607HYOrpmGfeVnlE/ehpt4My9oIKU6DKdtxQrO64xSSimlcpSGs1skPjWev/b9xdzouWw7vY2KscKArQWputWOsVjw69SZwAcfzDZxLAB2O3uW/ITXmv9R1nqYoxLMvFLPUL3z43QoHOKcziillFIq12g4y2Xp9nSm7JnCN1u+4XzyWXrGFmP4+kL47j2KxdeK/wMPEDhoEG5FimSvaLezb/kk3Fd+SGVrNAdMCRZXe4fwTg/SztvLOZ1RSimlVK7TcJZLRIQVh5by6+z3KbAvhsfiCxG2LwBOHMGtVCkCX34Z/549LrvzErudg6umYFn+PhXSDxBNMZZUf59GXR6iXAH3K+9MKaWUUvmGhrMcJiLs/OlLTkz6maCYBIZbM5ZbfBLxDAsj4LV++LRocdkgf0lPIWrReLwjvqGs9TCHKMqiqm/TqNujlPHQZ18qpZRSdwoNZznEareybOtfXPi/j6m44xzpRVyI61CfWk174F2zFu6lS2Msl8/Qb71wlr2zv6DwzglUkjj2UoZFVd+mQddHKO3p4YSeKKWUUsqZNJz9R/Gp8fy570+2/TGO3n+eJiQNjjzQmsZPv4OvR8Gr1ks+fYgDMz+mzKEpVCOZCJdabK/7Hg3b9KaSmx4WpZRS6k6lKeBfOJ18msWHF7Po8CK2HVzHwIVpPLRVSKtQggr/+5KalStftW7c/giOz/mIiqcXUFmENR7NcG32NA0at9KJY5VSSiml4exGiQi/7vqVBYcWsC96E3Wj7LQ94MGT+6242AxBw4YQMmwYxv0Kg/bTkzkR8SfnV42jYmIE7uLBMv/uFG47nGahNW99Z5RSSimVZ2k4u0HGGOJ+/pm+kacpE23DiOBa1A/fe1vj3707HtWqZa9gt8HB5Zxb/ysFomZT2H4BkUDmFhtGlU5P0rpEced0RCmllFJ5moazm9D5UBC4eeA77G58W7emQNWqlz8u6cJpWPct6RETcEs6iUU8mUMDUqv2pnWHXrT30znKlFJKKXV1Gs5uQplx47B4XSVcnTuCrB6NfeOPuNhSWGarzRyXgZRu1IP77qqCv5fOUaaUUkqp69NwdhOuGMxO7sa28jPMtinYRfjT1oTJBXrRskUz3mhYCl8Pt1vfUKWUUkrdtjSc/RsisH8R6au+wu3gYtJxZ5L1bhYH3kP35g35pVYx3F0vn9NMKaWUUup6NJzdjPRk2Pp7RiiL28s58edHax/2l76H/i1rc3+F4MvHoCmllFJK3QQNZzdKhNQvm1Agfj977GWYYH8ME9qDB1tUoWrRq082q5RSSil1M3I1nBlj2gOfAy7AOBF5/5L1BYCfgLrAGaCviEQ71o0CHgJswFMiMi8323o9doGPUroRZfelQr12PNOsHMX9PZ3ZJKWUUkrlQ7kWzowxLsBXQBsgBthgjJkhIjuzFHsIOCsiFYwx/YAPgL7GmGpAPyAUKAYsNMZUEhFbbrX3eiwWQ/f7hlMywAs/Lx3kr5RSSqnckZuj1usD+0TkgIikAb8B3S4p0w340fF6KnC3yRi01Q34TURSReQgsM+xPaeqXtxPg5lSSimlclVuhrPiwJEs72Mcy65YRkSsQDwQdIN1lVJKKaXyndt+vgdjzKPGmAhjTMSpU6ec3RyllFJKqf8kN8NZLFAyy/sSjmVXLGOMcQX8yLgx4EbqAiAiY0UkXETCQ0JCcqjpSimllFLOkZvhbANQ0RhT1hjjTsYA/xmXlJkBDHa87g0sFhFxLO9njClgjCkLVATW52JblVJKKaXyhFy7W1NErMaYJ4B5ZEylMV5Edhhj3gIiRGQG8D3wszFmHxBHRoDDUW4ysBOwAo87805NpZRSSqlbxWScqMofwsPDJSIiwtnNUEoppZS6LmPMRhEJv3T5bX9DgFJKKaVUfqLhTCmllFIqD9FwppRSSimVh2g4U0oppZTKQ/LVDQHGmFPAoRzcZDBwOge3p3KGHpe8S49N3qTHJe/SY5M33arjUlpELpukNV+Fs5xmjIm40l0Uyrn0uORdemzyJj0ueZcem7zJ2cdFL2sqpZRSSuUhGs6UUkoppfIQDWfXNtbZDVBXpMcl79Jjkzfpccm79NjkTU49LjrmTCmllFIqD9EzZ0oppZRSeYiGsyswxrQ3xuwxxuwzxrzo7PbcaYwxJY0xS4wxO40xO4wxTzuWBxpjFhhjohx/BziWG2PMF47jtdUYU8e5PcjfjDEuxpjNxphZjvdljTHrHJ//78YYd8fyAo73+xzryzi14fmcMcbfGDPVGLPbGLPLGNNIvzPOZ4x5xvF7bLsxZpIxxkO/M85hjBlvjDlpjNmeZdlNf0eMMYMd5aOMMYNzo60azi5hjHEBvgI6ANWAe40x1ZzbqjuOFXhWRKoBDYHHHcfgRWCRiFQEFjneQ8axquj48yjwza1v8h3laWBXlvcfAJ+KSAXgLPCQY/lDwFnH8k8d5VTu+RyYKyJVgFpkHCP9zjiRMaY48BQQLiLVARegH/qdcZYJQPtLlt3Ud8QYEwi8DjQA6gOvXwx0OUnD2eXqA/tE5ICIpAG/Ad2c3KY7iogcE5FNjtcJZPwjU5yM4/Cjo9iPQHfH627AT5JhLeBvjCl6a1t9ZzDGlAA6AeMc7w3QCpjqKHLpcbl4vKYCdzvKqxxmjPED7gK+BxCRNBE5h35n8gJXwNMY4wp4AcfQ74xTiMhyIO6SxTf7HWkHLBCROBE5Cyzg8sD3n2k4u1xx4EiW9zGOZcoJHKf1awPrgMIicsyx6jhQ2PFaj9mt8xkwErA73gcB50TE6nif9bPPPC6O9fGO8irnlQVOAT84LjmPM8Z4o98ZpxKRWOBj4DAZoSwe2Ih+Z/KSm/2O3JLvjoYzlWcZY3yAP4DhInI+6zrJuM1YbzW+hYwxnYGTIrLR2W1Rl3EF6gDfiEht4AL/XJ4B9DvjDI7LXd3ICM/FAG9y4SyLyhl56Tui4exysUDJLO9LOJapW8gY40ZGMPtVRKY5Fp+4eOnF8fdJx3I9ZrdGE6CrMSaajMv9rcgY5+TvuGQD2T/7zOPiWO8HnLmVDb6DxAAxIrLO8X4qGWFNvzPO1Ro4KCKnRCQdmEbG90i/M3nHzX5Hbsl3R8PZ5TYAFR1307iTMXhzhpPbdEdxjLH4HtglIp9kWTUDuHhnzGDgryzL73PcXdMQiM9ymlrlEBEZJSIlRKQMGd+LxSIyAFgC9HYUu/S4XDxevR3l88T/SvMbETkOHDHGVHYsuhvYiX5nnO0w0NAY4+X4vXbxuOh3Ju+42e/IPKCtMSbAcWa0rWNZjtJJaK/AGNORjLE1LsB4EXnHuS26sxhjmgIrgG38M7bpJTLGnU0GSgGHgHtEJM7xS+9LMi4XJAEPiEjELW/4HcQY0wJ4TkQ6G2PKkXEmLRDYDAwUkVRjjAfwMxljBuOAfiJywElNzveMMWFk3KjhDhwAHiDjP+D6nXEiY8ybQF8y7kLfDDxMxhgl/c7cYsaYSUALIBg4QcZdl39
yk98RY8yDZPybBPCOiPyQ423VcKaUUkoplXfoZU2llFJKqTxEw5lSSimlVB6i4UwppZRSKg/RcKaUUkoplYdoOFNKKaWUykM0nCmlbjvGGJsxJtIYs90YM8UY43WNsi2MMY2zvJ9gjOl9tfJZyiXmVHuzbDPMMVXPxfdvGGOey+n9KKVubxrOlFK3o2QRCROR6kAaMPQaZVsAja+x/lYKAzper5BS6s6m4UwpdbtbAVQwxnQxxqxzPPh7oTGmsDGmDBnB7RnHmbZmjjp3GWNWG2MO3OBZtOeNMRuMMVsdk4pijCljjNlljPnOGLPDGDPfGOPpWFfPUTbSGPOR4wyfO/AW0NexvK9j89WMMUsdbXkqpz8cpdTtR8OZUuq25Xj+YAcyniaxEmjoePD3b8BIEYkGxgCfOs60rXBULQo0BToD719nH22BikB9Ms581TXG3OVYXRH4SkRCgXNAL8fyH4AhIhIG2ABEJA14Dfjd0ZbfHWWrAO0c23/d8VxZpdQdzPX6RZRSKs/xNMZEOl6vIONZrJWB3x0PL3YHDl6j/p8iYgd2GmMKX2dfbR1/Njve+5ARyg6T8VDri+3YCJQxxvgDviKyxrF8Ihkh8Gr+FpFUINUYcxIoTMaDzJVSdygNZ0qp21Gy46xUJmPMaOATEZnhePbnG9eon5q16nX2ZYD3ROTbS/ZX5pLt2ADP62zrem2xob+Xlbrj6WVNpVR+4QfEOl4PzrI8AfD9D9udBzxojPEBMMYUN8YUulphETkHJBhjGjgW9cvBtiil7gAazpRS+cUbwBRjzEbgdJblM4Eel9wQcMNEZD4ZlybXGGO2AVO5fsB6CPjOcenVG4h3LF9Cxg0AWW8IUEqpbIyIOLsNSimVrxhjfEQk0fH6RaCoiDzt5GYppW4TOrZBKaVyXidjzCgyfsceAu53bnOUUrcTPXOmlFJKKZWH6JgzpZRSSqk8RMOZUkoppVQeouFMKaWUUioP0XCmlFJKKZWHaDhTSimllMpDNJwppZRSSuUh/w8lHpCtWvBwwAAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "%matplotlib inline\n", + "seed_idx = list(range (10, max_seeds +1, 10))\n", + "\n", + "plt.figure(figsize=(10,5))\n", + "\n", + "for i in range(len(data)):\n", + " plt.plot(seed_idx, time_algo_cu[i], label = names[i])\n", + "\n", + "\n", + "plt.title('Runtime vs. Number of Seeds')\n", + "plt.xlabel('Number of Seeds')\n", + "plt.ylabel('Runtime')\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "4094" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "del time_algo_cu\n", + "gc.collect()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test 3: Multi-seed versus Sequential\n", + "This test uses a single files since sequential execution is slow" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Reading ./data/coPapersCiteseer.mtx...\n", + "\t434,102 nodes, 16,036,720 edges\n" + ] + } + ], + "source": [ + "G = read_and_create('./data/coPapersCiteseer.mtx')\n", + "nodes = G.nodes().to_array().tolist()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "rw_depth = 4\n", + "max_seeds = 100\n", + "num_nodes = G.number_of_nodes()\n", + "runtime_seq = [0] * max_seeds" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "# sequenctial = so also get a single random seed\n", + "for i in range (max_seeds) :\n", + " for j in range(i):\n", + " seeds = random.sample(nodes, 1)\n", + " t = run_rw(G, seeds, rw_depth)\n", + " runtime_seq[i] = runtime_seq[i] + t" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "runtime = [None] * max_seeds\n", + "\n", + "for i in range (max_seeds) :\n", + " seeds = random.sample(nodes, i+1)\n", + " t = run_rw(G, seeds, rw_depth)\n", + " runtime[i] = t" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAbAAAAEWCAYAAAAHC8LZAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Z1A+gAAAACXBIWXMAAAsTAAALEwEAmpwYAAA3DklEQVR4nO3dd5xU1f3/8ddnZndZOgpYEBFUUASkq4gFC2DBkhiNDaMmMc0Yf4nELho1aiyJvcSer72bxIKoKBa6CCgGUVFAVEA6bJv5/P44d5dhZWGBnb07u+/n47EPdsq99zN3h/uee+6Zc8zdERERyTWJuAsQERHZHAowERHJSQowERHJSQowERHJSQowERHJSQowERHJSQowyQlm1sHMVppZMu5a6iIz62hmbmZ5MW1/oJl9Gv2Njo2jhqiO083snbi2L7VLASabzczmmNma6KD1jZk9aGbNanDdh5bfdvev3L2Zu6dqYv3ZFh1I3cz+XOn+eWY2KJ6qsuovwG3R3+j5yg+a2X5m9p6ZLTOz783sXTPrX/tlSn2iAJMtdZS7NwN6Ab2BC+Mtp075HvizmTWPu5BNsZlncTsBH1WxvhbAf4Bbga2BHYArgOLNrVEEFGBSQ9z9G+BVQpBhZoPMbF7mczLPqszscjN70sweNrMVZvaRmfWLHvsX0AH4d3R29+fKTWRmNsbMroo+1a80s3+bWWsze8TMlpvZRDPrmLHt3c3stejT///M7IT1vQ4z+6mZTap03/8zsxej348ws4+jmueb2Xkb2C0zgfeBP1axrQfN7KqM2+vss2h/jTCzaWa2yszuM7NtzezlaPujzWyrSqs908y+NrMFmbWZWcLMLjCzz8xscbTvt44eK9+3Pzezr4A3qqj3l2Y2O9qHL5pZu+j+z4CdWfv3alRp0S4A7v6Yu6fcfY27j3L3aRnrPtPMZprZEjN71cx2ynisyr9d9Dd/MfqbTwB2yXjMzOzvZvZd9Ph0M+u+vtcmuUkBJjXCzNoDhwOzN2Gxo4HHgVbAi8BtAO4+HPiK6OzO3f9WxfInAsMJn+h3IYTFA4RP+TOBkVFtTYHXgEeBbaLl7jCzPdazzn8Du5lZ54z7To6WBbgP+JW7Nwe6U8XBPsOlwLnlYbEZjgMGE0LgKOBl4CKgLeH/7zmVnn8Q0BkYApyf0Qz7e+BY4ECgHbAEuL3SsgcCXYGhlYsws4OBa4ATgO2BLwl/O9x9F9b9e1U+s5oFpMzsITM7vHLomtkx0Wv6cfS6xgKPRY9t7G93O1AU1XRm9FNuCHBAtO9aRrUvrvzaJHcpwGRLPW9mK4C5wHdEoVFN77j7S9F1rX8BPTdx2w+4+2fuvoxwYP/M3Ue7exnwFKFJE2AYMMfdH3D3Mnf/AHgGOL7yCt19NfACcBJAFGS7EwIWoBTYw8xauPsSd5+yoQLdfSrhAHz+Jr62cre6+7fuPp9wYB/v7h+4exHwXMZrLHeFu69y9+mEMD8puv/XwMXuPi8KmMuBn1RqLrw8WnbNeuo4Bbjf3adEy18IDMg8y62Kuy8H9gMc+CewMDpr2jajtmvcfWb0t/sr0Cs6C6vyb2ehQ89xwGVR3TOAhzI2XQo0J/z9LFr/go3VK7lDASZb6tjobGQQ4UDRZhOW/Sbj99VA4SZef/k24/c167ld3qFkJ2BvM1ta/kM4IG9XxXofZe2B/2Tg+SjYIBwwjwC+NLO3zGxANeq8DPhNxgF7U1T3NZabm/H7l4SzLQj74LmM1z8TSAHbVrFsZe2i9QHg7isJZzM7bPwlQBQep7t7e8KZazvgHxm13ZxR2/eAReve0N+uLZC3ntdcvs03CGf1twPfmdk9Fq7HST2hAJMa4e5vAQ8CN0R3rQKalD8efVpuuymrrLHiwgHuLXdvlfHTzN1/U8XzXwPamlkvQpCVNx/i7hPd/RhCc9bzwJMb27i7fwI8C1xc6aF19hFVB+qm2DHj9w7A19Hvc4HDK+2DwujMrqLUDaz3a0KYABVNe62B+VUuUYVofzxICLLy2n5VqbbG7v4eG/7bLQTK1vOaM7d1i7v3BfYgNCWO2NR6pe5SgElN+gcw2Mx6Eq57FJrZkWaWD1wCVL64vyHfEjoG1IT/AF3MbLiZ5Uc//c2s6/qe7O6lhCbI6wnX014DMLMCMzvFzFpGz1kOpKtZwxXAGYTrfeWmAkeY2dZmth1w7qa/tB+41MyamFm3aHtPRPffBVxd3jnCzNpG156q6zHgDDPrFXXS+CuhOXPOxhaMOmH8KbpOipntSPhgMC6jtgujmjGzlmZW3rxb5d8uanp+Frg8es17AD/L2G5/M9s7ev+tIlwrq+7fS3KAAkxqjLsvBB4mXJNYBvwWuJfwKX0VMG8Di1d2DXBJ1Gy0oZ5+1alrBeGC/omEM4lvgOvYcKA+ChwKPBVdlyk3HJhjZssJ125OqWYNXxCu8zXNuPtfwIfAHGAUa8NmS7xF6EjzOnCDu4+K7r+ZcB1vVHTNchywd3VX6u6jCR1SngEWEDrNnFjNxVdE2xpvZquibc8A/hSt+znC3+PxaL/OIHQIqs7f7mxCM+o3hLO6BzK224JwzW0JoWlxMeFDidQTpgktRUQkF+kMTEREcpICTEREcpICTEREcpICTEREclIsUy9UpU2bNt6xY8e4yxARkTpi8uTJi9x9vd8hrVMB1rFjRyZNmrTxJ4qISINgZl9W9ZiaEEVEJCcpwEREJCcpwEREJCfVqWtg61NaWsq8efMoKiqKuxSphsLCQtq3b09+fn7cpYhIPVfnA2zevHk0b96cjh07YmZxlyMb4O4sXryYefPm0alTp7jLEZF6rs43IRYVFdG6dWuFVw4wM1q3bq2zZRGpFVk9AzOzOYSRqFNAmbv328z11GRZkkX6W4lIbamNM7CD3L3X5oaXiIjkmO8/h5cvgFRpVjdT55sQ64NmzcKs73PmzOHRRysm92XSpEmcc845G1x2zpw5dO/efYPPqQ3lr0FEpEorF8JLI+C2/jDlIVgwLauby3aAOWECvclmdtb6nmBmZ5nZJDObtHDhwiyXE6/KAdavXz9uueWWGCsSEakBJavhrevhll4w8T7oPRzO+QDa983qZrMdYPu5ex/C7Kq/M7MDKj/B3e9x937u3q9t2/UOdxWrOXPmsPvuu3P66afTpUsXTjnlFEaPHs3AgQPp3LkzEyZMAODyyy/nhhtuqFiue/fuzJkzZ511XXDBBYwdO5ZevXrx97//nTFjxjBs2LCK5YcPH86AAQPo3Lkz//znP39QSyqVYsSIEfTv358999yTu++++wfPWbVqFUceeSQ9e/ake/fuPPFEmOR38uTJHHjggfTt25ehQ4eyYMECAD777DMOO+ww+vbty/77788nn3wCwBdffMGAAQPo0aMHl1xyScX6FyxYwAEHHECvXr3o3r07Y8eO3YK9KyI5LZ2GD5+A2/rBm1fBzoPgt+PgqH9A8+2yvvmsduJw9/nRv9+Z2XPAXsDbm7u+K/79ER9/vbymygNgj3YtGHlUtw0+Z/bs2Tz11FPcf//99O/fn0
cffZR33nmHF198kb/+9a88//zz1drWtddeyw033MB//vMfAMaMGbPO49OmTWPcuHGsWrWK3r17c+SRR67z+H333UfLli2ZOHEixcXFDBw4kCFDhqzTZf2VV16hXbt2/Pe//wVg2bJllJaW8vvf/54XXniBtm3b8sQTT3DxxRdz//33c9ZZZ3HXXXfRuXNnxo8fz29/+1veeOMN/vCHP/Cb3/yG0047jdtvv71i/Y8++ihDhw7l4osvJpVKsXr16mq9dhGpZ74aD69eCPMnw/Y94cf/hI4Da7WErAWYmTUFEu6+Ivp9CPCXbG0vmzp16kSPHj0A6NatG4cccghmRo8ePX5wlrUljjnmGBo3bkzjxo056KCDmDBhAr169ap4fNSoUUybNo2nn34aCOH06aefrhNgPXr04E9/+hPnn38+w4YNY//992fGjBnMmDGDwYMHA+FMbvvtt2flypW89957HH/88RXLFxcXA/Duu+/yzDPPADB8+HDOP/98APr378+ZZ55JaWkpxx577Dr1iUgDsPgzGH05zHwRmm8Px94Je54IidrvUpHNM7BtgeeibtV5wKPu/sqWrHBjZ0rZ0qhRo4rfE4lExe1EIkFZWRkAeXl5pNPpiudtznehKndBr3zb3bn11lsZOnRolevo0qULU6ZM4aWXXuKSSy7hkEMO4Uc/+hHdunXj/fffX+e5y5cvp1WrVkydOrVa9QAccMABvP322/z3v//l9NNP549//COnnXZaNV+hiOSs1d/DW3+DifdCsgAGXQT7ng0FTWMrKWuR6e6fu3vP6Kebu1+drW3VBR07dmTKlCkATJkyhS+++OIHz2nevDkrVqyoch0vvPACRUVFLF68mDFjxtC/f/91Hh86dCh33nknpaWha+qsWbNYtWrVOs/5+uuvadKkCaeeeiojRoxgypQp7LbbbixcuLAiwEpLS/noo49o0aIFnTp14qmnngJCQH744YcADBw4kMcffxyARx55pGL9X375Jdtuuy2//OUv+cUvflHxmkWknipdA+/8A27uBRPuhl4nwTlTYND5sYYX5MBQUrniuOOO4+GHH6Zbt27svffedOnS5QfP2XPPPUkmk/Ts2ZPTTz+d3r17/+Dxgw46iEWLFnHppZfSrl27dZoof/GLXzBnzhz69OmDu9O2bdsfXH+bPn06I0aMIJFIkJ+fz5133klBQQFPP/0055xzDsuWLaOsrIxzzz2Xbt268cgjj/Cb3/yGq666itLSUk488UR69uzJzTffzMknn8x1113HMcccU7H+MWPGcP3115Ofn0+zZs14+OGHa3Q/ikgdkU7D9Kfg9b/A8nnQeQgcegVsu0fclVUwd4+7hgr9+vXzyhNazpw5k65du8ZUUe25/PLLadasGeedd17cpWyxhvI3E6m35k6AVy5Y20Fj8JWw84GxlGJmk6saCENnYCIiEiz5MpxxzXgamm0XaweN6lCA1RGXX3553CWISEO1ciGMvSF8CTmRhANGwMBzoVHdHoFHASYi0lCVrIb3boH3bg2dNXqfCgeeDy13iLuyalGAiYg0NO4w/WkYPRKWz4euR8Mhl0GbznFXtkkUYCIiDcm8SfDKhTBvQuigcdy9sNO+cVe1WRRgIiINweLPQgeNj5+HptvA0bdBr5PDNa8cVTe7lsgmO+KII1i6dOkGn9OxY0cWLVpUOwWJSN2w+nt4+Xy4fW/4dBQceEH4InKf4TkdXqAzsHrjpZdeirsEEalLUqVh2Kcx10LxcuhzGgy6sFZGia8tOgPbiPVNT1LV1CSTJ0+mZ8+e9OzZkxEjRlRMRPnggw9y9tlnV6xz2LBhFSPRjxo1igEDBtCnTx+OP/54Vq5cCYSzpZEjR9KnTx969OhRMc3JypUrOeOMM+jRowd77rlnxYC7mWdXxx57LH379qVbt27cc889tbKfRKSOcIdPXoI7BoQvI7frDb9+F466uV6FF+TaGdjLF8A302t2ndv1gMOvrfLh9U1Pcvjhh693apIzzjiD2267jQMOOIARI0ZsdNOLFi3iqquuYvTo0TRt2pTrrruOm266icsuuwyANm3aMGXKFO644w5uuOEG7r33Xq688kpatmzJ9OlhPyxZsuQH673//vvZeuutWbNmDf379+e4446jdevWm7N3RCSXzHk3jBQ/bwK03hVOfjIMAbWegbnrg9wKsBhUnp5kq622Wu/UJEuXLmXp0qUccECYs3P48OG8/PLLG1z3uHHj+Pjjjxk4MMyhU1JSwoABAyoe//GPfwxA3759efbZZwEYPXp0xSC7AFtttdUP1nvLLbfw3HPPATB37lw+/fRTBZhIffbdJzDqEpj9Wpji5KibodcpkMyPu7Ksyq0A28CZUrZUnp7k4IMPXu/UJBvqQFHVVCvuzuDBg3nsscfWu1z5tC3JZLJi2paNGTNmDKNHj+b999+nSZMmDBo0aLOmdhGRHLBmKbx1HYy/O4yaMfhK2OuXkN847spqha6BbUTl6UnGjx+/3qlJWrVqRatWrXjnnXeAdacg6dixI1OnTiWdTjN37lwmTJgAwD777MO7777L7NmzgXC9bdasWRusZ/DgwevMkFy5CXHZsmVstdVWNGnShE8++YRx48Zt+U4QkbolVQaTHoBb+8K4O0MHjd9/AAPPaTDhBbl2BhaD9U1PkpeXt96pSR544AHOPPNMzIwhQ4ZUrGPgwIF06tSJPfbYg65du9KnTx8A2rZty4MPPshJJ51UMRPyVVddtd6pWMpdcskl/O53v6N79+4kk0lGjhxZ0dQIcNhhh3HXXXfRtWtXdtttN/bZZ58s7RkRqXXu8Ml/4fUrYNEs6DAADn82fCG5AdJ0KlkyZ84chg0bxowZM+Iupdbl6t9MpE6bOzFc55o7Dlp3hkMvh92PrLcdNMppOhURkVy1ZE7oWfjRc9BsWxj2D+g9HJI6fGsPZEnHjh0b5NmXiNSQNUtg7I2hg4Ylwyjx+55T56c4qU05EWDujtXz0+T6oi41SYvkpNIimHBPCK+iZWG8woMvgRbt4q6szqnzAVZYWMjixYtp3bq1QqyOc3cWL15MYWFh3KWI5B53mPFMaC5cNhd2HRyuc23XPe7K6qw6H2Dt27dn3rx5LFy4MO5SpBoKCwtp37593GWI5JavPwgjDc0dB9vtCcfcBjsPiruqOq/OB1h+fj6dOnWKuwwRkZq34lt440r44P+gaRs4+tYwgkaOjxJfW+p8gImI1Dslq+H92+Gdv0OqBPY9Gw4YAYUt464spyjARERqSzoN058ME0sunw9dj4JDr4DWu8RdWU5SgImI1IbZr8NrI+Hb6bB9L/jxP6HjwLirymkKMBGRbPpmehhB4/Mx0GonOO4+6PZjSGgo2i2lABMRyYaVC0MHjSkPQ+Ot4LBrod+ZkNco7srqDQWYiEhNKl0TRs94+wYoWwP7/BYOHBFCTGqUAkxEpCakyuDDx2DMNaGDRpfDYMjV0GbXuCurt7IeYGaWBCYB8919WLa3JyJSq1Z8C5/8Jwz/tPAT2KEv/Ohu6LR/3JXVe7VxBvYHY
CbQoha2JSKSfekUTH4Qpj8NX70POLTtCic8DF2PrvdTnNQVWQ0wM2sPHAlcDfwxm9sSEakVqxbBMz8PvQq32QMGXQB7HANtd1dw1bJsn4H9A/gz0LyqJ5jZWcBZAB06dMhyOSIiW2DuRHjqZyHEjr4V+pwWd0UNWta+iGBmw4Dv3H3yhp7n7ve4ez9379e2bdtslSMisvnKSsKwTw8cDok8+PkohVcdkM0zsIHA0WZ2BFAItDCz/3P3U7O4TRGRmjVrFLxyAXz/Gew+LIwUry7xdULWAszdLwQuBDCzQcB5Ci8RyRnffQKvXQqfjoLWneGUZ6DzoXFXJRn0PTARkUzL5ofvck19BAqawZCrYK9fQV5B3JVJJbUSYO4+BhhTG9sSEdksRcvDda5xd4Ru8nv/GvY/D5q2jrsyqYLOwESkYUuVwZSH4M2/wupF0OMEOPhi2Kpj3JXJRijARKThmj0aXr04jKCx00AY8hTs0CfuqqSaFGAi0vAsmg2vXgSfvgpb7ww/fQR2P1JfRM4xCjARaTjWLAmjxI+/G/IKYfCV4VqXOmjkJAWYiNR/ZSUw8V54+2+wZin0PgUOGQnNtom7MtkCCjARqb/c4eMXYPRIWDIHdh4UusVv1yPuyqQGKMBEpH6aNzlc55o7Lgy6e8ozsOshus5VjyjARKR+WTIH3rgapj8JTbeBo26G3sMhkYy7MqlhCjARqR9WLoSxN8DE+0JY7X8e7HcuNKpyMgzJcQowEcltJavhvVvhvVugdA30PjXM0dWiXdyVSZYpwEQkN7mHGZFHj4Tl88NMyIdcBm06x12Z1BIFmIjknrkTQweNeRNg+55w3L2w075xVyW1TAEmIrlj0Wx4/QqY+SI02xaOvg16nQKJrM3NK3WYAkxE6r5Vi8MUJ5Puh/zGMOgiGPA7aNQs7sokRgowEam7UqWhV+GYv0LxSuh7euigoRE0BAWYiNRF7mEm5FGXwqL/wc4HwWHXwDZd465M6hAFmIjULV+Nh9GXw1fvhZHiT3wMdjtcI2jIDyjARKRuWPxZOOP633/DCBpH3gh9fgbJ/LgrkzpKASYi8SpaBm9fD+PugrxGcNAlsM9v1EFDNkoBJiLxSJXB1P+DN66CVYvCFCcHXwbNt427MskRCjARqV3uMOvVMILGwk9gx73h5Cdhhz5xVyY5RgEmIrVn/hR47TKYMxa23gVO+Bd0PUodNGSzKMBEJPuWfgWv/wWmPwVNWsMRN4TvdKmDhmwBBZiIZM+apfDOTaGDhhns/ycYeC4Utoi7MqkHFGAiUvNSpTDpgTD805ol0PMkOPgSaLlD3JVJPaIAE5Ga4w7/eylc51o8GzodAEOuCiPGi9QwBZiI1IyvP4BXL4Ev34HWneGkx6HLYeqgIVmjABORLbP0q/BdrmlPQJM2GkFDao0CTEQ2z+rvYeyNMOEesATs9//CT2HLuCuTBkIBJiKbpqw4hNbb10PRcuh1Mhx0EbRsH3dl0sBkLcDMrBB4G2gUbedpdx+Zre2JSJa5w0fPhZHil34Jux4Kh14B23WPuzJpoLJ5BlYMHOzuK80sH3jHzF5293FZ3KaIZMPcCfDqxTBvAmzTDYY/B7scHHdV0sBlLcDc3YGV0c386MeztT0RyYIlX4Yzro+ehWbbwtG3Qq9TIJGMuzKR7F4DM7MkMBnYFbjd3cdnc3siUkOKlsHYm2DcnaGDxoHnw77naIoTqVOyGmDungJ6mVkr4Dkz6+7uMzKfY2ZnAWcBdOjQIZvliMjGpEph8oNhBI3Vi2HPn8Ihl6mDhtRJmxRgZtbE3Vdv6kbcfamZvQkcBsyo9Ng9wD0A/fr1UxOjSBzcYdYrYQSNRbNgp/1g6FXQrnfclYlUKVGdJ5nZvmb2MfBJdLunmd2xkWXaRmdemFljYHD58iJSh8yfAg8Og8dOBE/DiY/B6f9ReEmdV90zsL8DQ4EXAdz9QzM7YCPLbA88FF0HSwBPuvt/NrtSEalZS+fC61dEU5y00RQnknOq3YTo7nNt3THNUht5/jRAH+FE6priFfDO3+H928Pt/c+DgX/QFCeSc6obYHPNbF/Ao+90/QGYmb2yRKTGpUrhg3/Bm9fAqu+gxwlw6Eh10JCcVd0A+zVwM7ADMB8YBfwuW0WJSA1Kp8P3uN68Gr7/HHbcJ4wU375v3JWJbJFqBZi7LwJOyXItIlLT5rwDr1wA30wPI2ic9AR0GaopTqReqFaAmVkn4PdAx8xl3P3o7JQlIltk2Xx47VKY8Qy03BF+/E/o/hNIVKvjsUhOqG4T4vPAfcC/gXTWqhGRLVOyGsbdEUbRSJeFETQGngsFTeKuTKTGVTfAitz9lqxWIiKbL52CqY+G61wrFsDuw2DIVbB1p7grE8ma6gbYzWY2ktB5o7j8TnefkpWqRKR63OF/L8Prf4GFM2GHfvCT+2GnfeOuTCTrqhtgPYDhwMGsbUL06LaIxOGLt0NwzZsIW+8CJzwMXY9WBw1pMKobYMcDO7t7STaLEZFqWDgr9Cz87HVosQMcdUuYFVkjaEgDU90AmwG0Ar7LXikiskHFK+Ctv4VOGvlNYcjV0P8XkF8Yd2UisahugLUCPjGziax7DUzd6EWyLZ2CaU+GcQtXLIDep8Ihl0OztnFXJhKr6gbYyKxWISI/5A6fvhZmRP7uI2jXB376f9C+X9yVidQJ1R2J461sFyIiGRZMg1cvgjljYatO8JMHYI9j9UVkkQwbDDAze8fd9zOzFYRehxUPAe7uGr5apCatWgRvXAVTHoLCVnD49WGKk7yCuCsTqXM2GGDuvl/0b/PaKUekgSorgYn3wlvXQvFK2OtXMOh8aLxV3JWJ1FnVHQvxX+4+fGP3icgmcoePngsdNJbMgV0OhqHXwDa7x12ZSJ1X3U4c3TJvmFkeoLkYRLbEl+/DqEtg/qQwUvypz8Cuh8ZdlUjO2Ng1sAuBi4DGZra8/G6gBLgny7WJ1E+LP4PRI2Hmv6H59nDM7dDzJEgk465MJKds7BrYNcA1ZnaNu19YSzWJ1E+rFsPbfwvXupKN4KBLYMBvoaBp3JWJ5KTqdqO/0Mx2AHZi3fnA3s5WYSL1RmkRTLgb3r4RSlZAn9Ng0EXQfNu4KxPJadXtxHEtcCLwMZCK7nZAASZSleKVMO1xeOdmWPYVdB4Kg6+AbbrGXZlIvVDdThw/AnZz9+KNPlOkoVv8GUz4J0x9BIqXhxE0jrkVdh4Ud2Ui9Up1A+xzIJ+McRBFpJJUKYy9Ed6+HjDodmz4Plf7fpriRCQLqhtgq4GpZvY66w7me05WqhLJNd9+BM/9Gr6ZBj1OgCFXQvPt4q5KpF6rboC9GP2ISKaSVfDuzTD2JihsGQbb7XpU3FWJNAjV7YX4ULYLEckp6TRMeyLMiLzia+j+Ezj8OmjaJu7KRBqM6vZC/IJ1B/MFwN13rvGKROq6Oe/CqIvh6w9CB43jH4AO+8RdlUiDU90mxMwJiAqB44Gta74ckTps0afw2kj433+hxQ7wo3ugx/Ga4kQkJtVtQlxc6a5/mNlk4LKaL0mkjlm1CMZc
C5Puh/wmcMhlsM9vIb9x3JWJNGjVbULsk3EzQTgjq+7Zm0huKiuG8XfB2zeEzhp9T4dBF0KztnFXJiJUP4RuzPi9DJhDaEYUqX/Safjo2dBBY+mXYQSNIVdC293irkxEMlS3CfGgzNtmliQMLTWrqmXMbEfgYWBbQgeQe9z95s0vVaQWfP4WvHYZLJgK23aH4c+FObpEpM7Z2HQqLYDfATsALwCjo9t/AqYBj2xg8TLgT+4+xcyaA5PN7DV3/7hGKhepSd99Eubmmv0atNwRfnR3+EKyOmiI1FkbOwP7F7AEeB/4JXAxYT6wH7n71A0t6O4LgAXR7yvMbCYhCBVgUnes/j500Jh4LxQ0g8FXwl5nQX5h3JWJyEZsLMB2dvceAGZ2LyGQOrh70aZsxMw6Ar2B8et57CzgLIAOHTpsympFNl9ZCUy6L4RX8XLod2aY4qRp67grE5Fq2liAlZb/4u4pM5u3GeHVDHgGONfdl1d+3N3vIZrduV+/fj/4srRIjXKHj1+A0ZfDki9g54Ng6F9h2z3irkxENtHGAqynmZWHjgGNo9sGuLu32NDCZpZPCK9H3P3ZLa5WZEt8NR5euxTmjodt9oBTn4FdD427KhHZTBsMMHdPbu6KzcyA+4CZ7n7T5q5HZIst/iyccc18EZptB0ffCr1OgcRmv71FpA7I5peRBwLDgelmNjW67yJ3fymL2xRZa/X38NZ1oYNGslG4xrXv2VDQNO7KRKQGZC3A3P0dQlOjSO1KlYZhn978a+ig0ee0EF7Nt427MhGpQRoOSuoPd5j1Shhwd9H/oNOBcNg1sG23uCsTkSxQgEn9MG8SjLoUvnoPtt4FTnocuhwGpkYAkfpKASa57fsvQgeNj5+Hpm3hyBuhz88gmR93ZSKSZQowyU1Fy8Io8ePvgkQeHHg+7Pt7aNQ87spEpJYowCS3pMpgyoPw5jWwehH0PBkOuRRatIu7MhGpZQowyQ3uMOvV8EXkRbNgp4Ew9Glo1zvuykQkJgowqfsWfBg6aHzxFrTeFU58FHY7Qh00RBo4BZjUXUvnwhtXwbQnoPFWcPjfwqC76qAhIijApC4qWgbv/B3evyPcHvgH2P+PUNgy3rpEpE5RgEndkSqDyQ+EKU5WL4I9fwoHXwqtdoy7MhGpgxRgEr8fdNDYD4ZepQ4aIrJBCjCJ14JpMOpi+OLtMIKGOmiISDUpwCQeS+eGwXY/fEwdNERksyjApHat/h7euQnG3wN4mN5k//Ogcau4KxORHKMAk9pRVgIT7oa3rg9TnPQ6GQZdqA4aIrLZFGCSXeVTnLx6MXz/Gew6GAZfoSlORGSLKcAke779CEZdAp+9AW26wCnPQOdD465KROoJBZjUvOUL4M2rYeoj0KgFHHYt9P+FOmiISI1SgEnNKVkF794C790CqVLY57dwwHmhl6GISA1TgMmWS6fDeIWv/wVWfA17HAuHXg5bd4q7MhGpxxRgsnlWfAvzJ8H8yfDpKPhmehg54yf3w04D4q5ORBoABZhsmkWz4cWz4av3w+1EHmzbHX50N/Q4ARKJeOsTkQZDASbVk07DxHvhtcsgr1FoIuywL2y/J+Q3jrs6EWmAFGCyccu/hud+HSaU3HUwHH0rtNg+7qpEpIFTgMmGff4WPPNzKFkNR90MfX6mgXZFpE5QgMn6pdNhzMI3r4bWneH0/0Lb3eKuSkSkggJMfmjhLHjlAvjsdej+k3Dm1ahZ3FWJiKxDASZrrfgWxlwDUx6G/CZwxA1hBA01GYpIHaQAkzCCxnu3hlE0UsUhtA78MzRtE3dlIiJVUoA1ZOlUmFDyjatgxQLY4xg4ZCS03iXuykRENiprAWZm9wPDgO/cvXu2tiOb6fMxYaT4b6bDDv3g+Aehwz5xVyUiUm3ZPAN7ELgNeDiL25BNtfB/MOpS+PRVaNkBjrsPuh+n61wiknOyFmDu/raZdczW+mUTrVwYOmhMfhAKmsKhV8Dev4b8wrgrExHZLLFfAzOzs4CzADp06BBzNfVQaRGMvxPG3hQ6a/Q7EwZdoA4aIpLzYg8wd78HuAegX79+HnM59Yc7zHgGRl8By76CLofD4L9A2y5xVyYiUiNiDzDJgq/GwasXhalOtusBx7wIOx8Yd1UiIjVKAVafLP4MXr8CPn4Bmm8Px9wBPU+ERDLuykREalw2u9E/BgwC2pjZPGCku9+Xre01aKsWw9t/g4n3QbIABl0I+/4+dNYQEamnstkL8aRsrVsipUUw/i4YeyOUrIQ+p4Xwar5d3JWJiGSdmhBzUToNM56G1/8Cy+ZC56Ew+ArYpmvclYmI1BoFWK758r3QQePrD2C7PeGY29VBQ0QaJAVYrvj+C3jtMpj5IjRvB8feBXv+FBKJuCsTEYmFAqyuW7MkXOMafzck8uCgi2HA2VDQJO7KRERipQCrq8qKYeK98NbfoGgZ9DoFDr4EWmwfd2UiInWCAqyucYePngvf51oyB3Y5OIygsV2PuCsTEalTFGB1yVfjwhQn8ybCNt3g1Gdg10PjrkpEpE5SgNUF63TQ2B6Ovg16nawRNERENkABFqfiFaGDxvu3hw4agy6Cfc/WCBoiItWgAItDqgym/h+8+VdY+S3seSIcOhJatIu7MhGRnKEAq03uMPPfYQSNxZ9C+73gxEehfb+4KxMRyTkKsNry1XgYdXHooNFmtxBcux0BZnFXJiKSkxRg2bbkSxg9MnSNL++g0fMkSGrXi4hsCR1Fs6VoWeigMe4usAQceAEMPEcdNEREaogCrKalSmHS/TDm2jAMVM8T4ZDL1EFDRKSGKcBqijvMeiV8EXnxbOh0AAy5CrbvGXdlIiL1kgKsJnz7EbxyIXzxFrTpAic/CZ2HqIOGiEgWKcC2xIpvYMw1MOVhKGwJh18P/c6AZH7clYmI1HsKsM1RvALevQXevy1c89rrLDjwfGiyddyViYg0GAqwTZEqhSkPhQ4aqxZCtx/DIZfC1jvHXZmISIOjAKuO8g4ar10Gi2bBTvvByU/ADn3jrkxEpMFSgG3M11NDz8I5Y6H1rnDiY7Db4eqgISISMwVYVZbNhzeuhA8fD9e21EFDRKROUYBVVrwC3r0Z3rsNPB1Gz9j/T6GXoYiI1BkKsHLpFHzwL3jjalj1HXQ/Dg4ZCVvtFHdlIiKxSqedFUVlLFldwqqSMtaUpFhTmqI0lcbMSJiRNKMklWJVcYrVJWWsKk5xXJ/2tGySvVYrBRjA7Ndh1KXw3Uew495w0mOa4kREtlhJWZpVxWWsKU3hgBEun7tD2p10GsrSaZatKWXp6lKWrC5hTWmKwrwkjQuSNM5PUlSaYumaUpatKWVVcRnJhJGfTJCfNAwj5U7anVTKWV2aYlVxCI+VxaUsX1PG8qJSVhSVkZ80mhXm07xRHoX5CUpSTnFpipJUmlTa19bkUJZKU5Z2ytJpVheH7afSvsmvf99dWyvAsua7maGDxuzR0GonOP5B2ONYddAQqQVlqTQrispYVVJGcVmakrI0xWXpioNn+UG1MD9BYX6SwvwEAKUppywVDq5laac0la64XVL
mlKTSlJalw4E9HQ7IxWUplq4OIbB8TSmlacd97QE5L2EkEwnyEsaa0hRLVpewZHUJy9eUUZCXoHG0/WTCKEuFbZamQo2l6RAA5feX11RcmqYkla7VfZqXMJo2yqNZozyaNkrSsnE+27UopPM2eZSmnZVFZawsLuP7VSUU5CUoyEvQrFEeyUQ4i0oYgJGfNPKSCfITRuOCJFs1KaBVk3y2alJA00Z5NC5I0qQgSUEyEYWek0pDQV6CpgVJmjTKo2lBkuaF2e0z0DADbOV3YTbkKQ9BQfMwZuFeZ0Feo7grE6mWdNopLkuzuqSM1SUpisvSmEHCDAOcEBAl0cG9uCzNmtIURVGzT/mx26N1laV9neAI/6bXCYtwwI7uS5eHRliutHz5lFf8XppKU5JySsvC7+UH+9IyZ0VRKatKUrW+35oX5tGycT4FyRCG5Tsr5WtfZ2F+OGC3bdaIXdo2oyzlrClNsaYkRSrtNMpP0LRRXjjIJxIkk0Z+FIDhwB/uL8xP0qxRMhzw85MVZ17lElFoJBPQsnEIh62aFNC4IJx1lW+zMD8EUasm+TTOT+IOpekQ+EC0jrCugrxEre/TODWsACstgnF3wNiboGwN9P9lGEGjaeu4K5Ma4B4O1EWlKQwjkYBkwnCn4kC79uC87qflskoH5fL7ispSFJWGdaaiT+0OpB1WFpWFT/RFpZSUpSs+0RYkE7iXH9zDulaXpFhdmmJNSRmptEefdsOZ/qqS8Kl4RVEZpWVpEgkjL2EkElaxnlTKM8IlzWa05mwRM8hPJMhLWkUTVl70bzIRDtrlj5d/ci/IS9CkIBzUy59XvlyLxvm0KMynZeM8mjTKo1FegkbR/ssrX08iHIyLouAtig7Y+YmwjbzE2rAoP2MoSCYoyFu7vfKDe0EyQfPCPPKSuX+AN4NGiSSN8pJxlxK7hhFgJavgg0fgvVtg2dwwE/Lgv0CbznFXljUetWWXH6QzP12XZhzI1zaHpCtulzfNlKXWfrqu/Gm8LKP5JPOT+A/XsfZTeWlZuuL+VPnBOOO5qbSTl0zQKJmgUX6ChNkPQqWkbO1ZRfk6Uu4UlaZYHX1Crk1NC8Kn44K8REVtxWVpEmYVB9j8ZIImBUkaF+TRJD9JYb5VXP9wnO1aFNK8MI9mhXnkJxOko9dUHnTlYZaXceBOmFGYn4zWm6RR9Mm7/DqGGWsP7Im1TXCF+UkK8hIV12IgfIKvCKJoG8mEVdwuf0ykrqnfAbbiW5hwN0y8D4qWwg794JjbYecDK55S/om2LDo4l7dnZ34qr9zmvs5BN/M5VTS5lD+vPAhCaEShUPHYD9vR1w2F8rrW1ltaaV0VQRJtu7ZlfiKu+Ddh5EefiCs+oUcH5KSF5zfND23wSTNK005JWYqVxWWko0DLSxiF+eETdPiEnSA/mahofkkmjEZ5SZo2StKkIDTXlDeNpaM2m/L1hLOAtdc7ym+X15xf8ck+HPwL85MU5oWDf/mndyMc9Js2StaLT/QiuSqrAWZmhwE3A0ngXne/Npvbe/6D+dz25mzSqRSHl7zKb0sfojFFjKE/D3EUk+Z2Jv3AGlL+ckXTjNfycT7zIJ958Fx7YM1slklEbetGk4K8ioN/VQfbzE/Q5W3y4Xlrl0kmrMqmoLwqnpd5wF/7PFsnFPIShqnzi4jUoqwFmJklgduBwcA8YKKZvejuH2drm62a5DNwq+Wc+t0NdC6dyqymffl3+z+ypPFOdEok2CX6xG8GSSs/8K57QM6vdPAuD4O8jAN3ZsjkZwZRFe3yeYm1y+kgLyJSM7J5BrYXMNvdPwcws8eBY4CsBdigkrcZNP/sMNzTUbfQpc9p/EmBISJSL2UzwHYA5mbcngfsXflJZnYWcBZAhw4dtmyLrXeBXQ6GI66Hljts2bpERKROi/0KtLvf4+793L1f27Ztt2xl7XrDSY8qvEREGoBsBth8YMeM2+2j+0RERLZYNgNsItDZzDqZWQFwIvBiFrcnIiINSNaugbl7mZmdDbxK6EZ/v7t/lK3tiYhIw5LV74G5+0vAS9nchoiINEyxd+IQERHZHAowERHJSQowERHJSQowERHJSea1PZrtBpjZQuDLzVi0DbCohsupL7RvNkz7Z8O0fzZM+6dqNbVvdnL39Y5yUacCbHOZ2SR37xd3HXWR9s2Gaf9smPbPhmn/VK029o2aEEVEJCcpwEREJCfVlwC7J+4C6jDtmw3T/tkw7Z8N0/6pWtb3Tb24BiYiIg1PfTkDExGRBkYBJiIiOSmnA8zMDjOz/5nZbDO7IO564mZmO5rZm2b2sZl9ZGZ/iO7f2sxeM7NPo3+3irvWuJhZ0sw+MLP/RLc7mdn46D30RDT1T4NkZq3M7Gkz+8TMZprZAL131jKz/xf9v5phZo+ZWWFDfv+Y2f1m9p2Zzci4b73vFwtuifbTNDPrUxM15GyAmVkSuB04HNgDOMnM9oi3qtiVAX9y9z2AfYDfRfvkAuB1d+8MvB7dbqj+AMzMuH0d8Hd33xVYAvw8lqrqhpuBV9x9d6AnYT/pvQOY2Q7AOUA/d+9OmCLqRBr2++dB4LBK91X1fjkc6Bz9nAXcWRMF5GyAAXsBs939c3cvAR4Hjom5pli5+wJ3nxL9voJwANqBsF8eip72EHBsLAXGzMzaA0cC90a3DTgYeDp6SkPeNy2BA4D7ANy9xN2XovdOpjygsZnlAU2ABTTg94+7vw18X+nuqt4vxwAPezAOaGVm229pDbkcYDsAczNuz4vuE8DMOgK9gfHAtu6+IHroG2DbuOqK2T+APwPp6HZrYKm7l0W3G/J7qBOwEHggamK918yaovcOAO4+H7gB+IoQXMuAyej9U1lV75esHK9zOcCkCmbWDHgGONfdl2c+5uF7Ew3uuxNmNgz4zt0nx11LHZUH9AHudPfewCoqNRc21PcOQHQt5xhC0LcDmvLD5jPJUBvvl1wOsPnAjhm320f3NWhmlk8Ir0fc/dno7m/LT9ejf7+Lq74YDQSONrM5hObmgwnXfFpFTULQsN9D84B57j4+uv00IdD03gkOBb5w94XuXgo8S3hP6f2zrqreL1k5XudygE0EOke9gAoIF1RfjLmmWEXXdO4DZrr7TRkPvQj8LPr9Z8ALtV1b3Nz9Qndv7+4dCe+VN9z9FOBN4CfR0xrkvgFw92+AuWa2W3TXIcDH6L1T7itgHzNrEv0/K98/ev+sq6r3y4vAaVFvxH2AZRlNjZstp0fiMLMjCNc1ksD97n51vBXFy8z2A8YC01l7neciwnWwJ4EOhOlqTnD3yhdfGwwzGwSc5+7DzGxnwhnZ1sAHwKnuXhxjebExs16EDi4FwOfAGYQPuXrvAGZ2BfBTQm/fD4BfEK7jNMj3j5k9BgwiTJvyLTASeJ71vF+i0L+N0Oy6GjjD3SdtcQ25HGAiItJw5XITooiINGAKMBERyUkKMBERyUkKMBERyUkKMBERyUkKMGlQzMzN7MaM2+eZ2eU1tO4HzewnG3/mFm/n+Gi0+Dcr3Z+IRvyeYWbTzWyimXWqge11zBxxXKSuUIBJQ1
MM/NjM2sRdSKaM0Ryq4+fAL939oEr3/5QwzNGe7t4D+BGwtGYqFKl7FGDS0JQB9wD/r/IDlc+gzGxl9O8gM3vLzF4ws8/N7FozO8XMJkRnOrtkrOZQM5tkZrOi8RfL5yC7PjojmmZmv8pY71gze5EwqkPlek6K1j/DzK6L7rsM2A+4z8yur7TI9sACd08DuPs8d18SLTfEzN43sylm9lQ0XiZm1jd6bZPN7NWMYYD6mtmHZvYh8LuMmrpFr3tq9Fo6b9LeF6lBCjBpiG4HTommEKmunsCvga7AcKCLu+9FGLni9xnP60iY6udI4C4zKyScMS1z9/5Af+CXGU17fYA/uHuXzI2ZWTvCXFMHA72A/mZ2rLv/BZgEnOLuIyrV+CRwVBQuN5pZ72hdbYBLgEPdvU+0/B+jcTNvBX7i7n2B+4Hy0WweAH7v7j0rbePXwM3u3gvoRxhDUSQWm9JsIVIvuPtyM3uYMEHhmmouNrF87DYz+wwYFd0/HchsynsyOgP61Mw+B3YHhgB7ZpzdtSRM7FcCTHD3L9azvf7AGHdfGG3zEcJ8Xc9v4HXNi8YyPDj6ed3MjgcaEyZ9fTeM6EMB8D6wG9AdeC26PwksMLNWQKtovieAfxEmJCRa7mILc6s96+6fVrnHRLJMASYN1T+AKYQzjXJlRK0SZpYgHOjLZY5vl864nWbd/0eVx2ZzwAhnM69mPhCNybhqc4qvSjQO38vAy2b2LWFCwVHAa+5+UqXt9wA+cvcBle5vtYH1P2pm4wlnmC+Z2a/c/Y2afA0i1aUmRGmQogFpn2TdKeDnAH2j348G8jdj1cdHvQF3AXYG/ge8CvwmarLDzLpYmCxyQyYAB5pZGzNLAicBb21oATPrEzU9lgfwnoQBVccBA81s1+ixpmbWJaqtrZkNiO7PN7Nu0UzMS6PBoQFOydjGzsDn7n4LYaTxPau3W0RqngJMGrIbCSNpl/snITQ+BAaweWdHXxHC52Xg1+5eRLhO9jEwJeqOfjcbaf2ImisvIEzX8SEw2d03NlXHNsC/o21MI5xR3hY1Q54OPGZm0wjNgLu7ewlhKpDrotc8Fdg3WtcZwO1mNpVwBlnuBGBGdH934OGN1CSSNRqNXkREcpLOwEREJCcpwEREJCcpwEREJCcpwEREJCcpwEREJCcpwEREJCcpwEREJCf9fyf+/1JKxIZnAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "%matplotlib inline\n", + "seed_idx = list(range(1,max_seeds +1))\n", + "\n", + "\n", + "plt.figure(figsize=(7,4))\n", + "plt.plot(seed_idx, runtime, label = \"multiple seeds\")\n", + "plt.plot(seed_idx, runtime_seq, label = \"sequential\")\n", + "\n", + "\n", + "plt.title('Runtime vs. Number of Seeds')\n", + "plt.xlabel('Number of Seeds')\n", + "plt.ylabel('Runtime')\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "-----\n", + "Copyright (c) 2021, NVIDIA CORPORATION.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0\n", + "\n", + "Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "cugraph_dev", + "language": "python", + "name": "cugraph_dev" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/cugraph_benchmarks/random_walk_perf.ipynb b/notebooks/cugraph_benchmarks/random_walk_perf.ipynb new file mode 100644 index 00000000000..738298767c5 --- /dev/null +++ b/notebooks/cugraph_benchmarks/random_walk_perf.ipynb @@ -0,0 +1,621 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Random Walk Performance\n", + "# Skip notebook test¶ \n", + "\n", + "Random walk performance is governed by the length of the paths to find, the number of seeds, and the size or structure of the graph.\n", + "This benchmark will use several test graphs of increasingly larger sizes. While not even multiples in scale, the four test graphs should give an indication of how well Random Walk performs as data size increases. 
\n", + "\n", + "### Test Data\n", + "Users must run the _dataPrep.sh_ script before running this notebook so that the test files are downloaded\n", + "\n", + "| File Name | Num of Vertices | Num of Edges |\n", + "| ---------------------- | --------------: | -----------: |\n", + "| preferentialAttachment | 100,000 | 999,970 |\n", + "| dblp-2010 | 326,186 | 1,615,400 |\n", + "| coPapersCiteseer | 434,102 | 32,073,440 |\n", + "| as-Skitter | 1,696,415 | 22,190,596 |" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# Import the modules\n", + "import cugraph\n", + "import cudf" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# system and other\n", + "import gc\n", + "import os\n", + "import time\n", + "import random\n", + "\n", + "# MTX file reader\n", + "from scipy.io import mmread\n", + "\n", + "import networkx as nx" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "try: \n", + " import matplotlib\n", + "except ModuleNotFoundError:\n", + " os.system('pip install matplotlib')\n", + "\n", + "import matplotlib.pyplot as plt; plt.rcdefaults()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "try: \n", + " import pybind11\n", + "except ModuleNotFoundError:\n", + " os.system('pip install pybind11')\n", + " \n", + "import pybind11" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "try: \n", + " import walker\n", + "except ModuleNotFoundError:\n", + " os.system('pip install graph-walker')\n", + "\n", + "import walker" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# Test File\n", + "data = {\n", + " 'preferentialAttachment' : './data/preferentialAttachment.mtx',\n", + " 'dblp' : './data/dblp-2010.mtx',\n", + " 'coPapersCiteseer' : './data/coPapersCiteseer.mtx',\n", + " 'as-Skitter' : './data/as-Skitter.mtx'\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Read the data and create a graph" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Data reader - the file format is MTX, so we will use the reader from SciPy\n", + "def read_data(datafile):\n", + " print('Reading ' + str(datafile) + '...')\n", + " M = mmread(datafile).asfptype()\n", + "\n", + " _gdf = cudf.DataFrame()\n", + " _gdf['src'] = M.row\n", + " _gdf['dst'] = M.col\n", + " _gdf['wt'] = 1.0\n", + " \n", + " return _gdf" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "def create_cu_ugraph(_df):\n", + " _g = cugraph.Graph()\n", + " _g.from_cudf_edgelist(_df, source='src', destination='dst', edge_attr='wt', renumber=False)\n", + " return _g" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "def create_nx_ugraph(_df):\n", + " _gnx = nx.from_pandas_edgelist(_df, source='src', target='dst', edge_attr='wt', create_using=nx.Graph)\n", + " return _gnx" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Define the call to RandomWalk\n", + "We are only interested in the runtime, so throw away the results" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "def 
run_cu_rw(_G, _seeds, _depth):\n", + " t1 = time.time()\n", + " _, _ = cugraph.random_walks(_G, _seeds, _depth)\n", + " t2 = time.time() - t1\n", + " return t2" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "def run_wk_rw(_G, _seeds, _depth):\n", + " t1 = time.time()\n", + " _ = walker.random_walks(_G, n_walks=1, walk_len=_depth, start_nodes=_seeds)\n", + " t2 = time.time() - t1\n", + " return t2 \n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test 1: Runtime versus path depth" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Reading ./data/preferentialAttachment.mtx...\n", + "\tcuGraph\n", + "\tWalkerRandom walks - T=2.23s\n", + "\tcuGraph\n", + "\tWalkerRandom walks - T=2.48s\n", + "\tcuGraph\n", + "\tWalkerRandom walks - T=2.02s\n", + "\tcuGraph\n", + "\tWalkerRandom walks - T=2.31s\n", + "\tcuGraph\n", + "\tWalkerRandom walks - T=2.01s\n", + "update i\n", + "Reading ./data/dblp-2010.mtx...\n", + "\tcuGraph\n", + "\tWalkerRandom walks - T=4.21s\n", + "\tcuGraph\n", + "\tWalkerRandom walks - T=4.03s\n", + "\tcuGraph\n", + "\tWalkerRandom walks - T=3.59s\n", + "\tcuGraph\n", + "\tWalkerRandom walks - T=3.95s\n", + "\tcuGraph\n", + "\tWalkerRandom walks - T=3.68s\n", + "update i\n", + "Reading ./data/coPapersCiteseer.mtx...\n", + "\tcuGraph\n", + "\tWalkerRandom walks - T=59.64s\n", + "\tcuGraph\n", + "\tWalkerRandom walks - T=49.43s\n", + "\tcuGraph\n", + "\tWalkerRandom walks - T=47.45s\n", + "\tcuGraph\n", + "\tWalkerRandom walks - T=54.66s\n", + "\tcuGraph\n", + "\tWalkerRandom walks - T=46.96s\n", + "update i\n", + "Reading ./data/as-Skitter.mtx...\n", + "\tcuGraph\n", + "\tWalkerRandom walks - T=53.14s\n", + "\tcuGraph\n", + "\tWalkerRandom walks - T=44.36s\n", + "\tcuGraph\n", + "\tWalkerRandom walks - T=46.38s\n", + "\tcuGraph\n", + "\tWalkerRandom walks - T=41.96s\n", + "\tcuGraph\n", + "\tWalkerRandom walks - T=53.18s\n", + "update i\n" + ] + } + ], + "source": [ + "# some parameters\n", + "max_depth = 6\n", + "num_seeds = 100\n", + "\n", + "# arrays to capture performance gains\n", + "names = []\n", + "\n", + "# Two dimension data\n", + "time_algo_cu = [] # will be two dimensional\n", + "time_algo_wk = [] # will be two dimensional\n", + "\n", + "i = 0\n", + "for k,v in data.items():\n", + " time_algo_cu.append([])\n", + " time_algo_wk.append([])\n", + " \n", + " # Saved the file Name\n", + " names.append(k)\n", + "\n", + " # read data\n", + " gdf = read_data(v)\n", + " pdf = gdf.to_pandas()\n", + " \n", + " # Create the Graphs\n", + " Gcg = create_cu_ugraph(gdf)\n", + " Gnx = create_nx_ugraph(pdf)\n", + " \n", + " num_nodes = Gcg.number_of_nodes()\n", + " nodes = Gcg.nodes().to_array().tolist()\n", + "\n", + " seeds = random.sample(nodes, num_seeds)\n", + "\n", + " for j in range (2, max_depth+1) :\n", + " print(\"\\tcuGraph\")\n", + " tc = run_cu_rw(Gcg, seeds, j)\n", + " time_algo_cu[i].append(tc)\n", + " \n", + " print(\"\\tWalker\", end='')\n", + " tw = run_wk_rw(Gnx, seeds, j)\n", + " time_algo_wk[i].append(tw)\n", + "\n", + " # update i\n", + " i = i + 1\n", + " print(\"update i\")\n", + " \n", + " del Gcg\n", + " del Gnx\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAl4AAAFNCAYAAADRi2EuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Z1A+gAAAACXBIWXMAAAsTAAALEwEAmpwYAACN0ElEQVR4nOzdd3hUxf748ffsphcgQIBQEyAJ6QlJ6CACAopgQYoVrGDv2Lv+roUrKnq/qAjoVRABEfUiIgoSejO0BAgl9B5Ib7s7vz/O5pCQEIKShPJ5Pc8+u2fPzJw5Z9tn58yZUVprhBBCCCFE9bPUdgWEEEIIIS4XEngJIYQQQtQQCbyEEEIIIWqIBF5CCCGEEDVEAi8hhBBCiBoigZcQQgghRA2RwEuIi4hSqqVSKkcpZa3tutQ2pdQipdQ9tV2Pc6GUelUp9fU55pmmlLq+mqpU65RSI5VSS6qQ7t9Kqftrok5CVCcJvIT4h5RS6UqpfGdAdEgpNUUp5XMey+5Tsqy13qO19tFa289H+TXNeWyKnMcqQyn1m1KqXRXynXPAclr+nkqpfX83f21tUykVDcQAc5zLAUqpH5VSB5RSWikVeFp6d6XUJKVUlvO9+MRp63srpbYopfKUUguVUq0q2XY3pdQypVSm87VaqpRK/Cf78w+NBZ5XSrnVYh2E+Mck8BLi/BiotfYBYoE44Lnarc4F7V3nsWoOHAGm1G51LmijgG/0qZGuHcA8YPAZ0r8KBAOtgCuBMUqp/gBKqYbA98BLQH1gDTC9okKUUnWAn4HxzrTNgNeAwn+8R3+T1vogsAUYVFt1EOJ8kMBLiPNIa30I+BUjAKuw1aN0K5azJec7pdRXSqlspdRmpVSCc91/gZbAT84WojFKqUBnS4eLM80ipdSbzpaJHKXUT0qpBkqpb5ytHqtLt4oopdo5W5kylFJblVJDK9oPpdQwpdSa0557XCn1o/PxNUqpFGed9yulnvobxyoPmApEOsv8UCm111nvtUqp7s7n+wPPA8Oc+7i+VDGtnC0x2Uqp+c7g4pwopZoqpWYppY4qpXYppR4pte6Mr49zfXul1F/OdTOUUtOdr4c38AvQ1FnnHKVUU2c2tzOVV4GrgT9LHbPDWuv/AKvPkH4E8IbW+oTWOhX4HBjpXHcjsFlrPUNrXYARpMWcocUxxLm9aVpru9Y6X2s9X2u9odS+36WUSlVKnVBK/Vq69ayy95nz/fmj83VeBbQptU4ppcYppY44129USkWWqtciYEAlx0uIC54EXkKcR0qp5hg/ltvPIdsg4FugHvAj8DGA1vp2YA/O1jSt9btnyD8cuB2jVaINsByYjNFSkQq84qybN/AbRrDTyJnvP0qp8ArK/AkIVUoFl3ruFmdegC+AUVprX4zA6Y9z2F+c9fEBbgX+cj61GiNgre/czgyllIfWeh7w/4DpzuMQc1qd7nTujxtwTgGgUsqCsa/rMY5fb+AxpVS/UskqfH2cp7xmY7TY1QemATcAaK1zMd4HB5x19tFaH6isvArq5g0EAVuruC9+QIBzX0qsByKcjyNKr3PWcUep9aVtA+xKqS+VUlc7yy69reswguEbAX8gybn/VXmffQIUOOt6l/NWoi/QAyPwqwsMBY6XWp+KcepViIuWBF5CnB8/KKWygb0Yp89eOYe8S7TWc539tv7Luf+wTNZa79BaZ2K0suzQWi/QWtuAGRinPgGuBdK11pO11jat9V/ALGDI6QU6W6PmADcDOAOwdhiBAkAxEK6UquNsXVl3DvV9Sil1EiM49cHZIqO1/lprfdxZt38D7kBoFfZ9m9Y6H/gOZ0vjOUgE/LXWr2uti7TWOzFaiYaXSnOm16cT4AJ8pLUu1lp/D6yqwjar+nrXc95nV3FfSvoVZpZ6LhPwLbU+k7JKrzdprbOAboDGOB5Hna1UjZ1JRgP/0lqnOt9n/w+IdbZ6nfF9poyLQgYDL2utc7XWm4AvS2262FmfdoByln+w1PpsTh0XIS5KEngJcX5c72z96Ynxo3Eup7wOlXqcB3iUnEqsosOlHudXsFzyg9wK6KiUOllyw2hxanKGcqfiDLwwWpZ+cAZkYPx4XgPsVkr9qZTqfA71Hau1rqe1bqK1HqS13gGglHrKeeoq01m3upz9OJ5+7M71ooZWGKcDSx+T54HGpdKc6fVpCuwv1f8KjMD7bKr6ep903pcLjM4gx3lfp9RzdTgVuOWctu709WU4g56RWuvmGK2aTYEPnKtbAR+WOmYZgMJoNazsfeaPEayWPk67S23zD4wWwE+AI0qpz5TR36yEL6eOixAXJQm8hDiPtNZ/Ypx6Gut8KhfwKlnv/Mfvfy5FnrfKGT92fzqDnpKbj9b6TJfo/wb4K6ViMQKwktOMaK1Xa62vwziV9ANGa9Pf5uzPNQbj1JKf1roeRmuMKtnkPym/EnuBXacdE1+t9TVVyHsQaKaUUqWea1Hq8T+qc6lTgSFVTH/CWafSLWgxwGbn482l1zlPCbYptb6ysrdgvK9L+lvtxTjVXPq4eWqtl1H5++woYKPscWp52rY+0lrHA+HOfX+61Oowyp5KFeKiI4GXEOffB8BVSqkYjL4yHkqpAUopV+BFjFNoVXUYaH2e6vUzEKKUul0p5eq8JSqlwipKrLUuxjhV+R5GH6bfwOjbpJS6VSlV15kmC+Nqu3/CF+MH+SjgopR6mbKtM4eBQGefrL9NKeVR+oZxajBbKfWMUspTKWVVSkWqqg2bsBywAw8ppVyc/Z46nFbnBkqpuv+gynOBK07fB069h9ydyyW+Al5USvk5O83fy6mrRmcDkUqpwc48LwMbnEFVGc7O8U86+yyilGqBEXyvcCaZADynlIpwrq+rlCo5ZX3G95nz9Or3wKtKKS9nv68RpbabqJTq6Pys5GL0BSv93roC43S6EBctCbyEOM+01kcxfgBfdva7egCYCOzH+DE5l7Gd/oXxQ3pS/Y0rB0+rVzZG5+XhwAGMU17vUHkgOBXoA8xw9uUpcTuQrpTKwujvcyuUGeC1ZfmiKvUrxjAJ2zBOPRVQ9nTUDOf9caXUufQnK60ZxqnX0rcgjD5JscAu4BjGa3XWYElrXYTRufxujNNft2EEHYXO9VswOpzvdL5+Tc9QVGU+A249rVUtn1OnFbc4l0u8gtFKthvjasj3nBcnlLwvBwNvASeAjpTty1ZatnP9SqVULkbAtQl40lnWbIz3zrfO98AmjIsJqvI+ewjjlPAhjKBwcqnt1sHoU3bCuQ/HMQJ/lFIBGK1gP5zpYAlxMVBluycIIYT4u5RSK4EJWuvJZ01c9TKnAt9prX84X2VejJRS/8a4cOQ/tV0XIf4JCbyEEOJvUkpdgTHcwzGMVr8JQOvTrsQTQgjTuVw5JYQQoqxQjAsLvIGdwE0SdAkhKiMtXkIIIYQQNUQ61wshhBBC1BAJvIQQQgghashF0cerYcOGOjAwsLarIYQQQghxVmvXrj2mta5wsOyLIvAKDAxkzZo1tV0NIYQQQoizUkrtPtM6OdUohBBCCFFDJPASQgghhKghEngJIY
4NDLWHd4EPk3At7HxGTq4/rQ8JeWUyu8farz3C7Nh26/QPBH8WkHuMdi5yPlHy1LqT1fJVXbO5xuFG9/zBVlwJAX824FnPcg/YXx3mumtp8oqvezVEFw9wFYIRbnG/lisxu+Fw1Z+m+KSJ4FXiYl94PDGytM0i4d7jSEBmDoM6rWEm6cay5P6Q96xyvOHDjiV/rs7IPw6uPZ948th2rCz1zHhbiNw0Q746VG48kUj8CrIhF/GnD3/lS8agVNRDiwbb3wZNWtvfAGt+6rUuF2Wir9A2vY2yinOM77wCrNOLZ9IryDvaV9EJf0JrW7Gl0/Jj7qbr3EslSr/5VX6VtJC5RdoBI1WV2M5IBba31Fx3UvfXJ3DYbTtY2y/RMQN0DDY+BJ02I0vb4ft1LLjtCv96rcGW9GpZa+G4BtQPo+t4NRj+6mrGDmSYvwIlNg2D46knrY9e9nXrnniqcBr0b+M164k8JrzgPEjUJl218Lwb4zHE/tA5GAYMNb48v93yJnzlQSQifdC//9npH8/DLo/CZ3uh6wDMOXaUsGapeLgMe52iLrJeK/9/Dh0uBeCehhB4JL3y+c5PXBtdw00iYLsQ7B5NrQbYLxnTuyG3UudQWclgWvjSCNIL/mxbBhivB/yMowyy/zw6/KBQLP2xvvvxG44tg1a9zTef4dT4Hha+YDBXHbex95qBBu7l8OhjdDxPuP4bv0FDm8+wx8W52NlNf60APz1DZzYBb1eNJaXfGC8d8oFHqX+OHo3hOuM8QOZ/6Lx3r3mXWN55t1GeeX+bJYqJyAGhkw20k+5Fhq0hYEfGMsfREPByVKBzmnHDQ3h18NQ51XP/24HsbfA1W8bgdN7rSt/3wJ0HA1Xv2MELp/2gD6vQrfHIfsgTOp39vwl6XOOwKy74YZPjcDr2DZj+Wxu+BRihhtB3+Sr4Y45xuu/4w+YedfZ85ek3/I/mHknPLDS+AO56nP4tYIr7MoEYla470/jD/rqL4w/vA+vMwK/xWNh7RTn997pwV+pQG7Ez+BRx9he6k8wwnkV7ZJxsGtx5YGjiwdcbwxjw9ovje/5Pq8Yyys/g4wdzvyqgm1bwbP+qff6pu+N90TUTcbyhhlQmFk+j7msjIA7qIeRftdi4w9j01hjOX2J8/Nxev5Sx8PTD+q1MNJn7AT3uuDd4OyvWTWTwKtEp/uNwKlc4FDqx9z71CX59HoR3LxOLQ/6yPhRqugNXFJG6fw3f2v8EICR/t6FlQcNSp0KFixWeCLVaPUA40d/zK6zBx7K+W+qbnN4uVSQ2Kw9PL+v6seqSRQ8vPbUcusr4P6lVc/fsiPc/v2p5ZC+xq2qgrobt7+bv1UX41YiuI9xq6r4kWWXrzzHy5NvmFB2+e755dNofSoIc9jKrrtjzqnXEuCu+cap09KBW0nwVvKcV6kvm2vHnfoyUhYY8G/nFUW28sFjSTnNE0/lDxto/PgCWFyhaVzZfKcHrsVFxmcDjB/PY2lG6wEYP9o7Fp552yX7Xq+l8b47kQ7znjVaMeq1hAPr4IcqDCVy+2xo8//bu/9gO8r6juPvT24SjQEJiiQZQnqZgUGTDEZsERAziJYSpTAqHdIZKVg7FAsV7Q9G/cOi/1jrjLV1HCjir7YwpERhIo0ITqmDU5JCkBJItJMGMIlIIpQQhCbm5ts/9jnczTlnz9mb3LN7757PK3Mn5+w+u/t8z7Nnz3ef/XUebPsh3H45fOQBmL8EHl2dza+fP1kPx78JfrouK3/dE9n399Hbst7Tfpa+L0u8/vtuWH/D+I/R43dm88jL77C0fvxaidfPH4adG8cTr2ceg+0bOqfJb7/yxtrWp9mvyXri26fJz+e4XGK++Mxse9Oy5OJs3eu2o9aqU6uXH+Cdnxp/PzIbVn6hc3ntsbR6ame+ClbdmvU4Qdbzddkd3afJ1+e12VV1HHMiXP1g1lsG2Q7b1Q/27nE7ODa+rh//Jvjgd2DBaemzOAsuvaWgpy03v+NOTct7M6z8myyZgGw7+O7ri5P91vs587Lyrz95fD2CbAfwpBWd5dtjmDGSlW/1NLYc2Jd9D3vVfyR3z6tnHst2uFu23Zcduem67LHxOrbW9Ye+no1vJV4//Hy209LL4rPHE6+7/gwWLIPf+2b2/tZVsH9v4aQALH3/+E7DjSvg9Mvggs/1nqYCvqrRzKa2gwez/2fMyBKH/Xth1lyYOTs7dPOr3blkrUtv5cEDMH9plii98HSWvIy+I+sFePZ/sh6oXoeaNDLe47X3GdizPfsRHZmV9Za99GzxdK35zj0+G77/pSxRaf2YtnpO83vrZk3Q6j1tJX6/fjkb1uqweOm53M5at97asSxRPHY0K//M5uyQ7etSL+nPNsDBX7dNH4e+P3phdpQIslNajj0p+y5XwLeTMDMzM6uIbydhZmZmNgU48TIzMzOrSC2Jl6QLJP1U0lZJJc5sNTMzM5v+Kk+8JI0AXwFWAkuA35e0pOp6mJmZmVWtjttJnAFsjYhtAJJuAy4GNtdQFwB+cMt6xp7KLksVeuV+mzD+8tBrjQouSGgbrBLXLYjOq5iCKJy28Jqn9mWnku2zOWToRJYRPceWqpeIriVVXJVDPoeJXu9V5vOfyHy71r7HMrpHOwGTeN1LUT0KP/ca6jJhR1inbt+9ic1cPd6Vnaq8I/ncBr3M9nLF6365Ob5Sqk8z1PFZHonDW273LWTv9Xdwev8+9Cp0mPMqPaco+HXp9Nz58zjn3csPZ0GToo7E6wRge+79DqCax4UX+L8tu1h+4Ni+5YquAC27npQpNxnz6jeP6JU4qmAe5XLNguGD+dwK4ygx/85x5b/tg7wOOLq8mvx5H6Yemexk17bM+lNu2omXPJK1deLr9GA+0/Zpo2COg98mDepzKx46mdvjiUwxmd+B8jFMdA+93/IOs72OIAcsP69+W/zyxjRnkuZ0eKbsDVQlXQlcCbB48eKBLmv0mtPYtGd3fuHjL1HXW+sU7W1I3feC24d3luic/0SW274MFQwvM68Zfe4l1D7dIctVcbn8mIksV2p/0Tl/FQw/dB7FcXVbdvf6qOP4fOHnq2516WzXVplubVbcdl2mz3/2betwx5QF7aTO0YXr8Pj7znboXEe6fTc64x6vQ7c2LNdGSv86hnc5sWJGr+8T6tG23dfubp9lfppur81suNSReO0ETsy9X5SGHSIibgJuguw+XoOs0LIFoyxbMDrIRZiZmZnVclXjg8Apkk6SNBtYBaytoR5mZmZmlaq8xysiDki6Bvg+MAJ8PSIer7oeZmZmZlWr5RyviFgHrKtj2WZmZmZ18Z3rzczMzCrixMvMzMysIk68zMzMzCrixMvMzMysIk68zMzMzCrixMvMzMysIk68zMzMzCqiogc/TyWSdgNPDXgxxwG/HPAyprJhjt+xD69hjn+YY4fhjt+xD95vRMQbuo2YFolXFSQ9FBG/WXc96jLM8Tv24Ywdhjv+YY4dhjt+x15v7D7UaGZmZlYRJ15mZmZmFXHiNe6muitQs2GO37EPr2GOf5hjh+GO37HXyOd4mZmZmV
XEPV5mZmZmFRmqxEvSiZLuk7RZ0uOSru1SRpL+XtJWSY9KOr2Ouk62krGfK2mPpEfS36frqOsgSHq1pP+U9F8p/s90KfMqSatT22+QNFpDVSddydivkLQ71/Z/VEddB0XSiKQfS7qry7hGtnten/ib3vZPStqUYnuoy/hGbvOhVOxN3ubPk7RG0k8kbZF0Vtv42tp9ZlULmiIOAH8eEQ9LOhrYKOneiNicK7MSOCX9vQ24If0/3ZWJHeD+iLiwhvoN2j7gvIh4UdIs4EeSvhcR63NlPgz8b0ScLGkV8Hng0joqO8nKxA6wOiKuqaF+VbgW2AK8tsu4prZ7Xq/4odltD/DOiCi6d1NTt/ktvWKH5m7z/w64OyIukTQbeE3b+Nrafah6vCLi6Yh4OL3eS7YhOqGt2MXAP0ZmPTBP0sKKqzrpSsbeWKk9X0xvZ6W/9hMcLwa+lV6vAd4lSRVVcWBKxt5YkhYB7wVuLijSyHZvKRH/sGvkNn+YSToGWAF8DSAi9kfE823Famv3oUq88tLhhLcAG9pGnQBsz73fQcMSlB6xA5yVDkl9T9LSams2WOlwyyPALuDeiChs+4g4AOwBXl9pJQekROwAH0hd7msknVhtDQfqS8B1wMGC8Y1t9+RL9I4fmtv2kO1k3CNpo6Qru4xv8ja/X+zQzG3+ScBu4BvpEPvNkua2lamt3Ycy8ZJ0FPBt4GMR8ULd9alSn9gfJnvMwZuBLwN3Vly9gYqIsYhYDiwCzpC0rOYqVaZE7N8FRiPiNOBexnuApjVJFwK7ImJj3XWpQ8n4G9n2OedExOlkh5aulrSi7gpVqF/sTd3mzwROB26IiLcAvwI+UW+Vxg1d4pXOcfk2cEtEfKdLkZ1Afo9vURo27fWLPSJeaB2Sioh1wCxJx1VczYFLXc73ARe0jXql7SXNBI4Bnq20cgNWFHtEPBsR+9Lbm4G3Vly1QXk7cJGkJ4HbgPMk/XNbmSa3e9/4G9z2AETEzvT/LuAO4Iy2Io3d5veLvcHb/B3AjlzP/hqyRCyvtnYfqsQrnbfxNWBLRHyxoNha4A/SFQ9nAnsi4unKKjkgZWKXtKB1boukM8jWj0b8AEl6g6R56fUc4LeBn7QVWwtcnl5fAvxbNOBGd2Vibzu34SKycwCnvYj4ZEQsiohRYBVZm36wrVgj2x3Kxd/UtgeQNDddTEQ61HQ+8FhbsaZu8/vG3tRtfkT8Atgu6dQ06F1A+4VktbX7sF3V+HbgMmBTOt8F4FPAYoCIuBFYB7wH2Aq8BHyo+moORJnYLwE+IukA8DKwqik/QMBC4FuSRsg2Lv8SEXdJ+izwUESsJUtM/0nSVuA5sh+qJigT+0clXUR29etzwBW11bYCQ9LuhYao7ecDd6TcYiZwa0TcLekqaPw2v0zsTd7m/ylwS7qicRvwoanS7r5zvZmZmVlFhupQo5mZmVmdnHiZmZmZVcSJl5mZmVlFnHiZmZmZVcSJl5mZmVlFnHiZ2ZQhaUzSI5Iek3S7pPYH2+bLnivp7Nz7b0q6pMQyXuxXZqIkLZf0ntz76yX9xWQvx8ymPydeZjaVvBwRyyNiGbAfuKpH2XOBs3uMr9JysnsCmZn15MTLzKaq+4GTJf2upA3pYbc/kDQ/Pej9KuDjqYfsHWmaFZL+Q9K2kr1ffynpwfSA6M+kYaOStkj6qqTHJd2T7viPpN9KZR+R9IXUMzcb+CxwaRp+aZr9Ekn/nury0cn+cMxsenLiZWZTTnpm4kpgE/Aj4Mz0sNvbgOsi4kngRuBvUw/Z/WnShcA5wIXAX/dZxvnAKWTPr1sOvDX3EOFTgK9ExFLgeeADafg3gD9ODxwfA4iI/cCngdWpLqtT2TcCv5Pm/1fpWalmNuSG7ZFBZja1zck90up+ssf5nAqsTs8UnA080WP6OyPiILBZ0vw+yzo//f04vT+KLOH6GfBERLTqsREYTc+7PDoiHkjDbyVL8Ir8a3r49D5Ju8ge4bKjT53MrOGceJnZVPJy6k16haQvA1+MiLWSzgWu7zH9vvykfZYl4HMR8Q9tyxttm88YMKfPvPrVZQxvb80MH2o0s6nvGGBnen15bvhe4OgjmO/3gT+UdBSApBMkHV9UOCKeB/ZKelsalH+Y9pHWxcyGhBMvM5vqrgdul7QR+GVu+HeB97WdXF9aRNxDdrjwAUmbgDX0T54+DHw1HQ6dC+xJw+8jO5k+f3K9mVkHRUTddTAzmxYkHRURL6bXnwAWRsS1NVfLzKYRn3NgZlbeeyV9kmzb+RRwRb3VMbPpxj1eZmZmZhXxOV5mZmZmFXHiZWZmZlYRJ15mZmZmFXHiZWZmZlYRJ15mZmZmFXHiZWZmZlaR/wd+5LmtinetbwAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "%matplotlib inline\n", + "seed_idx = list(range(2,max_depth +1))\n", + "\n", + "plt.figure(figsize=(10,5))\n", + "\n", + "for i in range(len(data)):\n", + " plt.plot(seed_idx, time_algo_cu[i], label = (str(names[i] + \"-cuGraph\")))\n", + "\n", + " plt.plot(seed_idx, time_algo_wk[i], label = (str(names[i] + \"-walker\")), linestyle='-.')\n", + "\n", + "\n", + "plt.title(f'Runtime vs. Path Length ({num_seeds} Seeds)')\n", + "plt.xlabel('Path length')\n", + "plt.ylabel('Runtime')\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "5164" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "del time_algo_cu\n", + "del time_algo_wk\n", + "gc.collect()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test 2: Runtime Speedup versus number of seeds\n", + "The number of seeds will be increased over a range in increments of 50. \n", + "The runtime will be the sum of runtime per increment. Increaing number of seeds by 1 would make for very long execution times " + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Reading ./data/preferentialAttachment.mtx...\n", + "\t.Random walks - T=2.28s\n", + ".Random walks - T=2.29s\n", + ".Random walks - T=2.28s\n", + ".Random walks - T=2.21s\n", + ".Random walks - T=1.95s\n", + ".Random walks - T=2.38s\n", + ".Random walks - T=2.00s\n", + ".Random walks - T=2.19s\n", + ".Random walks - T=1.99s\n", + ".Random walks - T=2.40s\n", + ".Random walks - T=1.95s\n", + ".Random walks - T=2.17s\n", + ".Random walks - T=1.95s\n", + ".Random walks - T=2.39s\n", + ".Random walks - T=1.97s\n", + ".Random walks - T=2.23s\n", + ".Random walks - T=2.63s\n", + ".Random walks - T=4.08s\n", + ".Random walks - T=3.44s\n", + ".Random walks - T=3.77s\n", + " \n", + "Reading ./data/dblp-2010.mtx...\n", + "\t.Random walks - T=6.61s\n", + ".Random walks - T=6.57s\n", + ".Random walks - T=6.48s\n", + ".Random walks - T=6.69s\n", + ".Random walks - T=6.11s\n", + ".Random walks - T=6.18s\n", + ".Random walks - T=4.98s\n", + ".Random walks - T=5.64s\n", + ".Random walks - T=3.83s\n", + ".Random walks - T=4.28s\n", + ".Random walks - T=4.34s\n", + ".Random walks - T=4.14s\n", + ".Random walks - T=3.79s\n", + ".Random walks - T=4.37s\n", + ".Random walks - T=4.00s\n", + ".Random walks - T=3.66s\n", + ".Random walks - T=4.01s\n", + ".Random walks - T=3.67s\n", + ".Random walks - T=4.32s\n", + ".Random walks - T=3.70s\n", + " \n", + "Reading ./data/coPapersCiteseer.mtx...\n", + "\t.Random walks - T=56.64s\n", + ".Random walks - T=52.26s\n", + ".Random walks - T=45.66s\n", + ".Random walks - T=48.81s\n", + ".Random walks - T=56.16s\n", + ".Random walks - T=56.73s\n", + ".Random walks - T=45.43s\n", + ".Random walks - T=44.96s\n", + ".Random walks - T=51.77s\n", + ".Random walks - T=58.39s\n", + ".Random walks - T=43.35s\n", + ".Random walks - T=42.89s\n", + ".Random walks - T=57.96s\n", + ".Random walks - T=45.03s\n", + ".Random walks - T=64.27s\n", + ".Random walks - T=52.57s\n", + ".Random walks - T=46.91s\n", + ".Random walks - T=55.62s\n", + ".Random walks - T=46.85s\n", + ".Random walks - T=44.84s\n", + " \n", + "Reading ./data/as-Skitter.mtx...\n", + "\t.Random 
walks - T=51.36s\n", + ".Random walks - T=52.06s\n", + ".Random walks - T=44.91s\n", + ".Random walks - T=49.73s\n", + ".Random walks - T=47.45s\n", + ".Random walks - T=52.21s\n", + ".Random walks - T=47.65s\n", + ".Random walks - T=45.49s\n", + ".Random walks - T=47.84s\n", + ".Random walks - T=43.48s\n", + ".Random walks - T=45.67s\n", + ".Random walks - T=45.75s\n", + ".Random walks - T=55.03s\n", + ".Random walks - T=46.39s\n", + ".Random walks - T=50.64s\n", + ".Random walks - T=43.87s\n", + ".Random walks - T=40.98s\n", + ".Random walks - T=49.42s\n", + ".Random walks - T=51.94s\n", + ".Random walks - T=49.28s\n", + " \n" + ] + } + ], + "source": [ + "# some parameters\n", + "rw_depth = 4\n", + "max_seeds = 1000\n", + "\n", + "# arrays to capture performance gains\n", + "names = []\n", + "\n", + "# Two dimension data\n", + "time_algo_cu = [] # will be two dimensional\n", + "time_algo_wk = [] # will be two dimensional\n", + "perf = [] # will be two dimensional\n", + "\n", + "i = 0\n", + "for k,v in data.items():\n", + " time_algo_cu.append([])\n", + " time_algo_wk.append([])\n", + " perf.append([])\n", + " \n", + " # Saved the file Name\n", + " names.append(k)\n", + "\n", + " # read data\n", + " gdf = read_data(v)\n", + " pdf = gdf.to_pandas()\n", + " \n", + " # Create the Graphs\n", + " Gcg = create_cu_ugraph(gdf)\n", + " Gnx = create_nx_ugraph(pdf)\n", + " \n", + " num_nodes = Gcg.number_of_nodes()\n", + " nodes = Gcg.nodes().to_array().tolist()\n", + " \n", + " print('\\t', end='')\n", + " for j in range (50, max_seeds +1, 50) :\n", + " print('.', end='')\n", + " seeds = random.sample(nodes, j)\n", + " tc = run_cu_rw(Gcg, seeds, rw_depth)\n", + " tw = run_wk_rw(Gnx, seeds, rw_depth)\n", + " \n", + " time_algo_cu[i].append(tc)\n", + " time_algo_wk[i].append(tw) \n", + " perf[i].append(tw/tc)\n", + " \n", + "\n", + " # update i\n", + " i = i + 1\n", + " print(\" \")\n", + " \n", + " del Gcg\n", + " del Gnx\n" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAmQAAAFNCAYAAACuWnPfAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Z1A+gAAAACXBIWXMAAAsTAAALEwEAmpwYAABhrElEQVR4nO3deXhU5d3/8fd3JvtKQsK+KyBbCGFRFNndqlZbta4t1Nra/uxma936WFuf2mprH6u2fazdsNatWn1cq1ZFRQUREBCRHZSdhED2ZDIz9++PORkmIUDAJJPA53Vdc80591nmOzMJfHKf+5xjzjlEREREJH588S5ARERE5FinQCYiIiISZwpkIiIiInGmQCYiIiISZwpkIiIiInGmQCYiIiISZwpkInLYzOwNM7sq3nV0VGa2ycxmxum1u5vZW2ZWYWa/iUcNXh0DzMyZWUK8ahDpTBTIRDoRM5tkZu+aWZmZlZrZO2Y2Pt51dWQxweDFJu3/MLOfxqmstvQNoATIcs79sOlCM+tjZv8ysxLv52iFmc1u9ypFpBEFMpFOwsyygOeB+4BcoDfwM6AunnV1Iiea2cnxLuJwHGHvUn9gpTvwVb8fAjZ763UFvgzsPLIKRaS1KJCJdB5DAJxzjzrnQs65GufcK8655QBmNtvrMfud1/OxysxmNGxsZtlm9hcz225mW83s52bmj1l+pZl9bGZ7zOxlM+sfs+w0b39lZvY7wGKW/dTM/hEz3+hQlXd485dmttDMys3sGTPLbe4Neq9/Tsx8gpkVm1mRmaV4vVq7zWyvmb1vZt0P4/P7FXD7AV53tpm93aTNmdnx3vQcM/uDmf3bzCq9z7mHmf3W+7xWmdmYJrsdb2YrveV/M7OUmH2fY2ZLvffxrpkVxCzbZGY3mNlyoKq5UGZmJ3vvv8x7PrmhTmAWcL1XZ3OHTccDc5xzVc65oHPuA+fcv2P2fZJX014zW2ZmU2OWHfBnyMz8ZnaX1/O2ATi7mc94g3codaOZXd7cdyFyrFIgE+k81gAhM3vQzM4ys5xm1jkRWA/kAbcCT8WEnzlAEDgeGAOcDlwFYGbnATcDXwTygXnAo96yPOAp4L+8/a4HTjnM2r8CXAn09Gq49wDrPQpcGjN/BlDinFtCJGhkA32J9Ox8E6g5jBr+AAw5QEhpiS+x7zOoA+YDS7z5J4H/abL+5V79xxEJ0/8F4AW3vwJXe+/jj8CzZpYcs+2lRAJNF+dcMHan3vf5ApHPsKv3ui+YWVfn3GzgYeBXzrkM59yrzbyPBcDvzewSM+vXZN+9vX3/nEgv7HXAv8ws31tlDgf4GQK+DpzjtY8DLozZb7pX71nOuUzgZGBpM7WJHLMUyEQ6CedcOTAJcMCfgGIze7ZJL9Eu4LfOuXrn3OPAauBsb53PAd/3ekZ2AXcDl3jbfRP4pXPuYy8A/AIo9HrJPgd85Jx70jlXD/wW2HGY5T/knFvhnKsCbgG+FNs7F+MR4PNmlubNX4YXDIF6IgHkeK+HcLH3mbRUDZEesp8fZu0NnvZesxZ4Gqh1zv3dORcCHicSRGL9zjm32TlX6r1uQ9D8BvBH59x73vt4kEjAOylm23u9bZsLnGcDa51zD3k9XI8Cq4BzW/g+LiISuG8BNno9dQ3jEK8AXnTOveicCzvn/gMsAj7Xgp+hLxH52Wt4z79s8rphYKSZpTrntjvnPmphvSLHBAUykU7EC0yznXN9gJFALyIBqcHWJmOHPvHW6Q8kAtu9Q1F7ifTMdPPW6w/cE7OslMhhyd7e9ptjanCx8y0Uu/4nXi15zby/dcDHwLleKPs8kZAGkbFPLwOPmdk2M/uVmSUeZh1/BrqbWUvDS6zYcVY1zcxnNFm/6Xvu5U33B37Y8Fl7n3ffmOVNt22ql7e/WJ8Q+a4OyTm3xzl3o3NuBNCdSE/V/5mZebVd1KS2SUR6Ng/1M9To5yS2Ri+IX0wk+G83sxfM7ISW1CtyrFAgE+mknHOriBxCGhnT3Nv7j7VBP2Abkf8o64A851wX75Hl/aeMt/zqmGVdnHOpzrl3ge1EAgMA3v77xrxGFZAWM9+jmXJj1+9HpLer5ABvreGw5XlEBqev895vvXPuZ8654UQOeZ1D5FBoiznnAkROhPhvYsbBNX0PZtbcezhcTd/zNm96M3B7k886zevpipZ6kP1uIxKOYvUDth5ugc65EuAuImEq16vtoSa1pTvn7uDQP0ONfk68mmJf62Xn3GlEwt0qIr28IuJRIBPpJMzsBDP7oZn18eb7EgkuC2JW6wZ818wSzewiYBiRQ1DbgVeA35hZlpn5zOw4M5vibXc/cJOZjfD2ne1tD5ExRSPM7IveAPPv0jh0LQUmm1k/M8sGbmqm/CvMbLjX63Ub8KR3qK85jxEZm/Qt9vWOYWbTzGyUd6iznEioCx/qc2vGQ0AKcGZM2zLvPRZ6g+9/egT7beoai1xiIhf4MZHDmhAJIt80sxMtIt3MzjazzBbu90UiY+Eus8hJDxcDw4mcgXtIZnanmY30ts0k8jmvc87tBv5BpHfyDG+QfoqZTTWzPi34GfonkZ+9Pt74xhtjXrO7mZ3njSWrAyo5su9O5KilQCbSeVQQGbT/nplVEQliK4DYa029Bwwm0vt0O3Ch9x8tRHqTkoCVwB4iA9F7AjjnngbuJHI4sNzb71neshIi447uAHZ7+3+n4QW9cUaPA8uBxTQfDB4i0pu3g0gY+u6B3qT3H/98Ir1gj8cs6uHVXE7ksOab3n4xs/vN7P4D7bPJ/kPAT4j0CDW0rSESFF8F1gJvN7/1YXmESIDZQOREiJ97r7WIyAD43xH5HtYBs1u6U+/7PIfI974buB44x/ueWiKNyBi4vV5t/YkcGsY5t5lIz+TNQDGRXrEfse//igP+DBEJmi8TCbdLiJwI0sAH/IBI714pMIVIEBQRjx34UjUi0plY5OKeVznnJsW7llhm9gbwD+fcn+Ndi4hIR6UeMhEREZE4UyATERERiTMdshQRERGJM/WQiYiIiMSZApmIiIhInO1309rOJC8vzw0YMCDeZYiIiIgc0uLFi0ucc/nNLevUgWzAgAEsWrQo3mWIiIiIHJKZNb3tWZQOWYqIiIjEmQKZiIiISJwpkImIiIjEWaceQyYiInIk6uvr2bJlC7W1tfEuRY5CKSkp9OnTh8TExBZvo0AmIiLHnC1btpCZmcmAAQMws3iXI0cR5xy7d+9my5YtDBw4sMXb6ZCliIgcc2pra+natavCmLQ6M6Nr166H3fuqQCYiIsckhTFpK0fys6VAJiIi0gnNmzePESNGUFhYSE1NTZu9zpw5c9i2bVt0/qqrrmLlypUH3Wbq1KmNrhO6dOlSzIyXXnop2rZp0yYeeeSRRuu8+OKLR1zngAEDKCkpOeLtD9dnrbcpBTIREZEOKhQKHXDZww8/zE033cTSpUtJTU095L6cc4TD4cOuoWkg+/Of/8zw4cMPax+PPvookyZN4tFHH422tXYga28KZO1oe+V2/rHyH9QGdRaOiIi0rk2bNnHCCSdw+eWXM2zYMC
688EKqq6sZMGAAN9xwA0VFRTzxxBO88sorTJw4kaKiIi666CIqKyv585//zD//+U9uueUWLr/8cgB+/etfM378eAoKCrj11lujrzF06FC+8pWvMHLkSDZv3nzA9YYNG8bXv/51RowYwemnn05NTQ1PPvkkixYt4vLLL4/2xMX2fn3rW99i3LhxjBgxIrqvppxzPPHEE8yZM4f//Oc/0bFVN954I/PmzaOwsJA777yTn/zkJzz++OMUFhby+OOPs3DhQiZOnMiYMWM4+eSTWb16NRAJqddddx0jR46koKCA++67L/pa9913H0VFRYwaNYpVq1YB8NOf/pRZs2Zx6qmn0r9/f5566imuv/56Ro0axZlnnkl9fT0AixcvZsqUKYwdO5YzzjiD7du3A5HevhtuuIEJEyYwZMgQ5s2bRyAQ2K/ez8w512kfY8eOdW1p7qdz3cg5I93C7Qvb9HVERKR9rVy5Mt4luI0bNzrAvf32284557761a+6X//6165///7uzjvvdM45V1xc7E499VRXWVnpnHPujjvucD/72c+cc87NmjXLPfHEE845515++WX39a9/3YXDYRcKhdzZZ5/t3nzzTbdx40ZnZm7+/PmHXM/v97sPPvjAOefcRRdd5B566CHnnHNTpkxx77//frTu2Pndu3c755wLBoNuypQpbtmyZfut8/bbb7vp06c755y79NJL3ZNPPumcc27u3Lnu7LPPju73b3/7m7vmmmui82VlZa6+vt4559x//vMf98UvftE559wf/vAHd8EFF0SXNdTQv39/d++99zrnnPv973/vvva1rznnnLv11lvdKaec4gKBgFu6dKlLTU11L774onPOufPPP989/fTTLhAIuIkTJ7pdu3Y555x77LHH3Fe/+tXoe/nBD37gnHPuhRdecDNmzGi23qaa+xkDFrkDZBpd9uIgxnQbA8CSnUsY32N8nKsREZG28LPnPmLltvJW3efwXlnceu6IQ67Xt29fTjnlFACuuOIK7r33XgAuvvhiABYsWMDKlSuj6wQCASZOnLjffl555RVeeeUVxoyJ/L9VWVnJ2rVr6devH/379+ekk0465HoDBw6ksLAQgLFjx7Jp06ZD1v/Pf/6TBx54gGAwyPbt21m5ciUFBQWN1nn00Ue55JJLALjkkkv4+9//zgUXXHDIfZeVlTFr1izWrl2LmUV7sl599VW++c1vkpAQiTC5ubnRbb74xS9G63/qqaei7WeddRaJiYmMGjWKUCjEmWeeCcCoUaPYtGkTq1evZsWKFZx22mlApBeuZ8+eze63JZ/LkVAgO4js5GyO73I8S3YtiXcpIiJyFGp6Nl7DfHp6OhA5inXaaac1GnvVHOccN910E1dffXWj9k2bNkX3daj1kpOTo/N+v/+QJwps3LiRu+66i/fff5+cnBxmz56936UeQqEQ//rXv3jmmWe4/fbbo9foqqioOOi+AW655RamTZvG008/zaZNm5g6deoht2l4D36/n2AwuF+7z+cjMTEx+jn7fD6CwSDOOUaMGMH8+fMPa7+tSYHsEMZ2H8tz658jGA6S4NPHJSJytGlJT1Zb+fTTT5k/fz4TJ07kkUceYdKkSXzwwQfR5SeddBLXXHMN69at4/jjj6eqqoqtW7cyZMiQRvs544wzouPJMjIy2Lp1a7NXiW/perEyMzObDVDl5eWkp6eTnZ3Nzp07+fe//71faHrttdcoKCjg5ZdfjrbNmjWLp59+mhEjRjTab9PXKSsro3fv3kDkxIIGp512Gn/84x+ZNm0aCQkJlJaWNuolOxJDhw6luLg4+l3U19ezZs0aRow48M/GgT6XI6VB/YdQ1K2I6mA1a/asiXcpIiJylBk6dCi///3vGTZsGHv27OFb3/pWo+X5+fnMmTOHSy+9lIKCAiZOnBgdrB7r9NNP57LLLmPixImMGjWKCy+8sNmw0NL1Ys2ePZtvfvOb+11eY/To0YwZM4YTTjiByy67LHpYNdajjz7KF77whUZtF1xwAY8++igFBQX4/X5Gjx7N3XffzbRp01i5cmV0kPz111/PTTfdxJgxYxr1Sl111VX069ePgoICRo8e3ehMzSOVlJTEk08+yQ033MDo0aMpLCzk3XffPeg2Tev9rCwyxqxzGjdunIu9zklb2FG1g9OePI0bxt/AFcOvaNPXEhGR9vHxxx8zbNiwuNawadMmzjnnHFasWBHXOqRtNPczZmaLnXPjmltfPWSH0CO9B73Se2kcmYiIiLQZBbIWKOpexOKdi+nMvYkiItKxDBgwQL1jEtXmgczM/Gb2gZk9780PNLP3zGydmT1uZklee7I3v85bPqCta2upou5FlNaW8kn5J/EuRURERI5C7dFD9j3g45j5O4G7nXPHA3uAr3ntXwP2eO13e+t1CGO7jQXQYUsRERFpE20ayMysD3A28Gdv3oDpwJPeKg8C53vT53nzeMtn2JHcLr0NDMweSE5yDot3Lo53KSIiInIUausest8C1wMNdzPtCux1zjWcv7oF6O1N9wY2A3jLy7z1487MGNNtDEt2qodMREREWl+bBTIzOwfY5Zxr1W4lM/uGmS0ys0XFxcWtueuDKupexJbKLeyq3tVurykiIseOn/70p9x1112Nbt4da86cOXz729+OQ2XSHtqyh+wU4PNmtgl4jMihynuALmbWcMn7PsBWb3or0BfAW54N7G66U+fcA865cc65cfn5+W1YfmNju3vjyNRLJiIiIq2szQKZc+4m51wf59wA4BLgdefc5cBc4EJvtVnAM970s9483vLXXQe6zsQJuSeQmpCqcWQiItJqbr/9doYMGcKkSZNYvXp1tP2hhx6isLCQkSNHsnDhwv22a7h6/rhx4xgyZAjPP/98e5YtbSAeN2e8AXjMzH4OfAD8xWv/C/CQma0DSomEuA4jwZfA6PzRfLDrg0OvLCIicgiLFy/mscceY+nSpQSDQYqKihg7NnI0prq6mqVLl/LWW29x5ZVXNnu9sk2bNrFw4ULWr1/PtGnTWLduHSkpKe39NqSVtEsgc869AbzhTW8AJjSzTi1wUXvUc6SKuhfxv0v/l/JAOVlJWfEuR0REWsO/b4QdH7buPnuMgrPuOOgq8+bN4wtf+AJpaWkAfP7zn48uu/TSSwGYPHky5eXl7N27d7/tv/SlL+Hz+Rg8eDCDBg1i1apVFBYWttpbkPalK/UfhqJuRTgcS3ctjXcpIiJyFGt61afmrgLVknWk84jHIctOqyC/gARLYMnOJUzuMzne5YiISGs4RE9WW5k8eTKzZ8/mpptuIhgM8txzz3H11VcD8PjjjzNt2jTefvttsrOzyc7O3m/7J554glmzZrFx40Y2bNjA0KFD2/stSCtSIDsMqQmpDO86XFfsFxGRz6yoqIiLL76Y0aNH061bN8aPHx9dlpKSwpgxY6ivr+evf/1rs9v369ePCRMmUF5ezv3336/xY52cdaATGQ/buHHjXHPXamlLv1n0Gx7++GHmXzafZH9yu762iIi0jo8//phhw4bFu4wjNnv2bM455xwuvPDCQ
68scdHcz5iZLXbOjWtufY0hO0xF3YqoD9fzYXErDwAVERGRY5YOWR6mMd3GAJEbjY/r0WzIFRERaVNz5syJdwnSytRDdgguHG403yWlC8d3OV5X7BcREZFWo0B2EJVvvsnaSadSv2NHo/aibkUsLV5KKByKU2UiIiJyNFEgO4jEvv0IlZZS8frrjdqLuhdRVV/F6j2rD7CliIiISMspkB1E8qCBJA0aROWrrzVq143GRUREpDUpkB1C5ozpVC1cSKi8PNrWI70HvdJ76XpkIiLSLn7605/Su3fv6A3Hn3322bjUsWbNGj73uc8xePBgioqK+NKXvsTOnTtZtGgR3/3udwF44403ePfdd+NSX2emQHYImTNmQDBI5ZtvNWov6l7Ekp1L6MzXcRMRkc7j2muvZenSpTzxxBNceeWVhJucdNZagsFgs+21tbWcffbZfOtb32Lt2rUsWbKE//f//h/FxcWMGzeOe++9F4hvIAuFOu/YbgWyQ0gpKMCfn0fFa40PWxZ1L2J37W4+rfg0TpWJiEhn9/e//52CggJGjx7Nl7/8ZTZt2sT06dMpKChgxowZfPrp/v/HDBs2jISEBEpKSjj//PMZO3YsI0aM4IEHHoiuk5GRwbXXXsuIESOYMWMGxcXFAKxfv54zzzyTsWPHcuqpp7Jq1SogcqHZb37zm5x44olcf/31vPnmmxQWFlJYWMiYMWOoqKjgkUceYeLEiZx77rnR15k6dSojR47kjTfe4JxzzmHTpk3cf//93H333RQWFjJv3jyKi4u54IILGD9+POPHj+edd94BaPY1AH79618zfvx4CgoKuPXWW6Ov9Y9//IMJEyZQWFjI1VdfHQ1fGRkZ/PCHP2T06NHMnz+/lb+hduSc67SPsWPHuvaw7Se3ulVjilyotjbatm7POjdyzkj31Jqn2qUGERFpPStXrox3CW7FihVu8ODBrri42Dnn3O7du90555zj5syZ45xz7i9/+Ys777zznHPO3Xrrre7Xv/61c865BQsWuJ49e7pwOOx2797tnHOuurrajRgxwpWUlDjnnAPcP/7xD+eccz/72c/cNddc45xzbvr06W7NmjXR/UybNs0559ysWbPc2Wef7YLBoHPOuXPOOce9/fbbzjnnKioqXH19vbv22mvdb3/722bfy9y5c93ZZ5+9X63OOXfppZe6efPmOeec++STT9wJJ5xwwNd4+eWX3de//nUXDoddKBRyZ599tnvzzTfdypUr3TnnnOMCgYBzzrlvfetb7sEHH4y+18cff/ywP/+21tzPGLDIHSDT6MKwLZA5cwZ7H3+c6gULyJgyBYBB2YPoktyFxTsX84XBX4hzhSIicqTuXHgnq0pXteo+T8g9gRsm3HDQdV5//XUuuugi8vLyAMjNzWX+/Pk89dRTAHz5y1/m+uuvj65/9913849//IPMzEwef/xxzIx7772Xp59+GoDNmzezdu1aunbtis/n4+KLLwbgiiuu4Itf/CKVlZW8++67XHTRRdF91tXVRacvuugi/H4/AKeccgo/+MEPuPzyy/niF79Inz59jvizePXVV1m5cmV0vry8nMrKymZf45VXXuGVV15hzJjIRdgrKytZu3Yty5cvZ/HixdH7fdbU1NCtWzcA/H4/F1xwwRHX11EokLVA2okn4ktPp+LV16KBzMwY022MBvaLiEi7uPbaa7nuuuui82+88Qavvvoq8+fPJy0tjalTp1JbW9vstmZGOBymS5cuLF26tNl10tPTo9M33ngjZ599Ni+++CKnnHIKL7/8MiNGjODNN9887LrD4TALFizY7+bnzb2Gc46bbrqJq6++utG69913H7NmzeKXv/zlfvtPSUmJBsnOTIGsBXxJSWRMmUzF3Ln0CIcxX2To3djuY5m7eS7F1cXkp+XHuUoRETkSh+rJaivTp0/nC1/4Aj/4wQ/o2rUrpaWlnHzyyTz22GN8+ctf5uGHH+bUU0894PZlZWXk5OSQlpbGqlWrWLBgQXRZOBzmySef5JJLLuGRRx5h0qRJZGVlMXDgQJ544gkuuuginHMsX76c0aNH77fv9evXM2rUKEaNGsX777/PqlWruOyyy/jlL3/JCy+8wNlnnw3AW2+9RW5ubqNtMzMzKY+5MsHpp5/Offfdx49+9CMAli5dSmFhYbOvccYZZ3DLLbdw+eWXk5GRwdatW0lMTGTGjBmcd955XHvttXTr1o3S0lIqKiro37//Z/oOOhIN6m+hjBkzCJWUULNsWbStqFsRAIt3LY5XWSIi0kmNGDGCH//4x0yZMoXRo0fzgx/8gPvuu4+//e1vFBQU8NBDD3HPPfcccPszzzyTYDDIsGHDuPHGGznppJOiy9LT01m4cCEjR47k9ddf5yc/+QkADz/8MH/5y18YPXo0I0aM4Jlnnml237/97W8ZOXIkBQUFJCYmctZZZ5Gamsrzzz/Pfffdx+DBgxk+fDh/+MMfyM9v3CFx7rnn8vTTT0cH9d97770sWrSIgoIChg8fzv3333/A1zj99NO57LLLmDhxIqNGjeLCCy+koqKC4cOH8/Of/5zTTz+dgoICTjvtNLZv3/5Zv4IOxVwnvmzDuHHj3KJFi9rltUIVFaw5+RS6zvoK3bwu4/pwPac8egrnH38+N594c7vUISIin93HH3/MsGHD4l1Gm8nIyKCysjLeZRzTmvsZM7PFzrlxza2vHrIW8mdmkj5hAhX/eTV67bFEXyIF+QW6Yr+IiIh8JgpkhyFz5gwCn3xCYMOGaNvYbmNZs2cN5YHyg2wpIiLSftQ71vkokB2GjOnTAaiIubdlUfciHI6lu5bGqSoRERHp7BTIDkNi9+6kFBQ0ump/QX4BCZbAB7s+iGNlIiIi0pkpkB2mzBkzqF2+nPqdOwFITUhleNfhGkcmIiIiR0yB7DBlzpwBQOXrr0fbiroX8WHJh9SF6g60mYiIiMgBKZAdpqRBg0gaMKDROLIx3cZQH65nRcmKOFYmIiLHittvv50RI0ZQUFBAYWEh7733HgADBgygpKRkv/VPPvlkADZt2sQjjzwSbV+6dCkvvvhi+xQtB6VAdpjMjMyZM6hauJCQd2f6hgvE6rCliIi0tfnz5/P888+zZMkSli9fzquvvkrfvn0Pus27774LtE4gCwaDh1+0HJIC2RHImDED6uupfOstALqkdOG47ON0xX4RETks559/PmPHjmXEiBE88MADhEIhZs+ezciRIxk1ahR33333ftts376dvLw8kpOTAcjLy6NXr16N1qmpqeGss87iT3/6ExC5UCxE7h85b948CgsLufPOO/nJT37C448/TmFhIY8//jhVVVVceeWVTJgwgTFjxkSv5D9nzhw+//nPM336dGbMmNGWH8kxS/eyPAKpo0fjz8uj8rXXyPbu51XUvYh/b/w3oXAIv6/z3+RURETa3l//+ldyc3Opqalh/PjxjB07lq1bt7JiRWQIzN69e/fb5vTTT+e2225jyJAhzJw5k4svvpgpU6ZEl1dWVnLJJZfwla98ha985SuNtr3jjju46667eP755wHo
3r07ixYt4ne/+x0AN998M9OnT+evf/0re/fuZcKECcycORMg2iPX9N6V0joUyI6A+XxkTptG+YsvEg4E8CUlUdS9iCfWPMGaPWsY1vXovR2HiMjRZscvfkHdx6tadZ/Jw06gx82HvqXevffey9NPPw3A5s2bCQQCbNiwge985zucffbZnH766fttk5GRweLFi5k3bx5z587l4osv5o477mD27NkAnHfeeVx//fVcfvnlh133K6+8wrPPPstdd90FQG1tLZ9++ikAp512msJYG9IhyyOUOXMG4aoqqr2BlGO7jQVgyS6NIxMRkUN74403ePXVV5k/fz7Lli1jzJgx1NXVsWzZMqZOncr999/PVVddxebNmyksLKSwsDB6Y26/38/UqVP52c9+xu9+9zv+9a9/Rfd7yimn8NJLL3Ek96p2zvGvf/2LpUuXsnTpUj799NPo/RjT09Nb541Ls9RDdoTSTjoJX1oaFa++Rsapp9Izoyc903uyeOdiLh92+H+ViIhIfLSkJ6stlJWVkZOTQ1paGqtWrWLBggWUlJQQDoe54IILGDp0KFdccQV9+/Zl6dKl0e1Wr16Nz+dj8ODBQGRgfv/+/aPLb7vtNm677TauueYa/vCHPzR6zczMTCq8E9Kamz/jjDO47777uO+++zAzPvjgA8aMGdNGn4DEUg/ZEfIlJ5M+eTIVr7+GC4eByDiyJTuXHNFfJSIicmw588wzCQaDDBs2jBtvvJGTTjqJrVu3MnXqVAoLC7niiiv45S9/ud92lZWVzJo1i+HDh1NQUMDKlSv56U9/2mide+65h5qaGq6//vpG7QUFBfj9fkaPHs3dd9/NtGnTWLlyZXRQ/y233EJ9fT0FBQWMGDGCW265pS0/AolhnTk8jBs3zi1atChur1/23PNs+9GPGPDYo6QWFvLP1f/kvxf8Ny984QX6ZfWLW10iInJwH3/8cfRQnEhbaO5nzMwWO+fGNbe+esg+g4wpkyEhIXpvy7HdI+PIFu/U5S9ERESk5RTIPgN/VhbpEyZQ8VrkNkqDsgfRJbmLBvaLiIjIYVEg+4wyZkwnsGEDdRs2YGaM6TZGV+wXERGRw6JA9hllTp8OED1sWdStiE8rPqWkZv97iYmISMfRmcdQS8d2JD9bCmSfUWLPnqSMHEmld7Pxou6R+1pqHJmISMeVkpLC7t27Fcqk1Tnn2L17NykpKYe1na5D1goyZ86g+Lf3UL9rF8PyhpGakMqSnUs4Y8AZ8S5NRESa0adPH7Zs2UJxcXG8S5GjUEpKCn369DmsbRTIWkHmjEggq3x9LjmXXExBXoEG9ouIdGCJiYkMHDgw3mWIROmQZStIOv54Evv32zeOrHsRq0tXUxGoOMSWIiIiIgpkrcLMyJwxk6oFCwhVVlLUvQiHY+mupfEuTURERDoBBbJWkjlzBtTXU/XWWxTkFZBgCTpsKSIiIi2iQNZKUkePxt+1KxWvvkZaYhrDug7T9chERESkRRTIWon5/WRMm0rlW2/hAgGKuhXxYcmH1IXq4l2aiIiIdHAKZK0oc8YMwpWVVC18n6LuRdSH6/mo5KN4lyUiIiIdnAJZK0qfOBFLS6PitVcp6ha5QKzGkYmIiMihKJC1Il9KChmTJlH52utkJ2VxXPZxumK/iIiIHJICWSvLnDmD4K5d1K5YwZjuY1i6aymhcCjeZYmIiEgH1maBzMxSzGyhmS0zs4/M7Gde+0Aze8/M1pnZ42aW5LUne/PrvOUD2qq2tpQxZQr4/VS8+hpF3YqorK9k7d618S5LREREOrC27CGrA6Y750YDhcCZZnYScCdwt3PueGAP8DVv/a8Be7z2u731Oh1/djZpE8ZT8dprjO0+FtCNxkVEROTg2iyQuYhKbzbRezhgOvCk1/4gcL43fZ43j7d8hplZW9XXljJnzCSwfj1dd9XRI72HrkcmIiIiB9WmY8jMzG9mS4FdwH+A9cBe51zQW2UL0Nub7g1sBvCWlwFd27K+tpI5YzoAla9HDlsu2bUE51ycqxIREZGOqk0DmXMu5JwrBPoAE4ATPus+zewbZrbIzBYVFxd/1t21icSePUkZPpyK115nbPexlNSUsLlic7zLEhERkQ6qXc6ydM7tBeYCE4EuZpbgLeoDbPWmtwJ9Abzl2cDuZvb1gHNunHNuXH5+fluXfsQyZs6gZulSxvgHAhpHJiIiIgfWlmdZ5ptZF286FTgN+JhIMLvQW20W8Iw3/aw3j7f8ddeJj/NlzpgJztF18Qayk7N1gVgRERE5oIRDr3LEegIPmpmfSPD7p3PueTNbCTxmZj8HPgD+4q3/F+AhM1sHlAKXtGFtbS55yGAS+/al8vXXGXPxGD7Y9UG8SxIREZEOqs0CmXNuOTCmmfYNRMaTNW2vBS5qq3ram5mROWMGex5+mPFXXcMbm9+gpKaEvNS8eJcmIiIiHYyu1N+GMmfOwNXXM2ZjZF6XvxAREZHmKJC1odQxY/Dn5NDlvTWk+FM0jkxERESapUDWhszvJ2P6NKrfmkdhzkj1kImIiEizFMjaWOaMmYQrKpi2uxur96ymMlB56I1ERETkmKJA1sbST56IpaYy/KNKwi7M0uKl8S5JREREOhgFsjbmS0khY9IppM1fgR+fDluKiIjIfhTI2kHGjBmEdu1ievUAXbFfRERE9qNA1g4ypkwBv5+pG1NZUbKCQCgQ75JERESkA1EgawcJOTmkjRvHgGW7CIQDrChZEe+SREREpANRIGsnmTNmkPjJdnqUOl2PTERERBpRIGsnmTOmA3DG5hwN7BcREZFGFMjaSWLv3iQPH8aEtbB011JC4VC8SxIREZEOQoGsHWVOn0Heut349pazbu+6eJcjIiIiHYQCWTvKnDkDc46xa50ufyEiIiJRCmTtKHnoUBJ792bShiQN7BcREZEoBbJ2ZGZkzpzBCesDfPTpIpxz8S5JREREOgAFsnaWMWMGCcEwvVcWs6ViS7zLERERkQ5AgaydpRUVQXYm49c4Fu/SODIRERFRIGt3lpBA1vQZjF0PH2xdFO9yREREpANQIIuDrJkzSa91lC18N96liIiISAegQBYH6SefTCgpgX5Ld1BSUxLvckRERCTOFMjiwJeaik0oZPwax5IdGkcmIiJyrFMgi5MeZ55HXgVsWPhqvEsRERGROFMgi5PsGTMIG7h5C+NdioiIiMSZAlmcJOTkUHZCLwYuK6YyUBnvckRERCSOFMjiKGXaZPoXOz5c9p94lyIiIiJxpEAWR8edexkAO196Ps6ViIiISDwpkMVR9sDB7OiVQtr8D+NdioiIiMRRiwOZmSWZWYGZjTKzpLYs6lhSduIJ9N5YQfWu7fEuRUREROKkRYHMzM4G1gP3Ar8D1pnZWW1Z2LEi57Qz8TlY98Jj8S5FRERE4qSlPWS/AaY556Y656YA04C7266sY8fIieewKxsqXtX1yERERI5VLQ1kFc65dTHzG4CKNqjnmNM1tStrRnQha/kmwtXV8S5HRERE4qClgWyRmb1oZrPNbBbwHPC+mX3RzL7YhvUdE+pPKSShPkz
52/PiXYqIiIjEQUsDWQqwE5gCTAWKgVTgXOCcNqnsGNL31DOoTIHt/34m3qWIiIhIHCS0ZCXn3FfbupBjWVGv8Twz2Jg89x2CpaUk5ObGuyQRERFpRy0KZGb2N8A1bXfOXdnqFR2DeqX34p1p+Uz5qJiSP/wvPf7rx/EuSURERNpRSw9ZPg+84D1eA7IA3YCxlZgZfUdN5K0xSex57FECn3wS75JERESkHbUokDnn/hXzeBj4EjCubUs7tswaPotHTg4R9Bu7fvvbeJcjIiIi7ehIb500GOjWmoUc64Z1HcbMogt5Zryj4t8vUbNsWbxLEhERkXbS0iv1V5hZecMzkcte3NC2pR17vjPmO7x+SibVmYns/PVdOLffsD0RERE5CrX0kGWmcy4r5nmIc+5fbV3csSY3JZfZE77FwyeHqFm0iMo33oh3SSIiItIODnqWpZkVHWy5c25J65Yjlw67lKcmP8GuxZ+QdNdvyDj1VCyhRSfDioiISCd1qB6y33iP3wPvAQ8Af/Kmf9+2pR2bEn2JXHfSDfx9cojA+vWU/d//xbskERERaWMHDWTOuWnOuWnAdqDIOTfOOTcWGANsbY8Cj0WTek8idfpU1vXxs/OeewjX1MS7JBEREWlDLT3Lcqhz7sOGGefcCmBY25QkANeN/xEPT/cTLi6h9MG/x7scERERaUMtDWTLzezPZjbVe/wJWN6WhR3rBmQPYPzpX+H9wcauB/5IsLQ03iWJiIhIG2lpIPsq8BHwPe+x0muTNnR1wdU8f0YOrraGkv/933iXIyIiIm2kpZe9qAXuB250zn3BOXe31yZtKCMpg0vPuI7XCozSRx8l8Omn8S5JRERE2kBLLwz7eWAp8JI3X2hmz7ZhXeI57/jzWPb5EwhYmO3/85t4lyMiIiJtoKWHLG8FJgB7AZxzS4GBbVOSxPKZj+/M/AnPTYDql16hZrmG7omIiBxtWhrI6p1zZU3aDnpfHzPra2ZzzWylmX1kZt/z2nPN7D9mttZ7zvHazczuNbN1Zrb8UBelPZYUdiuk9uKzKEuDzXfcrlsqiYiIHGVaGsg+MrPLAL+ZDTaz+4B3D7FNEPihc244cBJwjZkNB24EXnPODQZe8+YBziJy0/LBwDcAjWKP8Z1TfsQzk5MJLVlO5ZtvxrscERERaUUtDWTfAUYAdcAjQBnw/YNt4Jzb3nBrJedcBfAx0Bs4D3jQW+1B4Hxv+jzg7y5iAdDFzHq2+J0c5Xqk92Dgl7/Bthz45I6f40KheJckIiIiraSlZ1lWO+d+DExxzo13zv3X4ZxlaWYDiFzd/z2gu3Nuu7doB9Ddm+4NbI7ZbIvXJp6vjL6SV87Mw79pK6VPPRXvckRERKSVtPQsy5PNbCWwypsfbWZ/aOG2GcC/gO8758pjl7nIYKjDGhBlZt8ws0Vmtqi4uPhwNu30UhJSmDnrFtb0gq13/1q3VBIRETlKtPSQ5d3AGcBuAOfcMmDyoTYys0QiYexh51xDl87OhkOR3vMur30r0Ddm8z40c79M59wD3j01x+Xn57ew/KPHzP6nsejCESSWVrDtb3+KdzkiIiLSCloayHDObW7SdNBBTGZmwF+Aj51z/xOz6Flgljc9C3gmpv0r3tmWJwFlMYc2xWNmXHHJ7SwabJT+6c8E9+yJd0kiIiLyGbU0kG02s5MBZ2aJZnYdkUH6B3MK8GVgupkt9R6fA+4ATjOztcBMbx7gRWADsA74E/D/DvO9HDOG5g6ldPbZ+GvrWX/3HYfeQERERDo0a8k1rcwsD7iHSIDyAS8D33PO7W7b8g5u3LhxbtGiRfEsIW721u7lsSunMmlZgMH/fonkfv3iXZKIiIgchJktds6Na25ZS8+yLHHOXe6c6+6cy3fOXRHvMHas65LShdxrvkm9z/HRL26OdzkiIiLyGbT0LMtBZvacmRWb2S4ze8bMBrV1cXJwX5x4Fe+e2pXUNxZTtnRxvMsRERGRI9TSMWSPAP8EegK9gCeAR9uqKGmZBF8CRdfeRlkafPzfN+mWSiIiIp1USwNZmnPuIedc0Hv8A0hpy8KkZSYOns7yc4aS/dFmtv7nuXiXIyIiIkegpYHs32Z2o5kNMLP+ZnY98KJ3o/DctixQDu3M7/0PO3KMT++8XbdUEhER6YRaGsi+BFwNvA7MBb4FXAIsBo7N0xw7kH5dB7HjyzPI2VrOiod+F+9yRERE5DAdNJCZ2Xgz6+GcG+icGwj8DFgBPAeM9do1uL8DOPeqX7KpdyLV//sXgtVV8S5HREREDsOhesj+CAQAzGwy8EvgQaAMeKBtS5PDkZGUgf+7V5JVVs/8e/4r3uWIiIjIYThUIPM750q96YuBB5xz/3LO3QIc37alyeGa+fnvsmZ4FumPvUz5rv1uAyoiIiId1CEDmZkleNMziIwha5DQzPoSRz7zMfDGW0kOON75xbXxLkdERERa6FCB7FHgTTN7BqgB5gGY2fFEDltKB1Mw4XOsnzSAPq98yCcfL4x3OSIiItICBw1kzrnbgR8Cc4BJbt+VR33Ad9q2NDlSE/7rfwj5YNntN8S7FBEREWmBQ172wjm3wDn3tHOuKqZtjXNuSduWJkeqR/9h7Pr8iQxetIP35j4S73JERETkEFp6HTLpZE694TdUpvko/vVvCIQC8S5HREREDkKB7CiVmt2V0OwLOG5DNS8/+st4lyMiIiIHoUB2FJtw9Y/Zm5dC8gNPUFpVEu9yRERE5AAUyI5ivuRkun7vu/TdFeKFP/wo3uWIiIjIASiQHeUGXzibPQO7MvCfC1i1fXm8yxEREZFmKJAd5cyM4398G10r4I3/+RH7rlwiIiIiHYUC2TGg26TplI8bzLhXPuX1Fc/EuxwRERFpQoHsGDHyljtJDcCau39BbbA23uWIiIhIDAWyY0T60GEEz5rMpAUVPD733niXIyIiIjEUyI4hw2+4Dfw+gn98iB1VO+JdjoiIiHgUyI4hid27k3bFJZy8IshPH7iU1z55TYP8RUREOgAFsmPMgP/3fcI52Xz7TztYfd13+OFDl/Lx7o/jXZaIiMgxTYHsGOPPzGTI0/9H18suZ8rqBL72i2W8e9UF3P3Y9yip0dX8RURE4sE68yGrcePGuUWLFsW7jE4ruHs32//6AHsefpTE2no+GJyA76tf4gvnXU+yPzne5YmIiBxVzGyxc25cs8sUyCRUVsbGv/6ein88RkpVPWuOSyH7G19j8rn/D59PnagiIiKt4WCBTP/bCv7sbI6/9mZGvzWf2m9dTM9dQbrf8Hte+9yJfPTs3zXwX0REpI0pkEmULz2dMd/7KWPeeo/t3zyX1L3V+K7/Je+ePpHNzzyOC4XiXaKIiMhRSYFM9pOYmsb07/+K0a+9w4ffmEJtVTmVN/yUJaedSvG//omrr493iSIiIkcVBTI5oMz0LnzpB/cz+MUXee2qQnYF91Dy41v5cOZUSh95lHBdXbxLFBEROSookMkh9esygG9f9yj5j/+dh7/an43+Unbedhurpk9j99/mEK6ujneJIiIinZrOspTDEgqHeHbdM7
z01F3MmLuXUZ84LDuLvNmzybn8cvxZWfEuUUREpEPSZS+k1VXVV/GXD//C26/8lfPfCTJmbQjLSCf3ssvJnT2LhNzceJcoIiLSoSiQSZvZVrmNuxffzcfv/ZtLFiZS9FEdvuRkunzpIrpeeSWJPXrEu0QREZEOQYFM2twHuz7gVwt/xe41H/LVJdmM/qAM8/nocv755H372yR27xbvEkVEROJKgUzaRdiFeWHDC/x28W9x23fy7ZV9GfbuVnyJieRdcw25X/kylpgY7zJFRETiQoFM2lV1fTVzPprD31b8jdzSAFe/nsiwVVVU9Mpm+zfOJvuUU+mT0Yfemb1JTUiNd7kiIiLtQoFM4mJH1Q4eXfUom8o2kb7wY854Zgvd9jjmn2D8fYaP3VlGXmoefTL60CfTe2Tse85Py8dnujKLiIgcHRTIpEMI1day9YHfU/nnB3FmfHrBBBZO6c6ntdvZUrGFHdU7CLtwdP0kXxK9M3s3Cml9MvvQO6M3fTP7kpaYFsd3IyIicngUyKRDCWzZys47fknlq6+RNGAA3X/8YzJOnUR9qJ7tVZFwtqVyS6PnzRWbqayvbLSf3JTc6KHPPhl9GJwzmCl9piioiYhIh6RAJh1S5bx57Pz57QQ++YTM02bS/cYbSezdu9l1nXOUB8oj4axycySsxQS2HVU7CLkQqQmpzOg3g3MHncuJPU/E7/O387sSERFpngKZdFjhQIDSv82h5P77wTnyrv4GuVdeiS85+bD2Ux+uZ3nxcp7f8Dwvb3yZivoK8lPzOXvQ2Zwz6ByG5g5to3cgIiLSMgpk0uHVb9vGzjt/RcXLL5PYrx/db76JzKlTj2hfdaE63tz8Js9teI63t7xN0AUZkjOEcwedy+cGfY5uabommoiItD8FMuk0qt59lx0/v53Ahg1kTJtG95tvIqlv3yPe357aPby06SWeX/88y0uW4zMfJ/Y4kXOPO5cZ/WZovJmIiLQbBTLpVFwgQOlDD1H8+z9AMEjXq66i6ze+ji8l5TPtd1PZJp7f8DzPb3ierZVbNd5MRETalQKZdEr1O3ey61e/pvyFF0js3ZvuN99ExvTpmNln2q9zjg92fcBzG57j5U0vUxGIjDf73MDPce5x52q8mYiItAkFMunUqt5byM6f/zd1a9eRPvlUetx8M0kDBrTKvutCdby15S2eW/8c87bOIxgOMjhncGS82cDP0T29e6u8joiIiAKZdHquvp7Shx+m5L7f4QIBcq+8kryrv4EvrfXGgO2t3ctLm17iuQ3Psbx4OYZxYs/IeLOZ/WZqvJmIiHwmcQlkZvZX4Bxgl3NupNeWCzwODAA2AV9yzu2xyDGoe4DPAdXAbOfckkO9hgLZsSdYXMyuu+6i7JlnSejVk+433Ejm6ad95sOYTX1S/gnPb3ie59Y/Fx1vNr3fdM4ddC4n9TxJ481EROSwxSuQTQYqgb/HBLJfAaXOuTvM7EYgxzl3g5l9DvgOkUB2InCPc+7EQ72GAtmxq3rxYnbc9t/UrV5N+skn0/2//ovkQQNb/XWccywtXspz65/jpU0vURGoIC81jyuGXcGsEbNI8CW0+muKiMjRKW6HLM1sAPB8TCBbDUx1zm03s57AG865oWb2R2/60abrHWz/CmTHNhcMsuexxym+5x7CtbV0+cIXSB48mMRePUns2ZOEnj3xd+nSar1ngVCAt7a8xZNrn+Sdre8wOn80v5j0C/pl9WuV/YuIyNHtYIGsvf+87x4TsnYADSOmewObY9bb4rUdNJDJsc0SEsi94nKyzjqTXf/zP5Q98wyurq7xOqmpJPZsCGg9vOle+0Jbjx4tvitAkj+Jmf1nMrP/TF7c8CI/f+/nXPjchVw37jouGnJRqx82FRGRY0d795Dtdc51iVm+xzmXY2bPA3c459722l8DbnDO7df9ZWbfAL4B0K9fv7GffPJJm9UvnYtzjtCePdRv20799m0Et2/3phse2wgVl+y3nT8vLxraEnv2JLFXpHetYd7ftWuzYWtH1Q5+8s5PmL99PpN6T+K2k28jPy2/Pd6qiIh0QjpkKeIJBwIEd+yIBLUd2/cPbdu24WpqGm1jSUle71qvaEhLGz+OtJNOwuF4bNVj3L34bpITkrnlpFs4Y8AZcXp3IiLSkXWkQ5bPArOAO7znZ2Lav21mjxEZ1F92qDAmciR8SUkk9etHUr/mx3055wiXlcUEtMa9bVXvvktw1y74gyN58GByZ32FS869gIm9JnLzvJu57s3rmLt5LjefeDNZSVnt/O5ERKSzasuzLB8FpgJ5wE7gVuD/gH8C/YBPiFz2otS77MXvgDOJXPbiq80drmxKPWQSD+G6OspfeJHSBx+kbvVq/Lm55FxyCRmXXMScbU/zx+V/JC81j/8+5b+Z2GtivMsVEZEOQheGFWkDzjmq33uP0jkPUvnGG1hiIlnnnMOe807hph1/ZGPZRi474TK+P/b7pCakxrtcERGJMwUykTZWt3Ejex56iL1P/x+upoaUkybwxsmZ3J30BgO6DOIXk37ByLyR8S5TRETiSIFMpJ2E9u5lzxNPsOcfDxPcuZNQ3x48WVjDv4fWMHvcN7mq4CoSfYnxLlNEROJAgUyknbn6espffoXSOXOoXbGCurREXhwdZNPMYdx0zl0MzG79uwqIiEjHpkAmEifOOWqWLKF0zoOUv/YqIRwLhyWQO3sW553zA3zmi3eJIiLSThTIRDqAwJYtbP3rA5Q/9RRJtSG2DspiyLd+SN/PXYD5dbNyEZGj3cECmf48F2knSX36MPAntzFi3ny2XXkGSaUVVP3oVj6cMZnSBx8kVFkZ7xJFRCRO1EMmEieb9mzgofu/zcjXNnLCFrCMdHIuuJCcL3+ZpD69412eiIi0Mh2yFOmgguEgf1vxN1568Xect9jHuJX1mHNkzpxJ7uzZpI4p1E3LRUSOEgpkIh3cx7s/5qZ5N7Fn8zqu3TiUYW9vIVxeTkpBAdlnf470yZNJGjBA4UxEpBNTIBPpBOpCddy75F4eWvkQx6X04bayaaQ/N4+6tWsBSOzXj4zJk8mYMpm0CRPwJSfHuWIRETkcCmQincj7O97nx2//mJ3VO7lq1FVcljUTe+8Dqt58i6r33sPV1mIpKaSfdBIZUyaTMXkyib015kxEpKNTIBPpZCoCFdyx8A6eXf8shjG863BO6nkSJ+aOYegnQQJvL6DyzTep37wZgKTjjyNj8hQyJk8mbWwRlqi7AYiIdDQKZCKd1IfFH/L21rdZsH0By4uXE3RBknxJjOk2hpN6nsiJ9f3ouXwb1fPmUfX+Iqivx5eeTvopp5AxZTLpp55KYrdu8X4bIiKCApnIUaGqvorFOxezYPsC3tv+Hmv2rAEgMymT8d3Hc3LOGIo2J5L+/mqq3nqL4M6dACQPHxYZezZ5CqmjC3QRWhGROFEgEzkK7a7ZzcIdC1mwfQELti1gW9U2ALqldeOkHicyuW4AJ6yuws1fTM0HSyEUwp+dTfqkSWRMnUL6pEkk5OTE9020glA4RHWwmqr6KqrrI89VwSqq6quoC
9ZR1L2IHuk94l2miIgCmcjRzjnHlootLNgRCWcLdyxkb91eAAZlD2JSViGnbM2g9/Id1L2zgFBpKZiRWlBA+pRI71nK8GGYr+1v3uGcoyZYEwlOXniqrq9uFKai096jur46GrJil1UHq6kJ1hz09RIsgTMHnsnsEbMZmju0zd+fiMiBKJCJHGPCLszq0tXRw5uLdy6mNlSLz3yMzBnBabWDKFwfImvxOupWfATO4cvMJCE/H39ODv6cLiTk5ODvkuPNx7Tl5ODr0oW6FD8V9RWUB8opqyujPFBOeV154/mGR13j55ALteh9pCakkp6YTnpiOmkJafumE73phCbzTdY1M55d/yz/WvMvqoPVTOw5kdkjZjOx10Rd001E2p0CmcgxLhAKsKx4WTSgrShZQciFSPYnc0rqSGZsz6X/p7WE95QR3rsXK6vAV15FYkUNvlDz/0YEfVCZCuWpUJEGFalGRVpkvjLNR31mKi47A7Kz8Od0ITGnK2lZOWQlZ5OZlLkvSMWEqqbzPmudHrvyQDlPrH6Chz9+mOKaYobkDGH2iNmcOeBMEv06I1VE2ocCmYg0UhmoZNHORdGAtm7vumbXy0zIoBuZdA+kkl+fQte6RHJq/WTX+siodqRVBUmprCepshZ/eTW+skpcWTmEw83uz5KS8OfkkNinD6mFo0kdPZrUwsJ2OxO0PlTPCxtf4MGPHmTd3nV0S+vGl4d9mQuGXEBmUma71CAixy4FMhE5qOLqYtbtXUdmUiZZSVlkJWWRmZSJ33f4Z2S6cJhweTnBPXsI7dlLaG/s8x6CpXsIrF9P7cqVuPp6ABJ79SK1sDDyGFNIytChWFJSa7/NfTU6xzvb3mHOijm8t+M90hPTuWjIRVw+7HKdACAibUaBTEQ6nHAgQO1HH1GzdBk1S5dSs3Rp9FIdlpxMysiR7dKLtnL3SuZ8NIdXNr2CYToBQETajAKZiHQK9Tt2RMLZB5GA1p69aNsqt/GPj/+hEwBEpM0okIlIp9SoF21ZpCctuGMHENOLNnp0pCetlXrRyurKeHLNkzoBQERanQKZiBw12qsXLRAK8OLGF4+5EwBqgjV8VPIRy4qX8dHuj+iW1o1pfadR1L2IRJ8CqchnoUAmIketcCBA3cqVVC9dGh2PFu1FS0rCn9cVf3o6vrR0fOlp+KLTB3uk7VsnLY35ZUt5cM3DR90JAM45tlVtY9muZSwtXsqy4mWs3b2atMogeeVwfDCXTxPLWdMtSHpqNpP7TGZa32mc0vsU0hPT413+YakN1rJk5xLmb59PcU0xY/LHML7neAZmDdQhaWk3CmQickyJ9qIt/5DQnj2Eq6qafYSqq8HrXTsUS0wknJZCZUKQPb5aapIhIzuf3t2OJ7tLd3yZGZEL5+Z2xZ+bQ0LXriTk5uLPzcWXmdkh/tOvqa/h440LWbt6Pts2LGfv5vUk764grxzyK3z0qEogq6weX7DxZUtcUiK7BmSzuFsly3sE2Ng3iRHHncS0vtOY1nca+Wn5cXpHBxZ2YdbsWcO7295l/rb5LNm5hEA4QIIvgS7JXSipKQEgLzWP8d3HM77neMZ3H0//rP4d4ruSo5MCmYjIAYQDgQMEtup909WNl1WV7Wb7rg2U791Jcl2Y7FAS6QEfvura5l8kMTES1rp23fecGwlvCV0joa0hvPlzu+JLTzuiUBCqrCK4Yzv127dTv207ezavo+STVdRu3YIVl5Kxt46UJvkznODDl59HSq8+JPXsRWLPHiT06EFiz54kdOtO/dat1CxZQvUHH1C7ciUEgwDszE9kRa8ga3obbtRQCorOZHr/GQzKHhS3QLOjagfzt81n/vb5vLf9PUprSwE4vsvxTOw1kYk9JzK2+1hSE1LZXLGZhTsW8v6O93l/x/sU1xQD0C21WzScTegxgT6ZfRTQpNUokB0p56B2L6R2/hswi0jrK6sr44k1kTsAlNSUkBB0ZFVDfl0yfYKZ9AikkV+XRG6tn+wqSK8KkVJRR2J5Nba3Aqqbvw+nJSUdOLzl5ILP5wWvHdTv2E7Qew5XVDbaT9hgTwbsyfIRzO9CSs8+5PYfQp/jCsntP5iEHj1IyMtr8T1Mw7W11H74IdUfLKV6yRKqliyG8gogcoeGNb2NncflkDN+IoWTL6Cwz4QjupZdS1XXV/P+jveZv30+87fNZ0PZBgC6pnSNBLBeEzmp50l0Szv4yR7OOT4p/6RRQNtduxuA7mndmdBjAuN7jGd8j/H0yezTZu9Hjn4KZEdqwxvw6GVw0jdh4rchLbftXktEOq1AKMCinYsori5md+1uSmpK2F2zO/Lw5htu9h4rsd7Roz6NfsEsetan060uma61iXSpNjKqw6RWBEgur8VXVgl7ynC1jXvgQl0yqOiSzI6MIJ+kVLIr07E7C3zd8+k5cBTHHz+B0b2KGJIzpE0G5LtwmMCmTdQsWcLuhe9Svvh9UrZGDgUGffBprwTqhg2k+8QpFEz/Epk9+n6m1wuFQ3y0+6NoL9iyXcsIuiAp/hTGdh8bDWBDcoZ8pl4t5xwbyzby/o73WbhjIYt2Lor2tvVK78W4HuOiIa1XRq/P9J7k2KJAdqR2r4fXfw4fPQXJWTDxGjjpW5CS3XavKSJHpfpwPaU1pY0DW+3uaHArqY20ldSUUB4ob3YfXV0G/ULZBIK1rEkooT7BSPYnM6LrCEZ3G83o/MgjLzWvnd/dPsE9e9jz/nzWv/0CtUuWkruxlETvXvJleakw6gT6TJxB3omnknz8cZj/4D1omys2M3/bfBZsX8CC7QuoCER65IblDov2go3pNoZkf3KbvSfnHOv3ro+Gs/d3vB8N2L0zejfqQevsJ3pI21Ig+6x2rIA3fgmrnoeULnDKd2HC1ZCc0favLSLHnEAoQGltaTSgxYa4kpoSEnwJFOQXUJhfyJDctun9ai2B2mqWvfM0m+b9m9DylfT/pIYuVZFlwbQkkgpGkTvhZFJHjcKSkqmqq2B1yUpWFUcee2pK8IUhN6kLQ7MGMyT7OAZlDiTdn4ILhnDhEITCEA7hQmFcKAihcLR9//kQvpRkkgYMIGnQIJIHDcKffXh/ZIddmLV71rJo5yIWbo+EtIYQ3S+zXzScje8x/pCHS+XYokDWWrZ9AHN/AWtfgbQ8mPR9GH8VJKa2Xw0iIp2Uc45VpauYv+j/2Db/NbJWb2PIFkffEvC11X9FPh/4/ZjfHxkr5/cTrq1tdHatv2tXkgcN8gLaQJIGHUfyoIEk9OzZovF1DWd0Lty+kPd3vs/iHYupqI/05OWl5tEnow99Mr1Hxr7n/LR8fNay8XtydFAga22bF8Lc2yNjzDK6w6nXwdhZkNB2XeYiIkebbZXbmLt5Lu+u/g9lHy2nf0ZfhncbwYhuBRyXO5iEhGTM7wN/QuTZ5z/AfCRw7T/va3YsmQuFqN+yhboNGwhs2EjdhvXe8wbCZWXR9Sw1laSBA0geOIikQQNJPu44kgYOImlAf3zJB/73PhQOsXrPat7f8T7r965na+VWtlRsYUf1DsJu3yVFknxJ9Mro1TioxUy3
97XenHOUB8ob9cw2nd5du5vaYC39svoxKHsQA7MHMih7EIOyB5GRpKNGh6JA1lY2vRMJZp+8A1l9YPJ1MOYK0O1VREQ6HeccodJSAhs2ULdhI4EN6yPP69dTv23bvhV9PhL79CF54ECSjjvO61XzDn926XLA/deH6tletZ0tFVvYUrml8XPFlmivWoOc5JwDhrXuad1bdAZrbMhqOn6x6XRpbSnBcHC/fSRYArmpuXRN6Upeah6JvkQ+rfiUTeWbGq3fLbUbA7vsC2iDsgcxqMsguqZ01aVDPApkbcm5SE/Z3Nthy/vQpT9MuQEKLgZ/QnxrExGRVhGuqSGwaRN16zdEAtvGDQTWbyCwaRMuEIiu58/NjfSmDTou8jxwIEkDB5LYqxeWcPD/E8rqyhoFtIbprZVb2V65naDbF34SLIGeGT2jAa1nek+qg9XN9mgdKmR1TY0ErQNNZyVlNRuoguEgWyq2sKFsAxvKNrCxbCMb9kamq4PV0fUykzIb9aQ1PHpl9NovVLpgkFBFBeHyckLeJVX8WZn4srLwZ2Ye8jPs6BTI2oNzsPY/kWC2fSl0PR6m3AgjvwhteB0eERGJHxcKUb9tG3XrI4c9Axs3RELb+vWEYg5/kphIUt++JA0cSNKA/pGgNmAASQMG4O966B6kYDjIzuqdzYa1LRVb2FO3Z7+Q1dCj1dx0VnJWm4xfc+EwocpKdu7awJbtq9mxYx27d33Knt1bqdq9EyqrSK+D9FrIrPORG0wmK+AnrcaRVFOPr6buoPv3paXhy87Gn5mJPysrGtR82Vn4M7PwZ2fh8579mV6Qy4q0HekFl1uTAll7cg5WvRAZ/L/rI8g/AabeBMM+HxlcKiIix4RgaSmBTZsIbNwYed60ibqNG6n/5FNczEkFvszMaDhLGjiA5AEDIsGtf398aWkteq3aYC1J/qRWC1nOOcLl5QRLSwnt2UNoz57IdGlkOlRRTri8glBFBaHysuh0uKICwuGD7zsjjfq0JGpSfVQkhSlNqGN3Yi1VyVCVYtSkGEldcsju2ouc5BwyA37SayG91pFSEyK5pp7Eqnr8VbX4qmqwiipcRSXhysqDvi5+f+PwlpWJL8sLd9lZZEyZQtr48a3y+R2IAtkRqq0PsbO8lv5dj2BgZTgMK/8vcrmMkjXQfRRMuxmGngU6li4icsxq6FWLhLVNBDZFAlvdpk0Et21vtG5C9+7RXrWkAQOiPWuJvXsf1uE7FwwS2rs3JlSVemFrL6HSUoJ7vPbSUoJ79xDaszd6m6ymLDUVf5YXaDK9HqiszMY9VFmZ+DIz8Wdl7zvkmJWFLz292WvP1QZr+aT8k+jhz4ZDn8U1xVQGKgm50EHfn2Fk+NPID6fTNZhCXjCF7PpEutQlkBXwkxEw0mscKbVhUqqDJFXXk1BVi7+qFquswVVUkvf975J/5dda/JkeCQWyI/TKRzv4xkOLGZSXztSh3Zh2Qj4TBuaSnHAYhyDDIfjwSXjzDijdAL2KYNqP4fgZCmYiItJIuKaGwKef7gtqGzdFw1q4uUOgXq9aUt++hGtroz1YjQLWnj2Nt23Cl50duU1Xbi7+nJzIrbpycvHn5kTusZoT056biy8lpR0+iX2cc9QEa6isr6QyUBl9rqiviM5XBCr2PQcqqaqv2m95fbj+oK/zvcLvctXor7fpe1EgO0I7y2t5acUO5q7exfz1u6kLhklL8nPK8XlMG9qNqUPz6dWlhdcgCwVh2aPw5q+g7FPoe2IkmA2a0mb1i4jI0cE5R2jv3sjhTy+kNfSsBT75dN+JBX5/JDy1NGB16YIlHhtXBqgL1UUDW0NIq6qvioa5Md3GMDJvZJvWoEDWCmoCIeZvKGHuqmJeX7WLrXsjNwU+oUdmpPdsaD5F/XNI9B/i+H0wAB88BG/dBRXbYMCpkWDWf2I7vAsRETnauFCIYHExvpQUfFlZLb5ZvLQ/BbJW5pxjfXElc1cVM3f1LhZuLCUYdmSmJDB5cD5Th+YzZWg+3TIP0q1bXwuL58C830DVLjhuOkz4BmT1hvR8SM/T9cxERESOIgpkbayitp531u3mjdW7mLt6FzvLI6ftjuqdzbSh+Uw9oRuj+3TB72tmzFigGt7/M7zzW6je3XhZao4XzrpFAlp6fuSRkb9vuuGRnKkxaSIiIh2YAlk7cs6xcns5b6wuZu6qXSz5dA9hB7npSUwZEuk9mzw4n5z0pMYb1lXCjuVQVew9SiLPlbv2TVftgtoDDMz0Jx84rDX0uGV0i0yndVXvm4iISDtTIIujvdUB3lpbwhurdvHGmmJKqwL4DMb0y4n0ng3txohezV8FuVnBAFQ3hLXimAAXG9xilh3orJK0rpDZM3IvzsyekNkdMnpAZswjo7vuzykiItJKFMg6iFDY8eHWMuauihzaXL4l0tvVLTOZqUPzGTcgl26ZyeRnJpOfkUxuehIJhzpJ4GCci/SoVRU3flQWQ+VOqNgBlTu8513Q3HVeUnP2D27R+Ybg1gMS2/c0aBERkc5GgayDKq6o4801kRMD3lpTTEVt44vwmUFOWhJ5GUnkZSSTlxEJa5HpJPK84JaXkUzXjKRDn+F5MOFQZAxbxXao2Bl5bght0eC2M/LczH3RSOmyr1etUXDrBsnZkJwBSRmRsW7JmZHphKT99yMiInKUUiDrBIKhMFv21FBSWUdJZR3FlQFKKuoorqyjpKLOaw9QUllHdaD5KxbnpCVGg1teZnI0yOXHBrnMJLqmJ5OUcIThLRyGmtJ9wa1yRzMhzmsPBQ6+L3+SF9IyIMkLatHg1tCWsS/ARZ8b1snaN52UrpMaRESkQztYIOvct00/iiT4fQzIS2dA3qFv01QdCFJSEaC4spbiikA0xJVU1lHizS/fspeSijqqDhDe0pP8ZKYkkpGSQGZKAhnJCWSlJJKR7M2nJJCZkkhmSgKZyQmN1s1MziAzdzgp3UceeOybc1CzJxLQ6iqgrjxy4kKg0nuuaDxfVxFpqy6FvZ/GLKsAWvJHgzUJa5mNQ93hBr+j/aQH56C+xvvcKyPfTyjonfzRHZJadv88ERFpHQpknVBaUgL9uibQr+uh/9OsCYS8HreGnrZIYCurqaeitp7KuiAVtZHHtr01VNQGqawLHrAXLlaCz8jwwlzj8LYv0GUkJ5KWlE9aUg9SkxJIS/aTluknNclPWlICaUkN035SEvz4ml4axDkIVB0kyJXHLKuMCRjedPUnjdtCdS37kP3JzR9mbWhLTIscck1IiaybkOQ9ew+/tyw63dCe3GQ6ZrtD9fA5B8G6fQG34f3GBtq6igO0Ve5b1tDmDnID4KTMyOHmjO7NPHf3TgLpDml54Nc/IyIin1WH+pfUzM4E7gH8wJ+dc3fEuaROLzXJT9/cNPrmHl6PRzAUpqouRHmj0BaZLq8NUhkz37CsojbI9rJa1u7aNx8MH94h8dREfzSkxU6nJSVEnhP9pCXlkJqUT1pSzPI0P8kJfhqOwDtczHTkciQAFq7HX19FYrAKf7ASf30VCcEqEuqr8AerSQxWkRCsjLTFPBKrqkk
o2x6ZDlbhD9fiDwfwhwIYBwk2h8OftC+kJaR480mRnqyGENXc+L39WOPevobev8zujXsIo+t4h359CZEzdSt3Rk7yaHje+RGsnwt1zV1yxfb1qjUX3GLbUroc3mHlcDhy2DtY6z3XRR6hhmdvWTCwry12ecM6AOaLPHx+b9ofM29N5mOX+5pZv2HeDrC/g72Wr8nygy1rpk4ROWp1mEBmZn7g98BpwBbgfTN71jm3Mr6VHZsS/D6y03xkpx35oTvnHHXBMNWBENWBIDWBkDcdoqY+0gtXEwhRUx/THtjXXh0IUV0fadtZXhuzfZDa+jCBUGsEIR+Q6T2OjJ8QSdSTTD1JBEmyyHR0nnqSrZ4k6kn3h0j3B0nzhUj1hUjzBUn1BUnxBUmxIKlWT7IFSSZIcqiexFCQoD+FurQ0ajPSqfOnEfCnEfCnE/ClU+tP9+bTqPenU+dPp96XAj4fhkX/Dze83GGGAdQBARqt4zPw+wbiNyMhxfCnGf7uht9nJPiNxHAdGfWlpNXvJi2wm7RACal1u0kJlJBSW0Ly3hKSd6wmqaYYX3j/8YNhXxKhtHxCad1wialYqA7zQpOF6rxHwHvU4TvEjYCPPdY4yPn8MdMJ3nxCM20tWSfB23cz6wDgIj2qrulzOGZZk+XNtsdu02RfuH0h1Ofb9x6jz82F2AOt29L2puH7QK9jR75Nwy9YdLy2a36+0eQB1mk05ruZttg/LswOEPJ9TZY3bffFvLfmHt57aunPwQGXxba7A+8LjuB7bdLeSXSYQAZMANY55zYAmNljwHmAAlknZWakJPpJSfST2/RCuK2gPhSmpn5feKsLhpoNIQ1zjdttv3Vit4V9Aaa59YLhMHXBMIFg5LmuPkQgFKbOC4p1wdC+6ehzKLJuMEx1MMzeoLdedD+hffuLaQs3+nc30vMX/WfaOa8HECCAc3V4qzW7jvNao/uI6VEMu8ilWQ7NgDzv0RxHFtXk217yrYx8vGfbS7f6veSX7SXZSqlziQRIJEAadSQScIkESIhM4027pJi2hJhtEqgjiYBLIEDivuUkNlonQOQPCh8OH2F8hPETjpl3+Alj3rOPMD7bvz3BHIk+R6KPyLM1nk7wnn2EMe81cA378Pbr9i33RR9em4ttC2PONZr3uUgt+2oO4Q+H8bswCYTxW5gEQvhwJBCKLCdMgkW2jywL43d1+KiJbNPQFl0W8vYfwue8du9SOM58OK8CB14VFnlY5B3RpD3csDw67Wumbd8yvCX+6He077nptA+HL/qZhaLT5n1WPkL7PlNvXo5dYfNHfobNDxguZj76jI/y8d+jx4z/F7c6O1Ig6w1sjpnfApwYp1qkE0j0+0j0+8hKOcoH4Lcj5/YFs1DYEQyHCYchGA5H2pwjGHLR6VA4Mh92jmDYEQqHCcWuH92PI+w9l4Yj6/vMoqE3AUjy5iPDCBumLRqGfWbQTFtDwPYW4/PtWw6Gi9bmqA+Fo/UEQy5aZ8N8KByOWXfffGTdg88Hwi5SQIymBxljT4LZf1nLt234nsIuEtDDznmPxt/hgZZHljXez775yHLnIORc5DP1PtyG6ej3ETPf3PcR+1019M42Xb9hH7Dv/TT8fDXUGnIQDu+rrWmNIe/nK+zNh8OOEDFtYYdzIcyFcOFIWPObi4bzBC9MR8Kgw28NIdDh85YleNExIXYZ+/bjJxxdd9+ysPddxvyxZ+a1RCJow89poz8eY38gop+9L9pu0R8Qiz6bgXm9TBbTG2XsmzcXBu+PhYY/BPCeo9t6oRfvjwDz2vb9kRCJ5eGYMB0J6eYthX1/esQG8H1hPkzD970vpDcN6w3rA/uFdH/MH037vrfmp33WuO1g26WXpfM54qcjBbIWMbNvAN8A6NevX5yrETm6mBl+I+a+q/6Dri8ix47YsN8wTrch8Du85/C+HveGPwRw7PsDgki4buixj/3DomH/Dkc4HGlvEHuEomG+ubbIfOOjI9H1o/uymCUNy+wzDdFpDR0pkG0F+sbM9/HaGnHOPQA8AJHrkLVPaSIiIse2/f9gk9bUkUa7vQ8MNrOBZpYEXAI8G+eaRERERNpch+khc84FzezbwMtEjpP81Tn3UZzLEhEREWlzHSaQATjnXgRejHcdIiIiIu2pIx2yFBERETkmKZCJiIiIxJkCmYiIiEicKZCJiIiIxJkCmYiIiEicKZCJiIiIxJkCmYiIiEicmXOd9+5DZlYMfBLvOuSQ8oCSeBchh6TvqfPQd9V56LvqPNrju+rvnMtvbkGnDmTSOZjZIufcuHjXIQen76nz0HfVeei76jzi/V3pkKWIiIhInCmQiYiIiMSZApm0hwfiXYC0iL6nzkPfVeeh76rziOt3pTFkIiIiInGmHjIRERGROFMgk8/EzPqa2VwzW2lmH5nZ97z2XDP7j5mt9Z5zvHYzs3vNbJ2ZLTezovi+g2OLmfnN7AMze96bH2hm73nfx+NmluS1J3vz67zlA+Ja+DHGzLqY2ZNmtsrMPjazifqd6pjM7Frv374VZvaomaXo96pjMLO/mtkuM1sR03bYv0dmNstbf62ZzWqrehXI5LMKAj90zg0HTgKuMbPhwI3Aa865wcBr3jzAWcBg7/EN4H/bv+Rj2veAj2Pm7wTuds4dD+wBvua1fw3Y47Xf7a0n7ece4CXn3AnAaCLfmX6nOhgz6w18FxjnnBsJ+IFL0O9VRzEHOLNJ22H9HplZLnArcCIwAbi1IcS1NgUy+Uycc9udc0u86Qoi/3H0Bs4DHvRWexA435s+D/i7i1gAdDGznu1b9bHJzPoAZwN/9uYNmA486a3S9Htq+P6eBGZ460sbM7NsYDLwFwDnXMA5txf9TnVUCUCqmSUAacB29HvVITjn3gJKmzQf7u/RGcB/nHOlzrk9wH/YP+S1CgUyaTVe9/sY4D2gu3Nuu7doB9Ddm+4NbI7ZbIvXJm3vt8D1QNib7wrsdc4FvfnY7yL6PXnLy7z1pe0NBIqBv3mHl/9sZunod6rDcc5tBe4CPiUSxMqAxej3qiM73N+jdvv9UiCTVmFmGcC/gO8758pjl7nIqbw6nTeOzOwcYJdzbnG8a5FDSgCKgP91zo0Bqth3WAXQ71RH4R26Oo9IiO4FpNNGvSfS+jra75ECmXxmZpZIJIw97Jx7ymve2XDYxHve5bVvBfrGbN7Ha5O2dQrweTPbBDxG5JDKPUS65RO8dWK/i+j35C3PBna3Z8HHsC3AFufce978k0QCmn6nOp6ZwEbnXLFzrh54isjvmn6vOq7D/T1qt98vBTL5TLzxD38BPnbO/U/MomeBhrNRZgHPxLR/xTuj5SSgLKb7WNqIc+4m51wf59wAIoOOX3fOXQ7MBS70Vmv6PTV8fxd663eYvySPZs65HcBmMxvqNc0AVqLfqY7oU+AkM0vz/i1s+K70e9VxHe7v0cvA6WaW4/WInu61tTpdGFY+EzObBMwDPmTf2KSbiYwj+yfQD/gE+JJzrtT7R+t3RLr1q4GvOucWtXvhxzAzmwpc55w7x8wGEekxywU+AK5wztWZWQrwEJ
ExgaXAJc65DXEq+ZhjZoVETr5IAjYAXyXyB7R+pzoYM/sZcDGRM84/AK4iMsZIv1dxZmaPAlOBPGAnkbMl/4/D/D0ysyuJ/L8GcLtz7m9tUq8CmYiIiEh86ZCliIiISJwpkImIiIjEmQKZiIiISJwpkImIiIjEmQKZiIiISJwpkIlIuzEzZ2a/iZm/zsx+2kr7nmNmFx56zc/8OheZ2cdmNrdJu8/M7jWzFWb2oZm9b2YDW+H1BpjZis+6HxHp2BTIRKQ91QFfNLO8eBcSK+aq6i3xNeDrzrlpTdovJnL7nALn3CjgC8De1qlQRI52CmQi0p6CwAPAtU0XNO3hMrNK73mqmb1pZs+Y2QYzu8PMLjezhV5P1HExu5lpZovMbI13/07MzG9mv/Z6rJab2dUx+51nZs8Subp603ou9fa/wszu9Np+AkwC/mJmv26ySU9gu3MuDOCc2+Kc2+Ntd7qZzTezJWb2hHfvV8xsrPfeFpvZyzG3dBlrZsvMbBlwTUxNI7z3vdR7L4MP69MXkQ5LgUxE2tvvgcvNLPswthkNfBMYBnwZGOKcm0DkavbfiVlvADABOBu437sy+teI3AZlPDAe+HrMocQi4HvOuSGxL2ZmvYA7idzzsxAYb2bnO+duAxYBlzvnftSkxn8C53ph6TdmNsbbVx7wX8BM51yRt/0PLHIP2PuAC51zY4G/Ard7+/ob8B3n3Ogmr/FN4B7nXCEwjsh9L0XkKHA43fQiIp+Zc67czP4OfBeoaeFm7zfcn9HM1gOveO0fArGHDv/p9VCtNbMNwAlE7j1XENP7lg0MBgLAQufcxmZebzzwhnOu2HvNh4HJRG67cqD3tcW7/+R07/GamV0EpALDgXcid2chCZgPDAVGAv/x2v3AdjPrAnRxzr3l7foh4Cxvej7wYzPrAzzlnFt7wE9MRDoVBTIRiYffAkuI9AQ1COL12puZj0hwaVAXMx2OmQ/T+N+xpveCc4AR6W1qdENg756eVUdS/IE45+qAfwP/NrOdwPlEwuN/nHOXNnn9UcBHzrmJTdq7HGT/j5jZe0R6AF80s6udc6+35nsQkfjQIUsRaXfOuVIih/i+FtO8CRjrTX8eSDyCXV/kne14HDAIWA28DHzLO0SImQ0xs/RD7GchMMXM8szMD1wKvHmwDcysyDvU2RAoC4jcvHgBcIqZHe8tSzezIV5t+WY20WtPNLMRzrm9wF4zm+Tt+vKY1xgEbHDO3Qs8472GiBwFFMhEJF5+A8SebfknIiFoGTCRI+u9+pRImPo38E3nXC2RcWYrgSXe5SP+yCGODniHR28E5gLLgMXOuWcO8drdgOe811hOpMfvd95hz9nAo2a2nMhhxxOccwHgQuBO7z0vBU729vVV4PdmtpRID1+DLwErvPaRwN8PUZOIdBLmXNMefhERERFpT+ohExEREYkzBTIRERGROFMgExEREYkzBTIRERGROFMgExEREYkzBTIRERGROFMgExEREYkzBTIRERGROPv/4A3x8Y6Wx8YAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "%matplotlib inline\n", + "seed_idx = list(range (50, max_seeds +1, 50))\n", + "\n", + "plt.figure(figsize=(10,5))\n", + "\n", + "for i in range(len(data)):\n", + " plt.plot(seed_idx, perf[i], label = names[i] )\n", + "\n", + "plt.title('Speedup vs. Number of Seeds')\n", + "plt.xlabel('Number of Seeds')\n", + "plt.ylabel('Speedup')\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3786" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "del time_algo_cu\n", + "gc.collect()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "-----\n", + "Copyright (c) 2021, NVIDIA CORPORATION.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0\n", + "\n", + "Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "cugraph_dev", + "language": "python", + "name": "cugraph_dev" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/cugraph_benchmarks/release.ipynb b/notebooks/cugraph_benchmarks/release.ipynb index 3c6da55abc0..a6eeeb65cdf 100644 --- a/notebooks/cugraph_benchmarks/release.ipynb +++ b/notebooks/cugraph_benchmarks/release.ipynb @@ -22,6 +22,7 @@ "| Triangle Counting | X | |\n", "\n", "### Test Data\n", + "Users must run the _dataPrep.sh_ script before running this notebook so that the test files are downloaded\n", "\n", "| File Name | Num of Vertices | Num of Edges |\n", "| ---------------------- | --------------: | -----------: |\n", @@ -594,7 +595,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.8.8" } }, "nbformat": 4, diff --git a/notebooks/sampling/RandomWalk.ipynb b/notebooks/sampling/RandomWalk.ipynb new file mode 100644 index 00000000000..31a521db1c1 --- /dev/null +++ b/notebooks/sampling/RandomWalk.ipynb @@ -0,0 +1,313 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Random Walk Sampling\n", + "\n", + "In this notebook, we will compute the Random Walk from a set of seeds using cuGraph. \n", + "\n", + "\n", + "| Author Credit | Date | Update | cuGraph Version | Test Hardware |\n", + "| --------------|------------|--------------|-----------------|----------------|\n", + "| Brad Rees | 04/20/2021 | created | 0.19 | GV100, CUDA 11.0\n", + "\n", + "Currently NetworkX does not have a random walk function. There is code on StackOverflow that generats a random walk by getting a vertice and then randomly selection a neighbor and then repeating the process. 
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Test Data\n", + "We will be using the Zachary Karate club dataset \n", + "*W. W. Zachary, An information flow model for conflict and fission in small groups, Journal of\n", + "Anthropological Research 33, 452-473 (1977).*\n", + "\n", + "\n", + "![Karate Club](../img/zachary_black_lines.png)\n", + "\n", + "\n", + "Because the test data has vertex IDs starting at 1, the auto-renumber feature of cuGraph (mentioned above) will be used so the starting vertex ID is zero for maximum efficiency. The resulting data will then be auto-unrenumbered, making the entire renumbering process transparent to users." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# Import the modules\n", + "import cugraph\n", + "import cudf" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Read The Data\n", + "# Define the path to the test data \n", + "datafile='../data/karate-data.csv'\n", + "\n", + "gdf = cudf.read_csv(datafile, delimiter='\\t', names=['src', 'dst'], dtype=['int32', 'int32'] )" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "gdf['wt'] = 1.0" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# Create a Graph - using the source (src) and destination (dst) vertex pairs from the Dataframe \n", + "G = cugraph.Graph()\n", + "G.from_cudf_edgelist(gdf, source='src', destination='dst', edge_attr='wt')" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(34, 78)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# some stats on the graph\n", + "(G.number_of_nodes(), G.number_of_edges() )" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# create a list with the seeds\n", + "seeds = [17,19]" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "rw, so = cugraph.random_walks(G, seeds, 4)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A random walk generates a path from a seed vertex. At each step on the random walk (starting from the seed), the random walker picks a random departing edge to traverse. The random walk will terminate in two situations, when the maximum path length is reached, or when the current vertex on the path has no departing edges to traverse. The result of a single random walk will be a path of some length less than or equal to the maximum path length.\n", + "\n", + "cugraph.random_walks performs a random walk from each of the specified seeds. The output will be a path for each of the seeds. Because the path lengths might be variable length, the return value consists of a pair of outputs.\n", + "\n", + "The first output provides the edges used on the paths.\n", + "\n", + "The second output represents the seed offset, which is a cuDF Series. The seed offset identifies the offset of the first entry in the first output for a particular seed." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 0\n", + "1 3\n", + "2 6\n", + "dtype: int64" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "so" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
srcdstweight
01761.0
16171.0
21761.0
319331.0
433311.0
53121.0
\n", + "
" + ], + "text/plain": [ + " src dst weight\n", + "0 17 6 1.0\n", + "1 6 17 1.0\n", + "2 17 6 1.0\n", + "3 19 33 1.0\n", + "4 33 31 1.0\n", + "5 31 2 1.0" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rw" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "seed 17 starts at index 0 and is 3 rows\n", + "seed 19 starts at index 3 and is 3 rows\n" + ] + } + ], + "source": [ + "for i in range(len(seeds)):\n", + " print(f\"seed {seeds[i]} starts at index {so[i]} and is {so[1 + 1] - so[1]} rows\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "-----\n", + "Copyright (c) 2021, NVIDIA CORPORATION.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0\n", + "\n", + "Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "cugraph_dev", + "language": "python", + "name": "cugraph_dev" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/python/cugraph/tests/test_betweenness_centrality.py b/python/cugraph/tests/test_betweenness_centrality.py index 29c012e95a2..ee1a269e532 100755 --- a/python/cugraph/tests/test_betweenness_centrality.py +++ b/python/cugraph/tests/test_betweenness_centrality.py @@ -334,6 +334,7 @@ def test_betweenness_centrality( @pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) @pytest.mark.parametrize("use_k_full", [True]) @pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS) +@pytest.mark.skip(reason="Skipping large tests") def test_betweenness_centrality_k_full( graph_file, directed, @@ -377,6 +378,7 @@ def test_betweenness_centrality_k_full( @pytest.mark.parametrize("subset_seed", [None]) @pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) @pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS) +@pytest.mark.skip(reason="Skipping large tests") def test_betweenness_centrality_fixed_sample( graph_file, directed, @@ -415,6 +417,7 @@ def test_betweenness_centrality_fixed_sample( @pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS) @pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) @pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS) +@pytest.mark.skip(reason="Skipping large tests") def test_betweenness_centrality_weight_except( graph_file, directed, diff --git a/python/cugraph/tests/test_edge_betweenness_centrality.py b/python/cugraph/tests/test_edge_betweenness_centrality.py index 8c5aad7dc61..6caad0d9fad 100644 --- 
a/python/cugraph/tests/test_edge_betweenness_centrality.py +++ b/python/cugraph/tests/test_edge_betweenness_centrality.py @@ -341,6 +341,7 @@ def test_edge_betweenness_centrality( @pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) @pytest.mark.parametrize("use_k_full", [True]) @pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS) +@pytest.mark.skip(reason="Skipping large tests") def test_edge_betweenness_centrality_k_full( graph_file, directed, @@ -381,6 +382,7 @@ def test_edge_betweenness_centrality_k_full( @pytest.mark.parametrize("subset_seed", [None]) @pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) @pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS) +@pytest.mark.skip(reason="Skipping large tests") def test_edge_betweenness_centrality_fixed_sample( graph_file, directed, @@ -417,6 +419,7 @@ def test_edge_betweenness_centrality_fixed_sample( @pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS) @pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) @pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS) +@pytest.mark.skip(reason="Skipping large tests") def test_edge_betweenness_centrality_weight_except( graph_file, directed, diff --git a/python/cugraph/tests/test_utils.py b/python/cugraph/tests/test_utils.py index 55256d6b74e..175cf389d16 100644 --- a/python/cugraph/tests/test_utils.py +++ b/python/cugraph/tests/test_utils.py @@ -73,6 +73,7 @@ def test_bfs_paths_array(): @pytest.mark.parametrize("graph_file", utils.DATASETS) +@pytest.mark.skip(reason="Skipping large tests") def test_get_traversed_cost(graph_file): cu_M = utils.read_csv_file(graph_file) From 9011eec2e26969ca1638dddfe8a9ef69d884cabb Mon Sep 17 00:00:00 2001 From: AJ Schmidt Date: Wed, 28 Apr 2021 14:05:05 -0400 Subject: [PATCH 236/343] Remove conda & pip installs from doc script (#1563) This PR removes the conda & pip installs from the doc build script since they're already included in our environment at build time. Authors: - AJ Schmidt (https://github.com/ajschmidt8) Approvers: - Dillon Cullinan (https://github.com/dillon-cullinan) - Brad Rees (https://github.com/BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1563 --- ci/docs/build.sh | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/ci/docs/build.sh b/ci/docs/build.sh index 5d81e5521a8..279faa6a61d 100644 --- a/ci/docs/build.sh +++ b/ci/docs/build.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2021, NVIDIA CORPORATION. ################################# # cuGraph Docs build script for CI # ################################# @@ -28,11 +28,6 @@ gpuci_logger "Activate conda env" . 
/opt/conda/etc/profile.d/conda.sh conda activate rapids -# TODO: Move installs to docs-build-env meta package -gpuci_conda_retry install -c anaconda markdown beautifulsoup4 jq -pip install sphinx-markdown-tables - - gpuci_logger "Check versions" python --version $CC --version From cff84f5157720fc118e96d06bb1e1fd879d724c5 Mon Sep 17 00:00:00 2001 From: Iroy30 <41401566+Iroy30@users.noreply.github.com> Date: Wed, 28 Apr 2021 14:31:40 -0500 Subject: [PATCH 237/343] Add new graph structure (#1404) Authors: - https://github.com/Iroy30 Approvers: - Rick Ratzel (https://github.com/rlratzel) - Brad Rees (https://github.com/BradReesWork) - Joseph Nke (https://github.com/jnke2016) - Alex Fender (https://github.com/afender) URL: https://github.com/rapidsai/cugraph/pull/1404 --- benchmarks/bench_algos.py | 10 +- python/cugraph/__init__.py | 8 +- .../betweenness_centrality_wrapper.pyx | 2 +- .../edge_betweenness_centrality_wrapper.pyx | 2 +- python/cugraph/community/ktruss_subgraph.py | 4 +- python/cugraph/community/leiden.py | 4 +- python/cugraph/community/louvain.py | 4 +- .../cugraph/community/subgraph_extraction.py | 4 +- python/cugraph/community/triangle_count.py | 4 +- .../components/connectivity_wrapper.pyx | 2 +- python/cugraph/cores/k_core.py | 4 +- python/cugraph/dask/link_analysis/pagerank.py | 2 +- python/cugraph/layout/force_atlas2.py | 4 +- python/cugraph/link_analysis/pagerank.py | 2 +- python/cugraph/link_prediction/jaccard.py | 5 +- python/cugraph/link_prediction/overlap.py | 4 +- python/cugraph/link_prediction/wjaccard.py | 5 +- python/cugraph/link_prediction/woverlap.py | 4 +- python/cugraph/structure/__init__.py | 14 +- python/cugraph/structure/convert_matrix.py | 4 +- python/cugraph/structure/graph.py | 1509 ----------------- python/cugraph/structure/graph_classes.py | 743 ++++++++ .../graph_implementation/__init__.py | 17 + .../graph_implementation/npartiteGraph.py | 100 ++ .../simpleDistributedGraph.py | 473 ++++++ .../graph_implementation/simpleGraph.py | 823 +++++++++ python/cugraph/structure/hypergraph.py | 9 +- python/cugraph/structure/number_map.py | 1 - python/cugraph/structure/symmetrize.py | 30 +- python/cugraph/tests/test_graph.py | 11 +- python/cugraph/traversal/bfs.py | 2 +- .../traversal/traveling_salesperson.py | 2 +- python/cugraph/tree/minimum_spanning_tree.py | 2 +- 33 files changed, 2239 insertions(+), 1575 deletions(-) delete mode 100644 python/cugraph/structure/graph.py create mode 100644 python/cugraph/structure/graph_classes.py create mode 100644 python/cugraph/structure/graph_implementation/__init__.py create mode 100644 python/cugraph/structure/graph_implementation/npartiteGraph.py create mode 100644 python/cugraph/structure/graph_implementation/simpleDistributedGraph.py create mode 100644 python/cugraph/structure/graph_implementation/simpleGraph.py diff --git a/benchmarks/bench_algos.py b/benchmarks/bench_algos.py index f9f8bf9cf53..5284ffbd37b 100644 --- a/benchmarks/bench_algos.py +++ b/benchmarks/bench_algos.py @@ -51,9 +51,9 @@ def createGraph(csvFileName, graphType=None): # complexity lower, and assume tests have coverage to verify # correctness for those combinations. 
if "/directed/" in csvFileName: - graphType = cugraph.structure.graph.DiGraph + graphType = cugraph.structure.graph_classes.DiGraph else: - graphType = cugraph.structure.graph.Graph + graphType = cugraph.structure.graph_classes.Graph return cugraph.from_cudf_edgelist( utils.read_csv_file(csvFileName), @@ -122,7 +122,7 @@ def graphWithAdjListComputed(request): csvFileName = request.param[0] reinitRMM(request.param[1], request.param[2]) - G = createGraph(csvFileName, cugraph.structure.graph.Graph) + G = createGraph(csvFileName, cugraph.structure.graph_classes.Graph) G.view_adj_list() return G @@ -166,7 +166,7 @@ def bench_create_graph(gpubenchmark, edgelistCreated): gpubenchmark(cugraph.from_cudf_edgelist, edgelistCreated, source="0", destination="1", - create_using=cugraph.structure.graph.Graph, + create_using=cugraph.structure.graph_classes.Graph, renumber=False) @@ -183,7 +183,7 @@ def bench_create_digraph(gpubenchmark, edgelistCreated): gpubenchmark(cugraph.from_cudf_edgelist, edgelistCreated, source="0", destination="1", - create_using=cugraph.structure.graph.DiGraph, + create_using=cugraph.structure.graph_classes.DiGraph, renumber=False) diff --git a/python/cugraph/__init__.py b/python/cugraph/__init__.py index d4632708591..1a113b93d8d 100644 --- a/python/cugraph/__init__.py +++ b/python/cugraph/__init__.py @@ -33,6 +33,8 @@ DiGraph, MultiGraph, MultiDiGraph, + BiPartiteGraph, + BiPartiteDiGraph, from_edgelist, from_cudf_edgelist, from_pandas_edgelist, @@ -48,7 +50,11 @@ symmetrize, symmetrize_df, symmetrize_ddf, -) + is_weighted, + is_directed, + is_multigraph, + is_bipartite, + is_multipartite) from cugraph.centrality import ( betweenness_centrality, diff --git a/python/cugraph/centrality/betweenness_centrality_wrapper.pyx b/python/cugraph/centrality/betweenness_centrality_wrapper.pyx index 855de3327ba..e63b6996816 100644 --- a/python/cugraph/centrality/betweenness_centrality_wrapper.pyx +++ b/python/cugraph/centrality/betweenness_centrality_wrapper.pyx @@ -17,7 +17,7 @@ # cython: language_level = 3 from cugraph.centrality.betweenness_centrality cimport betweenness_centrality as c_betweenness_centrality -from cugraph.structure.graph import DiGraph +from cugraph.structure.graph_classes import DiGraph from cugraph.structure.graph_primtypes cimport * from libc.stdint cimport uintptr_t from libcpp cimport bool diff --git a/python/cugraph/centrality/edge_betweenness_centrality_wrapper.pyx b/python/cugraph/centrality/edge_betweenness_centrality_wrapper.pyx index 136bde1b0e3..095d291c45e 100644 --- a/python/cugraph/centrality/edge_betweenness_centrality_wrapper.pyx +++ b/python/cugraph/centrality/edge_betweenness_centrality_wrapper.pyx @@ -18,7 +18,7 @@ from cugraph.centrality.betweenness_centrality cimport edge_betweenness_centrality as c_edge_betweenness_centrality from cugraph.structure import graph_primtypes_wrapper -from cugraph.structure.graph import DiGraph, Graph +from cugraph.structure.graph_classes import DiGraph, Graph from cugraph.structure.graph_primtypes cimport * from libc.stdint cimport uintptr_t from libcpp cimport bool diff --git a/python/cugraph/community/ktruss_subgraph.py b/python/cugraph/community/ktruss_subgraph.py index 8e4f1471955..f4e4f7fb1cc 100644 --- a/python/cugraph/community/ktruss_subgraph.py +++ b/python/cugraph/community/ktruss_subgraph.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. 
# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -12,7 +12,7 @@ # limitations under the License. from cugraph.community import ktruss_subgraph_wrapper -from cugraph.structure.graph import Graph +from cugraph.structure.graph_classes import Graph from cugraph.utilities import check_nx_graph from cugraph.utilities import cugraph_to_nx diff --git a/python/cugraph/community/leiden.py b/python/cugraph/community/leiden.py index 8c1b79b8b63..641cf552192 100644 --- a/python/cugraph/community/leiden.py +++ b/python/cugraph/community/leiden.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019 - 2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -12,7 +12,7 @@ # limitations under the License. from cugraph.community import leiden_wrapper -from cugraph.structure.graph import Graph +from cugraph.structure.graph_classes import Graph from cugraph.utilities import check_nx_graph from cugraph.utilities import df_score_to_dictionary diff --git a/python/cugraph/community/louvain.py b/python/cugraph/community/louvain.py index d4d56a1100c..a761e060038 100644 --- a/python/cugraph/community/louvain.py +++ b/python/cugraph/community/louvain.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -12,7 +12,7 @@ # limitations under the License. from cugraph.community import louvain_wrapper -from cugraph.structure.graph import Graph +from cugraph.structure.graph_classes import Graph from cugraph.utilities import check_nx_graph from cugraph.utilities import df_score_to_dictionary diff --git a/python/cugraph/community/subgraph_extraction.py b/python/cugraph/community/subgraph_extraction.py index 8c702c2f58f..7815851d465 100644 --- a/python/cugraph/community/subgraph_extraction.py +++ b/python/cugraph/community/subgraph_extraction.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -12,7 +12,7 @@ # limitations under the License. from cugraph.community import subgraph_extraction_wrapper -from cugraph.structure.graph import null_check +from cugraph.structure.graph_classes import null_check from cugraph.utilities import check_nx_graph from cugraph.utilities import cugraph_to_nx diff --git a/python/cugraph/community/triangle_count.py b/python/cugraph/community/triangle_count.py index ff4dc9a5c5f..d28424a513e 100644 --- a/python/cugraph/community/triangle_count.py +++ b/python/cugraph/community/triangle_count.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -12,7 +12,7 @@ # limitations under the License. 
from cugraph.community import triangle_count_wrapper -from cugraph.structure.graph import Graph +from cugraph.structure.graph_classes import Graph from cugraph.utilities import check_nx_graph diff --git a/python/cugraph/components/connectivity_wrapper.pyx b/python/cugraph/components/connectivity_wrapper.pyx index 76d279a8116..ac173de3564 100644 --- a/python/cugraph/components/connectivity_wrapper.pyx +++ b/python/cugraph/components/connectivity_wrapper.pyx @@ -22,7 +22,7 @@ from cugraph.structure import utils_wrapper from cugraph.structure import graph_primtypes_wrapper from libc.stdint cimport uintptr_t from cugraph.structure.symmetrize import symmetrize -from cugraph.structure.graph import Graph as type_Graph +from cugraph.structure.graph_classes import Graph as type_Graph import cudf import numpy as np diff --git a/python/cugraph/cores/k_core.py b/python/cugraph/cores/k_core.py index ce67665764b..ca17bdd5c81 100644 --- a/python/cugraph/cores/k_core.py +++ b/python/cugraph/cores/k_core.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -14,7 +14,7 @@ from cugraph.cores import k_core_wrapper, core_number_wrapper from cugraph.utilities import cugraph_to_nx from cugraph.utilities import check_nx_graph -from cugraph.structure.graph import Graph +from cugraph.structure.graph_classes import Graph def k_core(G, k=None, core_number=None): diff --git a/python/cugraph/dask/link_analysis/pagerank.py b/python/cugraph/dask/link_analysis/pagerank.py index fb9f4ad3a25..f90e5c72231 100644 --- a/python/cugraph/dask/link_analysis/pagerank.py +++ b/python/cugraph/dask/link_analysis/pagerank.py @@ -119,7 +119,7 @@ def pagerank(input_graph, edge_attr='value') >>> pr = dcg.pagerank(dg) """ - from cugraph.structure.graph import null_check + from cugraph.structure.graph_classes import null_check nstart = None diff --git a/python/cugraph/layout/force_atlas2.py b/python/cugraph/layout/force_atlas2.py index 4c6859c6c03..0b745d8ca15 100644 --- a/python/cugraph/layout/force_atlas2.py +++ b/python/cugraph/layout/force_atlas2.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -12,7 +12,7 @@ # limitations under the License. from cugraph.layout import force_atlas2_wrapper -from cugraph.structure.graph import null_check +from cugraph.structure.graph_classes import null_check def force_atlas2( diff --git a/python/cugraph/link_analysis/pagerank.py b/python/cugraph/link_analysis/pagerank.py index 8a03ee077f6..4f5f8f6aae0 100644 --- a/python/cugraph/link_analysis/pagerank.py +++ b/python/cugraph/link_analysis/pagerank.py @@ -12,7 +12,7 @@ # limitations under the License. from cugraph.link_analysis import pagerank_wrapper -from cugraph.structure.graph import null_check +from cugraph.structure.graph_classes import null_check import cugraph diff --git a/python/cugraph/link_prediction/jaccard.py b/python/cugraph/link_prediction/jaccard.py index 71cf0925342..2a9e9625050 100644 --- a/python/cugraph/link_prediction/jaccard.py +++ b/python/cugraph/link_prediction/jaccard.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. 
+# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -13,9 +13,8 @@ import pandas as pd import cudf -from cugraph.structure.graph import Graph +from cugraph.structure.graph_classes import Graph, null_check from cugraph.link_prediction import jaccard_wrapper -from cugraph.structure.graph import null_check from cugraph.utilities import check_nx_graph from cugraph.utilities import df_edge_score_to_dictionary diff --git a/python/cugraph/link_prediction/overlap.py b/python/cugraph/link_prediction/overlap.py index a5ca1e22979..077080bda1d 100644 --- a/python/cugraph/link_prediction/overlap.py +++ b/python/cugraph/link_prediction/overlap.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -13,7 +13,7 @@ import pandas as pd from cugraph.link_prediction import overlap_wrapper -from cugraph.structure.graph import null_check +from cugraph.structure.graph_classes import null_check import cudf from cugraph.utilities import check_nx_graph from cugraph.utilities import df_edge_score_to_dictionary diff --git a/python/cugraph/link_prediction/wjaccard.py b/python/cugraph/link_prediction/wjaccard.py index 2a4e2417102..9679d1ba9cf 100644 --- a/python/cugraph/link_prediction/wjaccard.py +++ b/python/cugraph/link_prediction/wjaccard.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -11,9 +11,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -from cugraph.structure.graph import Graph +from cugraph.structure.graph_classes import Graph, null_check from cugraph.link_prediction import jaccard_wrapper -from cugraph.structure.graph import null_check import cudf diff --git a/python/cugraph/link_prediction/woverlap.py b/python/cugraph/link_prediction/woverlap.py index c93ad28ea54..fe64f812957 100644 --- a/python/cugraph/link_prediction/woverlap.py +++ b/python/cugraph/link_prediction/woverlap.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -12,7 +12,7 @@ # limitations under the License. from cugraph.link_prediction import overlap_wrapper -from cugraph.structure.graph import null_check +from cugraph.structure.graph_classes import null_check import cudf diff --git a/python/cugraph/structure/__init__.py b/python/cugraph/structure/__init__.py index ad67fe91876..b70854d61ce 100644 --- a/python/cugraph/structure/__init__.py +++ b/python/cugraph/structure/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -11,7 +11,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -from cugraph.structure.graph import Graph, DiGraph, MultiGraph, MultiDiGraph +from cugraph.structure.graph_classes import (Graph, + DiGraph, + MultiGraph, + MultiDiGraph, + BiPartiteGraph, + BiPartiteDiGraph) +from cugraph.structure.graph_classes import (is_weighted, + is_directed, + is_multigraph, + is_bipartite, + is_multipartite) from cugraph.structure.number_map import NumberMap from cugraph.structure.symmetrize import symmetrize, symmetrize_df , symmetrize_ddf from cugraph.structure.convert_matrix import (from_edgelist, diff --git a/python/cugraph/structure/convert_matrix.py b/python/cugraph/structure/convert_matrix.py index edd1c630185..5b3c375ea9d 100644 --- a/python/cugraph/structure/convert_matrix.py +++ b/python/cugraph/structure/convert_matrix.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -18,7 +18,7 @@ import cudf import dask_cudf -from cugraph.structure.graph import DiGraph, Graph +from cugraph.structure.graph_classes import DiGraph, Graph # optional dependencies used for handling different input types try: diff --git a/python/cugraph/structure/graph.py b/python/cugraph/structure/graph.py deleted file mode 100644 index a3024f9d081..00000000000 --- a/python/cugraph/structure/graph.py +++ /dev/null @@ -1,1509 +0,0 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from cugraph.structure import graph_primtypes_wrapper -from cugraph.structure.symmetrize import symmetrize -from cugraph.structure.number_map import NumberMap -import cugraph.dask.common.mg_utils as mg_utils -import cudf -import dask_cudf -import cugraph.comms.comms as Comms -import pandas as pd -import numpy as np -from cugraph.dask.structure import replication - - -def null_check(col): - if col.null_count != 0: - raise ValueError("Series contains NULL values") - - -class Graph: - class EdgeList: - def __init__(self, *args): - if len(args) == 1: - self.__from_dask_cudf(*args) - else: - self.__from_cudf(*args) - - def __from_cudf(self, source, destination, edge_attr=None): - self.edgelist_df = cudf.DataFrame() - self.edgelist_df["src"] = source - self.edgelist_df["dst"] = destination - self.weights = False - if edge_attr is not None: - self.weights = True - if type(edge_attr) is dict: - for k in edge_attr.keys(): - self.edgelist_df[k] = edge_attr[k] - else: - self.edgelist_df["weights"] = edge_attr - - def __from_dask_cudf(self, ddf): - self.edgelist_df = ddf - self.weights = False - # FIXME: Edge Attribute not handled - - class AdjList: - def __init__(self, offsets, indices, value=None): - self.offsets = offsets - self.indices = indices - self.weights = value # Should be a dataframe for multiple weights - - class transposedAdjList: - def __init__(self, offsets, indices, value=None): - Graph.AdjList.__init__(self, offsets, indices, value) - - """ - cuGraph graph class containing basic graph creation and transformation - operations. - """ - - def __init__( - self, - m_graph=None, - symmetrized=False, - bipartite=False, - multi=False, - dynamic=False, - ): - """ - Returns - ------- - G : cuGraph.Graph. - - Examples - -------- - >>> import cuGraph - >>> G = cuGraph.Graph() - - """ - self.symmetrized = symmetrized - self.renumbered = False - self.renumber_map = None - self.bipartite = False - self.multipartite = False - self._nodes = {} - self.multi = multi - self.distributed = False - self.dynamic = dynamic - self.self_loop = False - self.edgelist = None - self.adjlist = None - self.transposedadjlist = None - self.edge_count = None - self.node_count = None - - # MG - Batch - self.batch_enabled = False - self.batch_edgelists = None - self.batch_adjlists = None - self.batch_transposed_adjlists = None - - if m_graph is not None: - if type(m_graph) is MultiGraph or type(m_graph) is MultiDiGraph: - elist = m_graph.view_edge_list() - if m_graph.edgelist.weights: - weights = "weights" - else: - weights = None - self.from_cudf_edgelist(elist, - source="src", - destination="dst", - edge_attr=weights) - else: - msg = ( - "Graph can only be initialized using MultiGraph " - "or MultiDiGraph" - ) - raise Exception(msg) - - def enable_batch(self): - client = mg_utils.get_client() - comms = Comms.get_comms() - - if client is None or comms is None: - msg = ( - "MG Batch needs a Dask Client and the " - "Communicator needs to be initialized." 
- ) - raise Exception(msg) - - self.batch_enabled = True - - if self.edgelist is not None: - if self.batch_edgelists is None: - self._replicate_edgelist() - - if self.adjlist is not None: - if self.batch_adjlists is None: - self._replicate_adjlist() - - if self.transposedadjlist is not None: - if self.batch_transposed_adjlists is None: - self._replicate_transposed_adjlist() - - def _replicate_edgelist(self): - client = mg_utils.get_client() - comms = Comms.get_comms() - - # FIXME: There might be a better way to control it - if client is None: - return - work_futures = replication.replicate_cudf_dataframe( - self.edgelist.edgelist_df, client=client, comms=comms - ) - - self.batch_edgelists = work_futures - - def _replicate_adjlist(self): - client = mg_utils.get_client() - comms = Comms.get_comms() - - # FIXME: There might be a better way to control it - if client is None: - return - - weights = None - offsets_futures = replication.replicate_cudf_series( - self.adjlist.offsets, client=client, comms=comms - ) - indices_futures = replication.replicate_cudf_series( - self.adjlist.indices, client=client, comms=comms - ) - - if self.adjlist.weights is not None: - weights = replication.replicate_cudf_series(self.adjlist.weights) - else: - weights = {worker: None for worker in offsets_futures} - - merged_futures = { - worker: [ - offsets_futures[worker], - indices_futures[worker], - weights[worker], - ] - for worker in offsets_futures - } - self.batch_adjlists = merged_futures - - # FIXME: Not implemented yet - def _replicate_transposed_adjlist(self): - self.batch_transposed_adjlists = True - - def clear(self): - """ - Empty this graph. This function is added for NetworkX compatibility. - """ - self.edgelist = None - self.adjlist = None - self.transposedadjlist = None - - self.batch_edgelists = None - self.batch_adjlists = None - self.batch_transposed_adjlists = None - - def add_nodes_from(self, nodes, bipartite=None, multipartite=None): - """ - Add nodes information to the Graph. - - Parameters - ---------- - nodes : list or cudf.Series - The nodes of the graph to be stored. If bipartite and multipartite - arguments are not passed, the nodes are considered to be a list of - all the nodes present in the Graph. - bipartite : str - Sets the Graph as bipartite. The nodes are stored as a set of nodes - of the partition named as bipartite argument. - multipartite : str - Sets the Graph as multipartite. The nodes are stored as a set of - nodes of the partition named as multipartite argument. - """ - if bipartite is None and multipartite is None: - self._nodes["all_nodes"] = cudf.Series(nodes) - else: - set_names = [i for i in self._nodes.keys() if i != "all_nodes"] - if multipartite is not None: - if self.bipartite: - raise Exception( - "The Graph is already set as bipartite. " - "Use bipartite option instead." - ) - self.multipartite = True - elif bipartite is not None: - if self.multipartite: - raise Exception( - "The Graph is set as multipartite. " - "Use multipartite option instead." - ) - self.bipartite = True - multipartite = bipartite - if multipartite not in set_names and len(set_names) == 2: - raise Exception( - "The Graph is set as bipartite and " - "already has two partitions initialized." - ) - self._nodes[multipartite] = cudf.Series(nodes) - - def is_bipartite(self): - """ - Checks if Graph is bipartite. This solely relies on the user call of - add_nodes_from with the bipartite parameter. This does not parse the - graph to check if it is bipartite. 
- """ - # TO DO: Call coloring algorithm - return self.bipartite - - def is_multipartite(self): - """ - Checks if Graph is multipartite. This solely relies on the user call - of add_nodes_from with the partition parameter. This does not parse - the graph to check if it is multipartite. - """ - # TO DO: Call coloring algorithm - return self.multipartite or self.bipartite - - def is_multigraph(self): - """ - Returns True if the graph is a multigraph. Else returns False. - """ - return self.multi - - def sets(self): - """ - Returns the bipartite set of nodes. This solely relies on the user's - call of add_nodes_from with the bipartite parameter. This does not - parse the graph to compute bipartite sets. If bipartite argument was - not provided during add_nodes_from(), it raise an exception that the - graph is not bipartite. - """ - # TO DO: Call coloring algorithm - set_names = [i for i in self._nodes.keys() if i != "all_nodes"] - if self.bipartite: - top = self._nodes[set_names[0]] - if len(set_names) == 2: - bottom = self._nodes[set_names[1]] - else: - bottom = cudf.Series( - set(self.nodes().values_host) - set(top.values_host) - ) - return top, bottom - else: - return {k: self._nodes[k] for k in set_names} - - def from_cudf_edgelist( - self, - input_df, - source="source", - destination="destination", - edge_attr=None, - renumber=True, - ): - """ - Initialize a graph from the edge list. It is an error to call this - method on an initialized Graph object. The passed input_df argument - wraps gdf_column objects that represent a graph using the edge list - format. source argument is source column name and destination argument - is destination column name. - - By default, renumbering is enabled to map the source and destination - vertices into an index in the range [0, V) where V is the number - of vertices. If the input vertices are a single column of integers - in the range [0, V), renumbering can be disabled and the original - external vertex ids will be used. - - If weights are present, edge_attr argument is the weights column name. - - Parameters - ---------- - input_df : cudf.DataFrame or dask_cudf.DataFrame - A DataFrame that contains edge information - If a dask_cudf.DataFrame is passed it will be reinterpreted as - a cudf.DataFrame. For the distributed path please use - from_dask_cudf_edgelist. - source : str or array-like - source column name or array of column names - destination : str or array-like - destination column name or array of column names - edge_attr : str or None - the weights column name. Default is None - renumber : bool - Indicate whether or not to renumber the source and destination - vertex IDs. Default is True. - - Examples - -------- - >>> df = cudf.read_csv('datasets/karate.csv', delimiter=' ', - >>> dtype=['int32', 'int32', 'float32'], header=None) - >>> G = cugraph.Graph() - >>> G.from_cudf_edgelist(df, source='0', destination='1', - edge_attr='2', renumber=False) - - """ - if self.edgelist is not None or self.adjlist is not None: - raise Exception("Graph already has values") - - s_col = source - d_col = destination - if not isinstance(s_col, list): - s_col = [s_col] - if not isinstance(d_col, list): - d_col = [d_col] - if not ( - set(s_col).issubset(set(input_df.columns)) - and set(d_col).issubset(set(input_df.columns)) - ): - raise Exception( - "source column names and/or destination column " - "names not found in input. 
Recheck the source and " - "destination parameters" - ) - - # FIXME: update for smaller GPUs - # Consolidation - if isinstance(input_df, cudf.DataFrame): - if len(input_df[source]) > 2147483100: - raise Exception( - "cudf dataFrame edge list is too big " - "to fit in a single GPU" - ) - elist = input_df - elif isinstance(input_df, dask_cudf.DataFrame): - if len(input_df[source]) > 2147483100: - raise Exception( - "dask_cudf dataFrame edge list is too big " - "to fit in a single GPU" - ) - elist = input_df.compute().reset_index(drop=True) - else: - raise Exception( - "input should be a cudf.DataFrame or " - "a dask_cudf dataFrame" - ) - - renumber_map = None - if renumber: - # FIXME: Should SG do lazy evaluation like MG? - elist, renumber_map = NumberMap.renumber( - elist, source, destination, store_transposed=False - ) - source = "src" - destination = "dst" - self.renumbered = True - self.renumber_map = renumber_map - else: - if type(source) is list and type(destination) is list: - raise Exception("set renumber to True for multi column ids") - - if (elist[source] == elist[destination]).any(): - self.self_loop = True - source_col = elist[source] - dest_col = elist[destination] - - if edge_attr is not None: - value_col = elist[edge_attr] - else: - value_col = None - - if value_col is not None: - source_col, dest_col, value_col = symmetrize( - source_col, dest_col, value_col, multi=self.multi, - symmetrize=not self.symmetrized) - else: - source_col, dest_col = symmetrize( - source_col, dest_col, multi=self.multi, - symmetrize=not self.symmetrized) - - self.edgelist = Graph.EdgeList(source_col, dest_col, value_col) - - if self.batch_enabled: - self._replicate_edgelist() - - self.renumber_map = renumber_map - - def from_pandas_edgelist( - self, - pdf, - source="source", - destination="destination", - edge_attr=None, - renumber=True, - ): - """ - Initialize a graph from the edge list. It is an error to call this - method on an initialized Graph object. Source argument is source - column name and destination argument is destination column name. - - By default, renumbering is enabled to map the source and destination - vertices into an index in the range [0, V) where V is the number - of vertices. If the input vertices are a single column of integers - in the range [0, V), renumbering can be disabled and the original - external vertex ids will be used. - - If weights are present, edge_attr argument is the weights column name. - - Parameters - ---------- - input_df : pandas.DataFrame - A DataFrame that contains edge information - source : str or array-like - source column name or array of column names - destination : str or array-like - destination column name or array of column names - edge_attr : str or None - the weights column name. Default is None - renumber : bool - Indicate whether or not to renumber the source and destination - vertex IDs. Default is True. - - Examples - -------- - >>> df = pandas.read_csv('datasets/karate.csv', delimiter=' ', - >>> dtype=['int32', 'int32', 'float32'], header=None) - >>> G = cugraph.Graph() - >>> G.from_pandas_edgelist(df, source='0', destination='1', - edge_attr='2', renumber=False) - - """ - gdf = cudf.DataFrame.from_pandas(pdf) - self.from_cudf_edgelist(gdf, source=source, destination=destination, - edge_attr=edge_attr, renumber=renumber) - - def to_pandas_edgelist(self, source='source', destination='destination'): - """ - Returns the graph edge list as a Pandas DataFrame. 
- - Parameters - ---------- - source : str or array-like - source column name or array of column names - destination : str or array-like - destination column name or array of column names - - Returns - ------- - df : pandas.DataFrame - """ - - gdf = self.view_edge_list() - return gdf.to_pandas() - - def from_pandas_adjacency(self, pdf): - """ - Initializes the graph from pandas adjacency matrix - """ - np_array = pdf.to_numpy() - columns = pdf.columns - self.from_numpy_array(np_array, columns) - - def to_pandas_adjacency(self): - """ - Returns the graph adjacency matrix as a Pandas DataFrame. - """ - - np_array_data = self.to_numpy_array() - pdf = pd.DataFrame(np_array_data) - if self.renumbered: - nodes = self.renumber_map.implementation.df['0'].\ - values_host.tolist() - pdf.columns = nodes - pdf.index = nodes - return pdf - - def to_numpy_array(self): - """ - Returns the graph adjacency matrix as a NumPy array. - """ - - nlen = self.number_of_nodes() - elen = self.number_of_edges() - df = self.edgelist.edgelist_df - np_array = np.full((nlen, nlen), 0.0) - for i in range(0, elen): - np_array[df['src'].iloc[i], df['dst'].iloc[i]] = df['weights'].\ - iloc[i] - return np_array - - def to_numpy_matrix(self): - """ - Returns the graph adjacency matrix as a NumPy matrix. - """ - np_array = self.to_numpy_array() - return np.asmatrix(np_array) - - def from_numpy_array(self, np_array, nodes=None): - """ - Initializes the graph from numpy array containing adjacency matrix. - """ - src, dst = np_array.nonzero() - weight = np_array[src, dst] - df = cudf.DataFrame() - if nodes is not None: - df['src'] = nodes[src] - df['dst'] = nodes[dst] - else: - df['src'] = src - df['dst'] = dst - df['weight'] = weight - self.from_cudf_edgelist(df, 'src', 'dst', edge_attr='weight') - - def from_numpy_matrix(self, np_matrix): - """ - Initializes the graph from numpy matrix containing adjacency matrix. - """ - np_array = np.asarray(np_matrix) - self.from_numpy_array(np_array) - - def from_dask_cudf_edgelist( - self, - input_ddf, - source="source", - destination="destination", - edge_attr=None, - renumber=True, - ): - """ - Initializes the distributed graph from the dask_cudf.DataFrame - edgelist. Undirected Graphs are not currently supported. - - By default, renumbering is enabled to map the source and destination - vertices into an index in the range [0, V) where V is the number - of vertices. If the input vertices are a single column of integers - in the range [0, V), renumbering can be disabled and the original - external vertex ids will be used. - - Note that the graph object will store a reference to the - dask_cudf.DataFrame provided. - - Parameters - ---------- - input_ddf : dask_cudf.DataFrame - The edgelist as a dask_cudf.DataFrame - source : str or array-like - source column name or array of column names - destination : str - destination column name or array of column names - edge_attr : str - weights column name. - renumber : bool - If source and destination indices are not in range 0 to V where V - is number of vertices, renumber argument should be True. 
- """ - if self.edgelist is not None or self.adjlist is not None: - raise Exception("Graph already has values") - if not isinstance(input_ddf, dask_cudf.DataFrame): - raise Exception("input should be a dask_cudf dataFrame") - if type(self) is Graph: - raise Exception("Undirected distributed graph not supported") - - s_col = source - d_col = destination - if not isinstance(s_col, list): - s_col = [s_col] - if not isinstance(d_col, list): - d_col = [d_col] - if not ( - set(s_col).issubset(set(input_ddf.columns)) - and set(d_col).issubset(set(input_ddf.columns)) - ): - raise Exception( - "source column names and/or destination column " - "names not found in input. Recheck the source " - "and destination parameters" - ) - ddf_columns = s_col + d_col - if edge_attr is not None: - if not (set([edge_attr]).issubset(set(input_ddf.columns))): - raise Exception( - "edge_attr column name not found in input." - "Recheck the edge_attr parameter") - ddf_columns = ddf_columns + [edge_attr] - input_ddf = input_ddf[ddf_columns] - - if edge_attr is not None: - input_ddf = input_ddf.rename(columns={edge_attr: 'value'}) - - # - # Keep all of the original parameters so we can lazily - # evaluate this function - # - - # FIXME: Edge Attribute not handled - self.distributed = True - self.local_data = None - self.edgelist = None - self.adjlist = None - self.renumbered = renumber - self.input_df = input_ddf - self.source_columns = source - self.destination_columns = destination - self.store_tranposed = None - - def view_edge_list(self): - """ - Display the edge list. Compute it if needed. - - NOTE: If the graph is of type Graph() then the displayed undirected - edges are the same as displayed by networkx Graph(), but the direction - could be different i.e. an edge displayed by cugraph as (src, dst) - could be displayed as (dst, src) by networkx. - - cugraph.Graph stores symmetrized edgelist internally. For displaying - undirected edgelist for a Graph the upper trianglar matrix of the - symmetrized edgelist is returned. - - networkx.Graph renumbers the input and stores the upper triangle of - this renumbered input. Since the internal renumbering of networx and - cugraph is different, the upper triangular matrix of networkx - renumbered input may not be the same as cugraph's upper trianglar - matrix of the symmetrized edgelist. Hence the displayed source and - destination pairs in both will represent the same edge but node values - could be swapped. 
- - Returns - ------- - df : cudf.DataFrame - This cudf.DataFrame wraps source, destination and weight - - df[src] : cudf.Series - contains the source index for each edge - df[dst] : cudf.Series - contains the destination index for each edge - df[weight] : cusd.Series - Column is only present for weighted Graph, - then containing the weight value for each edge - """ - if self.distributed: - if self.edgelist is None: - raise Exception("Graph has no Edgelist.") - return self.edgelist.edgelist_df - if self.edgelist is None: - src, dst, weights = graph_primtypes_wrapper.view_edge_list(self) - self.edgelist = self.EdgeList(src, dst, weights) - - edgelist_df = self.edgelist.edgelist_df - - if self.renumbered: - edgelist_df = self.unrenumber(edgelist_df, "src") - edgelist_df = self.unrenumber(edgelist_df, "dst") - - if type(self) is Graph or type(self) is MultiGraph: - edgelist_df = edgelist_df[edgelist_df["src"] <= edgelist_df["dst"]] - edgelist_df = edgelist_df.reset_index(drop=True) - self.edge_count = len(edgelist_df) - - return edgelist_df - - def delete_edge_list(self): - """ - Delete the edge list. - """ - # decrease reference count to free memory if the referenced objects are - # no longer used. - self.edgelist = None - - def from_cudf_adjlist(self, offset_col, index_col, value_col=None): - """ - Initialize a graph from the adjacency list. It is an error to call this - method on an initialized Graph object. The passed offset_col and - index_col arguments wrap gdf_column objects that represent a graph - using the adjacency list format. - If value_col is None, an unweighted graph is created. If value_col is - not None, a weighted graph is created. - Undirected edges must be stored as directed edges in both directions. - - Parameters - ---------- - offset_col : cudf.Series - This cudf.Series wraps a gdf_column of size V + 1 (V: number of - vertices). - The gdf column contains the offsets for the vertices in this graph. - Offsets must be in the range [0, E] (E: number of edges). - index_col : cudf.Series - This cudf.Series wraps a gdf_column of size E (E: number of edges). - The gdf column contains the destination index for each edge. - Destination indices must be in the range [0, V) (V: number of - vertices). - value_col : cudf.Series, optional - This pointer can be ``None``. - If not, this cudf.Series wraps a gdf_column of size E (E: number of - edges). - The gdf column contains the weight value for each edge. - The expected type of the gdf_column element is floating point - number. - - Examples - -------- - >>> gdf = cudf.read_csv('datasets/karate.csv', delimiter=' ', - >>> dtype=['int32', 'int32', 'float32'], header=None) - >>> M = gdf.to_pandas() - >>> M = scipy.sparse.coo_matrix((M['2'],(M['0'],M['1']))) - >>> M = M.tocsr() - >>> offsets = cudf.Series(M.indptr) - >>> indices = cudf.Series(M.indices) - >>> G = cugraph.Graph() - >>> G.from_cudf_adjlist(offsets, indices, None) - - """ - if self.edgelist is not None or self.adjlist is not None: - raise Exception("Graph already has values") - self.adjlist = Graph.AdjList(offset_col, index_col, value_col) - - if self.batch_enabled: - self._replicate_adjlist() - - def compute_renumber_edge_list(self, transposed=False): - """ - Compute a renumbered edge list - - This function works in the MNMG pipeline and will transform - the input dask_cudf.DataFrame into a renumbered edge list - in the prescribed direction. - - This function will be called by the algorithms to ensure - that the graph is renumbered properly. 
The graph object will - cache the most recent renumbering attempt. For benchmarking - purposes, this function can be called prior to calling a - graph algorithm so we can measure the cost of computing - the renumbering separately from the cost of executing the - algorithm. - - When creating a CSR-like structure, set transposed to False. - When creating a CSC-like structure, set transposed to True. - - Parameters - ---------- - transposed : (optional) bool - If True, renumber with the intent to make a CSC-like - structure. If False, renumber with the intent to make - a CSR-like structure. Defaults to False. - """ - # FIXME: What to do about edge_attr??? - # currently ignored for MNMG - - if not self.distributed: - raise Exception( - "compute_renumber_edge_list should only be used " - "for distributed graphs" - ) - - if not self.renumbered: - self.edgelist = self.EdgeList(self.input_df) - self.renumber_map = None - else: - if self.edgelist is not None: - if type(self) is Graph: - return - - if self.store_transposed == transposed: - return - - del self.edgelist - - renumbered_ddf, number_map = NumberMap.renumber( - self.input_df, - self.source_columns, - self.destination_columns, - store_transposed=transposed, - ) - self.edgelist = self.EdgeList(renumbered_ddf) - self.renumber_map = number_map - self.store_transposed = transposed - - def view_adj_list(self): - """ - Display the adjacency list. Compute it if needed. - - Returns - ------- - offset_col : cudf.Series - This cudf.Series wraps a gdf_column of size V + 1 (V: number of - vertices). - The gdf column contains the offsets for the vertices in this graph. - Offsets are in the range [0, E] (E: number of edges). - index_col : cudf.Series - This cudf.Series wraps a gdf_column of size E (E: number of edges). - The gdf column contains the destination index for each edge. - Destination indices are in the range [0, V) (V: number of - vertices). - value_col : cudf.Series or ``None`` - This pointer is ``None`` for unweighted graphs. - For weighted graphs, this cudf.Series wraps a gdf_column of size E - (E: number of edges). - The gdf column contains the weight value for each edge. - The expected type of the gdf_column element is floating point - number. - """ - if self.distributed: - raise Exception("Not supported for distributed graph") - - if self.adjlist is None: - if self.transposedadjlist is not None and type(self) is Graph: - off, ind, vals = ( - self.transposedadjlist.offsets, - self.transposedadjlist.indices, - self.transposedadjlist.weights, - ) - else: - off, ind, vals = graph_primtypes_wrapper.view_adj_list(self) - self.adjlist = self.AdjList(off, ind, vals) - - if self.batch_enabled: - self._replicate_adjlist() - - return self.adjlist.offsets, self.adjlist.indices, self.adjlist.weights - - def view_transposed_adj_list(self): - """ - Display the transposed adjacency list. Compute it if needed. - - Returns - ------- - offset_col : cudf.Series - This cudf.Series wraps a gdf_column of size V + 1 (V: number of - vertices). - The gdf column contains the offsets for the vertices in this graph. - Offsets are in the range [0, E] (E: number of edges). - index_col : cudf.Series - This cudf.Series wraps a gdf_column of size E (E: number of edges). - The gdf column contains the destination index for each edge. - Destination indices are in the range [0, V) (V: number of - vertices). - value_col : cudf.Series or ``None`` - This pointer is ``None`` for unweighted graphs. 
- For weighted graphs, this cudf.Series wraps a gdf_column of size E - (E: number of edges). - The gdf column contains the weight value for each edge. - The expected type of the gdf_column element is floating point - number. - - """ - if self.distributed: - raise Exception("Not supported for distributed graph") - if self.transposedadjlist is None: - if self.adjlist is not None and type(self) is Graph: - off, ind, vals = ( - self.adjlist.offsets, - self.adjlist.indices, - self.adjlist.weights, - ) - else: - ( - off, - ind, - vals, - ) = graph_primtypes_wrapper.view_transposed_adj_list(self) - self.transposedadjlist = self.transposedAdjList(off, ind, vals) - - if self.batch_enabled: - self._replicate_transposed_adjlist() - - return ( - self.transposedadjlist.offsets, - self.transposedadjlist.indices, - self.transposedadjlist.weights, - ) - - def delete_adj_list(self): - """ - Delete the adjacency list. - """ - self.adjlist = None - - def get_two_hop_neighbors(self): - """ - Compute vertex pairs that are two hops apart. The resulting pairs are - sorted before returning. - - Returns - ------- - df : cudf.DataFrame - df[first] : cudf.Series - the first vertex id of a pair, if an external vertex id - is defined by only one column - df[second] : cudf.Series - the second vertex id of a pair, if an external vertex id - is defined by only one column - """ - if self.distributed: - raise Exception("Not supported for distributed graph") - df = graph_primtypes_wrapper.get_two_hop_neighbors(self) - if self.renumbered is True: - df = self.unrenumber(df, "first") - df = self.unrenumber(df, "second") - - return df - - def number_of_vertices(self): - """ - Get the number of nodes in the graph. - - """ - if self.node_count is None: - if self.distributed: - if self.edgelist is not None: - ddf = self.edgelist.edgelist_df[["src", "dst"]] - self.node_count = ddf.max().max().compute() + 1 - else: - raise Exception("Graph is Empty") - elif self.adjlist is not None: - self.node_count = len(self.adjlist.offsets) - 1 - elif self.transposedadjlist is not None: - self.node_count = len(self.transposedadjlist.offsets) - 1 - elif self.edgelist is not None: - df = self.edgelist.edgelist_df[["src", "dst"]] - self.node_count = df.max().max() + 1 - else: - raise Exception("Graph is Empty") - return self.node_count - - def number_of_nodes(self): - """ - An alias of number_of_vertices(). This function is added for NetworkX - compatibility. - - """ - return self.number_of_vertices() - - def number_of_edges(self, directed_edges=False): - """ - Get the number of edges in the graph. - - """ - if self.distributed: - if self.edgelist is not None: - return len(self.edgelist.edgelist_df) - else: - raise ValueError("Graph is Empty") - if directed_edges and self.edgelist is not None: - return len(self.edgelist.edgelist_df) - if self.edge_count is None: - if self.edgelist is not None: - if type(self) is Graph or type(self) is MultiGraph: - self.edge_count = len( - self.edgelist.edgelist_df[ - self.edgelist.edgelist_df["src"] - >= self.edgelist.edgelist_df["dst"] - ] - ) - else: - self.edge_count = len(self.edgelist.edgelist_df) - elif self.adjlist is not None: - self.edge_count = len(self.adjlist.indices) - elif self.transposedadjlist is not None: - self.edge_count = len(self.transposedadjlist.indices) - else: - raise ValueError("Graph is Empty") - return self.edge_count - - def in_degree(self, vertex_subset=None): - """ - Compute vertex in-degree. Vertex in-degree is the number of edges - pointing into the vertex. 
By default, this method computes vertex - degrees for the entire set of vertices. If vertex_subset is provided, - this method optionally filters out all but those listed in - vertex_subset. - - Parameters - ---------- - vertex_subset : cudf.Series or iterable container, optional - A container of vertices for displaying corresponding in-degree. - If not set, degrees are computed for the entire set of vertices. - - Returns - ------- - df : cudf.DataFrame - GPU DataFrame of size N (the default) or the size of the given - vertices (vertex_subset) containing the in_degree. The ordering is - relative to the adjacency list, or that given by the specified - vertex_subset. - - df[vertex] : cudf.Series - The vertex IDs (will be identical to vertex_subset if - specified). - df[degree] : cudf.Series - The computed in-degree of the corresponding vertex. - - Examples - -------- - >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', - >>> dtype=['int32', 'int32', 'float32'], header=None) - >>> G = cugraph.Graph() - >>> G.from_cudf_edgelist(M, '0', '1') - >>> df = G.in_degree([0,9,12]) - - """ - return self._degree(vertex_subset, x=1) - - def out_degree(self, vertex_subset=None): - """ - Compute vertex out-degree. Vertex out-degree is the number of edges - pointing out from the vertex. By default, this method computes vertex - degrees for the entire set of vertices. If vertex_subset is provided, - this method optionally filters out all but those listed in - vertex_subset. - - Parameters - ---------- - vertex_subset : cudf.Series or iterable container, optional - A container of vertices for displaying corresponding out-degree. - If not set, degrees are computed for the entire set of vertices. - - Returns - ------- - df : cudf.DataFrame - GPU DataFrame of size N (the default) or the size of the given - vertices (vertex_subset) containing the out_degree. The ordering is - relative to the adjacency list, or that given by the specified - vertex_subset. - - df[vertex] : cudf.Series - The vertex IDs (will be identical to vertex_subset if - specified). - df[degree] : cudf.Series - The computed out-degree of the corresponding vertex. - - Examples - -------- - >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', - >>> dtype=['int32', 'int32', 'float32'], header=None) - >>> G = cugraph.Graph() - >>> G.from_cudf_edgelist(M, '0', '1') - >>> df = G.out_degree([0,9,12]) - - """ - if self.distributed: - raise Exception("Not supported for distributed graph") - return self._degree(vertex_subset, x=2) - - def degree(self, vertex_subset=None): - """ - Compute vertex degree, which is the total number of edges incident - to a vertex (both in and out edges). By default, this method computes - degrees for the entire set of vertices. If vertex_subset is provided, - then this method optionally filters out all but those listed in - vertex_subset. - - Parameters - ---------- - vertex_subset : cudf.Series or iterable container, optional - a container of vertices for displaying corresponding degree. If not - set, degrees are computed for the entire set of vertices. - - Returns - ------- - df : cudf.DataFrame - GPU DataFrame of size N (the default) or the size of the given - vertices (vertex_subset) containing the degree. The ordering is - relative to the adjacency list, or that given by the specified - vertex_subset. - - df['vertex'] : cudf.Series - The vertex IDs (will be identical to vertex_subset if - specified). - df['degree'] : cudf.Series - The computed degree of the corresponding vertex. 
- - Examples - -------- - >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', - >>> dtype=['int32', 'int32', 'float32'], header=None) - >>> G = cugraph.Graph() - >>> G.from_cudf_edgelist(M, '0', '1') - >>> all_df = G.degree() - >>> subset_df = G.degree([0,9,12]) - - """ - if self.distributed: - raise Exception("Not supported for distributed graph") - return self._degree(vertex_subset) - - # FIXME: vertex_subset could be a DataFrame for multi-column vertices - def degrees(self, vertex_subset=None): - """ - Compute vertex in-degree and out-degree. By default, this method - computes vertex degrees for the entire set of vertices. If - vertex_subset is provided, this method optionally filters out all but - those listed in vertex_subset. - - Parameters - ---------- - vertex_subset : cudf.Series or iterable container, optional - A container of vertices for displaying corresponding degree. If not - set, degrees are computed for the entire set of vertices. - - Returns - ------- - df : cudf.DataFrame - GPU DataFrame of size N (the default) or the size of the given - vertices (vertex_subset) containing the degrees. The ordering is - relative to the adjacency list, or that given by the specified - vertex_subset. - - df['vertex'] : cudf.Series - The vertex IDs (will be identical to vertex_subset if - specified). - df['in_degree'] : cudf.Series - The in-degree of the vertex. - df['out_degree'] : cudf.Series - The out-degree of the vertex. - - Examples - -------- - >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', - >>> dtype=['int32', 'int32', 'float32'], header=None) - >>> G = cugraph.Graph() - >>> G.from_cudf_edgelist(M, '0', '1') - >>> df = G.degrees([0,9,12]) - - """ - if self.distributed: - raise Exception("Not supported for distributed graph") - ( - vertex_col, - in_degree_col, - out_degree_col, - ) = graph_primtypes_wrapper._degrees(self) - - df = cudf.DataFrame() - df["vertex"] = vertex_col - df["in_degree"] = in_degree_col - df["out_degree"] = out_degree_col - - if self.renumbered is True: - df = self.unrenumber(df, "vertex") - - if vertex_subset is not None: - df = df[df['vertex'].isin(vertex_subset)] - - return df - - def _degree(self, vertex_subset, x=0): - vertex_col, degree_col = graph_primtypes_wrapper._degree(self, x) - df = cudf.DataFrame() - df["vertex"] = vertex_col - df["degree"] = degree_col - - if self.renumbered is True: - df = self.unrenumber(df, "vertex") - - if vertex_subset is not None: - df = df[df['vertex'].isin(vertex_subset)] - - return df - - def to_directed(self): - """ - Return a directed representation of the graph. - This function sets the type of graph as DiGraph() and returns the - directed view. - - Returns - ------- - G : DiGraph - A directed graph with the same nodes, and each edge (u,v,weights) - replaced by two directed edges (u,v,weights) and (v,u,weights). - - Examples - -------- - >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', - >>> dtype=['int32', 'int32', 'float32'], header=None) - >>> G = cugraph.Graph() - >>> G.from_cudf_edgelist(M, '0', '1') - >>> DiG = G.to_directed() - - """ - if self.distributed: - raise Exception("Not supported for distributed graph") - if type(self) is DiGraph: - return self - if type(self) is Graph: - DiG = DiGraph() - DiG.renumbered = self.renumbered - DiG.renumber_map = self.renumber_map - DiG.edgelist = self.edgelist - DiG.adjlist = self.adjlist - DiG.transposedadjlist = self.transposedadjlist - return DiG - - def to_undirected(self): - """ - Return an undirected copy of the graph. 
- - Returns - ------- - G : Graph - A undirected graph with the same nodes, and each directed edge - (u,v,weights) replaced by an undirected edge (u,v,weights). - - Examples - -------- - >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', - >>> dtype=['int32', 'int32', 'float32'], header=None) - >>> DiG = cugraph.DiGraph() - >>> DiG.from_cudf_edgelist(M, '0', '1') - >>> G = DiG.to_undirected() - - """ - - if type(self) is Graph: - return self - if type(self) is DiGraph: - G = Graph() - df = self.edgelist.edgelist_df - G.renumbered = self.renumbered - G.renumber_map = self.renumber_map - G.multi = self.multi - if self.edgelist.weights: - source_col, dest_col, value_col = symmetrize( - df["src"], df["dst"], df["weights"] - ) - else: - source_col, dest_col = symmetrize(df["src"], df["dst"]) - value_col = None - G.edgelist = Graph.EdgeList(source_col, dest_col, value_col) - - return G - - def is_directed(self): - if type(self) is DiGraph: - return True - else: - return False - - def has_node(self, n): - """ - Returns True if the graph contains the node n. - """ - if self.edgelist is None: - raise Exception("Graph has no Edgelist.") - if self.distributed: - ddf = self.edgelist.edgelist_df[["src", "dst"]] - return (ddf == n).any().any().compute() - if self.renumbered: - tmp = self.renumber_map.to_internal_vertex_id(cudf.Series([n])) - return tmp[0] is not cudf.NA and tmp[0] >= 0 - else: - df = self.edgelist.edgelist_df[["src", "dst"]] - return (df == n).any().any() - - def has_edge(self, u, v): - """ - Returns True if the graph contains the edge (u,v). - """ - if self.edgelist is None: - raise Exception("Graph has no Edgelist.") - if self.renumbered: - tmp = cudf.DataFrame({"src": [u, v]}) - tmp = tmp.astype({"src": "int"}) - tmp = self.add_internal_vertex_id( - tmp, "id", "src", preserve_order=True - ) - - u = tmp["id"][0] - v = tmp["id"][1] - - df = self.edgelist.edgelist_df - if self.distributed: - return ((df["src"] == u) & (df["dst"] == v)).any().compute() - return ((df["src"] == u) & (df["dst"] == v)).any() - - def edges(self): - """ - Returns all the edges in the graph as a cudf.DataFrame containing - sources and destinations. It does not return the edge weights. 
- For viewing edges with weights use view_edge_list() - """ - return self.view_edge_list()[["src", "dst"]] - - def nodes(self): - """ - Returns all the nodes in the graph as a cudf.Series - """ - if self.distributed: - raise Exception("Not supported for distributed graph") - if self.edgelist is not None: - df = self.edgelist.edgelist_df - if self.renumbered: - # FIXME: If vertices are multicolumn - # this needs to return a dataframe - # FIXME: This relies on current implementation - # of NumberMap, should not really expose - # this, perhaps add a method to NumberMap - return self.renumber_map.implementation.df["0"] - else: - return cudf.concat([df["src"], df["dst"]]).unique() - if self.adjlist is not None: - return cudf.Series(np.arange(0, self.number_of_nodes())) - if "all_nodes" in self._nodes.keys(): - return self._nodes["all_nodes"] - else: - n = cudf.Series(dtype="int") - set_names = [i for i in self._nodes.keys() if i != "all_nodes"] - for k in set_names: - n = n.append(self._nodes[k]) - return n - - def neighbors(self, n): - if self.edgelist is None: - raise Exception("Graph has no Edgelist.") - if self.distributed: - ddf = self.edgelist.edgelist_df - return ddf[ddf["src"] == n]["dst"].reset_index(drop=True) - if self.renumbered: - node = self.renumber_map.to_internal_vertex_id(cudf.Series([n])) - if len(node) == 0: - return cudf.Series(dtype="int") - n = node[0] - - df = self.edgelist.edgelist_df - neighbors = df[df["src"] == n]["dst"].reset_index(drop=True) - if self.renumbered: - # FIXME: Multi-column vertices - return self.renumber_map.from_internal_vertex_id(neighbors)["0"] - else: - return neighbors - - def unrenumber(self, df, column_name, preserve_order=False): - """ - Given a DataFrame containing internal vertex ids in the identified - column, replace this with external vertex ids. If the renumbering - is from a single column, the output dataframe will use the same - name for the external vertex identifiers. If the renumbering is from - a multi-column input, the output columns will be labeled 0 through - n-1 with a suffix of _column_name. - - Note that this function does not guarantee order in single GPU mode, - and does not guarantee order or partitioning in multi-GPU mode. If you - wish to preserve ordering, add an index column to df and sort the - return by that index column. - - Parameters - ---------- - df: cudf.DataFrame or dask_cudf.DataFrame - A DataFrame containing internal vertex identifiers that will be - converted into external vertex identifiers. - - column_name: string - Name of the column containing the internal vertex id. - - preserve_order: (optional) bool - If True, preserve the order of the rows in the output - DataFrame to match the input DataFrame - - Returns - --------- - df : cudf.DataFrame or dask_cudf.DataFrame - The original DataFrame columns exist unmodified. The external - vertex identifiers are added to the DataFrame, the internal - vertex identifier column is removed from the dataframe. - """ - return self.renumber_map.unrenumber(df, column_name, preserve_order) - - def lookup_internal_vertex_id(self, df, column_name=None): - """ - Given a DataFrame containing external vertex ids in the identified - columns, or a Series containing external vertex ids, return a - Series with the internal vertex ids. - - Note that this function does not guarantee order in single GPU mode, - and does not guarantee order or partitioning in multi-GPU mode. 
- - Parameters - ---------- - df: cudf.DataFrame, cudf.Series, dask_cudf.DataFrame, dask_cudf.Series - A DataFrame containing external vertex identifiers that will be - converted into internal vertex identifiers. - - column_name: (optional) string - Name of the column containing the external vertex ids - - Returns - --------- - series : cudf.Series or dask_cudf.Series - The internal vertex identifiers - """ - return self.renumber_map.to_internal_vertex_id(df, column_name) - - def add_internal_vertex_id( - self, - df, - internal_column_name, - external_column_name, - drop=True, - preserve_order=False, - ): - """ - Given a DataFrame containing external vertex ids in the identified - columns, return a DataFrame containing the internal vertex ids as the - specified column name. Optionally drop the external vertex id columns. - Optionally preserve the order of the original DataFrame. - - Parameters - ---------- - df: cudf.DataFrame or dask_cudf.DataFrame - A DataFrame containing external vertex identifiers that will be - converted into internal vertex identifiers. - - internal_column_name: string - Name of column to contain the internal vertex id - - external_column_name: string or list of strings - Name of the column(s) containing the external vertex ids - - drop: (optional) bool, defaults to True - Drop the external columns from the returned DataFrame - - preserve_order: (optional) bool, defaults to False - Preserve the order of the data frame (requires an extra sort) - - Returns - --------- - df : cudf.DataFrame or dask_cudf.DataFrame - Original DataFrame with new column containing internal vertex - id - """ - return self.renumber_map.add_internal_vertex_id( - df, - internal_column_name, - external_column_name, - drop, - preserve_order, - ) - - -class DiGraph(Graph): - """ - cuGraph directed graph class. Drops parallel edges. - """ - def __init__(self, m_graph=None): - super().__init__( - m_graph=m_graph, symmetrized=True - ) - - -class MultiGraph(Graph): - """ - cuGraph class to create and store undirected graphs with parallel edges. - """ - def __init__(self, renumbered=True): - super().__init__(multi=True) - - -class MultiDiGraph(Graph): - """ - cuGraph class to create and store directed graphs with parallel edges. - """ - def __init__(self, renumbered=True): - super().__init__(symmetrized=True, multi=True) diff --git a/python/cugraph/structure/graph_classes.py b/python/cugraph/structure/graph_classes.py new file mode 100644 index 00000000000..3cd1863a054 --- /dev/null +++ b/python/cugraph/structure/graph_classes.py @@ -0,0 +1,743 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
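The query surface documented above (degrees, has_node, has_edge, neighbors) is easiest to follow with a short end-to-end driver. The following is a minimal sketch, not part of the patch, assuming the karate.csv dataset and the '0'/'1'/'2' column names already used by the surrounding docstring examples.

    import cudf
    import cugraph

    # Build an undirected graph from the weighted karate edge list.
    M = cudf.read_csv('datasets/karate.csv', delimiter=' ',
                      dtype=['int32', 'int32', 'float32'], header=None)
    G = cugraph.Graph()
    G.from_cudf_edgelist(M, source='0', destination='1', edge_attr='2')

    # Per-vertex in/out degrees for a subset; results carry external IDs
    # because the degree methods unrenumber before returning.
    print(G.degrees([0, 9, 12]))

    # Membership and adjacency queries on the same graph.
    print(G.has_node(0))      # True if vertex 0 appears in the edge list
    print(G.has_edge(0, 1))   # True if the edge (0, 1) is present
    print(G.neighbors(0))     # cudf.Series of vertices adjacent to 0

The same calls go through the new Graph front end below unchanged: Graph.__getattr__ forwards anything the wrapper does not define itself to the underlying implementation object.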
+ +import numpy as np +from .graph_implementation import (simpleGraphImpl, + simpleDistributedGraphImpl, + npartiteGraphImpl) +import cudf +import warnings + + +# TODO: Move to utilities +def null_check(col): + if col.null_count != 0: + raise ValueError("Series contains NULL values") + + +class Graph: + class Properties: + def __init__(self, directed): + self.directed = directed + self.weights = False + + def __init__(self, m_graph=None, directed=False): + self._Impl = None + self.graph_properties = Graph.Properties(directed) + if m_graph is not None: + if m_graph.is_multigraph(): + elist = m_graph.view_edge_list() + if m_graph.is_weighted(): + weights = "weights" + else: + weights = None + self.from_cudf_edgelist(elist, + source="src", + destination="dst", + edge_attr=weights) + else: + msg = ( + "Graph can only be initialized using MultiGraph " + "or MultiDiGraph" + ) + raise Exception(msg) + + def __getattr__(self, name): + if self._Impl is None: + raise AttributeError(name) + if hasattr(self._Impl, name): + return getattr(self._Impl, name) + # FIXME: Remove access to Impl properties + elif hasattr(self._Impl.properties, name): + return getattr(self._Impl.properties, name) + else: + raise AttributeError(name) + + def __dir__(self): + return dir(self._Impl) + + def from_cudf_edgelist( + self, + input_df, + source="source", + destination="destination", + edge_attr=None, + renumber=True + ): + """ + Initialize a graph from the edge list. It is an error to call this + method on an initialized Graph object. The passed input_df argument + wraps gdf_column objects that represent a graph using the edge list + format. source argument is source column name and destination argument + is destination column name. + By default, renumbering is enabled to map the source and destination + vertices into an index in the range [0, V) where V is the number + of vertices. If the input vertices are a single column of integers + in the range [0, V), renumbering can be disabled and the original + external vertex ids will be used. + If weights are present, edge_attr argument is the weights column name. + Parameters + ---------- + input_df : cudf.DataFrame or dask_cudf.DataFrame + A DataFrame that contains edge information + If a dask_cudf.DataFrame is passed it will be reinterpreted as + a cudf.DataFrame. For the distributed path please use + from_dask_cudf_edgelist. + source : str or array-like + source column name or array of column names + destination : str or array-like + destination column name or array of column names + edge_attr : str or None + the weights column name. Default is None + renumber : bool + Indicate whether or not to renumber the source and destination + vertex IDs. Default is True. 
+ Examples + -------- + >>> df = cudf.read_csv('datasets/karate.csv', delimiter=' ', + >>> dtype=['int32', 'int32', 'float32'], header=None) + >>> G = cugraph.Graph() + >>> G.from_cudf_edgelist(df, source='0', destination='1', + edge_attr='2', renumber=False) + """ + if self._Impl is None: + self._Impl = simpleGraphImpl(self.graph_properties) + elif type(self._Impl) is not simpleGraphImpl: + raise Exception("Graph is already initialized") + elif (self._Impl.edgelist is not None or + self._Impl.adjlist is not None): + raise Exception("Graph already has values") + self._Impl._simpleGraphImpl__from_edgelist(input_df, + source=source, + destination=destination, + edge_attr=edge_attr, + renumber=renumber) + + def from_cudf_adjlist(self, offset_col, index_col, value_col=None): + """ + Initialize a graph from the adjacency list. It is an error to call this + method on an initialized Graph object. The passed offset_col and + index_col arguments wrap gdf_column objects that represent a graph + using the adjacency list format. + If value_col is None, an unweighted graph is created. If value_col is + not None, a weighted graph is created. + Undirected edges must be stored as directed edges in both directions. + Parameters + ---------- + offset_col : cudf.Series + This cudf.Series wraps a gdf_column of size V + 1 (V: number of + vertices). + The gdf column contains the offsets for the vertices in this graph. + Offsets must be in the range [0, E] (E: number of edges). + index_col : cudf.Series + This cudf.Series wraps a gdf_column of size E (E: number of edges). + The gdf column contains the destination index for each edge. + Destination indices must be in the range [0, V) (V: number of + vertices). + value_col : cudf.Series, optional + This pointer can be ``None``. + If not, this cudf.Series wraps a gdf_column of size E (E: number of + edges). + The gdf column contains the weight value for each edge. + The expected type of the gdf_column element is floating point + number. + Examples + -------- + >>> gdf = cudf.read_csv('datasets/karate.csv', delimiter=' ', + >>> dtype=['int32', 'int32', 'float32'], header=None) + >>> M = gdf.to_pandas() + >>> M = scipy.sparse.coo_matrix((M['2'],(M['0'],M['1']))) + >>> M = M.tocsr() + >>> offsets = cudf.Series(M.indptr) + >>> indices = cudf.Series(M.indices) + >>> G = cugraph.Graph() + >>> G.from_cudf_adjlist(offsets, indices, None) + """ + if self._Impl is None: + self._Impl = simpleGraphImpl(self.graph_properties) + elif type(self._Impl) is not simpleGraphImpl: + raise Exception("Graph is already initialized") + elif (self._Impl.edgelist is not None or + self._Impl.adjlist is not None): + raise Exception("Graph already has values") + self._Impl._simpleGraphImpl__from_adjlist(offset_col, + index_col, + value_col) + + def from_dask_cudf_edgelist( + self, + input_ddf, + source="source", + destination="destination", + edge_attr=None, + renumber=True, + ): + """ + Initializes the distributed graph from the dask_cudf.DataFrame + edgelist. Undirected Graphs are not currently supported. + By default, renumbering is enabled to map the source and destination + vertices into an index in the range [0, V) where V is the number + of vertices. If the input vertices are a single column of integers + in the range [0, V), renumbering can be disabled and the original + external vertex ids will be used. + Note that the graph object will store a reference to the + dask_cudf.DataFrame provided. 
+ Parameters + ---------- + input_ddf : dask_cudf.DataFrame + The edgelist as a dask_cudf.DataFrame + source : str or array-like + source column name or array of column names + destination : str + destination column name or array of column names + edge_attr : str + weights column name. + renumber : bool + If source and destination indices are not in range 0 to V where V + is number of vertices, renumber argument should be True. + """ + if self._Impl is None: + self._Impl = simpleDistributedGraphImpl(self.graph_properties) + elif type(self._Impl) is not simpleDistributedGraphImpl: + raise Exception("Graph is already initialized") + elif (self._Impl.edgelist is not None): + raise Exception("Graph already has values") + self._Impl._simpleDistributedGraphImpl__from_edgelist(input_ddf, + source, + destination, + edge_attr, + renumber) + + # Move to Compat Module + def from_pandas_edgelist( + self, + pdf, + source="source", + destination="destination", + edge_attr=None, + renumber=True, + ): + """ + Initialize a graph from the edge list. It is an error to call this + method on an initialized Graph object. Source argument is source + column name and destination argument is destination column name. + By default, renumbering is enabled to map the source and destination + vertices into an index in the range [0, V) where V is the number + of vertices. If the input vertices are a single column of integers + in the range [0, V), renumbering can be disabled and the original + external vertex ids will be used. + If weights are present, edge_attr argument is the weights column name. + Parameters + ---------- + input_df : pandas.DataFrame + A DataFrame that contains edge information + source : str or array-like + source column name or array of column names + destination : str or array-like + destination column name or array of column names + edge_attr : str or None + the weights column name. Default is None + renumber : bool + Indicate whether or not to renumber the source and destination + vertex IDs. Default is True. + Examples + -------- + >>> df = pandas.read_csv('datasets/karate.csv', delimiter=' ', + >>> dtype=['int32', 'int32', 'float32'], header=None) + >>> G = cugraph.Graph() + >>> G.from_pandas_edgelist(df, source='0', destination='1', + edge_attr='2', renumber=False) + """ + gdf = cudf.DataFrame.from_pandas(pdf) + self.from_cudf_edgelist(gdf, source=source, destination=destination, + edge_attr=edge_attr, renumber=renumber) + + def from_pandas_adjacency(self, pdf): + """ + Initializes the graph from pandas adjacency matrix + """ + np_array = pdf.to_numpy() + columns = pdf.columns + self.from_numpy_array(np_array, columns) + + def from_numpy_array(self, np_array, nodes=None): + """ + Initializes the graph from numpy array containing adjacency matrix. + """ + src, dst = np_array.nonzero() + weight = np_array[src, dst] + df = cudf.DataFrame() + if nodes is not None: + df['src'] = nodes[src] + df['dst'] = nodes[dst] + else: + df['src'] = src + df['dst'] = dst + df['weight'] = weight + self.from_cudf_edgelist(df, 'src', 'dst', edge_attr='weight') + + def from_numpy_matrix(self, np_matrix): + """ + Initializes the graph from numpy matrix containing adjacency matrix. + """ + np_array = np.asarray(np_matrix) + self.from_numpy_array(np_array) + + def unrenumber(self, df, column_name, preserve_order=False): + """ + Given a DataFrame containing internal vertex ids in the identified + column, replace this with external vertex ids. 
If the renumbering + is from a single column, the output dataframe will use the same + name for the external vertex identifiers. If the renumbering is from + a multi-column input, the output columns will be labeled 0 through + n-1 with a suffix of _column_name. + Note that this function does not guarantee order in single GPU mode, + and does not guarantee order or partitioning in multi-GPU mode. If you + wish to preserve ordering, add an index column to df and sort the + return by that index column. + Parameters + ---------- + df: cudf.DataFrame or dask_cudf.DataFrame + A DataFrame containing internal vertex identifiers that will be + converted into external vertex identifiers. + column_name: string + Name of the column containing the internal vertex id. + preserve_order: (optional) bool + If True, preserve the order of the rows in the output + DataFrame to match the input DataFrame + Returns + --------- + df : cudf.DataFrame or dask_cudf.DataFrame + The original DataFrame columns exist unmodified. The external + vertex identifiers are added to the DataFrame, the internal + vertex identifier column is removed from the dataframe. + """ + return self.renumber_map.unrenumber(df, column_name, preserve_order) + + def lookup_internal_vertex_id(self, df, column_name=None): + """ + Given a DataFrame containing external vertex ids in the identified + columns, or a Series containing external vertex ids, return a + Series with the internal vertex ids. + Note that this function does not guarantee order in single GPU mode, + and does not guarantee order or partitioning in multi-GPU mode. + Parameters + ---------- + df: cudf.DataFrame, cudf.Series, dask_cudf.DataFrame, dask_cudf.Series + A DataFrame containing external vertex identifiers that will be + converted into internal vertex identifiers. + column_name: (optional) string + Name of the column containing the external vertex ids + Returns + --------- + series : cudf.Series or dask_cudf.Series + The internal vertex identifiers + """ + return self.renumber_map.to_internal_vertex_id(df, column_name) + + def add_internal_vertex_id( + self, + df, + internal_column_name, + external_column_name, + drop=True, + preserve_order=False, + ): + """ + Given a DataFrame containing external vertex ids in the identified + columns, return a DataFrame containing the internal vertex ids as the + specified column name. Optionally drop the external vertex id columns. + Optionally preserve the order of the original DataFrame. + Parameters + ---------- + df: cudf.DataFrame or dask_cudf.DataFrame + A DataFrame containing external vertex identifiers that will be + converted into internal vertex identifiers. + internal_column_name: string + Name of column to contain the internal vertex id + external_column_name: string or list of strings + Name of the column(s) containing the external vertex ids + drop: (optional) bool, defaults to True + Drop the external columns from the returned DataFrame + preserve_order: (optional) bool, defaults to False + Preserve the order of the data frame (requires an extra sort) + Returns + --------- + df : cudf.DataFrame or dask_cudf.DataFrame + Original DataFrame with new column containing internal vertex + id + """ + return self.renumber_map.add_internal_vertex_id( + df, + internal_column_name, + external_column_name, + drop, + preserve_order, + ) + + def clear(self): + """ + Empty the graph. + """ + self._Impl = None + + def is_bipartite(self): + """ + Checks if Graph is bipartite. 
This solely relies on the user call of + add_nodes_from with the bipartite parameter. This does not parse the + graph to check if it is bipartite. + """ + # TO DO: Call coloring algorithm + return False + + def is_multipartite(self): + """ + Checks if Graph is multipartite. This solely relies on the user call + of add_nodes_from with the partition parameter. This does not parse + the graph to check if it is multipartite. + """ + # TO DO: Call coloring algorithm + return False + + def is_multigraph(self): + """ + Returns True if the graph is a multigraph. Else returns False. + """ + # TO DO: Call coloring algorithm + return False + + def is_directed(self): + """ + Returns True if the graph is a directed graph. + Returns False if the graph is an undirected graph. + """ + return self.graph_properties.directed + + def is_renumbered(self): + """ + Returns True if the graph is renumbered. + """ + return self.properties.renumbered + + def is_weighted(self): + """ + Returns True if the graph has edge weights. + """ + return self.properties.weighted + + def has_isolated_vertices(self): + """ + Returns True if the graph has isolated vertices. + """ + return self.properties.isolated_vertices + + def to_directed(self): + """ + Return a directed representation of the graph. + This function sets the type of graph as DiGraph() and returns the + directed view. + Returns + ------- + G : DiGraph + A directed graph with the same nodes, and each edge (u,v,weights) + replaced by two directed edges (u,v,weights) and (v,u,weights). + Examples + -------- + >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', + >>> dtype=['int32', 'int32', 'float32'], header=None) + >>> G = cugraph.Graph() + >>> G.from_cudf_edgelist(M, '0', '1') + >>> DiG = G.to_directed() + """ + directed_graph = type(self)() + directed_graph.graph_properties.directed = True + directed_graph._Impl = type(self._Impl)(directed_graph. + graph_properties) + self._Impl.to_directed(directed_graph._Impl) + return directed_graph + + def to_undirected(self): + """ + Return an undirected copy of the graph. + Returns + ------- + G : Graph + A undirected graph with the same nodes, and each directed edge + (u,v,weights) replaced by an undirected edge (u,v,weights). + Examples + -------- + >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', + >>> dtype=['int32', 'int32', 'float32'], header=None) + >>> DiG = cugraph.DiGraph() + >>> DiG.from_cudf_edgelist(M, '0', '1') + >>> G = DiG.to_undirected() + """ + + if self.graph_properties.directed is False: + undirected_graph = type(self)() + elif self.__class__.__bases__[0] == object: + undirected_graph = type(self)() + else: + undirected_graph = self.__class__.__bases__[0]() + undirected_graph._Impl = type(self._Impl)(undirected_graph. + graph_properties) + self._Impl.to_undirected(undirected_graph._Impl) + return undirected_graph + + def add_nodes_from(self, nodes): + """ + Add nodes information to the Graph. + Parameters + ---------- + nodes : list or cudf.Series + The nodes of the graph to be stored. 
+ """ + self._Impl._nodes["all_nodes"] = cudf.Series(nodes) + + # TODO: Add function + # def properties(): + + +class DiGraph(Graph): + def __init__(self, m_graph=None): + warnings.warn( + "DiGraph is deprecated, use Graph(directed=True) instead", + DeprecationWarning + ) + super(DiGraph, self).__init__(m_graph, directed=True) + + +class MultiGraph(Graph): + def __init__(self, directed=False): + super(MultiGraph, self).__init__(directed=directed) + self.graph_properties.multi_edge = True + + def is_multigraph(self): + """ + Returns True if the graph is a multigraph. Else returns False. + """ + # TO DO: Call coloring algorithm + return True + + +class MultiDiGraph(MultiGraph): + def __init__(self): + warnings.warn( + "MultiDiGraph is deprecated,\ + use MultiGraph(directed=True) instead", + DeprecationWarning + ) + super(MultiDiGraph, self).__init__(directed=True) + + +class Tree(Graph): + def __init__(self, directed=False): + super(Tree, self).__init__(directed=directed) + self.graph_properties.tree = True + + +class NPartiteGraph(Graph): + def __init__(self, bipartite=False, directed=False): + super(NPartiteGraph, self).__init__(directed=directed) + self.graph_properties.bipartite = bipartite + self.graph_properties.multipartite = True + + def from_cudf_edgelist( + self, + input_df, + source="source", + destination="destination", + edge_attr=None, + renumber=True + ): + """ + Initialize a graph from the edge list. It is an error to call this + method on an initialized Graph object. The passed input_df argument + wraps gdf_column objects that represent a graph using the edge list + format. source argument is source column name and destination argument + is destination column name. + By default, renumbering is enabled to map the source and destination + vertices into an index in the range [0, V) where V is the number + of vertices. If the input vertices are a single column of integers + in the range [0, V), renumbering can be disabled and the original + external vertex ids will be used. + If weights are present, edge_attr argument is the weights column name. + Parameters + ---------- + input_df : cudf.DataFrame or dask_cudf.DataFrame + A DataFrame that contains edge information + If a dask_cudf.DataFrame is passed it will be reinterpreted as + a cudf.DataFrame. For the distributed path please use + from_dask_cudf_edgelist. + source : str or array-like + source column name or array of column names + destination : str or array-like + destination column name or array of column names + edge_attr : str or None + the weights column name. Default is None + renumber : bool + Indicate whether or not to renumber the source and destination + vertex IDs. Default is True. + Examples + -------- + >>> df = cudf.read_csv('datasets/karate.csv', delimiter=' ', + >>> dtype=['int32', 'int32', 'float32'], header=None) + >>> G = cugraph.BiPartiteGraph() + >>> G.from_cudf_edgelist(df, source='0', destination='1', + edge_attr='2', renumber=False) + """ + if self._Impl is None: + self._Impl = npartiteGraphImpl(self.graph_properties) + # API may change in future + self._Impl._npartiteGraphImpl__from_edgelist(input_df, + source=source, + destination=destination, + edge_attr=edge_attr, + renumber=renumber) + + def from_dask_cudf_edgelist( + self, + input_ddf, + source="source", + destination="destination", + edge_attr=None, + renumber=True, + ): + """ + Initializes the distributed graph from the dask_cudf.DataFrame + edgelist. Undirected Graphs are not currently supported. 
+ By default, renumbering is enabled to map the source and destination + vertices into an index in the range [0, V) where V is the number + of vertices. If the input vertices are a single column of integers + in the range [0, V), renumbering can be disabled and the original + external vertex ids will be used. + Note that the graph object will store a reference to the + dask_cudf.DataFrame provided. + Parameters + ---------- + input_ddf : dask_cudf.DataFrame + The edgelist as a dask_cudf.DataFrame + source : str or array-like + source column name or array of column names + destination : str + destination column name or array of column names + edge_attr : str + weights column name. + renumber : bool + If source and destination indices are not in range 0 to V where V + is number of vertices, renumber argument should be True. + """ + raise Exception("Distributed N-partite graph not supported") + + def add_nodes_from(self, nodes, bipartite=None, multipartite=None): + """ + Add nodes information to the Graph. + Parameters + ---------- + nodes : list or cudf.Series + The nodes of the graph to be stored. If bipartite and multipartite + arguments are not passed, the nodes are considered to be a list of + all the nodes present in the Graph. + bipartite : str + Sets the Graph as bipartite. The nodes are stored as a set of nodes + of the partition named as bipartite argument. + multipartite : str + Sets the Graph as multipartite. The nodes are stored as a set of + nodes of the partition named as multipartite argument. + """ + if self._Impl is None: + self._Impl = npartiteGraphImpl(self.graph_properties) + if bipartite is None and multipartite is None: + self._Impl._nodes["all_nodes"] = cudf.Series(nodes) + else: + self._Impl.add_nodes_from(nodes, bipartite=bipartite, + multipartite=multipartite) + + def is_multipartite(self): + """ + Checks if Graph is multipartite. This solely relies on the user call + of add_nodes_from with the partition parameter and the Graph created. + This does not parse the graph to check if it is multipartite. + """ + return True + + +class BiPartiteGraph(NPartiteGraph): + def __init__(self, directed=False): + super(BiPartiteGraph, self).__init__(directed=directed, bipartite=True) + + def is_bipartite(self): + """ + Checks if Graph is bipartite. This solely relies on the user call of + add_nodes_from with the bipartite parameter and the Graph created. + This does not parse the graph to check if it is bipartite. + """ + return True + + +class BiPartiteDiGraph(BiPartiteGraph): + def __init__(self): + warnings.warn( + "BiPartiteDiGraph is deprecated,\ + use BiPartiteGraph(directed=True) instead", + DeprecationWarning + ) + super(BiPartiteDiGraph, self).__init__(directed=True) + + +class NPartiteDiGraph(NPartiteGraph): + def __init__(self): + warnings.warn( + "NPartiteDiGraph is deprecated,\ + use NPartiteGraph(directed=True) instead", + DeprecationWarning + ) + super(NPartiteGraph, self).__init__(directed=True) + + +def is_directed(G): + """ + Returns True if the graph is a directed graph. + Returns False if the graph is an undirected graph. + """ + return G.is_directed() + + +def is_multigraph(G): + """ + Returns True if the graph is a multigraph. Else returns False. + """ + return G.is_multigraph() + + +def is_multipartite(G): + """ + Checks if Graph is multipartite. This solely relies on the Graph + type. This does not parse the graph to check if it is multipartite. + """ + return G.is_multipatite() + + +def is_bipartite(G): + """ + Checks if Graph is bipartite. 
This solely relies on the Graph type. + This does not parse the graph to check if it is bipartite. + """ + return G.is_bipartite() + + +def is_weighted(G): + """ + Returns True if the graph has edge weights. + """ + return G.is_weighted() diff --git a/python/cugraph/structure/graph_implementation/__init__.py b/python/cugraph/structure/graph_implementation/__init__.py new file mode 100644 index 00000000000..eeef73c0f64 --- /dev/null +++ b/python/cugraph/structure/graph_implementation/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .simpleGraph import simpleGraphImpl +from .simpleDistributedGraph import simpleDistributedGraphImpl +from .npartiteGraph import npartiteGraphImpl + diff --git a/python/cugraph/structure/graph_implementation/npartiteGraph.py b/python/cugraph/structure/graph_implementation/npartiteGraph.py new file mode 100644 index 00000000000..111d9f792fa --- /dev/null +++ b/python/cugraph/structure/graph_implementation/npartiteGraph.py @@ -0,0 +1,100 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .simpleGraph import simpleGraphImpl +import cudf + + +class npartiteGraphImpl(simpleGraphImpl): + def __init__(self, properties): + super(npartiteGraphImpl, self).__init__(properties) + self.properties.bipartite = properties.bipartite + + # API may change in future + def __from_edgelist( + self, + input_df, + source="source", + destination="destination", + edge_attr=None, + renumber=True, + ): + self._simpleGraphImpl__from_edgelist( + input_df, + source=source, + destination=destination, + edge_attr=edge_attr, + renumber=renumber, + ) + + def sets(self): + """ + Returns the bipartite set of nodes. This solely relies on the user's + call of add_nodes_from with the bipartite parameter. This does not + parse the graph to compute bipartite sets. If bipartite argument was + not provided during add_nodes_from(), it raise an exception that the + graph is not bipartite. 
+ """ + # TO DO: Call coloring algorithm + set_names = [i for i in self._nodes.keys() if i != "all_nodes"] + if self.properties.bipartite: + top = self._nodes[set_names[0]] + if len(set_names) == 2: + bottom = self._nodes[set_names[1]] + else: + bottom = cudf.Series( + set(self.nodes().values_host) - set(top.values_host) + ) + return top, bottom + else: + return {k: self._nodes[k] for k in set_names} + + # API may change in future + def add_nodes_from(self, nodes, bipartite=None, multipartite=None): + """ + Add nodes information to the Graph. + Parameters + ---------- + nodes : list or cudf.Series + The nodes of the graph to be stored. If bipartite and multipartite + arguments are not passed, the nodes are considered to be a list of + all the nodes present in the Graph. + bipartite : str + Sets the Graph as bipartite. The nodes are stored as a set of nodes + of the partition named as bipartite argument. + multipartite : str + Sets the Graph as multipartite. The nodes are stored as a set of + nodes of the partition named as multipartite argument. + """ + if bipartite is None and multipartite is None: + raise Exception("Partition not provided") + else: + set_names = [i for i in self._nodes.keys() if i != "all_nodes"] + if multipartite is not None: + if self.properties.bipartite: + raise Exception( + "The Graph is bipartite. " + "Use bipartite option instead." + ) + elif bipartite is not None: + if not self.properties.bipartite: + raise Exception( + "The Graph is set as npartite. " + "Use multipartite option instead.") + multipartite = bipartite + if multipartite not in set_names and len(set_names) == 2: + raise Exception( + "The Graph is set as bipartite and " + "already has two partitions initialized." + ) + self._nodes[multipartite] = cudf.Series(nodes) diff --git a/python/cugraph/structure/graph_implementation/simpleDistributedGraph.py b/python/cugraph/structure/graph_implementation/simpleDistributedGraph.py new file mode 100644 index 00000000000..e85f3b6ab6c --- /dev/null +++ b/python/cugraph/structure/graph_implementation/simpleDistributedGraph.py @@ -0,0 +1,473 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
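The bipartite bookkeeping above (add_nodes_from with a partition name, sets() to read the partitions back) can be driven through the public BiPartiteGraph class. The following is a minimal sketch; the partition names and the toy edge list are illustrative assumptions, not values taken from the patch.

    import cudf
    import cugraph

    # Toy user/item edge list; the partition labels are arbitrary.
    edges = cudf.DataFrame({"user": [0, 0, 1, 2],
                            "item": [10, 11, 10, 12]})

    BG = cugraph.BiPartiteGraph()
    BG.add_nodes_from(cudf.Series([0, 1, 2]), bipartite="users")
    BG.add_nodes_from(cudf.Series([10, 11, 12]), bipartite="items")
    BG.from_cudf_edgelist(edges, source="user", destination="item")

    top, bottom = BG.sets()   # the two partitions recorded above
    print(BG.is_bipartite())  # True: relies only on the recorded partitions

Note that sets() does not parse the edge list; it only reports the partitions registered through add_nodes_from, which is why the partitions must be added before they are queried.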
+ +from cugraph.structure import graph_primtypes_wrapper +from cugraph.structure.number_map import NumberMap +import cudf +import dask_cudf + + +class simpleDistributedGraphImpl: + class EdgeList: + def __init__(self, ddf): + self.edgelist_df = ddf + self.weights = False + # FIXME: Edge Attribute not handled + + # class AdjList: + # Not Supported + + # class transposedAdjList: + # Not Supported + + class Properties: + def __init__(self, properties): + self.multi_edge = getattr(properties, 'multi_edge', False) + self.directed = properties.directed + self.renumbered = False + self.store_transposed = False + self.self_loop = None + self.isolated_vertices = None + self.node_count = None + self.edge_count = None + self.weighted = False + + def __init__(self, properties): + # Structure + self.edgelist = None + self.renumber_map = None + self.properties = simpleDistributedGraphImpl.Properties(properties) + self.source_columns = None + self.destination_columns = None + + # Functions + def __from_edgelist( + self, + input_ddf, + source="source", + destination="destination", + edge_attr=None, + renumber=True, + store_transposed=False, + ): + if not isinstance(input_ddf, dask_cudf.DataFrame): + raise Exception("input should be a dask_cudf dataFrame") + if self.properties.directed is False: + raise Exception("Undirected distributed graph not supported") + + s_col = source + d_col = destination + if not isinstance(s_col, list): + s_col = [s_col] + if not isinstance(d_col, list): + d_col = [d_col] + if not ( + set(s_col).issubset(set(input_ddf.columns)) + and set(d_col).issubset(set(input_ddf.columns)) + ): + raise Exception( + "source column names and/or destination column " + "names not found in input. Recheck the source " + "and destination parameters" + ) + ddf_columns = s_col + d_col + if edge_attr is not None: + if not (set([edge_attr]).issubset(set(input_ddf.columns))): + raise Exception( + "edge_attr column name not found in input." + "Recheck the edge_attr parameter") + self.weighted = True + ddf_columns = ddf_columns + [edge_attr] + input_ddf = input_ddf[ddf_columns] + + if edge_attr is not None: + input_ddf = input_ddf.rename(columns={edge_attr: 'value'}) + + # + # Keep all of the original parameters so we can lazily + # evaluate this function + # + + # FIXME: Edge Attribute not handled + self.properties.renumbered = renumber + self.input_df = input_ddf + self.source_columns = source + self.destination_columns = destination + + def view_edge_list(self): + """ + Display the edge list. Compute it if needed. + NOTE: If the graph is of type Graph() then the displayed undirected + edges are the same as displayed by networkx Graph(), but the direction + could be different i.e. an edge displayed by cugraph as (src, dst) + could be displayed as (dst, src) by networkx. + cugraph.Graph stores symmetrized edgelist internally. For displaying + undirected edgelist for a Graph the upper trianglar matrix of the + symmetrized edgelist is returned. + networkx.Graph renumbers the input and stores the upper triangle of + this renumbered input. Since the internal renumbering of networx and + cugraph is different, the upper triangular matrix of networkx + renumbered input may not be the same as cugraph's upper trianglar + matrix of the symmetrized edgelist. Hence the displayed source and + destination pairs in both will represent the same edge but node values + could be swapped. 
+ Returns + ------- + df : cudf.DataFrame + This cudf.DataFrame wraps source, destination and weight + df[src] : cudf.Series + contains the source index for each edge + df[dst] : cudf.Series + contains the destination index for each edge + df[weight] : cusd.Series + Column is only present for weighted Graph, + then containing the weight value for each edge + """ + if self.edgelist is None: + raise Exception("Graph has no Edgelist.") + return self.edgelist.edgelist_df + + def delete_edge_list(self): + """ + Delete the edge list. + """ + # decrease reference count to free memory if the referenced objects are + # no longer used. + self.edgelist = None + + def clear(self): + """ + Empty this graph. This function is added for NetworkX compatibility. + """ + self.edgelist = None + + def number_of_vertices(self): + """ + Get the number of nodes in the graph. + """ + if self.properties.node_count is None: + if self.edgelist is not None: + ddf = self.edgelist.edgelist_df[["src", "dst"]] + self.properties.node_count = ddf.max().max().compute() + 1 + else: + raise Exception("Graph is Empty") + return self.properties.node_count + + def number_of_nodes(self): + """ + An alias of number_of_vertices(). This function is added for NetworkX + compatibility. + """ + return self.number_of_vertices() + + def number_of_edges(self, directed_edges=False): + """ + Get the number of edges in the graph. + """ + if self.edgelist is not None: + return len(self.edgelist.edgelist_df) + else: + raise Exception("Graph is Empty") + + def in_degree(self, vertex_subset=None): + """ + Compute vertex in-degree. Vertex in-degree is the number of edges + pointing into the vertex. By default, this method computes vertex + degrees for the entire set of vertices. If vertex_subset is provided, + this method optionally filters out all but those listed in + vertex_subset. + Parameters + ---------- + vertex_subset : cudf.Series or iterable container, optional + A container of vertices for displaying corresponding in-degree. + If not set, degrees are computed for the entire set of vertices. + Returns + ------- + df : cudf.DataFrame + GPU DataFrame of size N (the default) or the size of the given + vertices (vertex_subset) containing the in_degree. The ordering is + relative to the adjacency list, or that given by the specified + vertex_subset. + df[vertex] : cudf.Series + The vertex IDs (will be identical to vertex_subset if + specified). + df[degree] : cudf.Series + The computed in-degree of the corresponding vertex. + Examples + -------- + >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', + >>> dtype=['int32', 'int32', 'float32'], header=None) + >>> G = cugraph.Graph() + >>> G.from_cudf_edgelist(M, '0', '1') + >>> df = G.in_degree([0,9,12]) + """ + return self._degree(vertex_subset, x=1) + + def out_degree(self, vertex_subset=None): + """ + Compute vertex out-degree. Vertex out-degree is the number of edges + pointing out from the vertex. By default, this method computes vertex + degrees for the entire set of vertices. If vertex_subset is provided, + this method optionally filters out all but those listed in + vertex_subset. + Parameters + ---------- + vertex_subset : cudf.Series or iterable container, optional + A container of vertices for displaying corresponding out-degree. + If not set, degrees are computed for the entire set of vertices. + Returns + ------- + df : cudf.DataFrame + GPU DataFrame of size N (the default) or the size of the given + vertices (vertex_subset) containing the out_degree. 
The ordering is + relative to the adjacency list, or that given by the specified + vertex_subset. + df[vertex] : cudf.Series + The vertex IDs (will be identical to vertex_subset if + specified). + df[degree] : cudf.Series + The computed out-degree of the corresponding vertex. + Examples + -------- + >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', + >>> dtype=['int32', 'int32', 'float32'], header=None) + >>> G = cugraph.Graph() + >>> G.from_cudf_edgelist(M, '0', '1') + >>> df = G.out_degree([0,9,12]) + """ + # TODO: Add support + raise Exception("Not supported for distributed graph") + + def degree(self, vertex_subset=None): + """ + Compute vertex degree, which is the total number of edges incident + to a vertex (both in and out edges). By default, this method computes + degrees for the entire set of vertices. If vertex_subset is provided, + then this method optionally filters out all but those listed in + vertex_subset. + Parameters + ---------- + vertex_subset : cudf.Series or iterable container, optional + a container of vertices for displaying corresponding degree. If not + set, degrees are computed for the entire set of vertices. + Returns + ------- + df : cudf.DataFrame + GPU DataFrame of size N (the default) or the size of the given + vertices (vertex_subset) containing the degree. The ordering is + relative to the adjacency list, or that given by the specified + vertex_subset. + df['vertex'] : cudf.Series + The vertex IDs (will be identical to vertex_subset if + specified). + df['degree'] : cudf.Series + The computed degree of the corresponding vertex. + Examples + -------- + >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', + >>> dtype=['int32', 'int32', 'float32'], header=None) + >>> G = cugraph.Graph() + >>> G.from_cudf_edgelist(M, '0', '1') + >>> all_df = G.degree() + >>> subset_df = G.degree([0,9,12]) + """ + raise Exception("Not supported for distributed graph") + + # FIXME: vertex_subset could be a DataFrame for multi-column vertices + def degrees(self, vertex_subset=None): + """ + Compute vertex in-degree and out-degree. By default, this method + computes vertex degrees for the entire set of vertices. If + vertex_subset is provided, this method optionally filters out all but + those listed in vertex_subset. + Parameters + ---------- + vertex_subset : cudf.Series or iterable container, optional + A container of vertices for displaying corresponding degree. If not + set, degrees are computed for the entire set of vertices. + Returns + ------- + df : cudf.DataFrame + GPU DataFrame of size N (the default) or the size of the given + vertices (vertex_subset) containing the degrees. The ordering is + relative to the adjacency list, or that given by the specified + vertex_subset. + df['vertex'] : cudf.Series + The vertex IDs (will be identical to vertex_subset if + specified). + df['in_degree'] : cudf.Series + The in-degree of the vertex. + df['out_degree'] : cudf.Series + The out-degree of the vertex. 
+ Examples + -------- + >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', + >>> dtype=['int32', 'int32', 'float32'], header=None) + >>> G = cugraph.Graph() + >>> G.from_cudf_edgelist(M, '0', '1') + >>> df = G.degrees([0,9,12]) + """ + raise Exception("Not supported for distributed graph") + + def _degree(self, vertex_subset, x=0): + vertex_col, degree_col = graph_primtypes_wrapper._degree(self, x) + df = cudf.DataFrame() + df["vertex"] = vertex_col + df["degree"] = degree_col + + if self.renumbered is True: + df = self.unrenumber(df, "vertex") + + if vertex_subset is not None: + df = df[df['vertex'].isin(vertex_subset)] + + return df + + def to_directed(self, DiG): + """ + Return a directed representation of the graph. + This function sets the type of graph as DiGraph() and returns the + directed view. + Returns + ------- + G : DiGraph + A directed graph with the same nodes, and each edge (u,v,weights) + replaced by two directed edges (u,v,weights) and (v,u,weights). + Examples + -------- + >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', + >>> dtype=['int32', 'int32', 'float32'], header=None) + >>> G = cugraph.Graph() + >>> G.from_cudf_edgelist(M, '0', '1') + >>> DiG = G.to_directed() + """ + # TODO: Add support + raise Exception("Not supported for distributed graph") + + def to_undirected(self, G): + """ + Return an undirected copy of the graph. + Returns + ------- + G : Graph + A undirected graph with the same nodes, and each directed edge + (u,v,weights) replaced by an undirected edge (u,v,weights). + Examples + -------- + >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', + >>> dtype=['int32', 'int32', 'float32'], header=None) + >>> DiG = cugraph.DiGraph() + >>> DiG.from_cudf_edgelist(M, '0', '1') + >>> G = DiG.to_undirected() + """ + + # TODO: Add support + raise Exception("Not supported for distributed graph") + + def has_node(self, n): + """ + Returns True if the graph contains the node n. + """ + if self.edgelist is None: + raise Exception("Graph has no Edgelist.") + # FIXME: Check renumber map + ddf = self.edgelist.edgelist_df[["src", "dst"]] + return (ddf == n).any().any().compute() + + def has_edge(self, u, v): + """ + Returns True if the graph contains the edge (u,v). + """ + # TODO: Verify Correctness + if self.properties.renumbered: + tmp = cudf.DataFrame({"src": [u, v]}) + tmp = tmp.astype({"src": "int"}) + tmp = self.add_internal_vertex_id( + tmp, "id", "src", preserve_order=True + ) + + u = tmp["id"][0] + v = tmp["id"][1] + + df = self.edgelist.edgelist_df + return ((df["src"] == u) & (df["dst"] == v)).any().compute() + + def edges(self): + """ + Returns all the edges in the graph as a cudf.DataFrame containing + sources and destinations. It does not return the edge weights. + For viewing edges with weights use view_edge_list() + """ + return self.view_edge_list()[["src", "dst"]] + + def nodes(self): + """ + Returns all the nodes in the graph as a cudf.Series + """ + # FIXME: Return renumber map nodes + raise Exception("Not supported for distributed graph") + + def neighbors(self, n): + if self.edgelist is None: + raise Exception("Graph has no Edgelist.") + # FIXME: Add renumbering of node n + ddf = self.edgelist.edgelist_df + return ddf[ddf["src"] == n]["dst"].reset_index(drop=True) + + def compute_renumber_edge_list(self, transposed=False): + """ + Compute a renumbered edge list + This function works in the MNMG pipeline and will transform + the input dask_cudf.DataFrame into a renumbered edge list + in the prescribed direction. 
+ This function will be called by the algorithms to ensure + that the graph is renumbered properly. The graph object will + cache the most recent renumbering attempt. For benchmarking + purposes, this function can be called prior to calling a + graph algorithm so we can measure the cost of computing + the renumbering separately from the cost of executing the + algorithm. + When creating a CSR-like structure, set transposed to False. + When creating a CSC-like structure, set transposed to True. + Parameters + ---------- + transposed : (optional) bool + If True, renumber with the intent to make a CSC-like + structure. If False, renumber with the intent to make + a CSR-like structure. Defaults to False. + """ + # FIXME: What to do about edge_attr??? + # currently ignored for MNMG + + if not self.properties.renumbered: + self.edgelist = self.EdgeList(self.input_df) + self.renumber_map = None + else: + if self.edgelist is not None: + if self.properties.directed is False: + return + + if self.properties.store_transposed == transposed: + return + + del self.edgelist + + renumbered_ddf, number_map = NumberMap.renumber( + self.input_df, + self.source_columns, + self.destination_columns, + store_transposed=transposed, + ) + self.edgelist = self.EdgeList(renumbered_ddf) + self.renumber_map = number_map + self.properties.store_transposed = transposed diff --git a/python/cugraph/structure/graph_implementation/simpleGraph.py b/python/cugraph/structure/graph_implementation/simpleGraph.py new file mode 100644 index 00000000000..4e632a72231 --- /dev/null +++ b/python/cugraph/structure/graph_implementation/simpleGraph.py @@ -0,0 +1,823 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
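The distributed implementation above defers renumbering: __from_edgelist only records the input dask_cudf.DataFrame and the column names, and compute_renumber_edge_list() materializes the renumbered edge list on demand. A minimal sketch of that flow follows; the dask_cuda cluster and Comms boilerplate is the usual cuGraph multi-GPU setup and is assumed here rather than defined by this patch, and the CSV path is illustrative.

    from dask.distributed import Client
    from dask_cuda import LocalCUDACluster
    import dask_cudf
    import cugraph
    import cugraph.comms.comms as Comms

    # Assumed multi-GPU boilerplate: one worker per GPU plus comms setup.
    cluster = LocalCUDACluster()
    client = Client(cluster)
    Comms.initialize(p2p=True)

    ddf = dask_cudf.read_csv('datasets/karate.csv', delimiter=' ',
                             names=['src', 'dst', 'wt'], header=None,
                             dtype=['int32', 'int32', 'float32'])

    # Undirected distributed graphs raise, so the graph must be directed.
    G = cugraph.Graph(directed=True)
    G.from_dask_cudf_edgelist(ddf, source='src', destination='dst',
                              edge_attr='wt')

    # Nothing has been renumbered yet; an algorithm (or a benchmark)
    # triggers it explicitly, choosing a CSR-like or CSC-like orientation.
    G.compute_renumber_edge_list(transposed=False)
    print(G.number_of_vertices())

    Comms.destroy()
    client.close()
    cluster.close()

Calling compute_renumber_edge_list() before the algorithm, as above, is what lets benchmarks separate the renumbering cost from the algorithm cost.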
+ +from cugraph.structure import graph_primtypes_wrapper +from cugraph.structure.symmetrize import symmetrize +from cugraph.structure.number_map import NumberMap +import cugraph.dask.common.mg_utils as mg_utils +import cudf +import dask_cudf +import cugraph.comms.comms as Comms +import pandas as pd +import numpy as np +from cugraph.dask.structure import replication + + +# FIXME: Change to consistent camel case naming +class simpleGraphImpl: + + class EdgeList: + def __init__(self, source, destination, edge_attr=None): + self.edgelist_df = cudf.DataFrame() + self.edgelist_df["src"] = source + self.edgelist_df["dst"] = destination + self.weights = False + if edge_attr is not None: + self.weights = True + if type(edge_attr) is dict: + for k in edge_attr.keys(): + self.edgelist_df[k] = edge_attr[k] + else: + self.edgelist_df["weights"] = edge_attr + + class AdjList: + def __init__(self, offsets, indices, value=None): + self.offsets = offsets + self.indices = indices + self.weights = value # Should be a dataframe for multiple weights + + class transposedAdjList: + def __init__(self, offsets, indices, value=None): + simpleGraphImpl.AdjList.__init__(self, offsets, indices, value) + + class Properties: + def __init__(self, properties): + self.multi_edge = getattr(properties, 'multi_edge', False) + self.directed = properties.directed + self.renumbered = False + self.self_loop = None + self.isolated_vertices = None + self.node_count = None + self.edge_count = None + self.weighted = False + + def __init__(self, properties): + # Structure + self.edgelist = None + self.adjlist = None + self.transposedadjlist = None + self.renumber_map = None + self.properties = simpleGraphImpl.Properties(properties) + self._nodes = {} + + # TODO: Move to new batch class + # MG - Batch + self.batch_enabled = False + self.batch_edgelists = None + self.batch_adjlists = None + self.batch_transposed_adjlists = None + + # Functions + # FIXME: Change to public function + # FIXME: Make function more modular + def __from_edgelist( + self, + input_df, + source="source", + destination="destination", + edge_attr=None, + renumber=True, + ): + + # Verify column names present in input DataFrame + s_col = source + d_col = destination + if not isinstance(s_col, list): + s_col = [s_col] + if not isinstance(d_col, list): + d_col = [d_col] + if not ( + set(s_col).issubset(set(input_df.columns)) + and set(d_col).issubset(set(input_df.columns)) + ): + # FIXME: Raise concrete Exceptions + raise Exception( + "source column names and/or destination column " + "names not found in input. Recheck the source and " + "destination parameters" + ) + + # FIXME: check if the consolidated graph fits on the + # device before gathering all the edge lists + + # Consolidation + if isinstance(input_df, cudf.DataFrame): + if len(input_df[source]) > 2147483100: + raise Exception( + "cudf dataFrame edge list is too big " + "to fit in a single GPU" + ) + elist = input_df + elif isinstance(input_df, dask_cudf.DataFrame): + if len(input_df[source]) > 2147483100: + raise Exception( + "dask_cudf dataFrame edge list is too big " + "to fit in a single GPU" + ) + elist = input_df.compute().reset_index(drop=True) + else: + raise Exception( + "input should be a cudf.DataFrame or " + "a dask_cudf dataFrame" + ) + + # Renumbering + self.renumber_map = None + if renumber: + # FIXME: Should SG do lazy evaluation like MG? 
+ elist, renumber_map = NumberMap.renumber( + elist, source, destination, store_transposed=False + ) + source = "src" + destination = "dst" + self.properties.renumbered = True + self.renumber_map = renumber_map + else: + if type(source) is list and type(destination) is list: + raise Exception("set renumber to True for multi column ids") + + # Populate graph edgelist + source_col = elist[source] + dest_col = elist[destination] + + if edge_attr is not None: + self.weighted = True + value_col = elist[edge_attr] + else: + value_col = None + + # TODO: Update Symmetrize to work on Graph and/or DataFrame + if value_col is not None: + source_col, dest_col, value_col = symmetrize( + source_col, dest_col, value_col, + multi=self.properties.multi_edge, + symmetrize=not self.properties.directed) + if isinstance(value_col, cudf.DataFrame): + value_dict = {} + for i in value_col.columns: + value_dict[i] = value_col[i] + value_col = value_dict + else: + source_col, dest_col = symmetrize( + source_col, dest_col, multi=self.properties.multi_edge, + symmetrize=not self.properties.directed) + + self.edgelist = simpleGraphImpl.EdgeList(source_col, dest_col, + value_col) + + if self.batch_enabled: + self._replicate_edgelist() + + def to_pandas_edgelist(self, source='source', destination='destination'): + """ + Returns the graph edge list as a Pandas DataFrame. + Parameters + ---------- + source : str or array-like + source column name or array of column names + destination : str or array-like + destination column name or array of column names + Returns + ------- + df : pandas.DataFrame + """ + + gdf = self.view_edge_list() + return gdf.to_pandas() + + def to_pandas_adjacency(self): + """ + Returns the graph adjacency matrix as a Pandas DataFrame. + """ + + np_array_data = self.to_numpy_array() + pdf = pd.DataFrame(np_array_data) + if self.properties.renumbered: + nodes = self.renumber_map.implementation.df['0'].\ + values_host.tolist() + pdf.columns = nodes + pdf.index = nodes + return pdf + + def to_numpy_array(self): + """ + Returns the graph adjacency matrix as a NumPy array. + """ + + nlen = self.number_of_nodes() + elen = self.number_of_edges() + df = self.edgelist.edgelist_df + np_array = np.full((nlen, nlen), 0.0) + for i in range(0, elen): + np_array[df['src'].iloc[i], df['dst'].iloc[i]] = df['weights'].\ + iloc[i] + return np_array + + def to_numpy_matrix(self): + """ + Returns the graph adjacency matrix as a NumPy matrix. + """ + np_array = self.to_numpy_array() + return np.asmatrix(np_array) + + def view_edge_list(self): + """ + Display the edge list. Compute it if needed. + NOTE: If the graph is of type Graph() then the displayed undirected + edges are the same as displayed by networkx Graph(), but the direction + could be different i.e. an edge displayed by cugraph as (src, dst) + could be displayed as (dst, src) by networkx. + cugraph.Graph stores symmetrized edgelist internally. For displaying + undirected edgelist for a Graph the upper trianglar matrix of the + symmetrized edgelist is returned. + networkx.Graph renumbers the input and stores the upper triangle of + this renumbered input. Since the internal renumbering of networx and + cugraph is different, the upper triangular matrix of networkx + renumbered input may not be the same as cugraph's upper trianglar + matrix of the symmetrized edgelist. Hence the displayed source and + destination pairs in both will represent the same edge but node values + could be swapped. 
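+        For directed graphs (DiGraph) the stored edge list is returned as is,
+        without the upper triangular filtering described above.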
+ Returns + ------- + df : cudf.DataFrame + This cudf.DataFrame wraps source, destination and weight + df[src] : cudf.Series + contains the source index for each edge + df[dst] : cudf.Series + contains the destination index for each edge + df[weight] : cusd.Series + Column is only present for weighted Graph, + then containing the weight value for each edge + """ + if self.edgelist is None: + src, dst, weights = graph_primtypes_wrapper.view_edge_list(self) + self.edgelist = self.EdgeList(src, dst, weights) + + edgelist_df = self.edgelist.edgelist_df + + if self.properties.renumbered: + edgelist_df = self.renumber_map.unrenumber(edgelist_df, "src") + edgelist_df = self.renumber_map.unrenumber(edgelist_df, "dst") + + if not self.properties.directed: + edgelist_df = edgelist_df[edgelist_df["src"] <= edgelist_df["dst"]] + edgelist_df = edgelist_df.reset_index(drop=True) + self.properties.edge_count = len(edgelist_df) + + return edgelist_df + + def delete_edge_list(self): + """ + Delete the edge list. + """ + # decrease reference count to free memory if the referenced objects are + # no longer used. + self.edgelist = None + + def __from_adjlist(self, offset_col, index_col, value_col=None): + self.adjlist = simpleGraphImpl.AdjList(offset_col, index_col, + value_col) + + if self.batch_enabled: + self._replicate_adjlist() + + def view_adj_list(self): + """ + Display the adjacency list. Compute it if needed. + Returns + ------- + offset_col : cudf.Series + This cudf.Series wraps a gdf_column of size V + 1 (V: number of + vertices). + The gdf column contains the offsets for the vertices in this graph. + Offsets are in the range [0, E] (E: number of edges). + index_col : cudf.Series + This cudf.Series wraps a gdf_column of size E (E: number of edges). + The gdf column contains the destination index for each edge. + Destination indices are in the range [0, V) (V: number of + vertices). + value_col : cudf.Series or ``None`` + This pointer is ``None`` for unweighted graphs. + For weighted graphs, this cudf.Series wraps a gdf_column of size E + (E: number of edges). + The gdf column contains the weight value for each edge. + The expected type of the gdf_column element is floating point + number. + """ + + if self.adjlist is None: + if self.transposedadjlist is not None and\ + self.properties.directed is False: + off, ind, vals = ( + self.transposedadjlist.offsets, + self.transposedadjlist.indices, + self.transposedadjlist.weights, + ) + else: + off, ind, vals = graph_primtypes_wrapper.view_adj_list(self) + self.adjlist = self.AdjList(off, ind, vals) + + if self.batch_enabled: + self._replicate_adjlist() + + return self.adjlist.offsets, self.adjlist.indices, self.adjlist.weights + + def view_transposed_adj_list(self): + """ + Display the transposed adjacency list. Compute it if needed. + Returns + ------- + offset_col : cudf.Series + This cudf.Series wraps a gdf_column of size V + 1 (V: number of + vertices). + The gdf column contains the offsets for the vertices in this graph. + Offsets are in the range [0, E] (E: number of edges). + index_col : cudf.Series + This cudf.Series wraps a gdf_column of size E (E: number of edges). + The gdf column contains the destination index for each edge. + Destination indices are in the range [0, V) (V: number of + vertices). + value_col : cudf.Series or ``None`` + This pointer is ``None`` for unweighted graphs. + For weighted graphs, this cudf.Series wraps a gdf_column of size E + (E: number of edges). + The gdf column contains the weight value for each edge. 
+ The expected type of the gdf_column element is floating point + number. + """ + + if self.transposedadjlist is None: + if self.adjlist is not None and self.properties.directed is False: + off, ind, vals = ( + self.adjlist.offsets, + self.adjlist.indices, + self.adjlist.weights, + ) + else: + ( + off, + ind, + vals, + ) = graph_primtypes_wrapper.view_transposed_adj_list(self) + self.transposedadjlist = self.transposedAdjList(off, ind, vals) + + if self.batch_enabled: + self._replicate_transposed_adjlist() + + return ( + self.transposedadjlist.offsets, + self.transposedadjlist.indices, + self.transposedadjlist.weights, + ) + + def delete_adj_list(self): + """ + Delete the adjacency list. + """ + self.adjlist = None + + # FIXME: Update batch workflow and refactor to suitable file + def enable_batch(self): + client = mg_utils.get_client() + comms = Comms.get_comms() + + if client is None or comms is None: + msg = ( + "MG Batch needs a Dask Client and the " + "Communicator needs to be initialized." + ) + raise Exception(msg) + + self.batch_enabled = True + + if self.edgelist is not None: + if self.batch_edgelists is None: + self._replicate_edgelist() + + if self.adjlist is not None: + if self.batch_adjlists is None: + self._replicate_adjlist() + + if self.transposedadjlist is not None: + if self.batch_transposed_adjlists is None: + self._replicate_transposed_adjlist() + + def _replicate_edgelist(self): + client = mg_utils.get_client() + comms = Comms.get_comms() + + # FIXME: There might be a better way to control it + if client is None: + return + work_futures = replication.replicate_cudf_dataframe( + self.edgelist.edgelist_df, client=client, comms=comms + ) + + self.batch_edgelists = work_futures + + def _replicate_adjlist(self): + client = mg_utils.get_client() + comms = Comms.get_comms() + + # FIXME: There might be a better way to control it + if client is None: + return + + weights = None + offsets_futures = replication.replicate_cudf_series( + self.adjlist.offsets, client=client, comms=comms + ) + indices_futures = replication.replicate_cudf_series( + self.adjlist.indices, client=client, comms=comms + ) + + if self.adjlist.weights is not None: + weights = replication.replicate_cudf_series(self.adjlist.weights) + else: + weights = {worker: None for worker in offsets_futures} + + merged_futures = { + worker: [ + offsets_futures[worker], + indices_futures[worker], + weights[worker], + ] + for worker in offsets_futures + } + self.batch_adjlists = merged_futures + + # FIXME: Not implemented yet + def _replicate_transposed_adjlist(self): + self.batch_transposed_adjlists = True + + def get_two_hop_neighbors(self): + """ + Compute vertex pairs that are two hops apart. The resulting pairs are + sorted before returning. + Returns + ------- + df : cudf.DataFrame + df[first] : cudf.Series + the first vertex id of a pair, if an external vertex id + is defined by only one column + df[second] : cudf.Series + the second vertex id of a pair, if an external vertex id + is defined by only one column + """ + + df = graph_primtypes_wrapper.get_two_hop_neighbors(self) + + if self.properties.renumbered is True: + df = self.renumber_map.unrenumber(df, "first") + df = self.renumber_map.unrenumber(df, "second") + + return df + + def number_of_vertices(self): + """ + Get the number of nodes in the graph. 
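+
+        The count is computed lazily from the adjacency list, the transposed
+        adjacency list or the edge list (whichever is available) and cached
+        in self.properties.node_count.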
+ """ + if self.properties.node_count is None: + if self.adjlist is not None: + self.properties.node_count = len(self.adjlist.offsets) - 1 + elif self.transposedadjlist is not None: + self.properties.node_count = len( + self.transposedadjlist.offsets) - 1 + elif self.edgelist is not None: + df = self.edgelist.edgelist_df[["src", "dst"]] + self.properties.node_count = df.max().max() + 1 + else: + raise Exception("Graph is Empty") + return self.properties.node_count + + def number_of_nodes(self): + """ + An alias of number_of_vertices(). This function is added for NetworkX + compatibility. + """ + return self.number_of_vertices() + + def number_of_edges(self, directed_edges=False): + """ + Get the number of edges in the graph. + """ + # TODO: Move to Outer graphs? + if directed_edges and self.edgelist is not None: + return len(self.edgelist.edgelist_df) + if self.properties.edge_count is None: + if self.edgelist is not None: + if self.properties.directed is False: + self.properties.edge_count = len( + self.edgelist.edgelist_df[ + self.edgelist.edgelist_df["src"] + >= self.edgelist.edgelist_df["dst"] + ] + ) + else: + self.properties.edge_count = len(self.edgelist.edgelist_df) + elif self.adjlist is not None: + self.properties.edge_count = len(self.adjlist.indices) + elif self.transposedadjlist is not None: + self.properties.edge_count = len( + self.transposedadjlist.indices) + else: + raise ValueError("Graph is Empty") + return self.properties.edge_count + + def in_degree(self, vertex_subset=None): + """ + Compute vertex in-degree. Vertex in-degree is the number of edges + pointing into the vertex. By default, this method computes vertex + degrees for the entire set of vertices. If vertex_subset is provided, + this method optionally filters out all but those listed in + vertex_subset. + Parameters + ---------- + vertex_subset : cudf.Series or iterable container, optional + A container of vertices for displaying corresponding in-degree. + If not set, degrees are computed for the entire set of vertices. + Returns + ------- + df : cudf.DataFrame + GPU DataFrame of size N (the default) or the size of the given + vertices (vertex_subset) containing the in_degree. The ordering is + relative to the adjacency list, or that given by the specified + vertex_subset. + df[vertex] : cudf.Series + The vertex IDs (will be identical to vertex_subset if + specified). + df[degree] : cudf.Series + The computed in-degree of the corresponding vertex. + Examples + -------- + >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', + >>> dtype=['int32', 'int32', 'float32'], header=None) + >>> G = cugraph.Graph() + >>> G.from_cudf_edgelist(M, '0', '1') + >>> df = G.in_degree([0,9,12]) + """ + return self._degree(vertex_subset, x=1) + + def out_degree(self, vertex_subset=None): + """ + Compute vertex out-degree. Vertex out-degree is the number of edges + pointing out from the vertex. By default, this method computes vertex + degrees for the entire set of vertices. If vertex_subset is provided, + this method optionally filters out all but those listed in + vertex_subset. + Parameters + ---------- + vertex_subset : cudf.Series or iterable container, optional + A container of vertices for displaying corresponding out-degree. + If not set, degrees are computed for the entire set of vertices. + Returns + ------- + df : cudf.DataFrame + GPU DataFrame of size N (the default) or the size of the given + vertices (vertex_subset) containing the out_degree. 
The ordering is + relative to the adjacency list, or that given by the specified + vertex_subset. + df[vertex] : cudf.Series + The vertex IDs (will be identical to vertex_subset if + specified). + df[degree] : cudf.Series + The computed out-degree of the corresponding vertex. + Examples + -------- + >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', + >>> dtype=['int32', 'int32', 'float32'], header=None) + >>> G = cugraph.Graph() + >>> G.from_cudf_edgelist(M, '0', '1') + >>> df = G.out_degree([0,9,12]) + """ + return self._degree(vertex_subset, x=2) + + def degree(self, vertex_subset=None): + """ + Compute vertex degree, which is the total number of edges incident + to a vertex (both in and out edges). By default, this method computes + degrees for the entire set of vertices. If vertex_subset is provided, + then this method optionally filters out all but those listed in + vertex_subset. + Parameters + ---------- + vertex_subset : cudf.Series or iterable container, optional + a container of vertices for displaying corresponding degree. If not + set, degrees are computed for the entire set of vertices. + Returns + ------- + df : cudf.DataFrame + GPU DataFrame of size N (the default) or the size of the given + vertices (vertex_subset) containing the degree. The ordering is + relative to the adjacency list, or that given by the specified + vertex_subset. + df['vertex'] : cudf.Series + The vertex IDs (will be identical to vertex_subset if + specified). + df['degree'] : cudf.Series + The computed degree of the corresponding vertex. + Examples + -------- + >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', + >>> dtype=['int32', 'int32', 'float32'], header=None) + >>> G = cugraph.Graph() + >>> G.from_cudf_edgelist(M, '0', '1') + >>> all_df = G.degree() + >>> subset_df = G.degree([0,9,12]) + """ + return self._degree(vertex_subset) + + # FIXME: vertex_subset could be a DataFrame for multi-column vertices + def degrees(self, vertex_subset=None): + """ + Compute vertex in-degree and out-degree. By default, this method + computes vertex degrees for the entire set of vertices. If + vertex_subset is provided, this method optionally filters out all but + those listed in vertex_subset. + Parameters + ---------- + vertex_subset : cudf.Series or iterable container, optional + A container of vertices for displaying corresponding degree. If not + set, degrees are computed for the entire set of vertices. + Returns + ------- + df : cudf.DataFrame + GPU DataFrame of size N (the default) or the size of the given + vertices (vertex_subset) containing the degrees. The ordering is + relative to the adjacency list, or that given by the specified + vertex_subset. + df['vertex'] : cudf.Series + The vertex IDs (will be identical to vertex_subset if + specified). + df['in_degree'] : cudf.Series + The in-degree of the vertex. + df['out_degree'] : cudf.Series + The out-degree of the vertex. 
+ Examples + -------- + >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', + >>> dtype=['int32', 'int32', 'float32'], header=None) + >>> G = cugraph.Graph() + >>> G.from_cudf_edgelist(M, '0', '1') + >>> df = G.degrees([0,9,12]) + """ + ( + vertex_col, + in_degree_col, + out_degree_col, + ) = graph_primtypes_wrapper._degrees(self) + + df = cudf.DataFrame() + df["vertex"] = vertex_col + df["in_degree"] = in_degree_col + df["out_degree"] = out_degree_col + + if self.properties.renumbered is True: + df = self.renumber_map.unrenumber(df, "vertex") + + if vertex_subset is not None: + df = df[df['vertex'].isin(vertex_subset)] + + return df + + def _degree(self, vertex_subset, x=0): + vertex_col, degree_col = graph_primtypes_wrapper._degree(self, x) + df = cudf.DataFrame() + df["vertex"] = vertex_col + df["degree"] = degree_col + + if self.properties.renumbered is True: + df = self.renumber_map.unrenumber(df, "vertex") + + if vertex_subset is not None: + df = df[df['vertex'].isin(vertex_subset)] + + return df + + def to_directed(self, DiG): + """ + Return a directed representation of the graph Implementation. + This function copies the internal structures and returns the + directed view. + """ + DiG.properties.renumbered = self.properties.renumbered + DiG.renumber_map = self.renumber_map + DiG.edgelist = self.edgelist + DiG.adjlist = self.adjlist + DiG.transposedadjlist = self.transposedadjlist + + def to_undirected(self, G): + """ + Return an undirected copy of the graph. + """ + G.properties.renumbered = self.properties.renumbered + G.renumber_map = self.renumber_map + if self.properties.directed is False: + G.edgelist = self.edgelist + G.adjlist = self.adjlist + G.transposedadjlist = self.transposedadjlist + else: + df = self.edgelist.edgelist_df + if self.edgelist.weights: + source_col, dest_col, value_col = symmetrize( + df["src"], df["dst"], df["weights"] + ) + else: + source_col, dest_col = symmetrize(df["src"], df["dst"]) + value_col = None + G.edgelist = simpleGraphImpl.EdgeList(source_col, dest_col, + value_col) + + def has_node(self, n): + """ + Returns True if the graph contains the node n. + """ + if self.properties.renumbered: + tmp = self.renumber_map.to_internal_vertex_id(cudf.Series([n])) + return tmp[0] is not cudf.NA and tmp[0] >= 0 + else: + df = self.edgelist.edgelist_df[["src", "dst"]] + return (df == n).any().any() + + def has_edge(self, u, v): + """ + Returns True if the graph contains the edge (u,v). + """ + if self.properties.renumbered: + tmp = cudf.DataFrame({"src": [u, v]}) + tmp = tmp.astype({"src": "int"}) + tmp = self.renumber_map.add_internal_vertex_id( + tmp, "id", "src", preserve_order=True + ) + + u = tmp["id"][0] + v = tmp["id"][1] + + df = self.edgelist.edgelist_df + return ((df["src"] == u) & (df["dst"] == v)).any() + + def has_self_loop(self): + """ + Returns True if the graph has self loop. + """ + # Detect self loop + if self.properties.self_loop is None: + elist = self.edgelist.edgelist_df + if (elist["src"] == elist["dst"]).any(): + self.properties.self_loop = True + else: + self.properties.self_loop = False + return self.properties.self_loop + + def edges(self): + """ + Returns all the edges in the graph as a cudf.DataFrame containing + sources and destinations. It does not return the edge weights. 
+ For viewing edges with weights use view_edge_list() + """ + return self.view_edge_list()[["src", "dst"]] + + def nodes(self): + """ + Returns all the nodes in the graph as a cudf.Series + """ + if self.edgelist is not None: + df = self.edgelist.edgelist_df + if self.properties.renumbered: + # FIXME: If vertices are multicolumn + # this needs to return a dataframe + # FIXME: This relies on current implementation + # of NumberMap, should not really expose + # this, perhaps add a method to NumberMap + return self.renumber_map.implementation.df["0"] + else: + return cudf.concat([df["src"], df["dst"]]).unique() + if self.adjlist is not None: + return cudf.Series(np.arange(0, self.number_of_nodes())) + + def neighbors(self, n): + if self.edgelist is None: + raise Exception("Graph has no Edgelist.") + if self.properties.renumbered: + node = self.renumber_map.to_internal_vertex_id(cudf.Series([n])) + if len(node) == 0: + return cudf.Series(dtype="int") + n = node[0] + + df = self.edgelist.edgelist_df + neighbors = df[df["src"] == n]["dst"].reset_index(drop=True) + if self.properties.renumbered: + # FIXME: Multi-column vertices + return self.renumber_map.from_internal_vertex_id(neighbors)["0"] + else: + return neighbors diff --git a/python/cugraph/structure/hypergraph.py b/python/cugraph/structure/hypergraph.py index a11c937d83d..c5a1ac39e4f 100644 --- a/python/cugraph/structure/hypergraph.py +++ b/python/cugraph/structure/hypergraph.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -36,7 +36,7 @@ import cudf import numpy as np -from cugraph.structure.graph import Graph +from cugraph.structure.graph_classes import Graph def hypergraph( @@ -66,24 +66,20 @@ def hypergraph( components as dataframes. The transform reveals relationships between the rows and unique values. This transform is useful for lists of events, samples, relationships, and other structured high-dimensional data. - The transform creates a node for every row, and turns a row's column entries into node attributes. If direct=False (default), every unique value within a column is also turned into a node. Edges are added to connect a row's nodes to each of its column nodes, or if direct=True, to one another. Nodes are given the attribute specified by ``NODETYPE`` that corresponds to the originating column name, or if a row ``EVENTID``. - Consider a list of events. Each row represents a distinct event, and each column some metadata about an event. If multiple events have common metadata, they will be transitively connected through those metadata values. Conversely, if an event has unique metadata, the unique metadata will turn into nodes that only have connections to the event node. - For best results, set ``EVENTID`` to a row's unique ID, ``SKIP`` to all non-categorical columns (or ``columns`` to all categorical columns), and ``categories`` to group columns with the same kinds of values. - Parameters ---------- values : cudf.DataFrame @@ -130,7 +126,6 @@ def hypergraph( The name to use as the node type column in the graph and node DFs. EDGETYPE : str, optional, default "edge_type" The name to use as the edge type column in the graph and edge DF. 
- Returns ------- result : dict {"nodes", "edges", "graph", "events", "entities"} diff --git a/python/cugraph/structure/number_map.py b/python/cugraph/structure/number_map.py index 73316756ef2..2b7c2b2f296 100644 --- a/python/cugraph/structure/number_map.py +++ b/python/cugraph/structure/number_map.py @@ -182,7 +182,6 @@ def to_internal_vertex_id(self, ddf, col_names): on=self.col_names, how="right", ) - print(x.compute()) return x['global_id'] def from_internal_vertex_id( diff --git a/python/cugraph/structure/symmetrize.py b/python/cugraph/structure/symmetrize.py index 8720f7ad343..442701f6508 100644 --- a/python/cugraph/structure/symmetrize.py +++ b/python/cugraph/structure/symmetrize.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -11,7 +11,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from cugraph.structure import graph as csg +from cugraph.structure import graph_classes as csg import cudf import dask_cudf @@ -201,8 +201,12 @@ def symmetrize(source_col, dest_col, value_col=None, multi=False, csg.null_check(source_col) csg.null_check(dest_col) if value_col is not None: - weight_name = "value" - input_df.insert(len(input_df.columns), "value", value_col) + if isinstance(value_col, cudf.Series): + weight_name = "value" + input_df.insert(len(input_df.columns), "value", value_col) + elif isinstance(value_col, cudf.DataFrame): + input_df = cudf.concat([input_df, value_col], axis=1) + output_df = None if type(source_col) is dask_cudf.Series: output_df = symmetrize_ddf( @@ -211,11 +215,17 @@ def symmetrize(source_col, dest_col, value_col=None, multi=False, else: output_df = symmetrize_df(input_df, "source", "destination", multi, symmetrize) - if value_col is not None: - return ( - output_df["source"], - output_df["destination"], - output_df["value"], - ) + if isinstance(value_col, cudf.Series): + return ( + output_df["source"], + output_df["destination"], + output_df["value"], + ) + elif isinstance(value_col, cudf.DataFrame): + return ( + output_df["source"], + output_df["destination"], + output_df[value_col.columns], + ) return output_df["source"], output_df["destination"] diff --git a/python/cugraph/tests/test_graph.py b/python/cugraph/tests/test_graph.py index 348f7e2e130..933a34aef3c 100644 --- a/python/cugraph/tests/test_graph.py +++ b/python/cugraph/tests/test_graph.py @@ -200,6 +200,7 @@ def test_add_adj_list_to_edge_list(graph_file): # cugraph add_adj_list to_edge_list call G = cugraph.DiGraph() G.from_cudf_adjlist(offsets, indices, None) + edgelist = G.view_edge_list() sources_cu = edgelist["src"] destinations_cu = edgelist["dst"] @@ -535,6 +536,7 @@ def test_to_directed(graph_file): DiG = G.to_directed() DiGnx = Gnx.to_directed() + assert DiG.is_directed() assert DiG.number_of_nodes() == DiGnx.number_of_nodes() assert DiG.number_of_edges() == DiGnx.number_of_edges() @@ -569,6 +571,7 @@ def test_to_undirected(graph_file): G = DiG.to_undirected() Gnx = DiGnx.to_undirected() + assert not G.is_directed() assert G.number_of_nodes() == Gnx.number_of_nodes() assert G.number_of_edges() == Gnx.number_of_edges() @@ -627,17 +630,13 @@ def test_bipartite_api(graph_file): set2_exp = cudf.Series(set(nodes.values_host) - set(set1_exp.values_host)) - G = cugraph.Graph() - assert not 
G.is_bipartite() + G = cugraph.BiPartiteGraph() + assert G.is_bipartite() # Add a set of nodes present in one partition G.add_nodes_from(set1_exp, bipartite='set1') G.from_cudf_edgelist(cu_M, source='0', destination='1') - # Check if Graph is bipartite. It should return True since we have - # added the partition in add_nodes_from() - assert G.is_bipartite() - # Call sets() to get the bipartite set of nodes. set1, set2 = G.sets() diff --git a/python/cugraph/traversal/bfs.py b/python/cugraph/traversal/bfs.py index 1e6cc42b760..d397b5a4241 100644 --- a/python/cugraph/traversal/bfs.py +++ b/python/cugraph/traversal/bfs.py @@ -14,7 +14,7 @@ import cudf from cugraph.traversal import bfs_wrapper -from cugraph.structure.graph import Graph, DiGraph +from cugraph.structure.graph_classes import Graph, DiGraph from cugraph.utilities import (ensure_cugraph_obj, is_matrix_type, is_cp_matrix_type, diff --git a/python/cugraph/traversal/traveling_salesperson.py b/python/cugraph/traversal/traveling_salesperson.py index 7aea7ae603f..53d411c92ae 100644 --- a/python/cugraph/traversal/traveling_salesperson.py +++ b/python/cugraph/traversal/traveling_salesperson.py @@ -12,7 +12,7 @@ # limitations under the License. from cugraph.traversal import traveling_salesperson_wrapper -from cugraph.structure.graph import null_check +from cugraph.structure.graph_classes import null_check import cudf diff --git a/python/cugraph/tree/minimum_spanning_tree.py b/python/cugraph/tree/minimum_spanning_tree.py index 45e996aa083..6a5f7b5bf38 100644 --- a/python/cugraph/tree/minimum_spanning_tree.py +++ b/python/cugraph/tree/minimum_spanning_tree.py @@ -12,7 +12,7 @@ # limitations under the License. from cugraph.tree import minimum_spanning_tree_wrapper -from cugraph.structure.graph import Graph +from cugraph.structure.graph_classes import Graph from cugraph.utilities import check_nx_graph from cugraph.utilities import cugraph_to_nx From 8b1004e594d1ecb532003d1a971cc1c31e42b932 Mon Sep 17 00:00:00 2001 From: Andrei Schaffer <37386037+aschaffer@users.noreply.github.com> Date: Wed, 28 Apr 2021 15:56:27 -0500 Subject: [PATCH 238/343] Added Random Walks COO convertor and profiling (#1531) This PR is used to track enhancements to Random Walks functionality: 1. Paths2COO convertor: converts coalesced vertex/weight paths to COO format + offsets (including C++ API for Cython); 2. RW profiling; 3. 
Moving functionality / tests out of `experimental` sub-dirs; Authors: - Andrei Schaffer (https://github.com/aschaffer) - Rick Ratzel (https://github.com/rlratzel) Approvers: - Rick Ratzel (https://github.com/rlratzel) - Chuck Hastings (https://github.com/ChuckHastings) URL: https://github.com/rapidsai/cugraph/pull/1531 --- cpp/include/algorithms.hpp | 1 + cpp/include/utilities/cython.hpp | 26 +++ cpp/include/utilities/path_retrieval.hpp | 26 +++ cpp/src/sampling/random_walks.cu | 26 ++- .../random_walks.cuh | 193 ++++++++++++++++ cpp/src/utilities/cython.cu | 31 +++ cpp/src/utilities/high_res_timer.hpp | 15 ++ cpp/tests/CMakeLists.txt | 21 +- cpp/tests/sampling/random_walks_profiling.cu | 216 ++++++++++++++++++ .../random_walks_test.cu | 2 +- .../random_walks_utils.cuh | 2 +- .../rw_low_level_test.cu | 120 +++++++++- cpp/tests/utilities/base_fixture.hpp | 2 +- 13 files changed, 670 insertions(+), 11 deletions(-) rename cpp/src/{experimental => sampling}/random_walks.cuh (82%) create mode 100644 cpp/tests/sampling/random_walks_profiling.cu rename cpp/tests/{experimental => sampling}/random_walks_test.cu (99%) rename cpp/tests/{experimental => sampling}/random_walks_utils.cuh (99%) rename cpp/tests/{experimental => sampling}/rw_low_level_test.cu (86%) diff --git a/cpp/include/algorithms.hpp b/cpp/include/algorithms.hpp index 0b45b799357..7a7a0219d74 100644 --- a/cpp/include/algorithms.hpp +++ b/cpp/include/algorithms.hpp @@ -1280,5 +1280,6 @@ random_walks(raft::handle_t const &handle, typename graph_t::vertex_type const *ptr_d_start, index_t num_paths, index_t max_depth); + } // namespace experimental } // namespace cugraph diff --git a/cpp/include/utilities/cython.hpp b/cpp/include/utilities/cython.hpp index c1d0c836225..0d6cb2f63d0 100644 --- a/cpp/include/utilities/cython.hpp +++ b/cpp/include/utilities/cython.hpp @@ -207,6 +207,26 @@ struct random_walk_ret_t { std::unique_ptr d_sizes_; }; +// aggregate for random_walks() COO return type +// to be exposed to cython: +// +struct random_walk_coo_t { + size_t num_edges_; // total number of COO triplets (for all paths) + size_t num_offsets_; // offsets of where each COO set starts for each path; + // NOTE: this can differ than num_paths_, + // because paths with 0 edges (one vertex) + // don't participate to the COO + + std::unique_ptr + d_src_; // coalesced set of COO source vertices; |d_src_| = num_edges_ + std::unique_ptr + d_dst_; // coalesced set of COO destination vertices; |d_dst_| = num_edges_ + std::unique_ptr + d_weights_; // coalesced set of COO edge weights; |d_weights_| = num_edges_ + std::unique_ptr + d_offsets_; // offsets where each COO subset for each path starts; |d_offsets_| = num_offsets_ +}; + // wrapper for renumber_edgelist() return // (unrenumbering maps, etc.) 
// @@ -479,6 +499,12 @@ call_random_walks(raft::handle_t const& handle, edge_t num_paths, edge_t max_depth); +// convertor from random_walks return type to COO: +// +template +std::unique_ptr random_walks_to_coo(raft::handle_t const& handle, + random_walk_ret_t& rw_ret); + // wrapper for shuffling: // template diff --git a/cpp/include/utilities/path_retrieval.hpp b/cpp/include/utilities/path_retrieval.hpp index e626d6af1ab..fd0d36b67d6 100644 --- a/cpp/include/utilities/path_retrieval.hpp +++ b/cpp/include/utilities/path_retrieval.hpp @@ -42,4 +42,30 @@ void get_traversed_cost(raft::handle_t const &handle, weight_t *out, vertex_t stop_vertex, vertex_t num_vertices); + +namespace experimental { +/** + * @brief returns the COO format (src_vector, dst_vector) from the random walks (RW) + * paths. + * + * @tparam vertex_t Type of vertex indices. + * @tparam index_t Type used to store indexing and sizes. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param coalesced_sz_v coalesced vertex vector size. + * @param num_paths number of paths. + * @param d_coalesced_v coalesced vertex buffer. + * @param d_sizes paths size buffer. + * @return tuple of (src_vertex_vector, dst_Vertex_vector, path_offsets), where + * path_offsets are the offsets where the COO set of each path starts. + */ +template +std:: + tuple, rmm::device_uvector, rmm::device_uvector> + convert_paths_to_coo(raft::handle_t const &handle, + index_t coalesced_sz_v, + index_t num_paths, + rmm::device_buffer &&d_coalesced_v, + rmm::device_buffer &&d_sizes); +} // namespace experimental } // namespace cugraph diff --git a/cpp/src/sampling/random_walks.cu b/cpp/src/sampling/random_walks.cu index 88d5d9ed5c8..d1d0382d46f 100644 --- a/cpp/src/sampling/random_walks.cu +++ b/cpp/src/sampling/random_walks.cu @@ -17,7 +17,7 @@ // Andrei Schaffer, aschaffer@nvidia.com // #include -#include +#include "random_walks.cuh" namespace cugraph { namespace experimental { @@ -73,6 +73,30 @@ template std:: int64_t const* ptr_d_start, int64_t num_paths, int64_t max_depth); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + convert_paths_to_coo(raft::handle_t const& handle, + int32_t coalesced_sz_v, + int32_t num_paths, + rmm::device_buffer&& d_coalesced_v, + rmm::device_buffer&& d_sizes); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + convert_paths_to_coo(raft::handle_t const& handle, + int64_t coalesced_sz_v, + int64_t num_paths, + rmm::device_buffer&& d_coalesced_v, + rmm::device_buffer&& d_sizes); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + convert_paths_to_coo(raft::handle_t const& handle, + int64_t coalesced_sz_v, + int64_t num_paths, + rmm::device_buffer&& d_coalesced_v, + rmm::device_buffer&& d_sizes); //} } // namespace experimental } // namespace cugraph diff --git a/cpp/src/experimental/random_walks.cuh b/cpp/src/sampling/random_walks.cuh similarity index 82% rename from cpp/src/experimental/random_walks.cuh rename to cpp/src/sampling/random_walks.cuh index aea8f3d8420..82665003769 100644 --- a/cpp/src/experimental/random_walks.cuh +++ b/cpp/src/sampling/random_walks.cuh @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -103,6 +104,12 @@ struct device_const_vector_view { index_t size_; }; +template +value_t const* raw_const_ptr(device_const_vector_view& dv) +{ + return dv.begin(); +} + // raft random generator: // (using upper-bound 
cached "map" // giving out_deg(v) for each v in [0, |V|); @@ -840,6 +847,156 @@ random_walks_impl(raft::handle_t const& handle, CUGRAPH_FAIL("Not implemented yet."); } +// provides conversion to (coalesced) path to COO format: +// (which in turn provides an API consistent with egonet) +// +template +struct coo_convertor_t { + coo_convertor_t(raft::handle_t const& handle, index_t num_paths) + : handle_(handle), num_paths_(num_paths) + { + } + + std::tuple, device_vec_t, device_vec_t> operator()( + device_const_vector_view& d_coalesced_v, + device_const_vector_view& d_sizes) const + { + CUGRAPH_EXPECTS(static_cast(d_sizes.size()) == num_paths_, "Invalid size vector."); + + auto tupl_fill = fill_stencil(d_sizes); + auto&& d_stencil = std::move(std::get<0>(tupl_fill)); + auto total_sz_v = std::get<1>(tupl_fill); + auto&& d_sz_incl_scan = std::move(std::get<2>(tupl_fill)); + + CUGRAPH_EXPECTS(static_cast(d_coalesced_v.size()) == total_sz_v, + "Inconsistent vertex coalesced size data."); + + auto src_dst_tpl = gather_pairs(d_coalesced_v, d_stencil, total_sz_v); + + auto&& d_src = std::move(std::get<0>(src_dst_tpl)); + auto&& d_dst = std::move(std::get<1>(src_dst_tpl)); + + device_vec_t d_sz_w_scan(num_paths_, handle_.get_stream()); + + // copy vertex path sizes that are > 1: + // (because vertex_path_sz translates + // into edge_path_sz = vertex_path_sz - 1, + // and edge_paths_sz == 0 don't contribute + // anything): + // + auto new_end_it = + thrust::copy_if(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + d_sizes.begin(), + d_sizes.end(), + d_sz_w_scan.begin(), + [] __device__(auto sz_value) { return sz_value > 1; }); + + // resize to new_end: + // + d_sz_w_scan.resize(thrust::distance(d_sz_w_scan.begin(), new_end_it), handle_.get_stream()); + + // get paths' edge number exclusive scan + // by transforming paths' vertex numbers that + // are > 1, via tranaformation: + // edge_path_sz = (vertex_path_sz-1): + // + thrust::transform_exclusive_scan( + rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + d_sz_w_scan.begin(), + d_sz_w_scan.end(), + d_sz_w_scan.begin(), + [] __device__(auto sz) { return sz - 1; }, + index_t{0}, + thrust::plus{}); + + return std::make_tuple(std::move(d_src), std::move(d_dst), std::move(d_sz_w_scan)); + } + + std::tuple, index_t, device_vec_t> fill_stencil( + device_const_vector_view& d_sizes) const + { + device_vec_t d_scan(num_paths_, handle_.get_stream()); + thrust::inclusive_scan(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + d_sizes.begin(), + d_sizes.end(), + d_scan.begin()); + + index_t total_sz{0}; + CUDA_TRY(cudaMemcpy( + &total_sz, raw_ptr(d_scan) + num_paths_ - 1, sizeof(index_t), cudaMemcpyDeviceToHost)); + + device_vec_t d_stencil(total_sz, handle_.get_stream()); + + // initialize stencil to all 1's: + // + thrust::copy_n(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + thrust::make_constant_iterator(1), + d_stencil.size(), + d_stencil.begin()); + + // set to 0 entries positioned at inclusive_scan(sizes[]), + // because those are path "breakpoints", where a path end + // and the next one starts, hence there cannot be an edge + // between a path ending vertex and next path starting vertex; + // + thrust::scatter(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + thrust::make_constant_iterator(0), + thrust::make_constant_iterator(0) + num_paths_, + d_scan.begin(), + d_stencil.begin()); + + return std::make_tuple(std::move(d_stencil), total_sz, std::move(d_scan)); + } 
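+
+  // Worked example of the conversion performed by this functor, using
+  // hypothetical input sizes = {2, 3} and coalesced vertices = {a,b, c,d,e}:
+  //   inclusive_scan(sizes)  -> {2, 5}
+  //   stencil                -> {1, 1, 0, 1, 1}  (0 marks a path breakpoint)
+  //   valid source indices   -> {0, 2, 3}        (i is valid if stencil[i + 1] == 1)
+  //   gathered (src, dst)    -> (a,b), (c,d), (d,e)
+  //   path offsets           -> {0, 1}           (exclusive scan of per-path edge counts)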
+ + std::tuple, device_vec_t> gather_pairs( + device_const_vector_view& d_coalesced_v, + device_vec_t const& d_stencil, + index_t total_sz_v) const + { + auto total_sz_w = total_sz_v - num_paths_; + device_vec_t valid_src_indx(total_sz_w, handle_.get_stream()); + + // generate valid vertex src indices, + // which is any index in {0,...,total_sz_v - 2} + // provided the next index position; i.e., (index+1), + // in stencil is not 0; (if it is, there's no "next" + // or dst index, because the path has ended); + // + thrust::copy_if(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(total_sz_v - 1), + valid_src_indx.begin(), + [ptr_d_stencil = raw_const_ptr(d_stencil)] __device__(auto indx) { + auto dst_indx = indx + 1; + return ptr_d_stencil[dst_indx] == 1; + }); + + device_vec_t d_src_v(total_sz_w, handle_.get_stream()); + device_vec_t d_dst_v(total_sz_w, handle_.get_stream()); + + // construct pair of src[], dst[] by gathering + // from d_coalesced_v all pairs + // at entries (valid_src_indx, valid_src_indx+1), + // where the set of valid_src_indx was + // generated at the previous step; + // + thrust::transform( + rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + valid_src_indx.begin(), + valid_src_indx.end(), + thrust::make_zip_iterator(thrust::make_tuple(d_src_v.begin(), d_dst_v.begin())), // start_zip + [ptr_d_vertex = raw_const_ptr(d_coalesced_v)] __device__(auto indx) { + return thrust::make_tuple(ptr_d_vertex[indx], ptr_d_vertex[indx + 1]); + }); + + return std::make_tuple(std::move(d_src_v), std::move(d_dst_v)); + } + + private: + raft::handle_t const& handle_; + index_t num_paths_; +}; + } // namespace detail /** @@ -883,5 +1040,41 @@ random_walks(raft::handle_t const& handle, std::move(std::get<1>(quad_tuple)), std::move(std::get<2>(quad_tuple))); } + +/** + * @brief returns the COO format (src_vector, dst_vector) from the random walks (RW) + * paths. + * + * @tparam vertex_t Type of vertex indices. + * @tparam index_t Type used to store indexing and sizes. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param coalesced_sz_v coalesced vertex vector size. + * @param num_paths number of paths. + * @param d_coalesced_v coalesced vertex buffer. + * @param d_sizes paths size buffer. + * @return tuple of (src_vertex_vector, dst_Vertex_vector, path_offsets), where + * path_offsets are the offsets where the COO set of each path starts. 
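+ *
+ * For example (hypothetical input), coalesced vertices {5,3, 4, 9,0,1} with
+ * path sizes {2, 1, 3} yield src {5, 9, 0}, dst {3, 0, 1} and path_offsets
+ * {0, 1}; the single-vertex path contributes no COO entries.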
+ */ +template +std:: + tuple, rmm::device_uvector, rmm::device_uvector> + convert_paths_to_coo(raft::handle_t const& handle, + index_t coalesced_sz_v, + index_t num_paths, + rmm::device_buffer&& d_coalesced_v, + rmm::device_buffer&& d_sizes) +{ + detail::coo_convertor_t to_coo(handle, num_paths); + + detail::device_const_vector_view d_v_view( + static_cast(d_coalesced_v.data()), coalesced_sz_v); + + detail::device_const_vector_view d_sz_view(static_cast(d_sizes.data()), + num_paths); + + return to_coo(d_v_view, d_sz_view); +} + } // namespace experimental } // namespace cugraph diff --git a/cpp/src/utilities/cython.cu b/cpp/src/utilities/cython.cu index 9729039fd48..b4dcd84a7e1 100644 --- a/cpp/src/utilities/cython.cu +++ b/cpp/src/utilities/cython.cu @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -844,6 +845,27 @@ call_random_walks(raft::handle_t const& handle, } } +template +std::unique_ptr random_walks_to_coo(raft::handle_t const& handle, + random_walk_ret_t& rw_tri) +{ + auto triplet = cugraph::experimental::convert_paths_to_coo( + handle, + static_cast(rw_tri.coalesced_sz_v_), + static_cast(rw_tri.num_paths_), + std::move(*rw_tri.d_coalesced_v_), + std::move(*rw_tri.d_sizes_)); + + random_walk_coo_t rw_coo{std::get<0>(triplet).size(), + std::get<2>(triplet).size(), + std::make_unique(std::get<0>(triplet).release()), + std::make_unique(std::get<1>(triplet).release()), + std::move(rw_tri.d_coalesced_w_), // pass-through + std::make_unique(std::get<2>(triplet).release())}; + + return std::make_unique(std::move(rw_coo)); +} + // Wrapper for calling SSSP through a graph container template void call_sssp(raft::handle_t const& handle, @@ -1233,6 +1255,15 @@ template std::unique_ptr call_random_walks( int64_t num_paths, int64_t max_depth); +template std::unique_ptr random_walks_to_coo( + raft::handle_t const& handle, random_walk_ret_t& rw_tri); + +template std::unique_ptr random_walks_to_coo( + raft::handle_t const& handle, random_walk_ret_t& rw_tri); + +template std::unique_ptr random_walks_to_coo( + raft::handle_t const& handle, random_walk_ret_t& rw_tri); + template void call_sssp(raft::handle_t const& handle, graph_container_t const& graph_container, int32_t* identifiers, diff --git a/cpp/src/utilities/high_res_timer.hpp b/cpp/src/utilities/high_res_timer.hpp index a731c5edc9d..807496c8f86 100644 --- a/cpp/src/utilities/high_res_timer.hpp +++ b/cpp/src/utilities/high_res_timer.hpp @@ -18,6 +18,8 @@ #include #include #include +#include +#include #include //#define TIMING @@ -52,6 +54,19 @@ class HighResTimer { it->second.second += stop_time.tv_sec * 1000000000 + stop_time.tv_nsec; } + double get_average_runtime(std::string const &label) + { + auto it = timers.find(label); + if (it != timers.end()) { + return (static_cast(it->second.second) / (1000000.0 * it->second.first)); + } else { + std::stringstream ss; + ss << "ERROR: timing label: " << label << "not found."; + + throw std::runtime_error(ss.str()); + } + } + // // Add display functions... 
specific label or entire structure // diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 7a544fd75fb..80484fdfad6 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -539,16 +539,25 @@ ConfigureTest(EXPERIMENTAL_KATZ_CENTRALITY_TEST "${EXPERIMENTAL_KATZ_CENTRALITY_ ################################################################################################### # - Experimental RANDOM_WALKS tests ------------------------------------------------------------ -set(EXPERIMENTAL_RANDOM_WALKS_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/experimental/random_walks_test.cu") +set(RANDOM_WALKS_TEST_SRCS + "${CMAKE_CURRENT_SOURCE_DIR}/sampling/random_walks_test.cu") -ConfigureTest(EXPERIMENTAL_RANDOM_WALKS_TEST "${EXPERIMENTAL_RANDOM_WALKS_TEST_SRCS}") +ConfigureTest(RANDOM_WALKS_TEST "${RANDOM_WALKS_TEST_SRCS}") ################################################################################################### -set(EXPERIMENTAL_RANDOM_WALKS_LOW_LEVEL_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/experimental/rw_low_level_test.cu") +set(RANDOM_WALKS_LOW_LEVEL_SRCS + "${CMAKE_CURRENT_SOURCE_DIR}/sampling/rw_low_level_test.cu") -ConfigureTest(EXPERIMENTAL_RANDOM_WALKS_LOW_LEVEL_TEST "${EXPERIMENTAL_RANDOM_WALKS_LOW_LEVEL_SRCS}") +ConfigureTest(RANDOM_WALKS_LOW_LEVEL_TEST "${RANDOM_WALKS_LOW_LEVEL_SRCS}") + +################################################################################################### +set(RANDOM_WALKS_PROFILING_SRCS + "${CMAKE_CURRENT_SOURCE_DIR}/sampling/random_walks_profiling.cu") + +# FIXME: since this is technically not a test, consider refactoring the the +# ConfigureTest function to share common code with a new ConfigureBenchmark +# function (which would not link gtest, etc.) +ConfigureTest(RANDOM_WALKS_PROFILING "${RANDOM_WALKS_PROFILING_SRCS}") ################################################################################################### diff --git a/cpp/tests/sampling/random_walks_profiling.cu b/cpp/tests/sampling/random_walks_profiling.cu new file mode 100644 index 00000000000..397196c4c78 --- /dev/null +++ b/cpp/tests/sampling/random_walks_profiling.cu @@ -0,0 +1,216 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include // cugraph::test::create_memory_resource() +#include +#include + +#include +#include +#include + +#include +#include + +#include + +#include +#include + +#include +#include +#include +#include +#include + +/** + * @internal + * @brief Populates the device vector d_start with the starting vertex indices + * to be used for each RW path specified. 
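+ *
+ * Start vertices are assigned round-robin over the graph's vertices, i.e.
+ * path i starts at vertex (i % num_vertices).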
+ */ +template +void fill_start(raft::handle_t const& handle, + rmm::device_uvector& d_start, + index_t num_vertices) +{ + index_t num_paths = d_start.size(); + + thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_paths), + + d_start.begin(), + [num_vertices] __device__(auto indx) { return indx % num_vertices; }); +} + +/** + * @internal + * @brief Calls the random_walks algorithm and displays the time metrics (total + * time for all requested paths, average time for each path). + */ +template +void output_random_walks_time(graph_vt const& graph_view, typename graph_vt::edge_type num_paths) +{ + using vertex_t = typename graph_vt::vertex_type; + using edge_t = typename graph_vt::edge_type; + using weight_t = typename graph_vt::weight_type; + + raft::handle_t handle{}; + rmm::device_uvector d_start(num_paths, handle.get_stream()); + + vertex_t num_vertices = graph_view.get_number_of_vertices(); + fill_start(handle, d_start, num_vertices); + + // 0-copy const device view: + // + cugraph::experimental::detail::device_const_vector_view d_start_view{ + d_start.data(), num_paths}; + + edge_t max_depth{10}; + + HighResTimer hr_timer; + std::string label("RandomWalks"); + hr_timer.start(label); + cudaProfilerStart(); + auto ret_tuple = + cugraph::experimental::detail::random_walks_impl(handle, graph_view, d_start_view, max_depth); + cudaProfilerStop(); + hr_timer.stop(); + try { + auto runtime = hr_timer.get_average_runtime(label); + + std::cout << "RW for num_paths: " << num_paths + << ", runtime [ms] / path: " << runtime / num_paths << ":\n"; + + } catch (std::exception const& ex) { + std::cerr << ex.what() << '\n'; + return; + + } catch (...) { + std::cerr << "ERROR: Unknown exception on timer label search." << '\n'; + return; + } + hr_timer.display(std::cout); +} + +/** + * @struct RandomWalks_Usecase + * @brief Used to specify input to a random_walks benchmark/profile run + * + * @var RandomWalks_Usecase::graph_file_full_path Computed during construction + * to be an absolute path consisting of the value of the RAPIDS_DATASET_ROOT_DIR + * env var and the graph_file_path constructor arg. This is initialized to an + * empty string. + * + * @var RandomWalks_Usecase::test_weighted Bool representing if the specified + * graph is weighted or not. This is initialized to false (unweighted). + */ +struct RandomWalks_Usecase { + std::string graph_file_full_path{}; + bool test_weighted{false}; + + RandomWalks_Usecase(std::string const& graph_file_path, bool test_weighted) + : test_weighted(test_weighted) + { + if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { + graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; + } else { + graph_file_full_path = graph_file_path; + } + }; +}; + +/** + * @brief Runs random_walks on a specified input and outputs time metrics + * + * Creates a graph_t instance from the configuration specified in the + * RandomWalks_Usecase instance passed in (currently by reading a dataset to + * populate the graph_t), then runs random_walks to generate 1, 10, and 100 + * random paths and output statistics for each. + * + * @tparam vertex_t Type of vertex identifiers. + * @tparam edge_t Type of edge identifiers. + * @tparam weight_t Type of weight identifiers. + * + * @param[in] configuration RandomWalks_Usecase instance containing the input + * file to read for constructing the graph_t. 
+ */ +template +void run(RandomWalks_Usecase const& configuration) +{ + raft::handle_t handle{}; + + cugraph::experimental::graph_t graph(handle); + std::tie(graph, std::ignore) = + cugraph::test::read_graph_from_matrix_market_file( + handle, configuration.graph_file_full_path, configuration.test_weighted, false); + + auto graph_view = graph.view(); + + // FIXME: the num_paths vector might be better specified via the + // configuration input instead of hardcoding here. + std::vector v_np{1, 10, 100}; + for (auto&& num_paths : v_np) { output_random_walks_time(graph_view, num_paths); } +} + +/** + * @brief Performs the random_walks benchmark/profiling run + * + * main function for performing the random_walks benchmark/profiling run. The + * resulting executable takes the following options: "rmm_mode" which can be one + * of "binning", "cuda", "pool", or "managed. "dataset" which is a path + * relative to the env var RAPIDS_DATASET_ROOT_DIR to a input .mtx file to use + * to populate the graph_t instance. + * + * To use the default values of rmm_mode=pool and + * dataset=test/datasets/karate.mtx: + * @code + * RANDOM_WALKS_PROFILING + * @endcode + * + * To specify managed memory and the netscience.mtx dataset (relative to a + * particular RAPIDS_DATASET_ROOT_DIR setting): + * @code + * RANDOM_WALKS_PROFILING --rmm_mode=managed --dataset=test/datasets/netscience.mtx + * @endcode + * + * @return An int representing a successful run. 0 indicates success. + */ +int main(int argc, char** argv) +{ + // Add command-line processing, provide defaults + cxxopts::Options options(argv[0], " - Random Walks benchmark command line options"); + options.add_options()( + "rmm_mode", "RMM allocation mode", cxxopts::value()->default_value("pool")); + options.add_options()( + "dataset", "dataset", cxxopts::value()->default_value("test/datasets/karate.mtx")); + auto const cmd_options = options.parse(argc, argv); + auto const rmm_mode = cmd_options["rmm_mode"].as(); + auto const dataset = cmd_options["dataset"].as(); + + // Configure RMM + auto resource = cugraph::test::create_memory_resource(rmm_mode); + rmm::mr::set_current_device_resource(resource.get()); + + // Run benchmarks + std::cout << "Using dataset: " << dataset << std::endl; + run(RandomWalks_Usecase(dataset, true)); + + // FIXME: consider returning non-zero for situations that warrant it (eg. if + // the algo ran but the results are invalid, if a benchmark threshold is + // exceeded, etc.) 
+ return 0; +} diff --git a/cpp/tests/experimental/random_walks_test.cu b/cpp/tests/sampling/random_walks_test.cu similarity index 99% rename from cpp/tests/experimental/random_walks_test.cu rename to cpp/tests/sampling/random_walks_test.cu index d692f6a7592..9e4ecd0d024 100644 --- a/cpp/tests/experimental/random_walks_test.cu +++ b/cpp/tests/sampling/random_walks_test.cu @@ -24,8 +24,8 @@ #include #include -#include #include +#include #include #include diff --git a/cpp/tests/experimental/random_walks_utils.cuh b/cpp/tests/sampling/random_walks_utils.cuh similarity index 99% rename from cpp/tests/experimental/random_walks_utils.cuh rename to cpp/tests/sampling/random_walks_utils.cuh index 863094dc310..b0b06e7f65a 100644 --- a/cpp/tests/experimental/random_walks_utils.cuh +++ b/cpp/tests/sampling/random_walks_utils.cuh @@ -16,8 +16,8 @@ #pragma once #include -#include #include +#include #include diff --git a/cpp/tests/experimental/rw_low_level_test.cu b/cpp/tests/sampling/rw_low_level_test.cu similarity index 86% rename from cpp/tests/experimental/rw_low_level_test.cu rename to cpp/tests/sampling/rw_low_level_test.cu index 8b562bc41f6..dd7fd14b3a2 100644 --- a/cpp/tests/experimental/rw_low_level_test.cu +++ b/cpp/tests/sampling/rw_low_level_test.cu @@ -24,8 +24,8 @@ #include #include -#include #include +#include #include #include @@ -782,3 +782,121 @@ TEST_F(RandomWalksPrimsTest, SimpleGraphRandomWalk) ASSERT_TRUE(test_all_paths); } + +TEST(RandomWalksSpecialCase, SingleRandomWalk) +{ + using vertex_t = int32_t; + using edge_t = vertex_t; + using weight_t = float; + using index_t = vertex_t; + + raft::handle_t handle{}; + + edge_t num_edges = 8; + vertex_t num_vertices = 6; + + std::vector v_src{0, 1, 1, 2, 2, 2, 3, 4}; + std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; + std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; + + auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); + + auto graph_view = graph.view(); + + edge_t const* offsets = graph_view.offsets(); + vertex_t const* indices = graph_view.indices(); + weight_t const* values = graph_view.weights(); + + std::vector v_ro(num_vertices + 1); + std::vector v_ci(num_edges); + std::vector v_vals(num_edges); + + raft::update_host(v_ro.data(), offsets, v_ro.size(), handle.get_stream()); + raft::update_host(v_ci.data(), indices, v_ci.size(), handle.get_stream()); + raft::update_host(v_vals.data(), values, v_vals.size(), handle.get_stream()); + + std::vector v_start{2}; + vector_test_t d_v_start(v_start.size(), handle.get_stream()); + raft::update_device(d_v_start.data(), v_start.data(), d_v_start.size(), handle.get_stream()); + + index_t num_paths = v_start.size(); + index_t max_depth = 5; + + // 0-copy const device view: + // + detail::device_const_vector_view d_start_view{d_v_start.data(), num_paths}; + auto quad = detail::random_walks_impl(handle, graph_view, d_start_view, max_depth); + + auto& d_coalesced_v = std::get<0>(quad); + auto& d_coalesced_w = std::get<1>(quad); + auto& d_sizes = std::get<2>(quad); + auto seed0 = std::get<3>(quad); + + bool test_all_paths = + cugraph::test::host_check_rw_paths(handle, graph_view, d_coalesced_v, d_coalesced_w, d_sizes); + + if (!test_all_paths) std::cout << "starting seed on failure: " << seed0 << '\n'; + + ASSERT_TRUE(test_all_paths); +} + +TEST(RandomWalksUtility, PathsToCOO) +{ + using namespace cugraph::experimental::detail; + + using vertex_t = int32_t; + using edge_t = vertex_t; + using weight_t = float; + using index_t = vertex_t; + + raft::handle_t handle{}; 
+ + std::vector v_sizes{2, 1, 3, 5, 1}; + std::vector v_coalesced{5, 3, 4, 9, 0, 1, 6, 2, 7, 3, 2, 5}; + std::vector w_coalesced{0.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; + + auto num_paths = v_sizes.size(); + auto total_sz = v_coalesced.size(); + auto num_edges = w_coalesced.size(); + + ASSERT_TRUE(num_edges == total_sz - num_paths); + + vector_test_t d_coalesced_v(total_sz, handle.get_stream()); + vector_test_t d_sizes(num_paths, handle.get_stream()); + + raft::update_device( + d_coalesced_v.data(), v_coalesced.data(), d_coalesced_v.size(), handle.get_stream()); + raft::update_device(d_sizes.data(), v_sizes.data(), d_sizes.size(), handle.get_stream()); + + index_t coalesced_v_sz = d_coalesced_v.size(); + + auto tpl_coo_offsets = convert_paths_to_coo(handle, + coalesced_v_sz, + static_cast(num_paths), + d_coalesced_v.release(), + d_sizes.release()); + + auto&& d_src = std::move(std::get<0>(tpl_coo_offsets)); + auto&& d_dst = std::move(std::get<1>(tpl_coo_offsets)); + auto&& d_offsets = std::move(std::get<2>(tpl_coo_offsets)); + + ASSERT_TRUE(d_src.size() == num_edges); + ASSERT_TRUE(d_dst.size() == num_edges); + + std::vector v_src(num_edges, 0); + std::vector v_dst(num_edges, 0); + std::vector v_offsets(d_offsets.size(), 0); + + raft::update_host(v_src.data(), raw_const_ptr(d_src), d_src.size(), handle.get_stream()); + raft::update_host(v_dst.data(), raw_const_ptr(d_dst), d_dst.size(), handle.get_stream()); + raft::update_host( + v_offsets.data(), raw_const_ptr(d_offsets), d_offsets.size(), handle.get_stream()); + + std::vector v_src_exp{5, 9, 0, 6, 2, 7, 3}; + std::vector v_dst_exp{3, 0, 1, 2, 7, 3, 2}; + std::vector v_offsets_exp{0, 1, 3}; + + EXPECT_EQ(v_src, v_src_exp); + EXPECT_EQ(v_dst, v_dst_exp); + EXPECT_EQ(v_offsets, v_offsets_exp); +} diff --git a/cpp/tests/utilities/base_fixture.hpp b/cpp/tests/utilities/base_fixture.hpp index 79a86e1fc95..770fbc99397 100644 --- a/cpp/tests/utilities/base_fixture.hpp +++ b/cpp/tests/utilities/base_fixture.hpp @@ -95,7 +95,7 @@ inline std::shared_ptr create_memory_resource( if (allocation_mode == "binning") return make_binning(); if (allocation_mode == "cuda") return make_cuda(); if (allocation_mode == "pool") return make_pool(); - if (allocation_mode == "managed") make_managed(); + if (allocation_mode == "managed") return make_managed(); CUGRAPH_FAIL("Invalid RMM allocation mode"); } From 79f8b1838a0f9035d9608ce5f7a5a88c2d8c319a Mon Sep 17 00:00:00 2001 From: Jordan Jacobelli Date: Thu, 29 Apr 2021 11:07:21 +0200 Subject: [PATCH 239/343] ENH Remove defaults channel from conda build (#1564) Remove `defaults` channel from conda build Authors: - Jordan Jacobelli (https://github.com/Ethyling) Approvers: - Rick Ratzel (https://github.com/rlratzel) - AJ Schmidt (https://github.com/ajschmidt8) URL: https://github.com/rapidsai/cugraph/pull/1564 --- README.md | 6 +++--- ci/benchmark/build.sh | 4 ++-- conda/recipes/cugraph/meta.yaml | 2 +- conda/recipes/libcugraph/meta.yaml | 2 +- conda_build.sh | 6 ++++-- 5 files changed, 11 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index c5785e6cb08..2e94bd87d34 100644 --- a/README.md +++ b/README.md @@ -152,13 +152,13 @@ Install and update cuGraph using the conda command: ```bash # CUDA 11.0 -conda install -c nvidia -c rapidsai -c numba -c conda-forge -c defaults cugraph cudatoolkit=11.0 +conda install -c nvidia -c rapidsai -c numba -c conda-forge cugraph cudatoolkit=11.0 # CUDA 11.1 -conda install -c nvidia -c rapidsai -c numba -c conda-forge -c defaults cugraph cudatoolkit=11.1 +conda install -c 
nvidia -c rapidsai -c numba -c conda-forge cugraph cudatoolkit=11.1 # CUDA 11.2 -conda install -c nvidia -c rapidsai -c numba -c conda-forge -c defaults cugraph cudatoolkit=11.2 +conda install -c nvidia -c rapidsai -c numba -c conda-forge cugraph cudatoolkit=11.2 ``` Note: This conda installation only applies to Linux and Python versions 3.7/3.8. diff --git a/ci/benchmark/build.sh b/ci/benchmark/build.sh index 921e96dbbb9..d48f475f2eb 100644 --- a/ci/benchmark/build.sh +++ b/ci/benchmark/build.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Copyright (c) 2018-2020, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. ########################################## # cuGraph Benchmark test script for CI # ########################################## @@ -68,7 +68,7 @@ CUGRAPH_DEPS=(cudf rmm) LIBCUGRAPH_DEPS=(cudf rmm) gpuci_logger "Install required packages" -gpuci_conda_retry install -c nvidia -c rapidsai -c rapidsai-nightly -c conda-forge -c defaults \ +gpuci_conda_retry install -c nvidia -c rapidsai -c rapidsai-nightly -c conda-forge \ "cudf=${MINOR_VERSION}" \ "rmm=${MINOR_VERSION}" \ "cudatoolkit=$CUDA_REL" \ diff --git a/conda/recipes/cugraph/meta.yaml b/conda/recipes/cugraph/meta.yaml index 1ef64ddbe72..c687e57b74f 100644 --- a/conda/recipes/cugraph/meta.yaml +++ b/conda/recipes/cugraph/meta.yaml @@ -1,7 +1,7 @@ # Copyright (c) 2018-2021, NVIDIA CORPORATION. # Usage: -# conda build -c nvidia -c rapidsai -c conda-forge -c defaults . +# conda build -c nvidia -c rapidsai -c conda-forge . {% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} {% set py_version=environ.get('CONDA_PY', 36) %} diff --git a/conda/recipes/libcugraph/meta.yaml b/conda/recipes/libcugraph/meta.yaml index 2602b2d8608..71b22c8cf1b 100644 --- a/conda/recipes/libcugraph/meta.yaml +++ b/conda/recipes/libcugraph/meta.yaml @@ -1,7 +1,7 @@ # Copyright (c) 2018-2021, NVIDIA CORPORATION. # Usage: -# conda build -c nvidia -c rapidsai -c conda-forge -c defaults . +# conda build -c nvidia -c rapidsai -c conda-forge . {% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' 
+ version.split('.')[1] %} {% set cuda_version='.'.join(environ.get('CUDA', '9.2').split('.')[:2]) %} diff --git a/conda_build.sh b/conda_build.sh index 4643e302f5c..28dd9dc0799 100755 --- a/conda_build.sh +++ b/conda_build.sh @@ -1,13 +1,15 @@ #!/usr/bin/env bash +# Copyright (c) 2021, NVIDIA CORPORATION + set -xe CUDA_REL=${CUDA_VERSION%.*} conda install conda-build anaconda-client conda-verify -y -conda build -c nvidia -c rapidsai -c rapidsai-nightly/label/cuda${CUDA_REL} -c conda-forge -c defaults --python=${PYTHON} conda/recipes/cugraph +conda build -c nvidia -c rapidsai -c rapidsai-nightly/label/cuda${CUDA_REL} -c conda-forge --python=${PYTHON} conda/recipes/cugraph if [ "$UPLOAD_PACKAGE" == '1' ]; then - export UPLOADFILE=`conda build -c nvidia -c rapidsai -c conda-forge -c defaults --python=${PYTHON} conda/recipes/cugraph --output` + export UPLOADFILE=`conda build -c nvidia -c rapidsai -c conda-forge --python=${PYTHON} conda/recipes/cugraph --output` SOURCE_BRANCH=main test -e ${UPLOADFILE} From 8c2381389c98b84b605fdd0a7208dd34ae278846 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Thu, 29 Apr 2021 13:27:11 -0400 Subject: [PATCH 240/343] Miscellaneous bug fixes (#1561) Miscellaneous bug fixes: a compile error with CUDA 11.2 and a typo. Authors: - Seunghwa Kang (https://github.com/seunghwak) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - Andrei Schaffer (https://github.com/aschaffer) URL: https://github.com/rapidsai/cugraph/pull/1561 --- cpp/tests/community/mg_louvain_helper.cu | 2 +- cpp/tests/experimental/sssp_test.cpp | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/cpp/tests/community/mg_louvain_helper.cu b/cpp/tests/community/mg_louvain_helper.cu index 661065ca65b..d62eaa1ec55 100644 --- a/cpp/tests/community/mg_louvain_helper.cu +++ b/cpp/tests/community/mg_louvain_helper.cu @@ -71,7 +71,7 @@ bool compare_renumbered_vectors(raft::handle_t const &handle, v1.end(), vertex_t{0}); - rmm::device_uvector map(max, size_t{0}); + rmm::device_uvector map(max, handle.get_stream()); auto iter = thrust::make_zip_iterator(thrust::make_tuple(v1.begin(), v2.begin())); diff --git a/cpp/tests/experimental/sssp_test.cpp b/cpp/tests/experimental/sssp_test.cpp index a9c12043a7f..0e1e18af785 100644 --- a/cpp/tests/experimental/sssp_test.cpp +++ b/cpp/tests/experimental/sssp_test.cpp @@ -54,14 +54,13 @@ void sssp_reference(edge_t const* offsets, vertex_t source, weight_t cutoff = std::numeric_limits::max()) { - using queue_iterm_t = std::tuple; + using queue_item_t = std::tuple; std::fill(distances, distances + num_vertices, std::numeric_limits::max()); std::fill(predecessors, predecessors + num_vertices, cugraph::invalid_vertex_id::value); *(distances + source) = weight_t{0.0}; - std::priority_queue, std::greater> - queue{}; + std::priority_queue, std::greater> queue{}; queue.push(std::make_tuple(weight_t{0.0}, source)); while (queue.size() > 0) { From 1c7142e3e5448d8258270f059b7caea8645e6cb3 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Thu, 29 Apr 2021 13:28:23 -0400 Subject: [PATCH 241/343] Enable correctness check in C++ testing by default. (#1560) Correctness check in C++ testing was disabled by default. Enable by default. 
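For example (illustrative only, inferred from the diff below): a usecase instantiated as
`BFS_Usecase{0}` now runs the comparison against the host reference implementation, because
`check_correctness` defaults to `true`; skipping validation requires constructing the usecase
with `check_correctness` explicitly set to `false`.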
Authors: - Seunghwa Kang (https://github.com/seunghwak) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - Brad Rees (https://github.com/BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1560 --- cpp/tests/experimental/bfs_test.cpp | 2 +- cpp/tests/experimental/katz_centrality_test.cpp | 2 +- cpp/tests/experimental/mg_bfs_test.cpp | 2 +- cpp/tests/experimental/mg_katz_centrality_test.cpp | 2 +- cpp/tests/experimental/mg_sssp_test.cpp | 2 +- cpp/tests/experimental/pagerank_test.cpp | 2 +- cpp/tests/experimental/sssp_test.cpp | 2 +- cpp/tests/pagerank/mg_pagerank_test.cpp | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cpp/tests/experimental/bfs_test.cpp b/cpp/tests/experimental/bfs_test.cpp index 1de439e1430..44b664c5b92 100644 --- a/cpp/tests/experimental/bfs_test.cpp +++ b/cpp/tests/experimental/bfs_test.cpp @@ -83,7 +83,7 @@ void bfs_reference(edge_t const* offsets, struct BFS_Usecase { size_t source{0}; - bool check_correctness{false}; + bool check_correctness{true}; }; template diff --git a/cpp/tests/experimental/katz_centrality_test.cpp b/cpp/tests/experimental/katz_centrality_test.cpp index af70b90dd02..232d82a1c91 100644 --- a/cpp/tests/experimental/katz_centrality_test.cpp +++ b/cpp/tests/experimental/katz_centrality_test.cpp @@ -98,7 +98,7 @@ void katz_centrality_reference(edge_t const* offsets, struct KatzCentrality_Usecase { bool test_weighted{false}; - bool check_correctness{false}; + bool check_correctness{true}; }; template diff --git a/cpp/tests/experimental/mg_bfs_test.cpp b/cpp/tests/experimental/mg_bfs_test.cpp index ebb2824fb87..f6e0a57e602 100644 --- a/cpp/tests/experimental/mg_bfs_test.cpp +++ b/cpp/tests/experimental/mg_bfs_test.cpp @@ -42,7 +42,7 @@ static int PERF = 0; struct BFS_Usecase { size_t source{0}; - bool check_correctness{false}; + bool check_correctness{true}; }; template diff --git a/cpp/tests/experimental/mg_katz_centrality_test.cpp b/cpp/tests/experimental/mg_katz_centrality_test.cpp index b4a7968e955..864b68caf33 100644 --- a/cpp/tests/experimental/mg_katz_centrality_test.cpp +++ b/cpp/tests/experimental/mg_katz_centrality_test.cpp @@ -39,7 +39,7 @@ static int PERF = 0; struct KatzCentrality_Usecase { bool test_weighted{false}; - bool check_correctness{false}; + bool check_correctness{true}; }; template diff --git a/cpp/tests/experimental/mg_sssp_test.cpp b/cpp/tests/experimental/mg_sssp_test.cpp index c49efefacd5..70f1a95e1f4 100644 --- a/cpp/tests/experimental/mg_sssp_test.cpp +++ b/cpp/tests/experimental/mg_sssp_test.cpp @@ -42,7 +42,7 @@ static int PERF = 0; struct SSSP_Usecase { size_t source{0}; - bool check_correctness{false}; + bool check_correctness{true}; }; template diff --git a/cpp/tests/experimental/pagerank_test.cpp b/cpp/tests/experimental/pagerank_test.cpp index 27739cee01b..1e26245b74c 100644 --- a/cpp/tests/experimental/pagerank_test.cpp +++ b/cpp/tests/experimental/pagerank_test.cpp @@ -134,7 +134,7 @@ void pagerank_reference(edge_t const* offsets, struct PageRank_Usecase { double personalization_ratio{0.0}; bool test_weighted{false}; - bool check_correctness{false}; + bool check_correctness{true}; }; template diff --git a/cpp/tests/experimental/sssp_test.cpp b/cpp/tests/experimental/sssp_test.cpp index 0e1e18af785..d84c1c2fc6c 100644 --- a/cpp/tests/experimental/sssp_test.cpp +++ b/cpp/tests/experimental/sssp_test.cpp @@ -88,7 +88,7 @@ void sssp_reference(edge_t const* offsets, struct SSSP_Usecase { size_t source{0}; - bool check_correctness{false}; + bool check_correctness{true}; }; 
template diff --git a/cpp/tests/pagerank/mg_pagerank_test.cpp b/cpp/tests/pagerank/mg_pagerank_test.cpp index 0eae6a62f31..659a62a727c 100644 --- a/cpp/tests/pagerank/mg_pagerank_test.cpp +++ b/cpp/tests/pagerank/mg_pagerank_test.cpp @@ -43,7 +43,7 @@ static int PERF = 0; struct PageRank_Usecase { double personalization_ratio{0.0}; bool test_weighted{false}; - bool check_correctness{false}; + bool check_correctness{true}; }; template From 70a8c9ac04994aec425961efd734f9262f528cf1 Mon Sep 17 00:00:00 2001 From: Jordan Jacobelli Date: Wed, 5 May 2021 21:09:14 +0200 Subject: [PATCH 242/343] ENH Remove 'rapidsai-nightly' conda channel when building main branch (#1577) Remove `rapidsai-nightly` conda channel when building main branch Authors: - Jordan Jacobelli (https://github.com/Ethyling) Approvers: - AJ Schmidt (https://github.com/ajschmidt8) - Brad Rees (https://github.com/BradReesWork) - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/1577 --- ci/cpu/build.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh index 8d12b10a640..4f46938ee49 100755 --- a/ci/cpu/build.sh +++ b/ci/cpu/build.sh @@ -39,6 +39,11 @@ gpuci_logger "Activate conda env" . /opt/conda/etc/profile.d/conda.sh conda activate rapids +# Remove rapidsai-nightly channel if we are building main branch +if [ "$SOURCE_BRANCH" = "main" ]; then + conda config --system --remove channels rapidsai-nightly +fi + gpuci_logger "Check versions" python --version $CC --version From 79f0ed9de667f8ed435ac6efc7feaa118f13fedd Mon Sep 17 00:00:00 2001 From: Jordan Jacobelli Date: Wed, 5 May 2021 21:09:44 +0200 Subject: [PATCH 243/343] ENH Remove progress output on conda packages upload (#1578) Remove progress output on conda packages upload Authors: - Jordan Jacobelli (https://github.com/Ethyling) Approvers: - AJ Schmidt (https://github.com/ajschmidt8) - Brad Rees (https://github.com/BradReesWork) - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/1578 --- ci/cpu/upload.sh | 4 ++-- conda_build.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/cpu/upload.sh b/ci/cpu/upload.sh index 50e4c25b90b..a333d8828d8 100644 --- a/ci/cpu/upload.sh +++ b/ci/cpu/upload.sh @@ -43,13 +43,13 @@ if [[ "$BUILD_LIBCUGRAPH" == "1" && "$UPLOAD_LIBCUGRAPH" == "1" ]]; then test -e ${LIBCUGRAPH_FILE} echo "Upload libcugraph" echo ${LIBCUGRAPH_FILE} - gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${LIBCUGRAPH_FILE} + gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${LIBCUGRAPH_FILE} --no-progress fi if [[ "$BUILD_CUGRAPH" == "1" && "$UPLOAD_CUGRAPH" == "1" ]]; then test -e ${CUGRAPH_FILE} echo "Upload cugraph" echo ${CUGRAPH_FILE} - gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${CUGRAPH_FILE} + gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${CUGRAPH_FILE} --no-progress fi diff --git a/conda_build.sh b/conda_build.sh index 28dd9dc0799..1254b7d8d5a 100755 --- a/conda_build.sh +++ b/conda_build.sh @@ -28,7 +28,7 @@ if [ "$UPLOAD_PACKAGE" == '1' ]; then echo "Upload" echo ${UPLOADFILE} - anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --force ${UPLOADFILE} + anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --force 
${UPLOADFILE} --no-progress else echo "Skipping upload" fi From 50b43f7bb415eaaae554039efe189d0c02511f57 Mon Sep 17 00:00:00 2001 From: Andrei Schaffer <37386037+aschaffer@users.noreply.github.com> Date: Wed, 5 May 2021 17:00:33 -0500 Subject: [PATCH 244/343] Fix Random Walks output format and handle missing weights. (#1567) This PR tracks work on missing weights, https://github.com/rapidsai/cugraph/issues/1566, by providing default `weight_t{1}` weights. And addresses output format changes, as follows: 1. offsets for vertex paths starting indices, instead of sizes; 2. set of pairs (offset, size) for edge (weight) paths; Example: for an edge path with offsets 0,3,3,5,... meaning 1st path has 3 edges, 2nd path has 0 edges (!), 3rd has 2 edges, etc.; the return is: (0,3), (3,0), (3,2), (5,...), ...; 3. The remaining output format stays the same (i.e., coalesced vertex sets, and coalesced weight sets); Authors: - Andrei Schaffer (https://github.com/aschaffer) Approvers: - Seunghwa Kang (https://github.com/seunghwak) URL: https://github.com/rapidsai/cugraph/pull/1567 --- cpp/include/algorithms.hpp | 21 ++- cpp/include/utilities/path_retrieval.hpp | 16 ++ cpp/src/sampling/random_walks.cu | 31 +++- cpp/src/sampling/random_walks.cuh | 206 +++++++++++++++------- cpp/tests/sampling/random_walks_utils.cuh | 153 ++++++++++++++-- cpp/tests/sampling/rw_low_level_test.cu | 189 +++++++++++++++++++- 6 files changed, 524 insertions(+), 92 deletions(-) diff --git a/cpp/include/algorithms.hpp b/cpp/include/algorithms.hpp index 7a7a0219d74..9f1cb02df0c 100644 --- a/cpp/include/algorithms.hpp +++ b/cpp/include/algorithms.hpp @@ -1265,11 +1265,19 @@ extract_ego(raft::handle_t const &handle, * @param ptr_d_start Device pointer to set of starting vertex indices for the RW. * @param num_paths = number(paths). * @param max_depth maximum length of RWs. - * @return std::tuple, device_vec_t, - * device_vec_t> Triplet of coalesced RW paths, with corresponding edge weights for - * each, and corresponding path sizes. This is meant to minimize the number of DF's to be passed to - * the Python layer. The meaning of "coalesced" here is that a 2D array of paths of different sizes - * is represented as a 1D array. + * @param use_padding (optional) specifies if return uses padded format (true), or coalesced + * (compressed) format; when padding is used the output is a matrix of vertex paths and a matrix of + * edges paths (weights); in this case the matrices are stored in row major order; the vertex path + * matrix is padded with `num_vertices` values and the weight matrix is padded with `0` values; + * @return std::tuple, rmm::device_uvector, + * rmm::device_uvector> Triplet of either padded or coalesced RW paths; in the coalesced + * case (default), the return consists of corresponding vertex and edge weights for each, and + * corresponding path sizes. This is meant to minimize the number of DF's to be passed to the Python + * layer. The meaning of "coalesced" here is that a 2D array of paths of different sizes is + * represented as a 1D contiguous array. In the padded case the return is a matrix of num_paths x + * max_depth vertex paths; and num_paths x (max_depth-1) edge (weight) paths, with an empty array of + * sizes. 
Note: if the graph is un-weighted the edge (weight) paths consists of `weight_t{1}` + * entries; */ template std::tuple, @@ -1279,7 +1287,8 @@ random_walks(raft::handle_t const &handle, graph_t const &graph, typename graph_t::vertex_type const *ptr_d_start, index_t num_paths, - index_t max_depth); + index_t max_depth, + bool use_padding = false); } // namespace experimental } // namespace cugraph diff --git a/cpp/include/utilities/path_retrieval.hpp b/cpp/include/utilities/path_retrieval.hpp index fd0d36b67d6..4d1b6a1b4d2 100644 --- a/cpp/include/utilities/path_retrieval.hpp +++ b/cpp/include/utilities/path_retrieval.hpp @@ -67,5 +67,21 @@ std:: index_t num_paths, rmm::device_buffer &&d_coalesced_v, rmm::device_buffer &&d_sizes); + +/** + * @brief returns additional RW information on vertex paths offsets and weight path sizes and + * offsets, for the coalesced case (the padded case does not need or provide this information) + * + * @tparam index_t Type used to store indexing and sizes. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param num_paths number of paths. + * @param ptr_d_sizes sizes of vertex paths. + * @return tuple of (vertex_path_offsets, weight_path_sizes, weight_path_offsets), where offsets are + * exclusive scan of corresponding sizes. + */ +template +std::tuple, rmm::device_uvector, rmm::device_uvector> +query_rw_sizes_offsets(raft::handle_t const &handle, index_t num_paths, index_t const *ptr_d_sizes); } // namespace experimental } // namespace cugraph diff --git a/cpp/src/sampling/random_walks.cu b/cpp/src/sampling/random_walks.cu index d1d0382d46f..a5410d0e65e 100644 --- a/cpp/src/sampling/random_walks.cu +++ b/cpp/src/sampling/random_walks.cu @@ -30,7 +30,8 @@ template std:: graph_view_t const& gview, int32_t const* ptr_d_start, int32_t num_paths, - int32_t max_depth); + int32_t max_depth, + bool use_padding); template std:: tuple, rmm::device_uvector, rmm::device_uvector> @@ -38,7 +39,8 @@ template std:: graph_view_t const& gview, int32_t const* ptr_d_start, int64_t num_paths, - int64_t max_depth); + int64_t max_depth, + bool use_padding); template std:: tuple, rmm::device_uvector, rmm::device_uvector> @@ -46,7 +48,8 @@ template std:: graph_view_t const& gview, int64_t const* ptr_d_start, int64_t num_paths, - int64_t max_depth); + int64_t max_depth, + bool use_padding); //} // // SG FP64{ @@ -56,7 +59,8 @@ template std:: graph_view_t const& gview, int32_t const* ptr_d_start, int32_t num_paths, - int32_t max_depth); + int32_t max_depth, + bool use_padding); template std:: tuple, rmm::device_uvector, rmm::device_uvector> @@ -64,7 +68,8 @@ template std:: graph_view_t const& gview, int32_t const* ptr_d_start, int64_t num_paths, - int64_t max_depth); + int64_t max_depth, + bool use_padding); template std:: tuple, rmm::device_uvector, rmm::device_uvector> @@ -72,7 +77,9 @@ template std:: graph_view_t const& gview, int64_t const* ptr_d_start, int64_t num_paths, - int64_t max_depth); + int64_t max_depth, + bool use_padding); +//} template std:: tuple, rmm::device_uvector, rmm::device_uvector> @@ -97,6 +104,16 @@ template std:: int64_t num_paths, rmm::device_buffer&& d_coalesced_v, rmm::device_buffer&& d_sizes); -//} + +template std::tuple, + rmm::device_uvector, + rmm::device_uvector> +query_rw_sizes_offsets(raft::handle_t const& handle, int32_t num_paths, int32_t const* ptr_d_sizes); + +template std::tuple, + rmm::device_uvector, + rmm::device_uvector> 
+query_rw_sizes_offsets(raft::handle_t const& handle, int64_t num_paths, int64_t const* ptr_d_sizes); + } // namespace experimental } // namespace cugraph diff --git a/cpp/src/sampling/random_walks.cuh b/cpp/src/sampling/random_walks.cuh index 82665003769..10a47318bcb 100644 --- a/cpp/src/sampling/random_walks.cuh +++ b/cpp/src/sampling/random_walks.cuh @@ -31,6 +31,7 @@ #include #include +#include #include #include #include @@ -39,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -221,44 +223,6 @@ struct col_indx_extract_t const& d_indices, - device_vec_t const& d_offsets, - device_vec_t const& d_values, - device_vec_t const& d_crt_out_degs, - device_vec_t const& d_sizes, - index_t num_paths, - index_t max_depth) - : handle_(handle), - col_indices_(raw_const_ptr(d_indices)), - row_offsets_(raw_const_ptr(d_offsets)), - values_(raw_const_ptr(d_values)), - out_degs_(raw_const_ptr(d_crt_out_degs)), - sizes_(raw_const_ptr(d_sizes)), - num_paths_(num_paths), - max_depth_(max_depth) - { - } - - col_indx_extract_t(raft::handle_t const& handle, - vertex_t const* p_d_indices, - edge_t const* p_d_offsets, - weight_t const* p_d_values, - edge_t const* p_d_crt_out_degs, - index_t const* p_d_sizes, - index_t num_paths, - index_t max_depth) - : handle_(handle), - col_indices_(p_d_indices), - row_offsets_(p_d_offsets), - values_(p_d_values), - out_degs_(p_d_crt_out_degs), - sizes_(p_d_sizes), - num_paths_(num_paths), - max_depth_(max_depth) - { - } - col_indx_extract_t(raft::handle_t const& handle, graph_t const& graph, edge_t const* p_d_crt_out_degs, @@ -316,7 +280,11 @@ struct col_indx_extract_t 0; }); } @@ -386,11 +354,15 @@ struct random_walker_t { random_walker_t(raft::handle_t const& handle, graph_t const& graph, index_t num_paths, - index_t max_depth) + index_t max_depth, + vertex_t v_padding_val = 0, + weight_t w_padding_val = 0) : handle_(handle), num_paths_(num_paths), max_depth_(max_depth), - d_cached_out_degs_(graph.compute_out_degrees(handle_)) + d_cached_out_degs_(graph.compute_out_degrees(handle_)), + vertex_padding_value_(v_padding_val != 0 ? 
v_padding_val : graph.get_number_of_vertices()), + weight_padding_value_(w_padding_val) { } @@ -559,7 +531,7 @@ struct random_walker_t { thrust::make_counting_iterator(0), predicate_w); - CUDA_TRY(cudaStreamSynchronize(handle_.get_stream())); + handle_.get_stream_view().synchronize(); d_coalesced_v.resize(thrust::distance(d_coalesced_v.begin(), new_end_v), handle_.get_stream()); d_coalesced_w.resize(thrust::distance(d_coalesced_w.begin(), new_end_w), handle_.get_stream()); @@ -690,11 +662,31 @@ struct random_walker_t { device_vec_t const& get_out_degs(void) const { return d_cached_out_degs_; } + vertex_t get_vertex_padding_value(void) const { return vertex_padding_value_; } + + weight_t get_weight_padding_value(void) const { return weight_padding_value_; } + + void init_padding(device_vec_t& d_coalesced_v, + device_vec_t& d_coalesced_w) const + { + thrust::fill(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + d_coalesced_v.begin(), + d_coalesced_v.end(), + vertex_padding_value_); + + thrust::fill(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + d_coalesced_w.begin(), + d_coalesced_w.end(), + weight_padding_value_); + } + private: raft::handle_t const& handle_; index_t num_paths_; index_t max_depth_; device_vec_t d_cached_out_degs_; + vertex_t const vertex_padding_value_; + weight_t const weight_padding_value_; }; /** @@ -709,11 +701,21 @@ struct random_walker_t { * @param d_v_start Device (view) set of starting vertex indices for the RW. * number(paths) == d_v_start.size(). * @param max_depth maximum length of RWs. + * @param use_padding (optional) specifies if return uses padded format (true), or coalesced + * (compressed) format; when padding is used the output is a matrix of vertex paths and a matrix of + * edges paths (weights); in this case the matrices are stored in row major order; the vertex path + * matrix is padded with `num_vertices` values and the weight matrix is padded with `0` values; + * @param seeder (optional) is object providing the random seeding mechanism. Defaults to local + * clock time as initial seed. * @return std::tuple, device_vec_t, - * device_vec_t, seed> Quadruplet of coalesced RW paths, with corresponding edge weights - * for each, and corresponding path sizes. This is meant to minimize the number of DF's to be passed - * to the Python layer. Also returning seed for testing / debugging repro. The meaning of - * "coalesced" here is that a 2D array of paths of different sizes is represented as a 1D array. + * device_vec_t> Triplet of either padded or coalesced RW paths; in the coalesced case + * (default), the return consists of corresponding vertex and edge weights for each, and + * corresponding path sizes. This is meant to minimize the number of DF's to be passed to the Python + * layer. The meaning of "coalesced" here is that a 2D array of paths of different sizes is + * represented as a 1D contiguous array. In the padded case the return is a matrix of num_paths x + * max_depth vertex paths; and num_paths x (max_depth-1) edge (weight) paths, with an empty array of + * sizes. 
Note: if the graph is un-weighted the edge (weight) paths consists of `weight_t{1}` + * entries; */ template & d_v_start, index_t max_depth, + bool use_padding = false, seeding_policy_t seeder = clock_seeding_t{}) { using vertex_t = typename graph_t::vertex_type; @@ -772,6 +775,10 @@ random_walks_impl(raft::handle_t const& handle, // seed_t seed0 = static_cast(seeder()); + // if padding used, initialize padding values: + // + if (use_padding) rand_walker.init_padding(d_coalesced_v, d_coalesced_w); + // very first vertex, for each path: // rand_walker.start(d_v_start, d_coalesced_v, d_paths_sz); @@ -799,15 +806,25 @@ random_walks_impl(raft::handle_t const& handle, // wrap-up, post-process: // truncate v_set, w_set to actual space used + // unless padding is used // - rand_walker.stop(d_coalesced_v, d_coalesced_w, d_paths_sz); + if (!use_padding) { rand_walker.stop(d_coalesced_v, d_coalesced_w, d_paths_sz); } // because device_uvector is not copy-cnstr-able: // - return std::make_tuple(std::move(d_coalesced_v), - std::move(d_coalesced_w), - std::move(d_paths_sz), - seed0); // also return seed for repro + if (!use_padding) { + return std::make_tuple(std::move(d_coalesced_v), + std::move(d_coalesced_w), + std::move(d_paths_sz), + seed0); // also return seed for repro + } else { + return std::make_tuple( + std::move(d_coalesced_v), + std::move(d_coalesced_w), + device_vec_t(0, stream), // purposely empty size array for the padded case, to avoid + // unnecessary allocations + seed0); // also return seed for repro + } } /** @@ -822,11 +839,21 @@ random_walks_impl(raft::handle_t const& handle, * @param d_v_start Device (view) set of starting vertex indices for the RW. number(RW) == * d_v_start.size(). * @param max_depth maximum length of RWs. + * @param use_padding (optional) specifies if return uses padded format (true), or coalesced + * (compressed) format; when padding is used the output is a matrix of vertex paths and a matrix of + * edges paths (weights); in this case the matrices are stored in row major order; the vertex path + * matrix is padded with `num_vertices` values and the weight matrix is padded with `0` values; + * @param seeder (optional) is object providing the random seeding mechanism. Defaults to local + * clock time as initial seed. * @return std::tuple, device_vec_t, - * device_vec_t, seed> Quadruplet of coalesced RW paths, with corresponding edge weights - * for each, and coresponding path sizes. This is meant to minimize the number of DF's to be passed - * to the Python layer. Also returning seed for testing / debugging repro. The meaning of - * "coalesced" here is that a 2D array of paths of different sizes is represented as a 1D array. + * device_vec_t> Triplet of either padded or coalesced RW paths; in the coalesced case + * (default), the return consists of corresponding vertex and edge weights for each, and + * corresponding path sizes. This is meant to minimize the number of DF's to be passed to the Python + * layer. The meaning of "coalesced" here is that a 2D array of paths of different sizes is + * represented as a 1D contiguous array. In the padded case the return is a matrix of num_paths x + * max_depth vertex paths; and num_paths x (max_depth-1) edge (weight) paths, with an empty array of + * sizes. 
Note: if the graph is un-weighted the edge (weight) paths consists of `weight_t{1}` + * entries; */ template & d_v_start, index_t max_depth, + bool use_padding = false, seeding_policy_t seeder = clock_seeding_t{}) { CUGRAPH_FAIL("Not implemented yet."); @@ -1003,18 +1031,27 @@ struct coo_convertor_t { * @brief returns random walks (RW) from starting sources, where each path is of given maximum * length. Uniform distribution is assumed for the random engine. * - * @tparam graph_t Type of graph (view). + * @tparam graph_t Type of graph/view (typically, graph_view_t). * @tparam index_t Type used to store indexing and sizes. * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. - * @param graph Graph object to generate RW on. + * @param graph Graph (view )object to generate RW on. * @param ptr_d_start Device pointer to set of starting vertex indices for the RW. * @param num_paths = number(paths). * @param max_depth maximum length of RWs. - * @return std::tuple, device_vec_t, - * device_vec_t> Triplet of coalesced RW paths, with corresponding edge weights for - * each, and coresponding path sizes. This is meant to minimize the number of DF's to be passed to - * the Python layer. + * @param use_padding (optional) specifies if return uses padded format (true), or coalesced + * (compressed) format; when padding is used the output is a matrix of vertex paths and a matrix of + * edges paths (weights); in this case the matrices are stored in row major order; the vertex path + * matrix is padded with `num_vertices` values and the weight matrix is padded with `0` values; + * @return std::tuple, rmm::device_uvector, + * rmm::device_uvector> Triplet of either padded or coalesced RW paths; in the coalesced + * case (default), the return consists of corresponding vertex and edge weights for each, and + * corresponding path sizes. This is meant to minimize the number of DF's to be passed to the Python + * layer. The meaning of "coalesced" here is that a 2D array of paths of different sizes is + * represented as a 1D contiguous array. In the padded case the return is a matrix of num_paths x + * max_depth vertex paths; and num_paths x (max_depth-1) edge (weight) paths, with an empty array of + * sizes. Note: if the graph is un-weighted the edge (weight) paths consists of `weight_t{1}` + * entries; */ template std::tuple, @@ -1024,7 +1061,8 @@ random_walks(raft::handle_t const& handle, graph_t const& graph, typename graph_t::vertex_type const* ptr_d_start, index_t num_paths, - index_t max_depth) + index_t max_depth, + bool use_padding) { using vertex_t = typename graph_t::vertex_type; @@ -1032,7 +1070,7 @@ random_walks(raft::handle_t const& handle, // detail::device_const_vector_view d_v_start{ptr_d_start, num_paths}; - auto quad_tuple = detail::random_walks_impl(handle, graph, d_v_start, max_depth); + auto quad_tuple = detail::random_walks_impl(handle, graph, d_v_start, max_depth, use_padding); // ignore last element of the quad, seed, // since it's meant for testing / debugging, only: // @@ -1076,5 +1114,47 @@ std:: return to_coo(d_v_view, d_sz_view); } +/** + * @brief returns additional RW information on vertex paths offsets and weight path sizes and + * offsets, for the coalesced case (the padded case does not need or provide this information) + * + * @tparam index_t Type used to store indexing and sizes. + * @param handle RAFT handle object to encapsulate resources (e.g. 
CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param num_paths number of paths. + * @param ptr_d_sizes sizes of vertex paths. + * @return tuple of (vertex_path_offsets, weight_path_sizes, weight_path_offsets), where offsets are + * exclusive scan of corresponding sizes. + */ +template +std::tuple, rmm::device_uvector, rmm::device_uvector> +query_rw_sizes_offsets(raft::handle_t const& handle, index_t num_paths, index_t const* ptr_d_sizes) +{ + rmm::device_uvector d_vertex_offsets(num_paths, handle.get_stream()); + rmm::device_uvector d_weight_sizes(num_paths, handle.get_stream()); + rmm::device_uvector d_weight_offsets(num_paths, handle.get_stream()); + + thrust::exclusive_scan(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + ptr_d_sizes, + ptr_d_sizes + num_paths, + d_vertex_offsets.begin()); + + thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + ptr_d_sizes, + ptr_d_sizes + num_paths, + d_weight_sizes.begin(), + [] __device__(auto vertex_path_sz) { return vertex_path_sz - 1; }); + + handle.get_stream_view().synchronize(); + + thrust::exclusive_scan(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + d_weight_sizes.begin(), + d_weight_sizes.end(), + d_weight_offsets.begin()); + + return std::make_tuple( + std::move(d_vertex_offsets), std::move(d_weight_sizes), std::move(d_weight_offsets)); +} + } // namespace experimental } // namespace cugraph diff --git a/cpp/tests/sampling/random_walks_utils.cuh b/cpp/tests/sampling/random_walks_utils.cuh index b0b06e7f65a..44a6f8d561b 100644 --- a/cpp/tests/sampling/random_walks_utils.cuh +++ b/cpp/tests/sampling/random_walks_utils.cuh @@ -54,7 +54,7 @@ bool host_check_path(std::vector const& row_offsets, bool assert3 = (nnz == static_cast(col_inds.size())); if (assert1 == false || assert2 == false || assert3 == false) { - std::cout << "CSR inconsistency\n"; + std::cerr << "CSR inconsistency\n"; return false; } @@ -68,16 +68,16 @@ bool host_check_path(std::vector const& row_offsets, auto found_next = std::find_if( begin, end, [next_vertex](auto dst_vertex) { return dst_vertex == next_vertex; }); if (found_next == end) { - std::cout << "vertex not found: " << next_vertex << " as neighbor of " << crt_vertex << '\n'; + std::cerr << "vertex not found: " << next_vertex << " as neighbor of " << crt_vertex << '\n'; return false; } auto delta = row_offsets[crt_vertex] + std::distance(begin, found_next); - // std::cout << "delta in ci: " << delta << '\n'; + // std::cerr << "delta in ci: " << delta << '\n'; auto found_edge = values.begin() + delta; if (*found_edge != *it_w) { - std::cout << "weight not found: " << *found_edge << " between " << crt_vertex << " and " + std::cerr << "weight not found: " << *found_edge << " between " << crt_vertex << " and " << next_vertex << '\n'; return false; } @@ -91,7 +91,8 @@ bool host_check_rw_paths( cugraph::experimental::graph_view_t const& graph_view, vector_test_t const& d_coalesced_v, vector_test_t const& d_coalesced_w, - vector_test_t const& d_sizes) + vector_test_t const& d_sizes, + index_t num_paths = 0) // only relevant for the padded case (in which case it must be non-zero) { edge_t num_edges = graph_view.get_number_of_edges(); vertex_t num_vertices = graph_view.get_number_of_vertices(); @@ -102,11 +103,15 @@ bool host_check_rw_paths( std::vector v_ro(num_vertices + 1); std::vector v_ci(num_edges); - std::vector v_vals(num_edges); + std::vector v_vals( + num_edges, 1); // account for unweighted graph, 
for which RW provides default weights{1} raft::update_host(v_ro.data(), offsets, v_ro.size(), handle.get_stream()); raft::update_host(v_ci.data(), indices, v_ci.size(), handle.get_stream()); - raft::update_host(v_vals.data(), values, v_vals.size(), handle.get_stream()); + + if (graph_view.is_weighted()) { + raft::update_host(v_vals.data(), values, v_vals.size(), handle.get_stream()); + } std::vector v_coalesced(d_coalesced_v.size()); std::vector w_coalesced(d_coalesced_w.size()); @@ -120,10 +125,39 @@ bool host_check_rw_paths( cugraph::experimental::detail::raw_const_ptr(d_coalesced_w), d_coalesced_w.size(), handle.get_stream()); - raft::update_host(v_sizes.data(), - cugraph::experimental::detail::raw_const_ptr(d_sizes), - d_sizes.size(), - handle.get_stream()); + + if (v_sizes.size() > 0) { // coalesced case + raft::update_host(v_sizes.data(), + cugraph::experimental::detail::raw_const_ptr(d_sizes), + d_sizes.size(), + handle.get_stream()); + } else { // padded case + if (num_paths == 0) { + std::cerr << "ERROR: padded case requires `num_paths` info.\n"; + return false; + } + + // extract sizes from v_coalesced (which now contains padded info) + // + auto max_depth = v_coalesced.size() / num_paths; + auto it_start_path = v_coalesced.begin(); + for (index_t row_index = 0; row_index < num_paths; ++row_index) { + auto it_end_path = it_start_path + max_depth; + auto it_padding_found = std::find(it_start_path, it_end_path, num_vertices); + + v_sizes.push_back(std::distance(it_start_path, it_padding_found)); + + it_start_path = it_end_path; + } + + // truncate padded vectors v_coalesced, w_coalesced: + // + v_coalesced.erase(std::remove(v_coalesced.begin(), v_coalesced.end(), num_vertices), + v_coalesced.end()); + + w_coalesced.erase(std::remove(w_coalesced.begin(), w_coalesced.end(), weight_t{0}), + w_coalesced.end()); + } auto it_v_begin = v_coalesced.begin(); auto it_w_begin = w_coalesced.begin(); @@ -136,11 +170,11 @@ bool host_check_rw_paths( it_w_begin += crt_sz - 1; if (!test_path) { // something went wrong; print to debug (since it's random) - raft::print_host_vector("sizes", v_sizes.data(), v_sizes.size(), std::cout); + raft::print_host_vector("sizes", v_sizes.data(), v_sizes.size(), std::cerr); - raft::print_host_vector("coalesced v", v_coalesced.data(), v_coalesced.size(), std::cout); + raft::print_host_vector("coalesced v", v_coalesced.data(), v_coalesced.size(), std::cerr); - raft::print_host_vector("coalesced w", w_coalesced.data(), w_coalesced.size(), std::cout); + raft::print_host_vector("coalesced w", w_coalesced.data(), w_coalesced.size(), std::cerr); return false; } @@ -148,5 +182,96 @@ bool host_check_rw_paths( return true; } +template +bool host_check_query_rw(raft::handle_t const& handle, + vector_test_t const& d_v_sizes, + vector_test_t const& d_v_offsets, + vector_test_t const& d_w_sizes, + vector_test_t const& d_w_offsets) +{ + index_t num_paths = d_v_sizes.size(); + + if (num_paths == 0) return false; + + std::vector v_sizes(num_paths); + std::vector v_offsets(num_paths); + std::vector w_sizes(num_paths); + std::vector w_offsets(num_paths); + + raft::update_host(v_sizes.data(), + cugraph::experimental::detail::raw_const_ptr(d_v_sizes), + num_paths, + handle.get_stream()); + + raft::update_host(v_offsets.data(), + cugraph::experimental::detail::raw_const_ptr(d_v_offsets), + num_paths, + handle.get_stream()); + + raft::update_host(w_sizes.data(), + cugraph::experimental::detail::raw_const_ptr(d_w_sizes), + num_paths, + handle.get_stream()); + + 
raft::update_host(w_offsets.data(), + cugraph::experimental::detail::raw_const_ptr(d_w_offsets), + num_paths, + handle.get_stream()); + + index_t crt_v_offset = 0; + index_t crt_w_offset = 0; + auto it_v_sz = v_sizes.begin(); + auto it_w_sz = w_sizes.begin(); + auto it_v_offset = v_offsets.begin(); + auto it_w_offset = w_offsets.begin(); + + bool flag_passed{true}; + + for (; it_v_sz != v_sizes.end(); ++it_v_sz, ++it_w_sz, ++it_v_offset, ++it_w_offset) { + if (*it_w_sz != (*it_v_sz) - 1) { + std::cerr << "ERROR: Incorrect weight path size: " << *it_w_sz << ", " << *it_v_sz << '\n'; + flag_passed = false; + break; + } + + if (*it_v_offset != crt_v_offset) { + std::cerr << "ERROR: Incorrect vertex path offset: " << *it_v_offset << ", " << crt_v_offset + << '\n'; + flag_passed = false; + break; + } + + if (*it_w_offset != crt_w_offset) { + std::cerr << "ERROR: Incorrect weight path offset: " << *it_w_offset << ", " << crt_w_offset + << '\n'; + flag_passed = false; + break; + } + + crt_v_offset += *it_v_sz; + crt_w_offset += *it_w_sz; + } + + if (!flag_passed) { + std::cerr << "v sizes:"; + std::copy(v_sizes.begin(), v_sizes.end(), std::ostream_iterator(std::cerr, ", ")); + std::cerr << '\n'; + + std::cerr << "v offsets:"; + std::copy(v_offsets.begin(), v_offsets.end(), std::ostream_iterator(std::cerr, ", ")); + std::cerr << '\n'; + + std::cerr << "w sizes:"; + std::copy(w_sizes.begin(), w_sizes.end(), std::ostream_iterator(std::cerr, ", ")); + std::cerr << '\n'; + + std::cerr << "w offsets:"; + std::copy(w_offsets.begin(), w_offsets.end(), std::ostream_iterator(std::cerr, ", ")); + std::cerr << '\n'; + } + + return flag_passed; +} + } // namespace test } // namespace cugraph diff --git a/cpp/tests/sampling/rw_low_level_test.cu b/cpp/tests/sampling/rw_low_level_test.cu index dd7fd14b3a2..29fd01fc7e0 100644 --- a/cpp/tests/sampling/rw_low_level_test.cu +++ b/cpp/tests/sampling/rw_low_level_test.cu @@ -62,10 +62,16 @@ graph_t make_graph(raft::handle_t cons raft::update_device(d_src.data(), v_src.data(), d_src.size(), handle.get_stream()); raft::update_device(d_dst.data(), v_dst.data(), d_dst.size(), handle.get_stream()); - raft::update_device(d_weights.data(), v_w.data(), d_weights.size(), handle.get_stream()); + + weight_t* ptr_d_weights{nullptr}; + if (is_weighted) { + raft::update_device(d_weights.data(), v_w.data(), d_weights.size(), handle.get_stream()); + + ptr_d_weights = d_weights.data(); + } edgelist_t edgelist{ - d_src.data(), d_dst.data(), d_weights.data(), num_edges}; + d_src.data(), d_dst.data(), ptr_d_weights, num_edges}; graph_t graph( handle, edgelist, num_vertices, graph_properties_t{false, false, is_weighted}, false); @@ -783,6 +789,67 @@ TEST_F(RandomWalksPrimsTest, SimpleGraphRandomWalk) ASSERT_TRUE(test_all_paths); } +TEST(RandomWalksQuery, GraphRWQueryOffsets) +{ + using vertex_t = int32_t; + using edge_t = vertex_t; + using weight_t = float; + using index_t = vertex_t; + + raft::handle_t handle{}; + + edge_t num_edges = 8; + vertex_t num_vertices = 6; + + std::vector v_src{0, 1, 1, 2, 2, 2, 3, 4}; + std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; + std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; + + auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); + + auto graph_view = graph.view(); + + edge_t const* offsets = graph_view.offsets(); + vertex_t const* indices = graph_view.indices(); + weight_t const* values = graph_view.weights(); + + std::vector v_ro(num_vertices + 1); + std::vector v_ci(num_edges); + std::vector v_vals(num_edges); + + 
raft::update_host(v_ro.data(), offsets, v_ro.size(), handle.get_stream()); + raft::update_host(v_ci.data(), indices, v_ci.size(), handle.get_stream()); + raft::update_host(v_vals.data(), values, v_vals.size(), handle.get_stream()); + + std::vector v_start{1, 0, 4, 2}; + vector_test_t d_v_start(v_start.size(), handle.get_stream()); + raft::update_device(d_v_start.data(), v_start.data(), d_v_start.size(), handle.get_stream()); + + index_t num_paths = v_start.size(); + index_t max_depth = 5; + + // 0-copy const device view: + // + detail::device_const_vector_view d_start_view{d_v_start.data(), num_paths}; + auto quad = detail::random_walks_impl(handle, graph_view, d_start_view, max_depth); + + auto& d_v_sizes = std::get<2>(quad); + auto seed0 = std::get<3>(quad); + + auto triplet = query_rw_sizes_offsets(handle, num_paths, detail::raw_const_ptr(d_v_sizes)); + + auto& d_v_offsets = std::get<0>(triplet); + auto& d_w_sizes = std::get<1>(triplet); + auto& d_w_offsets = std::get<2>(triplet); + + bool test_paths_sz = + cugraph::test::host_check_query_rw(handle, d_v_sizes, d_v_offsets, d_w_sizes, d_w_offsets); + + if (!test_paths_sz) std::cout << "starting seed on failure: " << seed0 << '\n'; + + ASSERT_TRUE(test_paths_sz); +} + TEST(RandomWalksSpecialCase, SingleRandomWalk) { using vertex_t = int32_t; @@ -840,6 +907,124 @@ TEST(RandomWalksSpecialCase, SingleRandomWalk) ASSERT_TRUE(test_all_paths); } +TEST(RandomWalksSpecialCase, UnweightedGraph) +{ + using vertex_t = int32_t; + using edge_t = vertex_t; + using weight_t = float; + using index_t = vertex_t; + + raft::handle_t handle{}; + + edge_t num_edges = 8; + vertex_t num_vertices = 6; + + std::vector v_src{0, 1, 1, 2, 2, 2, 3, 4}; + std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; + std::vector v_w; + + auto graph = + make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, false); // un-weighted + + auto graph_view = graph.view(); + + edge_t const* offsets = graph_view.offsets(); + vertex_t const* indices = graph_view.indices(); + weight_t const* values = graph_view.weights(); + + ASSERT_TRUE(values == nullptr); + + std::vector v_ro(num_vertices + 1); + std::vector v_ci(num_edges); + + raft::update_host(v_ro.data(), offsets, v_ro.size(), handle.get_stream()); + raft::update_host(v_ci.data(), indices, v_ci.size(), handle.get_stream()); + + std::vector v_start{2}; + vector_test_t d_v_start(v_start.size(), handle.get_stream()); + raft::update_device(d_v_start.data(), v_start.data(), d_v_start.size(), handle.get_stream()); + + index_t num_paths = v_start.size(); + index_t max_depth = 5; + + // 0-copy const device view: + // + detail::device_const_vector_view d_start_view{d_v_start.data(), num_paths}; + auto quad = detail::random_walks_impl(handle, graph_view, d_start_view, max_depth); + + auto& d_coalesced_v = std::get<0>(quad); + auto& d_coalesced_w = std::get<1>(quad); + auto& d_sizes = std::get<2>(quad); + auto seed0 = std::get<3>(quad); + + bool test_all_paths = + cugraph::test::host_check_rw_paths(handle, graph_view, d_coalesced_v, d_coalesced_w, d_sizes); + + if (!test_all_paths) std::cout << "starting seed on failure: " << seed0 << '\n'; + + ASSERT_TRUE(test_all_paths); +} + +TEST(RandomWalksPadded, SimpleGraph) +{ + using vertex_t = int32_t; + using edge_t = vertex_t; + using weight_t = float; + using index_t = vertex_t; + + raft::handle_t handle{}; + + edge_t num_edges = 8; + vertex_t num_vertices = 6; + + std::vector v_src{0, 1, 1, 2, 2, 2, 3, 4}; + std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; + std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 
6.1, 7.1}; + + auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); + + auto graph_view = graph.view(); + + edge_t const* offsets = graph_view.offsets(); + vertex_t const* indices = graph_view.indices(); + weight_t const* values = graph_view.weights(); + + std::vector v_ro(num_vertices + 1); + std::vector v_ci(num_edges); + std::vector v_vals(num_edges); + + raft::update_host(v_ro.data(), offsets, v_ro.size(), handle.get_stream()); + raft::update_host(v_ci.data(), indices, v_ci.size(), handle.get_stream()); + raft::update_host(v_vals.data(), values, v_vals.size(), handle.get_stream()); + + std::vector v_start{2}; + vector_test_t d_v_start(v_start.size(), handle.get_stream()); + raft::update_device(d_v_start.data(), v_start.data(), d_v_start.size(), handle.get_stream()); + + index_t num_paths = v_start.size(); + index_t max_depth = 5; + + // 0-copy const device view: + // + detail::device_const_vector_view d_start_view{d_v_start.data(), num_paths}; + bool use_padding{true}; + auto quad = detail::random_walks_impl(handle, graph_view, d_start_view, max_depth, use_padding); + + auto& d_coalesced_v = std::get<0>(quad); + auto& d_coalesced_w = std::get<1>(quad); + auto& d_sizes = std::get<2>(quad); + auto seed0 = std::get<3>(quad); + + ASSERT_TRUE(d_sizes.size() == 0); + + bool test_all_paths = cugraph::test::host_check_rw_paths( + handle, graph_view, d_coalesced_v, d_coalesced_w, d_sizes, num_paths); + + if (!test_all_paths) std::cout << "starting seed on failure: " << seed0 << '\n'; + + ASSERT_TRUE(test_all_paths); +} + TEST(RandomWalksUtility, PathsToCOO) { using namespace cugraph::experimental::detail; From e4f58eb5b212e1c228aabb45fc355aeb5588ddf7 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Thu, 6 May 2021 10:07:15 -0400 Subject: [PATCH 245/343] Fix multi-GPU hang on graph generation (#1572) Two bug fixes for multi-GPU graph creation. 
- Add barrier to avoid overlap between different communicators - NCCL bug workaround on DGX1 Authors: - Seunghwa Kang (https://github.com/seunghwak) Approvers: - Rick Ratzel (https://github.com/rlratzel) - Andrei Schaffer (https://github.com/aschaffer) - Brad Rees (https://github.com/BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1572 --- cpp/CMakeLists.txt | 1 + .../patterns/copy_to_adj_matrix_row_col.cuh | 89 ++++++++ .../copy_v_transform_reduce_in_out_nbr.cuh | 46 ++++ ...ransform_reduce_key_aggregated_out_nbr.cuh | 46 ++++ .../update_frontier_v_push_if_out_nbr.cuh | 58 ++++- cpp/include/utilities/host_barrier.hpp | 29 +++ cpp/include/utilities/shuffle_comm.cuh | 6 +- cpp/src/experimental/coarsen_graph.cu | 21 ++ .../experimental/generate_rmat_edgelist.cu | 5 +- cpp/src/experimental/renumber_edgelist.cu | 200 ++++++++++++++---- cpp/src/utilities/host_barrier.cpp | 106 ++++++++++ 11 files changed, 559 insertions(+), 48 deletions(-) create mode 100644 cpp/include/utilities/host_barrier.hpp create mode 100644 cpp/src/utilities/host_barrier.cpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 6f0e324fab5..6b638441a5b 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -438,6 +438,7 @@ add_library(cugraph SHARED src/experimental/pagerank.cu src/experimental/katz_centrality.cu src/tree/mst.cu + src/utilities/host_barrier.cpp ) target_link_directories(cugraph diff --git a/cpp/include/patterns/copy_to_adj_matrix_row_col.cuh b/cpp/include/patterns/copy_to_adj_matrix_row_col.cuh index ca20b9a1285..26a4eed4213 100644 --- a/cpp/include/patterns/copy_to_adj_matrix_row_col.cuh +++ b/cpp/include/patterns/copy_to_adj_matrix_row_col.cuh @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -60,6 +61,17 @@ void copy_to_matrix_major(raft::handle_t const& handle, auto const col_comm_rank = col_comm.get_rank(); auto const col_comm_size = col_comm.get_size(); + // barrier is necessary here to avoid potential overlap (which can leads to deadlock) between + // two different communicators (beginning of col_comm) +#if 1 + // FIXME: temporary hack till UCC is integrated into RAFT (so we can use UCC barrier with DASK + // and MPI barrier with MPI) + host_barrier(comm, handle.get_stream_view()); +#else + handle.get_stream_view().synchronize(); + comm.barrier(); // currently, this is ncclAllReduce +#endif + std::vector rx_counts(col_comm_size, size_t{0}); std::vector displacements(col_comm_size, size_t{0}); for (int i = 0; i < col_comm_size; ++i) { @@ -72,6 +84,17 @@ void copy_to_matrix_major(raft::handle_t const& handle, rx_counts, displacements, handle.get_stream()); + + // barrier is necessary here to avoid potential overlap (which can leads to deadlock) between + // two different communicators (end of col_comm) +#if 1 + // FIXME: temporary hack till UCC is integrated into RAFT (so we can use UCC barrier with DASK + // and MPI barrier with MPI) + host_barrier(comm, handle.get_stream_view()); +#else + handle.get_stream_view().synchronize(); + comm.barrier(); // currently, this is ncclAllReduce +#endif } else { assert(graph_view.get_number_of_local_vertices() == GraphViewType::is_adj_matrix_transposed ? 
graph_view.get_number_of_local_adj_matrix_partition_cols() @@ -106,6 +129,17 @@ void copy_to_matrix_major(raft::handle_t const& handle, auto const col_comm_rank = col_comm.get_rank(); auto const col_comm_size = col_comm.get_size(); + // barrier is necessary here to avoid potential overlap (which can leads to deadlock) between + // two different communicators (beginning of col_comm) +#if 1 + // FIXME: temporary hack till UCC is integrated into RAFT (so we can use UCC barrier with DASK + // and MPI barrier with MPI) + host_barrier(comm, handle.get_stream_view()); +#else + handle.get_stream_view().synchronize(); + comm.barrier(); // currently, this is ncclAllReduce +#endif + auto rx_counts = host_scalar_allgather(col_comm, static_cast(thrust::distance(vertex_first, vertex_last)), @@ -171,6 +205,17 @@ void copy_to_matrix_major(raft::handle_t const& handle, matrix_major_value_output_first + matrix_partition.get_major_value_start_offset()); } } + + // barrier is necessary here to avoid potential overlap (which can leads to deadlock) between + // two different communicators (end of col_comm) +#if 1 + // FIXME: temporary hack till UCC is integrated into RAFT (so we can use UCC barrier with DASK + // and MPI barrier with MPI) + host_barrier(comm, handle.get_stream_view()); +#else + handle.get_stream_view().synchronize(); + comm.barrier(); // currently, this is ncclAllReduce +#endif } else { assert(graph_view.get_number_of_local_vertices() == GraphViewType::is_adj_matrix_transposed ? graph_view.get_number_of_local_adj_matrix_partition_cols() @@ -202,6 +247,17 @@ void copy_to_matrix_minor(raft::handle_t const& handle, auto const col_comm_rank = col_comm.get_rank(); auto const col_comm_size = col_comm.get_size(); + // barrier is necessary here to avoid potential overlap (which can leads to deadlock) between + // two different communicators (beginning of row_comm) +#if 1 + // FIXME: temporary hack till UCC is integrated into RAFT (so we can use UCC barrier with DASK + // and MPI barrier with MPI) + host_barrier(comm, handle.get_stream_view()); +#else + handle.get_stream_view().synchronize(); + comm.barrier(); // currently, this is ncclAllReduce +#endif + std::vector rx_counts(row_comm_size, size_t{0}); std::vector displacements(row_comm_size, size_t{0}); for (int i = 0; i < row_comm_size; ++i) { @@ -214,6 +270,17 @@ void copy_to_matrix_minor(raft::handle_t const& handle, rx_counts, displacements, handle.get_stream()); + + // barrier is necessary here to avoid potential overlap (which can leads to deadlock) between + // two different communicators (end of row_comm) +#if 1 + // FIXME: temporary hack till UCC is integrated into RAFT (so we can use UCC barrier with DASK + // and MPI barrier with MPI) + host_barrier(comm, handle.get_stream_view()); +#else + handle.get_stream_view().synchronize(); + comm.barrier(); // currently, this is ncclAllReduce +#endif } else { assert(graph_view.get_number_of_local_vertices() == GraphViewType::is_adj_matrix_transposed ? 
graph_view.get_number_of_local_adj_matrix_partition_rows() @@ -248,6 +315,17 @@ void copy_to_matrix_minor(raft::handle_t const& handle, auto const col_comm_rank = col_comm.get_rank(); auto const col_comm_size = col_comm.get_size(); + // barrier is necessary here to avoid potential overlap (which can leads to deadlock) between + // two different communicators (beginning of row_comm) +#if 1 + // FIXME: temporary hack till UCC is integrated into RAFT (so we can use UCC barrier with DASK + // and MPI barrier with MPI) + host_barrier(comm, handle.get_stream_view()); +#else + handle.get_stream_view().synchronize(); + comm.barrier(); // currently, this is ncclAllReduce +#endif + auto rx_counts = host_scalar_allgather(row_comm, static_cast(thrust::distance(vertex_first, vertex_last)), @@ -310,6 +388,17 @@ void copy_to_matrix_minor(raft::handle_t const& handle, matrix_minor_value_output_first); } } + + // barrier is necessary here to avoid potential overlap (which can leads to deadlock) between + // two different communicators (end of row_comm) +#if 1 + // FIXME: temporary hack till UCC is integrated into RAFT (so we can use UCC barrier with DASK + // and MPI barrier with MPI) + host_barrier(comm, handle.get_stream_view()); +#else + handle.get_stream_view().synchronize(); + comm.barrier(); // currently, this is ncclAllReduce +#endif } else { assert(graph_view.get_number_of_local_vertices() == graph_view.get_number_of_local_adj_matrix_partition_rows()); diff --git a/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh b/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh index 6d828dab513..6aded0eccf0 100644 --- a/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh +++ b/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -496,6 +497,7 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, } if (GraphViewType::is_multi_gpu && update_major) { + auto& comm = handle.get_comms(); auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); auto const row_comm_rank = row_comm.get_rank(); auto const row_comm_size = row_comm.get_size(); @@ -503,6 +505,17 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, auto const col_comm_rank = col_comm.get_rank(); auto const col_comm_size = col_comm.get_size(); + // barrier is necessary here to avoid potential overlap (which can leads to deadlock) between + // two different communicators (beginning of col_comm) +#if 1 + // FIXME: temporary hack till UCC is integrated into RAFT (so we can use UCC barrier with DASK + // and MPI barrier with MPI) + host_barrier(comm, handle.get_stream_view()); +#else + handle.get_stream_view().synchronize(); + comm.barrier(); // currently, this is ncclAllReduce +#endif + device_reduce(col_comm, major_buffer_first, vertex_value_output_first, @@ -510,6 +523,17 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, raft::comms::op_t::SUM, i, handle.get_stream()); + + // barrier is necessary here to avoid potential overlap (which can leads to deadlock) between + // two different communicators (end of col_comm) +#if 1 + // FIXME: temporary hack till UCC is integrated into RAFT (so we can use UCC barrier with DASK + // and MPI barrier with MPI) + host_barrier(comm, handle.get_stream_view()); +#else + handle.get_stream_view().synchronize(); + comm.barrier(); // currently, this is ncclAllReduce +#endif } } @@ -523,6 +547,17 @@ void copy_v_transform_reduce_nbr(raft::handle_t 
const& handle, auto const col_comm_rank = col_comm.get_rank(); auto const col_comm_size = col_comm.get_size(); + // barrier is necessary here to avoid potential overlap (which can leads to deadlock) between + // two different communicators (beginning of row_comm) +#if 1 + // FIXME: temporary hack till UCC is integrated into RAFT (so we can use UCC barrier with DASK + // and MPI barrier with MPI) + host_barrier(comm, handle.get_stream_view()); +#else + handle.get_stream_view().synchronize(); + comm.barrier(); // currently, this is ncclAllReduce +#endif + for (int i = 0; i < row_comm_size; ++i) { auto offset = (graph_view.get_vertex_partition_first(col_comm_rank * row_comm_size + i) - graph_view.get_vertex_partition_first(col_comm_rank * row_comm_size)); @@ -535,6 +570,17 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, i, handle.get_stream()); } + + // barrier is necessary here to avoid potential overlap (which can leads to deadlock) between + // two different communicators (end of row_comm) +#if 1 + // FIXME: temporary hack till UCC is integrated into RAFT (so we can use UCC barrier with DASK + // and MPI barrier with MPI) + host_barrier(comm, handle.get_stream_view()); +#else + handle.get_stream_view().synchronize(); + comm.barrier(); // currently, this is ncclAllReduce +#endif } } diff --git a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh index f6eac67e4e7..9a1d9fea24c 100644 --- a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh +++ b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -211,10 +212,22 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( auto kv_map_ptr = std::make_unique>( size_t{0}, invalid_vertex_id::value, invalid_vertex_id::value); if (GraphViewType::is_multi_gpu) { + auto& comm = handle.get_comms(); auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); auto const row_comm_rank = row_comm.get_rank(); auto const row_comm_size = row_comm.get_size(); + // barrier is necessary here to avoid potential overlap (which can leads to deadlock) between + // two different communicators (beginning of row_comm) +#if 1 + // FIXME: temporary hack till UCC is integrated into RAFT (so we can use UCC barrier with DASK + // and MPI barrier with MPI) + host_barrier(comm, handle.get_stream_view()); +#else + handle.get_stream_view().synchronize(); + comm.barrier(); // currently, this is ncclAllReduce +#endif + auto map_counts = host_scalar_allgather(row_comm, static_cast(thrust::distance(map_key_first, map_key_last)), @@ -292,6 +305,21 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( // 2. aggregate each vertex out-going edges based on keys and transform-reduce. 
+ if (GraphViewType::is_multi_gpu) { + auto& comm = handle.get_comms(); + + // barrier is necessary here to avoid potential overlap (which can leads to deadlock) between + // two different communicators (beginning of col_comm) +#if 1 + // FIXME: temporary hack till UCC is integrated into RAFT (so we can use UCC barrier with DASK + // and MPI barrier with MPI) + host_barrier(comm, handle.get_stream_view()); +#else + handle.get_stream_view().synchronize(); + comm.barrier(); // currently, this is ncclAllReduce +#endif + } + rmm::device_uvector major_vertices(0, handle.get_stream()); auto e_op_result_buffer = allocate_dataframe_buffer(0, handle.get_stream()); for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { @@ -436,6 +464,9 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( // FIXME: additional optimization is possible if reduce_op is a pure function (and reduce_op // can be mapped to ncclRedOp_t). + // FIXME: a temporary workaround for a NCCL (2.9.6) bug that causes a hang on DGX1 (due to + // remote memory allocation), this barrier is unnecessary otherwise. + col_comm.barrier(); auto rx_sizes = host_scalar_gather(col_comm, tmp_major_vertices.size(), i, handle.get_stream()); std::vector rx_displs{}; @@ -475,6 +506,21 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( } } + if (GraphViewType::is_multi_gpu) { + auto& comm = handle.get_comms(); + + // barrier is necessary here to avoid potential overlap (which can leads to deadlock) between + // two different communicators (beginning of col_comm) +#if 1 + // FIXME: temporary hack till UCC is integrated into RAFT (so we can use UCC barrier with DASK + // and MPI barrier with MPI) + host_barrier(comm, handle.get_stream_view()); +#else + handle.get_stream_view().synchronize(); + comm.barrier(); // currently, this is ncclAllReduce +#endif + } + thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), vertex_value_output_first, vertex_value_output_first + graph_view.get_number_of_local_vertices(), diff --git a/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh b/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh index 3d87f19969e..4f3925f7d4c 100644 --- a/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh +++ b/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -403,6 +404,21 @@ void update_frontier_v_push_if_out_nbr( // 1. 
fill the buffer + if (GraphViewType::is_multi_gpu) { + auto& comm = handle.get_comms(); + + // barrier is necessary here to avoid potential overlap (which can leads to deadlock) between + // two different communicators (beginning of col_comm) +#if 1 + // FIXME: temporary hack till UCC is integrated into RAFT (so we can use UCC barrier with DASK + // and MPI barrier with MPI) + host_barrier(comm, handle.get_stream_view()); +#else + handle.get_stream_view().synchronize(); + comm.barrier(); // currently, this is ncclAllReduce +#endif + } + rmm::device_uvector keys(size_t{0}, handle.get_stream()); auto payload_buffer = allocate_dataframe_buffer(size_t{0}, handle.get_stream()); rmm::device_scalar buffer_idx(size_t{0}, handle.get_stream()); @@ -585,6 +601,21 @@ void update_frontier_v_push_if_out_nbr( } } + if (GraphViewType::is_multi_gpu) { + auto& comm = handle.get_comms(); + + // barrier is necessary here to avoid potential overlap (which can leads to deadlock) between + // two different communicators (beginning of col_comm) +#if 1 + // FIXME: temporary hack till UCC is integrated into RAFT (so we can use UCC barrier with DASK + // and MPI barrier with MPI) + host_barrier(comm, handle.get_stream_view()); +#else + handle.get_stream_view().synchronize(); + comm.barrier(); // currently, this is ncclAllReduce +#endif + } + // 2. reduce the buffer auto num_buffer_elements = @@ -596,13 +627,21 @@ void update_frontier_v_push_if_out_nbr( if (GraphViewType::is_multi_gpu) { // FIXME: this step is unnecessary if row_comm_size== 1 auto& comm = handle.get_comms(); - auto const comm_rank = comm.get_rank(); auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto const row_comm_rank = row_comm.get_rank(); auto const row_comm_size = row_comm.get_size(); auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); auto const col_comm_rank = col_comm.get_rank(); - auto const col_comm_size = col_comm.get_size(); + + // barrier is necessary here to avoid potential overlap (which can leads to deadlock) between + // two different communicators (beginning of row_comm) +#if 1 + // FIXME: temporary hack till UCC is integrated into RAFT (so we can use UCC barrier with DASK + // and MPI barrier with MPI) + host_barrier(comm, handle.get_stream_view()); +#else + handle.get_stream_view().synchronize(); + comm.barrier(); // currently, this is ncclAllReduce +#endif std::vector h_vertex_lasts(row_comm_size); for (size_t i = 0; i < h_vertex_lasts.size(); ++i) { @@ -649,6 +688,17 @@ void update_frontier_v_push_if_out_nbr( get_dataframe_buffer_begin(payload_buffer), keys.size(), reduce_op); + + // barrier is necessary here to avoid potential overlap (which can leads to deadlock) between + // two different communicators (end of row_comm) +#if 1 + // FIXME: temporary hack till UCC is integrated into RAFT (so we can use UCC barrier with DASK + // and MPI barrier with MPI) + host_barrier(comm, handle.get_stream_view()); +#else + handle.get_stream_view().synchronize(); + comm.barrier(); // currently, this is ncclAllReduce +#endif } // 3. update vertex properties @@ -753,7 +803,7 @@ void update_frontier_v_push_if_out_nbr( } } } -} +} // namespace experimental } // namespace experimental } // namespace cugraph diff --git a/cpp/include/utilities/host_barrier.hpp b/cpp/include/utilities/host_barrier.hpp new file mode 100644 index 00000000000..11803a7bde4 --- /dev/null +++ b/cpp/include/utilities/host_barrier.hpp @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +namespace cugraph { +namespace experimental { + +// FIXME: a temporary hack till UCC is integrated into RAFT (so we can use UCC barrier for DASK and +// MPI barrier for MPI) +void host_barrier(raft::comms::comms_t const& comm, rmm::cuda_stream_view stream_view); + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/include/utilities/shuffle_comm.cuh b/cpp/include/utilities/shuffle_comm.cuh index b318009d9bf..b42b9ad06bb 100644 --- a/cpp/include/utilities/shuffle_comm.cuh +++ b/cpp/include/utilities/shuffle_comm.cuh @@ -73,6 +73,10 @@ compute_tx_rx_counts_offsets_ranks(raft::comms::comms_t const &comm, rx_offsets, rx_src_ranks, stream); + // FIXME: temporary unverified work-around for a NCCL (2.9.6) bug that causes a hang on DGX1 (due + // to remote memory allocation), this synchronization is unnecessary otherwise but seems like + // suppress the hange issue. Need to be revisited once NCCL 2.10 is released. + CUDA_TRY(cudaDeviceSynchronize()); raft::update_host(tx_counts.data(), d_tx_value_counts.data(), comm_size, stream); raft::update_host(rx_counts.data(), d_rx_value_counts.data(), comm_size, stream); @@ -201,8 +205,6 @@ auto shuffle_values(raft::comms::comms_t const &comm, rmm::device_uvector d_tx_value_counts(comm_size, stream); raft::update_device(d_tx_value_counts.data(), tx_value_counts.data(), comm_size, stream); - CUDA_TRY(cudaStreamSynchronize(stream)); // tx_value_counts should be up-to-date - std::vector tx_counts{}; std::vector tx_offsets{}; std::vector tx_dst_ranks{}; diff --git a/cpp/src/experimental/coarsen_graph.cu b/cpp/src/experimental/coarsen_graph.cu index 1eccbd23584..6397f92e336 100644 --- a/cpp/src/experimental/coarsen_graph.cu +++ b/cpp/src/experimental/coarsen_graph.cu @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -269,6 +270,16 @@ coarsen_graph( for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { // 1-1. locally construct coarsened edge list + // barrier is necessary here to avoid potential overlap (which can leads to deadlock) between + // two different communicators (beginning of col_comm) +#if 1 + // FIXME: temporary hack till UCC is integrated into RAFT (so we can use UCC barrier with DASK + // and MPI barrier with MPI) + host_barrier(comm, handle.get_stream_view()); +#else + handle.get_stream_view().synchronize(); + comm.barrier(); // currently, this is ncclAllReduce +#endif rmm::device_uvector major_labels( store_transposed ? 
graph_view.get_number_of_local_adj_matrix_partition_cols(i) : graph_view.get_number_of_local_adj_matrix_partition_rows(i), @@ -285,6 +296,16 @@ coarsen_graph( major_labels.size(), static_cast(i), handle.get_stream()); + // barrier is necessary here to avoid potential overlap (which can leads to deadlock) between + // two different communicators (end of col_comm) +#if 1 + // FIXME: temporary hack till UCC is integrated into RAFT (so we can use UCC barrier with DASK + // and MPI barrier with MPI) + host_barrier(comm, handle.get_stream_view()); +#else + handle.get_stream_view().synchronize(); + comm.barrier(); // currently, this is ncclAllReduce +#endif rmm::device_uvector edgelist_major_vertices(0, handle.get_stream()); rmm::device_uvector edgelist_minor_vertices(0, handle.get_stream()); diff --git a/cpp/src/experimental/generate_rmat_edgelist.cu b/cpp/src/experimental/generate_rmat_edgelist.cu index d75a4654a15..f00443a0596 100644 --- a/cpp/src/experimental/generate_rmat_edgelist.cu +++ b/cpp/src/experimental/generate_rmat_edgelist.cu @@ -137,8 +137,9 @@ generate_rmat_edgelists(raft::handle_t const& handle, bool scramble_vertex_ids) { CUGRAPH_EXPECTS(min_scale > 0, "minimum graph scale is 1."); - CUGRAPH_EXPECTS(size_t{1} << max_scale <= std::numeric_limits::max(), - "Invalid input argument: scale too large for vertex_t."); + CUGRAPH_EXPECTS( + size_t{1} << max_scale <= static_cast(std::numeric_limits::max()), + "Invalid input argument: scale too large for vertex_t."); std::vector, rmm::device_uvector>> output{}; output.reserve(n_edgelists); diff --git a/cpp/src/experimental/renumber_edgelist.cu b/cpp/src/experimental/renumber_edgelist.cu index dbf0250b88a..01022e8fa6d 100644 --- a/cpp/src/experimental/renumber_edgelist.cu +++ b/cpp/src/experimental/renumber_edgelist.cu @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -59,6 +60,22 @@ rmm::device_uvector compute_renumber_map( // 1. acquire (unique major label, count) pairs + if (multi_gpu) { + auto& comm = handle.get_comms(); + + // barrier is necessary here to avoid potential overlap (which can leads to deadlock) between + // two different communicators (beginning of col_comm) +#if 1 + // FIXME: temporary hack till UCC is integrated into RAFT (so we can use UCC barrier with DASK + // and MPI barrier with MPI) + host_barrier(comm, handle.get_stream_view()); +#else + handle.get_stream_view().synchronize(); + ; + comm.barrier(); // currently, this is ncclAllReduce +#endif + } + rmm::device_uvector major_labels(0, handle.get_stream()); rmm::device_uvector major_counts(0, handle.get_stream()); for (size_t i = 0; i < edgelist_major_vertices.size(); ++i) { @@ -71,6 +88,7 @@ rmm::device_uvector compute_renumber_map( edgelist_major_vertices[i], edgelist_major_vertices[i] + edgelist_edge_counts[i], sorted_major_labels.begin()); + // FIXME: better refactor this sort-count_if-reduce_by_key routine for reuse thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), sorted_major_labels.begin(), sorted_major_labels.end()); @@ -98,6 +116,9 @@ rmm::device_uvector compute_renumber_map( rmm::device_uvector rx_major_labels(0, handle.get_stream()); rmm::device_uvector rx_major_counts(0, handle.get_stream()); + // FIXME: a temporary workaround for a NCCL (2.9.6) bug that causes a hang on DGX1 (due to + // remote memory allocation), this barrier is unnecessary otherwise. 
+ col_comm.barrier(); auto rx_sizes = host_scalar_gather( col_comm, tmp_major_labels.size(), static_cast(i), handle.get_stream()); std::vector rx_displs{}; @@ -118,32 +139,39 @@ rmm::device_uvector compute_renumber_map( static_cast(i), handle.get_stream()); if (static_cast(i) == col_comm_rank) { - thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - rx_major_labels.begin(), - rx_major_labels.end(), - rx_major_counts.begin()); - major_labels.resize(rx_major_labels.size(), handle.get_stream()); - major_counts.resize(major_labels.size(), handle.get_stream()); - auto pair_it = - thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - rx_major_labels.begin(), - rx_major_labels.end(), - rx_major_counts.begin(), - major_labels.begin(), - major_counts.begin()); - major_labels.resize(thrust::distance(major_labels.begin(), thrust::get<0>(pair_it)), - handle.get_stream()); - major_counts.resize(major_labels.size(), handle.get_stream()); - major_labels.shrink_to_fit(handle.get_stream()); - major_counts.shrink_to_fit(handle.get_stream()); + major_labels = std::move(rx_major_labels); + major_counts = std::move(rx_major_counts); } } else { - tmp_major_labels.shrink_to_fit(handle.get_stream()); - tmp_major_counts.shrink_to_fit(handle.get_stream()); + assert(i == 0); major_labels = std::move(tmp_major_labels); major_counts = std::move(tmp_major_counts); } } + if (multi_gpu) { + // FIXME: better refactor this sort-count_if-reduce_by_key routine for reuse + thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + major_labels.begin(), + major_labels.end(), + major_counts.begin()); + auto num_unique_labels = + thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(major_labels.size()), + [labels = major_labels.data()] __device__(auto i) { + return (i == 0) || (labels[i - 1] != labels[i]); + }); + rmm::device_uvector tmp_major_labels(num_unique_labels, handle.get_stream()); + rmm::device_uvector tmp_major_counts(tmp_major_labels.size(), handle.get_stream()); + thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + major_labels.begin(), + major_labels.end(), + major_counts.begin(), + tmp_major_labels.begin(), + tmp_major_counts.begin()); + major_labels = std::move(tmp_major_labels); + major_counts = std::move(tmp_major_counts); + } // 2. 
acquire unique minor labels @@ -168,28 +196,54 @@ rmm::device_uvector compute_renumber_map( minor_labels.end())), handle.get_stream()); if (multi_gpu) { + auto& comm = handle.get_comms(); auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); auto const row_comm_size = row_comm.get_size(); - rmm::device_uvector rx_minor_labels(0, handle.get_stream()); - std::tie(rx_minor_labels, std::ignore) = groupby_gpuid_and_shuffle_values( - row_comm, - minor_labels.begin(), - minor_labels.end(), - [key_func = detail::compute_gpu_id_from_vertex_t{row_comm_size}] __device__( - auto val) { return key_func(val); }, - handle.get_stream()); - thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - rx_minor_labels.begin(), - rx_minor_labels.end()); - rx_minor_labels.resize( - thrust::distance( - rx_minor_labels.begin(), - thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - rx_minor_labels.begin(), - rx_minor_labels.end())), - handle.get_stream()); - minor_labels = std::move(rx_minor_labels); + // barrier is necessary here to avoid potential overlap (which can leads to deadlock) between + // two different communicators (beginning of row_comm) +#if 1 + // FIXME: temporary hack till UCC is integrated into RAFT (so we can use UCC barrier with DASK + // and MPI barrier with MPI) + host_barrier(comm, handle.get_stream_view()); +#else + handle.get_stream_view().synchronize(); + comm.barrier(); // currently, this is ncclAllReduce +#endif + + if (row_comm_size > 1) { + rmm::device_uvector rx_minor_labels(0, handle.get_stream()); + std::tie(rx_minor_labels, std::ignore) = groupby_gpuid_and_shuffle_values( + row_comm, + minor_labels.begin(), + minor_labels.end(), + [key_func = detail::compute_gpu_id_from_vertex_t{row_comm_size}] __device__( + auto val) { return key_func(val); }, + handle.get_stream()); + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rx_minor_labels.begin(), + rx_minor_labels.end()); + rx_minor_labels.resize( + thrust::distance( + rx_minor_labels.begin(), + thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rx_minor_labels.begin(), + rx_minor_labels.end())), + handle.get_stream()); + minor_labels = std::move(rx_minor_labels); + } + + // barrier is necessary here to avoid potential overlap (which can leads to deadlock) between + // two different communicators (end of row_comm) +#if 1 + // FIXME: temporary hack till UCC is integrated into RAFT (so we can use UCC barrier with DASK + // and MPI barrier with MPI) + // + host_barrier(comm, handle.get_stream_view()); +#else + handle.get_stream_view().synchronize(); + comm.barrier(); // currently, this is ncclAllReduce +#endif } minor_labels.shrink_to_fit(handle.get_stream()); @@ -366,6 +420,19 @@ void expensive_check_edgelist( auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + // FIXME: this barrier is unnecessary if the above host_scalar_allreduce is a true host + // operation (as it serves as a barrier) barrier is necessary here to avoid potential + // overlap (which can leads to deadlock) between two different communicators (beginning of + // col_comm) +#if 1 + // FIXME: temporary hack till UCC is integrated into RAFT (so we can use UCC barrier with + // DASK and MPI barrier with MPI) + host_barrier(comm, handle.get_stream_view()); +#else + handle.get_stream_view().synchronize(); + 
comm.barrier(); // currently, this is ncclAllReduce +#endif + rmm::device_uvector sorted_major_vertices(0, handle.get_stream()); { auto recvcounts = @@ -385,6 +452,17 @@ void expensive_check_edgelist( sorted_major_vertices.end()); } + // barrier is necessary here to avoid potential overlap (which can leads to deadlock) + // between two different communicators (beginning of row_comm) +#if 1 + // FIXME: temporary hack till UCC is integrated into RAFT (so we can use UCC barrier with + // DASK and MPI barrier with MPI) + host_barrier(comm, handle.get_stream_view()); +#else + handle.get_stream_view().synchronize(); + comm.barrier(); // currently, this is ncclAllReduce +#endif + rmm::device_uvector sorted_minor_vertices(0, handle.get_stream()); { auto recvcounts = @@ -404,6 +482,17 @@ void expensive_check_edgelist( sorted_minor_vertices.end()); } + // barrier is necessary here to avoid potential overlap (which can leads to deadlock) + // between two different communicators (end of row_comm) +#if 1 + // FIXME: temporary hack till UCC is integrated into RAFT (so we can use UCC barrier with + // DASK and MPI barrier with MPI) + host_barrier(comm, handle.get_stream_view()); +#else + handle.get_stream_view().synchronize(); + comm.barrier(); // currently, this is ncclAllReduce +#endif + auto edge_first = thrust::make_zip_iterator( thrust::make_tuple(edgelist_major_vertices[i], edgelist_minor_vertices[i])); CUGRAPH_EXPECTS( @@ -509,7 +598,6 @@ renumber_edgelist(raft::handle_t const& handle, edgelist_const_major_vertices, edgelist_const_minor_vertices, edgelist_edge_counts); - // 2. initialize partition_t object, number_of_vertices, and number_of_edges for the coarsened // graph @@ -535,6 +623,18 @@ renumber_edgelist(raft::handle_t const& handle, // FIXME: compare this hash based approach with a binary search based approach in both memory // footprint and execution time + // FIXME: this barrier is unnecessary if the above host_scalar_allgather is a true host operation + // (as it serves as a barrier) barrier is necessary here to avoid potential overlap (which can + // leads to deadlock) between two different communicators (beginning of col_comm) +#if 1 + // FIXME: temporary hack till UCC is integrated into RAFT (so we can use UCC barrier with DASK and + // MPI barrier with MPI) + host_barrier(comm, handle.get_stream_view()); +#else + handle.get_stream_view().synchronize(); + comm.barrier(); // currently, this is ncclAllReduce +#endif + for (size_t i = 0; i < edgelist_major_vertices.size(); ++i) { rmm::device_uvector renumber_map_major_labels( col_comm_rank == static_cast(i) ? 
vertex_t{0} @@ -571,6 +671,16 @@ renumber_edgelist(raft::handle_t const& handle, edgelist_major_vertices[i]); } + // barrier is necessary here to avoid potential overlap (which can leads to deadlock) between two + // different communicators (beginning of row_comm) +#if 1 + // FIXME: temporary hack till UCC is integrated into RAFT (so we can use UCC barrier with DASK and + // MPI barrier with MPI) + host_barrier(comm, handle.get_stream_view()); +#else + handle.get_stream_view().synchronize(); + comm.barrier(); // currently, this is ncclAllReduce +#endif { rmm::device_uvector renumber_map_minor_labels( partition.get_matrix_partition_minor_size(), handle.get_stream()); @@ -611,6 +721,16 @@ renumber_edgelist(raft::handle_t const& handle, edgelist_minor_vertices[i]); } } + // barrier is necessary here to avoid potential overlap (which can leads to deadlock) between two + // different communicators (end of row_comm) +#if 1 + // FIXME: temporary hack till UCC is integrated into RAFT (so we can use UCC barrier with DASK and + // MPI barrier with MPI) + host_barrier(comm, handle.get_stream_view()); +#else + handle.get_stream_view().synchronize(); + comm.barrier(); // currently, this is ncclAllReduce +#endif return std::make_tuple( std::move(renumber_map_labels), partition, number_of_vertices, number_of_edges); diff --git a/cpp/src/utilities/host_barrier.cpp b/cpp/src/utilities/host_barrier.cpp new file mode 100644 index 00000000000..1c018d624ed --- /dev/null +++ b/cpp/src/utilities/host_barrier.cpp @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include + +#include + +namespace cugraph { +namespace experimental { + +// FIXME: a temporary hack till UCC is integrated into RAFT (so we can use UCC barrier for DASK and +// MPI barrier for MPI) +void host_barrier(raft::comms::comms_t const& comm, rmm::cuda_stream_view stream_view) +{ + stream_view.synchronize(); + + auto const comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + + // k-tree barrier + + int constexpr k = 2; + static_assert(k >= 2); + std::vector requests(k - 1); + std::vector dummies(k - 1); + + // up + + int mod = 1; + while (mod < comm_size) { + if (comm_rank % mod == 0) { + auto level_rank = comm_rank / mod; + if (level_rank % k == 0) { + auto num_irecvs = 0; + ; + for (int i = 1; i < k; ++i) { + auto src_rank = (level_rank + i) * mod; + if (src_rank < comm_size) { + comm.irecv(dummies.data() + (i - 1), + sizeof(std::byte), + src_rank, + int{0} /* tag */, + requests.data() + (i - 1)); + ++num_irecvs; + } + } + comm.waitall(num_irecvs, requests.data()); + } else { + comm.isend(dummies.data(), + sizeof(std::byte), + (level_rank - (level_rank % k)) * mod, + int{0} /* tag */, + requests.data()); + comm.waitall(1, requests.data()); + } + } + mod *= k; + } + + // down + + mod /= k; + while (mod >= 1) { + if (comm_rank % mod == 0) { + auto level_rank = comm_rank / mod; + if (level_rank % k == 0) { + auto num_isends = 0; + for (int i = 1; i < k; ++i) { + auto dst_rank = (level_rank + i) * mod; + if (dst_rank < comm_size) { + comm.isend(dummies.data() + (i - 1), + sizeof(std::byte), + dst_rank, + int{0} /* tag */, + requests.data() + (i - 1)); + ++num_isends; + } + } + comm.waitall(num_isends, requests.data()); + } else { + comm.irecv(dummies.data(), + sizeof(std::byte), + (level_rank - (level_rank % k)) * mod, + int{0} /* tag */, + requests.data()); + comm.waitall(1, requests.data()); + } + } + mod /= k; + } +} + +} // namespace experimental +} // namespace cugraph From 924f6782762d105cd5ab0f79111975d5e93916b5 Mon Sep 17 00:00:00 2001 From: Iroy30 <41401566+Iroy30@users.noreply.github.com> Date: Thu, 6 May 2021 09:11:33 -0500 Subject: [PATCH 246/343] add multi-column support in algorithms - part 1 (#1559) Enable multi-column support in algorithms. 
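
The host_barrier() routine added in the patch above (PR #1572) implements a k-ary tree barrier on top of point-to-point isend/irecv: an "up" sweep in which, level by level, the non-root ranks of each k-wide group report to their group root so that arrival information funnels to rank 0, followed by a "down" sweep in which rank 0's release fans back out along the same tree. The sketch below is an illustrative aside, not part of any patch in this series: it models the same send/receive schedule in plain Python (the function name ktree_barrier_schedule, the comm_size=6 example, and the sanity checks are invented for illustration) so the two sweeps can be inspected without MPI or NCCL.

def ktree_barrier_schedule(comm_size, k=2):
    """Return (up_msgs, down_msgs), each a list of (src_rank, dst_rank) pairs."""
    assert k >= 2
    up, down = [], []

    # up sweep: at each level, ranks whose level-local index is not a multiple
    # of k report to the root of their k-wide group
    mod = 1
    while mod < comm_size:
        for rank in range(comm_size):
            if rank % mod == 0:
                level_rank = rank // mod
                if level_rank % k != 0:
                    up.append((rank, (level_rank - (level_rank % k)) * mod))
        mod *= k

    # down sweep: group roots release the ranks that reported to them,
    # starting from rank 0 at the top of the tree
    mod //= k
    while mod >= 1:
        for rank in range(comm_size):
            if rank % mod == 0:
                level_rank = rank // mod
                if level_rank % k == 0:
                    for i in range(1, k):
                        dst = (level_rank + i) * mod
                        if dst < comm_size:
                            down.append((rank, dst))
        mod //= k

    return up, down


if __name__ == "__main__":
    comm_size = 6
    up, down = ktree_barrier_schedule(comm_size, k=2)

    # up sweep sanity check: rank 0 hears, directly or transitively,
    # from every other rank before the release starts
    knows = {r: {r} for r in range(comm_size)}
    for src, dst in up:
        knows[dst] |= knows[src]
    assert knows[0] == set(range(comm_size))

    # down sweep sanity check: a group root only fans out after it has
    # itself been released, and every rank is eventually released
    released = {0}
    for src, dst in down:
        assert src in released
        released.add(dst)
    assert released == set(range(comm_size))

    print("up  :", up)
    print("down:", down)

The point of doing this with a handful of tiny host-side messages is that the barrier stays off the NCCL communicator entirely: as the FIXME comments above note, comm.barrier() currently maps to ncclAllReduce, so it could itself interleave with the row/col sub-communicator collectives it is meant to fence off, and the host-side tree barrier is the stop-gap until a UCC/MPI barrier is available through RAFT.
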
This PR updates: - Katz_centrality - Egonet - SSSP - Subgraph_extraction - K_core - Lap - RandonWalk - added test, waiting for Joseph's wrapper updates Authors: - https://github.com/Iroy30 Approvers: - Brad Rees (https://github.com/BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1559 --- python/cugraph/centrality/katz_centrality.py | 6 ++- .../centrality/katz_centrality_wrapper.pyx | 2 +- python/cugraph/community/egonet.py | 30 +++++++++----- .../cugraph/community/subgraph_extraction.py | 24 ++++++----- python/cugraph/cores/k_core.py | 23 +++++++---- python/cugraph/linear_assignment/lap.py | 16 +++++--- python/cugraph/sampling/random_walks.py | 15 +++++-- python/cugraph/structure/graph_classes.py | 6 ++- python/cugraph/structure/number_map.py | 18 +++++++-- python/cugraph/tests/test_egonet.py | 40 +++++++++++++++++++ python/cugraph/tests/test_k_core.py | 37 +++++++++++++++-- python/cugraph/tests/test_katz_centrality.py | 37 +++++++++++++++++ python/cugraph/tests/test_random_walks.py | 35 +++++++++++++++- .../cugraph/tests/test_subgraph_extraction.py | 36 +++++++++++++++++ python/cugraph/traversal/sssp.py | 10 +++-- 15 files changed, 284 insertions(+), 51 deletions(-) diff --git a/python/cugraph/centrality/katz_centrality.py b/python/cugraph/centrality/katz_centrality.py index 4a2b41cfe59..a1e7c1b2349 100644 --- a/python/cugraph/centrality/katz_centrality.py +++ b/python/cugraph/centrality/katz_centrality.py @@ -106,7 +106,11 @@ def katz_centrality( if nstart is not None: if G.renumbered is True: - nstart = G.add_internal_vertex_id(nstart, 'vertex', 'vertex') + if len(G.renumber_map.implementation.col_names) > 1: + cols = nstart.columns[:-1].to_list() + else: + cols = 'vertex' + nstart = G.add_internal_vertex_id(nstart, 'vertex', cols) df = katz_centrality_wrapper.katz_centrality( G, alpha, max_iter, tol, nstart, normalized diff --git a/python/cugraph/centrality/katz_centrality_wrapper.pyx b/python/cugraph/centrality/katz_centrality_wrapper.pyx index d38a0b82824..038723ad9bf 100644 --- a/python/cugraph/centrality/katz_centrality_wrapper.pyx +++ b/python/cugraph/centrality/katz_centrality_wrapper.pyx @@ -34,7 +34,7 @@ def get_output_df(input_graph, nstart): if len(nstart) != num_verts: raise ValueError('nstart must have initial guess for all vertices') - nstart['values'] = graph_primtypes_wrapper.datatype_cast([nstart['values']], [np.float64]) + nstart['values'] = graph_primtypes_wrapper.datatype_cast([nstart['values']], [np.float64])[0] df['katz_centrality'][nstart['vertex']] = nstart['values'] return df diff --git a/python/cugraph/community/egonet.py b/python/cugraph/community/egonet.py index ca3c6149ece..5ae025f1203 100644 --- a/python/cugraph/community/egonet.py +++ b/python/cugraph/community/egonet.py @@ -58,8 +58,10 @@ def ego_graph(G, n, radius=1, center=True, undirected=False, distance=None): Graph or matrix object, which should contain the connectivity information. Edge weights, if present, should be single or double precision floating point values. - n : integer - A single node + n : integer or cudf.DataFrame + A single node as integer or a cudf.DataFrame if nodes are + represented with multiple columns. If a cudf.DataFrame is provided, + only the first row is taken as the node input. radius: integer, optional Include all neighbors of distance<=radius from n. 
center: bool, optional @@ -91,20 +93,25 @@ def ego_graph(G, n, radius=1, center=True, undirected=False, distance=None): result_graph = type(G)() if G.renumbered is True: - n = G.lookup_internal_vertex_id(cudf.Series([n])) + if isinstance(n, cudf.DataFrame): + n = G.lookup_internal_vertex_id(n, n.columns) + else: + n = G.lookup_internal_vertex_id(cudf.Series([n])) df, offsets = egonet_wrapper.egonet(G, n, radius) if G.renumbered: - df = G.unrenumber(df, "src") - df = G.unrenumber(df, "dst") + df, src_names = G.unrenumber(df, "src", get_column_names=True) + df, dst_names = G.unrenumber(df, "dst", get_column_names=True) if G.edgelist.weights: result_graph.from_cudf_edgelist( - df, source="src", destination="dst", edge_attr="weight" + df, source=src_names, destination=dst_names, + edge_attr="weight" ) else: - result_graph.from_cudf_edgelist(df, source="src", destination="dst") + result_graph.from_cudf_edgelist(df, source=src_names, + destination=dst_names) return _convert_graph_to_output_type(result_graph, input_type) @@ -121,8 +128,8 @@ def batched_ego_graphs( Graph or matrix object, which should contain the connectivity information. Edge weights, if present, should be single or double precision floating point values. - seeds : cudf.Series or list - Specifies the seeds of the induced egonet subgraphs + seeds : cudf.Series or list or cudf.DataFrame + Specifies the seeds of the induced egonet subgraphs. radius: integer, optional Include all neighbors of distance<=radius from n. center: bool, optional @@ -145,7 +152,10 @@ def batched_ego_graphs( (G, input_type) = ensure_cugraph_obj(G, nx_weight_attr="weight") if G.renumbered is True: - seeds = G.lookup_internal_vertex_id(cudf.Series(seeds)) + if isinstance(seeds, cudf.DataFrame): + seeds = G.lookup_internal_vertex_id(seeds, seeds.columns) + else: + seeds = G.lookup_internal_vertex_id(cudf.Series(seeds)) df, offsets = egonet_wrapper.egonet(G, seeds, radius) diff --git a/python/cugraph/community/subgraph_extraction.py b/python/cugraph/community/subgraph_extraction.py index 7815851d465..2df6e037d71 100644 --- a/python/cugraph/community/subgraph_extraction.py +++ b/python/cugraph/community/subgraph_extraction.py @@ -12,8 +12,8 @@ # limitations under the License. from cugraph.community import subgraph_extraction_wrapper -from cugraph.structure.graph_classes import null_check from cugraph.utilities import check_nx_graph +import cudf from cugraph.utilities import cugraph_to_nx @@ -28,8 +28,9 @@ def subgraph(G, vertices): ---------- G : cugraph.Graph cuGraph graph descriptor - vertices : cudf.Series - Specifies the vertices of the induced subgraph + vertices : cudf.Series or cudf.DataFrame + Specifies the vertices of the induced subgraph. 
For multi-column + vertices, vertices should be provided as a cudf.DataFrame Returns ------- @@ -52,27 +53,30 @@ def subgraph(G, vertices): >>> Sg = cugraph.subgraph(G, sverts) """ - null_check(vertices) - G, isNx = check_nx_graph(G) if G.renumbered: - vertices = G.lookup_internal_vertex_id(vertices) + if isinstance(vertices, cudf.DataFrame): + vertices = G.lookup_internal_vertex_id(vertices, vertices.columns) + else: + vertices = G.lookup_internal_vertex_id(vertices) result_graph = type(G)() df = subgraph_extraction_wrapper.subgraph(G, vertices) if G.renumbered: - df = G.unrenumber(df, "src") - df = G.unrenumber(df, "dst") + df, src_names = G.unrenumber(df, "src", get_column_names=True) + df, dst_names = G.unrenumber(df, "dst", get_column_names=True) if G.edgelist.weights: result_graph.from_cudf_edgelist( - df, source="src", destination="dst", edge_attr="weight" + df, source=src_names, destination=dst_names, + edge_attr="weight" ) else: - result_graph.from_cudf_edgelist(df, source="src", destination="dst") + result_graph.from_cudf_edgelist(df, source=src_names, + destination=dst_names) if isNx is True: result_graph = cugraph_to_nx(result_graph) diff --git a/python/cugraph/cores/k_core.py b/python/cugraph/cores/k_core.py index ca17bdd5c81..17a3baf9c4c 100644 --- a/python/cugraph/cores/k_core.py +++ b/python/cugraph/cores/k_core.py @@ -69,31 +69,38 @@ def k_core(G, k=None, core_number=None): if core_number is not None: if G.renumbered is True: - core_number = G.add_internal_vertex_id( - core_number, "vertex", "vertex", drop=True - ) + if len(G.renumber_map.implementation.col_names) > 1: + cols = core_number.columns[:-1].to_list() + else: + cols = 'vertex' + core_number = G.add_internal_vertex_id(core_number, 'vertex', + cols) + else: core_number = core_number_wrapper.core_number(G) core_number = core_number.rename( columns={"core_number": "values"}, copy=False ) - print(core_number) + if k is None: k = core_number["values"].max() k_core_df = k_core_wrapper.k_core(G, k, core_number) if G.renumbered: - k_core_df = G.unrenumber(k_core_df, "src") - k_core_df = G.unrenumber(k_core_df, "dst") + k_core_df, src_names = G.unrenumber(k_core_df, "src", + get_column_names=True) + k_core_df, dst_names = G.unrenumber(k_core_df, "dst", + get_column_names=True) if G.edgelist.weights: KCoreGraph.from_cudf_edgelist( - k_core_df, source="src", destination="dst", edge_attr="weight" + k_core_df, source=src_names, destination=dst_names, + edge_attr="weight" ) else: KCoreGraph.from_cudf_edgelist( - k_core_df, source="src", destination="dst" + k_core_df, source=src_names, destination=dst_names, ) if isNx is True: diff --git a/python/cugraph/linear_assignment/lap.py b/python/cugraph/linear_assignment/lap.py index c634d9aceb4..d6f02efe77e 100644 --- a/python/cugraph/linear_assignment/lap.py +++ b/python/cugraph/linear_assignment/lap.py @@ -11,6 +11,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import cudf from cugraph.linear_assignment import lap_wrapper @@ -39,9 +40,10 @@ def hungarian(G, workers): as an an edge list. Edge weights are required. If an edge list is not provided then it will be computed. - workers : cudf.Series + workers : cudf.Series or cudf.DataFrame A series or column that identifies the vertex ids of the vertices - in the workers set. All vertices in G that are not in the workers + in the workers set. In case of multi-column vertices, it should be a + cudf.DataFrame. 
All vertices in G that are not in the workers set are implicitly assigned to the jobs set. Returns @@ -67,16 +69,20 @@ def hungarian(G, workers): """ if G.renumbered: - local_workers = G.lookup_internal_vertex_id(workers) + if isinstance(workers, cudf.DataFrame): + local_workers = G.lookup_internal_vertex_id(workers, + workers.columns) + else: + local_workers = G.lookup_internal_vertex_id(workers) else: local_workers = workers - df = lap_wrapper.sparse_hungarian(G, local_workers) + cost, df = lap_wrapper.sparse_hungarian(G, local_workers) if G.renumbered: df = G.unrenumber(df, 'vertex') - return df + return cost, df def dense_hungarian(costs, num_rows, num_columns): diff --git a/python/cugraph/sampling/random_walks.py b/python/cugraph/sampling/random_walks.py index 7ab3191a07c..84fde262010 100644 --- a/python/cugraph/sampling/random_walks.py +++ b/python/cugraph/sampling/random_walks.py @@ -35,9 +35,10 @@ def random_walks( Use weight parameter if weights need to be considered (currently not supported) - start_vertices : int or list or cudf.Series + start_vertices : int or list or cudf.Series or cudf.DataFrame A single node or a list or a cudf.Series of nodes from which to run - the random walks + the random walks. In case of multi-column vertices it should be + a cudf.DataFrame max_depth : int The maximum depth of the random walks @@ -61,11 +62,17 @@ def random_walks( if start_vertices is int: start_vertices = [start_vertices] - if not isinstance(start_vertices, cudf.Series): + if isinstance(start_vertices, list): start_vertices = cudf.Series(start_vertices) if G.renumbered is True: - start_vertices = G.lookup_internal_vertex_id(start_vertices) + if isinstance(start_vertices, cudf.DataFrame): + start_vertices = G.lookup_internal_vertex_id( + start_vertices, + start_vertices.columns) + else: + start_vertices = G.lookup_internal_vertex_id(start_vertices) + vertex_set, edge_set, sizes = random_walks_wrapper.random_walks( G, start_vertices, max_depth) diff --git a/python/cugraph/structure/graph_classes.py b/python/cugraph/structure/graph_classes.py index 3cd1863a054..52fcb2ffba4 100644 --- a/python/cugraph/structure/graph_classes.py +++ b/python/cugraph/structure/graph_classes.py @@ -293,7 +293,8 @@ def from_numpy_matrix(self, np_matrix): np_array = np.asarray(np_matrix) self.from_numpy_array(np_array) - def unrenumber(self, df, column_name, preserve_order=False): + def unrenumber(self, df, column_name, preserve_order=False, + get_column_names=False): """ Given a DataFrame containing internal vertex ids in the identified column, replace this with external vertex ids. If the renumbering @@ -322,7 +323,8 @@ def unrenumber(self, df, column_name, preserve_order=False): vertex identifiers are added to the DataFrame, the internal vertex identifier column is removed from the dataframe. 
""" - return self.renumber_map.unrenumber(df, column_name, preserve_order) + return self.renumber_map.unrenumber(df, column_name, preserve_order, + get_column_names) def lookup_internal_vertex_id(self, df, column_name=None): """ diff --git a/python/cugraph/structure/number_map.py b/python/cugraph/structure/number_map.py index 2b7c2b2f296..d90d7a1fda9 100644 --- a/python/cugraph/structure/number_map.py +++ b/python/cugraph/structure/number_map.py @@ -591,7 +591,8 @@ def get_renumbered_df(data): renumber_map.implementation.numbered = True return renumbered_df, renumber_map - def unrenumber(self, df, column_name, preserve_order=False): + def unrenumber(self, df, column_name, preserve_order=False, + get_column_names=False): """ Given a DataFrame containing internal vertex ids in the identified column, replace this with external vertex ids. If the renumbering @@ -611,12 +612,17 @@ def unrenumber(self, df, column_name, preserve_order=False): preserve_order: (optional) bool If True, preserve the ourder of the rows in the output DataFrame to match the input DataFrame + get_column_names: (optional) bool + If True, the unrenumbered column names are returned. Returns --------- df : cudf.DataFrame or dask_cudf.DataFrame The original DataFrame columns exist unmodified. The external vertex identifiers are added to the DataFrame, the internal vertex identifier column is removed from the dataframe. + column_names: string or list of strings + If get_column_names is True, the unrenumbered column names are + returned. Examples -------- >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', @@ -636,11 +642,13 @@ def unrenumber(self, df, column_name, preserve_order=False): if len(self.implementation.col_names) == 1: # Output will be renamed to match input mapping = {"0": column_name} + col_names = column_name else: # Output will be renamed to ${i}_${column_name} mapping = {} for nm in self.implementation.col_names: mapping[nm] = nm + "_" + column_name + col_names = list(mapping.values()) if preserve_order: index_name = NumberMap.generate_unused_column_name(df) @@ -654,8 +662,12 @@ def unrenumber(self, df, column_name, preserve_order=False): ).drop(columns=index_name).reset_index(drop=True) if type(df) is dask_cudf.DataFrame: - return df.map_partitions( + df = df.map_partitions( lambda df: df.rename(columns=mapping, copy=False) ) else: - return df.rename(columns=mapping, copy=False) + df = df.rename(columns=mapping, copy=False) + if get_column_names: + return df, col_names + else: + return df diff --git a/python/cugraph/tests/test_egonet.py b/python/cugraph/tests/test_egonet.py index b259c2567dc..fc0ce38eb9c 100644 --- a/python/cugraph/tests/test_egonet.py +++ b/python/cugraph/tests/test_egonet.py @@ -15,6 +15,7 @@ import pytest +import cudf import cugraph from cugraph.tests import utils @@ -75,3 +76,42 @@ def test_batched_ego_graphs(graph_file, seeds, radius): ego_df, source="src", target="dst", edge_attr="weight" ) assert nx.is_isomorphic(ego_nx, ego_cugraph) + + +@pytest.mark.parametrize("graph_file", utils.DATASETS) +@pytest.mark.parametrize("seed", SEEDS) +@pytest.mark.parametrize("radius", RADIUS) +def test_multi_column_ego_graph(graph_file, seed, radius): + gc.collect() + + df = utils.read_csv_file(graph_file, read_weights_in_sp=True) + df.rename(columns={'0': 'src_0', '1': 'dst_0'}, inplace=True) + df['src_1'] = df['src_0'] + 1000 + df['dst_1'] = df['dst_0'] + 1000 + + G1 = cugraph.Graph() + G1.from_cudf_edgelist( + df, source=["src_0", "src_1"], destination=["dst_0", "dst_1"], + edge_attr="2" + ) 
+ + seed_df = cudf.DataFrame() + seed_df['v_0'] = [seed] + seed_df['v_1'] = [seed + 1000] + + ego_cugraph_res = cugraph.ego_graph(G1, seed_df, radius=radius) + + G2 = cugraph.Graph() + G2.from_cudf_edgelist( + df, source="src_0", destination="dst_0", + edge_attr="2" + ) + ego_cugraph_exp = cugraph.ego_graph(G2, seed, radius=radius) + + # FIXME: Replace with multi-column view_edge_list() + edgelist_df = ego_cugraph_res.edgelist.edgelist_df + edgelist_df_res = ego_cugraph_res.unrenumber(edgelist_df, "src") + edgelist_df_res = ego_cugraph_res.unrenumber(edgelist_df_res, "dst") + for i in range(len(edgelist_df_res)): + assert ego_cugraph_exp.has_edge(edgelist_df_res["0_src"].iloc[i], + edgelist_df_res["0_dst"].iloc[i]) diff --git a/python/cugraph/tests/test_k_core.py b/python/cugraph/tests/test_k_core.py index 33d403ee27b..d09b719ab79 100644 --- a/python/cugraph/tests/test_k_core.py +++ b/python/cugraph/tests/test_k_core.py @@ -57,7 +57,6 @@ def calc_k_cores(graph_file, directed=True): def compare_edges(cg, nxg): edgelist_df = cg.view_edge_list() src, dest = edgelist_df["src"], edgelist_df["dst"] - assert cg.edgelist.weights is False assert len(src) == nxg.size() for i in range(len(src)): @@ -66,7 +65,7 @@ def compare_edges(cg, nxg): @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) -def test_core_number_Graph(graph_file): +def test_k_core_Graph(graph_file): gc.collect() cu_kcore, nx_kcore = calc_k_cores(graph_file, False) @@ -75,7 +74,7 @@ def test_core_number_Graph(graph_file): @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) -def test_core_number_Graph_nx(graph_file): +def test_k_core_Graph_nx(graph_file): gc.collect() NM = utils.read_csv_for_nx(graph_file) @@ -86,3 +85,35 @@ def test_core_number_Graph_nx(graph_file): cc = cugraph.k_core(Gnx) assert nx.is_isomorphic(nc, cc) + + +@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) +def test_k_core_corenumber_multicolumn(graph_file): + gc.collect() + + cu_M = utils.read_csv_file(graph_file) + cu_M.rename(columns={'0': 'src_0', '1': 'dst_0'}, inplace=True) + cu_M['src_1'] = cu_M['src_0'] + 1000 + cu_M['dst_1'] = cu_M['dst_0'] + 1000 + + G1 = cugraph.Graph() + G1.from_cudf_edgelist(cu_M, source=["src_0", "src_1"], + destination=["dst_0", "dst_1"]) + + corenumber_G1 = cugraph.core_number(G1) + corenumber_G1.rename(columns={'core_number': 'values'}, inplace=True) + corenumber_G1 = corenumber_G1[['0_vertex', '1_vertex', 'values']] + + ck_res = cugraph.k_core(G1, core_number=corenumber_G1) + G2 = cugraph.Graph() + G2.from_cudf_edgelist(cu_M, source="src_0", + destination="dst_0") + ck_exp = cugraph.k_core(G2) + + # FIXME: Replace with multi-column view_edge_list() + edgelist_df = ck_res.edgelist.edgelist_df + edgelist_df_res = ck_res.unrenumber(edgelist_df, "src") + edgelist_df_res = ck_res.unrenumber(edgelist_df_res, "dst") + for i in range(len(edgelist_df_res)): + assert ck_exp.has_edge(edgelist_df_res["0_src"].iloc[i], + edgelist_df_res["0_dst"].iloc[i]) diff --git a/python/cugraph/tests/test_katz_centrality.py b/python/cugraph/tests/test_katz_centrality.py index 1fef6b05d59..ef2f45c08a4 100644 --- a/python/cugraph/tests/test_katz_centrality.py +++ b/python/cugraph/tests/test_katz_centrality.py @@ -1,3 +1,4 @@ + # Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -15,6 +16,7 @@ import pytest +import cudf import cugraph from cugraph.tests import utils @@ -112,3 +114,38 @@ def test_katz_centrality_nx(graph_file): err = err + 1 print("Mismatches:", err) assert err < (0.1 * len(ck)) + + +@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) +def test_katz_centrality_multi_column(graph_file): + gc.collect() + + cu_M = utils.read_csv_file(graph_file) + cu_M.rename(columns={'0': 'src_0', '1': 'dst_0'}, inplace=True) + cu_M['src_1'] = cu_M['src_0'] + 1000 + cu_M['dst_1'] = cu_M['dst_0'] + 1000 + + G1 = cugraph.DiGraph() + G1.from_cudf_edgelist(cu_M, source=["src_0", "src_1"], + destination=["dst_0", "dst_1"]) + + G2 = cugraph.DiGraph() + G2.from_cudf_edgelist(cu_M, source="src_0", destination="dst_0") + + k_df_exp = cugraph.katz_centrality(G2, alpha=None, max_iter=1000) + k_df_exp = k_df_exp.sort_values("vertex").reset_index(drop=True) + + nstart = cudf.DataFrame() + nstart['vertex_0'] = k_df_exp['vertex'] + nstart['vertex_1'] = nstart['vertex_0'] + 1000 + nstart['values'] = k_df_exp['katz_centrality'] + + k_df_res = cugraph.katz_centrality(G1, nstart=nstart, + alpha=None, max_iter=1000) + k_df_res = k_df_res.sort_values("0_vertex").reset_index(drop=True) + k_df_res.rename(columns={'0_vertex': 'vertex'}, inplace=True) + + top_res = topKVertices(k_df_res, "katz_centrality", 10) + top_exp = topKVertices(k_df_exp, "katz_centrality", 10) + + assert top_res.equals(top_exp) diff --git a/python/cugraph/tests/test_random_walks.py b/python/cugraph/tests/test_random_walks.py index 9767e81ba1f..ba0cd6eadc9 100644 --- a/python/cugraph/tests/test_random_walks.py +++ b/python/cugraph/tests/test_random_walks.py @@ -126,7 +126,6 @@ def test_random_walks_invalid_max_dept( directed, max_depth ): - """Test calls random_walks an invalid type""" prepare_test() with pytest.raises(TypeError): df, offsets, seeds = calc_random_walks( @@ -152,3 +151,37 @@ def test_random_walks( max_depth=max_depth ) check_random_walks(df, offsets, seeds, df_G) + + +"""@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) +@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) +def test_random_walks( + graph_file, + directed +): + max_depth = random.randint(2, 10) + df_G = utils.read_csv_file(graph_file) + df_G.rename( + columns={"0": "src", "1": "dst", "2": "weight"}, inplace=True) + df_G['src_0'] = df_G['src'] + 1000 + df_G['dst_0'] = df_G['dst'] + 1000 + + if directed: + G = cugraph.DiGraph() + else: + G = cugraph.Graph() + G.from_cudf_edgelist(df_G, source=['src', 'src_0'], + destination=['dst', 'dst_0'], + edge_attr="weight") + + k = random.randint(1, 10) + start_vertices = random.sample(G.nodes().to_array().tolist(), k) + + seeds = cudf.DataFrame() + seeds['v'] = start_vertices + seeds['v_0'] = seeds['v'] + 1000 + + df, offsets = cugraph.random_walks(G, seeds, max_depth) + + check_random_walks(df, offsets, seeds, df_G) +""" diff --git a/python/cugraph/tests/test_subgraph_extraction.py b/python/cugraph/tests/test_subgraph_extraction.py index 56c1c23e0ea..389a7716e48 100644 --- a/python/cugraph/tests/test_subgraph_extraction.py +++ b/python/cugraph/tests/test_subgraph_extraction.py @@ -126,3 +126,39 @@ def test_subgraph_extraction_Graph_nx(graph_file): for (u, v) in cu_sub.edges(): assert nx_sub.has_edge(u, v) + + +@pytest.mark.parametrize("graph_file", utils.DATASETS) +def test_subgraph_extraction_multi_column(graph_file): + gc.collect() + + M = utils.read_csv_for_nx(graph_file) + + cu_M = cudf.DataFrame() + cu_M["src_0"] = cudf.Series(M["0"]) + cu_M["dst_0"] = 
cudf.Series(M["1"]) + cu_M["src_1"] = cu_M["src_0"] + 1000 + cu_M["dst_1"] = cu_M["dst_0"] + 1000 + G1 = cugraph.Graph() + G1.from_cudf_edgelist(cu_M, source=["src_0", "src_1"], + destination=["dst_0", "dst_1"]) + + verts = cudf.Series([0, 1, 17]) + verts_G1 = cudf.DataFrame() + verts_G1['v_0'] = verts + verts_G1['v_1'] = verts + 1000 + + sG1 = cugraph.subgraph(G1, verts_G1) + + G2 = cugraph.Graph() + G2.from_cudf_edgelist(cu_M, source="src_0", destination="dst_0") + + sG2 = cugraph.subgraph(G2, verts) + + # FIXME: Replace with multi-column view_edge_list() + edgelist_df = sG1.edgelist.edgelist_df + edgelist_df_res = sG1.unrenumber(edgelist_df, "src") + edgelist_df_res = sG1.unrenumber(edgelist_df_res, "dst") + for i in range(len(edgelist_df_res)): + assert sG2.has_edge(edgelist_df_res["0_src"].iloc[i], + edgelist_df_res["0_dst"].iloc[i]) diff --git a/python/cugraph/traversal/sssp.py b/python/cugraph/traversal/sssp.py index 8d77e6e9312..f3aebaf43bf 100644 --- a/python/cugraph/traversal/sssp.py +++ b/python/cugraph/traversal/sssp.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -212,7 +212,11 @@ def sssp(G, matrix_graph_type=DiGraph if directed else Graph) if G.renumbered: - source = G.lookup_internal_vertex_id(cudf.Series([source]))[0] + if isinstance(source, cudf.DataFrame): + source = G.lookup_internal_vertex_id( + source, source.columns).iloc[0] + else: + source = G.lookup_internal_vertex_id(cudf.Series([source]))[0] if source is cudf.NA: raise ValueError( @@ -223,7 +227,7 @@ def sssp(G, if G.renumbered: df = G.unrenumber(df, "vertex") df = G.unrenumber(df, "predecessor") - df["predecessor"].fillna(-1, inplace=True) + df.fillna(-1, inplace=True) return _convert_df_to_output_type(df, input_type, return_predecessors) From 7cb858345bbf020c862b5999de8c8081cd880d56 Mon Sep 17 00:00:00 2001 From: Joseph Nke <76006812+jnke2016@users.noreply.github.com> Date: Mon, 10 May 2021 08:53:26 -0500 Subject: [PATCH 247/343] Bindings for RMAT (#1573) Bindings for RMAT Bindings for the list of RMAT graphs includes some quick fixes to address some sphinx doc-building warnings closes #1473 Authors: - Joseph Nke (https://github.com/jnke2016) - Rick Ratzel (https://github.com/rlratzel) Approvers: - Andrei Schaffer (https://github.com/aschaffer) - Chuck Hastings (https://github.com/ChuckHastings) - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/1573 --- cpp/include/utilities/cython.hpp | 33 +- cpp/src/utilities/cython.cu | 117 +++++- docs/cugraph/source/api.rst | 6 +- python/cugraph/generators/__init__.py | 14 + python/cugraph/generators/rmat.pxd | 45 +++ python/cugraph/generators/rmat.py | 377 +++++++++++++++++++ python/cugraph/generators/rmat_wrapper.pyx | 171 +++++++++ python/cugraph/structure/graph_classes.py | 189 ++++++---- python/cugraph/structure/graph_utilities.pxd | 14 +- python/cugraph/tests/generators/test_rmat.py | 122 ++++++ python/cugraph/traversal/ms_bfs.py | 2 - 11 files changed, 1005 insertions(+), 85 deletions(-) create mode 100644 python/cugraph/generators/__init__.py create mode 100644 python/cugraph/generators/rmat.pxd create mode 100644 python/cugraph/generators/rmat.py create mode 100644 python/cugraph/generators/rmat_wrapper.pyx create mode 100644 python/cugraph/tests/generators/test_rmat.py diff --git 
a/cpp/include/utilities/cython.hpp b/cpp/include/utilities/cython.hpp index 0d6cb2f63d0..3221ba54929 100644 --- a/cpp/include/utilities/cython.hpp +++ b/cpp/include/utilities/cython.hpp @@ -14,8 +14,8 @@ * limitations under the License. */ #pragma once - #include +#include #include #include #include @@ -207,6 +207,12 @@ struct random_walk_ret_t { std::unique_ptr d_sizes_; }; +struct graph_generator_t { + std::unique_ptr d_source; + std::unique_ptr d_destination; +}; + +// enum class generator_distribution_t { POWER_LAW = 0, UNIFORM }; // aggregate for random_walks() COO return type // to be exposed to cython: // @@ -488,6 +494,31 @@ std::unique_ptr call_egonet(raft::handle_t const& handle, vertex_t* source_vertex, vertex_t n_subgraphs, vertex_t radius); + +// Wrapper for calling graph generator +template +std::unique_ptr call_generate_rmat_edgelist(raft::handle_t const& handle, + size_t scale, + size_t num_edges, + double a, + double b, + double c, + uint64_t seed, + bool clip_and_flip, + bool scramble_vertex_ids); +template +std::vector, std::unique_ptr>> +call_generate_rmat_edgelists(raft::handle_t const& handle, + size_t n_edgelists, + size_t min_scale, + size_t max_scale, + size_t edge_factor, + cugraph::experimental::generator_distribution_t size_distribution, + cugraph::experimental::generator_distribution_t edge_distribution, + uint64_t seed, + bool clip_and_flip, + bool scramble_vertex_ids); + // wrapper for random_walks. // template diff --git a/cpp/src/utilities/cython.cu b/cpp/src/utilities/cython.cu index b4dcd84a7e1..093a598b659 100644 --- a/cpp/src/utilities/cython.cu +++ b/cpp/src/utilities/cython.cu @@ -13,7 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - +#include +//#include #include #include #include @@ -789,6 +790,70 @@ std::unique_ptr call_egonet(raft::handle_t const& handle, CUGRAPH_FAIL("vertexType/edgeType combination unsupported"); } } +// Wrapper for graph generate_rmat_edgelist() +// to expose the API to cython +// enum class generator_distribution_t { POWER_LAW = 0, UNIFORM }; +template +std::unique_ptr call_generate_rmat_edgelist(raft::handle_t const& handle, + size_t scale, + size_t num_edges, + double a, + double b, + double c, + uint64_t seed, + bool clip_and_flip, + bool scramble_vertex_ids) +{ + auto src_dst_tuple = cugraph::experimental::generate_rmat_edgelist( + handle, scale, num_edges, a, b, c, seed, clip_and_flip, scramble_vertex_ids); + + graph_generator_t gg_vals{ + std::make_unique(std::get<0>(src_dst_tuple).release()), + std::make_unique(std::get<1>(src_dst_tuple).release())}; + + return std::make_unique(std::move(gg_vals)); +} + +template +std::vector, std::unique_ptr>> +call_generate_rmat_edgelists(raft::handle_t const& handle, + size_t n_edgelists, + size_t min_scale, + size_t max_scale, + size_t edge_factor, + cugraph::experimental::generator_distribution_t size_distribution, + cugraph::experimental::generator_distribution_t edge_distribution, + uint64_t seed, + bool clip_and_flip, + bool scramble_vertex_ids) +{ + auto src_dst_vec_tuple = + cugraph::experimental::generate_rmat_edgelists(handle, + n_edgelists, + min_scale, + max_scale, + edge_factor, + size_distribution, + edge_distribution, + seed, + clip_and_flip, + scramble_vertex_ids); + + std::vector, std::unique_ptr>> + gg_vec; + + std::transform( + src_dst_vec_tuple.begin(), + src_dst_vec_tuple.end(), + std::back_inserter(gg_vec), + [](auto& tpl_dev_uvec) { + return std::make_pair( + 
std::move(std::make_unique(std::get<0>(tpl_dev_uvec).release())), + std::move(std::make_unique(std::get<1>(tpl_dev_uvec).release()))); + }); + + return gg_vec; +} // Wrapper for random_walks() through a graph container // to expose the API to cython. @@ -1360,5 +1425,55 @@ template std::unique_ptr> call_renumber( bool do_expensive_check, bool multi_gpu); +template std::unique_ptr call_generate_rmat_edgelist( + raft::handle_t const& handle, + size_t scale, + size_t num_edges, + double a, + double b, + double c, + uint64_t seed, + bool clip_and_flip, + bool scramble_vertex_ids); + +template std::unique_ptr call_generate_rmat_edgelist( + raft::handle_t const& handle, + size_t scale, + size_t num_edges, + double a, + double b, + double c, + uint64_t seed, + bool clip_and_flip, + bool scramble_vertex_ids); + +template std::vector< + std::pair, std::unique_ptr>> +call_generate_rmat_edgelists( + raft::handle_t const& handle, + size_t n_edgelists, + size_t min_scale, + size_t max_scale, + size_t edge_factor, + cugraph::experimental::generator_distribution_t size_distribution, + cugraph::experimental::generator_distribution_t edge_distribution, + uint64_t seed, + bool clip_and_flip, + bool scramble_vertex_ids); + +template std::vector< + std::pair, std::unique_ptr>> +call_generate_rmat_edgelists( + raft::handle_t const& handle, + size_t n_edgelists, + size_t min_scale, + size_t max_scale, + size_t edge_factor, + cugraph::experimental::generator_distribution_t size_distribution, + cugraph::experimental::generator_distribution_t edge_distribution, + uint64_t seed, + bool clip_and_flip, + bool scramble_vertex_ids); + } // namespace cython } // namespace cugraph diff --git a/docs/cugraph/source/api.rst b/docs/cugraph/source/api.rst index e2c2c19cf02..adcf33d1191 100644 --- a/docs/cugraph/source/api.rst +++ b/docs/cugraph/source/api.rst @@ -10,7 +10,7 @@ Structure Graph ----- -.. autoclass:: cugraph.structure.graph.Graph +.. autoclass:: cugraph.structure.graph_classes.Graph :members: :undoc-members: @@ -143,7 +143,7 @@ Core Number .. automodule:: cugraph.cores.core_number :members: :undoc-members: - + K-Core ------ @@ -196,7 +196,7 @@ Pagerank (MG) .. automodule:: cugraph.dask.link_analysis.pagerank :members: pagerank - :undoc-members: + :undoc-members: Link Prediction diff --git a/python/cugraph/generators/__init__.py b/python/cugraph/generators/__init__.py new file mode 100644 index 00000000000..74ecc2384bd --- /dev/null +++ b/python/cugraph/generators/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .rmat import rmat, multi_rmat diff --git a/python/cugraph/generators/rmat.pxd b/python/cugraph/generators/rmat.pxd new file mode 100644 index 00000000000..e1ad250073e --- /dev/null +++ b/python/cugraph/generators/rmat.pxd @@ -0,0 +1,45 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from libcpp cimport bool +from cugraph.structure.graph_utilities cimport * +from libcpp.vector cimport vector + +cdef extern from "experimental/graph_generator.hpp" namespace "cugraph::experimental": + ctypedef enum generator_distribution_t: + POWER_LAW "cugraph::experimental::generator_distribution_t::POWER_LAW" + UNIFORM "cugraph::experimental::generator_distribution_t::UNIFORM" + + +cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": + cdef unique_ptr[graph_generator_t] call_generate_rmat_edgelist[vertex_t] ( + const handle_t &handle, + size_t scale, + size_t num_edges, + double a, + double b, + double c, + int seed, + bool clip_and_flip, + bool scramble_vertex_ids) except + + + cdef vector[pair[unique_ptr[device_buffer], unique_ptr[device_buffer]]] call_generate_rmat_edgelists[vertex_t]( + const handle_t &handle, + size_t n_edgelists, + size_t min_scale, + size_t max_scale, + size_t edge_factor, + generator_distribution_t size_distribution, + generator_distribution_t edge_distribution, + int seed, + bool clip_and_flip, + bool scramble_vertex_ids) except + \ No newline at end of file diff --git a/python/cugraph/generators/rmat.py b/python/cugraph/generators/rmat.py new file mode 100644 index 00000000000..46859ccd42d --- /dev/null +++ b/python/cugraph/generators/rmat.py @@ -0,0 +1,377 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from dask.distributed import default_client +import dask_cudf + +from cugraph.generators import rmat_wrapper +from cugraph.comms import comms as Comms +import cugraph + + +def _ensure_args_rmat( + scale, + num_edges, + a, + b, + c, + seed, + clip_and_flip, + scramble_vertex_ids, + create_using, + mg +): + """ + Ensures the args passed in are usable for the rmat() API, raises the + appropriate exception if incorrect, else returns None. 
+ """ + if mg and create_using not in [None, cugraph.DiGraph]: + raise TypeError("Only cugraph.DiGraph and None are supported types " + "for `create_using` for multi-GPU R-MAT") + if create_using not in [None, cugraph.Graph, cugraph.DiGraph]: + raise TypeError("Only cugraph.Graph, cugraph.DiGraph, and None are " + "supported types for 'create_using'") + if not isinstance(scale, int): + raise TypeError("'scale' must be an int") + if not isinstance(num_edges, int): + raise TypeError("'num_edges' must be an int") + if (a+b+c > 1): + raise ValueError( + "a + b + c should be non-negative and no larger than 1.0") + if (clip_and_flip not in [True, False]): + raise ValueError("'clip_and_flip' must be a bool") + if (scramble_vertex_ids not in [True, False]): + raise ValueError("'clip_and_flip' must be a bool") + if not isinstance(seed, int): + raise TypeError("'seed' must be an int") + + +def _ensure_args_multi_rmat( + n_edgelists, + min_scale, + max_scale, + edge_factor, + size_distribution, + edge_distribution, + seed, + clip_and_flip, + scramble_vertex_ids +): + """ + Ensures the args passed in are usable for the multi_rmat() API, raises the + appropriate exception if incorrect, else returns None. + """ + if not isinstance(n_edgelists, int): + raise TypeError("'n_edgelists' must be an int") + if not isinstance(min_scale, int): + raise TypeError("'min_scale' must be an int") + if not isinstance(max_scale, int): + raise TypeError("'max_scale' must be an int") + if not isinstance(edge_factor, int): + raise TypeError("'edge_factor' must be an int") + if (size_distribution not in [0, 1]): + raise TypeError("'size_distribution' must be either 0 or 1") + if (edge_distribution not in [0, 1]): + raise TypeError("'edge_distribution' must be either 0 or 1") + if (clip_and_flip not in [True, False]): + raise ValueError("'clip_and_flip' must be a bool") + if (scramble_vertex_ids not in [True, False]): + raise ValueError("'clip_and_flip' must be a bool") + if not isinstance(seed, int): + raise TypeError("'seed' must be an int") + + +def _sg_rmat( + scale, + num_edges, + a, + b, + c, + seed, + clip_and_flip, + scramble_vertex_ids, + create_using=cugraph.DiGraph +): + """ + Calls RMAT on a single GPU and uses the resulting cuDF DataFrame + to initialize and return a cugraph Graph object specified with + create_using. If create_using is None, returns the edgelist df as-is. + """ + df = rmat_wrapper.generate_rmat_edgelist(scale, + num_edges, + a, + b, + c, + seed, + clip_and_flip, + scramble_vertex_ids) + if create_using is None: + return df + + G = create_using() + G.from_cudf_edgelist(df, source='src', destination='dst', renumber=False) + + return G + + +def _mg_rmat( + scale, + num_edges, + a, + b, + c, + seed, + clip_and_flip, + scramble_vertex_ids, + create_using=cugraph.DiGraph +): + """ + Calls RMAT on multiple GPUs and uses the resulting Dask cuDF DataFrame to + initialize and return a cugraph Graph object specified with create_using. + If create_using is None, returns the Dask DataFrame edgelist as-is. + + seed is used as the initial seed for the first worker used (worker 0), then + each subsequent worker will receive seed+ as the seed value. 
+ """ + client = default_client() + worker_list = list(client.scheduler_info()['workers'].keys()) + num_workers = len(worker_list) + num_edges_list = _calc_num_edges_per_worker(num_workers, num_edges) + futures = [] + for (i, worker_num_edges) in enumerate(num_edges_list): + unique_worker_seed = seed + i + future = client.submit( + _call_rmat, + Comms.get_session_id(), + scale, + worker_num_edges, + a, + b, + c, + unique_worker_seed, + clip_and_flip, + scramble_vertex_ids, + workers=worker_list[i] + ) + futures.append(future) + + ddf = dask_cudf.from_delayed(futures) + + if create_using is None: + return ddf + + G = create_using() + G.from_dask_cudf_edgelist(ddf, source="src", destination="dst") + + return G + + +def _call_rmat( + sID, + scale, + num_edges_for_worker, + a, + b, + c, + unique_worker_seed, + clip_and_flip, + scramble_vertex_ids +): + """ + Callable passed to dask client.submit calls that extracts the individual + worker handle based on the dask session ID + """ + handle = Comms.get_handle(sID) + + return rmat_wrapper.generate_rmat_edgelist( + scale, + num_edges_for_worker, + a, + b, + c, + unique_worker_seed, + clip_and_flip, + scramble_vertex_ids, + handle=handle + ) + + +def _calc_num_edges_per_worker(num_workers, num_edges): + """ + Returns a list of length num_workers with the individual number of edges + each worker should generate. The sum of all edges in the list is num_edges. + """ + L = [] + w = num_edges // num_workers + r = num_edges % num_workers + for i in range(num_workers): + if (i < r): + L.append(w+1) + else: + L.append(w) + return L + + +############################################################################### + +def rmat( + scale, + num_edges, + a, + b, + c, + seed, + clip_and_flip, + scramble_vertex_ids, + create_using=cugraph.DiGraph, + mg=False +): + """ + Generate a Graph object using a Recursive MATrix (R-MAT) graph generation + algorithm. + + Parameters + ---------- + scale : int + Scale factor to set the number of verties in the graph Vertex IDs have + values in [0, V), where V = 1 << 'scale' + + num_edges : int + Number of edges to generate + + a : float + Probability of the first partition + + b : float + Probability of the second partition + + c : float + Probability of the thrid partition + + seed : int + Seed value for the random number generator + + clip_and_flip : bool + Flag controlling whether to generate edges only in the lower triangular + part (including the diagonal) of the graph adjacency matrix + (if set to 'true') or not (if set to 'false). + + scramble_vertex_ids : bool + Flag controlling whether to scramble vertex ID bits (if set to `true`) or + not (if set to `false`); scrambling vertx ID bits breaks correlation + between vertex ID values and vertex degrees + + create_using : cugraph Graph type or None The graph type to construct + containing the generated edges and vertices. If None is specified, the + edgelist cuDF DataFrame (or dask_cudf DataFrame for MG) is returned as-is. + This is useful for benchmarking Graph construction steps that require raw + data that includes potential self-loops, isolated vertices, and duplicated + edges. Default is cugraph.DiGraph. NOTE: only the cugraph.DiGraph type is + supported for multi-GPU + + mg : bool + If True, R-MAT generation occurs across multiple GPUs. If False, only a + single GPU is used. 
Default is False (single-GPU) + + Returns + ------- + instance of cugraph.Graph + """ + _ensure_args_rmat(scale, num_edges, a, b, c, seed, clip_and_flip, + scramble_vertex_ids, create_using, mg) + + if mg: + return _mg_rmat(scale, num_edges, a, b, c, seed, clip_and_flip, + scramble_vertex_ids, create_using) + else: + return _sg_rmat(scale, num_edges, a, b, c, seed, clip_and_flip, + scramble_vertex_ids, create_using) + + +def multi_rmat( + n_edgelists, + min_scale, + max_scale, + edge_factor, + size_distribution, + edge_distribution, + seed, + clip_and_flip, + scramble_vertex_ids +): + """ + Generate multiple Graph objects using a Recursive MATrix (R-MAT) graph + generation algorithm. + + Parameters + ---------- + n_edgelists : int + Number of edge lists (graphs) to generate + + min_scale : int + Scale factor to set the minimum number of vertices in the graph + + max_scale : int + Scale factor to set the maximum number of vertices in the graph + + edge_factor : int + Average number of edges per vertex to generate + + size_distribution : int + Distribution of the graph sizes, impacts the scale parameter of the R-MAT + generator. + '0' for POWER_LAW distribution and '1' for UNIFORM distribution + + edge_distribution : int + Edges distribution for each graph, impacts how R-MAT parameters a,b,c,d, + are set. + '0' for POWER_LAW distribution and '1' for UNIFORM distribution + + seed : int + Seed value for the random number generator + + clip_and_flip : bool + Flag controlling whether to generate edges only in the lower triangular + part (including the diagonal) of the graph adjacency matrix + (if set to 'true') or not (if set to 'false') + + scramble_vertex_ids : bool + Flag controlling whether to scramble vertex ID bits (if set to 'true') or + not (if set to 'false'); scrambling vertx ID bits breaks correlation + between vertex ID values and vertex degrees + + Returns + ------- + list of cugraph.Graph instances + """ + _ensure_args_multi_rmat(n_edgelists, min_scale, max_scale, edge_factor, + size_distribution, edge_distribution, seed, + clip_and_flip, scramble_vertex_ids) + + dfs = rmat_wrapper.generate_rmat_edgelists( + n_edgelists, min_scale, + max_scale, + edge_factor, + size_distribution, + edge_distribution, + seed, + clip_and_flip, + scramble_vertex_ids) + list_G = [] + + for df in dfs: + G = cugraph.Graph() + G.from_cudf_edgelist(df, source='src', destination='dst') + list_G.append(G) + + return list_G diff --git a/python/cugraph/generators/rmat_wrapper.pyx b/python/cugraph/generators/rmat_wrapper.pyx new file mode 100644 index 00000000000..26f3772ad32 --- /dev/null +++ b/python/cugraph/generators/rmat_wrapper.pyx @@ -0,0 +1,171 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
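For reference, a minimal single-GPU sketch of how the generators defined in rmat.py above (and backed by the rmat_wrapper.pyx bindings that follow) might be driven. The module path cugraph.generators and the argument names come from this patch; the scale, edge factor, a/b/c probabilities and seed below are illustrative assumptions only, and the mg=True path additionally needs an initialized Dask cluster plus cugraph.comms, as the test setup later in this patch shows.

# Illustrative sketch only: exercises cugraph.generators.rmat()/multi_rmat()
# as declared above; numeric values are arbitrary example choices.
import cugraph
from cugraph.generators import rmat, multi_rmat

scale = 10
num_edges = (2**scale) * 16        # assumed edge factor of 16

# Default create_using builds a cugraph.DiGraph from the generated edgelist;
# create_using=None would instead return the raw cudf DataFrame ('src', 'dst').
G = rmat(scale, num_edges, 0.57, 0.19, 0.19, seed=42,
         clip_and_flip=False, scramble_vertex_ids=True,
         create_using=cugraph.DiGraph, mg=False)

# A batch of graphs whose scales fall in [min_scale, max_scale];
# size_distribution/edge_distribution use 0 for POWER_LAW and 1 for UNIFORM.
graphs = multi_rmat(n_edgelists=4, min_scale=4, max_scale=8, edge_factor=8,
                    size_distribution=0, edge_distribution=0, seed=42,
                    clip_and_flip=False, scramble_vertex_ids=True)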
+ +from libcpp cimport bool +from libc.stdint cimport uintptr_t +import numpy as np +import numpy.ctypeslib as ctypeslib +from cython.operator cimport dereference as deref + +import rmm +from rmm._lib.device_buffer cimport DeviceBuffer +import cudf +from cudf.core.buffer import Buffer + +from cugraph.structure.graph_utilities cimport * +from cugraph.generators.rmat cimport * +from libcpp.utility cimport move # This must be imported after graph_utilities + # since graph_utilities also defines move + + +def generate_rmat_edgelist( + scale, + num_edges, + a, + b, + c, + seed, + clip_and_flip, + scramble_vertex_ids, + handle=None +): + + vertex_t = np.dtype("int32") + if (2**scale) > (2**31 - 1): + vertex_t = np.dtype("int64") + + cdef unique_ptr[handle_t] handle_ptr + cdef size_t handle_size_t + + if handle is None: + handle_ptr.reset(new handle_t()) + handle_ = handle_ptr.get() + else: + handle_size_t = handle.getHandle() + handle_ = handle_size_t + + cdef unique_ptr[graph_generator_t] gg_ret_ptr + + if (vertex_t==np.dtype("int32")): + gg_ret_ptr = move(call_generate_rmat_edgelist[int]( deref(handle_), + scale, + num_edges, + a, + b, + c, + seed, + clip_and_flip, + scramble_vertex_ids)) + else: # (vertex_t == np.dtype("int64")) + gg_ret_ptr = move(call_generate_rmat_edgelist[long]( deref(handle_), + scale, + num_edges, + a, + b, + c, + seed, + clip_and_flip, + scramble_vertex_ids)) + + gg_ret = move(gg_ret_ptr.get()[0]) + source_set = DeviceBuffer.c_from_unique_ptr(move(gg_ret.d_source)) + destination_set = DeviceBuffer.c_from_unique_ptr(move(gg_ret.d_destination)) + source_set = Buffer(source_set) + destination_set = Buffer(destination_set) + + set_source = cudf.Series(data=source_set, dtype=vertex_t) + set_destination = cudf.Series(data=destination_set, dtype=vertex_t) + + df = cudf.DataFrame() + df['src'] = set_source + df['dst'] = set_destination + + return df + + +def generate_rmat_edgelists( + n_edgelists, + min_scale, + max_scale, + edge_factor, + size_distribution, + edge_distribution, + seed, + clip_and_flip, + scramble_vertex_ids + ): + + vertex_t = np.dtype("int32") + if (2**max_scale) > (2**31 - 1): + vertex_t = np.dtype("int64") + + cdef unique_ptr[handle_t] handle_ptr + handle_ptr.reset(new handle_t()) + handle_ = handle_ptr.get() + + cdef generator_distribution_t s_distribution + cdef generator_distribution_t e_distribution + if size_distribution == 0: + s_distribution= POWER_LAW + else : + s_distribution= UNIFORM + if edge_distribution == 0: + e_distribution= POWER_LAW + else : + e_distribution= UNIFORM + #cdef unique_ptr[graph_generator_t*] gg_ret_ptr + cdef vector[pair[unique_ptr[device_buffer], unique_ptr[device_buffer]]] gg_ret_ptr + + if (vertex_t==np.dtype("int32")): + #gg_ret_ptr = move(call_generate_rmat_edgelists[int]( deref(handle_), + gg_ret_ptr = move(call_generate_rmat_edgelists[int]( deref(handle_), + n_edgelists, + min_scale, + max_scale, + edge_factor, + s_distribution, + e_distribution, + seed, + clip_and_flip, + scramble_vertex_ids)) + else: # (vertex_t == np.dtype("int64")) + #gg_ret_ptr = move(call_generate_rmat_edgelists[long]( deref(handle_), + gg_ret_ptr = move(call_generate_rmat_edgelists[long]( deref(handle_), + n_edgelists, + min_scale, + max_scale, + edge_factor, + s_distribution, + e_distribution, + seed, + clip_and_flip, + scramble_vertex_ids)) + list_df = [] + + for i in range(n_edgelists): + source_set = DeviceBuffer.c_from_unique_ptr(move(gg_ret_ptr[i].first)) + destination_set = DeviceBuffer.c_from_unique_ptr(move(gg_ret_ptr[i].second)) + 
source_set = Buffer(source_set) + destination_set = Buffer(destination_set) + + set_source = cudf.Series(data=source_set, dtype=vertex_t) + set_destination = cudf.Series(data=destination_set, dtype=vertex_t) + + df = cudf.DataFrame() + df['src'] = set_source + df['dst'] = set_destination + + list_df.append(df) + + #Return a list of dataframes + return list_df diff --git a/python/cugraph/structure/graph_classes.py b/python/cugraph/structure/graph_classes.py index 52fcb2ffba4..0fc8b454138 100644 --- a/python/cugraph/structure/graph_classes.py +++ b/python/cugraph/structure/graph_classes.py @@ -86,22 +86,27 @@ def from_cudf_edgelist( in the range [0, V), renumbering can be disabled and the original external vertex ids will be used. If weights are present, edge_attr argument is the weights column name. + Parameters ---------- input_df : cudf.DataFrame or dask_cudf.DataFrame - A DataFrame that contains edge information - If a dask_cudf.DataFrame is passed it will be reinterpreted as - a cudf.DataFrame. For the distributed path please use - from_dask_cudf_edgelist. + A DataFrame that contains edge information If a dask_cudf.DataFrame is + passed it will be reinterpreted as a cudf.DataFrame. For the + distributed path please use from_dask_cudf_edgelist. + source : str or array-like - source column name or array of column names + source column name or array of column names + destination : str or array-like - destination column name or array of column names + destination column name or array of column names + edge_attr : str or None - the weights column name. Default is None + the weights column name. Default is None + renumber : bool - Indicate whether or not to renumber the source and destination - vertex IDs. Default is True. + Indicate whether or not to renumber the source and destination vertex + IDs. Default is True. + Examples -------- >>> df = cudf.read_csv('datasets/karate.csv', delimiter=' ', @@ -135,22 +140,22 @@ def from_cudf_adjlist(self, offset_col, index_col, value_col=None): Parameters ---------- offset_col : cudf.Series - This cudf.Series wraps a gdf_column of size V + 1 (V: number of - vertices). - The gdf column contains the offsets for the vertices in this graph. - Offsets must be in the range [0, E] (E: number of edges). + This cudf.Series wraps a gdf_column of size V + 1 (V: number of + vertices). The gdf column contains the offsets for the vertices in + this graph. Offsets must be in the range [0, E] (E: number of edges). + index_col : cudf.Series - This cudf.Series wraps a gdf_column of size E (E: number of edges). - The gdf column contains the destination index for each edge. - Destination indices must be in the range [0, V) (V: number of - vertices). + This cudf.Series wraps a gdf_column of size E (E: number of edges). + The gdf column contains the destination index for each edge. + Destination indices must be in the range [0, V) + (V: number of vertices). + value_col : cudf.Series, optional - This pointer can be ``None``. - If not, this cudf.Series wraps a gdf_column of size E (E: number of - edges). - The gdf column contains the weight value for each edge. - The expected type of the gdf_column element is floating point - number. + This pointer can be ``None``. If not, this cudf.Series wraps a + gdf_column of size E (E: number of edges). The gdf column contains the + weight value for each edge. The expected type of the gdf_column + element is floating point number. 
+ Examples -------- >>> gdf = cudf.read_csv('datasets/karate.csv', delimiter=' ', @@ -192,19 +197,24 @@ def from_dask_cudf_edgelist( external vertex ids will be used. Note that the graph object will store a reference to the dask_cudf.DataFrame provided. + Parameters ---------- input_ddf : dask_cudf.DataFrame - The edgelist as a dask_cudf.DataFrame + The edgelist as a dask_cudf.DataFrame + source : str or array-like - source column name or array of column names + source column name or array of column names + destination : str - destination column name or array of column names + destination column name or array of column names + edge_attr : str - weights column name. + weights column name. + renumber : bool - If source and destination indices are not in range 0 to V where V - is number of vertices, renumber argument should be True. + If source and destination indices are not in range 0 to V where V is + number of vertices, renumber argument should be True. """ if self._Impl is None: self._Impl = simpleDistributedGraphImpl(self.graph_properties) @@ -237,19 +247,25 @@ def from_pandas_edgelist( in the range [0, V), renumbering can be disabled and the original external vertex ids will be used. If weights are present, edge_attr argument is the weights column name. + Parameters ---------- input_df : pandas.DataFrame - A DataFrame that contains edge information + A DataFrame that contains edge information + source : str or array-like - source column name or array of column names + source column name or array of column names + destination : str or array-like - destination column name or array of column names + destination column name or array of column names + edge_attr : str or None - the weights column name. Default is None + the weights column name. Default is None + renumber : bool - Indicate whether or not to renumber the source and destination - vertex IDs. Default is True. + Indicate whether or not to renumber the source and destination vertex + IDs. Default is True. + Examples -------- >>> df = pandas.read_csv('datasets/karate.csv', delimiter=' ', @@ -306,22 +322,26 @@ def unrenumber(self, df, column_name, preserve_order=False, and does not guarantee order or partitioning in multi-GPU mode. If you wish to preserve ordering, add an index column to df and sort the return by that index column. + Parameters ---------- df: cudf.DataFrame or dask_cudf.DataFrame - A DataFrame containing internal vertex identifiers that will be - converted into external vertex identifiers. + A DataFrame containing internal vertex identifiers that will be + converted into external vertex identifiers. + column_name: string - Name of the column containing the internal vertex id. + Name of the column containing the internal vertex id. + preserve_order: (optional) bool - If True, preserve the order of the rows in the output - DataFrame to match the input DataFrame + If True, preserve the order of the rows in the output DataFrame to + match the input DataFrame + Returns --------- df : cudf.DataFrame or dask_cudf.DataFrame - The original DataFrame columns exist unmodified. The external - vertex identifiers are added to the DataFrame, the internal - vertex identifier column is removed from the dataframe. + The original DataFrame columns exist unmodified. The external vertex + identifiers are added to the DataFrame, the internal vertex identifier + column is removed from the dataframe. 
""" return self.renumber_map.unrenumber(df, column_name, preserve_order, get_column_names) @@ -333,13 +353,16 @@ def lookup_internal_vertex_id(self, df, column_name=None): Series with the internal vertex ids. Note that this function does not guarantee order in single GPU mode, and does not guarantee order or partitioning in multi-GPU mode. + Parameters ---------- df: cudf.DataFrame, cudf.Series, dask_cudf.DataFrame, dask_cudf.Series - A DataFrame containing external vertex identifiers that will be - converted into internal vertex identifiers. + A DataFrame containing external vertex identifiers that will be + converted into internal vertex identifiers. + column_name: (optional) string - Name of the column containing the external vertex ids + Name of the column containing the external vertex ids + Returns --------- series : cudf.Series or dask_cudf.Series @@ -363,21 +386,27 @@ def add_internal_vertex_id( Parameters ---------- df: cudf.DataFrame or dask_cudf.DataFrame - A DataFrame containing external vertex identifiers that will be - converted into internal vertex identifiers. + A DataFrame containing external vertex identifiers that will be + converted into internal vertex identifiers. + internal_column_name: string - Name of column to contain the internal vertex id + Name of column to contain the internal vertex id + external_column_name: string or list of strings - Name of the column(s) containing the external vertex ids + Name of the column(s) containing the external vertex ids + drop: (optional) bool, defaults to True - Drop the external columns from the returned DataFrame + Drop the external columns from the returned DataFrame + preserve_order: (optional) bool, defaults to False - Preserve the order of the data frame (requires an extra sort) + Preserve the order of the data frame (requires an extra sort) + Returns --------- df : cudf.DataFrame or dask_cudf.DataFrame Original DataFrame with new column containing internal vertex id + """ return self.renumber_map.add_internal_vertex_id( df, @@ -448,11 +477,13 @@ def to_directed(self): Return a directed representation of the graph. This function sets the type of graph as DiGraph() and returns the directed view. + Returns ------- G : DiGraph - A directed graph with the same nodes, and each edge (u,v,weights) - replaced by two directed edges (u,v,weights) and (v,u,weights). + A directed graph with the same nodes, and each edge (u,v,weights) + replaced by two directed edges (u,v,weights) and (v,u,weights). + Examples -------- >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', @@ -461,6 +492,7 @@ def to_directed(self): >>> G.from_cudf_edgelist(M, '0', '1') >>> DiG = G.to_directed() """ + directed_graph = type(self)() directed_graph.graph_properties.directed = True directed_graph._Impl = type(self._Impl)(directed_graph. @@ -471,11 +503,13 @@ def to_directed(self): def to_undirected(self): """ Return an undirected copy of the graph. + Returns ------- G : Graph - A undirected graph with the same nodes, and each directed edge - (u,v,weights) replaced by an undirected edge (u,v,weights). + A undirected graph with the same nodes, and each directed edge + (u,v,weights) replaced by an undirected edge (u,v,weights). + Examples -------- >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', @@ -502,7 +536,7 @@ def add_nodes_from(self, nodes): Parameters ---------- nodes : list or cudf.Series - The nodes of the graph to be stored. + The nodes of the graph to be stored. 
""" self._Impl._nodes["all_nodes"] = cudf.Series(nodes) @@ -574,22 +608,27 @@ def from_cudf_edgelist( in the range [0, V), renumbering can be disabled and the original external vertex ids will be used. If weights are present, edge_attr argument is the weights column name. + Parameters ---------- input_df : cudf.DataFrame or dask_cudf.DataFrame - A DataFrame that contains edge information - If a dask_cudf.DataFrame is passed it will be reinterpreted as - a cudf.DataFrame. For the distributed path please use - from_dask_cudf_edgelist. + A DataFrame that contains edge information. If a dask_cudf.DataFrame is + passed it will be reinterpreted as a cudf.DataFrame. For the + distributed path please use from_dask_cudf_edgelist. + source : str or array-like - source column name or array of column names + source column name or array of column names + destination : str or array-like - destination column name or array of column names + destination column name or array of column names + edge_attr : str or None - the weights column name. Default is None + the weights column name. Default is None + renumber : bool - Indicate whether or not to renumber the source and destination - vertex IDs. Default is True. + Indicate whether or not to renumber the source and destination vertex + IDs. Default is True. + Examples -------- >>> df = cudf.read_csv('datasets/karate.csv', delimiter=' ', @@ -625,19 +664,24 @@ def from_dask_cudf_edgelist( external vertex ids will be used. Note that the graph object will store a reference to the dask_cudf.DataFrame provided. + Parameters ---------- input_ddf : dask_cudf.DataFrame - The edgelist as a dask_cudf.DataFrame + The edgelist as a dask_cudf.DataFrame + source : str or array-like - source column name or array of column names + source column name or array of column names + destination : str - destination column name or array of column names + destination column name or array of column names + edge_attr : str - weights column name. + weights column name. + renumber : bool - If source and destination indices are not in range 0 to V where V - is number of vertices, renumber argument should be True. + If source and destination indices are not in range 0 to V where V is + number of vertices, renumber argument should be True. """ raise Exception("Distributed N-partite graph not supported") @@ -656,6 +700,7 @@ def add_nodes_from(self, nodes, bipartite=None, multipartite=None): multipartite : str Sets the Graph as multipartite. The nodes are stored as a set of nodes of the partition named as multipartite argument. 
+ """ if self._Impl is None: self._Impl = npartiteGraphImpl(self.graph_properties) diff --git a/python/cugraph/structure/graph_utilities.pxd b/python/cugraph/structure/graph_utilities.pxd index c9cf1748bfe..330978f0e38 100644 --- a/python/cugraph/structure/graph_utilities.pxd +++ b/python/cugraph/structure/graph_utilities.pxd @@ -83,7 +83,7 @@ cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": unique_ptr[device_buffer] dst_indices unique_ptr[device_buffer] edge_data unique_ptr[device_buffer] subgraph_offsets - + cdef cppclass random_walk_ret_t: size_t coalesced_sz_v_ size_t coalesced_sz_w_ @@ -93,13 +93,15 @@ cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": unique_ptr[device_buffer] d_coalesced_w_ unique_ptr[device_buffer] d_sizes_ + cdef cppclass graph_generator_t: + unique_ptr[device_buffer] d_source + unique_ptr[device_buffer] d_destination + cdef extern from "" namespace "std" nogil: cdef device_buffer move(device_buffer) - cdef unique_ptr[device_buffer] move(unique_ptr[device_buffer]) + cdef unique_ptr[device_buffer] move(unique_ptr[device_buffer]) cdef cy_multi_edgelists_t move(cy_multi_edgelists_t) cdef unique_ptr[cy_multi_edgelists_t] move(unique_ptr[cy_multi_edgelists_t]) - #cdef device_buffer move(device_buffer) - #cdef unique_ptr[device_buffer] move(unique_ptr[device_buffer]) # renumber_edgelist() interface utilities: # @@ -131,7 +133,7 @@ ctypedef fused shuffled_vertices_t: major_minor_weights_t[int, long, double] major_minor_weights_t[long, long, float] major_minor_weights_t[long, long, double] - + # 3. return type for renumber: # cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": @@ -158,7 +160,7 @@ cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": vertex_t get_part_matrix_partition_major_value_start_offset(size_t partition_idx) pair[vertex_t, vertex_t] get_part_matrix_partition_minor_range() vertex_t get_part_matrix_partition_minor_first() - vertex_t get_part_matrix_partition_minor_last() + vertex_t get_part_matrix_partition_minor_last() # 4. `sort_and_shuffle_values()` wrapper: # diff --git a/python/cugraph/tests/generators/test_rmat.py b/python/cugraph/tests/generators/test_rmat.py new file mode 100644 index 00000000000..a7c8701095e --- /dev/null +++ b/python/cugraph/tests/generators/test_rmat.py @@ -0,0 +1,122 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
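The new tests that follow largely reduce to shape checks on the generated edgelist; a rough sketch of the single-GPU case is shown here (the a/b/c and seed values mirror _call_rmat() below, while the scale is an arbitrary example).

# Mirrors the kind of check test_rmat_edgelist() below performs on the output.
from cugraph.generators import rmat

scale = 4
num_edges = (2**scale) * 4          # same edge-count formula the tests use

df = rmat(scale, num_edges, a=0.1, b=0.2, c=0.3, seed=24,
          clip_and_flip=False, scramble_vertex_ids=True,
          create_using=None, mg=False)   # None -> raw cudf.DataFrame edgelist

assert len(df) == num_edges
assert list(df.columns) == ["src", "dst"]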
+ + +import pytest + +import cudf +import dask_cudf + +from cugraph.dask.common.mg_utils import (is_single_gpu, + get_visible_devices, + setup_local_dask_cluster, + teardown_local_dask_cluster) +from cugraph.generators import rmat +import cugraph + + +############################################################################## +_cluster = None +_client = None +_is_single_gpu = is_single_gpu() +_visible_devices = get_visible_devices() +_scale_values = [2, 4, 16] +_scale_test_ids = [f"scale={x}" for x in _scale_values] +_mg_values = [False, True] +_mg_test_ids = [f"mg={x}" for x in _mg_values] +_graph_types = [cugraph.Graph, cugraph.DiGraph, None, int] +_graph_test_ids = [f"create_using={getattr(x,'__name__',str(x))}" + for x in _graph_types] + + +def _call_rmat(scale, num_edges, create_using, mg): + """ + Simplifies calling RMAT by requiring only specific args that are varied by + these tests and hard-coding all others. + """ + return rmat(scale=scale, + num_edges=num_edges, + a=0.1, + b=0.2, + c=0.3, + seed=24, + clip_and_flip=False, + scramble_vertex_ids=True, + create_using=create_using, + mg=mg) + + +############################################################################### +def setup_module(): + global _cluster + global _client + if not _is_single_gpu: + (_cluster, _client) = setup_local_dask_cluster(p2p=True) + + +def teardown_module(): + if not _is_single_gpu: + teardown_local_dask_cluster(_cluster, _client) + + +############################################################################### +@pytest.mark.parametrize("scale", _scale_values, ids=_scale_test_ids) +@pytest.mark.parametrize("mg", _mg_values, ids=_mg_test_ids) +def test_rmat_edgelist(scale, mg): + """ + Verifies that the edgelist returned by rmat() is valid based on inputs. + """ + if mg and _is_single_gpu: + pytest.skip("skipping MG testing on Single GPU system") + + num_edges = (2**scale)*4 + create_using = None # Returns the edgelist from RMAT + + df = _call_rmat(scale, num_edges, create_using, mg) + + if mg: + assert df.npartitions == len(_visible_devices) + df_to_check = df.compute() + else: + df_to_check = df + + assert len(df_to_check) == num_edges + + +@pytest.mark.parametrize("graph_type", _graph_types, ids=_graph_test_ids) +@pytest.mark.parametrize("mg", _mg_values, ids=_mg_test_ids) +def test_rmat_return_type(graph_type, mg): + """ + Verifies that the return type returned by rmat() is valid (or the proper + exception is raised) based on inputs. 
+ """ + if mg and _is_single_gpu: + pytest.skip("skipping MG testing on Single GPU system") + + scale = 2 + num_edges = (2**scale)*4 + + if (mg and (graph_type not in [cugraph.DiGraph, None])) or \ + (graph_type not in [cugraph.Graph, cugraph.DiGraph, None]): + + with pytest.raises(TypeError): + _call_rmat(scale, num_edges, graph_type, mg) + + else: + G_or_df = _call_rmat(scale, num_edges, graph_type, mg) + + if graph_type is None: + assert type(G_or_df) is dask_cudf.DataFrame if mg \ + else cudf.DataFrame + else: + assert type(G_or_df) is graph_type diff --git a/python/cugraph/traversal/ms_bfs.py b/python/cugraph/traversal/ms_bfs.py index e4b799e30e4..3d158524751 100644 --- a/python/cugraph/traversal/ms_bfs.py +++ b/python/cugraph/traversal/ms_bfs.py @@ -14,8 +14,6 @@ import numpy as np import cudf -# from cugraph.structure.graph import Graph, DiGraph -# from cugraph.utilities.utils import get_device_memory_info import warnings From 85ceac3d555632c584b6afd00c82b333ee808591 Mon Sep 17 00:00:00 2001 From: Chuck Hastings <45364586+ChuckHastings@users.noreply.github.com> Date: Wed, 12 May 2021 09:41:19 -0400 Subject: [PATCH 248/343] Bug fix for MNMG Louvain (#1597) in MNMG mode, clusters assigned to different GPUs can increase between iterations. Merge after #1596 closes #1586 Authors: - Chuck Hastings (https://github.com/ChuckHastings) Approvers: - Brad Rees (https://github.com/BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1597 --- cpp/src/experimental/louvain.cuh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/src/experimental/louvain.cuh b/cpp/src/experimental/louvain.cuh index 24914fb028b..77e4c9c4604 100644 --- a/cpp/src/experimental/louvain.cuh +++ b/cpp/src/experimental/louvain.cuh @@ -201,6 +201,8 @@ class Louvain { timer_start("compute_vertex_and_cluster_weights"); vertex_weights_v_ = current_graph_view_.compute_out_weight_sums(handle_); + cluster_keys_v_.resize(vertex_weights_v_.size(), handle_.get_stream()); + cluster_weights_v_.resize(vertex_weights_v_.size(), handle_.get_stream()); thrust::sequence(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), cluster_keys_v_.begin(), From d97638d7a50f77fc077314e00b23167f7836d95e Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Wed, 12 May 2021 09:42:10 -0400 Subject: [PATCH 249/343] Bug fix in MG coarsen_grph (#1596) - [x] Bug fix in MG coarsen_graph Authors: - Seunghwa Kang (https://github.com/seunghwak) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) URL: https://github.com/rapidsai/cugraph/pull/1596 --- cpp/src/experimental/coarsen_graph.cu | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/cpp/src/experimental/coarsen_graph.cu b/cpp/src/experimental/coarsen_graph.cu index 6397f92e336..f0864b8af63 100644 --- a/cpp/src/experimental/coarsen_graph.cu +++ b/cpp/src/experimental/coarsen_graph.cu @@ -284,12 +284,14 @@ coarsen_graph( store_transposed ? 
graph_view.get_number_of_local_adj_matrix_partition_cols(i) : graph_view.get_number_of_local_adj_matrix_partition_rows(i), handle.get_stream()); - // FIXME: this copy is unnecessary, beter fix RAFT comm's bcast to take const iterators for - // input - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - labels, - labels + major_labels.size(), - major_labels.begin()); + if (col_comm_rank == i) { + // FIXME: this copy is unnecessary, beter fix RAFT comm's bcast to take const iterators for + // input + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + labels, + labels + major_labels.size(), + major_labels.begin()); + } device_bcast(col_comm, major_labels.data(), major_labels.data(), @@ -455,7 +457,7 @@ coarsen_graph( cur_size; thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), src_edge_first, - src_edge_first + edgelist_major_vertices.size(), + src_edge_first + number_of_partition_edges, dst_edge_first); } } From 4905a3521495f85b7eb9a9d8d2f2f34199cf31c5 Mon Sep 17 00:00:00 2001 From: Paul Taylor Date: Wed, 12 May 2021 09:39:34 -0500 Subject: [PATCH 250/343] Move headers into `include/cugraph` dir (#1582) This PR moves the headers in `cpp/include` into `cpp/include/cugraph` and updates C++ and Cython `#include` directives. This change makes it easier for other libs to avoid naming conflicts with cuGraph public headers when linking `libcugraph.so`. closes #1491 Authors: - Paul Taylor (https://github.com/trxcllnt) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - Seunghwa Kang (https://github.com/seunghwak) - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/1582 --- cpp/include/{ => cugraph}/algorithms.hpp | 10 +++---- .../{ => cugraph}/compute_partition.cuh | 2 +- cpp/include/{ => cugraph}/dendrogram.hpp | 0 cpp/include/{ => cugraph}/eidecl_graph.hpp | 2 +- cpp/include/{ => cugraph}/eidir_graph.hpp | 2 +- .../experimental/detail/graph_utils.cuh | 8 +++--- .../experimental/eidecl_graph.hpp | 2 +- .../experimental/eidir_graph.hpp | 2 +- .../{ => cugraph}/experimental/graph.hpp | 4 +-- .../experimental/graph_functions.hpp | 4 +-- .../experimental/graph_generator.hpp | 0 .../{ => cugraph}/experimental/graph_view.hpp | 2 +- .../experimental/include_cuco_static_map.cuh | 0 cpp/include/{ => cugraph}/functions.hpp | 4 +-- cpp/include/{ => cugraph}/graph.hpp | 0 cpp/include/{ => cugraph}/internals.hpp | 2 +- .../{ => cugraph}/matrix_partition_device.cuh | 4 +-- .../{ => cugraph}/partition_manager.hpp | 2 +- .../patterns/any_of_adj_matrix_row.cuh | 6 ++-- .../patterns/copy_to_adj_matrix_row_col.cuh | 20 ++++++------- .../copy_v_transform_reduce_in_out_nbr.cuh | 16 +++++------ ...ransform_reduce_key_aggregated_out_nbr.cuh | 24 ++++++++-------- .../{ => cugraph}/patterns/count_if_e.cuh | 6 ++-- .../{ => cugraph}/patterns/count_if_v.cuh | 6 ++-- .../{ => cugraph}/patterns/edge_op_utils.cuh | 2 +- .../{ => cugraph}/patterns/reduce_op.cuh | 0 .../{ => cugraph}/patterns/reduce_v.cuh | 6 ++-- ...orm_reduce_by_adj_matrix_row_col_key_e.cuh | 14 +++++----- .../patterns/transform_reduce_e.cuh | 10 +++---- .../patterns/transform_reduce_v.cuh | 6 ++-- ...transform_reduce_v_with_adj_matrix_row.cuh | 6 ++-- .../update_frontier_v_push_if_out_nbr.cuh | 26 ++++++++--------- .../patterns/vertex_frontier.cuh | 6 ++-- .../{ => cugraph}/utilities/collect_comm.cuh | 8 +++--- .../{ => cugraph}/utilities/cython.hpp | 11 +++++--- .../utilities/dataframe_buffer.cuh | 2 +- .../{ => 
cugraph}/utilities/device_comm.cuh | 2 +- cpp/include/{ => cugraph}/utilities/error.hpp | 2 +- .../{ => cugraph}/utilities/graph_traits.hpp | 0 .../{ => cugraph}/utilities/host_barrier.hpp | 0 .../utilities/host_scalar_comm.cuh | 2 +- .../utilities/path_retrieval.hpp | 0 .../{ => cugraph}/utilities/shuffle_comm.cuh | 4 +-- .../utilities/thrust_tuple_utils.cuh | 0 .../{ => cugraph}/vertex_partition_device.cuh | 6 ++-- cpp/src/centrality/README.md | 4 +-- cpp/src/centrality/betweenness_centrality.cu | 6 ++-- cpp/src/centrality/katz_centrality.cu | 6 ++-- cpp/src/community/README.md | 4 +-- cpp/src/community/ecg.cu | 4 +-- cpp/src/community/egonet.cu | 14 +++++----- .../community/extract_subgraph_by_vertex.cu | 6 ++-- cpp/src/community/flatten_dendrogram.cuh | 4 +-- cpp/src/community/ktruss.cu | 6 ++-- cpp/src/community/louvain.cu | 4 +-- cpp/src/community/louvain.cuh | 4 +-- cpp/src/community/spectral_clustering.cu | 8 +++--- cpp/src/community/triangles_counting.cu | 8 +++--- cpp/src/components/connectivity.cu | 8 +++--- cpp/src/components/utils.h | 4 +-- cpp/src/converters/COOtoCSR.cu | 4 +-- cpp/src/converters/COOtoCSR.cuh | 8 +++--- cpp/src/converters/permute_graph.cuh | 8 +++--- cpp/src/converters/renumber.cuh | 6 ++-- cpp/src/cores/core_number.cu | 4 +-- cpp/src/experimental/bfs.cu | 14 +++++----- cpp/src/experimental/coarsen_graph.cu | 16 +++++------ .../experimental/generate_rmat_edgelist.cu | 4 +-- cpp/src/experimental/graph.cu | 12 ++++---- cpp/src/experimental/graph_view.cu | 12 ++++---- cpp/src/experimental/induced_subgraph.cu | 10 +++---- cpp/src/experimental/katz_centrality.cu | 14 +++++----- cpp/src/experimental/louvain.cuh | 24 ++++++++-------- cpp/src/experimental/pagerank.cu | 22 +++++++-------- cpp/src/experimental/relabel.cu | 18 ++++++------ cpp/src/experimental/renumber_edgelist.cu | 20 ++++++------- cpp/src/experimental/renumber_utils.cu | 12 ++++---- cpp/src/experimental/sssp.cu | 20 ++++++------- cpp/src/layout/barnes_hut.hpp | 10 +++---- cpp/src/layout/exact_fa2.hpp | 8 +++--- cpp/src/layout/fa2_kernels.hpp | 4 +-- cpp/src/linear_assignment/hungarian.cu | 4 +-- cpp/src/link_analysis/gunrock_hits.cpp | 6 ++-- cpp/src/link_prediction/jaccard.cu | 6 ++-- cpp/src/link_prediction/overlap.cu | 6 ++-- cpp/src/sampling/random_walks.cu | 2 +- cpp/src/sampling/random_walks.cuh | 2 +- cpp/src/sort/bitonic.cuh | 4 +-- cpp/src/structure/graph.cu | 8 +++--- cpp/src/traversal/README.md | 4 +-- cpp/src/traversal/bfs.cu | 8 +++--- cpp/src/traversal/bfs_kernels.cuh | 4 +-- cpp/src/traversal/mg/frontier_expand.cuh | 4 +-- .../traversal/mg/frontier_expand_kernels.cuh | 4 +-- cpp/src/traversal/sssp.cu | 4 +-- cpp/src/traversal/sssp_kernels.cuh | 4 +-- cpp/src/traversal/traversal_common.cuh | 4 +-- cpp/src/traversal/tsp.hpp | 2 +- cpp/src/traversal/two_hop_neighbors.cu | 8 +++--- cpp/src/tree/mst.cu | 8 +++--- cpp/src/utilities/cython.cu | 28 ++++++++++--------- cpp/src/utilities/graph_utils.cuh | 4 +-- cpp/src/utilities/host_barrier.cpp | 2 +- cpp/src/utilities/path_retrieval.cu | 4 +-- cpp/src/utilities/spmv_1D.cuh | 4 +-- .../centrality/betweenness_centrality_test.cu | 4 +-- .../edge_betweenness_centrality_test.cu | 4 +-- cpp/tests/centrality/katz_centrality_test.cu | 4 +-- cpp/tests/community/balanced_edge_test.cpp | 4 +-- cpp/tests/community/ecg_test.cpp | 4 +-- cpp/tests/community/egonet_test.cu | 6 ++-- cpp/tests/community/leiden_test.cpp | 4 +-- cpp/tests/community/louvain_test.cpp | 4 +-- cpp/tests/community/mg_louvain_helper.cu | 8 +++--- 
cpp/tests/community/mg_louvain_helper.hpp | 2 +- cpp/tests/community/mg_louvain_test.cpp | 4 +-- cpp/tests/community/triangle_test.cu | 6 ++-- cpp/tests/components/con_comp_test.cu | 4 +-- cpp/tests/components/scc_test.cu | 4 +-- cpp/tests/components/wcc_graphs.hpp | 2 +- cpp/tests/components/wcc_test.cpp | 4 +-- cpp/tests/experimental/bfs_test.cpp | 8 +++--- cpp/tests/experimental/coarsen_graph_test.cpp | 8 +++--- cpp/tests/experimental/degree_test.cpp | 6 ++-- cpp/tests/experimental/generate_rmat_test.cpp | 4 +-- cpp/tests/experimental/graph_test.cpp | 4 +-- .../experimental/induced_subgraph_test.cpp | 6 ++-- .../experimental/katz_centrality_test.cpp | 8 +++--- cpp/tests/experimental/mg_bfs_test.cpp | 10 +++---- .../experimental/mg_katz_centrality_test.cpp | 4 +-- cpp/tests/experimental/mg_sssp_test.cpp | 10 +++---- cpp/tests/experimental/ms_bfs_test.cpp | 8 +++--- cpp/tests/experimental/pagerank_test.cpp | 8 +++--- cpp/tests/experimental/sssp_test.cpp | 8 +++--- cpp/tests/experimental/weight_sum_test.cpp | 6 ++-- cpp/tests/layout/force_atlas2_test.cu | 4 +-- cpp/tests/linear_assignment/hungarian_test.cu | 16 +++++------ cpp/tests/pagerank/mg_pagerank_test.cpp | 10 +++---- cpp/tests/sampling/random_walks_profiling.cu | 4 +-- cpp/tests/sampling/random_walks_test.cu | 4 +-- cpp/tests/sampling/random_walks_utils.cuh | 2 +- cpp/tests/sampling/rw_low_level_test.cu | 4 +-- cpp/tests/traversal/bfs_test.cu | 2 +- cpp/tests/traversal/sssp_test.cu | 4 +-- cpp/tests/traversal/tsp_test.cu | 4 +-- cpp/tests/tree/mst_test.cu | 4 +-- cpp/tests/utilities/base_fixture.hpp | 2 +- .../utilities/generate_graph_from_edgelist.cu | 8 +++--- .../utilities/matrix_market_file_utilities.cu | 8 +++--- cpp/tests/utilities/rmat_utilities.cu | 10 +++---- cpp/tests/utilities/test_utilities.hpp | 4 +-- .../centrality/betweenness_centrality.pxd | 4 +-- python/cugraph/centrality/katz_centrality.pxd | 2 +- python/cugraph/comms/comms.pxd | 2 +- python/cugraph/community/ecg.pxd | 4 +-- python/cugraph/community/egonet.pxd | 2 +- python/cugraph/community/ktruss_subgraph.pxd | 4 +-- python/cugraph/community/leiden.pxd | 4 +-- python/cugraph/community/louvain.pxd | 2 +- .../cugraph/community/spectral_clustering.pxd | 4 +-- .../cugraph/community/subgraph_extraction.pxd | 4 +-- python/cugraph/community/triangle_count.pxd | 4 +-- python/cugraph/components/connectivity.pxd | 4 +-- python/cugraph/cores/core_number.pxd | 4 +-- python/cugraph/cores/k_core.pxd | 4 +-- .../dask/centrality/mg_katz_centrality.pxd | 2 +- python/cugraph/dask/community/louvain.pxd | 2 +- .../dask/link_analysis/mg_pagerank.pxd | 2 +- python/cugraph/dask/traversal/mg_bfs.pxd | 2 +- python/cugraph/dask/traversal/mg_sssp.pxd | 2 +- python/cugraph/generators/rmat.pxd | 6 ++-- .../cugraph/internals/callbacks_implems.hpp | 4 +-- python/cugraph/layout/force_atlas2.pxd | 6 ++-- .../cugraph/layout/force_atlas2_wrapper.pyx | 2 +- python/cugraph/linear_assignment/lap.pxd | 4 +-- python/cugraph/link_analysis/hits.pxd | 4 +-- python/cugraph/link_analysis/pagerank.pxd | 2 +- python/cugraph/link_prediction/jaccard.pxd | 4 +-- python/cugraph/link_prediction/overlap.pxd | 4 +-- python/cugraph/sampling/random_walks.pxd | 2 +- python/cugraph/structure/graph_primtypes.pxd | 6 ++-- python/cugraph/structure/graph_utilities.pxd | 12 ++++---- python/cugraph/structure/utils.pxd | 2 +- python/cugraph/traversal/bfs.pxd | 2 +- python/cugraph/traversal/sssp.pxd | 2 +- .../traversal/traveling_salesperson.pxd | 3 +- python/cugraph/tree/minimum_spanning_tree.pxd | 2 +- 
python/cugraph/utilities/path_retrieval.pxd | 3 +- 188 files changed, 551 insertions(+), 548 deletions(-) rename cpp/include/{ => cugraph}/algorithms.hpp (99%) rename cpp/include/{ => cugraph}/compute_partition.cuh (99%) rename cpp/include/{ => cugraph}/dendrogram.hpp (100%) rename cpp/include/{ => cugraph}/eidecl_graph.hpp (99%) rename cpp/include/{ => cugraph}/eidir_graph.hpp (98%) rename cpp/include/{ => cugraph}/experimental/detail/graph_utils.cuh (97%) rename cpp/include/{ => cugraph}/experimental/eidecl_graph.hpp (99%) rename cpp/include/{ => cugraph}/experimental/eidir_graph.hpp (98%) rename cpp/include/{ => cugraph}/experimental/graph.hpp (98%) rename cpp/include/{ => cugraph}/experimental/graph_functions.hpp (99%) rename cpp/include/{ => cugraph}/experimental/graph_generator.hpp (100%) rename cpp/include/{ => cugraph}/experimental/graph_view.hpp (99%) rename cpp/include/{ => cugraph}/experimental/include_cuco_static_map.cuh (100%) rename cpp/include/{ => cugraph}/functions.hpp (97%) rename cpp/include/{ => cugraph}/graph.hpp (100%) rename cpp/include/{ => cugraph}/internals.hpp (96%) rename cpp/include/{ => cugraph}/matrix_partition_device.cuh (99%) rename cpp/include/{ => cugraph}/partition_manager.hpp (98%) rename cpp/include/{ => cugraph}/patterns/any_of_adj_matrix_row.cuh (95%) rename cpp/include/{ => cugraph}/patterns/copy_to_adj_matrix_row_col.cuh (98%) rename cpp/include/{ => cugraph}/patterns/copy_v_transform_reduce_in_out_nbr.cuh (98%) rename cpp/include/{ => cugraph}/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh (97%) rename cpp/include/{ => cugraph}/patterns/count_if_e.cuh (96%) rename cpp/include/{ => cugraph}/patterns/count_if_v.cuh (97%) rename cpp/include/{ => cugraph}/patterns/edge_op_utils.cuh (99%) rename cpp/include/{ => cugraph}/patterns/reduce_op.cuh (100%) rename cpp/include/{ => cugraph}/patterns/reduce_v.cuh (96%) rename cpp/include/{ => cugraph}/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh (98%) rename cpp/include/{ => cugraph}/patterns/transform_reduce_e.cuh (98%) rename cpp/include/{ => cugraph}/patterns/transform_reduce_v.cuh (97%) rename cpp/include/{ => cugraph}/patterns/transform_reduce_v_with_adj_matrix_row.cuh (97%) rename cpp/include/{ => cugraph}/patterns/update_frontier_v_push_if_out_nbr.cuh (98%) rename cpp/include/{ => cugraph}/patterns/vertex_frontier.cuh (98%) rename cpp/include/{ => cugraph}/utilities/collect_comm.cuh (98%) rename cpp/include/{ => cugraph}/utilities/cython.hpp (99%) rename cpp/include/{ => cugraph}/utilities/dataframe_buffer.cuh (99%) rename cpp/include/{ => cugraph}/utilities/device_comm.cuh (99%) rename cpp/include/{ => cugraph}/utilities/error.hpp (98%) rename cpp/include/{ => cugraph}/utilities/graph_traits.hpp (100%) rename cpp/include/{ => cugraph}/utilities/host_barrier.hpp (100%) rename cpp/include/{ => cugraph}/utilities/host_scalar_comm.cuh (99%) rename cpp/include/{ => cugraph}/utilities/path_retrieval.hpp (100%) rename cpp/include/{ => cugraph}/utilities/shuffle_comm.cuh (99%) rename cpp/include/{ => cugraph}/utilities/thrust_tuple_utils.cuh (100%) rename cpp/include/{ => cugraph}/vertex_partition_device.cuh (96%) diff --git a/cpp/include/algorithms.hpp b/cpp/include/cugraph/algorithms.hpp similarity index 99% rename from cpp/include/algorithms.hpp rename to cpp/include/cugraph/algorithms.hpp index 9f1cb02df0c..e5bba7bd5ce 100644 --- a/cpp/include/algorithms.hpp +++ b/cpp/include/cugraph/algorithms.hpp @@ -15,12 +15,12 @@ */ #pragma once -#include -#include -#include +#include 
+#include +#include -#include -#include +#include +#include #include diff --git a/cpp/include/compute_partition.cuh b/cpp/include/cugraph/compute_partition.cuh similarity index 99% rename from cpp/include/compute_partition.cuh rename to cpp/include/cugraph/compute_partition.cuh index 5c03b0971f2..6405d239adc 100644 --- a/cpp/include/compute_partition.cuh +++ b/cpp/include/cugraph/compute_partition.cuh @@ -17,7 +17,7 @@ #include -#include +#include #include diff --git a/cpp/include/dendrogram.hpp b/cpp/include/cugraph/dendrogram.hpp similarity index 100% rename from cpp/include/dendrogram.hpp rename to cpp/include/cugraph/dendrogram.hpp diff --git a/cpp/include/eidecl_graph.hpp b/cpp/include/cugraph/eidecl_graph.hpp similarity index 99% rename from cpp/include/eidecl_graph.hpp rename to cpp/include/cugraph/eidecl_graph.hpp index 03f6a675597..3e3d9ac5b31 100644 --- a/cpp/include/eidecl_graph.hpp +++ b/cpp/include/cugraph/eidecl_graph.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/eidir_graph.hpp b/cpp/include/cugraph/eidir_graph.hpp similarity index 98% rename from cpp/include/eidir_graph.hpp rename to cpp/include/cugraph/eidir_graph.hpp index d7273b9ea37..5bd6c233641 100644 --- a/cpp/include/eidir_graph.hpp +++ b/cpp/include/cugraph/eidir_graph.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/experimental/detail/graph_utils.cuh b/cpp/include/cugraph/experimental/detail/graph_utils.cuh similarity index 97% rename from cpp/include/experimental/detail/graph_utils.cuh rename to cpp/include/cugraph/experimental/detail/graph_utils.cuh index d79788e59ce..e9f86eb9d62 100644 --- a/cpp/include/experimental/detail/graph_utils.cuh +++ b/cpp/include/cugraph/experimental/detail/graph_utils.cuh @@ -15,10 +15,10 @@ */ #pragma once -#include -#include -#include -#include +#include +#include +#include +#include #include #include diff --git a/cpp/include/experimental/eidecl_graph.hpp b/cpp/include/cugraph/experimental/eidecl_graph.hpp similarity index 99% rename from cpp/include/experimental/eidecl_graph.hpp rename to cpp/include/cugraph/experimental/eidecl_graph.hpp index b8ac201008a..18e617c0993 100644 --- a/cpp/include/experimental/eidecl_graph.hpp +++ b/cpp/include/cugraph/experimental/eidecl_graph.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/experimental/eidir_graph.hpp b/cpp/include/cugraph/experimental/eidir_graph.hpp similarity index 98% rename from cpp/include/experimental/eidir_graph.hpp rename to cpp/include/cugraph/experimental/eidir_graph.hpp index 8998943ec16..93aa333dc5b 100644 --- a/cpp/include/experimental/eidir_graph.hpp +++ b/cpp/include/cugraph/experimental/eidir_graph.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/cpp/include/experimental/graph.hpp b/cpp/include/cugraph/experimental/graph.hpp similarity index 98% rename from cpp/include/experimental/graph.hpp rename to cpp/include/cugraph/experimental/graph.hpp index 27f766b8593..963631dcc19 100644 --- a/cpp/include/experimental/graph.hpp +++ b/cpp/include/cugraph/experimental/graph.hpp @@ -15,8 +15,8 @@ */ #pragma once -#include -#include +#include +#include #include #include diff --git a/cpp/include/experimental/graph_functions.hpp b/cpp/include/cugraph/experimental/graph_functions.hpp similarity index 99% rename from cpp/include/experimental/graph_functions.hpp rename to cpp/include/cugraph/experimental/graph_functions.hpp index b48dc6da136..cb1b90a6e8e 100644 --- a/cpp/include/experimental/graph_functions.hpp +++ b/cpp/include/cugraph/experimental/graph_functions.hpp @@ -15,8 +15,8 @@ */ #pragma once -#include -#include +#include +#include #include #include diff --git a/cpp/include/experimental/graph_generator.hpp b/cpp/include/cugraph/experimental/graph_generator.hpp similarity index 100% rename from cpp/include/experimental/graph_generator.hpp rename to cpp/include/cugraph/experimental/graph_generator.hpp diff --git a/cpp/include/experimental/graph_view.hpp b/cpp/include/cugraph/experimental/graph_view.hpp similarity index 99% rename from cpp/include/experimental/graph_view.hpp rename to cpp/include/cugraph/experimental/graph_view.hpp index e9593b70ddb..c61cf4a5935 100644 --- a/cpp/include/experimental/graph_view.hpp +++ b/cpp/include/cugraph/experimental/graph_view.hpp @@ -15,7 +15,7 @@ */ #pragma once -#include +#include #include #include diff --git a/cpp/include/experimental/include_cuco_static_map.cuh b/cpp/include/cugraph/experimental/include_cuco_static_map.cuh similarity index 100% rename from cpp/include/experimental/include_cuco_static_map.cuh rename to cpp/include/cugraph/experimental/include_cuco_static_map.cuh diff --git a/cpp/include/functions.hpp b/cpp/include/cugraph/functions.hpp similarity index 97% rename from cpp/include/functions.hpp rename to cpp/include/cugraph/functions.hpp index ede1be3767f..00e8648b156 100644 --- a/cpp/include/functions.hpp +++ b/cpp/include/cugraph/functions.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,7 +18,7 @@ #include #include -#include +#include namespace cugraph { diff --git a/cpp/include/graph.hpp b/cpp/include/cugraph/graph.hpp similarity index 100% rename from cpp/include/graph.hpp rename to cpp/include/cugraph/graph.hpp diff --git a/cpp/include/internals.hpp b/cpp/include/cugraph/internals.hpp similarity index 96% rename from cpp/include/internals.hpp rename to cpp/include/cugraph/internals.hpp index f71426491e3..cc6b3031079 100644 --- a/cpp/include/internals.hpp +++ b/cpp/include/cugraph/internals.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/cpp/include/matrix_partition_device.cuh b/cpp/include/cugraph/matrix_partition_device.cuh similarity index 99% rename from cpp/include/matrix_partition_device.cuh rename to cpp/include/cugraph/matrix_partition_device.cuh index 30d6540bcfe..3c9736b7ca6 100644 --- a/cpp/include/matrix_partition_device.cuh +++ b/cpp/include/cugraph/matrix_partition_device.cuh @@ -15,8 +15,8 @@ */ #pragma once -#include -#include +#include +#include #include diff --git a/cpp/include/partition_manager.hpp b/cpp/include/cugraph/partition_manager.hpp similarity index 98% rename from cpp/include/partition_manager.hpp rename to cpp/include/cugraph/partition_manager.hpp index 431655e5642..c7657d459b2 100644 --- a/cpp/include/partition_manager.hpp +++ b/cpp/include/cugraph/partition_manager.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/patterns/any_of_adj_matrix_row.cuh b/cpp/include/cugraph/patterns/any_of_adj_matrix_row.cuh similarity index 95% rename from cpp/include/patterns/any_of_adj_matrix_row.cuh rename to cpp/include/cugraph/patterns/any_of_adj_matrix_row.cuh index a367ec2a50c..94cdae1ec95 100644 --- a/cpp/include/patterns/any_of_adj_matrix_row.cuh +++ b/cpp/include/cugraph/patterns/any_of_adj_matrix_row.cuh @@ -15,9 +15,9 @@ */ #pragma once -#include -#include -#include +#include +#include +#include #include #include diff --git a/cpp/include/patterns/copy_to_adj_matrix_row_col.cuh b/cpp/include/cugraph/patterns/copy_to_adj_matrix_row_col.cuh similarity index 98% rename from cpp/include/patterns/copy_to_adj_matrix_row_col.cuh rename to cpp/include/cugraph/patterns/copy_to_adj_matrix_row_col.cuh index 26a4eed4213..e2ab135691e 100644 --- a/cpp/include/patterns/copy_to_adj_matrix_row_col.cuh +++ b/cpp/include/cugraph/patterns/copy_to_adj_matrix_row_col.cuh @@ -15,16 +15,16 @@ */ #pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include diff --git a/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh b/cpp/include/cugraph/patterns/copy_v_transform_reduce_in_out_nbr.cuh similarity index 98% rename from cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh rename to cpp/include/cugraph/patterns/copy_v_transform_reduce_in_out_nbr.cuh index 6aded0eccf0..16a9870d380 100644 --- a/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh +++ b/cpp/include/cugraph/patterns/copy_v_transform_reduce_in_out_nbr.cuh @@ -15,14 +15,14 @@ */ #pragma once -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include diff --git a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh b/cpp/include/cugraph/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh similarity index 97% rename from cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh rename to cpp/include/cugraph/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh index 9a1d9fea24c..eca0b9eed4a 100644 --- a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh +++ b/cpp/include/cugraph/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh @@ -15,21 +15,21 @@ */ 
#pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include -#include +#include #include diff --git a/cpp/include/patterns/count_if_e.cuh b/cpp/include/cugraph/patterns/count_if_e.cuh similarity index 96% rename from cpp/include/patterns/count_if_e.cuh rename to cpp/include/cugraph/patterns/count_if_e.cuh index 4eb3fea24c4..1aa695bf5a9 100644 --- a/cpp/include/patterns/count_if_e.cuh +++ b/cpp/include/cugraph/patterns/count_if_e.cuh @@ -15,9 +15,9 @@ */ #pragma once -#include -#include -#include +#include +#include +#include #include diff --git a/cpp/include/patterns/count_if_v.cuh b/cpp/include/cugraph/patterns/count_if_v.cuh similarity index 97% rename from cpp/include/patterns/count_if_v.cuh rename to cpp/include/cugraph/patterns/count_if_v.cuh index 6b28cd7ae12..ef49a3e463b 100644 --- a/cpp/include/patterns/count_if_v.cuh +++ b/cpp/include/cugraph/patterns/count_if_v.cuh @@ -15,9 +15,9 @@ */ #pragma once -#include -#include -#include +#include +#include +#include #include #include diff --git a/cpp/include/patterns/edge_op_utils.cuh b/cpp/include/cugraph/patterns/edge_op_utils.cuh similarity index 99% rename from cpp/include/patterns/edge_op_utils.cuh rename to cpp/include/cugraph/patterns/edge_op_utils.cuh index 198c1880ff4..98b2a12f31b 100644 --- a/cpp/include/patterns/edge_op_utils.cuh +++ b/cpp/include/cugraph/patterns/edge_op_utils.cuh @@ -15,7 +15,7 @@ */ #pragma once -#include +#include #include diff --git a/cpp/include/patterns/reduce_op.cuh b/cpp/include/cugraph/patterns/reduce_op.cuh similarity index 100% rename from cpp/include/patterns/reduce_op.cuh rename to cpp/include/cugraph/patterns/reduce_op.cuh diff --git a/cpp/include/patterns/reduce_v.cuh b/cpp/include/cugraph/patterns/reduce_v.cuh similarity index 96% rename from cpp/include/patterns/reduce_v.cuh rename to cpp/include/cugraph/patterns/reduce_v.cuh index b232d37b78d..d27a45e2737 100644 --- a/cpp/include/patterns/reduce_v.cuh +++ b/cpp/include/cugraph/patterns/reduce_v.cuh @@ -15,9 +15,9 @@ */ #pragma once -#include -#include -#include +#include +#include +#include #include diff --git a/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh b/cpp/include/cugraph/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh similarity index 98% rename from cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh rename to cpp/include/cugraph/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh index 9848aa21f88..e47bd6f1bbb 100644 --- a/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh +++ b/cpp/include/cugraph/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh @@ -15,13 +15,13 @@ */ #pragma once -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include #include diff --git a/cpp/include/patterns/transform_reduce_e.cuh b/cpp/include/cugraph/patterns/transform_reduce_e.cuh similarity index 98% rename from cpp/include/patterns/transform_reduce_e.cuh rename to cpp/include/cugraph/patterns/transform_reduce_e.cuh index b95e036d460..f210065043f 100644 --- a/cpp/include/patterns/transform_reduce_e.cuh +++ b/cpp/include/cugraph/patterns/transform_reduce_e.cuh @@ -15,11 +15,11 @@ */ #pragma once -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include #include 
diff --git a/cpp/include/patterns/transform_reduce_v.cuh b/cpp/include/cugraph/patterns/transform_reduce_v.cuh similarity index 97% rename from cpp/include/patterns/transform_reduce_v.cuh rename to cpp/include/cugraph/patterns/transform_reduce_v.cuh index 17ffb89206a..0d5b4f9cbb6 100644 --- a/cpp/include/patterns/transform_reduce_v.cuh +++ b/cpp/include/cugraph/patterns/transform_reduce_v.cuh @@ -15,9 +15,9 @@ */ #pragma once -#include -#include -#include +#include +#include +#include #include diff --git a/cpp/include/patterns/transform_reduce_v_with_adj_matrix_row.cuh b/cpp/include/cugraph/patterns/transform_reduce_v_with_adj_matrix_row.cuh similarity index 97% rename from cpp/include/patterns/transform_reduce_v_with_adj_matrix_row.cuh rename to cpp/include/cugraph/patterns/transform_reduce_v_with_adj_matrix_row.cuh index 39aca7cacae..59830222a9c 100644 --- a/cpp/include/patterns/transform_reduce_v_with_adj_matrix_row.cuh +++ b/cpp/include/cugraph/patterns/transform_reduce_v_with_adj_matrix_row.cuh @@ -15,9 +15,9 @@ */ #pragma once -#include -#include -#include +#include +#include +#include #include diff --git a/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh b/cpp/include/cugraph/patterns/update_frontier_v_push_if_out_nbr.cuh similarity index 98% rename from cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh rename to cpp/include/cugraph/patterns/update_frontier_v_push_if_out_nbr.cuh index 4f3925f7d4c..7bd66ec6755 100644 --- a/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh +++ b/cpp/include/cugraph/patterns/update_frontier_v_push_if_out_nbr.cuh @@ -15,19 +15,19 @@ */ #pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include diff --git a/cpp/include/patterns/vertex_frontier.cuh b/cpp/include/cugraph/patterns/vertex_frontier.cuh similarity index 98% rename from cpp/include/patterns/vertex_frontier.cuh rename to cpp/include/cugraph/patterns/vertex_frontier.cuh index 4758334e9fc..f86d56deb84 100644 --- a/cpp/include/patterns/vertex_frontier.cuh +++ b/cpp/include/cugraph/patterns/vertex_frontier.cuh @@ -15,9 +15,9 @@ */ #pragma once -#include -#include -#include +#include +#include +#include #include #include diff --git a/cpp/include/utilities/collect_comm.cuh b/cpp/include/cugraph/utilities/collect_comm.cuh similarity index 98% rename from cpp/include/utilities/collect_comm.cuh rename to cpp/include/cugraph/utilities/collect_comm.cuh index f5a904ad875..6a8db5194be 100644 --- a/cpp/include/utilities/collect_comm.cuh +++ b/cpp/include/cugraph/utilities/collect_comm.cuh @@ -15,10 +15,10 @@ */ #pragma once -#include -#include -#include -#include +#include +#include +#include +#include #include #include diff --git a/cpp/include/utilities/cython.hpp b/cpp/include/cugraph/utilities/cython.hpp similarity index 99% rename from cpp/include/utilities/cython.hpp rename to cpp/include/cugraph/utilities/cython.hpp index 3221ba54929..1e246b7255a 100644 --- a/cpp/include/utilities/cython.hpp +++ b/cpp/include/cugraph/utilities/cython.hpp @@ -14,12 +14,15 @@ * limitations under the License. 
*/ #pragma once -#include -#include -#include + +#include +#include +#include +#include + #include + #include -#include namespace cugraph { namespace cython { diff --git a/cpp/include/utilities/dataframe_buffer.cuh b/cpp/include/cugraph/utilities/dataframe_buffer.cuh similarity index 99% rename from cpp/include/utilities/dataframe_buffer.cuh rename to cpp/include/cugraph/utilities/dataframe_buffer.cuh index b0e9c1ebfec..b15bca4abd7 100644 --- a/cpp/include/utilities/dataframe_buffer.cuh +++ b/cpp/include/cugraph/utilities/dataframe_buffer.cuh @@ -15,7 +15,7 @@ */ #pragma once -#include +#include #include #include diff --git a/cpp/include/utilities/device_comm.cuh b/cpp/include/cugraph/utilities/device_comm.cuh similarity index 99% rename from cpp/include/utilities/device_comm.cuh rename to cpp/include/cugraph/utilities/device_comm.cuh index daf8524e25b..3c00c54b07d 100644 --- a/cpp/include/utilities/device_comm.cuh +++ b/cpp/include/cugraph/utilities/device_comm.cuh @@ -15,7 +15,7 @@ */ #pragma once -#include +#include #include #include diff --git a/cpp/include/utilities/error.hpp b/cpp/include/cugraph/utilities/error.hpp similarity index 98% rename from cpp/include/utilities/error.hpp rename to cpp/include/cugraph/utilities/error.hpp index e44e2c910ea..8cfb077cf7b 100644 --- a/cpp/include/utilities/error.hpp +++ b/cpp/include/cugraph/utilities/error.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/utilities/graph_traits.hpp b/cpp/include/cugraph/utilities/graph_traits.hpp similarity index 100% rename from cpp/include/utilities/graph_traits.hpp rename to cpp/include/cugraph/utilities/graph_traits.hpp diff --git a/cpp/include/utilities/host_barrier.hpp b/cpp/include/cugraph/utilities/host_barrier.hpp similarity index 100% rename from cpp/include/utilities/host_barrier.hpp rename to cpp/include/cugraph/utilities/host_barrier.hpp diff --git a/cpp/include/utilities/host_scalar_comm.cuh b/cpp/include/cugraph/utilities/host_scalar_comm.cuh similarity index 99% rename from cpp/include/utilities/host_scalar_comm.cuh rename to cpp/include/cugraph/utilities/host_scalar_comm.cuh index 2ecfd913813..4505d35e011 100644 --- a/cpp/include/utilities/host_scalar_comm.cuh +++ b/cpp/include/cugraph/utilities/host_scalar_comm.cuh @@ -15,7 +15,7 @@ */ #pragma once -#include +#include #include #include diff --git a/cpp/include/utilities/path_retrieval.hpp b/cpp/include/cugraph/utilities/path_retrieval.hpp similarity index 100% rename from cpp/include/utilities/path_retrieval.hpp rename to cpp/include/cugraph/utilities/path_retrieval.hpp diff --git a/cpp/include/utilities/shuffle_comm.cuh b/cpp/include/cugraph/utilities/shuffle_comm.cuh similarity index 99% rename from cpp/include/utilities/shuffle_comm.cuh rename to cpp/include/cugraph/utilities/shuffle_comm.cuh index b42b9ad06bb..56f55a31a14 100644 --- a/cpp/include/utilities/shuffle_comm.cuh +++ b/cpp/include/cugraph/utilities/shuffle_comm.cuh @@ -15,8 +15,8 @@ */ #pragma once -#include -#include +#include +#include #include #include diff --git a/cpp/include/utilities/thrust_tuple_utils.cuh b/cpp/include/cugraph/utilities/thrust_tuple_utils.cuh similarity index 100% rename from cpp/include/utilities/thrust_tuple_utils.cuh rename to cpp/include/cugraph/utilities/thrust_tuple_utils.cuh diff --git 
a/cpp/include/vertex_partition_device.cuh b/cpp/include/cugraph/vertex_partition_device.cuh similarity index 96% rename from cpp/include/vertex_partition_device.cuh rename to cpp/include/cugraph/vertex_partition_device.cuh index a6a78ad3878..b57efd115eb 100644 --- a/cpp/include/vertex_partition_device.cuh +++ b/cpp/include/cugraph/vertex_partition_device.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,8 +15,8 @@ */ #pragma once -#include -#include +#include +#include #include diff --git a/cpp/src/centrality/README.md b/cpp/src/centrality/README.md index db7838fb0cc..31b5ed6720e 100644 --- a/cpp/src/centrality/README.md +++ b/cpp/src/centrality/README.md @@ -13,7 +13,7 @@ The unit test code is the best place to search for examples on calling pagerank. The example assumes that you create an SG or MG graph somehow. The caller must create the pageranks vector in device memory and pass in the raw pointer to that vector into the pagerank function. ```cpp -#include +#include ... using vertex_t = int32_t; // or int64_t, whichever is appropriate using weight_t = float; // or double, whichever is appropriate @@ -46,7 +46,7 @@ cugraph::experimental::pagerank(handle, graph_view, nullptr, nullptr, nullptr, v The example assumes that you create an SG or MG graph somehow. The caller must create the pageranks vector in device memory and pass in the raw pointer to that vector into the pagerank function. Additionally, the caller must create personalization_vertices and personalized_values vectors in device memory, populate them and pass in the raw pointers to those vectors. ```cpp -#include +#include ... using vertex_t = int32_t; // or int64_t, whichever is appropriate using weight_t = float; // or double, whichever is appropriate diff --git a/cpp/src/centrality/betweenness_centrality.cu b/cpp/src/centrality/betweenness_centrality.cu index c0a34de5f70..cdee2140382 100644 --- a/cpp/src/centrality/betweenness_centrality.cu +++ b/cpp/src/centrality/betweenness_centrality.cu @@ -20,10 +20,10 @@ #include -#include -#include +#include +#include +#include #include -#include #include #include "betweenness_centrality.cuh" diff --git a/cpp/src/centrality/katz_centrality.cu b/cpp/src/centrality/katz_centrality.cu index 0119a388680..0622193670e 100644 --- a/cpp/src/centrality/katz_centrality.cu +++ b/cpp/src/centrality/katz_centrality.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,8 +23,8 @@ #include #include -#include -#include "utilities/error.hpp" +#include +#include namespace cugraph { diff --git a/cpp/src/community/README.md b/cpp/src/community/README.md index 4bff0a6e77e..9d635a6167f 100644 --- a/cpp/src/community/README.md +++ b/cpp/src/community/README.md @@ -31,7 +31,7 @@ The API itself is very simple. There are two variations: The example assumes that you create an SG or MG graph somehow. The caller must create the clustering vector in device memory and pass in the raw pointer to that vector into the louvain function. ```cpp -#include +#include ... 
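// Editor's sketch, not part of this patch: the pagerank and louvain README text above
// describes a caller-allocates-output convention -- the algorithm writes its results
// through raw device pointers that the caller owns.  Assuming RMM is used for the
// allocation (requires #include <rmm/device_uvector.hpp>), that pattern typically
// looks like:
//
//   rmm::device_uvector<float>   pageranks(graph_view.get_number_of_vertices(), handle.get_stream());
//   rmm::device_uvector<int32_t> clustering(graph_view.get_number_of_vertices(), handle.get_stream());
//
// pageranks.data() / clustering.data() are then passed as the output-pointer arguments.
// The personalized pagerank variant follows the same pattern for the caller-populated
// personalization_vertices / personalized_values buffers mentioned above.  The exact
// parameter lists are declared in cpp/include/cugraph/algorithms.hpp.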
using vertex_t = int32_t; // or int64_t, whichever is appropriate using weight_t = float; // or double, whichever is appropriate @@ -54,7 +54,7 @@ std::tie(level, modularity) = cugraph::louvain(handle, graph_view, clustering_v. The Dendrogram represents the levels of hierarchical clustering that the Louvain algorithm computes. There is a separate function that will flatten the clustering into the same result as above. Returning the Dendrogram, however, provides a finer level of detail on the intermediate results which can be helpful in more fully understanding the data. ```cpp -#include +#include ... using vertex_t = int32_t; // or int64_t, whichever is appropriate using weight_t = float; // or double, whichever is appropriate diff --git a/cpp/src/community/ecg.cu b/cpp/src/community/ecg.cu index a176dfbd1c8..ef171d127fe 100644 --- a/cpp/src/community/ecg.cu +++ b/cpp/src/community/ecg.cu @@ -14,10 +14,10 @@ * limitations under the License. */ -#include #include #include -#include +#include +#include #include #include diff --git a/cpp/src/community/egonet.cu b/cpp/src/community/egonet.cu index 85ee327edb2..6b93f561a45 100644 --- a/cpp/src/community/egonet.cu +++ b/cpp/src/community/egonet.cu @@ -15,8 +15,8 @@ */ // Alex Fender afender@nvida.com -#include #include +#include #include #include #include @@ -28,14 +28,14 @@ #include #include -#include +#include -#include -#include "experimental/graph.hpp" -#include "utilities/graph_utils.cuh" +#include +#include +#include -#include -#include +#include +#include #include diff --git a/cpp/src/community/extract_subgraph_by_vertex.cu b/cpp/src/community/extract_subgraph_by_vertex.cu index eb7b1d494a0..4bfe57c2c50 100644 --- a/cpp/src/community/extract_subgraph_by_vertex.cu +++ b/cpp/src/community/extract_subgraph_by_vertex.cu @@ -14,9 +14,9 @@ * limitations under the License. */ -#include -#include -#include +#include +#include +#include #include #include diff --git a/cpp/src/community/flatten_dendrogram.cuh b/cpp/src/community/flatten_dendrogram.cuh index 6d455a68192..3359fea87e5 100644 --- a/cpp/src/community/flatten_dendrogram.cuh +++ b/cpp/src/community/flatten_dendrogram.cuh @@ -15,8 +15,8 @@ */ #pragma once -#include -#include +#include +#include #include #include diff --git a/cpp/src/community/ktruss.cu b/cpp/src/community/ktruss.cu index 11a8ed6fbae..224f84f6718 100644 --- a/cpp/src/community/ktruss.cu +++ b/cpp/src/community/ktruss.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -21,11 +21,11 @@ * @file ktruss.cu * --------------------------------------------------------------------------*/ -#include +#include #include #include -#include +#include #include "Static/KTruss/KTruss.cuh" using namespace hornets_nest; diff --git a/cpp/src/community/louvain.cu b/cpp/src/community/louvain.cu index 2affcf29805..0dc913ffaba 100644 --- a/cpp/src/community/louvain.cu +++ b/cpp/src/community/louvain.cu @@ -16,7 +16,7 @@ #include #include -#include +#include #include #include @@ -297,4 +297,4 @@ template std::pair louvain( } // namespace cugraph -#include +#include diff --git a/cpp/src/community/louvain.cuh b/cpp/src/community/louvain.cuh index e3569d4c850..8fa2b81783a 100644 --- a/cpp/src/community/louvain.cuh +++ b/cpp/src/community/louvain.cuh @@ -15,12 +15,12 @@ */ #pragma once -#include +#include #include #include -#include +#include #include diff --git a/cpp/src/community/spectral_clustering.cu b/cpp/src/community/spectral_clustering.cu index f32739ddf29..06b62c5019d 100644 --- a/cpp/src/community/spectral_clustering.cu +++ b/cpp/src/community/spectral_clustering.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,14 +20,14 @@ * @file spectral_clustering.cu * ---------------------------------------------------------------------------**/ -#include +#include #include #include #include -#include -#include +#include +#include #include #include diff --git a/cpp/src/community/triangles_counting.cu b/cpp/src/community/triangles_counting.cu index f6670365652..31acb4859bd 100644 --- a/cpp/src/community/triangles_counting.cu +++ b/cpp/src/community/triangles_counting.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,10 +17,10 @@ #include #include -#include -#include +#include +#include -#include +#include #include #include diff --git a/cpp/src/components/connectivity.cu b/cpp/src/components/connectivity.cu index 09412160b37..d5768c7f09f 100644 --- a/cpp/src/components/connectivity.cu +++ b/cpp/src/components/connectivity.cu @@ -19,13 +19,13 @@ #include -#include #include -#include +#include +#include +#include #include #include -#include "utilities/error.hpp" -#include "utilities/graph_utils.cuh" +#include #include "topology/topology.cuh" diff --git a/cpp/src/components/utils.h b/cpp/src/components/utils.h index c9ebb6ac4d1..7b0e3042a97 100644 --- a/cpp/src/components/utils.h +++ b/cpp/src/components/utils.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,7 +27,7 @@ #include -#include +#include namespace MLCommon { diff --git a/cpp/src/converters/COOtoCSR.cu b/cpp/src/converters/COOtoCSR.cu index 787872742e9..9164d7b9562 100644 --- a/cpp/src/converters/COOtoCSR.cu +++ b/cpp/src/converters/COOtoCSR.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -14,7 +14,7 @@ * limitations under the License. */ -#include +#include #include "COOtoCSR.cuh" namespace cugraph { diff --git a/cpp/src/converters/COOtoCSR.cuh b/cpp/src/converters/COOtoCSR.cuh index b110e02a513..2876f1ccf52 100644 --- a/cpp/src/converters/COOtoCSR.cuh +++ b/cpp/src/converters/COOtoCSR.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -31,14 +31,14 @@ #include #include -#include +#include #include #include -#include +#include -#include +#include namespace cugraph { namespace detail { diff --git a/cpp/src/converters/permute_graph.cuh b/cpp/src/converters/permute_graph.cuh index b5b2de83e9b..aa64cf5ae11 100644 --- a/cpp/src/converters/permute_graph.cuh +++ b/cpp/src/converters/permute_graph.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,10 +14,10 @@ * limitations under the License. */ #include -#include -#include +#include +#include +#include #include "converters/COOtoCSR.cuh" -#include "utilities/graph_utils.cuh" namespace cugraph { namespace detail { diff --git a/cpp/src/converters/renumber.cuh b/cpp/src/converters/renumber.cuh index 263d7199c10..ccf4e6f62c2 100644 --- a/cpp/src/converters/renumber.cuh +++ b/cpp/src/converters/renumber.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -30,9 +30,9 @@ #include #include -#include +#include +#include #include "sort/bitonic.cuh" -#include "utilities/graph_utils.cuh" namespace cugraph { namespace detail { diff --git a/cpp/src/cores/core_number.cu b/cpp/src/cores/core_number.cu index 091ba07ccc6..419232e8deb 100644 --- a/cpp/src/cores/core_number.cu +++ b/cpp/src/cores/core_number.cu @@ -17,8 +17,8 @@ #include #include #include -#include -#include +#include +#include //#include namespace cugraph { diff --git a/cpp/src/experimental/bfs.cu b/cpp/src/experimental/bfs.cu index 2a703c1c85e..101faf43a6f 100644 --- a/cpp/src/experimental/bfs.cu +++ b/cpp/src/experimental/bfs.cu @@ -14,13 +14,13 @@ * limitations under the License. */ -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include #include #include diff --git a/cpp/src/experimental/coarsen_graph.cu b/cpp/src/experimental/coarsen_graph.cu index f0864b8af63..2e476a9b54d 100644 --- a/cpp/src/experimental/coarsen_graph.cu +++ b/cpp/src/experimental/coarsen_graph.cu @@ -14,14 +14,14 @@ * limitations under the License. 
*/ -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include diff --git a/cpp/src/experimental/generate_rmat_edgelist.cu b/cpp/src/experimental/generate_rmat_edgelist.cu index f00443a0596..e0cccd70071 100644 --- a/cpp/src/experimental/generate_rmat_edgelist.cu +++ b/cpp/src/experimental/generate_rmat_edgelist.cu @@ -16,8 +16,8 @@ #include -#include -#include +#include +#include #include #include diff --git a/cpp/src/experimental/graph.cu b/cpp/src/experimental/graph.cu index 18db57a737f..7793898def0 100644 --- a/cpp/src/experimental/graph.cu +++ b/cpp/src/experimental/graph.cu @@ -14,11 +14,11 @@ * limitations under the License. */ -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include #include @@ -516,4 +516,4 @@ template class graph_t; } // namespace experimental } // namespace cugraph -#include +#include diff --git a/cpp/src/experimental/graph_view.cu b/cpp/src/experimental/graph_view.cu index 67603ae260b..c22fde5f4c7 100644 --- a/cpp/src/experimental/graph_view.cu +++ b/cpp/src/experimental/graph_view.cu @@ -14,12 +14,12 @@ * limitations under the License. */ -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include #include #include diff --git a/cpp/src/experimental/induced_subgraph.cu b/cpp/src/experimental/induced_subgraph.cu index 5cda36ad7e2..062bf18cd95 100644 --- a/cpp/src/experimental/induced_subgraph.cu +++ b/cpp/src/experimental/induced_subgraph.cu @@ -14,11 +14,11 @@ * limitations under the License. */ -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include #include diff --git a/cpp/src/experimental/katz_centrality.cu b/cpp/src/experimental/katz_centrality.cu index 7ffef5053af..ad62f5e9d68 100644 --- a/cpp/src/experimental/katz_centrality.cu +++ b/cpp/src/experimental/katz_centrality.cu @@ -14,13 +14,13 @@ * limitations under the License. */ -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include #include #include diff --git a/cpp/src/experimental/louvain.cuh b/cpp/src/experimental/louvain.cuh index 77e4c9c4604..582b07d39d2 100644 --- a/cpp/src/experimental/louvain.cuh +++ b/cpp/src/experimental/louvain.cuh @@ -15,18 +15,18 @@ */ #pragma once -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include #include #include diff --git a/cpp/src/experimental/pagerank.cu b/cpp/src/experimental/pagerank.cu index e5874acb04f..db54783453e 100644 --- a/cpp/src/experimental/pagerank.cu +++ b/cpp/src/experimental/pagerank.cu @@ -14,17 +14,17 @@ * limitations under the License. */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include diff --git a/cpp/src/experimental/relabel.cu b/cpp/src/experimental/relabel.cu index 918feeb7a10..eb257acf432 100644 --- a/cpp/src/experimental/relabel.cu +++ b/cpp/src/experimental/relabel.cu @@ -14,15 +14,15 @@ * limitations under the License. 
*/ -#include - -#include -#include -#include -#include -#include -#include -#include +#include + +#include +#include +#include +#include +#include +#include +#include #include #include diff --git a/cpp/src/experimental/renumber_edgelist.cu b/cpp/src/experimental/renumber_edgelist.cu index 01022e8fa6d..d93b9d18911 100644 --- a/cpp/src/experimental/renumber_edgelist.cu +++ b/cpp/src/experimental/renumber_edgelist.cu @@ -14,16 +14,16 @@ * limitations under the License. */ -#include - -#include -#include -#include -#include -#include -#include -#include -#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include #include #include diff --git a/cpp/src/experimental/renumber_utils.cu b/cpp/src/experimental/renumber_utils.cu index eef6ca88b3c..5e5e88ef8f7 100644 --- a/cpp/src/experimental/renumber_utils.cu +++ b/cpp/src/experimental/renumber_utils.cu @@ -14,13 +14,13 @@ * limitations under the License. */ -#include +#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include #include diff --git a/cpp/src/experimental/sssp.cu b/cpp/src/experimental/sssp.cu index fc488794795..4b130668a0d 100644 --- a/cpp/src/experimental/sssp.cu +++ b/cpp/src/experimental/sssp.cu @@ -14,16 +14,16 @@ * limitations under the License. */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include diff --git a/cpp/src/layout/barnes_hut.hpp b/cpp/src/layout/barnes_hut.hpp index 437c98fce4b..ebef93e74fb 100644 --- a/cpp/src/layout/barnes_hut.hpp +++ b/cpp/src/layout/barnes_hut.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,16 +17,16 @@ #pragma once #include -#include +#include #include #include -#include -#include +#include +#include +#include #include "bh_kernels.hpp" #include "fa2_kernels.hpp" -#include "utilities/graph_utils.cuh" #include "utils.hpp" namespace cugraph { diff --git a/cpp/src/layout/exact_fa2.hpp b/cpp/src/layout/exact_fa2.hpp index 0b90e417968..abad5a5630f 100644 --- a/cpp/src/layout/exact_fa2.hpp +++ b/cpp/src/layout/exact_fa2.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,12 +17,12 @@ #pragma once #include -#include +#include #include #include -#include -#include +#include +#include #include "exact_repulsion.hpp" #include "fa2_kernels.hpp" diff --git a/cpp/src/layout/fa2_kernels.hpp b/cpp/src/layout/fa2_kernels.hpp index 0c7e9b1d193..9aec348cec5 100644 --- a/cpp/src/layout/fa2_kernels.hpp +++ b/cpp/src/layout/fa2_kernels.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -17,7 +17,7 @@ #pragma once #define restrict __restrict__ -#include "utilities/graph_utils.cuh" +#include namespace cugraph { namespace detail { diff --git a/cpp/src/linear_assignment/hungarian.cu b/cpp/src/linear_assignment/hungarian.cu index 40f7be52c90..dfa1e43edad 100644 --- a/cpp/src/linear_assignment/hungarian.cu +++ b/cpp/src/linear_assignment/hungarian.cu @@ -17,12 +17,12 @@ #include #include -#include +#include #include #include -#include +#include #include diff --git a/cpp/src/link_analysis/gunrock_hits.cpp b/cpp/src/link_analysis/gunrock_hits.cpp index 5ffaacfe7a6..ffaec16c6a8 100644 --- a/cpp/src/link_analysis/gunrock_hits.cpp +++ b/cpp/src/link_analysis/gunrock_hits.cpp @@ -19,10 +19,10 @@ * @brief wrapper calling gunrock's HITS analytic * --------------------------------------------------------------------------*/ -#include -#include +#include +#include -#include +#include #include diff --git a/cpp/src/link_prediction/jaccard.cu b/cpp/src/link_prediction/jaccard.cu index 83a4ec6e713..b93ad0bd0b3 100644 --- a/cpp/src/link_prediction/jaccard.cu +++ b/cpp/src/link_prediction/jaccard.cu @@ -20,9 +20,9 @@ * ---------------------------------------------------------------------------**/ #include -#include -#include "graph.hpp" -#include "utilities/graph_utils.cuh" +#include +#include +#include namespace cugraph { namespace detail { diff --git a/cpp/src/link_prediction/overlap.cu b/cpp/src/link_prediction/overlap.cu index 83fdc799649..915b2c8bd52 100644 --- a/cpp/src/link_prediction/overlap.cu +++ b/cpp/src/link_prediction/overlap.cu @@ -20,9 +20,9 @@ * ---------------------------------------------------------------------------**/ #include -#include -#include "graph.hpp" -#include "utilities/graph_utils.cuh" +#include +#include +#include namespace cugraph { namespace detail { diff --git a/cpp/src/sampling/random_walks.cu b/cpp/src/sampling/random_walks.cu index a5410d0e65e..1883535bf70 100644 --- a/cpp/src/sampling/random_walks.cu +++ b/cpp/src/sampling/random_walks.cu @@ -16,7 +16,7 @@ // Andrei Schaffer, aschaffer@nvidia.com // -#include +#include #include "random_walks.cuh" namespace cugraph { diff --git a/cpp/src/sampling/random_walks.cuh b/cpp/src/sampling/random_walks.cuh index 10a47318bcb..44a6e9e83aa 100644 --- a/cpp/src/sampling/random_walks.cuh +++ b/cpp/src/sampling/random_walks.cuh @@ -18,7 +18,7 @@ // #pragma once -#include +#include #include diff --git a/cpp/src/sort/bitonic.cuh b/cpp/src/sort/bitonic.cuh index e2922a58d39..b1b19bafdf0 100644 --- a/cpp/src/sort/bitonic.cuh +++ b/cpp/src/sort/bitonic.cuh @@ -1,7 +1,7 @@ // -*-c++-*- /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -34,7 +34,7 @@ #include #include -#include +#include namespace cugraph { namespace sort { diff --git a/cpp/src/structure/graph.cu b/cpp/src/structure/graph.cu index 056ad39fefc..9f683af8209 100644 --- a/cpp/src/structure/graph.cu +++ b/cpp/src/structure/graph.cu @@ -14,9 +14,9 @@ * limitations under the License. 
*/ -#include -#include "utilities/error.hpp" -#include "utilities/graph_utils.cuh" +#include +#include +#include #include @@ -149,4 +149,4 @@ template class GraphCompressedSparseBaseView; template class GraphCompressedSparseBaseView; } // namespace cugraph -#include "utilities/eidir_graph_utils.hpp" +#include diff --git a/cpp/src/traversal/README.md b/cpp/src/traversal/README.md index 7f436926de8..429b58d441e 100644 --- a/cpp/src/traversal/README.md +++ b/cpp/src/traversal/README.md @@ -13,7 +13,7 @@ The unit test code is the best place to search for examples on calling SSSP. The example assumes that you create an SG or MG graph somehow. The caller must create the distances and predecessors vectors in device memory and pass in the raw pointers to those vectors into the SSSP function. ```cpp -#include +#include ... using vertex_t = int32_t; // or int64_t, whichever is appropriate using weight_t = float; // or double, whichever is appropriate @@ -40,7 +40,7 @@ The unit test code is the best place to search for examples on calling BFS. The example assumes that you create an SG or MG graph somehow. The caller must create the distances and predecessors vectors in device memory and pass in the raw pointers to those vectors into the BFS function. ```cpp -#include +#include ... using vertex_t = int32_t; // or int64_t, whichever is appropriate using weight_t = float; // or double, whichever is appropriate diff --git a/cpp/src/traversal/bfs.cu b/cpp/src/traversal/bfs.cu index 7c59010cab8..8b62fbfecee 100644 --- a/cpp/src/traversal/bfs.cu +++ b/cpp/src/traversal/bfs.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. * * NVIDIA CORPORATION and its licensors retain all intellectual property * and proprietary rights in and to this software, related documentation @@ -14,14 +14,14 @@ #include #include "bfs.cuh" -#include "graph.hpp" +#include -#include +#include +#include #include "bfs_kernels.cuh" #include "mg/bfs.cuh" #include "mg/common_utils.cuh" #include "traversal_common.cuh" -#include "utilities/graph_utils.cuh" namespace cugraph { namespace detail { diff --git a/cpp/src/traversal/bfs_kernels.cuh b/cpp/src/traversal/bfs_kernels.cuh index bf2ec2fc6ee..78ce646d3c6 100644 --- a/cpp/src/traversal/bfs_kernels.cuh +++ b/cpp/src/traversal/bfs_kernels.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, NVIDIA CORPORATION. + * Copyright (c) 2018-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,7 +18,7 @@ #include #include -#include "graph.hpp" +#include #include "traversal_common.cuh" namespace cugraph { diff --git a/cpp/src/traversal/mg/frontier_expand.cuh b/cpp/src/traversal/mg/frontier_expand.cuh index 2733c319087..5436c060e18 100644 --- a/cpp/src/traversal/mg/frontier_expand.cuh +++ b/cpp/src/traversal/mg/frontier_expand.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
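// Editor's sketch, not part of this patch: the traversal README earlier in this patch
// notes that BFS and SSSP likewise expect caller-allocated distances and predecessors
// buffers in device memory, with the raw pointers passed into the call.  Assuming RMM
// (requires #include <rmm/device_uvector.hpp>) and the experimental graph_view, that
// setup typically looks like:
//
//   rmm::device_uvector<vertex_t> predecessors(graph_view.get_number_of_vertices(), handle.get_stream());
//   rmm::device_uvector<vertex_t> bfs_distances(graph_view.get_number_of_vertices(), handle.get_stream());   // hop counts for BFS
//   rmm::device_uvector<weight_t> sssp_distances(graph_view.get_number_of_vertices(), handle.get_stream());  // path costs for SSSP
//
// The raw pointers (e.g. predecessors.data()) are then handed to the bfs / sssp
// declarations in cpp/include/cugraph/algorithms.hpp.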
@@ -16,7 +16,7 @@ #pragma once -#include +#include #include "frontier_expand_kernels.cuh" #include "vertex_binning.cuh" diff --git a/cpp/src/traversal/mg/frontier_expand_kernels.cuh b/cpp/src/traversal/mg/frontier_expand_kernels.cuh index 625ec0d956f..00884e01755 100644 --- a/cpp/src/traversal/mg/frontier_expand_kernels.cuh +++ b/cpp/src/traversal/mg/frontier_expand_kernels.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,7 +16,7 @@ #pragma once -#include +#include #include "vertex_binning.cuh" namespace cugraph { diff --git a/cpp/src/traversal/sssp.cu b/cpp/src/traversal/sssp.cu index 6ffbbbf462b..c79c4e5a127 100644 --- a/cpp/src/traversal/sssp.cu +++ b/cpp/src/traversal/sssp.cu @@ -17,9 +17,9 @@ // Author: Prasun Gera pgera@nvidia.com #include -#include +#include -#include "graph.hpp" +#include #include "sssp.cuh" #include "sssp_kernels.cuh" diff --git a/cpp/src/traversal/sssp_kernels.cuh b/cpp/src/traversal/sssp_kernels.cuh index d96540b22b9..d1cf9980773 100644 --- a/cpp/src/traversal/sssp_kernels.cuh +++ b/cpp/src/traversal/sssp_kernels.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,8 +19,8 @@ #include #include +#include #include "traversal_common.cuh" -#include "utilities/error.hpp" namespace cugraph { namespace detail { namespace sssp_kernels { diff --git a/cpp/src/traversal/traversal_common.cuh b/cpp/src/traversal/traversal_common.cuh index 2802fb94be8..64a21a89b04 100644 --- a/cpp/src/traversal/traversal_common.cuh +++ b/cpp/src/traversal/traversal_common.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,7 +17,7 @@ #pragma once #include -#include "utilities/error.hpp" +#include #define MAXBLOCKS 65535 #define WARP_SIZE 32 diff --git a/cpp/src/traversal/tsp.hpp b/cpp/src/traversal/tsp.hpp index 1208f8c8790..8c6948f218c 100644 --- a/cpp/src/traversal/tsp.hpp +++ b/cpp/src/traversal/tsp.hpp @@ -16,7 +16,7 @@ #pragma once -#include +#include #include #include diff --git a/cpp/src/traversal/two_hop_neighbors.cu b/cpp/src/traversal/two_hop_neighbors.cu index fb984dae0ad..770e618637b 100644 --- a/cpp/src/traversal/two_hop_neighbors.cu +++ b/cpp/src/traversal/two_hop_neighbors.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,9 +20,9 @@ * ---------------------------------------------------------------------------**/ #include -#include -#include -#include +#include +#include +#include #include "two_hop_neighbors.cuh" #include diff --git a/cpp/src/tree/mst.cu b/cpp/src/tree/mst.cu index cc3bdc64a2d..54698b588a4 100644 --- a/cpp/src/tree/mst.cu +++ b/cpp/src/tree/mst.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,7 +20,7 @@ * @file mst.cu * ---------------------------------------------------------------------------**/ -#include +#include #include #include @@ -28,8 +28,8 @@ #include #include -#include -#include +#include +#include #include diff --git a/cpp/src/utilities/cython.cu b/cpp/src/utilities/cython.cu index 093a598b659..1a45782beb4 100644 --- a/cpp/src/utilities/cython.cu +++ b/cpp/src/utilities/cython.cu @@ -13,21 +13,23 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include -//#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + #include + +#include #include #include diff --git a/cpp/src/utilities/graph_utils.cuh b/cpp/src/utilities/graph_utils.cuh index ca0b5831c92..4eeab9376fa 100644 --- a/cpp/src/utilities/graph_utils.cuh +++ b/cpp/src/utilities/graph_utils.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved. * * NVIDIA CORPORATION and its licensors retain all intellectual property * and proprietary rights in and to this software, related documentation @@ -13,7 +13,7 @@ // Author: Alex Fender afender@nvidia.com #pragma once -#include +#include #include #include diff --git a/cpp/src/utilities/host_barrier.cpp b/cpp/src/utilities/host_barrier.cpp index 1c018d624ed..659e4038c67 100644 --- a/cpp/src/utilities/host_barrier.cpp +++ b/cpp/src/utilities/host_barrier.cpp @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include +#include #include diff --git a/cpp/src/utilities/path_retrieval.cu b/cpp/src/utilities/path_retrieval.cu index 93ead5898f8..2d862f659e5 100644 --- a/cpp/src/utilities/path_retrieval.cu +++ b/cpp/src/utilities/path_retrieval.cu @@ -19,8 +19,8 @@ #include -#include -#include +#include +#include namespace cugraph { namespace detail { diff --git a/cpp/src/utilities/spmv_1D.cuh b/cpp/src/utilities/spmv_1D.cuh index 81466595c19..31af0c75585 100644 --- a/cpp/src/utilities/spmv_1D.cuh +++ b/cpp/src/utilities/spmv_1D.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -16,8 +16,8 @@ #pragma once #include +#include #include -#include "utilities/error.hpp" namespace cugraph { namespace mg { diff --git a/cpp/tests/centrality/betweenness_centrality_test.cu b/cpp/tests/centrality/betweenness_centrality_test.cu index 89168618b9c..4cafab68986 100644 --- a/cpp/tests/centrality/betweenness_centrality_test.cu +++ b/cpp/tests/centrality/betweenness_centrality_test.cu @@ -18,8 +18,8 @@ #include #include -#include -#include +#include +#include #include #include diff --git a/cpp/tests/centrality/edge_betweenness_centrality_test.cu b/cpp/tests/centrality/edge_betweenness_centrality_test.cu index 50cbef86e11..e31af4dba77 100644 --- a/cpp/tests/centrality/edge_betweenness_centrality_test.cu +++ b/cpp/tests/centrality/edge_betweenness_centrality_test.cu @@ -26,8 +26,8 @@ #include -#include -#include +#include +#include #include #include diff --git a/cpp/tests/centrality/katz_centrality_test.cu b/cpp/tests/centrality/katz_centrality_test.cu index 114a89858b8..44e52a7626f 100644 --- a/cpp/tests/centrality/katz_centrality_test.cu +++ b/cpp/tests/centrality/katz_centrality_test.cu @@ -20,8 +20,8 @@ #include -#include -#include +#include +#include #include #include diff --git a/cpp/tests/community/balanced_edge_test.cpp b/cpp/tests/community/balanced_edge_test.cpp index 81cee945821..a4bd8de769f 100644 --- a/cpp/tests/community/balanced_edge_test.cpp +++ b/cpp/tests/community/balanced_edge_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. * * NVIDIA CORPORATION and its licensors retain all intellectual property * and proprietary rights in and to this software, related documentation @@ -10,7 +10,7 @@ */ #include -#include +#include #include diff --git a/cpp/tests/community/ecg_test.cpp b/cpp/tests/community/ecg_test.cpp index a13ee2fe360..0f0960b0abb 100644 --- a/cpp/tests/community/ecg_test.cpp +++ b/cpp/tests/community/ecg_test.cpp @@ -10,8 +10,8 @@ */ #include -#include -#include +#include +#include #include diff --git a/cpp/tests/community/egonet_test.cu b/cpp/tests/community/egonet_test.cu index 27a235ee15b..6f1ca4eb374 100644 --- a/cpp/tests/community/egonet_test.cu +++ b/cpp/tests/community/egonet_test.cu @@ -18,9 +18,9 @@ #include #include -#include -#include -#include +#include +#include +#include #include #include diff --git a/cpp/tests/community/leiden_test.cpp b/cpp/tests/community/leiden_test.cpp index 9083400f85c..a586810b6b6 100644 --- a/cpp/tests/community/leiden_test.cpp +++ b/cpp/tests/community/leiden_test.cpp @@ -10,8 +10,8 @@ */ #include -#include -#include +#include +#include #include diff --git a/cpp/tests/community/louvain_test.cpp b/cpp/tests/community/louvain_test.cpp index 43d274e6723..821e8651d70 100644 --- a/cpp/tests/community/louvain_test.cpp +++ b/cpp/tests/community/louvain_test.cpp @@ -16,9 +16,9 @@ #include #include -#include +#include -#include +#include #include #include diff --git a/cpp/tests/community/mg_louvain_helper.cu b/cpp/tests/community/mg_louvain_helper.cu index d62eaa1ec55..1311970292a 100644 --- a/cpp/tests/community/mg_louvain_helper.cu +++ b/cpp/tests/community/mg_louvain_helper.cu @@ -16,11 +16,11 @@ #include "mg_louvain_helper.hpp" -#include +#include -#include -#include -#include +#include +#include +#include #include diff --git a/cpp/tests/community/mg_louvain_helper.hpp b/cpp/tests/community/mg_louvain_helper.hpp index 43eb294cd13..456301f4d7b 100644 --- a/cpp/tests/community/mg_louvain_helper.hpp +++ 
b/cpp/tests/community/mg_louvain_helper.hpp @@ -16,7 +16,7 @@ #pragma once -#include +#include #include #include diff --git a/cpp/tests/community/mg_louvain_test.cpp b/cpp/tests/community/mg_louvain_test.cpp index 4b398f0a4aa..e8cc94edf99 100644 --- a/cpp/tests/community/mg_louvain_test.cpp +++ b/cpp/tests/community/mg_louvain_test.cpp @@ -19,8 +19,8 @@ #include #include -#include -#include +#include +#include #include #include diff --git a/cpp/tests/community/triangle_test.cu b/cpp/tests/community/triangle_test.cu index 1c5c99261d2..0faeb795e15 100644 --- a/cpp/tests/community/triangle_test.cu +++ b/cpp/tests/community/triangle_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. * * NVIDIA CORPORATION and its licensors retain all intellectual property * and proprietary rights in and to this software, related documentation @@ -10,8 +10,8 @@ */ #include -#include -#include +#include +#include #include diff --git a/cpp/tests/components/con_comp_test.cu b/cpp/tests/components/con_comp_test.cu index fdae77f2384..e394d5fc97c 100644 --- a/cpp/tests/components/con_comp_test.cu +++ b/cpp/tests/components/con_comp_test.cu @@ -18,9 +18,9 @@ #include -#include #include -#include +#include +#include #include #include diff --git a/cpp/tests/components/scc_test.cu b/cpp/tests/components/scc_test.cu index b875a459bd0..0d2e87c40a2 100644 --- a/cpp/tests/components/scc_test.cu +++ b/cpp/tests/components/scc_test.cu @@ -16,10 +16,10 @@ #include #include -#include #include #include -#include +#include +#include #include #include diff --git a/cpp/tests/components/wcc_graphs.hpp b/cpp/tests/components/wcc_graphs.hpp index 2b5955c2b78..18989b9b46f 100644 --- a/cpp/tests/components/wcc_graphs.hpp +++ b/cpp/tests/components/wcc_graphs.hpp @@ -9,7 +9,7 @@ * */ -#include +#include #include diff --git a/cpp/tests/components/wcc_test.cpp b/cpp/tests/components/wcc_test.cpp index 962ecefe8f3..381757bc977 100644 --- a/cpp/tests/components/wcc_test.cpp +++ b/cpp/tests/components/wcc_test.cpp @@ -13,8 +13,8 @@ #include #include -#include -#include +#include +#include #include #include diff --git a/cpp/tests/experimental/bfs_test.cpp b/cpp/tests/experimental/bfs_test.cpp index 44b664c5b92..da4ef2f5dfb 100644 --- a/cpp/tests/experimental/bfs_test.cpp +++ b/cpp/tests/experimental/bfs_test.cpp @@ -19,10 +19,10 @@ #include #include -#include -#include -#include -#include +#include +#include +#include +#include #include #include diff --git a/cpp/tests/experimental/coarsen_graph_test.cpp b/cpp/tests/experimental/coarsen_graph_test.cpp index 5943a5cd286..7f76094fa0f 100644 --- a/cpp/tests/experimental/coarsen_graph_test.cpp +++ b/cpp/tests/experimental/coarsen_graph_test.cpp @@ -17,10 +17,10 @@ #include #include -#include -#include -#include -#include +#include +#include +#include +#include #include #include diff --git a/cpp/tests/experimental/degree_test.cpp b/cpp/tests/experimental/degree_test.cpp index ea7cc246df0..80f1b51f80c 100644 --- a/cpp/tests/experimental/degree_test.cpp +++ b/cpp/tests/experimental/degree_test.cpp @@ -17,9 +17,9 @@ #include #include -#include -#include -#include +#include +#include +#include #include #include diff --git a/cpp/tests/experimental/generate_rmat_test.cpp b/cpp/tests/experimental/generate_rmat_test.cpp index 60c3a322725..6d97628e83d 100644 --- a/cpp/tests/experimental/generate_rmat_test.cpp +++ b/cpp/tests/experimental/generate_rmat_test.cpp @@ -18,8 +18,8 @@ #include #include 
-#include -#include +#include +#include #include #include diff --git a/cpp/tests/experimental/graph_test.cpp b/cpp/tests/experimental/graph_test.cpp index bdf56ae7aff..ae899c3ba33 100644 --- a/cpp/tests/experimental/graph_test.cpp +++ b/cpp/tests/experimental/graph_test.cpp @@ -17,8 +17,8 @@ #include #include -#include -#include +#include +#include #include #include diff --git a/cpp/tests/experimental/induced_subgraph_test.cpp b/cpp/tests/experimental/induced_subgraph_test.cpp index 2d49c174d7e..8a69da1475a 100644 --- a/cpp/tests/experimental/induced_subgraph_test.cpp +++ b/cpp/tests/experimental/induced_subgraph_test.cpp @@ -17,9 +17,9 @@ #include #include -#include -#include -#include +#include +#include +#include #include #include diff --git a/cpp/tests/experimental/katz_centrality_test.cpp b/cpp/tests/experimental/katz_centrality_test.cpp index 232d82a1c91..35773073757 100644 --- a/cpp/tests/experimental/katz_centrality_test.cpp +++ b/cpp/tests/experimental/katz_centrality_test.cpp @@ -19,10 +19,10 @@ #include #include -#include -#include -#include -#include +#include +#include +#include +#include #include #include diff --git a/cpp/tests/experimental/mg_bfs_test.cpp b/cpp/tests/experimental/mg_bfs_test.cpp index f6e0a57e602..e498e403334 100644 --- a/cpp/tests/experimental/mg_bfs_test.cpp +++ b/cpp/tests/experimental/mg_bfs_test.cpp @@ -19,11 +19,11 @@ #include #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include #include diff --git a/cpp/tests/experimental/mg_katz_centrality_test.cpp b/cpp/tests/experimental/mg_katz_centrality_test.cpp index 864b68caf33..eca04fb3241 100644 --- a/cpp/tests/experimental/mg_katz_centrality_test.cpp +++ b/cpp/tests/experimental/mg_katz_centrality_test.cpp @@ -19,8 +19,8 @@ #include #include -#include -#include +#include +#include #include #include diff --git a/cpp/tests/experimental/mg_sssp_test.cpp b/cpp/tests/experimental/mg_sssp_test.cpp index 70f1a95e1f4..d3da904afc9 100644 --- a/cpp/tests/experimental/mg_sssp_test.cpp +++ b/cpp/tests/experimental/mg_sssp_test.cpp @@ -19,11 +19,11 @@ #include #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include #include diff --git a/cpp/tests/experimental/ms_bfs_test.cpp b/cpp/tests/experimental/ms_bfs_test.cpp index eec51f105ab..b8d1e43f81e 100644 --- a/cpp/tests/experimental/ms_bfs_test.cpp +++ b/cpp/tests/experimental/ms_bfs_test.cpp @@ -18,10 +18,10 @@ #include #include -#include -#include -#include -#include +#include +#include +#include +#include #include #include diff --git a/cpp/tests/experimental/pagerank_test.cpp b/cpp/tests/experimental/pagerank_test.cpp index 1e26245b74c..e1b7b121b1f 100644 --- a/cpp/tests/experimental/pagerank_test.cpp +++ b/cpp/tests/experimental/pagerank_test.cpp @@ -19,10 +19,10 @@ #include #include -#include -#include -#include -#include +#include +#include +#include +#include #include #include diff --git a/cpp/tests/experimental/sssp_test.cpp b/cpp/tests/experimental/sssp_test.cpp index d84c1c2fc6c..3095afad8fc 100644 --- a/cpp/tests/experimental/sssp_test.cpp +++ b/cpp/tests/experimental/sssp_test.cpp @@ -19,10 +19,10 @@ #include #include -#include -#include -#include -#include +#include +#include +#include +#include #include #include diff --git a/cpp/tests/experimental/weight_sum_test.cpp b/cpp/tests/experimental/weight_sum_test.cpp index d04cba2d132..0320438c9a6 100644 --- a/cpp/tests/experimental/weight_sum_test.cpp +++ 
b/cpp/tests/experimental/weight_sum_test.cpp @@ -17,9 +17,9 @@ #include #include -#include -#include -#include +#include +#include +#include #include #include diff --git a/cpp/tests/layout/force_atlas2_test.cu b/cpp/tests/layout/force_atlas2_test.cu index c6067407b70..c22c256ae02 100644 --- a/cpp/tests/layout/force_atlas2_test.cu +++ b/cpp/tests/layout/force_atlas2_test.cu @@ -17,8 +17,8 @@ #include #include -#include -#include +#include +#include #include #include diff --git a/cpp/tests/linear_assignment/hungarian_test.cu b/cpp/tests/linear_assignment/hungarian_test.cu index 656957a85eb..282524ffe0d 100644 --- a/cpp/tests/linear_assignment/hungarian_test.cu +++ b/cpp/tests/linear_assignment/hungarian_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. * * NVIDIA CORPORATION and its licensors retain all intellectual property * and proprietary rights in and to this software, related documentation @@ -15,8 +15,8 @@ #include #include -#include -#include +#include +#include #include @@ -261,7 +261,7 @@ void random_test(int32_t num_rows, int32_t num_cols, int32_t upper_bound, int re int32_t *d_data = data_v.data().get(); //int64_t seed{85}; int64_t seed{time(nullptr)}; - + thrust::for_each(rmm::exec_policy(stream)->on(stream), thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_rows * num_cols), @@ -287,8 +287,8 @@ void random_test(int32_t num_rows, int32_t num_cols, int32_t upper_bound, int re std::cout << "cost = " << r << std::endl; hr_timer.display(std::cout); - - for (int i = 0 ; i < num_cols ; ++i) + + for (int i = 0 ; i < num_cols ; ++i) validate[i] = 0; int32_t assignment_out_of_range{0}; @@ -303,8 +303,8 @@ void random_test(int32_t num_rows, int32_t num_cols, int32_t upper_bound, int re EXPECT_EQ(assignment_out_of_range, 0); - int32_t assignment_missed = 0; - + int32_t assignment_missed = 0; + for (int32_t i = 0 ; i < num_cols ; ++i) { if (validate[i] != 1) { ++assignment_missed; diff --git a/cpp/tests/pagerank/mg_pagerank_test.cpp b/cpp/tests/pagerank/mg_pagerank_test.cpp index 659a62a727c..adedfa2e3bc 100644 --- a/cpp/tests/pagerank/mg_pagerank_test.cpp +++ b/cpp/tests/pagerank/mg_pagerank_test.cpp @@ -19,11 +19,11 @@ #include #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include #include diff --git a/cpp/tests/sampling/random_walks_profiling.cu b/cpp/tests/sampling/random_walks_profiling.cu index 397196c4c78..355d62e8141 100644 --- a/cpp/tests/sampling/random_walks_profiling.cu +++ b/cpp/tests/sampling/random_walks_profiling.cu @@ -18,8 +18,8 @@ #include #include -#include -#include +#include +#include #include #include diff --git a/cpp/tests/sampling/random_walks_test.cu b/cpp/tests/sampling/random_walks_test.cu index 9e4ecd0d024..186c45109e9 100644 --- a/cpp/tests/sampling/random_walks_test.cu +++ b/cpp/tests/sampling/random_walks_test.cu @@ -23,8 +23,8 @@ #include #include -#include -#include +#include +#include #include #include diff --git a/cpp/tests/sampling/random_walks_utils.cuh b/cpp/tests/sampling/random_walks_utils.cuh index 44a6f8d561b..f2db29eb23b 100644 --- a/cpp/tests/sampling/random_walks_utils.cuh +++ b/cpp/tests/sampling/random_walks_utils.cuh @@ -16,7 +16,7 @@ #pragma once #include -#include +#include #include #include diff --git a/cpp/tests/sampling/rw_low_level_test.cu b/cpp/tests/sampling/rw_low_level_test.cu index 29fd01fc7e0..f5b0db0ed3e 100644 --- 
a/cpp/tests/sampling/rw_low_level_test.cu +++ b/cpp/tests/sampling/rw_low_level_test.cu @@ -23,8 +23,8 @@ #include #include -#include -#include +#include +#include #include #include diff --git a/cpp/tests/traversal/bfs_test.cu b/cpp/tests/traversal/bfs_test.cu index 9027d73b83e..8cbfe0081d6 100644 --- a/cpp/tests/traversal/bfs_test.cu +++ b/cpp/tests/traversal/bfs_test.cu @@ -19,7 +19,7 @@ #include #include -#include +#include #include diff --git a/cpp/tests/traversal/sssp_test.cu b/cpp/tests/traversal/sssp_test.cu index e151ab64e68..1903f9ad302 100644 --- a/cpp/tests/traversal/sssp_test.cu +++ b/cpp/tests/traversal/sssp_test.cu @@ -13,9 +13,9 @@ #include #include -#include #include -#include +#include +#include #include diff --git a/cpp/tests/traversal/tsp_test.cu b/cpp/tests/traversal/tsp_test.cu index 47a72757bd8..806d9dea51a 100644 --- a/cpp/tests/traversal/tsp_test.cu +++ b/cpp/tests/traversal/tsp_test.cu @@ -30,8 +30,8 @@ #include #include -#include -#include +#include +#include #include diff --git a/cpp/tests/tree/mst_test.cu b/cpp/tests/tree/mst_test.cu index e3d7b70d51e..aee88d981c1 100644 --- a/cpp/tests/tree/mst_test.cu +++ b/cpp/tests/tree/mst_test.cu @@ -21,8 +21,8 @@ #include #include -#include -#include +#include +#include #include #include diff --git a/cpp/tests/utilities/base_fixture.hpp b/cpp/tests/utilities/base_fixture.hpp index 770fbc99397..a705ca10aaa 100644 --- a/cpp/tests/utilities/base_fixture.hpp +++ b/cpp/tests/utilities/base_fixture.hpp @@ -16,8 +16,8 @@ #pragma once +#include #include -#include #include diff --git a/cpp/tests/utilities/generate_graph_from_edgelist.cu b/cpp/tests/utilities/generate_graph_from_edgelist.cu index 5f41e0e5ce0..b15cf34188d 100644 --- a/cpp/tests/utilities/generate_graph_from_edgelist.cu +++ b/cpp/tests/utilities/generate_graph_from_edgelist.cu @@ -15,10 +15,10 @@ */ #include -#include -#include -#include -#include +#include +#include +#include +#include #include diff --git a/cpp/tests/utilities/matrix_market_file_utilities.cu b/cpp/tests/utilities/matrix_market_file_utilities.cu index bf7539864be..c51aa69fe48 100644 --- a/cpp/tests/utilities/matrix_market_file_utilities.cu +++ b/cpp/tests/utilities/matrix_market_file_utilities.cu @@ -16,10 +16,10 @@ #include -#include -#include -#include -#include +#include +#include +#include +#include #include #include diff --git a/cpp/tests/utilities/rmat_utilities.cu b/cpp/tests/utilities/rmat_utilities.cu index 3f0bb0b4a1f..9b36867bf62 100644 --- a/cpp/tests/utilities/rmat_utilities.cu +++ b/cpp/tests/utilities/rmat_utilities.cu @@ -16,11 +16,11 @@ #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include #include diff --git a/cpp/tests/utilities/test_utilities.hpp b/cpp/tests/utilities/test_utilities.hpp index 196128e37c0..d7e6d9f6ed8 100644 --- a/cpp/tests/utilities/test_utilities.hpp +++ b/cpp/tests/utilities/test_utilities.hpp @@ -15,8 +15,8 @@ */ #pragma once -#include -#include +#include +#include #include #include diff --git a/python/cugraph/centrality/betweenness_centrality.pxd b/python/cugraph/centrality/betweenness_centrality.pxd index 829d7be37d9..7abc9009cc8 100644 --- a/python/cugraph/centrality/betweenness_centrality.pxd +++ b/python/cugraph/centrality/betweenness_centrality.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -19,7 +19,7 @@ from cugraph.structure.graph_primtypes cimport * from libcpp cimport bool -cdef extern from "algorithms.hpp" namespace "cugraph": +cdef extern from "cugraph/algorithms.hpp" namespace "cugraph": cdef void betweenness_centrality[VT, ET, WT, result_t]( const handle_t &handle, diff --git a/python/cugraph/centrality/katz_centrality.pxd b/python/cugraph/centrality/katz_centrality.pxd index ce9ab5291f6..c48a90904da 100644 --- a/python/cugraph/centrality/katz_centrality.pxd +++ b/python/cugraph/centrality/katz_centrality.pxd @@ -19,7 +19,7 @@ from cugraph.structure.graph_utilities cimport * from libcpp cimport bool -cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": +cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": cdef void call_katz_centrality[VT,WT]( const handle_t &handle, diff --git a/python/cugraph/comms/comms.pxd b/python/cugraph/comms/comms.pxd index 3984ade9a9c..5bc24c0d639 100644 --- a/python/cugraph/comms/comms.pxd +++ b/python/cugraph/comms/comms.pxd @@ -19,7 +19,7 @@ from cugraph.raft.common.handle cimport * -cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": +cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": cdef void init_subcomms(handle_t &handle, size_t row_comm_size) diff --git a/python/cugraph/community/ecg.pxd b/python/cugraph/community/ecg.pxd index 9f1dc269b6f..4f13237eac7 100644 --- a/python/cugraph/community/ecg.pxd +++ b/python/cugraph/community/ecg.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -19,7 +19,7 @@ from cugraph.structure.graph_primtypes cimport * -cdef extern from "algorithms.hpp" namespace "cugraph": +cdef extern from "cugraph/algorithms.hpp" namespace "cugraph": cdef void ecg[VT,ET,WT]( const handle_t &handle, diff --git a/python/cugraph/community/egonet.pxd b/python/cugraph/community/egonet.pxd index cf1c84fb5f7..acf93330447 100644 --- a/python/cugraph/community/egonet.pxd +++ b/python/cugraph/community/egonet.pxd @@ -14,7 +14,7 @@ from cugraph.structure.graph_utilities cimport * -cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": +cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": cdef unique_ptr[cy_multi_edgelists_t] call_egonet[vertex_t, weight_t]( const handle_t &handle, const graph_container_t &g, diff --git a/python/cugraph/community/ktruss_subgraph.pxd b/python/cugraph/community/ktruss_subgraph.pxd index ab3a5189414..d993c31c375 100644 --- a/python/cugraph/community/ktruss_subgraph.pxd +++ b/python/cugraph/community/ktruss_subgraph.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -19,7 +19,7 @@ from cugraph.structure.graph_primtypes cimport * -cdef extern from "algorithms.hpp" namespace "cugraph": +cdef extern from "cugraph/algorithms.hpp" namespace "cugraph": cdef unique_ptr[GraphCOO[VT,ET,WT]] k_truss_subgraph[VT,ET,WT]( const GraphCOOView[VT,ET,WT] &graph, diff --git a/python/cugraph/community/leiden.pxd b/python/cugraph/community/leiden.pxd index 80e0e12f65a..871dc826c06 100644 --- a/python/cugraph/community/leiden.pxd +++ b/python/cugraph/community/leiden.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -21,7 +21,7 @@ from libcpp.utility cimport pair from cugraph.structure.graph_primtypes cimport * -cdef extern from "algorithms.hpp" namespace "cugraph": +cdef extern from "cugraph/algorithms.hpp" namespace "cugraph": cdef pair[size_t, weight_t] leiden[vertex_t,edge_t,weight_t]( const handle_t &handle, diff --git a/python/cugraph/community/louvain.pxd b/python/cugraph/community/louvain.pxd index 1f75c13dbaf..08625047285 100644 --- a/python/cugraph/community/louvain.pxd +++ b/python/cugraph/community/louvain.pxd @@ -21,7 +21,7 @@ from libcpp.utility cimport pair from cugraph.structure.graph_utilities cimport * -cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": +cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": cdef pair[size_t, weight_t] call_louvain[weight_t]( const handle_t &handle, diff --git a/python/cugraph/community/spectral_clustering.pxd b/python/cugraph/community/spectral_clustering.pxd index 27ce6130b05..346eb50a157 100644 --- a/python/cugraph/community/spectral_clustering.pxd +++ b/python/cugraph/community/spectral_clustering.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -19,7 +19,7 @@ from cugraph.structure.graph_primtypes cimport * -cdef extern from "algorithms.hpp" namespace "cugraph::ext_raft": +cdef extern from "cugraph/algorithms.hpp" namespace "cugraph::ext_raft": cdef void balancedCutClustering[VT,ET,WT]( const GraphCSRView[VT,ET,WT] &graph, diff --git a/python/cugraph/community/subgraph_extraction.pxd b/python/cugraph/community/subgraph_extraction.pxd index 97a71056006..583e220327d 100644 --- a/python/cugraph/community/subgraph_extraction.pxd +++ b/python/cugraph/community/subgraph_extraction.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -20,7 +20,7 @@ from cugraph.structure.graph_primtypes cimport * from libcpp.memory cimport unique_ptr -cdef extern from "algorithms.hpp" namespace "cugraph::subgraph": +cdef extern from "cugraph/algorithms.hpp" namespace "cugraph::subgraph": cdef unique_ptr[GraphCOO[VT,ET,WT]] extract_subgraph_vertex[VT,ET,WT]( const GraphCOOView[VT,ET,WT] &graph, diff --git a/python/cugraph/community/triangle_count.pxd b/python/cugraph/community/triangle_count.pxd index 70795a3f43a..55e8114ccbf 100644 --- a/python/cugraph/community/triangle_count.pxd +++ b/python/cugraph/community/triangle_count.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -20,7 +20,7 @@ from cugraph.structure.graph_primtypes cimport * from libc.stdint cimport uint64_t -cdef extern from "algorithms.hpp" namespace "cugraph::triangle": +cdef extern from "cugraph/algorithms.hpp" namespace "cugraph::triangle": cdef uint64_t triangle_count[VT,ET,WT]( const GraphCSRView[VT,ET,WT] &graph) except + diff --git a/python/cugraph/components/connectivity.pxd b/python/cugraph/components/connectivity.pxd index 94fa165969d..a5549a9f54e 100644 --- a/python/cugraph/components/connectivity.pxd +++ b/python/cugraph/components/connectivity.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -19,7 +19,7 @@ from cugraph.structure.graph_primtypes cimport * -cdef extern from "algorithms.hpp" namespace "cugraph": +cdef extern from "cugraph/algorithms.hpp" namespace "cugraph": ctypedef enum cugraph_cc_t: CUGRAPH_WEAK "cugraph::cugraph_cc_t::CUGRAPH_WEAK" diff --git a/python/cugraph/cores/core_number.pxd b/python/cugraph/cores/core_number.pxd index cf28720a3e8..17dc1118a5e 100644 --- a/python/cugraph/cores/core_number.pxd +++ b/python/cugraph/cores/core_number.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -18,7 +18,7 @@ from cugraph.structure.graph_primtypes cimport * -cdef extern from "algorithms.hpp" namespace "cugraph": +cdef extern from "cugraph/algorithms.hpp" namespace "cugraph": cdef void core_number[VT,ET,WT]( const GraphCSRView[VT,ET,WT] &graph, diff --git a/python/cugraph/cores/k_core.pxd b/python/cugraph/cores/k_core.pxd index 556dbc95ed9..1d22e7ac4d2 100644 --- a/python/cugraph/cores/k_core.pxd +++ b/python/cugraph/cores/k_core.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -18,7 +18,7 @@ from cugraph.structure.graph_primtypes cimport * -cdef extern from "algorithms.hpp" namespace "cugraph": +cdef extern from "cugraph/algorithms.hpp" namespace "cugraph": cdef unique_ptr[GraphCOO[VT,ET,WT]] k_core[VT,ET,WT]( const GraphCOOView[VT,ET,WT] &in_graph, diff --git a/python/cugraph/dask/centrality/mg_katz_centrality.pxd b/python/cugraph/dask/centrality/mg_katz_centrality.pxd index fb1730da13b..5e30530e92b 100644 --- a/python/cugraph/dask/centrality/mg_katz_centrality.pxd +++ b/python/cugraph/dask/centrality/mg_katz_centrality.pxd @@ -18,7 +18,7 @@ from cugraph.structure.graph_utilities cimport * from libcpp cimport bool -cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": +cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": cdef void call_katz_centrality[vertex_t, weight_t]( const handle_t &handle, diff --git a/python/cugraph/dask/community/louvain.pxd b/python/cugraph/dask/community/louvain.pxd index 738309dac8a..ab990330028 100644 --- a/python/cugraph/dask/community/louvain.pxd +++ b/python/cugraph/dask/community/louvain.pxd @@ -20,7 +20,7 @@ from libcpp.utility cimport pair from cugraph.structure.graph_utilities cimport * -cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": +cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": cdef pair[size_t, weight_t] call_louvain[weight_t]( const handle_t &handle, diff --git a/python/cugraph/dask/link_analysis/mg_pagerank.pxd b/python/cugraph/dask/link_analysis/mg_pagerank.pxd index 55bbc0dba7e..4b47f43dd87 100644 --- a/python/cugraph/dask/link_analysis/mg_pagerank.pxd +++ b/python/cugraph/dask/link_analysis/mg_pagerank.pxd @@ -18,7 +18,7 @@ from cugraph.structure.graph_utilities cimport * from libcpp cimport bool -cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": +cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": cdef void call_pagerank[vertex_t, weight_t]( const handle_t &handle, diff --git a/python/cugraph/dask/traversal/mg_bfs.pxd b/python/cugraph/dask/traversal/mg_bfs.pxd index 6a0277f8713..d4f399bf689 100644 --- a/python/cugraph/dask/traversal/mg_bfs.pxd +++ b/python/cugraph/dask/traversal/mg_bfs.pxd @@ -21,7 +21,7 @@ cdef extern from "limits.h": cdef int INT_MAX cdef long LONG_MAX -cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": +cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": cdef void call_bfs[vertex_t, weight_t]( const handle_t &handle, diff --git a/python/cugraph/dask/traversal/mg_sssp.pxd b/python/cugraph/dask/traversal/mg_sssp.pxd index d56575da567..937b42147e6 100644 --- a/python/cugraph/dask/traversal/mg_sssp.pxd +++ b/python/cugraph/dask/traversal/mg_sssp.pxd @@ -17,7 +17,7 @@ from cugraph.structure.graph_utilities cimport * from libcpp cimport bool -cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": +cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": cdef void call_sssp[vertex_t, weight_t]( const handle_t &handle, diff --git a/python/cugraph/generators/rmat.pxd b/python/cugraph/generators/rmat.pxd index e1ad250073e..16606b59d0f 100644 --- a/python/cugraph/generators/rmat.pxd +++ b/python/cugraph/generators/rmat.pxd @@ -14,13 +14,13 @@ from libcpp cimport bool from cugraph.structure.graph_utilities cimport * from libcpp.vector cimport vector -cdef extern from "experimental/graph_generator.hpp" namespace "cugraph::experimental": +cdef extern from 
"cugraph/experimental/graph_generator.hpp" namespace "cugraph::experimental": ctypedef enum generator_distribution_t: POWER_LAW "cugraph::experimental::generator_distribution_t::POWER_LAW" UNIFORM "cugraph::experimental::generator_distribution_t::UNIFORM" -cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": +cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": cdef unique_ptr[graph_generator_t] call_generate_rmat_edgelist[vertex_t] ( const handle_t &handle, size_t scale, @@ -42,4 +42,4 @@ cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": generator_distribution_t edge_distribution, int seed, bool clip_and_flip, - bool scramble_vertex_ids) except + \ No newline at end of file + bool scramble_vertex_ids) except + diff --git a/python/cugraph/internals/callbacks_implems.hpp b/python/cugraph/internals/callbacks_implems.hpp index 7b3a27f6bff..79fab937965 100644 --- a/python/cugraph/internals/callbacks_implems.hpp +++ b/python/cugraph/internals/callbacks_implems.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,7 +17,7 @@ #pragma once #include -#include +#include #include diff --git a/python/cugraph/layout/force_atlas2.pxd b/python/cugraph/layout/force_atlas2.pxd index cda55cda5c5..bf5186c91f9 100644 --- a/python/cugraph/layout/force_atlas2.pxd +++ b/python/cugraph/layout/force_atlas2.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -19,10 +19,10 @@ from cugraph.structure.graph_primtypes cimport * from libcpp cimport bool -cdef extern from "internals.hpp" namespace "cugraph::internals": +cdef extern from "cugraph/internals.hpp" namespace "cugraph::internals": cdef cppclass GraphBasedDimRedCallback -cdef extern from "algorithms.hpp" namespace "cugraph": +cdef extern from "cugraph/algorithms.hpp" namespace "cugraph": cdef void force_atlas2[vertex_t, edge_t, weight_t]( GraphCOOView[vertex_t, edge_t, weight_t] &graph, diff --git a/python/cugraph/layout/force_atlas2_wrapper.pyx b/python/cugraph/layout/force_atlas2_wrapper.pyx index 4515c577f78..7b801d19f1c 100644 --- a/python/cugraph/layout/force_atlas2_wrapper.pyx +++ b/python/cugraph/layout/force_atlas2_wrapper.pyx @@ -25,7 +25,7 @@ import cudf from numba import cuda import numpy as np -cdef extern from "internals.hpp" namespace "cugraph::internals": +cdef extern from "cugraph/internals.hpp" namespace "cugraph::internals": cdef cppclass GraphBasedDimRedCallback diff --git a/python/cugraph/linear_assignment/lap.pxd b/python/cugraph/linear_assignment/lap.pxd index 782d5cfef60..84f5050744d 100644 --- a/python/cugraph/linear_assignment/lap.pxd +++ b/python/cugraph/linear_assignment/lap.pxd @@ -18,7 +18,7 @@ from cugraph.structure.graph_primtypes cimport * -cdef extern from "algorithms.hpp" namespace "cugraph": +cdef extern from "cugraph/algorithms.hpp" namespace "cugraph": cdef weight_t hungarian[vertex_t,edge_t,weight_t]( const handle_t &handle, @@ -28,7 +28,7 @@ cdef extern from "algorithms.hpp" namespace "cugraph": vertex_t *assignment) except + -cdef extern from "algorithms.hpp": +cdef extern from "cugraph/algorithms.hpp": cdef weight_t dense_hungarian "cugraph::dense::hungarian" [vertex_t,weight_t]( const handle_t &handle, diff --git a/python/cugraph/link_analysis/hits.pxd b/python/cugraph/link_analysis/hits.pxd index 60d25fd3cdb..9e40f7444f9 100644 --- a/python/cugraph/link_analysis/hits.pxd +++ b/python/cugraph/link_analysis/hits.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -20,7 +20,7 @@ from cugraph.structure.graph_primtypes cimport * from libcpp cimport bool -cdef extern from "algorithms.hpp" namespace "cugraph::gunrock": +cdef extern from "cugraph/algorithms.hpp" namespace "cugraph::gunrock": cdef void hits[VT,ET,WT]( const GraphCSRView[VT,ET,WT] &graph, diff --git a/python/cugraph/link_analysis/pagerank.pxd b/python/cugraph/link_analysis/pagerank.pxd index 2c8bea12016..ed8f763b3ca 100644 --- a/python/cugraph/link_analysis/pagerank.pxd +++ b/python/cugraph/link_analysis/pagerank.pxd @@ -20,7 +20,7 @@ from cugraph.structure.graph_utilities cimport * from libcpp cimport bool -cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": +cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": cdef void call_pagerank[VT,WT]( const handle_t &handle, diff --git a/python/cugraph/link_prediction/jaccard.pxd b/python/cugraph/link_prediction/jaccard.pxd index bc55bb2cdf0..9e8c82ec3d8 100644 --- a/python/cugraph/link_prediction/jaccard.pxd +++ b/python/cugraph/link_prediction/jaccard.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. 
# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -19,7 +19,7 @@ from cugraph.structure.graph_primtypes cimport * -cdef extern from "algorithms.hpp" namespace "cugraph": +cdef extern from "cugraph/algorithms.hpp" namespace "cugraph": cdef void jaccard[VT,ET,WT]( const GraphCSRView[VT,ET,WT] &graph, diff --git a/python/cugraph/link_prediction/overlap.pxd b/python/cugraph/link_prediction/overlap.pxd index 970032b56eb..f0654472587 100644 --- a/python/cugraph/link_prediction/overlap.pxd +++ b/python/cugraph/link_prediction/overlap.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -19,7 +19,7 @@ from cugraph.structure.graph_primtypes cimport * -cdef extern from "algorithms.hpp" namespace "cugraph": +cdef extern from "cugraph/algorithms.hpp" namespace "cugraph": cdef void overlap[VT,ET,WT]( const GraphCSRView[VT,ET,WT] &graph, diff --git a/python/cugraph/sampling/random_walks.pxd b/python/cugraph/sampling/random_walks.pxd index 3e0e24b4e98..1eaea92c3e5 100644 --- a/python/cugraph/sampling/random_walks.pxd +++ b/python/cugraph/sampling/random_walks.pxd @@ -13,7 +13,7 @@ #from cugraph.structure.graph_primtypes cimport * from cugraph.structure.graph_utilities cimport * -cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": +cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": cdef unique_ptr[random_walk_ret_t] call_random_walks[vertex_t, edge_t]( const handle_t &handle, const graph_container_t &g, diff --git a/python/cugraph/structure/graph_primtypes.pxd b/python/cugraph/structure/graph_primtypes.pxd index 1e0d9626727..e0db6c31fca 100644 --- a/python/cugraph/structure/graph_primtypes.pxd +++ b/python/cugraph/structure/graph_primtypes.pxd @@ -23,7 +23,7 @@ from libcpp.vector cimport vector from cugraph.raft.common.handle cimport * from rmm._lib.device_buffer cimport device_buffer -cdef extern from "graph.hpp" namespace "cugraph": +cdef extern from "cugraph/graph.hpp" namespace "cugraph": ctypedef enum PropType: PROP_UNDEF "cugraph::PROP_UNDEF" @@ -123,12 +123,12 @@ cdef extern from "graph.hpp" namespace "cugraph": GraphCSRView[VT,ET,WT] view() -cdef extern from "algorithms.hpp" namespace "cugraph": +cdef extern from "cugraph/algorithms.hpp" namespace "cugraph": cdef unique_ptr[GraphCOO[VT, ET, WT]] get_two_hop_neighbors[VT,ET,WT]( const GraphCSRView[VT, ET, WT] &graph) except + -cdef extern from "functions.hpp" namespace "cugraph": +cdef extern from "cugraph/functions.hpp" namespace "cugraph": cdef unique_ptr[device_buffer] renumber_vertices[VT_IN,VT_OUT,ET]( ET number_of_edges, diff --git a/python/cugraph/structure/graph_utilities.pxd b/python/cugraph/structure/graph_utilities.pxd index 330978f0e38..d0942431302 100644 --- a/python/cugraph/structure/graph_utilities.pxd +++ b/python/cugraph/structure/graph_utilities.pxd @@ -25,7 +25,7 @@ from libcpp.vector cimport vector from rmm._lib.device_buffer cimport device_buffer # C++ graph utilities -cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": +cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": ctypedef enum numberTypeEnum: int32Type "cugraph::cython::numberTypeEnum::int32Type" @@ -108,7 +108,7 @@ cdef 
extern from "" namespace "std" nogil: # # 1. `cdef extern partition_t`: # -cdef extern from "experimental/graph_view.hpp" namespace "cugraph::experimental": +cdef extern from "cugraph/experimental/graph_view.hpp" namespace "cugraph::experimental": cdef cppclass partition_t[vertex_t]: pass @@ -116,7 +116,7 @@ cdef extern from "experimental/graph_view.hpp" namespace "cugraph::experimental" # 2. return type for shuffle: # -cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": +cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": cdef cppclass major_minor_weights_t[vertex_t, edge_t, weight_t]: major_minor_weights_t(const handle_t &handle) @@ -136,7 +136,7 @@ ctypedef fused shuffled_vertices_t: # 3. return type for renumber: # -cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": +cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": cdef cppclass renum_quad_t[vertex_t, edge_t]: renum_quad_t(const handle_t &handle) @@ -164,7 +164,7 @@ cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": # 4. `sort_and_shuffle_values()` wrapper: # -cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": +cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": cdef unique_ptr[major_minor_weights_t[vertex_t, edge_t, weight_t]] call_shuffle[vertex_t, edge_t, weight_t]( const handle_t &handle, @@ -175,7 +175,7 @@ cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": # 5. `renumber_edgelist()` wrapper # -cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": +cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": cdef unique_ptr[renum_quad_t[vertex_t, edge_t]] call_renumber[vertex_t, edge_t]( const handle_t &handle, diff --git a/python/cugraph/structure/utils.pxd b/python/cugraph/structure/utils.pxd index c22e64841af..350b5890149 100644 --- a/python/cugraph/structure/utils.pxd +++ b/python/cugraph/structure/utils.pxd @@ -20,7 +20,7 @@ from cugraph.structure.graph_primtypes cimport * from libcpp.memory cimport unique_ptr -cdef extern from "functions.hpp" namespace "cugraph": +cdef extern from "cugraph/functions.hpp" namespace "cugraph": cdef unique_ptr[GraphCSR[VT,ET,WT]] coo_to_csr[VT,ET,WT]( const GraphCOOView[VT,ET,WT] &graph) except + diff --git a/python/cugraph/traversal/bfs.pxd b/python/cugraph/traversal/bfs.pxd index b6465a6698c..ac825deffa6 100644 --- a/python/cugraph/traversal/bfs.pxd +++ b/python/cugraph/traversal/bfs.pxd @@ -22,7 +22,7 @@ from libcpp cimport bool cdef extern from "limits.h": cdef int INT_MAX -cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": +cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": cdef void call_bfs[vertex_t, weight_t]( const handle_t &handle, const graph_container_t &g, diff --git a/python/cugraph/traversal/sssp.pxd b/python/cugraph/traversal/sssp.pxd index 59253a5f1e4..3109668d747 100644 --- a/python/cugraph/traversal/sssp.pxd +++ b/python/cugraph/traversal/sssp.pxd @@ -18,7 +18,7 @@ from cugraph.structure.graph_utilities cimport * -cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": +cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": cdef void call_sssp[vertex_t, weight_t]( const handle_t &handle, diff --git a/python/cugraph/traversal/traveling_salesperson.pxd b/python/cugraph/traversal/traveling_salesperson.pxd index 956c7da0978..b38c18c7633 100644 --- a/python/cugraph/traversal/traveling_salesperson.pxd +++ 
b/python/cugraph/traversal/traveling_salesperson.pxd @@ -18,7 +18,7 @@ from cugraph.structure.graph_primtypes cimport * -cdef extern from "algorithms.hpp" namespace "cugraph": +cdef extern from "cugraph/algorithms.hpp" namespace "cugraph": cdef float traveling_salesperson(const handle_t &handle, int *vtx_ptr, @@ -31,4 +31,3 @@ cdef extern from "algorithms.hpp" namespace "cugraph": int nstart, bool verbose, int *route) except + - diff --git a/python/cugraph/tree/minimum_spanning_tree.pxd b/python/cugraph/tree/minimum_spanning_tree.pxd index a38aee96605..32c76ede554 100644 --- a/python/cugraph/tree/minimum_spanning_tree.pxd +++ b/python/cugraph/tree/minimum_spanning_tree.pxd @@ -19,7 +19,7 @@ from cugraph.structure.graph_primtypes cimport * -cdef extern from "algorithms.hpp" namespace "cugraph": +cdef extern from "cugraph/algorithms.hpp" namespace "cugraph": cdef unique_ptr[GraphCOO[VT,ET,WT]] minimum_spanning_tree[VT,ET,WT](const handle_t &handle, const GraphCSRView[VT,ET,WT] &graph) except + diff --git a/python/cugraph/utilities/path_retrieval.pxd b/python/cugraph/utilities/path_retrieval.pxd index 88f1da8f213..dcbbef5127d 100644 --- a/python/cugraph/utilities/path_retrieval.pxd +++ b/python/cugraph/utilities/path_retrieval.pxd @@ -18,7 +18,7 @@ from cugraph.structure.graph_primtypes cimport * -cdef extern from "utilities/path_retrieval.hpp" namespace "cugraph": +cdef extern from "cugraph/utilities/path_retrieval.hpp" namespace "cugraph": cdef void get_traversed_cost[vertex_t, weight_t](const handle_t &handle, const vertex_t *vertices, @@ -27,4 +27,3 @@ cdef extern from "utilities/path_retrieval.hpp" namespace "cugraph": weight_t *out, vertex_t stop_vertex, vertex_t num_vertices) except + - From 6a948025ccde80dcbc4d2125e1c2a6e06c0f3d00 Mon Sep 17 00:00:00 2001 From: Brad Rees <34135411+BradReesWork@users.noreply.github.com> Date: Thu, 13 May 2021 10:11:36 -0400 Subject: [PATCH 251/343] Fixing broken path utility (#1576) Fix: - Broken path utility Authors: - Brad Rees (https://github.com/BradReesWork) Approvers: - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/1576 --- python/cugraph/utilities/utils.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/python/cugraph/utilities/utils.py b/python/cugraph/utilities/utils.py index adaec0f9e44..01bae895e5a 100644 --- a/python/cugraph/utilities/utils.py +++ b/python/cugraph/utilities/utils.py @@ -62,8 +62,8 @@ def get_traversed_path(df, id): ---------- df : cudf.DataFrame The dataframe containing the results of a BFS or SSSP call - id : Int - The vertex ID + id : vertex ID + most be the same data types as what is in the dataframe Returns --------- @@ -97,8 +97,9 @@ def get_traversed_path(df, id): "DataFrame does not appear to be a BFS or " "SSP result - 'predecessor' column missing" ) - if type(id) != int: - raise ValueError("The vertex 'id' needs to be an integer") + if isinstance(id, type(df['vertex'].iloc[0])): + raise ValueError( + "The vertex 'id' needs to be the same as df['vertex']") # There is no guarantee that the dataframe has not been filtered # or edited. 
Therefore we cannot assume that using the vertex ID @@ -161,8 +162,9 @@ def get_traversed_path_list(df, id): "DataFrame does not appear to be a BFS or " "SSP result - 'predecessor' column missing" ) - if type(id) != int: - raise ValueError("The vertex 'id' needs to be an integer") + if isinstance(id, type(df['vertex'].iloc[0])): + raise ValueError( + "The vertex 'id' needs to be the same as df['vertex']") # There is no guarantee that the dataframe has not been filtered # or edited. Therefore we cannot assume that using the vertex ID From 9853ecaa7c799919f93742358eb6b0a8135c5e61 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Thu, 13 May 2021 10:27:25 -0400 Subject: [PATCH 252/343] Update the relabel function to skip relabeling on missing labels (#1598) Relabel function relabels old labels to new labels based on the provided (old_label, new_label) pairs. The provided pairs should include the entire set of old labels to be relabeled. This updates add an additional parameter ```skip_missing_labels``` to skip relabeling for labels missing in the provided pairs. This update is necessary for the new WCC implementation. Authors: - Seunghwa Kang (https://github.com/seunghwak) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) URL: https://github.com/rapidsai/cugraph/pull/1598 --- .../cugraph/experimental/graph_functions.hpp | 5 +++ cpp/src/community/flatten_dendrogram.cuh | 3 +- cpp/src/experimental/coarsen_graph.cu | 2 +- cpp/src/experimental/louvain.cuh | 3 +- cpp/src/experimental/relabel.cu | 45 ++++++++++++++++--- 5 files changed, 48 insertions(+), 10 deletions(-) diff --git a/cpp/include/cugraph/experimental/graph_functions.hpp b/cpp/include/cugraph/experimental/graph_functions.hpp index cb1b90a6e8e..36881aea5fa 100644 --- a/cpp/include/cugraph/experimental/graph_functions.hpp +++ b/cpp/include/cugraph/experimental/graph_functions.hpp @@ -338,6 +338,10 @@ coarsen_graph( * @param labels Labels to be relabeled. This initially holds old labels. Old labels are updated to * new labels in-place ([INOUT] parameter). * @param num_labels Number of labels to be relabeled. + * @param skip_missing_labels Flag dictating the behavior on missing labels (@p labels contains old + * labels missing in @p old_new_label_pairs). If set to true, missing elements are skipped (not + * relabeled). If set to false, undefined behavior (if @p do_expensive_check is set to true, this + * function will throw an exception). * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). * @return rmm::device_uvector New labels corresponding to the @p old_labels. 
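As a usage illustration only (not part of this patch): a minimal sketch of a caller opting into the new behavior. It assumes the single-GPU `relabel<int32_t, false>` instantiation and that the (old_label, new_label) pairs are passed as a tuple of device pointers, mirroring the callers updated below; entries of `labels` that do not appear among the pairs are left unchanged because `skip_missing_labels` is set to true.

```
#include <cugraph/experimental/graph_functions.hpp>

#include <raft/handle.hpp>
#include <rmm/device_uvector.hpp>

#include <tuple>

// hypothetical helper, not part of the library
void relabel_skipping_missing(raft::handle_t const& handle,
                              rmm::device_uvector<int32_t> const& old_labels,
                              rmm::device_uvector<int32_t> const& new_labels,
                              rmm::device_uvector<int32_t>& labels /* relabeled in-place */)
{
  cugraph::experimental::relabel<int32_t, false>(
    handle,
    std::make_tuple(old_labels.data(), new_labels.data()),
    static_cast<int32_t>(old_labels.size()),
    labels.data(),
    static_cast<int32_t>(labels.size()),
    true /* skip_missing_labels: labels not found in the pairs are kept as-is */,
    false /* do_expensive_check */);
}
```

Passing false instead restores the previous requirement that every label to be relabeled appears in the provided pairs; missing labels are then undefined behavior, reported as an exception when do_expensive_check is set.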
*/ @@ -347,6 +351,7 @@ void relabel(raft::handle_t const& handle, vertex_t num_label_pairs, vertex_t* labels /* [INOUT] */, vertex_t num_labels, + bool skip_missing_labels, bool do_expensive_check = false); /** diff --git a/cpp/src/community/flatten_dendrogram.cuh b/cpp/src/community/flatten_dendrogram.cuh index 3359fea87e5..ff6446b0e5f 100644 --- a/cpp/src/community/flatten_dendrogram.cuh +++ b/cpp/src/community/flatten_dendrogram.cuh @@ -51,7 +51,8 @@ void partition_at_level(raft::handle_t const &handle, dendrogram.get_level_ptr_nocheck(l)), dendrogram.get_level_size_nocheck(l), d_partition, - local_num_verts); + local_num_verts, + false); }); } diff --git a/cpp/src/experimental/coarsen_graph.cu b/cpp/src/experimental/coarsen_graph.cu index 2e476a9b54d..9f3f7c968cc 100644 --- a/cpp/src/experimental/coarsen_graph.cu +++ b/cpp/src/experimental/coarsen_graph.cu @@ -284,7 +284,7 @@ coarsen_graph( store_transposed ? graph_view.get_number_of_local_adj_matrix_partition_cols(i) : graph_view.get_number_of_local_adj_matrix_partition_rows(i), handle.get_stream()); - if (col_comm_rank == i) { + if (col_comm_rank == static_cast(i)) { // FIXME: this copy is unnecessary, beter fix RAFT comm's bcast to take const iterators for // input thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), diff --git a/cpp/src/experimental/louvain.cuh b/cpp/src/experimental/louvain.cuh index 582b07d39d2..1e618482f68 100644 --- a/cpp/src/experimental/louvain.cuh +++ b/cpp/src/experimental/louvain.cuh @@ -527,7 +527,8 @@ class Louvain { static_cast(numbering_indices.begin())), current_graph_view_.get_number_of_local_vertices(), dendrogram_->current_level_begin(), - dendrogram_->current_level_size()); + dendrogram_->current_level_size(), + false); timer_stop(handle_.get_stream()); } diff --git a/cpp/src/experimental/relabel.cu b/cpp/src/experimental/relabel.cu index eb257acf432..5360fad0031 100644 --- a/cpp/src/experimental/relabel.cu +++ b/cpp/src/experimental/relabel.cu @@ -49,6 +49,7 @@ void relabel(raft::handle_t const& handle, vertex_t num_label_pairs, vertex_t* labels /* [INOUT] */, vertex_t num_labels, + bool skip_missing_labels, bool do_expensive_check) { double constexpr load_factor = 0.7; @@ -156,11 +157,24 @@ void relabel(raft::handle_t const& handle, CUDA_TRY(cudaStreamSynchronize( handle.get_stream())); // cuco::static_map currently does not take stream - relabel_map.find( - rx_unique_old_labels.begin(), - rx_unique_old_labels.end(), - rx_unique_old_labels - .begin()); // now rx_unique_old_lables hold new labels for the corresponding old labels + if (skip_missing_labels) { + thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rx_unique_old_labels.begin(), + rx_unique_old_labels.end(), + rx_unique_old_labels.begin(), + [view = relabel_map.get_device_view()] __device__(auto old_label) { + auto found = view.find(old_label); + return found != view.end() ? 
view.find(old_label)->second.load( + cuda::std::memory_order_relaxed) + : old_label; + }); + } else { + relabel_map.find( + rx_unique_old_labels.begin(), + rx_unique_old_labels.end(), + rx_unique_old_labels.begin()); // now rx_unique_old_lables hold new labels for the + // corresponding old labels + } std::tie(new_labels_for_unique_old_labels, std::ignore) = shuffle_values( handle.get_comms(), rx_unique_old_labels.begin(), rx_value_counts, handle.get_stream()); @@ -201,10 +215,23 @@ void relabel(raft::handle_t const& handle, }); relabel_map.insert(pair_first, pair_first + num_label_pairs); - relabel_map.find(labels, labels + num_labels, labels); + if (skip_missing_labels) { + thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + labels, + labels + num_labels, + labels, + [view = relabel_map.get_device_view()] __device__(auto old_label) { + auto found = view.find(old_label); + return found != view.end() ? view.find(old_label)->second.load( + cuda::std::memory_order_relaxed) + : old_label; + }); + } else { + relabel_map.find(labels, labels + num_labels, labels); + } } - if (do_expensive_check) { + if (do_expensive_check && !skip_missing_labels) { CUGRAPH_EXPECTS( thrust::count(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), labels, @@ -224,6 +251,7 @@ template void relabel(raft::handle_t const& handle, int32_t num_label_pairs, int32_t* labels, int32_t num_labels, + bool skip_missing_labels, bool do_expensive_check); template void relabel( @@ -232,6 +260,7 @@ template void relabel( int32_t num_label_pairs, int32_t* labels, int32_t num_labels, + bool skip_missing_labels, bool do_expensive_check); template void relabel(raft::handle_t const& handle, @@ -239,6 +268,7 @@ template void relabel(raft::handle_t const& handle, int64_t num_label_pairs, int64_t* labels, int64_t num_labels, + bool skip_missing_labels, bool do_expensive_check); template void relabel( @@ -247,6 +277,7 @@ template void relabel( int64_t num_label_pairs, int64_t* labels, int64_t num_labels, + bool skip_missing_labels, bool do_expensive_check); } // namespace experimental From 7c326ac102ecb5bc75f4d148ceedcb40694c8a3e Mon Sep 17 00:00:00 2001 From: Ray Douglass <3107146+raydouglass@users.noreply.github.com> Date: Thu, 13 May 2021 10:30:03 -0400 Subject: [PATCH 253/343] Update release script (#1600) Update the release script to take a parameter with the new version instead of calculating the new version. Authors: - Ray Douglass (https://github.com/raydouglass) Approvers: - Dillon Cullinan (https://github.com/dillon-cullinan) - AJ Schmidt (https://github.com/ajschmidt8) URL: https://github.com/rapidsai/cugraph/pull/1600 --- ci/release/update-version.sh | 45 +++++++++++------------------------- 1 file changed, 14 insertions(+), 31 deletions(-) diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 7cd0d9720fc..f381ed7f6fb 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -13,42 +13,25 @@ # limitations under the License. 
## Usage -# bash update-version.sh -# where is either `major`, `minor`, `patch` +# bash update-version.sh -set -e -# Grab argument for release type -RELEASE_TYPE=$1 +# Format is YY.MM.PP - no leading 'v' or trailing 'a' +NEXT_FULL_TAG=$1 -# Get current version and calculate next versions -CURRENT_TAG=`git tag | grep -xE 'v[0-9\.]+' | sort --version-sort | tail -n 1 | tr -d 'v'` -CURRENT_MAJOR=`echo $CURRENT_TAG | awk '{split($0, a, "."); print a[1]}'` -CURRENT_MINOR=`echo $CURRENT_TAG | awk '{split($0, a, "."); print a[2]}'` -CURRENT_PATCH=`echo $CURRENT_TAG | awk '{split($0, a, "."); print a[3]}'` +# Get current version +CURRENT_TAG=$(git tag | grep -xE 'v[0-9\.]+' | sort --version-sort | tail -n 1 | tr -d 'v') +CURRENT_MAJOR=$(echo $CURRENT_TAG | awk '{split($0, a, "."); print a[1]}') +CURRENT_MINOR=$(echo $CURRENT_TAG | awk '{split($0, a, "."); print a[2]}') +CURRENT_PATCH=$(echo $CURRENT_TAG | awk '{split($0, a, "."); print a[3]}') CURRENT_SHORT_TAG=${CURRENT_MAJOR}.${CURRENT_MINOR} -NEXT_MAJOR=$((CURRENT_MAJOR + 1)) -NEXT_MINOR=$((CURRENT_MINOR + 1)) -NEXT_PATCH=$((CURRENT_PATCH + 1)) -NEXT_FULL_TAG="" -NEXT_SHORT_TAG="" -# Determine release type -if [ "$RELEASE_TYPE" == "major" ]; then - NEXT_FULL_TAG="${NEXT_MAJOR}.0.0" - NEXT_SHORT_TAG="${NEXT_MAJOR}.0" -elif [ "$RELEASE_TYPE" == "minor" ]; then - NEXT_FULL_TAG="${CURRENT_MAJOR}.${NEXT_MINOR}.0" - NEXT_SHORT_TAG="${CURRENT_MAJOR}.${NEXT_MINOR}" -elif [ "$RELEASE_TYPE" == "patch" ]; then - NEXT_FULL_TAG="${CURRENT_MAJOR}.${CURRENT_MINOR}.${NEXT_PATCH}" - NEXT_SHORT_TAG="${CURRENT_MAJOR}.${CURRENT_MINOR}" -else - echo "Incorrect release type; use 'major', 'minor', or 'patch' as an argument" - exit 1 -fi +#Get . for next version +NEXT_MAJOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[1]}') +NEXT_MINOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[2]}') +NEXT_SHORT_TAG=${NEXT_MAJOR}.${NEXT_MINOR} -echo "Preparing '$RELEASE_TYPE' release [$CURRENT_TAG -> $NEXT_FULL_TAG]" +echo "Preparing release $CURRENT_TAG => $NEXT_FULL_TAG" # Inplace sed replace; workaround for Linux and Mac function sed_runner() { @@ -69,4 +52,4 @@ for FILE in conda/environments/*.yml; do sed_runner "s/dask-cudf=${CURRENT_SHORT_TAG}/dask-cudf=${NEXT_SHORT_TAG}/g" ${FILE}; sed_runner "s/ucx-py=${CURRENT_SHORT_TAG}/ucx-py=${NEXT_SHORT_TAG}/g" ${FILE}; sed_runner "s/cuxfilter=${CURRENT_SHORT_TAG}/cuxfilter=${NEXT_SHORT_TAG}/g" ${FILE}; -done +done \ No newline at end of file From 9c42957fe98f913e7c3d55fb8788bf47db60b23e Mon Sep 17 00:00:00 2001 From: Rick Ratzel <3039903+rlratzel@users.noreply.github.com> Date: Thu, 13 May 2021 09:35:34 -0500 Subject: [PATCH 254/343] Added `uninstall` target which uninstalls libcugraph and cugraph from a prior build/install step (#1601) Added `uninstall` target which uninstalls libcugraph and cugraph from a prior build/install step. Also extended the `clean` target to remove inplace artifacts (mainly cython-generated cpp files and inplace `.so` python extensions built from cython). 
NOTE: since targets can be combined on the same command, a user can get the "scrub" behavior by running: ``` ./build.sh uninstall clean ``` Authors: - Rick Ratzel (https://github.com/rlratzel) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - Brad Rees (https://github.com/BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1601 --- build.sh | 39 ++++++++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/build.sh b/build.sh index 7c99b27f632..682e9b5ed33 100755 --- a/build.sh +++ b/build.sh @@ -19,10 +19,11 @@ ARGS=$* REPODIR=$(cd $(dirname $0); pwd) LIBCUGRAPH_BUILD_DIR=${LIBCUGRAPH_BUILD_DIR:=${REPODIR}/cpp/build} -VALIDARGS="clean libcugraph cugraph docs -v -g -n --allgpuarch --buildfaiss --show_depr_warn -h --help" +VALIDARGS="clean uninstall libcugraph cugraph docs -v -g -n --allgpuarch --buildfaiss --show_depr_warn -h --help" HELP="$0 [ ...] [ ...] where is: clean - remove all existing build artifacts and configuration (start over) + uninstall - uninstall libcugraph and cugraph from a prior build/install (see also -n) libcugraph - build the cugraph C++ code cugraph - build the cugraph Python package cpp-mgtests - build libcugraph mnmg tests. Builds MPI communicator, adding MPI as a dependency. @@ -30,7 +31,7 @@ HELP="$0 [ ...] [ ...] and is: -v - verbose build mode -g - build for debug - -n - no install step + -n - do not install after a successful build --allgpuarch - build for all supported GPU architectures --buildfaiss - build faiss statically into cugraph --show_depr_warn - show cmake deprecation warnings @@ -107,12 +108,36 @@ if hasArg cpp-mgtests; then BUILD_CPP_MG_TESTS=ON fi -# If clean given, run it prior to any other steps +# If clean or uninstall given, run them prior to any other steps +if hasArg uninstall; then + # uninstall libcugraph + if [[ "$INSTALL_PREFIX" != "" ]]; then + rm -rf ${INSTALL_PREFIX}/include/cugraph + rm -f ${INSTALL_PREFIX}/lib/libcugraph.so + fi + # This may be redundant given the above, but can also be used in case + # there are other installed files outside of the locations above. + if [ -e ${LIBCUGRAPH_BUILD_DIR}/install_manifest.txt ]; then + xargs rm -f < ${LIBCUGRAPH_BUILD_DIR}/install_manifest.txt > /dev/null 2>&1 + fi + # uninstall cugraph installed from a prior "setup.py install" + pip uninstall -y cugraph +fi + if hasArg clean; then - # FIXME: ideally the "setup.py clean" command below would also be run to - # remove all the "inplace" python build artifacts, but currently, running - # any setup.py command has side effects (eg. cloning repos). - #(cd ${REPODIR}/python && python setup.py clean) + # remove artifacts generated inplace + # FIXME: ideally the "setup.py clean" command would be used for this, but + # currently running any setup.py command has side effects (eg. cloning + # repos). + # (cd ${REPODIR}/python && python setup.py clean) + if [[ -d ${REPODIR}/python ]]; then + pushd ${REPODIR}/python > /dev/null + rm -rf dist dask-worker-space cugraph/raft *.egg-info + find . -name "__pycache__" -type d -exec rm -rf {} \; > /dev/null 2>&1 + find . -name "*.cpp" -type f -delete + find . -name "*.cpython*.so" -type f -delete + popd > /dev/null + fi # If the dirs to clean are mounted dirs in a container, the contents should # be removed but the mounted dirs will remain. 
The find removes all From 2871281b37ce29ffb9ad3b48e5379332ab2a0202 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Fri, 14 May 2021 09:37:15 -0400 Subject: [PATCH 255/343] Update graph primitives to support vertex-tagging in frontier expansion (#1551) To get early feedback and to create a baseline weakly-connected-component PR dependent on this PR. This PR requires C++17 and will not compile till https://github.com/rapidsai/cugraph/issues/1528 gets resolved. Tagging vertex IDs with root IDs is necessary in multi-source BFS, k-hop neighbors, and the newly designed weakly-connected-component algorithm based on multi-root collaborative frontier expansion. To fully support multi-source BFS & k-hop neighbors, we also need to store/query properties for (vertex, tag) pairs, and this update will be added in a future PR. Authors: - Seunghwa Kang (https://github.com/seunghwak) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) URL: https://github.com/rapidsai/cugraph/pull/1551 --- .../cugraph/experimental/graph_view.hpp | 72 ++ .../copy_v_transform_reduce_in_out_nbr.cuh | 3 + cpp/include/cugraph/patterns/count_if_e.cuh | 4 +- .../cugraph/patterns/edge_op_utils.cuh | 45 +- cpp/include/cugraph/patterns/reduce_op.cuh | 17 +- ...orm_reduce_by_adj_matrix_row_col_key_e.cuh | 1 + .../cugraph/patterns/transform_reduce_e.cuh | 3 + .../update_frontier_v_push_if_out_nbr.cuh | 795 +++++++++++------- .../cugraph/patterns/vertex_frontier.cuh | 458 +++++++--- .../cugraph/utilities/dataframe_buffer.cuh | 48 ++ cpp/include/cugraph/utilities/device_comm.cuh | 12 +- cpp/src/experimental/bfs.cu | 19 +- cpp/src/experimental/sssp.cu | 30 +- 13 files changed, 1044 insertions(+), 463 deletions(-) diff --git a/cpp/include/cugraph/experimental/graph_view.hpp b/cpp/include/cugraph/experimental/graph_view.hpp index c61cf4a5935..e077f02ff31 100644 --- a/cpp/include/cugraph/experimental/graph_view.hpp +++ b/cpp/include/cugraph/experimental/graph_view.hpp @@ -379,6 +379,24 @@ class graph_view_t + std::enable_if_t get_local_adj_matrix_partition_row_first() const + { + return partition_.get_matrix_partition_minor_first(); + } + + template + std::enable_if_t get_local_adj_matrix_partition_row_last() const + { + return partition_.get_matrix_partition_minor_last(); + } + + template + std::enable_if_t get_number_of_local_adj_matrix_partition_rows() const + { + return get_local_adj_matrix_partition_row_last() - get_local_adj_matrix_partition_row_first(); + } + vertex_t get_local_adj_matrix_partition_row_first(size_t adj_matrix_partition_idx) const { return store_transposed ? partition_.get_matrix_partition_minor_first() @@ -405,6 +423,24 @@ class graph_view_t + std::enable_if_t get_local_adj_matrix_partition_col_first() const + { + return partition_.get_matrix_partition_minor_first(); + } + + template + std::enable_if_t get_local_adj_matrix_partition_col_last() const + { + return partition_.get_matrix_partition_minor_last(); + } + + template + std::enable_if_t get_number_of_local_adj_matrix_partition_cols() const + { + return get_local_adj_matrix_partition_col_last() - get_local_adj_matrix_partition_col_first(); + } + vertex_t get_local_adj_matrix_partition_col_first(size_t adj_matrix_partition_idx) const { return store_transposed ? 
partition_.get_matrix_partition_major_first(adj_matrix_partition_idx) @@ -586,6 +622,24 @@ class graph_view_tget_number_of_edges(); } + template + std::enable_if_t get_local_adj_matrix_partition_row_first() const + { + return get_local_adj_matrix_partition_row_first(0); + } + + template + std::enable_if_t get_local_adj_matrix_partition_row_last() const + { + return get_local_adj_matrix_partition_row_last(0); + } + + template + std::enable_if_t get_number_of_local_adj_matrix_partition_rows() const + { + return get_number_of_local_adj_matrix_partition_rows(0); + } + vertex_t get_local_adj_matrix_partition_row_first(size_t adj_matrix_partition_idx) const { assert(adj_matrix_partition_idx == 0); @@ -605,6 +659,24 @@ class graph_view_t + std::enable_if_t get_local_adj_matrix_partition_col_first() const + { + return get_local_adj_matrix_partition_col_first(0); + } + + template + std::enable_if_t get_local_adj_matrix_partition_col_last() const + { + return get_local_adj_matrix_partition_col_last(0); + } + + template + std::enable_if_t get_number_of_local_adj_matrix_partition_cols() const + { + return get_number_of_local_adj_matrix_partition_cols(0); + } + vertex_t get_local_adj_matrix_partition_col_first(size_t adj_matrix_partition_idx) const { assert(adj_matrix_partition_idx == 0); diff --git a/cpp/include/cugraph/patterns/copy_v_transform_reduce_in_out_nbr.cuh b/cpp/include/cugraph/patterns/copy_v_transform_reduce_in_out_nbr.cuh index 16a9870d380..4284396370d 100644 --- a/cpp/include/cugraph/patterns/copy_v_transform_reduce_in_out_nbr.cuh +++ b/cpp/include/cugraph/patterns/copy_v_transform_reduce_in_out_nbr.cuh @@ -101,6 +101,7 @@ __global__ void for_all_major_for_all_nbr_low_degree( ? static_cast(major_offset) : minor_offset; return evaluate_edge_op() @@ -189,6 +190,7 @@ __global__ void for_all_major_for_all_nbr_mid_degree( ? static_cast(major_offset) : minor_offset; auto e_op_result = evaluate_edge_op() @@ -263,6 +265,7 @@ __global__ void for_all_major_for_all_nbr_high_degree( ? 
static_cast(major_offset) : minor_offset; auto e_op_result = evaluate_edge_op() diff --git a/cpp/include/cugraph/patterns/count_if_e.cuh b/cpp/include/cugraph/patterns/count_if_e.cuh index 1aa695bf5a9..039be17252d 100644 --- a/cpp/include/cugraph/patterns/count_if_e.cuh +++ b/cpp/include/cugraph/patterns/count_if_e.cuh @@ -66,13 +66,15 @@ typename GraphViewType::edge_type count_if_e( AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, EdgeOp e_op) { - using edge_t = typename GraphViewType::edge_type; + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; return transform_reduce_e(handle, graph_view, adj_matrix_row_value_input_first, adj_matrix_col_value_input_first, cast_edge_op_bool_to_integer +template struct is_valid_edge_op { static constexpr bool value = false; }; -template +template struct is_valid_edge_op< - ResultOfEdgeOp, - typename std::conditional::type> { + InvokeResultEdgeOp, + typename std::conditional_t> { static constexpr bool valid = true; }; template @@ -52,32 +53,36 @@ struct evaluate_edge_op { using row_value_type = typename std::iterator_traits::value_type; using col_value_type = typename std::iterator_traits::value_type; - template - __device__ std::enable_if_t>::valid, - typename std::result_of::type> - compute(V r, V c, W w, R rv, C cv, E e) + __device__ + std::enable_if_t>::valid, + typename std::invoke_result::type> + compute(K r, V c, W w, R rv, C cv, E e) { return e(r, c, w, rv, cv); } - template - __device__ std::enable_if_t>::valid, - typename std::result_of::type> - compute(V r, V c, W w, R rv, C cv, E e) + __device__ std::enable_if_t>::valid, + typename std::invoke_result::type> + compute(K r, V c, W w, R rv, C cv, E e) { return e(r, c, rv, cv); } }; template - __device__ std::enable_if_t>::valid, T> - operator()(V r, V c, W w, R rv, C cv) + __device__ + std::enable_if_t>::valid, T> + operator()(K r, V c, W w, R rv, C cv) { return e_op(r, c, w, rv, cv) ? T{1} : T{0}; } - template - __device__ std::enable_if_t>::valid, T> - operator()(V r, V c, R rv, C cv) + __device__ + std::enable_if_t>::valid, T> + operator()(K r, V c, R rv, C cv) { return e_op(r, c, rv, cv) ? T{1} : T{0}; } diff --git a/cpp/include/cugraph/patterns/reduce_op.cuh b/cpp/include/cugraph/patterns/reduce_op.cuh index d92d3352d08..e73a2861cb0 100644 --- a/cpp/include/cugraph/patterns/reduce_op.cuh +++ b/cpp/include/cugraph/patterns/reduce_op.cuh @@ -20,10 +20,19 @@ namespace cugraph { namespace experimental { namespace reduce_op { +// in case there is no payload to reduce +struct null { + using type = void; +}; + // reducing N elements, any element can be a valid output. template struct any { - using type = T; + using type = T; + // FIXME: actually every reduction operation should be side-effect free if reduction is performed + // by thrust; thrust reduction call rounds up the number of invocations based on the block size + // and discards the values outside the valid range; this does not work if the reduction operation + // has side-effects. static constexpr bool pure_function = true; // this can be called in any process __host__ __device__ T operator()(T const& lhs, T const& rhs) const { return lhs; } @@ -34,7 +43,11 @@ struct any { // should be selected. 
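For reference, a reduction operator in this pattern is just a small functor exposing a `type` alias, a `pure_function` flag, and a side-effect-free `operator()`. A hypothetical payload-summing operator (not part of this patch) would mirror `any` above and `min` below:

```
// Illustrative only (not in this patch): a payload-summing reduction operator that
// follows the same interface as reduce_op::any above and reduce_op::min below.
template <typename T>
struct plus {
  using type = T;  // payload type reduced per destination (tagged-)vertex
  // must stay free of side-effects; thrust may invoke it on padding elements and
  // discard the results (see the FIXME on reduce_op::any)
  static constexpr bool pure_function = true;

  __host__ __device__ T operator()(T const& lhs, T const& rhs) const { return lhs + rhs; }
};
```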
template struct min { - using type = T; + using type = T; + // FIXME: actually every reduction operation should be side-effect free if reduction is performed + // by thrust; thrust reduction call rounds up the number of invocations based on the block size + // and discards the values outside the valid range; this does not work if the reduction operation + // has side-effects. static constexpr bool pure_function = true; // this can be called in any process __host__ __device__ T operator()(T const& lhs, T const& rhs) const diff --git a/cpp/include/cugraph/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh b/cpp/include/cugraph/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh index e47bd6f1bbb..58633fb1e22 100644 --- a/cpp/include/cugraph/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh +++ b/cpp/include/cugraph/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh @@ -98,6 +98,7 @@ __global__ void for_all_major_for_all_nbr_low_degree( ((GraphViewType::is_adj_matrix_transposed != adj_matrix_row_key) ? major_offset : minor_offset)); auto e_op_result = evaluate_edge_op() diff --git a/cpp/include/cugraph/patterns/transform_reduce_e.cuh b/cpp/include/cugraph/patterns/transform_reduce_e.cuh index f210065043f..7cbd4839e4c 100644 --- a/cpp/include/cugraph/patterns/transform_reduce_e.cuh +++ b/cpp/include/cugraph/patterns/transform_reduce_e.cuh @@ -93,6 +93,7 @@ __global__ void for_all_major_for_all_nbr_low_degree( auto col_offset = GraphViewType::is_adj_matrix_transposed ? static_cast(idx) : minor_offset; return evaluate_edge_op() @@ -163,6 +164,7 @@ __global__ void for_all_major_for_all_nbr_mid_degree( auto col_offset = GraphViewType::is_adj_matrix_transposed ? static_cast(idx) : minor_offset; auto e_op_result = evaluate_edge_op() @@ -227,6 +229,7 @@ __global__ void for_all_major_for_all_nbr_high_degree( auto col_offset = GraphViewType::is_adj_matrix_transposed ? static_cast(idx) : minor_offset; auto e_op_result = evaluate_edge_op() diff --git a/cpp/include/cugraph/patterns/update_frontier_v_push_if_out_nbr.cuh b/cpp/include/cugraph/patterns/update_frontier_v_push_if_out_nbr.cuh index 7bd66ec6755..5cadf7af2a2 100644 --- a/cpp/include/cugraph/patterns/update_frontier_v_push_if_out_nbr.cuh +++ b/cpp/include/cugraph/patterns/update_frontier_v_push_if_out_nbr.cuh @@ -59,8 +59,185 @@ namespace detail { int32_t constexpr update_frontier_v_push_if_out_nbr_for_all_block_size = 512; +// we cannot use std::iterator_traits::value_type if Iterator is void* (reference to void +// is not allowed) +template +struct optional_payload_buffer_value_type_t; + +template +struct optional_payload_buffer_value_type_t< + PayloadIterator, + std::enable_if_t>> { + using value = typename std::iterator_traits::value_type; +}; + +template +struct optional_payload_buffer_value_type_t< + PayloadIterator, + std::enable_if_t>> { + using value = void; +}; + +// FIXME: to silence the spurious warning (missing return statement ...) 
due to the nvcc bug +// (https://stackoverflow.com/questions/64523302/cuda-missing-return-statement-at-end-of-non-void- +// function-in-constexpr-if-fun) +#if 1 +template >* = nullptr> +std::byte allocate_optional_payload_buffer(size_t size, cudaStream_t stream) +{ + return std::byte{0}; // dummy +} + +template >* = nullptr> +auto allocate_optional_payload_buffer(size_t size, cudaStream_t stream) +{ + return allocate_dataframe_buffer(size, stream); +} + +template >* = nullptr> +void* get_optional_payload_buffer_begin(std::byte& optional_payload_buffer) +{ + return static_cast(nullptr); +} + +template >* = nullptr> +auto get_optional_payload_buffer_begin( + std::add_lvalue_reference_t( + size_t{0}, cudaStream_t{nullptr}))> optional_payload_buffer) +{ + return get_dataframe_buffer_begin(optional_payload_buffer); +} +#else +auto allocate_optional_payload_buffer = [](size_t size, cudaStream_t stream) { + if constexpr (std::is_same_v) { + return std::byte{0}; // dummy + } else { + return allocate_dataframe_buffer(size, stream); + } +}; + +auto get_optional_payload_buffer_begin = [](auto& optional_payload_buffer) { + if constexpr (std::is_same_v) { + return static_cast(nullptr); + } else { + return get_dataframe_buffer_begin(optional_payload_buffer); + } +}; +#endif + +// FIXME: a temporary workaround for cudaErrorInvalidDeviceFunction error when device lambda is used +// in the else part in if constexpr else statement that involves device lambda +template +struct call_v_op_t { + VertexValueInputIterator vertex_value_input_first{}; + VertexValueOutputIterator vertex_value_output_first{}; + VertexOp v_op{}; + vertex_partition_device_t vertex_partition{}; + size_t invalid_bucket_idx; + + template + __device__ std::enable_if_t, uint8_t> operator()( + key_t key) const + { + auto v_offset = vertex_partition.get_local_vertex_offset_from_vertex_nocheck(key); + auto v_val = *(vertex_value_input_first + v_offset); + auto v_op_result = v_op(key, v_val); + if (v_op_result) { + *(vertex_value_output_first + v_offset) = thrust::get<1>(*v_op_result); + return static_cast(thrust::get<0>(*v_op_result)); + } else { + return std::numeric_limits::max(); + } + } + + template + __device__ std::enable_if_t, uint8_t> operator()( + key_t key) const + { + auto v_offset = + vertex_partition.get_local_vertex_offset_from_vertex_nocheck(thrust::get<0>(key)); + auto v_val = *(vertex_value_input_first + v_offset); + auto v_op_result = v_op(key, v_val); + if (v_op_result) { + *(vertex_value_output_first + v_offset) = thrust::get<1>(*v_op_result); + return static_cast(thrust::get<0>(*v_op_result)); + } else { + return std::numeric_limits::max(); + } + } +}; + +// FIXME: a temporary workaround for cudaErrorInvalidDeviceFunction error when device lambda is used +// after if constexpr else statement that involves device lambda (bug report submitted) +template +struct check_invalid_bucket_idx_t { + __device__ bool operator()(thrust::tuple pair) + { + return thrust::get<0>(pair) == std::numeric_limits::max(); + } +}; + +template +__device__ void push_if_buffer_element( + matrix_partition_device_t& matrix_partition, + typename std::iterator_traits::value_type key, + typename GraphViewType::vertex_type row_offset, + typename GraphViewType::vertex_type col, + typename GraphViewType::weight_type weight, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, + BufferKeyOutputIterator buffer_key_output_first, + BufferPayloadOutputIterator 
buffer_payload_output_first, + size_t* buffer_idx_ptr, + EdgeOp e_op) +{ + using vertex_t = typename GraphViewType::vertex_type; + using key_t = typename std::iterator_traits::value_type; + using payload_t = + typename optional_payload_buffer_value_type_t::value; + + auto col_offset = matrix_partition.get_minor_offset_from_minor_nocheck(col); + auto e_op_result = evaluate_edge_op() + .compute(key, + col, + weight, + *(adj_matrix_row_value_input_first + row_offset), + *(adj_matrix_col_value_input_first + col_offset), + e_op); + if (e_op_result) { + static_assert(sizeof(unsigned long long int) == sizeof(size_t)); + auto buffer_idx = atomicAdd(reinterpret_cast(buffer_idx_ptr), + static_cast(1)); + if constexpr (std::is_same_v && std::is_same_v) { + *(buffer_key_output_first + buffer_idx) = col; + } else if constexpr (std::is_same_v && !std::is_same_v) { + *(buffer_key_output_first + buffer_idx) = col; + *(buffer_payload_output_first + buffer_idx) = *e_op_result; + } else if constexpr (!std::is_same_v && std::is_same_v) { + *(buffer_key_output_first + buffer_idx) = thrust::make_tuple(col, *e_op_result); + } else { + *(buffer_key_output_first + buffer_idx) = + thrust::make_tuple(col, thrust::get<0>(*e_op_result)); + *(buffer_payload_output_first + buffer_idx) = thrust::get<1>(*e_op_result); + } + } +} + template __global__ void for_all_frontier_row_for_all_nbr_low_degree( matrix_partition_device_t matrix_partition, - RowIterator row_first, - RowIterator row_last, + KeyIterator key_first, + KeyIterator key_last, AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, BufferKeyOutputIterator buffer_key_output_first, @@ -80,6 +257,11 @@ __global__ void for_all_frontier_row_for_all_nbr_low_degree( using vertex_t = typename GraphViewType::vertex_type; using edge_t = typename GraphViewType::edge_type; using weight_t = typename GraphViewType::weight_type; + using key_t = typename std::iterator_traits::value_type; + static_assert( + std::is_same_v::value_type>); + using payload_t = + typename optional_payload_buffer_value_type_t::value; static_assert(!GraphViewType::is_adj_matrix_transposed, "GraphViewType should support the push model."); @@ -87,45 +269,38 @@ __global__ void for_all_frontier_row_for_all_nbr_low_degree( auto const tid = threadIdx.x + blockIdx.x * blockDim.x; auto idx = static_cast(tid); - while (idx < static_cast(thrust::distance(row_first, row_last))) { - vertex_t row = *(row_first + idx); + while (idx < static_cast(thrust::distance(key_first, key_last))) { + auto key = *(key_first + idx); + vertex_t row{}; + if constexpr (std::is_same_v) { + row = key; + } else { + row = thrust::get<0>(key); + } auto row_offset = matrix_partition.get_major_offset_from_major_nocheck(row); vertex_t const* indices{nullptr}; weight_t const* weights{nullptr}; edge_t local_out_degree{}; thrust::tie(indices, weights, local_out_degree) = matrix_partition.get_local_edges(row_offset); for (edge_t i = 0; i < local_out_degree; ++i) { - auto col = indices[i]; - auto weight = weights != nullptr ? weights[i] : 1.0; - auto col_offset = matrix_partition.get_minor_offset_from_minor_nocheck(col); - auto e_op_result = evaluate_edge_op() - .compute(row, - col, - weight, - *(adj_matrix_row_value_input_first + row_offset), - *(adj_matrix_col_value_input_first + col_offset), - e_op); - if (thrust::get<0>(e_op_result) == true) { - // FIXME: This atomicAdd serializes execution. 
If we renumber vertices to insure that rows - // within a partition are sorted by their out-degree in decreasing order, we can compute - // a tight uppper bound for the maximum number of pushes per warp/block and use shared - // memory buffer to reduce the number of atomicAdd operations. - static_assert(sizeof(unsigned long long int) == sizeof(size_t)); - auto buffer_idx = atomicAdd(reinterpret_cast(buffer_idx_ptr), - static_cast(1)); - *(buffer_key_output_first + buffer_idx) = col; - *(buffer_payload_output_first + buffer_idx) = thrust::get<1>(e_op_result); - } + push_if_buffer_element(matrix_partition, + key, + row_offset, + indices[i], + weights != nullptr ? weights[i] : weight_t{1.0}, + adj_matrix_row_value_input_first, + adj_matrix_col_value_input_first, + buffer_key_output_first, + buffer_payload_output_first, + buffer_idx_ptr, + e_op); } idx += gridDim.x * blockDim.x; } } template __global__ void for_all_frontier_row_for_all_nbr_mid_degree( matrix_partition_device_t matrix_partition, - RowIterator row_first, - RowIterator row_last, + KeyIterator key_first, + KeyIterator key_last, AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, BufferKeyOutputIterator buffer_key_output_first, @@ -145,6 +320,11 @@ __global__ void for_all_frontier_row_for_all_nbr_mid_degree( using vertex_t = typename GraphViewType::vertex_type; using edge_t = typename GraphViewType::edge_type; using weight_t = typename GraphViewType::weight_type; + using key_t = typename std::iterator_traits::value_type; + static_assert( + std::is_same_v::value_type>); + using payload_t = + typename optional_payload_buffer_value_type_t::value; static_assert(!GraphViewType::is_adj_matrix_transposed, "GraphViewType should support the push model."); @@ -154,38 +334,31 @@ __global__ void for_all_frontier_row_for_all_nbr_mid_degree( auto const lane_id = tid % raft::warp_size(); auto idx = static_cast(tid / raft::warp_size()); - while (idx < static_cast(thrust::distance(row_first, row_last))) { - vertex_t row = *(row_first + idx); + while (idx < static_cast(thrust::distance(key_first, key_last))) { + auto key = *(key_first + idx); + vertex_t row{}; + if constexpr (std::is_same_v) { + row = key; + } else { + row = thrust::get<0>(key); + } auto row_offset = matrix_partition.get_major_offset_from_major_nocheck(row); vertex_t const* indices{nullptr}; weight_t const* weights{nullptr}; edge_t local_out_degree{}; thrust::tie(indices, weights, local_out_degree) = matrix_partition.get_local_edges(row_offset); for (edge_t i = lane_id; i < local_out_degree; i += raft::warp_size()) { - auto col = indices[i]; - auto weight = weights != nullptr ? weights[i] : 1.0; - auto col_offset = matrix_partition.get_minor_offset_from_minor_nocheck(col); - auto e_op_result = evaluate_edge_op() - .compute(row, - col, - weight, - *(adj_matrix_row_value_input_first + row_offset), - *(adj_matrix_col_value_input_first + col_offset), - e_op); - if (thrust::get<0>(e_op_result) == true) { - // FIXME: This atomicAdd serializes execution. If we renumber vertices to insure that rows - // within a partition are sorted by their out-degree in decreasing order, we can compute - // a tight uppper bound for the maximum number of pushes per warp/block and use shared - // memory buffer to reduce the number of atomicAdd operations. 
- static_assert(sizeof(unsigned long long int) == sizeof(size_t)); - auto buffer_idx = atomicAdd(reinterpret_cast(buffer_idx_ptr), - static_cast(1)); - *(buffer_key_output_first + buffer_idx) = col; - *(buffer_payload_output_first + buffer_idx) = thrust::get<1>(e_op_result); - } + push_if_buffer_element(matrix_partition, + key, + row_offset, + indices[i], + weights != nullptr ? weights[i] : weight_t{1.0}, + adj_matrix_row_value_input_first, + adj_matrix_col_value_input_first, + buffer_key_output_first, + buffer_payload_output_first, + buffer_idx_ptr, + e_op); } idx += gridDim.x * (blockDim.x / raft::warp_size()); @@ -193,7 +366,7 @@ __global__ void for_all_frontier_row_for_all_nbr_mid_degree( } template __global__ void for_all_frontier_row_for_all_nbr_high_degree( matrix_partition_device_t matrix_partition, - RowIterator row_first, - RowIterator row_last, + KeyIterator key_first, + KeyIterator key_last, AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, BufferKeyOutputIterator buffer_key_output_first, @@ -213,44 +386,42 @@ __global__ void for_all_frontier_row_for_all_nbr_high_degree( using vertex_t = typename GraphViewType::vertex_type; using edge_t = typename GraphViewType::edge_type; using weight_t = typename GraphViewType::weight_type; + using key_t = typename std::iterator_traits::value_type; + static_assert( + std::is_same_v::value_type>); + using payload_t = + typename optional_payload_buffer_value_type_t::value; static_assert(!GraphViewType::is_adj_matrix_transposed, "GraphViewType should support the push model."); auto idx = static_cast(blockIdx.x); - while (idx < static_cast(thrust::distance(row_first, row_last))) { - vertex_t row = *(row_first + idx); + while (idx < static_cast(thrust::distance(key_first, key_last))) { + auto key = *(key_first + idx); + vertex_t row{}; + if constexpr (std::is_same_v) { + row = key; + } else { + row = thrust::get<0>(key); + } auto row_offset = matrix_partition.get_major_offset_from_major_nocheck(row); vertex_t const* indices{nullptr}; weight_t const* weights{nullptr}; edge_t local_out_degree{}; thrust::tie(indices, weights, local_out_degree) = matrix_partition.get_local_edges(row_offset); for (edge_t i = threadIdx.x; i < local_out_degree; i += blockDim.x) { - auto col = indices[i]; - auto weight = weights != nullptr ? weights[i] : 1.0; - auto col_offset = matrix_partition.get_minor_offset_from_minor_nocheck(col); - auto e_op_result = evaluate_edge_op() - .compute(row, - col, - weight, - *(adj_matrix_row_value_input_first + row_offset), - *(adj_matrix_col_value_input_first + col_offset), - e_op); - if (thrust::get<0>(e_op_result) == true) { - // FIXME: This atomicAdd serializes execution. If we renumber vertices to insure that rows - // within a partition are sorted by their out-degree in decreasing order, we can compute - // a tight uppper bound for the maximum number of pushes per warp/block and use shared - // memory buffer to reduce the number of atomicAdd operations. - static_assert(sizeof(unsigned long long int) == sizeof(size_t)); - auto buffer_idx = atomicAdd(reinterpret_cast(buffer_idx_ptr), - static_cast(1)); - *(buffer_key_output_first + buffer_idx) = col; - *(buffer_payload_output_first + buffer_idx) = thrust::get<1>(e_op_result); - } + push_if_buffer_element(matrix_partition, + key, + row_offset, + indices[i], + weights != nullptr ? 
weights[i] : weight_t{1.0}, + adj_matrix_row_value_input_first, + adj_matrix_col_value_input_first, + buffer_key_output_first, + buffer_payload_output_first, + buffer_idx_ptr, + e_op); } idx += gridDim.x; @@ -264,22 +435,38 @@ size_t sort_and_reduce_buffer_elements(raft::handle_t const& handle, size_t num_buffer_elements, ReduceOp reduce_op) { - thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - buffer_key_output_first, - buffer_key_output_first + num_buffer_elements, - buffer_payload_output_first); + using key_t = typename std::iterator_traits::value_type; + using payload_t = + typename optional_payload_buffer_value_type_t::value; + + if constexpr (std::is_same_v) { + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + buffer_key_output_first, + buffer_key_output_first + num_buffer_elements); + } else { + thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + buffer_key_output_first, + buffer_key_output_first + num_buffer_elements, + buffer_payload_output_first); + } - if (std::is_same>::value) { + size_t num_reduced_buffer_elements{}; + if constexpr (std::is_same_v) { + auto it = thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + buffer_key_output_first, + buffer_key_output_first + num_buffer_elements); + num_reduced_buffer_elements = + static_cast(thrust::distance(buffer_key_output_first, it)); + } else if constexpr (std::is_same>::value) { // FIXME: if ReducOp is any, we may have a cheaper alternative than sort & uique (i.e. discard // non-first elements) auto it = thrust::unique_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), buffer_key_output_first, buffer_key_output_first + num_buffer_elements, buffer_payload_output_first); - return static_cast(thrust::distance(buffer_key_output_first, thrust::get<0>(it))); + num_reduced_buffer_elements = + static_cast(thrust::distance(buffer_key_output_first, thrust::get<0>(it))); } else { - using key_t = typename std::iterator_traits::value_type; - using payload_t = typename std::iterator_traits::value_type; // FIXME: better avoid temporary buffer or at least limit the maximum buffer size (if we adopt // CUDA cooperative group https://devblogs.nvidia.com/cooperative-groups and global sync(), we // can use aggregate shared memory as a temporary buffer, or we can limit the buffer size, and @@ -300,8 +487,9 @@ size_t sort_and_reduce_buffer_elements(raft::handle_t const& handle, get_dataframe_buffer_begin(value_buffer), thrust::equal_to(), reduce_op); - auto num_reduced_buffer_elements = + num_reduced_buffer_elements = static_cast(thrust::distance(keys.begin(), thrust::get<0>(it))); + // FIXME: this copy can be replaced by move thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), keys.begin(), keys.begin() + num_reduced_buffer_elements, @@ -310,17 +498,21 @@ size_t sort_and_reduce_buffer_elements(raft::handle_t const& handle, get_dataframe_buffer_begin(value_buffer), get_dataframe_buffer_begin(value_buffer) + num_reduced_buffer_elements, buffer_payload_output_first); - return num_reduced_buffer_elements; } + + return num_reduced_buffer_elements; } } // namespace detail +// FIXME: this documentation needs to be updated due to (tagged-)vertex support /** - * @brief Update vertex frontier and vertex property values iterating over the outgoing edges. + * @brief Update (tagged-)vertex frontier and (tagged-)vertex property values iterating over the + * outgoing edges from the frontier. 
* * @tparam GraphViewType Type of the passed non-owning graph object. - * @tparam VertexIterator Type of the iterator for vertex identifiers. + * @tparam VertexFrontierType Type of the vertex frontier class which abstracts vertex frontier + * managements. * @tparam AdjMatrixRowValueInputIterator Type of the iterator for graph adjacency matrix row * input properties. * @tparam AdjMatrixColValueInputIterator Type of the iterator for graph adjacency matrix column @@ -329,16 +521,14 @@ size_t sort_and_reduce_buffer_elements(raft::handle_t const& handle, * @tparam ReduceOp Type of the binary reduction operator. * @tparam VertexValueInputIterator Type of the iterator for vertex properties. * @tparam VertexValueOutputIterator Type of the iterator for vertex property variables. - * @tparam VertexFrontierType Type of the vertex frontier class which abstracts vertex frontier - * managements. * @tparam VertexOp Type of the binary vertex operator. * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. * @param graph_view Non-owning graph object. - * @param vertex_frontier VertexFrontier class object for vertex frontier managements. This object - * includes multiple bucket objects. - * @param cur_fontier_bucket_idx Index of the VertexFrontier bucket holding vertices for the current - * iteration. + * @param frontier VertexFrontier class object for vertex frontier managements. This object includes + * multiple bucket objects. + * @param cur_frontier_bucket_idx Index of the VertexFrontier bucket holding vertices for the + * current iteration. * @param next_frontier_bucket_indices Indices of the VertexFrontier buckets to store new frontier * vertices for the next iteration. * @param adj_matrix_row_value_input_first Iterator pointing to the adjacency matrix row input @@ -362,11 +552,11 @@ size_t sort_and_reduce_buffer_elements(raft::handle_t const& handle, * (inclusive) vertex (assigned to tihs process in multi-GPU). `vertex_value_output_last` * (exclusive) is deduced as @p vertex_value_output_first + @p * graph_view.get_number_of_local_vertices(). - * @param v_op Binary operator takes *(@p vertex_value_input_first + i) (where i is [0, @p - * graph_view.get_number_of_local_vertices())) and reduced value of the @p e_op outputs for - * this vertex and returns the target bucket index (for frontier update) and new verrtex property - * values (to update *(@p vertex_value_output_first + i)). The target bucket index should either be - * VertexFrontier::kInvalidBucketIdx or an index in @p next_frontier_bucket_indices. + * @param v_op Ternary operator takes (tagged-)vertex ID, *(@p vertex_value_input_first + i) (where + * i is [0, @p graph_view.get_number_of_local_vertices())) and reduced value of the @p e_op outputs + * for this vertex and returns the target bucket index (for frontier update) and new verrtex + * property values (to update *(@p vertex_value_output_first + i)). The target bucket index should + * either be VertexFrontierType::kInvalidBucketIdx or an index in @p next_frontier_bucket_indices. 
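To make the v_op contract concrete, here is a minimal sketch (not taken from this patch) for an untagged, SSSP-style relaxation. Assumptions: the reduced payload is a candidate distance produced with reduce_op::min, bucket 0 collects the next frontier, and an empty optional means "no update, do not re-insert", which matches how the v_op result is dereferenced later in this file.

```
#include <thrust/optional.h>
#include <thrust/tuple.h>

// Hypothetical v_op, not part of the patch; names and types are illustrative.
template <typename vertex_t, typename payload_t>
struct relax_v_op_t {
  __device__ thrust::optional<thrust::tuple<size_t, payload_t>> operator()(
    vertex_t v, payload_t old_dist, payload_t pushed_dist) const
  {
    if (pushed_dist < old_dist) {
      // move v to bucket 0 (next frontier) and store the improved distance
      return thrust::make_tuple(size_t{0}, pushed_dist);
    }
    return thrust::nullopt;
  }
};
```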
*/ template const& next_frontier_bucket_indices, + // FIXME: if vertices in the frontier are tagged, we should have an option to access with (vertex, + // tag) pair (currently we can access only with vertex, we may use cuco::static_map for this + // purpose) AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, EdgeOp e_op, ReduceOp reduce_op, + // FIXME: if vertices in the frontier are tagged, we should have an option to access with (vertex, + // tag) pair (currently we can access only with vertex, we may use cuco::static_map for this + // purpose) VertexValueInputIterator vertex_value_input_first, + // FIXME: if vertices in the frontier are tagged, we should have an option to access with (vertex, + // tag) pair (currently we can access only with vertex, we may use cuco::static_map for this + // purpose) + // FIXME: currently, it is undefined behavior if vertices in the frontier are tagged and the same + // vertex property is updated by multiple v_op invocations with the same vertex but with different + // tags. VertexValueOutputIterator vertex_value_output_first, + // FIXME: this takes (tagged-)vertex ID in addition, think about consistency with the other + // primitives. VertexOp v_op) { static_assert(!GraphViewType::is_adj_matrix_transposed, @@ -397,10 +601,11 @@ void update_frontier_v_push_if_out_nbr( using vertex_t = typename GraphViewType::vertex_type; using edge_t = typename GraphViewType::edge_type; using weight_t = typename GraphViewType::weight_type; + using key_t = typename VertexFrontierType::key_type; using payload_t = typename ReduceOp::type; - auto cur_frontier_vertex_first = vertex_frontier.get_bucket(cur_frontier_bucket_idx).begin(); - auto cur_frontier_vertex_last = vertex_frontier.get_bucket(cur_frontier_bucket_idx).end(); + auto frontier_key_first = frontier.get_bucket(cur_frontier_bucket_idx).begin(); + auto frontier_key_last = frontier.get_bucket(cur_frontier_bucket_idx).end(); // 1. fill the buffer @@ -419,61 +624,81 @@ void update_frontier_v_push_if_out_nbr( #endif } - rmm::device_uvector keys(size_t{0}, handle.get_stream()); - auto payload_buffer = allocate_dataframe_buffer(size_t{0}, handle.get_stream()); + auto key_buffer = allocate_dataframe_buffer(size_t{0}, handle.get_stream()); + auto payload_buffer = + detail::allocate_optional_payload_buffer(size_t{0}, handle.get_stream()); rmm::device_scalar buffer_idx(size_t{0}, handle.get_stream()); for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { matrix_partition_device_t matrix_partition(graph_view, i); - rmm::device_uvector frontier_rows(0, handle.get_stream()); + auto matrix_partition_frontier_key_buffer = + allocate_dataframe_buffer(size_t{0}, handle.get_stream()); + vertex_t matrix_partition_frontier_size{0}; if (GraphViewType::is_multi_gpu) { auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); auto const col_comm_rank = col_comm.get_rank(); - auto frontier_size = - host_scalar_bcast(col_comm, - (static_cast(col_comm_rank) == i) - ? thrust::distance(cur_frontier_vertex_first, cur_frontier_vertex_last) - : size_t{0} /* dummy */, - i, - handle.get_stream()); - frontier_rows.resize(frontier_size, handle.get_stream()); + matrix_partition_frontier_size = host_scalar_bcast( + col_comm, + (static_cast(col_comm_rank) == i) + ? 
static_cast(thrust::distance(frontier_key_first, frontier_key_last)) + : vertex_t{0} /* dummy */, + i, + handle.get_stream()); + resize_dataframe_buffer( + matrix_partition_frontier_key_buffer, matrix_partition_frontier_size, handle.get_stream()); if (static_cast(col_comm_rank) == i) { thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - cur_frontier_vertex_first, - cur_frontier_vertex_last, - frontier_rows.begin()); + frontier_key_first, + frontier_key_last, + get_dataframe_buffer_begin(matrix_partition_frontier_key_buffer)); } device_bcast(col_comm, - cur_frontier_vertex_first, - frontier_rows.begin(), - frontier_size, + frontier_key_first, + get_dataframe_buffer_begin(matrix_partition_frontier_key_buffer), + matrix_partition_frontier_size, i, handle.get_stream()); } else { - frontier_rows.resize(thrust::distance(cur_frontier_vertex_first, cur_frontier_vertex_last), - handle.get_stream()); + matrix_partition_frontier_size = + static_cast(thrust::distance(frontier_key_first, frontier_key_last)); + resize_dataframe_buffer( + matrix_partition_frontier_key_buffer, matrix_partition_frontier_size, handle.get_stream()); thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - cur_frontier_vertex_first, - cur_frontier_vertex_last, - frontier_rows.begin()); + frontier_key_first, + frontier_key_last, + get_dataframe_buffer_begin(matrix_partition_frontier_key_buffer)); } - auto max_pushes = frontier_rows.size() > 0 - ? thrust::transform_reduce( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - frontier_rows.begin(), - frontier_rows.end(), - [matrix_partition] __device__(auto row) { - auto row_offset = - matrix_partition.get_major_offset_from_major_nocheck(row); - return matrix_partition.get_local_degree(row_offset); - }, - edge_t{0}, - thrust::plus()) - : edge_t{0}; + vertex_t const* matrix_partition_frontier_row_first{nullptr}; + vertex_t const* matrix_partition_frontier_row_last{nullptr}; + if constexpr (std::is_same_v) { + matrix_partition_frontier_row_first = + get_dataframe_buffer_begin(matrix_partition_frontier_key_buffer); + matrix_partition_frontier_row_last = + get_dataframe_buffer_end(matrix_partition_frontier_key_buffer); + } else { + matrix_partition_frontier_row_first = + thrust::get<0>(get_dataframe_buffer_begin(matrix_partition_frontier_key_buffer) + .get_iterator_tuple()); + matrix_partition_frontier_row_last = thrust::get<0>( + get_dataframe_buffer_end(matrix_partition_frontier_key_buffer).get_iterator_tuple()); + } + auto max_pushes = + thrust::distance(matrix_partition_frontier_row_first, matrix_partition_frontier_row_last) > 0 + ? thrust::transform_reduce( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + matrix_partition_frontier_row_first, + matrix_partition_frontier_row_last, + [matrix_partition] __device__(auto row) { + auto row_offset = matrix_partition.get_major_offset_from_major_nocheck(row); + return matrix_partition.get_local_degree(row_offset); + }, + edge_t{0}, + thrust::plus()) + : edge_t{0}; // FIXME: This is highly pessimistic for single GPU (and multi-GPU as well if we maintain // additional per column data for filtering in e_op). If we can pause & resume execution if @@ -489,8 +714,11 @@ void update_frontier_v_push_if_out_nbr( // locking. // FIXME: if i != 0, this will require costly reallocation if we don't use the new CUDA feature // to reserve address space. 
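The .get_iterator_tuple() calls earlier in this function rely on a tagged key buffer being laid out as a structure of arrays whose begin iterator is a thrust::zip_iterator. A small stand-alone sketch of that idea (helper name and types are hypothetical, not from the patch):

```
#include <thrust/iterator/zip_iterator.h>
#include <thrust/tuple.h>

#include <rmm/device_uvector.hpp>

// Hypothetical helper: a (vertex, tag) key range stored as one column per tuple element
// exposes its vertex column by unpacking the zip iterator, which is what the code above
// does with get_dataframe_buffer_begin(...).get_iterator_tuple().
template <typename vertex_t, typename tag_t>
vertex_t const* vertex_column(rmm::device_uvector<vertex_t> const& vertices,
                              rmm::device_uvector<tag_t> const& tags)
{
  auto key_first =
    thrust::make_zip_iterator(thrust::make_tuple(vertices.begin(), tags.begin()));
  // thrust::get<0>(...) recovers the plain vertex pointer, so degree sums and
  // thrust::lower_bound can run on the vertices alone without touching the tags.
  return thrust::get<0>(key_first.get_iterator_tuple());
}
```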
- keys.resize(buffer_idx.value(handle.get_stream()) + max_pushes, handle.get_stream()); - resize_dataframe_buffer(payload_buffer, keys.size(), handle.get_stream()); + auto new_buffer_size = buffer_idx.value(handle.get_stream()) + max_pushes; + resize_dataframe_buffer(key_buffer, new_buffer_size, handle.get_stream()); + if constexpr (!std::is_same_v) { + resize_dataframe_buffer(payload_buffer, new_buffer_size, handle.get_stream()); + } auto row_value_input_offset = GraphViewType::is_adj_matrix_transposed ? vertex_t{0} @@ -506,8 +734,8 @@ void update_frontier_v_push_if_out_nbr( d_thresholds.data(), h_thresholds.data(), h_thresholds.size(), handle.get_stream()); rmm::device_uvector d_offsets(d_thresholds.size(), handle.get_stream()); thrust::lower_bound(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - frontier_rows.begin(), - frontier_rows.end(), + matrix_partition_frontier_row_first, + matrix_partition_frontier_row_last, d_thresholds.begin(), d_thresholds.end(), d_offsets.begin()); @@ -528,12 +756,12 @@ void update_frontier_v_push_if_out_nbr( 0, handle.get_stream()>>>( matrix_partition, - frontier_rows.begin(), - frontier_rows.begin() + h_offsets[0], + get_dataframe_buffer_begin(matrix_partition_frontier_key_buffer), + get_dataframe_buffer_begin(matrix_partition_frontier_key_buffer) + h_offsets[0], adj_matrix_row_value_input_first + row_value_input_offset, adj_matrix_col_value_input_first, - keys.begin(), - get_dataframe_buffer_begin(payload_buffer), + get_dataframe_buffer_begin(key_buffer), + detail::get_optional_payload_buffer_begin(payload_buffer), buffer_idx.data(), e_op); } @@ -548,18 +776,18 @@ void update_frontier_v_push_if_out_nbr( 0, handle.get_stream()>>>( matrix_partition, - frontier_rows.begin() + h_offsets[0], - frontier_rows.begin() + h_offsets[1], + get_dataframe_buffer_begin(matrix_partition_frontier_key_buffer) + h_offsets[0], + get_dataframe_buffer_begin(matrix_partition_frontier_key_buffer) + h_offsets[1], adj_matrix_row_value_input_first + row_value_input_offset, adj_matrix_col_value_input_first, - keys.begin(), - get_dataframe_buffer_begin(payload_buffer), + get_dataframe_buffer_begin(key_buffer), + detail::get_optional_payload_buffer_begin(payload_buffer), buffer_idx.data(), e_op); } - if (frontier_rows.size() - h_offsets[1] > 0) { + if (matrix_partition_frontier_size - h_offsets[1] > 0) { raft::grid_1d_thread_t update_grid( - frontier_rows.size() - h_offsets[1], + matrix_partition_frontier_size - h_offsets[1], detail::update_frontier_v_push_if_out_nbr_for_all_block_size, handle.get_device_properties().maxGridSize[0]); @@ -568,19 +796,19 @@ void update_frontier_v_push_if_out_nbr( 0, handle.get_stream()>>>( matrix_partition, - frontier_rows.begin() + h_offsets[1], - frontier_rows.end(), + get_dataframe_buffer_begin(matrix_partition_frontier_key_buffer) + h_offsets[1], + get_dataframe_buffer_end(matrix_partition_frontier_key_buffer), adj_matrix_row_value_input_first + row_value_input_offset, adj_matrix_col_value_input_first, - keys.begin(), - get_dataframe_buffer_begin(payload_buffer), + get_dataframe_buffer_begin(key_buffer), + detail::get_optional_payload_buffer_begin(payload_buffer), buffer_idx.data(), e_op); } } else { - if (frontier_rows.size() > 0) { + if (matrix_partition_frontier_size > 0) { raft::grid_1d_thread_t update_grid( - frontier_rows.size(), + matrix_partition_frontier_size, detail::update_frontier_v_push_if_out_nbr_for_all_block_size, handle.get_device_properties().maxGridSize[0]); @@ -589,12 +817,12 @@ void 
update_frontier_v_push_if_out_nbr( 0, handle.get_stream()>>>( matrix_partition, - frontier_rows.begin(), - frontier_rows.end(), + get_dataframe_buffer_begin(matrix_partition_frontier_key_buffer), + get_dataframe_buffer_end(matrix_partition_frontier_key_buffer), adj_matrix_row_value_input_first + row_value_input_offset, adj_matrix_col_value_input_first, - keys.begin(), - get_dataframe_buffer_begin(payload_buffer), + get_dataframe_buffer_begin(key_buffer), + detail::get_optional_payload_buffer_begin(payload_buffer), buffer_idx.data(), e_op); } @@ -618,12 +846,12 @@ void update_frontier_v_push_if_out_nbr( // 2. reduce the buffer - auto num_buffer_elements = - detail::sort_and_reduce_buffer_elements(handle, - keys.begin(), - get_dataframe_buffer_begin(payload_buffer), - buffer_idx.value(handle.get_stream()), - reduce_op); + auto num_buffer_elements = detail::sort_and_reduce_buffer_elements( + handle, + get_dataframe_buffer_begin(key_buffer), + detail::get_optional_payload_buffer_begin(payload_buffer), + buffer_idx.value(handle.get_stream()), + reduce_op); if (GraphViewType::is_multi_gpu) { // FIXME: this step is unnecessary if row_comm_size== 1 auto& comm = handle.get_comms(); @@ -653,9 +881,16 @@ void update_frontier_v_push_if_out_nbr( d_vertex_lasts.data(), h_vertex_lasts.data(), h_vertex_lasts.size(), handle.get_stream()); rmm::device_uvector d_tx_buffer_last_boundaries(d_vertex_lasts.size(), handle.get_stream()); + vertex_t const* row_first{nullptr}; + if constexpr (std::is_same_v) { + row_first = get_dataframe_buffer_begin(key_buffer); + } else { + row_first = + thrust::get<0>(get_dataframe_buffer_begin(key_buffer).get_iterator_tuple()); + } thrust::lower_bound(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - keys.begin(), - keys.begin() + num_buffer_elements, + row_first, + row_first + num_buffer_elements, d_vertex_lasts.begin(), d_vertex_lasts.end(), d_tx_buffer_last_boundaries.begin()); @@ -669,25 +904,27 @@ void update_frontier_v_push_if_out_nbr( std::adjacent_difference( h_tx_buffer_last_boundaries.begin(), h_tx_buffer_last_boundaries.end(), tx_counts.begin()); - rmm::device_uvector rx_keys(size_t{0}, handle.get_stream()); - std::tie(rx_keys, std::ignore) = - shuffle_values(row_comm, keys.begin(), tx_counts, handle.get_stream()); - keys = std::move(rx_keys); - - auto rx_payload_buffer = allocate_dataframe_buffer(size_t{0}, handle.get_stream()); - std::tie(rx_payload_buffer, std::ignore) = - shuffle_values(row_comm, - get_dataframe_buffer_begin(payload_buffer), - tx_counts, - handle.get_stream()); - payload_buffer = std::move(rx_payload_buffer); - - num_buffer_elements = - detail::sort_and_reduce_buffer_elements(handle, - keys.begin(), - get_dataframe_buffer_begin(payload_buffer), - keys.size(), - reduce_op); + auto rx_key_buffer = allocate_dataframe_buffer(size_t{0}, handle.get_stream()); + std::tie(rx_key_buffer, std::ignore) = shuffle_values( + row_comm, get_dataframe_buffer_begin(key_buffer), tx_counts, handle.get_stream()); + key_buffer = std::move(rx_key_buffer); + + if constexpr (!std::is_same_v) { + auto rx_payload_buffer = allocate_dataframe_buffer(size_t{0}, handle.get_stream()); + std::tie(rx_payload_buffer, std::ignore) = + shuffle_values(row_comm, + get_dataframe_buffer_begin(payload_buffer), + tx_counts, + handle.get_stream()); + payload_buffer = std::move(rx_payload_buffer); + } + + num_buffer_elements = detail::sort_and_reduce_buffer_elements( + handle, + get_dataframe_buffer_begin(key_buffer), + 
detail::get_optional_payload_buffer_begin(payload_buffer), + size_dataframe_buffer(key_buffer), + reduce_op); // barrier is necessary here to avoid potential overlap (which can leads to deadlock) between // two different communicators (end of row_comm) @@ -701,7 +938,7 @@ void update_frontier_v_push_if_out_nbr( #endif } - // 3. update vertex properties + // 3. update vertex properties and frontier if (num_buffer_elements > 0) { static_assert(VertexFrontierType::kNumBuckets <= std::numeric_limits::max()); @@ -709,99 +946,75 @@ void update_frontier_v_push_if_out_nbr( vertex_partition_device_t vertex_partition(graph_view); - auto key_payload_pair_first = thrust::make_zip_iterator( - thrust::make_tuple(keys.begin(), get_dataframe_buffer_begin(payload_buffer))); - thrust::transform( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - key_payload_pair_first, - key_payload_pair_first + num_buffer_elements, - bucket_indices.begin(), - [vertex_value_input_first, - vertex_value_output_first, - v_op, - vertex_partition, - invalid_bucket_idx = VertexFrontierType::kInvalidBucketIdx] __device__(auto pair) { - auto key = thrust::get<0>(pair); - auto payload = thrust::get<1>(pair); - auto key_offset = vertex_partition.get_local_vertex_offset_from_vertex_nocheck(key); - auto v_val = *(vertex_value_input_first + key_offset); - auto v_op_result = v_op(v_val, payload); - auto bucket_idx = thrust::get<0>(v_op_result); - if (bucket_idx != invalid_bucket_idx) { - *(vertex_value_output_first + key_offset) = thrust::get<1>(v_op_result); - return static_cast(bucket_idx); - } else { - return std::numeric_limits::max(); - } - }); - - resize_dataframe_buffer(payload_buffer, size_t{0}, handle.get_stream()); - shrink_to_fit_dataframe_buffer(payload_buffer, handle.get_stream()); - - auto bucket_key_pair_first = - thrust::make_zip_iterator(thrust::make_tuple(bucket_indices.begin(), keys.begin())); - keys.resize(thrust::distance( - bucket_key_pair_first, - thrust::remove_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - bucket_key_pair_first, - bucket_key_pair_first + num_buffer_elements, - [] __device__(auto pair) { - return thrust::get<0>(pair) == - std::numeric_limits::max(); - })), - handle.get_stream()); - bucket_indices.resize(keys.size(), handle.get_stream()); - keys.shrink_to_fit(handle.get_stream()); - bucket_indices.shrink_to_fit(handle.get_stream()); - - bucket_key_pair_first = - thrust::make_zip_iterator(thrust::make_tuple(bucket_indices.begin(), keys.begin())); - if (next_frontier_bucket_indices.size() == 1) { - vertex_frontier.get_bucket(next_frontier_bucket_indices[0]).insert(keys.begin(), keys.size()); - } else if (next_frontier_bucket_indices.size() == 2) { - auto first_bucket_size = thrust::distance( - bucket_key_pair_first, - thrust::stable_partition( // stalbe_partition to maintain sorted order within each bucket - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - bucket_key_pair_first, - bucket_key_pair_first + bucket_indices.size(), - [first_bucket_idx = static_cast(next_frontier_bucket_indices[0])] __device__( - auto pair) { return thrust::get<0>(pair) == first_bucket_idx; })); - vertex_frontier.get_bucket(next_frontier_bucket_indices[0]) - .insert(keys.begin(), first_bucket_size); - vertex_frontier.get_bucket(next_frontier_bucket_indices[1]) - .insert(keys.begin() + first_bucket_size, - thrust::distance(keys.begin() + first_bucket_size, keys.end())); + if constexpr (!std::is_same_v) { + auto key_payload_pair_first = thrust::make_zip_iterator( + 
thrust::make_tuple(get_dataframe_buffer_begin(key_buffer), + detail::get_optional_payload_buffer_begin(payload_buffer))); + thrust::transform( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + key_payload_pair_first, + key_payload_pair_first + num_buffer_elements, + bucket_indices.begin(), + [vertex_value_input_first, + vertex_value_output_first, + v_op, + vertex_partition, + invalid_bucket_idx = VertexFrontierType::kInvalidBucketIdx] __device__(auto pair) { + auto key = thrust::get<0>(pair); + auto payload = thrust::get<1>(pair); + vertex_t v_offset{}; + if constexpr (std::is_same_v) { + v_offset = vertex_partition.get_local_vertex_offset_from_vertex_nocheck(key); + } else { + v_offset = + vertex_partition.get_local_vertex_offset_from_vertex_nocheck(thrust::get<0>(key)); + } + auto v_val = *(vertex_value_input_first + v_offset); + auto v_op_result = v_op(key, v_val, payload); + if (v_op_result) { + *(vertex_value_output_first + v_offset) = thrust::get<1>(*v_op_result); + return static_cast(thrust::get<0>(*v_op_result)); + } else { + return std::numeric_limits::max(); + } + }); + + resize_dataframe_buffer(payload_buffer, size_t{0}, handle.get_stream()); + shrink_to_fit_dataframe_buffer(payload_buffer, handle.get_stream()); } else { - thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - bucket_key_pair_first, - bucket_key_pair_first + bucket_indices.size()); - rmm::device_uvector d_indices(next_frontier_bucket_indices.size(), - handle.get_stream()); - rmm::device_uvector d_counts(d_indices.size(), handle.get_stream()); - auto it = - thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - bucket_indices.begin(), - bucket_indices.end(), - thrust::make_constant_iterator(size_t{1}), - d_indices.begin(), - d_counts.begin()); - d_indices.resize(thrust::distance(d_indices.begin(), thrust::get<0>(it)), - handle.get_stream()); - d_counts.resize(d_indices.size(), handle.get_stream()); - std::vector h_indices(d_indices.size()); - std::vector h_counts(h_indices.size()); - raft::update_host(h_indices.data(), d_indices.data(), d_indices.size(), handle.get_stream()); - raft::update_host(h_counts.data(), d_counts.data(), d_counts.size(), handle.get_stream()); - handle.get_stream_view().synchronize(); - std::vector h_offsets(h_indices.size(), 0); - std::partial_sum(h_counts.begin(), h_counts.end() - 1, h_offsets.begin() + 1); - for (size_t i = 0; i < h_indices.size(); ++i) { - if (h_counts[i] > 0) { - vertex_frontier.get_bucket(h_indices[i]).insert(keys.begin() + h_offsets[i], h_counts[i]); - } - } + thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + get_dataframe_buffer_begin(key_buffer), + get_dataframe_buffer_begin(key_buffer) + num_buffer_elements, + bucket_indices.begin(), + detail::call_v_op_t{vertex_value_input_first, + vertex_value_output_first, + v_op, + vertex_partition, + VertexFrontierType::kInvalidBucketIdx}); } + + auto bucket_key_pair_first = thrust::make_zip_iterator( + thrust::make_tuple(bucket_indices.begin(), get_dataframe_buffer_begin(key_buffer))); + bucket_indices.resize( + thrust::distance( + bucket_key_pair_first, + thrust::remove_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + bucket_key_pair_first, + bucket_key_pair_first + num_buffer_elements, + detail::check_invalid_bucket_idx_t())), + handle.get_stream()); + resize_dataframe_buffer(key_buffer, bucket_indices.size(), handle.get_stream()); + bucket_indices.shrink_to_fit(handle.get_stream()); + 
shrink_to_fit_dataframe_buffer(key_buffer, handle.get_stream()); + + frontier.insert_to_buckets(bucket_indices.begin(), + bucket_indices.end(), + get_dataframe_buffer_begin(key_buffer), + next_frontier_bucket_indices); } } // namespace experimental diff --git a/cpp/include/cugraph/patterns/vertex_frontier.cuh b/cpp/include/cugraph/patterns/vertex_frontier.cuh index f86d56deb84..bfe23882088 100644 --- a/cpp/include/cugraph/patterns/vertex_frontier.cuh +++ b/cpp/include/cugraph/patterns/vertex_frontier.cuh @@ -20,15 +20,17 @@ #include #include -#include #include #include +#include #include #include #include #include +#include +#include #include #include #include @@ -36,41 +38,96 @@ namespace cugraph { namespace experimental { -template -class SortedUniqueElementBucket { +// stores unique key objects in the sorted (non-descending) order; key type is either vertex_t +// (tag_t == void) or thrust::tuple (tag_t != void) +template +class SortedUniqueKeyBucket { + static_assert(std::is_same_v || std::is_arithmetic_v); + + using optional_buffer_type = std:: + conditional_t, std::byte /* dummy */, rmm::device_uvector>; + public: - SortedUniqueElementBucket(raft::handle_t const& handle) - : handle_ptr_(&handle), elements_(0, handle.get_stream()) + template >* = nullptr> + SortedUniqueKeyBucket(raft::handle_t const& handle) + : handle_ptr_(&handle), vertices_(0, handle.get_stream()), tags_(std::byte{0}) + { + } + + template >* = nullptr> + SortedUniqueKeyBucket(raft::handle_t const& handle) + : handle_ptr_(&handle), vertices_(0, handle.get_stream()), tags_(0, handle.get_stream()) { } - void insert(vertex_t v) + /** + * @ brief insert a vertex to the bucket + * + * @param vertex vertex to insert + */ + template >* = nullptr> + void insert(vertex_t vertex) { - if (elements_.size() > 0) { - rmm::device_scalar vertex(v, handle_ptr_->get_stream()); - insert(vertex.data(), vertex_t{1}); + if (vertices_.size() > 0) { + rmm::device_scalar tmp(vertex, handle_ptr_->get_stream()); + insert(tmp.data(), tmp.data() + 1); } else { - elements_.resize(1, handle_ptr_->get_stream()); - raft::update_device(elements_.data(), &v, size_t{1}, handle_ptr_->get_stream()); + vertices_.resize(1, handle_ptr_->get_stream()); + raft::update_device(vertices_.data(), &vertex, size_t{1}, handle_ptr_->get_stream()); + } + } + + /** + * @ brief insert a (vertex, tag) pair to the bucket + * + * @param vertex vertex of the (vertex, tag) pair to insert + * @param tag tag of the (vertex, tag) pair to insert + */ + template >* = nullptr> + void insert(thrust::tuple key) + { + if (vertices_.size() > 0) { + rmm::device_scalar tmp_vertex(thrust::get<0>(key), handle_ptr_->get_stream()); + rmm::device_scalar tmp_tag(thrust::get<1>(key), handle_ptr_->get_stream()); + auto pair_first = + thrust::make_zip_iterator(thrust::make_tuple(tmp_vertex.data(), tmp_tag.data())); + insert(pair_first, pair_first + 1); + } else { + vertices_.resize(1, handle_ptr_->get_stream()); + tags_.resize(1, handle_ptr_->get_stream()); + auto pair_first = + thrust::make_tuple(thrust::make_zip_iterator(vertices_.begin(), tags_.begin())); + thrust::fill(rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + pair_first, + pair_first + 1, + key); } } /** * @ brief insert a list of vertices to the bucket * - * @param sorted_unique_vertices Device pointer to the array storing the vertex list. - * @param num_sorted_unique_vertices Size of the vertex list to insert. 
+ * @param vertex_first Iterator pointing to the first (inclusive) element of the vertices stored + * in device memory. + * @param vertex_last Iterator pointing to the last (exclusive) element of the vertices stored in + * device memory. */ - void insert(vertex_t const* sorted_unique_vertices, vertex_t num_sorted_unique_vertices) + template >* = nullptr> + void insert(VertexIterator vertex_first, VertexIterator vertex_last) { - if (elements_.size() > 0) { - rmm::device_uvector merged_vertices(elements_.size() + num_sorted_unique_vertices, - handle_ptr_->get_stream()); + static_assert( + std::is_same_v::value_type, vertex_t>); + + if (vertices_.size() > 0) { + rmm::device_uvector merged_vertices( + vertices_.size() + thrust::distance(vertex_first, vertex_last), handle_ptr_->get_stream()); thrust::merge(rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), - elements_.begin(), - elements_.end(), - sorted_unique_vertices, - sorted_unique_vertices + num_sorted_unique_vertices, + vertices_.begin(), + vertices_.end(), + vertex_first, + vertex_last, merged_vertices.begin()); merged_vertices.resize( thrust::distance( @@ -80,57 +137,164 @@ class SortedUniqueElementBucket { merged_vertices.end())), handle_ptr_->get_stream()); merged_vertices.shrink_to_fit(handle_ptr_->get_stream()); - elements_ = std::move(merged_vertices); + vertices_ = std::move(merged_vertices); } else { - elements_.resize(num_sorted_unique_vertices, handle_ptr_->get_stream()); + vertices_.resize(thrust::distance(vertex_first, vertex_last), handle_ptr_->get_stream()); thrust::copy(rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), - sorted_unique_vertices, - sorted_unique_vertices + num_sorted_unique_vertices, - elements_.begin()); + vertex_first, + vertex_last, + vertices_.begin()); } } - size_t size() const { return elements_.size(); } + /** + * @ brief insert a list of (vertex, tag) pairs to the bucket + * + * @param key_first Iterator pointing to the first (inclusive) element of the (vertex,tag) pairs + * stored in device memory. + * @param key_last Iterator pointing to the last (exclusive) element of the (vertex,tag) pairs + * stored in device memory. 
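As a usage sketch for the tagged-key insert overload documented above (names are hypothetical: `frontier` stands for a VertexFrontier instance, `handle` for a raft::handle_t, and the tag type is assumed to be an int32_t source index), seeding bucket 0 with (vertex, tag) pairs for a multi-source traversal might look like the following; per the class invariant stated above, the keys should arrive unique and sorted in non-descending order:

```
rmm::device_uvector<int32_t> sources(num_sources, handle.get_stream());
rmm::device_uvector<int32_t> source_tags(num_sources, handle.get_stream());
// ... fill sources/source_tags with sorted, unique (vertex, tag) pairs ...
auto key_first =
  thrust::make_zip_iterator(thrust::make_tuple(sources.begin(), source_tags.begin()));
frontier.get_bucket(0).insert(key_first, key_first + num_sources);
```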
+ */ + template >* = nullptr> + void insert(KeyIterator key_first, KeyIterator key_last) + { + static_assert(std::is_same_v::value_type, + thrust::tuple>); + + if (vertices_.size() > 0) { + rmm::device_uvector merged_vertices( + vertices_.size() + thrust::distance(key_first, key_last), handle_ptr_->get_stream()); + rmm::device_uvector merged_tags(merged_vertices.size(), handle_ptr_->get_stream()); + auto old_pair_first = + thrust::make_zip_iterator(thrust::make_tuple(vertices_.begin(), tags_.begin())); + auto merged_pair_first = + thrust::make_zip_iterator(thrust::make_tuple(merged_vertices.begin(), merged_tags.begin())); + thrust::merge(rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + old_pair_first, + old_pair_first + vertices_.size(), + key_first, + key_last, + merged_pair_first); + merged_vertices.resize( + thrust::distance( + merged_pair_first, + thrust::unique(rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + merged_pair_first, + merged_pair_first + merged_vertices.size())), + handle_ptr_->get_stream()); + merged_tags.resize(merged_vertices.size(), handle_ptr_->get_stream()); + merged_vertices.shrink_to_fit(handle_ptr_->get_stream()); + merged_tags.shrink_to_fit(handle_ptr_->get_stream()); + vertices_ = std::move(merged_vertices); + tags_ = std::move(merged_tags); + } else { + vertices_.resize(thrust::distance(key_first, key_last), handle_ptr_->get_stream()); + tags_.resize(thrust::distance(key_first, key_last), handle_ptr_->get_stream()); + thrust::copy(rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + key_first, + key_last, + thrust::make_zip_iterator(thrust::make_tuple(vertices_.begin(), tags_.begin()))); + } + } + + size_t size() const { return vertices_.size(); } template std::enable_if_t aggregate_size() const { return host_scalar_allreduce( - handle_ptr_->get_comms(), elements_.size(), handle_ptr_->get_stream()); + handle_ptr_->get_comms(), vertices_.size(), handle_ptr_->get_stream()); } template std::enable_if_t aggregate_size() const { - return elements_.size(); + return vertices_.size(); } - void resize(size_t size) { elements_.resize(size, handle_ptr_->get_stream()); } + void resize(size_t size) + { + vertices_.resize(size, handle_ptr_->get_stream()); + if constexpr (!std::is_same_v) { tags_.resize(size, handle_ptr_->get_stream()); } + } - void clear() { elements_.resize(0, handle_ptr_->get_stream()); } + void clear() { resize(0); } - void shrink_to_fit() { elements_.shrink_to_fit(handle_ptr_->get_stream()); } + void shrink_to_fit() + { + vertices_.shrink_to_fit(handle_ptr_->get_stream()); + if constexpr (!std::is_same_v) { tags_.shrink_to_fit(handle_ptr_->get_stream()); } + } - auto const data() const { return elements_.data(); } +// FIXME: to silence the spurious warning (missing return statement ...) 
due to the nvcc bug +// (https://stackoverflow.com/questions/64523302/cuda-missing-return-statement-at-end-of-non-void- +// function-in-constexpr-if-fun) +#if 1 + template >* = nullptr> + auto const begin() const + { + return vertices_.begin(); + } - auto data() { return elements_.data(); } + template >* = nullptr> + auto begin() + { + return vertices_.begin(); + } - auto const begin() const { return elements_.begin(); } + template >* = nullptr> + auto const begin() const + { + return thrust::make_zip_iterator(thrust::make_tuple(vertices_.begin(), tags_.begin())); + } - auto begin() { return elements_.begin(); } + template >* = nullptr> + auto begin() + { + return thrust::make_zip_iterator(thrust::make_tuple(vertices_.begin(), tags_.begin())); + } +#else + auto const begin() const + { + if constexpr (std::is_same_v) { + return vertices_.begin(); + } else { + return thrust::make_zip_iterator(thrust::make_tuple(vertices_.begin(), tags_.begin())); + } + } - auto const end() const { return elements_.end(); } + auto begin() + { + if constexpr (std::is_same_v) { + return vertices_.begin(); + } else { + return thrust::make_zip_iterator(thrust::make_tuple(vertices_.begin(), tags_.begin())); + } + } +#endif + + auto const end() const { return begin() + vertices_.size(); } - auto end() { return elements_.end(); } + auto end() { return begin() + vertices_.size(); } private: raft::handle_t const* handle_ptr_{nullptr}; - rmm::device_uvector elements_; + rmm::device_uvector vertices_; + optional_buffer_type tags_; }; -template +template class VertexFrontier { + static_assert(std::is_same_v || std::is_arithmetic_v); + public: + using key_type = + std::conditional_t, vertex_t, thrust::tuple>; static size_t constexpr kNumBuckets = num_buckets; static size_t constexpr kInvalidBucketIdx{std::numeric_limits::max()}; @@ -139,12 +303,12 @@ class VertexFrontier { for (size_t i = 0; i < num_buckets; ++i) { buckets_.emplace_back(handle); } } - SortedUniqueElementBucket& get_bucket(size_t bucket_idx) + SortedUniqueKeyBucket& get_bucket(size_t bucket_idx) { return buckets_[bucket_idx]; } - SortedUniqueElementBucket const& get_bucket(size_t bucket_idx) const + SortedUniqueKeyBucket const& get_bucket(size_t bucket_idx) const { return buckets_[bucket_idx]; } @@ -160,106 +324,144 @@ class VertexFrontier { SplitOp split_op) { auto& this_bucket = get_bucket(this_bucket_idx); - if (this_bucket.size() > 0) { - static_assert(kNumBuckets <= std::numeric_limits::max()); - rmm::device_uvector bucket_indices(this_bucket.size(), handle_ptr_->get_stream()); - thrust::transform( + if (this_bucket.size() == 0) { return; } + + // 1. apply split_op to each bucket element + + static_assert(kNumBuckets <= std::numeric_limits::max()); + rmm::device_uvector bucket_indices(this_bucket.size(), handle_ptr_->get_stream()); + thrust::transform( + rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + this_bucket.begin(), + this_bucket.end(), + bucket_indices.begin(), + [split_op] __device__(auto key) { + auto split_op_result = split_op(key); + return static_cast(split_op_result ? *split_op_result : kInvalidBucketIdx); + }); + + // 2. 
remove elements with the invalid bucket indices + + auto pair_first = + thrust::make_zip_iterator(thrust::make_tuple(bucket_indices.begin(), this_bucket.begin())); + bucket_indices.resize( + thrust::distance(pair_first, + thrust::remove_if( + rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + pair_first, + pair_first + bucket_indices.size(), + [] __device__(auto pair) { + return thrust::get<0>(pair) == static_cast(kInvalidBucketIdx); + })), + handle_ptr_->get_stream()); + this_bucket.resize(bucket_indices.size()); + bucket_indices.shrink_to_fit(handle_ptr_->get_stream()); + this_bucket.shrink_to_fit(); + + // 3. separte the elements to stay in this bucket from the elements to be moved to other buckets + + pair_first = + thrust::make_zip_iterator(thrust::make_tuple(bucket_indices.begin(), this_bucket.begin())); + auto new_this_bucket_size = static_cast(thrust::distance( + pair_first, + thrust::stable_partition( // stalbe_partition to maintain sorted order within each bucket rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), - this_bucket.begin(), - this_bucket.end(), - bucket_indices.begin(), - [split_op] __device__(auto v) { return static_cast(split_op(v)); }); - - auto pair_first = - thrust::make_zip_iterator(thrust::make_tuple(bucket_indices.begin(), this_bucket.begin())); - this_bucket.resize(thrust::distance( pair_first, - thrust::remove_if( - rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), - pair_first, - pair_first + bucket_indices.size(), - [invalid_bucket_idx = static_cast(kInvalidBucketIdx)] __device__(auto pair) { - return thrust::get<0>(pair) == invalid_bucket_idx; - }))); - bucket_indices.resize(this_bucket.size(), handle_ptr_->get_stream()); - this_bucket.shrink_to_fit(); - bucket_indices.shrink_to_fit(handle_ptr_->get_stream()); + pair_first + bucket_indices.size(), + [this_bucket_idx = static_cast(this_bucket_idx)] __device__(auto pair) { + return thrust::get<0>(pair) == this_bucket_idx; + }))); + + // 4. insert to target buckets and resize this bucket - pair_first = - thrust::make_zip_iterator(thrust::make_tuple(bucket_indices.begin(), this_bucket.begin())); - auto new_this_bucket_size = thrust::distance( + insert_to_buckets(bucket_indices.begin() + new_this_bucket_size, + bucket_indices.end(), + this_bucket.begin() + new_this_bucket_size, + move_to_bucket_indices); + + this_bucket.resize(new_this_bucket_size); + this_bucket.shrink_to_fit(); + } + + template + void insert_to_buckets(uint8_t* bucket_idx_first /* [INOUT] */, + uint8_t* bucket_idx_last /* [INOUT] */, + KeyIterator key_first /* [INOUT] */, + std::vector const& to_bucket_indices) + { + // 1. 
group the elements by their target bucket indices + + auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(bucket_idx_first, key_first)); + auto pair_last = pair_first + thrust::distance(bucket_idx_first, bucket_idx_last); + + std::vector insert_bucket_indices{}; + std::vector insert_offsets{}; + std::vector insert_sizes{}; + if (to_bucket_indices.size() == 1) { + insert_bucket_indices = to_bucket_indices; + insert_offsets = {0}; + insert_sizes = {static_cast(thrust::distance(pair_first, pair_last))}; + } else if (to_bucket_indices.size() == 2) { + auto next_bucket_size = static_cast(thrust::distance( pair_first, thrust::stable_partition( // stalbe_partition to maintain sorted order within each bucket rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), pair_first, - pair_first + bucket_indices.size(), - [this_bucket_idx = static_cast(this_bucket_idx)] __device__(auto pair) { - return thrust::get<0>(pair) == this_bucket_idx; - })); - - if (move_to_bucket_indices.size() == 1) { - get_bucket(move_to_bucket_indices[0]) - .insert(this_bucket.begin() + new_this_bucket_size, - thrust::distance(this_bucket.begin() + new_this_bucket_size, this_bucket.end())); - } else if (move_to_bucket_indices.size() == 2) { - auto next_bucket_size = thrust::distance( - pair_first + new_this_bucket_size, - thrust::stable_partition( // stalbe_partition to maintain sorted order within each bucket - rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), - pair_first + new_this_bucket_size, - pair_first + bucket_indices.size(), - [next_bucket_idx = static_cast(move_to_bucket_indices[0])] __device__( - auto pair) { return thrust::get<0>(pair) == next_bucket_idx; })); - get_bucket(move_to_bucket_indices[0]) - .insert(this_bucket.begin() + new_this_bucket_size, next_bucket_size); - get_bucket(move_to_bucket_indices[1]) - .insert(this_bucket.begin() + new_this_bucket_size + next_bucket_size, - thrust::distance(this_bucket.begin() + new_this_bucket_size + next_bucket_size, - this_bucket.end())); - } else { - thrust::sort(rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), - pair_first + new_this_bucket_size, - pair_first + bucket_indices.size()); - rmm::device_uvector d_indices(move_to_bucket_indices.size(), - handle_ptr_->get_stream()); - rmm::device_uvector d_counts(d_indices.size(), handle_ptr_->get_stream()); - auto it = thrust::reduce_by_key( - rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), - bucket_indices.begin() + new_this_bucket_size, - bucket_indices.end(), - thrust::make_constant_iterator(size_t{1}), - d_indices.begin(), - d_counts.begin()); - d_indices.resize(thrust::distance(d_indices.begin(), thrust::get<0>(it)), - handle_ptr_->get_stream()); - d_counts.resize(d_indices.size(), handle_ptr_->get_stream()); - std::vector h_indices(d_indices.size()); - std::vector h_counts(h_indices.size()); - raft::update_host( - h_indices.data(), d_indices.data(), d_indices.size(), handle_ptr_->get_stream()); - raft::update_host( - h_counts.data(), d_counts.data(), d_counts.size(), handle_ptr_->get_stream()); - handle_ptr_->get_stream_view().synchronize(); - std::vector h_offsets(h_indices.size(), 0); - std::partial_sum(h_counts.begin(), h_counts.end() - 1, h_offsets.begin() + 1); - for (size_t i = 0; i < h_indices.size(); ++i) { - if (h_counts[i] > 0) { - get_bucket(h_indices[i]) - .insert(this_bucket.begin() + new_this_bucket_size + h_offsets[i], h_counts[i]); - } - } + pair_last, + [next_bucket_idx = 
static_cast(to_bucket_indices[0])] __device__(auto pair) { + return thrust::get<0>(pair) == next_bucket_idx; + }))); + insert_bucket_indices = to_bucket_indices; + insert_offsets = {0, next_bucket_size}; + insert_sizes = { + next_bucket_size, + static_cast(thrust::distance(pair_first + next_bucket_size, pair_last))}; + } else { + thrust::stable_sort( // stalbe_sort to maintain sorted order within each bucket + rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + pair_first, + pair_last, + [] __device__(auto lhs, auto rhs) { return thrust::get<0>(lhs) < thrust::get<0>(rhs); }); + rmm::device_uvector d_indices(to_bucket_indices.size(), handle_ptr_->get_stream()); + rmm::device_uvector d_counts(d_indices.size(), handle_ptr_->get_stream()); + auto it = thrust::reduce_by_key( + rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + bucket_idx_first, + bucket_idx_last, + thrust::make_constant_iterator(size_t{1}), + d_indices.begin(), + d_counts.begin()); + d_indices.resize(thrust::distance(d_indices.begin(), thrust::get<0>(it)), + handle_ptr_->get_stream()); + d_counts.resize(d_indices.size(), handle_ptr_->get_stream()); + std::vector h_indices(d_indices.size()); + std::vector h_counts(h_indices.size()); + raft::update_host( + h_indices.data(), d_indices.data(), d_indices.size(), handle_ptr_->get_stream()); + raft::update_host( + h_counts.data(), d_counts.data(), d_counts.size(), handle_ptr_->get_stream()); + handle_ptr_->get_stream_view().synchronize(); + + size_t offset{0}; + for (size_t i = 0; i < h_indices.size(); ++i) { + insert_bucket_indices[i] = static_cast(h_indices[i]); + insert_offsets[i] = offset; + insert_sizes[i] = h_counts[i]; + offset += insert_sizes[i]; } - - this_bucket.resize(new_this_bucket_size); - this_bucket.shrink_to_fit(); } - return; + // 2. insert to the target buckets + + for (size_t i = 0; i < insert_offsets.size(); ++i) { + get_bucket(insert_bucket_indices[i]) + .insert(key_first + insert_offsets[i], key_first + (insert_offsets[i] + insert_sizes[i])); + } } private: raft::handle_t const* handle_ptr_{nullptr}; - std::vector> buckets_{}; + std::vector> buckets_{}; }; } // namespace experimental diff --git a/cpp/include/cugraph/utilities/dataframe_buffer.cuh b/cpp/include/cugraph/utilities/dataframe_buffer.cuh index b15bca4abd7..beaf4cabe00 100644 --- a/cpp/include/cugraph/utilities/dataframe_buffer.cuh +++ b/cpp/include/cugraph/utilities/dataframe_buffer.cuh @@ -91,6 +91,20 @@ auto get_dataframe_buffer_begin_tuple_impl(std::index_sequence, BufferTyp get_dataframe_buffer_begin_tuple_element_impl(buffer)...); } +template +auto get_dataframe_buffer_end_tuple_element_impl(BufferType& buffer) +{ + using element_t = typename thrust::tuple_element::type; + return std::get(buffer).end(); +} + +template +auto get_dataframe_buffer_end_tuple_impl(std::index_sequence, BufferType& buffer) +{ + // thrust::make_tuple instead of std::make_tuple as this is fed to thrust::make_zip_iterator. 
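get_dataframe_buffer_end mirrors get_dataframe_buffer_begin: for a tuple-of-buffers dataframe buffer it collects each column's end() into a thrust::tuple and wraps that in a zip iterator, so begin/end form a valid random-access range. The standalone sketch below shows why the helper has to return a thrust::tuple rather than a std::tuple; it uses plain host iterators for brevity and assumes only the Thrust headers that ship with the CUDA Toolkit are on the include path.

```cpp
#include <thrust/iterator/zip_iterator.h>
#include <thrust/tuple.h>

#include <vector>

int main()
{
  std::vector<int> ints{1, 2, 3};
  std::vector<float> floats{1.f, 2.f, 3.f};

  // begin/end zip iterators over a structure-of-arrays "buffer"; both sides are built from
  // thrust::make_tuple because thrust::make_zip_iterator consumes thrust tuples of iterators
  auto zip_begin = thrust::make_zip_iterator(thrust::make_tuple(ints.begin(), floats.begin()));
  auto zip_end   = thrust::make_zip_iterator(thrust::make_tuple(ints.end(), floats.end()));

  return (zip_end - zip_begin) == 3 ? 0 : 1;
}
```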
+ return thrust::make_tuple(get_dataframe_buffer_end_tuple_element_impl(buffer)...); +} + } // namespace detail template ::value>* = nullptr> @@ -147,6 +161,22 @@ void shrink_to_fit_dataframe_buffer(BufferType& buffer, cudaStream_t stream) .run(buffer, stream); } +template ::value>* = nullptr> +size_t size_dataframe_buffer(BufferType& buffer) +{ + return buffer.size(); +} + +template ::value>* = nullptr> +size_t size_dataframe_buffer(BufferType& buffer) +{ + return std::get<0>(buffer).size(); +} + template ::value>* = nullptr> @@ -165,5 +195,23 @@ auto get_dataframe_buffer_begin(BufferType& buffer) std::make_index_sequence(), buffer)); } +template ::value>* = nullptr> +auto get_dataframe_buffer_end(BufferType& buffer) +{ + return buffer.end(); +} + +template ::value>* = nullptr> +auto get_dataframe_buffer_end(BufferType& buffer) +{ + size_t constexpr tuple_size = thrust::tuple_size::value; + return thrust::make_zip_iterator( + detail::get_dataframe_buffer_end_tuple_impl(std::make_index_sequence(), buffer)); +} + } // namespace experimental } // namespace cugraph diff --git a/cpp/include/cugraph/utilities/device_comm.cuh b/cpp/include/cugraph/utilities/device_comm.cuh index 3c00c54b07d..b13aa8e5401 100644 --- a/cpp/include/cugraph/utilities/device_comm.cuh +++ b/cpp/include/cugraph/utilities/device_comm.cuh @@ -398,7 +398,7 @@ struct device_bcast_tuple_iterator_element_impl { count, root, stream); - device_bcast_tuple_iterator_element_impl( + device_bcast_tuple_iterator_element_impl().run( comm, input_first, output_first, count, root, stream); } }; @@ -458,7 +458,7 @@ struct device_allreduce_tuple_iterator_element_impl { count, op, stream); - device_allreduce_tuple_iterator_element_impl( + device_allreduce_tuple_iterator_element_impl().run( comm, input_first, output_first, count, op, stream); } }; @@ -912,8 +912,8 @@ device_bcast(raft::comms::comms_t const& comm, thrust::tuple_size::value_type>::value; detail:: - device_bcast_tuple_iterator_element_impl( - comm, input_first, output_first, count, root, stream); + device_bcast_tuple_iterator_element_impl() + .run(comm, input_first, output_first, count, root, stream); } template @@ -952,8 +952,8 @@ device_allreduce(raft::comms::comms_t const& comm, detail::device_allreduce_tuple_iterator_element_impl( - comm, input_first, output_first, count, op, stream); + tuple_size>() + .run(comm, input_first, output_first, count, op, stream); } template diff --git a/cpp/src/experimental/bfs.cu b/cpp/src/experimental/bfs.cu index 101faf43a6f..817e9cbd225 100644 --- a/cpp/src/experimental/bfs.cu +++ b/cpp/src/experimental/bfs.cu @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -91,7 +92,10 @@ void bfs(raft::handle_t const &handle, // 3. initialize BFS frontier enum class Bucket { cur, next, num_buckets }; - VertexFrontier(Bucket::num_buckets)> + VertexFrontier(Bucket::num_buckets)> vertex_frontier(handle); if (push_graph_view.is_local_vertex_nocheck(source_vertex)) { @@ -123,15 +127,18 @@ void bfs(raft::handle_t const &handle, *(distances + vertex_partition.get_local_vertex_offset_from_vertex_nocheck(dst)); if (distance != invalid_distance) { push = false; } } - return thrust::make_tuple(push, src); + return push ? thrust::optional{src} : thrust::nullopt; }, reduce_op::any(), distances, thrust::make_zip_iterator(thrust::make_tuple(distances, predecessor_first)), - [depth] __device__(auto v_val, auto pushed_val) { - auto idx = (v_val == invalid_distance) ? 
static_cast(Bucket::next) - : VertexFrontier::kInvalidBucketIdx; - return thrust::make_tuple(idx, thrust::make_tuple(depth + 1, pushed_val)); + [depth] __device__(auto v, auto v_val, auto pushed_val) { + return (v_val == invalid_distance) + ? thrust::optional< + thrust::tuple>>{thrust::make_tuple( + static_cast(Bucket::next), + thrust::make_tuple(depth + 1, pushed_val))} + : thrust::nullopt; }); vertex_frontier.get_bucket(static_cast(Bucket::cur)).clear(); diff --git a/cpp/src/experimental/sssp.cu b/cpp/src/experimental/sssp.cu index 4b130668a0d..c8e7f1eb7a0 100644 --- a/cpp/src/experimental/sssp.cu +++ b/cpp/src/experimental/sssp.cu @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -126,7 +127,10 @@ void sssp(raft::handle_t const &handle, // 4. initialize SSSP frontier enum class Bucket { cur_near, next_near, far, num_buckets }; - VertexFrontier(Bucket::num_buckets)> + VertexFrontier(Bucket::num_buckets)> vertex_frontier(handle); // 5. SSSP iteration @@ -186,18 +190,25 @@ void sssp(raft::handle_t const &handle, threshold = old_distance < threshold ? old_distance : threshold; } if (new_distance >= threshold) { push = false; } - return thrust::make_tuple(push, thrust::make_tuple(new_distance, src)); + return push ? thrust::optional>{thrust::make_tuple( + new_distance, src)} + : thrust::nullopt; }, reduce_op::min>(), distances, thrust::make_zip_iterator(thrust::make_tuple(distances, predecessor_first)), - [near_far_threshold] __device__(auto v_val, auto pushed_val) { + [near_far_threshold] __device__(auto v, auto v_val, auto pushed_val) { auto new_dist = thrust::get<0>(pushed_val); auto idx = new_dist < v_val ? (new_dist < near_far_threshold ? static_cast(Bucket::next_near) : static_cast(Bucket::far)) : VertexFrontier::kInvalidBucketIdx; - return thrust::make_tuple(idx, pushed_val); + return new_dist < v_val + ? thrust::optional>{thrust::make_tuple( + static_cast(new_dist < near_far_threshold ? Bucket::next_near + : Bucket::far), + pushed_val)} + : thrust::nullopt; }); vertex_frontier.get_bucket(static_cast(Bucket::cur_near)).clear(); @@ -220,13 +231,10 @@ void sssp(raft::handle_t const &handle, auto v) { auto dist = *(distances + vertex_partition.get_local_vertex_offset_from_vertex_nocheck(v)); - if (dist < old_near_far_threshold) { - return VertexFrontier::kInvalidBucketIdx; - } else if (dist < near_far_threshold) { - return static_cast(Bucket::cur_near); - } else { - return static_cast(Bucket::far); - } + return dist >= old_near_far_threshold + ? thrust::optional{static_cast( + dist < near_far_threshold ? Bucket::cur_near : Bucket::far)} + : thrust::nullopt; }); near_size = vertex_frontier.get_bucket(static_cast(Bucket::cur_near)).aggregate_size(); From e5878b929bc37800ff7aed80c630c66c6003d8e4 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Fri, 14 May 2021 09:38:08 -0400 Subject: [PATCH 256/343] Refactor the graph generation (from edge list) code (#1565) - Move the graph generation function from ```tests/utilities``` to ```src/generator```. - Use std::optional to take the optional vertex list parameter instead of using two separate functions. 
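A minimal, self-contained illustration of the calling convention this change introduces: a single entry point takes std::optional<std::tuple<vertex_t const*, vertex_t>> (pointer plus size) instead of offering separate with-vertex-list and without-vertex-list overloads. The collect_labels function below is a toy stand-in, not the cugraph renumbering code; it only shows how callers pass std::nullopt or a vertex span, and why a supplied span lets isolated vertices survive.

```cpp
#include <algorithm>
#include <cstdint>
#include <optional>
#include <tuple>
#include <vector>

using vertex_t = int32_t;

// Toy stand-in for the optional-vertex-span convention (not the library implementation).
std::vector<vertex_t> collect_labels(
  std::optional<std::tuple<vertex_t const*, vertex_t>> optional_vertex_span,
  std::vector<vertex_t> const& edgelist_majors,
  std::vector<vertex_t> const& edgelist_minors)
{
  std::vector<vertex_t> labels{};
  if (optional_vertex_span) {  // explicit vertex list, may contain isolated vertices
    auto [vertices, num_vertices] = *optional_vertex_span;
    labels.assign(vertices, vertices + num_vertices);
  } else {  // otherwise only vertices that appear in the edge list are kept
    labels = edgelist_majors;
    labels.insert(labels.end(), edgelist_minors.begin(), edgelist_minors.end());
  }
  std::sort(labels.begin(), labels.end());
  labels.erase(std::unique(labels.begin(), labels.end()), labels.end());
  return labels;
}

int main()
{
  std::vector<vertex_t> majors{0, 1, 2};
  std::vector<vertex_t> minors{1, 2, 0};
  std::vector<vertex_t> vertices{0, 1, 2, 3};  // 3 is isolated

  auto from_edges = collect_labels(std::nullopt, majors, minors);  // {0, 1, 2}
  auto with_span  = collect_labels(
    std::optional<std::tuple<vertex_t const*, vertex_t>>{
      std::make_tuple(vertices.data(), static_cast<vertex_t>(vertices.size()))},
    majors, minors);  // {0, 1, 2, 3}
  return (from_edges.size() == 3 && with_span.size() == 4) ? 0 : 1;
}
```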
Authors: - Seunghwa Kang (https://github.com/seunghwak) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - Andrei Schaffer (https://github.com/aschaffer) URL: https://github.com/rapidsai/cugraph/pull/1565 --- cpp/CMakeLists.txt | 1 + .../cugraph/experimental/graph_functions.hpp | 144 ++++----- .../cugraph/experimental/graph_generator.hpp | 2 + cpp/src/experimental/coarsen_graph.cu | 19 +- cpp/src/experimental/renumber_edgelist.cu | 225 ++++---------- .../structure/create_graph_from_edgelist.cu} | 284 ++++++++---------- cpp/src/utilities/cython.cu | 3 +- cpp/tests/CMakeLists.txt | 1 - cpp/tests/community/mg_louvain_test.cpp | 9 +- cpp/tests/components/wcc_graphs.cu | 21 +- .../utilities/matrix_market_file_utilities.cu | 20 +- cpp/tests/utilities/rmat_utilities.cu | 20 +- cpp/tests/utilities/test_utilities.hpp | 16 - 13 files changed, 303 insertions(+), 462 deletions(-) rename cpp/{tests/utilities/generate_graph_from_edgelist.cu => src/structure/create_graph_from_edgelist.cu} (68%) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 6b638441a5b..c2c30d96eb3 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -438,6 +438,7 @@ add_library(cugraph SHARED src/experimental/pagerank.cu src/experimental/katz_centrality.cu src/tree/mst.cu + src/structure/create_graph_from_edgelist.cu src/utilities/host_barrier.cpp ) diff --git a/cpp/include/cugraph/experimental/graph_functions.hpp b/cpp/include/cugraph/experimental/graph_functions.hpp index 36881aea5fa..47c43920749 100644 --- a/cpp/include/cugraph/experimental/graph_functions.hpp +++ b/cpp/include/cugraph/experimental/graph_functions.hpp @@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -31,8 +32,8 @@ namespace experimental { /** * @brief renumber edgelist (multi-GPU) * - * This function assumes that edges are pre-shuffled to their target processes using the - * compute_gpu_id_from_edge_t functor. + * This function assumes that vertices and edges are pre-shuffled to their target processes using + * the compute_gpu_id_from_vertex_t & compute_gpu_id_from_edge_t functors, respectively. * * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. * @tparam edge_t Type of edge identifiers. Needs to be an integral type. @@ -40,6 +41,11 @@ namespace experimental { * or multi-GPU (true). * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. + * @param optional_local_vertex_span If valid, part of the entire set of vertices in the graph to be + * renumbered. The first tuple element is the pointer to the array and the second tuple element is + * the size of the array. This parameter can be used to include isolated vertices. Applying the + * compute_gpu_id_from_vertex_t to every vertex should return the local GPU ID for this function to + * work (vertices should be pre-shuffled). * @param edgelist_major_vertices Pointers (one pointer per local graph adjacency matrix partition * assigned to this process) to edge source vertex IDs (if the graph adjacency matrix is stored as * is) or edge destination vertex IDs (if the transposed graph adjacency matrix is stored). 
Vertex @@ -68,6 +74,7 @@ template std::enable_if_t, partition_t, vertex_t, edge_t>> renumber_edgelist(raft::handle_t const& handle, + std::optional> optional_local_vertex_span, std::vector const& edgelist_major_vertices /* [INOUT] */, std::vector const& edgelist_minor_vertices /* [INOUT] */, std::vector const& edgelist_edge_counts, @@ -82,90 +89,9 @@ renumber_edgelist(raft::handle_t const& handle, * or multi-GPU (true). * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. - * @param edgelist_major_vertices Edge source vertex IDs (if the graph adjacency matrix is stored as - * is) or edge destination vertex IDs (if the transposed graph adjacency matrix is stored). Vertex - * IDs are updated in-place ([INOUT] parameter). - * @param edgelist_minor_vertices Edge destination vertex IDs (if the graph adjacency matrix is - * stored as is) or edge source vertex IDs (if the transposed graph adjacency matrix is stored). - * Vertex IDs are updated in-place ([INOUT] parameter). - * @param num_edgelist_edges Number of edges in the edgelist. - * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). - * @return rmm::device_uvector Labels (vertex IDs before renumbering) for the entire set - * of vertices. - */ -template -std::enable_if_t> renumber_edgelist( - raft::handle_t const& handle, - vertex_t* edgelist_major_vertices /* [INOUT] */, - vertex_t* edgelist_minor_vertices /* [INOUT] */, - edge_t num_edgelist_edges, - bool do_expensive_check = false); - -/** - * @brief renumber edgelist (multi-GPU) - * - * This version takes the vertex set in addition; this allows renumbering to include isolated - * vertices. This function assumes that vertices and edges are pre-shuffled to their target - * processes using the compute_gpu_id_from_vertex_t & compute_gpu_id_from_edge_t functors, - * respectively. - * - * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. - * @tparam edge_t Type of edge identifiers. Needs to be an integral type. - * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) - * or multi-GPU (true). - * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and - * handles to various CUDA libraries) to run graph algorithms. - * @param local_vertices Part of the entire set of vertices in the graph to be renumbered. Applying - * the compute_gpu_id_from_vertex_t to every vertex should return the local GPU ID for this function - * to work (vertices should be pre-shuffled). - * @param num_local_vertices Number of local vertices. - * @param edgelist_major_vertices Pointers (one pointer per local graph adjacency matrix partition - * assigned to this process) to edge source vertex IDs (if the graph adjacency matrix is stored as - * is) or edge destination vertex IDs (if the transposed graph adjacency matrix is stored). Vertex - * IDs are updated in-place ([INOUT] parameter). Edges should be pre-shuffled to their final target - * process & matrix partition; i.e. applying the compute_gpu_id_from_edge_t functor to every (major, - * minor) pair should return the GPU ID of this process and applying the - * compute_partition_id_from_edge_t fuctor to every (major, minor) pair for a local matrix partition - * should return the partition ID of the corresponding matrix partition. 
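Before calling the multi-GPU renumbering path, every edge therefore has to already sit on the process (and local matrix partition) that the partitioning functors assign it to. The sketch below only shows the shape of that pre-shuffle with a deliberately simplified, made-up placement rule; the actual assignment comes from compute_gpu_id_from_edge_t / compute_partition_id_from_edge_t and the 2D row/column communicator layout, none of which is reproduced here.

```cpp
#include <cstdint>
#include <utility>
#include <vector>

using vertex_t = int32_t;

// Hypothetical placement rule used only for illustration; the real assignment comes from
// cugraph's partitioning functors and is not reproduced here.
int edge_owner(vertex_t major, vertex_t minor, int comm_size)
{
  return static_cast<int>((static_cast<int64_t>(major) * 31 + minor) % comm_size);
}

int main()
{
  int const comm_size = 4;
  std::vector<std::pair<vertex_t, vertex_t>> edges{{0, 1}, {1, 2}, {2, 3}, {3, 0}};

  // bucket every edge by its owning rank before handing the per-rank lists to the renumbering call
  std::vector<std::vector<std::pair<vertex_t, vertex_t>>> per_rank(comm_size);
  for (auto [src, dst] : edges) { per_rank[edge_owner(src, dst, comm_size)].push_back({src, dst}); }

  return per_rank[edge_owner(0, 1, comm_size)].empty() ? 1 : 0;
}
```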
- * @param edgelist_minor_vertices Pointers (one pointer per local graph adjacency matrix partition - * assigned to this process) to edge destination vertex IDs (if the graph adjacency matrix is stored - * as is) or edge source vertex IDs (if the transposed graph adjacency matrix is stored). Vertex IDs - * are updated in-place ([INOUT] parameter). Edges should be pre-shuffled to their final target - * process & matrix partition; i.e. applying the compute_gpu_id_from_edge_t functor to every (major, - * minor) pair should return the GPU ID of this process and applying the - * compute_partition_id_from_edge_t fuctor to every (major, minor) pair for a local matrix partition - * should return the partition ID of the corresponding matrix partition. - * @param edgelist_edge_counts Edge counts (one count per local graph adjacency matrix partition - * assigned to this process). - * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). - * @return std::tuple, partition_t, vertex_t, edge_t> - * Quadruplet of labels (vertex IDs before renumbering) for the entire set of vertices (assigned to - * this process in multi-GPU), partition_t object storing graph partitioning information, total - * number of vertices, and total number of edges. - */ -template -std::enable_if_t, partition_t, vertex_t, edge_t>> -renumber_edgelist(raft::handle_t const& handle, - vertex_t const* local_vertices, - vertex_t num_local_vertices, - std::vector const& edgelist_major_vertices /* [INOUT] */, - std::vector const& edgelist_minor_vertices /* [INOUT] */, - std::vector const& edgelist_edge_counts, - bool do_expensive_check = false); - -/** - * @brief renumber edgelist (single-GPU) - * - * This version takes the vertex set in addition; this allows renumbering to include isolated - * vertices. - * - * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. - * @tparam edge_t Type of edge identifiers. Needs to be an integral type. - * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) - * or multi-GPU (true). - * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and - * handles to various CUDA libraries) to run graph algorithms. + * @param optional_local_vertex_span If valid, vertices in the graph to be renumbered. The first + * tuple element is the pointer to the array and the second tuple element is the size of the array. + * This parameter can be used to include isolated vertices. * @param vertices The entire set of vertices in the graph to be renumbered. * @param num_vertices Number of vertices. * @param edgelist_major_vertices Edge source vertex IDs (if the graph adjacency matrix is stored as @@ -182,8 +108,7 @@ renumber_edgelist(raft::handle_t const& handle, template std::enable_if_t> renumber_edgelist( raft::handle_t const& handle, - vertex_t const* vertices, - vertex_t num_vertices, + std::optional> optional_vertex_span, vertex_t* edgelist_major_vertices /* [INOUT] */, vertex_t* edgelist_minor_vertices /* [INOUT] */, edge_t num_edgelist_edges, @@ -398,5 +323,48 @@ extract_induced_subgraphs( size_t num_subgraphs, bool do_expensive_check = false); +/** + * @brief create a graph from (the optional vertex list and) the given edge list. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam edge_t Type of edge identifiers. Needs to be an integral type. + * @tparam weight_t Type of edge weights. Needs to be a floating point type. 
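When no vertex span is supplied and renumbering is disabled, the single-GPU implementation further below deduces the vertex count from the edge list itself as the largest endpoint ID plus one. The host-side sketch below mirrors that deduction; the real code performs the equivalent max-reduction on the device with thrust::transform_reduce.

```cpp
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

using vertex_t = int32_t;

int main()
{
  std::vector<vertex_t> edgelist_rows{0, 2, 5};
  std::vector<vertex_t> edgelist_cols{1, 3, 4};

  // largest endpoint ID seen in the edge list ...
  vertex_t max_vertex_id{0};
  for (std::size_t i = 0; i < edgelist_rows.size(); ++i) {
    max_vertex_id = std::max({max_vertex_id, edgelist_rows[i], edgelist_cols[i]});
  }
  // ... plus one gives the deduced number of vertices
  auto num_vertices = max_vertex_id + 1;  // 6

  return num_vertices == 6 ? 0 : 1;
}
```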
+ * @tparam store_transposed Flag indicating whether to store the graph adjacency matrix as is or as + * transposed. + * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) + * or multi-GPU (true). + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param optional_vertex_span If valid, part of the entire set of vertices in the graph to be + * renumbered. The first tuple element is the pointer to the array and the second tuple element is + * the size of the array. This parameter can be used to include isolated vertices. If multi-GPU, + * applying the compute_gpu_id_from_vertex_t to every vertex should return the local GPU ID for this + * function to work (vertices should be pre-shuffled). + * @param edgelist_rows Vector of edge row (source) vertex IDs. + * @param edgelist_cols Vector of edge column (destination) vertex IDs. + * @param edgelist_weights Vector of edge weights. + * @param graph_properties Properties of the graph represented by the input (optional vertex list + * and) edge list. + * @param renumber Flag indicating whether to renumber vertices or not. + * @return std::tuple, rmm::device_uvector> Pair of the generated graph and the renumber map. The + * szie of the renumber map is 0 if @p renumber is false. + */ +template +std::tuple, + rmm::device_uvector> +create_graph_from_edgelist( + raft::handle_t const& handle, + std::optional> optional_vertex_span, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + graph_properties_t graph_properties, + bool renumber); + } // namespace experimental } // namespace cugraph diff --git a/cpp/include/cugraph/experimental/graph_generator.hpp b/cpp/include/cugraph/experimental/graph_generator.hpp index bc7337944f3..78a73e2fe05 100644 --- a/cpp/include/cugraph/experimental/graph_generator.hpp +++ b/cpp/include/cugraph/experimental/graph_generator.hpp @@ -15,6 +15,8 @@ */ #pragma once +#include + #include #include diff --git a/cpp/src/experimental/coarsen_graph.cu b/cpp/src/experimental/coarsen_graph.cu index 9f3f7c968cc..e648691f8b1 100644 --- a/cpp/src/experimental/coarsen_graph.cu +++ b/cpp/src/experimental/coarsen_graph.cu @@ -541,13 +541,14 @@ coarsen_graph( counts[i] = static_cast(coarsened_edgelist_major_vertices[i].size()); } std::tie(renumber_map_labels, partition, number_of_vertices, number_of_edges) = - renumber_edgelist(handle, - unique_labels.data(), - static_cast(unique_labels.size()), - major_ptrs, - minor_ptrs, - counts, - do_expensive_check); + renumber_edgelist( + handle, + std::optional>{ + std::make_tuple(unique_labels.data(), static_cast(unique_labels.size()))}, + major_ptrs, + minor_ptrs, + counts, + do_expensive_check); } // 5. 
build a graph @@ -633,8 +634,8 @@ coarsen_graph( auto renumber_map_labels = renumber_edgelist( handle, - unique_labels.data(), - static_cast(unique_labels.size()), + std::optional>{ + std::make_tuple(unique_labels.data(), static_cast(unique_labels.size()))}, coarsened_edgelist_major_vertices.data(), coarsened_edgelist_minor_vertices.data(), static_cast(coarsened_edgelist_major_vertices.size()), diff --git a/cpp/src/experimental/renumber_edgelist.cu b/cpp/src/experimental/renumber_edgelist.cu index d93b9d18911..33793ecf727 100644 --- a/cpp/src/experimental/renumber_edgelist.cu +++ b/cpp/src/experimental/renumber_edgelist.cu @@ -49,8 +49,7 @@ namespace detail { template rmm::device_uvector compute_renumber_map( raft::handle_t const& handle, - vertex_t const* vertices, - vertex_t num_local_vertices /* relevant only if vertices != nullptr */, + std::optional> optional_vertex_span, std::vector const& edgelist_major_vertices, std::vector const& edgelist_minor_vertices, std::vector const& edgelist_edge_counts) @@ -290,18 +289,19 @@ rmm::device_uvector compute_renumber_map( // 4. if vertices != nullptr, add isolated vertices rmm::device_uvector isolated_vertices(0, handle.get_stream()); - if (vertices != nullptr) { - auto num_isolated_vertices = thrust::count_if( + if (optional_vertex_span) { + auto [vertices, num_vertices] = *optional_vertex_span; + auto num_isolated_vertices = thrust::count_if( rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), vertices, - vertices + num_local_vertices, + vertices + num_vertices, [label_first = labels.begin(), label_last = labels.end()] __device__(auto v) { return !thrust::binary_search(thrust::seq, label_first, label_last, v); }); isolated_vertices.resize(num_isolated_vertices, handle.get_stream()); thrust::copy_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), vertices, - vertices + num_local_vertices, + vertices + num_vertices, isolated_vertices.begin(), [label_first = labels.begin(), label_last = labels.end()] __device__(auto v) { return !thrust::binary_search(thrust::seq, label_first, label_last, v); @@ -335,27 +335,29 @@ rmm::device_uvector compute_renumber_map( template void expensive_check_edgelist( raft::handle_t const& handle, - vertex_t const* local_vertices, - vertex_t num_local_vertices /* relevant only if local_vertices != nullptr */, + std::optional> optional_vertex_span, std::vector const& edgelist_major_vertices, std::vector const& edgelist_minor_vertices, std::vector const& edgelist_edge_counts) { - rmm::device_uvector sorted_local_vertices( - local_vertices != nullptr ? 
num_local_vertices : vertex_t{0}, handle.get_stream()); - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - local_vertices, - local_vertices + num_local_vertices, - sorted_local_vertices.begin()); - thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - sorted_local_vertices.begin(), - sorted_local_vertices.end()); - CUGRAPH_EXPECTS(static_cast(thrust::distance( - sorted_local_vertices.begin(), - thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - sorted_local_vertices.begin(), - sorted_local_vertices.end()))) == sorted_local_vertices.size(), - "Invalid input argument: local_vertices should not have duplicates."); + rmm::device_uvector sorted_local_vertices(size_t{0}, handle.get_stream()); + if (optional_vertex_span) { + auto [vertices, num_vertices] = *optional_vertex_span; + sorted_local_vertices.resize(num_vertices, handle.get_stream()); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertices, + vertices + num_vertices, + sorted_local_vertices.begin()); + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + sorted_local_vertices.begin(), + sorted_local_vertices.end()); + CUGRAPH_EXPECTS(static_cast(thrust::distance( + sorted_local_vertices.begin(), + thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + sorted_local_vertices.begin(), + sorted_local_vertices.end()))) == sorted_local_vertices.size(), + "Invalid input argument: local_vertices should not have duplicates."); + } if (multi_gpu) { auto& comm = handle.get_comms(); @@ -373,6 +375,7 @@ void expensive_check_edgelist( "Invalid input argument: both edgelist_major_vertices.size() & " "edgelist_minor_vertices.size() should coincide with col_comm_size."); + auto [local_vertices, num_local_vertices] = *optional_vertex_span; CUGRAPH_EXPECTS( thrust::count_if( rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), @@ -411,12 +414,7 @@ void expensive_check_edgelist( "Invalid input argument: edgelist_major_vertices & edgelist_minor_vertices should be " "pre-shuffled."); - auto aggregate_vertexlist_size = host_scalar_allreduce( - comm, - local_vertices != nullptr ? 
num_local_vertices : vertex_t{0}, - handle.get_stream()); // local_vertices != nullptr is insufficient in multi-GPU as only a - // subset of GPUs may have a non-zero vertices - if (aggregate_vertexlist_size > 0) { + if (optional_vertex_span) { auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); @@ -521,24 +519,26 @@ void expensive_check_edgelist( assert(edgelist_major_vertices.size() == 1); assert(edgelist_minor_vertices.size() == 1); - if (local_vertices != nullptr) { + if (optional_vertex_span) { auto edge_first = thrust::make_zip_iterator( thrust::make_tuple(edgelist_major_vertices[0], edgelist_minor_vertices[0])); CUGRAPH_EXPECTS( - thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - edge_first, - edge_first + edgelist_edge_counts[0], - [num_local_vertices, - sorted_local_vertices = sorted_local_vertices.data()] __device__(auto e) { - return !thrust::binary_search(thrust::seq, - sorted_local_vertices, - sorted_local_vertices + num_local_vertices, - thrust::get<0>(e)) || - !thrust::binary_search(thrust::seq, - sorted_local_vertices, - sorted_local_vertices + num_local_vertices, - thrust::get<1>(e)); - }) == 0, + thrust::count_if( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edge_first, + edge_first + edgelist_edge_counts[0], + [sorted_local_vertices = sorted_local_vertices.data(), + num_sorted_local_vertices = + static_cast(sorted_local_vertices.size())] __device__(auto e) { + return !thrust::binary_search(thrust::seq, + sorted_local_vertices, + sorted_local_vertices + num_sorted_local_vertices, + thrust::get<0>(e)) || + !thrust::binary_search(thrust::seq, + sorted_local_vertices, + sorted_local_vertices + num_sorted_local_vertices, + thrust::get<1>(e)); + }) == 0, "Invalid input argument: edgelist_major_vertices and/or edgelist_minor_vertices have " "invalid vertex ID(s)."); } @@ -550,8 +550,7 @@ template std::enable_if_t, partition_t, vertex_t, edge_t>> renumber_edgelist(raft::handle_t const& handle, - vertex_t const* local_vertices, - vertex_t num_local_vertices /* relevant only if local_vertices != nullptr */, + std::optional> optional_local_vertex_span, std::vector const& edgelist_major_vertices /* [INOUT] */, std::vector const& edgelist_minor_vertices /* [INOUT] */, std::vector const& edgelist_edge_counts, @@ -582,8 +581,7 @@ renumber_edgelist(raft::handle_t const& handle, if (do_expensive_check) { expensive_check_edgelist(handle, - local_vertices, - num_local_vertices, + optional_local_vertex_span, edgelist_const_major_vertices, edgelist_const_minor_vertices, edgelist_edge_counts); @@ -593,8 +591,7 @@ renumber_edgelist(raft::handle_t const& handle, auto renumber_map_labels = detail::compute_renumber_map(handle, - local_vertices, - num_local_vertices, + optional_local_vertex_span, edgelist_const_major_vertices, edgelist_const_minor_vertices, edgelist_edge_counts); @@ -745,8 +742,7 @@ renumber_edgelist(raft::handle_t const& handle, template std::enable_if_t> renumber_edgelist( raft::handle_t const& handle, - vertex_t const* vertices, - vertex_t num_vertices /* relevant only if vertices != nullptr */, + std::optional> optional_vertex_span, vertex_t* edgelist_major_vertices /* [INOUT] */, vertex_t* edgelist_minor_vertices /* [INOUT] */, edge_t num_edgelist_edges, @@ -761,8 +757,7 @@ std::enable_if_t> renumber_edgelist( if (do_expensive_check) { expensive_check_edgelist( handle, - vertices, - num_vertices, + 
optional_vertex_span, std::vector{edgelist_major_vertices}, std::vector{edgelist_minor_vertices}, std::vector{num_edgelist_edges}); @@ -770,8 +765,7 @@ std::enable_if_t> renumber_edgelist( auto renumber_map_labels = detail::compute_renumber_map( handle, - vertices, - num_vertices, + optional_vertex_span, std::vector{edgelist_major_vertices}, std::vector{edgelist_minor_vertices}, std::vector{num_edgelist_edges}); @@ -811,6 +805,7 @@ template std::enable_if_t, partition_t, vertex_t, edge_t>> renumber_edgelist(raft::handle_t const& handle, + std::optional> optional_local_vertex_span, std::vector const& edgelist_major_vertices /* [INOUT] */, std::vector const& edgelist_minor_vertices /* [INOUT] */, std::vector const& edgelist_edge_counts, @@ -821,53 +816,7 @@ renumber_edgelist(raft::handle_t const& handle, handle.get_device_properties().major >= 7, "This version of renumber_edgelist not supported on Pascal and older architectures."); return detail::renumber_edgelist(handle, - static_cast(nullptr), - vertex_t{0}, - edgelist_major_vertices, - edgelist_minor_vertices, - edgelist_edge_counts, - do_expensive_check); -} - -template -std::enable_if_t> renumber_edgelist( - raft::handle_t const& handle, - vertex_t* edgelist_major_vertices /* [INOUT] */, - vertex_t* edgelist_minor_vertices /* [INOUT] */, - edge_t num_edgelist_edges, - bool do_expensive_check) -{ - // FIXME: remove this check once we drop Pascal support - CUGRAPH_EXPECTS( - handle.get_device_properties().major >= 7, - "This version of renumber_edgelist not supported on Pascal and older architectures."); - return detail::renumber_edgelist(handle, - static_cast(nullptr), - vertex_t{0} /* dummy */, - edgelist_major_vertices, - edgelist_minor_vertices, - num_edgelist_edges, - do_expensive_check); -} - -template -std::enable_if_t, partition_t, vertex_t, edge_t>> -renumber_edgelist(raft::handle_t const& handle, - vertex_t const* local_vertices, - vertex_t num_local_vertices, - std::vector const& edgelist_major_vertices /* [INOUT] */, - std::vector const& edgelist_minor_vertices /* [INOUT] */, - std::vector const& edgelist_edge_counts, - bool do_expensive_check) -{ - // FIXME: remove this check once we drop Pascal support - CUGRAPH_EXPECTS( - handle.get_device_properties().major >= 7, - "This version of renumber_edgelist not supported on Pascal and older architectures."); - return detail::renumber_edgelist(handle, - local_vertices, - num_local_vertices, + optional_local_vertex_span, edgelist_major_vertices, edgelist_minor_vertices, edgelist_edge_counts, @@ -877,8 +826,7 @@ renumber_edgelist(raft::handle_t const& handle, template std::enable_if_t> renumber_edgelist( raft::handle_t const& handle, - vertex_t const* vertices, - vertex_t num_vertices, + std::optional> optional_vertex_span, vertex_t* edgelist_major_vertices /* [INOUT] */, vertex_t* edgelist_minor_vertices /* [INOUT] */, edge_t num_edgelist_edges, @@ -889,8 +837,7 @@ std::enable_if_t> renumber_edgelist( handle.get_device_properties().major >= 7, "This version of renumber_edgelist not supported on Pascal and older architectures."); return detail::renumber_edgelist(handle, - vertices, - num_vertices, + optional_vertex_span, edgelist_major_vertices, edgelist_minor_vertices, num_edgelist_edges, @@ -899,11 +846,13 @@ std::enable_if_t> renumber_edgelist( // explicit instantiation directives (EIDir's): // + // instantiations for // template std::tuple, partition_t, int32_t, int32_t> renumber_edgelist( raft::handle_t const& handle, + std::optional> optional_local_vertex_span, std::vector 
const& edgelist_major_vertices /* [INOUT] */, std::vector const& edgelist_minor_vertices /* [INOUT] */, std::vector const& edgelist_edge_counts, @@ -911,25 +860,7 @@ renumber_edgelist( template rmm::device_uvector renumber_edgelist( raft::handle_t const& handle, - int32_t* edgelist_major_vertices /* [INOUT] */, - int32_t* edgelist_minor_vertices /* [INOUT] */, - int32_t num_edgelist_edges, - bool do_expensive_check); - -template std::tuple, partition_t, int32_t, int32_t> -renumber_edgelist( - raft::handle_t const& handle, - int32_t const* local_vertices, - int32_t num_local_vertices, - std::vector const& edgelist_major_vertices /* [INOUT] */, - std::vector const& edgelist_minor_vertices /* [INOUT] */, - std::vector const& edgelist_edge_counts, - bool do_expensive_check); - -template rmm::device_uvector renumber_edgelist( - raft::handle_t const& handle, - int32_t const* vertices, - int32_t num_vertices, + std::optional> optional_vertex_span, int32_t* edgelist_major_vertices /* [INOUT] */, int32_t* edgelist_minor_vertices /* [INOUT] */, int32_t num_edgelist_edges, @@ -940,6 +871,7 @@ template rmm::device_uvector renumber_edgelist template std::tuple, partition_t, int32_t, int64_t> renumber_edgelist( raft::handle_t const& handle, + std::optional> optional_local_vertex_span, std::vector const& edgelist_major_vertices /* [INOUT] */, std::vector const& edgelist_minor_vertices /* [INOUT] */, std::vector const& edgelist_edge_counts, @@ -947,25 +879,7 @@ renumber_edgelist( template rmm::device_uvector renumber_edgelist( raft::handle_t const& handle, - int32_t* edgelist_major_vertices /* [INOUT] */, - int32_t* edgelist_minor_vertices /* [INOUT] */, - int64_t num_edgelist_edges, - bool do_expensive_check); - -template std::tuple, partition_t, int32_t, int64_t> -renumber_edgelist( - raft::handle_t const& handle, - int32_t const* local_vertices, - int32_t num_local_vertices, - std::vector const& edgelist_major_vertices /* [INOUT] */, - std::vector const& edgelist_minor_vertices /* [INOUT] */, - std::vector const& edgelist_edge_counts, - bool do_expensive_check); - -template rmm::device_uvector renumber_edgelist( - raft::handle_t const& handle, - int32_t const* vertices, - int32_t num_vertices, + std::optional> optional_vertex_span, int32_t* edgelist_major_vertices /* [INOUT] */, int32_t* edgelist_minor_vertices /* [INOUT] */, int64_t num_edgelist_edges, @@ -976,6 +890,7 @@ template rmm::device_uvector renumber_edgelist template std::tuple, partition_t, int64_t, int64_t> renumber_edgelist( raft::handle_t const& handle, + std::optional> optional_local_vertex_span, std::vector const& edgelist_major_vertices /* [INOUT] */, std::vector const& edgelist_minor_vertices /* [INOUT] */, std::vector const& edgelist_edge_counts, @@ -983,25 +898,7 @@ renumber_edgelist( template rmm::device_uvector renumber_edgelist( raft::handle_t const& handle, - int64_t* edgelist_major_vertices /* [INOUT] */, - int64_t* edgelist_minor_vertices /* [INOUT] */, - int64_t num_edgelist_edges, - bool do_expensive_check); - -template std::tuple, partition_t, int64_t, int64_t> -renumber_edgelist( - raft::handle_t const& handle, - int64_t const* local_vertices, - int64_t num_local_vertices, - std::vector const& edgelist_major_vertices /* [INOUT] */, - std::vector const& edgelist_minor_vertices /* [INOUT] */, - std::vector const& edgelist_edge_counts, - bool do_expensive_check); - -template rmm::device_uvector renumber_edgelist( - raft::handle_t const& handle, - int64_t const* vertices, - int64_t num_vertices, + std::optional> 
optional_vertex_span, int64_t* edgelist_major_vertices /* [INOUT] */, int64_t* edgelist_minor_vertices /* [INOUT] */, int64_t num_edgelist_edges, diff --git a/cpp/tests/utilities/generate_graph_from_edgelist.cu b/cpp/src/structure/create_graph_from_edgelist.cu similarity index 68% rename from cpp/tests/utilities/generate_graph_from_edgelist.cu rename to cpp/src/structure/create_graph_from_edgelist.cu index b15cf34188d..27764ead0f0 100644 --- a/cpp/tests/utilities/generate_graph_from_edgelist.cu +++ b/cpp/src/structure/create_graph_from_edgelist.cu @@ -13,8 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include - #include #include #include @@ -22,12 +20,13 @@ #include -#include +#include +#include #include namespace cugraph { -namespace test { +namespace experimental { namespace { @@ -41,14 +40,14 @@ std::enable_if_t< std::tuple< cugraph::experimental::graph_t, rmm::device_uvector>> -generate_graph_from_edgelist_impl(raft::handle_t const& handle, - rmm::device_uvector&& vertices, - rmm::device_uvector&& edgelist_rows, - rmm::device_uvector&& edgelist_cols, - rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, - bool renumber) +create_graph_from_edgelist_impl( + raft::handle_t const& handle, + std::optional> optional_local_vertex_span, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + graph_properties_t graph_properties, + bool renumber) { CUGRAPH_EXPECTS(renumber, "renumber should be true if multi_gpu is true."); @@ -71,7 +70,7 @@ generate_graph_from_edgelist_impl(raft::handle_t const& handle, store_transposed ? thrust::make_zip_iterator(thrust::make_tuple(edgelist_cols.begin(), edgelist_rows.begin())) : thrust::make_zip_iterator(thrust::make_tuple(edgelist_rows.begin(), edgelist_cols.begin())); - auto edge_counts = test_weighted + auto edge_counts = graph_properties.is_weighted ? cugraph::experimental::groupby_and_count(pair_first, pair_first + edgelist_rows.size(), edgelist_weights.begin(), @@ -111,12 +110,7 @@ generate_graph_from_edgelist_impl(raft::handle_t const& handle, } std::tie(renumber_map_labels, partition, number_of_vertices, number_of_edges) = cugraph::experimental::renumber_edgelist( - handle, - vertices.data(), - static_cast(vertices.size()), - major_ptrs, - minor_ptrs, - counts); + handle, optional_local_vertex_span, major_ptrs, minor_ptrs, counts); } // 4. create a graph @@ -127,20 +121,14 @@ generate_graph_from_edgelist_impl(raft::handle_t const& handle, edgelists[i] = cugraph::experimental::edgelist_t{ edgelist_rows.data() + h_displacements[i], edgelist_cols.data() + h_displacements[i], - test_weighted ? edgelist_weights.data() + h_displacements[i] - : static_cast(nullptr), + graph_properties.is_weighted ? 
edgelist_weights.data() + h_displacements[i] + : static_cast(nullptr), static_cast(h_edge_counts[i])}; } return std::make_tuple( cugraph::experimental::graph_t( - handle, - edgelists, - partition, - number_of_vertices, - number_of_edges, - cugraph::experimental::graph_properties_t{is_symmetric, false, test_weighted}, - true), + handle, edgelists, partition, number_of_vertices, number_of_edges, graph_properties, true), std::move(renumber_map_labels)); } @@ -154,26 +142,43 @@ std::enable_if_t< std::tuple< cugraph::experimental::graph_t, rmm::device_uvector>> -generate_graph_from_edgelist_impl(raft::handle_t const& handle, - rmm::device_uvector&& vertices, - rmm::device_uvector&& edgelist_rows, - rmm::device_uvector&& edgelist_cols, - rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, - bool renumber) +create_graph_from_edgelist_impl( + raft::handle_t const& handle, + std::optional> optional_vertex_span, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + graph_properties_t graph_properties, + bool renumber) { - vertex_t number_of_vertices = static_cast(vertices.size()); - auto renumber_map_labels = renumber ? cugraph::experimental::renumber_edgelist( handle, - vertices.data(), - static_cast(vertices.size()), + optional_vertex_span, store_transposed ? edgelist_cols.data() : edgelist_rows.data(), store_transposed ? edgelist_rows.data() : edgelist_cols.data(), static_cast(edgelist_rows.size())) : rmm::device_uvector(0, handle.get_stream()); + vertex_t num_vertices{}; + if (renumber) { + num_vertices = static_cast(renumber_map_labels.size()); + } else { + if (optional_vertex_span) { + num_vertices = std::get<1>(*optional_vertex_span); + } else { + auto edge_first = + thrust::make_zip_iterator(thrust::make_tuple(edgelist_rows.begin(), edgelist_cols.begin())); + num_vertices = + thrust::transform_reduce( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edge_first, + edge_first + edgelist_rows.size(), + [] __device__(auto e) { return std::max(thrust::get<0>(e), thrust::get<1>(e)); }, + vertex_t{0}, + thrust::maximum()) + + 1; + } + } return std::make_tuple( cugraph::experimental::graph_t( @@ -181,10 +186,10 @@ generate_graph_from_edgelist_impl(raft::handle_t const& handle, cugraph::experimental::edgelist_t{ edgelist_rows.data(), edgelist_cols.data(), - test_weighted ? edgelist_weights.data() : nullptr, + graph_properties.is_weighted ? edgelist_weights.data() : static_cast(nullptr), static_cast(edgelist_rows.size())}, - number_of_vertices, - cugraph::experimental::graph_properties_t{is_symmetric, false, test_weighted}, + num_vertices, + graph_properties, renumber ? 
true : false), std::move(renumber_map_labels)); } @@ -198,23 +203,22 @@ template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist(raft::handle_t const& handle, - rmm::device_uvector&& vertices, - rmm::device_uvector&& edgelist_rows, - rmm::device_uvector&& edgelist_cols, - rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, - bool renumber) +create_graph_from_edgelist( + raft::handle_t const& handle, + std::optional> optional_vertex_span, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + graph_properties_t graph_properties, + bool renumber) { - return generate_graph_from_edgelist_impl( + return create_graph_from_edgelist_impl( handle, - std::move(vertices), + optional_vertex_span, std::move(edgelist_rows), std::move(edgelist_cols), std::move(edgelist_weights), - is_symmetric, - test_weighted, + graph_properties, renumber); } @@ -222,291 +226,267 @@ generate_graph_from_edgelist(raft::handle_t const& handle, template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); template 
std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& 
edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); template std::tuple, 
rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); -} // namespace test +} // namespace experimental } // namespace cugraph diff --git a/cpp/src/utilities/cython.cu b/cpp/src/utilities/cython.cu index 1a45782beb4..989de166699 100644 --- a/cpp/src/utilities/cython.cu +++ b/cpp/src/utilities/cython.cu @@ -1104,10 +1104,11 @@ std::unique_ptr> call_renumber( std::tie( p_ret->get_dv(), p_ret->get_partition(), p_ret->get_num_vertices(), p_ret->get_num_edges()) = cugraph::experimental::renumber_edgelist( - handle, major_ptrs, minor_ptrs, edge_counts, do_expensive_check); + handle, std::nullopt, major_ptrs, minor_ptrs, edge_counts, do_expensive_check); } else { p_ret->get_dv() = cugraph::experimental::renumber_edgelist( handle, + std::nullopt, shuffled_edgelist_major_vertices, shuffled_edgelist_minor_vertices, edge_counts[0], diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 80484fdfad6..958c393a8bd 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -22,7 +22,6 @@ add_library(cugraphtestutil STATIC "${CMAKE_CURRENT_SOURCE_DIR}/utilities/matrix_market_file_utilities.cu" "${CMAKE_CURRENT_SOURCE_DIR}/utilities/rmat_utilities.cu" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/generate_graph_from_edgelist.cu" "${CMAKE_CURRENT_SOURCE_DIR}/utilities/thrust_wrapper.cu" "${CMAKE_CURRENT_SOURCE_DIR}/utilities/misc_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/components/wcc_graphs.cu" diff --git a/cpp/tests/community/mg_louvain_test.cpp b/cpp/tests/community/mg_louvain_test.cpp index e8cc94edf99..0e22050f7a4 100644 --- a/cpp/tests/community/mg_louvain_test.cpp +++ b/cpp/tests/community/mg_louvain_test.cpp @@ -20,6 +20,7 @@ #include #include +#include #include #include @@ -128,14 +129,14 @@ class Louvain_MG_Testfixture : public ::testing::TestWithParam handle, d_edgelist_rows, d_edgelist_cols, d_renumber_map_gathered_v); std::tie(*sg_graph, std::ignore) = - cugraph::test::generate_graph_from_edgelist( + cugraph::experimental::create_graph_from_edgelist( handle, - std::move(d_vertices), + std::optional>{ + std::make_tuple(d_vertices.data(), static_cast(d_vertices.size()))}, std::move(d_edgelist_rows), std::move(d_edgelist_cols), std::move(d_edgelist_weights), - is_symmetric, - true, + cugraph::experimental::graph_properties_t{is_symmetric, false, true}, false); } diff --git a/cpp/tests/components/wcc_graphs.cu b/cpp/tests/components/wcc_graphs.cu index fb11f872fb8..8cbe18ed71e 100644 --- a/cpp/tests/components/wcc_graphs.cu +++ b/cpp/tests/components/wcc_graphs.cu @@ -12,6 +12,8 @@ #include #include +#include + #include #include @@ -68,15 +70,16 @@ LineGraph_Usecase::construct_graph(raft::handle_t const& handle, handle.get_stream_view().synchronize(); - return generate_graph_from_edgelist( - handle, - std::move(vertices_v), - std::move(src_v), - std::move(dst_v), - std::move(weights_v), - true, - false, - false); + return cugraph::experimental:: + create_graph_from_edgelist( + handle, + std::optional>{ + std::make_tuple(vertices_v.data(), static_cast(vertices_v.size()))}, + std::move(src_v), + std::move(dst_v), + std::move(weights_v), + cugraph::experimental::graph_properties_t{true, false, false}, + false); } template std::tuple, diff 
--git a/cpp/tests/utilities/matrix_market_file_utilities.cu b/cpp/tests/utilities/matrix_market_file_utilities.cu index c51aa69fe48..9ef03b7ff53 100644 --- a/cpp/tests/utilities/matrix_market_file_utilities.cu +++ b/cpp/tests/utilities/matrix_market_file_utilities.cu @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -409,15 +410,16 @@ read_graph_from_matrix_market_file(raft::handle_t const& handle, } handle.get_stream_view().synchronize(); - return generate_graph_from_edgelist( - handle, - std::move(d_vertices), - std::move(d_edgelist_rows), - std::move(d_edgelist_cols), - std::move(d_edgelist_weights), - is_symmetric, - test_weighted, - renumber); + return cugraph::experimental:: + create_graph_from_edgelist( + handle, + std::optional>{ + std::make_tuple(d_vertices.data(), static_cast(d_vertices.size()))}, + std::move(d_edgelist_rows), + std::move(d_edgelist_cols), + std::move(d_edgelist_weights), + cugraph::experimental::graph_properties_t{is_symmetric, false, test_weighted}, + renumber); } // explicit instantiations diff --git a/cpp/tests/utilities/rmat_utilities.cu b/cpp/tests/utilities/rmat_utilities.cu index 9b36867bf62..8d94f936ab7 100644 --- a/cpp/tests/utilities/rmat_utilities.cu +++ b/cpp/tests/utilities/rmat_utilities.cu @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -231,15 +232,16 @@ generate_graph_from_rmat_params(raft::handle_t const& handle, d_vertices = std::move(d_rx_vertices); } - return generate_graph_from_edgelist( - handle, - std::move(d_vertices), - std::move(d_edgelist_rows), - std::move(d_edgelist_cols), - std::move(d_edgelist_weights), - false, - test_weighted, - renumber); + return cugraph::experimental:: + create_graph_from_edgelist( + handle, + std::optional>{ + std::make_tuple(d_vertices.data(), static_cast(d_vertices.size()))}, + std::move(d_edgelist_rows), + std::move(d_edgelist_cols), + std::move(d_edgelist_weights), + cugraph::experimental::graph_properties_t{false, false, test_weighted}, + renumber); } // explicit instantiations diff --git a/cpp/tests/utilities/test_utilities.hpp b/cpp/tests/utilities/test_utilities.hpp index d7e6d9f6ed8..2fc7812dbaa 100644 --- a/cpp/tests/utilities/test_utilities.hpp +++ b/cpp/tests/utilities/test_utilities.hpp @@ -107,22 +107,6 @@ static const std::string& get_rapids_dataset_root_dir() return rdrd; } -template -std::tuple, - rmm::device_uvector> -generate_graph_from_edgelist(raft::handle_t const& handle, - rmm::device_uvector&& vertices, - rmm::device_uvector&& edgelist_rows, - rmm::device_uvector&& edgelist_cols, - rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, - bool renumber); - // returns a tuple of (rows, columns, weights, number_of_vertices, is_symmetric) template std::tuple, From ad92c1e8a7219b3eb57104f5242452d3c5a6e9a6 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Fri, 14 May 2021 09:39:52 -0400 Subject: [PATCH 257/343] Move the utility function to gather distributed vectors from tests/community to tests/utilities and update MG tests (#1602) - Move the utility function to gather distributed vectors from tests/community/mg_louvain_helper.cu to tests/utilities/device_comm_wrapper.cu and rename the function to device_gatherv - Update MG tests to gather the MG results in root and compare with the SG result in root. 
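As a rough illustration of the gather-then-compare flow in the item above (an editorial sketch, not part of the patch: the helper name, include paths, and variable names are assumptions, while the device_gatherv and sort_by_key usage mirrors the call sites updated in this change set):

// Editorial sketch: gather a distributed MG result to rank 0 and put it in
// unrenumbered (SG) vertex order before comparing with the SG reference.
#include <utilities/device_comm_wrapper.hpp>  // assumed include path
#include <utilities/thrust_wrapper.hpp>       // assumed include path

#include <raft/handle.hpp>
#include <rmm/device_uvector.hpp>

#include <tuple>

template <typename vertex_t, typename result_t>
rmm::device_uvector<result_t> aggregate_mg_result_in_sg_order(
  raft::handle_t const& handle,
  rmm::device_uvector<vertex_t> const& d_mg_renumber_map_labels,
  rmm::device_uvector<result_t> const& d_mg_result)
{
  // every rank contributes its local slice; only rank 0 receives data
  auto d_map = cugraph::test::device_gatherv(
    handle, d_mg_renumber_map_labels.data(), d_mg_renumber_map_labels.size());
  auto d_result =
    cugraph::test::device_gatherv(handle, d_mg_result.data(), d_mg_result.size());
  if (handle.get_comms().get_rank() == int{0}) {
    // reorder the aggregated values by their unrenumbered vertex IDs so they
    // line up element-wise with the single-GPU result
    std::tie(std::ignore, d_result) = cugraph::test::sort_by_key(
      handle, d_map.data(), d_result.data(), d_map.size());
  }
  return d_result;  // empty on ranks other than 0
}

The MG BFS/SSSP/Katz/PageRank tests below follow this shape inline, additionally passing predecessor vectors through unrenumber_int_vertices before sorting.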
- Few more minor updates (sort_by_key thrust wrapper return value, adding missing const in input parameter, add missing include statement, add barrier in MG performance measurement) This update is necessary to simplify MG WCC testing. Authors: - Seunghwa Kang (https://github.com/seunghwak) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) URL: https://github.com/rapidsai/cugraph/pull/1602 --- .../cugraph/experimental/graph_functions.hpp | 2 +- .../cugraph/utilities/host_scalar_comm.cuh | 1 + cpp/src/experimental/renumber_utils.cu | 70 ++--- cpp/tests/CMakeLists.txt | 34 +++ cpp/tests/community/mg_louvain_helper.cu | 34 --- cpp/tests/community/mg_louvain_helper.hpp | 5 - cpp/tests/community/mg_louvain_test.cpp | 5 +- cpp/tests/experimental/bfs_test.cpp | 13 +- .../experimental/katz_centrality_test.cpp | 4 +- cpp/tests/experimental/mg_bfs_test.cpp | 239 ++++++++-------- .../experimental/mg_katz_centrality_test.cpp | 142 ++++++---- cpp/tests/experimental/mg_sssp_test.cpp | 264 ++++++++++-------- cpp/tests/experimental/pagerank_test.cpp | 12 +- cpp/tests/experimental/sssp_test.cpp | 13 +- cpp/tests/pagerank/mg_pagerank_test.cpp | 219 ++++++--------- cpp/tests/utilities/device_comm_wrapper.cu | 72 +++++ cpp/tests/utilities/device_comm_wrapper.hpp | 29 ++ cpp/tests/utilities/thrust_wrapper.cu | 62 ++-- cpp/tests/utilities/thrust_wrapper.hpp | 8 +- 19 files changed, 681 insertions(+), 547 deletions(-) create mode 100644 cpp/tests/utilities/device_comm_wrapper.cu create mode 100644 cpp/tests/utilities/device_comm_wrapper.hpp diff --git a/cpp/include/cugraph/experimental/graph_functions.hpp b/cpp/include/cugraph/experimental/graph_functions.hpp index 47c43920749..8f5dbb1138b 100644 --- a/cpp/include/cugraph/experimental/graph_functions.hpp +++ b/cpp/include/cugraph/experimental/graph_functions.hpp @@ -209,7 +209,7 @@ void unrenumber_int_vertices(raft::handle_t const& handle, vertex_t const* renumber_map_labels, vertex_t local_int_vertex_first, vertex_t local_int_vertex_last, - std::vector& vertex_partition_lasts, + std::vector const& vertex_partition_lasts, bool do_expensive_check = false); /** diff --git a/cpp/include/cugraph/utilities/host_scalar_comm.cuh b/cpp/include/cugraph/utilities/host_scalar_comm.cuh index 4505d35e011..85994ed22bf 100644 --- a/cpp/include/cugraph/utilities/host_scalar_comm.cuh +++ b/cpp/include/cugraph/utilities/host_scalar_comm.cuh @@ -15,6 +15,7 @@ */ #pragma once +#include #include #include diff --git a/cpp/src/experimental/renumber_utils.cu b/cpp/src/experimental/renumber_utils.cu index 5e5e88ef8f7..765dbb19886 100644 --- a/cpp/src/experimental/renumber_utils.cu +++ b/cpp/src/experimental/renumber_utils.cu @@ -214,7 +214,7 @@ void unrenumber_int_vertices(raft::handle_t const& handle, vertex_t const* renumber_map_labels, vertex_t local_int_vertex_first, vertex_t local_int_vertex_last, - std::vector& vertex_partition_lasts, + std::vector const& vertex_partition_lasts, bool do_expensive_check) { double constexpr load_factor = 0.7; @@ -385,41 +385,45 @@ template void unrenumber_local_int_vertices(raft::handle_t const& handl int64_t local_int_vertex_last, bool do_expensive_check); -template void unrenumber_int_vertices(raft::handle_t const& handle, - int32_t* vertices, - size_t num_vertices, - int32_t const* renumber_map_labels, - int32_t local_int_vertex_first, - int32_t local_int_vertex_last, - std::vector& vertex_partition_lasts, - bool do_expensive_check); +template void unrenumber_int_vertices( + raft::handle_t const& handle, + int32_t* vertices, + size_t 
num_vertices, + int32_t const* renumber_map_labels, + int32_t local_int_vertex_first, + int32_t local_int_vertex_last, + std::vector const& vertex_partition_lasts, + bool do_expensive_check); -template void unrenumber_int_vertices(raft::handle_t const& handle, - int32_t* vertices, - size_t num_vertices, - int32_t const* renumber_map_labels, - int32_t local_int_vertex_first, - int32_t local_int_vertex_last, - std::vector& vertex_partition_lasts, - bool do_expensive_check); +template void unrenumber_int_vertices( + raft::handle_t const& handle, + int32_t* vertices, + size_t num_vertices, + int32_t const* renumber_map_labels, + int32_t local_int_vertex_first, + int32_t local_int_vertex_last, + std::vector const& vertex_partition_lasts, + bool do_expensive_check); -template void unrenumber_int_vertices(raft::handle_t const& handle, - int64_t* vertices, - size_t num_vertices, - int64_t const* renumber_map_labels, - int64_t local_int_vertex_first, - int64_t local_int_vertex_last, - std::vector& vertex_partition_lasts, - bool do_expensive_check); +template void unrenumber_int_vertices( + raft::handle_t const& handle, + int64_t* vertices, + size_t num_vertices, + int64_t const* renumber_map_labels, + int64_t local_int_vertex_first, + int64_t local_int_vertex_last, + std::vector const& vertex_partition_lasts, + bool do_expensive_check); -template void unrenumber_int_vertices(raft::handle_t const& handle, - int64_t* vertices, - size_t num_vertices, - int64_t const* renumber_map_labels, - int64_t local_int_vertex_first, - int64_t local_int_vertex_last, - std::vector& vertex_partition_lasts, - bool do_expensive_check); +template void unrenumber_int_vertices( + raft::handle_t const& handle, + int64_t* vertices, + size_t num_vertices, + int64_t const* renumber_map_labels, + int64_t local_int_vertex_first, + int64_t local_int_vertex_last, + std::vector const& vertex_partition_lasts, + bool do_expensive_check); } // namespace experimental } // namespace cugraph diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 958c393a8bd..39f0307f2d2 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -57,6 +57,39 @@ target_link_libraries(cugraphtestutil cugraph) set_target_properties(cugraphtestutil PROPERTIES CUDA_ARCHITECTURES OFF) +add_library(cugraphmgtestutil STATIC + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/device_comm_wrapper.cu") + +set_property(TARGET cugraphmgtestutil PROPERTY POSITION_INDEPENDENT_CODE ON) + +target_include_directories(cugraphmgtestutil + PRIVATE + "${CUB_INCLUDE_DIR}" + "${THRUST_INCLUDE_DIR}" + "${CUCO_INCLUDE_DIR}" + "${LIBCUDACXX_INCLUDE_DIR}" + "${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}" + "${RMM_INCLUDE}" + "${NCCL_INCLUDE_DIRS}" + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio" + "${CMAKE_CURRENT_SOURCE_DIR}/../include" + "${CMAKE_CURRENT_SOURCE_DIR}" + "${RAFT_DIR}/cpp/include" +) + +target_link_libraries(cugraphmgtestutil cugraph) + +# CUDA_ARCHITECTURES=OFF implies cmake will not pass arch flags to the +# compiler. CUDA_ARCHITECTURES must be set to a non-empty value to prevent +# cmake warnings about policy CMP0104. With this setting, arch flags must be +# manually set! ("evaluate_gpu_archs(GPU_ARCHS)" is the current mechanism +# used in cpp/CMakeLists.txt for setting arch options). +# Run "cmake --help-policy CMP0104" for policy details. 
+# NOTE: the CUDA_ARCHITECTURES=OFF setting may be removed after migrating to +# the findcudatoolkit features in cmake 3.17+ +set_target_properties(cugraphmgtestutil PROPERTIES + CUDA_ARCHITECTURES OFF) + ################################################################################################### # - compiler function ----------------------------------------------------------------------------- @@ -193,6 +226,7 @@ function(ConfigureTestMG CMAKE_TEST_NAME CMAKE_TEST_SRC) target_link_libraries(${CMAKE_TEST_NAME} PRIVATE + cugraphmgtestutil cugraphtestutil cugraph GTest::GTest diff --git a/cpp/tests/community/mg_louvain_helper.cu b/cpp/tests/community/mg_louvain_helper.cu index 1311970292a..2b1b5ade41e 100644 --- a/cpp/tests/community/mg_louvain_helper.cu +++ b/cpp/tests/community/mg_louvain_helper.cu @@ -31,36 +31,6 @@ namespace cugraph { namespace test { -template -rmm::device_uvector gather_distributed_vector(raft::handle_t const &handle, - T const *d_input, - size_t size) -{ - auto rx_sizes = - cugraph::experimental::host_scalar_gather(handle.get_comms(), size, 0, handle.get_stream()); - std::vector rx_displs(static_cast(handle.get_comms().get_rank()) == 0 - ? handle.get_comms().get_size() - : int{0}, - size_t{0}); - if (static_cast(handle.get_comms().get_rank()) == 0) { - std::partial_sum(rx_sizes.begin(), rx_sizes.end() - 1, rx_displs.begin() + 1); - } - - auto total_size = thrust::reduce(thrust::host, rx_sizes.begin(), rx_sizes.end()); - rmm::device_uvector gathered_v(total_size, handle.get_stream()); - - cugraph::experimental::device_gatherv(handle.get_comms(), - d_input, - gathered_v.data(), - size, - rx_sizes, - rx_displs, - 0, - handle.get_stream()); - - return gathered_v; -} - template bool compare_renumbered_vectors(raft::handle_t const &handle, rmm::device_uvector const &v1, @@ -336,10 +306,6 @@ template void single_gpu_renumber_edgelist_given_number_map( rmm::device_uvector &d_edgelist_cols, rmm::device_uvector &d_renumber_map_gathered_v); -template rmm::device_uvector gather_distributed_vector(raft::handle_t const &handle, - int const *d_input, - size_t size); - template bool compare_renumbered_vectors(raft::handle_t const &handle, rmm::device_uvector const &v1, rmm::device_uvector const &v2); diff --git a/cpp/tests/community/mg_louvain_helper.hpp b/cpp/tests/community/mg_louvain_helper.hpp index 456301f4d7b..6d074e2d5e9 100644 --- a/cpp/tests/community/mg_louvain_helper.hpp +++ b/cpp/tests/community/mg_louvain_helper.hpp @@ -24,11 +24,6 @@ namespace cugraph { namespace test { -template -rmm::device_uvector gather_distributed_vector(raft::handle_t const &handle, - T const *d_input, - size_t size); - template bool compare_renumbered_vectors(raft::handle_t const &handle, rmm::device_uvector const &v1, diff --git a/cpp/tests/community/mg_louvain_test.cpp b/cpp/tests/community/mg_louvain_test.cpp index 0e22050f7a4..7e085919fd7 100644 --- a/cpp/tests/community/mg_louvain_test.cpp +++ b/cpp/tests/community/mg_louvain_test.cpp @@ -17,6 +17,7 @@ #include "mg_louvain_helper.hpp" #include +#include #include #include @@ -145,7 +146,7 @@ class Louvain_MG_Testfixture : public ::testing::TestWithParam thrust::make_counting_iterator(dendrogram.num_levels()), [&dendrogram, &sg_graph, &d_clustering_v, &sg_modularity, &handle, resolution, rank]( size_t i) { - auto d_dendrogram_gathered_v = cugraph::test::gather_distributed_vector( + auto d_dendrogram_gathered_v = cugraph::test::device_gatherv( handle, dendrogram.get_level_ptr_nocheck(i), dendrogram.get_level_size_nocheck(i)); if (rank == 
0) { @@ -208,7 +209,7 @@ class Louvain_MG_Testfixture : public ::testing::TestWithParam SCOPED_TRACE("compare modularity input: " + param.graph_file_full_path); - auto d_renumber_map_gathered_v = cugraph::test::gather_distributed_vector( + auto d_renumber_map_gathered_v = cugraph::test::device_gatherv( handle, d_renumber_map_labels.data(), d_renumber_map_labels.size()); compare_sg_results(handle, diff --git a/cpp/tests/experimental/bfs_test.cpp b/cpp/tests/experimental/bfs_test.cpp index da4ef2f5dfb..2c8ab894096 100644 --- a/cpp/tests/experimental/bfs_test.cpp +++ b/cpp/tests/experimental/bfs_test.cpp @@ -212,12 +212,15 @@ class Tests_BFS : public ::testing::TestWithParam d_unrenumbered_distances(size_t{0}, handle.get_stream()); + std::tie(std::ignore, d_unrenumbered_distances) = cugraph::test::sort_by_key( handle, d_renumber_map_labels.data(), d_distances.data(), d_renumber_map_labels.size()); - auto d_unrenumbered_predecessors = cugraph::test::sort_by_key(handle, - d_renumber_map_labels.data(), - d_predecessors.data(), - d_renumber_map_labels.size()); + rmm::device_uvector d_unrenumbered_predecessors(size_t{0}, handle.get_stream()); + std::tie(std::ignore, d_unrenumbered_predecessors) = + cugraph::test::sort_by_key(handle, + d_renumber_map_labels.data(), + d_predecessors.data(), + d_renumber_map_labels.size()); raft::update_host(h_cugraph_distances.data(), d_unrenumbered_distances.data(), d_unrenumbered_distances.size(), diff --git a/cpp/tests/experimental/katz_centrality_test.cpp b/cpp/tests/experimental/katz_centrality_test.cpp index 35773073757..d0fc558c89f 100644 --- a/cpp/tests/experimental/katz_centrality_test.cpp +++ b/cpp/tests/experimental/katz_centrality_test.cpp @@ -225,7 +225,9 @@ class Tests_KatzCentrality std::vector h_cugraph_katz_centralities(graph_view.get_number_of_vertices()); if (renumber) { - auto d_unrenumbered_katz_centralities = + rmm::device_uvector d_unrenumbered_katz_centralities(size_t{0}, + handle.get_stream()); + std::tie(std::ignore, d_unrenumbered_katz_centralities) = cugraph::test::sort_by_key(handle, d_renumber_map_labels.data(), d_katz_centralities.data(), diff --git a/cpp/tests/experimental/mg_bfs_test.cpp b/cpp/tests/experimental/mg_bfs_test.cpp index e498e403334..a832e0f99ac 100644 --- a/cpp/tests/experimental/mg_bfs_test.cpp +++ b/cpp/tests/experimental/mg_bfs_test.cpp @@ -16,6 +16,7 @@ #include #include +#include #include #include @@ -80,8 +81,10 @@ class Tests_MGBFS : public ::testing::TestWithParam mg_graph(handle); rmm::device_uvector d_mg_renumber_map_labels(0, handle.get_stream()); std::tie(mg_graph, d_mg_renumber_map_labels) = @@ -90,6 +93,7 @@ class Tests_MGBFS : public ::testing::TestWithParam sg_graph(handle); - std::tie(sg_graph, std::ignore) = - input_usecase.template construct_graph( - handle, false, false); - - auto sg_graph_view = sg_graph.view(); - - std::vector vertex_partition_lasts(comm_size); - for (size_t i = 0; i < vertex_partition_lasts.size(); ++i) { - vertex_partition_lasts[i] = mg_graph_view.get_vertex_partition_last(i); - } - - rmm::device_scalar d_source(static_cast(bfs_usecase.source), - handle.get_stream()); - cugraph::experimental::unrenumber_int_vertices( - handle, - d_source.data(), - size_t{1}, - d_mg_renumber_map_labels.data(), - mg_graph_view.get_local_vertex_first(), - mg_graph_view.get_local_vertex_last(), - vertex_partition_lasts, - true); - auto unrenumbered_source = d_source.value(handle.get_stream()); - - // 5-2. 
run SG BFS - - rmm::device_uvector d_sg_distances(sg_graph_view.get_number_of_local_vertices(), - handle.get_stream()); - rmm::device_uvector d_sg_predecessors(sg_graph_view.get_number_of_local_vertices(), - handle.get_stream()); - - cugraph::experimental::bfs(handle, - sg_graph_view, - d_sg_distances.data(), - d_sg_predecessors.data(), - unrenumbered_source, - false, - std::numeric_limits::max()); - - // 5-3. compare - - std::vector h_sg_offsets(sg_graph_view.get_number_of_vertices() + 1); - std::vector h_sg_indices(sg_graph_view.get_number_of_edges()); - raft::update_host(h_sg_offsets.data(), - sg_graph_view.offsets(), - sg_graph_view.get_number_of_vertices() + 1, - handle.get_stream()); - raft::update_host(h_sg_indices.data(), - sg_graph_view.indices(), - sg_graph_view.get_number_of_edges(), - handle.get_stream()); - - std::vector h_sg_distances(sg_graph_view.get_number_of_vertices()); - std::vector h_sg_predecessors(sg_graph_view.get_number_of_vertices()); - raft::update_host( - h_sg_distances.data(), d_sg_distances.data(), d_sg_distances.size(), handle.get_stream()); - raft::update_host(h_sg_predecessors.data(), - d_sg_predecessors.data(), - d_sg_predecessors.size(), - handle.get_stream()); - - std::vector h_mg_distances(mg_graph_view.get_number_of_local_vertices()); - std::vector h_mg_predecessors(mg_graph_view.get_number_of_local_vertices()); - raft::update_host( - h_mg_distances.data(), d_mg_distances.data(), d_mg_distances.size(), handle.get_stream()); - cugraph::experimental::unrenumber_int_vertices( - handle, - d_mg_predecessors.data(), - d_mg_predecessors.size(), - d_mg_renumber_map_labels.data(), - mg_graph_view.get_local_vertex_first(), - mg_graph_view.get_local_vertex_last(), - vertex_partition_lasts, - true); - raft::update_host(h_mg_predecessors.data(), - d_mg_predecessors.data(), - d_mg_predecessors.size(), - handle.get_stream()); - - std::vector h_mg_renumber_map_labels(d_mg_renumber_map_labels.size()); - raft::update_host(h_mg_renumber_map_labels.data(), - d_mg_renumber_map_labels.data(), - d_mg_renumber_map_labels.size(), - handle.get_stream()); - - handle.get_stream_view().synchronize(); - - for (vertex_t i = 0; i < mg_graph_view.get_number_of_local_vertices(); ++i) { - auto mapped_vertex = h_mg_renumber_map_labels[i]; - ASSERT_TRUE(h_mg_distances[i] == h_sg_distances[mapped_vertex]) - << "MG BFS distance for vertex: " << mapped_vertex << " in rank: " << comm_rank - << " has value: " << h_mg_distances[i] - << " different from the corresponding SG value: " << h_sg_distances[mapped_vertex]; - if (h_mg_predecessors[i] == cugraph::invalid_vertex_id::value) { - ASSERT_TRUE(h_sg_predecessors[mapped_vertex] == h_mg_predecessors[i]) - << "vertex reachability does not match with the SG result."; - } else { - ASSERT_TRUE(h_sg_distances[h_mg_predecessors[i]] + 1 == h_sg_distances[mapped_vertex]) - << "distances to this vertex != distances to the predecessor vertex + 1."; - bool found{false}; - for (auto j = h_sg_offsets[h_mg_predecessors[i]]; - j < h_sg_offsets[h_mg_predecessors[i] + 1]; - ++j) { - if (h_sg_indices[j] == mapped_vertex) { - found = true; - break; + // 4-1. 
aggregate MG results + + auto d_mg_aggregate_renumber_map_labels = cugraph::test::device_gatherv( + handle, d_mg_renumber_map_labels.data(), d_mg_renumber_map_labels.size()); + auto d_mg_aggregate_distances = + cugraph::test::device_gatherv(handle, d_mg_distances.data(), d_mg_distances.size()); + auto d_mg_aggregate_predecessors = + cugraph::test::device_gatherv(handle, d_mg_predecessors.data(), d_mg_predecessors.size()); + + if (handle.get_comms().get_rank() == int{0}) { + // 4-2. unrenumbr MG results + + cugraph::experimental::unrenumber_int_vertices( + handle, + d_mg_aggregate_predecessors.data(), + d_mg_aggregate_predecessors.size(), + d_mg_aggregate_renumber_map_labels.data(), + vertex_t{0}, + mg_graph_view.get_number_of_vertices(), + std::vector{mg_graph_view.get_number_of_vertices()}); + + std::tie(std::ignore, d_mg_aggregate_distances) = + cugraph::test::sort_by_key(handle, + d_mg_aggregate_renumber_map_labels.data(), + d_mg_aggregate_distances.data(), + d_mg_aggregate_renumber_map_labels.size()); + std::tie(std::ignore, d_mg_aggregate_predecessors) = + cugraph::test::sort_by_key(handle, + d_mg_aggregate_renumber_map_labels.data(), + d_mg_aggregate_predecessors.data(), + d_mg_aggregate_renumber_map_labels.size()); + + // 4-3. create SG graph + + cugraph::experimental::graph_t sg_graph(handle); + std::tie(sg_graph, std::ignore) = + input_usecase.template construct_graph( + handle, false, false); + + auto sg_graph_view = sg_graph.view(); + + ASSERT_TRUE(mg_graph_view.get_number_of_vertices() == + sg_graph_view.get_number_of_vertices()); + + // 4-4. run SG BFS + + rmm::device_uvector d_sg_distances(sg_graph_view.get_number_of_vertices(), + handle.get_stream()); + rmm::device_uvector d_sg_predecessors( + sg_graph_view.get_number_of_local_vertices(), handle.get_stream()); + + vertex_t unrenumbered_source{}; + raft::update_host(&unrenumbered_source, + d_mg_aggregate_renumber_map_labels.data() + bfs_usecase.source, + size_t{1}, + handle.get_stream()); + handle.get_stream_view().synchronize(); + + cugraph::experimental::bfs(handle, + sg_graph_view, + d_sg_distances.data(), + d_sg_predecessors.data(), + unrenumbered_source, + false, + std::numeric_limits::max()); + // 4-5. 
compare + + std::vector h_sg_offsets(sg_graph_view.get_number_of_vertices() + 1); + std::vector h_sg_indices(sg_graph_view.get_number_of_edges()); + raft::update_host(h_sg_offsets.data(), + sg_graph_view.offsets(), + sg_graph_view.get_number_of_vertices() + 1, + handle.get_stream()); + raft::update_host(h_sg_indices.data(), + sg_graph_view.indices(), + sg_graph_view.get_number_of_edges(), + handle.get_stream()); + + std::vector h_mg_aggregate_distances(mg_graph_view.get_number_of_vertices()); + std::vector h_mg_aggregate_predecessors(mg_graph_view.get_number_of_vertices()); + + raft::update_host(h_mg_aggregate_distances.data(), + d_mg_aggregate_distances.data(), + d_mg_aggregate_distances.size(), + handle.get_stream()); + raft::update_host(h_mg_aggregate_predecessors.data(), + d_mg_aggregate_predecessors.data(), + d_mg_aggregate_predecessors.size(), + handle.get_stream()); + + std::vector h_sg_distances(sg_graph_view.get_number_of_vertices()); + std::vector h_sg_predecessors(sg_graph_view.get_number_of_vertices()); + + raft::update_host( + h_sg_distances.data(), d_sg_distances.data(), d_sg_distances.size(), handle.get_stream()); + raft::update_host(h_sg_predecessors.data(), + d_sg_predecessors.data(), + d_sg_predecessors.size(), + handle.get_stream()); + + handle.get_stream_view().synchronize(); + + ASSERT_TRUE(std::equal(h_mg_aggregate_distances.begin(), + h_mg_aggregate_distances.end(), + h_sg_distances.begin())); + for (size_t i = 0; i < h_mg_aggregate_predecessors.size(); ++i) { + if (h_mg_aggregate_predecessors[i] == cugraph::invalid_vertex_id::value) { + ASSERT_TRUE(h_sg_predecessors[i] == h_mg_aggregate_predecessors[i]) + << "vertex reachability does not match with the SG result."; + } else { + ASSERT_TRUE(h_sg_distances[h_mg_aggregate_predecessors[i]] + 1 == h_sg_distances[i]) + << "distances to this vertex != distances to the predecessor vertex + 1."; + bool found{false}; + for (auto j = h_sg_offsets[h_mg_aggregate_predecessors[i]]; + j < h_sg_offsets[h_mg_aggregate_predecessors[i] + 1]; + ++j) { + if (h_sg_indices[j] == i) { + found = true; + break; + } } + ASSERT_TRUE(found) << "no edge from the predecessor vertex to this vertex."; } - ASSERT_TRUE(found) << "no edge from the predecessor vertex to this vertex."; } } } diff --git a/cpp/tests/experimental/mg_katz_centrality_test.cpp b/cpp/tests/experimental/mg_katz_centrality_test.cpp index eca04fb3241..d67cd9090b8 100644 --- a/cpp/tests/experimental/mg_katz_centrality_test.cpp +++ b/cpp/tests/experimental/mg_katz_centrality_test.cpp @@ -16,6 +16,7 @@ #include #include +#include #include #include @@ -77,6 +78,7 @@ class Tests_MGKatzCentrality if (PERF) { CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle.get_comms().barrier(); hr_clock.start(); } cugraph::experimental::graph_t mg_graph(handle); @@ -87,6 +89,7 @@ class Tests_MGKatzCentrality if (PERF) { CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle.get_comms().barrier(); double elapsed_time{0.0}; hr_clock.stop(&elapsed_time); std::cout << "MG construct_graph took " << elapsed_time * 1e-6 << " s.\n"; @@ -109,6 +112,7 @@ class Tests_MGKatzCentrality if (PERF) { CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle.get_comms().barrier(); hr_clock.start(); } @@ -124,6 +128,7 @@ class Tests_MGKatzCentrality if (PERF) { CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle.get_comms().barrier(); double elapsed_time{0.0}; hr_clock.stop(&elapsed_time); 
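// (Editorial note, not part of the patch.) The handle.get_comms().barrier()
// calls added in these hunks make the timed region consistent across ranks:
// cudaDeviceSynchronize() only drains the local GPU, so without the barrier one
// rank could start or stop its timer while another rank is still working. The
// measurement pattern used throughout the MG tests is therefore roughly:
//
//   CUDA_TRY(cudaDeviceSynchronize());  // finish outstanding local GPU work
//   handle.get_comms().barrier();       // align all ranks
//   hr_clock.start();
//   /* ... run the MG step being measured ... */
//   CUDA_TRY(cudaDeviceSynchronize());
//   handle.get_comms().barrier();
//   double elapsed_time{0.0};
//   hr_clock.stop(&elapsed_time);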
std::cout << "MG Katz Centrality took " << elapsed_time * 1e-6 << " s.\n"; @@ -132,68 +137,81 @@ class Tests_MGKatzCentrality // 5. copmare SG & MG results if (katz_usecase.check_correctness) { - // 5-1. create SG graph - - cugraph::experimental::graph_t sg_graph(handle); - std::tie(sg_graph, std::ignore) = - input_usecase.template construct_graph( - handle, true, false); - - auto sg_graph_view = sg_graph.view(); - - // 5-3. run SG Katz Centrality - - rmm::device_uvector d_sg_katz_centralities(sg_graph_view.get_number_of_vertices(), - handle.get_stream()); - - cugraph::experimental::katz_centrality(handle, - sg_graph_view, - static_cast(nullptr), - d_sg_katz_centralities.data(), - alpha, - beta, - epsilon, - std::numeric_limits::max(), // max_iterations - false); - - // 5-4. compare - - std::vector h_sg_katz_centralities(sg_graph_view.get_number_of_vertices()); - raft::update_host(h_sg_katz_centralities.data(), - d_sg_katz_centralities.data(), - d_sg_katz_centralities.size(), - handle.get_stream()); - - std::vector h_mg_katz_centralities(mg_graph_view.get_number_of_local_vertices()); - raft::update_host(h_mg_katz_centralities.data(), - d_mg_katz_centralities.data(), - d_mg_katz_centralities.size(), - handle.get_stream()); - - std::vector h_mg_renumber_map_labels(d_mg_renumber_map_labels.size()); - raft::update_host(h_mg_renumber_map_labels.data(), - d_mg_renumber_map_labels.data(), - d_mg_renumber_map_labels.size(), - handle.get_stream()); - - handle.get_stream_view().synchronize(); - - auto threshold_ratio = 1e-3; - auto threshold_magnitude = - (1.0 / static_cast(mg_graph_view.get_number_of_vertices())) * - threshold_ratio; // skip comparison for low KatzCentrality verties (lowly ranked vertices) - auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { - return std::abs(lhs - rhs) < - std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude); - }; - - for (vertex_t i = 0; i < mg_graph_view.get_number_of_local_vertices(); ++i) { - auto mapped_vertex = h_mg_renumber_map_labels[i]; - ASSERT_TRUE(nearly_equal(h_mg_katz_centralities[i], h_sg_katz_centralities[mapped_vertex])) - << "MG KatzCentrality value for vertex: " << mapped_vertex << " in rank: " << comm_rank - << " has value: " << h_mg_katz_centralities[i] - << " which exceeds the error margin for comparing to SG value: " - << h_sg_katz_centralities[mapped_vertex]; + // 5-1. aggregate MG results + + auto d_mg_aggregate_renumber_map_labels = cugraph::test::device_gatherv( + handle, d_mg_renumber_map_labels.data(), d_mg_renumber_map_labels.size()); + auto d_mg_aggregate_katz_centralities = cugraph::test::device_gatherv( + handle, d_mg_katz_centralities.data(), d_mg_katz_centralities.size()); + + if (handle.get_comms().get_rank() == int{0}) { + // 5-2. unrenumbr MG results + + std::tie(std::ignore, d_mg_aggregate_katz_centralities) = + cugraph::test::sort_by_key(handle, + d_mg_aggregate_renumber_map_labels.data(), + d_mg_aggregate_katz_centralities.data(), + d_mg_aggregate_renumber_map_labels.size()); + + // 5-3. create SG graph + + cugraph::experimental::graph_t sg_graph(handle); + std::tie(sg_graph, std::ignore) = + input_usecase.template construct_graph( + handle, true, false); + + auto sg_graph_view = sg_graph.view(); + + ASSERT_TRUE(mg_graph_view.get_number_of_vertices() == + sg_graph_view.get_number_of_vertices()); + + // 5-4. 
run SG Katz Centrality + + rmm::device_uvector d_sg_katz_centralities(sg_graph_view.get_number_of_vertices(), + handle.get_stream()); + + cugraph::experimental::katz_centrality( + handle, + sg_graph_view, + static_cast(nullptr), + d_sg_katz_centralities.data(), + alpha, + beta, + epsilon, + std::numeric_limits::max(), // max_iterations + false); + + // 5-5. compare + + std::vector h_mg_aggregate_katz_centralities( + mg_graph_view.get_number_of_vertices()); + raft::update_host(h_mg_aggregate_katz_centralities.data(), + d_mg_aggregate_katz_centralities.data(), + d_mg_aggregate_katz_centralities.size(), + handle.get_stream()); + + std::vector h_sg_katz_centralities(sg_graph_view.get_number_of_vertices()); + raft::update_host(h_sg_katz_centralities.data(), + d_sg_katz_centralities.data(), + d_sg_katz_centralities.size(), + handle.get_stream()); + + handle.get_stream_view().synchronize(); + + auto threshold_ratio = 1e-3; + auto threshold_magnitude = + (1.0 / static_cast(mg_graph_view.get_number_of_vertices())) * + threshold_ratio; // skip comparison for low KatzCentrality verties (lowly ranked + // vertices) + auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { + return std::abs(lhs - rhs) < + std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude); + }; + + ASSERT_TRUE(std::equal(h_mg_aggregate_katz_centralities.begin(), + h_mg_aggregate_katz_centralities.end(), + h_sg_katz_centralities.begin(), + nearly_equal)); } } } diff --git a/cpp/tests/experimental/mg_sssp_test.cpp b/cpp/tests/experimental/mg_sssp_test.cpp index d3da904afc9..8568545cbd6 100644 --- a/cpp/tests/experimental/mg_sssp_test.cpp +++ b/cpp/tests/experimental/mg_sssp_test.cpp @@ -16,6 +16,7 @@ #include #include +#include #include #include @@ -77,6 +78,7 @@ class Tests_MGSSSP : public ::testing::TestWithParam mg_graph(handle); @@ -87,6 +89,7 @@ class Tests_MGSSSP : public ::testing::TestWithParam sg_graph(handle); - std::tie(sg_graph, std::ignore) = - input_usecase.template construct_graph( - handle, true, false); - - auto sg_graph_view = sg_graph.view(); - - std::vector vertex_partition_lasts(comm_size); - for (size_t i = 0; i < vertex_partition_lasts.size(); ++i) { - vertex_partition_lasts[i] = mg_graph_view.get_vertex_partition_last(i); - } - - rmm::device_scalar d_source(static_cast(sssp_usecase.source), - handle.get_stream()); - cugraph::experimental::unrenumber_int_vertices( - handle, - d_source.data(), - size_t{1}, - d_mg_renumber_map_labels.data(), - mg_graph_view.get_local_vertex_first(), - mg_graph_view.get_local_vertex_last(), - vertex_partition_lasts, - true); - auto unrenumbered_source = d_source.value(handle.get_stream()); - - // 5-2. run SG SSSP - - rmm::device_uvector d_sg_distances(sg_graph_view.get_number_of_local_vertices(), - handle.get_stream()); - rmm::device_uvector d_sg_predecessors(sg_graph_view.get_number_of_local_vertices(), - handle.get_stream()); - - // FIXME: disable do_expensive_check - cugraph::experimental::sssp(handle, - sg_graph_view, - d_sg_distances.data(), - d_sg_predecessors.data(), - unrenumbered_source, - std::numeric_limits::max()); - - // 5-3. 
compare - - std::vector h_sg_offsets(sg_graph_view.get_number_of_vertices() + 1); - std::vector h_sg_indices(sg_graph_view.get_number_of_edges()); - std::vector h_sg_weights(sg_graph_view.get_number_of_edges()); - raft::update_host(h_sg_offsets.data(), - sg_graph_view.offsets(), - sg_graph_view.get_number_of_vertices() + 1, - handle.get_stream()); - raft::update_host(h_sg_indices.data(), - sg_graph_view.indices(), - sg_graph_view.get_number_of_edges(), - handle.get_stream()); - raft::update_host(h_sg_weights.data(), - sg_graph_view.weights(), - sg_graph_view.get_number_of_edges(), - handle.get_stream()); - - std::vector h_sg_distances(sg_graph_view.get_number_of_vertices()); - std::vector h_sg_predecessors(sg_graph_view.get_number_of_vertices()); - raft::update_host( - h_sg_distances.data(), d_sg_distances.data(), d_sg_distances.size(), handle.get_stream()); - raft::update_host(h_sg_predecessors.data(), - d_sg_predecessors.data(), - d_sg_predecessors.size(), - handle.get_stream()); - - std::vector h_mg_distances(mg_graph_view.get_number_of_local_vertices()); - std::vector h_mg_predecessors(mg_graph_view.get_number_of_local_vertices()); - raft::update_host( - h_mg_distances.data(), d_mg_distances.data(), d_mg_distances.size(), handle.get_stream()); - cugraph::experimental::unrenumber_int_vertices( - handle, - d_mg_predecessors.data(), - d_mg_predecessors.size(), - d_mg_renumber_map_labels.data(), - mg_graph_view.get_local_vertex_first(), - mg_graph_view.get_local_vertex_last(), - vertex_partition_lasts, - true); - raft::update_host(h_mg_predecessors.data(), - d_mg_predecessors.data(), - d_mg_predecessors.size(), - handle.get_stream()); - - std::vector h_mg_renumber_map_labels(d_mg_renumber_map_labels.size()); - raft::update_host(h_mg_renumber_map_labels.data(), - d_mg_renumber_map_labels.data(), - d_mg_renumber_map_labels.size(), - handle.get_stream()); - - handle.get_stream_view().synchronize(); - - auto max_weight_element = std::max_element(h_sg_weights.begin(), h_sg_weights.end()); - auto epsilon = *max_weight_element * weight_t{1e-6}; - auto nearly_equal = [epsilon](auto lhs, auto rhs) { return std::fabs(lhs - rhs) < epsilon; }; - - for (vertex_t i = 0; i < mg_graph_view.get_number_of_local_vertices(); ++i) { - auto mapped_vertex = h_mg_renumber_map_labels[i]; - ASSERT_TRUE(nearly_equal(h_mg_distances[i], h_sg_distances[mapped_vertex])) - << "MG SSSP distance for vertex: " << mapped_vertex << " in rank: " << comm_rank - << " has value: " << h_mg_distances[i] - << " different from the corresponding SG value: " << h_sg_distances[mapped_vertex]; - if (h_mg_predecessors[i] == cugraph::invalid_vertex_id::value) { - ASSERT_TRUE(h_sg_predecessors[mapped_vertex] == h_mg_predecessors[i]) - << "vertex reachability does not match with the SG result."; - } else { - auto pred_distance = h_sg_distances[h_mg_predecessors[i]]; - bool found{false}; - for (auto j = h_sg_offsets[h_mg_predecessors[i]]; - j < h_sg_offsets[h_mg_predecessors[i] + 1]; - ++j) { - if (h_sg_indices[j] == mapped_vertex) { - if (nearly_equal(pred_distance + h_sg_weights[j], h_sg_distances[mapped_vertex])) { - found = true; - break; + // 4-1. 
aggregate MG results + + auto d_mg_aggregate_renumber_map_labels = cugraph::test::device_gatherv( + handle, d_mg_renumber_map_labels.data(), d_mg_renumber_map_labels.size()); + auto d_mg_aggregate_distances = + cugraph::test::device_gatherv(handle, d_mg_distances.data(), d_mg_distances.size()); + auto d_mg_aggregate_predecessors = + cugraph::test::device_gatherv(handle, d_mg_predecessors.data(), d_mg_predecessors.size()); + + if (handle.get_comms().get_rank() == int{0}) { + // 4-2. unrenumber MG results + + cugraph::experimental::unrenumber_int_vertices( + handle, + d_mg_aggregate_predecessors.data(), + d_mg_aggregate_predecessors.size(), + d_mg_aggregate_renumber_map_labels.data(), + vertex_t{0}, + mg_graph_view.get_number_of_vertices(), + std::vector{mg_graph_view.get_number_of_vertices()}); + + std::tie(std::ignore, d_mg_aggregate_distances) = + cugraph::test::sort_by_key(handle, + d_mg_aggregate_renumber_map_labels.data(), + d_mg_aggregate_distances.data(), + d_mg_aggregate_renumber_map_labels.size()); + std::tie(std::ignore, d_mg_aggregate_predecessors) = + cugraph::test::sort_by_key(handle, + d_mg_aggregate_renumber_map_labels.data(), + d_mg_aggregate_predecessors.data(), + d_mg_aggregate_renumber_map_labels.size()); + + // 4-3. create SG graph + + cugraph::experimental::graph_t sg_graph(handle); + std::tie(sg_graph, std::ignore) = + input_usecase.template construct_graph( + handle, true, false); + + auto sg_graph_view = sg_graph.view(); + + ASSERT_TRUE(mg_graph_view.get_number_of_vertices() == + sg_graph_view.get_number_of_vertices()); + + // 4-4. run SG SSSP + + rmm::device_uvector d_sg_distances(sg_graph_view.get_number_of_local_vertices(), + handle.get_stream()); + rmm::device_uvector d_sg_predecessors( + sg_graph_view.get_number_of_local_vertices(), handle.get_stream()); + vertex_t unrenumbered_source{}; + raft::update_host(&unrenumbered_source, + d_mg_aggregate_renumber_map_labels.data() + sssp_usecase.source, + size_t{1}, + handle.get_stream()); + handle.get_stream_view().synchronize(); + + cugraph::experimental::sssp(handle, + sg_graph_view, + d_sg_distances.data(), + d_sg_predecessors.data(), + unrenumbered_source, + std::numeric_limits::max()); + + // 4-5. 
compare + + std::vector h_sg_offsets(sg_graph_view.get_number_of_vertices() + 1); + std::vector h_sg_indices(sg_graph_view.get_number_of_edges()); + std::vector h_sg_weights(sg_graph_view.get_number_of_edges()); + raft::update_host(h_sg_offsets.data(), + sg_graph_view.offsets(), + sg_graph_view.get_number_of_vertices() + 1, + handle.get_stream()); + raft::update_host(h_sg_indices.data(), + sg_graph_view.indices(), + sg_graph_view.get_number_of_edges(), + handle.get_stream()); + raft::update_host(h_sg_weights.data(), + sg_graph_view.weights(), + sg_graph_view.get_number_of_edges(), + handle.get_stream()); + + std::vector h_mg_aggregate_distances(mg_graph_view.get_number_of_vertices()); + std::vector h_mg_aggregate_predecessors(mg_graph_view.get_number_of_vertices()); + raft::update_host(h_mg_aggregate_distances.data(), + d_mg_aggregate_distances.data(), + d_mg_aggregate_distances.size(), + handle.get_stream()); + raft::update_host(h_mg_aggregate_predecessors.data(), + d_mg_aggregate_predecessors.data(), + d_mg_aggregate_predecessors.size(), + handle.get_stream()); + + std::vector h_sg_distances(sg_graph_view.get_number_of_vertices()); + std::vector h_sg_predecessors(sg_graph_view.get_number_of_vertices()); + raft::update_host( + h_sg_distances.data(), d_sg_distances.data(), d_sg_distances.size(), handle.get_stream()); + raft::update_host(h_sg_predecessors.data(), + d_sg_predecessors.data(), + d_sg_predecessors.size(), + handle.get_stream()); + + handle.get_stream_view().synchronize(); + + auto max_weight_element = std::max_element(h_sg_weights.begin(), h_sg_weights.end()); + auto epsilon = *max_weight_element * weight_t{1e-6}; + auto nearly_equal = [epsilon](auto lhs, auto rhs) { + return std::fabs(lhs - rhs) < epsilon; + }; + + ASSERT_TRUE(std::equal(h_mg_aggregate_distances.begin(), + h_mg_aggregate_distances.end(), + h_sg_distances.begin(), + nearly_equal)); + + for (size_t i = 0; i < h_mg_aggregate_predecessors.size(); ++i) { + if (h_mg_aggregate_predecessors[i] == cugraph::invalid_vertex_id::value) { + ASSERT_TRUE(h_sg_predecessors[i] == h_mg_aggregate_predecessors[i]) + << "vertex reachability does not match with the SG result."; + } else { + auto pred_distance = h_sg_distances[h_mg_aggregate_predecessors[i]]; + bool found{false}; + for (auto j = h_sg_offsets[h_mg_aggregate_predecessors[i]]; + j < h_sg_offsets[h_mg_aggregate_predecessors[i] + 1]; + ++j) { + if (h_sg_indices[j] == i) { + if (nearly_equal(pred_distance + h_sg_weights[j], h_sg_distances[i])) { + found = true; + break; + } } } + ASSERT_TRUE(found) + << "no edge from the predecessor vertex to this vertex with the matching weight."; } - ASSERT_TRUE(found) - << "no edge from the predecessor vertex to this vertex with the matching weight."; } } } @@ -288,7 +304,7 @@ INSTANTIATE_TEST_SUITE_P( ::testing::Values( // enable correctness checks std::make_tuple(SSSP_Usecase{0}, - cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, false, false)))); + cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, false, false, true)))); INSTANTIATE_TEST_SUITE_P( rmat_large_test, @@ -296,6 +312,6 @@ INSTANTIATE_TEST_SUITE_P( ::testing::Values( // disable correctness checks for large graphs std::make_tuple(SSSP_Usecase{0, false}, - cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false)))); + cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false, true)))); CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/pagerank_test.cpp b/cpp/tests/experimental/pagerank_test.cpp index 
e1b7b121b1f..9b07059d2da 100644 --- a/cpp/tests/experimental/pagerank_test.cpp +++ b/cpp/tests/experimental/pagerank_test.cpp @@ -303,10 +303,11 @@ class Tests_PageRank d_renumber_map_labels.data(), vertex_t{0}, graph_view.get_number_of_vertices()); - cugraph::test::sort_by_key(handle, - d_unrenumbered_personalization_vertices.data(), - d_unrenumbered_personalization_values.data(), - d_unrenumbered_personalization_vertices.size()); + std::tie(d_unrenumbered_personalization_vertices, d_unrenumbered_personalization_values) = + cugraph::test::sort_by_key(handle, + d_unrenumbered_personalization_vertices.data(), + d_unrenumbered_personalization_values.data(), + d_unrenumbered_personalization_vertices.size()); raft::update_host(h_unrenumbered_personalization_vertices.data(), d_unrenumbered_personalization_vertices.data(), @@ -346,7 +347,8 @@ class Tests_PageRank std::vector h_cugraph_pageranks(graph_view.get_number_of_vertices()); if (renumber) { - auto d_unrenumbered_pageranks = cugraph::test::sort_by_key( + rmm::device_uvector d_unrenumbered_pageranks(size_t{0}, handle.get_stream()); + std::tie(std::ignore, d_unrenumbered_pageranks) = cugraph::test::sort_by_key( handle, d_renumber_map_labels.data(), d_pageranks.data(), d_renumber_map_labels.size()); raft::update_host(h_cugraph_pageranks.data(), d_unrenumbered_pageranks.data(), diff --git a/cpp/tests/experimental/sssp_test.cpp b/cpp/tests/experimental/sssp_test.cpp index 3095afad8fc..e12df163551 100644 --- a/cpp/tests/experimental/sssp_test.cpp +++ b/cpp/tests/experimental/sssp_test.cpp @@ -220,12 +220,15 @@ class Tests_SSSP : public ::testing::TestWithParam d_unrenumbered_distances(size_t{0}, handle.get_stream()); + std::tie(std::ignore, d_unrenumbered_distances) = cugraph::test::sort_by_key( handle, d_renumber_map_labels.data(), d_distances.data(), d_renumber_map_labels.size()); - auto d_unrenumbered_predecessors = cugraph::test::sort_by_key(handle, - d_renumber_map_labels.data(), - d_predecessors.data(), - d_renumber_map_labels.size()); + rmm::device_uvector d_unrenumbered_predecessors(size_t{0}, handle.get_stream()); + std::tie(std::ignore, d_unrenumbered_predecessors) = + cugraph::test::sort_by_key(handle, + d_renumber_map_labels.data(), + d_predecessors.data(), + d_renumber_map_labels.size()); raft::update_host(h_cugraph_distances.data(), d_unrenumbered_distances.data(), diff --git a/cpp/tests/pagerank/mg_pagerank_test.cpp b/cpp/tests/pagerank/mg_pagerank_test.cpp index adedfa2e3bc..21a2c11f4a7 100644 --- a/cpp/tests/pagerank/mg_pagerank_test.cpp +++ b/cpp/tests/pagerank/mg_pagerank_test.cpp @@ -16,6 +16,7 @@ #include #include +#include #include #include @@ -80,6 +81,7 @@ class Tests_MGPageRank if (PERF) { CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle.get_comms().barrier(); hr_clock.start(); } cugraph::experimental::graph_t mg_graph(handle); @@ -89,6 +91,7 @@ class Tests_MGPageRank if (PERF) { CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle.get_comms().barrier(); double elapsed_time{0.0}; hr_clock.stop(&elapsed_time); std::cout << "MG construct_graph took " << elapsed_time * 1e-6 << " s.\n"; @@ -146,6 +149,7 @@ class Tests_MGPageRank if (PERF) { CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle.get_comms().barrier(); hr_clock.start(); } @@ -163,6 +167,7 @@ class Tests_MGPageRank if (PERF) { CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle.get_comms().barrier(); double elapsed_time{0.0}; 
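// (Editorial note, not part of the patch.) As the pagerank_test.cpp and
// sssp_test.cpp hunks above show, cugraph::test::sort_by_key now returns both
// the sorted keys and the sorted values as a tuple of rmm::device_uvector's,
// so call sites unpack whichever parts they need, e.g. (variable names here
// are illustrative):
//
//   rmm::device_uvector<weight_t> d_sorted_values(size_t{0}, handle.get_stream());
//   std::tie(std::ignore, d_sorted_values) = cugraph::test::sort_by_key(
//     handle, d_keys.data(), d_values.data(), d_keys.size());
//
// When the reordered keys are also needed (the personalization vertex/value
// pairs above), both tuple elements are captured instead of using std::ignore.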
hr_clock.stop(&elapsed_time); std::cout << "MG PageRank took " << elapsed_time * 1e-6 << " s.\n"; @@ -171,134 +176,96 @@ class Tests_MGPageRank // 5. copmare SG & MG results if (pagerank_usecase.check_correctness) { - // 5-1. create SG graph - - cugraph::experimental::graph_t sg_graph(handle); - std::tie(sg_graph, std::ignore) = - input_usecase.template construct_graph( - handle, true, false); - - auto sg_graph_view = sg_graph.view(); - - // 5-2. collect personalization vertex/value pairs - - rmm::device_uvector d_sg_personalization_vertices(0, handle.get_stream()); - rmm::device_uvector d_sg_personalization_values(0, handle.get_stream()); - if (pagerank_usecase.personalization_ratio > 0.0) { - rmm::device_uvector d_unrenumbered_personalization_vertices( - d_mg_personalization_vertices.size(), handle.get_stream()); - rmm::device_uvector d_unrenumbered_personalization_values( - d_unrenumbered_personalization_vertices.size(), handle.get_stream()); - raft::copy_async(d_unrenumbered_personalization_vertices.data(), - d_mg_personalization_vertices.data(), - d_mg_personalization_vertices.size(), - handle.get_stream()); - raft::copy_async(d_unrenumbered_personalization_values.data(), - d_mg_personalization_values.data(), - d_mg_personalization_values.size(), - handle.get_stream()); - - std::vector vertex_partition_lasts(comm_size); - for (size_t i = 0; i < vertex_partition_lasts.size(); ++i) { - vertex_partition_lasts[i] = mg_graph_view.get_vertex_partition_last(i); - } - cugraph::experimental::unrenumber_int_vertices( + // 5-1. aggregate MG results + + auto d_mg_aggregate_renumber_map_labels = cugraph::test::device_gatherv( + handle, d_mg_renumber_map_labels.data(), d_mg_renumber_map_labels.size()); + auto d_mg_aggregate_personalization_vertices = cugraph::test::device_gatherv( + handle, d_mg_personalization_vertices.data(), d_mg_personalization_vertices.size()); + auto d_mg_aggregate_personalization_values = cugraph::test::device_gatherv( + handle, d_mg_personalization_values.data(), d_mg_personalization_values.size()); + auto d_mg_aggregate_pageranks = + cugraph::test::device_gatherv(handle, d_mg_pageranks.data(), d_mg_pageranks.size()); + + if (handle.get_comms().get_rank() == int{0}) { + // 5-2. 
unrenumbr MG results + + cugraph::experimental::unrenumber_int_vertices( handle, - d_unrenumbered_personalization_vertices.data(), - d_unrenumbered_personalization_vertices.size(), - d_mg_renumber_map_labels.data(), - mg_graph_view.get_local_vertex_first(), - mg_graph_view.get_local_vertex_last(), - vertex_partition_lasts, - handle.get_stream()); - - rmm::device_scalar d_local_personalization_vector_size( - d_unrenumbered_personalization_vertices.size(), handle.get_stream()); - rmm::device_uvector d_recvcounts(comm_size, handle.get_stream()); - comm.allgather( - d_local_personalization_vector_size.data(), d_recvcounts.data(), 1, handle.get_stream()); - std::vector recvcounts(d_recvcounts.size()); - raft::update_host( - recvcounts.data(), d_recvcounts.data(), d_recvcounts.size(), handle.get_stream()); - auto status = comm.sync_stream(handle.get_stream()); - ASSERT_EQ(status, raft::comms::status_t::SUCCESS); - - std::vector displacements(recvcounts.size(), size_t{0}); - std::partial_sum(recvcounts.begin(), recvcounts.end() - 1, displacements.begin() + 1); - - d_sg_personalization_vertices.resize(displacements.back() + recvcounts.back(), - handle.get_stream()); - d_sg_personalization_values.resize(d_sg_personalization_vertices.size(), - handle.get_stream()); - - comm.allgatherv(d_unrenumbered_personalization_vertices.data(), - d_sg_personalization_vertices.data(), - recvcounts.data(), - displacements.data(), - handle.get_stream()); - comm.allgatherv(d_unrenumbered_personalization_values.data(), - d_sg_personalization_values.data(), - recvcounts.data(), - displacements.data(), - handle.get_stream()); - - cugraph::test::sort_by_key(handle, - d_unrenumbered_personalization_vertices.data(), - d_unrenumbered_personalization_values.data(), - d_unrenumbered_personalization_vertices.size()); - } + d_mg_aggregate_personalization_vertices.data(), + d_mg_aggregate_personalization_vertices.size(), + d_mg_aggregate_renumber_map_labels.data(), + vertex_t{0}, + mg_graph_view.get_number_of_vertices(), + std::vector{mg_graph_view.get_number_of_vertices()}); + std::tie(d_mg_aggregate_personalization_vertices, d_mg_aggregate_personalization_values) = + cugraph::test::sort_by_key(handle, + d_mg_aggregate_personalization_vertices.data(), + d_mg_aggregate_personalization_values.data(), + d_mg_aggregate_personalization_vertices.size()); + std::tie(std::ignore, d_mg_aggregate_pageranks) = + cugraph::test::sort_by_key(handle, + d_mg_aggregate_renumber_map_labels.data(), + d_mg_aggregate_pageranks.data(), + d_mg_aggregate_renumber_map_labels.size()); + + // 5-3. create SG graph + + cugraph::experimental::graph_t sg_graph(handle); + std::tie(sg_graph, std::ignore) = + input_usecase.template construct_graph( + handle, true, false); + + auto sg_graph_view = sg_graph.view(); + + ASSERT_TRUE(mg_graph_view.get_number_of_vertices() == + sg_graph_view.get_number_of_vertices()); + + // 5-4. run SG PageRank + + rmm::device_uvector d_sg_pageranks(sg_graph_view.get_number_of_vertices(), + handle.get_stream()); + + cugraph::experimental::pagerank( + handle, + sg_graph_view, + static_cast(nullptr), + d_mg_aggregate_personalization_vertices.data(), + d_mg_aggregate_personalization_values.data(), + static_cast(d_mg_aggregate_personalization_vertices.size()), + d_sg_pageranks.data(), + alpha, + epsilon, + std::numeric_limits::max(), // max_iterations + false); + + // 5-4. 
compare + + std::vector h_mg_aggregate_pageranks(mg_graph_view.get_number_of_vertices()); + raft::update_host(h_mg_aggregate_pageranks.data(), + d_mg_aggregate_pageranks.data(), + d_mg_aggregate_pageranks.size(), + handle.get_stream()); - // 5-3. run SG PageRank - - rmm::device_uvector d_sg_pageranks(sg_graph_view.get_number_of_vertices(), - handle.get_stream()); - - cugraph::experimental::pagerank(handle, - sg_graph_view, - static_cast(nullptr), - d_sg_personalization_vertices.data(), - d_sg_personalization_values.data(), - static_cast(d_sg_personalization_vertices.size()), - d_sg_pageranks.data(), - alpha, - epsilon, - std::numeric_limits::max(), // max_iterations - false); - - // 5-4. compare - - std::vector h_sg_pageranks(sg_graph_view.get_number_of_vertices()); - raft::update_host( - h_sg_pageranks.data(), d_sg_pageranks.data(), d_sg_pageranks.size(), handle.get_stream()); - - std::vector h_mg_pageranks(mg_graph_view.get_number_of_local_vertices()); - raft::update_host( - h_mg_pageranks.data(), d_mg_pageranks.data(), d_mg_pageranks.size(), handle.get_stream()); - - std::vector h_mg_renumber_map_labels(d_mg_renumber_map_labels.size()); - raft::update_host(h_mg_renumber_map_labels.data(), - d_mg_renumber_map_labels.data(), - d_mg_renumber_map_labels.size(), - handle.get_stream()); - - handle.get_stream_view().synchronize(); - - auto threshold_ratio = 1e-3; - auto threshold_magnitude = - (1.0 / static_cast(mg_graph_view.get_number_of_vertices())) * - threshold_ratio; // skip comparison for low PageRank verties (lowly ranked vertices) - auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { - return std::abs(lhs - rhs) < - std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude); - }; - - for (vertex_t i = 0; i < mg_graph_view.get_number_of_local_vertices(); ++i) { - auto mapped_vertex = h_mg_renumber_map_labels[i]; - ASSERT_TRUE(nearly_equal(h_mg_pageranks[i], h_sg_pageranks[mapped_vertex])) - << "MG PageRank value for vertex: " << mapped_vertex << " in rank: " << comm_rank - << " has value: " << h_mg_pageranks[i] - << " which exceeds the error margin for comparing to SG value: " - << h_sg_pageranks[mapped_vertex]; + std::vector h_sg_pageranks(sg_graph_view.get_number_of_vertices()); + raft::update_host( + h_sg_pageranks.data(), d_sg_pageranks.data(), d_sg_pageranks.size(), handle.get_stream()); + + handle.get_stream_view().synchronize(); + + auto threshold_ratio = 1e-3; + auto threshold_magnitude = + (1.0 / static_cast(mg_graph_view.get_number_of_vertices())) * + threshold_ratio; // skip comparison for low PageRank verties (lowly ranked vertices) + auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { + return std::abs(lhs - rhs) < + std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude); + }; + + ASSERT_TRUE(std::equal(h_mg_aggregate_pageranks.begin(), + h_mg_aggregate_pageranks.end(), + h_sg_pageranks.begin(), + nearly_equal)); } } } diff --git a/cpp/tests/utilities/device_comm_wrapper.cu b/cpp/tests/utilities/device_comm_wrapper.cu new file mode 100644 index 00000000000..2d66e05c59f --- /dev/null +++ b/cpp/tests/utilities/device_comm_wrapper.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "device_comm_wrapper.hpp" + +#include +#include + +#include +#include + +namespace cugraph { +namespace test { + +template +rmm::device_uvector device_gatherv(raft::handle_t const &handle, T const *d_input, size_t size) +{ + bool is_root = handle.get_comms().get_rank() == int{0}; + auto rx_sizes = cugraph::experimental::host_scalar_gather( + handle.get_comms(), size, int{0}, handle.get_stream()); + std::vector rx_displs(is_root ? static_cast(handle.get_comms().get_size()) + : size_t{0}); + if (is_root) { std::partial_sum(rx_sizes.begin(), rx_sizes.end() - 1, rx_displs.begin() + 1); } + + rmm::device_uvector gathered_v( + is_root ? std::reduce(rx_sizes.begin(), rx_sizes.end()) : size_t{0}, handle.get_stream()); + + cugraph::experimental::device_gatherv(handle.get_comms(), + d_input, + gathered_v.data(), + size, + rx_sizes, + rx_displs, + int{0}, + handle.get_stream()); + + return gathered_v; +} + +// explicit instantiation + +template rmm::device_uvector device_gatherv(raft::handle_t const &handle, + int32_t const *d_input, + size_t size); + +template rmm::device_uvector device_gatherv(raft::handle_t const &handle, + int64_t const *d_input, + size_t size); + +template rmm::device_uvector device_gatherv(raft::handle_t const &handle, + float const *d_input, + size_t size); + +template rmm::device_uvector device_gatherv(raft::handle_t const &handle, + double const *d_input, + size_t size); + +} // namespace test +} // namespace cugraph diff --git a/cpp/tests/utilities/device_comm_wrapper.hpp b/cpp/tests/utilities/device_comm_wrapper.hpp new file mode 100644 index 00000000000..f56f24248d6 --- /dev/null +++ b/cpp/tests/utilities/device_comm_wrapper.hpp @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +namespace cugraph { +namespace test { + +template +rmm::device_uvector device_gatherv(raft::handle_t const &handle, T const *d_input, size_t size); + +} // namespace test +} // namespace cugraph diff --git a/cpp/tests/utilities/thrust_wrapper.cu b/cpp/tests/utilities/thrust_wrapper.cu index 5d32fb8a5d1..dfd420b1e2d 100644 --- a/cpp/tests/utilities/thrust_wrapper.cu +++ b/cpp/tests/utilities/thrust_wrapper.cu @@ -26,10 +26,8 @@ namespace cugraph { namespace test { template -rmm::device_uvector sort_by_key(raft::handle_t const& handle, - vertex_t const* keys, - value_t const* values, - size_t num_pairs) +std::tuple, rmm::device_uvector> sort_by_key( + raft::handle_t const& handle, vertex_t const* keys, value_t const* values, size_t num_pairs) { rmm::device_uvector sorted_keys(num_pairs, handle.get_stream_view()); rmm::device_uvector sorted_values(num_pairs, handle.get_stream_view()); @@ -44,38 +42,44 @@ rmm::device_uvector sort_by_key(raft::handle_t const& handle, sorted_keys.end(), sorted_values.begin()); - return sorted_values; + return std::make_tuple(std::move(sorted_keys), std::move(sorted_values)); } -template rmm::device_uvector sort_by_key(raft::handle_t const& handle, - int32_t const* keys, - float const* values, - size_t num_pairs); +template std::tuple, rmm::device_uvector> +sort_by_key(raft::handle_t const& handle, + int32_t const* keys, + float const* values, + size_t num_pairs); -template rmm::device_uvector sort_by_key(raft::handle_t const& handle, - int32_t const* keys, - double const* values, - size_t num_pairs); +template std::tuple, rmm::device_uvector> +sort_by_key(raft::handle_t const& handle, + int32_t const* keys, + double const* values, + size_t num_pairs); -template rmm::device_uvector sort_by_key(raft::handle_t const& handle, - int32_t const* keys, - int32_t const* values, - size_t num_pairs); +template std::tuple, rmm::device_uvector> +sort_by_key(raft::handle_t const& handle, + int32_t const* keys, + int32_t const* values, + size_t num_pairs); -template rmm::device_uvector sort_by_key(raft::handle_t const& handle, - int64_t const* keys, - float const* values, - size_t num_pairs); +template std::tuple, rmm::device_uvector> +sort_by_key(raft::handle_t const& handle, + int64_t const* keys, + float const* values, + size_t num_pairs); -template rmm::device_uvector sort_by_key(raft::handle_t const& handle, - int64_t const* keys, - double const* values, - size_t num_pairs); +template std::tuple, rmm::device_uvector> +sort_by_key(raft::handle_t const& handle, + int64_t const* keys, + double const* values, + size_t num_pairs); -template rmm::device_uvector sort_by_key(raft::handle_t const& handle, - int64_t const* keys, - int64_t const* values, - size_t num_pairs); +template std::tuple, rmm::device_uvector> +sort_by_key(raft::handle_t const& handle, + int64_t const* keys, + int64_t const* values, + size_t num_pairs); } // namespace test } // namespace cugraph diff --git a/cpp/tests/utilities/thrust_wrapper.hpp b/cpp/tests/utilities/thrust_wrapper.hpp index 579dc3c550f..96f370f884c 100644 --- a/cpp/tests/utilities/thrust_wrapper.hpp +++ b/cpp/tests/utilities/thrust_wrapper.hpp @@ -17,14 +17,14 @@ #include #include +#include + namespace cugraph { namespace test { template -rmm::device_uvector sort_by_key(raft::handle_t const& handle, - vertex_t const* keys, - value_t const* values, - size_t num_pairs); +std::tuple, rmm::device_uvector> sort_by_key( + raft::handle_t const& handle, vertex_t const* keys, value_t const* values, 
size_t num_pairs); } // namespace test } // namespace cugraph From a52420674ef0e5c2f5760aa722b78c73c425582e Mon Sep 17 00:00:00 2001 From: Raymond Douglass Date: Mon, 17 May 2021 12:46:10 -0400 Subject: [PATCH 258/343] DOC Update to v21.06.00 --- cpp/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index c2c30d96eb3..483149df3af 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -16,7 +16,7 @@ cmake_minimum_required(VERSION 3.18...3.18 FATAL_ERROR) -project(CUGRAPH VERSION 0.20.0 LANGUAGES C CXX CUDA) +project(CUGRAPH VERSION 21.06.00 LANGUAGES C CXX CUDA) # Write the version header include(cmake/Modules/Version.cmake) From fcf4bb58dd8afdafcdfffadfbb092ba8c3dde71d Mon Sep 17 00:00:00 2001 From: AJ Schmidt Date: Wed, 19 May 2021 11:53:48 -0400 Subject: [PATCH 259/343] Update docs build script (#1606) This PR removes a variable that is no longer necessary for docs builds after the calver transition. Authors: - AJ Schmidt (https://github.com/ajschmidt8) Approvers: - Dillon Cullinan (https://github.com/dillon-cullinan) URL: https://github.com/rapidsai/cugraph/pull/1606 --- ci/docs/build.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/ci/docs/build.sh b/ci/docs/build.sh index 279faa6a61d..2135ff04b45 100644 --- a/ci/docs/build.sh +++ b/ci/docs/build.sh @@ -15,7 +15,6 @@ export PATH=/conda/bin:/usr/local/cuda/bin:$PATH export HOME=$WORKSPACE export PROJECT_WORKSPACE=/rapids/cugraph export LIBCUDF_KERNEL_CACHE_PATH="$HOME/.jitify-cache" -export NIGHTLY_VERSION=$(echo $BRANCH_VERSION | awk -F. '{print $2}') export PROJECTS=(cugraph libcugraph) gpuci_logger "Check environment" From 9ac368e758ef0f8ea66710991caf3c7920317259 Mon Sep 17 00:00:00 2001 From: Raymond Douglass Date: Wed, 19 May 2021 16:46:20 -0400 Subject: [PATCH 260/343] DOC v21.08 Updates --- CHANGELOG.md | 4 ++++ conda/environments/cugraph_dev_cuda11.0.yml | 14 +++++++------- conda/environments/cugraph_dev_cuda11.1.yml | 14 +++++++------- conda/environments/cugraph_dev_cuda11.2.yml | 14 +++++++------- cpp/CMakeLists.txt | 2 +- 5 files changed, 26 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4170e9c4bc0..dba190abf11 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +# cuGraph 21.08.00 (Date TBD) + +Please see https://github.com/rapidsai/cugraph/releases/tag/v21.08.00a for the latest changes to this development branch. + # cuGraph 0.20.0 (Date TBD) Please see https://github.com/rapidsai/cugraph/releases/tag/v0.20.0a for the latest changes to this development branch. 
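Stepping back to the test-utility refactor earlier in this series: cugraph::test::device_gatherv (new in device_comm_wrapper.cu) collects each rank's device array on rank 0, and cugraph::test::sort_by_key now returns both the sorted keys and the sorted values. The sketch below shows how the MG tests in this series compose the two to line multi-GPU results up with a single-GPU reference. It is an illustration only, not part of any patch in this series; the helper name and the include paths are assumptions.

#include <utilities/device_comm_wrapper.hpp>  // cugraph::test::device_gatherv (path assumed)
#include <utilities/thrust_wrapper.hpp>       // cugraph::test::sort_by_key (path assumed)

#include <raft/cudart_utils.h>  // raft::update_host (path assumed)
#include <raft/handle.hpp>
#include <rmm/device_uvector.hpp>

#include <tuple>
#include <vector>

// Gather this rank's (renumbered) per-vertex results to rank 0, reorder them by the original
// vertex IDs, and return them on the host so they can be compared element-wise against a
// single-GPU run. Returns an empty vector on every rank other than rank 0.
template <typename vertex_t, typename value_t>
std::vector<value_t> aggregate_mg_results(
  raft::handle_t const& handle,
  rmm::device_uvector<vertex_t> const& d_renumber_map_labels,  // this rank's renumber map
  rmm::device_uvector<value_t> const& d_values)                // this rank's per-vertex results
{
  // every rank participates in the (collective) gather, but only rank 0 receives the data
  auto d_aggregate_labels = cugraph::test::device_gatherv(
    handle, d_renumber_map_labels.data(), d_renumber_map_labels.size());
  auto d_aggregate_values =
    cugraph::test::device_gatherv(handle, d_values.data(), d_values.size());

  std::vector<value_t> h_values{};
  if (handle.get_comms().get_rank() == 0) {
    // sort the values by the original (unrenumbered) vertex IDs; sort_by_key now hands back
    // both columns, only the reordered values are needed here
    std::tie(std::ignore, d_aggregate_values) = cugraph::test::sort_by_key(
      handle, d_aggregate_labels.data(), d_aggregate_values.data(), d_aggregate_labels.size());

    // copy to host for element-wise comparison against the single-GPU reference
    h_values.resize(d_aggregate_values.size());
    raft::update_host(
      h_values.data(), d_aggregate_values.data(), d_aggregate_values.size(), handle.get_stream());
    handle.get_stream_view().synchronize();
  }
  return h_values;
}

This mirrors steps 5-1/5-2 of the reworked MG PageRank test above and steps 4-1/4-2 of the MG weakly connected components test added later in this series.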
diff --git a/conda/environments/cugraph_dev_cuda11.0.yml b/conda/environments/cugraph_dev_cuda11.0.yml index 20d56b281d2..b875bbac322 100644 --- a/conda/environments/cugraph_dev_cuda11.0.yml +++ b/conda/environments/cugraph_dev_cuda11.0.yml @@ -6,16 +6,16 @@ channels: - conda-forge dependencies: - cudatoolkit=11.0 -- cudf=0.20.* -- libcudf=0.20.* -- rmm=0.20.* -- librmm=0.20.* +- cudf=21.08.* +- libcudf=21.08.* +- rmm=21.08.* +- librmm=21.08.* - dask>=2.12.0 - distributed>=2.12.0 -- dask-cuda=0.20* -- dask-cudf=0.20* +- dask-cuda=21.08.* +- dask-cudf=21.08.* - nccl>=2.8.4 -- ucx-py=0.20* +- ucx-py=21.08.* - ucx-proc=*=gpu - scipy - networkx>=2.5.1 diff --git a/conda/environments/cugraph_dev_cuda11.1.yml b/conda/environments/cugraph_dev_cuda11.1.yml index 0eba2baccaa..38485fbd0bc 100644 --- a/conda/environments/cugraph_dev_cuda11.1.yml +++ b/conda/environments/cugraph_dev_cuda11.1.yml @@ -6,16 +6,16 @@ channels: - conda-forge dependencies: - cudatoolkit=11.1 -- cudf=0.20.* -- libcudf=0.20.* -- rmm=0.20.* -- librmm=0.20.* +- cudf=21.08.* +- libcudf=21.08.* +- rmm=21.08.* +- librmm=21.08.* - dask>=2.12.0 - distributed>=2.12.0 -- dask-cuda=0.20* -- dask-cudf=0.20* +- dask-cuda=21.08.* +- dask-cudf=21.08.* - nccl>=2.8.4 -- ucx-py=0.20* +- ucx-py=21.08.* - ucx-proc=*=gpu - scipy - networkx>=2.5.1 diff --git a/conda/environments/cugraph_dev_cuda11.2.yml b/conda/environments/cugraph_dev_cuda11.2.yml index 55f6ad75cec..385b1b6dad0 100644 --- a/conda/environments/cugraph_dev_cuda11.2.yml +++ b/conda/environments/cugraph_dev_cuda11.2.yml @@ -6,16 +6,16 @@ channels: - conda-forge dependencies: - cudatoolkit=11.2 -- cudf=0.20.* -- libcudf=0.20.* -- rmm=0.20.* -- librmm=0.20.* +- cudf=21.08.* +- libcudf=21.08.* +- rmm=21.08.* +- librmm=21.08.* - dask>=2.12.0 - distributed>=2.12.0 -- dask-cuda=0.20* -- dask-cudf=0.20* +- dask-cuda=21.08.* +- dask-cudf=21.08.* - nccl>=2.8.4 -- ucx-py=0.20* +- ucx-py=21.08.* - ucx-proc=*=gpu - scipy - networkx>=2.5.1 diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 483149df3af..7ea6fa64ebf 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -16,7 +16,7 @@ cmake_minimum_required(VERSION 3.18...3.18 FATAL_ERROR) -project(CUGRAPH VERSION 21.06.00 LANGUAGES C CXX CUDA) +project(CUGRAPH VERSION 21.08.00 LANGUAGES C CXX CUDA) # Write the version header include(cmake/Modules/Version.cmake) From fe51a316e60043bf341c4b1c86dc474eef5eb41c Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Wed, 19 May 2021 16:47:45 -0400 Subject: [PATCH 261/343] SG & MG Weakly Connected Components (#1604) - C++ SG & MG Weakly Connected Components - This PR is to help python binding development - Performance/memory footprint optimizations for SG, MG with # GPUs < (E/V)^2, MG with # GPUs > (E/V)^2 will be in separate PRs Authors: - Seunghwa Kang (https://github.com/seunghwak) Approvers: - Andrei Schaffer (https://github.com/aschaffer) - Chuck Hastings (https://github.com/ChuckHastings) URL: https://github.com/rapidsai/cugraph/pull/1604 --- cpp/CMakeLists.txt | 1 + cpp/include/cugraph/algorithms.hpp | 33 +- cpp/include/cugraph/experimental/graph.hpp | 4 + .../components/weakly_connected_components.cu | 741 ++++++++++++++++++ cpp/tests/CMakeLists.txt | 20 +- .../mg_weakly_connected_components_test.cpp | 227 ++++++ .../weakly_connected_components_test.cpp | 245 ++++++ 7 files changed, 1269 insertions(+), 2 deletions(-) create mode 100644 cpp/src/components/weakly_connected_components.cu create mode 100644 
cpp/tests/components/mg_weakly_connected_components_test.cpp create mode 100644 cpp/tests/components/weakly_connected_components_test.cpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 483149df3af..62774a74b1e 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -438,6 +438,7 @@ add_library(cugraph SHARED src/experimental/pagerank.cu src/experimental/katz_centrality.cu src/tree/mst.cu + src/components/weakly_connected_components.cu src/structure/create_graph_from_edgelist.cu src/utilities/host_barrier.cpp ) diff --git a/cpp/include/cugraph/algorithms.hpp b/cpp/include/cugraph/algorithms.hpp index e5bba7bd5ce..dc10b6b59fb 100644 --- a/cpp/include/cugraph/algorithms.hpp +++ b/cpp/include/cugraph/algorithms.hpp @@ -1066,6 +1066,8 @@ namespace experimental { * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. * @tparam edge_t Type of edge identifiers. Needs to be an integral type. * @tparam weight_t Type of edge weights. Needs to be a floating point type. + * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) + * or multi-GPU (true). * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. * @param graph_view Graph view object. @@ -1104,6 +1106,8 @@ void bfs(raft::handle_t const &handle, * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. * @tparam edge_t Type of edge identifiers. Needs to be an integral type. * @tparam weight_t Type of edge weights. Needs to be a floating point type. + * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) + * or multi-GPU (true). * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. * @param graph_view Graph view object. @@ -1136,6 +1140,8 @@ void sssp(raft::handle_t const &handle, * @tparam edge_t Type of edge identifiers. Needs to be an integral type. * @tparam weight_t Type of edge weights. Needs to be a floating point type. * @tparam result_t Type of PageRank scores. + * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) + * or multi-GPU (true). * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. * @param graph_view Graph view object. @@ -1188,6 +1194,8 @@ void pagerank(raft::handle_t const &handle, * @tparam edge_t Type of edge identifiers. Needs to be an integral type. * @tparam weight_t Type of edge weights. Needs to be a floating point type. * @tparam result_t Type of Katz Centrality scores. + * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) + * or multi-GPU (true). * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. * @param graph_view Graph view object. @@ -1290,5 +1298,28 @@ random_walks(raft::handle_t const &handle, index_t max_depth, bool use_padding = false); +/** + * @brief Finds (weakly-connected-)component IDs of each vertices in the input graph. + * + * The input graph must be symmetric. Component IDs can be arbitrary integers (they can be + * non-consecutive and are not ordered by component size or any other criterion). + * + * @tparam vertex_t Type of vertex identifiers. 
Needs to be an integral type. + * @tparam edge_t Type of edge identifiers. Needs to be an integral type. + * @tparam weight_t Type of edge weights. Needs to be a floating point type. + * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) + * or multi-GPU (true). + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * @param graph_view Graph view object. + * @param components Pointer to the output component ID array. + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). + */ +template +void weakly_connected_components( + raft::handle_t const &handle, + graph_view_t const &graph_view, + vertex_t *components, + bool do_expensive_check = false); + } // namespace experimental -} // namespace cugraph +} // namespace cugraph \ No newline at end of file diff --git a/cpp/include/cugraph/experimental/graph.hpp b/cpp/include/cugraph/experimental/graph.hpp index 963631dcc19..8dcf0c2d3b9 100644 --- a/cpp/include/cugraph/experimental/graph.hpp +++ b/cpp/include/cugraph/experimental/graph.hpp @@ -189,6 +189,10 @@ template struct invalid_edge_id : invalid_idx { }; +template +struct invalid_component_id : invalid_idx { +}; + template __host__ __device__ std::enable_if_t::value, bool> is_valid_vertex( vertex_t num_vertices, vertex_t v) diff --git a/cpp/src/components/weakly_connected_components.cu b/cpp/src/components/weakly_connected_components.cu new file mode 100644 index 00000000000..46fe8b10191 --- /dev/null +++ b/cpp/src/components/weakly_connected_components.cu @@ -0,0 +1,741 @@ +/* + * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace cugraph { +namespace experimental { + +namespace { + +// FIXME: this function (after modification) may be useful for SSSP with the near-far method to +// determine the near-far threshold. 
+// add new roots till the sum of the degrees first becomes no smaller than degree_sum_threshold and +// returns a triplet of (new roots, number of scanned candidates, sum of the degrees of the new +// roots) +template +std::tuple, + typename GraphViewType::vertex_type, + typename GraphViewType::edge_type> +accumulate_new_roots(raft::handle_t const &handle, + vertex_partition_device_t vertex_partition, + typename GraphViewType::vertex_type const *components, + typename GraphViewType::edge_type const *degrees, + typename GraphViewType::vertex_type const *candidate_first, + typename GraphViewType::vertex_type const *candidate_last, + typename GraphViewType::vertex_type max_new_roots, + typename GraphViewType::edge_type degree_sum_threshold) +{ + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + + // FIXME: tuning parameter (time to scan max_scan_size elements should not take significantly + // longer than scanning a single element) + vertex_t max_scan_size = + static_cast(handle.get_device_properties().multiProcessorCount) * vertex_t{1024}; + + rmm::device_uvector new_roots(max_new_roots, handle.get_stream_view()); + vertex_t num_new_roots{0}; + vertex_t num_scanned{0}; + edge_t degree_sum{0}; + while ((candidate_first + num_scanned < candidate_last) && (degree_sum < degree_sum_threshold) && + (num_new_roots < max_new_roots)) { + auto scan_size = std::min( + max_scan_size, + static_cast(thrust::distance(candidate_first + num_scanned, candidate_last))); + + rmm::device_uvector tmp_new_roots(scan_size, handle.get_stream_view()); + rmm::device_uvector tmp_indices(tmp_new_roots.size(), handle.get_stream_view()); + auto input_pair_first = thrust::make_zip_iterator(thrust::make_tuple( + candidate_first + num_scanned, thrust::make_counting_iterator(vertex_t{0}))); + auto output_pair_first = + thrust::make_zip_iterator(thrust::make_tuple(tmp_new_roots.begin(), tmp_indices.begin())); + tmp_new_roots.resize( + static_cast(thrust::distance( + output_pair_first, + thrust::copy_if( + rmm::exec_policy(handle.get_stream_view()), + input_pair_first, + input_pair_first + scan_size, + output_pair_first, + [vertex_partition, components] __device__(auto pair) { + auto v = thrust::get<0>(pair); + return (components[vertex_partition.get_local_vertex_offset_from_vertex_nocheck(v)] == + invalid_component_id::value); + }))), + handle.get_stream_view()); + tmp_indices.resize(tmp_new_roots.size(), handle.get_stream_view()); + + if (tmp_new_roots.size() > 0) { + rmm::device_uvector tmp_cumulative_degrees(tmp_new_roots.size(), + handle.get_stream_view()); + thrust::transform( + rmm::exec_policy(handle.get_stream_view()), + tmp_new_roots.begin(), + tmp_new_roots.end(), + tmp_cumulative_degrees.begin(), + [vertex_partition, degrees] __device__(auto v) { + return degrees[vertex_partition.get_local_vertex_offset_from_vertex_nocheck(v)]; + }); + thrust::inclusive_scan(rmm::exec_policy(handle.get_stream_view()), + tmp_cumulative_degrees.begin(), + tmp_cumulative_degrees.end(), + tmp_cumulative_degrees.begin()); + auto last = thrust::lower_bound(rmm::exec_policy(handle.get_stream_view()), + tmp_cumulative_degrees.begin(), + tmp_cumulative_degrees.end(), + degree_sum_threshold - degree_sum); + if (last != tmp_cumulative_degrees.end()) { ++last; } + auto tmp_num_new_roots = + std::min(static_cast(thrust::distance(tmp_cumulative_degrees.begin(), last)), + max_new_roots - num_new_roots); + + thrust::copy(rmm::exec_policy(handle.get_stream_view()), + tmp_new_roots.begin(), + 
tmp_new_roots.begin() + tmp_num_new_roots, + new_roots.begin() + num_new_roots); + num_new_roots += tmp_num_new_roots; + vertex_t tmp_num_scanned{0}; + edge_t tmp_degree_sum{0}; + if (tmp_num_new_roots == static_cast(tmp_new_roots.size())) { + tmp_num_scanned = scan_size; + } else { + raft::update_host( + &tmp_num_scanned, tmp_indices.data() + tmp_num_new_roots, size_t{1}, handle.get_stream()); + } + raft::update_host(&tmp_degree_sum, + tmp_cumulative_degrees.data() + (tmp_num_new_roots - 1), + size_t{1}, + handle.get_stream()); + handle.get_stream_view().synchronize(); + num_scanned += tmp_num_scanned; + degree_sum += tmp_degree_sum; + } else { + num_scanned += scan_size; + } + } + + new_roots.resize(num_new_roots, handle.get_stream_view()); + new_roots.shrink_to_fit(handle.get_stream_view()); + + return std::make_tuple(std::move(new_roots), num_scanned, degree_sum); +} + +// FIXME: to silence the spurious warning (missing return statement ...) due to the nvcc bug +// (https://stackoverflow.com/questions/64523302/cuda-missing-return-statement-at-end-of-non-void- +// function-in-constexpr-if-fun) +template +struct v_op_t { + using vertex_type = typename GraphViewType::vertex_type; + + vertex_partition_device_t vertex_partition{}; + vertex_type *level_components{}; + decltype(thrust::make_zip_iterator(thrust::make_tuple( + static_cast(nullptr), static_cast(nullptr)))) edge_buffer_first{}; + // FIXME: we can use cuda::atomic instead but currently on a system with x86 + GPU, this requires + // placing the atomic barrier on managed memory and this adds additional complication. + size_t *num_edge_inserts{}; + size_t next_bucket_idx{}; + + template + __device__ std::enable_if_t>> + operator()(thrust::tuple tagged_v, int v_val /* dummy */) const + { + auto tag = thrust::get<1>(tagged_v); + auto v_offset = + vertex_partition.get_local_vertex_offset_from_vertex_nocheck(thrust::get<0>(tagged_v)); + // FIXME: better switch to atomic_ref after + // https://github.com/nvidia/libcudacxx/milestone/2 + auto old = + atomicCAS(level_components + v_offset, invalid_component_id::value, tag); + if (old != invalid_component_id::value && old != tag) { // conflict + static_assert(sizeof(unsigned long long int) == sizeof(size_t)); + auto edge_idx = atomicAdd(reinterpret_cast(num_edge_inserts), + static_cast(1)); + *(edge_buffer_first + edge_idx) = thrust::make_tuple(tag, old); + } + return (old == invalid_component_id::value) + ? thrust::optional>{thrust::make_tuple( + next_bucket_idx, std::byte{0} /* dummy */)} + : thrust::nullopt; + } + + template + __device__ std::enable_if_t>> + operator()(thrust::tuple tagged_v, int v_val /* dummy */) const + { + return thrust::optional>{ + thrust::make_tuple(next_bucket_idx, std::byte{0} /* dummy */)}; + } +}; + +template +void weakly_connected_components_impl(raft::handle_t const &handle, + GraphViewType const &push_graph_view, + typename GraphViewType::vertex_type *components, + bool do_expensive_check) +{ + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; + + static_assert(std::is_integral::value, + "GraphViewType::vertex_type should be integral."); + static_assert(!GraphViewType::is_adj_matrix_transposed, + "GraphViewType should support the push model."); + + auto const num_vertices = push_graph_view.get_number_of_vertices(); + if (num_vertices == 0) { return; } + + // 1. 
check input arguments + + CUGRAPH_EXPECTS( + push_graph_view.is_symmetric(), + "Invalid input argument: input graph should be symmetric for weakly connected components."); + + if (do_expensive_check) { + // nothing to do + } + + // 2. recursively run multi-root frontier expansion + + enum class Bucket { cur, next, num_buckets }; + // tuning parameter to balance work per iteration (should be large enough to be throughput + // bounded) vs # conflicts between frontiers with different roots (# conflicts == # edges for the + // next level) + auto degree_sum_threshold = + static_cast(handle.get_device_properties().multiProcessorCount) * edge_t{1024}; + + size_t num_levels{0}; + graph_t + level_graph(handle); + rmm::device_uvector level_renumber_map(0, handle.get_stream_view()); + std::vector> level_component_vectors{}; + // vertex ID in this level to the component ID in the previous level + std::vector> level_renumber_map_vectors{}; + std::vector level_local_vertex_first_vectors{}; + while (true) { + auto level_graph_view = num_levels == 0 ? push_graph_view : level_graph.view(); + vertex_partition_device_t vertex_partition(level_graph_view); + level_component_vectors.push_back(rmm::device_uvector( + num_levels == 0 ? vertex_t{0} : level_graph_view.get_number_of_local_vertices(), + handle.get_stream_view())); + level_renumber_map_vectors.push_back(std::move(level_renumber_map)); + level_local_vertex_first_vectors.push_back(level_graph_view.get_local_vertex_first()); + auto level_components = + num_levels == 0 ? components : level_component_vectors[num_levels].data(); + ++num_levels; + auto degrees = level_graph_view.compute_out_degrees(handle); + auto local_vertex_in_degree_sum = + thrust::reduce(rmm::exec_policy(handle.get_stream_view()), degrees.begin(), degrees.end()); + + // 2-1. filter out isolated vertices + + auto pair_first = thrust::make_zip_iterator(thrust::make_tuple( + thrust::make_counting_iterator(level_graph_view.get_local_vertex_first()), degrees.begin())); + thrust::transform(rmm::exec_policy(handle.get_stream_view()), + pair_first, + pair_first + level_graph_view.get_number_of_local_vertices(), + level_components, + [] __device__(auto pair) { + auto v = thrust::get<0>(pair); + auto degree = thrust::get<1>(pair); + return degree > 0 ? invalid_component_id::value : v; + }); + + // 2-2. initialize new root candidates + + // Vertices are first partitioned to high-degree vertices and low-degree vertices, we can reach + // degree_sum_threshold with fewer high-degree vertices leading to a higher compression ratio. + // The degree threshold is set to ceil(sqrt(degree_sum_threshold * 2)); this guarantees the + // compression ratio of at least 50% (ignoring rounding errors) even if all the selected roots + // fall into a single connected component as there will be at least as many non-root vertices in + // the connected component (assuming there are no multi-edges, if there are multi-edges, we may + // not get 50% compression in # vertices but still get compression in # edges). the remaining + // low-degree vertices will be randomly shuffled so comparable ratios of vertices will be + // selected as roots in the remaining connected components. 
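// ---------------------------------------------------------------------------------------------
// [Editor's illustration, not part of this file] A standalone, host-only sketch of the
// arithmetic behind the ceil(sqrt(degree_sum_threshold * 2)) cutoff described in the comment
// above. Ignoring rounding and multi-edges (as that comment notes), every root taken from the
// high-degree partition has degree >= d_min = sqrt(2 * S) while the per-iteration degree budget
// S admits at most roughly S / d_min = sqrt(S / 2) roots; so even if every selected root lands
// in a single connected component, that component still contains at least
// d_min - sqrt(S / 2) = sqrt(S / 2) non-root vertices, i.e. at least as many non-roots as roots,
// which is the >= 50% compression claim. The threshold value below assumes an 80-SM GPU purely
// for illustration (the real code uses multiProcessorCount * 1024).

#include <cmath>
#include <cstdint>
#include <iostream>

int main()
{
  int64_t const degree_sum_threshold = int64_t{80} * 1024;  // S = 81920 (assumed 80 SMs)
  auto const d_min                   = static_cast<int64_t>(
    std::ceil(std::sqrt(static_cast<double>(degree_sum_threshold) * 2.0)));  // 405
  auto const max_roots = degree_sum_threshold / d_min;  // ~sqrt(S / 2) = 202

  std::cout << "high-degree cutoff (d_min)        : " << d_min << '\n'
            << "max roots per iteration (approx.) : " << max_roots << '\n'
            << "non-root neighbors of one root    : >= " << (d_min - max_roots) << '\n';
  // 203 >= 202, so non-root vertices are at least as numerous as roots => >= 50% compression
  return 0;
}
// ------------------------------------- end of illustration ------------------------------------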
+ + rmm::device_uvector new_root_candidates( + level_graph_view.get_number_of_local_vertices(), handle.get_stream_view()); + new_root_candidates.resize( + thrust::distance( + new_root_candidates.begin(), + thrust::copy_if( + rmm::exec_policy(handle.get_stream_view()), + thrust::make_counting_iterator(level_graph_view.get_local_vertex_first()), + thrust::make_counting_iterator(level_graph_view.get_local_vertex_last()), + new_root_candidates.begin(), + [vertex_partition, level_components] __device__(auto v) { + return level_components[vertex_partition.get_local_vertex_offset_from_vertex_nocheck( + v)] == invalid_component_id::value; + })), + handle.get_stream_view()); + auto high_degree_partition_last = thrust::stable_partition( + rmm::exec_policy(handle.get_stream_view()), + new_root_candidates.begin(), + new_root_candidates.end(), + [vertex_partition, + degrees = degrees.data(), + threshold = static_cast( + ceil(sqrt(static_cast(degree_sum_threshold) * 2.0)))] __device__(auto v) { + return degrees[vertex_partition.get_local_vertex_offset_from_vertex_nocheck(v)] >= + threshold; + }); + thrust::shuffle(rmm::exec_policy(handle.get_stream_view()), + high_degree_partition_last, + new_root_candidates.end(), + thrust::default_random_engine()); + + double constexpr max_new_roots_ratio = + 0.1; // to avoid selecting all the vertices as roots leading to zero compression + auto max_new_roots = std::max( + static_cast(new_root_candidates.size() * max_new_roots_ratio), vertex_t{1}); + + auto init_max_new_roots = max_new_roots; + // to avoid selecting too many (possibly all) vertices as initial roots leading to no + // compression in the worst case. + if (GraphViewType::is_multi_gpu && + (level_graph_view.get_number_of_vertices() <= handle.get_comms().get_size() * 2)) { + auto &comm = handle.get_comms(); + auto const comm_rank = comm.get_rank(); + auto const comm_size = comm.get_size(); + + auto new_root_candidate_counts = + host_scalar_gather(comm, new_root_candidates.size(), int{0}, handle.get_stream()); + if (comm_rank == 0) { + std::vector gpuids{}; + gpuids.reserve( + std::reduce(new_root_candidate_counts.begin(), new_root_candidate_counts.end())); + for (size_t i = 0; i < new_root_candidate_counts.size(); ++i) { + gpuids.insert(gpuids.end(), new_root_candidate_counts[i], static_cast(i)); + } + std::random_device rd{}; + std::shuffle(gpuids.begin(), gpuids.end(), std::mt19937(rd())); + gpuids.resize( + std::max(static_cast(gpuids.size() * max_new_roots_ratio), vertex_t{1})); + std::vector init_max_new_root_counts(comm_size, vertex_t{0}); + for (size_t i = 0; i < gpuids.size(); ++i) { ++init_max_new_root_counts[gpuids[i]]; } + // FIXME: we need to add host_scalar_scatter +#if 1 + rmm::device_uvector d_counts(comm_size, handle.get_stream_view()); + raft::update_device(d_counts.data(), + init_max_new_root_counts.data(), + init_max_new_root_counts.size(), + handle.get_stream()); + device_bcast( + comm, d_counts.data(), d_counts.data(), d_counts.size(), int{0}, handle.get_stream()); + raft::update_host( + &init_max_new_roots, d_counts.data() + comm_rank, size_t{1}, handle.get_stream()); +#else + iinit_max_new_roots = + host_scalar_scatter(comm, init_max_new_root_counts.data(), int{0}, handle.get_stream()); +#endif + } else { + // FIXME: we need to add host_scalar_scatter +#if 1 + rmm::device_uvector d_counts(comm_size, handle.get_stream_view()); + device_bcast( + comm, d_counts.data(), d_counts.data(), d_counts.size(), int{0}, handle.get_stream()); + raft::update_host( + &init_max_new_roots, 
d_counts.data() + comm_rank, size_t{1}, handle.get_stream()); +#else + iinit_max_new_roots = + host_scalar_scatter(comm, init_max_new_root_counts.data(), int{0}, handle.get_stream()); +#endif + } + handle.get_stream_view().synchronize(); + } + + // 2-3. initialize vertex frontier, edge_buffer, and col_components (if multi-gpu) + + VertexFrontier(Bucket::num_buckets)> + vertex_frontier(handle); + vertex_t next_candidate_offset{0}; + edge_t edge_count{0}; + + auto edge_buffer = + allocate_dataframe_buffer>(0, handle.get_stream()); + // FIXME: we can use cuda::atomic instead but currently on a system with x86 + GPU, this + // requires placing the atomic variable on managed memory and this make it less attractive. + rmm::device_scalar num_edge_inserts(size_t{0}, handle.get_stream_view()); + + rmm::device_uvector col_components( + GraphViewType::is_multi_gpu ? level_graph_view.get_number_of_local_adj_matrix_partition_cols() + : vertex_t{0}, + handle.get_stream_view()); + if (GraphViewType::is_multi_gpu) { + thrust::fill(rmm::exec_policy(handle.get_stream_view()), + col_components.begin(), + col_components.end(), + invalid_component_id::value); + } + + // 2.4 iterate till every vertex gets visited + + size_t iter{0}; + while (true) { + if (edge_count < degree_sum_threshold) { + auto [new_roots, num_scanned, degree_sum] = + accumulate_new_roots(handle, + vertex_partition, + level_components, + degrees.data(), + new_root_candidates.data() + next_candidate_offset, + new_root_candidates.data() + new_root_candidates.size(), + iter == 0 ? init_max_new_roots : max_new_roots, + degree_sum_threshold - edge_count); + next_candidate_offset += num_scanned; + edge_count += degree_sum; + + thrust::sort( + rmm::exec_policy(handle.get_stream_view()), new_roots.begin(), new_roots.end()); + + thrust::for_each( + rmm::exec_policy(handle.get_stream_view()), + new_roots.begin(), + new_roots.end(), + [vertex_partition, components = level_components] __device__(auto c) { + components[vertex_partition.get_local_vertex_offset_from_vertex_nocheck(c)] = c; + }); + + auto pair_first = + thrust::make_zip_iterator(thrust::make_tuple(new_roots.begin(), new_roots.begin())); + vertex_frontier.get_bucket(static_cast(Bucket::cur)) + .insert(pair_first, pair_first + new_roots.size()); + } + + if (vertex_frontier.get_bucket(static_cast(Bucket::cur)).aggregate_size() == 0) { + break; + } + + if (GraphViewType::is_multi_gpu) { + copy_to_adj_matrix_col( + handle, + level_graph_view, + thrust::get<0>(vertex_frontier.get_bucket(static_cast(Bucket::cur)) + .begin() + .get_iterator_tuple()), + thrust::get<0>(vertex_frontier.get_bucket(static_cast(Bucket::cur)) + .end() + .get_iterator_tuple()), + level_components, + col_components.begin()); + } + + // FIXME: if we use cuco::static_map (no duplicates, ideally we need static_set), + // edge_buffer size cannot exceed (# local roots * # aggregate roots) + // FIXME: this is highly pessimistic and lazy in multi-gpu. we can tighten the upper bound. + resize_dataframe_buffer>( + edge_buffer, + num_edge_inserts.value(handle.get_stream_view()) + edge_count + + (GraphViewType::is_multi_gpu ? local_vertex_in_degree_sum : edge_t{0}), + handle.get_stream()); + + update_frontier_v_push_if_out_nbr( + handle, + level_graph_view, + vertex_frontier, + static_cast(Bucket::cur), + std::vector{static_cast(Bucket::next)}, + thrust::make_counting_iterator(0) /* dummy */, + thrust::make_counting_iterator(0) /* dummy */, + [col_components = GraphViewType::is_multi_gpu ? 
col_components.data() : level_components, + col_first = level_graph_view.get_local_adj_matrix_partition_col_first(), + edge_buffer_first = + get_dataframe_buffer_begin>(edge_buffer), + num_edge_inserts = num_edge_inserts.data()] __device__(auto tagged_src, + vertex_t dst, + auto src_val, + auto dst_val) { + auto tag = thrust::get<1>(tagged_src); + auto col_offset = dst - col_first; + // FIXME: better switch to atomic_ref after + // https://github.com/nvidia/libcudacxx/milestone/2 + auto old = + atomicCAS(col_components + col_offset, invalid_component_id::value, tag); + if (old != invalid_component_id::value && old != tag) { // conflict + static_assert(sizeof(unsigned long long int) == sizeof(size_t)); + auto edge_idx = atomicAdd(reinterpret_cast(num_edge_inserts), + static_cast(1)); + *(edge_buffer_first + edge_idx) = thrust::make_tuple(tag, old); + } + return (old == invalid_component_id::value) ? thrust::optional{tag} + : thrust::nullopt; + }, + reduce_op::null(), + thrust::make_constant_iterator(0) /* dummy */, + thrust::make_discard_iterator() /* dummy */, + v_op_t{ + vertex_partition, + level_components, + get_dataframe_buffer_begin>(edge_buffer), + num_edge_inserts.data(), + static_cast(Bucket::next)}); + + // FIXME: if we maintain sorted & unique edge_buffer elements, we can run sort & unique to + // the newly added edges and run merge & unique (this is unnecessary if we use + // cuco::static_map (no duplicates, ideally we need static_set) + + vertex_frontier.get_bucket(static_cast(Bucket::cur)).clear(); + vertex_frontier.get_bucket(static_cast(Bucket::cur)).shrink_to_fit(); + vertex_frontier.swap_buckets(static_cast(Bucket::cur), + static_cast(Bucket::next)); + edge_count = thrust::transform_reduce( + rmm::exec_policy(handle.get_stream_view()), + thrust::get<0>(vertex_frontier.get_bucket(static_cast(Bucket::cur)) + .begin() + .get_iterator_tuple()), + thrust::get<0>( + vertex_frontier.get_bucket(static_cast(Bucket::cur)).end().get_iterator_tuple()), + [vertex_partition, degrees = degrees.data()] __device__(auto v) { + return degrees[vertex_partition.get_local_vertex_offset_from_vertex_nocheck(v)]; + }, + edge_t{0}, + thrust::plus()); + + ++iter; + } + + // 2-5. 
construct the next level graph from the edges emitted on conflicts + + if (auto num_inserts = num_edge_inserts.value(handle.get_stream_view()); num_inserts > 0) { + resize_dataframe_buffer>( + edge_buffer, static_cast(num_inserts * 2), handle.get_stream()); + auto input_first = get_dataframe_buffer_begin>(edge_buffer); + auto output_first = thrust::make_zip_iterator( + thrust::make_tuple(thrust::get<1>(input_first.get_iterator_tuple()), + thrust::get<0>(input_first.get_iterator_tuple()))) + + num_inserts; + thrust::copy(rmm::exec_policy(handle.get_stream_view()), + input_first, + input_first + num_inserts, + output_first); + auto edge_first = get_dataframe_buffer_begin>(edge_buffer); + thrust::sort( + rmm::exec_policy(handle.get_stream_view()), edge_first, edge_first + num_inserts * 2); + auto last = thrust::unique( + rmm::exec_policy(handle.get_stream_view()), edge_first, edge_first + num_inserts * 2); + resize_dataframe_buffer>( + edge_buffer, static_cast(thrust::distance(edge_first, last)), handle.get_stream()); + shrink_to_fit_dataframe_buffer>(edge_buffer, + handle.get_stream()); + + if (GraphViewType::is_multi_gpu) { + auto &comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto &row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_size = row_comm.get_size(); + auto &col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_size = col_comm.get_size(); + + std::tie(edge_buffer, std::ignore) = + cugraph::experimental::groupby_gpuid_and_shuffle_values( + comm, + get_dataframe_buffer_begin>(edge_buffer), + get_dataframe_buffer_end>(edge_buffer), + [key_func = + cugraph::experimental::detail::compute_gpu_id_from_edge_t{ + comm_size, row_comm_size, col_comm_size}] __device__(auto val) { + return key_func(thrust::get<0>(val), thrust::get<1>(val)); + }, + handle.get_stream()); + auto edge_first = + get_dataframe_buffer_begin>(edge_buffer); + auto edge_last = get_dataframe_buffer_end>(edge_buffer); + thrust::sort(rmm::exec_policy(handle.get_stream_view()), edge_first, edge_last); + auto unique_edge_last = + thrust::unique(rmm::exec_policy(handle.get_stream_view()), edge_first, edge_last); + resize_dataframe_buffer>( + edge_buffer, + static_cast(thrust::distance(edge_first, unique_edge_last)), + handle.get_stream()); + shrink_to_fit_dataframe_buffer>(edge_buffer, + handle.get_stream()); + } + + std::tie(level_graph, level_renumber_map) = + create_graph_from_edgelist( + handle, + std::nullopt, + std::move(std::get<0>(edge_buffer)), + std::move(std::get<1>(edge_buffer)), + rmm::device_uvector(size_t{0}, handle.get_stream_view()), + graph_properties_t{true, false, false}, + true); + } else { + break; + } + } + + // 3. 
recursive update the current level component IDs from the next level component IDs + + for (size_t i = 0; i < num_levels - 1; ++i) { + size_t next_level = num_levels - 1 - i; + size_t current_level = next_level - 1; + + rmm::device_uvector next_local_vertices(level_renumber_map_vectors[next_level].size(), + handle.get_stream_view()); + thrust::sequence(rmm::exec_policy(handle.get_stream_view()), + next_local_vertices.begin(), + next_local_vertices.end(), + level_local_vertex_first_vectors[next_level]); + relabel( + handle, + std::make_tuple(next_local_vertices.data(), level_renumber_map_vectors[next_level].data()), + next_local_vertices.size(), + level_component_vectors[next_level].data(), + level_component_vectors[next_level].size(), + false); + relabel( + handle, + std::make_tuple(level_renumber_map_vectors[next_level].data(), + level_component_vectors[next_level].data()), + level_renumber_map_vectors[next_level].size(), + current_level == 0 ? components : level_component_vectors[current_level].data(), + current_level == 0 ? push_graph_view.get_number_of_local_vertices() + : level_component_vectors[current_level].size(), + true); + } +} + +} // namespace + +template +void weakly_connected_components( + raft::handle_t const &handle, + graph_view_t const &graph_view, + vertex_t *components, + bool do_expensive_check) +{ + weakly_connected_components_impl(handle, graph_view, components, do_expensive_check); +} + +// explicit instantiation + +template void weakly_connected_components( + raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t *components, + bool do_expensive_check); + +template void weakly_connected_components( + raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t *components, + bool do_expensive_check); + +template void weakly_connected_components( + raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t *components, + bool do_expensive_check); + +template void weakly_connected_components( + raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t *components, + bool do_expensive_check); + +template void weakly_connected_components( + raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t *components, + bool do_expensive_check); + +template void weakly_connected_components( + raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t *components, + bool do_expensive_check); + +template void weakly_connected_components( + raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t *components, + bool do_expensive_check); + +template void weakly_connected_components( + raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t *components, + bool do_expensive_check); + +template void weakly_connected_components( + raft::handle_t const &handle, + graph_view_t const &graph_view, + int64_t *components, + bool do_expensive_check); + +template void weakly_connected_components( + raft::handle_t const &handle, + graph_view_t const &graph_view, + int64_t *components, + bool do_expensive_check); + +template void weakly_connected_components( + raft::handle_t const &handle, + graph_view_t const &graph_view, + int64_t *components, + bool do_expensive_check); + +template void weakly_connected_components( + raft::handle_t const &handle, + graph_view_t const &graph_view, + int64_t *components, + bool do_expensive_check); + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 
39f0307f2d2..768f441a881 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -537,7 +537,7 @@ set(EXPERIMENTAL_BFS_TEST_SRCS ConfigureTest(EXPERIMENTAL_BFS_TEST "${EXPERIMENTAL_BFS_TEST_SRCS}") ################################################################################################### -# - Experimental BFS tests ------------------------------------------------------------------------ +# - Experimental Multi-source BFS tests ----------------------------------------------------------- set(EXPERIMENTAL_MSBFS_TEST_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/experimental/ms_bfs_test.cpp") @@ -569,6 +569,14 @@ set(EXPERIMENTAL_KATZ_CENTRALITY_TEST_SRCS ConfigureTest(EXPERIMENTAL_KATZ_CENTRALITY_TEST "${EXPERIMENTAL_KATZ_CENTRALITY_TEST_SRCS}") +################################################################################################### +# - WEAKLY CONNECTED COMPONENTS tests ------------------------------------------------------------- + +set(WEAKLY_CONNECTED_COMPONENTS_TEST_SRCS + "${CMAKE_CURRENT_SOURCE_DIR}/components/weakly_connected_components_test.cpp") + +ConfigureTest(WEAKLY_CONNECTED_COMPONENTS_TEST "${WEAKLY_CONNECTED_COMPONENTS_TEST_SRCS}") + ################################################################################################### # - Experimental RANDOM_WALKS tests ------------------------------------------------------------ @@ -652,6 +660,16 @@ if(BUILD_CUGRAPH_MG_TESTS) ConfigureTestMG(MG_LOUVAIN_TEST "${MG_LOUVAIN_TEST_SRCS}") target_link_libraries(MG_LOUVAIN_TEST PRIVATE MPI::MPI_C MPI::MPI_CXX) + ########################################################################################### + # - MG WEAKLY CONNECTED COMPONENTS tests -------------------------------------------------- + + set(MG_WEAKLY_CONNECTED_COMPONENTS_TEST_SRCS + "${CMAKE_CURRENT_SOURCE_DIR}/components/mg_weakly_connected_components_test.cpp") + + ConfigureTestMG(MG_WEAKLY_CONNECTED_COMPONENTS_TEST + "${MG_WEAKLY_CONNECTED_COMPONENTS_TEST_SRCS}") + target_link_libraries(MG_WEAKLY_CONNECTED_COMPONENTS_TEST PRIVATE MPI::MPI_C MPI::MPI_CXX) + else(MPI_CXX_FOUND) message(FATAL_ERROR "OpenMPI NOT found, cannot build MG tests.") endif(MPI_CXX_FOUND) diff --git a/cpp/tests/components/mg_weakly_connected_components_test.cpp b/cpp/tests/components/mg_weakly_connected_components_test.cpp new file mode 100644 index 00000000000..452c603f484 --- /dev/null +++ b/cpp/tests/components/mg_weakly_connected_components_test.cpp @@ -0,0 +1,227 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +// do the perf measurements +// enabled by command line parameter s'--perf' +// +static int PERF = 0; + +struct WeaklyConnectedComponents_Usecase { + bool check_correctness{true}; +}; + +template +class Tests_MGWeaklyConnectedComponents + : public ::testing::TestWithParam< + std::tuple> { + public: + Tests_MGWeaklyConnectedComponents() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + // Compare the results of running weakly connected components on multiple GPUs to that of a + // single-GPU run + template + void run_current_test( + WeaklyConnectedComponents_Usecase const& weakly_connected_components_usecase, + input_usecase_t const& input_usecase) + { + using weight_t = float; + + // 1. initialize handle + + raft::handle_t handle{}; + HighResClock hr_clock{}; + + raft::comms::initialize_mpi_comms(&handle, MPI_COMM_WORLD); + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + + auto row_comm_size = static_cast(sqrt(static_cast(comm_size))); + while (comm_size % row_comm_size != 0) { --row_comm_size; } + cugraph::partition_2d::subcomm_factory_t + subcomm_factory(handle, row_comm_size); + + // 2. create MG graph + + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle.get_comms().barrier(); + hr_clock.start(); + } + + cugraph::experimental::graph_t mg_graph(handle); + rmm::device_uvector d_mg_renumber_map_labels(0, handle.get_stream()); + std::tie(mg_graph, d_mg_renumber_map_labels) = + input_usecase.template construct_graph( + handle, false, true); + + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle.get_comms().barrier(); + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "MG construct_graph took " << elapsed_time * 1e-6 << " s.\n"; + } + + auto mg_graph_view = mg_graph.view(); + + // 3. run MG weakly connected components + + rmm::device_uvector d_mg_components(mg_graph_view.get_number_of_local_vertices(), + handle.get_stream()); + + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle.get_comms().barrier(); + hr_clock.start(); + } + + cugraph::experimental::weakly_connected_components( + handle, mg_graph_view, d_mg_components.data()); + + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle.get_comms().barrier(); + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "MG weakly_connected_components took " << elapsed_time * 1e-6 << " s.\n"; + } + + // 4. compare SG & MG results + + if (weakly_connected_components_usecase.check_correctness) { + // 4-1. aggregate MG results + + auto d_mg_aggregate_renumber_map_labels = cugraph::test::device_gatherv( + handle, d_mg_renumber_map_labels.data(), d_mg_renumber_map_labels.size()); + auto d_mg_aggregate_components = + cugraph::test::device_gatherv(handle, d_mg_components.data(), d_mg_components.size()); + + if (handle.get_comms().get_rank() == int{0}) { + // 4-2. 
unrenumbr MG results + + std::tie(std::ignore, d_mg_aggregate_components) = + cugraph::test::sort_by_key(handle, + d_mg_aggregate_renumber_map_labels.data(), + d_mg_aggregate_components.data(), + d_mg_aggregate_renumber_map_labels.size()); + + // 4-3. create SG graph + + cugraph::experimental::graph_t sg_graph(handle); + std::tie(sg_graph, std::ignore) = + input_usecase.template construct_graph( + handle, false, false); + + auto sg_graph_view = sg_graph.view(); + + ASSERT_TRUE(mg_graph_view.get_number_of_vertices() == + sg_graph_view.get_number_of_vertices()); + + // 4-4. run SG weakly connected components + + rmm::device_uvector d_sg_components(sg_graph_view.get_number_of_vertices(), + handle.get_stream()); + + cugraph::experimental::weakly_connected_components( + handle, sg_graph_view, d_sg_components.data()); + + // 4-5. compare + + std::vector h_mg_aggregate_components(mg_graph_view.get_number_of_vertices()); + raft::update_host(h_mg_aggregate_components.data(), + d_mg_aggregate_components.data(), + d_mg_aggregate_components.size(), + handle.get_stream()); + + std::vector h_sg_components(sg_graph_view.get_number_of_vertices()); + raft::update_host(h_sg_components.data(), + d_sg_components.data(), + d_sg_components.size(), + handle.get_stream()); + + handle.get_stream_view().synchronize(); + + std::unordered_map mg_to_sg_map{}; + for (size_t i = 0; i < h_sg_components.size(); ++i) { + mg_to_sg_map.insert({h_mg_aggregate_components[i], h_sg_components[i]}); + } + std::transform(h_mg_aggregate_components.begin(), + h_mg_aggregate_components.end(), + h_mg_aggregate_components.begin(), + [&mg_to_sg_map](auto mg_c) { return mg_to_sg_map[mg_c]; }); + + ASSERT_TRUE(std::equal( + h_sg_components.begin(), h_sg_components.end(), h_mg_aggregate_components.begin())) + << "components do not match with the SG values."; + } + } + } +}; + +using Tests_MGWeaklyConnectedComponents_File = + Tests_MGWeaklyConnectedComponents; +using Tests_MGWeaklyConnectedComponents_Rmat = + Tests_MGWeaklyConnectedComponents; + +TEST_P(Tests_MGWeaklyConnectedComponents_File, CheckInt32Int32) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + +TEST_P(Tests_MGWeaklyConnectedComponents_Rmat, CheckInt32Int32) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + +INSTANTIATE_TEST_SUITE_P( + file_test, + Tests_MGWeaklyConnectedComponents_File, + ::testing::Combine( + // enable correctness checks + ::testing::Values(WeaklyConnectedComponents_Usecase{0}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), + cugraph::test::File_Usecase("test/datasets/polbooks.mtx"), + cugraph::test::File_Usecase("test/datasets/netscience.mtx")))); + +CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/components/weakly_connected_components_test.cpp b/cpp/tests/components/weakly_connected_components_test.cpp new file mode 100644 index 00000000000..80fd1fea3d3 --- /dev/null +++ b/cpp/tests/components/weakly_connected_components_test.cpp @@ -0,0 +1,245 @@ +/* + * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governin_from_mtxg permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +// do the perf measurements +// enabled by command line parameter s'--perf' +// +static int PERF = 0; + +template +void weakly_connected_components_reference(edge_t const* offsets, + vertex_t const* indices, + vertex_t* components, + vertex_t num_vertices) +{ + vertex_t depth{0}; + + std::fill(components, + components + num_vertices, + cugraph::experimental::invalid_component_id::value); + + vertex_t num_scanned{0}; + while (true) { + auto it = std::find(components + num_scanned, + components + num_vertices, + cugraph::experimental::invalid_component_id::value); + if (it == components + num_vertices) { break; } + num_scanned += static_cast(std::distance(components + num_scanned, it)); + auto source = num_scanned; + *(components + source) = source; + std::vector cur_frontier_rows{source}; + std::vector new_frontier_rows{}; + + while (cur_frontier_rows.size() > 0) { + for (auto const row : cur_frontier_rows) { + auto nbr_offset_first = *(offsets + row); + auto nbr_offset_last = *(offsets + row + 1); + for (auto nbr_offset = nbr_offset_first; nbr_offset != nbr_offset_last; ++nbr_offset) { + auto nbr = *(indices + nbr_offset); + if (*(components + nbr) == cugraph::experimental::invalid_component_id::value) { + *(components + nbr) = source; + new_frontier_rows.push_back(nbr); + } + } + } + std::swap(cur_frontier_rows, new_frontier_rows); + new_frontier_rows.clear(); + } + } + + return; +} + +struct WeaklyConnectedComponents_Usecase { + bool check_correctness{true}; +}; + +template +class Tests_WeaklyConnectedComponent + : public ::testing::TestWithParam< + std::tuple> { + public: + Tests_WeaklyConnectedComponent() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + template + void run_current_test( + WeaklyConnectedComponents_Usecase const& weakly_connected_components_usecase, + input_usecase_t const& input_usecase) + { + constexpr bool renumber = true; + + using weight_t = float; + + raft::handle_t handle{}; + HighResClock hr_clock{}; + + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_clock.start(); + } + + cugraph::experimental::graph_t graph(handle); + rmm::device_uvector d_renumber_map_labels(0, handle.get_stream()); + std::tie(graph, d_renumber_map_labels) = + input_usecase.template construct_graph( + handle, false, renumber); + + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "construct_graph took " << elapsed_time * 1e-6 << " s.\n"; + } + + auto graph_view = graph.view(); + ASSERT_TRUE(graph_view.is_symmetric()) + << "Weakly connected components works only on undirected (symmetric) graphs."; + + rmm::device_uvector d_components(graph_view.get_number_of_vertices(), + handle.get_stream()); + + if (PERF) { + 
CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_clock.start(); + } + + cugraph::experimental::weakly_connected_components(handle, graph_view, d_components.data()); + + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "weakly_connected_components took " << elapsed_time * 1e-6 << " s.\n"; + } + + if (weakly_connected_components_usecase.check_correctness) { + cugraph::experimental::graph_t unrenumbered_graph( + handle); + if (renumber) { + std::tie(unrenumbered_graph, std::ignore) = + input_usecase.template construct_graph( + handle, false, false); + } + auto unrenumbered_graph_view = renumber ? unrenumbered_graph.view() : graph_view; + + std::vector h_offsets(unrenumbered_graph_view.get_number_of_vertices() + 1); + std::vector h_indices(unrenumbered_graph_view.get_number_of_edges()); + raft::update_host(h_offsets.data(), + unrenumbered_graph_view.offsets(), + unrenumbered_graph_view.get_number_of_vertices() + 1, + handle.get_stream()); + raft::update_host(h_indices.data(), + unrenumbered_graph_view.indices(), + unrenumbered_graph_view.get_number_of_edges(), + handle.get_stream()); + + handle.get_stream_view().synchronize(); + + std::vector h_reference_components( + unrenumbered_graph_view.get_number_of_vertices()); + + weakly_connected_components_reference(h_offsets.data(), + h_indices.data(), + h_reference_components.data(), + unrenumbered_graph_view.get_number_of_vertices()); + + std::vector h_cugraph_components(graph_view.get_number_of_vertices()); + if (renumber) { + rmm::device_uvector d_unrenumbered_components(size_t{0}, + handle.get_stream_view()); + std::tie(std::ignore, d_unrenumbered_components) = cugraph::test::sort_by_key( + handle, d_renumber_map_labels.data(), d_components.data(), d_renumber_map_labels.size()); + raft::update_host(h_cugraph_components.data(), + d_unrenumbered_components.data(), + d_unrenumbered_components.size(), + handle.get_stream()); + } else { + raft::update_host(h_cugraph_components.data(), + d_components.data(), + d_components.size(), + handle.get_stream()); + } + handle.get_stream_view().synchronize(); + + std::unordered_map cuda_to_reference_map{}; + for (size_t i = 0; i < h_reference_components.size(); ++i) { + cuda_to_reference_map.insert({h_cugraph_components[i], h_reference_components[i]}); + } + std::transform( + h_cugraph_components.begin(), + h_cugraph_components.end(), + h_cugraph_components.begin(), + [&cuda_to_reference_map](auto cugraph_c) { return cuda_to_reference_map[cugraph_c]; }); + + ASSERT_TRUE(std::equal( + h_reference_components.begin(), h_reference_components.end(), h_cugraph_components.begin())) + << "components do not match with the reference values."; + } + } +}; + +using Tests_WeaklyConnectedComponents_File = + Tests_WeaklyConnectedComponent; +using Tests_WeaklyConnectedComponents_Rmat = + Tests_WeaklyConnectedComponent; + +// FIXME: add tests for type combinations +TEST_P(Tests_WeaklyConnectedComponents_File, CheckInt32Int32) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + +INSTANTIATE_TEST_SUITE_P( + file_test, + Tests_WeaklyConnectedComponents_File, + ::testing::Values( + // enable correctness checks + std::make_tuple(WeaklyConnectedComponents_Usecase{}, + cugraph::test::File_Usecase("test/datasets/karate.mtx")), + std::make_tuple(WeaklyConnectedComponents_Usecase{}, + cugraph::test::File_Usecase("test/datasets/polbooks.mtx")), + 
std::make_tuple(WeaklyConnectedComponents_Usecase{}, + cugraph::test::File_Usecase("test/datasets/netscience.mtx")))); + +CUGRAPH_TEST_PROGRAM_MAIN() From b0644dad4a432b6c16e95cbc189d7d096363f05d Mon Sep 17 00:00:00 2001 From: AJ Schmidt Date: Fri, 21 May 2021 08:48:35 -0400 Subject: [PATCH 262/343] Update `CHANGELOG.md` links for calver (#1608) This PR updates the `0.20` references in `CHANGELOG.md` to be `21.06`. Authors: - AJ Schmidt (https://github.com/ajschmidt8) Approvers: - Dillon Cullinan (https://github.com/dillon-cullinan) - Brad Rees (https://github.com/BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1608 --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4170e9c4bc0..e51d1bc1534 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ -# cuGraph 0.20.0 (Date TBD) +# cuGraph 21.06.00 (Date TBD) -Please see https://github.com/rapidsai/cugraph/releases/tag/v0.20.0a for the latest changes to this development branch. +Please see https://github.com/rapidsai/cugraph/releases/tag/v21.06.00a for the latest changes to this development branch. # cuGraph 0.19.0 (21 Apr 2021) From ab19cbd0935031139394d3d1ddeb009646eaa03e Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Fri, 21 May 2021 13:41:43 -0400 Subject: [PATCH 263/343] WCC performance/memory footprint optimization (#1605) - Performance tune/reduce memory footprint for WCC for SG & MG (when # GPUs < (E/V)^2, additional memory footprint optimizations are necessary if # GPUs > (E/V)^2 and this will be addressed in separate PRs). - MG bug fixes Authors: - Seunghwa Kang (https://github.com/seunghwak) Approvers: - Andrei Schaffer (https://github.com/aschaffer) - Chuck Hastings (https://github.com/ChuckHastings) URL: https://github.com/rapidsai/cugraph/pull/1605 --- .../update_frontier_v_push_if_out_nbr.cuh | 139 ++++++++++++++-- .../components/weakly_connected_components.cu | 150 +++++++++++++----- cpp/src/experimental/graph.cu | 5 +- .../mg_weakly_connected_components_test.cpp | 16 ++ .../weakly_connected_components_test.cpp | 22 +++ .../utilities/matrix_market_file_utilities.cu | 2 +- cpp/tests/utilities/rmat_utilities.cu | 27 +++- 7 files changed, 305 insertions(+), 56 deletions(-) diff --git a/cpp/include/cugraph/patterns/update_frontier_v_push_if_out_nbr.cuh b/cpp/include/cugraph/patterns/update_frontier_v_push_if_out_nbr.cuh index 5cadf7af2a2..b1d63cc942a 100644 --- a/cpp/include/cugraph/patterns/update_frontier_v_push_if_out_nbr.cuh +++ b/cpp/include/cugraph/patterns/update_frontier_v_push_if_out_nbr.cuh @@ -505,6 +505,121 @@ size_t sort_and_reduce_buffer_elements(raft::handle_t const& handle, } // namespace detail +template +typename GraphViewType::edge_type compute_num_out_nbrs_from_frontier( + raft::handle_t const& handle, + GraphViewType const& graph_view, + VertexFrontierType const& frontier, + size_t cur_frontier_bucket_idx) +{ + static_assert(!GraphViewType::is_adj_matrix_transposed, + "GraphViewType should support the push model."); + + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using key_t = typename VertexFrontierType::key_type; + + edge_t ret{0}; + + if (GraphViewType::is_multi_gpu) { + auto& comm = handle.get_comms(); + // barrier is necessary here to avoid potential overlap (which can leads to deadlock) between + // two different communicators (beginning of col_comm) +#if 1 + // FIXME: temporary hack till UCC is integrated 
into RAFT (so we can use UCC barrier with DASK + // and MPI barrier with MPI) + host_barrier(comm, handle.get_stream_view()); +#else + handle.get_stream_view().synchronize(); + comm.barrier(); // currently, this is ncclAllReduce +#endif + } + + auto const& cur_frontier_bucket = frontier.get_bucket(cur_frontier_bucket_idx); + vertex_t const* local_frontier_vertex_first{nullptr}; + vertex_t const* local_frontier_vertex_last{nullptr}; + if constexpr (std::is_same_v) { + local_frontier_vertex_first = cur_frontier_bucket.begin(); + local_frontier_vertex_last = cur_frontier_bucket.end(); + } else { + local_frontier_vertex_first = thrust::get<0>(cur_frontier_bucket.begin().get_iterator_tuple()); + local_frontier_vertex_last = thrust::get<0>(cur_frontier_bucket.end().get_iterator_tuple()); + } + + std::vector local_frontier_sizes{}; + if (GraphViewType::is_multi_gpu) { + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + local_frontier_sizes = + host_scalar_allgather(col_comm, cur_frontier_bucket.size(), handle.get_stream()); + } else { + local_frontier_sizes = std::vector{static_cast(cur_frontier_bucket.size())}; + } + for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { + matrix_partition_device_t matrix_partition(graph_view, i); + + if (GraphViewType::is_multi_gpu) { + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_rank = col_comm.get_rank(); + + rmm::device_uvector frontier_vertices(local_frontier_sizes[i], + handle.get_stream_view()); + // FIXME: this copy is unnecessary, better fix RAFT comm's bcast to take const iterators for + // input + if (col_comm_rank == static_cast(i)) { + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + local_frontier_vertex_first, + local_frontier_vertex_last, + frontier_vertices.begin()); + } + device_bcast(col_comm, + frontier_vertices.data(), + frontier_vertices.data(), + frontier_vertices.size(), + static_cast(i), + handle.get_stream()); + + ret += thrust::transform_reduce( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + frontier_vertices.begin(), + frontier_vertices.end(), + [matrix_partition] __device__(auto major) { + auto major_offset = matrix_partition.get_major_offset_from_major_nocheck(major); + return matrix_partition.get_local_degree(major_offset); + }, + edge_t{0}, + thrust::plus()); + } else { + assert(i == 0); + ret += thrust::transform_reduce( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + local_frontier_vertex_first, + local_frontier_vertex_last, + [matrix_partition] __device__(auto major) { + auto major_offset = matrix_partition.get_major_offset_from_major_nocheck(major); + return matrix_partition.get_local_degree(major_offset); + }, + edge_t{0}, + thrust::plus()); + } + } + + if (GraphViewType::is_multi_gpu) { + auto& comm = handle.get_comms(); + // barrier is necessary here to avoid potential overlap (which can leads to deadlock) between + // two different communicators (end of col_comm) +#if 1 + // FIXME: temporary hack till UCC is integrated into RAFT (so we can use UCC barrier with DASK + // and MPI barrier with MPI) + host_barrier(comm, handle.get_stream_view()); +#else + handle.get_stream_view().synchronize(); + comm.barrier(); // currently, this is ncclAllReduce +#endif + } + + return ret; +} + // FIXME: this documentation needs to be updated due to (tagged-)vertex support /** * @brief Update (tagged-)vertex frontier and (tagged-)vertex 
property values iterating over the @@ -628,23 +743,27 @@ void update_frontier_v_push_if_out_nbr( auto payload_buffer = detail::allocate_optional_payload_buffer(size_t{0}, handle.get_stream()); rmm::device_scalar buffer_idx(size_t{0}, handle.get_stream()); + std::vector local_frontier_sizes{}; + if (GraphViewType::is_multi_gpu) { + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + local_frontier_sizes = host_scalar_allgather( + col_comm, + static_cast(thrust::distance(frontier_key_first, frontier_key_last)), + handle.get_stream()); + } else { + local_frontier_sizes = std::vector{static_cast( + static_cast(thrust::distance(frontier_key_first, frontier_key_last)))}; + } for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { matrix_partition_device_t matrix_partition(graph_view, i); auto matrix_partition_frontier_key_buffer = allocate_dataframe_buffer(size_t{0}, handle.get_stream()); - vertex_t matrix_partition_frontier_size{0}; + vertex_t matrix_partition_frontier_size = static_cast(local_frontier_sizes[i]); if (GraphViewType::is_multi_gpu) { auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); auto const col_comm_rank = col_comm.get_rank(); - matrix_partition_frontier_size = host_scalar_bcast( - col_comm, - (static_cast(col_comm_rank) == i) - ? static_cast(thrust::distance(frontier_key_first, frontier_key_last)) - : vertex_t{0} /* dummy */, - i, - handle.get_stream()); resize_dataframe_buffer( matrix_partition_frontier_key_buffer, matrix_partition_frontier_size, handle.get_stream()); @@ -662,8 +781,6 @@ void update_frontier_v_push_if_out_nbr( i, handle.get_stream()); } else { - matrix_partition_frontier_size = - static_cast(thrust::distance(frontier_key_first, frontier_key_last)); resize_dataframe_buffer( matrix_partition_frontier_key_buffer, matrix_partition_frontier_size, handle.get_stream()); thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), @@ -1016,7 +1133,7 @@ void update_frontier_v_push_if_out_nbr( get_dataframe_buffer_begin(key_buffer), next_frontier_bucket_indices); } -} // namespace experimental +} } // namespace experimental } // namespace cugraph diff --git a/cpp/src/components/weakly_connected_components.cu b/cpp/src/components/weakly_connected_components.cu index 46fe8b10191..0688dc7408f 100644 --- a/cpp/src/components/weakly_connected_components.cu +++ b/cpp/src/components/weakly_connected_components.cu @@ -71,10 +71,10 @@ accumulate_new_roots(raft::handle_t const &handle, using vertex_t = typename GraphViewType::vertex_type; using edge_t = typename GraphViewType::edge_type; - // FIXME: tuning parameter (time to scan max_scan_size elements should not take significantly - // longer than scanning a single element) + // tuning parameter (time to scan max_scan_size elements should not take significantly longer than + // scanning a single element) vertex_t max_scan_size = - static_cast(handle.get_device_properties().multiProcessorCount) * vertex_t{1024}; + static_cast(handle.get_device_properties().multiProcessorCount) * vertex_t{16384}; rmm::device_uvector new_roots(max_new_roots, handle.get_stream_view()); vertex_t num_new_roots{0}; @@ -178,6 +178,7 @@ struct v_op_t { // placing the atomic barrier on managed memory and this adds additional complication. 
size_t *num_edge_inserts{}; size_t next_bucket_idx{}; + size_t conflict_bucket_idx{}; // relevant only if GraphViewType::is_multi_gpu is true template __device__ std::enable_if_t>> @@ -191,15 +192,14 @@ struct v_op_t { auto old = atomicCAS(level_components + v_offset, invalid_component_id::value, tag); if (old != invalid_component_id::value && old != tag) { // conflict - static_assert(sizeof(unsigned long long int) == sizeof(size_t)); - auto edge_idx = atomicAdd(reinterpret_cast(num_edge_inserts), - static_cast(1)); - *(edge_buffer_first + edge_idx) = thrust::make_tuple(tag, old); + return thrust::optional>{ + thrust::make_tuple(conflict_bucket_idx, std::byte{0} /* dummy */)}; + } else { + return (old == invalid_component_id::value) + ? thrust::optional>{thrust::make_tuple( + next_bucket_idx, std::byte{0} /* dummy */)} + : thrust::nullopt; } - return (old == invalid_component_id::value) - ? thrust::optional>{thrust::make_tuple( - next_bucket_idx, std::byte{0} /* dummy */)} - : thrust::nullopt; } template @@ -241,7 +241,12 @@ void weakly_connected_components_impl(raft::handle_t const &handle, // 2. recursively run multi-root frontier expansion - enum class Bucket { cur, next, num_buckets }; + enum class Bucket { + cur, + next, + conflict /* relevant only if GraphViewType::is_multi_gpu is true */, + num_buckets + }; // tuning parameter to balance work per iteration (should be large enough to be throughput // bounded) vs # conflicts between frontiers with different roots (# conflicts == # edges for the // next level) @@ -272,8 +277,6 @@ void weakly_connected_components_impl(raft::handle_t const &handle, num_levels == 0 ? components : level_component_vectors[num_levels].data(); ++num_levels; auto degrees = level_graph_view.compute_out_degrees(handle); - auto local_vertex_in_degree_sum = - thrust::reduce(rmm::exec_policy(handle.get_stream_view()), degrees.begin(), degrees.end()); // 2-1. filter out isolated vertices @@ -333,7 +336,8 @@ void weakly_connected_components_impl(raft::handle_t const &handle, thrust::default_random_engine()); double constexpr max_new_roots_ratio = - 0.1; // to avoid selecting all the vertices as roots leading to zero compression + 0.05; // to avoid selecting all the vertices as roots leading to zero compression + static_assert(max_new_roots_ratio > 0.0); auto max_new_roots = std::max( static_cast(new_root_candidates.size() * max_new_roots_ratio), vertex_t{1}); @@ -341,13 +345,21 @@ void weakly_connected_components_impl(raft::handle_t const &handle, // to avoid selecting too many (possibly all) vertices as initial roots leading to no // compression in the worst case. if (GraphViewType::is_multi_gpu && - (level_graph_view.get_number_of_vertices() <= handle.get_comms().get_size() * 2)) { + (level_graph_view.get_number_of_vertices() <= + static_cast(handle.get_comms().get_size() * ceil(1.0 / max_new_roots_ratio)))) { auto &comm = handle.get_comms(); auto const comm_rank = comm.get_rank(); auto const comm_size = comm.get_size(); + // FIXME: a temporary workaround for a NCCL(2.9.6) bug that causes a hang on DGX1 (due to + // remote memory allocation), host_scalar_gather is sufficient otherwise. 
+#if 1 + auto new_root_candidate_counts = + host_scalar_allgather(comm, new_root_candidates.size(), handle.get_stream()); +#else auto new_root_candidate_counts = host_scalar_gather(comm, new_root_candidates.size(), int{0}, handle.get_stream()); +#endif if (comm_rank == 0) { std::vector gpuids{}; gpuids.reserve( @@ -423,7 +435,8 @@ void weakly_connected_components_impl(raft::handle_t const &handle, size_t iter{0}; while (true) { - if (edge_count < degree_sum_threshold) { + if ((edge_count < degree_sum_threshold) && + (next_candidate_offset < static_cast(new_root_candidates.size()))) { auto [new_roots, num_scanned, degree_sum] = accumulate_new_roots(handle, vertex_partition, @@ -471,14 +484,18 @@ void weakly_connected_components_impl(raft::handle_t const &handle, col_components.begin()); } - // FIXME: if we use cuco::static_map (no duplicates, ideally we need static_set), - // edge_buffer size cannot exceed (# local roots * # aggregate roots) - // FIXME: this is highly pessimistic and lazy in multi-gpu. we can tighten the upper bound. + auto max_pushes = + GraphViewType::is_multi_gpu + ? compute_num_out_nbrs_from_frontier( + handle, level_graph_view, vertex_frontier, static_cast(Bucket::cur)) + : edge_count; + + // FIXME: if we use cuco::static_map (no duplicates, ideally we need static_set), edge_buffer + // size cannot exceed (# roots)^2 and we can avoid additional sort & unique (but resizing the + // buffer may be more expensive). + auto old_num_edge_inserts = num_edge_inserts.value(handle.get_stream_view()); resize_dataframe_buffer>( - edge_buffer, - num_edge_inserts.value(handle.get_stream_view()) + edge_count + - (GraphViewType::is_multi_gpu ? local_vertex_in_degree_sum : edge_t{0}), - handle.get_stream()); + edge_buffer, old_num_edge_inserts + max_pushes, handle.get_stream()); update_frontier_v_push_if_out_nbr( handle, @@ -506,7 +523,9 @@ void weakly_connected_components_impl(raft::handle_t const &handle, static_assert(sizeof(unsigned long long int) == sizeof(size_t)); auto edge_idx = atomicAdd(reinterpret_cast(num_edge_inserts), static_cast(1)); - *(edge_buffer_first + edge_idx) = thrust::make_tuple(tag, old); + // keep only the edges in the lower triangular part + *(edge_buffer_first + edge_idx) = + tag >= old ? thrust::make_tuple(tag, old) : thrust::make_tuple(old, tag); } return (old == invalid_component_id::value) ? 
thrust::optional{tag} : thrust::nullopt; @@ -519,11 +538,66 @@ void weakly_connected_components_impl(raft::handle_t const &handle, level_components, get_dataframe_buffer_begin>(edge_buffer), num_edge_inserts.data(), - static_cast(Bucket::next)}); + static_cast(Bucket::next), + static_cast(Bucket::conflict)}); + + if (GraphViewType::is_multi_gpu) { + auto cur_num_edge_inserts = num_edge_inserts.value(handle.get_stream_view()); + auto &conflict_bucket = vertex_frontier.get_bucket(static_cast(Bucket::conflict)); + resize_dataframe_buffer>( + edge_buffer, cur_num_edge_inserts + conflict_bucket.size(), handle.get_stream()); + thrust::for_each( + rmm::exec_policy(handle.get_stream_view()), + conflict_bucket.begin(), + conflict_bucket.end(), + [vertex_partition, + level_components, + edge_buffer_first = + get_dataframe_buffer_begin>(edge_buffer), + num_edge_inserts = num_edge_inserts.data()] __device__(auto tagged_v) { + auto v_offset = vertex_partition.get_local_vertex_offset_from_vertex_nocheck( + thrust::get<0>(tagged_v)); + auto old = *(level_components + v_offset); + auto tag = thrust::get<1>(tagged_v); + static_assert(sizeof(unsigned long long int) == sizeof(size_t)); + auto edge_idx = atomicAdd(reinterpret_cast(num_edge_inserts), + static_cast(1)); + // keep only the edges in the lower triangular part + *(edge_buffer_first + edge_idx) = + tag >= old ? thrust::make_tuple(tag, old) : thrust::make_tuple(old, tag); + }); + conflict_bucket.clear(); + } - // FIXME: if we maintain sorted & unique edge_buffer elements, we can run sort & unique to - // the newly added edges and run merge & unique (this is unnecessary if we use - // cuco::static_map (no duplicates, ideally we need static_set) + // maintain the list of sorted unique edges (we can avoid this if we use cuco::static_map(no + // duplicates, ideally we need static_set)). + auto new_num_edge_inserts = num_edge_inserts.value(handle.get_stream_view()); + if (new_num_edge_inserts > old_num_edge_inserts) { + auto edge_first = + get_dataframe_buffer_begin>(edge_buffer); + thrust::sort(rmm::exec_policy(handle.get_stream_view()), + edge_first + old_num_edge_inserts, + edge_first + new_num_edge_inserts); + if (old_num_edge_inserts > 0) { + auto tmp_edge_buffer = allocate_dataframe_buffer>( + new_num_edge_inserts, handle.get_stream()); + auto tmp_edge_first = + get_dataframe_buffer_begin>(tmp_edge_buffer); + thrust::merge(rmm::exec_policy(handle.get_stream_view()), + edge_first, + edge_first + old_num_edge_inserts, + edge_first + old_num_edge_inserts, + edge_first + new_num_edge_inserts, + tmp_edge_first); + edge_buffer = std::move(tmp_edge_buffer); + } + edge_first = get_dataframe_buffer_begin>(edge_buffer); + auto unique_edge_last = thrust::unique(rmm::exec_policy(handle.get_stream_view()), + edge_first, + edge_first + new_num_edge_inserts); + auto num_unique_edges = static_cast(thrust::distance(edge_first, unique_edge_last)); + num_edge_inserts.set_value(num_unique_edges, handle.get_stream_view()); + } vertex_frontier.get_bucket(static_cast(Bucket::cur)).clear(); vertex_frontier.get_bucket(static_cast(Bucket::cur)).shrink_to_fit(); @@ -547,7 +621,14 @@ void weakly_connected_components_impl(raft::handle_t const &handle, // 2-5. 
construct the next level graph from the edges emitted on conflicts - if (auto num_inserts = num_edge_inserts.value(handle.get_stream_view()); num_inserts > 0) { + auto num_inserts = num_edge_inserts.value(handle.get_stream_view()); + auto aggregate_num_inserts = num_inserts; + if (GraphViewType::is_multi_gpu) { + auto &comm = handle.get_comms(); + aggregate_num_inserts = host_scalar_allreduce(comm, num_inserts, handle.get_stream()); + } + + if (aggregate_num_inserts > 0) { resize_dataframe_buffer>( edge_buffer, static_cast(num_inserts * 2), handle.get_stream()); auto input_first = get_dataframe_buffer_begin>(edge_buffer); @@ -559,15 +640,6 @@ void weakly_connected_components_impl(raft::handle_t const &handle, input_first, input_first + num_inserts, output_first); - auto edge_first = get_dataframe_buffer_begin>(edge_buffer); - thrust::sort( - rmm::exec_policy(handle.get_stream_view()), edge_first, edge_first + num_inserts * 2); - auto last = thrust::unique( - rmm::exec_policy(handle.get_stream_view()), edge_first, edge_first + num_inserts * 2); - resize_dataframe_buffer>( - edge_buffer, static_cast(thrust::distance(edge_first, last)), handle.get_stream()); - shrink_to_fit_dataframe_buffer>(edge_buffer, - handle.get_stream()); if (GraphViewType::is_multi_gpu) { auto &comm = handle.get_comms(); diff --git a/cpp/src/experimental/graph.cu b/cpp/src/experimental/graph.cu index 7793898def0..ad6f51d75fe 100644 --- a/cpp/src/experimental/graph.cu +++ b/cpp/src/experimental/graph.cu @@ -295,8 +295,9 @@ graph_t::max())); rmm::device_uvector d_thresholds(detail::num_segments_per_vertex_partition - 1, default_stream); - std::vector h_thresholds = {static_cast(detail::mid_degree_threshold), - static_cast(detail::low_degree_threshold)}; + std::vector h_thresholds = { + static_cast(detail::mid_degree_threshold * col_comm_size), + static_cast(detail::low_degree_threshold * col_comm_size)}; raft::update_device( d_thresholds.data(), h_thresholds.data(), h_thresholds.size(), default_stream); diff --git a/cpp/tests/components/mg_weakly_connected_components_test.cpp b/cpp/tests/components/mg_weakly_connected_components_test.cpp index 452c603f484..dd61dafc682 100644 --- a/cpp/tests/components/mg_weakly_connected_components_test.cpp +++ b/cpp/tests/components/mg_weakly_connected_components_test.cpp @@ -224,4 +224,20 @@ INSTANTIATE_TEST_SUITE_P( cugraph::test::File_Usecase("test/datasets/polbooks.mtx"), cugraph::test::File_Usecase("test/datasets/netscience.mtx")))); +INSTANTIATE_TEST_SUITE_P( + rmat_small_test, + Tests_MGWeaklyConnectedComponents_Rmat, + ::testing::Values( + // enable correctness checks + std::make_tuple(WeaklyConnectedComponents_Usecase{}, + cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false, true)))); + +INSTANTIATE_TEST_SUITE_P( + rmat_large_test, + Tests_MGWeaklyConnectedComponents_Rmat, + ::testing::Values( + // disable correctness checks + std::make_tuple(WeaklyConnectedComponents_Usecase{false}, + cugraph::test::Rmat_Usecase(20, 16, 0.57, 0.19, 0.19, 0, true, false, true)))); + CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/components/weakly_connected_components_test.cpp b/cpp/tests/components/weakly_connected_components_test.cpp index 80fd1fea3d3..2332aaff261 100644 --- a/cpp/tests/components/weakly_connected_components_test.cpp +++ b/cpp/tests/components/weakly_connected_components_test.cpp @@ -230,6 +230,12 @@ TEST_P(Tests_WeaklyConnectedComponents_File, CheckInt32Int32) run_current_test(std::get<0>(param), std::get<1>(param)); } 
+TEST_P(Tests_WeaklyConnectedComponents_Rmat, CheckInt32Int32) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + INSTANTIATE_TEST_SUITE_P( file_test, Tests_WeaklyConnectedComponents_File, @@ -242,4 +248,20 @@ INSTANTIATE_TEST_SUITE_P( std::make_tuple(WeaklyConnectedComponents_Usecase{}, cugraph::test::File_Usecase("test/datasets/netscience.mtx")))); +INSTANTIATE_TEST_SUITE_P( + rmat_small_test, + Tests_WeaklyConnectedComponents_Rmat, + ::testing::Values( + // enable correctness checks + std::make_tuple(WeaklyConnectedComponents_Usecase{}, + cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false)))); + +INSTANTIATE_TEST_SUITE_P( + rmat_large_test, + Tests_WeaklyConnectedComponents_Rmat, + ::testing::Values( + // disable correctness checks + std::make_tuple(WeaklyConnectedComponents_Usecase{false}, + cugraph::test::Rmat_Usecase(20, 16, 0.57, 0.19, 0.19, 0, true, false)))); + CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/utilities/matrix_market_file_utilities.cu b/cpp/tests/utilities/matrix_market_file_utilities.cu index 9ef03b7ff53..0457cbcc918 100644 --- a/cpp/tests/utilities/matrix_market_file_utilities.cu +++ b/cpp/tests/utilities/matrix_market_file_utilities.cu @@ -274,7 +274,7 @@ read_edgelist_from_matrix_market_file(raft::handle_t const& handle, size_t nnz{}; FILE* file = fopen(graph_file_full_path.c_str(), "r"); - CUGRAPH_EXPECTS(file != nullptr, "fopen failure."); + CUGRAPH_EXPECTS(file != nullptr, "fopen (%s) failure.", graph_file_full_path.c_str()); size_t tmp_m{}; size_t tmp_k{}; diff --git a/cpp/tests/utilities/rmat_utilities.cu b/cpp/tests/utilities/rmat_utilities.cu index 8d94f936ab7..8f6a6cf499a 100644 --- a/cpp/tests/utilities/rmat_utilities.cu +++ b/cpp/tests/utilities/rmat_utilities.cu @@ -143,8 +143,29 @@ generate_graph_from_rmat_params(raft::handle_t const& handle, } if (undirected) { - // FIXME: need to symmetrize - CUGRAPH_FAIL("unimplemented."); +// FIXME: may need to undo this and handle symmetrization elsewhere once the new test graph +// generation API gets integrated +#if 1 + auto offset = d_edgelist_rows.size(); + d_edgelist_rows.resize(offset * 2, handle.get_stream()); + d_edgelist_cols.resize(d_edgelist_rows.size(), handle.get_stream()); + d_edgelist_weights.resize(test_weighted ? 
d_edgelist_rows.size() : size_t{0}, + handle.get_stream()); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + d_edgelist_cols.begin(), + d_edgelist_cols.begin() + offset, + d_edgelist_rows.begin() + offset); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + d_edgelist_rows.begin(), + d_edgelist_rows.begin() + offset, + d_edgelist_cols.begin() + offset); + if (test_weighted) { + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + d_edgelist_weights.begin(), + d_edgelist_weights.begin() + offset, + d_edgelist_weights.begin() + offset); + } +#endif } if (multi_gpu) { @@ -240,7 +261,7 @@ generate_graph_from_rmat_params(raft::handle_t const& handle, std::move(d_edgelist_rows), std::move(d_edgelist_cols), std::move(d_edgelist_weights), - cugraph::experimental::graph_properties_t{false, false, test_weighted}, + cugraph::experimental::graph_properties_t{undirected, true, test_weighted}, renumber); } From 570d8113ac9bcbbebd7156613d0a3f0d994f63d6 Mon Sep 17 00:00:00 2001 From: Chuck Hastings <45364586+ChuckHastings@users.noreply.github.com> Date: Fri, 21 May 2021 13:42:48 -0400 Subject: [PATCH 264/343] Fea rmm device buffer change (#1609) RMM is going to require specifying the stream in the device_buffer constructor. This changes the one `cugraph` item and it updates to a version of `cuhornet` that makes the necessary changes. Authors: - Chuck Hastings (https://github.com/ChuckHastings) Approvers: - Seunghwa Kang (https://github.com/seunghwak) URL: https://github.com/rapidsai/cugraph/pull/1609 --- cpp/CMakeLists.txt | 2 +- cpp/src/traversal/sssp.cu | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 62774a74b1e..978ec93cc2e 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -275,7 +275,7 @@ message("set LIBCUDACXX_INCLUDE_DIR to: ${LIBCUDACXX_INCLUDE_DIR}") FetchContent_Declare( cuhornet GIT_REPOSITORY https://github.com/rapidsai/cuhornet.git - GIT_TAG 6d2fc894cc56dd2ca8fc9d1523a18a6ec444b663 + GIT_TAG 261399356e62bd76fa7628880f1a847aee713eed SOURCE_SUBDIR hornet ) diff --git a/cpp/src/traversal/sssp.cu b/cpp/src/traversal/sssp.cu index c79c4e5a127..8dcaffd953a 100644 --- a/cpp/src/traversal/sssp.cu +++ b/cpp/src/traversal/sssp.cu @@ -47,7 +47,7 @@ void SSSP::setup() // Allocate buffer for data that need to be reset every iteration iter_buffer_size = sizeof(int) * (edges_bmap_size + vertices_bmap_size) + sizeof(IndexType); - iter_buffer.resize(iter_buffer_size); + iter_buffer.resize(iter_buffer_size, stream); // ith bit of relaxed_edges_bmap <=> ith edge was relaxed relaxed_edges_bmap = static_cast(iter_buffer.data()); // ith bit of next_frontier_bmap <=> vertex is active in the next frontier From 03dae45e476d86b1ba8c7bc6baa39603bb13d6aa Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Mon, 24 May 2021 20:52:19 -0400 Subject: [PATCH 265/343] Update `cugraph` to with newest CMake features, including CPM for dependencies (#1585) This allows the `cugraph` to be used via `CPM` either from a system installed version or from a build-directory. 
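With the `rapids_export()` install/build export sets and the new `cugraph::cugraph` alias target added in this change, a downstream CMake project should be able to consume cuGraph through an ordinary `find_package()` call, or via CPM as a fallback. A minimal sketch follows, assuming cuGraph has already been installed or its build directory placed on `CMAKE_PREFIX_PATH`; the project name `my_graph_app` and the commented-out CPM fallback are illustrative assumptions, not part of this PR:

```cmake
cmake_minimum_required(VERSION 3.20.1)
project(my_graph_app LANGUAGES CXX CUDA)

# Resolve cuGraph from a system install, or from a build directory
# added to CMAKE_PREFIX_PATH (both exports are generated by this PR).
find_package(cugraph REQUIRED)

# Hypothetical CPM-based fallback when no local package exists:
# include(cmake/CPM.cmake)
# CPMFindPackage(NAME cugraph
#                GITHUB_REPOSITORY rapidsai/cugraph
#                GIT_TAG branch-21.06)

add_executable(my_graph_app main.cpp)

# Link against the namespaced target exported by cuGraph.
target_link_libraries(my_graph_app PRIVATE cugraph::cugraph)
```

The namespaced `cugraph::cugraph` target carries the exported include paths and the `rmm::rmm`, `raft::raft`, and Thrust dependencies, so the consumer does not need to repeat them.
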
Authors: - Robert Maynard (https://github.com/robertmaynard) - Paul Taylor (https://github.com/trxcllnt) - Dante Gama Dessavre (https://github.com/dantegd) Approvers: - AJ Schmidt (https://github.com/ajschmidt8) - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/1585 --- build.sh | 10 +- ci/release/update-version.sh | 2 +- conda/environments/cugraph_dev_cuda11.0.yml | 2 +- conda/environments/cugraph_dev_cuda11.1.yml | 2 +- conda/environments/cugraph_dev_cuda11.2.yml | 2 +- conda/recipes/libcugraph/meta.yaml | 2 +- cpp/CMakeLists.txt | 528 ++++++-------------- cpp/cmake/EvalGpuArchs.cmake | 68 --- cpp/cmake/Modules/FindFAISS.cmake | 98 ---- cpp/cmake/Modules/FindNCCL.cmake | 116 ----- cpp/cmake/faiss_cuda11.patch | 40 -- cpp/cmake/thirdparty/get_cuco.cmake | 35 ++ cpp/cmake/thirdparty/get_cuhornet.cmake | 45 ++ cpp/cmake/thirdparty/get_faiss.cmake | 50 ++ cpp/cmake/thirdparty/get_gtest.cmake | 43 ++ cpp/cmake/thirdparty/get_gunrock.cmake | 64 +++ cpp/cmake/thirdparty/get_nccl.cmake | 42 ++ cpp/cmake/thirdparty/get_raft.cmake | 48 ++ cpp/cmake/thirdparty/get_rmm.cmake | 47 ++ cpp/cmake/thirdparty/get_thrust.cmake | 29 ++ cpp/tests/CMakeLists.txt | 430 ++++------------ python/setup.py | 5 +- python/setuputils.py | 67 +-- 23 files changed, 716 insertions(+), 1059 deletions(-) delete mode 100644 cpp/cmake/EvalGpuArchs.cmake delete mode 100644 cpp/cmake/Modules/FindFAISS.cmake delete mode 100644 cpp/cmake/Modules/FindNCCL.cmake delete mode 100644 cpp/cmake/faiss_cuda11.patch create mode 100644 cpp/cmake/thirdparty/get_cuco.cmake create mode 100644 cpp/cmake/thirdparty/get_cuhornet.cmake create mode 100644 cpp/cmake/thirdparty/get_faiss.cmake create mode 100644 cpp/cmake/thirdparty/get_gtest.cmake create mode 100644 cpp/cmake/thirdparty/get_gunrock.cmake create mode 100644 cpp/cmake/thirdparty/get_nccl.cmake create mode 100644 cpp/cmake/thirdparty/get_raft.cmake create mode 100644 cpp/cmake/thirdparty/get_rmm.cmake create mode 100644 cpp/cmake/thirdparty/get_thrust.cmake diff --git a/build.sh b/build.sh index 682e9b5ed33..8437a32bf94 100755 --- a/build.sh +++ b/build.sh @@ -53,7 +53,7 @@ INSTALL_TARGET=install BUILD_DISABLE_DEPRECATION_WARNING=ON BUILD_CPP_MG_TESTS=OFF BUILD_STATIC_FAISS=OFF -GPU_ARCH="" +BUILD_ALL_GPU_ARCH=0 # Set defaults for vars that may not have been defined externally # FIXME: if PREFIX is not set, check CONDA_PREFIX, but there is no fallback @@ -96,7 +96,7 @@ if hasArg -n; then INSTALL_TARGET="" fi if hasArg --allgpuarch; then - GPU_ARCH="-DGPU_ARCHS=ALL" + BUILD_ALL_GPU_ARCH=1 fi if hasArg --buildfaiss; then BUILD_STATIC_FAISS=ON @@ -154,15 +154,17 @@ fi ################################################################################ # Configure, build, and install libcugraph if buildAll || hasArg libcugraph; then - if [[ ${GPU_ARCH} == "" ]]; then + if (( ${BUILD_ALL_GPU_ARCH} == 0 )); then + CUGRAPH_CMAKE_CUDA_ARCHITECTURES="NATIVE" echo "Building for the architecture of the GPU in the system..." else + CUGRAPH_CMAKE_CUDA_ARCHITECTURES="ALL" echo "Building for *ALL* supported GPU architectures..." 
fi mkdir -p ${LIBCUGRAPH_BUILD_DIR} cd ${LIBCUGRAPH_BUILD_DIR} cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \ - ${GPU_ARCH} \ + -DCMAKE_CUDA_ARCHITECTURES=${CUGRAPH_CMAKE_CUDA_ARCHITECTURES} \ -DDISABLE_DEPRECATION_WARNING=${BUILD_DISABLE_DEPRECATION_WARNING} \ -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ -DBUILD_STATIC_FAISS=${BUILD_STATIC_FAISS} \ diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index f381ed7f6fb..0629f19d3b8 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2018-2020, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at diff --git a/conda/environments/cugraph_dev_cuda11.0.yml b/conda/environments/cugraph_dev_cuda11.0.yml index 20d56b281d2..e12b6143e69 100644 --- a/conda/environments/cugraph_dev_cuda11.0.yml +++ b/conda/environments/cugraph_dev_cuda11.0.yml @@ -21,7 +21,7 @@ dependencies: - networkx>=2.5.1 - clang=8.0.1 - clang-tools=8.0.1 -- cmake>=3.18 +- cmake>=3.20.1 - python>=3.6,<3.9 - notebook>=0.5.0 - boost diff --git a/conda/environments/cugraph_dev_cuda11.1.yml b/conda/environments/cugraph_dev_cuda11.1.yml index 0eba2baccaa..3fd7013f475 100644 --- a/conda/environments/cugraph_dev_cuda11.1.yml +++ b/conda/environments/cugraph_dev_cuda11.1.yml @@ -21,7 +21,7 @@ dependencies: - networkx>=2.5.1 - clang=8.0.1 - clang-tools=8.0.1 -- cmake>=3.18 +- cmake>=3.20.1 - python>=3.6,<3.9 - notebook>=0.5.0 - boost diff --git a/conda/environments/cugraph_dev_cuda11.2.yml b/conda/environments/cugraph_dev_cuda11.2.yml index 55f6ad75cec..978cbf4dd5f 100644 --- a/conda/environments/cugraph_dev_cuda11.2.yml +++ b/conda/environments/cugraph_dev_cuda11.2.yml @@ -21,7 +21,7 @@ dependencies: - networkx>=2.5.1 - clang=8.0.1 - clang-tools=8.0.1 -- cmake>=3.18 +- cmake>=3.20.1 - python>=3.6,<3.9 - notebook>=0.5.0 - boost diff --git a/conda/recipes/libcugraph/meta.yaml b/conda/recipes/libcugraph/meta.yaml index 71b22c8cf1b..afab0d01dba 100644 --- a/conda/recipes/libcugraph/meta.yaml +++ b/conda/recipes/libcugraph/meta.yaml @@ -31,7 +31,7 @@ build: requirements: build: - - cmake>=3.12.4 + - cmake>=3.20.1 - cudatoolkit {{ cuda_version }}.* - librmm {{ minor_version }}.* - boost-cpp>=1.66 diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 978ec93cc2e..b0e52ba73b6 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -14,10 +14,47 @@ # limitations under the License. 
#============================================================================= -cmake_minimum_required(VERSION 3.18...3.18 FATAL_ERROR) +cmake_minimum_required(VERSION 3.20.1 FATAL_ERROR) +include(FetchContent) +FetchContent_Declare( + rapids-cmake + GIT_REPOSITORY https://github.com/rapidsai/rapids-cmake.git + GIT_TAG origin/branch-21.06 + ) +FetchContent_MakeAvailable(rapids-cmake) + +include(rapids-cmake) +include(rapids-cpm) +include(rapids-cuda) +include(rapids-export) +include(rapids-find) + +rapids_cuda_init_architectures(CUGRAPH) project(CUGRAPH VERSION 21.06.00 LANGUAGES C CXX CUDA) +# Remove the following archs from CMAKE_CUDA_ARCHITECTURES that +# cuhornet currently doesn't support +# +# < 60 +# >= 86 +set(supported_archs "60" "62" "70" "72" "75" "80") +foreach( arch IN LISTS CMAKE_CUDA_ARCHITECTURES) + string(REPLACE "-real" "" arch ${arch}) + if( arch IN_LIST supported_archs ) + list(APPEND usable_arch_values ${arch}) + endif() +endforeach() +# Make sure everything but the 'newest' arch +# is marked as `-real` so we only generate PTX for +# arch > 80 +list(POP_BACK usable_arch_values latest_arch) +list(TRANSFORM usable_arch_values APPEND "-real") +list(APPEND usable_arch_values ${latest_arch}) + +set(CMAKE_CUDA_ARCHITECTURES ${usable_arch_values}) + + # Write the version header include(cmake/Modules/Version.cmake) write_version() @@ -26,147 +63,59 @@ write_version() # - build type ------------------------------------------------------------------------------------ # Set a default build type if none was specified -set(DEFAULT_BUILD_TYPE "Release") - -if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) - message(STATUS "Setting build type to '${DEFAULT_BUILD_TYPE}' since none specified.") - set(CMAKE_BUILD_TYPE "${DEFAULT_BUILD_TYPE}" CACHE - STRING "Choose the type of build." 
FORCE) - # Set the possible values of build type for cmake-gui - set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS - "Debug" "Release" "MinSizeRel" "RelWithDebInfo") -endif() +rapids_cmake_build_type(Release) ############################################################################## # - User Options ------------------------------------------------------------ option(BUILD_CUGRAPH_MG_TESTS "Build cuGraph multigpu algorithm tests" OFF) - -################################################################################################### -# - user options ------------------------------------------------------------------------------ - -set(BLAS_LIBRARIES "" CACHE STRING - "Location of BLAS library for FAISS build.") +set(BLAS_LIBRARIES "" CACHE STRING "Location of BLAS library for FAISS build.") option(BUILD_STATIC_FAISS "Build the FAISS library for nearest neighbors search on GPU" OFF) +option(CMAKE_CUDA_LINEINFO "Enable the -lineinfo option for nvcc (useful for cuda-memcheck / profiler" OFF) +option(BUILD_TESTS "Configure CMake to build tests" ON) ################################################################################################### # - compiler options ------------------------------------------------------------------------------ -set(CMAKE_CXX_STANDARD 17) -set(CMAKE_C_COMPILER $ENV{CC}) -set(CMAKE_CXX_COMPILER $ENV{CXX}) -set(CMAKE_CXX_STANDARD_REQUIRED ON) +rapids_find_package(CUDAToolkit REQUIRED + BUILD_EXPORT_SET cugraph-exports + INSTALL_EXPORT_SET cugraph-exports + ) -set(CMAKE_CUDA_STANDARD 17) -set(CMAKE_CUDA_STANDARD_REQUIRED ON) +set(CUGRAPH_CXX_FLAGS "") +set(CUGRAPH_CUDA_FLAGS "") if(CMAKE_COMPILER_IS_GNUCXX) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -Wno-error=deprecated-declarations") + list(APPEND CUGRAPH_CXX_FLAGS -Werror -Wno-error=deprecated-declarations) endif(CMAKE_COMPILER_IS_GNUCXX) -find_package(CUDA) - -# Configure GPU arch to build -set(GUNROCK_GENCODE_SM60 "OFF") -set(GUNROCK_GENCODE_SM61 "OFF") -set(GUNROCK_GENCODE_SM70 "OFF") -set(GUNROCK_GENCODE_SM72 "OFF") -set(GUNROCK_GENCODE_SM75 "OFF") -set(GUNROCK_GENCODE_SM80 "OFF") - -# ARCHS handling: -# -if("${GPU_ARCHS}" STREQUAL "") - include(cmake/EvalGpuArchs.cmake) - evaluate_gpu_archs(GPU_ARCHS) -endif() - -# CUDA 11 onwards cub ships with CTK -if((CUDA_VERSION_MAJOR EQUAL 11) OR (CUDA_VERSION_MAJOR GREATER 11)) - set(CUB_IS_PART_OF_CTK ON) -else() - set(CUB_IS_PART_OF_CTK OFF) -endif() +message("-- Building for GPU_ARCHS = ${CMAKE_CUDA_ARCHITECTURES}") -if("${GPU_ARCHS}" STREQUAL "ALL") - set(GPU_ARCHS "60") - if((CUDA_VERSION_MAJOR EQUAL 9) OR (CUDA_VERSION_MAJOR GREATER 9)) - set(GPU_ARCHS "${GPU_ARCHS};70") - endif() - if((CUDA_VERSION_MAJOR EQUAL 10) OR (CUDA_VERSION_MAJOR GREATER 10)) - set(GPU_ARCHS "${GPU_ARCHS};75") - endif() - if((CUDA_VERSION_MAJOR EQUAL 11) OR (CUDA_VERSION_MAJOR GREATER 11)) - set(GPU_ARCHS "${GPU_ARCHS};80") - endif() -endif() - -message("-- Building for GPU_ARCHS = ${GPU_ARCHS}") -foreach(arch ${GPU_ARCHS}) - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_${arch},code=sm_${arch}") - set(GUNROCK_GENCODE_SM${arch} "ON") - set(FAISS_GPU_ARCHS "${FAISS_GPU_ARCHS} -gencode arch=compute_${arch},code=sm_${arch}") -endforeach() - -list(GET GPU_ARCHS -1 ptx) -set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_${ptx},code=compute_${ptx}") -set(FAISS_GPU_ARCHS "${FAISS_GPU_ARCHS} -gencode arch=compute_${ptx},code=compute_${ptx}") - -set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda --expt-relaxed-constexpr") 
-set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Werror=cross-execution-space-call -Wno-deprecated-declarations -Xptxas --disable-warnings") -set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -Wall,-Wno-error=sign-compare,-Wno-error=unused-but-set-variable") -set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xfatbin=-compress-all") +list(APPEND CUGRAPH_CUDA_FLAGS --expt-extended-lambda --expt-relaxed-constexpr) +list(APPEND CUGRAPH_CUDA_FLAGS -Werror=cross-execution-space-call -Wno-deprecated-declarations -Xptxas=--disable-warnings) +list(APPEND CUGRAPH_CUDA_FLAGS -Xcompiler=-Wall,-Wno-error=sign-compare,-Wno-error=unused-but-set-variable) +list(APPEND CUGRAPH_CUDA_FLAGS -Xfatbin=-compress-all) # Option to enable line info in CUDA device compilation to allow introspection when profiling / # memchecking -option(CMAKE_CUDA_LINEINFO "Enable the -lineinfo option for nvcc (useful for cuda-memcheck / profiler" OFF) if (CMAKE_CUDA_LINEINFO) - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -lineinfo") + list(APPEND CUGRAPH_CUDA_FLAGS -lineinfo) endif(CMAKE_CUDA_LINEINFO) # Debug options if(CMAKE_BUILD_TYPE MATCHES Debug) message(STATUS "Building with debugging flags") - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G -Xcompiler -rdynamic") + list(APPEND CUGRAPH_CUDA_FLAGS -G -Xcompiler=-rdynamic) endif(CMAKE_BUILD_TYPE MATCHES Debug) -# To apply RUNPATH to transitive dependencies (this is a temporary solution) -set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--disable-new-dtags") -set(CMAKE_EXE_LINKER_FLAGS "-Wl,--disable-new-dtags") - -option(BUILD_TESTS "Configure CMake to build tests" - ON) - -################################################################################################### -# - cmake modules --------------------------------------------------------------------------------- - -set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules/" ${CMAKE_MODULE_PATH}) - -include(FeatureSummary) -include(CheckIncludeFiles) -include(CheckLibraryExists) -if(BUILD_TESTS) - include(CTest) -endif(BUILD_TESTS) - -################################################################################################### -# - find boost ------------------------------------------------------------------------------------ - -find_package(Boost REQUIRED) -if(Boost_FOUND) - message(STATUS "Boost found in ${Boost_INCLUDE_DIRS}") -else() - message(FATAL_ERROR "Boost not found, please check your settings.") -endif(Boost_FOUND) - ################################################################################################### # - find openmp ----------------------------------------------------------------------------------- find_package(OpenMP) if(OpenMP_FOUND) # find_package(OPenMP) does not automatically add OpenMP flags to CUDA - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler=${OpenMP_CXX_FLAGS}") + list(APPEND CUGRAPH_CUDA_FLAGS -Xcompiler=${OpenMP_CXX_FLAGS}) endif(OpenMP_FOUND) @@ -180,218 +129,28 @@ else() endif() ################################################################################################### -# - find gtest ------------------------------------------------------------------------------------ - -if(BUILD_TESTS) - find_package(GTest REQUIRED) -endif(BUILD_TESTS) - -################################################################################################### -# - find RMM -------------------------------------------------------------------------------------- - -find_path(RMM_INCLUDE "rmm" - HINTS - "$ENV{RMM_ROOT}/include" - "$ENV{CONDA_PREFIX}/include/rmm" - "$ENV{CONDA_PREFIX}/include") - 
-message(STATUS "RMM: RMM_INCLUDE set to ${RMM_INCLUDE}") - -################################################################################################### -# - find NCCL ------------------------------------------------------------------------------------- - -if(NOT NCCL_PATH) - find_package(NCCL REQUIRED) -else() - message("-- Manually set NCCL PATH to ${NCCL_PATH}") - set(NCCL_INCLUDE_DIRS ${NCCL_PATH}/include) - set(NCCL_LIBRARIES ${NCCL_PATH}/lib/libnccl.so) -endif(NOT NCCL_PATH) - -################################################################################################### -# - find MPI - only enabled if MG tests are to be built - -if(BUILD_CUGRAPH_MG_TESTS) - find_package(MPI REQUIRED) -endif(BUILD_CUGRAPH_MG_TESTS) - -################################################################################################### -# - Fetch Content --------------------------------------------------------------------------------- -include(FetchContent) - -# - THRUST/CUB -message("Fetching Thrust") - -FetchContent_Declare( - thrust - GIT_REPOSITORY https://github.com/thrust/thrust.git - GIT_TAG 1.12.0 -) - -FetchContent_GetProperties(thrust) -if(NOT thrust_POPULATED) - FetchContent_Populate(thrust) - # We are not using the thrust CMake targets, so no need to call `add_subdirectory()`. -endif() -set(THRUST_INCLUDE_DIR "${thrust_SOURCE_DIR}") +# - find CPM based dependencies ------------------------------------------------------------------ -# - cuco -message("Fetching cuco") -FetchContent_Declare( - cuco - GIT_REPOSITORY https://github.com/NVIDIA/cuCollections.git - GIT_TAG 7678a5ecaa192b8983b02a0191a140097171713e -) +rapids_cpm_init() -FetchContent_GetProperties(cuco) -if(NOT cuco_POPULATED) - FetchContent_Populate(cuco) -endif() -set(CUCO_INCLUDE_DIR "${cuco_SOURCE_DIR}/include") -# - libcudacxx -# NOTE: This is necessary because libcudacxx is not supported in -# debian cuda 10.2 packages. Once 10.2 is deprecated -# we should not need this any longer. -message("Fetching libcudacxx") +include(cmake/thirdparty/get_thrust.cmake) +include(cmake/thirdparty/get_faiss.cmake) +include(cmake/thirdparty/get_nccl.cmake) +include(cmake/thirdparty/get_rmm.cmake) -FetchContent_Declare( - libcudacxx - GIT_REPOSITORY https://github.com/NVIDIA/libcudacxx.git - GIT_TAG 1.3.0 - GIT_SHALLOW true -) +include(cmake/thirdparty/get_raft.cmake) -FetchContent_GetProperties(libcudacxx) -if(NOT libcudacxx_POPULATED) - message("populating libcudacxx") - FetchContent_Populate(libcudacxx) -endif() -set(LIBCUDACXX_INCLUDE_DIR "${libcudacxx_SOURCE_DIR}/include") -message("set LIBCUDACXX_INCLUDE_DIR to: ${LIBCUDACXX_INCLUDE_DIR}") +include(cmake/thirdparty/get_cuco.cmake) +include(cmake/thirdparty/get_cuhornet.cmake) -# - CUHORNET -FetchContent_Declare( - cuhornet - GIT_REPOSITORY https://github.com/rapidsai/cuhornet.git - GIT_TAG 261399356e62bd76fa7628880f1a847aee713eed - SOURCE_SUBDIR hornet -) +include(cmake/thirdparty/get_gunrock.cmake) -FetchContent_GetProperties(cuhornet) -if(NOT cuhornet_POPULATED) - message("populating cuhornet") - FetchContent_Populate(cuhornet) - # We are not using the cuhornet CMake targets, so no need to call `add_subdirectory()`. 
+if(BUILD_TESTS) + include(cmake/thirdparty/get_gtest.cmake) endif() -set(CUHORNET_INCLUDE_DIR ${cuhornet_SOURCE_DIR} CACHE STRING "Path to cuhornet includes") - -# - raft - (header only) -# Only cloned if RAFT_PATH env variable is not defined -if(DEFINED ENV{RAFT_PATH}) - message(STATUS "RAFT_PATH environment variable detected.") - message(STATUS "RAFT_DIR set to $ENV{RAFT_PATH}") - set(RAFT_DIR "$ENV{RAFT_PATH}") - -else(DEFINED ENV{RAFT_PATH}) - message(STATUS "RAFT_PATH environment variable NOT detected, cloning RAFT") - - FetchContent_Declare( - raft - GIT_REPOSITORY https://github.com/rapidsai/raft.git - GIT_TAG 66f82b4e79a3e268d0da3cc864ec7ce4ad065296 - SOURCE_SUBDIR raft - ) - - FetchContent_GetProperties(raft) - if(NOT raft_POPULATED) - message("populating raft") - FetchContent_Populate(raft) - # We are not using any raft CMake targets, so no need to call `add_subdirectory()`. - endif() - - set(RAFT_DIR "${raft_SOURCE_DIR}") -endif(DEFINED ENV{RAFT_PATH}) - -################################################################################################### -# - External Projects ----------------------------------------------------------------------------- - -# https://cmake.org/cmake/help/v3.0/module/ExternalProject.html - -# FIXME: gunrock is still using ExternalProject instead of -# FetchContent. Consider migrating to FetchContent soon (this may require -# updates to the gunrock cmake files to support this). - -include(ExternalProject) - -# - GUNROCK -set(GUNROCK_DIR ${CMAKE_CURRENT_BINARY_DIR}/gunrock CACHE STRING "Path to gunrock repo") -set(GUNROCK_INCLUDE_DIR ${GUNROCK_DIR}/src/gunrock_ext CACHE STRING "Path to gunrock includes") - -ExternalProject_Add(gunrock_ext - GIT_REPOSITORY https://github.com/gunrock/gunrock.git - GIT_TAG v1.2 - PREFIX ${GUNROCK_DIR} - CMAKE_ARGS -DCMAKE_INSTALL_PREFIX= - -DGUNROCK_BUILD_SHARED_LIBS=OFF - -DGUNROCK_BUILD_TESTS=OFF - -DCUDA_AUTODETECT_GENCODE=OFF - -DGUNROCK_GENCODE_SM60=${GUNROCK_GENCODE_SM60} - -DGUNROCK_GENCODE_SM61=${GUNROCK_GENCODE_SM61} - -DGUNROCK_GENCODE_SM70=${GUNROCK_GENCODE_SM70} - -DGUNROCK_GENCODE_SM72=${GUNROCK_GENCODE_SM72} - -DGUNROCK_GENCODE_SM75=${GUNROCK_GENCODE_SM75} - -DGUNROCK_GENCODE_SM80=${GUNROCK_GENCODE_SM80} - ${GUNROCK_GENCODE} - BUILD_BYPRODUCTS ${GUNROCK_DIR}/src/gunrock_ext-build/lib/libgunrock.a - INSTALL_COMMAND "" -) -add_library(gunrock STATIC IMPORTED) -add_dependencies(gunrock gunrock_ext) -set_property(TARGET gunrock PROPERTY IMPORTED_LOCATION ${GUNROCK_DIR}/src/gunrock_ext-build/lib/libgunrock.a) - -# - FAISS -# FIXME: The commit currently being fetched from faiss is using autotools which -# is more convenient to build with ExternalProjectAdd. -# Consider migrating to FetchContent once the tagged commit is changed. - -if(BUILD_STATIC_FAISS) - set(FAISS_DIR ${CMAKE_CURRENT_BINARY_DIR}/faiss CACHE STRING - "Path to FAISS source directory") - ExternalProject_Add(faiss - GIT_REPOSITORY https://github.com/facebookresearch/faiss.git - GIT_TAG 7c2d2388a492d65fdda934c7e74ae87acaeed066 - CONFIGURE_COMMAND LIBS=-pthread - CPPFLAGS=-w - LDFLAGS=-L${CMAKE_INSTALL_PREFIX}/lib - cmake -B build . 
- -DCMAKE_BUILD_TYPE=Release - -DBUILD_TESTING=OFF - -DFAISS_ENABLE_PYTHON=OFF - -DBUILD_SHARED_LIBS=OFF - -DFAISS_ENABLE_GPU=ON - -DCUDAToolkit_ROOT=${CUDA_TOOLKIT_ROOT_DIR} - -DCUDA_ARCHITECTURES=${FAISS_GPU_ARCHS} - -DBLAS_LIBRARIES=${BLAS_LIBRARIES} - PREFIX ${FAISS_DIR} - BUILD_COMMAND make -C build -j${PARALLEL_LEVEL} VERBOSE=1 - BUILD_BYPRODUCTS ${FAISS_DIR}/src/faiss/build/faiss/libfaiss.a - BUILD_ALWAYS 1 - INSTALL_COMMAND "" - UPDATE_COMMAND "" - BUILD_IN_SOURCE 1) - - ExternalProject_Get_Property(faiss install_dir) - add_library(FAISS::FAISS STATIC IMPORTED) - set_property(TARGET FAISS::FAISS PROPERTY - IMPORTED_LOCATION ${FAISS_DIR}/src/faiss/build/faiss/libfaiss.a) - set(FAISS_INCLUDE_DIRS "${FAISS_DIR}/src/faiss") -else() - set(FAISS_INSTALL_DIR ENV{FAISS_ROOT}) - find_package(FAISS REQUIRED) -endif(BUILD_STATIC_FAISS) ################################################################################################### # - library targets ------------------------------------------------------------------------------- @@ -443,50 +202,68 @@ add_library(cugraph SHARED src/utilities/host_barrier.cpp ) -target_link_directories(cugraph - PRIVATE - # CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES is an undocumented/unsupported variable containing the - # link directories for nvcc. - "${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}") +set_target_properties(cugraph + PROPERTIES BUILD_RPATH "\$ORIGIN" + INSTALL_RPATH "\$ORIGIN" + # set target compile options + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + CUDA_STANDARD 17 + CUDA_STANDARD_REQUIRED ON + INTERFACE_POSITION_INDEPENDENT_CODE ON +) -# -# NOTE: This dependency will force the building of cugraph to -# wait until after cugunrock is constructed. -# -add_dependencies(cugraph gunrock_ext) +target_compile_options(cugraph + PRIVATE "$<$:${CUGRAPH_CXX_FLAGS}>" + "$<$:${CUGRAPH_CUDA_FLAGS}>" +) # Per-thread default stream option see https://docs.nvidia.com/cuda/cuda-runtime-api/stream-sync-behavior.html # The per-thread default stream does not synchronize with other streams target_compile_definitions(cugraph PUBLIC CUDA_API_PER_THREAD_DEFAULT_STREAM) +file(WRITE "${CUGRAPH_BINARY_DIR}/fatbin.ld" +[=[ +SECTIONS +{ + .nvFatBinSegment : { *(.nvFatBinSegment) } + .nv_fatbin : { *(.nv_fatbin) } +} +]=]) +target_link_options(cugraph PRIVATE "${CUGRAPH_BINARY_DIR}/fatbin.ld") + +add_library(cugraph::cugraph ALIAS cugraph) + ################################################################################################### # - include paths --------------------------------------------------------------------------------- target_include_directories(cugraph PRIVATE - "${THRUST_INCLUDE_DIR}" - "${CUCO_INCLUDE_DIR}" - "${LIBCUDACXX_INCLUDE_DIR}" - "${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}" - "${Boost_INCLUDE_DIRS}" - "${RMM_INCLUDE}" - "${CMAKE_CURRENT_SOURCE_DIR}/../thirdparty" - "${CUHORNET_INCLUDE_DIR}/hornet/include" - "${CUHORNET_INCLUDE_DIR}/hornetsnest/include" - "${CUHORNET_INCLUDE_DIR}/xlib/include" - "${CUHORNET_INCLUDE_DIR}/primitives" - "${CMAKE_CURRENT_SOURCE_DIR}/src" - "${GUNROCK_INCLUDE_DIR}" - "${NCCL_INCLUDE_DIRS}" - "${RAFT_DIR}/cpp/include" + "${CMAKE_CURRENT_SOURCE_DIR}/../thirdparty" + "${CMAKE_CURRENT_SOURCE_DIR}/src" + "${NCCL_INCLUDE_DIRS}" PUBLIC - "${CMAKE_CURRENT_SOURCE_DIR}/include" + "$" + "$" ) ################################################################################################### # - link libraries -------------------------------------------------------------------------------- - -target_link_libraries(cugraph PRIVATE - 
gunrock cublas cusparse curand cusolver cudart cuda FAISS::FAISS ${NCCL_LIBRARIES}) +target_link_libraries(cugraph + PUBLIC + rmm::rmm + cugraph::Thrust + raft::raft + PRIVATE + cuco::cuco + CUDA::cublas + CUDA::curand + CUDA::cusolver + CUDA::cusparse + cugraph::cuHornet + FAISS::FAISS + gunrock + NCCL::NCCL +) if(OpenMP_CXX_FOUND) target_link_libraries(cugraph PRIVATE @@ -539,49 +316,74 @@ target_link_libraries(cugraph PRIVATE ${OpenMP_CXX_LIB_NAMES}) endif(OpenMP_CXX_FOUND) -# CUDA_ARCHITECTURES=OFF implies cmake will not pass arch flags to the -# compiler. CUDA_ARCHITECTURES must be set to a non-empty value to prevent cmake -# warnings about policy CMP0104. With this setting, arch flags must be manually -# set! ("evaluate_gpu_archs(GPU_ARCHS)" is the current mechanism used in -# cpp/CMakeLists.txt for setting arch options). Run "cmake --help-policy -# CMP0104" for policy details. -# NOTE: the CUDA_ARCHITECTURES=OFF setting may be removed after migrating to the -# findcudatoolkit features in cmake 3.17+ -set_target_properties(cugraph PROPERTIES - CUDA_ARCHITECTURES OFF) ################################################################################################### # - generate tests -------------------------------------------------------------------------------- if(BUILD_TESTS) - if(GTEST_FOUND) - add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tests) - endif(GTEST_FOUND) + include(CTest) + add_subdirectory(tests) endif(BUILD_TESTS) ################################################################################################### # - install targets ------------------------------------------------------------------------------- -install(TARGETS cugraph LIBRARY - DESTINATION lib) +install(TARGETS cugraph + DESTINATION lib + EXPORT cugraph-exports) install(DIRECTORY include/ - DESTINATION include/cugraph) + DESTINATION include/cugraph) install(FILES ${CMAKE_CURRENT_BINARY_DIR}/include/cugraph/version_config.hpp DESTINATION include/cugraph) -install(DIRECTORY ${RAFT_DIR}/cpp/include/raft/ - DESTINATION include/cugraph/raft) +################################################################################################ +# - install export ------------------------------------------------------------------------------- +set(doc_string +[=[ +Provide targets for cuGraph. + +cuGraph library is a collection of GPU accelerated graph algorithms that process data found in +[GPU DataFrames](https://github.com/rapidsai/cudf). 
+ +]=]) + +set(code_string +[=[ +thrust_create_target(cugraph::Thrust FROM_OPTIONS) +]=]) + + rapids_export(INSTALL cugraph + EXPORT_SET cugraph-exports + GLOBAL_TARGETS cugraph + NAMESPACE cugraph:: + DOCUMENTATION doc_string + FINAL_CODE_BLOCK code_string + ) + +################################################################################################ +# - build export ------------------------------------------------------------------------------- +rapids_export(BUILD cugraph + EXPORT_SET cugraph-exports + GLOBAL_TARGETS cugraph + NAMESPACE cugraph:: + DOCUMENTATION doc_string + FINAL_CODE_BLOCK code_string + ) + ################################################################################################### # - make documentation ---------------------------------------------------------------------------- # requires doxygen and graphviz to be installed # from build directory, run make docs_cugraph -# doc targets for cuGraph -add_custom_command(OUTPUT CUGRAPH_DOXYGEN - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/doxygen - COMMAND doxygen Doxyfile - VERBATIM) +# doc targets for cugraph +find_package(Doxygen 1.8.11) +if(Doxygen_FOUND) + add_custom_command(OUTPUT CUGRAPH_DOXYGEN + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/doxygen + COMMAND doxygen Doxyfile + VERBATIM) -add_custom_target(docs_cugraph DEPENDS CUGRAPH_DOXYGEN) + add_custom_target(docs_cugraph DEPENDS CUGRAPH_DOXYGEN) +endif() diff --git a/cpp/cmake/EvalGpuArchs.cmake b/cpp/cmake/EvalGpuArchs.cmake deleted file mode 100644 index f3918542db9..00000000000 --- a/cpp/cmake/EvalGpuArchs.cmake +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -function(evaluate_gpu_archs gpu_archs) - set(eval_file ${PROJECT_BINARY_DIR}/eval_gpu_archs.cu) - set(eval_exe ${PROJECT_BINARY_DIR}/eval_gpu_archs) - file(WRITE ${eval_file} - " -#include -#include -#include -using namespace std; -int main(int argc, char** argv) { - set archs; - int nDevices; - if((cudaGetDeviceCount(&nDevices) == cudaSuccess) && (nDevices > 0)) { - for(int dev=0;dev::const_iterator itr=archs.begin();itr!=archs.end();++itr) { - printf(first? 
\"%s\" : \";%s\", itr->c_str()); - first = false; - } - } - printf(\"\\n\"); - return 0; -} -") - execute_process( - COMMAND ${CUDA_NVCC_EXECUTABLE} - -o ${eval_exe} - --run - ${eval_file} - OUTPUT_VARIABLE __gpu_archs - OUTPUT_STRIP_TRAILING_WHITESPACE) - set(__gpu_archs_filtered "${__gpu_archs}") - foreach(arch ${__gpu_archs}) - if (arch VERSION_LESS 60) - list(REMOVE_ITEM __gpu_archs_filtered ${arch}) - endif() - endforeach() - if (NOT __gpu_archs_filtered) - message(FATAL_ERROR "No supported GPU arch found on this system") - endif() - message("Auto detection of gpu-archs: ${__gpu_archs_filtered}") - set(${gpu_archs} ${__gpu_archs_filtered} PARENT_SCOPE) -endfunction(evaluate_gpu_archs) diff --git a/cpp/cmake/Modules/FindFAISS.cmake b/cpp/cmake/Modules/FindFAISS.cmake deleted file mode 100644 index 7c456edfeef..00000000000 --- a/cpp/cmake/Modules/FindFAISS.cmake +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# Based on FindPNG.cmake from cmake 3.14.3 - -#[=======================================================================[.rst: -FindFAISS --------- - -Template to generate FindPKG_NAME.cmake CMake modules - -Find FAISS - -Imported targets -^^^^^^^^^^^^^^^^ - -This module defines the following :prop_tgt:`IMPORTED` target: - -``FAISS::FAISS`` - The libFAISS library, if found. - -Result variables -^^^^^^^^^^^^^^^^ - -This module will set the following variables in your project: - -``FAISS_INCLUDE_DIRS`` - where to find FAISS.hpp , etc. -``FAISS_LIBRARIES`` - the libraries to link against to use libFAISS. -``FAISS_FOUND`` - If false, do not try to use FAISS. -``FAISS_VERSION_STRING`` - the version of the FAISS library found - -#]=======================================================================] - -find_path(FAISS_LOCATION faiss/IndexFlat.h - HINTS ${FAISS_INSTALL_DIR} - PATH_SUFFIXES include include/) - -list(APPEND FAISS_NAMES faiss libfaiss) -set(_FAISS_VERSION_SUFFIXES ) - -foreach(v IN LISTS _FAISS_VERSION_SUFFIXES) - list(APPEND FAISS_NAMES faiss${v} libfaiss${v}) - list(APPEND FAISS_NAMES faiss.${v} libfaiss.${v}) -endforeach() -unset(_FAISS_VERSION_SUFFIXES) - -find_library(FAISS_LIBRARY_RELEASE NAMES ${FAISS_NAMES} - HINTS ${FAISS_INSTALL_DIR} - PATH_SUFFIXES lib) - -include(${CMAKE_ROOT}/Modules/SelectLibraryConfigurations.cmake) -select_library_configurations(FAISS) -mark_as_advanced(FAISS_LIBRARY_RELEASE) -unset(FAISS_NAMES) - -# Set by select_library_configurations(), but we want the one from -# find_package_handle_standard_args() below. 
-unset(FAISS_FOUND) - -if (FAISS_LIBRARY AND FAISS_LOCATION) - set(FAISS_INCLUDE_DIRS ${FAISS_LOCATION} ) - set(FAISS_LIBRARY ${FAISS_LIBRARY}) - - if(NOT TARGET FAISS::FAISS) - add_library(FAISS::FAISS UNKNOWN IMPORTED) - set_target_properties(FAISS::FAISS PROPERTIES - INTERFACE_INCLUDE_DIRECTORIES "${FAISS_INCLUDE_DIRS}") - if(EXISTS "${FAISS_LIBRARY}") - set_target_properties(FAISS::FAISS PROPERTIES - IMPORTED_LINK_INTERFACE_LANGUAGES "CXX" - IMPORTED_LOCATION "${FAISS_LIBRARY}") - endif() - endif() -endif () - - -include(${CMAKE_ROOT}/Modules/FindPackageHandleStandardArgs.cmake) -find_package_handle_standard_args(FAISS - REQUIRED_VARS FAISS_LIBRARY FAISS_LOCATION - VERSION_VAR FAISS_VERSION_STRING) - -mark_as_advanced(FAISS_LOCATION FAISS_LIBRARY) diff --git a/cpp/cmake/Modules/FindNCCL.cmake b/cpp/cmake/Modules/FindNCCL.cmake deleted file mode 100644 index 0f673707444..00000000000 --- a/cpp/cmake/Modules/FindNCCL.cmake +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# Based on FindPNG.cmake from cmake 3.14.3 - -#[=======================================================================[.rst: -FindNCCL --------- - -Find libnccl, the NVIDIA Collective Communication Library. A hint to find NCCL -can be provided by setting NCCL_INSTALL_DIR. - -Imported targets -^^^^^^^^^^^^^^^^ - -This module defines the following :prop_tgt:`IMPORTED` target: - -``NCCL::NCCL`` - The libnccl library, if found. - -Result variables -^^^^^^^^^^^^^^^^ - -This module will set the following variables in your project: - -``NCCL_INCLUDE_DIRS`` - where to find nccl.h , etc. -``NCCL_LIBRARIES`` - the libraries to link against to use NCCL. -``NCCL_FOUND`` - If false, do not try to use NCCL. -``NCCL_VERSION_STRING`` - the version of the NCCL library found - -#]=======================================================================] - -find_path(NCCL_NCCL_INCLUDE_DIR nccl.h HINTS ${NCCL_INSTALL_DIR} PATH_SUFFIXES include) - -#TODO: Does this need to support finding the static library? - -list(APPEND NCCL_NAMES nccl libnccl) -set(_NCCL_VERSION_SUFFIXES 2) - -foreach(v IN LISTS _NCCL_VERSION_SUFFIXES) - list(APPEND NCCL_NAMES nccl${v} libnccl${v}) -endforeach() -unset(_NCCL_VERSION_SUFFIXES) -# For compatibility with versions prior to this multi-config search, honor -# any NCCL_LIBRARY that is already specified and skip the search. -if(NOT NCCL_LIBRARY) - find_library(NCCL_LIBRARY_RELEASE NAMES ${NCCL_NAMES} HINTS ${NCCL_INSTALL_DIR} PATH_SUFFIXES lib) - include(${CMAKE_ROOT}/Modules/SelectLibraryConfigurations.cmake) - select_library_configurations(NCCL) - mark_as_advanced(NCCL_LIBRARY_RELEASE) -endif() -unset(NCCL_NAMES) - -# Set by select_library_configurations(), but we want the one from -# find_package_handle_standard_args() below. 
-unset(NCCL_FOUND) - -if (NCCL_LIBRARY AND NCCL_NCCL_INCLUDE_DIR) - set(NCCL_INCLUDE_DIRS ${NCCL_NCCL_INCLUDE_DIR} ) - set(NCCL_LIBRARY ${NCCL_LIBRARY}) - - if(NOT TARGET NCCL::NCCL) - add_library(NCCL::NCCL UNKNOWN IMPORTED) - set_target_properties(NCCL::NCCL PROPERTIES - INTERFACE_INCLUDE_DIRECTORIES "${NCCL_INCLUDE_DIRS}") - if(EXISTS "${NCCL_LIBRARY}") - set_target_properties(NCCL::NCCL PROPERTIES - IMPORTED_LINK_INTERFACE_LANGUAGES "C" - IMPORTED_LOCATION "${NCCL_LIBRARY}") - endif() - endif() -endif () - -if (NCCL_NCCL_INCLUDE_DIR AND EXISTS "${NCCL_NCCL_INCLUDE_DIR}/nccl.h") - file(STRINGS "${NCCL_NCCL_INCLUDE_DIR}/nccl.h" nccl_major_version_str REGEX "^#define[ \t]+NCCL_MAJOR[ \t]+[0-9]+") - string(REGEX REPLACE "^#define[ \t]+NCCL_MAJOR[ \t]+([0-9]+)" "\\1" nccl_major_version_str "${nccl_major_version_str}") - - file(STRINGS "${NCCL_NCCL_INCLUDE_DIR}/nccl.h" nccl_minor_version_str REGEX "^#define[ \t]+NCCL_MINOR[ \t]+[0-9]+") - string(REGEX REPLACE "^#define[ \t]+NCCL_MINOR[ \t]+([0-9]+)" "\\1" nccl_minor_version_str "${nccl_minor_version_str}") - - file(STRINGS "${NCCL_NCCL_INCLUDE_DIR}/nccl.h" nccl_patch_version_str REGEX "^#define[ \t]+NCCL_PATCH[ \t]+[0-9]+") - string(REGEX REPLACE "^#define[ \t]+NCCL_PATCH[ \t]+([0-9]+)" "\\1" nccl_patch_version_str "${nccl_patch_version_str}") - - file(STRINGS "${NCCL_NCCL_INCLUDE_DIR}/nccl.h" nccl_suffix_version_str REGEX "^#define[ \t]+NCCL_SUFFIX[ \t]+\".*\"") - string(REGEX REPLACE "^#define[ \t]+NCCL_SUFFIX[ \t]+\"(.*)\"" "\\1" nccl_suffix_version_str "${nccl_suffix_version_str}") - - set(NCCL_VERSION_STRING "${nccl_major_version_str}.${nccl_minor_version_str}.${nccl_patch_version_str}${nccl_suffix_version_str}") - - unset(nccl_major_version_str) - unset(nccl_minor_version_str) - unset(nccl_patch_version_str) - unset(nccl_suffix_version_str) -endif () - -include(${CMAKE_ROOT}/Modules/FindPackageHandleStandardArgs.cmake) -find_package_handle_standard_args(NCCL - REQUIRED_VARS NCCL_LIBRARY NCCL_NCCL_INCLUDE_DIR - VERSION_VAR NCCL_VERSION_STRING) - -mark_as_advanced(NCCL_NCCL_INCLUDE_DIR NCCL_LIBRARY) diff --git a/cpp/cmake/faiss_cuda11.patch b/cpp/cmake/faiss_cuda11.patch deleted file mode 100644 index 496ca0e7b23..00000000000 --- a/cpp/cmake/faiss_cuda11.patch +++ /dev/null @@ -1,40 +0,0 @@ -diff --git a/configure b/configure -index ed40dae..f88ed0a 100755 ---- a/configure -+++ b/configure -@@ -2970,7 +2970,7 @@ ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ex - ac_compiler_gnu=$ac_cv_cxx_compiler_gnu - - -- ax_cxx_compile_alternatives="11 0x" ax_cxx_compile_cxx11_required=true -+ ax_cxx_compile_alternatives="14 11 0x" ax_cxx_compile_cxx11_required=true - ac_ext=cpp - ac_cpp='$CXXCPP $CPPFLAGS' - ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' -diff --git a/gpu/utils/DeviceDefs.cuh b/gpu/utils/DeviceDefs.cuh -index 89d3dda..bc0f9b5 100644 ---- a/gpu/utils/DeviceDefs.cuh -+++ b/gpu/utils/DeviceDefs.cuh -@@ -13,7 +13,7 @@ - namespace faiss { namespace gpu { - - #ifdef __CUDA_ARCH__ --#if __CUDA_ARCH__ <= 750 -+#if __CUDA_ARCH__ <= 800 - constexpr int kWarpSize = 32; - #else - #error Unknown __CUDA_ARCH__; please define parameters for compute capability -diff --git a/gpu/utils/MatrixMult-inl.cuh b/gpu/utils/MatrixMult-inl.cuh -index ede225e..4f7eb44 100644 ---- a/gpu/utils/MatrixMult-inl.cuh -+++ b/gpu/utils/MatrixMult-inl.cuh -@@ -51,6 +51,9 @@ rawGemm(cublasHandle_t handle, - auto cBT = GetCudaType::Type; - - // Always accumulate in f32 -+# if __CUDACC_VER_MAJOR__ >= 11 -+ 
cublasSetMathMode(handle, CUBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION); -+# endif - return cublasSgemmEx(handle, transa, transb, m, n, k, - &fAlpha, A, cAT, lda, - B, cBT, ldb, diff --git a/cpp/cmake/thirdparty/get_cuco.cmake b/cpp/cmake/thirdparty/get_cuco.cmake new file mode 100644 index 00000000000..a255793285f --- /dev/null +++ b/cpp/cmake/thirdparty/get_cuco.cmake @@ -0,0 +1,35 @@ +#============================================================================= +# Copyright (c) 2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#============================================================================= + +function(find_and_configure_cuco VERSION) + + rapids_cpm_find(cuco ${VERSION} + GLOBAL_TARGETS cuco cuco::cuco + CPM_ARGS + GIT_REPOSITORY https://github.com/NVIDIA/cuCollections.git + GIT_TAG 7678a5ecaa192b8983b02a0191a140097171713e + OPTIONS "BUILD_TESTS OFF" + "BUILD_BENCHMARKS OFF" + "BUILD_EXAMPLES OFF" + ) + + if(NOT TARGET cuco::cuco) + add_library(cuco::cuco ALIAS cuco) + endif() + +endfunction() + +find_and_configure_cuco(0.0.1) diff --git a/cpp/cmake/thirdparty/get_cuhornet.cmake b/cpp/cmake/thirdparty/get_cuhornet.cmake new file mode 100644 index 00000000000..28c83161ff4 --- /dev/null +++ b/cpp/cmake/thirdparty/get_cuhornet.cmake @@ -0,0 +1,45 @@ +#============================================================================= +# Copyright (c) 2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#============================================================================= + +function(find_and_configure_cuhornet) + + # We are not using the cuhornet CMake targets, so no need to call `add_subdirectory()`, + # or to use CPM + FetchContent_Declare( + cuhornet + GIT_REPOSITORY https://github.com/rapidsai/cuhornet.git + GIT_TAG 261399356e62bd76fa7628880f1a847aee713eed + SOURCE_SUBDIR hornet + ) + FetchContent_GetProperties(cuhornet) + + if(NOT cuhornet_POPULATED) + FetchContent_Populate(cuhornet) + endif() + + if(NOT TARGET cugraph::cuHornet) + add_library(cugraph::cuHornet IMPORTED INTERFACE GLOBAL) + target_include_directories(cugraph::cuHornet INTERFACE + "${cuhornet_SOURCE_DIR}/hornet/include" + "${cuhornet_SOURCE_DIR}/hornetsnest/include" + "${cuhornet_SOURCE_DIR}/xlib/include" + "${cuhornet_SOURCE_DIR}/primitives" + ) + endif() +endfunction() + + +find_and_configure_cuhornet() diff --git a/cpp/cmake/thirdparty/get_faiss.cmake b/cpp/cmake/thirdparty/get_faiss.cmake new file mode 100644 index 00000000000..4991990e379 --- /dev/null +++ b/cpp/cmake/thirdparty/get_faiss.cmake @@ -0,0 +1,50 @@ +#============================================================================= +# Copyright (c) 2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#============================================================================= + +function(find_and_configure_faiss) + set(oneValueArgs VERSION PINNED_TAG) + cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN} ) + + rapids_find_generate_module(FAISS + HEADER_NAMES faiss/IndexFlat.h + LIBRARY_NAMES faiss + ) + + rapids_cpm_find(FAISS ${PKG_VERSION} + GLOBAL_TARGETS faiss + CPM_ARGS + GIT_REPOSITORY https://github.com/facebookresearch/faiss.git + GIT_TAG ${PKG_PINNED_TAG} + OPTIONS + "FAISS_ENABLE_PYTHON OFF" + "BUILD_SHARED_LIBS OFF" + "CUDAToolkit_ROOT ${CUDAToolkit_LIBRARY_DIR}" + "FAISS_ENABLE_GPU ON" + "BUILD_TESTING OFF" + "CMAKE_MESSAGE_LOG_LEVEL VERBOSE" + ) + + if(FAISS_ADDED) + set(FAISS_GPU_HEADERS ${FAISS_SOURCE_DIR} PARENT_SCOPE) + add_library(FAISS::FAISS ALIAS faiss) + endif() + +endfunction() + +find_and_configure_faiss(VERSION 1.7.0 + PINNED_TAG bde7c0027191f29c9dadafe4f6e68ca0ee31fb30 + ) diff --git a/cpp/cmake/thirdparty/get_gtest.cmake b/cpp/cmake/thirdparty/get_gtest.cmake new file mode 100644 index 00000000000..e413cad7601 --- /dev/null +++ b/cpp/cmake/thirdparty/get_gtest.cmake @@ -0,0 +1,43 @@ +#============================================================================= +# Copyright (c) 2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +#============================================================================= + +function(find_and_configure_gtest VERSION) + + if(TARGET GTest::gtest) + return() + endif() + + rapids_cpm_find(GTest ${VERSION} + GLOBAL_TARGETS gmock gmock_main gtest gtest_main GTest::gmock GTest::gtest GTest::gtest_main + CPM_ARGS + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG release-${VERSION} + GIT_SHALLOW TRUE + OPTIONS "INSTALL_GTEST ON" + # googletest >= 1.10.0 provides a cmake config file -- use it if it exists + FIND_PACKAGE_ARGUMENTS "CONFIG" + ) + + if(NOT TARGET GTest::gtest) + add_library(GTest::gmock ALIAS gmock) + add_library(GTest::gmock_main ALIAS gmock_main) + add_library(GTest::gtest ALIAS gtest) + add_library(GTest::gtest_main ALIAS gtest_main) + endif() + +endfunction() + +find_and_configure_gtest(1.10.0) diff --git a/cpp/cmake/thirdparty/get_gunrock.cmake b/cpp/cmake/thirdparty/get_gunrock.cmake new file mode 100644 index 00000000000..056cd4bd5ea --- /dev/null +++ b/cpp/cmake/thirdparty/get_gunrock.cmake @@ -0,0 +1,64 @@ +#============================================================================= +# Copyright (c) 2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#============================================================================= + +function(find_and_configure_gunrock VERSION) + + if(NOT TARGET gunrock) + set(GUNROCK_GENCODE_SM60 OFF) + set(GUNROCK_GENCODE_SM61 OFF) + set(GUNROCK_GENCODE_SM70 OFF) + set(GUNROCK_GENCODE_SM72 OFF) + set(GUNROCK_GENCODE_SM75 OFF) + set(GUNROCK_GENCODE_SM80 OFF) + + foreach(arch IN LISTS CMAKE_CUDA_ARCHITECTURES) + string(REPLACE "-real" "" arch ${arch}) + set(GUNROCK_GENCODE_SM${arch} "ON") + endforeach() + + # FIXME: gunrock is still using ExternalProject instead of CPM, as version 1.2 + # doesn't work with CPM + + include(ExternalProject) + + set(GUNROCK_DIR ${CMAKE_CURRENT_BINARY_DIR}/gunrock) + ExternalProject_Add(gunrock_ext + GIT_REPOSITORY https://github.com/gunrock/gunrock.git + GIT_TAG v${VERSION} + PREFIX ${GUNROCK_DIR} + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX= + -DGUNROCK_BUILD_SHARED_LIBS=OFF + -DGUNROCK_BUILD_TESTS=OFF + -DCUDA_AUTODETECT_GENCODE=OFF + -DGUNROCK_GENCODE_SM60=${GUNROCK_GENCODE_SM60} + -DGUNROCK_GENCODE_SM61=${GUNROCK_GENCODE_SM61} + -DGUNROCK_GENCODE_SM70=${GUNROCK_GENCODE_SM70} + -DGUNROCK_GENCODE_SM72=${GUNROCK_GENCODE_SM72} + -DGUNROCK_GENCODE_SM75=${GUNROCK_GENCODE_SM75} + -DGUNROCK_GENCODE_SM80=${GUNROCK_GENCODE_SM80} + BUILD_BYPRODUCTS ${GUNROCK_DIR}/src/gunrock_ext-build/lib/libgunrock.a + INSTALL_COMMAND "" + ) + + add_library(gunrock STATIC IMPORTED) + add_dependencies(gunrock gunrock_ext) + set_property(TARGET gunrock PROPERTY IMPORTED_LOCATION "${GUNROCK_DIR}/src/gunrock_ext-build/lib/libgunrock.a") + target_include_directories(gunrock INTERFACE "${GUNROCK_DIR}/src/gunrock_ext") + endif() +endfunction() + + +find_and_configure_gunrock(1.2) diff --git a/cpp/cmake/thirdparty/get_nccl.cmake b/cpp/cmake/thirdparty/get_nccl.cmake new file mode 100644 index 00000000000..30ec976f27c --- /dev/null +++ b/cpp/cmake/thirdparty/get_nccl.cmake @@ -0,0 +1,42 @@ +#============================================================================= +# Copyright (c) 2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#============================================================================= + +function(find_and_configure_nccl) + + if(TARGET NCCL::NCCL) + return() + endif() + + set(oneValueArgs VERSION PINNED_TAG) + cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN} ) + + rapids_find_generate_module(NCCL + HEADER_NAMES nccl.h + LIBRARY_NAMES nccl + ) + + # Currently NCCL has no CMake build-system so we require + # it built and installed on the machine already + rapids_find_package(NCCL REQUIRED) + +endfunction() + +find_and_configure_nccl() + + + + diff --git a/cpp/cmake/thirdparty/get_raft.cmake b/cpp/cmake/thirdparty/get_raft.cmake new file mode 100644 index 00000000000..d8c9358e023 --- /dev/null +++ b/cpp/cmake/thirdparty/get_raft.cmake @@ -0,0 +1,48 @@ +#============================================================================= +# Copyright (c) 2021, NVIDIA CORPORATION. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#============================================================================= + +function(find_and_configure_raft) + + set(oneValueArgs VERSION FORK PINNED_TAG) + cmake_parse_arguments(PKG "" "${oneValueArgs}" "" ${ARGN} ) + + rapids_cpm_find(raft ${PKG_VERSION} + GLOBAL_TARGETS raft::raft + BUILD_EXPORT_SET cugraph-exports + INSTALL_EXPORT_SET cugraph-exports + CPM_ARGS + GIT_REPOSITORY https://github.com/${PKG_FORK}/raft.git + GIT_TAG ${PKG_PINNED_TAG} + SOURCE_SUBDIR cpp + OPTIONS "BUILD_TESTS OFF" + ) + + message(VERBOSE "CUML: Using RAFT located in ${raft_SOURCE_DIR}") + +endfunction() + +set(CUGRAPH_MIN_VERSION_raft "${CUGRAPH_VERSION_MAJOR}.${CUGRAPH_VERSION_MINOR}.00") +set(CUGRAPH_BRANCH_VERSION_raft "${CUGRAPH_VERSION_MAJOR}.${CUGRAPH_VERSION_MINOR}") + + +# Change pinned tag and fork here to test a commit in CI +# To use a different RAFT locally, set the CMake variable +# RPM_raft_SOURCE=/path/to/local/raft +find_and_configure_raft(VERSION ${CUGRAPH_MIN_VERSION_raft} + FORK rapidsai + PINNED_TAG branch-${CUGRAPH_BRANCH_VERSION_raft} + ) + diff --git a/cpp/cmake/thirdparty/get_rmm.cmake b/cpp/cmake/thirdparty/get_rmm.cmake new file mode 100644 index 00000000000..aecb6489f92 --- /dev/null +++ b/cpp/cmake/thirdparty/get_rmm.cmake @@ -0,0 +1,47 @@ +#============================================================================= +# Copyright (c) 2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#============================================================================= + +function(find_and_configure_rmm VERSION) + + if(${VERSION} MATCHES [=[([0-9]+)\.([0-9]+)\.([0-9]+)]=]) + set(MAJOR_AND_MINOR "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}") + else() + set(MAJOR_AND_MINOR "${VERSION}") + endif() + + if(TARGET rmm::rmm) + return() + endif() + + rapids_cpm_find(rmm ${VERSION} + GLOBAL_TARGETS rmm::rmm + BUILD_EXPORT_SET cugraph-exports + INSTALL_EXPORT_SET cugraph-exports + CPM_ARGS + GIT_REPOSITORY https://github.com/rapidsai/rmm.git + GIT_TAG branch-${MAJOR_AND_MINOR} + GIT_SHALLOW TRUE + OPTIONS "BUILD_TESTS OFF" + "BUILD_BENCHMARKS OFF" + "CUDA_STATIC_RUNTIME ${CUDA_STATIC_RUNTIME}" + "DISABLE_DEPRECATION_WARNING ${DISABLE_DEPRECATION_WARNING}" + ) + +endfunction() + +set(CUGRAPH_MIN_VERSION_rmm "${CUGRAPH_VERSION_MAJOR}.${CUGRAPH_VERSION_MINOR}.00") + +find_and_configure_rmm(${CUGRAPH_MIN_VERSION_rmm}) diff --git a/cpp/cmake/thirdparty/get_thrust.cmake b/cpp/cmake/thirdparty/get_thrust.cmake new file mode 100644 index 00000000000..86fcffed5d2 --- /dev/null +++ b/cpp/cmake/thirdparty/get_thrust.cmake @@ -0,0 +1,29 @@ +#============================================================================= +# Copyright (c) 2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#============================================================================= + +function(find_and_configure_thrust VERSION) + + rapids_cpm_find(Thrust ${VERSION} + CPM_ARGS + GIT_REPOSITORY https://github.com/thrust/thrust.git + GIT_TAG ${VERSION} + ) + + thrust_create_target(cugraph::Thrust FROM_OPTIONS) + +endfunction() + +find_and_configure_thrust(1.12.0) diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 768f441a881..cb7eca3ed11 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -20,42 +20,34 @@ # - common test utils ----------------------------------------------------------------------------- add_library(cugraphtestutil STATIC - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/matrix_market_file_utilities.cu" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/rmat_utilities.cu" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/thrust_wrapper.cu" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/misc_utilities.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/components/wcc_graphs.cu" - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c") + utilities/matrix_market_file_utilities.cu + utilities/rmat_utilities.cu + utilities/thrust_wrapper.cu + utilities/misc_utilities.cpp + components/wcc_graphs.cu + ../../thirdparty/mmio/mmio.c) + +target_compile_options(cugraphtestutil + PUBLIC "$<$:${CUGRAPH_CXX_FLAGS}>" + "$:${CUGRAPH_CUDA_FLAGS}>>" +) set_property(TARGET cugraphtestutil PROPERTY POSITION_INDEPENDENT_CODE ON) target_include_directories(cugraphtestutil - PRIVATE - "${CUB_INCLUDE_DIR}" - "${THRUST_INCLUDE_DIR}" - "${CUCO_INCLUDE_DIR}" - "${LIBCUDACXX_INCLUDE_DIR}" - "${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}" - "${RMM_INCLUDE}" - "${NCCL_INCLUDE_DIRS}" - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio" - "${CMAKE_CURRENT_SOURCE_DIR}/../include" - "${CMAKE_CURRENT_SOURCE_DIR}" - "${RAFT_DIR}/cpp/include" + PUBLIC + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio" + "${CMAKE_CURRENT_SOURCE_DIR}" + "${CUGRAPH_SOURCE_DIR}/src" ) -target_link_libraries(cugraphtestutil cugraph) +target_link_libraries(cugraphtestutil + PUBLIC + cugraph + cuco::cuco + NCCL::NCCL +) -# CUDA_ARCHITECTURES=OFF implies cmake will not pass arch flags to the -# compiler. CUDA_ARCHITECTURES must be set to a non-empty value to prevent -# cmake warnings about policy CMP0104. With this setting, arch flags must be -# manually set! ("evaluate_gpu_archs(GPU_ARCHS)" is the current mechanism -# used in cpp/CMakeLists.txt for setting arch options). -# Run "cmake --help-policy CMP0104" for policy details. 
-# NOTE: the CUDA_ARCHITECTURES=OFF setting may be removed after migrating to -# the findcudatoolkit features in cmake 3.17+ -set_target_properties(cugraphtestutil PROPERTIES - CUDA_ARCHITECTURES OFF) add_library(cugraphmgtestutil STATIC "${CMAKE_CURRENT_SOURCE_DIR}/utilities/device_comm_wrapper.cu") @@ -93,45 +85,23 @@ set_target_properties(cugraphmgtestutil PROPERTIES ################################################################################################### # - compiler function ----------------------------------------------------------------------------- -function(ConfigureTest CMAKE_TEST_NAME CMAKE_TEST_SRC) - add_executable(${CMAKE_TEST_NAME} - ${CMAKE_TEST_SRC}) - - target_include_directories(${CMAKE_TEST_NAME} - PRIVATE - "${CUB_INCLUDE_DIR}" - "${THRUST_INCLUDE_DIR}" - "${CUCO_INCLUDE_DIR}" - "${LIBCUDACXX_INCLUDE_DIR}" - "${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}" - "${RMM_INCLUDE}" - "${NCCL_INCLUDE_DIRS}" - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio" - "${CMAKE_CURRENT_SOURCE_DIR}/../include" - "${CMAKE_CURRENT_SOURCE_DIR}/../src" - "${CMAKE_CURRENT_SOURCE_DIR}" - "${RAFT_DIR}/cpp/include" - ) - - target_link_directories(${CMAKE_TEST_NAME} - PRIVATE - # CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES is an undocumented/unsupported - # variable containing the link directories for nvcc. - "${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}") +function(ConfigureTest CMAKE_TEST_NAME) + add_executable(${CMAKE_TEST_NAME} ${ARGN}) target_link_libraries(${CMAKE_TEST_NAME} PRIVATE - cugraphtestutil - cugraph - GTest::GTest - GTest::Main - ${NCCL_LIBRARIES} - cudart - cuda - cublas - cusparse - cusolver - curand) + cugraphtestutil + cugraph + GTest::gmock + GTest::gmock_main + GTest::gtest + GTest::gtest_main + NCCL::NCCL + CUDA::cublas + CUDA::cusparse + CUDA::cusolver + CUDA::curand + ) if(OpenMP_CXX_FOUND) target_link_libraries(${CMAKE_TEST_NAME} PRIVATE @@ -184,60 +154,28 @@ function(ConfigureTest CMAKE_TEST_NAME CMAKE_TEST_SRC) ${OpenMP_CXX_LIB_NAMES}) endif(OpenMP_CXX_FOUND) - # CUDA_ARCHITECTURES=OFF implies cmake will not pass arch flags to the - # compiler. CUDA_ARCHITECTURES must be set to a non-empty value to prevent - # cmake warnings about policy CMP0104. With this setting, arch flags must be - # manually set! ("evaluate_gpu_archs(GPU_ARCHS)" is the current mechanism - # used in cpp/CMakeLists.txt for setting arch options). - # Run "cmake --help-policy CMP0104" for policy details. - # NOTE: the CUDA_ARCHITECTURES=OFF setting may be removed after migrating to - # the findcudatoolkit features in cmake 3.17+ - set_target_properties(${CMAKE_TEST_NAME} PROPERTIES - CUDA_ARCHITECTURES OFF) - add_test(NAME ${CMAKE_TEST_NAME} COMMAND ${CMAKE_TEST_NAME}) endfunction() -function(ConfigureTestMG CMAKE_TEST_NAME CMAKE_TEST_SRC) - add_executable(${CMAKE_TEST_NAME} - ${CMAKE_TEST_SRC}) - - target_include_directories(${CMAKE_TEST_NAME} - PRIVATE - "${CUB_INCLUDE_DIR}" - "${THRUST_INCLUDE_DIR}" - "${CUCO_INCLUDE_DIR}" - "${LIBCUDACXX_INCLUDE_DIR}" - "${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}" - "${RMM_INCLUDE}" - "${NCCL_INCLUDE_DIRS}" - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio" - "${CMAKE_CURRENT_SOURCE_DIR}/../include" - "${CMAKE_CURRENT_SOURCE_DIR}/../src" - "${CMAKE_CURRENT_SOURCE_DIR}" - "${RAFT_DIR}/cpp/include" - ) - - target_link_directories(${CMAKE_TEST_NAME} - PRIVATE - # CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES is an undocumented/unsupported - # variable containing the link directories for nvcc. 
- "${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}") +function(ConfigureTestMG CMAKE_TEST_NAME) + add_executable(${CMAKE_TEST_NAME} ${ARGN}) target_link_libraries(${CMAKE_TEST_NAME} PRIVATE cugraphmgtestutil cugraphtestutil cugraph - GTest::GTest - GTest::Main - ${NCCL_LIBRARIES} - cudart - cuda - cublas - cusparse - cusolver - curand) + GTest::gmock + GTest::gmock_main + GTest::gtest + GTest::gtest_main + NCCL::NCCL + CUDA::cublas + CUDA::cusparse + CUDA::cusolver + CUDA::curand + MPI::MPI_CXX + ) if(OpenMP_CXX_FOUND) target_link_libraries(${CMAKE_TEST_NAME} PRIVATE @@ -290,17 +228,6 @@ function(ConfigureTestMG CMAKE_TEST_NAME CMAKE_TEST_SRC) ${OpenMP_CXX_LIB_NAMES}) endif(OpenMP_CXX_FOUND) - # CUDA_ARCHITECTURES=OFF implies cmake will not pass arch flags to the - # compiler. CUDA_ARCHITECTURES must be set to a non-empty value to prevent - # cmake warnings about policy CMP0104. With this setting, arch flags must be - # manually set! ("evaluate_gpu_archs(GPU_ARCHS)" is the current mechanism - # used in cpp/CMakeLists.txt for setting arch options). - # Run "cmake --help-policy CMP0104" for policy details. - # NOTE: the CUDA_ARCHITECTURES=OFF setting may be removed after migrating to - # the findcudatoolkit features in cmake 3.17+ - set_target_properties(${CMAKE_TEST_NAME} PROPERTIES - CUDA_ARCHITECTURES OFF) - add_test(NAME ${CMAKE_TEST_NAME} COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} @@ -326,248 +253,124 @@ endif(RAPIDS_DATASET_ROOT_DIR) ################################################################################################### # - katz centrality tests ------------------------------------------------------------------------- - -set(KATZ_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/centrality/katz_centrality_test.cu") - - ConfigureTest(KATZ_TEST "${KATZ_TEST_SRC}") +ConfigureTest(KATZ_TEST centrality/katz_centrality_test.cu) ################################################################################################### # - betweenness centrality tests ------------------------------------------------------------------ - -set(BETWEENNESS_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/centrality/betweenness_centrality_test.cu") - - ConfigureTest(BETWEENNESS_TEST "${BETWEENNESS_TEST_SRC}") - -set(EDGE_BETWEENNESS_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/centrality/edge_betweenness_centrality_test.cu") - - ConfigureTest(EDGE_BETWEENNESS_TEST "${EDGE_BETWEENNESS_TEST_SRC}") +ConfigureTest(BETWEENNESS_TEST centrality/betweenness_centrality_test.cu) +ConfigureTest(EDGE_BETWEENNESS_TEST centrality/edge_betweenness_centrality_test.cu) ################################################################################################### # - SSSP tests ------------------------------------------------------------------------------------ - -set(SSSP_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/traversal/sssp_test.cu") - -ConfigureTest(SSSP_TEST "${SSSP_TEST_SRCS}") +ConfigureTest(SSSP_TEST traversal/sssp_test.cu) ################################################################################################### # - BFS tests ------------------------------------------------------------------------------------- - -set(BFS_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/traversal/bfs_test.cu") - -ConfigureTest(BFS_TEST "${BFS_TEST_SRCS}") +ConfigureTest(BFS_TEST traversal/bfs_test.cu) ################################################################################################### # - LOUVAIN tests --------------------------------------------------------------------------------- - -set(LOUVAIN_TEST_SRC - 
"${CMAKE_CURRENT_SOURCE_DIR}/community/louvain_test.cpp") - -ConfigureTest(LOUVAIN_TEST "${LOUVAIN_TEST_SRC}") +ConfigureTest(LOUVAIN_TEST community/louvain_test.cpp) ################################################################################################### # - LEIDEN tests --------------------------------------------------------------------------------- - -set(LEIDEN_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/community/leiden_test.cpp") - -ConfigureTest(LEIDEN_TEST "${LEIDEN_TEST_SRC}") +ConfigureTest(LEIDEN_TEST community/leiden_test.cpp) ################################################################################################### # - ECG tests --------------------------------------------------------------------------------- - -set(ECG_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/community/ecg_test.cpp") - -ConfigureTest(ECG_TEST "${ECG_TEST_SRC}") +ConfigureTest(ECG_TEST community/ecg_test.cpp) ################################################################################################### # - Balanced cut clustering tests ----------------------------------------------------------------- - -set(BALANCED_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/community/balanced_edge_test.cpp") - -ConfigureTest(BALANCED_TEST "${BALANCED_TEST_SRC}") +ConfigureTest(BALANCED_TEST community/balanced_edge_test.cpp) ################################################################################################### # - TRIANGLE tests -------------------------------------------------------------------------------- - -set(TRIANGLE_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/community/triangle_test.cu") - -ConfigureTest(TRIANGLE_TEST "${TRIANGLE_TEST_SRC}") +ConfigureTest(TRIANGLE_TEST community/triangle_test.cu) ################################################################################################### # - EGO tests -------------------------------------------------------------------------------- +ConfigureTest(EGO_TEST community/egonet_test.cu) -set(EGO_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/community/egonet_test.cu") - -ConfigureTest(EGO_TEST "${EGO_TEST_SRC}" "") ################################################################################################### # - RENUMBERING tests ----------------------------------------------------------------------------- - -set(RENUMBERING_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/renumber/renumber_test.cu") - -ConfigureTest(RENUMBERING_TEST "${RENUMBERING_TEST_SRC}") +ConfigureTest(RENUMBERING_TEST renumber/renumber_test.cu) ################################################################################################### # - FORCE ATLAS 2 tests -------------------------------------------------------------------------- - -set(FA2_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/layout/force_atlas2_test.cu") - -ConfigureTest(FA2_TEST "${FA2_TEST_SRC}") +ConfigureTest(FA2_TEST layout/force_atlas2_test.cu) ################################################################################################### # - TSP tests -------------------------------------------------------------------------- - -set(TSP_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/traversal/tsp_test.cu") - - ConfigureTest(TSP_TEST "${TSP_TEST_SRC}" "") +ConfigureTest(TSP_TEST traversal/tsp_test.cu) ################################################################################################### # - CONNECTED COMPONENTS tests ------------------------------------------------------------------- - -set(CONNECT_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/components/con_comp_test.cu") - -ConfigureTest(CONNECT_TEST 
"${CONNECT_TEST_SRC}") +ConfigureTest(CONNECT_TEST components/con_comp_test.cu) ################################################################################################### # - STRONGLY CONNECTED COMPONENTS tests ---------------------------------------------------------- - -set(SCC_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/components/scc_test.cu") - -ConfigureTest(SCC_TEST "${SCC_TEST_SRC}") +ConfigureTest(SCC_TEST components/scc_test.cu) ################################################################################################### # - WEAKLY CONNECTED COMPONENTS tests ---------------------------------------------------------- - -set(WCC_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/components/wcc_test.cpp") - -ConfigureTest(WCC_TEST "${WCC_TEST_SRC}") +ConfigureTest(WCC_TEST components/wcc_test.cpp) ################################################################################################### -#-Hungarian (Linear Assignment Problem) tests --------------------------------------------------------------------- - -set(HUNGARIAN_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/linear_assignment/hungarian_test.cu") - -ConfigureTest(HUNGARIAN_TEST "${HUNGARIAN_TEST_SRC}") +#-Hungarian (Linear Assignment Problem) tests ---------------------------------------------------- +ConfigureTest(HUNGARIAN_TEST linear_assignment/hungarian_test.cu) ################################################################################################### # - MST tests ---------------------------------------------------------------------------- - -set(MST_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/tree/mst_test.cu") - -ConfigureTest(MST_TEST "${MST_TEST_SRC}") +ConfigureTest(MST_TEST tree/mst_test.cu) ################################################################################################### # - Experimental stream tests ----------------------------------------------------- - -set(EXPERIMENTAL_STREAM_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/experimental/streams.cu") - -ConfigureTest(EXPERIMENTAL_STREAM "${EXPERIMENTAL_STREAM_SRCS}" "") +ConfigureTest(EXPERIMENTAL_STREAM experimental/streams.cu) ################################################################################################### # - Experimental R-mat graph generation tests ----------------------------------------------------- - -set(EXPERIMENTAL_GENERATE_RMAT_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/experimental/generate_rmat_test.cpp") - -ConfigureTest(EXPERIMENTAL_GENERATE_RMAT_TEST "${EXPERIMENTAL_GENERATE_RMAT_TEST_SRCS}" "") +ConfigureTest(EXPERIMENTAL_GENERATE_RMAT_TEST experimental/generate_rmat_test.cpp) ################################################################################################### # - Experimental Graph tests ---------------------------------------------------------------------- - -set(EXPERIMENTAL_GRAPH_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/experimental/graph_test.cpp") - -ConfigureTest(EXPERIMENTAL_GRAPH_TEST "${EXPERIMENTAL_GRAPH_TEST_SRCS}") +ConfigureTest(EXPERIMENTAL_GRAPH_TEST experimental/graph_test.cpp) ################################################################################################### # - Experimental weight-sum tests ----------------------------------------------------------------- - -set(EXPERIMENTAL_WEIGHT_SUM_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/experimental/weight_sum_test.cpp") - -ConfigureTest(EXPERIMENTAL_WEIGHT_SUM_TEST "${EXPERIMENTAL_WEIGHT_SUM_TEST_SRCS}") +ConfigureTest(EXPERIMENTAL_WEIGHT_SUM_TEST experimental/weight_sum_test.cpp) 
################################################################################################### # - Experimental degree tests --------------------------------------------------------------------- - -set(EXPERIMENTAL_DEGREE_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/experimental/degree_test.cpp") - -ConfigureTest(EXPERIMENTAL_DEGREE_TEST "${EXPERIMENTAL_DEGREE_TEST_SRCS}") +ConfigureTest(EXPERIMENTAL_DEGREE_TEST experimental/degree_test.cpp) ################################################################################################### # - Experimental coarsening tests ----------------------------------------------------------------- - -set(EXPERIMENTAL_COARSEN_GRAPH_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/experimental/coarsen_graph_test.cpp") - -ConfigureTest(EXPERIMENTAL_COARSEN_GRAPH_TEST "${EXPERIMENTAL_COARSEN_GRAPH_TEST_SRCS}") +ConfigureTest(EXPERIMENTAL_COARSEN_GRAPH_TEST experimental/coarsen_graph_test.cpp) ################################################################################################### # - Experimental induced subgraph tests ----------------------------------------------------------- - -set(EXPERIMENTAL_INDUCED_SUBGRAPH_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/experimental/induced_subgraph_test.cpp") - -ConfigureTest(EXPERIMENTAL_INDUCED_SUBGRAPH_TEST "${EXPERIMENTAL_INDUCED_SUBGRAPH_TEST_SRCS}") +ConfigureTest(EXPERIMENTAL_INDUCED_SUBGRAPH_TEST experimental/induced_subgraph_test.cpp) ################################################################################################### # - Experimental BFS tests ------------------------------------------------------------------------ - -set(EXPERIMENTAL_BFS_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/experimental/bfs_test.cpp") - -ConfigureTest(EXPERIMENTAL_BFS_TEST "${EXPERIMENTAL_BFS_TEST_SRCS}") +ConfigureTest(EXPERIMENTAL_BFS_TEST experimental/bfs_test.cpp) ################################################################################################### # - Experimental Multi-source BFS tests ----------------------------------------------------------- - -set(EXPERIMENTAL_MSBFS_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/experimental/ms_bfs_test.cpp") - -ConfigureTest(EXPERIMENTAL_MSBFS_TEST "${EXPERIMENTAL_MSBFS_TEST_SRCS}") - +ConfigureTest(EXPERIMENTAL_MSBFS_TEST experimental/ms_bfs_test.cpp) ################################################################################################### # - Experimental SSSP tests ----------------------------------------------------------------------- - -set(EXPERIMENTAL_SSSP_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/experimental/sssp_test.cpp") - -ConfigureTest(EXPERIMENTAL_SSSP_TEST "${EXPERIMENTAL_SSSP_TEST_SRCS}") +ConfigureTest(EXPERIMENTAL_SSSP_TEST experimental/sssp_test.cpp) ################################################################################################### # - Experimental PAGERANK tests ------------------------------------------------------------------- - -set(EXPERIMENTAL_PAGERANK_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/experimental/pagerank_test.cpp") - -ConfigureTest(EXPERIMENTAL_PAGERANK_TEST "${EXPERIMENTAL_PAGERANK_TEST_SRCS}") +ConfigureTest(EXPERIMENTAL_PAGERANK_TEST experimental/pagerank_test.cpp) ################################################################################################### # - Experimental KATZ_CENTRALITY tests ------------------------------------------------------------ - -set(EXPERIMENTAL_KATZ_CENTRALITY_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/experimental/katz_centrality_test.cpp") - 
-ConfigureTest(EXPERIMENTAL_KATZ_CENTRALITY_TEST "${EXPERIMENTAL_KATZ_CENTRALITY_TEST_SRCS}") +ConfigureTest(EXPERIMENTAL_KATZ_CENTRALITY_TEST experimental/katz_centrality_test.cpp) ################################################################################################### # - WEAKLY CONNECTED COMPONENTS tests ------------------------------------------------------------- @@ -579,32 +382,27 @@ ConfigureTest(WEAKLY_CONNECTED_COMPONENTS_TEST "${WEAKLY_CONNECTED_COMPONENTS_TE ################################################################################################### # - Experimental RANDOM_WALKS tests ------------------------------------------------------------ - -set(RANDOM_WALKS_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/sampling/random_walks_test.cu") - -ConfigureTest(RANDOM_WALKS_TEST "${RANDOM_WALKS_TEST_SRCS}") +ConfigureTest(RANDOM_WALKS_TEST sampling/random_walks_test.cu) ################################################################################################### -set(RANDOM_WALKS_LOW_LEVEL_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/sampling/rw_low_level_test.cu") - -ConfigureTest(RANDOM_WALKS_LOW_LEVEL_TEST "${RANDOM_WALKS_LOW_LEVEL_SRCS}") +ConfigureTest(RANDOM_WALKS_LOW_LEVEL_TEST sampling/rw_low_level_test.cu) ################################################################################################### -set(RANDOM_WALKS_PROFILING_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/sampling/random_walks_profiling.cu") - # FIXME: since this is technically not a test, consider refactoring the the # ConfigureTest function to share common code with a new ConfigureBenchmark # function (which would not link gtest, etc.) -ConfigureTest(RANDOM_WALKS_PROFILING "${RANDOM_WALKS_PROFILING_SRCS}") +ConfigureTest(RANDOM_WALKS_PROFILING sampling/random_walks_profiling.cu) ################################################################################################### # - MG tests -------------------------------------------------------------------------------------- if(BUILD_CUGRAPH_MG_TESTS) + + ############################################################################################### + # - find MPI - only enabled if MG tests are to be built + find_package(MPI REQUIRED) + execute_process( COMMAND nvidia-smi -L COMMAND wc -l @@ -616,64 +414,34 @@ if(BUILD_CUGRAPH_MG_TESTS) if(MPI_CXX_FOUND) ########################################################################################### # - MG PAGERANK tests --------------------------------------------------------------------- - - set(MG_PAGERANK_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/pagerank/mg_pagerank_test.cpp") - - ConfigureTestMG(MG_PAGERANK_TEST "${MG_PAGERANK_TEST_SRCS}") - target_link_libraries(MG_PAGERANK_TEST PRIVATE MPI::MPI_C MPI::MPI_CXX) + ConfigureTestMG(MG_PAGERANK_TEST pagerank/mg_pagerank_test.cpp) ########################################################################################### # - MG KATZ CENTRALITY tests -------------------------------------------------------------- - - set(MG_KATZ_CENTRALITY_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/experimental/mg_katz_centrality_test.cpp") - - ConfigureTestMG(MG_KATZ_CENTRALITY_TEST "${MG_KATZ_CENTRALITY_TEST_SRCS}") - target_link_libraries(MG_KATZ_CENTRALITY_TEST PRIVATE MPI::MPI_C MPI::MPI_CXX) + ConfigureTestMG(MG_KATZ_CENTRALITY_TEST experimental/mg_katz_centrality_test.cpp) ########################################################################################### # - MG BFS tests -------------------------------------------------------------------------- - - 
set(MG_BFS_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/experimental/mg_bfs_test.cpp") - - ConfigureTestMG(MG_BFS_TEST "${MG_BFS_TEST_SRCS}") - target_link_libraries(MG_BFS_TEST PRIVATE MPI::MPI_C MPI::MPI_CXX) + ConfigureTestMG(MG_BFS_TEST experimental/mg_bfs_test.cpp) ########################################################################################### # - MG SSSP tests ------------------------------------------------------------------------- - - set(MG_SSSP_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/experimental/mg_sssp_test.cpp") - - ConfigureTestMG(MG_SSSP_TEST "${MG_SSSP_TEST_SRCS}") - target_link_libraries(MG_SSSP_TEST PRIVATE MPI::MPI_C MPI::MPI_CXX) + ConfigureTestMG(MG_SSSP_TEST experimental/mg_sssp_test.cpp) ########################################################################################### # - MG LOUVAIN tests ---------------------------------------------------------------------- - - set(MG_LOUVAIN_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/community/mg_louvain_helper.cu" - "${CMAKE_CURRENT_SOURCE_DIR}/community/mg_louvain_test.cpp") - - ConfigureTestMG(MG_LOUVAIN_TEST "${MG_LOUVAIN_TEST_SRCS}") - target_link_libraries(MG_LOUVAIN_TEST PRIVATE MPI::MPI_C MPI::MPI_CXX) + ConfigureTestMG(MG_LOUVAIN_TEST + community/mg_louvain_helper.cu + community/mg_louvain_test.cpp) ########################################################################################### # - MG WEAKLY CONNECTED COMPONENTS tests -------------------------------------------------- - - set(MG_WEAKLY_CONNECTED_COMPONENTS_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/components/mg_weakly_connected_components_test.cpp") - ConfigureTestMG(MG_WEAKLY_CONNECTED_COMPONENTS_TEST - "${MG_WEAKLY_CONNECTED_COMPONENTS_TEST_SRCS}") - target_link_libraries(MG_WEAKLY_CONNECTED_COMPONENTS_TEST PRIVATE MPI::MPI_C MPI::MPI_CXX) - - else(MPI_CXX_FOUND) + components/mg_weakly_connected_components_test.cpp) + else() message(FATAL_ERROR "OpenMPI NOT found, cannot build MG tests.") - endif(MPI_CXX_FOUND) -endif(BUILD_CUGRAPH_MG_TESTS) + endif() +endif() ################################################################################################### ### enable testing ################################################################################ diff --git a/python/setup.py b/python/setup.py index 799cb805afa..d614eef24d6 100644 --- a/python/setup.py +++ b/python/setup.py @@ -65,8 +65,9 @@ raft_path = get_environment_option('RAFT_PATH') # FIXME: This could clone RAFT, even if it's not needed (eg. running --clean). 
-raft_include_dir = use_raft_package(raft_path, libcugraph_path, - git_info_file='../cpp/CMakeLists.txt') +# deprecated: This functionality will go away after +# https://github.com/rapidsai/raft/issues/83 +raft_include_dir = use_raft_package(raft_path, libcugraph_path) class CleanCommand(Command): diff --git a/python/setuputils.py b/python/setuputils.py index d93051d05ef..a64e60e1c32 100644 --- a/python/setuputils.py +++ b/python/setuputils.py @@ -20,7 +20,6 @@ import shutil import subprocess import sys -import warnings from pathlib import Path @@ -71,7 +70,7 @@ def clean_folder(path): def use_raft_package(raft_path, cpp_build_path, - git_info_file='../cpp/cmake/Dependencies.cmake'): + git_info_file=None): """ Function to use the python code in RAFT in package.raft @@ -139,10 +138,13 @@ def use_raft_package(raft_path, cpp_build_path, def clone_repo_if_needed(name, cpp_build_path=None, git_info_file=None): if git_info_file is None: - git_info_file = _get_repo_path() + '/cpp/CMakeLists.txt' + git_info_file = \ + _get_repo_path() + '/cpp/cmake/thirdparty/get_{}.cmake'.format( + name + ) if cpp_build_path is None or cpp_build_path is False: - cpp_build_path = _get_repo_path() + '/cpp/build/' + cpp_build_path = _get_repo_path() + '/cpp/build/_deps/' repo_cloned = get_submodule_dependency(name, cpp_build_path=cpp_build_path, @@ -152,7 +154,7 @@ def clone_repo_if_needed(name, cpp_build_path=None, repo_path = ( _get_repo_path() + '/python/_external_repositories/' + name + '/') else: - repo_path = os.path.join(cpp_build_path, '_deps', name + '-src') + repo_path = os.path.join(cpp_build_path, name + '-src/') return repo_path, repo_cloned @@ -192,7 +194,7 @@ def get_submodule_dependency(repo, repo_info = get_repo_cmake_info(repos, git_info_file) - if os.path.exists(cpp_build_path): + if os.path.exists(os.path.join(cpp_build_path, repos[0] + '-src/')): print("-- Third party modules found succesfully in the libcugraph++ " "build folder.") @@ -200,11 +202,11 @@ def get_submodule_dependency(repo, else: - warnings.warn("-- Third party repositories have not been found so they" - "will be cloned. To avoid this set the environment " - "variable CUGRAPH_BUILD_PATH, containing the relative " - "path of the root of the repository to the folder " - "where libcugraph++ was built.") + print("-- Third party repositories have not been found so they" + "will be cloned. To avoid this set the environment " + "variable CUGRAPH_BUILD_PATH, containing the relative " + "path of the root of the repository to the folder " + "where libcugraph++ was built.") for repo in repos: clone_repo(repo, repo_info[repo][0], repo_info[repo][1]) @@ -262,8 +264,8 @@ def get_repo_cmake_info(names, file_path): `ExternalProject_Add(name` file_path : String Relative path of the location of the CMakeLists.txt (or the cmake - module which contains FetchContent_Declare or ExternalProject_Add - definitions) to extract the information. + module which contains ExternalProject_Add definitions) to extract + the information. Returns ------- @@ -272,31 +274,32 @@ def get_repo_cmake_info(names, file_path): where results[name][0] is the url of the repo and repo_info[repo][1] is the tag/commit hash to be cloned as specified by cmake. 
+ """ - with open(file_path, encoding='utf-8') as f: + with open(file_path) as f: s = f.read() results = {} - cmake_ext_proj_decls = ["FetchContent_Declare", "ExternalProject_Add"] - for name in names: - res = None - for decl in cmake_ext_proj_decls: - res = re.search(f'{decl}\(\s*' # noqa: W605 - + '(' + re.escape(name) + ')' - + '\s+.*GIT_REPOSITORY\s+(\S+)\s+.+' # noqa: W605 - + '\s+.*GIT_TAG\s+(\S+)', # noqa: W605 - s) - if res: - break - if res is None: - raise RuntimeError('Could not find any of the following ' - f'statements: {cmake_ext_proj_decls}, for ' - f'module "{name}" in file "{file_path}" with ' - 'GIT_REPOSITORY and GIT_TAG settings') - - results[res.group(1)] = [res.group(2), res.group(3)] + repo = re.findall(r'\s.*GIT_REPOSITORY.*', s) + repo = repo[-1].split()[-1] + fork = re.findall(r'\s.*FORK.*', s) + fork = fork[-1].split()[-1] + repo = repo.replace("${PKG_FORK}", fork) + tag = re.findall(r'\s.*PINNED_TAG.*', s) + tag = tag[-1].split()[-1] + results[name] = [repo, tag] + if tag == 'branch-${CUGRAPH_BRANCH_VERSION_raft}': + loc = _get_repo_path() + '/cpp/CMakeLists.txt' + with open(loc) as f: + cmakelists = f.read() + tag = re.findall(r'\s.*project\(CUGRAPH VERSION.*', cmakelists) + print(tag) + tag = tag[-1].split()[2].split('.') + tag = 'branch-{}.{}'.format(tag[0], tag[1]) + + results[name] = [repo, tag] return results From 8b197f187763a3d713901cebdcbd21dad7db3d3f Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 25 May 2021 22:19:00 +1000 Subject: [PATCH 266/343] Fix developer guide examples for device_buffer (#1619) After recent RMM changes, the DEVELOPER_GUIDE.md examples for `rmm::device_buffer` needed to be updated/corrected. Authors: - Mark Harris (https://github.com/harrism) Approvers: - Christopher Harris (https://github.com/cwharris) - Chuck Hastings (https://github.com/ChuckHastings) URL: https://github.com/rapidsai/cugraph/pull/1619 --- cpp/docs/DEVELOPER_GUIDE.md | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/cpp/docs/DEVELOPER_GUIDE.md b/cpp/docs/DEVELOPER_GUIDE.md index ba24d68aca5..b369183a262 100644 --- a/cpp/docs/DEVELOPER_GUIDE.md +++ b/cpp/docs/DEVELOPER_GUIDE.md @@ -146,9 +146,9 @@ Allocates a specified number of bytes of untyped, uninitialized device memory us `device_memory_resource`. If no resource is explicitly provided, uses `rmm::mr::get_current_device_resource()`. -`rmm::device_buffer` is copyable and movable. A copy performs a deep copy of the `device_buffer`'s -device memory, whereas a move moves ownership of the device memory from one `device_buffer` to -another. +`rmm::device_buffer` is movable and copyable on a stream. A copy performs a deep copy of the +`device_buffer`'s device memory on the specified stream, whereas a move moves ownership of the +device memory from one `device_buffer` to another. ```c++ // Allocates at least 100 bytes of uninitialized device memory @@ -156,17 +156,21 @@ another. 
rmm::device_buffer buff(100, stream, mr); void * raw_data = buff.data(); // Raw pointer to underlying device memory -rmm::device_buffer copy(buff); // Deep copies `buff` into `copy` -rmm::device_buffer moved_to(std::move(buff)); // Moves contents of `buff` into `moved_to` +// Deep copies `buff` into `copy` on `stream` +rmm::device_buffer copy(buff, stream); + +// Moves contents of `buff` into `moved_to` +rmm::device_buffer moved_to(std::move(buff)); custom_memory_resource *mr...; -rmm::device_buffer custom_buff(100, mr); // Allocates 100 bytes from the custom_memory_resource +// Allocates 100 bytes from the custom_memory_resource +rmm::device_buffer custom_buff(100, mr, stream); ``` #### `rmm::device_uvector` -Similar to a `rmm::device_vector`, allocates a contiguous set of elements in device memory but with key -differences: +Similar to a `rmm::device_vector`, allocates a contiguous set of elements in device memory but with +key differences: - As an optimization, elements are uninitialized and no synchronization occurs at construction. This limits the types `T` to trivially copyable types. - All operations are stream ordered (i.e., they accept a `cuda_stream_view` specifying the stream From e541104be8965d52f32b587e77f273bc82d3ab91 Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Tue, 25 May 2021 12:04:52 -0400 Subject: [PATCH 267/343] =?UTF-8?q?address=20'ValueError:=20Series=20conta?= =?UTF-8?q?ins=20NULL=20values'=20from=20from=5Fcudf=5Fedge=E2=80=A6=20(#1?= =?UTF-8?q?610)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `G.from_cudf_edgelist` after the `cudf.read_csv` fails with the following error - ``` --------------------------------------------------------------------------- ValueError Traceback (most recent call last) in 1 t_start_build_sg = time.perf_counter() 2 G = cugraph.DiGraph() ----> 3 G.from_cudf_edgelist(e_list, source='src', destination='dst') 4 t_stop_build_sg = time.perf_counter() /opt/conda/envs/rapids/lib/python3.7/site-packages/cugraph/structure/graph_classes.py in from_cudf_edgelist(self, input_df, source, destination, edge_attr, renumber) 127 destination=destination, 128 edge_attr=edge_attr, --> 129 renumber=renumber) 130 131 def from_cudf_adjlist(self, offset_col, index_col, value_col=None): /opt/conda/envs/rapids/lib/python3.7/site-packages/cugraph/structure/graph_implementation/simpleGraph.py in __from_edgelist(self, input_df, source, destination, edge_attr, renumber) 171 source_col, dest_col = symmetrize( 172 source_col, dest_col, multi=self.properties.multi_edge, --> 173 symmetrize=not self.properties.directed) 174 175 self.edgelist = simpleGraphImpl.EdgeList(source_col, dest_col, /opt/conda/envs/rapids/lib/python3.7/site-packages/cugraph/structure/symmetrize.py in symmetrize(source_col, dest_col, value_col, multi, symmetrize) 200 ) 201 csg.null_check(source_col) --> 202 csg.null_check(dest_col) 203 if value_col is not None: 204 if isinstance(value_col, cudf.Series): /opt/conda/envs/rapids/lib/python3.7/site-packages/cugraph/structure/graph_classes.py in null_check(col) 23 def null_check(col): 24 if col.null_count != 0: ---> 25 raise ValueError("Series contains NULL values") 26 27 ValueError: Series contains NULL values ``` the header from the input is turning into NULLs - ``` $ head soc-LiveJournal1.txt # Directed graph (each unordered pair of nodes is saved once): soc-LiveJournal1.txt # Directed LiveJournal friednship social network # Nodes: 4847571 Edges: 68993773 # FromNodeId ToNodeId 0 1 0 2 0 3 ``` 
`e_list` ||src|dst |---|---|--- |0|0|NA |1|0|NA |2|0|NA |3|0|0 |4|0|1 ... this issue has existed since at least 0.16 and may warrant further test coverage for cudf's read_csv. Authors: - Matthew Farrellee (https://github.com/mattf) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - Brad Rees (https://github.com/BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1610 --- notebooks/demo/batch_betweenness.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/demo/batch_betweenness.ipynb b/notebooks/demo/batch_betweenness.ipynb index e2ad83ff1c4..885d26c9523 100644 --- a/notebooks/demo/batch_betweenness.ipynb +++ b/notebooks/demo/batch_betweenness.ipynb @@ -138,7 +138,7 @@ "outputs": [], "source": [ "t_start_read_sg = time.perf_counter()\n", - "e_list = cudf.read_csv(input_data_path, delimiter='\\t', names=['src', 'dst'], dtype=['int32', 'int32'])\n", + "e_list = cudf.read_csv(input_data_path, delimiter='\\t', names=['src', 'dst'], dtype=['int32', 'int32'], comment='#')\n", "t_stop_read_sg = time.perf_counter()" ] }, From 54eaabe7fbb08be9e07ac85a1a7f015463b94881 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Tue, 25 May 2021 12:57:54 -0400 Subject: [PATCH 268/343] Undo disabling MG C++ testing outputs for non-root processes (#1615) Disabling MG C++ testing outputs for non-root processes has an undesirable side effect of disabling error messages (e.g. exception outputs) as well. This makes failure diagnosis difficult in large-scale runs. Delete the code disabling MG C++ testing outputs for non-root processes in this PR. In long run, we need a more elegant mechanism to disable only redundant test progress messages while still allowing error messages to be displayed. Authors: - Seunghwa Kang (https://github.com/seunghwak) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) URL: https://github.com/rapidsai/cugraph/pull/1615 --- cpp/tests/utilities/base_fixture.hpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/cpp/tests/utilities/base_fixture.hpp b/cpp/tests/utilities/base_fixture.hpp index a705ca10aaa..3121d3074d4 100644 --- a/cpp/tests/utilities/base_fixture.hpp +++ b/cpp/tests/utilities/base_fixture.hpp @@ -160,11 +160,6 @@ inline auto parse_test_options(int argc, char **argv) auto const cmd_opts = parse_test_options(argc, argv); \ auto const rmm_mode = cmd_opts["rmm_mode"].as(); \ auto resource = cugraph::test::create_memory_resource(rmm_mode); \ - \ - if (comm_rank != 0) { \ - auto &listeners = ::testing::UnitTest::GetInstance()->listeners(); \ - delete listeners.Release(listeners.default_result_printer()); \ - } \ rmm::mr::set_current_device_resource(resource.get()); \ auto ret = RUN_ALL_TESTS(); \ MPI_TRY(MPI_Finalize()); \ From d6a19e9f3cf8a63549fdc45d43086a3badd176cb Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Tue, 25 May 2021 13:04:25 -0400 Subject: [PATCH 269/343] Pass rmm memory allocator to cuco::static_map (#1617) Currently cuco::static_map instances in cuGraph are initialized with the cuco::static_map default allocator (which uses CUDA memory allocator). Update to provide the RMM memory allocator instead. 
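
For reviewers, the construction pattern repeated throughout this change looks roughly like the sketch below. This is a minimal illustration only: the exact `cuco::static_map` template parameter order and the `vertex_t`/`capacity` placeholders are assumptions for readability, not copied from any single call site in the diff.

```c++
#include <cuco/static_map.cuh>

#include <rmm/mr/device/per_device_resource.hpp>
#include <rmm/mr/device/polymorphic_allocator.hpp>

using vertex_t = int32_t;     // illustrative key/value type
std::size_t capacity = 1024;  // cuco::static_map requires at least one empty slot

// Wrap the current RMM device resource in an allocator and adapt it to the
// stream-ordered allocator interface cuco::static_map expects.
auto poly_alloc     = rmm::mr::polymorphic_allocator<char>(rmm::mr::get_current_device_resource());
auto stream_adapter = rmm::mr::make_stream_allocator_adaptor(poly_alloc, cudaStream_t{nullptr});

// Passing the adaptor makes the map's slot storage come from RMM instead of
// the default cudaMalloc-based allocator.
cuco::static_map<vertex_t, vertex_t, cuda::thread_scope_device, decltype(stream_adapter)> kv_map{
  capacity,
  invalid_vertex_id<vertex_t>::value,  // cugraph sentinel used as the empty key
  invalid_vertex_id<vertex_t>::value,  // cugraph sentinel used as the empty value
  stream_adapter};
```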
Authors: - Seunghwa Kang (https://github.com/seunghwak) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) URL: https://github.com/rapidsai/cugraph/pull/1617 --- ...ransform_reduce_key_aggregated_out_nbr.cuh | 24 ++++++-- .../cugraph/utilities/collect_comm.cuh | 30 +++++++--- cpp/src/experimental/relabel.cu | 57 ++++++++++++------- cpp/src/experimental/renumber_edgelist.cu | 53 ++++++++++------- cpp/src/experimental/renumber_utils.cu | 43 +++++++++----- 5 files changed, 139 insertions(+), 68 deletions(-) diff --git a/cpp/include/cugraph/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh b/cpp/include/cugraph/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh index eca0b9eed4a..6a47ec67f13 100644 --- a/cpp/include/cugraph/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh +++ b/cpp/include/cugraph/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh @@ -28,6 +28,8 @@ #include #include +#include +#include #include @@ -209,8 +211,14 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( // 1. build a cuco::static_map object for the k, v pairs. - auto kv_map_ptr = std::make_unique>( - size_t{0}, invalid_vertex_id::value, invalid_vertex_id::value); + auto poly_alloc = rmm::mr::polymorphic_allocator(rmm::mr::get_current_device_resource()); + auto stream_adapter = rmm::mr::make_stream_allocator_adaptor(poly_alloc, cudaStream_t{nullptr}); + auto kv_map_ptr = std::make_unique< + cuco::static_map>( + size_t{0}, + invalid_vertex_id::value, + invalid_vertex_id::value, + stream_adapter); if (GraphViewType::is_multi_gpu) { auto& comm = handle.get_comms(); auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); @@ -268,12 +276,14 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( kv_map_ptr.reset(); - kv_map_ptr = std::make_unique>( + kv_map_ptr = std::make_unique< + cuco::static_map>( // cuco::static_map requires at least one empty slot std::max(static_cast(static_cast(map_keys.size()) / load_factor), static_cast(thrust::distance(map_key_first, map_key_last)) + 1), invalid_vertex_id::value, - invalid_vertex_id::value); + invalid_vertex_id::value, + stream_adapter); auto pair_first = thrust::make_transform_iterator( thrust::make_zip_iterator(thrust::make_tuple( @@ -287,13 +297,15 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( kv_map_ptr.reset(); - kv_map_ptr = std::make_unique>( + kv_map_ptr = std::make_unique< + cuco::static_map>( // cuco::static_map requires at least one empty slot std::max(static_cast( static_cast(thrust::distance(map_key_first, map_key_last)) / load_factor), static_cast(thrust::distance(map_key_first, map_key_last)) + 1), invalid_vertex_id::value, - invalid_vertex_id::value); + invalid_vertex_id::value, + stream_adapter); auto pair_first = thrust::make_transform_iterator( thrust::make_zip_iterator(thrust::make_tuple(map_key_first, map_value_first)), diff --git a/cpp/include/cugraph/utilities/collect_comm.cuh b/cpp/include/cugraph/utilities/collect_comm.cuh index 6a8db5194be..812a84a0441 100644 --- a/cpp/include/cugraph/utilities/collect_comm.cuh +++ b/cpp/include/cugraph/utilities/collect_comm.cuh @@ -23,6 +23,8 @@ #include #include #include +#include +#include #include @@ -63,13 +65,17 @@ collect_values_for_keys(raft::comms::comms_t const &comm, // 1. build a cuco::static_map object for the map k, v pairs. 
- auto kv_map_ptr = std::make_unique>( + auto poly_alloc = rmm::mr::polymorphic_allocator(rmm::mr::get_current_device_resource()); + auto stream_adapter = rmm::mr::make_stream_allocator_adaptor(poly_alloc, cudaStream_t{nullptr}); + auto kv_map_ptr = std::make_unique< + cuco::static_map>( // cuco::static_map requires at least one empty slot std::max(static_cast( static_cast(thrust::distance(map_key_first, map_key_last)) / load_factor), static_cast(thrust::distance(map_key_first, map_key_last)) + 1), invalid_vertex_id::value, - invalid_vertex_id::value); + invalid_vertex_id::value, + stream_adapter); { auto pair_first = thrust::make_transform_iterator( thrust::make_zip_iterator(thrust::make_tuple(map_key_first, map_value_first)), @@ -124,12 +130,14 @@ collect_values_for_keys(raft::comms::comms_t const &comm, kv_map_ptr.reset(); - kv_map_ptr = std::make_unique>( + kv_map_ptr = std::make_unique< + cuco::static_map>( // cuco::static_map requires at least one empty slot std::max(static_cast(static_cast(unique_keys.size()) / load_factor), unique_keys.size() + 1), invalid_vertex_id::value, - invalid_vertex_id::value); + invalid_vertex_id::value, + stream_adapter); { auto pair_first = thrust::make_transform_iterator( thrust::make_zip_iterator( @@ -181,13 +189,17 @@ collect_values_for_unique_keys(raft::comms::comms_t const &comm, // 1. build a cuco::static_map object for the map k, v pairs. - auto kv_map_ptr = std::make_unique>( + auto poly_alloc = rmm::mr::polymorphic_allocator(rmm::mr::get_current_device_resource()); + auto stream_adapter = rmm::mr::make_stream_allocator_adaptor(poly_alloc, cudaStream_t{nullptr}); + auto kv_map_ptr = std::make_unique< + cuco::static_map>( // cuco::static_map requires at least one empty slot std::max(static_cast( static_cast(thrust::distance(map_key_first, map_key_last)) / load_factor), static_cast(thrust::distance(map_key_first, map_key_last)) + 1), invalid_vertex_id::value, - invalid_vertex_id::value); + invalid_vertex_id::value, + stream_adapter); { auto pair_first = thrust::make_transform_iterator( thrust::make_zip_iterator(thrust::make_tuple(map_key_first, map_value_first)), @@ -238,12 +250,14 @@ collect_values_for_unique_keys(raft::comms::comms_t const &comm, kv_map_ptr.reset(); - kv_map_ptr = std::make_unique>( + kv_map_ptr = std::make_unique< + cuco::static_map>( // cuco::static_map requires at least one empty slot std::max(static_cast(static_cast(unique_keys.size()) / load_factor), unique_keys.size() + 1), invalid_vertex_id::value, - invalid_vertex_id::value); + invalid_vertex_id::value, + stream_adapter); { auto pair_first = thrust::make_transform_iterator( thrust::make_zip_iterator( diff --git a/cpp/src/experimental/relabel.cu b/cpp/src/experimental/relabel.cu index 5360fad0031..325c6aaea67 100644 --- a/cpp/src/experimental/relabel.cu +++ b/cpp/src/experimental/relabel.cu @@ -27,6 +27,8 @@ #include #include #include +#include +#include #include #include @@ -121,13 +123,18 @@ void relabel(raft::handle_t const& handle, CUDA_TRY(cudaStreamSynchronize( handle.get_stream())); // cuco::static_map currently does not take stream - cuco::static_map relabel_map{ - // cuco::static_map requires at least one empty slot - std::max( - static_cast(static_cast(rx_label_pair_old_labels.size()) / load_factor), - rx_label_pair_old_labels.size() + 1), - invalid_vertex_id::value, - invalid_vertex_id::value}; + auto poly_alloc = + rmm::mr::polymorphic_allocator(rmm::mr::get_current_device_resource()); + auto stream_adapter = + rmm::mr::make_stream_allocator_adaptor(poly_alloc, 
cudaStream_t{nullptr}); + cuco::static_map + relabel_map{// cuco::static_map requires at least one empty slot + std::max(static_cast( + static_cast(rx_label_pair_old_labels.size()) / load_factor), + rx_label_pair_old_labels.size() + 1), + invalid_vertex_id::value, + invalid_vertex_id::value, + stream_adapter}; auto pair_first = thrust::make_transform_iterator( thrust::make_zip_iterator( @@ -183,22 +190,30 @@ void relabel(raft::handle_t const& handle, handle.get_stream_view().synchronize(); // cuco::static_map currently does not take stream - cuco::static_map relabel_map( - // cuco::static_map requires at least one empty slot - std::max(static_cast(static_cast(unique_old_labels.size()) / load_factor), - unique_old_labels.size() + 1), - invalid_vertex_id::value, - invalid_vertex_id::value); + { + auto poly_alloc = + rmm::mr::polymorphic_allocator(rmm::mr::get_current_device_resource()); + auto stream_adapter = + rmm::mr::make_stream_allocator_adaptor(poly_alloc, cudaStream_t{nullptr}); + cuco::static_map + relabel_map{ + // cuco::static_map requires at least one empty slot + std::max(static_cast(static_cast(unique_old_labels.size()) / load_factor), + unique_old_labels.size() + 1), + invalid_vertex_id::value, + invalid_vertex_id::value, + stream_adapter}; - auto pair_first = thrust::make_transform_iterator( - thrust::make_zip_iterator( - thrust::make_tuple(unique_old_labels.begin(), new_labels_for_unique_old_labels.begin())), - [] __device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); - }); + auto pair_first = thrust::make_transform_iterator( + thrust::make_zip_iterator( + thrust::make_tuple(unique_old_labels.begin(), new_labels_for_unique_old_labels.begin())), + [] __device__(auto val) { + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + }); - relabel_map.insert(pair_first, pair_first + unique_old_labels.size()); - relabel_map.find(labels, labels + num_labels, labels); + relabel_map.insert(pair_first, pair_first + unique_old_labels.size()); + relabel_map.find(labels, labels + num_labels, labels); + } } else { cuco::static_map relabel_map( // cuco::static_map requires at least one empty slot diff --git a/cpp/src/experimental/renumber_edgelist.cu b/cpp/src/experimental/renumber_edgelist.cu index 33793ecf727..860664aa8b3 100644 --- a/cpp/src/experimental/renumber_edgelist.cu +++ b/cpp/src/experimental/renumber_edgelist.cu @@ -28,6 +28,8 @@ #include #include #include +#include +#include #include #include @@ -647,13 +649,17 @@ renumber_edgelist(raft::handle_t const& handle, CUDA_TRY(cudaStreamSynchronize( handle.get_stream())); // cuco::static_map currently does not take stream - cuco::static_map renumber_map{ - // cuco::static_map requires at least one empty slot - std::max(static_cast( - static_cast(partition.get_matrix_partition_major_size(i)) / load_factor), - static_cast(partition.get_matrix_partition_major_size(i)) + 1), - invalid_vertex_id::value, - invalid_vertex_id::value}; + auto poly_alloc = rmm::mr::polymorphic_allocator(rmm::mr::get_current_device_resource()); + auto stream_adapter = rmm::mr::make_stream_allocator_adaptor(poly_alloc, cudaStream_t{nullptr}); + cuco::static_map + renumber_map{ + // cuco::static_map requires at least one empty slot + std::max(static_cast( + static_cast(partition.get_matrix_partition_major_size(i)) / load_factor), + static_cast(partition.get_matrix_partition_major_size(i)) + 1), + invalid_vertex_id::value, + invalid_vertex_id::value, + stream_adapter}; auto pair_first = 
thrust::make_transform_iterator( thrust::make_zip_iterator(thrust::make_tuple( col_comm_rank == static_cast(i) ? renumber_map_labels.begin() @@ -697,13 +703,16 @@ renumber_edgelist(raft::handle_t const& handle, CUDA_TRY(cudaStreamSynchronize( handle.get_stream())); // cuco::static_map currently does not take stream - cuco::static_map renumber_map{ - // cuco::static_map requires at least one empty slot - std::max( - static_cast(static_cast(renumber_map_minor_labels.size()) / load_factor), - renumber_map_minor_labels.size() + 1), - invalid_vertex_id::value, - invalid_vertex_id::value}; + auto poly_alloc = rmm::mr::polymorphic_allocator(rmm::mr::get_current_device_resource()); + auto stream_adapter = rmm::mr::make_stream_allocator_adaptor(poly_alloc, cudaStream_t{nullptr}); + cuco::static_map + renumber_map{// cuco::static_map requires at least one empty slot + std::max(static_cast( + static_cast(renumber_map_minor_labels.size()) / load_factor), + renumber_map_minor_labels.size() + 1), + invalid_vertex_id::value, + invalid_vertex_id::value, + stream_adapter}; auto pair_first = thrust::make_transform_iterator( thrust::make_zip_iterator(thrust::make_tuple( renumber_map_minor_labels.begin(), @@ -775,12 +784,16 @@ std::enable_if_t> renumber_edgelist( // FIXME: compare this hash based approach with a binary search based approach in both memory // footprint and execution time - cuco::static_map renumber_map{ - // cuco::static_map requires at least one empty slot - std::max(static_cast(static_cast(renumber_map_labels.size()) / load_factor), - renumber_map_labels.size() + 1), - invalid_vertex_id::value, - invalid_vertex_id::value}; + auto poly_alloc = rmm::mr::polymorphic_allocator(rmm::mr::get_current_device_resource()); + auto stream_adapter = rmm::mr::make_stream_allocator_adaptor(poly_alloc, cudaStream_t{nullptr}); + cuco::static_map + renumber_map{ + // cuco::static_map requires at least one empty slot + std::max(static_cast(static_cast(renumber_map_labels.size()) / load_factor), + renumber_map_labels.size() + 1), + invalid_vertex_id::value, + invalid_vertex_id::value, + stream_adapter}; auto pair_first = thrust::make_transform_iterator( thrust::make_zip_iterator( thrust::make_tuple(renumber_map_labels.begin(), thrust::make_counting_iterator(vertex_t{0}))), diff --git a/cpp/src/experimental/renumber_utils.cu b/cpp/src/experimental/renumber_utils.cu index 765dbb19886..6def9eeb8b5 100644 --- a/cpp/src/experimental/renumber_utils.cu +++ b/cpp/src/experimental/renumber_utils.cu @@ -22,6 +22,9 @@ #include #include +#include +#include + #include #include #include @@ -66,8 +69,14 @@ void renumber_ext_vertices(raft::handle_t const& handle, "Invalid input arguments: renumber_map_labels have duplicate elements."); } - auto renumber_map_ptr = std::make_unique>( - size_t{0}, invalid_vertex_id::value, invalid_vertex_id::value); + auto poly_alloc = rmm::mr::polymorphic_allocator(rmm::mr::get_current_device_resource()); + auto stream_adapter = rmm::mr::make_stream_allocator_adaptor(poly_alloc, cudaStream_t{nullptr}); + auto renumber_map_ptr = std::make_unique< + cuco::static_map>( + size_t{0}, + invalid_vertex_id::value, + invalid_vertex_id::value, + stream_adapter); if (multi_gpu) { auto& comm = handle.get_comms(); auto const comm_size = comm.get_size(); @@ -107,13 +116,15 @@ void renumber_ext_vertices(raft::handle_t const& handle, renumber_map_ptr.reset(); - renumber_map_ptr = std::make_unique>( + renumber_map_ptr = std::make_unique< + cuco::static_map>( // cuco::static_map requires at least one empty slot 
std::max( static_cast(static_cast(sorted_unique_ext_vertices.size()) / load_factor), sorted_unique_ext_vertices.size() + 1), invalid_vertex_id::value, - invalid_vertex_id::value); + invalid_vertex_id::value, + stream_adapter); auto kv_pair_first = thrust::make_transform_iterator( thrust::make_zip_iterator(thrust::make_tuple( @@ -127,13 +138,15 @@ void renumber_ext_vertices(raft::handle_t const& handle, renumber_map_ptr.reset(); - renumber_map_ptr = std::make_unique>( + renumber_map_ptr = std::make_unique< + cuco::static_map>( // cuco::static_map requires at least one empty slot std::max(static_cast( static_cast(local_int_vertex_last - local_int_vertex_first) / load_factor), static_cast(local_int_vertex_last - local_int_vertex_first) + 1), invalid_vertex_id::value, - invalid_vertex_id::value); + invalid_vertex_id::value, + stream_adapter); auto pair_first = thrust::make_transform_iterator( thrust::make_zip_iterator( @@ -306,13 +319,17 @@ void unrenumber_int_vertices(raft::handle_t const& handle, handle.get_stream_view().synchronize(); // cuco::static_map currently does not take stream - cuco::static_map unrenumber_map( - // cuco::static_map requires at least one empty slot - std::max( - static_cast(static_cast(sorted_unique_int_vertices.size()) / load_factor), - sorted_unique_int_vertices.size() + 1), - invalid_vertex_id::value, - invalid_vertex_id::value); + auto poly_alloc = rmm::mr::polymorphic_allocator(rmm::mr::get_current_device_resource()); + auto stream_adapter = rmm::mr::make_stream_allocator_adaptor(poly_alloc, cudaStream_t{nullptr}); + cuco::static_map + unrenumber_map{ + // cuco::static_map requires at least one empty slot + std::max( + static_cast(static_cast(sorted_unique_int_vertices.size()) / load_factor), + sorted_unique_int_vertices.size() + 1), + invalid_vertex_id::value, + invalid_vertex_id::value, + stream_adapter}; auto pair_first = thrust::make_transform_iterator( thrust::make_zip_iterator( From d1f3fb994cf4140a10c3a6ac516a31e13d6621da Mon Sep 17 00:00:00 2001 From: Hugo Linsenmaier Date: Tue, 25 May 2021 19:27:14 +0200 Subject: [PATCH 270/343] Handle int64 in force atlas wrapper and update to uvector (#1607) - Update wrapper to handle int64 - Use `device_uvector` Close: https://github.com/rapidsai/cugraph/issues/1581 Authors: - Hugo Linsenmaier (https://github.com/hlinsen) Approvers: - Andrei Schaffer (https://github.com/aschaffer) - Chuck Hastings (https://github.com/ChuckHastings) - Brad Rees (https://github.com/BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1607 --- cpp/include/cugraph/algorithms.hpp | 5 +- cpp/src/layout/barnes_hut.hpp | 116 +++++++++--------- cpp/src/layout/exact_fa2.hpp | 55 +++++---- cpp/src/layout/force_atlas2.cu | 15 ++- cpp/tests/layout/force_atlas2_test.cu | 29 +++-- python/cugraph/layout/force_atlas2.pxd | 1 + .../cugraph/layout/force_atlas2_wrapper.pyx | 23 +++- 7 files changed, 138 insertions(+), 106 deletions(-) diff --git a/cpp/include/cugraph/algorithms.hpp b/cpp/include/cugraph/algorithms.hpp index dc10b6b59fb..0b0dd88ce29 100644 --- a/cpp/include/cugraph/algorithms.hpp +++ b/cpp/include/cugraph/algorithms.hpp @@ -142,6 +142,8 @@ void overlap_list(GraphCSRView const &graph, * @tparam weight_t Type of edge weights. Supported values : float * or double. * + * @param[in] handle Library handle (RAFT). If a communicator is set in the + * handle, the multi GPU version will be selected. * @param[in] graph cuGraph graph descriptor, should contain the * connectivity information as a COO. Graph is considered undirected. 
Edge weights are used for this * algorithm and set to 1 by default. @@ -178,7 +180,8 @@ void overlap_list(GraphCSRView const &graph, * */ template -void force_atlas2(GraphCOOView &graph, +void force_atlas2(raft::handle_t const &handle, + GraphCOOView &graph, float *pos, const int max_iter = 500, float *x_start = nullptr, diff --git a/cpp/src/layout/barnes_hut.hpp b/cpp/src/layout/barnes_hut.hpp index ebef93e74fb..a3d413225e4 100644 --- a/cpp/src/layout/barnes_hut.hpp +++ b/cpp/src/layout/barnes_hut.hpp @@ -17,14 +17,15 @@ #pragma once #include -#include +#include -#include #include +#include + #include #include +#include -#include #include "bh_kernels.hpp" #include "fa2_kernels.hpp" #include "utils.hpp" @@ -33,7 +34,8 @@ namespace cugraph { namespace detail { template -void barnes_hut(GraphCOOView &graph, +void barnes_hut(raft::handle_t const &handle, + GraphCOOView &graph, float *pos, const int max_iter = 500, float *x_start = nullptr, @@ -50,7 +52,7 @@ void barnes_hut(GraphCOOView &graph, bool verbose = false, internals::GraphBasedDimRedCallback *callback = nullptr) { - cudaStream_t stream = {nullptr}; + cudaStream_t stream = handle.get_stream(); const edge_t e = graph.number_of_edges; const vertex_t n = graph.number_of_vertices; @@ -65,15 +67,15 @@ void barnes_hut(GraphCOOView &graph, // Allocate more space //--------------------------------------------------- - rmm::device_vector d_limiter(1); - rmm::device_vector d_maxdepthd(1); - rmm::device_vector d_bottomd(1); - rmm::device_vector d_radiusd(1); + rmm::device_uvector d_limiter(1, stream); + rmm::device_uvector d_maxdepthd(1, stream); + rmm::device_uvector d_bottomd(1, stream); + rmm::device_uvector d_radiusd(1, stream); - unsigned *limiter = d_limiter.data().get(); - int *maxdepthd = d_maxdepthd.data().get(); - int *bottomd = d_bottomd.data().get(); - float *radiusd = d_radiusd.data().get(); + unsigned *limiter = d_limiter.data(); + int *maxdepthd = d_maxdepthd.data(); + int *bottomd = d_bottomd.data(); + float *radiusd = d_radiusd.data(); InitializationKernel<<<1, 1, 0, stream>>>(limiter, maxdepthd, radiusd); CHECK_CUDA(stream); @@ -83,51 +85,52 @@ void barnes_hut(GraphCOOView &graph, const float theta_squared = theta * theta; const int NNODES = nnodes; - rmm::device_vector d_startl(nnodes + 1, 0); - rmm::device_vector d_childl((nnodes + 1) * 4, 0); + rmm::device_uvector d_startl(nnodes + 1, stream); + rmm::device_uvector d_childl((nnodes + 1) * 4, stream); // FA2 requires degree + 1 - rmm::device_vector d_massl(nnodes + 1, 1.f); + rmm::device_uvector d_massl(nnodes + 1, stream); + thrust::fill(rmm::exec_policy(stream)->on(stream), d_massl.begin(), d_massl.end(), 1.f); - rmm::device_vector d_maxxl(blocks * FACTOR1, 0); - rmm::device_vector d_maxyl(blocks * FACTOR1, 0); - rmm::device_vector d_minxl(blocks * FACTOR1, 0); - rmm::device_vector d_minyl(blocks * FACTOR1, 0); + rmm::device_uvector d_maxxl(blocks * FACTOR1, stream); + rmm::device_uvector d_maxyl(blocks * FACTOR1, stream); + rmm::device_uvector d_minxl(blocks * FACTOR1, stream); + rmm::device_uvector d_minyl(blocks * FACTOR1, stream); // Actual mallocs - int *startl = d_startl.data().get(); - int *childl = d_childl.data().get(); - int *massl = d_massl.data().get(); + int *startl = d_startl.data(); + int *childl = d_childl.data(); + int *massl = d_massl.data(); - float *maxxl = d_maxxl.data().get(); - float *maxyl = d_maxyl.data().get(); - float *minxl = d_minxl.data().get(); - float *minyl = d_minyl.data().get(); + float *maxxl = d_maxxl.data(); + float *maxyl = 
d_maxyl.data(); + float *minxl = d_minxl.data(); + float *minyl = d_minyl.data(); // SummarizationKernel - rmm::device_vector d_countl(nnodes + 1, 0); - int *countl = d_countl.data().get(); + rmm::device_uvector d_countl(nnodes + 1, stream); + int *countl = d_countl.data(); // SortKernel - rmm::device_vector d_sortl(nnodes + 1, 0); - int *sortl = d_sortl.data().get(); + rmm::device_uvector d_sortl(nnodes + 1, stream); + int *sortl = d_sortl.data(); // RepulsionKernel - rmm::device_vector d_rep_forces((nnodes + 1) * 2, 0); - float *rep_forces = d_rep_forces.data().get(); + rmm::device_uvector d_rep_forces((nnodes + 1) * 2, stream); + float *rep_forces = d_rep_forces.data(); - rmm::device_vector d_radius_squared(1, 0); - float *radiusd_squared = d_radius_squared.data().get(); + rmm::device_uvector d_radius_squared(1, stream); + float *radiusd_squared = d_radius_squared.data(); - rmm::device_vector d_nodes_pos((nnodes + 1) * 2, 0); - float *nodes_pos = d_nodes_pos.data().get(); + rmm::device_uvector d_nodes_pos((nnodes + 1) * 2, stream); + float *nodes_pos = d_nodes_pos.data(); // Initialize positions with random values int random_state = 0; // Copy start x and y positions. if (x_start && y_start) { - copy(n, x_start, nodes_pos); - copy(n, y_start, nodes_pos + nnodes + 1); + raft::copy(nodes_pos, x_start, n, stream); + raft::copy(nodes_pos + nnodes + 1, y_start, n, stream); } else { random_vector(nodes_pos, (nnodes + 1) * 2, random_state, stream); } @@ -138,15 +141,15 @@ void barnes_hut(GraphCOOView &graph, float *swinging{nullptr}; float *traction{nullptr}; - rmm::device_vector d_attract(n * 2, 0); - rmm::device_vector d_old_forces(n * 2, 0); - rmm::device_vector d_swinging(n, 0); - rmm::device_vector d_traction(n, 0); + rmm::device_uvector d_attract(n * 2, stream); + rmm::device_uvector d_old_forces(n * 2, stream); + rmm::device_uvector d_swinging(n, stream); + rmm::device_uvector d_traction(n, stream); - attract = d_attract.data().get(); - old_forces = d_old_forces.data().get(); - swinging = d_swinging.data().get(); - traction = d_traction.data().get(); + attract = d_attract.data(); + old_forces = d_old_forces.data(); + swinging = d_swinging.data(); + traction = d_traction.data(); // Sort COO for coalesced memory access. 
sort(graph, stream); @@ -191,10 +194,11 @@ void barnes_hut(GraphCOOView &graph, for (int iter = 0; iter < max_iter; ++iter) { // Reset force values - fill((nnodes + 1) * 2, rep_forces, 0.f); - fill(n * 2, attract, 0.f); - fill(n, swinging, 0.f); - fill(n, traction, 0.f); + thrust::fill( + rmm::exec_policy(stream)->on(stream), d_rep_forces.begin(), d_rep_forces.end(), 0.f); + thrust::fill(rmm::exec_policy(stream)->on(stream), d_attract.begin(), d_attract.end(), 0.f); + thrust::fill(rmm::exec_policy(stream)->on(stream), d_swinging.begin(), d_swinging.end(), 0.f); + thrust::fill(rmm::exec_policy(stream)->on(stream), d_traction.begin(), d_traction.end(), 0.f); ResetKernel<<<1, 1, 0, stream>>>(radiusd_squared, bottomd, NNODES, radiusd); CHECK_CUDA(stream); @@ -320,15 +324,15 @@ void barnes_hut(GraphCOOView &graph, if (callback) callback->on_epoch_end(nodes_pos); if (verbose) { - printf("iteration %i, speed: %f, speed_efficiency: %f, ", iter + 1, speed, speed_efficiency); - printf("jt: %f, ", jt); - printf("swinging: %f, traction: %f\n", s, t); + std::cout << "iteration: " << iter + 1 << ", speed: " << speed + << ", speed_efficiency: " << speed_efficiency << ", jt: " << jt + << ", swinging: " << s << ", traction: " << t << "\n"; } } // Copy nodes positions into final output pos - copy(n, nodes_pos, pos); - copy(n, nodes_pos + nnodes + 1, pos + n); + raft::copy(pos, nodes_pos, n, stream); + raft::copy(pos + n, nodes_pos + nnodes + 1, n, stream); if (callback) callback->on_train_end(nodes_pos); } diff --git a/cpp/src/layout/exact_fa2.hpp b/cpp/src/layout/exact_fa2.hpp index abad5a5630f..d34f8843e9b 100644 --- a/cpp/src/layout/exact_fa2.hpp +++ b/cpp/src/layout/exact_fa2.hpp @@ -17,12 +17,13 @@ #pragma once #include -#include +#include -#include #include + #include #include +#include #include "exact_repulsion.hpp" #include "fa2_kernels.hpp" @@ -32,7 +33,8 @@ namespace cugraph { namespace detail { template -void exact_fa2(GraphCOOView &graph, +void exact_fa2(raft::handle_t const &handle, + GraphCOOView &graph, float *pos, const int max_iter = 500, float *x_start = nullptr, @@ -48,7 +50,7 @@ void exact_fa2(GraphCOOView &graph, bool verbose = false, internals::GraphBasedDimRedCallback *callback = nullptr) { - cudaStream_t stream = {nullptr}; + cudaStream_t stream = handle.get_stream(); const edge_t e = graph.number_of_edges; const vertex_t n = graph.number_of_vertices; @@ -59,27 +61,28 @@ void exact_fa2(GraphCOOView &graph, float *d_swinging{nullptr}; float *d_traction{nullptr}; - rmm::device_vector repel(n * 2, 0); - rmm::device_vector attract(n * 2, 0); - rmm::device_vector old_forces(n * 2, 0); + rmm::device_uvector repel(n * 2, stream); + rmm::device_uvector attract(n * 2, stream); + rmm::device_uvector old_forces(n * 2, stream); // FA2 requires degree + 1. 
- rmm::device_vector mass(n, 1); - rmm::device_vector swinging(n, 0); - rmm::device_vector traction(n, 0); - - d_repel = repel.data().get(); - d_attract = attract.data().get(); - d_old_forces = old_forces.data().get(); - d_mass = mass.data().get(); - d_swinging = swinging.data().get(); - d_traction = traction.data().get(); + rmm::device_uvector mass(n, stream); + thrust::fill(rmm::exec_policy(stream)->on(stream), mass.begin(), mass.end(), 1.f); + rmm::device_uvector swinging(n, stream); + rmm::device_uvector traction(n, stream); + + d_repel = repel.data(); + d_attract = attract.data(); + d_old_forces = old_forces.data(); + d_mass = mass.data(); + d_swinging = swinging.data(); + d_traction = traction.data(); int random_state = 0; random_vector(pos, n * 2, random_state, stream); if (x_start && y_start) { - copy(n, x_start, pos); - copy(n, y_start, pos + n); + raft::copy(pos, x_start, n, stream); + raft::copy(pos + n, y_start, n, stream); } // Sort COO for coalesced memory access. @@ -110,10 +113,10 @@ void exact_fa2(GraphCOOView &graph, for (int iter = 0; iter < max_iter; ++iter) { // Reset force arrays - fill(n * 2, d_repel, 0.f); - fill(n * 2, d_attract, 0.f); - fill(n, d_swinging, 0.f); - fill(n, d_traction, 0.f); + thrust::fill(rmm::exec_policy(stream)->on(stream), repel.begin(), repel.end(), 0.f); + thrust::fill(rmm::exec_policy(stream)->on(stream), attract.begin(), attract.end(), 0.f); + thrust::fill(rmm::exec_policy(stream)->on(stream), swinging.begin(), swinging.end(), 0.f); + thrust::fill(rmm::exec_policy(stream)->on(stream), traction.begin(), traction.end(), 0.f); // Exact repulsion apply_repulsion(pos, pos + n, d_repel, d_repel + n, d_mass, scaling_ratio, n, stream); @@ -180,9 +183,9 @@ void exact_fa2(GraphCOOView &graph, if (callback) callback->on_epoch_end(pos); if (verbose) { - printf("iteration %i, speed: %f, speed_efficiency: %f, ", iter + 1, speed, speed_efficiency); - printf("jt: %f, ", jt); - printf("swinging: %f, traction: %f\n", s, t); + std::cout << "iteration: " << iter + 1 << ", speed: " << speed + << ", speed_efficiency: " << speed_efficiency << ", jt: " << jt + << ", swinging: " << s << ", traction: " << t << "\n"; } } diff --git a/cpp/src/layout/force_atlas2.cu b/cpp/src/layout/force_atlas2.cu index 6da9b77b45d..86c95cc883e 100644 --- a/cpp/src/layout/force_atlas2.cu +++ b/cpp/src/layout/force_atlas2.cu @@ -20,7 +20,8 @@ namespace cugraph { template -void force_atlas2(GraphCOOView &graph, +void force_atlas2(raft::handle_t const &handle, + GraphCOOView &graph, float *pos, const int max_iter, float *x_start, @@ -42,7 +43,8 @@ void force_atlas2(GraphCOOView &graph, CUGRAPH_EXPECTS(graph.number_of_vertices != 0, "Invalid input: Graph is empty"); if (!barnes_hut_optimize) { - cugraph::detail::exact_fa2(graph, + cugraph::detail::exact_fa2(handle, + graph, pos, max_iter, x_start, @@ -58,7 +60,8 @@ void force_atlas2(GraphCOOView &graph, verbose, callback); } else { - cugraph::detail::barnes_hut(graph, + cugraph::detail::barnes_hut(handle, + graph, pos, max_iter, x_start, @@ -77,7 +80,8 @@ void force_atlas2(GraphCOOView &graph, } } -template void force_atlas2(GraphCOOView &graph, +template void force_atlas2(raft::handle_t const &handle, + GraphCOOView &graph, float *pos, const int max_iter, float *x_start, @@ -95,7 +99,8 @@ template void force_atlas2(GraphCOOView &graph bool verbose, internals::GraphBasedDimRedCallback *callback); -template void force_atlas2(GraphCOOView &graph, +template void force_atlas2(raft::handle_t const &handle, + GraphCOOView &graph, float *pos, 
const int max_iter, float *x_start, diff --git a/cpp/tests/layout/force_atlas2_test.cu b/cpp/tests/layout/force_atlas2_test.cu index c22c256ae02..f2f5561a7d8 100644 --- a/cpp/tests/layout/force_atlas2_test.cu +++ b/cpp/tests/layout/force_atlas2_test.cu @@ -111,9 +111,10 @@ class Tests_Force_Atlas2 : public ::testing::TestWithParam std::vector> adj_matrix(m, std::vector(m)); std::vector force_atlas2(m * 2); + raft::handle_t const handle; + auto stream = handle.get_stream(); // device alloc - rmm::device_vector force_atlas2_vector(m * 2); - float* d_force_atlas2 = force_atlas2_vector.data().get(); + rmm::device_uvector pos(m * 2, stream); // Read ASSERT_EQ((cugraph::test::mm_to_coo( @@ -131,13 +132,13 @@ class Tests_Force_Atlas2 : public ::testing::TestWithParam } // Allocate COO on device - rmm::device_vector srcs_v(nnz); - rmm::device_vector dests_v(nnz); - rmm::device_vector weights_v(nnz); + rmm::device_uvector srcs_v(nnz, stream); + rmm::device_uvector dests_v(nnz, stream); + rmm::device_uvector weights_v(nnz, stream); - int* srcs = srcs_v.data().get(); - int* dests = dests_v.data().get(); - T* weights = weights_v.data().get(); + int* srcs = srcs_v.data(); + int* dests = dests_v.data(); + T* weights = weights_v.data(); // FIXME: RAFT error handling mechanism should be used instead CUDA_TRY(cudaMemcpy(srcs, &cooRowInd[0], sizeof(int) * nnz, cudaMemcpyDefault)); @@ -163,8 +164,9 @@ class Tests_Force_Atlas2 : public ::testing::TestWithParam if (PERF) { hr_clock.start(); for (int i = 0; i < PERF_MULTIPLIER; ++i) { - cugraph::force_atlas2(G, - d_force_atlas2, + cugraph::force_atlas2(handle, + G, + pos.data(), max_iter, x_start, y_start, @@ -185,8 +187,9 @@ class Tests_Force_Atlas2 : public ::testing::TestWithParam force_atlas2_time.push_back(time_tmp); } else { cudaProfilerStart(); - cugraph::force_atlas2(G, - d_force_atlas2, + cugraph::force_atlas2(handle, + G, + pos.data(), max_iter, x_start, y_start, @@ -207,7 +210,7 @@ class Tests_Force_Atlas2 : public ::testing::TestWithParam // Copy pos to host std::vector h_pos(m * 2); - CUDA_TRY(cudaMemcpy(&h_pos[0], d_force_atlas2, sizeof(float) * m * 2, cudaMemcpyDeviceToHost)); + CUDA_TRY(cudaMemcpy(&h_pos[0], pos.data(), sizeof(float) * m * 2, cudaMemcpyDeviceToHost)); // Transpose the data std::vector> C_contiguous_embedding(m, std::vector(2)); diff --git a/python/cugraph/layout/force_atlas2.pxd b/python/cugraph/layout/force_atlas2.pxd index bf5186c91f9..5496d1b655e 100644 --- a/python/cugraph/layout/force_atlas2.pxd +++ b/python/cugraph/layout/force_atlas2.pxd @@ -25,6 +25,7 @@ cdef extern from "cugraph/internals.hpp" namespace "cugraph::internals": cdef extern from "cugraph/algorithms.hpp" namespace "cugraph": cdef void force_atlas2[vertex_t, edge_t, weight_t]( + const handle_t &handle, GraphCOOView[vertex_t, edge_t, weight_t] &graph, float *pos, const int max_iter, diff --git a/python/cugraph/layout/force_atlas2_wrapper.pyx b/python/cugraph/layout/force_atlas2_wrapper.pyx index 7b801d19f1c..1644875f034 100644 --- a/python/cugraph/layout/force_atlas2_wrapper.pyx +++ b/python/cugraph/layout/force_atlas2_wrapper.pyx @@ -49,6 +49,10 @@ def force_atlas2(input_graph, Call force_atlas2 """ + cdef unique_ptr[handle_t] handle_ptr + handle_ptr.reset(new handle_t()) + handle_ = handle_ptr.get(); + if not input_graph.edgelist: input_graph.view_edge_list() @@ -61,12 +65,19 @@ def force_atlas2(input_graph, df = cudf.DataFrame() df['vertex'] = cudf.Series(np.arange(num_verts, dtype=np.int32)) - cdef uintptr_t c_src_indices = 
input_graph.edgelist.edgelist_df['src'].__cuda_array_interface__['data'][0] - cdef uintptr_t c_dst_indices = input_graph.edgelist.edgelist_df['dst'].__cuda_array_interface__['data'][0] + src = input_graph.edgelist.edgelist_df['src'] + dst = input_graph.edgelist.edgelist_df['dst'] + + [src, dst] = graph_primtypes_wrapper.datatype_cast([src, dst], [np.int32]) + + cdef uintptr_t c_src_indices = src.__cuda_array_interface__['data'][0] + cdef uintptr_t c_dst_indices = dst.__cuda_array_interface__['data'][0] cdef uintptr_t c_weights = NULL if input_graph.edgelist.weights: - c_weights = input_graph.edgelist.edgelist_df['weights'].__cuda_array_interface__['data'][0] + weights = input_graph.edgelist.edgelist_df["weights"] + [weights] = graph_primtypes_wrapper.datatype_cast([weights], [np.float32, np.float64]) + c_weights = weights.__cuda_array_interface__['data'][0] cdef uintptr_t x_start = NULL cdef uintptr_t y_start = NULL @@ -100,7 +111,8 @@ def force_atlas2(input_graph, graph_double = GraphCOOView[int,int, double](c_src_indices, c_dst_indices, c_weights, num_verts, num_edges) - c_force_atlas2[int, int, double](graph_double, + c_force_atlas2[int, int, double](handle_[0], + graph_double, pos_ptr, max_iter, x_start, @@ -121,7 +133,8 @@ def force_atlas2(input_graph, graph_float = GraphCOOView[int,int,float](c_src_indices, c_dst_indices, c_weights, num_verts, num_edges) - c_force_atlas2[int, int, float](graph_float, + c_force_atlas2[int, int, float](handle_[0], + graph_float, pos_ptr, max_iter, x_start, From 085922880478779597aa0c4c7e0a117e3e4a7515 Mon Sep 17 00:00:00 2001 From: Iroy30 <41401566+Iroy30@users.noreply.github.com> Date: Tue, 25 May 2021 18:43:06 -0500 Subject: [PATCH 271/343] Add mnmg out degree (#1592) Authors: - https://github.com/Iroy30 Approvers: - Rick Ratzel (https://github.com/rlratzel) - Brad Rees (https://github.com/BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1592 --- .../simpleDistributedGraph.py | 15 ++-- .../graph_implementation/simpleGraph.py | 10 ++- .../structure/graph_primtypes_wrapper.pyx | 75 +++++++++++-------- python/cugraph/tests/dask/test_mg_degree.py | 14 +++- .../tests/dask/test_mg_katz_centrality.py | 15 +--- 5 files changed, 71 insertions(+), 58 deletions(-) diff --git a/python/cugraph/structure/graph_implementation/simpleDistributedGraph.py b/python/cugraph/structure/graph_implementation/simpleDistributedGraph.py index e85f3b6ab6c..21eff99d2bf 100644 --- a/python/cugraph/structure/graph_implementation/simpleDistributedGraph.py +++ b/python/cugraph/structure/graph_implementation/simpleDistributedGraph.py @@ -12,6 +12,7 @@ # limitations under the License. 
from cugraph.structure import graph_primtypes_wrapper +from cugraph.structure.graph_primtypes_wrapper import Direction from cugraph.structure.number_map import NumberMap import cudf import dask_cudf @@ -211,7 +212,7 @@ def in_degree(self, vertex_subset=None): >>> G.from_cudf_edgelist(M, '0', '1') >>> df = G.in_degree([0,9,12]) """ - return self._degree(vertex_subset, x=1) + return self._degree(vertex_subset, direction=Direction.IN) def out_degree(self, vertex_subset=None): """ @@ -245,8 +246,7 @@ def out_degree(self, vertex_subset=None): >>> G.from_cudf_edgelist(M, '0', '1') >>> df = G.out_degree([0,9,12]) """ - # TODO: Add support - raise Exception("Not supported for distributed graph") + return self._degree(vertex_subset, direction=Direction.OUT) def degree(self, vertex_subset=None): """ @@ -319,14 +319,15 @@ def degrees(self, vertex_subset=None): """ raise Exception("Not supported for distributed graph") - def _degree(self, vertex_subset, x=0): - vertex_col, degree_col = graph_primtypes_wrapper._degree(self, x) + def _degree(self, vertex_subset, direction=Direction.ALL): + vertex_col, degree_col = graph_primtypes_wrapper._mg_degree(self, + direction) df = cudf.DataFrame() df["vertex"] = vertex_col df["degree"] = degree_col - if self.renumbered is True: - df = self.unrenumber(df, "vertex") + if self.properties.renumbered is True: + df = self.renumber_map.unrenumber(df, "vertex") if vertex_subset is not None: df = df[df['vertex'].isin(vertex_subset)] diff --git a/python/cugraph/structure/graph_implementation/simpleGraph.py b/python/cugraph/structure/graph_implementation/simpleGraph.py index 4e632a72231..3fa65fd8de6 100644 --- a/python/cugraph/structure/graph_implementation/simpleGraph.py +++ b/python/cugraph/structure/graph_implementation/simpleGraph.py @@ -12,6 +12,7 @@ # limitations under the License. from cugraph.structure import graph_primtypes_wrapper +from cugraph.structure.graph_primtypes_wrapper import Direction from cugraph.structure.symmetrize import symmetrize from cugraph.structure.number_map import NumberMap import cugraph.dask.common.mg_utils as mg_utils @@ -566,7 +567,7 @@ def in_degree(self, vertex_subset=None): >>> G.from_cudf_edgelist(M, '0', '1') >>> df = G.in_degree([0,9,12]) """ - return self._degree(vertex_subset, x=1) + return self._degree(vertex_subset, direction=Direction.IN) def out_degree(self, vertex_subset=None): """ @@ -600,7 +601,7 @@ def out_degree(self, vertex_subset=None): >>> G.from_cudf_edgelist(M, '0', '1') >>> df = G.out_degree([0,9,12]) """ - return self._degree(vertex_subset, x=2) + return self._degree(vertex_subset, direction=Direction.OUT) def degree(self, vertex_subset=None): """ @@ -690,8 +691,9 @@ def degrees(self, vertex_subset=None): return df - def _degree(self, vertex_subset, x=0): - vertex_col, degree_col = graph_primtypes_wrapper._degree(self, x) + def _degree(self, vertex_subset, direction=Direction.ALL): + vertex_col, degree_col = graph_primtypes_wrapper._degree(self, + direction) df = cudf.DataFrame() df["vertex"] = vertex_col df["degree"] = degree_col diff --git a/python/cugraph/structure/graph_primtypes_wrapper.pyx b/python/cugraph/structure/graph_primtypes_wrapper.pyx index 7bc62b9a1af..91af28380c3 100644 --- a/python/cugraph/structure/graph_primtypes_wrapper.pyx +++ b/python/cugraph/structure/graph_primtypes_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. 
# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -21,6 +21,7 @@ from cugraph.structure.graph_primtypes cimport get_two_hop_neighbors as c_get_tw from cugraph.structure.graph_primtypes cimport renumber_vertices as c_renumber_vertices from cugraph.structure.utils_wrapper import * from libcpp cimport bool +import enum from libc.stdint cimport uintptr_t from rmm._lib.device_buffer cimport device_buffer, DeviceBuffer @@ -45,6 +46,12 @@ def datatype_cast(cols, dtypes): return cols_out +class Direction(enum.Enum): + ALL = 0 + IN = 1 + OUT = 2 + + def renumber(source_col, dest_col): num_edges = len(source_col) @@ -137,7 +144,7 @@ def view_edge_list(input_graph): return src_indices, indices, weights -def _degree_coo(edgelist_df, src_name, dst_name, x=0, num_verts=None, sID=None): +def _degree_coo(edgelist_df, src_name, dst_name, direction=Direction.ALL, num_verts=None, sID=None): # # Computing the degree of the input graph from COO # @@ -146,11 +153,11 @@ def _degree_coo(edgelist_df, src_name, dst_name, x=0, num_verts=None, sID=None): src = edgelist_df[src_name] dst = edgelist_df[dst_name] - if x == 0: + if direction == Direction.ALL: dir = DIRECTION_IN_PLUS_OUT - elif x == 1: + elif direction == Direction.IN: dir = DIRECTION_IN - elif x == 2: + elif direction == Direction.OUT: dir = DIRECTION_OUT else: raise Exception("x should be 0, 1 or 2") @@ -185,17 +192,17 @@ def _degree_coo(edgelist_df, src_name, dst_name, x=0, num_verts=None, sID=None): return vertex_col, degree_col -def _degree_csr(offsets, indices, x=0): +def _degree_csr(offsets, indices, direction=Direction.ALL): cdef DegreeDirection dir - if x == 0: + if direction == Direction.ALL: dir = DIRECTION_IN_PLUS_OUT - elif x == 1: + elif direction == Direction.IN: dir = DIRECTION_IN - elif x == 2: + elif direction == Direction.OUT: dir = DIRECTION_OUT else: - raise Exception("x should be 0, 1 or 2") + raise Exception("direction should be 0, 1 or 2") [offsets, indices] = datatype_cast([offsets, indices], [np.int32]) @@ -220,44 +227,48 @@ def _degree_csr(offsets, indices, x=0): return vertex_col, degree_col -def _degree(input_graph, x=0): - transpose_x = { 0: 0, - 2: 1, - 1: 2 } +def _mg_degree(input_graph, direction=Direction.ALL): + if input_graph.edgelist is None: + input_graph.compute_renumber_edge_list(transposed=False) + input_ddf = input_graph.edgelist.edgelist_df + num_verts = input_ddf[['src', 'dst']].max().max().compute() + 1 + data = DistributedDataHandler.create(data=input_ddf) + comms = Comms.get_comms() + client = default_client() + data.calculate_parts_to_sizes(comms) + if direction==Direction.IN: + degree_ddf = [client.submit(_degree_coo, wf[1][0], 'src', 'dst', Direction.IN, num_verts, comms.sessionId, workers=[wf[0]]) for idx, wf in enumerate(data.worker_to_parts.items())] + if direction==Direction.OUT: + degree_ddf = [client.submit(_degree_coo, wf[1][0], 'dst', 'src', Direction.IN, num_verts, comms.sessionId, workers=[wf[0]]) for idx, wf in enumerate(data.worker_to_parts.items())] + wait(degree_ddf) + return degree_ddf[0].result() + + +def _degree(input_graph, direction=Direction.ALL): + transpose_direction = { Direction.ALL: Direction.ALL, + Direction.IN: Direction.OUT, + Direction.OUT: Direction.IN } if input_graph.adjlist is not None: return _degree_csr(input_graph.adjlist.offsets, input_graph.adjlist.indices, - x) + direction) if input_graph.transposedadjlist is not None: return 
_degree_csr(input_graph.transposedadjlist.offsets, input_graph.transposedadjlist.indices, - transpose_x[x]) - - if input_graph.edgelist is None and input_graph.distributed: - input_graph.compute_renumber_edge_list(transposed=False) + transpose_direction[direction]) if input_graph.edgelist is not None: - if isinstance(input_graph.edgelist.edgelist_df, dc.DataFrame): - input_ddf = input_graph.edgelist.edgelist_df - num_verts = input_ddf[['src', 'dst']].max().max().compute() + 1 - data = DistributedDataHandler.create(data=input_ddf) - comms = Comms.get_comms() - client = default_client() - data.calculate_parts_to_sizes(comms) - degree_ddf = [client.submit(_degree_coo, wf[1][0], 'src', 'dst', x, num_verts, comms.sessionId, workers=[wf[0]]) for idx, wf in enumerate(data.worker_to_parts.items())] - wait(degree_ddf) - return degree_ddf[0].result() return _degree_coo(input_graph.edgelist.edgelist_df, - 'src', 'dst', x) + 'src', 'dst', direction) raise Exception("input_graph not COO, CSR or CSC") def _degrees(input_graph): - verts, indegrees = _degree(input_graph,1) - verts, outdegrees = _degree(input_graph, 2) + verts, indegrees = _degree(input_graph, Direction.IN) + verts, outdegrees = _degree(input_graph, Direction.OUT) return verts, indegrees, outdegrees diff --git a/python/cugraph/tests/dask/test_mg_degree.py b/python/cugraph/tests/dask/test_mg_degree.py index 93e8a365dea..bad55df1ca9 100644 --- a/python/cugraph/tests/dask/test_mg_degree.py +++ b/python/cugraph/tests/dask/test_mg_degree.py @@ -36,7 +36,7 @@ def test_dask_mg_degree(client_connection): # FIXME: update this to allow dataset to be parameterized and have dataset # part of test param id (see other tests) - input_data_path = r"../datasets/karate.csv" + input_data_path = r"../datasets/karate-asymmetric.csv" print(f"dataset={input_data_path}") chunksize = cugraph.dask.get_chunksize(input_data_path) @@ -62,10 +62,18 @@ def test_dask_mg_degree(client_connection): g = cugraph.DiGraph() g.from_cudf_edgelist(df, "src", "dst") - merge_df = ( + merge_df_in = ( dg.in_degree() .merge(g.in_degree(), on="vertex", suffixes=["_dg", "_g"]) .compute() ) - assert merge_df["degree_dg"].equals(merge_df["degree_g"]) + merge_df_out = ( + dg.out_degree() + .merge(g.out_degree(), on="vertex", suffixes=["_dg", "_g"]) + .compute() + ) + + assert merge_df_in["degree_dg"].equals(merge_df_in["degree_g"]) + assert merge_df_out["degree_dg"].equals( + merge_df_out["degree_g"]) diff --git a/python/cugraph/tests/dask/test_mg_katz_centrality.py b/python/cugraph/tests/dask/test_mg_katz_centrality.py index eadf0f662d4..8ed604954f4 100644 --- a/python/cugraph/tests/dask/test_mg_katz_centrality.py +++ b/python/cugraph/tests/dask/test_mg_katz_centrality.py @@ -50,21 +50,12 @@ def test_dask_katz_centrality(client_connection): dtype=["int32", "int32", "float32"], ) - df = cudf.read_csv( - input_data_path, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) - - g = cugraph.DiGraph() - g.from_cudf_edgelist(df, "src", "dst") - dg = cugraph.DiGraph() dg.from_dask_cudf_edgelist(ddf, "src", "dst") - largest_out_degree = g.degrees().nlargest(n=1, columns="out_degree") - largest_out_degree = largest_out_degree["out_degree"].iloc[0] + largest_out_degree = dg.out_degree().compute().\ + nlargest(n=1, columns="degree") + largest_out_degree = largest_out_degree["degree"].iloc[0] katz_alpha = 1 / (largest_out_degree + 1) mg_res = dcg.katz_centrality(dg, alpha=katz_alpha, tol=1e-6) From 2bbef1d149f90b7c172c9a95a755fa00d5c1a865 Mon Sep 17 00:00:00 
2001 From: Rick Ratzel <3039903+rlratzel@users.noreply.github.com> Date: Thu, 27 May 2021 11:51:01 -0500 Subject: [PATCH 272/343] Updated dependencies for CalVer (#1629) Updated dependencies for CalVer Note: I have not tested this yet. Authors: - Rick Ratzel (https://github.com/rlratzel) Approvers: - Brad Rees (https://github.com/BradReesWork) - AJ Schmidt (https://github.com/ajschmidt8) URL: https://github.com/rapidsai/cugraph/pull/1629 --- conda/environments/cugraph_dev_cuda11.0.yml | 14 +++++++------- conda/environments/cugraph_dev_cuda11.1.yml | 14 +++++++------- conda/environments/cugraph_dev_cuda11.2.yml | 14 +++++++------- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/conda/environments/cugraph_dev_cuda11.0.yml b/conda/environments/cugraph_dev_cuda11.0.yml index e12b6143e69..0f4eb4ba782 100644 --- a/conda/environments/cugraph_dev_cuda11.0.yml +++ b/conda/environments/cugraph_dev_cuda11.0.yml @@ -6,16 +6,16 @@ channels: - conda-forge dependencies: - cudatoolkit=11.0 -- cudf=0.20.* -- libcudf=0.20.* -- rmm=0.20.* -- librmm=0.20.* +- cudf=21.06.* +- libcudf=21.06.* +- rmm=21.06.* +- librmm=21.06.* - dask>=2.12.0 - distributed>=2.12.0 -- dask-cuda=0.20* -- dask-cudf=0.20* +- dask-cuda=21.06* +- dask-cudf=21.06* - nccl>=2.8.4 -- ucx-py=0.20* +- ucx-py=21.06* - ucx-proc=*=gpu - scipy - networkx>=2.5.1 diff --git a/conda/environments/cugraph_dev_cuda11.1.yml b/conda/environments/cugraph_dev_cuda11.1.yml index 3fd7013f475..68d3e2310e3 100644 --- a/conda/environments/cugraph_dev_cuda11.1.yml +++ b/conda/environments/cugraph_dev_cuda11.1.yml @@ -6,16 +6,16 @@ channels: - conda-forge dependencies: - cudatoolkit=11.1 -- cudf=0.20.* -- libcudf=0.20.* -- rmm=0.20.* -- librmm=0.20.* +- cudf=21.06.* +- libcudf=21.06.* +- rmm=21.06.* +- librmm=21.06.* - dask>=2.12.0 - distributed>=2.12.0 -- dask-cuda=0.20* -- dask-cudf=0.20* +- dask-cuda=21.06* +- dask-cudf=21.06* - nccl>=2.8.4 -- ucx-py=0.20* +- ucx-py=21.06* - ucx-proc=*=gpu - scipy - networkx>=2.5.1 diff --git a/conda/environments/cugraph_dev_cuda11.2.yml b/conda/environments/cugraph_dev_cuda11.2.yml index 978cbf4dd5f..e52f6399ccf 100644 --- a/conda/environments/cugraph_dev_cuda11.2.yml +++ b/conda/environments/cugraph_dev_cuda11.2.yml @@ -6,16 +6,16 @@ channels: - conda-forge dependencies: - cudatoolkit=11.2 -- cudf=0.20.* -- libcudf=0.20.* -- rmm=0.20.* -- librmm=0.20.* +- cudf=21.06.* +- libcudf=21.06.* +- rmm=21.06.* +- librmm=21.06.* - dask>=2.12.0 - distributed>=2.12.0 -- dask-cuda=0.20* -- dask-cudf=0.20* +- dask-cuda=21.06* +- dask-cudf=21.06* - nccl>=2.8.4 -- ucx-py=0.20* +- ucx-py=21.06* - ucx-proc=*=gpu - scipy - networkx>=2.5.1 From 595d5b296fecef89163bbfa6f699097d2dc7b666 Mon Sep 17 00:00:00 2001 From: Paul Taylor Date: Thu, 27 May 2021 15:54:27 -0500 Subject: [PATCH 273/343] Initialize force_atlas2 `old_forces` device_uvector, use new `rmm::exec_policy` (#1625) Fixes an issue where the layout for all points converge to `(0,0)` when running multiple layout ticks after https://github.com/rapidsai/cugraph/pull/1607. * Initializes the `d_old_forces` vector to fix layout issue. * Updates to `barnes_hut.hpp` to use non-deprecated `rmm::exec_policy`. 
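For context on the two fixes described above: `rmm::device_uvector` leaves its storage uninitialized on allocation, so the `old_forces` accumulator has to be zero-filled explicitly before the first layout iteration, and the deprecated `rmm::exec_policy(stream)->on(stream)` idiom is replaced by constructing the policy directly from the stream view. A minimal standalone sketch of both patterns (illustrative only, not part of the patch; the function name and buffer size are made up, but the RMM/Thrust calls are the same ones the diff introduces):

```cpp
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/fill.h>
#include <thrust/reduce.h>

// Allocate a force buffer, zero it, and reduce it with the
// non-deprecated rmm::exec_policy API (policy built from the stream view).
float zeroed_force_sum(rmm::cuda_stream_view stream)
{
  rmm::device_uvector<float> old_forces(2 * 1024, stream);  // storage is uninitialized
  thrust::fill(rmm::exec_policy(stream), old_forces.begin(), old_forces.end(), 0.f);
  // Without the fill above, this reduction would read whatever was left in memory.
  return thrust::reduce(rmm::exec_policy(stream), old_forces.begin(), old_forces.end());
}
```

This mirrors what the patch does for `d_old_forces` in `barnes_hut.hpp` and `old_forces` in `exact_fa2.hpp`.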
Authors: - Paul Taylor (https://github.com/trxcllnt) Approvers: - Hugo Linsenmaier (https://github.com/hlinsen) - Alex Fender (https://github.com/afender) URL: https://github.com/rapidsai/cugraph/pull/1625 --- cpp/src/layout/barnes_hut.hpp | 168 +++++++++++++++++----------------- cpp/src/layout/exact_fa2.hpp | 50 +++++----- 2 files changed, 108 insertions(+), 110 deletions(-) diff --git a/cpp/src/layout/barnes_hut.hpp b/cpp/src/layout/barnes_hut.hpp index a3d413225e4..ca62eda3716 100644 --- a/cpp/src/layout/barnes_hut.hpp +++ b/cpp/src/layout/barnes_hut.hpp @@ -16,8 +16,8 @@ #pragma once -#include #include +#include #include #include @@ -52,9 +52,9 @@ void barnes_hut(raft::handle_t const &handle, bool verbose = false, internals::GraphBasedDimRedCallback *callback = nullptr) { - cudaStream_t stream = handle.get_stream(); - const edge_t e = graph.number_of_edges; - const vertex_t n = graph.number_of_vertices; + rmm::cuda_stream_view stream(handle.get_stream()); + const edge_t e = graph.number_of_edges; + const vertex_t n = graph.number_of_vertices; const int blocks = getMultiProcessorCount(); // A tiny jitter to promote numerical stability/ @@ -77,8 +77,8 @@ void barnes_hut(raft::handle_t const &handle, int *bottomd = d_bottomd.data(); float *radiusd = d_radiusd.data(); - InitializationKernel<<<1, 1, 0, stream>>>(limiter, maxdepthd, radiusd); - CHECK_CUDA(stream); + InitializationKernel<<<1, 1, 0, stream.value()>>>(limiter, maxdepthd, radiusd); + CHECK_CUDA(stream.value()); const int FOUR_NNODES = 4 * nnodes; const int FOUR_N = 4 * n; @@ -89,7 +89,7 @@ void barnes_hut(raft::handle_t const &handle, rmm::device_uvector d_childl((nnodes + 1) * 4, stream); // FA2 requires degree + 1 rmm::device_uvector d_massl(nnodes + 1, stream); - thrust::fill(rmm::exec_policy(stream)->on(stream), d_massl.begin(), d_massl.end(), 1.f); + thrust::fill(rmm::exec_policy(stream), d_massl.begin(), d_massl.end(), 1); rmm::device_uvector d_maxxl(blocks * FACTOR1, stream); rmm::device_uvector d_maxyl(blocks * FACTOR1, stream); @@ -129,10 +129,10 @@ void barnes_hut(raft::handle_t const &handle, // Copy start x and y positions. if (x_start && y_start) { - raft::copy(nodes_pos, x_start, n, stream); - raft::copy(nodes_pos + nnodes + 1, y_start, n, stream); + raft::copy(nodes_pos, x_start, n, stream.value()); + raft::copy(nodes_pos + nnodes + 1, y_start, n, stream.value()); } else { - random_vector(nodes_pos, (nnodes + 1) * 2, random_state, stream); + random_vector(nodes_pos, (nnodes + 1) * 2, random_state, stream.value()); } // Allocate arrays for force computation @@ -151,12 +151,14 @@ void barnes_hut(raft::handle_t const &handle, swinging = d_swinging.data(); traction = d_traction.data(); + thrust::fill(rmm::exec_policy(stream), d_old_forces.begin(), d_old_forces.end(), 0.f); + // Sort COO for coalesced memory access. - sort(graph, stream); - CHECK_CUDA(stream); + sort(graph, stream.value()); + CHECK_CUDA(stream.value()); graph.degree(massl, cugraph::DegreeDirection::OUT); - CHECK_CUDA(stream); + CHECK_CUDA(stream.value()); const vertex_t *row = graph.src_indices; const vertex_t *col = graph.dst_indices; @@ -170,8 +172,7 @@ void barnes_hut(raft::handle_t const &handle, // If outboundAttractionDistribution active, compensate. 
if (outbound_attraction_distribution) { - int sum = - thrust::reduce(rmm::exec_policy(stream)->on(stream), d_massl.begin(), d_massl.begin() + n); + int sum = thrust::reduce(rmm::exec_policy(stream), d_massl.begin(), d_massl.begin() + n); outbound_att_compensation = sum / (float)n; } @@ -194,71 +195,70 @@ void barnes_hut(raft::handle_t const &handle, for (int iter = 0; iter < max_iter; ++iter) { // Reset force values - thrust::fill( - rmm::exec_policy(stream)->on(stream), d_rep_forces.begin(), d_rep_forces.end(), 0.f); - thrust::fill(rmm::exec_policy(stream)->on(stream), d_attract.begin(), d_attract.end(), 0.f); - thrust::fill(rmm::exec_policy(stream)->on(stream), d_swinging.begin(), d_swinging.end(), 0.f); - thrust::fill(rmm::exec_policy(stream)->on(stream), d_traction.begin(), d_traction.end(), 0.f); + thrust::fill(rmm::exec_policy(stream), d_rep_forces.begin(), d_rep_forces.end(), 0.f); + thrust::fill(rmm::exec_policy(stream), d_attract.begin(), d_attract.end(), 0.f); + thrust::fill(rmm::exec_policy(stream), d_swinging.begin(), d_swinging.end(), 0.f); + thrust::fill(rmm::exec_policy(stream), d_traction.begin(), d_traction.end(), 0.f); - ResetKernel<<<1, 1, 0, stream>>>(radiusd_squared, bottomd, NNODES, radiusd); - CHECK_CUDA(stream); + ResetKernel<<<1, 1, 0, stream.value()>>>(radiusd_squared, bottomd, NNODES, radiusd); + CHECK_CUDA(stream.value()); // Compute bounding box arround all bodies - BoundingBoxKernel<<>>(startl, - childl, - massl, - nodes_pos, - nodes_pos + nnodes + 1, - maxxl, - maxyl, - minxl, - minyl, - FOUR_NNODES, - NNODES, - n, - limiter, - radiusd); - CHECK_CUDA(stream); - - ClearKernel1<<>>(childl, FOUR_NNODES, FOUR_N); - CHECK_CUDA(stream); + BoundingBoxKernel<<>>(startl, + childl, + massl, + nodes_pos, + nodes_pos + nnodes + 1, + maxxl, + maxyl, + minxl, + minyl, + FOUR_NNODES, + NNODES, + n, + limiter, + radiusd); + CHECK_CUDA(stream.value()); + + ClearKernel1<<>>(childl, FOUR_NNODES, FOUR_N); + CHECK_CUDA(stream.value()); // Build quadtree - TreeBuildingKernel<<>>( + TreeBuildingKernel<<>>( childl, nodes_pos, nodes_pos + nnodes + 1, NNODES, n, maxdepthd, bottomd, radiusd); - CHECK_CUDA(stream); + CHECK_CUDA(stream.value()); - ClearKernel2<<>>(startl, massl, NNODES, bottomd); - CHECK_CUDA(stream); + ClearKernel2<<>>(startl, massl, NNODES, bottomd); + CHECK_CUDA(stream.value()); // Summarizes mass and position for each cell, bottom up approach - SummarizationKernel<<>>( + SummarizationKernel<<>>( countl, childl, massl, nodes_pos, nodes_pos + nnodes + 1, NNODES, n, bottomd); - CHECK_CUDA(stream); + CHECK_CUDA(stream.value()); // Group closed bodies together, used to speed up Repulsion kernel - SortKernel<<>>( + SortKernel<<>>( sortl, countl, startl, childl, NNODES, n, bottomd); - CHECK_CUDA(stream); + CHECK_CUDA(stream.value()); // Force computation O(n . 
log(n)) - RepulsionKernel<<>>(scaling_ratio, - theta, - epssq, - sortl, - childl, - massl, - nodes_pos, - nodes_pos + nnodes + 1, - rep_forces, - rep_forces + nnodes + 1, - theta_squared, - NNODES, - FOUR_NNODES, - n, - radiusd_squared, - maxdepthd); - CHECK_CUDA(stream); + RepulsionKernel<<>>(scaling_ratio, + theta, + epssq, + sortl, + childl, + massl, + nodes_pos, + nodes_pos + nnodes + 1, + rep_forces, + rep_forces + nnodes + 1, + theta_squared, + NNODES, + FOUR_NNODES, + n, + radiusd_squared, + maxdepthd); + CHECK_CUDA(stream.value()); apply_gravity(nodes_pos, nodes_pos + nnodes + 1, @@ -269,7 +269,7 @@ void barnes_hut(raft::handle_t const &handle, strong_gravity_mode, scaling_ratio, n, - stream); + stream.value()); apply_attraction(row, col, @@ -284,7 +284,7 @@ void barnes_hut(raft::handle_t const &handle, lin_log_mode, edge_weight_influence, outbound_att_compensation, - stream); + stream.value()); compute_local_speed(rep_forces, rep_forces + nnodes + 1, @@ -296,30 +296,28 @@ void barnes_hut(raft::handle_t const &handle, swinging, traction, n, - stream); + stream.value()); // Compute global swinging and traction values - const float s = - thrust::reduce(rmm::exec_policy(stream)->on(stream), d_swinging.begin(), d_swinging.end()); + const float s = thrust::reduce(rmm::exec_policy(stream), d_swinging.begin(), d_swinging.end()); - const float t = - thrust::reduce(rmm::exec_policy(stream)->on(stream), d_traction.begin(), d_traction.end()); + const float t = thrust::reduce(rmm::exec_policy(stream), d_traction.begin(), d_traction.end()); // Compute global speed based on gloab and local swinging and traction. adapt_speed(jitter_tolerance, &jt, &speed, &speed_efficiency, s, t, n); // Update positions - apply_forces_bh<<>>(nodes_pos, - nodes_pos + nnodes + 1, - attract, - attract + n, - rep_forces, - rep_forces + nnodes + 1, - old_forces, - old_forces + n, - swinging, - speed, - n); + apply_forces_bh<<>>(nodes_pos, + nodes_pos + nnodes + 1, + attract, + attract + n, + rep_forces, + rep_forces + nnodes + 1, + old_forces, + old_forces + n, + swinging, + speed, + n); if (callback) callback->on_epoch_end(nodes_pos); @@ -331,8 +329,8 @@ void barnes_hut(raft::handle_t const &handle, } // Copy nodes positions into final output pos - raft::copy(pos, nodes_pos, n, stream); - raft::copy(pos + n, nodes_pos + nnodes + 1, n, stream); + raft::copy(pos, nodes_pos, n, stream.value()); + raft::copy(pos + n, nodes_pos + nnodes + 1, n, stream.value()); if (callback) callback->on_train_end(nodes_pos); } diff --git a/cpp/src/layout/exact_fa2.hpp b/cpp/src/layout/exact_fa2.hpp index d34f8843e9b..a82b7a5faff 100644 --- a/cpp/src/layout/exact_fa2.hpp +++ b/cpp/src/layout/exact_fa2.hpp @@ -16,8 +16,8 @@ #pragma once -#include #include +#include #include @@ -50,9 +50,9 @@ void exact_fa2(raft::handle_t const &handle, bool verbose = false, internals::GraphBasedDimRedCallback *callback = nullptr) { - cudaStream_t stream = handle.get_stream(); - const edge_t e = graph.number_of_edges; - const vertex_t n = graph.number_of_vertices; + rmm::cuda_stream_view stream(handle.get_stream()); + const edge_t e = graph.number_of_edges; + const vertex_t n = graph.number_of_vertices; float *d_repel{nullptr}; float *d_attract{nullptr}; @@ -64,9 +64,10 @@ void exact_fa2(raft::handle_t const &handle, rmm::device_uvector repel(n * 2, stream); rmm::device_uvector attract(n * 2, stream); rmm::device_uvector old_forces(n * 2, stream); + thrust::fill(rmm::exec_policy(stream), old_forces.begin(), old_forces.end(), 0.f); // FA2 requires degree + 
1. rmm::device_uvector mass(n, stream); - thrust::fill(rmm::exec_policy(stream)->on(stream), mass.begin(), mass.end(), 1.f); + thrust::fill(rmm::exec_policy(stream), mass.begin(), mass.end(), 1); rmm::device_uvector swinging(n, stream); rmm::device_uvector traction(n, stream); @@ -78,19 +79,19 @@ void exact_fa2(raft::handle_t const &handle, d_traction = traction.data(); int random_state = 0; - random_vector(pos, n * 2, random_state, stream); + random_vector(pos, n * 2, random_state, stream.value()); if (x_start && y_start) { - raft::copy(pos, x_start, n, stream); - raft::copy(pos + n, y_start, n, stream); + raft::copy(pos, x_start, n, stream.value()); + raft::copy(pos + n, y_start, n, stream.value()); } // Sort COO for coalesced memory access. - sort(graph, stream); - CHECK_CUDA(stream); + sort(graph, stream.value()); + CHECK_CUDA(stream.value()); graph.degree(d_mass, cugraph::DegreeDirection::OUT); - CHECK_CUDA(stream); + CHECK_CUDA(stream.value()); const vertex_t *row = graph.src_indices; const vertex_t *col = graph.dst_indices; @@ -102,7 +103,7 @@ void exact_fa2(raft::handle_t const &handle, float jt = 0.f; if (outbound_attraction_distribution) { - int sum = thrust::reduce(rmm::exec_policy(stream)->on(stream), mass.begin(), mass.end()); + int sum = thrust::reduce(rmm::exec_policy(stream), mass.begin(), mass.end()); outbound_att_compensation = sum / (float)n; } @@ -113,13 +114,14 @@ void exact_fa2(raft::handle_t const &handle, for (int iter = 0; iter < max_iter; ++iter) { // Reset force arrays - thrust::fill(rmm::exec_policy(stream)->on(stream), repel.begin(), repel.end(), 0.f); - thrust::fill(rmm::exec_policy(stream)->on(stream), attract.begin(), attract.end(), 0.f); - thrust::fill(rmm::exec_policy(stream)->on(stream), swinging.begin(), swinging.end(), 0.f); - thrust::fill(rmm::exec_policy(stream)->on(stream), traction.begin(), traction.end(), 0.f); + thrust::fill(rmm::exec_policy(stream), repel.begin(), repel.end(), 0.f); + thrust::fill(rmm::exec_policy(stream), attract.begin(), attract.end(), 0.f); + thrust::fill(rmm::exec_policy(stream), swinging.begin(), swinging.end(), 0.f); + thrust::fill(rmm::exec_policy(stream), traction.begin(), traction.end(), 0.f); // Exact repulsion - apply_repulsion(pos, pos + n, d_repel, d_repel + n, d_mass, scaling_ratio, n, stream); + apply_repulsion( + pos, pos + n, d_repel, d_repel + n, d_mass, scaling_ratio, n, stream.value()); apply_gravity(pos, pos + n, @@ -130,7 +132,7 @@ void exact_fa2(raft::handle_t const &handle, strong_gravity_mode, scaling_ratio, n, - stream); + stream.value()); apply_attraction(row, col, @@ -145,7 +147,7 @@ void exact_fa2(raft::handle_t const &handle, lin_log_mode, edge_weight_influence, outbound_att_compensation, - stream); + stream.value()); compute_local_speed(d_repel, d_repel + n, @@ -157,13 +159,11 @@ void exact_fa2(raft::handle_t const &handle, d_swinging, d_traction, n, - stream); + stream.value()); // Compute global swinging and traction values. 
- const float s = - thrust::reduce(rmm::exec_policy(stream)->on(stream), swinging.begin(), swinging.end()); - const float t = - thrust::reduce(rmm::exec_policy(stream)->on(stream), traction.begin(), traction.end()); + const float s = thrust::reduce(rmm::exec_policy(stream), swinging.begin(), swinging.end()); + const float t = thrust::reduce(rmm::exec_policy(stream), traction.begin(), traction.end()); adapt_speed(jitter_tolerance, &jt, &speed, &speed_efficiency, s, t, n); @@ -178,7 +178,7 @@ void exact_fa2(raft::handle_t const &handle, d_swinging, speed, n, - stream); + stream.value()); if (callback) callback->on_epoch_end(pos); From ce5b271e97c67af9432c0b0857837e589adfaff2 Mon Sep 17 00:00:00 2001 From: Andrei Schaffer <37386037+aschaffer@users.noreply.github.com> Date: Fri, 28 May 2021 12:32:56 -0500 Subject: [PATCH 274/343] Implement Graph Batching functionality (#1580) This tracks work on graph object serialization and broadcasting functionality: 1. C++ support of un/serialization; 2. C++ / Python support for broadcasting graph; Python / Dask orchestration for assembling all the functionality above will follow up in a future task, when a few more dependencies will become clearer (`graph_t` object Python wrapper, or opaque handle; decision on whether/when to implement dispatch layer; or even forgo the graph object in python, altogether, and just call a function stub after broadcasting: `distribute_and_call(functor, params)`, etc.); The RW Batch functionality is currently under review. Consequently, the scope of this PR has been modified accordingly. Authors: - Andrei Schaffer (https://github.com/aschaffer) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - Brad Rees (https://github.com/BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1580 --- cpp/CMakeLists.txt | 2 + cpp/include/cugraph/experimental/graph.hpp | 21 ++ .../cugraph/experimental/graph_view.hpp | 5 + .../cugraph/serialization/serializer.hpp | 204 +++++++++++ .../cugraph/utilities/path_retrieval.hpp | 15 + cpp/src/serialization/serializer.cu | 322 ++++++++++++++++++ cpp/src/utilities/graph_bcast.cu | 45 +++ cpp/src/utilities/graph_bcast.cuh | 110 ++++++ cpp/tests/CMakeLists.txt | 14 +- cpp/tests/bcast/mg_graph_bcast.cpp | 127 +++++++ cpp/tests/sampling/rw_low_level_test.cu | 58 +--- cpp/tests/serialization/un_serialize_test.cpp | 178 ++++++++++ cpp/tests/utilities/test_utilities.hpp | 123 +++++++ 13 files changed, 1178 insertions(+), 46 deletions(-) create mode 100644 cpp/include/cugraph/serialization/serializer.hpp create mode 100644 cpp/src/serialization/serializer.cu create mode 100644 cpp/src/utilities/graph_bcast.cu create mode 100644 cpp/src/utilities/graph_bcast.cuh create mode 100644 cpp/tests/bcast/mg_graph_bcast.cpp create mode 100644 cpp/tests/serialization/un_serialize_test.cpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index b0e52ba73b6..a83ea69d725 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -159,6 +159,7 @@ add_library(cugraph SHARED src/utilities/spmv_1D.cu src/utilities/cython.cu src/utilities/path_retrieval.cu + src/utilities/graph_bcast.cu src/structure/graph.cu src/linear_assignment/hungarian.cu src/link_analysis/gunrock_hits.cpp @@ -196,6 +197,7 @@ add_library(cugraph SHARED src/experimental/sssp.cu src/experimental/pagerank.cu src/experimental/katz_centrality.cu + src/serialization/serializer.cu src/tree/mst.cu src/components/weakly_connected_components.cu src/structure/create_graph_from_edgelist.cu diff --git a/cpp/include/cugraph/experimental/graph.hpp 
b/cpp/include/cugraph/experimental/graph.hpp index 8dcf0c2d3b9..1c829016516 100644 --- a/cpp/include/cugraph/experimental/graph.hpp +++ b/cpp/include/cugraph/experimental/graph.hpp @@ -157,6 +157,27 @@ class graph_t &&offsets, + rmm::device_uvector &&indices, + rmm::device_uvector &&weights, + std::vector &&segment_offsets) + : detail::graph_base_t( + handle, number_of_vertices, number_of_edges, properties), + offsets_(std::move(offsets)), + indices_(std::move(indices)), + weights_(std::move(weights)), + segment_offsets_(std::move(segment_offsets)) + { + } + rmm::device_uvector offsets_; rmm::device_uvector indices_; rmm::device_uvector weights_; diff --git a/cpp/include/cugraph/experimental/graph_view.hpp b/cpp/include/cugraph/experimental/graph_view.hpp index e077f02ff31..45e716c3647 100644 --- a/cpp/include/cugraph/experimental/graph_view.hpp +++ b/cpp/include/cugraph/experimental/graph_view.hpp @@ -29,6 +29,9 @@ #include namespace cugraph { +namespace serializer { +class serializer_t; // forward... +} namespace experimental { /** @@ -254,6 +257,8 @@ class graph_base_t { bool is_weighted() const { return properties_.is_weighted; } protected: + friend class cugraph::serializer::serializer_t; + raft::handle_t const* get_handle_ptr() const { return handle_ptr_; }; graph_properties_t get_graph_properties() const { return properties_; } diff --git a/cpp/include/cugraph/serialization/serializer.hpp b/cpp/include/cugraph/serialization/serializer.hpp new file mode 100644 index 00000000000..666ee81e98f --- /dev/null +++ b/cpp/include/cugraph/serialization/serializer.hpp @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Andrei Schaffer, aschaffer@nvidia.com +// +#pragma once + +#include + +#include + +#include + +#include +#include + +namespace cugraph { +namespace serializer { + +using namespace cugraph::experimental; + +class serializer_t { + public: + using byte_t = uint8_t; + + using device_byte_it = typename rmm::device_uvector::iterator; + using device_byte_cit = typename rmm::device_uvector::const_iterator; + + // cnstr. for serialize() path: + // + serializer_t(raft::handle_t const& handle, size_t total_sz_bytes) + : handle_(handle), + d_storage_(total_sz_bytes, handle.get_stream()), + begin_(d_storage_.begin()), + cbegin_(d_storage_.begin()) + { + } + + // cnstr. 
for unserialize() path: + // + serializer_t(raft::handle_t const& handle, byte_t const* ptr_d_storage) + : handle_(handle), d_storage_(0, handle.get_stream()), cbegin_(ptr_d_storage) + { + } + + template + struct graph_meta_t; + + template + struct graph_meta_t> { + // purposely empty, for now; + // FIXME: provide implementation for multi-gpu version + }; + + template + struct graph_meta_t> { + using vertex_t = typename graph_t::vertex_type; + using bool_ser_t = uint8_t; + + graph_meta_t(void) {} + + explicit graph_meta_t(graph_t const& graph) + : num_vertices_(graph.get_number_of_vertices()), + num_edges_(graph.get_number_of_edges()), + properties_(graph.get_graph_properties()), + segment_offsets_(graph.view().get_local_adj_matrix_partition_segment_offsets(0)) + { + } + + graph_meta_t(size_t num_vertices, + size_t num_edges, + graph_properties_t const& properties, + std::vector const& segment_offsets) + : num_vertices_(num_vertices), + num_edges_(num_edges), + properties_(properties), + segment_offsets_(segment_offsets) + { + } + + size_t num_vertices_; + size_t num_edges_; + graph_properties_t properties_{}; + std::vector segment_offsets_{}; + + size_t get_device_sz_bytes(void) const + { + return 2 * sizeof(size_t) + segment_offsets_.size() * sizeof(vertex_t) + + 3 * sizeof(bool_ser_t); + } + }; + + // POD-type serialization: + // + template + void serialize(value_t val); + + // POD-type unserialization: + // + template + value_t unserialize(void); + + // device array serialization: + // + template + void serialize(value_t const* p_d_src, size_t size); + + // device vector unserialization; + // extracts device_uvector of `size` bytes_to_value_t elements: + // + template + rmm::device_uvector unserialize( + size_t size); // size of device vector to be unserialized + + // graph serialization, + // with device storage and host metadata: + // (associated with target; e.g., num_vertices, etc.) + // + template + void serialize(graph_t const& graph, graph_meta_t& gmeta); // serialization target + + // graph unserialization, + // with device storage and host metadata: + // (associated with target; e.g., num_vertices, etc.) + // + template + graph_t unserialize(size_t device_sz_bytes, size_t host_sz_bytes); + + template + static std::pair get_device_graph_sz_bytes( + graph_meta_t const& graph_meta) + { + using vertex_t = typename graph_t::vertex_type; + using edge_t = typename graph_t::edge_type; + using weight_t = typename graph_t::weight_type; + + if constexpr (!graph_t::is_multi_gpu) { + size_t num_vertices = graph_meta.num_vertices_; + size_t num_edges = graph_meta.num_edges_; + + size_t weight_storage_sz = + graph_meta.properties_.is_weighted ? 
num_edges * sizeof(weight_t) : 0; + + size_t device_ser_sz = + (num_vertices + 1) * sizeof(edge_t) + num_edges * sizeof(vertex_t) + weight_storage_sz; + + size_t host_ser_sz = graph_meta.get_device_sz_bytes(); + + return std::make_pair( + device_ser_sz, + host_ser_sz); // FIXME: remove when host_bcast() becomes available for host vectors + + } else { + CUGRAPH_FAIL("Unsupported graph type for un/serialization."); + + return std::pair{}; + } + } + + template + static std::pair get_device_graph_sz_bytes(graph_t const& graph) + { + graph_meta_t gmeta{graph}; + return get_device_graph_sz_bytes(gmeta); + } + + byte_t const* get_storage(void) const { return d_storage_.begin(); } + byte_t* get_storage(void) { return d_storage_.begin(); } + + private: + // serialization of graph metadata, via device orchestration: + // + template + void serialize(graph_meta_t const& graph_meta); + + // unserialization of graph metadata, via device orchestration: + // + template + graph_meta_t unserialize( + size_t graph_meta_sz_bytes, + graph_meta_t const& empty_meta); // tag dispatching to avoid conflict with + // `unserialize(size_t)` for device vectors + + raft::handle_t const& handle_; + rmm::device_uvector d_storage_; + device_byte_it begin_{nullptr}; // advances on serialize() + device_byte_cit cbegin_{nullptr}; // advances on unserialize() +}; + +} // namespace serializer +} // namespace cugraph diff --git a/cpp/include/cugraph/utilities/path_retrieval.hpp b/cpp/include/cugraph/utilities/path_retrieval.hpp index 4d1b6a1b4d2..3b2408d9037 100644 --- a/cpp/include/cugraph/utilities/path_retrieval.hpp +++ b/cpp/include/cugraph/utilities/path_retrieval.hpp @@ -84,4 +84,19 @@ template std::tuple, rmm::device_uvector, rmm::device_uvector> query_rw_sizes_offsets(raft::handle_t const &handle, index_t num_paths, index_t const *ptr_d_sizes); } // namespace experimental + +namespace broadcast { +/** + * @brief broadcasts graph_t object (only the single GPU version). + * + * @tparam graph_t Type of graph (view). + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_ptr pointer to graph object: not `nullptr` on send, `nullptr` (ignored) on receive. + * @return graph_t object that was sent/received + */ +template +graph_t graph_broadcast(raft::handle_t const &handle, graph_t *graph_ptr); +}; // namespace broadcast + } // namespace cugraph diff --git a/cpp/src/serialization/serializer.cu b/cpp/src/serialization/serializer.cu new file mode 100644 index 00000000000..1950ed780c5 --- /dev/null +++ b/cpp/src/serialization/serializer.cu @@ -0,0 +1,322 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// Andrei Schaffer, aschaffer@nvidia.com +// + +#include + +#include + +#include + +#include + +#include + +#include + +#include + +namespace cugraph { +namespace serializer { +template +void serializer_t::serialize(value_t val) +{ + auto byte_buff_sz = sizeof(value_t); + auto it_end = begin_ + byte_buff_sz; + + raft::update_device( + begin_, reinterpret_cast(&val), byte_buff_sz, handle_.get_stream()); + + begin_ = it_end; +} + +template +value_t serializer_t::unserialize(void) +{ + value_t val{}; + auto byte_buff_sz = sizeof(value_t); + + raft::update_host(&val, reinterpret_cast(cbegin_), 1, handle_.get_stream()); + + cbegin_ += byte_buff_sz; + return val; +} + +template +void serializer_t::serialize(value_t const* p_d_src, size_t size) +{ + auto byte_buff_sz = size * sizeof(value_t); + auto it_end = begin_ + byte_buff_sz; + byte_t const* byte_buff = reinterpret_cast(p_d_src); + + thrust::copy_n(rmm::exec_policy(handle_.get_stream_view()), byte_buff, byte_buff_sz, begin_); + + begin_ = it_end; +} + +template +rmm::device_uvector serializer_t::unserialize(size_t size) +{ + auto byte_buff_sz = size * sizeof(value_t); + rmm::device_uvector d_dest(size, handle_.get_stream()); + byte_t* byte_buff = reinterpret_cast(d_dest.data()); + + thrust::copy_n(rmm::exec_policy(handle_.get_stream_view()), cbegin_, byte_buff_sz, byte_buff); + + cbegin_ += byte_buff_sz; + return d_dest; +} + +// serialization of graph metadata, via device orchestration: +// +template +void serializer_t::serialize(serializer_t::graph_meta_t const& gmeta) +{ + using vertex_t = typename graph_t::vertex_type; + using edge_t = typename graph_t::edge_type; + using weight_t = typename graph_t::weight_type; + + if constexpr (!graph_t::is_multi_gpu) { + using bool_t = typename graph_meta_t::bool_ser_t; + + serialize(gmeta.num_vertices_); + serialize(gmeta.num_edges_); + serialize(static_cast(gmeta.properties_.is_symmetric)); + serialize(static_cast(gmeta.properties_.is_multigraph)); + serialize(static_cast(gmeta.properties_.is_weighted)); + + auto seg_off_sz_bytes = gmeta.segment_offsets_.size() * sizeof(vertex_t); + if (seg_off_sz_bytes > 0) { + auto it_end = begin_ + seg_off_sz_bytes; + + raft::update_device(begin_, + reinterpret_cast(gmeta.segment_offsets_.data()), + seg_off_sz_bytes, + handle_.get_stream()); + + begin_ = it_end; + } + + } else { + CUGRAPH_FAIL("Unsupported graph type for serialization."); + } +} + +// unserialization of graph metadata, via device orchestration: +// +template +serializer_t::graph_meta_t serializer_t::unserialize( + size_t graph_meta_sz_bytes, + serializer_t::graph_meta_t const& empty_meta) // tag dispatching parameter +{ + using vertex_t = typename graph_t::vertex_type; + using edge_t = typename graph_t::edge_type; + using weight_t = typename graph_t::weight_type; + + if constexpr (!graph_t::is_multi_gpu) { + using bool_t = typename graph_meta_t::bool_ser_t; + + CUGRAPH_EXPECTS(graph_meta_sz_bytes >= 2 * sizeof(size_t) + 3 * sizeof(bool_t), + "Un/serialization meta size mismatch."); + + size_t num_vertices = unserialize(); + size_t num_edges = unserialize(); + bool_t is_symmetric = unserialize(); + bool_t is_multigraph = unserialize(); + bool_t is_weighted = unserialize(); + + graph_properties_t properties{static_cast(is_symmetric), + static_cast(is_multigraph), + static_cast(is_weighted)}; + + std::vector segment_offsets{}; + + size_t seg_off_sz_bytes = graph_meta_sz_bytes - 2 * sizeof(size_t) - 3 * sizeof(bool_t); + + if (seg_off_sz_bytes > 0) { + 
raft::update_host(segment_offsets.data(), + reinterpret_cast(cbegin_), + seg_off_sz_bytes, + handle_.get_stream()); + + cbegin_ += seg_off_sz_bytes; + } + + return graph_meta_t{num_vertices, num_edges, properties, segment_offsets}; + + } else { + CUGRAPH_FAIL("Unsupported graph type for unserialization."); + return graph_meta_t{}; + } +} + +// graph serialization: +// metadata argument (gvmeta) can be used for checking / testing; +// +template +void serializer_t::serialize(graph_t const& graph, serializer_t::graph_meta_t& gvmeta) +{ + using vertex_t = typename graph_t::vertex_type; + using edge_t = typename graph_t::edge_type; + using weight_t = typename graph_t::weight_type; + + if constexpr (!graph_t::is_multi_gpu) { + size_t num_vertices = graph.get_number_of_vertices(); + size_t num_edges = graph.get_number_of_edges(); + auto&& gview = graph.view(); + + gvmeta = graph_meta_t{graph}; + + edge_t const* offsets = gview.offsets(); + vertex_t const* indices = gview.indices(); + weight_t const* weights = gview.weights(); + + // FIXME: remove when host_bcast() becomes available for vectors; + // + // for now, this must come first, because unserialize() + // needs it at the beginning to extract graph metadata + // to be able to finish the rest of the graph unserialization; + // + serialize(gvmeta); + + serialize(offsets, num_vertices + 1); + serialize(indices, num_edges); + + if (graph.is_weighted()) serialize(weights, num_edges); + + } else { + CUGRAPH_FAIL("Unsupported graph type for serialization."); + } +} + +// graph unserialization: +// +template +graph_t serializer_t::unserialize(size_t device_sz_bytes, size_t host_sz_bytes) +{ + using vertex_t = typename graph_t::vertex_type; + using edge_t = typename graph_t::edge_type; + using weight_t = typename graph_t::weight_type; + + if constexpr (!graph_t::is_multi_gpu) { + graph_meta_t empty_meta{}; // tag-dispatching only + + // FIXME: remove when host_bcast() becomes available for vectors; + // + // for now, this must come first, because unserialize() + // needs it at the beginning to extract graph metadata + // to be able to finish the rest of the graph unserialization; + // + auto gvmeta = unserialize(host_sz_bytes, empty_meta); + + auto pair_sz = get_device_graph_sz_bytes(gvmeta); + + CUGRAPH_EXPECTS((pair_sz.first == device_sz_bytes) && (pair_sz.second == host_sz_bytes), + "Un/serialization size mismatch."); + + vertex_t num_vertices = gvmeta.num_vertices_; + edge_t num_edges = gvmeta.num_edges_; + auto g_props = gvmeta.properties_; + auto seg_offsets = gvmeta.segment_offsets_; + + auto d_offsets = unserialize(num_vertices + 1); + auto d_indices = unserialize(num_edges); + + if (g_props.is_weighted) { + auto d_weights = unserialize(num_edges); + + return graph_t(handle_, + num_vertices, + num_edges, + g_props, + std::move(d_offsets), + std::move(d_indices), + std::move(d_weights), + std::move(seg_offsets)); // RVO-ed + } else { + return graph_t(handle_, + num_vertices, + num_edges, + g_props, + std::move(d_offsets), + std::move(d_indices), + rmm::device_uvector(0, handle_.get_stream()), + std::move(seg_offsets)); // RVO-ed + } + + } else { + CUGRAPH_FAIL("Unsupported graph type for unserialization."); + + return graph_t{handle_}; + } +} + +// Manual template instantiations (EIDir's): +// +template void serializer_t::serialize(int32_t const* p_d_src, size_t size); +template void serializer_t::serialize(int64_t const* p_d_src, size_t size); +template void serializer_t::serialize(float const* p_d_src, size_t size); +template void 
serializer_t::serialize(double const* p_d_src, size_t size); + +template rmm::device_uvector serializer_t::unserialize(size_t size); +template rmm::device_uvector serializer_t::unserialize(size_t size); +template rmm::device_uvector serializer_t::unserialize(size_t size); +template rmm::device_uvector serializer_t::unserialize(size_t size); + +// serialize graph: +// +template void serializer_t::serialize( + graph_t const& graph, + serializer_t::graph_meta_t>&); + +template void serializer_t::serialize( + graph_t const& graph, + serializer_t::graph_meta_t>&); + +template void serializer_t::serialize( + graph_t const& graph, + serializer_t::graph_meta_t>&); + +template void serializer_t::serialize( + graph_t const& graph, + serializer_t::graph_meta_t>&); + +template void serializer_t::serialize( + graph_t const& graph, + serializer_t::graph_meta_t>&); + +template void serializer_t::serialize( + graph_t const& graph, + serializer_t::graph_meta_t>&); + +// unserialize graph: +// +template graph_t serializer_t::unserialize(size_t, size_t); + +template graph_t serializer_t::unserialize(size_t, size_t); + +template graph_t serializer_t::unserialize(size_t, size_t); + +template graph_t serializer_t::unserialize(size_t, size_t); + +template graph_t serializer_t::unserialize(size_t, size_t); + +template graph_t serializer_t::unserialize(size_t, size_t); + +} // namespace serializer +} // namespace cugraph diff --git a/cpp/src/utilities/graph_bcast.cu b/cpp/src/utilities/graph_bcast.cu new file mode 100644 index 00000000000..e06c1508cf9 --- /dev/null +++ b/cpp/src/utilities/graph_bcast.cu @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Andrei Schaffer, aschaffer@nvidia.com +// +#include "graph_bcast.cuh" + +namespace cugraph { +namespace broadcast { +using namespace cugraph::experimental; +// Manual template instantiations (EIDir's): +// +template graph_t graph_broadcast( + raft::handle_t const& handle, graph_t* graph_ptr); + +template graph_t graph_broadcast( + raft::handle_t const& handle, graph_t* graph_ptr); + +template graph_t graph_broadcast( + raft::handle_t const& handle, graph_t* graph_ptr); + +template graph_t graph_broadcast( + raft::handle_t const& handle, graph_t* graph_ptr); + +template graph_t graph_broadcast( + raft::handle_t const& handle, graph_t* graph_ptr); + +template graph_t graph_broadcast( + raft::handle_t const& handle, graph_t* graph_ptr); + +} // namespace broadcast +} // namespace cugraph diff --git a/cpp/src/utilities/graph_bcast.cuh b/cpp/src/utilities/graph_bcast.cuh new file mode 100644 index 00000000000..b4007ad20f2 --- /dev/null +++ b/cpp/src/utilities/graph_bcast.cuh @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Andrei Schaffer, aschaffer@nvidia.com +// +#pragma once + +#include + +#include +#include + +#include + +namespace cugraph { +namespace broadcast { + +/** + * @brief broadcasts graph_t object (only the single GPU version). + * + * @tparam graph_t Type of graph (view). + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_ptr pointer to graph object: not `nullptr` on send, `nullptr` (ignored) on receive. + * @return graph_t object that was sent/received + */ +template +graph_t graph_broadcast(raft::handle_t const& handle, graph_t* graph_ptr) +{ + using namespace cugraph::serializer; + using namespace cugraph::experimental; + + using vertex_t = typename graph_t::vertex_type; + using edge_t = typename graph_t::edge_type; + using weight_t = typename graph_t::weight_type; + + if constexpr (!graph_t::is_multi_gpu) { + if (handle.get_comms().get_rank() == 0) { + CUGRAPH_EXPECTS(graph_ptr != nullptr, "Cannot serialize nullptr graph pointer."); + + auto pair = serializer_t::get_device_graph_sz_bytes(*graph_ptr); + thrust::tuple dev_sz_host_sz_bytes = + thrust::make_tuple(pair.first, pair.second); + + auto total_graph_dev_sz = pair.first + pair.second; + + serializer_t ser(handle, total_graph_dev_sz); + serializer_t::graph_meta_t graph_meta{}; + ser.serialize(*graph_ptr, graph_meta); + + int root{0}; + host_scalar_bcast(handle.get_comms(), dev_sz_host_sz_bytes, root, handle.get_stream()); + device_bcast(handle.get_comms(), + ser.get_storage(), + ser.get_storage(), + total_graph_dev_sz, + root, + handle.get_stream()); + + return std::move(*graph_ptr); + } else { + thrust::tuple dev_sz_host_sz_bytes(0, 0); + + int root{0}; + dev_sz_host_sz_bytes = + host_scalar_bcast(handle.get_comms(), dev_sz_host_sz_bytes, root, handle.get_stream()); + // + auto total_graph_dev_sz = + thrust::get<0>(dev_sz_host_sz_bytes) + thrust::get<1>(dev_sz_host_sz_bytes); + + CUGRAPH_EXPECTS(total_graph_dev_sz > 0, "Graph size comm failure."); + + rmm::device_uvector data_buffer(total_graph_dev_sz, + handle.get_stream_view()); + + device_bcast(handle.get_comms(), + data_buffer.data(), + data_buffer.data(), + total_graph_dev_sz, + root, + handle.get_stream()); + + serializer_t ser(handle, data_buffer.data()); + auto graph = ser.unserialize(thrust::get<0>(dev_sz_host_sz_bytes), + thrust::get<1>(dev_sz_host_sz_bytes)); + + return graph; + } + } else { + CUGRAPH_FAIL("Unsupported graph type for broadcasting."); + + return graph_t{handle}; + } +} + +} // namespace broadcast +} // namespace cugraph diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index cb7eca3ed11..ae1c9ccd1e0 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -381,7 +381,7 @@ set(WEAKLY_CONNECTED_COMPONENTS_TEST_SRCS ConfigureTest(WEAKLY_CONNECTED_COMPONENTS_TEST "${WEAKLY_CONNECTED_COMPONENTS_TEST_SRCS}") ################################################################################################### -# - Experimental RANDOM_WALKS tests 
------------------------------------------------------------ +# - Experimental RANDOM_WALKS tests --------------------------------------------------------------- ConfigureTest(RANDOM_WALKS_TEST sampling/random_walks_test.cu) ################################################################################################### @@ -393,6 +393,14 @@ ConfigureTest(RANDOM_WALKS_LOW_LEVEL_TEST sampling/rw_low_level_test.cu) # function (which would not link gtest, etc.) ConfigureTest(RANDOM_WALKS_PROFILING sampling/random_walks_profiling.cu) +################################################################################################### +# - Serialization tests --------------------------------------------------------------------------- + +set(SERIALIZATION_TEST_SRCS + "${CMAKE_CURRENT_SOURCE_DIR}/serialization/un_serialize_test.cpp") + +ConfigureTest(SERIALIZATION_TEST "${SERIALIZATION_TEST_SRCS}") + ################################################################################################### # - MG tests -------------------------------------------------------------------------------------- @@ -438,6 +446,10 @@ if(BUILD_CUGRAPH_MG_TESTS) # - MG WEAKLY CONNECTED COMPONENTS tests -------------------------------------------------- ConfigureTestMG(MG_WEAKLY_CONNECTED_COMPONENTS_TEST components/mg_weakly_connected_components_test.cpp) + + ########################################################################################### + # - MG GRAPH BROADCAST tests -------------------------------------------------------------- + ConfigureTestMG(MG_GRAPH_BROADCAST_TEST bcast/mg_graph_bcast.cpp) else() message(FATAL_ERROR "OpenMPI NOT found, cannot build MG tests.") endif() diff --git a/cpp/tests/bcast/mg_graph_bcast.cpp b/cpp/tests/bcast/mg_graph_bcast.cpp new file mode 100644 index 00000000000..dbb2460abf4 --- /dev/null +++ b/cpp/tests/bcast/mg_graph_bcast.cpp @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Andrei Schaffer, aschaffer@nvidia.com +// +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include + +#include + +//////////////////////////////////////////////////////////////////////////////// +// Test param object. This defines the input and expected output for a test, and +// will be instantiated as the parameter to the tests defined below using +// INSTANTIATE_TEST_SUITE_P() +// +struct GraphBcast_Usecase { + std::string graph_file_full_path{}; + + // FIXME: We really should have a Graph_Testparms_Base class or something + // like that which can handle this graph_full_path thing. 
+ // + explicit GraphBcast_Usecase(std::string const& graph_file_path) + { + if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { + graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; + } else { + graph_file_full_path = graph_file_path; + } + }; +}; + +//////////////////////////////////////////////////////////////////////////////// +// Parameterized test fixture, to be used with TEST_P(). This defines common +// setup and teardown steps as well as common utilities used by each E2E MG +// test. In this case, each test is identical except for the inputs and +// expected outputs, so the entire test is defined in the run_test() method. +// +class GraphBcast_MG_Testfixture : public ::testing::TestWithParam { + public: + static void SetUpTestCase() {} + static void TearDownTestCase() {} + + // Run once for each test instance + // + virtual void SetUp() {} + virtual void TearDown() {} + + // Compare the results of broadcasting a graph, + // by comparing the graph that was sent (`sg_graph`) + // with th eone that was received (`graph-copy`): + // + template + void run_test(const GraphBcast_Usecase& param) + { + using namespace cugraph::broadcast; + using sg_graph_t = cugraph::experimental::graph_t; + + raft::handle_t handle; + + raft::comms::initialize_mpi_comms(&handle, MPI_COMM_WORLD); + const auto& comm = handle.get_comms(); + + auto const comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + + cudaStream_t stream = handle.get_stream(); + + sg_graph_t sg_graph(handle); + + rmm::device_uvector d_renumber_map_labels(0, stream); + + std::tie(sg_graph, d_renumber_map_labels) = + cugraph::test::read_graph_from_matrix_market_file( + handle, param.graph_file_full_path, true, /*renumber=*/false); + + if (comm_rank == 0) { + graph_broadcast(handle, &sg_graph); + ; + } else { + sg_graph_t* g_ignore{nullptr}; + auto graph_copy = graph_broadcast(handle, g_ignore); + auto [same, str_fail] = cugraph::test::compare_graphs(handle, sg_graph, graph_copy); + + if (!same) std::cerr << "Graph comparison failed on " << str_fail << '\n'; + + ASSERT_TRUE(same); + } + } +}; + +//////////////////////////////////////////////////////////////////////////////// +TEST_P(GraphBcast_MG_Testfixture, CheckInt32Int32Float) +{ + run_test(GetParam()); +} + +INSTANTIATE_TEST_SUITE_P(simple_test, + GraphBcast_MG_Testfixture, + ::testing::Values(GraphBcast_Usecase("test/datasets/karate.mtx") + //,GraphBcast_Usecase("test/datasets/smallworld.mtx") + )); + +CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/sampling/rw_low_level_test.cu b/cpp/tests/sampling/rw_low_level_test.cu index f5b0db0ed3e..77c5b18499a 100644 --- a/cpp/tests/sampling/rw_low_level_test.cu +++ b/cpp/tests/sampling/rw_low_level_test.cu @@ -47,38 +47,6 @@ using vector_test_t = detail::device_vec_t; // for debug purposes namespace { // anonym. 
-template -graph_t make_graph(raft::handle_t const& handle, - std::vector const& v_src, - std::vector const& v_dst, - std::vector const& v_w, - vertex_t num_vertices, - edge_t num_edges, - bool is_weighted) -{ - vector_test_t d_src(num_edges, handle.get_stream()); - vector_test_t d_dst(num_edges, handle.get_stream()); - vector_test_t d_weights(num_edges, handle.get_stream()); - - raft::update_device(d_src.data(), v_src.data(), d_src.size(), handle.get_stream()); - raft::update_device(d_dst.data(), v_dst.data(), d_dst.size(), handle.get_stream()); - - weight_t* ptr_d_weights{nullptr}; - if (is_weighted) { - raft::update_device(d_weights.data(), v_w.data(), d_weights.size(), handle.get_stream()); - - ptr_d_weights = d_weights.data(); - } - - edgelist_t edgelist{ - d_src.data(), d_dst.data(), ptr_d_weights, num_edges}; - - graph_t graph( - handle, edgelist, num_vertices, graph_properties_t{false, false, is_weighted}, false); - - return graph; -} - template bool check_col_indices(raft::handle_t const& handle, vector_test_t const& d_crt_out_degs, @@ -126,7 +94,7 @@ TEST_F(RandomWalksPrimsTest, SimpleGraphRWStart) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); + auto graph = cugraph::test::make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); auto graph_view = graph.view(); @@ -206,7 +174,7 @@ TEST_F(RandomWalksPrimsTest, SimpleGraphCoalesceExperiments) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); + auto graph = cugraph::test::make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); auto graph_view = graph.view(); @@ -282,7 +250,7 @@ TEST_F(RandomWalksPrimsTest, SimpleGraphColExtraction) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); + auto graph = cugraph::test::make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); auto graph_view = graph.view(); @@ -378,7 +346,7 @@ TEST_F(RandomWalksPrimsTest, SimpleGraphRndGenColIndx) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); + auto graph = cugraph::test::make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); auto graph_view = graph.view(); @@ -456,7 +424,7 @@ TEST_F(RandomWalksPrimsTest, SimpleGraphUpdatePathSizes) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); + auto graph = cugraph::test::make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); auto graph_view = graph.view(); @@ -528,7 +496,7 @@ TEST_F(RandomWalksPrimsTest, SimpleGraphScatterUpdate) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); + auto graph = cugraph::test::make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); auto graph_view = graph.view(); @@ -673,7 +641,7 @@ TEST_F(RandomWalksPrimsTest, SimpleGraphCoalesceDefragment) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 
1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); + auto graph = cugraph::test::make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); auto graph_view = graph.view(); @@ -748,7 +716,7 @@ TEST_F(RandomWalksPrimsTest, SimpleGraphRandomWalk) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); + auto graph = cugraph::test::make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); auto graph_view = graph.view(); @@ -805,7 +773,7 @@ TEST(RandomWalksQuery, GraphRWQueryOffsets) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); + auto graph = cugraph::test::make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); auto graph_view = graph.view(); @@ -866,7 +834,7 @@ TEST(RandomWalksSpecialCase, SingleRandomWalk) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); + auto graph = cugraph::test::make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); auto graph_view = graph.view(); @@ -923,8 +891,8 @@ TEST(RandomWalksSpecialCase, UnweightedGraph) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w; - auto graph = - make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, false); // un-weighted + auto graph = cugraph::test::make_graph( + handle, v_src, v_dst, v_w, num_vertices, num_edges, false); // un-weighted auto graph_view = graph.view(); @@ -981,7 +949,7 @@ TEST(RandomWalksPadded, SimpleGraph) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); + auto graph = cugraph::test::make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); auto graph_view = graph.view(); diff --git a/cpp/tests/serialization/un_serialize_test.cpp b/cpp/tests/serialization/un_serialize_test.cpp new file mode 100644 index 00000000000..9f11a9aaa27 --- /dev/null +++ b/cpp/tests/serialization/un_serialize_test.cpp @@ -0,0 +1,178 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include "cuda_profiler_api.h" + +#include +#include + +#include +#include + +#include + +TEST(SerializationTest, GraphSerUnser) +{ + using namespace cugraph::serializer; + + using vertex_t = int32_t; + using edge_t = vertex_t; + using weight_t = float; + using index_t = vertex_t; + + raft::handle_t handle{}; + + edge_t num_edges = 8; + vertex_t num_vertices = 6; + + std::vector v_src{0, 1, 1, 2, 2, 2, 3, 4}; + std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; + std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; + + auto graph = cugraph::test::make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); + + auto pair_sz = serializer_t::get_device_graph_sz_bytes(graph); + auto total_ser_sz = pair_sz.first + pair_sz.second; + + serializer_t ser(handle, total_ser_sz); + serializer_t::graph_meta_t graph_meta{}; + ser.serialize(graph, graph_meta); + + pair_sz = serializer_t::get_device_graph_sz_bytes(graph_meta); + auto post_ser_sz = pair_sz.first + pair_sz.second; + + EXPECT_EQ(total_ser_sz, post_ser_sz); + + auto graph_copy = ser.unserialize(pair_sz.first, pair_sz.second); + + auto pair = cugraph::test::compare_graphs(handle, graph, graph_copy); + if (pair.first == false) std::cerr << "Test failed with " << pair.second << ".\n"; + + ASSERT_TRUE(pair.first); +} + +TEST(SerializationTest, GraphDecoupledSerUnser) +{ + using namespace cugraph::serializer; + + using vertex_t = int32_t; + using edge_t = vertex_t; + using weight_t = double; + using index_t = vertex_t; + + raft::handle_t handle{}; + + edge_t num_edges = 8; + vertex_t num_vertices = 6; + + std::vector v_src{0, 1, 1, 2, 2, 2, 3, 4}; + std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; + std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; + + auto graph = cugraph::test::make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); + + auto pair_sz = serializer_t::get_device_graph_sz_bytes(graph); + auto total_ser_sz = pair_sz.first + pair_sz.second; + + // use the following buffer to simulate communication between + // sender and reciever of the serialization: + // + rmm::device_uvector d_storage_comm(0, handle.get_stream()); + + { + serializer_t ser(handle, total_ser_sz); + serializer_t::graph_meta_t graph_meta{}; + ser.serialize(graph, graph_meta); + + pair_sz = serializer_t::get_device_graph_sz_bytes(graph_meta); + auto post_ser_sz = pair_sz.first + pair_sz.second; + + EXPECT_EQ(total_ser_sz, post_ser_sz); + + d_storage_comm.resize(total_ser_sz, handle.get_stream()); + raft::copy(d_storage_comm.data(), ser.get_storage(), total_ser_sz, handle.get_stream()); + } + + { + serializer_t ser(handle, d_storage_comm.data()); + + auto graph_copy = ser.unserialize(pair_sz.first, pair_sz.second); + + auto pair = cugraph::test::compare_graphs(handle, graph, graph_copy); + if (pair.first == false) std::cerr << "Test failed with " << pair.second << ".\n"; + + ASSERT_TRUE(pair.first); + } +} + +TEST(SerializationTest, UnweightedGraphDecoupledSerUnser) +{ + using namespace cugraph::serializer; + + using vertex_t = int32_t; + using edge_t = vertex_t; + using weight_t = double; + using index_t = vertex_t; + + raft::handle_t handle{}; + + edge_t num_edges = 8; + vertex_t num_vertices = 6; + + std::vector v_src{0, 1, 1, 2, 2, 2, 3, 4}; + std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; + std::vector v_w{}; + + auto graph = cugraph::test::make_graph( + handle, v_src, v_dst, v_w, num_vertices, num_edges, /*weighted=*/false); + + ASSERT_TRUE(graph.view().weights() == nullptr); + + auto pair_sz = 
serializer_t::get_device_graph_sz_bytes(graph); + auto total_ser_sz = pair_sz.first + pair_sz.second; + + // use the following buffer to simulate communication between + // sender and reciever of the serialization: + // + rmm::device_uvector d_storage_comm(0, handle.get_stream()); + + { + serializer_t ser(handle, total_ser_sz); + serializer_t::graph_meta_t graph_meta{}; + ser.serialize(graph, graph_meta); + + pair_sz = serializer_t::get_device_graph_sz_bytes(graph_meta); + auto post_ser_sz = pair_sz.first + pair_sz.second; + + EXPECT_EQ(total_ser_sz, post_ser_sz); + + d_storage_comm.resize(total_ser_sz, handle.get_stream()); + raft::copy(d_storage_comm.data(), ser.get_storage(), total_ser_sz, handle.get_stream()); + } + + { + serializer_t ser(handle, d_storage_comm.data()); + + auto graph_copy = ser.unserialize(pair_sz.first, pair_sz.second); + + ASSERT_TRUE(graph_copy.view().weights() == nullptr); + + auto pair = cugraph::test::compare_graphs(handle, graph, graph_copy); + if (pair.first == false) std::cerr << "Test failed with " << pair.second << ".\n"; + + ASSERT_TRUE(pair.first); + } +} diff --git a/cpp/tests/utilities/test_utilities.hpp b/cpp/tests/utilities/test_utilities.hpp index 2fc7812dbaa..075db1906e1 100644 --- a/cpp/tests/utilities/test_utilities.hpp +++ b/cpp/tests/utilities/test_utilities.hpp @@ -23,6 +23,7 @@ #include #include +#include #include extern "C" { @@ -268,5 +269,127 @@ class Rmat_Usecase { bool multi_gpu_usecase_{}; }; +// alias for easy customization for debug purposes: +// +template +using vector_test_t = rmm::device_uvector; + +template +decltype(auto) make_graph(raft::handle_t const& handle, + std::vector const& v_src, + std::vector const& v_dst, + std::vector const& v_w, + vertex_t num_vertices, + edge_t num_edges, + bool is_weighted) +{ + using namespace cugraph::experimental; + + vector_test_t d_src(num_edges, handle.get_stream()); + vector_test_t d_dst(num_edges, handle.get_stream()); + vector_test_t d_weights(num_edges, handle.get_stream()); + + raft::update_device(d_src.data(), v_src.data(), d_src.size(), handle.get_stream()); + raft::update_device(d_dst.data(), v_dst.data(), d_dst.size(), handle.get_stream()); + + weight_t* ptr_d_weights{nullptr}; + if (is_weighted) { + raft::update_device(d_weights.data(), v_w.data(), d_weights.size(), handle.get_stream()); + + ptr_d_weights = d_weights.data(); + } + + edgelist_t edgelist{ + d_src.data(), d_dst.data(), ptr_d_weights, num_edges}; + + graph_t graph( + handle, edgelist, num_vertices, graph_properties_t{false, false, is_weighted}, false); + + return graph; +} + +// compares single GPU CSR graph data: +// (for testing / debugging); +// on first == false, second == brief description of what is different; +// +template +std::pair compare_graphs(raft::handle_t const& handle, + left_graph_t const& lgraph, + right_graph_t const& rgraph) +{ + if constexpr (left_graph_t::is_multi_gpu && right_graph_t::is_multi_gpu) { + // no support for comparing distributed graphs, yet: + // + CUGRAPH_FAIL("Unsupported graph type for comparison."); + return std::make_pair(false, std::string("unsupported")); + } else if constexpr (!std::is_same_v) { + return std::make_pair(false, std::string("type")); + } else { + // both graphs are single GPU: + // + using graph_t = left_graph_t; + + using vertex_t = typename graph_t::vertex_type; + using edge_t = typename graph_t::edge_type; + using weight_t = typename graph_t::weight_type; + + size_t num_vertices = lgraph.get_number_of_vertices(); + size_t num_edges = 
lgraph.get_number_of_edges(); + + { + size_t r_num_vertices = rgraph.get_number_of_vertices(); + size_t r_num_edges = rgraph.get_number_of_edges(); + + if (num_vertices != r_num_vertices) return std::make_pair(false, std::string("num_vertices")); + + if (num_edges != r_num_edges) return std::make_pair(false, std::string("num_edges")); + } + + if (lgraph.is_symmetric() != rgraph.is_symmetric()) + return std::make_pair(false, std::string("symmetric")); + + if (lgraph.is_multigraph() != rgraph.is_multigraph()) + return std::make_pair(false, std::string("multigraph")); + + bool is_weighted = lgraph.is_weighted(); + if (is_weighted != rgraph.is_weighted()) return std::make_pair(false, std::string("weighted")); + + auto lgraph_view = lgraph.view(); + auto rgraph_view = rgraph.view(); + + std::vector lv_ro(num_vertices + 1); + std::vector lv_ci(num_edges); + + raft::update_host(lv_ro.data(), lgraph_view.offsets(), num_vertices + 1, handle.get_stream()); + raft::update_host(lv_ci.data(), lgraph_view.indices(), num_edges, handle.get_stream()); + + std::vector rv_ro(num_vertices + 1); + std::vector rv_ci(num_edges); + + raft::update_host(rv_ro.data(), rgraph_view.offsets(), num_vertices + 1, handle.get_stream()); + raft::update_host(rv_ci.data(), rgraph_view.indices(), num_edges, handle.get_stream()); + + if (lv_ro != rv_ro) return std::make_pair(false, std::string("offsets")); + + if (lv_ci != rv_ci) return std::make_pair(false, std::string("indices")); + + if (is_weighted) { + std::vector lv_vs(num_edges); + raft::update_host(lv_vs.data(), lgraph_view.weights(), num_edges, handle.get_stream()); + + std::vector rv_vs(num_edges); + raft::update_host(rv_vs.data(), rgraph_view.weights(), num_edges, handle.get_stream()); + + if (lv_vs != rv_vs) return std::make_pair(false, std::string("values")); + } + + if (lgraph_view.get_local_adj_matrix_partition_segment_offsets(0) != + rgraph_view.get_local_adj_matrix_partition_segment_offsets(0)) + return std::make_pair(false, std::string("segment offsets")); + + return std::make_pair(true, std::string{}); + } +} + } // namespace test } // namespace cugraph From 0f8cb14ce065a386a126719c722cc9340be28a5b Mon Sep 17 00:00:00 2001 From: jakirkham Date: Tue, 1 Jun 2021 08:20:50 -0700 Subject: [PATCH 275/343] Use UCX-Py 0.20 (#1634) cc @raydouglass @pentschev @quasiben Authors: - https://github.com/jakirkham Approvers: - Peter Andreas Entschev (https://github.com/pentschev) - Ray Douglass (https://github.com/raydouglass) URL: https://github.com/rapidsai/cugraph/pull/1634 --- conda/environments/cugraph_dev_cuda11.0.yml | 2 +- conda/environments/cugraph_dev_cuda11.1.yml | 2 +- conda/environments/cugraph_dev_cuda11.2.yml | 2 +- conda/recipes/cugraph/meta.yaml | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/conda/environments/cugraph_dev_cuda11.0.yml b/conda/environments/cugraph_dev_cuda11.0.yml index 0f4eb4ba782..0315304363b 100644 --- a/conda/environments/cugraph_dev_cuda11.0.yml +++ b/conda/environments/cugraph_dev_cuda11.0.yml @@ -15,7 +15,7 @@ dependencies: - dask-cuda=21.06* - dask-cudf=21.06* - nccl>=2.8.4 -- ucx-py=21.06* +- ucx-py=0.20.* - ucx-proc=*=gpu - scipy - networkx>=2.5.1 diff --git a/conda/environments/cugraph_dev_cuda11.1.yml b/conda/environments/cugraph_dev_cuda11.1.yml index 68d3e2310e3..b9b1dfdafc3 100644 --- a/conda/environments/cugraph_dev_cuda11.1.yml +++ b/conda/environments/cugraph_dev_cuda11.1.yml @@ -15,7 +15,7 @@ dependencies: - dask-cuda=21.06* - dask-cudf=21.06* - nccl>=2.8.4 -- ucx-py=21.06* +- ucx-py=0.20.* - 
ucx-proc=*=gpu - scipy - networkx>=2.5.1 diff --git a/conda/environments/cugraph_dev_cuda11.2.yml b/conda/environments/cugraph_dev_cuda11.2.yml index e52f6399ccf..e35a575e8bb 100644 --- a/conda/environments/cugraph_dev_cuda11.2.yml +++ b/conda/environments/cugraph_dev_cuda11.2.yml @@ -15,7 +15,7 @@ dependencies: - dask-cuda=21.06* - dask-cudf=21.06* - nccl>=2.8.4 -- ucx-py=21.06* +- ucx-py=0.20.* - ucx-proc=*=gpu - scipy - networkx>=2.5.1 diff --git a/conda/recipes/cugraph/meta.yaml b/conda/recipes/cugraph/meta.yaml index c687e57b74f..d750c1de188 100644 --- a/conda/recipes/cugraph/meta.yaml +++ b/conda/recipes/cugraph/meta.yaml @@ -27,7 +27,7 @@ requirements: - cython>=0.29,<0.30 - libcugraph={{ version }} - cudf={{ minor_version }} - - ucx-py {{ minor_version }} + - ucx-py 0.20 - ucx-proc=*=gpu run: - python x.x @@ -38,7 +38,7 @@ requirements: - dask>=2.12.0 - distributed>=2.12.0 - nccl>=2.8.4 - - ucx-py {{ minor_version }} + - ucx-py 0.20 - ucx-proc=*=gpu #test: From c51216cec3a67c0dd645eab33ae71aae517262cf Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Tue, 1 Jun 2021 12:43:58 -0400 Subject: [PATCH 276/343] Remove thread local thrust::sort (thrust::sort with the execution policy thrust::seq) from copy_v_transform_reduce_key_aggregated_out_nbr (#1627) thrust::sort(thrust::seq, ....) does not work with arbitrary large input data size and this call can fail if the array size to locally sort is large. This code replaces many thread local sort with one thrust::sort call from host. Authors: - Seunghwa Kang (https://github.com/seunghwak) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - Andrei Schaffer (https://github.com/aschaffer) URL: https://github.com/rapidsai/cugraph/pull/1627 --- .../cugraph/matrix_partition_device.cuh | 3 + ...ransform_reduce_key_aggregated_out_nbr.cuh | 202 +++++++----------- 2 files changed, 86 insertions(+), 119 deletions(-) diff --git a/cpp/include/cugraph/matrix_partition_device.cuh b/cpp/include/cugraph/matrix_partition_device.cuh index 3c9736b7ca6..8951e4269bd 100644 --- a/cpp/include/cugraph/matrix_partition_device.cuh +++ b/cpp/include/cugraph/matrix_partition_device.cuh @@ -38,6 +38,9 @@ class matrix_partition_device_base_t { __host__ __device__ edge_t get_number_of_edges() const { return number_of_edges_; } + __host__ __device__ vertex_t const* get_indices() const { return indices_; } + __host__ __device__ weight_t const* get_weights() const { return weights_; } + __device__ thrust::tuple get_local_edges( vertex_t major_offset) const noexcept { diff --git a/cpp/include/cugraph/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh b/cpp/include/cugraph/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh index 6a47ec67f13..48d06e100c8 100644 --- a/cpp/include/cugraph/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh +++ b/cpp/include/cugraph/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh @@ -38,89 +38,6 @@ namespace cugraph { namespace experimental { -namespace detail { - -// FIXME: block size requires tuning -int32_t constexpr copy_v_transform_reduce_key_aggregated_out_nbr_for_all_block_size = 128; - -template -__global__ void for_all_major_for_all_nbr_low_degree( - matrix_partition_device_t matrix_partition, - typename GraphViewType::vertex_type major_first, - typename GraphViewType::vertex_type major_last, - VertexIterator adj_matrix_minor_key_first, - typename GraphViewType::vertex_type* major_vertices, - typename GraphViewType::vertex_type* minor_keys, - 
typename GraphViewType::weight_type* key_aggregated_edge_weights, - typename GraphViewType::vertex_type invalid_vertex) -{ - using vertex_t = typename GraphViewType::vertex_type; - using edge_t = typename GraphViewType::edge_type; - using weight_t = typename GraphViewType::weight_type; - - auto const tid = threadIdx.x + blockIdx.x * blockDim.x; - auto major_start_offset = static_cast(major_first - matrix_partition.get_major_first()); - auto idx = static_cast(tid); - - while (idx < static_cast(major_last - major_first)) { - auto major_offset = major_start_offset + idx; - vertex_t const* indices{nullptr}; - weight_t const* weights{nullptr}; - edge_t local_degree{}; - thrust::tie(indices, weights, local_degree) = - matrix_partition.get_local_edges(static_cast(major_offset)); - if (local_degree > 0) { - auto local_offset = matrix_partition.get_local_offset(major_offset); - auto minor_key_first = thrust::make_transform_iterator( - indices, [matrix_partition, adj_matrix_minor_key_first] __device__(auto minor) { - return *(adj_matrix_minor_key_first + - matrix_partition.get_minor_offset_from_minor_nocheck(minor)); - }); - thrust::copy( - thrust::seq, minor_key_first, minor_key_first + local_degree, minor_keys + local_offset); - if (weights == nullptr) { - thrust::sort( - thrust::seq, minor_keys + local_offset, minor_keys + local_offset + local_degree); - } else { - thrust::copy( - thrust::seq, weights, weights + local_degree, key_aggregated_edge_weights + local_offset); - thrust::sort_by_key(thrust::seq, - minor_keys + local_offset, - minor_keys + local_offset + local_degree, - key_aggregated_edge_weights + local_offset); - } - // in-place reduce_by_key - vertex_t key_idx{0}; - key_aggregated_edge_weights[local_offset + key_idx] = - weights != nullptr ? key_aggregated_edge_weights[local_offset] : weight_t{1.0}; - - for (edge_t i = 1; i < local_degree; ++i) { - if (minor_keys[local_offset + i] == minor_keys[local_offset + key_idx]) { - key_aggregated_edge_weights[local_offset + key_idx] += - weights != nullptr ? key_aggregated_edge_weights[local_offset + i] : weight_t{1.0}; - } else { - ++key_idx; - minor_keys[local_offset + key_idx] = minor_keys[local_offset + i]; - key_aggregated_edge_weights[local_offset + key_idx] = - weights != nullptr ? key_aggregated_edge_weights[local_offset + i] : weight_t{1.0}; - } - } - thrust::fill(thrust::seq, - major_vertices + local_offset, - major_vertices + local_offset + key_idx + 1, - matrix_partition.get_major_from_major_offset_nocheck(major_offset)); - thrust::fill(thrust::seq, - major_vertices + local_offset + key_idx + 1, - major_vertices + local_offset + local_degree, - invalid_vertex); - } - - idx += gridDim.x * blockDim.x; - } -} - -} // namespace detail - /** * @brief Iterate over every vertex's key-aggregated outgoing edges to update vertex properties. * @@ -340,47 +257,94 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( rmm::device_uvector tmp_major_vertices(matrix_partition.get_number_of_edges(), handle.get_stream()); rmm::device_uvector tmp_minor_keys(tmp_major_vertices.size(), handle.get_stream()); - rmm::device_uvector tmp_key_aggregated_edge_weights(tmp_major_vertices.size(), - handle.get_stream()); + rmm::device_uvector tmp_key_aggregated_edge_weights( + graph_view.is_weighted() ? 
tmp_major_vertices.size() : size_t{0}, handle.get_stream()); if (matrix_partition.get_major_size() > 0) { - raft::grid_1d_thread_t update_grid( - matrix_partition.get_major_size(), - detail::copy_v_transform_reduce_key_aggregated_out_nbr_for_all_block_size, - handle.get_device_properties().maxGridSize[0]); - - auto constexpr invalid_vertex = invalid_vertex_id::value; - + auto minor_key_first = thrust::make_transform_iterator( + matrix_partition.get_indices(), + [adj_matrix_col_key_first, matrix_partition] __device__(auto minor) { + return *(adj_matrix_col_key_first + + matrix_partition.get_minor_offset_from_minor_nocheck(minor)); + }); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + minor_key_first, + minor_key_first + matrix_partition.get_number_of_edges(), + tmp_minor_keys.begin()); + if (graph_view.is_weighted()) { + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + matrix_partition.get_weights(), + matrix_partition.get_weights() + matrix_partition.get_number_of_edges(), + tmp_key_aggregated_edge_weights.begin()); + } // FIXME: This is highly inefficient for graphs with high-degree vertices. If we renumber // vertices to insure that rows within a partition are sorted by their out-degree in // decreasing order, we will apply this kernel only to low out-degree vertices. - detail::for_all_major_for_all_nbr_low_degree<<>>( - matrix_partition, - matrix_partition.get_major_first(), - matrix_partition.get_major_last(), - adj_matrix_col_key_first, - tmp_major_vertices.data(), - tmp_minor_keys.data(), - tmp_key_aggregated_edge_weights.data(), - invalid_vertex); + thrust::for_each( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::make_counting_iterator(matrix_partition.get_major_first()), + thrust::make_counting_iterator(matrix_partition.get_major_first()) + + matrix_partition.get_major_size(), + [matrix_partition, tmp_major_vertices = tmp_major_vertices.begin()] __device__(auto major) { + auto major_offset = matrix_partition.get_major_offset_from_major_nocheck(major); + auto local_degree = matrix_partition.get_local_degree(major_offset); + auto local_offset = matrix_partition.get_local_offset(major_offset); + thrust::fill(thrust::seq, + tmp_major_vertices + local_offset, + tmp_major_vertices + local_offset + local_degree, + major); + }); + rmm::device_uvector reduced_major_vertices(tmp_major_vertices.size(), + handle.get_stream()); + rmm::device_uvector reduced_minor_keys(reduced_major_vertices.size(), + handle.get_stream()); + rmm::device_uvector reduced_key_aggregated_edge_weights( + reduced_major_vertices.size(), handle.get_stream()); + size_t reduced_size{}; + // FIXME: cub segmented sort may be more efficient as this is already sorted by major + auto input_key_first = thrust::make_zip_iterator( + thrust::make_tuple(tmp_major_vertices.begin(), tmp_minor_keys.begin())); + auto output_key_first = thrust::make_zip_iterator( + thrust::make_tuple(reduced_major_vertices.begin(), reduced_minor_keys.begin())); + if (graph_view.is_weighted()) { + thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + input_key_first, + input_key_first + tmp_major_vertices.size(), + tmp_key_aggregated_edge_weights.begin()); + reduced_size = + thrust::distance(output_key_first, + thrust::get<0>(thrust::reduce_by_key( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + input_key_first, + input_key_first + tmp_major_vertices.size(), + tmp_key_aggregated_edge_weights.begin(), + 
output_key_first, + reduced_key_aggregated_edge_weights.begin()))); + } else { + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + input_key_first, + input_key_first + tmp_major_vertices.size()); + reduced_size = + thrust::distance(output_key_first, + thrust::get<0>(thrust::reduce_by_key( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + input_key_first, + input_key_first + tmp_major_vertices.size(), + thrust::make_constant_iterator(weight_t{1.0}), + output_key_first, + reduced_key_aggregated_edge_weights.begin()))); + } + tmp_major_vertices = std::move(reduced_major_vertices); + tmp_minor_keys = std::move(reduced_minor_keys); + tmp_key_aggregated_edge_weights = std::move(reduced_key_aggregated_edge_weights); + tmp_major_vertices.resize(reduced_size, handle.get_stream()); + tmp_minor_keys.resize(tmp_major_vertices.size(), handle.get_stream()); + tmp_key_aggregated_edge_weights.resize(tmp_major_vertices.size(), handle.get_stream()); + tmp_major_vertices.shrink_to_fit(handle.get_stream()); + tmp_minor_keys.shrink_to_fit(handle.get_stream()); + tmp_key_aggregated_edge_weights.shrink_to_fit(handle.get_stream()); } - auto triplet_first = thrust::make_zip_iterator(thrust::make_tuple( - tmp_major_vertices.begin(), tmp_minor_keys.begin(), tmp_key_aggregated_edge_weights.begin())); - auto last = - thrust::remove_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - triplet_first, - triplet_first + tmp_major_vertices.size(), - [] __device__(auto val) { - return thrust::get<0>(val) == invalid_vertex_id::value; - }); - tmp_major_vertices.resize(thrust::distance(triplet_first, last), handle.get_stream()); - tmp_minor_keys.resize(tmp_major_vertices.size(), handle.get_stream()); - tmp_key_aggregated_edge_weights.resize(tmp_major_vertices.size(), handle.get_stream()); - if (GraphViewType::is_multi_gpu) { auto& comm = handle.get_comms(); auto const comm_size = comm.get_size(); @@ -391,7 +355,7 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); auto const col_comm_size = col_comm.get_size(); - triplet_first = + auto triplet_first = thrust::make_zip_iterator(thrust::make_tuple(tmp_major_vertices.begin(), tmp_minor_keys.begin(), tmp_key_aggregated_edge_weights.begin())); @@ -441,7 +405,7 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( allocate_dataframe_buffer(tmp_major_vertices.size(), handle.get_stream()); auto tmp_e_op_result_buffer_first = get_dataframe_buffer_begin(tmp_e_op_result_buffer); - triplet_first = thrust::make_zip_iterator(thrust::make_tuple( + auto triplet_first = thrust::make_zip_iterator(thrust::make_tuple( tmp_major_vertices.begin(), tmp_minor_keys.begin(), tmp_key_aggregated_edge_weights.begin())); thrust::transform( rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), From e80a696948e93e91374c4a05ac6956f3ea9f7608 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Tue, 1 Jun 2021 11:45:00 -0500 Subject: [PATCH 277/343] Correct install path for include folder to avoid double nesting (#1630) Similar to https://github.com/rapidsai/cuml/pull/3901. After #1491 and #rapids-cmake and #1585, now at install time, the cugraph headers are being nested into `path/to/env/include/cugraph/cugraph` instead of just `path/to/env/include/cugraph/`. This, as far as I'm aware, is unintentional and unlike the rest of RAPIDS projects (cuDF, RMM and cuML). 
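For reference, the copy_v_transform_reduce_key_aggregated_out_nbr change in #1627 above boils down to sorting all (major vertex, neighbor key, weight) triples of a partition once and then reducing duplicates of each (major, key) pair, instead of sorting every vertex's neighbor list inside its own thread. A minimal host-side sketch of that sort-then-reduce-by-key pattern, in plain Python with made-up toy data rather than the device code used in the patch:

from itertools import groupby

# Toy stand-in for the (major vertex, neighbor key, edge weight) triples of one
# adjacency-matrix partition; the values here are illustrative only.
edges = [(0, 5, 1.0), (0, 7, 1.0), (0, 5, 2.0), (1, 5, 4.0)]

# One global sort by (major, key) replaces the per-thread thrust::sort(thrust::seq, ...) calls ...
edges.sort(key=lambda e: (e[0], e[1]))

# ... and a single reduce-by-key pass sums the weights of duplicate (major, key) pairs.
aggregated = [(major, key, sum(w for _, _, w in group))
              for (major, key), group in groupby(edges, key=lambda e: (e[0], e[1]))]

print(aggregated)  # [(0, 5, 3.0), (0, 7, 1.0), (1, 5, 4.0)]

In the patch itself these two steps become a single thrust::sort / thrust::sort_by_key over a zip iterator of (major, minor key) followed by thrust::reduce_by_key, launched once from the host for the whole partition.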
cc @trxcllnt @robertmaynard Authors: - Dante Gama Dessavre (https://github.com/dantegd) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - Robert Maynard (https://github.com/robertmaynard) - Seunghwa Kang (https://github.com/seunghwak) - Paul Taylor (https://github.com/trxcllnt) - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/1630 --- cpp/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index a83ea69d725..d1f50c42b14 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -335,7 +335,7 @@ install(TARGETS cugraph EXPORT cugraph-exports) install(DIRECTORY include/ - DESTINATION include/cugraph) + DESTINATION include) install(FILES ${CMAKE_CURRENT_BINARY_DIR}/include/cugraph/version_config.hpp DESTINATION include/cugraph) From faf2dbf663e4f4ab9c88e35fd5189f4d14ff5bd3 Mon Sep 17 00:00:00 2001 From: Chuck Hastings <45364586+ChuckHastings@users.noreply.github.com> Date: Tue, 1 Jun 2021 12:47:36 -0400 Subject: [PATCH 278/343] transform_reduce_e bug fixes (#1633) Discovered this bug during louvain testing. The variable `idx` is an index controlling iteration over a subset of the vertices, while `major_offset` identifies a particular vertex offset in the overall data structure. Several places where using `idx` where they needed to be using `major_offset`. Authors: - Chuck Hastings (https://github.com/ChuckHastings) Approvers: - Seunghwa Kang (https://github.com/seunghwak) - Andrei Schaffer (https://github.com/aschaffer) URL: https://github.com/rapidsai/cugraph/pull/1633 --- .../cugraph/patterns/transform_reduce_e.cuh | 44 +++++++++++-------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/cpp/include/cugraph/patterns/transform_reduce_e.cuh b/cpp/include/cugraph/patterns/transform_reduce_e.cuh index 7cbd4839e4c..151fa1df0c7 100644 --- a/cpp/include/cugraph/patterns/transform_reduce_e.cuh +++ b/cpp/include/cugraph/patterns/transform_reduce_e.cuh @@ -76,7 +76,7 @@ __global__ void for_all_major_for_all_nbr_low_degree( &adj_matrix_row_value_input_first, &adj_matrix_col_value_input_first, &e_op, - idx, + major_offset, indices, weights] __device__(auto i) { auto minor = indices[i]; @@ -84,14 +84,16 @@ __global__ void for_all_major_for_all_nbr_low_degree( auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); auto row = GraphViewType::is_adj_matrix_transposed ? minor - : matrix_partition.get_major_from_major_offset_nocheck(idx); + : matrix_partition.get_major_from_major_offset_nocheck(major_offset); auto col = GraphViewType::is_adj_matrix_transposed - ? matrix_partition.get_major_from_major_offset_nocheck(idx) + ? matrix_partition.get_major_from_major_offset_nocheck(major_offset) : minor; - auto row_offset = - GraphViewType::is_adj_matrix_transposed ? minor_offset : static_cast(idx); - auto col_offset = - GraphViewType::is_adj_matrix_transposed ? static_cast(idx) : minor_offset; + auto row_offset = GraphViewType::is_adj_matrix_transposed + ? minor_offset + : static_cast(major_offset); + auto col_offset = GraphViewType::is_adj_matrix_transposed + ? static_cast(major_offset) + : minor_offset; return evaluate_edge_op(idx); - auto col_offset = - GraphViewType::is_adj_matrix_transposed ? static_cast(idx) : minor_offset; + auto row_offset = GraphViewType::is_adj_matrix_transposed + ? minor_offset + : static_cast(major_offset); + auto col_offset = GraphViewType::is_adj_matrix_transposed + ? 
static_cast(major_offset) + : minor_offset; auto e_op_result = evaluate_edge_op(idx); - auto col_offset = - GraphViewType::is_adj_matrix_transposed ? static_cast(idx) : minor_offset; + auto row_offset = GraphViewType::is_adj_matrix_transposed + ? minor_offset + : static_cast(major_offset); + auto col_offset = GraphViewType::is_adj_matrix_transposed + ? static_cast(major_offset) + : minor_offset; auto e_op_result = evaluate_edge_op Date: Tue, 1 Jun 2021 13:57:16 -0500 Subject: [PATCH 279/343] WCC bindings (#1612) Authors: - https://github.com/Iroy30 Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/1612 --- cpp/include/cugraph/utilities/cython.hpp | 7 ++ cpp/src/utilities/cython.cu | 54 +++++++++- python/cugraph/community/egonet_wrapper.pyx | 3 + python/cugraph/components/connectivity.pxd | 7 ++ .../components/connectivity_wrapper.pyx | 71 +++++++------ python/cugraph/dask/__init__.py | 3 +- .../centrality/mg_katz_centrality_wrapper.pyx | 1 + .../dask/community/louvain_wrapper.pyx | 1 + python/cugraph/dask/components/__init__.py | 0 .../cugraph/dask/components/connectivity.py | 63 ++++++++++++ .../dask/components/mg_connectivity.pxd | 26 +++++ .../components/mg_connectivity_wrapper.pyx | 99 +++++++++++++++++++ .../link_analysis/mg_pagerank_wrapper.pyx | 1 + .../cugraph/dask/traversal/mg_bfs_wrapper.pyx | 2 + .../dask/traversal/mg_sssp_wrapper.pyx | 1 + .../link_analysis/pagerank_wrapper.pyx | 3 + .../cugraph/sampling/random_walks_wrapper.pyx | 6 +- python/cugraph/structure/graph_utilities.pxd | 1 + python/cugraph/structure/renumber_wrapper.pyx | 3 +- .../tests/dask/test_mg_connectivity.py | 78 +++++++++++++++ python/cugraph/traversal/bfs_wrapper.pyx | 3 + 21 files changed, 400 insertions(+), 33 deletions(-) create mode 100644 python/cugraph/dask/components/__init__.py create mode 100644 python/cugraph/dask/components/connectivity.py create mode 100644 python/cugraph/dask/components/mg_connectivity.pxd create mode 100644 python/cugraph/dask/components/mg_connectivity_wrapper.pyx create mode 100644 python/cugraph/tests/dask/test_mg_connectivity.py diff --git a/cpp/include/cugraph/utilities/cython.hpp b/cpp/include/cugraph/utilities/cython.hpp index 1e246b7255a..f8284a16ae3 100644 --- a/cpp/include/cugraph/utilities/cython.hpp +++ b/cpp/include/cugraph/utilities/cython.hpp @@ -414,6 +414,7 @@ void populate_graph_container(graph_container_t& graph_container, size_t num_global_edges, bool sorted_by_degree, bool is_weighted, + bool is_symmetric, bool transposed, bool multi_gpu); @@ -498,6 +499,12 @@ std::unique_ptr call_egonet(raft::handle_t const& handle, vertex_t n_subgraphs, vertex_t radius); +// Wrapper for calling WCC through a graph container +template +void call_wcc(raft::handle_t const& handle, + graph_container_t const& graph_container, + vertex_t* components); + // Wrapper for calling graph generator template std::unique_ptr call_generate_rmat_edgelist(raft::handle_t const& handle, diff --git a/cpp/src/utilities/cython.cu b/cpp/src/utilities/cython.cu index 989de166699..41b1b406063 100644 --- a/cpp/src/utilities/cython.cu +++ b/cpp/src/utilities/cython.cu @@ -211,6 +211,7 @@ void populate_graph_container(graph_container_t& graph_container, size_t num_global_edges, bool sorted_by_degree, bool is_weighted, + bool is_symmetric, bool transposed, bool multi_gpu) { @@ -248,7 +249,7 @@ void populate_graph_container(graph_container_t& graph_container, graph_container.do_expensive_check = 
do_expensive_check; experimental::graph_properties_t graph_props{ - .is_symmetric = false, .is_multigraph = false, .is_weighted = is_weighted}; + .is_symmetric = is_symmetric, .is_multigraph = false, .is_weighted = is_weighted}; graph_container.graph_props = graph_props; graph_container.graph_type = graphTypeEnum::graph_t; @@ -981,6 +982,41 @@ void call_sssp(raft::handle_t const& handle, } } +// wrapper for weakly connected components: +// +template +void call_wcc(raft::handle_t const& handle, + graph_container_t const& graph_container, + vertex_t* components) +{ + if (graph_container.is_multi_gpu) { + if (graph_container.edgeType == numberTypeEnum::int32Type) { + auto graph = + detail::create_graph(handle, graph_container); + cugraph::experimental::weakly_connected_components( + handle, graph->view(), reinterpret_cast(components), false); + + } else if (graph_container.edgeType == numberTypeEnum::int64Type) { + auto graph = + detail::create_graph(handle, graph_container); + cugraph::experimental::weakly_connected_components( + handle, graph->view(), reinterpret_cast(components), false); + } + } else { + if (graph_container.edgeType == numberTypeEnum::int32Type) { + auto graph = + detail::create_graph(handle, graph_container); + cugraph::experimental::weakly_connected_components( + handle, graph->view(), reinterpret_cast(components), false); + } else if (graph_container.edgeType == numberTypeEnum::int64Type) { + auto graph = + detail::create_graph(handle, graph_container); + cugraph::experimental::weakly_connected_components( + handle, graph->view(), reinterpret_cast(components), false); + } + } +} + // wrapper for shuffling: // template @@ -1360,6 +1396,22 @@ template void call_sssp(raft::handle_t const& handle, int64_t* predecessors, const int64_t source_vertex); +template void call_wcc(raft::handle_t const& handle, + graph_container_t const& graph_container, + int32_t* components); + +template void call_wcc(raft::handle_t const& handle, + graph_container_t const& graph_container, + int32_t* components); + +template void call_wcc(raft::handle_t const& handle, + graph_container_t const& graph_container, + int64_t* components); + +template void call_wcc(raft::handle_t const& handle, + graph_container_t const& graph_container, + int64_t* components); + template std::unique_ptr> call_shuffle( raft::handle_t const& handle, int32_t* edgelist_major_vertices, diff --git a/python/cugraph/community/egonet_wrapper.pyx b/python/cugraph/community/egonet_wrapper.pyx index 23aa159314f..eb62c2aa56c 100644 --- a/python/cugraph/community/egonet_wrapper.pyx +++ b/python/cugraph/community/egonet_wrapper.pyx @@ -55,6 +55,8 @@ def egonet(input_graph, vertices, radius=1): weight_t = np.dtype("float32") is_weighted = False + is_symmetric = not input_graph.is_directed() + # Pointers for egonet vertices = vertices.astype('int32') cdef uintptr_t c_source_vertex_ptr = vertices.__cuda_array_interface__['data'][0] @@ -79,6 +81,7 @@ def egonet(input_graph, vertices, radius=1): num_edges, False, is_weighted, + is_symmetric, False, False) if(weight_t==np.dtype("float32")): diff --git a/python/cugraph/components/connectivity.pxd b/python/cugraph/components/connectivity.pxd index a5549a9f54e..678836216b9 100644 --- a/python/cugraph/components/connectivity.pxd +++ b/python/cugraph/components/connectivity.pxd @@ -17,6 +17,7 @@ # cython: language_level = 3 from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * cdef extern from "cugraph/algorithms.hpp" namespace "cugraph": @@ -30,3 
+31,9 @@ cdef extern from "cugraph/algorithms.hpp" namespace "cugraph": const GraphCSRView[VT,ET,WT] &graph, cugraph_cc_t connect_type, VT *labels) except + + +cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": + cdef void call_wcc[vertex_t, weight_t]( + const handle_t &handle, + const graph_container_t &g, + vertex_t *identifiers) except + diff --git a/python/cugraph/components/connectivity_wrapper.pyx b/python/cugraph/components/connectivity_wrapper.pyx index ac173de3564..588595644ed 100644 --- a/python/cugraph/components/connectivity_wrapper.pyx +++ b/python/cugraph/components/connectivity_wrapper.pyx @@ -18,6 +18,7 @@ from cugraph.components.connectivity cimport * from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * from cugraph.structure import utils_wrapper from cugraph.structure import graph_primtypes_wrapper from libc.stdint cimport uintptr_t @@ -30,47 +31,59 @@ def weakly_connected_components(input_graph): """ Call connected_components """ - offsets = None - indices = None + cdef unique_ptr[handle_t] handle_ptr + handle_ptr.reset(new handle_t()) + handle_ = handle_ptr.get() + + numberTypeMap = {np.dtype("int32") : numberTypeEnum.int32Type, + np.dtype("int64") : numberTypeEnum.int64Type, + np.dtype("float32") : numberTypeEnum.floatType, + np.dtype("double") : numberTypeEnum.doubleType} + + [src, dst] = graph_primtypes_wrapper.datatype_cast([input_graph.edgelist.edgelist_df['src'], + input_graph.edgelist.edgelist_df['dst']], + [np.int32]) if type(input_graph) is not type_Graph: # - # Need to create a symmetrized CSR for this local - # computation, don't want to keep it. - # - [src, dst] = graph_primtypes_wrapper.datatype_cast([input_graph.edgelist.edgelist_df['src'], - input_graph.edgelist.edgelist_df['dst']], - [np.int32]) - src, dst = symmetrize(src, dst) - [offsets, indices] = utils_wrapper.coo2csr(src, dst)[0:2] - else: - if not input_graph.adjlist: - input_graph.view_adj_list() - - [offsets, indices] = graph_primtypes_wrapper.datatype_cast([input_graph.adjlist.offsets, - input_graph.adjlist.indices], - [np.int32]) + # Need to create a symmetrized COO for this local + # computation + src, dst = symmetrize(src, dst) + weight_t = np.dtype("float32") + weights = None + num_verts = input_graph.number_of_vertices() num_edges = input_graph.number_of_edges(directed_edges=True) df = cudf.DataFrame() - df['vertex'] = cudf.Series(np.zeros(num_verts, dtype=np.int32)) + df['vertex'] = cudf.Series(np.arange(num_verts, dtype=np.int32)) df['labels'] = cudf.Series(np.zeros(num_verts, dtype=np.int32)) - cdef uintptr_t c_offsets = offsets.__cuda_array_interface__['data'][0] - cdef uintptr_t c_indices = indices.__cuda_array_interface__['data'][0] - cdef uintptr_t c_identifier = df['vertex'].__cuda_array_interface__['data'][0]; + cdef uintptr_t c_src_vertices = src.__cuda_array_interface__['data'][0] + cdef uintptr_t c_dst_vertices = dst.__cuda_array_interface__['data'][0] + cdef uintptr_t c_edge_weights = NULL cdef uintptr_t c_labels_val = df['labels'].__cuda_array_interface__['data'][0]; - cdef GraphCSRView[int,int,float] g - - g = GraphCSRView[int,int,float](c_offsets, c_indices, NULL, num_verts, num_edges) - - cdef cugraph_cc_t connect_type=CUGRAPH_WEAK - connected_components(g, connect_type, c_labels_val) - - g.get_vertex_identifiers(c_identifier) + cdef graph_container_t graph_container + populate_graph_container(graph_container, + handle_[0], + c_src_vertices, c_dst_vertices, c_edge_weights, + NULL, + 
((numberTypeEnum.int32Type)), + ((numberTypeEnum.int32Type)), + ((numberTypeMap[weight_t])), + num_edges, + num_verts, num_edges, + False, + False, + True, + False, + False) + + call_wcc[int, float](handle_ptr.get()[0], + graph_container, + c_labels_val) return df diff --git a/python/cugraph/dask/__init__.py b/python/cugraph/dask/__init__.py index 830de45c50b..60aebaf19b0 100644 --- a/python/cugraph/dask/__init__.py +++ b/python/cugraph/dask/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -17,3 +17,4 @@ from .common.read_utils import get_chunksize from .community.louvain import louvain from .centrality.katz_centrality import katz_centrality +from .components.connectivity import weakly_connected_components diff --git a/python/cugraph/dask/centrality/mg_katz_centrality_wrapper.pyx b/python/cugraph/dask/centrality/mg_katz_centrality_wrapper.pyx index 5fb9de788cf..c072d5ec143 100644 --- a/python/cugraph/dask/centrality/mg_katz_centrality_wrapper.pyx +++ b/python/cugraph/dask/centrality/mg_katz_centrality_wrapper.pyx @@ -95,6 +95,7 @@ def mg_katz_centrality(input_df, num_global_verts, num_global_edges, True, is_weighted, + False, True, True) df = cudf.DataFrame() diff --git a/python/cugraph/dask/community/louvain_wrapper.pyx b/python/cugraph/dask/community/louvain_wrapper.pyx index a3cebeac272..4585270c879 100644 --- a/python/cugraph/dask/community/louvain_wrapper.pyx +++ b/python/cugraph/dask/community/louvain_wrapper.pyx @@ -98,6 +98,7 @@ def louvain(input_df, num_global_verts, num_global_edges, sorted_by_degree, True, + False, False, True) # store_transposed, multi_gpu # Create the output dataframe, column lengths must be equal to the number of diff --git a/python/cugraph/dask/components/__init__.py b/python/cugraph/dask/components/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/cugraph/dask/components/connectivity.py b/python/cugraph/dask/components/connectivity.py new file mode 100644 index 00000000000..7f3a647a0d9 --- /dev/null +++ b/python/cugraph/dask/components/connectivity.py @@ -0,0 +1,63 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
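The new dask-level entry point keeps the single-GPU calling convention: build a graph from a dask_cudf edge list and call weakly_connected_components on it. A usage sketch (the CSV path and column names are placeholders; a Dask cluster and cugraph comms are assumed to be initialized already, as in the test added later in this patch):

import cugraph
import cugraph.dask as dcg
import dask_cudf

# Placeholder input file; any edge list with integer vertex columns works the same way.
chunksize = dcg.get_chunksize("edges.csv")
ddf = dask_cudf.read_csv("edges.csv", chunksize=chunksize, delimiter=" ",
                         names=["src", "dst", "value"],
                         dtype=["int32", "int32", "float32"])

dg = cugraph.DiGraph()
dg.from_dask_cudf_edgelist(ddf, "src", "dst")

# Returns a dask_cudf.DataFrame with 'vertex' and 'labels' columns,
# unrenumbered back to the input vertex ids.
labels_ddf = dcg.weakly_connected_components(dg)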
+ +from dask.distributed import wait, default_client +from cugraph.dask.common.input_utils import (get_distributed_data, + get_vertex_partition_offsets) +from cugraph.dask.components import mg_connectivity_wrapper as mg_connectivity +import cugraph.comms.comms as Comms +import dask_cudf + + +def call_wcc(sID, + data, + num_verts, + num_edges, + vertex_partition_offsets): + wid = Comms.get_worker_id(sID) + handle = Comms.get_handle(sID) + return mg_connectivity.mg_wcc(data[0], + num_verts, + num_edges, + vertex_partition_offsets, + wid, + handle) + + +def weakly_connected_components(input_graph): + + client = default_client() + + input_graph.compute_renumber_edge_list() + + ddf = input_graph.edgelist.edgelist_df + vertex_partition_offsets = get_vertex_partition_offsets(input_graph) + num_verts = vertex_partition_offsets.iloc[-1] + num_edges = len(ddf) + data = get_distributed_data(ddf) + + result = [client.submit(call_wcc, + Comms.get_session_id(), + wf[1], + num_verts, + num_edges, + vertex_partition_offsets, + workers=[wf[0]]) + for idx, wf in enumerate(data.worker_to_parts.items())] + wait(result) + ddf = dask_cudf.from_delayed(result) + + if input_graph.renumbered: + return input_graph.unrenumber(ddf, 'vertex') + + return ddf diff --git a/python/cugraph/dask/components/mg_connectivity.pxd b/python/cugraph/dask/components/mg_connectivity.pxd new file mode 100644 index 00000000000..04f04a9665e --- /dev/null +++ b/python/cugraph/dask/components/mg_connectivity.pxd @@ -0,0 +1,26 @@ +# +# Copyright (c) 2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from cugraph.structure.graph_utilities cimport * +from libcpp cimport bool + + +cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": + + cdef void call_wcc[vertex_t, weight_t]( + const handle_t &handle, + const graph_container_t &g, + vertex_t * components) diff --git a/python/cugraph/dask/components/mg_connectivity_wrapper.pyx b/python/cugraph/dask/components/mg_connectivity_wrapper.pyx new file mode 100644 index 00000000000..156d29a9794 --- /dev/null +++ b/python/cugraph/dask/components/mg_connectivity_wrapper.pyx @@ -0,0 +1,99 @@ +# +# Copyright (c) 2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from cugraph.structure.utils_wrapper import * +from cugraph.dask.components cimport mg_connectivity as c_connectivity +import cudf +from cugraph.structure.graph_utilities cimport * +import cugraph.structure.graph_primtypes_wrapper as graph_primtypes_wrapper +from libc.stdint cimport uintptr_t +from cython.operator cimport dereference as deref +import numpy as np + + +def mg_wcc(input_df, + num_global_verts, + num_global_edges, + vertex_partition_offsets, + rank, + handle): + + cdef size_t handle_size_t = handle.getHandle() + handle_ = handle_size_t + + src = input_df['src'] + dst = input_df['dst'] + vertex_t = src.dtype + if num_global_edges > (2**31 - 1): + edge_t = np.dtype("int64") + else: + edge_t = np.dtype("int32") + + weights = None + weight_t = np.dtype("float32") + is_weighted = False + + # FIXME: Offsets and indices are currently hardcoded to int, but this may + # not be acceptable in the future. + numberTypeMap = {np.dtype("int32") : numberTypeEnum.int32Type, + np.dtype("int64") : numberTypeEnum.int64Type, + np.dtype("float32") : numberTypeEnum.floatType, + np.dtype("double") : numberTypeEnum.doubleType} + + # FIXME: needs to be edge_t type not int + cdef int num_local_edges = len(src) + + cdef uintptr_t c_src_vertices = src.__cuda_array_interface__['data'][0] + cdef uintptr_t c_dst_vertices = dst.__cuda_array_interface__['data'][0] + cdef uintptr_t c_edge_weights = NULL + + # FIXME: data is on device, move to host (to_pandas()), convert to np array and access pointer to pass to C + vertex_partition_offsets_host = vertex_partition_offsets.values_host + cdef uintptr_t c_vertex_partition_offsets = vertex_partition_offsets_host.__array_interface__['data'][0] + + cdef graph_container_t graph_container + + populate_graph_container(graph_container, + handle_[0], + c_src_vertices, c_dst_vertices, c_edge_weights, + c_vertex_partition_offsets, + ((numberTypeMap[vertex_t])), + ((numberTypeMap[edge_t])), + ((numberTypeMap[weight_t])), + num_local_edges, + num_global_verts, num_global_edges, + True, + is_weighted, + True, + False, + True) + + df = cudf.DataFrame() + df['vertex'] = cudf.Series(np.arange(vertex_partition_offsets.iloc[rank], vertex_partition_offsets.iloc[rank+1]), dtype=vertex_t) + df['labels'] = cudf.Series(np.zeros(len(df['vertex']), dtype=vertex_t)) + + cdef uintptr_t c_labels_val = df['labels'].__cuda_array_interface__['data'][0]; + + if vertex_t == np.int32: + c_connectivity.call_wcc[int, float](handle_[0], + graph_container, + c_labels_val) + else: + c_connectivity.call_wcc[long, float](handle_[0], + graph_container, + c_labels_val) + + return df diff --git a/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx b/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx index c2f92f0f33b..6b8e18c119c 100644 --- a/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx +++ b/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx @@ -91,6 +91,7 @@ def mg_pagerank(input_df, num_global_verts, num_global_edges, True, is_weighted, + False, True, True) df = cudf.DataFrame() diff --git a/python/cugraph/dask/traversal/mg_bfs_wrapper.pyx b/python/cugraph/dask/traversal/mg_bfs_wrapper.pyx index e2f44ada32c..f0a9f2a81db 100644 --- a/python/cugraph/dask/traversal/mg_bfs_wrapper.pyx +++ b/python/cugraph/dask/traversal/mg_bfs_wrapper.pyx @@ -50,6 +50,7 @@ def mg_bfs(input_df, else: weight_t = np.dtype("float32") + # FIXME: Offsets and indices are currently hardcoded to int, but this may # not be acceptable in the future. 
numberTypeMap = {np.dtype("int32") : numberTypeEnum.int32Type, @@ -81,6 +82,7 @@ def mg_bfs(input_df, num_global_verts, num_global_edges, True, False, # BFS runs on unweighted graphs + False, False, True) # Generate the cudf.DataFrame result diff --git a/python/cugraph/dask/traversal/mg_sssp_wrapper.pyx b/python/cugraph/dask/traversal/mg_sssp_wrapper.pyx index 82a4ebe04d6..c11ec967e05 100644 --- a/python/cugraph/dask/traversal/mg_sssp_wrapper.pyx +++ b/python/cugraph/dask/traversal/mg_sssp_wrapper.pyx @@ -85,6 +85,7 @@ def mg_sssp(input_df, num_global_verts, num_global_edges, True, is_weighted, + False, False, True) # Generate the cudf.DataFrame result diff --git a/python/cugraph/link_analysis/pagerank_wrapper.pyx b/python/cugraph/link_analysis/pagerank_wrapper.pyx index 2c619a052ec..5a312f4f633 100644 --- a/python/cugraph/link_analysis/pagerank_wrapper.pyx +++ b/python/cugraph/link_analysis/pagerank_wrapper.pyx @@ -76,6 +76,8 @@ def pagerank(input_graph, alpha=0.85, personalization=None, max_iter=100, tol=1. weight_t = np.dtype("float32") is_weighted = False + is_symmetric = not input_graph.is_directed() + # FIXME: Offsets and indices are currently hardcoded to int, but this may # not be acceptable in the future. numberTypeMap = {np.dtype("int32") : numberTypeEnum.int32Type, @@ -102,6 +104,7 @@ def pagerank(input_graph, alpha=0.85, personalization=None, max_iter=100, tol=1. num_verts, num_edges, False, is_weighted, + is_symmetric, True, False) diff --git a/python/cugraph/sampling/random_walks_wrapper.pyx b/python/cugraph/sampling/random_walks_wrapper.pyx index 7b16ff14018..f186a972413 100644 --- a/python/cugraph/sampling/random_walks_wrapper.pyx +++ b/python/cugraph/sampling/random_walks_wrapper.pyx @@ -58,6 +58,9 @@ def random_walks(input_graph, start_vertices, max_depth): else: weight_t = np.dtype("float32") is_weighted = False + + is_symmetric = not input_graph.is_directed() + # Pointers for random_walks start_vertices = start_vertices.astype('int32') cdef uintptr_t c_start_vertex_ptr = start_vertices.__cuda_array_interface__['data'][0] @@ -78,6 +81,7 @@ def random_walks(input_graph, start_vertices, max_depth): num_edges, False, is_weighted, + is_symmetric, False, False) if(vertex_t == np.dtype("int32")): if(edge_t == np.dtype("int32")): @@ -113,4 +117,4 @@ def random_walks(input_graph, start_vertices, max_depth): set_sizes = cudf.Series(data=sizes, dtype=edge_t) return set_vertex, set_edge, set_sizes - \ No newline at end of file + diff --git a/python/cugraph/structure/graph_utilities.pxd b/python/cugraph/structure/graph_utilities.pxd index d0942431302..a19ed4c600c 100644 --- a/python/cugraph/structure/graph_utilities.pxd +++ b/python/cugraph/structure/graph_utilities.pxd @@ -51,6 +51,7 @@ cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": size_t num_global_edges, bool sorted_by_degree, bool is_weighted, + bool is_symmetric, bool transposed, bool multi_gpu) except + diff --git a/python/cugraph/structure/renumber_wrapper.pyx b/python/cugraph/structure/renumber_wrapper.pyx index 99626cdee08..1c302d21220 100644 --- a/python/cugraph/structure/renumber_wrapper.pyx +++ b/python/cugraph/structure/renumber_wrapper.pyx @@ -112,6 +112,7 @@ def renumber(input_df, # maybe use cpdef ? cdef uintptr_t shuffled_major = NULL cdef uintptr_t shuffled_minor = NULL + # FIXME: Fix fails when do_check = True cdef bool do_check = False # ? for now... cdef bool mg_flag = is_multi_gpu # run Single-GPU or MNMG @@ -175,7 +176,7 @@ def renumber(input_df, # maybe use cpdef ? 
shuffled_major, shuffled_minor, deref(edge_counts_32.get()), - 1, + do_check, mg_flag).release()) pair_original = ptr_renum_quad_32_32.get().get_dv_wrap() # original vertices: see helper diff --git a/python/cugraph/tests/dask/test_mg_connectivity.py b/python/cugraph/tests/dask/test_mg_connectivity.py new file mode 100644 index 00000000000..2f946789471 --- /dev/null +++ b/python/cugraph/tests/dask/test_mg_connectivity.py @@ -0,0 +1,78 @@ +# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import cugraph.dask as dcg +import gc +import pytest +import cugraph +import dask_cudf +import cudf +from cugraph.dask.common.mg_utils import (is_single_gpu, + setup_local_dask_cluster, + teardown_local_dask_cluster) + + +@pytest.fixture(scope="module") +def client_connection(): + (cluster, client) = setup_local_dask_cluster(p2p=True) + yield client + teardown_local_dask_cluster(cluster, client) + + +@pytest.mark.skipif( + is_single_gpu(), reason="skipping MG testing on Single GPU system" +) +def test_dask_wcc(client_connection): + gc.collect() + + # FIXME: update this to allow dataset to be parameterized and have dataset + # part of test param id (see other tests) + input_data_path = r"../datasets/netscience.csv" + print(f"dataset={input_data_path}") + chunksize = dcg.get_chunksize(input_data_path) + + ddf = dask_cudf.read_csv( + input_data_path, + chunksize=chunksize, + delimiter=" ", + names=["src", "dst", "value"], + dtype=["int32", "int32", "float32"], + ) + + df = cudf.read_csv( + input_data_path, + delimiter=" ", + names=["src", "dst", "value"], + dtype=["int32", "int32", "float32"], + ) + + g = cugraph.DiGraph() + g.from_cudf_edgelist(df, "src", "dst", renumber=True) + + dg = cugraph.DiGraph() + dg.from_dask_cudf_edgelist(ddf, "src", "dst") + + expected_dist = cugraph.weakly_connected_components(g) + result_dist = dcg.weakly_connected_components(dg) + + result_dist = result_dist.compute() + compare_dist = expected_dist.merge( + result_dist, on="vertex", suffixes=["_local", "_dask"] + ) + + unique_local_labels = compare_dist['labels_local'].unique() + + for label in unique_local_labels.values.tolist(): + dask_labels_df = compare_dist[compare_dist['labels_local'] == label] + dask_labels = dask_labels_df['labels_dask'] + assert (dask_labels.iloc[0] == dask_labels).all() diff --git a/python/cugraph/traversal/bfs_wrapper.pyx b/python/cugraph/traversal/bfs_wrapper.pyx index f524b133d02..05c175c563f 100644 --- a/python/cugraph/traversal/bfs_wrapper.pyx +++ b/python/cugraph/traversal/bfs_wrapper.pyx @@ -78,6 +78,8 @@ def bfs(input_graph, start, depth_limit, direction_optimizing=False): c_distance_ptr = df['distance'].__cuda_array_interface__['data'][0] c_predecessor_ptr = df['predecessor'].__cuda_array_interface__['data'][0] + is_symmetric = not input_graph.is_directed() + # Step 8: Proceed to BFS populate_graph_container(graph_container, handle_[0], @@ -90,6 +92,7 @@ def bfs(input_graph, start, depth_limit, direction_optimizing=False): num_verts, num_edges, False, 
False, + is_symmetric, False, False) From 45a50aa9b03f734f61fdb8216c0ff235f725b24d Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Tue, 1 Jun 2021 16:39:41 -0400 Subject: [PATCH 280/343] Delete CUDA_ARCHITECTURES=OFF (#1638) `set_target_properties(cugraphmgtestutil PROPERTIES CUDA_ARCHITECTURES OFF)` was set and this caused cudaFuncGetAttribute to intermittently return an invalid PTX version. This PR deletes this. Authors: - Seunghwa Kang (https://github.com/seunghwak) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/1638 --- cpp/tests/CMakeLists.txt | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index ae1c9ccd1e0..0608744400d 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -71,17 +71,6 @@ target_include_directories(cugraphmgtestutil target_link_libraries(cugraphmgtestutil cugraph) -# CUDA_ARCHITECTURES=OFF implies cmake will not pass arch flags to the -# compiler. CUDA_ARCHITECTURES must be set to a non-empty value to prevent -# cmake warnings about policy CMP0104. With this setting, arch flags must be -# manually set! ("evaluate_gpu_archs(GPU_ARCHS)" is the current mechanism -# used in cpp/CMakeLists.txt for setting arch options). -# Run "cmake --help-policy CMP0104" for policy details. -# NOTE: the CUDA_ARCHITECTURES=OFF setting may be removed after migrating to -# the findcudatoolkit features in cmake 3.17+ -set_target_properties(cugraphmgtestutil PROPERTIES - CUDA_ARCHITECTURES OFF) - ################################################################################################### # - compiler function ----------------------------------------------------------------------------- From 341f7bd49dd637f85ca6ab56fdde1e76495c06ae Mon Sep 17 00:00:00 2001 From: AJ Schmidt Date: Tue, 1 Jun 2021 14:03:09 -0400 Subject: [PATCH 281/343] increment ucx-py version --- conda/environments/cugraph_dev_cuda11.0.yml | 2 +- conda/environments/cugraph_dev_cuda11.1.yml | 2 +- conda/environments/cugraph_dev_cuda11.2.yml | 2 +- conda/recipes/cugraph/meta.yaml | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/conda/environments/cugraph_dev_cuda11.0.yml b/conda/environments/cugraph_dev_cuda11.0.yml index 2cbb73eafa7..a7da066578a 100644 --- a/conda/environments/cugraph_dev_cuda11.0.yml +++ b/conda/environments/cugraph_dev_cuda11.0.yml @@ -15,7 +15,7 @@ dependencies: - dask-cuda=21.08.* - dask-cudf=21.08.* - nccl>=2.8.4 -- ucx-py=0.20.* +- ucx-py=0.21.* - ucx-proc=*=gpu - scipy - networkx>=2.5.1 diff --git a/conda/environments/cugraph_dev_cuda11.1.yml b/conda/environments/cugraph_dev_cuda11.1.yml index b60c8bb5e0e..d986b74efe7 100644 --- a/conda/environments/cugraph_dev_cuda11.1.yml +++ b/conda/environments/cugraph_dev_cuda11.1.yml @@ -15,7 +15,7 @@ dependencies: - dask-cuda=21.08.* - dask-cudf=21.08.* - nccl>=2.8.4 -- ucx-py=0.20.* +- ucx-py=0.21.* - ucx-proc=*=gpu - scipy - networkx>=2.5.1 diff --git a/conda/environments/cugraph_dev_cuda11.2.yml b/conda/environments/cugraph_dev_cuda11.2.yml index d8282dd08c1..cc952fd69b3 100644 --- a/conda/environments/cugraph_dev_cuda11.2.yml +++ b/conda/environments/cugraph_dev_cuda11.2.yml @@ -15,7 +15,7 @@ dependencies: - dask-cuda=21.08.* - dask-cudf=21.08.* - nccl>=2.8.4 -- ucx-py=0.20.* +- ucx-py=0.21.* - ucx-proc=*=gpu - scipy - networkx>=2.5.1 diff --git a/conda/recipes/cugraph/meta.yaml 
b/conda/recipes/cugraph/meta.yaml index d750c1de188..e7be902ee46 100644 --- a/conda/recipes/cugraph/meta.yaml +++ b/conda/recipes/cugraph/meta.yaml @@ -27,7 +27,7 @@ requirements: - cython>=0.29,<0.30 - libcugraph={{ version }} - cudf={{ minor_version }} - - ucx-py 0.20 + - ucx-py 0.21 - ucx-proc=*=gpu run: - python x.x @@ -38,7 +38,7 @@ requirements: - dask>=2.12.0 - distributed>=2.12.0 - nccl>=2.8.4 - - ucx-py 0.20 + - ucx-py 0.21 - ucx-proc=*=gpu #test: From 575677fcf514d5773c9e4c53d67158aadc9da6fa Mon Sep 17 00:00:00 2001 From: Iroy30 <41401566+Iroy30@users.noreply.github.com> Date: Wed, 2 Jun 2021 09:07:04 -0500 Subject: [PATCH 282/343] add multi-column support in algorithms - part 2 (#1571) Adds multicolumn support for: jaccard wjaccard overlap woverlap pagerank spectral clustering forceatlas Authors: - https://github.com/Iroy30 Approvers: - Brad Rees (https://github.com/BradReesWork) - Chuck Hastings (https://github.com/ChuckHastings) - Joseph Nke (https://github.com/jnke2016) URL: https://github.com/rapidsai/cugraph/pull/1571 --- .../link_prediction/Jaccard-Similarity.ipynb | 3 +- .../cugraph/community/spectral_clustering.py | 34 +++++--- python/cugraph/layout/force_atlas2.py | 10 +-- python/cugraph/link_analysis/pagerank.py | 19 +++-- python/cugraph/link_prediction/jaccard.py | 12 +-- python/cugraph/link_prediction/overlap.py | 10 +-- .../link_prediction/overlap_wrapper.pyx | 5 +- python/cugraph/link_prediction/wjaccard.py | 37 +++++--- python/cugraph/link_prediction/woverlap.py | 32 +++++-- .../simpleDistributedGraph.py | 6 ++ .../graph_implementation/simpleGraph.py | 6 ++ python/cugraph/structure/number_map.py | 3 + python/cugraph/tests/test_force_atlas2.py | 65 ++++++++++++++ python/cugraph/tests/test_jaccard.py | 30 +++++++ python/cugraph/tests/test_modularity.py | 37 ++++++++ python/cugraph/tests/test_overlap.py | 30 +++++++ python/cugraph/tests/test_pagerank.py | 85 +++++++++++++++++++ python/cugraph/tests/test_wjaccard.py | 60 ++++++++++++- python/cugraph/tests/test_woverlap.py | 44 +++++++++- python/cugraph/utilities/__init__.py | 1 + python/cugraph/utilities/utils.py | 20 +++++ 21 files changed, 483 insertions(+), 66 deletions(-) diff --git a/notebooks/link_prediction/Jaccard-Similarity.ipynb b/notebooks/link_prediction/Jaccard-Similarity.ipynb index 21835da1cce..7003bdbc98e 100755 --- a/notebooks/link_prediction/Jaccard-Similarity.ipynb +++ b/notebooks/link_prediction/Jaccard-Similarity.ipynb @@ -451,8 +451,9 @@ "metadata": {}, "outputs": [], "source": [ + "pr_df.rename(columns={'pagerank': 'weight'}, inplace=True)", "# Call weighted Jaccard using the Pagerank scores as weights:\n", - "wdf = cugraph.jaccard_w(G, pr_df['pagerank'])" + "wdf = cugraph.jaccard_w(G, pr_df)" ] }, { diff --git a/python/cugraph/community/spectral_clustering.py b/python/cugraph/community/spectral_clustering.py index 443e2169711..06294af00c9 100644 --- a/python/cugraph/community/spectral_clustering.py +++ b/python/cugraph/community/spectral_clustering.py @@ -190,8 +190,8 @@ def analyzeClustering_modularity(G, n_clusters, clustering, Specifies the number of clusters in the given clustering clustering : cudf.DataFrame The cluster assignment to analyze. 
- vertex_col_name : str - The name of the column in the clustering dataframe identifying + vertex_col_name : str or list of str + The names of the column in the clustering dataframe identifying the external vertex id cluster_col_name : str The name of the column in the clustering dataframe identifying @@ -213,8 +213,10 @@ def analyzeClustering_modularity(G, n_clusters, clustering, >>> df = cugraph.spectralBalancedCutClustering(G, 5) >>> score = cugraph.analyzeClustering_modularity(G, 5, df) """ - - if type(vertex_col_name) is not str: + if type(vertex_col_name) is list: + if not all(isinstance(name, str) for name in vertex_col_name): + raise Exception("vertex_col_name must be list of string") + elif type(vertex_col_name) is not str: raise Exception("vertex_col_name must be a string") if type(cluster_col_name) is not str: @@ -224,11 +226,11 @@ def analyzeClustering_modularity(G, n_clusters, clustering, if G.renumbered: clustering = G.add_internal_vertex_id(clustering, - vertex_col_name, + 'vertex', vertex_col_name, drop=True) - clustering = clustering.sort_values(vertex_col_name) + clustering = clustering.sort_values('vertex') score = spectral_clustering_wrapper.analyzeClustering_modularity( G, n_clusters, clustering[cluster_col_name] @@ -277,8 +279,10 @@ def analyzeClustering_edge_cut(G, n_clusters, clustering, >>> df = cugraph.spectralBalancedCutClustering(G, 5) >>> score = cugraph.analyzeClustering_edge_cut(G, 5, df) """ - - if type(vertex_col_name) is not str: + if type(vertex_col_name) is list: + if not all(isinstance(name, str) for name in vertex_col_name): + raise Exception("vertex_col_name must be list of string") + elif type(vertex_col_name) is not str: raise Exception("vertex_col_name must be a string") if type(cluster_col_name) is not str: @@ -288,11 +292,11 @@ def analyzeClustering_edge_cut(G, n_clusters, clustering, if G.renumbered: clustering = G.add_internal_vertex_id(clustering, - vertex_col_name, + 'vertex', vertex_col_name, drop=True) - clustering = clustering.sort_values(vertex_col_name).reset_index(drop=True) + clustering = clustering.sort_values('vertex').reset_index(drop=True) score = spectral_clustering_wrapper.analyzeClustering_edge_cut( G, n_clusters, clustering[cluster_col_name] @@ -339,8 +343,10 @@ def analyzeClustering_ratio_cut(G, n_clusters, clustering, >>> score = cugraph.analyzeClustering_ratio_cut(G, 5, df, >>> 'vertex', 'cluster') """ - - if type(vertex_col_name) is not str: + if type(vertex_col_name) is list: + if not all(isinstance(name, str) for name in vertex_col_name): + raise Exception("vertex_col_name must be list of string") + elif type(vertex_col_name) is not str: raise Exception("vertex_col_name must be a string") if type(cluster_col_name) is not str: @@ -348,11 +354,11 @@ def analyzeClustering_ratio_cut(G, n_clusters, clustering, if G.renumbered: clustering = G.add_internal_vertex_id(clustering, - vertex_col_name, + 'vertex', vertex_col_name, drop=True) - clustering = clustering.sort_values(vertex_col_name) + clustering = clustering.sort_values('vertex') score = spectral_clustering_wrapper.analyzeClustering_ratio_cut( G, n_clusters, clustering[cluster_col_name] diff --git a/python/cugraph/layout/force_atlas2.py b/python/cugraph/layout/force_atlas2.py index 0b745d8ca15..d15109249e5 100644 --- a/python/cugraph/layout/force_atlas2.py +++ b/python/cugraph/layout/force_atlas2.py @@ -12,7 +12,6 @@ # limitations under the License. 
from cugraph.layout import force_atlas2_wrapper -from cugraph.structure.graph_classes import null_check def force_atlas2( @@ -109,13 +108,14 @@ def on_train_end(self, positions): """ if pos_list is not None: - null_check(pos_list["vertex"]) - null_check(pos_list["x"]) - null_check(pos_list["y"]) if input_graph.renumbered is True: + if input_graph.vertex_column_size() > 1: + cols = pos_list.columns[:-2].to_list() + else: + cols = 'vertex' pos_list = input_graph.add_internal_vertex_id(pos_list, "vertex", - "vertex") + cols) if prevent_overlapping: raise Exception("Feature not supported") diff --git a/python/cugraph/link_analysis/pagerank.py b/python/cugraph/link_analysis/pagerank.py index 4f5f8f6aae0..94b1491e944 100644 --- a/python/cugraph/link_analysis/pagerank.py +++ b/python/cugraph/link_analysis/pagerank.py @@ -12,7 +12,6 @@ # limitations under the License. from cugraph.link_analysis import pagerank_wrapper -from cugraph.structure.graph_classes import null_check import cugraph @@ -67,6 +66,10 @@ def pagerank( Subset of vertices of graph for initial guess for pagerank values nstart['values'] : cudf.Series Pagerank values for vertices + weight: str + The attribute column to be used as edge weights if Graph is a NetworkX + Graph. This parameter is here for NetworkX compatibility and is ignored + in case of a cugraph.Graph dangling : dict This parameter is here for NetworkX compatibility and ignored @@ -94,17 +97,23 @@ def pagerank( G, isNx = cugraph.utilities.check_nx_graph(G, weight) if personalization is not None: - null_check(personalization["vertex"]) - null_check(personalization["values"]) if G.renumbered is True: + if len(G.renumber_map.implementation.col_names) > 1: + cols = personalization.columns[:-1].to_list() + else: + cols = 'vertex' personalization = G.add_internal_vertex_id( - personalization, "vertex", "vertex" + personalization, "vertex", cols ) if nstart is not None: if G.renumbered is True: + if len(G.renumber_map.implementation.col_names) > 1: + cols = nstart.columns[:-1].to_list() + else: + cols = 'vertex' nstart = G.add_internal_vertex_id( - nstart, "vertex", "vertex" + nstart, "vertex", cols ) df = pagerank_wrapper.pagerank( diff --git a/python/cugraph/link_prediction/jaccard.py b/python/cugraph/link_prediction/jaccard.py index 2a9e9625050..e69308ac595 100644 --- a/python/cugraph/link_prediction/jaccard.py +++ b/python/cugraph/link_prediction/jaccard.py @@ -13,10 +13,11 @@ import pandas as pd import cudf -from cugraph.structure.graph_classes import Graph, null_check +from cugraph.structure.graph_classes import Graph from cugraph.link_prediction import jaccard_wrapper from cugraph.utilities import check_nx_graph from cugraph.utilities import df_edge_score_to_dictionary +from cugraph.utilities import renumber_vertex_pair def jaccard(input_graph, vertex_pair=None): @@ -108,15 +109,8 @@ def jaccard(input_graph, vertex_pair=None): if type(input_graph) is not Graph: raise Exception("input graph must be undirected") - # FIXME: Add support for multi-column vertices if type(vertex_pair) == cudf.DataFrame: - for col in vertex_pair.columns: - null_check(vertex_pair[col]) - if input_graph.renumbered: - vertex_pair = input_graph.add_internal_vertex_id( - vertex_pair, col, col - ) - + vertex_pair = renumber_vertex_pair(input_graph, vertex_pair) elif vertex_pair is None: pass else: diff --git a/python/cugraph/link_prediction/overlap.py b/python/cugraph/link_prediction/overlap.py index 077080bda1d..4650f24f181 100644 --- a/python/cugraph/link_prediction/overlap.py +++ 
b/python/cugraph/link_prediction/overlap.py @@ -13,10 +13,10 @@ import pandas as pd from cugraph.link_prediction import overlap_wrapper -from cugraph.structure.graph_classes import null_check import cudf from cugraph.utilities import check_nx_graph from cugraph.utilities import df_edge_score_to_dictionary +from cugraph.utilities import renumber_vertex_pair def overlap_coefficient(G, ebunch=None): @@ -91,14 +91,8 @@ def overlap(input_graph, vertex_pair=None): >>> df = cugraph.overlap(G) """ - # FIXME: Add support for multi-column vertices if type(vertex_pair) == cudf.DataFrame: - for col in vertex_pair.columns: - null_check(vertex_pair[col]) - if input_graph.renumbered: - vertex_pair = input_graph.add_internal_vertex_id( - vertex_pair, col, col, - ) + vertex_pair = renumber_vertex_pair(input_graph, vertex_pair) elif vertex_pair is None: pass else: diff --git a/python/cugraph/link_prediction/overlap_wrapper.pyx b/python/cugraph/link_prediction/overlap_wrapper.pyx index 4cb17aa21a6..ec0274716fb 100644 --- a/python/cugraph/link_prediction/overlap_wrapper.pyx +++ b/python/cugraph/link_prediction/overlap_wrapper.pyx @@ -68,8 +68,9 @@ def overlap(input_graph, weights_arr=None, vertex_pair=None): df = cudf.DataFrame() df['overlap_coeff'] = result - first = vertex_pair['first'] - second = vertex_pair['second'] + cols = vertex_pair.columns.to_list() + first = vertex_pair[cols[0]] + second = vertex_pair[cols[1]] # FIXME: multi column support df['source'] = first diff --git a/python/cugraph/link_prediction/wjaccard.py b/python/cugraph/link_prediction/wjaccard.py index 9679d1ba9cf..9616bfd49a8 100644 --- a/python/cugraph/link_prediction/wjaccard.py +++ b/python/cugraph/link_prediction/wjaccard.py @@ -11,9 +11,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -from cugraph.structure.graph_classes import Graph, null_check +from cugraph.structure.graph_classes import Graph from cugraph.link_prediction import jaccard_wrapper import cudf +import numpy as np +from cugraph.utilities import renumber_vertex_pair def jaccard_w(input_graph, weights, vertex_pair=None): @@ -35,8 +37,15 @@ def jaccard_w(input_graph, weights, vertex_pair=None): as an edge list (edge weights are not used for this algorithm). The adjacency list will be computed if not already present. - weights : cudf.Series + weights : cudf.DataFrame Specifies the weights to be used for each vertex. + Vertex should be represented by multiple columns for multi-column + vertices. 
+ + weights['vertex'] : cudf.Series + Contains the vertex identifiers + weights['weight'] : cudf.Series + Contains the weights of vertices vertex_pair : cudf.DataFrame A GPU dataframe consisting of two columns representing pairs of @@ -70,20 +79,28 @@ def jaccard_w(input_graph, weights, vertex_pair=None): if type(input_graph) is not Graph: raise Exception("input graph must be undirected") - # FIXME: Add support for multi-column vertices if type(vertex_pair) == cudf.DataFrame: - for col in vertex_pair.columns: - null_check(vertex_pair[col]) - if input_graph.renumbered: - vertex_pair = input_graph.add_internal_vertex_id( - vertex_pair, col, col, - ) + vertex_pair = renumber_vertex_pair(input_graph, vertex_pair) elif vertex_pair is None: pass else: raise ValueError("vertex_pair must be a cudf dataframe") - df = jaccard_wrapper.jaccard(input_graph, weights, vertex_pair) + if input_graph.renumbered: + vertex_size = input_graph.vertex_column_size() + if vertex_size == 1: + weights = input_graph.add_internal_vertex_id( + weights, 'vertex', 'vertex' + ) + else: + cols = weights.columns[:vertex_size].to_list() + weights = input_graph.add_internal_vertex_id( + weights, 'vertex', cols + ) + jaccard_weights = cudf.Series(np.ones(len(weights))) + for i in range(len(weights)): + jaccard_weights[weights['vertex'].iloc[i]] = weights['weight'].iloc[i] + df = jaccard_wrapper.jaccard(input_graph, jaccard_weights, vertex_pair) if input_graph.renumbered: df = input_graph.unrenumber(df, "source") diff --git a/python/cugraph/link_prediction/woverlap.py b/python/cugraph/link_prediction/woverlap.py index fe64f812957..920d3e3f80d 100644 --- a/python/cugraph/link_prediction/woverlap.py +++ b/python/cugraph/link_prediction/woverlap.py @@ -12,8 +12,9 @@ # limitations under the License. 
from cugraph.link_prediction import overlap_wrapper -from cugraph.structure.graph_classes import null_check import cudf +import numpy as np +from cugraph.utilities import renumber_vertex_pair def overlap_w(input_graph, weights, vertex_pair=None): @@ -67,20 +68,33 @@ def overlap_w(input_graph, weights, vertex_pair=None): >>> G.from_cudf_edgelist(M, source='0', destination='1') >>> df = cugraph.overlap_w(G, M[2]) """ - # FIXME: Add support for multi-column vertices + if type(vertex_pair) == cudf.DataFrame: - for col in vertex_pair.columns: - null_check(vertex_pair[col]) - if input_graph.renumbered: - vertex_pair = input_graph.add_internal_vertex_id( - vertex_pair, col, col - ) + vertex_pair = renumber_vertex_pair(input_graph, vertex_pair) elif vertex_pair is None: pass else: raise ValueError("vertex_pair must be a cudf dataframe") - df = overlap_wrapper.overlap(input_graph, weights, vertex_pair) + if input_graph.renumbered: + vertex_size = input_graph.vertex_column_size() + if vertex_size == 1: + weights = input_graph.add_internal_vertex_id( + weights, 'vertex', 'vertex' + ) + else: + cols = weights.columns[:vertex_size].to_list() + weights = input_graph.add_internal_vertex_id( + weights, 'vertex', cols + ) + + overlap_weights = cudf.Series(np.ones(len(weights))) + for i in range(len(weights)): + overlap_weights[weights['vertex'].iloc[i]] = weights['weight'].iloc[i] + + overlap_weights = overlap_weights.astype('float32') + + df = overlap_wrapper.overlap(input_graph, overlap_weights, vertex_pair) if input_graph.renumbered: df = input_graph.unrenumber(df, "source") diff --git a/python/cugraph/structure/graph_implementation/simpleDistributedGraph.py b/python/cugraph/structure/graph_implementation/simpleDistributedGraph.py index 21eff99d2bf..951ea8add5b 100644 --- a/python/cugraph/structure/graph_implementation/simpleDistributedGraph.py +++ b/python/cugraph/structure/graph_implementation/simpleDistributedGraph.py @@ -472,3 +472,9 @@ def compute_renumber_edge_list(self, transposed=False): self.edgelist = self.EdgeList(renumbered_ddf) self.renumber_map = number_map self.properties.store_transposed = transposed + + def vertex_column_size(self): + if self.properties.renumbered: + return self.renumber_map.vertex_column_size() + else: + return 1 diff --git a/python/cugraph/structure/graph_implementation/simpleGraph.py b/python/cugraph/structure/graph_implementation/simpleGraph.py index 3fa65fd8de6..e74b04c00b5 100644 --- a/python/cugraph/structure/graph_implementation/simpleGraph.py +++ b/python/cugraph/structure/graph_implementation/simpleGraph.py @@ -823,3 +823,9 @@ def neighbors(self, n): return self.renumber_map.from_internal_vertex_id(neighbors)["0"] else: return neighbors + + def vertex_column_size(self): + if self.properties.renumbered: + return self.renumber_map.vertex_column_size() + else: + return 1 diff --git a/python/cugraph/structure/number_map.py b/python/cugraph/structure/number_map.py index d90d7a1fda9..a1ff2f47abf 100644 --- a/python/cugraph/structure/number_map.py +++ b/python/cugraph/structure/number_map.py @@ -671,3 +671,6 @@ def unrenumber(self, df, column_name, preserve_order=False, return df, col_names else: return df + + def vertex_column_size(self): + return len(self.implementation.col_names) diff --git a/python/cugraph/tests/test_force_atlas2.py b/python/cugraph/tests/test_force_atlas2.py index f399460e5e6..54ea9c78588 100644 --- a/python/cugraph/tests/test_force_atlas2.py +++ b/python/cugraph/tests/test_force_atlas2.py @@ -14,6 +14,7 @@ import time import pytest +import cudf 
import cugraph from cugraph.internals import GraphBasedDimRedCallback from cugraph.tests import utils @@ -135,3 +136,67 @@ def test_force_atlas2(graph_file, score, max_iter, assert test_callback.on_epoch_end_called_count == max_iter # verify `on_train_end` was only called once assert test_callback.on_train_end_called_count == 1 + + +@pytest.mark.parametrize('graph_file, score', DATASETS[:-1]) +@pytest.mark.parametrize('max_iter', MAX_ITERATIONS) +@pytest.mark.parametrize('barnes_hut_optimize', BARNES_HUT_OPTIMIZE) +def test_force_atlas2_multi_column_pos_list(graph_file, score, max_iter, + barnes_hut_optimize): + cu_M = utils.read_csv_file(graph_file) + test_callback = TestCallback() + pos = cugraph_call(cu_M, + max_iter=max_iter, + pos_list=None, + outbound_attraction_distribution=True, + lin_log_mode=False, + prevent_overlapping=False, + edge_weight_influence=1.0, + jitter_tolerance=1.0, + barnes_hut_optimize=False, + barnes_hut_theta=0.5, + scaling_ratio=2.0, + strong_gravity_mode=False, + gravity=1.0, + callback=test_callback) + + cu_M.rename(columns={'0': 'src_0', '1': 'dst_0'}, inplace=True) + cu_M['src_1'] = cu_M['src_0'] + 1000 + cu_M['dst_1'] = cu_M['dst_0'] + 1000 + + G = cugraph.Graph() + G.from_cudf_edgelist( + cu_M, source=["src_0", "src_1"], + destination=["dst_0", "dst_1"], + edge_attr="2" + ) + + pos_list = cudf.DataFrame() + pos_list['vertex_0'] = pos['vertex'] + pos_list['vertex_1'] = pos_list['vertex_0'] + 1000 + pos_list['x'] = pos['x'] + pos_list['y'] = pos['y'] + + cu_pos = cugraph.force_atlas2( + G, + max_iter=max_iter, + pos_list=pos_list, + outbound_attraction_distribution=True, + lin_log_mode=False, + prevent_overlapping=False, + edge_weight_influence=1.0, + jitter_tolerance=1.0, + barnes_hut_optimize=False, + barnes_hut_theta=0.5, + scaling_ratio=2.0, + strong_gravity_mode=False, + gravity=1.0, + callback=test_callback) + + cu_pos = cu_pos.sort_values('0_vertex') + matrix_file = graph_file.with_suffix(".mtx") + M = scipy.io.mmread(matrix_file) + M = M.todense() + cu_trust = trustworthiness(M, cu_pos[["x", "y"]].to_pandas()) + print(cu_trust, score) + assert cu_trust > score diff --git a/python/cugraph/tests/test_jaccard.py b/python/cugraph/tests/test_jaccard.py index b61101ef1d0..cc2795cb464 100644 --- a/python/cugraph/tests/test_jaccard.py +++ b/python/cugraph/tests/test_jaccard.py @@ -15,6 +15,7 @@ import time import pytest +import cudf import cugraph from cugraph.tests import utils from pathlib import PurePath @@ -222,3 +223,32 @@ def test_jaccard_nx(graph_file): # FIXME: Nx does a full all-pair Jaccard. 
# cuGraph does a limited 1-hop Jaccard # assert nx_j == cg_j + + +@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) +def test_jaccard_multi_column(graph_file): + gc.collect() + + M = utils.read_csv_for_nx(graph_file) + + cu_M = cudf.DataFrame() + cu_M["src_0"] = cudf.Series(M["0"]) + cu_M["dst_0"] = cudf.Series(M["1"]) + cu_M["src_1"] = cu_M["src_0"] + 1000 + cu_M["dst_1"] = cu_M["dst_0"] + 1000 + G1 = cugraph.Graph() + G1.from_cudf_edgelist(cu_M, source=["src_0", "src_1"], + destination=["dst_0", "dst_1"]) + + vertex_pair = cu_M[["src_0", "src_1", "dst_0", "dst_1"]] + vertex_pair = vertex_pair[:5] + + df_res = cugraph.jaccard(G1, vertex_pair) + + G2 = cugraph.Graph() + G2.from_cudf_edgelist(cu_M, source="src_0", + destination="dst_0") + df_exp = cugraph.jaccard(G2, vertex_pair[["src_0", "dst_0"]]) + + # Calculating mismatch + assert df_res["jaccard_coeff"].equals(df_exp["jaccard_coeff"]) diff --git a/python/cugraph/tests/test_modularity.py b/python/cugraph/tests/test_modularity.py index c1ff95042ed..21b8adae6e6 100644 --- a/python/cugraph/tests/test_modularity.py +++ b/python/cugraph/tests/test_modularity.py @@ -71,6 +71,43 @@ def test_modularity_clustering(graph_file, partitions): assert cu_score > rand_score +# Test all combinations of default/managed and pooled/non-pooled allocation +@pytest.mark.parametrize("graph_file", utils.DATASETS) +@pytest.mark.parametrize("partitions", PARTITIONS) +def test_modularity_clustering_multi_column(graph_file, partitions): + gc.collect() + + # Read in the graph and get a cugraph object + cu_M = utils.read_csv_file(graph_file, read_weights_in_sp=False) + cu_M.rename(columns={'0': 'src_0', '1': 'dst_0'}, inplace=True) + cu_M['src_1'] = cu_M['src_0'] + 1000 + cu_M['dst_1'] = cu_M['dst_0'] + 1000 + + G1 = cugraph.Graph() + G1.from_cudf_edgelist(cu_M, source=["src_0", "src_1"], + destination=["dst_0", "dst_1"], + edge_attr="2") + + df1 = cugraph.spectralModularityMaximizationClustering( + G1, partitions, num_eigen_vects=(partitions - 1) + ) + + cu_score = cugraph.analyzeClustering_modularity(G1, partitions, df1, + ['0_vertex', + '1_vertex'], + 'cluster') + + G2 = cugraph.Graph() + G2.from_cudf_edgelist(cu_M, source="src_0", + destination="dst_0", + edge_attr="2") + + rand_score = random_call(G2, partitions) + # Assert that the partitioning has better modularity than the random + # assignment + assert cu_score > rand_score + + # Test to ensure DiGraph objs are not accepted # Test all combinations of default/managed and pooled/non-pooled allocation diff --git a/python/cugraph/tests/test_overlap.py b/python/cugraph/tests/test_overlap.py index a0c336c3f16..42bc3ea9808 100644 --- a/python/cugraph/tests/test_overlap.py +++ b/python/cugraph/tests/test_overlap.py @@ -18,6 +18,7 @@ import numpy as np import scipy +import cudf import cugraph from cugraph.tests import utils @@ -148,3 +149,32 @@ def test_overlap_edge_vals(graph_file): else: diff = abs(cpu_coeff[i] - cu_coeff[i]) assert diff < 1.0e-6 + + +@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) +def test_overlap_multi_column(graph_file): + gc.collect() + + M = utils.read_csv_for_nx(graph_file) + + cu_M = cudf.DataFrame() + cu_M["src_0"] = cudf.Series(M["0"]) + cu_M["dst_0"] = cudf.Series(M["1"]) + cu_M["src_1"] = cu_M["src_0"] + 1000 + cu_M["dst_1"] = cu_M["dst_0"] + 1000 + G1 = cugraph.Graph() + G1.from_cudf_edgelist(cu_M, source=["src_0", "src_1"], + destination=["dst_0", "dst_1"]) + + vertex_pair = cu_M[["src_0", "src_1", "dst_0", "dst_1"]] + vertex_pair = vertex_pair[:5] + 
+ df_res = cugraph.overlap(G1, vertex_pair) + + G2 = cugraph.Graph() + G2.from_cudf_edgelist(cu_M, source="src_0", + destination="dst_0") + df_exp = cugraph.overlap(G2, vertex_pair[["src_0", "dst_0"]]) + + # Calculating mismatch + assert df_res["overlap_coeff"].equals(df_exp["overlap_coeff"]) diff --git a/python/cugraph/tests/test_pagerank.py b/python/cugraph/tests/test_pagerank.py index 163b2adb967..50be1cd5230 100644 --- a/python/cugraph/tests/test_pagerank.py +++ b/python/cugraph/tests/test_pagerank.py @@ -241,3 +241,88 @@ def test_pagerank_nx( print(f"{cugraph_pr[i][1]} and {cugraph_pr[i][1]}") print("Mismatches:", err) assert err < (0.01 * len(cugraph_pr)) + + +@pytest.mark.parametrize("graph_file", utils.DATASETS) +@pytest.mark.parametrize("max_iter", MAX_ITERATIONS) +@pytest.mark.parametrize("tol", TOLERANCE) +@pytest.mark.parametrize("alpha", ALPHA) +@pytest.mark.parametrize("personalization_perc", PERSONALIZATION_PERC) +@pytest.mark.parametrize("has_guess", HAS_GUESS) +def test_pagerank_multi_column( + graph_file, max_iter, tol, alpha, personalization_perc, has_guess +): + gc.collect() + + # NetworkX PageRank + M = utils.read_csv_for_nx(graph_file) + nnz_vtx = np.unique(M[['0', '1']]) + + Gnx = nx.from_pandas_edgelist( + M, source="0", target="1", edge_attr="weight", + create_using=nx.DiGraph() + ) + + networkx_pr, networkx_prsn = networkx_call( + Gnx, max_iter, tol, alpha, personalization_perc, nnz_vtx + ) + + cu_nstart = None + if has_guess == 1: + cu_nstart_temp = cudify(networkx_pr) + max_iter = 100 + cu_nstart = cudf.DataFrame() + cu_nstart["vertex_0"] = cu_nstart_temp["vertex"] + cu_nstart["vertex_1"] = cu_nstart["vertex_0"] + 1000 + cu_nstart["values"] = cu_nstart_temp["values"] + + cu_prsn_temp = cudify(networkx_prsn) + if cu_prsn_temp is not None: + cu_prsn = cudf.DataFrame() + cu_prsn["vertex_0"] = cu_prsn_temp["vertex"] + cu_prsn["vertex_1"] = cu_prsn["vertex_0"] + 1000 + cu_prsn["values"] = cu_prsn_temp["values"] + else: + cu_prsn = cu_prsn_temp + + cu_M = cudf.DataFrame() + cu_M["src_0"] = cudf.Series(M["0"]) + cu_M["dst_0"] = cudf.Series(M["1"]) + cu_M["src_1"] = cu_M["src_0"] + 1000 + cu_M["dst_1"] = cu_M["dst_0"] + 1000 + cu_M["weights"] = cudf.Series(M["weight"]) + + cu_G = cugraph.DiGraph() + cu_G.from_cudf_edgelist(cu_M, source=["src_0", "src_1"], + destination=["dst_0", "dst_1"], + edge_attr="weights") + + df = cugraph.pagerank( + cu_G, + alpha=alpha, + max_iter=max_iter, + tol=tol, + personalization=cu_prsn, + nstart=cu_nstart, + ) + + cugraph_pr = [] + + df = df.sort_values("0_vertex").reset_index(drop=True) + + pr_scores = df["pagerank"].to_array() + for i, rank in enumerate(pr_scores): + cugraph_pr.append((i, rank)) + + # Calculating mismatch + networkx_pr = sorted(networkx_pr.items(), key=lambda x: x[0]) + err = 0 + assert len(cugraph_pr) == len(networkx_pr) + for i in range(len(cugraph_pr)): + if ( + abs(cugraph_pr[i][1] - networkx_pr[i][1]) > tol * 1.1 + and cugraph_pr[i][0] == networkx_pr[i][0] + ): + err = err + 1 + print("Mismatches:", err) + assert err < (0.01 * len(cugraph_pr)) diff --git a/python/cugraph/tests/test_wjaccard.py b/python/cugraph/tests/test_wjaccard.py index 9f82857a8d7..f3b3fb9efd6 100644 --- a/python/cugraph/tests/test_wjaccard.py +++ b/python/cugraph/tests/test_wjaccard.py @@ -38,16 +38,19 @@ def cugraph_call(cu_M): # Device data - weights_arr = cudf.Series( + weight_arr = cudf.Series( np.ones(max(cu_M["0"].max(), cu_M["1"].max()) + 1, dtype=np.float32) ) + weights = cudf.DataFrame() + weights['vertex'] = np.arange(len(weight_arr), 
dtype=np.int32) + weights['weight'] = weight_arr G = cugraph.Graph() G.from_cudf_edgelist(cu_M, source="0", destination="1") # cugraph Jaccard Call t1 = time.time() - df = cugraph.jaccard_w(G, weights_arr) + df = cugraph.jaccard_w(G, weights) t2 = time.time() - t1 print("Time : " + str(t2)) @@ -100,3 +103,56 @@ def test_wjaccard(graph_file): for i in range(len(cu_coeff)): diff = abs(nx_coeff[i] - cu_coeff[i]) assert diff < 1.0e-6 + + +@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) +def test_wjaccard_multi_column_weights(graph_file): + gc.collect() + + M = utils.read_csv_for_nx(graph_file) + cu_M = utils.read_csv_file(graph_file) + # suppress F841 (local variable is assigned but never used) in flake8 + # no networkX equivalent to compare cu_coeff against... + cu_coeff = cugraph_call(cu_M) # noqa: F841 + nx_coeff = networkx_call(M) + for i in range(len(cu_coeff)): + diff = abs(nx_coeff[i] - cu_coeff[i]) + assert diff < 1.0e-6 + + +@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) +def test_wjaccard_multi_column(graph_file): + gc.collect() + + M = utils.read_csv_for_nx(graph_file) + + cu_M = cudf.DataFrame() + cu_M["src_0"] = cudf.Series(M["0"]) + cu_M["dst_0"] = cudf.Series(M["1"]) + cu_M["src_1"] = cu_M["src_0"] + 1000 + cu_M["dst_1"] = cu_M["dst_0"] + 1000 + G1 = cugraph.Graph() + G1.from_cudf_edgelist(cu_M, source=["src_0", "src_1"], + destination=["dst_0", "dst_1"]) + + G2 = cugraph.Graph() + G2.from_cudf_edgelist(cu_M, source="src_0", + destination="dst_0") + + vertex_pair = cu_M[["src_0", "src_1", "dst_0", "dst_1"]] + vertex_pair = vertex_pair[:5] + + weight_arr = cudf.Series(np.ones(G2.number_of_vertices(), + dtype=np.float32)) + weights = cudf.DataFrame() + weights['vertex'] = G2.nodes() + weights['vertex_'] = weights['vertex'] + 1000 + weights['weight'] = weight_arr + + df_res = cugraph.jaccard_w(G1, weights, vertex_pair) + + weights = weights[['vertex', 'weight']] + df_exp = cugraph.jaccard_w(G2, weights, vertex_pair[["src_0", "dst_0"]]) + + # Calculating mismatch + assert df_res["jaccard_coeff"].equals(df_exp["jaccard_coeff"]) diff --git a/python/cugraph/tests/test_woverlap.py b/python/cugraph/tests/test_woverlap.py index b6ceda40116..66032ac3f48 100644 --- a/python/cugraph/tests/test_woverlap.py +++ b/python/cugraph/tests/test_woverlap.py @@ -28,13 +28,16 @@ def cugraph_call(cu_M, pairs): weights_arr = cudf.Series( np.ones(max(cu_M["0"].max(), cu_M["1"].max()) + 1, dtype=np.float32) ) + weights = cudf.DataFrame() + weights['vertex'] = np.arange(len(weights_arr), dtype=np.int32) + weights['weight'] = weights_arr G = cugraph.DiGraph() G.from_cudf_edgelist(cu_M, source="0", destination="1") # cugraph Overlap Call t1 = time.time() - df = cugraph.overlap_w(G, weights_arr, pairs) + df = cugraph.overlap_w(G, weights, pairs) t2 = time.time() - t1 print("Time : " + str(t2)) df = df.sort_values(by=["source", "destination"]) @@ -114,3 +117,42 @@ def test_woverlap(graph_file): else: diff = abs(cpu_coeff[i] - cu_coeff[i]) assert diff < 1.0e-6 + + +@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) +def test_woverlap_multi_column(graph_file): + gc.collect() + + M = utils.read_csv_for_nx(graph_file) + + cu_M = cudf.DataFrame() + cu_M["src_0"] = cudf.Series(M["0"]) + cu_M["dst_0"] = cudf.Series(M["1"]) + cu_M["src_1"] = cu_M["src_0"] + 1000 + cu_M["dst_1"] = cu_M["dst_0"] + 1000 + G1 = cugraph.Graph() + G1.from_cudf_edgelist(cu_M, source=["src_0", "src_1"], + destination=["dst_0", "dst_1"]) + + G2 = cugraph.Graph() + G2.from_cudf_edgelist(cu_M, 
source="src_0", + destination="dst_0") + + vertex_pair = cu_M[["src_0", "src_1", "dst_0", "dst_1"]] + vertex_pair = vertex_pair[:5] + + weight_arr = cudf.Series(np.ones(G2.number_of_vertices(), + dtype=np.float32)) + + weights = cudf.DataFrame() + weights['vertex'] = G2.nodes() + weights['vertex_'] = weights['vertex'] + 1000 + weights['weight'] = weight_arr + + df_res = cugraph.overlap_w(G1, weights, vertex_pair) + + weights = weights[['vertex', 'weight']] + df_exp = cugraph.overlap_w(G2, weights, vertex_pair[["src_0", "dst_0"]]) + + # Calculating mismatch + assert df_res["overlap_coeff"].equals(df_exp["overlap_coeff"]) diff --git a/python/cugraph/utilities/__init__.py b/python/cugraph/utilities/__init__.py index 38b46b0fe87..6dc23ff53b7 100644 --- a/python/cugraph/utilities/__init__.py +++ b/python/cugraph/utilities/__init__.py @@ -24,5 +24,6 @@ is_matrix_type, is_cp_matrix_type, is_sp_matrix_type, + renumber_vertex_pair ) from cugraph.utilities.path_retrieval import get_traversed_cost diff --git a/python/cugraph/utilities/utils.py b/python/cugraph/utilities/utils.py index 01bae895e5a..e4e93513630 100644 --- a/python/cugraph/utilities/utils.py +++ b/python/cugraph/utilities/utils.py @@ -335,3 +335,23 @@ def import_optional(mod, import_from=None): pass return namespace.get(mod) + + +def renumber_vertex_pair(input_graph, vertex_pair): + vertex_size = input_graph.vertex_column_size() + columns = vertex_pair.columns.to_list() + if vertex_size == 1: + for col in vertex_pair.columns: + if input_graph.renumbered: + vertex_pair = input_graph.add_internal_vertex_id( + vertex_pair, col, col + ) + else: + if input_graph.renumbered: + vertex_pair = input_graph.add_internal_vertex_id( + vertex_pair, "src", columns[:vertex_size] + ) + vertex_pair = input_graph.add_internal_vertex_id( + vertex_pair, "dst", columns[vertex_size:] + ) + return vertex_pair From 12d3a61cd3c2b267adbc690b475d7fb9824a447a Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Thu, 3 Jun 2021 00:30:07 -0400 Subject: [PATCH 283/343] Remove Pascal guard and test cuGraph use of cuco::static_map on Pascal (#1640) Dependent on https://github.com/NVIDIA/cuCollections/pull/83 Authors: - Seunghwa Kang (https://github.com/seunghwak) Approvers: - Brad Rees (https://github.com/BradReesWork) - Chuck Hastings (https://github.com/ChuckHastings) - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/1640 --- cpp/cmake/thirdparty/get_cuco.cmake | 2 +- .../experimental/include_cuco_static_map.cuh | 33 -------- ...ransform_reduce_key_aggregated_out_nbr.cuh | 17 +--- .../cugraph/utilities/collect_comm.cuh | 32 ++------ cpp/src/community/louvain.cu | 28 +++---- cpp/src/experimental/louvain.cuh | 2 - cpp/src/experimental/relabel.cu | 35 ++------ cpp/src/experimental/renumber_edgelist.cu | 64 +++------------ cpp/src/experimental/renumber_utils.cu | 46 ++--------- python/cugraph/tests/dask/test_mg_louvain.py | 19 ++--- python/cugraph/tests/test_ecg.py | 33 +++----- python/cugraph/tests/test_leiden.py | 8 -- python/cugraph/tests/test_louvain.py | 79 ++++++++----------- 13 files changed, 96 insertions(+), 302 deletions(-) delete mode 100644 cpp/include/cugraph/experimental/include_cuco_static_map.cuh diff --git a/cpp/cmake/thirdparty/get_cuco.cmake b/cpp/cmake/thirdparty/get_cuco.cmake index a255793285f..b9542a42f26 100644 --- a/cpp/cmake/thirdparty/get_cuco.cmake +++ b/cpp/cmake/thirdparty/get_cuco.cmake @@ -20,7 +20,7 @@ function(find_and_configure_cuco VERSION) 
GLOBAL_TARGETS cuco cuco::cuco CPM_ARGS GIT_REPOSITORY https://github.com/NVIDIA/cuCollections.git - GIT_TAG 7678a5ecaa192b8983b02a0191a140097171713e + GIT_TAG 0b672bbde7c85a79df4d7ca5f82e15e5b4a57700 OPTIONS "BUILD_TESTS OFF" "BUILD_BENCHMARKS OFF" "BUILD_EXAMPLES OFF" diff --git a/cpp/include/cugraph/experimental/include_cuco_static_map.cuh b/cpp/include/cugraph/experimental/include_cuco_static_map.cuh deleted file mode 100644 index 9e54acef72c..00000000000 --- a/cpp/include/cugraph/experimental/include_cuco_static_map.cuh +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -// "FIXME": remove the guards below and references to CUCO_STATIC_MAP_DEFINED -// -// cuco/static_map.cuh depends on features not supported on or before Pascal. -// -// If we build for sm_60 or before, the inclusion of cuco/static_map.cuh wil -// result in compilation errors. -// -// If we're Pascal or before we do nothing here and will suppress including -// some code below. If we are later than Pascal we define CUCO_STATIC_MAP_DEFINED -// which will result in the full implementation being pulled in. -// -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 700 -#else -#define CUCO_STATIC_MAP_DEFINED -#include -#endif diff --git a/cpp/include/cugraph/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh b/cpp/include/cugraph/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh index 48d06e100c8..367e29e12f0 100644 --- a/cpp/include/cugraph/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh +++ b/cpp/include/cugraph/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh @@ -27,12 +27,11 @@ #include #include +#include #include #include #include -#include - #include namespace cugraph { @@ -202,12 +201,8 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( invalid_vertex_id::value, stream_adapter); - auto pair_first = thrust::make_transform_iterator( - thrust::make_zip_iterator(thrust::make_tuple( - map_keys.begin(), get_dataframe_buffer_begin(map_value_buffer))), - [] __device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); - }); + auto pair_first = thrust::make_zip_iterator( + thrust::make_tuple(map_keys.begin(), get_dataframe_buffer_begin(map_value_buffer))); kv_map_ptr->insert(pair_first, pair_first + map_keys.size()); } else { handle.get_stream_view().synchronize(); // cuco::static_map currently does not take stream @@ -224,11 +219,7 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( invalid_vertex_id::value, stream_adapter); - auto pair_first = thrust::make_transform_iterator( - thrust::make_zip_iterator(thrust::make_tuple(map_key_first, map_value_first)), - [] __device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); - }); + auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(map_key_first, map_value_first)); kv_map_ptr->insert(pair_first, pair_first + thrust::distance(map_key_first, map_key_last)); } 
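A minimal sketch, not part of the diff, of the zip-iterator pattern these hunks converge on. It assumes a cuco::static_map build (per the cuCollections dependency noted in this PR) whose insert() accepts a thrust zip iterator over (key, value) tuples, and it substitutes std::numeric_limits sentinels for cugraph's invalid_vertex_id; the sizing heuristic and the find() call mirror the surrounding cuGraph code rather than any fixed API requirement.

    #include <cuco/static_map.cuh>
    #include <thrust/iterator/zip_iterator.h>
    #include <thrust/tuple.h>
    #include <rmm/device_uvector.hpp>
    #include <limits>

    template <typename vertex_t>
    void sketch_build_and_query(rmm::device_uvector<vertex_t> const& keys,
                                rmm::device_uvector<vertex_t> const& values,
                                rmm::device_uvector<vertex_t>& queries)
    {
      // size the map with some headroom (assumed heuristic)
      double constexpr load_factor = 0.7;
      cuco::static_map<vertex_t, vertex_t> map{
        static_cast<size_t>(static_cast<double>(keys.size()) / load_factor),
        std::numeric_limits<vertex_t>::max(),   // empty key sentinel
        std::numeric_limits<vertex_t>::max()};  // empty value sentinel

      // (key, value) pairs are fed directly through a zip iterator; the old
      // make_transform_iterator + thrust::make_pair wrapper is no longer needed.
      auto pair_first =
        thrust::make_zip_iterator(thrust::make_tuple(keys.begin(), values.begin()));
      map.insert(pair_first, pair_first + keys.size());

      // Look up each query key in place, as the renumbering code does.
      map.find(queries.begin(), queries.end(), queries.begin());
    }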
diff --git a/cpp/include/cugraph/utilities/collect_comm.cuh b/cpp/include/cugraph/utilities/collect_comm.cuh index 812a84a0441..ddc5621e929 100644 --- a/cpp/include/cugraph/utilities/collect_comm.cuh +++ b/cpp/include/cugraph/utilities/collect_comm.cuh @@ -16,10 +16,10 @@ #pragma once #include -#include #include #include +#include #include #include #include @@ -77,11 +77,7 @@ collect_values_for_keys(raft::comms::comms_t const &comm, invalid_vertex_id::value, stream_adapter); { - auto pair_first = thrust::make_transform_iterator( - thrust::make_zip_iterator(thrust::make_tuple(map_key_first, map_value_first)), - [] __device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); - }); + auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(map_key_first, map_value_first)); kv_map_ptr->insert(pair_first, pair_first + thrust::distance(map_key_first, map_key_last)); } @@ -139,13 +135,8 @@ collect_values_for_keys(raft::comms::comms_t const &comm, invalid_vertex_id::value, stream_adapter); { - auto pair_first = thrust::make_transform_iterator( - thrust::make_zip_iterator( - thrust::make_tuple(unique_keys.begin(), values_for_unique_keys.begin())), - [] __device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); - }); - + auto pair_first = thrust::make_zip_iterator( + thrust::make_tuple(unique_keys.begin(), values_for_unique_keys.begin())); kv_map_ptr->insert(pair_first, pair_first + unique_keys.size()); } @@ -201,11 +192,7 @@ collect_values_for_unique_keys(raft::comms::comms_t const &comm, invalid_vertex_id::value, stream_adapter); { - auto pair_first = thrust::make_transform_iterator( - thrust::make_zip_iterator(thrust::make_tuple(map_key_first, map_value_first)), - [] __device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); - }); + auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(map_key_first, map_value_first)); kv_map_ptr->insert(pair_first, pair_first + thrust::distance(map_key_first, map_key_last)); } @@ -259,13 +246,8 @@ collect_values_for_unique_keys(raft::comms::comms_t const &comm, invalid_vertex_id::value, stream_adapter); { - auto pair_first = thrust::make_transform_iterator( - thrust::make_zip_iterator( - thrust::make_tuple(unique_keys.begin(), values_for_unique_keys.begin())), - [] __device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); - }); - + auto pair_first = thrust::make_zip_iterator( + thrust::make_tuple(unique_keys.begin(), values_for_unique_keys.begin())); kv_map_ptr->insert(pair_first, pair_first + unique_keys.size()); } diff --git a/cpp/src/community/louvain.cu b/cpp/src/community/louvain.cu index 0dc913ffaba..842a7f39750 100644 --- a/cpp/src/community/louvain.cu +++ b/cpp/src/community/louvain.cu @@ -21,6 +21,9 @@ #include +CUCO_DECLARE_BITWISE_COMPARABLE(float) +CUCO_DECLARE_BITWISE_COMPARABLE(double) + namespace cugraph { namespace detail { @@ -48,25 +51,12 @@ std::pair>, weight_t> louvain( size_t max_level, weight_t resolution) { - // "FIXME": remove this check and the guards below - // - // Disable louvain(experimental::graph_view_t,...) 
- // versions for GPU architectures < 700 - // (cuco/static_map.cuh depends on features not supported on or before Pascal) - // - cudaDeviceProp device_prop; - CUDA_CHECK(cudaGetDeviceProperties(&device_prop, 0)); - - if (device_prop.major < 7) { - CUGRAPH_FAIL("Louvain not supported on Pascal and older architectures"); - } else { - experimental::Louvain> - runner(handle, graph_view); - - weight_t wt = runner(max_level, resolution); - - return std::make_pair(runner.move_dendrogram(), wt); - } + experimental::Louvain> + runner(handle, graph_view); + + weight_t wt = runner(max_level, resolution); + + return std::make_pair(runner.move_dendrogram(), wt); } template diff --git a/cpp/src/experimental/louvain.cuh b/cpp/src/experimental/louvain.cuh index 1e618482f68..6205f13e94d 100644 --- a/cpp/src/experimental/louvain.cuh +++ b/cpp/src/experimental/louvain.cuh @@ -365,7 +365,6 @@ class Louvain { rmm::device_uvector &next_cluster_v, bool up_down) { -#ifdef CUCO_STATIC_MAP_DEFINED rmm::device_uvector old_cluster_sum_v( current_graph_view_.get_number_of_local_vertices(), handle_.get_stream()); rmm::device_uvector cluster_subtract_v( @@ -501,7 +500,6 @@ class Louvain { d_src_cluster_cache_, [] __device__(auto src, auto dst, auto wt, auto x, auto y) { return wt; }, weight_t{0}); -#endif } void shrink_graph() diff --git a/cpp/src/experimental/relabel.cu b/cpp/src/experimental/relabel.cu index 325c6aaea67..7e7a4d64b3e 100644 --- a/cpp/src/experimental/relabel.cu +++ b/cpp/src/experimental/relabel.cu @@ -14,8 +14,6 @@ * limitations under the License. */ -#include - #include #include #include @@ -25,6 +23,7 @@ #include #include +#include #include #include #include @@ -56,11 +55,6 @@ void relabel(raft::handle_t const& handle, { double constexpr load_factor = 0.7; - // FIXME: remove this check once we drop Pascal support - CUGRAPH_EXPECTS(handle.get_device_properties().major >= 7, - "Relabel not supported on Pascal and older architectures."); - -#ifdef CUCO_STATIC_MAP_DEFINED if (multi_gpu) { auto& comm = handle.get_comms(); auto const comm_size = comm.get_size(); @@ -136,12 +130,8 @@ void relabel(raft::handle_t const& handle, invalid_vertex_id::value, stream_adapter}; - auto pair_first = thrust::make_transform_iterator( - thrust::make_zip_iterator( - thrust::make_tuple(rx_label_pair_old_labels.begin(), rx_label_pair_new_labels.begin())), - [] __device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); - }); + auto pair_first = thrust::make_zip_iterator( + thrust::make_tuple(rx_label_pair_old_labels.begin(), rx_label_pair_new_labels.begin())); relabel_map.insert(pair_first, pair_first + rx_label_pair_old_labels.size()); rx_label_pair_old_labels.resize(0, handle.get_stream()); @@ -204,13 +194,8 @@ void relabel(raft::handle_t const& handle, invalid_vertex_id::value, stream_adapter}; - auto pair_first = thrust::make_transform_iterator( - thrust::make_zip_iterator( - thrust::make_tuple(unique_old_labels.begin(), new_labels_for_unique_old_labels.begin())), - [] __device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); - }); - + auto pair_first = thrust::make_zip_iterator( + thrust::make_tuple(unique_old_labels.begin(), new_labels_for_unique_old_labels.begin())); relabel_map.insert(pair_first, pair_first + unique_old_labels.size()); relabel_map.find(labels, labels + num_labels, labels); } @@ -222,13 +207,8 @@ void relabel(raft::handle_t const& handle, invalid_vertex_id::value, invalid_vertex_id::value); - auto pair_first = 
thrust::make_transform_iterator( - thrust::make_zip_iterator( - thrust::make_tuple(std::get<0>(old_new_label_pairs), std::get<1>(old_new_label_pairs))), - [] __device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); - }); - + auto pair_first = thrust::make_zip_iterator( + thrust::make_tuple(std::get<0>(old_new_label_pairs), std::get<1>(old_new_label_pairs))); relabel_map.insert(pair_first, pair_first + num_label_pairs); if (skip_missing_labels) { thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), @@ -254,7 +234,6 @@ void relabel(raft::handle_t const& handle, invalid_vertex_id::value) == 0, "Invalid input argument: labels include old label values missing in old_new_label_pairs."); } -#endif return; } diff --git a/cpp/src/experimental/renumber_edgelist.cu b/cpp/src/experimental/renumber_edgelist.cu index 860664aa8b3..005ba93d3c4 100644 --- a/cpp/src/experimental/renumber_edgelist.cu +++ b/cpp/src/experimental/renumber_edgelist.cu @@ -14,8 +14,6 @@ * limitations under the License. */ -#include - #include #include #include @@ -26,6 +24,7 @@ #include #include +#include #include #include #include @@ -47,7 +46,6 @@ namespace cugraph { namespace experimental { namespace detail { -#ifdef CUCO_STATIC_MAP_DEFINED template rmm::device_uvector compute_renumber_map( raft::handle_t const& handle, @@ -546,7 +544,6 @@ void expensive_check_edgelist( } } } -#endif template std::enable_if_t const& edgelist_edge_counts, bool do_expensive_check) { - // FIXME: remove this check once we drop Pascal support - CUGRAPH_EXPECTS( - handle.get_device_properties().major >= 7, - "This version of enumber_edgelist not supported on Pascal and older architectures."); - -#ifdef CUCO_STATIC_MAP_DEFINED auto& comm = handle.get_comms(); auto const comm_size = comm.get_size(); auto const comm_rank = comm.get_rank(); @@ -660,14 +651,10 @@ renumber_edgelist(raft::handle_t const& handle, invalid_vertex_id::value, invalid_vertex_id::value, stream_adapter}; - auto pair_first = thrust::make_transform_iterator( - thrust::make_zip_iterator(thrust::make_tuple( - col_comm_rank == static_cast(i) ? renumber_map_labels.begin() - : renumber_map_major_labels.begin(), - thrust::make_counting_iterator(partition.get_matrix_partition_major_first(i)))), - [] __device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); - }); + auto pair_first = thrust::make_zip_iterator(thrust::make_tuple( + col_comm_rank == static_cast(i) ? 
renumber_map_labels.begin() + : renumber_map_major_labels.begin(), + thrust::make_counting_iterator(partition.get_matrix_partition_major_first(i)))); renumber_map.insert(pair_first, pair_first + partition.get_matrix_partition_major_size(i)); renumber_map.find(edgelist_major_vertices[i], edgelist_major_vertices[i] + edgelist_edge_counts[i], @@ -713,13 +700,9 @@ renumber_edgelist(raft::handle_t const& handle, invalid_vertex_id::value, invalid_vertex_id::value, stream_adapter}; - auto pair_first = thrust::make_transform_iterator( - thrust::make_zip_iterator(thrust::make_tuple( - renumber_map_minor_labels.begin(), - thrust::make_counting_iterator(partition.get_matrix_partition_minor_first()))), - [] __device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); - }); + auto pair_first = thrust::make_zip_iterator(thrust::make_tuple( + renumber_map_minor_labels.begin(), + thrust::make_counting_iterator(partition.get_matrix_partition_minor_first()))); renumber_map.insert(pair_first, pair_first + renumber_map_minor_labels.size()); for (size_t i = 0; i < edgelist_major_vertices.size(); ++i) { renumber_map.find(edgelist_minor_vertices[i], @@ -740,12 +723,6 @@ renumber_edgelist(raft::handle_t const& handle, return std::make_tuple( std::move(renumber_map_labels), partition, number_of_vertices, number_of_edges); -#else - return std::make_tuple(rmm::device_uvector(0, handle.get_stream()), - partition_t{}, - vertex_t{0}, - edge_t{0}); -#endif } template @@ -757,12 +734,6 @@ std::enable_if_t> renumber_edgelist( edge_t num_edgelist_edges, bool do_expensive_check) { - // FIXME: remove this check once we drop Pascal support - CUGRAPH_EXPECTS( - handle.get_device_properties().major >= 7, - "This version of renumber_edgelist not supported on Pascal and older architectures."); - -#ifdef CUCO_STATIC_MAP_DEFINED if (do_expensive_check) { expensive_check_edgelist( handle, @@ -794,12 +765,8 @@ std::enable_if_t> renumber_edgelist( invalid_vertex_id::value, invalid_vertex_id::value, stream_adapter}; - auto pair_first = thrust::make_transform_iterator( - thrust::make_zip_iterator( - thrust::make_tuple(renumber_map_labels.begin(), thrust::make_counting_iterator(vertex_t{0}))), - [] __device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); - }); + auto pair_first = thrust::make_zip_iterator( + thrust::make_tuple(renumber_map_labels.begin(), thrust::make_counting_iterator(vertex_t{0}))); renumber_map.insert(pair_first, pair_first + renumber_map_labels.size()); renumber_map.find( edgelist_major_vertices, edgelist_major_vertices + num_edgelist_edges, edgelist_major_vertices); @@ -807,9 +774,6 @@ std::enable_if_t> renumber_edgelist( edgelist_minor_vertices, edgelist_minor_vertices + num_edgelist_edges, edgelist_minor_vertices); return renumber_map_labels; -#else - return rmm::device_uvector(0, handle.get_stream()); -#endif } } // namespace detail @@ -824,10 +788,6 @@ renumber_edgelist(raft::handle_t const& handle, std::vector const& edgelist_edge_counts, bool do_expensive_check) { - // FIXME: remove this check once we drop Pascal support - CUGRAPH_EXPECTS( - handle.get_device_properties().major >= 7, - "This version of renumber_edgelist not supported on Pascal and older architectures."); return detail::renumber_edgelist(handle, optional_local_vertex_span, edgelist_major_vertices, @@ -845,10 +805,6 @@ std::enable_if_t> renumber_edgelist( edge_t num_edgelist_edges, bool do_expensive_check) { - // FIXME: remove this check once we drop Pascal support - 
CUGRAPH_EXPECTS( - handle.get_device_properties().major >= 7, - "This version of renumber_edgelist not supported on Pascal and older architectures."); return detail::renumber_edgelist(handle, optional_vertex_span, edgelist_major_vertices, diff --git a/cpp/src/experimental/renumber_utils.cu b/cpp/src/experimental/renumber_utils.cu index 6def9eeb8b5..9cd2b9a1408 100644 --- a/cpp/src/experimental/renumber_utils.cu +++ b/cpp/src/experimental/renumber_utils.cu @@ -14,14 +14,13 @@ * limitations under the License. */ -#include - #include #include #include #include #include +#include #include #include @@ -49,11 +48,6 @@ void renumber_ext_vertices(raft::handle_t const& handle, { double constexpr load_factor = 0.7; - // FIXME: remove this check once we drop Pascal support - CUGRAPH_EXPECTS(handle.get_device_properties().major >= 7, - "renumber_vertices() not supported on Pascal and older architectures."); - -#ifdef CUCO_STATIC_MAP_DEFINED if (do_expensive_check) { rmm::device_uvector labels(local_int_vertex_last - local_int_vertex_first, handle.get_stream()); @@ -126,12 +120,8 @@ void renumber_ext_vertices(raft::handle_t const& handle, invalid_vertex_id::value, stream_adapter); - auto kv_pair_first = thrust::make_transform_iterator( - thrust::make_zip_iterator(thrust::make_tuple( - sorted_unique_ext_vertices.begin(), int_vertices_for_sorted_unique_ext_vertices.begin())), - [] __device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); - }); + auto kv_pair_first = thrust::make_zip_iterator(thrust::make_tuple( + sorted_unique_ext_vertices.begin(), int_vertices_for_sorted_unique_ext_vertices.begin())); renumber_map_ptr->insert(kv_pair_first, kv_pair_first + sorted_unique_ext_vertices.size()); } else { handle.get_stream_view().synchronize(); // cuco::static_map currently does not take stream @@ -148,12 +138,8 @@ void renumber_ext_vertices(raft::handle_t const& handle, invalid_vertex_id::value, stream_adapter); - auto pair_first = thrust::make_transform_iterator( - thrust::make_zip_iterator( - thrust::make_tuple(renumber_map_labels, thrust::make_counting_iterator(vertex_t{0}))), - [] __device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); - }); + auto pair_first = thrust::make_zip_iterator( + thrust::make_tuple(renumber_map_labels, thrust::make_counting_iterator(vertex_t{0}))); renumber_map_ptr->insert(pair_first, pair_first + (local_int_vertex_last - local_int_vertex_first)); } @@ -177,7 +163,6 @@ void renumber_ext_vertices(raft::handle_t const& handle, } renumber_map_ptr->find(vertices, vertices + num_vertices, vertices); -#endif } template @@ -190,11 +175,6 @@ void unrenumber_local_int_vertices( vertex_t local_int_vertex_last, bool do_expensive_check) { - // FIXME: remove this check once we drop Pascal support - CUGRAPH_EXPECTS(handle.get_device_properties().major >= 7, - "unrenumber_local_vertices() not supported on Pascal and older architectures."); - -#ifdef CUCO_STATIC_MAP_DEFINED if (do_expensive_check) { CUGRAPH_EXPECTS( thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), @@ -217,7 +197,6 @@ void unrenumber_local_int_vertices( ? 
v : renumber_map_labels[v - local_int_vertex_first]; }); -#endif } template @@ -232,11 +211,6 @@ void unrenumber_int_vertices(raft::handle_t const& handle, { double constexpr load_factor = 0.7; - // FIXME: remove this check once we drop Pascal support - CUGRAPH_EXPECTS(handle.get_device_properties().major >= 7, - "unrenumber_vertices() not supported on Pascal and older architectures."); - -#ifdef CUCO_STATIC_MAP_DEFINED if (do_expensive_check) { CUGRAPH_EXPECTS( thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), @@ -331,13 +305,8 @@ void unrenumber_int_vertices(raft::handle_t const& handle, invalid_vertex_id::value, stream_adapter}; - auto pair_first = thrust::make_transform_iterator( - thrust::make_zip_iterator( - thrust::make_tuple(sorted_unique_int_vertices.begin(), - rx_ext_vertices_for_sorted_unique_int_vertices.begin())), - [] __device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); - }); + auto pair_first = thrust::make_zip_iterator(thrust::make_tuple( + sorted_unique_int_vertices.begin(), rx_ext_vertices_for_sorted_unique_int_vertices.begin())); unrenumber_map.insert(pair_first, pair_first + sorted_unique_int_vertices.size()); unrenumber_map.find(vertices, vertices + num_vertices, vertices); } else { @@ -349,7 +318,6 @@ void unrenumber_int_vertices(raft::handle_t const& handle, local_int_vertex_last, do_expensive_check); } -#endif } // explicit instantiation diff --git a/python/cugraph/tests/dask/test_mg_louvain.py b/python/cugraph/tests/dask/test_mg_louvain.py index bd7374fb75e..c67d8fcb1f9 100644 --- a/python/cugraph/tests/dask/test_mg_louvain.py +++ b/python/cugraph/tests/dask/test_mg_louvain.py @@ -17,7 +17,6 @@ import cugraph import dask_cudf from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than from cugraph.dask.common.mg_utils import (is_single_gpu, setup_local_dask_cluster, teardown_local_dask_cluster) @@ -88,15 +87,11 @@ def test_mg_louvain_with_edgevals(daskGraphFromDataset): # FIXME: daskGraphFromDataset returns a DiGraph, which Louvain is currently # accepting. In the future, an MNMG symmeterize will need to be called to # create a Graph for Louvain. - if is_device_version_less_than((7, 0)): - with pytest.raises(RuntimeError): - parts, mod = dcg.louvain(daskGraphFromDataset) - else: - parts, mod = dcg.louvain(daskGraphFromDataset) + parts, mod = dcg.louvain(daskGraphFromDataset) - # FIXME: either call Nx with the same dataset and compare results, or - # hardcode golden results to compare to. - print() - print(parts.compute()) - print(mod) - print() + # FIXME: either call Nx with the same dataset and compare results, or + # hardcode golden results to compare to. 
+ print() + print(parts.compute()) + print(mod) + print() diff --git a/python/cugraph/tests/test_ecg.py b/python/cugraph/tests/test_ecg.py index ba705a787ee..e51ef9b7a98 100644 --- a/python/cugraph/tests/test_ecg.py +++ b/python/cugraph/tests/test_ecg.py @@ -18,7 +18,6 @@ import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than from pathlib import PurePath @@ -61,32 +60,20 @@ def golden_call(graph_file): def test_ecg_clustering(graph_file, min_weight, ensemble_size): gc.collect() - if is_device_version_less_than((7, 0)): - cu_M = utils.read_csv_file(graph_file, read_weights_in_sp=False) - G = cugraph.Graph() - G.from_cudf_edgelist(cu_M, source="0", destination="1", edge_attr="2") + # Read in the graph and get a cugraph object + cu_M = utils.read_csv_file(graph_file, read_weights_in_sp=False) + G = cugraph.Graph() + G.from_cudf_edgelist(cu_M, source="0", destination="1", edge_attr="2") - # Get the modularity score for partitioning versus random assignment - with pytest.raises(RuntimeError): - cu_score, num_parts = cugraph_call(G, min_weight, ensemble_size) - else: - # Read in the graph and get a cugraph object - cu_M = utils.read_csv_file(graph_file, read_weights_in_sp=False) - G = cugraph.Graph() - G.from_cudf_edgelist(cu_M, source="0", destination="1", edge_attr="2") - - # Get the modularity score for partitioning versus random assignment - cu_score, num_parts = cugraph_call(G, min_weight, ensemble_size) - golden_score = golden_call(graph_file) + # Get the modularity score for partitioning versus random assignment + cu_score, num_parts = cugraph_call(G, min_weight, ensemble_size) + golden_score = golden_call(graph_file) - # Assert that the partitioning has better modularity than the random - # assignment - assert cu_score > (0.95 * golden_score) + # Assert that the partitioning has better modularity than the random + # assignment + assert cu_score > (0.95 * golden_score) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", DATASETS) @pytest.mark.parametrize("min_weight", MIN_WEIGHTS) @pytest.mark.parametrize("ensemble_size", ENSEMBLE_SIZES) diff --git a/python/cugraph/tests/test_leiden.py b/python/cugraph/tests/test_leiden.py index 89203d5014c..b6c23dad6f2 100644 --- a/python/cugraph/tests/test_leiden.py +++ b/python/cugraph/tests/test_leiden.py @@ -20,8 +20,6 @@ import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than - # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from # 'collections.abc' is deprecated, and in 3.8 it will stop working) for @@ -55,9 +53,6 @@ def cugraph_louvain(G, edgevals=False): return parts, mod -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_leiden(graph_file): gc.collect() @@ -78,9 +73,6 @@ def test_leiden(graph_file): assert leiden_mod >= (0.99 * louvain_mod) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_leiden_nx(graph_file): gc.collect() diff --git a/python/cugraph/tests/test_louvain.py b/python/cugraph/tests/test_louvain.py index 50e9ccaa4c5..fc112b8d657 100644 --- a/python/cugraph/tests/test_louvain.py +++ b/python/cugraph/tests/test_louvain.py @@ -18,7 +18,6 @@ 
import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -72,63 +71,53 @@ def networkx_call(M): def test_louvain_with_edgevals(graph_file): gc.collect() - if is_device_version_less_than((7, 0)): - cu_M = utils.read_csv_file(graph_file) - with pytest.raises(RuntimeError): - cu_parts, cu_mod = cugraph_call(cu_M) - else: - M = utils.read_csv_for_nx(graph_file) - cu_M = utils.read_csv_file(graph_file) - cu_parts, cu_mod = cugraph_call(cu_M, edgevals=True) + M = utils.read_csv_for_nx(graph_file) + cu_M = utils.read_csv_file(graph_file) + cu_parts, cu_mod = cugraph_call(cu_M, edgevals=True) - nx_parts = networkx_call(M) - # Calculating modularity scores for comparison - Gnx = nx.from_pandas_edgelist( - M, source="0", target="1", - edge_attr="weight", create_using=nx.Graph() - ) + nx_parts = networkx_call(M) + # Calculating modularity scores for comparison + Gnx = nx.from_pandas_edgelist( + M, source="0", target="1", + edge_attr="weight", create_using=nx.Graph() + ) - cu_parts = cu_parts.to_pandas() - cu_map = dict(zip(cu_parts["vertex"], cu_parts["partition"])) + cu_parts = cu_parts.to_pandas() + cu_map = dict(zip(cu_parts["vertex"], cu_parts["partition"])) - assert set(nx_parts.keys()) == set(cu_map.keys()) + assert set(nx_parts.keys()) == set(cu_map.keys()) - cu_mod_nx = community.modularity(cu_map, Gnx) - nx_mod = community.modularity(nx_parts, Gnx) + cu_mod_nx = community.modularity(cu_map, Gnx) + nx_mod = community.modularity(nx_parts, Gnx) - assert len(cu_parts) == len(nx_parts) - assert cu_mod > (0.82 * nx_mod) - assert abs(cu_mod - cu_mod_nx) < 0.0001 + assert len(cu_parts) == len(nx_parts) + assert cu_mod > (0.82 * nx_mod) + assert abs(cu_mod - cu_mod_nx) < 0.0001 @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_louvain(graph_file): gc.collect() - if is_device_version_less_than((7, 0)): - cu_M = utils.read_csv_file(graph_file) - with pytest.raises(RuntimeError): - cu_parts, cu_mod = cugraph_call(cu_M) - else: - M = utils.read_csv_for_nx(graph_file) - cu_M = utils.read_csv_file(graph_file) - cu_parts, cu_mod = cugraph_call(cu_M) - nx_parts = networkx_call(M) + M = utils.read_csv_for_nx(graph_file) + cu_M = utils.read_csv_file(graph_file) + cu_parts, cu_mod = cugraph_call(cu_M) + nx_parts = networkx_call(M) - # Calculating modularity scores for comparison - Gnx = nx.from_pandas_edgelist( - M, source="0", target="1", - edge_attr="weight", create_using=nx.Graph() - ) + # Calculating modularity scores for comparison + Gnx = nx.from_pandas_edgelist( + M, source="0", target="1", + edge_attr="weight", create_using=nx.Graph() + ) - cu_parts = cu_parts.to_pandas() - cu_map = dict(zip(cu_parts["vertex"], cu_parts["partition"])) + cu_parts = cu_parts.to_pandas() + cu_map = dict(zip(cu_parts["vertex"], cu_parts["partition"])) - assert set(nx_parts.keys()) == set(cu_map.keys()) + assert set(nx_parts.keys()) == set(cu_map.keys()) - cu_mod_nx = community.modularity(cu_map, Gnx) - nx_mod = community.modularity(nx_parts, Gnx) + cu_mod_nx = community.modularity(cu_map, Gnx) + nx_mod = community.modularity(nx_parts, Gnx) - assert len(cu_parts) == len(nx_parts) - assert cu_mod > (0.82 * nx_mod) - assert abs(cu_mod - cu_mod_nx) < 0.0001 + assert len(cu_parts) == len(nx_parts) + assert cu_mod > (0.82 * nx_mod) + assert abs(cu_mod - cu_mod_nx) < 0.0001 From 
0d20a8b78da99b29445ee8a2e3e302e65e6172b0 Mon Sep 17 00:00:00 2001 From: Rick Ratzel <3039903+rlratzel@users.noreply.github.com> Date: Thu, 3 Jun 2021 10:06:04 -0500 Subject: [PATCH 284/343] Upgraded recipe and dev envs to NCCL 2.9.9 (#1636) Upgraded recipe and dev envs to NCCL 2.9.9. The `cugraph` python conda recipe previously included NCCL, but (I think) it's a transient dependency since `libcugraph` actually needs it, so it was removed from the `cugraph` recipe. cc @dantegd for consistency with cuML. Note: the NCCL 2.9.9 upgrade was tested during benchmark runs, but the conda recipe changes have not been tested. Authors: - Rick Ratzel (https://github.com/rlratzel) Approvers: - Jordan Jacobelli (https://github.com/Ethyling) - Chuck Hastings (https://github.com/ChuckHastings) - Seunghwa Kang (https://github.com/seunghwak) URL: https://github.com/rapidsai/cugraph/pull/1636 --- conda/environments/cugraph_dev_cuda11.0.yml | 2 +- conda/environments/cugraph_dev_cuda11.1.yml | 2 +- conda/environments/cugraph_dev_cuda11.2.yml | 2 +- conda/recipes/cugraph/meta.yaml | 1 - conda/recipes/libcugraph/meta.yaml | 4 ++-- 5 files changed, 5 insertions(+), 6 deletions(-) diff --git a/conda/environments/cugraph_dev_cuda11.0.yml b/conda/environments/cugraph_dev_cuda11.0.yml index 0315304363b..0288ca525f9 100644 --- a/conda/environments/cugraph_dev_cuda11.0.yml +++ b/conda/environments/cugraph_dev_cuda11.0.yml @@ -14,7 +14,7 @@ dependencies: - distributed>=2.12.0 - dask-cuda=21.06* - dask-cudf=21.06* -- nccl>=2.8.4 +- nccl>=2.9.9 - ucx-py=0.20.* - ucx-proc=*=gpu - scipy diff --git a/conda/environments/cugraph_dev_cuda11.1.yml b/conda/environments/cugraph_dev_cuda11.1.yml index b9b1dfdafc3..15985efb075 100644 --- a/conda/environments/cugraph_dev_cuda11.1.yml +++ b/conda/environments/cugraph_dev_cuda11.1.yml @@ -14,7 +14,7 @@ dependencies: - distributed>=2.12.0 - dask-cuda=21.06* - dask-cudf=21.06* -- nccl>=2.8.4 +- nccl>=2.9.9 - ucx-py=0.20.* - ucx-proc=*=gpu - scipy diff --git a/conda/environments/cugraph_dev_cuda11.2.yml b/conda/environments/cugraph_dev_cuda11.2.yml index e35a575e8bb..b09463a83a1 100644 --- a/conda/environments/cugraph_dev_cuda11.2.yml +++ b/conda/environments/cugraph_dev_cuda11.2.yml @@ -14,7 +14,7 @@ dependencies: - distributed>=2.12.0 - dask-cuda=21.06* - dask-cudf=21.06* -- nccl>=2.8.4 +- nccl>=2.9.9 - ucx-py=0.20.* - ucx-proc=*=gpu - scipy diff --git a/conda/recipes/cugraph/meta.yaml b/conda/recipes/cugraph/meta.yaml index d750c1de188..c5c59e405e5 100644 --- a/conda/recipes/cugraph/meta.yaml +++ b/conda/recipes/cugraph/meta.yaml @@ -37,7 +37,6 @@ requirements: - dask-cuda {{ minor_version }} - dask>=2.12.0 - distributed>=2.12.0 - - nccl>=2.8.4 - ucx-py 0.20 - ucx-proc=*=gpu diff --git a/conda/recipes/libcugraph/meta.yaml b/conda/recipes/libcugraph/meta.yaml index afab0d01dba..144eb85ce9e 100644 --- a/conda/recipes/libcugraph/meta.yaml +++ b/conda/recipes/libcugraph/meta.yaml @@ -35,7 +35,7 @@ requirements: - cudatoolkit {{ cuda_version }}.* - librmm {{ minor_version }}.* - boost-cpp>=1.66 - - nccl>=2.8.4 + - nccl>=2.9.9 - ucx-proc=*=gpu - gtest - gmock @@ -43,7 +43,7 @@ requirements: - conda-forge::libfaiss=1.7.0 run: - {{ pin_compatible('cudatoolkit', max_pin='x.x') }} - - nccl>=2.8.4 + - nccl>=2.9.9 - ucx-proc=*=gpu - faiss-proc=*=cuda - conda-forge::libfaiss=1.7.0 From 4e20f7338711e108263274c265e40ca8dc7298fc Mon Sep 17 00:00:00 2001 From: Chuck Hastings <45364586+ChuckHastings@users.noreply.github.com> Date: Thu, 3 Jun 2021 12:04:06 -0400 Subject: [PATCH 285/343] adding test graphs 
- part 2 (#1603) This effort was originally targeted toward the WCC effort, but has been expanded a bit. This supersedes #1545 which I will close. The goal here is to create a means for constructing test graphs in an easier fashion. Testing the capabilities of different graph algorithms might require a variety of graphs. The objective of this PR is to better organize the graph generation components and to introduce some components to help in composing graphs out of multiple components. This PR introduces the following capabilities: * Create an ER graph * Create a collection of Complete Graphs * Create a collection of 2D mesh graphs * Create a collection of 3D mesh graphs * Create a random path graph (connect all vertices with a single randomly ordered path) * Translate vertex ids of a graph * Combine multiple edge lists into a single graph Closes #1543 Authors: - Chuck Hastings (https://github.com/ChuckHastings) Approvers: - Andrei Schaffer (https://github.com/aschaffer) - Brad Rees (https://github.com/BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1603 --- cpp/CMakeLists.txt | 5 +- .../cugraph/experimental/graph_generator.hpp | 139 ---- cpp/include/cugraph/graph_generators.hpp | 364 +++++++++ cpp/include/cugraph/utilities/cython.hpp | 6 +- cpp/src/generators/erdos_renyi_generator.cu | 122 ++++ .../generate_rmat_edgelist.cu | 43 +- cpp/src/generators/generator_tools.cu | 301 ++++++++ cpp/src/generators/simple_generators.cu | 343 +++++++++ cpp/src/utilities/cython.cu | 85 ++- cpp/tests/CMakeLists.txt | 16 + cpp/tests/community/mg_louvain_helper.cu | 42 -- cpp/tests/community/mg_louvain_helper.hpp | 5 + cpp/tests/community/mg_louvain_test.cpp | 2 +- .../mg_weakly_connected_components_test.cpp | 32 +- cpp/tests/components/wcc_test.cpp | 26 +- .../weakly_connected_components_test.cpp | 2 +- cpp/tests/experimental/bfs_test.cpp | 1 + cpp/tests/experimental/generate_rmat_test.cpp | 82 +-- .../experimental/katz_centrality_test.cpp | 1 + cpp/tests/experimental/mg_bfs_test.cpp | 31 +- .../experimental/mg_katz_centrality_test.cpp | 5 +- cpp/tests/experimental/mg_sssp_test.cpp | 31 +- cpp/tests/experimental/pagerank_test.cpp | 1 + cpp/tests/experimental/sssp_test.cpp | 1 + cpp/tests/generators/erdos_renyi_test.cpp | 98 +++ cpp/tests/generators/generators_test.cpp | 689 ++++++++++++++++++ cpp/tests/pagerank/mg_pagerank_test.cpp | 40 +- cpp/tests/utilities/rmat_utilities.cu | 19 +- cpp/tests/utilities/test_graphs.hpp | 558 ++++++++++++++ cpp/tests/utilities/test_utilities.hpp | 162 ++-- cpp/tests/utilities/thrust_wrapper.cu | 48 ++ cpp/tests/utilities/thrust_wrapper.hpp | 11 + python/cugraph/generators/rmat.pxd | 6 +- 33 files changed, 2811 insertions(+), 506 deletions(-) delete mode 100644 cpp/include/cugraph/experimental/graph_generator.hpp create mode 100644 cpp/include/cugraph/graph_generators.hpp create mode 100644 cpp/src/generators/erdos_renyi_generator.cu rename cpp/src/{experimental => generators}/generate_rmat_edgelist.cu (84%) create mode 100644 cpp/src/generators/generator_tools.cu create mode 100644 cpp/src/generators/simple_generators.cu create mode 100644 cpp/tests/generators/erdos_renyi_test.cpp create mode 100644 cpp/tests/generators/generators_test.cpp create mode 100644 cpp/tests/utilities/test_graphs.hpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index d1f50c42b14..8c85e18fd2f 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -185,7 +185,10 @@ add_library(cugraph SHARED src/components/connectivity.cu src/centrality/katz_centrality.cu 
src/centrality/betweenness_centrality.cu - src/experimental/generate_rmat_edgelist.cu + src/generators/generate_rmat_edgelist.cu + src/generators/generator_tools.cu + src/generators/simple_generators.cu + src/generators/erdos_renyi_generator.cu src/experimental/graph.cu src/experimental/graph_view.cu src/experimental/coarsen_graph.cu diff --git a/cpp/include/cugraph/experimental/graph_generator.hpp b/cpp/include/cugraph/experimental/graph_generator.hpp deleted file mode 100644 index 78a73e2fe05..00000000000 --- a/cpp/include/cugraph/experimental/graph_generator.hpp +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Copyright (c) 2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include - -#include -#include - -#include -#include - -namespace cugraph { -namespace experimental { - -/** - * @brief generate an edge list for an R-mat graph. - * - * This function allows multi-edges and self-loops similar to the Graph 500 reference - * implementation. - * - * @p scramble_vertex_ids needs to be set to `true` to generate a graph conforming to the Graph 500 - * specification (note that scrambling does not affect cuGraph's graph construction performance, so - * this is generally unnecessary). If `edge_factor` is given (e.g. Graph 500), set @p num_edges to - * (size_t{1} << @p scale) * `edge_factor`. To generate an undirected graph, set @p b == @p c and @p - * clip_and_flip = true. All the resulting edges will be placed in the lower triangular part - * (inculding the diagonal) of the graph adjacency matrix. - * - * For multi-GPU generation with `P` GPUs, @p seed should be set to different values in different - * GPUs to avoid every GPU generating the same set of edges. @p num_edges should be adjusted as - * well; e.g. assuming `edge_factor` is given, set @p num_edges = (size_t{1} << @p scale) * - * `edge_factor` / `P` + (rank < (((size_t{1} << @p scale) * `edge_factor`) % P) ? 1 : 0). - * - * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. - * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and - * handles to various CUDA libraries) to run graph algorithms. - * @param scale Scale factor to set the number of verties in the graph. Vertex IDs have values in - * [0, V), where V = 1 << @p scale. - * @param num_edges Number of edges to generate. - * @param a a, b, c, d (= 1.0 - (a + b + c)) in the R-mat graph generator (vist https://graph500.org - * for additional details). a, b, c, d should be non-negative and a + b + c should be no larger - * than 1.0. - * @param b a, b, c, d (= 1.0 - (a + b + c)) in the R-mat graph generator (vist https://graph500.org - * for additional details). a, b, c, d should be non-negative and a + b + c should be no larger - * than 1.0. - * @param c a, b, c, d (= 1.0 - (a + b + c)) in the R-mat graph generator (vist https://graph500.org - * for additional details). a, b, c, d should be non-negative and a + b + c should be no larger - * than 1.0. 
- * @param seed Seed value for the random number generator. - * @param clip_and_flip Flag controlling whether to generate edges only in the lower triangular part - * (including the diagonal) of the graph adjacency matrix (if set to `true`) or not (if set to - * `false`). - * @param scramble_vertex_ids Flag controlling whether to scramble vertex ID bits (if set to `true`) - * or not (if set to `false`); scrambling vertx ID bits breaks correlation between vertex ID values - * and vertex degrees. The scramble code here follows the algorithm in the Graph 500 reference - * implementation version 3.0.0. - * @return std::tuple, rmm::device_uvector> A tuple of - * rmm::device_uvector objects for edge source vertex IDs and edge destination vertex IDs. - */ -template -std::tuple, rmm::device_uvector> generate_rmat_edgelist( - raft::handle_t const& handle, - size_t scale, - size_t num_edges, - double a = 0.57, - double b = 0.19, - double c = 0.19, - uint64_t seed = 0, - bool clip_and_flip = false, - bool scramble_vertex_ids = false); - -enum class generator_distribution_t { POWER_LAW = 0, UNIFORM }; - -/** - * @brief generate multiple edge lists using the R-mat graph generator. - * - * This function allows multi-edges and self-loops similar to the Graph 500 reference - * implementation. - * - * @p scramble_vertex_ids needs to be set to `true` to generate a graph conforming to the Graph 500 - * specification (note that scrambling does not affect cuGraph's graph construction performance, so - * this is generally unnecessary). If `edge_factor` is given (e.g. Graph 500), set @p num_edges to - * (size_t{1} << @p scale) * `edge_factor`. To generate an undirected graph, set @p b == @p c and @p - * clip_and_flip = true. All the resulting edges will be placed in the lower triangular part - * (inculding the diagonal) of the graph adjacency matrix. - * - * - * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. - * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and - * handles to various CUDA libraries) to run graph algorithms. - * @param n_edgelists Number of edge lists (graphs) to generate - * @param min_scale Scale factor to set the minimum number of verties in the graph. - * @param max_scale Scale factor to set the maximum number of verties in the graph. - * @param edge_factor Average number of edges per vertex to generate. - * @param size_distribution Distribution of the graph sizes, impacts the scale parameter of the - * R-MAT generator - * @param edge_distribution Edges distribution for each graph, impacts how R-MAT parameters a,b,c,d, - * are set. - * @param seed Seed value for the random number generator. - * @param clip_and_flip Flag controlling whether to generate edges only in the lower triangular part - * (including the diagonal) of the graph adjacency matrix (if set to `true`) or not (if set to - * `false`). - * @param scramble_vertex_ids Flag controlling whether to scramble vertex ID bits (if set to `true`) - * or not (if set to `false`); scrambling vertx ID bits breaks correlation between vertex ID values - * and vertex degrees. The scramble code here follows the algorithm in the Graph 500 reference - * implementation version 3.0.0. - * @return A vector of std::tuple, rmm::device_uvector> of - *size @p n_edgelists, each vector element being a tuple of rmm::device_uvector objects for edge - *source vertex IDs and edge destination vertex IDs. 
- */ -template -std::vector, rmm::device_uvector>> -generate_rmat_edgelists( - raft::handle_t const& handle, - size_t n_edgelists, - size_t min_scale, - size_t max_scale, - size_t edge_factor = 16, - generator_distribution_t size_distribution = generator_distribution_t::POWER_LAW, - generator_distribution_t edge_distribution = generator_distribution_t::POWER_LAW, - uint64_t seed = 0, - bool clip_and_flip = false, - bool scramble_vertex_ids = false); - -} // namespace experimental -} // namespace cugraph diff --git a/cpp/include/cugraph/graph_generators.hpp b/cpp/include/cugraph/graph_generators.hpp new file mode 100644 index 00000000000..9bd002b4299 --- /dev/null +++ b/cpp/include/cugraph/graph_generators.hpp @@ -0,0 +1,364 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +#include +#include +#include + +namespace cugraph { + +/** + * @brief generate an edge list for an R-mat graph. + * + * This function allows multi-edges and self-loops similar to the Graph 500 reference + * implementation. + * + * NOTE: The scramble_vertex_ids function needs to be called in order to generate a + * graph conforming to the Graph 500 specification (note that scrambling does not + * affect cuGraph's graph construction performance, so this is generally unnecessary). + * If `edge_factor` is given (e.g. Graph 500), set @p num_edges to + * (size_t{1} << @p scale) * `edge_factor`. To generate an undirected graph, set @p b == @p c and @p + * clip_and_flip = true. All the resulting edges will be placed in the lower triangular part + * (including the diagonal) of the graph adjacency matrix. + * + * For multi-GPU generation with `P` GPUs, @p seed should be set to different values in different + * GPUs to avoid every GPU generating the same set of edges. @p num_edges should be adjusted as + * well; e.g. assuming `edge_factor` is given, set @p num_edges = (size_t{1} << @p scale) * + * `edge_factor` / `P` + (rank < (((size_t{1} << @p scale) * `edge_factor`) % P) ? 1 : 0). + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param scale Scale factor to set the number of verties in the graph. Vertex IDs have values in + * [0, V), where V = 1 << @p scale. + * @param num_edges Number of edges to generate. + * @param a a, b, c, d (= 1.0 - (a + b + c)) in the R-mat graph generator (vist https://graph500.org + * for additional details). a, b, c, d should be non-negative and a + b + c should be no larger + * than 1.0. + * @param b a, b, c, d (= 1.0 - (a + b + c)) in the R-mat graph generator (vist https://graph500.org + * for additional details). a, b, c, d should be non-negative and a + b + c should be no larger + * than 1.0. 
+ * @param c a, b, c, d (= 1.0 - (a + b + c)) in the R-mat graph generator (vist https://graph500.org + * for additional details). a, b, c, d should be non-negative and a + b + c should be no larger + * than 1.0. + * @param seed Seed value for the random number generator. + * @param clip_and_flip Flag controlling whether to generate edges only in the lower triangular part + * (including the diagonal) of the graph adjacency matrix (if set to `true`) or not (if set to + * `false`). + * @return std::tuple, rmm::device_uvector> A tuple of + * rmm::device_uvector objects for edge source vertex IDs and edge destination vertex IDs. + */ +template +std::tuple, rmm::device_uvector> generate_rmat_edgelist( + raft::handle_t const &handle, + size_t scale, + size_t num_edges, + double a = 0.57, + double b = 0.19, + double c = 0.19, + uint64_t seed = 0, + bool clip_and_flip = false); + +enum class generator_distribution_t { POWER_LAW = 0, UNIFORM }; + +/** + * @brief generate multiple edge lists using the R-mat graph generator. + * + * This function allows multi-edges and self-loops similar to the Graph 500 reference + * implementation. + * + * NOTE: The scramble_vertex_ids function needs to be called in order to generate a + * graph conforming to the Graph 500 specification (note that scrambling does not + * affect cuGraph's graph construction performance, so this is generally unnecessary). + * If `edge_factor` is given (e.g. Graph 500), set @p num_edges to + * (size_t{1} << @p scale) * `edge_factor`. To generate an undirected graph, set @p b == @p c and @p + * clip_and_flip = true. All the resulting edges will be placed in the lower triangular part + * (including the diagonal) of the graph adjacency matrix. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param n_edgelists Number of edge lists (graphs) to generate + * @param min_scale Scale factor to set the minimum number of verties in the graph. + * @param max_scale Scale factor to set the maximum number of verties in the graph. + * @param edge_factor Average number of edges per vertex to generate. + * @param size_distribution Distribution of the graph sizes, impacts the scale parameter of the + * R-MAT generator + * @param edge_distribution Edges distribution for each graph, impacts how R-MAT parameters a,b,c,d, + * are set. + * @param seed Seed value for the random number generator. + * @param clip_and_flip Flag controlling whether to generate edges only in the lower triangular part + * (including the diagonal) of the graph adjacency matrix (if set to `true`) or not (if set to + * `false`). + * @return A vector of std::tuple, rmm::device_uvector> of + *size @p n_edgelists, each vector element being a tuple of rmm::device_uvector objects for edge + *source vertex IDs and edge destination vertex IDs. 
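Illustrative usage sketch (not part of the patch): the generate_rmat_edgelist declaration above pairs with the scramble_vertex_ids helper declared later in this header, and the cython.cu change further down follows the same generate-then-scramble sequence. The include paths, the int32_t vertex type, and the Graph 500 style sizing below are assumptions chosen for illustration only.

#include <cugraph/graph_generators.hpp>
#include <raft/handle.hpp>

void rmat_sketch(raft::handle_t const& handle)
{
  // Graph 500 style sizing: V = 1 << scale vertices, edge_factor edges per vertex.
  size_t const scale       = 20;
  size_t const edge_factor = 16;
  size_t const num_edges   = (size_t{1} << scale) * edge_factor;

  // a = 0.57, b = c = 0.19 match the documented defaults.
  auto [srcs, dsts] = cugraph::generate_rmat_edgelist<int32_t>(
    handle, scale, num_edges, 0.57, 0.19, 0.19, /* seed = */ 0, /* clip_and_flip = */ false);

  // Scrambling is now a separate, optional step rather than a generator flag.
  cugraph::scramble_vertex_ids(handle, srcs, dsts, int32_t{0}, /* seed = */ 0);
}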
+ */ +template +std::vector, rmm::device_uvector>> +generate_rmat_edgelists( + raft::handle_t const &handle, + size_t n_edgelists, + size_t min_scale, + size_t max_scale, + size_t edge_factor = 16, + generator_distribution_t size_distribution = generator_distribution_t::POWER_LAW, + generator_distribution_t edge_distribution = generator_distribution_t::POWER_LAW, + uint64_t seed = 0, + bool clip_and_flip = false); + +/** + * @brief generate an edge list for path graph + * + * A path graph of size n connects the vertices from 0 to (n - 1) + * in a single long path: ((0,1), (1,2), ..., (n - 2, n - 1) + * + * If executed in a multi-gpu context (handle comms has been initialized) + * the path will span all GPUs including an edge from the last vertex on + * GPU i to the first vertex on GPU (i+1) + * + * This function will generate a collection of path graphs. @p component_parameters_v + * defines the parameters for generating each component. Each element of + * @p component_parameters_v defines a tuple consisting of the number of vertices + * and the base vertex id for the component. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param component_parameters_v A vector containing tuples consisting of the number of vertices and + * base vertex id for each component to generate. + * @return std::tuple, rmm::device_uvector> A tuple of + * rmm::device_uvector objects for edge source vertex IDs and edge destination vertex IDs. + */ +template +std::tuple, rmm::device_uvector> +generate_path_graph_edgelist( + raft::handle_t const &handle, + std::vector> const &component_parameters_v); + +/** + * @brief generate an edge list for a 2D Mesh Graph + * + * A sequence of 2D mesh graphs will be constructed according to the + * component specifications. Each 2D mesh graph is configured with a tuple + * containing (x, y, base_vertex_id). @p component_parameters_v will contain + * a tuple for each component. + * + * If executed in a multi-gpu context (handle comms has been initialized) + * each GPU will generate disjoint 2D mesh constructs of equal size. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param component_parameters_v Vector containing tuple defining the configuration of each + * component + * @return std::tuple, rmm::device_uvector> A tuple of + * rmm::device_uvector objects for edge source vertex IDs and edge destination vertex IDs. + */ +template +std::tuple, rmm::device_uvector> +generate_2d_mesh_graph_edgelist( + raft::handle_t const &handle, + std::vector> const &component_parameters_v); + +/** + * @brief generate an edge list for a 3D Mesh Graph + * + * A sequence of 3D mesh graphs will be constructed according to the + * component specifications. Each 3D mesh graph is configured with a tuple + * containing (x, y, z, base_vertex_id). @p component_parameters_v will contain + * a tuple for each component. + * + * If executed in a multi-gpu context (handle comms has been initialized) + * each GPU will generate disjoint 3D mesh constructs of equal size. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. 
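Illustrative usage sketch (not part of the patch): the path and 2D mesh generators above are driven entirely by their component parameter tuples, (number_of_vertices, base_vertex_id) for paths and (x, y, base_vertex_id) for 2D meshes; the 3D variant documented next adds a z dimension. The vertex type and component sizes below are assumptions for illustration.

#include <cugraph/graph_generators.hpp>
#include <raft/handle.hpp>

#include <tuple>
#include <vector>

void component_sketch(raft::handle_t const& handle)
{
  using vertex_t = int32_t;

  // Two disjoint paths: 5 vertices starting at id 0, then 8 vertices starting at id 5.
  std::vector<std::tuple<vertex_t, vertex_t>> path_parms{{5, 0}, {8, 5}};
  auto [path_srcs, path_dsts] =
    cugraph::generate_path_graph_edgelist<vertex_t>(handle, path_parms);

  // One 4 x 3 mesh whose vertex ids start where the paths left off (13).
  std::vector<std::tuple<vertex_t, vertex_t, vertex_t>> mesh_parms{{4, 3, 13}};
  auto [mesh_srcs, mesh_dsts] =
    cugraph::generate_2d_mesh_graph_edgelist<vertex_t>(handle, mesh_parms);
}

These calls return plain edge lists; combine_edgelists, declared further down in this header, is the piece this PR adds for merging such components into a single graph.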
+ * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param component_parameters_v Vector containing tuple defining the configuration of each + * component + * @return std::tuple, rmm::device_uvector> A tuple of + * rmm::device_uvector objects for edge source vertex IDs and edge destination vertex IDs. + */ +template +std::tuple, rmm::device_uvector> +generate_3d_mesh_graph_edgelist( + raft::handle_t const &handle, + std::vector> const &component_parameters_v); + +/** + * @brief generate an edge lists for some complete graphs + * + * A sequence of complete graphs will be constructed according to the + * component specifications. Each complete graph is configured with a tuple + * containing (n, base_vertex_id). @p component_parameters_v will contain + * a tuple for each component. + * + * If executed in a multi-gpu context (handle comms has been initialized) + * each GPU will generate disjoint complete graph constructs of equal size. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param component_parameters_v Vector containing tuple defining the configuration of each + * component + * @return std::tuple, rmm::device_uvector> A tuple of + * rmm::device_uvector objects for edge source vertex IDs and edge destination vertex IDs. + */ +template +std::tuple, rmm::device_uvector> +generate_complete_graph_edgelist( + raft::handle_t const &handle, + std::vector> const &component_parameters_v); + +/** + * @brief generate an edge lists for an Erdos-Renyi graph + * + * This API supports the G(n,p) model which requires O(n^2) work. + * + * If executed in a multi-gpu context (handle comms has been initialized) + * each GPU will generate Erdos-Renyi edges for its portion of the 2D + * partitioning of the adjacency matrix. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param num_vertices Number of vertices to use in the generated graph + * @param p Probability for edge creation + * @param base_vertex_id Starting vertex id for the generated graph + * @param seed Seed value for the random number generator. + * @return std::tuple, rmm::device_uvector> A tuple of + * rmm::device_uvector objects for edge source vertex IDs and edge destination vertex IDs. + */ +template +std::tuple, rmm::device_uvector> +generate_erdos_renyi_graph_edgelist_gnp(raft::handle_t const &handle, + vertex_t num_vertices, + float p, + vertex_t base_vertex_id, + uint64_t seed = 0); + +/** + * @brief generate an edge lists for an Erdos-Renyi graph + * + * This API supports the G(n,m) model + * + * If executed in a multi-gpu context (handle comms has been initialized) + * each GPU will generate Erdos-Renyi edges for its portion of the 2D + * partitioning of the adjacency matrix. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. 
+ * @param num_vertices Number of vertices to use in each complete graph + * @param m Number of edges to generate + * @param base_vertex_id Starting vertex id for the generated graph + * @param seed Seed value for the random number generator. + * @return std::tuple, rmm::device_uvector> A tuple of + * rmm::device_uvector objects for edge source vertex IDs and edge destination vertex IDs. + */ +template +std::tuple, rmm::device_uvector> +generate_erdos_renyi_graph_edgelist_gnm(raft::handle_t const &handle, + vertex_t num_vertices, + size_t m, + vertex_t base_vertex_id, + uint64_t seed = 0); + +/** + * @brief symmetrize an edgelist + * + * Given an edgelist for a graph, symmetrize and deduplicate edges. + * + * If a duplicate edge exists in a weighted graph, one of the weights is arbitrarily + * returned. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam weight_t Type of weights. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param d_src_v Vector of source vertices + * @param d_dst_v Vector of destination vertices + * @param d_weights_v Optional vector of edge weights + * @return std::tuple, rmm::device_uvector> A tuple of + * rmm::device_uvector objects for edge source vertex IDs and edge destination vertex IDs. + */ +template +std::tuple, + rmm::device_uvector, + std::optional>> +symmetrize_edgelist(raft::handle_t const &handle, + rmm::device_uvector &&d_src_v, + rmm::device_uvector &&d_dst_v, + std::optional> &&optional_d_weights_v); + +/** + * @brief scramble vertex ids in a graph + * + * Given an edgelist for a graph, scramble all vertex ids by the given offset. + * This translation is done in place. + * + * The scramble code here follows the algorithm in the Graph 500 reference + * implementation version 3.0.0. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param d_src_v Vector of source vertices + * @param d_dst_v Vector of destination vertices + * @param vertex_id_offset Offset to add to each vertex id + * @param seed Used to initialize random number generator + */ +template +void scramble_vertex_ids(raft::handle_t const &handle, + rmm::device_uvector &d_src_v, + rmm::device_uvector &d_dst_v, + vertex_t vertex_id_offset, + uint64_t seed = 0); + +/** + * @brief Combine edgelists from multiple sources into a single edgelist + * + * If executed in a multi-gpu context (handle comms has been initialized) + * each GPU will operate only on its subset of data. Any shuffling to get + * edges onto the same GPU should be done prior to calling this function. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. 
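Illustrative usage sketch (not part of the patch): G(n, p) generation followed by symmetrization, using only the declarations above. The vertex count, edge probability, seed, and the weightless std::nullopt argument are assumptions for illustration.

#include <cugraph/graph_generators.hpp>
#include <raft/handle.hpp>

#include <optional>
#include <utility>

void erdos_renyi_sketch(raft::handle_t const& handle)
{
  using vertex_t = int32_t;

  // G(n, p): each of the n * n possible (src, dst) pairs is kept with probability p,
  // hence the O(n^2) work noted in the documentation above.
  auto [srcs, dsts] = cugraph::generate_erdos_renyi_graph_edgelist_gnp<vertex_t>(
    handle, vertex_t{1000}, /* p = */ 0.01f, /* base_vertex_id = */ vertex_t{0}, /* seed = */ 7);

  // Mirror every edge to obtain an undirected edge list; no weights in this sketch.
  auto [sym_srcs, sym_dsts, sym_weights] = cugraph::symmetrize_edgelist<vertex_t, float>(
    handle, std::move(srcs), std::move(dsts), std::nullopt);
}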
+ * @param sources The source vertex ids to combine + * @param dests The destination vertex ids to combine + * @param weights Optional vector of weights to combine + * @param remove_multi_edges If true (the default) then remove multi edges, if false leave them in + * @return std::tuple, rmm::device_uvector, + * rmm::device_uvector> A tuple of rmm::device_uvector objects for edge source vertex IDs + * and edge destination vertex IDs and edge weights. + */ +template +std::tuple, + rmm::device_uvector, + std::optional>> +combine_edgelists(raft::handle_t const &handle, + std::vector> &&d_sources, + std::vector> &&d_dests, + std::optional>> &&optional_d_weights, + bool remove_multi_edges = true); + +} // namespace cugraph diff --git a/cpp/include/cugraph/utilities/cython.hpp b/cpp/include/cugraph/utilities/cython.hpp index f8284a16ae3..f187a985108 100644 --- a/cpp/include/cugraph/utilities/cython.hpp +++ b/cpp/include/cugraph/utilities/cython.hpp @@ -16,8 +16,8 @@ #pragma once #include -#include #include +#include #include #include @@ -523,8 +523,8 @@ call_generate_rmat_edgelists(raft::handle_t const& handle, size_t min_scale, size_t max_scale, size_t edge_factor, - cugraph::experimental::generator_distribution_t size_distribution, - cugraph::experimental::generator_distribution_t edge_distribution, + cugraph::generator_distribution_t size_distribution, + cugraph::generator_distribution_t edge_distribution, uint64_t seed, bool clip_and_flip, bool scramble_vertex_ids); diff --git a/cpp/src/generators/erdos_renyi_generator.cu b/cpp/src/generators/erdos_renyi_generator.cu new file mode 100644 index 00000000000..8452a613174 --- /dev/null +++ b/cpp/src/generators/erdos_renyi_generator.cu @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
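Illustrative usage sketch (not part of the patch): composing two of the building blocks declared above, a set of complete graphs plus a path that strings them together, then merging the pieces with combine_edgelists. Component sizes, vertex id bases, and the float weight type are assumptions for illustration.

#include <cugraph/graph_generators.hpp>
#include <raft/handle.hpp>
#include <rmm/device_uvector.hpp>

#include <optional>
#include <tuple>
#include <utility>
#include <vector>

void combine_sketch(raft::handle_t const& handle)
{
  using vertex_t = int32_t;

  // Three complete graphs of 4 vertices each, at vertex id bases 0, 4 and 8.
  std::vector<std::tuple<vertex_t, vertex_t>> k4_parms{{4, 0}, {4, 4}, {4, 8}};
  auto [k4_srcs, k4_dsts] =
    cugraph::generate_complete_graph_edgelist<vertex_t>(handle, k4_parms);

  // A path over the same 12 vertices so the three components become connected.
  std::vector<std::tuple<vertex_t, vertex_t>> path_parms{{12, 0}};
  auto [path_srcs, path_dsts] =
    cugraph::generate_path_graph_edgelist<vertex_t>(handle, path_parms);

  std::vector<rmm::device_uvector<vertex_t>> src_pieces;
  std::vector<rmm::device_uvector<vertex_t>> dst_pieces;
  src_pieces.push_back(std::move(k4_srcs));
  src_pieces.push_back(std::move(path_srcs));
  dst_pieces.push_back(std::move(k4_dsts));
  dst_pieces.push_back(std::move(path_dsts));

  // No weights; remove_multi_edges defaults to true, so edges shared by a K4 and
  // the path are deduplicated in the combined edge list.
  auto [srcs, dsts, weights] = cugraph::combine_edgelists<vertex_t, float>(
    handle, std::move(src_pieces), std::move(dst_pieces), std::nullopt);
}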
+ */ + +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + +namespace cugraph { + +template +std::tuple, rmm::device_uvector> +generate_erdos_renyi_graph_edgelist_gnp(raft::handle_t const& handle, + vertex_t num_vertices, + float p, + vertex_t base_vertex_id, + uint64_t seed) +{ + CUGRAPH_EXPECTS(num_vertices < std::numeric_limits::max(), + "Implementation cannot support specified value"); + + auto random_iterator = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), [seed] __device__(size_t index) { + thrust::default_random_engine rng(seed); + thrust::uniform_real_distribution dist(0.0, 1.0); + rng.discard(index); + return dist(rng); + }); + + size_t count = thrust::count_if(rmm::exec_policy(handle.get_stream()), + random_iterator, + random_iterator + num_vertices * num_vertices, + [p] __device__(float prob) { return prob < p; }); + + rmm::device_uvector indices_v(count, handle.get_stream()); + + thrust::copy_if(rmm::exec_policy(handle.get_stream()), + random_iterator, + random_iterator + num_vertices * num_vertices, + indices_v.begin(), + [p] __device__(float prob) { return prob < p; }); + + rmm::device_uvector src_v(count, handle.get_stream()); + rmm::device_uvector dst_v(count, handle.get_stream()); + + thrust::transform(rmm::exec_policy(handle.get_stream()), + indices_v.begin(), + indices_v.end(), + thrust::make_zip_iterator(thrust::make_tuple(src_v.begin(), src_v.end())), + [num_vertices] __device__(size_t index) { + size_t src = index / num_vertices; + size_t dst = index % num_vertices; + + return thrust::make_tuple(static_cast(src), + static_cast(dst)); + }); + + handle.get_stream_view().synchronize(); + + return std::make_tuple(std::move(src_v), std::move(dst_v)); +} + +template +std::tuple, rmm::device_uvector> +generate_erdos_renyi_graph_edgelist_gnm(raft::handle_t const& handle, + vertex_t num_vertices, + size_t m, + vertex_t base_vertex_id, + uint64_t seed) +{ + CUGRAPH_FAIL("Not implemented"); +} + +template std::tuple, rmm::device_uvector> +generate_erdos_renyi_graph_edgelist_gnp(raft::handle_t const& handle, + int32_t num_vertices, + float p, + int32_t base_vertex_id, + uint64_t seed); + +template std::tuple, rmm::device_uvector> +generate_erdos_renyi_graph_edgelist_gnp(raft::handle_t const& handle, + int64_t num_vertices, + float p, + int64_t base_vertex_id, + uint64_t seed); + +template std::tuple, rmm::device_uvector> +generate_erdos_renyi_graph_edgelist_gnm(raft::handle_t const& handle, + int32_t num_vertices, + size_t m, + int32_t base_vertex_id, + uint64_t seed); + +template std::tuple, rmm::device_uvector> +generate_erdos_renyi_graph_edgelist_gnm(raft::handle_t const& handle, + int64_t num_vertices, + size_t m, + int64_t base_vertex_id, + uint64_t seed); + +} // namespace cugraph diff --git a/cpp/src/experimental/generate_rmat_edgelist.cu b/cpp/src/generators/generate_rmat_edgelist.cu similarity index 84% rename from cpp/src/experimental/generate_rmat_edgelist.cu rename to cpp/src/generators/generate_rmat_edgelist.cu index e0cccd70071..638d18b1831 100644 --- a/cpp/src/experimental/generate_rmat_edgelist.cu +++ b/cpp/src/generators/generate_rmat_edgelist.cu @@ -14,9 +14,7 @@ * limitations under the License. 
*/ -#include - -#include +#include #include #include @@ -28,11 +26,10 @@ #include #include +#include #include -#include "rmm/detail/error.hpp" namespace cugraph { -namespace experimental { template std::tuple, rmm::device_uvector> generate_rmat_edgelist( @@ -43,8 +40,7 @@ std::tuple, rmm::device_uvector> generat double b, double c, uint64_t seed, - bool clip_and_flip, - bool scramble_vertex_ids) + bool clip_and_flip) { CUGRAPH_EXPECTS((size_t{1} << scale) <= static_cast(std::numeric_limits::max()), "Invalid input argument: scale too large for vertex_t."); @@ -105,21 +101,6 @@ std::tuple, rmm::device_uvector> generat num_edges_generated += num_edges_to_generate; } - if (scramble_vertex_ids) { - rands.resize(0, handle.get_stream()); - rands.shrink_to_fit(handle.get_stream()); - - auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(srcs.begin(), dsts.begin())); - thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - pair_first, - pair_first + srcs.size(), - pair_first, - [scale] __device__(auto pair) { - return thrust::make_tuple(detail::scramble(thrust::get<0>(pair), scale), - detail::scramble(thrust::get<1>(pair), scale)); - }); - } - return std::make_tuple(std::move(srcs), std::move(dsts)); } @@ -133,8 +114,7 @@ generate_rmat_edgelists(raft::handle_t const& handle, generator_distribution_t component_distribution, generator_distribution_t edge_distribution, uint64_t seed, - bool clip_and_flip, - bool scramble_vertex_ids) + bool clip_and_flip) { CUGRAPH_EXPECTS(min_scale > 0, "minimum graph scale is 1."); CUGRAPH_EXPECTS( @@ -171,7 +151,7 @@ generate_rmat_edgelists(raft::handle_t const& handle, for (size_t i = 0; i < n_edgelists; i++) { output.push_back(generate_rmat_edgelist( - handle, scale[i], scale[i] * edge_factor, a, b, c, i, clip_and_flip, scramble_vertex_ids)); + handle, scale[i], scale[i] * edge_factor, a, b, c, i, clip_and_flip)); } return output; } @@ -184,8 +164,7 @@ generate_rmat_edgelist(raft::handle_t const& handle, double b, double c, uint64_t seed, - bool clip_and_flip, - bool scramble_vertex_ids); + bool clip_and_flip); template std::tuple, rmm::device_uvector> generate_rmat_edgelist(raft::handle_t const& handle, @@ -195,8 +174,7 @@ generate_rmat_edgelist(raft::handle_t const& handle, double b, double c, uint64_t seed, - bool clip_and_flip, - bool scramble_vertex_ids); + bool clip_and_flip); template std::vector, rmm::device_uvector>> generate_rmat_edgelists(raft::handle_t const& handle, @@ -207,8 +185,7 @@ generate_rmat_edgelists(raft::handle_t const& handle, generator_distribution_t component_distribution, generator_distribution_t edge_distribution, uint64_t seed, - bool clip_and_flip, - bool scramble_vertex_ids); + bool clip_and_flip); template std::vector, rmm::device_uvector>> generate_rmat_edgelists(raft::handle_t const& handle, @@ -219,8 +196,6 @@ generate_rmat_edgelists(raft::handle_t const& handle, generator_distribution_t component_distribution, generator_distribution_t edge_distribution, uint64_t seed, - bool clip_and_flip, - bool scramble_vertex_ids); + bool clip_and_flip); -} // namespace experimental } // namespace cugraph diff --git a/cpp/src/generators/generator_tools.cu b/cpp/src/generators/generator_tools.cu new file mode 100644 index 00000000000..3ebef13f3b1 --- /dev/null +++ b/cpp/src/generators/generator_tools.cu @@ -0,0 +1,301 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include + +#include +#include + +#include +#include + +#include + +namespace cugraph { + +namespace detail { + +template +rmm::device_uvector append_all(raft::handle_t const &handle, + std::vector> &&input) +{ + size_t size{0}; + // for (size_t i = 0; i < input.size(); ++i) size += input[i].size(); + for (auto &element : input) size += element.size(); + + rmm::device_uvector output(size, handle.get_stream()); + auto output_iter = output.begin(); + + for (auto &element : input) { + raft::copy(output_iter, element.begin(), element.size(), handle.get_stream()); + output_iter += element.size(); + } + + /* +for (size_t i = 0; i < input.size(); ++i) { + raft::copy(output_iter, input[i].begin(), input[i].size(), handle.get_stream()); + output_iter += input[i].size(); +} + */ + + return output; +} + +} // namespace detail + +template +void scramble_vertex_ids(raft::handle_t const &handle, + rmm::device_uvector &d_src_v, + rmm::device_uvector &d_dst_v, + vertex_t vertex_id_offset, + uint64_t seed) +{ + vertex_t scale = 1 + raft::log2(d_src_v.size()); + + auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(d_src_v.begin(), d_dst_v.begin())); + thrust::transform(rmm::exec_policy(handle.get_stream()), + pair_first, + pair_first + d_src_v.size(), + pair_first, + [scale] __device__(auto pair) { + return thrust::make_tuple( + experimental::detail::scramble(thrust::get<0>(pair), scale), + experimental::detail::scramble(thrust::get<1>(pair), scale)); + }); +} + +template +std::tuple, + rmm::device_uvector, + std::optional>> +combine_edgelists(raft::handle_t const &handle, + std::vector> &&sources, + std::vector> &&dests, + std::optional>> &&optional_d_weights, + bool remove_multi_edges) +{ + CUGRAPH_EXPECTS(sources.size() == dests.size(), + "sources and dests vertex lists must be the same size"); + + if (optional_d_weights) { + CUGRAPH_EXPECTS(sources.size() == optional_d_weights.value().size(), + "has_weights is specified, sources and weights must be the same size"); + + thrust::for_each_n( + thrust::host, + thrust::make_zip_iterator( + thrust::make_tuple(sources.begin(), dests.begin(), optional_d_weights.value().begin())), + sources.size(), + [](auto tuple) { + CUGRAPH_EXPECTS(thrust::get<0>(tuple).size() != thrust::get<1>(tuple).size(), + "source vertex and dest vertex uvectors must be same size"); + CUGRAPH_EXPECTS(thrust::get<0>(tuple).size() != thrust::get<2>(tuple).size(), + "source vertex and weights uvectors must be same size"); + }); + } else { + thrust::for_each_n( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(sources.begin(), dests.begin())), + sources.size(), + [](auto tuple) { + CUGRAPH_EXPECTS(thrust::get<0>(tuple).size() == thrust::get<1>(tuple).size(), + "source vertex and dest vertex uvectors must be same size"); + }); + } + + std::vector> d_weights; + + rmm::device_uvector srcs_v(0, handle.get_stream()); + rmm::device_uvector dsts_v(0, handle.get_stream()); + 
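  // Editorial note, not part of the patch: the remainder of combine_edgelists()
  // concatenates the per-component vectors with detail::append_all(). When
  // remove_multi_edges is set it then sorts the (src, dst) pairs and drops
  // duplicates, using thrust::unique_by_key when weights are present (one of the
  // duplicate weights is kept) and plain thrust::unique otherwise, before
  // resizing the output vectors to the surviving edge count.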
rmm::device_uvector weights_v(0, handle.get_stream()); + + srcs_v = detail::append_all(handle, std::move(sources)); + dsts_v = detail::append_all(handle, std::move(dests)); + + if (optional_d_weights) { + weights_v = detail::append_all(handle, std::move(optional_d_weights.value())); + } + + if (remove_multi_edges) { + size_t number_of_edges{srcs_v.size()}; + + if (optional_d_weights) { + thrust::sort( + rmm::exec_policy(handle.get_stream()), + thrust::make_zip_iterator( + thrust::make_tuple(srcs_v.begin(), dsts_v.begin(), weights_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(srcs_v.end(), dsts_v.end(), weights_v.end()))); + + auto pair_first = + thrust::make_zip_iterator(thrust::make_tuple(srcs_v.begin(), dsts_v.begin())); + auto end_iter = thrust::unique_by_key(rmm::exec_policy(handle.get_stream()), + pair_first, + pair_first + srcs_v.size(), + weights_v.begin()); + + number_of_edges = thrust::distance(pair_first, thrust::get<0>(end_iter)); + } else { + thrust::sort(rmm::exec_policy(handle.get_stream()), + thrust::make_zip_iterator(thrust::make_tuple(srcs_v.begin(), dsts_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(srcs_v.end(), dsts_v.end()))); + + auto pair_first = + thrust::make_zip_iterator(thrust::make_tuple(srcs_v.begin(), dsts_v.begin())); + + auto end_iter = thrust::unique( + rmm::exec_policy(handle.get_stream()), + thrust::make_zip_iterator(thrust::make_tuple(srcs_v.begin(), dsts_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(srcs_v.end(), dsts_v.end()))); + + number_of_edges = thrust::distance(pair_first, end_iter); + } + + srcs_v.resize(number_of_edges, handle.get_stream()); + srcs_v.shrink_to_fit(handle.get_stream()); + dsts_v.resize(number_of_edges, handle.get_stream()); + dsts_v.shrink_to_fit(handle.get_stream()); + + if (optional_d_weights) { + weights_v.resize(number_of_edges, handle.get_stream()); + weights_v.shrink_to_fit(handle.get_stream()); + } + } + + return std::make_tuple( + std::move(srcs_v), + std::move(dsts_v), + optional_d_weights + ? std::move(std::optional>(std::move(weights_v))) + : std::nullopt); +} + +template +std::tuple, + rmm::device_uvector, + std::optional>> +symmetrize_edgelist(raft::handle_t const &handle, + rmm::device_uvector &&d_src_v, + rmm::device_uvector &&d_dst_v, + std::optional> &&optional_d_weights_v) +{ + auto offset = d_src_v.size(); + d_src_v.resize(offset * 2, handle.get_stream_view()); + d_dst_v.resize(offset * 2, handle.get_stream_view()); + + thrust::copy(rmm::exec_policy(handle.get_stream_view()), + d_dst_v.begin(), + d_dst_v.begin() + offset, + d_src_v.begin() + offset); + thrust::copy(rmm::exec_policy(handle.get_stream_view()), + d_src_v.begin(), + d_src_v.begin() + offset, + d_dst_v.begin() + offset); + if (optional_d_weights_v) { + optional_d_weights_v->resize(d_src_v.size(), handle.get_stream_view()); + thrust::copy(rmm::exec_policy(handle.get_stream_view()), + optional_d_weights_v->begin(), + optional_d_weights_v->begin() + offset, + optional_d_weights_v->begin() + offset); + } + + return std::make_tuple(std::move(d_src_v), + std::move(d_dst_v), + optional_d_weights_v ? 
std::move(optional_d_weights_v) : std::nullopt); +} + +template void scramble_vertex_ids(raft::handle_t const &handle, + rmm::device_uvector &d_src_v, + rmm::device_uvector &d_dst_v, + int32_t vertex_id_offset, + uint64_t seed); + +template void scramble_vertex_ids(raft::handle_t const &handle, + rmm::device_uvector &d_src_v, + rmm::device_uvector &d_dst_v, + int64_t vertex_id_offset, + uint64_t seed); + +template std::tuple, + rmm::device_uvector, + std::optional>> +combine_edgelists(raft::handle_t const &handle, + std::vector> &&sources, + std::vector> &&dests, + std::optional>> &&optional_d_weights, + bool remove_multi_edges); + +template std::tuple, + rmm::device_uvector, + std::optional>> +combine_edgelists(raft::handle_t const &handle, + std::vector> &&sources, + std::vector> &&dests, + std::optional>> &&optional_d_weights, + bool remove_multi_edges); + +template std::tuple, + rmm::device_uvector, + std::optional>> +combine_edgelists(raft::handle_t const &handle, + std::vector> &&sources, + std::vector> &&dests, + std::optional>> &&optional_d_weights, + bool remove_multi_edges); + +template std::tuple, + rmm::device_uvector, + std::optional>> +combine_edgelists(raft::handle_t const &handle, + std::vector> &&sources, + std::vector> &&dests, + std::optional>> &&optional_d_weights, + bool remove_multi_edges); + +template std::tuple, + rmm::device_uvector, + std::optional>> +symmetrize_edgelist(raft::handle_t const &handle, + rmm::device_uvector &&d_src_v, + rmm::device_uvector &&d_dst_v, + std::optional> &&optional_d_weights_v); +template std::tuple, + rmm::device_uvector, + std::optional>> +symmetrize_edgelist(raft::handle_t const &handle, + rmm::device_uvector &&d_src_v, + rmm::device_uvector &&d_dst_v, + std::optional> &&optional_d_weights_v); + +template std::tuple, + rmm::device_uvector, + std::optional>> +symmetrize_edgelist(raft::handle_t const &handle, + rmm::device_uvector &&d_src_v, + rmm::device_uvector &&d_dst_v, + std::optional> &&optional_d_weights_v); +template std::tuple, + rmm::device_uvector, + std::optional>> +symmetrize_edgelist(raft::handle_t const &handle, + rmm::device_uvector &&d_src_v, + rmm::device_uvector &&d_dst_v, + std::optional> &&optional_d_weights_v); + +} // namespace cugraph diff --git a/cpp/src/generators/simple_generators.cu b/cpp/src/generators/simple_generators.cu new file mode 100644 index 00000000000..413e08962e7 --- /dev/null +++ b/cpp/src/generators/simple_generators.cu @@ -0,0 +1,343 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include +#include + +#include + +#include + +namespace cugraph { + +template +std::tuple, rmm::device_uvector> +generate_path_graph_edgelist(raft::handle_t const& handle, + std::vector> const& component_parms_v) +{ + size_t num_edges = thrust::transform_reduce( + thrust::host, + component_parms_v.begin(), + component_parms_v.end(), + [](auto tuple) { return (std::get<0>(tuple) - 1); }, + size_t{0}, + std::plus()); + + bool edge_off_end{false}; + + if (handle.comms_initialized()) { + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + + if (comm_size > 1) { + if (comm_rank < comm_size) { + num_edges += component_parms_v.size(); + edge_off_end = true; + } + } + } + + rmm::device_uvector d_src_v(num_edges, handle.get_stream()); + rmm::device_uvector d_dst_v(num_edges, handle.get_stream()); + + auto src_iterator = d_src_v.begin(); + auto dst_iterator = d_dst_v.begin(); + + for (auto tuple : component_parms_v) { + vertex_t num_vertices, base_vertex_id; + std::tie(num_vertices, base_vertex_id) = tuple; + + vertex_t num_edges{num_vertices - 1}; + + if (edge_off_end) ++num_edges; + + thrust::sequence(rmm::exec_policy(handle.get_stream()), + src_iterator, + src_iterator + num_edges, + base_vertex_id); + + thrust::sequence(rmm::exec_policy(handle.get_stream()), + dst_iterator, + dst_iterator + num_edges, + base_vertex_id + 1); + + src_iterator += num_edges; + dst_iterator += num_edges; + } + + handle.get_stream_view().synchronize(); + + return std::make_tuple(std::move(d_src_v), std::move(d_dst_v)); +} + +template +std::tuple, rmm::device_uvector> +generate_2d_mesh_graph_edgelist( + raft::handle_t const& handle, + std::vector> const& component_parms_v) +{ + size_t num_edges = thrust::transform_reduce( + thrust::host, + component_parms_v.begin(), + component_parms_v.end(), + [](auto tuple) { + vertex_t x, y; + std::tie(x, y, std::ignore) = tuple; + + return ((x - 1) * y) + (x * (y - 1)); + }, + size_t{0}, + std::plus()); + + rmm::device_uvector d_src_v(num_edges, handle.get_stream()); + rmm::device_uvector d_dst_v(num_edges, handle.get_stream()); + + auto output_iterator = + thrust::make_zip_iterator(thrust::make_tuple(d_src_v.begin(), d_dst_v.begin())); + + for (auto tuple : component_parms_v) { + vertex_t x, y, base_vertex_id; + std::tie(x, y, base_vertex_id) = tuple; + + vertex_t num_vertices = x * y; + + auto x_iterator = thrust::make_zip_iterator( + thrust::make_tuple(thrust::make_counting_iterator(base_vertex_id), + thrust::make_counting_iterator(base_vertex_id + 1))); + + output_iterator = thrust::copy_if(rmm::exec_policy(handle.get_stream()), + x_iterator, + x_iterator + num_vertices - 1, + output_iterator, + [base_vertex_id, x] __device__(auto pair) { + vertex_t dst = thrust::get<1>(pair); + // Want to skip if dst is in the last column of a graph + return ((dst - base_vertex_id) % x) != 0; + }); + + auto y_iterator = thrust::make_zip_iterator( + thrust::make_tuple(thrust::make_counting_iterator(base_vertex_id), + thrust::make_counting_iterator(base_vertex_id + x))); + + output_iterator = thrust::copy_if(rmm::exec_policy(handle.get_stream()), + y_iterator, + y_iterator + num_vertices - x, + output_iterator, + [base_vertex_id, x, y] __device__(auto pair) { + vertex_t dst = thrust::get<1>(pair); + + // Want to skip if dst is in the first row of a new graph + return ((dst - base_vertex_id) % (x * y)) >= x; + }); + } + + handle.get_stream_view().synchronize(); + + return 
std::make_tuple(std::move(d_src_v), std::move(d_dst_v)); +} + +template +std::tuple, rmm::device_uvector> +generate_3d_mesh_graph_edgelist( + raft::handle_t const& handle, + std::vector> const& component_parms_v) +{ + size_t num_edges = thrust::transform_reduce( + thrust::host, + component_parms_v.begin(), + component_parms_v.end(), + [](auto tuple) { + vertex_t x, y, z; + std::tie(x, y, z, std::ignore) = tuple; + + return ((x - 1) * y * z) + (x * (y - 1) * z) + (x * y * (z - 1)); + }, + size_t{0}, + std::plus()); + + rmm::device_uvector d_src_v(num_edges, handle.get_stream()); + rmm::device_uvector d_dst_v(num_edges, handle.get_stream()); + + auto output_iterator = + thrust::make_zip_iterator(thrust::make_tuple(d_src_v.begin(), d_dst_v.begin())); + + for (auto tuple : component_parms_v) { + vertex_t x, y, z, base_vertex_id; + std::tie(x, y, z, base_vertex_id) = tuple; + + vertex_t num_vertices = x * y * z; + + auto x_iterator = thrust::make_zip_iterator( + thrust::make_tuple(thrust::make_counting_iterator(base_vertex_id), + thrust::make_counting_iterator(base_vertex_id + 1))); + + output_iterator = thrust::copy_if(rmm::exec_policy(handle.get_stream()), + x_iterator, + x_iterator + num_vertices - 1, + output_iterator, + [base_vertex_id, x] __device__(auto pair) { + vertex_t dst = thrust::get<1>(pair); + // Want to skip if dst is in the last column of a graph + return ((dst - base_vertex_id) % x) != 0; + }); + + auto y_iterator = thrust::make_zip_iterator( + thrust::make_tuple(thrust::make_counting_iterator(base_vertex_id), + thrust::make_counting_iterator(base_vertex_id + x))); + + output_iterator = thrust::copy_if(rmm::exec_policy(handle.get_stream()), + y_iterator, + y_iterator + num_vertices - x, + output_iterator, + [base_vertex_id, x, y] __device__(auto pair) { + vertex_t dst = thrust::get<1>(pair); + // Want to skip if dst is in the first row of a new graph + return ((dst - base_vertex_id) % (x * y)) >= x; + }); + + auto z_iterator = thrust::make_zip_iterator( + thrust::make_tuple(thrust::make_counting_iterator(base_vertex_id), + thrust::make_counting_iterator(base_vertex_id + x * y))); + + output_iterator = thrust::copy_if(rmm::exec_policy(handle.get_stream()), + z_iterator, + z_iterator + num_vertices - x * y, + output_iterator, + [base_vertex_id, x, y, z] __device__(auto pair) { + vertex_t dst = thrust::get<1>(pair); + // Want to skip if dst is in the first row of a new graph + return ((dst - base_vertex_id) % (x * y * z)) >= (x * y); + }); + } + + handle.get_stream_view().synchronize(); + + return std::make_tuple(std::move(d_src_v), std::move(d_dst_v)); +} + +template +std::tuple, rmm::device_uvector> +generate_complete_graph_edgelist( + raft::handle_t const& handle, + std::vector> const& component_parms_v) +{ + std::for_each(component_parms_v.begin(), component_parms_v.end(), [](auto tuple) { + vertex_t num_vertices = std::get<0>(tuple); + CUGRAPH_EXPECTS(num_vertices < std::numeric_limits::max(), + "Implementation cannot support specified value"); + }); + + size_t num_edges = thrust::transform_reduce( + thrust::host, + component_parms_v.begin(), + component_parms_v.end(), + [](auto tuple) { + vertex_t num_vertices = std::get<0>(tuple); + return num_vertices * (num_vertices - 1) / 2; + }, + size_t{0}, + std::plus()); + + vertex_t invalid_vertex{std::numeric_limits::max()}; + + rmm::device_uvector d_src_v(num_edges, handle.get_stream()); + rmm::device_uvector d_dst_v(num_edges, handle.get_stream()); + + auto output_iterator = + 
thrust::make_zip_iterator(thrust::make_tuple(d_src_v.begin(), d_dst_v.begin())); + + for (auto tuple : component_parms_v) { + vertex_t num_vertices, base_vertex_id; + std::tie(num_vertices, base_vertex_id) = tuple; + + auto transform_iter = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + [base_vertex_id, num_vertices, invalid_vertex] __device__(size_t index) { + size_t graph_index = index / (num_vertices * num_vertices); + size_t local_index = index % (num_vertices * num_vertices); + + vertex_t src = base_vertex_id + static_cast(local_index / num_vertices); + vertex_t dst = base_vertex_id + static_cast(local_index % num_vertices); + + if (src == dst) { + src = invalid_vertex; + dst = invalid_vertex; + } else { + src += (graph_index * num_vertices); + dst += (graph_index * num_vertices); + } + + return thrust::make_tuple(src, dst); + }); + + output_iterator = thrust::copy_if(rmm::exec_policy(handle.get_stream()), + transform_iter, + transform_iter + num_vertices * num_vertices, + output_iterator, + [invalid_vertex] __device__(auto tuple) { + auto src = thrust::get<0>(tuple); + auto dst = thrust::get<1>(tuple); + + return (src != invalid_vertex) && (src < dst); + }); + } + + handle.get_stream_view().synchronize(); + + return std::make_tuple(std::move(d_src_v), std::move(d_dst_v)); +} + +template std::tuple, rmm::device_uvector> +generate_path_graph_edgelist(raft::handle_t const& handle, + std::vector> const& component_parms_v); + +template std::tuple, rmm::device_uvector> +generate_path_graph_edgelist(raft::handle_t const& handle, + std::vector> const& component_parms_v); + +template std::tuple, rmm::device_uvector> +generate_2d_mesh_graph_edgelist( + raft::handle_t const& handle, + std::vector> const& component_parms_v); + +template std::tuple, rmm::device_uvector> +generate_2d_mesh_graph_edgelist( + raft::handle_t const& handle, + std::vector> const& component_parms_v); + +template std::tuple, rmm::device_uvector> +generate_3d_mesh_graph_edgelist( + raft::handle_t const& handle, + std::vector> const& component_parms_v); + +template std::tuple, rmm::device_uvector> +generate_3d_mesh_graph_edgelist( + raft::handle_t const& handle, + std::vector> const& component_parms_v); + +template std::tuple, rmm::device_uvector> +generate_complete_graph_edgelist( + raft::handle_t const& handle, std::vector> const& component_parms_v); + +template std::tuple, rmm::device_uvector> +generate_complete_graph_edgelist( + raft::handle_t const& handle, std::vector> const& component_parms_v); + +} // namespace cugraph diff --git a/cpp/src/utilities/cython.cu b/cpp/src/utilities/cython.cu index 41b1b406063..a95e4eb5421 100644 --- a/cpp/src/utilities/cython.cu +++ b/cpp/src/utilities/cython.cu @@ -17,9 +17,9 @@ #include #include #include -#include #include #include +#include #include #include #include @@ -807,8 +807,13 @@ std::unique_ptr call_generate_rmat_edgelist(raft::handle_t co bool clip_and_flip, bool scramble_vertex_ids) { - auto src_dst_tuple = cugraph::experimental::generate_rmat_edgelist( - handle, scale, num_edges, a, b, c, seed, clip_and_flip, scramble_vertex_ids); + auto src_dst_tuple = cugraph::generate_rmat_edgelist( + handle, scale, num_edges, a, b, c, seed, clip_and_flip); + + if (scramble_vertex_ids) { + cugraph::scramble_vertex_ids( + handle, std::get<0>(src_dst_tuple), std::get<1>(src_dst_tuple), vertex_t{0}, seed); + } graph_generator_t gg_vals{ std::make_unique(std::get<0>(src_dst_tuple).release()), @@ -824,23 +829,29 @@ call_generate_rmat_edgelists(raft::handle_t const& 
handle, size_t min_scale, size_t max_scale, size_t edge_factor, - cugraph::experimental::generator_distribution_t size_distribution, - cugraph::experimental::generator_distribution_t edge_distribution, + cugraph::generator_distribution_t size_distribution, + cugraph::generator_distribution_t edge_distribution, uint64_t seed, bool clip_and_flip, bool scramble_vertex_ids) { - auto src_dst_vec_tuple = - cugraph::experimental::generate_rmat_edgelists(handle, - n_edgelists, - min_scale, - max_scale, - edge_factor, - size_distribution, - edge_distribution, - seed, - clip_and_flip, - scramble_vertex_ids); + auto src_dst_vec_tuple = cugraph::generate_rmat_edgelists(handle, + n_edgelists, + min_scale, + max_scale, + edge_factor, + size_distribution, + edge_distribution, + seed, + clip_and_flip); + + if (scramble_vertex_ids) { + std::for_each( + src_dst_vec_tuple.begin(), src_dst_vec_tuple.end(), [&handle, seed](auto& src_dst_tuple) { + cugraph::scramble_vertex_ids( + handle, std::get<0>(src_dst_tuple), std::get<1>(src_dst_tuple), vertex_t{0}, seed); + }); + } std::vector, std::unique_ptr>> gg_vec; @@ -1504,31 +1515,29 @@ template std::unique_ptr call_generate_rmat_edgelist template std::vector< std::pair, std::unique_ptr>> -call_generate_rmat_edgelists( - raft::handle_t const& handle, - size_t n_edgelists, - size_t min_scale, - size_t max_scale, - size_t edge_factor, - cugraph::experimental::generator_distribution_t size_distribution, - cugraph::experimental::generator_distribution_t edge_distribution, - uint64_t seed, - bool clip_and_flip, - bool scramble_vertex_ids); +call_generate_rmat_edgelists(raft::handle_t const& handle, + size_t n_edgelists, + size_t min_scale, + size_t max_scale, + size_t edge_factor, + cugraph::generator_distribution_t size_distribution, + cugraph::generator_distribution_t edge_distribution, + uint64_t seed, + bool clip_and_flip, + bool scramble_vertex_ids); template std::vector< std::pair, std::unique_ptr>> -call_generate_rmat_edgelists( - raft::handle_t const& handle, - size_t n_edgelists, - size_t min_scale, - size_t max_scale, - size_t edge_factor, - cugraph::experimental::generator_distribution_t size_distribution, - cugraph::experimental::generator_distribution_t edge_distribution, - uint64_t seed, - bool clip_and_flip, - bool scramble_vertex_ids); +call_generate_rmat_edgelists(raft::handle_t const& handle, + size_t n_edgelists, + size_t min_scale, + size_t max_scale, + size_t edge_factor, + cugraph::generator_distribution_t size_distribution, + cugraph::generator_distribution_t edge_distribution, + uint64_t seed, + bool clip_and_flip, + bool scramble_vertex_ids); } // namespace cython } // namespace cugraph diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 0608744400d..ec18640bc11 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -240,6 +240,22 @@ endif(RAPIDS_DATASET_ROOT_DIR) ### test sources ################################################################################## ################################################################################################### +################################################################################################### +# - graph generator tests ------------------------------------------------------------------------- + +set(GRAPH_GENERATORS_TEST_SRC + "${CMAKE_CURRENT_SOURCE_DIR}/generators/generators_test.cpp") + + ConfigureTest(GRAPH_GENERATORS_TEST "${GRAPH_GENERATORS_TEST_SRC}") + 
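For orientation, the new GRAPH_GENERATORS_TEST target registered above boils down to checks of the following shape: build a small list of (component size, base vertex id) tuples, call one of the generators added earlier in this patch, copy the resulting edge list back to the host, and compare it against hand-computed edges. The sketch below is illustrative only and is not part of the patch; the plain TEST macro and the header paths are assumptions (the include lines in this patch were stripped of their targets), while the generate_path_graph_edgelist call shape and the expected path-graph edges match generators_test.cpp later in this diff.

// Minimal, hypothetical smoke test for the path-graph generator (not part of the patch).
// Header names below are assumed; use whatever headers generators_test.cpp actually includes.
#include <raft/handle.hpp>
#include <rmm/device_uvector.hpp>

#include <gtest/gtest.h>

#include <tuple>
#include <vector>

TEST(GeneratorsSmoke, PathGraph)
{
  using vertex_t = int32_t;

  raft::handle_t handle;

  // One path component: 5 vertices starting at base vertex id 0.
  std::vector<std::tuple<vertex_t, vertex_t>> parameters({{5, 0}});

  rmm::device_uvector<vertex_t> src_v(0, handle.get_stream());
  rmm::device_uvector<vertex_t> dst_v(0, handle.get_stream());

  // Generate the edge list on the device, as the real test does.
  std::tie(src_v, dst_v) = cugraph::generate_path_graph_edgelist(handle, parameters);

  // Copy the edge list back to the host and wait for the copy to finish.
  std::vector<vertex_t> h_src(src_v.size());
  std::vector<vertex_t> h_dst(dst_v.size());
  raft::update_host(h_src.data(), src_v.data(), src_v.size(), handle.get_stream());
  raft::update_host(h_dst.data(), dst_v.data(), dst_v.size(), handle.get_stream());
  handle.get_stream_view().synchronize();

  // A 5-vertex path 0-1-2-3-4 has exactly the edges (0,1), (1,2), (2,3), (3,4).
  EXPECT_EQ(h_src, std::vector<vertex_t>({0, 1, 2, 3}));
  EXPECT_EQ(h_dst, std::vector<vertex_t>({1, 2, 3, 4}));
}

The generators_test.cpp added later in this diff follows this same pattern (generate, copy to host, sort where ordering is not guaranteed, compare), just with larger hand-computed expected edge lists for the mesh, complete-graph, and symmetrized cases.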
+################################################################################################### +# - erdos renyi graph generator tests ------------------------------------------------------------- + +set(ERDOS_RENYI_GENERATOR_TEST_SRC + "${CMAKE_CURRENT_SOURCE_DIR}/generators/erdos_renyi_test.cpp") + + ConfigureTest(ERDOS_RENYI_GENERATOR_TEST "${ERDOS_RENYI_GENERATOR_TEST_SRC}") + ################################################################################################### # - katz centrality tests ------------------------------------------------------------------------- ConfigureTest(KATZ_TEST centrality/katz_centrality_test.cu) diff --git a/cpp/tests/community/mg_louvain_helper.cu b/cpp/tests/community/mg_louvain_helper.cu index 2b1b5ade41e..935c36c9232 100644 --- a/cpp/tests/community/mg_louvain_helper.cu +++ b/cpp/tests/community/mg_louvain_helper.cu @@ -31,44 +31,6 @@ namespace cugraph { namespace test { -template -bool compare_renumbered_vectors(raft::handle_t const &handle, - rmm::device_uvector const &v1, - rmm::device_uvector const &v2) -{ - vertex_t max = 1 + thrust::reduce(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - v1.begin(), - v1.end(), - vertex_t{0}); - - rmm::device_uvector map(max, handle.get_stream()); - - auto iter = thrust::make_zip_iterator(thrust::make_tuple(v1.begin(), v2.begin())); - - thrust::for_each(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - iter, - iter + v1.size(), - [d_map = map.data()] __device__(auto pair) { - vertex_t e1 = thrust::get<0>(pair); - vertex_t e2 = thrust::get<1>(pair); - - d_map[e1] = e2; - }); - - auto error_count = - thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - iter, - iter + v1.size(), - [d_map = map.data()] __device__(auto pair) { - vertex_t e1 = thrust::get<0>(pair); - vertex_t e2 = thrust::get<1>(pair); - - return (d_map[e1] != e2); - }); - - return (error_count == 0); -} - template void single_gpu_renumber_edgelist_given_number_map(raft::handle_t const &handle, rmm::device_uvector &edgelist_rows_v, @@ -306,10 +268,6 @@ template void single_gpu_renumber_edgelist_given_number_map( rmm::device_uvector &d_edgelist_cols, rmm::device_uvector &d_renumber_map_gathered_v); -template bool compare_renumbered_vectors(raft::handle_t const &handle, - rmm::device_uvector const &v1, - rmm::device_uvector const &v2); - template std::unique_ptr> coarsen_graph( raft::handle_t const &handle, diff --git a/cpp/tests/community/mg_louvain_helper.hpp b/cpp/tests/community/mg_louvain_helper.hpp index 6d074e2d5e9..5ed710b7417 100644 --- a/cpp/tests/community/mg_louvain_helper.hpp +++ b/cpp/tests/community/mg_louvain_helper.hpp @@ -24,6 +24,11 @@ namespace cugraph { namespace test { +template +bool compare_renumbered_vectors(raft::handle_t const &handle, + std::vector const &v1, + std::vector const &v2); + template bool compare_renumbered_vectors(raft::handle_t const &handle, rmm::device_uvector const &v1, diff --git a/cpp/tests/community/mg_louvain_test.cpp b/cpp/tests/community/mg_louvain_test.cpp index 7e085919fd7..9c6d7bb4491 100644 --- a/cpp/tests/community/mg_louvain_test.cpp +++ b/cpp/tests/community/mg_louvain_test.cpp @@ -157,7 +157,7 @@ class Louvain_MG_Testfixture : public ::testing::TestWithParam std::tie(std::ignore, sg_modularity) = cugraph::louvain(handle, graph_view, d_clustering_v.data(), size_t{1}, resolution); - EXPECT_TRUE(cugraph::test::compare_renumbered_vectors( + EXPECT_TRUE(cugraph::test::renumbered_vectors_same( handle, d_clustering_v, 
d_dendrogram_gathered_v)); sg_graph = diff --git a/cpp/tests/components/mg_weakly_connected_components_test.cpp b/cpp/tests/components/mg_weakly_connected_components_test.cpp index dd61dafc682..a64919c4f92 100644 --- a/cpp/tests/components/mg_weakly_connected_components_test.cpp +++ b/cpp/tests/components/mg_weakly_connected_components_test.cpp @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include @@ -224,20 +224,20 @@ INSTANTIATE_TEST_SUITE_P( cugraph::test::File_Usecase("test/datasets/polbooks.mtx"), cugraph::test::File_Usecase("test/datasets/netscience.mtx")))); -INSTANTIATE_TEST_SUITE_P( - rmat_small_test, - Tests_MGWeaklyConnectedComponents_Rmat, - ::testing::Values( - // enable correctness checks - std::make_tuple(WeaklyConnectedComponents_Usecase{}, - cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false, true)))); - -INSTANTIATE_TEST_SUITE_P( - rmat_large_test, - Tests_MGWeaklyConnectedComponents_Rmat, - ::testing::Values( - // disable correctness checks - std::make_tuple(WeaklyConnectedComponents_Usecase{false}, - cugraph::test::Rmat_Usecase(20, 16, 0.57, 0.19, 0.19, 0, true, false, true)))); +INSTANTIATE_TEST_SUITE_P(rmat_small_test, + Tests_MGWeaklyConnectedComponents_Rmat, + ::testing::Values( + // enable correctness checks + std::make_tuple(WeaklyConnectedComponents_Usecase{}, + cugraph::test::Rmat_Usecase( + 10, 16, 0.57, 0.19, 0.19, 0, true, false, 0, true)))); + +INSTANTIATE_TEST_SUITE_P(rmat_large_test, + Tests_MGWeaklyConnectedComponents_Rmat, + ::testing::Values( + // disable correctness checks + std::make_tuple(WeaklyConnectedComponents_Usecase{false}, + cugraph::test::Rmat_Usecase( + 20, 16, 0.57, 0.19, 0.19, 0, true, false, 0, true)))); CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/components/wcc_test.cpp b/cpp/tests/components/wcc_test.cpp index 381757bc977..9f6254d445f 100644 --- a/cpp/tests/components/wcc_test.cpp +++ b/cpp/tests/components/wcc_test.cpp @@ -9,8 +9,8 @@ * */ -#include #include +#include #include #include @@ -42,9 +42,13 @@ class Tests_WCC : public ::testing::TestWithParam graph(handle); + std::cout << "calling construct_graph" << std::endl; + std::tie(graph, std::ignore) = - input_usecase.template construct_graph( - handle, false, false); + cugraph::test::construct_graph( + handle, input_usecase, false, false); + + std::cout << "back from construct_graph" << std::endl; auto graph_view = graph.view(); @@ -59,7 +63,7 @@ class Tests_WCC : public ::testing::TestWithParam; using Tests_WCC_Rmat = Tests_WCC; -using Tests_WCC_LineGraph = Tests_WCC; +using Tests_WCC_PathGraph = Tests_WCC; TEST_P(Tests_WCC_File, WCC) { @@ -71,7 +75,7 @@ TEST_P(Tests_WCC_Rmat, WCC) auto param = GetParam(); run_current_test(std::get<0>(param), std::get<1>(param)); } -TEST_P(Tests_WCC_LineGraph, WCC) +TEST_P(Tests_WCC_PathGraph, WCC) { auto param = GetParam(); run_current_test(std::get<0>(param), std::get<1>(param)); @@ -89,9 +93,13 @@ INSTANTIATE_TEST_SUITE_P( std::make_tuple(WCC_Usecase{}, cugraph::test::File_Usecase("test/datasets/hollywood.mtx")))); INSTANTIATE_TEST_SUITE_P( - line_graph_test, - Tests_WCC_LineGraph, - ::testing::Values(std::make_tuple(WCC_Usecase{}, cugraph::test::LineGraph_Usecase(1000)), - std::make_tuple(WCC_Usecase{}, cugraph::test::LineGraph_Usecase(100000)))); + path_graph_test, + Tests_WCC_PathGraph, + ::testing::Values(std::make_tuple(WCC_Usecase{}, + cugraph::test::PathGraph_Usecase( + std::vector>({{1000, 0}}))), + std::make_tuple(WCC_Usecase{}, + cugraph::test::PathGraph_Usecase( + 
std::vector>({{100000, 0}}))))); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/components/weakly_connected_components_test.cpp b/cpp/tests/components/weakly_connected_components_test.cpp index 2332aaff261..6523b6a280a 100644 --- a/cpp/tests/components/weakly_connected_components_test.cpp +++ b/cpp/tests/components/weakly_connected_components_test.cpp @@ -16,7 +16,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/tests/experimental/bfs_test.cpp b/cpp/tests/experimental/bfs_test.cpp index 2c8ab894096..3fea9f371e0 100644 --- a/cpp/tests/experimental/bfs_test.cpp +++ b/cpp/tests/experimental/bfs_test.cpp @@ -16,6 +16,7 @@ #include #include +#include #include #include diff --git a/cpp/tests/experimental/generate_rmat_test.cpp b/cpp/tests/experimental/generate_rmat_test.cpp index 6d97628e83d..7c2dbb3911a 100644 --- a/cpp/tests/experimental/generate_rmat_test.cpp +++ b/cpp/tests/experimental/generate_rmat_test.cpp @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include @@ -178,7 +178,7 @@ class Tests_GenerateRmat : public ::testing::TestWithParam CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement - std::tie(d_srcs, d_dsts) = cugraph::experimental::generate_rmat_edgelist( + std::tie(d_srcs, d_dsts) = cugraph::generate_rmat_edgelist( handle, configuration.scale, (size_t{1} << configuration.scale) * configuration.edge_factor, @@ -186,8 +186,8 @@ class Tests_GenerateRmat : public ::testing::TestWithParam configuration.b, configuration.c, uint64_t{0}, - configuration.clip_and_flip, - static_cast(scramble)); + configuration.clip_and_flip); + // static_cast(scramble)); CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -290,15 +290,15 @@ typedef struct GenerateRmats_Usecase_t { size_t min_scale{0}; size_t max_scale{0}; size_t edge_factor{0}; - cugraph::experimental::generator_distribution_t component_distribution; - cugraph::experimental::generator_distribution_t edge_distribution; + cugraph::generator_distribution_t component_distribution; + cugraph::generator_distribution_t edge_distribution; GenerateRmats_Usecase_t(size_t n_edgelists, size_t min_scale, size_t max_scale, size_t edge_factor, - cugraph::experimental::generator_distribution_t component_distribution, - cugraph::experimental::generator_distribution_t edge_distribution) + cugraph::generator_distribution_t component_distribution, + cugraph::generator_distribution_t edge_distribution) : n_edgelists(n_edgelists), min_scale(min_scale), max_scale(max_scale), @@ -322,15 +322,14 @@ class Tests_GenerateRmats : public ::testing::TestWithParam(handle, - configuration.n_edgelists, - configuration.min_scale, - configuration.max_scale, - configuration.edge_factor, - configuration.component_distribution, - configuration.edge_distribution, - uint64_t{0}); + auto outputs = cugraph::generate_rmat_edgelists(handle, + configuration.n_edgelists, + configuration.min_scale, + configuration.max_scale, + configuration.edge_factor, + configuration.component_distribution, + configuration.edge_distribution, + uint64_t{0}); CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement ASSERT_EQ(configuration.n_edgelists, outputs.size()); @@ -346,29 +345,28 @@ TEST_P(Tests_GenerateRmats, CheckInt32) { run_current_test(GetParam()); INSTANTIATE_TEST_SUITE_P( simple_test, Tests_GenerateRmats, - ::testing::Values( - GenerateRmats_Usecase(8, - 1, - 16, - 32, - cugraph::experimental::generator_distribution_t::UNIFORM, - 
cugraph::experimental::generator_distribution_t::UNIFORM), - GenerateRmats_Usecase(8, - 1, - 16, - 32, - cugraph::experimental::generator_distribution_t::UNIFORM, - cugraph::experimental::generator_distribution_t::POWER_LAW), - GenerateRmats_Usecase(8, - 3, - 16, - 32, - cugraph::experimental::generator_distribution_t::POWER_LAW, - cugraph::experimental::generator_distribution_t::UNIFORM), - GenerateRmats_Usecase(8, - 3, - 16, - 32, - cugraph::experimental::generator_distribution_t::POWER_LAW, - cugraph::experimental::generator_distribution_t::POWER_LAW))); + ::testing::Values(GenerateRmats_Usecase(8, + 1, + 16, + 32, + cugraph::generator_distribution_t::UNIFORM, + cugraph::generator_distribution_t::UNIFORM), + GenerateRmats_Usecase(8, + 1, + 16, + 32, + cugraph::generator_distribution_t::UNIFORM, + cugraph::generator_distribution_t::POWER_LAW), + GenerateRmats_Usecase(8, + 3, + 16, + 32, + cugraph::generator_distribution_t::POWER_LAW, + cugraph::generator_distribution_t::UNIFORM), + GenerateRmats_Usecase(8, + 3, + 16, + 32, + cugraph::generator_distribution_t::POWER_LAW, + cugraph::generator_distribution_t::POWER_LAW))); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/katz_centrality_test.cpp b/cpp/tests/experimental/katz_centrality_test.cpp index d0fc558c89f..aa66e69d4f7 100644 --- a/cpp/tests/experimental/katz_centrality_test.cpp +++ b/cpp/tests/experimental/katz_centrality_test.cpp @@ -16,6 +16,7 @@ #include #include +#include #include #include diff --git a/cpp/tests/experimental/mg_bfs_test.cpp b/cpp/tests/experimental/mg_bfs_test.cpp index a832e0f99ac..04eb1bf7b43 100644 --- a/cpp/tests/experimental/mg_bfs_test.cpp +++ b/cpp/tests/experimental/mg_bfs_test.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -292,20 +293,20 @@ INSTANTIATE_TEST_SUITE_P( cugraph::test::File_Usecase("test/datasets/ljournal-2008.mtx"), cugraph::test::File_Usecase("test/datasets/webbase-1M.mtx")))); -INSTANTIATE_TEST_SUITE_P( - rmat_small_test, - Tests_MGBFS_Rmat, - ::testing::Values( - // enable correctness checks - std::make_tuple(BFS_Usecase{0}, - cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, false, false, true)))); - -INSTANTIATE_TEST_SUITE_P( - rmat_large_test, - Tests_MGBFS_Rmat, - ::testing::Values( - // disable correctness checks for large graphs - std::make_tuple(BFS_Usecase{0, false}, - cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false, true)))); +INSTANTIATE_TEST_SUITE_P(rmat_small_test, + Tests_MGBFS_Rmat, + ::testing::Values( + // enable correctness checks + std::make_tuple(BFS_Usecase{0}, + cugraph::test::Rmat_Usecase( + 10, 16, 0.57, 0.19, 0.19, 0, false, false, 0, true)))); + +INSTANTIATE_TEST_SUITE_P(rmat_large_test, + Tests_MGBFS_Rmat, + ::testing::Values( + // disable correctness checks for large graphs + std::make_tuple(BFS_Usecase{0, false}, + cugraph::test::Rmat_Usecase( + 20, 32, 0.57, 0.19, 0.19, 0, false, false, 0, true)))); CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/mg_katz_centrality_test.cpp b/cpp/tests/experimental/mg_katz_centrality_test.cpp index d67cd9090b8..27ef64d124e 100644 --- a/cpp/tests/experimental/mg_katz_centrality_test.cpp +++ b/cpp/tests/experimental/mg_katz_centrality_test.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -250,7 +251,7 @@ INSTANTIATE_TEST_SUITE_P(rmat_small_test, ::testing::Values(KatzCentrality_Usecase{false}, KatzCentrality_Usecase{true}), ::testing::Values(cugraph::test::Rmat_Usecase( - 10, 16, 0.57, 0.19, 0.19, 0, 
false, false, true)))); + 10, 16, 0.57, 0.19, 0.19, 0, false, false, 0, true)))); INSTANTIATE_TEST_SUITE_P(rmat_large_test, Tests_MGKatzCentrality_Rmat, @@ -259,6 +260,6 @@ INSTANTIATE_TEST_SUITE_P(rmat_large_test, ::testing::Values(KatzCentrality_Usecase{false, false}, KatzCentrality_Usecase{true, false}), ::testing::Values(cugraph::test::Rmat_Usecase( - 20, 32, 0.57, 0.19, 0.19, 0, false, false, true)))); + 20, 32, 0.57, 0.19, 0.19, 0, false, false, 0, true)))); CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/mg_sssp_test.cpp b/cpp/tests/experimental/mg_sssp_test.cpp index 8568545cbd6..da5120163df 100644 --- a/cpp/tests/experimental/mg_sssp_test.cpp +++ b/cpp/tests/experimental/mg_sssp_test.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -298,20 +299,20 @@ INSTANTIATE_TEST_SUITE_P( std::make_tuple(SSSP_Usecase{1000}, cugraph::test::File_Usecase("test/datasets/wiki2003.mtx")))); -INSTANTIATE_TEST_SUITE_P( - rmat_small_test, - Tests_MGSSSP_Rmat, - ::testing::Values( - // enable correctness checks - std::make_tuple(SSSP_Usecase{0}, - cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, false, false, true)))); - -INSTANTIATE_TEST_SUITE_P( - rmat_large_test, - Tests_MGSSSP_Rmat, - ::testing::Values( - // disable correctness checks for large graphs - std::make_tuple(SSSP_Usecase{0, false}, - cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false, true)))); +INSTANTIATE_TEST_SUITE_P(rmat_small_test, + Tests_MGSSSP_Rmat, + ::testing::Values( + // enable correctness checks + std::make_tuple(SSSP_Usecase{0}, + cugraph::test::Rmat_Usecase( + 10, 16, 0.57, 0.19, 0.19, 0, false, false, 0, true)))); + +INSTANTIATE_TEST_SUITE_P(rmat_large_test, + Tests_MGSSSP_Rmat, + ::testing::Values( + // disable correctness checks for large graphs + std::make_tuple(SSSP_Usecase{0, false}, + cugraph::test::Rmat_Usecase( + 20, 32, 0.57, 0.19, 0.19, 0, false, false, 0, true)))); CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/pagerank_test.cpp b/cpp/tests/experimental/pagerank_test.cpp index 9b07059d2da..5c0b0f288d4 100644 --- a/cpp/tests/experimental/pagerank_test.cpp +++ b/cpp/tests/experimental/pagerank_test.cpp @@ -16,6 +16,7 @@ #include #include +#include #include #include diff --git a/cpp/tests/experimental/sssp_test.cpp b/cpp/tests/experimental/sssp_test.cpp index e12df163551..9a50553a114 100644 --- a/cpp/tests/experimental/sssp_test.cpp +++ b/cpp/tests/experimental/sssp_test.cpp @@ -16,6 +16,7 @@ #include #include +#include #include #include diff --git a/cpp/tests/generators/erdos_renyi_test.cpp b/cpp/tests/generators/erdos_renyi_test.cpp new file mode 100644 index 00000000000..c91a9af7c41 --- /dev/null +++ b/cpp/tests/generators/erdos_renyi_test.cpp @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governin_from_mtxg permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include +#include + +#include + +#include + +struct GenerateErdosRenyiTest : public ::testing::Test { +}; + +template +void test_symmetric(std::vector &h_src_v, std::vector &h_dst_v) +{ + std::vector reverse_src_v(h_src_v.size()); + std::vector reverse_dst_v(h_dst_v.size()); + + std::copy(h_src_v.begin(), h_src_v.end(), reverse_dst_v.begin()); + std::copy(h_dst_v.begin(), h_dst_v.end(), reverse_src_v.begin()); + + thrust::sort(thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(h_src_v.begin(), h_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(h_src_v.end(), h_dst_v.end()))); + + thrust::sort( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(reverse_src_v.begin(), reverse_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(reverse_src_v.end(), reverse_dst_v.end()))); + + EXPECT_EQ(reverse_src_v, h_src_v); + EXPECT_EQ(reverse_dst_v, h_dst_v); +} + +template +void er_test(size_t num_vertices, float p) +{ + raft::handle_t handle; + rmm::device_uvector d_src_v(0, handle.get_stream()); + rmm::device_uvector d_dst_v(0, handle.get_stream()); + + std::tie(d_src_v, d_dst_v) = + cugraph::generate_erdos_renyi_graph_edgelist_gnp(handle, num_vertices, p, 0); + + handle.get_stream_view().synchronize(); + + std::vector h_src_v(d_src_v.size()); + std::vector h_dst_v(d_dst_v.size()); + + raft::update_host(h_src_v.data(), d_src_v.data(), d_src_v.size(), handle.get_stream()); + raft::update_host(h_dst_v.data(), d_dst_v.data(), d_dst_v.size(), handle.get_stream()); + + handle.get_stream_view().synchronize(); + + float expected_edge_count = p * num_vertices * num_vertices; + + ASSERT_GE(h_src_v.size(), static_cast(expected_edge_count * 0.8)); + ASSERT_LE(h_src_v.size(), static_cast(expected_edge_count * 1.2)); + ASSERT_EQ(std::count_if(h_src_v.begin(), + h_src_v.end(), + [n = static_cast(num_vertices)](auto v) { + return !cugraph::experimental::is_valid_vertex(n, v); + }), + 0); + ASSERT_EQ(std::count_if(h_dst_v.begin(), + h_dst_v.end(), + [n = static_cast(num_vertices)](auto v) { + return !cugraph::experimental::is_valid_vertex(n, v); + }), + 0); +} + +TEST_F(GenerateErdosRenyiTest, ERTest) +{ + er_test(size_t{10}, float{0.1}); + er_test(size_t{20}, float{0.1}); + er_test(size_t{50}, float{0.1}); + er_test(size_t{10000}, float{0.1}); +} + +CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/generators/generators_test.cpp b/cpp/tests/generators/generators_test.cpp new file mode 100644 index 00000000000..11e63d81f36 --- /dev/null +++ b/cpp/tests/generators/generators_test.cpp @@ -0,0 +1,689 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include +#include + +#include +#include +#include + +#include + +struct GeneratorsTest : public ::testing::Test { +}; + +TEST_F(GeneratorsTest, PathGraphTest) +{ + using vertex_t = int32_t; + + std::vector expected_src_v({0, 1, 2, 3}); + std::vector expected_dst_v({1, 2, 3, 4}); + std::vector actual_src_v; + std::vector actual_dst_v; + + std::vector> parameters({{5, 0}}); + + raft::handle_t handle; + + rmm::device_uvector src_v(0, handle.get_stream()); + rmm::device_uvector dst_v(0, handle.get_stream()); + + std::tie(src_v, dst_v) = cugraph::generate_path_graph_edgelist(handle, parameters); + + actual_src_v.resize(src_v.size()); + actual_dst_v.resize(src_v.size()); + + raft::update_host(actual_src_v.data(), src_v.data(), src_v.size(), handle.get_stream()); + raft::update_host(actual_dst_v.data(), dst_v.data(), dst_v.size(), handle.get_stream()); + + EXPECT_EQ(expected_src_v, actual_src_v); + EXPECT_EQ(expected_dst_v, actual_dst_v); +} + +TEST_F(GeneratorsTest, Mesh2DGraphTest) +{ + using vertex_t = int32_t; + + std::vector expected_src_v({0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 16, 17, 18, + 20, 21, 22, 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19}); + std::vector expected_dst_v({1, 2, 3, 5, 6, 7, 9, 10, 11, 13, 14, 15, 17, 18, 19, + 21, 22, 23, 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23}); + std::vector actual_src_v; + std::vector actual_dst_v; + + raft::handle_t handle; + + rmm::device_uvector src_v(0, handle.get_stream()); + rmm::device_uvector dst_v(0, handle.get_stream()); + + std::vector> parameters( + {{4, 2, 0}, {4, 2, 8}, {4, 2, 16}}); + + std::tie(src_v, dst_v) = cugraph::generate_2d_mesh_graph_edgelist(handle, parameters); + + actual_src_v.resize(src_v.size()); + actual_dst_v.resize(src_v.size()); + + raft::update_host(actual_src_v.data(), src_v.data(), src_v.size(), handle.get_stream()); + raft::update_host(actual_dst_v.data(), dst_v.data(), dst_v.size(), handle.get_stream()); + + thrust::sort( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(expected_src_v.begin(), expected_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(expected_src_v.begin(), expected_dst_v.begin())) + + expected_src_v.size()); + + thrust::sort( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(actual_src_v.begin(), actual_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(actual_src_v.begin(), actual_dst_v.begin())) + + actual_src_v.size()); + + EXPECT_EQ(expected_src_v, actual_src_v); + EXPECT_EQ(expected_dst_v, actual_dst_v); +} + +TEST_F(GeneratorsTest, Mesh3DGraphTest) +{ + using vertex_t = int32_t; + + std::vector expected_src_v( + {0, 1, 3, 4, 6, 7, 9, 10, 12, 13, 15, 16, 18, 19, 21, 22, 24, 25, 27, 28, 30, 31, 33, 34, + 36, 37, 39, 40, 42, 43, 45, 46, 48, 49, 51, 52, 54, 55, 57, 58, 60, 61, 63, 64, 66, 67, 69, 70, + 72, 73, 75, 76, 78, 79, 0, 1, 2, 3, 4, 5, 9, 10, 11, 12, 13, 14, 18, 19, 20, 21, 22, 23, + 27, 28, 29, 30, 31, 32, 36, 37, 38, 39, 40, 41, 45, 46, 47, 48, 49, 50, 54, 55, 56, 57, 58, 59, + 63, 64, 65, 66, 67, 68, 72, 73, 74, 75, 76, 77, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, + 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71}); + + std::vector expected_dst_v( + {1, 2, 4, 5, 7, 8, 10, 11, 13, 14, 16, 17, 19, 20, 22, 23, 25, 26, 28, 29, 31, 32, 34, 35, + 37, 38, 40, 41, 43, 44, 46, 47, 49, 50, 52, 53, 55, 56, 58, 59, 61, 62, 64, 65, 67, 68, 70, 71, + 73, 74, 76, 77, 79, 80, 3, 4, 5, 6, 7, 8, 12, 13, 
14, 15, 16, 17, 21, 22, 23, 24, 25, 26, + 30, 31, 32, 33, 34, 35, 39, 40, 41, 42, 43, 44, 48, 49, 50, 51, 52, 53, 57, 58, 59, 60, 61, 62, + 66, 67, 68, 69, 70, 71, 75, 76, 77, 78, 79, 80, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 26, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, + 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80}); + + std::vector actual_src_v; + std::vector actual_dst_v; + + raft::handle_t handle; + + rmm::device_uvector src_v(0, handle.get_stream()); + rmm::device_uvector dst_v(0, handle.get_stream()); + + std::vector> parameters( + {{3, 3, 3, 0}, {3, 3, 3, 27}, {3, 3, 3, 54}}); + + std::tie(src_v, dst_v) = cugraph::generate_3d_mesh_graph_edgelist(handle, parameters); + + actual_src_v.resize(src_v.size()); + actual_dst_v.resize(src_v.size()); + + raft::update_host(actual_src_v.data(), src_v.data(), src_v.size(), handle.get_stream()); + raft::update_host(actual_dst_v.data(), dst_v.data(), dst_v.size(), handle.get_stream()); + + thrust::sort( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(expected_src_v.begin(), expected_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(expected_src_v.begin(), expected_dst_v.begin())) + + expected_src_v.size()); + + thrust::sort( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(actual_src_v.begin(), actual_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(actual_src_v.begin(), actual_dst_v.begin())) + + actual_src_v.size()); + + EXPECT_EQ(expected_src_v, actual_src_v); + EXPECT_EQ(expected_dst_v, actual_dst_v); +} + +TEST_F(GeneratorsTest, CompleteGraphTestTriangles) +{ + using vertex_t = int32_t; + + std::vector expected_src_v({0, 0, 1, 3, 3, 4, 6, 6, 7}); + std::vector expected_dst_v({1, 2, 2, 4, 5, 5, 7, 8, 8}); + std::vector actual_src_v; + std::vector actual_dst_v; + + raft::handle_t handle; + + rmm::device_uvector src_v(0, handle.get_stream()); + rmm::device_uvector dst_v(0, handle.get_stream()); + + std::vector> parameters({{3, 0}, {3, 3}, {3, 6}}); + + std::tie(src_v, dst_v) = cugraph::generate_complete_graph_edgelist(handle, parameters); + + actual_src_v.resize(src_v.size()); + actual_dst_v.resize(src_v.size()); + + raft::update_host(actual_src_v.data(), src_v.data(), src_v.size(), handle.get_stream()); + raft::update_host(actual_dst_v.data(), dst_v.data(), dst_v.size(), handle.get_stream()); + + thrust::sort( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(expected_src_v.begin(), expected_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(expected_src_v.begin(), expected_dst_v.begin())) + + expected_src_v.size()); + + thrust::sort( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(actual_src_v.begin(), actual_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(actual_src_v.begin(), actual_dst_v.begin())) + + actual_src_v.size()); + + EXPECT_EQ(expected_src_v, actual_src_v); + EXPECT_EQ(expected_dst_v, actual_dst_v); +} + +TEST_F(GeneratorsTest, CompleteGraphTest5) +{ + using vertex_t = int32_t; + + size_t num_vertices{5}; + size_t num_graphs{3}; + + std::vector expected_src_v({0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 5, 5, 5, 5, 6, + 6, 6, 7, 7, 8, 10, 10, 10, 10, 11, 11, 11, 12, 12, 13}); + std::vector expected_dst_v({1, 2, 3, 4, 2, 3, 4, 3, 4, 4, 6, 7, 8, 9, 7, + 8, 9, 8, 9, 9, 11, 12, 13, 14, 12, 13, 14, 13, 14, 14}); + std::vector actual_src_v; + std::vector actual_dst_v; + + raft::handle_t handle; + + rmm::device_uvector src_v(0, handle.get_stream()); + 
rmm::device_uvector dst_v(0, handle.get_stream()); + + std::vector> parameters({{5, 0}, {5, 5}, {5, 10}}); + + std::tie(src_v, dst_v) = cugraph::generate_complete_graph_edgelist(handle, parameters); + + actual_src_v.resize(src_v.size()); + actual_dst_v.resize(src_v.size()); + + raft::update_host(actual_src_v.data(), src_v.data(), src_v.size(), handle.get_stream()); + raft::update_host(actual_dst_v.data(), dst_v.data(), dst_v.size(), handle.get_stream()); + + thrust::sort( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(expected_src_v.begin(), expected_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(expected_src_v.begin(), expected_dst_v.begin())) + + expected_src_v.size()); + + thrust::sort( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(actual_src_v.begin(), actual_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(actual_src_v.begin(), actual_dst_v.begin())) + + actual_src_v.size()); + + EXPECT_EQ(expected_src_v, actual_src_v); + EXPECT_EQ(expected_dst_v, actual_dst_v); +} + +TEST_F(GeneratorsTest, LineGraphTestSymmetric) +{ + using vertex_t = int32_t; + + size_t num_vertices{5}; + std::vector expected_src_v({0, 1, 2, 3, 1, 2, 3, 4}); + std::vector expected_dst_v({1, 2, 3, 4, 0, 1, 2, 3}); + std::vector actual_src_v; + std::vector actual_dst_v; + + raft::handle_t handle; + + rmm::device_uvector src_v(0, handle.get_stream()); + rmm::device_uvector dst_v(0, handle.get_stream()); + + std::vector> parameters({{5, 0}}); + + std::tie(src_v, dst_v) = cugraph::generate_path_graph_edgelist(handle, parameters); + std::tie(src_v, dst_v, std::ignore) = cugraph::symmetrize_edgelist( + handle, std::move(src_v), std::move(dst_v), std::nullopt); + + actual_src_v.resize(src_v.size()); + actual_dst_v.resize(src_v.size()); + + raft::update_host(actual_src_v.data(), src_v.data(), src_v.size(), handle.get_stream()); + raft::update_host(actual_dst_v.data(), dst_v.data(), dst_v.size(), handle.get_stream()); + + thrust::sort( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(expected_src_v.begin(), expected_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(expected_src_v.begin(), expected_dst_v.begin())) + + expected_src_v.size()); + + thrust::sort( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(actual_src_v.begin(), actual_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(actual_src_v.begin(), actual_dst_v.begin())) + + actual_src_v.size()); + + EXPECT_EQ(expected_src_v, actual_src_v); + EXPECT_EQ(expected_dst_v, actual_dst_v); +} + +TEST_F(GeneratorsTest, Mesh2DGraphTestSymmetric) +{ + using vertex_t = int32_t; + + size_t x{4}; + size_t y{2}; + size_t num_graphs{3}; + + std::vector expected_src_v({0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 16, 17, 18, + 20, 21, 22, 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, + 1, 2, 3, 5, 6, 7, 9, 10, 11, 13, 14, 15, 17, 18, 19, + 21, 22, 23, 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23}); + std::vector expected_dst_v({1, 2, 3, 5, 6, 7, 9, 10, 11, 13, 14, 15, 17, 18, 19, + 21, 22, 23, 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, + 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 16, 17, 18, + 20, 21, 22, 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19}); + std::vector actual_src_v; + std::vector actual_dst_v; + + raft::handle_t handle; + + rmm::device_uvector src_v(0, handle.get_stream()); + rmm::device_uvector dst_v(0, handle.get_stream()); + + std::vector> parameters( + {{4, 2, 0}, {4, 2, 8}, {4, 2, 16}}); + + std::tie(src_v, dst_v) = cugraph::generate_2d_mesh_graph_edgelist(handle, 
parameters); + std::tie(src_v, dst_v, std::ignore) = cugraph::symmetrize_edgelist( + handle, std::move(src_v), std::move(dst_v), std::nullopt); + + actual_src_v.resize(src_v.size()); + actual_dst_v.resize(src_v.size()); + + raft::update_host(actual_src_v.data(), src_v.data(), src_v.size(), handle.get_stream()); + raft::update_host(actual_dst_v.data(), dst_v.data(), dst_v.size(), handle.get_stream()); + + thrust::sort( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(expected_src_v.begin(), expected_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(expected_src_v.begin(), expected_dst_v.begin())) + + expected_src_v.size()); + + thrust::sort( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(actual_src_v.begin(), actual_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(actual_src_v.begin(), actual_dst_v.begin())) + + actual_src_v.size()); + + EXPECT_EQ(expected_src_v, actual_src_v); + EXPECT_EQ(expected_dst_v, actual_dst_v); +} + +TEST_F(GeneratorsTest, Mesh3DGraphTestSymmetric) +{ + using vertex_t = int32_t; + + size_t x{3}; + size_t y{3}; + size_t z{3}; + size_t num_graphs{3}; + + std::vector expected_src_v( + {0, 1, 3, 4, 6, 7, 9, 10, 12, 13, 15, 16, 18, 19, 21, 22, 24, 25, 27, 28, 30, 31, 33, 34, + 36, 37, 39, 40, 42, 43, 45, 46, 48, 49, 51, 52, 54, 55, 57, 58, 60, 61, 63, 64, 66, 67, 69, 70, + 72, 73, 75, 76, 78, 79, 0, 1, 2, 3, 4, 5, 9, 10, 11, 12, 13, 14, 18, 19, 20, 21, 22, 23, + 27, 28, 29, 30, 31, 32, 36, 37, 38, 39, 40, 41, 45, 46, 47, 48, 49, 50, 54, 55, 56, 57, 58, 59, + 63, 64, 65, 66, 67, 68, 72, 73, 74, 75, 76, 77, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, + 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 1, 2, 4, 5, 7, 8, + 10, 11, 13, 14, 16, 17, 19, 20, 22, 23, 25, 26, 28, 29, 31, 32, 34, 35, 37, 38, 40, 41, 43, 44, + 46, 47, 49, 50, 52, 53, 55, 56, 58, 59, 61, 62, 64, 65, 67, 68, 70, 71, 73, 74, 76, 77, 79, 80, + 3, 4, 5, 6, 7, 8, 12, 13, 14, 15, 16, 17, 21, 22, 23, 24, 25, 26, 30, 31, 32, 33, 34, 35, + 39, 40, 41, 42, 43, 44, 48, 49, 50, 51, 52, 53, 57, 58, 59, 60, 61, 62, 66, 67, 68, 69, 70, 71, + 75, 76, 77, 78, 79, 80, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, + 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 63, 64, 65, 66, 67, 68, + 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80}); + + std::vector expected_dst_v( + {1, 2, 4, 5, 7, 8, 10, 11, 13, 14, 16, 17, 19, 20, 22, 23, 25, 26, 28, 29, 31, 32, 34, 35, + 37, 38, 40, 41, 43, 44, 46, 47, 49, 50, 52, 53, 55, 56, 58, 59, 61, 62, 64, 65, 67, 68, 70, 71, + 73, 74, 76, 77, 79, 80, 3, 4, 5, 6, 7, 8, 12, 13, 14, 15, 16, 17, 21, 22, 23, 24, 25, 26, + 30, 31, 32, 33, 34, 35, 39, 40, 41, 42, 43, 44, 48, 49, 50, 51, 52, 53, 57, 58, 59, 60, 61, 62, + 66, 67, 68, 69, 70, 71, 75, 76, 77, 78, 79, 80, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 26, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, + 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 0, 1, 3, 4, 6, 7, + 9, 10, 12, 13, 15, 16, 18, 19, 21, 22, 24, 25, 27, 28, 30, 31, 33, 34, 36, 37, 39, 40, 42, 43, + 45, 46, 48, 49, 51, 52, 54, 55, 57, 58, 60, 61, 63, 64, 66, 67, 69, 70, 72, 73, 75, 76, 78, 79, + 0, 1, 2, 3, 4, 5, 9, 10, 11, 12, 13, 14, 18, 19, 20, 21, 22, 23, 27, 28, 29, 30, 31, 32, + 36, 37, 38, 39, 40, 41, 45, 46, 47, 48, 49, 50, 54, 55, 56, 57, 58, 59, 63, 64, 65, 66, 67, 68, 
+ 72, 73, 74, 75, 76, 77, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71}); + + std::vector actual_src_v; + std::vector actual_dst_v; + + raft::handle_t handle; + + rmm::device_uvector src_v(0, handle.get_stream()); + rmm::device_uvector dst_v(0, handle.get_stream()); + + std::vector> parameters( + {{3, 3, 3, 0}, {3, 3, 3, 27}, {3, 3, 3, 54}}); + + std::tie(src_v, dst_v) = cugraph::generate_3d_mesh_graph_edgelist(handle, parameters); + std::tie(src_v, dst_v, std::ignore) = cugraph::symmetrize_edgelist( + handle, std::move(src_v), std::move(dst_v), std::nullopt); + + actual_src_v.resize(src_v.size()); + actual_dst_v.resize(src_v.size()); + + raft::update_host(actual_src_v.data(), src_v.data(), src_v.size(), handle.get_stream()); + raft::update_host(actual_dst_v.data(), dst_v.data(), dst_v.size(), handle.get_stream()); + + thrust::sort( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(expected_src_v.begin(), expected_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(expected_src_v.begin(), expected_dst_v.begin())) + + expected_src_v.size()); + + thrust::sort( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(actual_src_v.begin(), actual_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(actual_src_v.begin(), actual_dst_v.begin())) + + actual_src_v.size()); + + EXPECT_EQ(expected_src_v, actual_src_v); + EXPECT_EQ(expected_dst_v, actual_dst_v); +} + +TEST_F(GeneratorsTest, CompleteGraphTestTrianglesSymmetric) +{ + using vertex_t = int32_t; + + size_t num_vertices{3}; + size_t num_graphs{3}; + + std::vector expected_src_v({0, 0, 1, 3, 3, 4, 6, 6, 7, 1, 2, 2, 4, 5, 5, 7, 8, 8}); + std::vector expected_dst_v({1, 2, 2, 4, 5, 5, 7, 8, 8, 0, 0, 1, 3, 3, 4, 6, 6, 7}); + std::vector actual_src_v; + std::vector actual_dst_v; + + raft::handle_t handle; + + rmm::device_uvector src_v(0, handle.get_stream()); + rmm::device_uvector dst_v(0, handle.get_stream()); + + std::vector> parameters({{3, 0}, {3, 3}, {3, 6}}); + + std::tie(src_v, dst_v) = cugraph::generate_complete_graph_edgelist(handle, parameters); + std::tie(src_v, dst_v, std::ignore) = cugraph::symmetrize_edgelist( + handle, std::move(src_v), std::move(dst_v), std::nullopt); + + actual_src_v.resize(src_v.size()); + actual_dst_v.resize(src_v.size()); + + raft::update_host(actual_src_v.data(), src_v.data(), src_v.size(), handle.get_stream()); + raft::update_host(actual_dst_v.data(), dst_v.data(), dst_v.size(), handle.get_stream()); + + thrust::sort( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(expected_src_v.begin(), expected_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(expected_src_v.begin(), expected_dst_v.begin())) + + expected_src_v.size()); + + thrust::sort( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(actual_src_v.begin(), actual_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(actual_src_v.begin(), actual_dst_v.begin())) + + actual_src_v.size()); + + EXPECT_EQ(expected_src_v, actual_src_v); + EXPECT_EQ(expected_dst_v, actual_dst_v); +} + +TEST_F(GeneratorsTest, CompleteGraphTest5Symmetric) +{ + using vertex_t = int32_t; + + size_t num_vertices{5}; + size_t num_graphs{3}; + + std::vector expected_src_v({0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 5, 5, 5, 5, 6, + 6, 6, 7, 7, 8, 10, 10, 10, 10, 11, 11, 11, 12, 12, 13, + 1, 2, 3, 4, 2, 3, 4, 3, 4, 4, 6, 7, 8, 9, 7, + 8, 9, 8, 9, 
9, 11, 12, 13, 14, 12, 13, 14, 13, 14, 14}); + std::vector expected_dst_v({1, 2, 3, 4, 2, 3, 4, 3, 4, 4, 6, 7, 8, 9, 7, + 8, 9, 8, 9, 9, 11, 12, 13, 14, 12, 13, 14, 13, 14, 14, + 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 5, 5, 5, 5, 6, + 6, 6, 7, 7, 8, 10, 10, 10, 10, 11, 11, 11, 12, 12, 13}); + std::vector actual_src_v; + std::vector actual_dst_v; + + raft::handle_t handle; + + rmm::device_uvector src_v(0, handle.get_stream()); + rmm::device_uvector dst_v(0, handle.get_stream()); + + std::vector> parameters({{5, 0}, {5, 5}, {5, 10}}); + + std::tie(src_v, dst_v) = cugraph::generate_complete_graph_edgelist(handle, parameters); + std::tie(src_v, dst_v, std::ignore) = cugraph::symmetrize_edgelist( + handle, std::move(src_v), std::move(dst_v), std::nullopt); + + actual_src_v.resize(src_v.size()); + actual_dst_v.resize(src_v.size()); + + raft::update_host(actual_src_v.data(), src_v.data(), src_v.size(), handle.get_stream()); + raft::update_host(actual_dst_v.data(), dst_v.data(), dst_v.size(), handle.get_stream()); + + thrust::sort( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(expected_src_v.begin(), expected_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(expected_src_v.begin(), expected_dst_v.begin())) + + expected_src_v.size()); + + thrust::sort( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(actual_src_v.begin(), actual_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(actual_src_v.begin(), actual_dst_v.begin())) + + actual_src_v.size()); + + EXPECT_EQ(expected_src_v, actual_src_v); + EXPECT_EQ(expected_dst_v, actual_dst_v); +} + +TEST_F(GeneratorsTest, CombineGraphsTest) +{ + using vertex_t = int32_t; + using weight_t = float; + + raft::handle_t handle; + + size_t num_vertices{8}; + + std::vector expected_src_v({0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 12, 13, 14, 16, 17, 18, + 20, 21, 22, 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19}); + std::vector expected_dst_v({1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 13, 14, 15, 17, 18, 19, + 21, 22, 23, 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23}); + + rmm::device_uvector src_graph_1_v(0, handle.get_stream()); + rmm::device_uvector dst_graph_1_v(0, handle.get_stream()); + rmm::device_uvector src_graph_2_v(0, handle.get_stream()); + rmm::device_uvector dst_graph_2_v(0, handle.get_stream()); + rmm::device_uvector src_v(0, handle.get_stream()); + rmm::device_uvector dst_v(0, handle.get_stream()); + + std::vector> parameters1({{num_vertices, 0}}); + std::vector> parameters2( + {{4, 2, 0}, {4, 2, 8}, {4, 2, 16}}); + + std::tie(src_graph_1_v, dst_graph_1_v) = + cugraph::generate_path_graph_edgelist(handle, parameters1); + std::tie(src_graph_2_v, dst_graph_2_v) = + cugraph::generate_2d_mesh_graph_edgelist(handle, parameters2); + + std::vector> sources; + sources.push_back(std::move(src_graph_1_v)); + sources.push_back(std::move(src_graph_2_v)); + + std::vector> dests; + dests.push_back(std::move(dst_graph_1_v)); + dests.push_back(std::move(dst_graph_2_v)); + + std::tie(src_v, dst_v, std::ignore) = cugraph::combine_edgelists( + handle, std::move(sources), std::move(dests), std::nullopt); + + std::vector actual_src_v; + std::vector actual_dst_v; + + actual_src_v.resize(src_v.size()); + actual_dst_v.resize(dst_v.size()); + + raft::update_host(actual_src_v.data(), src_v.data(), src_v.size(), handle.get_stream()); + raft::update_host(actual_dst_v.data(), dst_v.data(), dst_v.size(), handle.get_stream()); + + thrust::sort( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(expected_src_v.begin(), expected_dst_v.begin())), + 
thrust::make_zip_iterator(thrust::make_tuple(expected_src_v.begin(), expected_dst_v.begin())) + + expected_src_v.size()); + + thrust::sort( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(actual_src_v.begin(), actual_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(actual_src_v.begin(), actual_dst_v.begin())) + + actual_src_v.size()); + + EXPECT_EQ(expected_src_v, actual_src_v); + EXPECT_EQ(expected_dst_v, actual_dst_v); +} + +TEST_F(GeneratorsTest, CombineGraphsOffsetsTest) +{ + using vertex_t = int32_t; + using weight_t = float; + + raft::handle_t handle; + + size_t num_vertices{8}; + vertex_t offset{10}; + + std::vector expected_src_v({0, 1, 2, 3, 4, 5, 6, 10, 11, 12, 14, 15, 16, + 18, 19, 20, 22, 23, 24, 26, 27, 28, 30, 31, 32, 10, + 11, 12, 13, 18, 19, 20, 21, 26, 27, 28, 29}); + std::vector expected_dst_v({1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 15, 16, 17, + 19, 20, 21, 23, 24, 25, 27, 28, 29, 31, 32, 33, 14, + 15, 16, 17, 22, 23, 24, 25, 30, 31, 32, 33}); + + rmm::device_uvector src_graph_1_v(0, handle.get_stream()); + rmm::device_uvector dst_graph_1_v(0, handle.get_stream()); + rmm::device_uvector src_graph_2_v(0, handle.get_stream()); + rmm::device_uvector dst_graph_2_v(0, handle.get_stream()); + rmm::device_uvector src_v(0, handle.get_stream()); + rmm::device_uvector dst_v(0, handle.get_stream()); + + std::vector> parameters1({{num_vertices, 0}}); + std::vector> parameters2( + {{4, 2, 10}, {4, 2, 18}, {4, 2, 26}}); + + std::tie(src_graph_1_v, dst_graph_1_v) = + cugraph::generate_path_graph_edgelist(handle, parameters1); + std::tie(src_graph_2_v, dst_graph_2_v) = + cugraph::generate_2d_mesh_graph_edgelist(handle, parameters2); + + std::vector> sources; + sources.push_back(std::move(src_graph_1_v)); + sources.push_back(std::move(src_graph_2_v)); + + std::vector> dests; + dests.push_back(std::move(dst_graph_1_v)); + dests.push_back(std::move(dst_graph_2_v)); + + std::tie(src_v, dst_v, std::ignore) = cugraph::combine_edgelists( + handle, std::move(sources), std::move(dests), std::nullopt); + + std::vector actual_src_v; + std::vector actual_dst_v; + + actual_src_v.resize(src_v.size()); + actual_dst_v.resize(dst_v.size()); + + raft::update_host(actual_src_v.data(), src_v.data(), src_v.size(), handle.get_stream()); + raft::update_host(actual_dst_v.data(), dst_v.data(), dst_v.size(), handle.get_stream()); + + thrust::sort( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(expected_src_v.begin(), expected_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(expected_src_v.begin(), expected_dst_v.begin())) + + expected_src_v.size()); + + thrust::sort( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(actual_src_v.begin(), actual_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(actual_src_v.begin(), actual_dst_v.begin())) + + actual_src_v.size()); + + EXPECT_EQ(expected_src_v, actual_src_v); + EXPECT_EQ(expected_dst_v, actual_dst_v); +} + +TEST_F(GeneratorsTest, ScrambleTest) +{ + using vertex_t = int32_t; + using edge_t = int32_t; + + edge_t num_vertices{30}; + edge_t num_edges{100}; + + raft::handle_t handle; + + std::vector input_src_v(num_edges); + std::vector input_dst_v(num_edges); + + std::default_random_engine generator{}; + std::uniform_int_distribution distribution{0, num_vertices - 1}; + + std::generate(input_src_v.begin(), input_src_v.end(), [&distribution, &generator]() { + return distribution(generator); + }); + std::generate(input_dst_v.begin(), input_dst_v.end(), [&distribution, &generator]() { + 
return distribution(generator); + }); + + rmm::device_uvector d_src_v(input_src_v.size(), handle.get_stream()); + rmm::device_uvector d_dst_v(input_src_v.size(), handle.get_stream()); + std::vector output_src_v(input_src_v.size()); + std::vector output_dst_v(input_src_v.size()); + + raft::update_device(d_src_v.data(), input_src_v.data(), input_src_v.size(), handle.get_stream()); + raft::update_device(d_dst_v.data(), input_dst_v.data(), input_dst_v.size(), handle.get_stream()); + + cugraph::scramble_vertex_ids(handle, d_src_v, d_dst_v, 5, 0); + + raft::update_host(output_src_v.data(), d_src_v.data(), d_src_v.size(), handle.get_stream()); + raft::update_host(output_dst_v.data(), d_dst_v.data(), d_dst_v.size(), handle.get_stream()); + + EXPECT_TRUE(cugraph::test::renumbered_vectors_same(handle, input_src_v, output_src_v)); + EXPECT_TRUE(cugraph::test::renumbered_vectors_same(handle, input_dst_v, output_dst_v)); +} + +CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/pagerank/mg_pagerank_test.cpp b/cpp/tests/pagerank/mg_pagerank_test.cpp index 21a2c11f4a7..6370c7b7758 100644 --- a/cpp/tests/pagerank/mg_pagerank_test.cpp +++ b/cpp/tests/pagerank/mg_pagerank_test.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -218,8 +219,7 @@ class Tests_MGPageRank auto sg_graph_view = sg_graph.view(); - ASSERT_TRUE(mg_graph_view.get_number_of_vertices() == - sg_graph_view.get_number_of_vertices()); + ASSERT_EQ(mg_graph_view.get_number_of_vertices(), sg_graph_view.get_number_of_vertices()); // 5-4. run SG PageRank @@ -300,22 +300,24 @@ INSTANTIATE_TEST_SUITE_P( cugraph::test::File_Usecase("test/datasets/ljournal-2008.mtx"), cugraph::test::File_Usecase("test/datasets/webbase-1M.mtx")))); -INSTANTIATE_TEST_SUITE_P(rmat_small_tests, - Tests_MGPageRank_Rmat, - ::testing::Combine(::testing::Values(PageRank_Usecase{0.0, false}, - PageRank_Usecase{0.5, false}, - PageRank_Usecase{0.0, true}, - PageRank_Usecase{0.5, true}), - ::testing::Values(cugraph::test::Rmat_Usecase( - 10, 16, 0.57, 0.19, 0.19, 0, false, false, true)))); - -INSTANTIATE_TEST_SUITE_P(rmat_large_tests, - Tests_MGPageRank_Rmat, - ::testing::Combine(::testing::Values(PageRank_Usecase{0.0, false, false}, - PageRank_Usecase{0.5, false, false}, - PageRank_Usecase{0.0, true, false}, - PageRank_Usecase{0.5, true, false}), - ::testing::Values(cugraph::test::Rmat_Usecase( - 20, 32, 0.57, 0.19, 0.19, 0, false, false, true)))); +INSTANTIATE_TEST_SUITE_P( + rmat_small_tests, + Tests_MGPageRank_Rmat, + ::testing::Combine(::testing::Values(PageRank_Usecase{0.0, false}, + PageRank_Usecase{0.5, false}, + PageRank_Usecase{0.0, true}, + PageRank_Usecase{0.5, true}), + ::testing::Values(cugraph::test::Rmat_Usecase( + 10, 16, 0.57, 0.19, 0.19, 0, false, false, 0, true)))); + +INSTANTIATE_TEST_SUITE_P( + rmat_large_tests, + Tests_MGPageRank_Rmat, + ::testing::Combine(::testing::Values(PageRank_Usecase{0.0, false, false}, + PageRank_Usecase{0.5, false, false}, + PageRank_Usecase{0.0, true, false}, + PageRank_Usecase{0.5, true, false}), + ::testing::Values(cugraph::test::Rmat_Usecase( + 20, 32, 0.57, 0.19, 0.19, 0, false, false, 0, true)))); CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/utilities/rmat_utilities.cu b/cpp/tests/utilities/rmat_utilities.cu index 8f6a6cf499a..fda72fc9054 100644 --- a/cpp/tests/utilities/rmat_utilities.cu +++ b/cpp/tests/utilities/rmat_utilities.cu @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include #include @@ -94,15 +94,14 @@ generate_graph_from_rmat_params(raft::handle_t const& 
handle, rmm::device_uvector d_tmp_rows(0, handle.get_stream()); rmm::device_uvector d_tmp_cols(0, handle.get_stream()); std::tie(i == 0 ? d_edgelist_rows : d_tmp_rows, i == 0 ? d_edgelist_cols : d_tmp_cols) = - cugraph::experimental::generate_rmat_edgelist(handle, - scale, - partition_edge_counts[i], - a, - b, - c, - base_seed + id, - undirected ? true : false, - scramble_vertex_ids); + cugraph::generate_rmat_edgelist(handle, + scale, + partition_edge_counts[i], + a, + b, + c, + base_seed + id, + undirected ? true : false); rmm::device_uvector d_tmp_weights(0, handle.get_stream()); if (test_weighted) { diff --git a/cpp/tests/utilities/test_graphs.hpp b/cpp/tests/utilities/test_graphs.hpp new file mode 100644 index 00000000000..b8ee8f024b0 --- /dev/null +++ b/cpp/tests/utilities/test_graphs.hpp @@ -0,0 +1,558 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +#include +#include + +namespace cugraph { +namespace test { + +namespace detail { + +class TranslateGraph_Usecase { + public: + TranslateGraph_Usecase() = delete; + TranslateGraph_Usecase(size_t base_vertex_id = 0) : base_vertex_id_(base_vertex_id) {} + + template + void translate(raft::handle_t const& handle, + rmm::device_uvector& d_src, + rmm::device_uvector& d_dst) const + { + if (base_vertex_id_ > 0) + cugraph::test::translate_vertex_ids( + handle, d_src, d_dst, static_cast(base_vertex_id_)); + } + + size_t base_vertex_id_{}; +}; + +} // namespace detail + +class File_Usecase : public detail::TranslateGraph_Usecase { + public: + File_Usecase() = delete; + + File_Usecase(std::string const& graph_file_path, size_t base_vertex_id = 0) + : detail::TranslateGraph_Usecase(base_vertex_id) + { + if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { + graph_file_full_path_ = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; + } else { + graph_file_full_path_ = graph_file_path; + } + } + + template + std::tuple, + rmm::device_uvector, + rmm::device_uvector, + vertex_t, + bool> + construct_edgelist(raft::handle_t const& handle, bool test_weighted) const + { + rmm::device_uvector d_src_v(0, handle.get_stream()); + rmm::device_uvector d_dst_v(0, handle.get_stream()); + rmm::device_uvector d_weights_v(0, handle.get_stream()); + vertex_t num_vertices; + bool is_symmetric; + + std::tie(d_src_v, d_dst_v, d_weights_v, num_vertices, is_symmetric) = + read_edgelist_from_matrix_market_file( + handle, graph_file_full_path_, test_weighted); + + translate(handle, d_src_v, d_dst_v); + +#if 0 + if (multi_gpu) { + std::tie(d_src_v, d_dst_v) = filter_edgelist_by_gpu(handle, d_src_v, d_dst_v); + } +#endif + + return std::make_tuple( + std::move(d_src_v), + std::move(d_dst_v), + std::move(d_weights_v), + static_cast(detail::TranslateGraph_Usecase::base_vertex_id_) + num_vertices, + is_symmetric); + } + + template + std::tuple< + cugraph::experimental::graph_t, + rmm::device_uvector> + construct_graph(raft::handle_t const& 
handle, bool test_weighted, bool renumber = true) const + { + rmm::device_uvector d_src_v(0, handle.get_stream()); + rmm::device_uvector d_dst_v(0, handle.get_stream()); + rmm::device_uvector d_weights_v(0, handle.get_stream()); + vertex_t num_vertices; + bool is_symmetric; + + std::tie(d_src_v, d_dst_v, d_weights_v, num_vertices, is_symmetric) = + this->template construct_edgelist( + handle, test_weighted); + + // TODO: Consider calling construct_edgelist and creating + // a generic test function to take the edgelist and + // do the graph construction. + // + // Would be more reusable across tests + // + return read_graph_from_matrix_market_file( + handle, graph_file_full_path_, test_weighted, renumber); + } + + private: + std::string graph_file_full_path_{}; +}; + +class Rmat_Usecase : public detail::TranslateGraph_Usecase { + public: + Rmat_Usecase() = delete; + + Rmat_Usecase(size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + size_t base_vertex_id = 0, + bool multi_gpu_usecase = false) + : detail::TranslateGraph_Usecase(base_vertex_id), + scale_(scale), + edge_factor_(edge_factor), + a_(a), + b_(b), + c_(c), + seed_(seed), + undirected_(undirected), + scramble_vertex_ids_(scramble_vertex_ids), + multi_gpu_usecase_(multi_gpu_usecase) + { + } + + template + std::tuple, + rmm::device_uvector, + rmm::device_uvector, + vertex_t, + bool> + construct_edgelist(raft::handle_t const& handle, bool test_weighted) const + { + // TODO: Tease through generate_graph_from_rmat_params + // to extract the edgelist part + // Call cugraph::translate_vertex_ids(handle, d_src_v, d_dst_v, base_vertex_id_); + + CUGRAPH_FAIL("Not implemented"); + } + + template + std::tuple< + cugraph::experimental::graph_t, + rmm::device_uvector> + construct_graph(raft::handle_t const& handle, bool test_weighted, bool renumber = true) const + { + std::vector partition_ids(1); + size_t comm_size; + + if (multi_gpu_usecase_) { + auto& comm = handle.get_comms(); + comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + + partition_ids.resize(multi_gpu ? size_t{1} : static_cast(comm_size)); + + std::iota(partition_ids.begin(), + partition_ids.end(), + multi_gpu ? 
static_cast(comm_rank) : size_t{0}); + } else { + comm_size = 1; + partition_ids[0] = size_t{0}; + } + + // TODO: Need to offset by base_vertex_id_ + // static_cast(base_vertex_id_)); + // Consider using construct_edgelist like other options + return generate_graph_from_rmat_params( + handle, + scale_, + edge_factor_, + a_, + b_, + c_, + seed_, + undirected_, + scramble_vertex_ids_, + test_weighted, + renumber, + partition_ids, + comm_size); + } + + private: + size_t scale_{}; + size_t edge_factor_{}; + double a_{}; + double b_{}; + double c_{}; + uint64_t seed_{}; + bool undirected_{}; + bool scramble_vertex_ids_{}; + bool multi_gpu_usecase_{}; +}; + +class PathGraph_Usecase { + public: + PathGraph_Usecase() = delete; + + PathGraph_Usecase(std::vector> parms, + bool weighted = false, + bool scramble = false) + : parms_(parms), weighted_(weighted) + { + } + + template + std::tuple, + rmm::device_uvector, + rmm::device_uvector, + vertex_t, + bool> + construct_edgelist(raft::handle_t const& handle, bool test_weighted) const + { + rmm::device_uvector weights_v(0, handle.get_stream()); + + constexpr bool symmetric{true}; + + std::vector> converted_parms(parms_.size()); + + std::transform(parms_.begin(), parms_.end(), converted_parms.begin(), [](auto p) { + return std::make_tuple(static_cast(std::get<0>(p)), + static_cast(std::get<1>(p))); + }); + + rmm::device_uvector src_v(0, handle.get_stream()); + rmm::device_uvector dst_v(0, handle.get_stream()); + + std::tie(src_v, dst_v) = + cugraph::generate_path_graph_edgelist(handle, converted_parms); + std::tie(src_v, dst_v, std::ignore) = cugraph::symmetrize_edgelist( + handle, std::move(src_v), std::move(dst_v), std::nullopt); + + if (test_weighted) { + auto length = src_v.size(); + weights_v.resize(length, handle.get_stream()); + } + + return std::make_tuple( + std::move(src_v), std::move(dst_v), std::move(weights_v), num_vertices_, symmetric); + } + + template + std::tuple< + cugraph::experimental::graph_t, + rmm::device_uvector> + construct_graph(raft::handle_t const& handle, bool test_weighted, bool renumber = true) const + { + CUGRAPH_FAIL("not implemented"); + } + + private: + std::vector> parms_{}; + size_t num_vertices_{0}; + bool weighted_{false}; +}; + +class Mesh2DGraph_Usecase { + public: + Mesh2DGraph_Usecase() = delete; + + Mesh2DGraph_Usecase(std::vector> const& parms, bool weighted) + : parms_(parms), weighted_(weighted) + { + } + + template + std::tuple, + rmm::device_uvector, + rmm::device_uvector, + vertex_t, + bool> + construct_edgelist(raft::handle_t const& handle, bool test_weighted) const + { + } + + template + std::tuple< + cugraph::experimental::graph_t, + rmm::device_uvector> + construct_graph(raft::handle_t const& handle, bool test_weighted, bool renumber = true) const; + + private: + std::vector> parms_{}; + bool weighted_{false}; +}; + +class Mesh3DGraph_Usecase { + public: + Mesh3DGraph_Usecase() = delete; + + Mesh3DGraph_Usecase(std::vector> const& parms, + bool weighted) + : parms_(parms), weighted_(weighted) + { + } + + template + std::tuple, + rmm::device_uvector, + rmm::device_uvector, + vertex_t, + bool> + construct_edgelist(raft::handle_t const& handle, bool test_weighted) const; + + template + std::tuple< + cugraph::experimental::graph_t, + rmm::device_uvector> + construct_graph(raft::handle_t const& handle, bool test_weighted, bool renumber = true) const; + + private: + std::vector> parms_{}; + bool weighted_{false}; +}; + +class CompleteGraph_Usecase { + public: + CompleteGraph_Usecase() = delete; + + 
CompleteGraph_Usecase(std::vector> const& parms, bool weighted) + : parms_(parms), weighted_(weighted) + { + } + + template + std::tuple, + rmm::device_uvector, + rmm::device_uvector, + vertex_t, + bool> + construct_edgelist(raft::handle_t const& handle, bool test_weighted) const; + + template + std::tuple< + cugraph::experimental::graph_t, + rmm::device_uvector> + construct_graph(raft::handle_t const& handle, bool test_weighted, bool renumber = true) const; + + private: + std::vector> parms_{}; + bool weighted_{false}; +}; + +namespace detail { + +template +struct combined_construct_graph_tuple_impl { + template + std::vector, + rmm::device_uvector, + rmm::device_uvector, + vertex_t, + bool>> + construct_edges(raft::handle_t const& handle, + bool test_weighted, + generator_tuple_t const& generator_tuple) const + { + return combined_construct_graph_tuple_impl() + .construct_edges(generator_tuple) + .push_back(std::get(generator_tuple).construct_edges(handle, test_weighted)); + } +}; + +template +struct combined_construct_graph_tuple_impl { + template + std::vector, + rmm::device_uvector, + rmm::device_uvector, + vertex_t, + bool>> + construct_edges(raft::handle_t const& handle, + bool test_weighted, + generator_tuple_t const& generator_tuple) const + { + return std::vector, + rmm::device_uvector, + rmm::device_uvector, + vertex_t, + bool>>(); + } +}; + +} // namespace detail + +template +class CombinedGenerator_Usecase { + CombinedGenerator_Usecase() = delete; + + CombinedGenerator_Usecase(generator_tuple_t const& tuple) : generator_tuple_(tuple) {} + + template + std::tuple, + rmm::device_uvector, + rmm::device_uvector, + vertex_t, + bool> + construct_edgelist(raft::handle_t const& handle, bool test_weighted) const + { + size_t constexpr tuple_size{std::tuple_size::value}; + + auto edge_tuple_vector = + detail::combined_construct_graph_tuple_impl() + .construct_edges(handle, test_weighted, generator_tuple_); + + // Need to combine + CUGRAPH_FAIL("not implemented"); + } + + template + std::tuple< + cugraph::experimental::graph_t, + rmm::device_uvector> + construct_graph(raft::handle_t const& handle, bool test_weighted, bool renumber = true) const + { + // Call construct_edgelist to get tuple of edge lists + // return generate_graph_from_edgelist<...>(...) 
+ CUGRAPH_FAIL("not implemented"); + } + + private: + generator_tuple_t const& generator_tuple_; +}; + +template +std::tuple, + rmm::device_uvector> +construct_graph(raft::handle_t const& handle, + input_usecase_t const& input_usecase, + bool test_weighted, + bool renumber = true) +{ + rmm::device_uvector d_src_v(0, handle.get_stream()); + rmm::device_uvector d_dst_v(0, handle.get_stream()); + rmm::device_uvector d_weights_v(0, handle.get_stream()); + vertex_t num_vertices{0}; + bool is_symmetric{false}; + + std::tie(d_src_v, d_dst_v, d_weights_v, num_vertices, is_symmetric) = + input_usecase + .template construct_edgelist( + handle, test_weighted); + + return cugraph::experimental:: + create_graph_from_edgelist( + handle, + std::nullopt, + std::move(d_src_v), + std::move(d_dst_v), + std::move(d_weights_v), + cugraph::experimental::graph_properties_t{is_symmetric, false, test_weighted}, + renumber); +} + +} // namespace test +} // namespace cugraph diff --git a/cpp/tests/utilities/test_utilities.hpp b/cpp/tests/utilities/test_utilities.hpp index 075db1906e1..09da0556e44 100644 --- a/cpp/tests/utilities/test_utilities.hpp +++ b/cpp/tests/utilities/test_utilities.hpp @@ -18,6 +18,8 @@ #include #include +#include +#include #include #include @@ -153,122 +155,6 @@ generate_graph_from_rmat_params(raft::handle_t const& handle, std::vector const& partition_ids, size_t num_partitions); -class File_Usecase { - public: - File_Usecase() = delete; - - File_Usecase(std::string const& graph_file_path) - { - if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { - graph_file_full_path_ = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; - } else { - graph_file_full_path_ = graph_file_path; - } - } - - template - std::tuple< - cugraph::experimental::graph_t, - rmm::device_uvector> - construct_graph(raft::handle_t const& handle, bool test_weighted, bool renumber = true) const - { - return read_graph_from_matrix_market_file( - handle, graph_file_full_path_, test_weighted, renumber); - } - - private: - std::string graph_file_full_path_{}; -}; - -class Rmat_Usecase { - public: - Rmat_Usecase() = delete; - - Rmat_Usecase(size_t scale, - size_t edge_factor, - double a, - double b, - double c, - uint64_t seed, - bool undirected, - bool scramble_vertex_ids, - bool multi_gpu_usecase = false) - : scale_(scale), - edge_factor_(edge_factor), - a_(a), - b_(b), - c_(c), - seed_(seed), - undirected_(undirected), - scramble_vertex_ids_(scramble_vertex_ids), - multi_gpu_usecase_(multi_gpu_usecase) - { - } - - template - std::tuple< - cugraph::experimental::graph_t, - rmm::device_uvector> - construct_graph(raft::handle_t const& handle, bool test_weighted, bool renumber = true) const - { - std::vector partition_ids(1); - size_t comm_size; - - if (multi_gpu_usecase_) { - auto& comm = handle.get_comms(); - comm_size = comm.get_size(); - auto const comm_rank = comm.get_rank(); - - partition_ids.resize(multi_gpu ? size_t{1} : static_cast(comm_size)); - - std::iota(partition_ids.begin(), - partition_ids.end(), - multi_gpu ? 
static_cast(comm_rank) : size_t{0}); - } else { - comm_size = 1; - partition_ids[0] = size_t{0}; - } - - return generate_graph_from_rmat_params( - handle, - scale_, - edge_factor_, - a_, - b_, - c_, - seed_, - undirected_, - scramble_vertex_ids_, - test_weighted, - renumber, - partition_ids, - comm_size); - } - - private: - size_t scale_{}; - size_t edge_factor_{}; - double a_{}; - double b_{}; - double c_{}; - uint64_t seed_{}; - bool undirected_{}; - bool scramble_vertex_ids_{}; - bool multi_gpu_usecase_{}; -}; - // alias for easy customization for debug purposes: // template @@ -391,5 +277,49 @@ std::pair compare_graphs(raft::handle_t const& handle, } } +template +bool renumbered_vectors_same(raft::handle_t const& handle, + std::vector const& v1, + std::vector const& v2) +{ + if (v1.size() != v2.size()) return false; + + std::map map; + + auto iter = thrust::make_zip_iterator(thrust::make_tuple(v1.begin(), v2.begin())); + + std::for_each(iter, iter + v1.size(), [&map](auto pair) { + vertex_t e1 = thrust::get<0>(pair); + vertex_t e2 = thrust::get<1>(pair); + + map[e1] = e2; + }); + + auto error_count = std::count_if(iter, iter + v1.size(), [&map](auto pair) { + vertex_t e1 = thrust::get<0>(pair); + vertex_t e2 = thrust::get<1>(pair); + + return (map[e1] != e2); + }); + + return (error_count == 0); +} + +template +bool renumbered_vectors_same(raft::handle_t const& handle, + rmm::device_uvector const& v1, + rmm::device_uvector const& v2) +{ + if (v1.size() != v2.size()) return false; + + std::vector h_v1(v1.size()); + std::vector h_v2(v1.size()); + + raft::update_host(h_v1.data(), v1.data(), v1.size(), handle.get_stream()); + raft::update_host(h_v2.data(), v2.data(), v2.size(), handle.get_stream()); + + return renumbered_vectors_same(handle, h_v1, h_v2); +} + } // namespace test } // namespace cugraph diff --git a/cpp/tests/utilities/thrust_wrapper.cu b/cpp/tests/utilities/thrust_wrapper.cu index dfd420b1e2d..ae36582d18d 100644 --- a/cpp/tests/utilities/thrust_wrapper.cu +++ b/cpp/tests/utilities/thrust_wrapper.cu @@ -81,5 +81,53 @@ sort_by_key(raft::handle_t const& handle, int64_t const* values, size_t num_pairs); +template +void translate_vertex_ids(raft::handle_t const& handle, + rmm::device_uvector& d_src_v, + rmm::device_uvector& d_dst_v, + vertex_t vertex_id_offset) +{ + thrust::transform(rmm::exec_policy(handle.get_stream()), + d_src_v.begin(), + d_src_v.end(), + d_src_v.begin(), + [offset = vertex_id_offset] __device__(vertex_t v) { return offset + v; }); + + thrust::transform(rmm::exec_policy(handle.get_stream()), + d_dst_v.begin(), + d_dst_v.end(), + d_dst_v.begin(), + [offset = vertex_id_offset] __device__(vertex_t v) { return offset + v; }); +} + +template +void populate_vertex_ids(raft::handle_t const& handle, + rmm::device_uvector& d_vertices_v, + vertex_t vertex_id_offset) +{ + thrust::sequence(rmm::exec_policy(handle.get_stream()), + d_vertices_v.begin(), + d_vertices_v.end(), + vertex_id_offset); +} + +template void translate_vertex_ids(raft::handle_t const& handle, + rmm::device_uvector& d_src_v, + rmm::device_uvector& d_dst_v, + int32_t vertex_id_offset); + +template void translate_vertex_ids(raft::handle_t const& handle, + rmm::device_uvector& d_src_v, + rmm::device_uvector& d_dst_v, + int64_t vertex_id_offset); + +template void populate_vertex_ids(raft::handle_t const& handle, + rmm::device_uvector& d_vertices_v, + int32_t vertex_id_offset); + +template void populate_vertex_ids(raft::handle_t const& handle, + rmm::device_uvector& d_vertices_v, + int64_t 
vertex_id_offset); + } // namespace test } // namespace cugraph diff --git a/cpp/tests/utilities/thrust_wrapper.hpp b/cpp/tests/utilities/thrust_wrapper.hpp index 96f370f884c..45208a6b921 100644 --- a/cpp/tests/utilities/thrust_wrapper.hpp +++ b/cpp/tests/utilities/thrust_wrapper.hpp @@ -26,5 +26,16 @@ template std::tuple, rmm::device_uvector> sort_by_key( raft::handle_t const& handle, vertex_t const* keys, value_t const* values, size_t num_pairs); +template +void translate_vertex_ids(raft::handle_t const& handle, + rmm::device_uvector& d_src_v, + rmm::device_uvector& d_dst_v, + vertex_t vertex_id_offset); + +template +void populate_vertex_ids(raft::handle_t const& handle, + rmm::device_uvector& d_vertices_v, + vertex_t vertex_id_offset); + } // namespace test } // namespace cugraph diff --git a/python/cugraph/generators/rmat.pxd b/python/cugraph/generators/rmat.pxd index 16606b59d0f..3c51108c778 100644 --- a/python/cugraph/generators/rmat.pxd +++ b/python/cugraph/generators/rmat.pxd @@ -14,10 +14,10 @@ from libcpp cimport bool from cugraph.structure.graph_utilities cimport * from libcpp.vector cimport vector -cdef extern from "cugraph/experimental/graph_generator.hpp" namespace "cugraph::experimental": +cdef extern from "cugraph/graph_generators.hpp" namespace "cugraph": ctypedef enum generator_distribution_t: - POWER_LAW "cugraph::experimental::generator_distribution_t::POWER_LAW" - UNIFORM "cugraph::experimental::generator_distribution_t::UNIFORM" + POWER_LAW "cugraph::generator_distribution_t::POWER_LAW" + UNIFORM "cugraph::generator_distribution_t::UNIFORM" cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": From 637c13947fb443d42985a62a7ec904066b267224 Mon Sep 17 00:00:00 2001 From: Iroy30 <41401566+Iroy30@users.noreply.github.com> Date: Thu, 3 Jun 2021 14:51:52 -0500 Subject: [PATCH 286/343] Update the Random Walk binding (#1599) Closes #1579 Authors: - https://github.com/Iroy30 - Brad Rees (https://github.com/BradReesWork) Approvers: - Brad Rees (https://github.com/BradReesWork) - Andrei Schaffer (https://github.com/aschaffer) - Rick Ratzel (https://github.com/rlratzel) - Joseph Nke (https://github.com/jnke2016) - Chuck Hastings (https://github.com/ChuckHastings) URL: https://github.com/rapidsai/cugraph/pull/1599 --- cpp/include/cugraph/utilities/cython.hpp | 14 +- cpp/src/utilities/cython.cu | 36 +++- notebooks/sampling/RandomWalk.ipynb | 178 +++--------------- python/cugraph/__init__.py | 2 +- python/cugraph/sampling/__init__.py | 2 +- python/cugraph/sampling/random_walks.pxd | 8 +- python/cugraph/sampling/random_walks.py | 77 ++++---- .../cugraph/sampling/random_walks_wrapper.pyx | 59 +++++- python/cugraph/structure/graph_utilities.pxd | 5 + python/cugraph/tests/test_random_walks.py | 106 +++++++---- 10 files changed, 251 insertions(+), 236 deletions(-) diff --git a/cpp/include/cugraph/utilities/cython.hpp b/cpp/include/cugraph/utilities/cython.hpp index f187a985108..273e55bae25 100644 --- a/cpp/include/cugraph/utilities/cython.hpp +++ b/cpp/include/cugraph/utilities/cython.hpp @@ -210,6 +210,12 @@ struct random_walk_ret_t { std::unique_ptr d_sizes_; }; +struct random_walk_path_t { + std::unique_ptr d_v_offsets; + std::unique_ptr d_w_sizes; + std::unique_ptr d_w_offsets; +}; + struct graph_generator_t { std::unique_ptr d_source; std::unique_ptr d_destination; @@ -538,7 +544,13 @@ call_random_walks(raft::handle_t const& handle, graph_container_t const& graph_container, vertex_t const* ptr_start_set, edge_t num_paths, - edge_t max_depth); + edge_t 
max_depth, + bool use_padding); + +template +std::unique_ptr call_rw_paths(raft::handle_t const& handle, + index_t num_paths, + index_t const* vertex_path_sizes); // convertor from random_walks return type to COO: // diff --git a/cpp/src/utilities/cython.cu b/cpp/src/utilities/cython.cu index a95e4eb5421..e9bf9ffe031 100644 --- a/cpp/src/utilities/cython.cu +++ b/cpp/src/utilities/cython.cu @@ -879,7 +879,8 @@ call_random_walks(raft::handle_t const& handle, graph_container_t const& graph_container, vertex_t const* ptr_start_set, edge_t num_paths, - edge_t max_depth) + edge_t max_depth, + bool use_padding) { if (graph_container.weightType == numberTypeEnum::floatType) { using weight_t = float; @@ -888,7 +889,7 @@ call_random_walks(raft::handle_t const& handle, detail::create_graph(handle, graph_container); auto triplet = cugraph::experimental::random_walks( - handle, graph->view(), ptr_start_set, num_paths, max_depth); + handle, graph->view(), ptr_start_set, num_paths, max_depth, use_padding); random_walk_ret_t rw_tri{std::get<0>(triplet).size(), std::get<1>(triplet).size(), @@ -907,7 +908,7 @@ call_random_walks(raft::handle_t const& handle, detail::create_graph(handle, graph_container); auto triplet = cugraph::experimental::random_walks( - handle, graph->view(), ptr_start_set, num_paths, max_depth); + handle, graph->view(), ptr_start_set, num_paths, max_depth, use_padding); random_walk_ret_t rw_tri{std::get<0>(triplet).size(), std::get<1>(triplet).size(), @@ -924,6 +925,20 @@ call_random_walks(raft::handle_t const& handle, } } +template +std::unique_ptr call_rw_paths(raft::handle_t const& handle, + index_t num_paths, + index_t const* vertex_path_sizes) +{ + auto triplet = + cugraph::experimental::query_rw_sizes_offsets(handle, num_paths, vertex_path_sizes); + random_walk_path_t rw_path_tri{ + std::make_unique(std::get<0>(triplet).release()), + std::make_unique(std::get<1>(triplet).release()), + std::make_unique(std::get<2>(triplet).release())}; + return std::make_unique(std::move(rw_path_tri)); +} + template std::unique_ptr random_walks_to_coo(raft::handle_t const& handle, random_walk_ret_t& rw_tri) @@ -1354,21 +1369,30 @@ template std::unique_ptr call_random_walks( graph_container_t const& graph_container, int32_t const* ptr_start_set, int32_t num_paths, - int32_t max_depth); + int32_t max_depth, + bool use_padding); template std::unique_ptr call_random_walks( raft::handle_t const& handle, graph_container_t const& graph_container, int32_t const* ptr_start_set, int64_t num_paths, - int64_t max_depth); + int64_t max_depth, + bool use_padding); template std::unique_ptr call_random_walks( raft::handle_t const& handle, graph_container_t const& graph_container, int64_t const* ptr_start_set, int64_t num_paths, - int64_t max_depth); + int64_t max_depth, + bool use_padding); + +template std::unique_ptr call_rw_paths( + raft::handle_t const& handle, int32_t num_paths, int32_t const* vertex_path_sizes); + +template std::unique_ptr call_rw_paths( + raft::handle_t const& handle, int64_t num_paths, int64_t const* vertex_path_sizes); template std::unique_ptr random_walks_to_coo( raft::handle_t const& handle, random_walk_ret_t& rw_tri); diff --git a/notebooks/sampling/RandomWalk.ipynb b/notebooks/sampling/RandomWalk.ipynb index 31a521db1c1..84f8e1db07f 100644 --- a/notebooks/sampling/RandomWalk.ipynb +++ b/notebooks/sampling/RandomWalk.ipynb @@ -34,7 +34,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -45,7 +45,7 @@ }, { 
"cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -58,7 +58,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -67,7 +67,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -78,20 +78,9 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(34, 78)" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# some stats on the graph\n", "(G.number_of_nodes(), G.number_of_edges() )" @@ -99,7 +88,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -109,11 +98,21 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "rw, so = cugraph.random_walks(G, seeds, 4)" + "# random walk path length\n", + "path_length = 4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rw, so, sz = cugraph.random_walks(G, seeds, path_length, use_padding=True)" ] }, { @@ -131,144 +130,27 @@ }, { "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 0\n", - "1 3\n", - "2 6\n", - "dtype: int64" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "so" - ] - }, - { - "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
srcdstweight
01761.0
16171.0
21761.0
319331.0
433311.0
53121.0
\n", - "
" - ], - "text/plain": [ - " src dst weight\n", - "0 17 6 1.0\n", - "1 6 17 1.0\n", - "2 17 6 1.0\n", - "3 19 33 1.0\n", - "4 33 31 1.0\n", - "5 31 2 1.0" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "rw" + "rw.head(10)" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "seed 17 starts at index 0 and is 3 rows\n", - "seed 19 starts at index 3 and is 3 rows\n" - ] - } - ], + "outputs": [], "source": [ + "idx = 0\n", "for i in range(len(seeds)):\n", - " print(f\"seed {seeds[i]} starts at index {so[i]} and is {so[1 + 1] - so[1]} rows\")" + " for j in range(path_length):\n", + " print(f\"{rw[idx]}\", end=\" \")\n", + " idx += 1\n", + " print(\" \")" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, @@ -305,7 +187,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.8" + "version": "3.8.10" } }, "nbformat": 4, diff --git a/python/cugraph/__init__.py b/python/cugraph/__init__.py index 1a113b93d8d..55c35fa7b4b 100644 --- a/python/cugraph/__init__.py +++ b/python/cugraph/__init__.py @@ -107,7 +107,7 @@ from cugraph.raft import raft_include_test from cugraph.comms import comms -from cugraph.sampling import random_walks +from cugraph.sampling import random_walks, rw_path # Versioneer from ._version import get_versions diff --git a/python/cugraph/sampling/__init__.py b/python/cugraph/sampling/__init__.py index fd9d072d4f8..ab0bfab0c66 100644 --- a/python/cugraph/sampling/__init__.py +++ b/python/cugraph/sampling/__init__.py @@ -11,4 +11,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -from cugraph.sampling.random_walks import random_walks +from cugraph.sampling.random_walks import random_walks, rw_path diff --git a/python/cugraph/sampling/random_walks.pxd b/python/cugraph/sampling/random_walks.pxd index 1eaea92c3e5..f86d6396c98 100644 --- a/python/cugraph/sampling/random_walks.pxd +++ b/python/cugraph/sampling/random_walks.pxd @@ -19,4 +19,10 @@ cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": const graph_container_t &g, const vertex_t *ptr_d_start, edge_t num_paths, - edge_t max_depth) except + + edge_t max_depth, + bool use_padding) except + + + cdef unique_ptr[random_walk_path_t] call_rw_paths[index_t]( + const handle_t &handle, + index_t num_paths, + const index_t* sizes) except + diff --git a/python/cugraph/sampling/random_walks.py b/python/cugraph/sampling/random_walks.py index 84fde262010..fc21abd3bc4 100644 --- a/python/cugraph/sampling/random_walks.py +++ b/python/cugraph/sampling/random_walks.py @@ -14,16 +14,12 @@ import cudf from cugraph.sampling import random_walks_wrapper import cugraph -from collections import defaultdict -# FIXME might be more efficient to return either (df + offset) or 3 cudf.Series - -def random_walks( - G, - start_vertices, - max_depth=None -): +def random_walks(G, + start_vertices, + max_depth=None, + use_padding=False): """ compute random walks for each nodes in 'start_vertices' @@ -43,16 +39,20 @@ def random_walks( max_depth : int The maximum depth of the random walks + use_padding : bool + If True, padded paths are returned else coalesced paths are returned. 
Returns ------- - random_walks_edge_lists : cudf.DataFrame - GPU data frame containing all random walks sources identifiers, - destination identifiers, edge weights + vertex_paths : cudf.Series or cudf.DataFrame + Series containing the vertices of edges/paths in the random walk. + + edge_weight_paths: cudf.Series + Series containing the edge weights of edges represented by the + returned vertex_paths - seeds_offsets: cudf.Series - Series containing the starting offset in the returned edge list - for each vertex in start_vertices. + sizes: int + The path size in case of coalesced paths. """ if max_depth is None: raise TypeError("must specify a 'max_depth'") @@ -74,7 +74,7 @@ def random_walks( start_vertices = G.lookup_internal_vertex_id(start_vertices) vertex_set, edge_set, sizes = random_walks_wrapper.random_walks( - G, start_vertices, max_depth) + G, start_vertices, max_depth, use_padding) if G.renumbered: df_ = cudf.DataFrame() @@ -82,21 +82,32 @@ def random_walks( df_ = G.unrenumber(df_, 'vertex_set', preserve_order=True) vertex_set = cudf.Series(df_['vertex_set']) - edge_list = defaultdict(list) - next_path_idx = 0 - offsets = [0] - - df = cudf.DataFrame() - for s in sizes.values_host: - for i in range(next_path_idx, s+next_path_idx-1): - edge_list['src'].append(vertex_set.values_host[i]) - edge_list['dst'].append(vertex_set.values_host[i+1]) - next_path_idx += s - df = df.append(edge_list, ignore_index=True) - offsets.append(df.index[-1]+1) - edge_list['src'].clear() - edge_list['dst'].clear() - df['weight'] = edge_set - offsets = cudf.Series(offsets) - - return df, offsets + if use_padding: + edge_set_sz = (max_depth-1)*len(start_vertices) + return vertex_set, edge_set[:edge_set_sz], sizes + + vertex_set_sz = sizes.sum() + edge_set_sz = vertex_set_sz - len(start_vertices) + return vertex_set[:vertex_set_sz], edge_set[:edge_set_sz], sizes + + +def rw_path(num_paths, sizes): + """ + Retrieve more information on the obtained paths in case use_padding + is False. + + parameters + ---------- + num_paths: int + Number of paths in the random walk output. + + sizes: int + Path size returned in random walk output. + + Returns + ------- + path_data : cudf.DataFrame + Dataframe containing vetex path offsets, edge weight offsets and + edge weight sizes for each path. + """ + return random_walks_wrapper.rw_path_retrieval(num_paths, sizes) diff --git a/python/cugraph/sampling/random_walks_wrapper.pyx b/python/cugraph/sampling/random_walks_wrapper.pyx index f186a972413..64194976e87 100644 --- a/python/cugraph/sampling/random_walks_wrapper.pyx +++ b/python/cugraph/sampling/random_walks_wrapper.pyx @@ -10,7 +10,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from cugraph.sampling.random_walks cimport call_random_walks +from cugraph.sampling.random_walks cimport call_random_walks, call_rw_paths #from cugraph.structure.graph_primtypes cimport * from cugraph.structure.graph_utilities cimport * from libcpp cimport bool @@ -24,7 +24,9 @@ import numpy.ctypeslib as ctypeslib from rmm._lib.device_buffer cimport DeviceBuffer from cudf.core.buffer import Buffer from cython.operator cimport dereference as deref -def random_walks(input_graph, start_vertices, max_depth): + + +def random_walks(input_graph, start_vertices, max_depth, use_padding): """ Call random_walks """ @@ -89,32 +91,71 @@ def random_walks(input_graph, start_vertices, max_depth): graph_container, c_start_vertex_ptr, num_paths, - max_depth)) + max_depth, + use_padding)) else: # (edge_t == np.dtype("int64")): rw_ret_ptr = move(call_random_walks[int, long]( deref(handle_), graph_container, c_start_vertex_ptr, num_paths, - max_depth)) + max_depth, + use_padding)) else: # (vertex_t == edge_t == np.dtype("int64")): rw_ret_ptr = move(call_random_walks[long, long]( deref(handle_), graph_container, c_start_vertex_ptr, num_paths, - max_depth)) + max_depth, + use_padding)) rw_ret= move(rw_ret_ptr.get()[0]) vertex_set = DeviceBuffer.c_from_unique_ptr(move(rw_ret.d_coalesced_v_)) edge_set = DeviceBuffer.c_from_unique_ptr(move(rw_ret.d_coalesced_w_)) - sizes = DeviceBuffer.c_from_unique_ptr(move(rw_ret.d_sizes_)) vertex_set = Buffer(vertex_set) edge_set = Buffer(edge_set) - sizes = Buffer(sizes) set_vertex = cudf.Series(data=vertex_set, dtype=vertex_t) set_edge = cudf.Series(data=edge_set, dtype=weight_t) - set_sizes = cudf.Series(data=sizes, dtype=edge_t) + + if not use_padding: + sizes = DeviceBuffer.c_from_unique_ptr(move(rw_ret.d_sizes_)) + sizes = Buffer(sizes) + set_sizes = cudf.Series(data=sizes, dtype=edge_t) + else: + set_sizes = None return set_vertex, set_edge, set_sizes - + + +def rw_path_retrieval(num_paths, sizes): + cdef unique_ptr[handle_t] handle_ptr + handle_ptr.reset(new handle_t()) + handle_ = handle_ptr.get() + index_t = sizes.dtype + + cdef unique_ptr[random_walk_path_t] rw_path_ptr + cdef uintptr_t c_sizes = sizes.__cuda_array_interface__['data'][0] + + if index_t == np.dtype("int32"): + rw_path_ptr = move(call_rw_paths[int](deref(handle_), + num_paths, + c_sizes)) + else: # index_t == np.dtype("int64"): + rw_path_ptr = move(call_rw_paths[long](deref(handle_), + num_paths, + c_sizes)) + + rw_path = move(rw_path_ptr.get()[0]) + vertex_offsets = DeviceBuffer.c_from_unique_ptr(move(rw_path.d_v_offsets)) + weight_sizes = DeviceBuffer.c_from_unique_ptr(move(rw_path.d_w_sizes)) + weight_offsets = DeviceBuffer.c_from_unique_ptr(move(rw_path.d_w_offsets)) + vertex_offsets = Buffer(vertex_offsets) + weight_sizes = Buffer(weight_sizes) + weight_offsets = Buffer(weight_offsets) + + df = cudf.DataFrame() + df['vertex_offsets'] = cudf.Series(data=vertex_offsets, dtype=index_t) + df['weight_sizes'] = cudf.Series(data=weight_sizes, dtype=index_t) + df['weight_offsets'] = cudf.Series(data=weight_offsets, dtype=index_t) + return df diff --git a/python/cugraph/structure/graph_utilities.pxd b/python/cugraph/structure/graph_utilities.pxd index a19ed4c600c..2d5b081dd0c 100644 --- a/python/cugraph/structure/graph_utilities.pxd +++ b/python/cugraph/structure/graph_utilities.pxd @@ -94,6 +94,11 @@ cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": unique_ptr[device_buffer] d_coalesced_w_ unique_ptr[device_buffer] d_sizes_ + cdef cppclass random_walk_path_t: + 
unique_ptr[device_buffer] d_v_offsets + unique_ptr[device_buffer] d_w_sizes + unique_ptr[device_buffer] d_w_offsets + cdef cppclass graph_generator_t: unique_ptr[device_buffer] d_source unique_ptr[device_buffer] d_destination diff --git a/python/cugraph/tests/test_random_walks.py b/python/cugraph/tests/test_random_walks.py index ba0cd6eadc9..302a93cd02a 100644 --- a/python/cugraph/tests/test_random_walks.py +++ b/python/cugraph/tests/test_random_walks.py @@ -29,11 +29,10 @@ DATASETS_SMALL = [pytest.param(d) for d in utils.DATASETS_SMALL] -def calc_random_walks( - graph_file, - directed=False, - max_depth=None -): +def calc_random_walks(graph_file, + directed=False, + max_depth=None, + use_padding=False): """ compute random walks for each nodes in 'start_vertices' @@ -52,16 +51,20 @@ def calc_random_walks( max_depth : int The maximum depth of the random walks + use_padding : bool + If True, padded paths are returned else coalesced paths are returned. Returns ------- - random_walks_edge_lists : cudf.DataFrame - GPU data frame containing all random walks sources identifiers, - destination identifiers, edge weights + vertex_paths : cudf.Series or cudf.DataFrame + Series containing the vertices of edges/paths in the random walk. + + edge_weight_paths: cudf.Series + Series containing the edge weights of edges represented by the + returned vertex_paths - seeds_offsets: cudf.Series - Series containing the starting offset in the returned edge list - for each vertex in start_vertices. + sizes: int + The path size in case of coalesced paths. """ G = utils.generate_cugraph_graph_from_file( graph_file, directed=directed, edgevals=True) @@ -69,45 +72,47 @@ def calc_random_walks( k = random.randint(1, 10) start_vertices = random.sample(range(G.number_of_vertices()), k) - df, offsets = cugraph.random_walks(G, start_vertices, max_depth) + vertex_paths, edge_weights, vertex_path_sizes = cugraph.random_walks( + G, start_vertices, max_depth, use_padding) - return df, offsets, start_vertices + return (vertex_paths, edge_weights, vertex_path_sizes), start_vertices -def check_random_walks(df, offsets, seeds, df_G=None): +def check_random_walks(path_data, seeds, df_G=None): invalid_edge = 0 invalid_seeds = 0 - invalid_weight = 0 offsets_idx = 0 - for i in range(len(df.index)): - src, dst, weight = df.iloc[i].to_array() - if i == offsets[offsets_idx]: - if df['src'].iloc[i] != seeds[offsets_idx]: + next_path_idx = 0 + v_paths = path_data[0] + sizes = path_data[2].to_array().tolist() + + for s in sizes: + for i in range(next_path_idx, next_path_idx+s-1): + src, dst = v_paths.iloc[i], v_paths.iloc[i+1] + if i == next_path_idx and src != seeds[offsets_idx]: invalid_seeds += 1 print( "[ERR] Invalid seed: " " src {} != src {}" - .format(df['src'].iloc[i], offsets[offsets_idx]) + .format(src, seeds[offsets_idx]) ) - offsets_idx += 1 + offsets_idx += 1 + next_path_idx += s - edge = df.loc[(df['src'] == (src)) & (df['dst'] == (dst))].reset_index( - drop=True) exp_edge = df_G.loc[ (df_G['src'] == (src)) & ( df_G['dst'] == (dst))].reset_index(drop=True) - if not exp_edge.equals(edge[:1]): + if not (exp_edge['src'].loc[0], exp_edge['dst'].loc[0]) == (src, dst): print( "[ERR] Invalid edge: " - "There is no edge src {} dst {} weight {}" - .format(src, dst, weight) + "There is no edge src {} dst {}" + .format(src, dst) ) - invalid_weight += 1 + invalid_edge += 1 assert invalid_edge == 0 assert invalid_seeds == 0 - assert invalid_weight == 0 # ============================================================================= # Pytest 
Setup / Teardown - called for each test function @@ -121,11 +126,9 @@ def prepare_test(): @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("max_depth", [None]) -def test_random_walks_invalid_max_dept( - graph_file, - directed, - max_depth -): +def test_random_walks_invalid_max_dept(graph_file, + directed, + max_depth): prepare_test() with pytest.raises(TypeError): df, offsets, seeds = calc_random_walks( @@ -137,7 +140,7 @@ def test_random_walks_invalid_max_dept( @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) -def test_random_walks( +def test_random_walks_coalesced( graph_file, directed ): @@ -145,12 +148,43 @@ def test_random_walks( df_G = utils.read_csv_file(graph_file) df_G.rename( columns={"0": "src", "1": "dst", "2": "weight"}, inplace=True) - df, offsets, seeds = calc_random_walks( + path_data, seeds = calc_random_walks( graph_file, directed, max_depth=max_depth ) - check_random_walks(df, offsets, seeds, df_G) + check_random_walks(path_data, seeds, df_G) + + # Check path query output + df = cugraph.rw_path(len(seeds), path_data[2]) + v_offsets = [0] + path_data[2].cumsum()[:-1].to_array().tolist() + w_offsets = [0] + (path_data[2]-1).cumsum()[:-1].to_array().tolist() + + assert df['weight_sizes'].equals(path_data[2]-1) + assert df['vertex_offsets'].to_array().tolist() == v_offsets + assert df['weight_offsets'].to_array().tolist() == w_offsets + + +@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) +@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) +def test_random_walks_padded( + graph_file, + directed +): + max_depth = random.randint(2, 10) + df_G = utils.read_csv_file(graph_file) + df_G.rename( + columns={"0": "src", "1": "dst", "2": "weight"}, inplace=True) + path_data, seeds = calc_random_walks( + graph_file, + directed, + max_depth=max_depth, + use_padding=True + ) + v_paths = path_data[0] + e_weights = path_data[1] + assert len(v_paths) == max_depth*len(seeds) + assert len(e_weights) == (max_depth - 1)*len(seeds) """@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) From ed0e3a2c04d7c2468aa3c91196556616dbc76539 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Fri, 4 Jun 2021 07:57:58 -0400 Subject: [PATCH 287/343] MG WCC improvements (#1628) Should be merged after PR #1636. - Undo NCCL 2.9.6 bug workarounds. - MG WCC weak scaling improvements. 
Authors: - Seunghwa Kang (https://github.com/seunghwak) - Rick Ratzel (https://github.com/rlratzel) Approvers: - Andrei Schaffer (https://github.com/aschaffer) - Chuck Hastings (https://github.com/ChuckHastings) - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/1628 --- ...ransform_reduce_key_aggregated_out_nbr.cuh | 3 - .../cugraph/utilities/shuffle_comm.cuh | 4 - .../components/weakly_connected_components.cu | 84 +++++++++++++------ cpp/src/experimental/graph_view.cu | 56 ++++++++----- cpp/src/experimental/renumber_edgelist.cu | 3 - python/cugraph/tests/test_force_atlas2.py | 3 + 6 files changed, 99 insertions(+), 54 deletions(-) diff --git a/cpp/include/cugraph/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh b/cpp/include/cugraph/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh index 367e29e12f0..f9c6fed059b 100644 --- a/cpp/include/cugraph/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh +++ b/cpp/include/cugraph/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh @@ -431,9 +431,6 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( // FIXME: additional optimization is possible if reduce_op is a pure function (and reduce_op // can be mapped to ncclRedOp_t). - // FIXME: a temporary workaround for a NCCL (2.9.6) bug that causes a hang on DGX1 (due to - // remote memory allocation), this barrier is unnecessary otherwise. - col_comm.barrier(); auto rx_sizes = host_scalar_gather(col_comm, tmp_major_vertices.size(), i, handle.get_stream()); std::vector rx_displs{}; diff --git a/cpp/include/cugraph/utilities/shuffle_comm.cuh b/cpp/include/cugraph/utilities/shuffle_comm.cuh index 56f55a31a14..009dde845b5 100644 --- a/cpp/include/cugraph/utilities/shuffle_comm.cuh +++ b/cpp/include/cugraph/utilities/shuffle_comm.cuh @@ -73,10 +73,6 @@ compute_tx_rx_counts_offsets_ranks(raft::comms::comms_t const &comm, rx_offsets, rx_src_ranks, stream); - // FIXME: temporary unverified work-around for a NCCL (2.9.6) bug that causes a hang on DGX1 (due - // to remote memory allocation), this synchronization is unnecessary otherwise but seems like - // suppress the hange issue. Need to be revisited once NCCL 2.10 is released. - CUDA_TRY(cudaDeviceSynchronize()); raft::update_host(tx_counts.data(), d_tx_value_counts.data(), comm_size, stream); raft::update_host(rx_counts.data(), d_rx_value_counts.data(), comm_size, stream); diff --git a/cpp/src/components/weakly_connected_components.cu b/cpp/src/components/weakly_connected_components.cu index 0688dc7408f..0c552ad24fc 100644 --- a/cpp/src/components/weakly_connected_components.cu +++ b/cpp/src/components/weakly_connected_components.cu @@ -342,37 +342,69 @@ void weakly_connected_components_impl(raft::handle_t const &handle, static_cast(new_root_candidates.size() * max_new_roots_ratio), vertex_t{1}); auto init_max_new_roots = max_new_roots; - // to avoid selecting too many (possibly all) vertices as initial roots leading to no - // compression in the worst case. - if (GraphViewType::is_multi_gpu && - (level_graph_view.get_number_of_vertices() <= - static_cast(handle.get_comms().get_size() * ceil(1.0 / max_new_roots_ratio)))) { + if (GraphViewType::is_multi_gpu) { auto &comm = handle.get_comms(); auto const comm_rank = comm.get_rank(); auto const comm_size = comm.get_size(); - // FIXME: a temporary workaround for a NCCL(2.9.6) bug that causes a hang on DGX1 (due to - // remote memory allocation), host_scalar_gather is sufficient otherwise. 
-#if 1 - auto new_root_candidate_counts = - host_scalar_allgather(comm, new_root_candidates.size(), handle.get_stream()); -#else + auto first_candidate_degree = thrust::transform_reduce( + rmm::exec_policy(handle.get_stream_view()), + new_root_candidates.begin(), + new_root_candidates.begin() + (new_root_candidates.size() > 0 ? 1 : 0), + [vertex_partition, degrees = degrees.data()] __device__(auto v) { + return degrees[vertex_partition.get_local_vertex_offset_from_vertex_nocheck(v)]; + }, + edge_t{0}, + thrust::plus{}); + + auto first_candidate_degrees = + host_scalar_gather(comm, first_candidate_degree, int{0}, handle.get_stream()); auto new_root_candidate_counts = host_scalar_gather(comm, new_root_candidates.size(), int{0}, handle.get_stream()); -#endif + if (comm_rank == 0) { - std::vector gpuids{}; - gpuids.reserve( - std::reduce(new_root_candidate_counts.begin(), new_root_candidate_counts.end())); - for (size_t i = 0; i < new_root_candidate_counts.size(); ++i) { - gpuids.insert(gpuids.end(), new_root_candidate_counts[i], static_cast(i)); - } - std::random_device rd{}; - std::shuffle(gpuids.begin(), gpuids.end(), std::mt19937(rd())); - gpuids.resize( - std::max(static_cast(gpuids.size() * max_new_roots_ratio), vertex_t{1})); std::vector init_max_new_root_counts(comm_size, vertex_t{0}); - for (size_t i = 0; i < gpuids.size(); ++i) { ++init_max_new_root_counts[gpuids[i]]; } + + // if there exists very high degree vertices, we can exceed degree_sum_threshold * comm_size + // with fewer than one root per GPU + if (std::reduce(first_candidate_degrees.begin(), first_candidate_degrees.end()) > + degree_sum_threshold * comm_size) { + std::vector> degree_gpuid_pairs(comm_size); + for (int i = 0; i < comm_size; ++i) { + degree_gpuid_pairs[i] = std::make_tuple(first_candidate_degrees[i], i); + } + std::sort(degree_gpuid_pairs.begin(), degree_gpuid_pairs.end(), [](auto lhs, auto rhs) { + return std::get<0>(lhs) > std::get<0>(rhs); + }); + edge_t sum{0}; + for (size_t i = 0; i < degree_gpuid_pairs.size(); ++i) { + sum += std::get<0>(degree_gpuid_pairs[i]); + init_max_new_root_counts[std::get<1>(degree_gpuid_pairs[i])] = 1; + if (sum > degree_sum_threshold * comm_size) { break; } + } + } + // to avoid selecting too many (possibly all) vertices as initial roots leading to no + // compression in the worst case. 
+ else if (level_graph_view.get_number_of_vertices() <= + static_cast(handle.get_comms().get_size() * + ceil(1.0 / max_new_roots_ratio))) { + std::vector gpuids{}; + gpuids.reserve( + std::reduce(new_root_candidate_counts.begin(), new_root_candidate_counts.end())); + for (size_t i = 0; i < new_root_candidate_counts.size(); ++i) { + gpuids.insert(gpuids.end(), new_root_candidate_counts[i], static_cast(i)); + } + std::random_device rd{}; + std::shuffle(gpuids.begin(), gpuids.end(), std::mt19937(rd())); + gpuids.resize( + std::max(static_cast(gpuids.size() * max_new_roots_ratio), vertex_t{1})); + for (size_t i = 0; i < gpuids.size(); ++i) { ++init_max_new_root_counts[gpuids[i]]; } + } else { + std::fill(init_max_new_root_counts.begin(), + init_max_new_root_counts.end(), + std::numeric_limits::max()); + } + // FIXME: we need to add host_scalar_scatter #if 1 rmm::device_uvector d_counts(comm_size, handle.get_stream_view()); @@ -401,7 +433,9 @@ void weakly_connected_components_impl(raft::handle_t const &handle, host_scalar_scatter(comm, init_max_new_root_counts.data(), int{0}, handle.get_stream()); #endif } + handle.get_stream_view().synchronize(); + init_max_new_roots = std::min(init_max_new_roots, max_new_roots); } // 2-3. initialize vertex frontier, edge_buffer, and col_components (if multi-gpu) @@ -502,7 +536,9 @@ void weakly_connected_components_impl(raft::handle_t const &handle, level_graph_view, vertex_frontier, static_cast(Bucket::cur), - std::vector{static_cast(Bucket::next)}, + GraphViewType::is_multi_gpu ? std::vector{static_cast(Bucket::next), + static_cast(Bucket::conflict)} + : std::vector{static_cast(Bucket::next)}, thrust::make_counting_iterator(0) /* dummy */, thrust::make_counting_iterator(0) /* dummy */, [col_components = GraphViewType::is_multi_gpu ? col_components.data() : level_components, diff --git a/cpp/src/experimental/graph_view.cu b/cpp/src/experimental/graph_view.cu index c22fde5f4c7..3dc5dee4756 100644 --- a/cpp/src/experimental/graph_view.cu +++ b/cpp/src/experimental/graph_view.cu @@ -534,9 +534,13 @@ graph_view_ton(handle.get_stream()), in_degrees.begin(), in_degrees.end()); - rmm::device_scalar ret(handle.get_stream()); - device_allreduce( - handle.get_comms(), it, ret.data(), 1, raft::comms::op_t::MAX, handle.get_stream()); + rmm::device_scalar ret(edge_t{0}, handle.get_stream()); + device_allreduce(handle.get_comms(), + it != in_degrees.end() ? it : ret.data(), + ret.data(), + 1, + raft::comms::op_t::MAX, + handle.get_stream()); return ret.value(handle.get_stream()); } @@ -557,8 +561,8 @@ edge_t graph_view_ton(handle.get_stream()), in_degrees.begin(), in_degrees.end()); - edge_t ret{}; - raft::update_host(&ret, it, 1, handle.get_stream()); + edge_t ret{0}; + if (it != in_degrees.end()) { raft::update_host(&ret, it, 1, handle.get_stream()); } handle.get_stream_view().synchronize(); return ret; } @@ -576,9 +580,13 @@ graph_view_ton(handle.get_stream()), out_degrees.begin(), out_degrees.end()); - rmm::device_scalar ret(handle.get_stream()); - device_allreduce( - handle.get_comms(), it, ret.data(), 1, raft::comms::op_t::MAX, handle.get_stream()); + rmm::device_scalar ret(edge_t{0}, handle.get_stream()); + device_allreduce(handle.get_comms(), + it != out_degrees.end() ? 
it : ret.data(), + ret.data(), + 1, + raft::comms::op_t::MAX, + handle.get_stream()); return ret.value(handle.get_stream()); } @@ -599,8 +607,8 @@ edge_t graph_view_ton(handle.get_stream()), out_degrees.begin(), out_degrees.end()); - edge_t ret{}; - raft::update_host(&ret, it, 1, handle.get_stream()); + edge_t ret{0}; + if (it != out_degrees.end()) { raft::update_host(&ret, it, 1, handle.get_stream()); } handle.get_stream_view().synchronize(); return ret; } @@ -618,9 +626,13 @@ graph_view_ton(handle.get_stream()), in_weight_sums.begin(), in_weight_sums.end()); - rmm::device_scalar ret(handle.get_stream()); - device_allreduce( - handle.get_comms(), it, ret.data(), 1, raft::comms::op_t::MAX, handle.get_stream()); + rmm::device_scalar ret(weight_t{0.0}, handle.get_stream()); + device_allreduce(handle.get_comms(), + it != in_weight_sums.end() ? it : ret.data(), + ret.data(), + 1, + raft::comms::op_t::MAX, + handle.get_stream()); return ret.value(handle.get_stream()); } @@ -641,8 +653,8 @@ weight_t graph_view_ton(handle.get_stream()), in_weight_sums.begin(), in_weight_sums.end()); - weight_t ret{}; - raft::update_host(&ret, it, 1, handle.get_stream()); + weight_t ret{0.0}; + if (it != in_weight_sums.end()) { raft::update_host(&ret, it, 1, handle.get_stream()); } handle.get_stream_view().synchronize(); return ret; } @@ -660,9 +672,13 @@ graph_view_ton(handle.get_stream()), out_weight_sums.begin(), out_weight_sums.end()); - rmm::device_scalar ret(handle.get_stream()); - device_allreduce( - handle.get_comms(), it, ret.data(), 1, raft::comms::op_t::MAX, handle.get_stream()); + rmm::device_scalar ret(weight_t{0.0}, handle.get_stream()); + device_allreduce(handle.get_comms(), + it != out_weight_sums.end() ? it : ret.data(), + ret.data(), + 1, + raft::comms::op_t::MAX, + handle.get_stream()); return ret.value(handle.get_stream()); } @@ -683,8 +699,8 @@ weight_t graph_view_t< auto it = thrust::max_element(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), out_weight_sums.begin(), out_weight_sums.end()); - weight_t ret{}; - raft::update_host(&ret, it, 1, handle.get_stream()); + weight_t ret{0.0}; + if (it != out_weight_sums.end()) { raft::update_host(&ret, it, 1, handle.get_stream()); } handle.get_stream_view().synchronize(); return ret; } diff --git a/cpp/src/experimental/renumber_edgelist.cu b/cpp/src/experimental/renumber_edgelist.cu index 005ba93d3c4..d6e3f8c93f6 100644 --- a/cpp/src/experimental/renumber_edgelist.cu +++ b/cpp/src/experimental/renumber_edgelist.cu @@ -115,9 +115,6 @@ rmm::device_uvector compute_renumber_map( rmm::device_uvector rx_major_labels(0, handle.get_stream()); rmm::device_uvector rx_major_counts(0, handle.get_stream()); - // FIXME: a temporary workaround for a NCCL (2.9.6) bug that causes a hang on DGX1 (due to - // remote memory allocation), this barrier is unnecessary otherwise. 
- col_comm.barrier(); auto rx_sizes = host_scalar_gather( col_comm, tmp_major_labels.size(), static_cast(i), handle.get_stream()); std::vector rx_displs{}; diff --git a/python/cugraph/tests/test_force_atlas2.py b/python/cugraph/tests/test_force_atlas2.py index 54ea9c78588..1128f52904a 100644 --- a/python/cugraph/tests/test_force_atlas2.py +++ b/python/cugraph/tests/test_force_atlas2.py @@ -138,6 +138,9 @@ def test_force_atlas2(graph_file, score, max_iter, assert test_callback.on_train_end_called_count == 1 +# FIXME: this test occasionally fails - skipping to prevent CI failures but +# need to revisit ASAP +@pytest.mark.skip(reason="non-deterministric - needs fixing!") @pytest.mark.parametrize('graph_file, score', DATASETS[:-1]) @pytest.mark.parametrize('max_iter', MAX_ITERATIONS) @pytest.mark.parametrize('barnes_hut_optimize', BARNES_HUT_OPTIMIZE) From d30339bcba6f661e87a81e5553a259d89b51daec Mon Sep 17 00:00:00 2001 From: Rick Ratzel <3039903+rlratzel@users.noreply.github.com> Date: Fri, 4 Jun 2021 15:46:46 -0500 Subject: [PATCH 288/343] Made call to cugraph.random_walks() ignore the unused return value (#1648) Made call to cugraph.random_walks() ignore the unused return value. Manually tested in browser with jupyterlab. Author: - Rick Ratzel (https://github.com/rlratzel) Approvers: - Brad Rees (https://github.com/BradReesWork) - AJ Schmidt (https://github.com/ajschmidt8) URL: https://github.com/rapidsai/cugraph/pull/1648 --- notebooks/cugraph_benchmarks/random_walk_benchmark.ipynb | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/notebooks/cugraph_benchmarks/random_walk_benchmark.ipynb b/notebooks/cugraph_benchmarks/random_walk_benchmark.ipynb index be50c075455..65cf9fb59eb 100644 --- a/notebooks/cugraph_benchmarks/random_walk_benchmark.ipynb +++ b/notebooks/cugraph_benchmarks/random_walk_benchmark.ipynb @@ -124,7 +124,8 @@ "source": [ "def run_rw(_G, _seeds, _depth):\n", " t1 = time.time()\n", - " _, _ = cugraph.random_walks(_G, _seeds, _depth)\n", + " # cugraph.random_walks() returns a 3-tuple, which is being ignored here.\n", + " cugraph.random_walks(_G, _seeds, _depth)\n", " t2 = time.time() - t1\n", " return t2" ] @@ -450,7 +451,7 @@ "metadata": {}, "outputs": [], "source": [ - "# sequenctial = so also get a single random seed\n", + "# sequential = so also get a single random seed\n", "for i in range (max_seeds) :\n", " for j in range(i):\n", " seeds = random.sample(nodes, 1)\n", From 57ee8666488a6e9dfe137d812e773de5c3eee1f1 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Mon, 7 Jun 2021 17:30:05 +0200 Subject: [PATCH 289/343] Update UCX-Py version to 0.20 (#1649) Author: - Peter Andreas Entschev (https://github.com/pentschev) Approvers: - AJ Schmidt (https://github.com/ajschmidt8) - Rick Ratzel (https://github.com/rlratzel) - Brad Rees (https://github.com/BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1649 --- ci/benchmark/build.sh | 2 +- ci/gpu/build.sh | 2 +- ci/release/update-version.sh | 3 +-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/ci/benchmark/build.sh b/ci/benchmark/build.sh index d48f475f2eb..f5cedae5e29 100644 --- a/ci/benchmark/build.sh +++ b/ci/benchmark/build.sh @@ -74,7 +74,7 @@ gpuci_conda_retry install -c nvidia -c rapidsai -c rapidsai-nightly -c conda-for "cudatoolkit=$CUDA_REL" \ "dask-cudf=${MINOR_VERSION}" \ "dask-cuda=${MINOR_VERSION}" \ - "ucx-py=${MINOR_VERSION}" \ + "ucx-py=0.20.*" \ "ucx-proc=*=gpu" \ "rapids-build-env=${MINOR_VERSION}" \ rapids-pytest-benchmark diff --git a/ci/gpu/build.sh 
b/ci/gpu/build.sh index 30dc7373e15..7c9c353f23c 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -62,7 +62,7 @@ gpuci_conda_retry install -y \ "cudatoolkit=$CUDA_REL" \ "dask-cudf=${MINOR_VERSION}" \ "dask-cuda=${MINOR_VERSION}" \ - "ucx-py=${MINOR_VERSION}" \ + "ucx-py=0.20.*" \ "ucx-proc=*=gpu" \ "rapids-build-env=$MINOR_VERSION.*" \ "rapids-notebook-env=$MINOR_VERSION.*" \ diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 0629f19d3b8..c45d7eacd88 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -50,6 +50,5 @@ for FILE in conda/environments/*.yml; do sed_runner "s/rmm=${CURRENT_SHORT_TAG}/rmm=${NEXT_SHORT_TAG}/g" ${FILE}; sed_runner "s/dask-cuda=${CURRENT_SHORT_TAG}/dask-cuda=${NEXT_SHORT_TAG}/g" ${FILE}; sed_runner "s/dask-cudf=${CURRENT_SHORT_TAG}/dask-cudf=${NEXT_SHORT_TAG}/g" ${FILE}; - sed_runner "s/ucx-py=${CURRENT_SHORT_TAG}/ucx-py=${NEXT_SHORT_TAG}/g" ${FILE}; sed_runner "s/cuxfilter=${CURRENT_SHORT_TAG}/cuxfilter=${NEXT_SHORT_TAG}/g" ${FILE}; -done \ No newline at end of file +done From b0faf2190977e79a4b39ef2c1f4874ab01c745c0 Mon Sep 17 00:00:00 2001 From: Chuck Hastings <45364586+ChuckHastings@users.noreply.github.com> Date: Tue, 8 Jun 2021 10:11:27 -0400 Subject: [PATCH 290/343] Bug fix in triangle counting (#1654) * fix overflow condition if available memory is larger than 32 bit computation would support * fix clang-format issues --- cpp/src/community/triangles_counting.cu | 14 +++++--- cpp/tests/community/triangle_test.cu | 43 +++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 5 deletions(-) diff --git a/cpp/src/community/triangles_counting.cu b/cpp/src/community/triangles_counting.cu index 31acb4859bd..cd5b8bc6614 100644 --- a/cpp/src/community/triangles_counting.cu +++ b/cpp/src/community/triangles_counting.cu @@ -759,8 +759,9 @@ void TrianglesCount::tcount_b2b() cudaMemGetInfo(&free_bytes, &total_bytes); CHECK_CUDA(m_stream); - int nblock = (free_bytes * 95 / 100) / (sizeof(uint32_t) * bmldL1); //@TODO: what? 
- nblock = MIN(nblock, m_mat.nrows); + size_t nblock_available = (free_bytes * 95 / 100) / (sizeof(uint32_t) * bmldL1); + + int nblock = static_cast(MIN(nblock_available, static_cast(m_mat.nrows))); // allocate level 1 bitmap rmm::device_vector bmapL1_d(bmldL1 * nblock, uint32_t{0}); @@ -793,8 +794,10 @@ void TrianglesCount::tcount_wrp() cudaMemGetInfo(&free_bytes, &total_bytes); CHECK_CUDA(m_stream); - int nblock = (free_bytes * 95 / 100) / (sizeof(uint32_t) * bmld * (THREADS / 32)); - nblock = MIN(nblock, DIV_UP(m_mat.nrows, (THREADS / 32))); + size_t nblock_available = (free_bytes * 95 / 100) / (sizeof(uint32_t) * bmld * (THREADS / 32)); + + int nblock = static_cast( + MIN(nblock_available, static_cast(DIV_UP(m_mat.nrows, (THREADS / 32))))); size_t bmap_sz = bmld * nblock * (THREADS / 32); @@ -827,7 +830,8 @@ void TrianglesCount::count() tcount_wrp(); else { const int shMinBlkXSM = 6; - if (size_t{m_shared_mem_per_block * 8 / shMinBlkXSM} < (size_t)m_mat.N) + if (static_cast(m_shared_mem_per_block * 8 / shMinBlkXSM) < + static_cast(m_mat.N)) tcount_b2b(); else tcount_bsh(); diff --git a/cpp/tests/community/triangle_test.cu b/cpp/tests/community/triangle_test.cu index 0faeb795e15..b40c4734a14 100644 --- a/cpp/tests/community/triangle_test.cu +++ b/cpp/tests/community/triangle_test.cu @@ -63,4 +63,47 @@ TEST(triangle, dolphin) ASSERT_EQ(count, expected); } +TEST(triangle, karate) +{ + using vertex_t = int32_t; + using edge_t = int32_t; + using weight_t = float; + + std::vector off_h = {0, 16, 25, 35, 41, 44, 48, 52, 56, 61, 63, 66, + 67, 69, 74, 76, 78, 80, 82, 84, 87, 89, 91, 93, + 98, 101, 104, 106, 110, 113, 117, 121, 127, 139, 156}; + std::vector ind_h = { + 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 17, 19, 21, 31, 0, 2, 3, 7, 13, 17, 19, + 21, 30, 0, 1, 3, 7, 8, 9, 13, 27, 28, 32, 0, 1, 2, 7, 12, 13, 0, 6, 10, 0, 6, + 10, 16, 0, 4, 5, 16, 0, 1, 2, 3, 0, 2, 30, 32, 33, 2, 33, 0, 4, 5, 0, 0, 3, + 0, 1, 2, 3, 33, 32, 33, 32, 33, 5, 6, 0, 1, 32, 33, 0, 1, 33, 32, 33, 0, 1, 32, + 33, 25, 27, 29, 32, 33, 25, 27, 31, 23, 24, 31, 29, 33, 2, 23, 24, 33, 2, 31, 33, 23, 26, + 32, 33, 1, 8, 32, 33, 0, 24, 25, 28, 32, 33, 2, 8, 14, 15, 18, 20, 22, 23, 29, 30, 31, + 33, 8, 9, 13, 14, 15, 18, 19, 20, 22, 23, 26, 27, 28, 29, 30, 31, 32}; + + std::vector w_h(ind_h.size(), weight_t{1.0}); + + vertex_t num_verts = off_h.size() - 1; + int num_edges = ind_h.size(); + + uint64_t expected{135}; + + rmm::device_vector offsets_v(off_h); + rmm::device_vector indices_v(ind_h); + rmm::device_vector weights_v(w_h); + + cugraph::GraphCSRView graph_csr( + offsets_v.data().get(), indices_v.data().get(), weights_v.data().get(), num_verts, num_edges); + + uint64_t count{0}; + + try { + count = cugraph::triangle::triangle_count(graph_csr); + } catch (std::exception& e) { + std::cout << "Exception: " << e.what() << std::endl; + } + + ASSERT_EQ(count, expected); +} + CUGRAPH_TEST_PROGRAM_MAIN() From a2a2ba23b28006350c809ddcd339019015333c7f Mon Sep 17 00:00:00 2001 From: Ray Douglass <3107146+raydouglass@users.noreply.github.com> Date: Wed, 9 Jun 2021 12:00:20 -0400 Subject: [PATCH 291/343] FIX update-version.sh for CalVer (#1658) --- ci/release/update-version.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index c45d7eacd88..ce681bad378 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -20,7 +20,7 @@ NEXT_FULL_TAG=$1 # Get current version -CURRENT_TAG=$(git tag | grep -xE 'v[0-9\.]+' | sort --version-sort | 
tail -n 1 | tr -d 'v') +CURRENT_TAG=$(git tag --merged HEAD | grep -xE '^v.*' | sort --version-sort | tail -n 1 | tr -d 'v') CURRENT_MAJOR=$(echo $CURRENT_TAG | awk '{split($0, a, "."); print a[1]}') CURRENT_MINOR=$(echo $CURRENT_TAG | awk '{split($0, a, "."); print a[2]}') CURRENT_PATCH=$(echo $CURRENT_TAG | awk '{split($0, a, "."); print a[3]}') @@ -51,4 +51,4 @@ for FILE in conda/environments/*.yml; do sed_runner "s/dask-cuda=${CURRENT_SHORT_TAG}/dask-cuda=${NEXT_SHORT_TAG}/g" ${FILE}; sed_runner "s/dask-cudf=${CURRENT_SHORT_TAG}/dask-cudf=${NEXT_SHORT_TAG}/g" ${FILE}; sed_runner "s/cuxfilter=${CURRENT_SHORT_TAG}/cuxfilter=${NEXT_SHORT_TAG}/g" ${FILE}; -done +done \ No newline at end of file From 496a40a7bae51214a0ea15984e77b987cc44002c Mon Sep 17 00:00:00 2001 From: Raymond Douglass Date: Wed, 9 Jun 2021 12:07:03 -0400 Subject: [PATCH 292/343] update changelog --- CHANGELOG.md | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e51d1bc1534..e98a21296c5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,40 @@ -# cuGraph 21.06.00 (Date TBD) +# cuGraph 21.06.00 (9 Jun 2021) -Please see https://github.com/rapidsai/cugraph/releases/tag/v21.06.00a for the latest changes to this development branch. +## 🐛 Bug Fixes + +- Delete CUDA_ARCHITECTURES=OFF ([#1638](https://github.com/rapidsai/cugraph/pull/1638)) [@seunghwak](https://github.com/seunghwak) +- tansfom_educe_e bug fixes ([#1633](https://github.com/rapidsai/cugraph/pull/1633)) [@ChuckHastings](https://github.com/ChuckHastings) +- Coect install path fo include folde to avoid double nesting ([#1630](https://github.com/rapidsai/cugraph/pull/1630)) [@dantegd](https://github.com/dantegd) +- Remove thead local thust::sot (thust::sot with the execution policy thust::seq) fom copy_v_tansfom_educe_key_aggegated_out_nb ([#1627](https://github.com/rapidsai/cugraph/pull/1627)) [@seunghwak](https://github.com/seunghwak) + +## 🚀 New Featues + +- SG & MG Weakly Connected Components ([#1604](https://github.com/rapidsai/cugraph/pull/1604)) [@seunghwak](https://github.com/seunghwak) + +## 🛠️ Impovements + +- Remove Pascal guad and test cuGaph use of cuco::static_map on Pascal ([#1640](https://github.com/rapidsai/cugraph/pull/1640)) [@seunghwak](https://github.com/seunghwak) +- Upgaded ecipe and dev envs to NCCL 2.9.9 ([#1636](https://github.com/rapidsai/cugraph/pull/1636)) [@latzel](https://github.com/latzel) +- Use UCX-Py 0.20 ([#1634](https://github.com/rapidsai/cugraph/pull/1634)) [@jakikham](https://github.com/jakikham) +- Updated dependencies fo CalVe ([#1629](https://github.com/rapidsai/cugraph/pull/1629)) [@latzel](https://github.com/latzel) +- MG WCC impovements ([#1628](https://github.com/rapidsai/cugraph/pull/1628)) [@seunghwak](https://github.com/seunghwak) +- Initialize foce_atlas2 `old_foces` device_uvecto, use new `mm::exec_policy` ([#1625](https://github.com/rapidsai/cugraph/pull/1625)) [@txcllnt](https://github.com/txcllnt) +- Fix develope guide examples fo device_buffe ([#1619](https://github.com/rapidsai/cugraph/pull/1619)) [@haism](https://github.com/haism) +- Pass mm memoy allocato to cuco::static_map ([#1617](https://github.com/rapidsai/cugraph/pull/1617)) [@seunghwak](https://github.com/seunghwak) +- Undo disabling MG C++ testing outputs fo non-oot pocesses ([#1615](https://github.com/rapidsai/cugraph/pull/1615)) [@seunghwak](https://github.com/seunghwak) +- WCC bindings ([#1612](https://github.com/rapidsai/cugraph/pull/1612)) 
[@Ioy30](https://github.com/Ioy30) +- addess 'ValueEo: Seies contains NULL values' fom fom_cudf_edge… ([#1610](https://github.com/rapidsai/cugraph/pull/1610)) [@mattf](https://github.com/mattf) +- Fea mm device buffe change ([#1609](https://github.com/rapidsai/cugraph/pull/1609)) [@ChuckHastings](https://github.com/ChuckHastings) +- Update `CHANGELOG.md` links fo calve ([#1608](https://github.com/rapidsai/cugraph/pull/1608)) [@ajschmidt8](https://github.com/ajschmidt8) +- Handle int64 in foce atlas wappe and update to uvecto ([#1607](https://github.com/rapidsai/cugraph/pull/1607)) [@hlinsen](https://github.com/hlinsen) +- Update docs build scipt ([#1606](https://github.com/rapidsai/cugraph/pull/1606)) [@ajschmidt8](https://github.com/ajschmidt8) +- WCC pefomance/memoy footpint optimization ([#1605](https://github.com/rapidsai/cugraph/pull/1605)) [@seunghwak](https://github.com/seunghwak) +- adding test gaphs - pat 2 ([#1603](https://github.com/rapidsai/cugraph/pull/1603)) [@ChuckHastings](https://github.com/ChuckHastings) +- Update the Random Walk binding ([#1599](https://github.com/rapidsai/cugraph/pull/1599)) [@Ioy30](https://github.com/Ioy30) +- Add mnmg out degee ([#1592](https://github.com/rapidsai/cugraph/pull/1592)) [@Ioy30](https://github.com/Ioy30) +- Update `cugaph` to with newest CMake featues, including CPM fo dependencies ([#1585](https://github.com/rapidsai/cugraph/pull/1585)) [@obetmaynad](https://github.com/obetmaynad) +- Implement Gaph Batching functionality ([#1580](https://github.com/rapidsai/cugraph/pull/1580)) [@aschaffe](https://github.com/aschaffe) +- add multi-column suppot in algoithms - pat 2 ([#1571](https://github.com/rapidsai/cugraph/pull/1571)) [@Ioy30](https://github.com/Ioy30) # cuGraph 0.19.0 (21 Apr 2021) From 5c1d7d85d697c569800d540b0f1d66f156521157 Mon Sep 17 00:00:00 2001 From: Ray Douglass <3107146+raydouglass@users.noreply.github.com> Date: Wed, 9 Jun 2021 17:05:20 -0400 Subject: [PATCH 293/343] FIX update-version.sh docs path --- ci/release/update-version.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index ce681bad378..2ff14c6c6e9 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -42,8 +42,8 @@ function sed_runner() { sed_runner 's/'"CUGRAPH VERSION .* LANGUAGES C CXX CUDA)"'/'"CUGRAPH VERSION ${NEXT_FULL_TAG} LANGUAGES C CXX CUDA)"'/g' cpp/CMakeLists.txt # RTD update -sed_runner 's/version = .*/version = '"'${NEXT_SHORT_TAG}'"'/g' docs/source/conf.py -sed_runner 's/release = .*/release = '"'${NEXT_FULL_TAG}'"'/g' docs/source/conf.py +sed_runner 's/version = .*/version = '"'${NEXT_SHORT_TAG}'"'/g' docs/cugraph/source/conf.py +sed_runner 's/release = .*/release = '"'${NEXT_FULL_TAG}'"'/g' docs/cugraph/source/conf.py for FILE in conda/environments/*.yml; do sed_runner "s/cudf=${CURRENT_SHORT_TAG}/cudf=${NEXT_SHORT_TAG}/g" ${FILE}; @@ -51,4 +51,4 @@ for FILE in conda/environments/*.yml; do sed_runner "s/dask-cuda=${CURRENT_SHORT_TAG}/dask-cuda=${NEXT_SHORT_TAG}/g" ${FILE}; sed_runner "s/dask-cudf=${CURRENT_SHORT_TAG}/dask-cudf=${NEXT_SHORT_TAG}/g" ${FILE}; sed_runner "s/cuxfilter=${CURRENT_SHORT_TAG}/cuxfilter=${NEXT_SHORT_TAG}/g" ${FILE}; -done \ No newline at end of file +done From 2491a525cd5ee627dc4e5cedba7dc19706362d88 Mon Sep 17 00:00:00 2001 From: AJ Schmidt Date: Thu, 10 Jun 2021 14:36:39 -0400 Subject: [PATCH 294/343] fix changelog (#1663) --- CHANGELOG.md | 56 ++++++++++++++++++++++++++-------------------------- 1 file 
changed, 28 insertions(+), 28 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e98a21296c5..602d7134ae2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,38 +3,38 @@ ## 🐛 Bug Fixes - Delete CUDA_ARCHITECTURES=OFF ([#1638](https://github.com/rapidsai/cugraph/pull/1638)) [@seunghwak](https://github.com/seunghwak) -- tansfom_educe_e bug fixes ([#1633](https://github.com/rapidsai/cugraph/pull/1633)) [@ChuckHastings](https://github.com/ChuckHastings) -- Coect install path fo include folde to avoid double nesting ([#1630](https://github.com/rapidsai/cugraph/pull/1630)) [@dantegd](https://github.com/dantegd) -- Remove thead local thust::sot (thust::sot with the execution policy thust::seq) fom copy_v_tansfom_educe_key_aggegated_out_nb ([#1627](https://github.com/rapidsai/cugraph/pull/1627)) [@seunghwak](https://github.com/seunghwak) +- transform_reduce_e bug fixes ([#1633](https://github.com/rapidsai/cugraph/pull/1633)) [@ChuckHastings](https://github.com/ChuckHastings) +- Correct install path for include folder to avoid double nesting ([#1630](https://github.com/rapidsai/cugraph/pull/1630)) [@dantegd](https://github.com/dantegd) +- Remove thread local thrust::sort (thrust::sort with the execution policy thrust::seq) from copy_v_transform_reduce_key_aggregated_out_nbr ([#1627](https://github.com/rapidsai/cugraph/pull/1627)) [@seunghwak](https://github.com/seunghwak) -## 🚀 New Featues +## 🚀 New Features - SG & MG Weakly Connected Components ([#1604](https://github.com/rapidsai/cugraph/pull/1604)) [@seunghwak](https://github.com/seunghwak) -## 🛠️ Impovements - -- Remove Pascal guad and test cuGaph use of cuco::static_map on Pascal ([#1640](https://github.com/rapidsai/cugraph/pull/1640)) [@seunghwak](https://github.com/seunghwak) -- Upgaded ecipe and dev envs to NCCL 2.9.9 ([#1636](https://github.com/rapidsai/cugraph/pull/1636)) [@latzel](https://github.com/latzel) -- Use UCX-Py 0.20 ([#1634](https://github.com/rapidsai/cugraph/pull/1634)) [@jakikham](https://github.com/jakikham) -- Updated dependencies fo CalVe ([#1629](https://github.com/rapidsai/cugraph/pull/1629)) [@latzel](https://github.com/latzel) -- MG WCC impovements ([#1628](https://github.com/rapidsai/cugraph/pull/1628)) [@seunghwak](https://github.com/seunghwak) -- Initialize foce_atlas2 `old_foces` device_uvecto, use new `mm::exec_policy` ([#1625](https://github.com/rapidsai/cugraph/pull/1625)) [@txcllnt](https://github.com/txcllnt) -- Fix develope guide examples fo device_buffe ([#1619](https://github.com/rapidsai/cugraph/pull/1619)) [@haism](https://github.com/haism) -- Pass mm memoy allocato to cuco::static_map ([#1617](https://github.com/rapidsai/cugraph/pull/1617)) [@seunghwak](https://github.com/seunghwak) -- Undo disabling MG C++ testing outputs fo non-oot pocesses ([#1615](https://github.com/rapidsai/cugraph/pull/1615)) [@seunghwak](https://github.com/seunghwak) -- WCC bindings ([#1612](https://github.com/rapidsai/cugraph/pull/1612)) [@Ioy30](https://github.com/Ioy30) -- addess 'ValueEo: Seies contains NULL values' fom fom_cudf_edge… ([#1610](https://github.com/rapidsai/cugraph/pull/1610)) [@mattf](https://github.com/mattf) -- Fea mm device buffe change ([#1609](https://github.com/rapidsai/cugraph/pull/1609)) [@ChuckHastings](https://github.com/ChuckHastings) -- Update `CHANGELOG.md` links fo calve ([#1608](https://github.com/rapidsai/cugraph/pull/1608)) [@ajschmidt8](https://github.com/ajschmidt8) -- Handle int64 in foce atlas wappe and update to uvecto ([#1607](https://github.com/rapidsai/cugraph/pull/1607)) 
[@hlinsen](https://github.com/hlinsen) -- Update docs build scipt ([#1606](https://github.com/rapidsai/cugraph/pull/1606)) [@ajschmidt8](https://github.com/ajschmidt8) -- WCC pefomance/memoy footpint optimization ([#1605](https://github.com/rapidsai/cugraph/pull/1605)) [@seunghwak](https://github.com/seunghwak) -- adding test gaphs - pat 2 ([#1603](https://github.com/rapidsai/cugraph/pull/1603)) [@ChuckHastings](https://github.com/ChuckHastings) -- Update the Random Walk binding ([#1599](https://github.com/rapidsai/cugraph/pull/1599)) [@Ioy30](https://github.com/Ioy30) -- Add mnmg out degee ([#1592](https://github.com/rapidsai/cugraph/pull/1592)) [@Ioy30](https://github.com/Ioy30) -- Update `cugaph` to with newest CMake featues, including CPM fo dependencies ([#1585](https://github.com/rapidsai/cugraph/pull/1585)) [@obetmaynad](https://github.com/obetmaynad) -- Implement Gaph Batching functionality ([#1580](https://github.com/rapidsai/cugraph/pull/1580)) [@aschaffe](https://github.com/aschaffe) -- add multi-column suppot in algoithms - pat 2 ([#1571](https://github.com/rapidsai/cugraph/pull/1571)) [@Ioy30](https://github.com/Ioy30) +## 🛠️ Improvements + +- Remove Pascal guard and test cuGraph use of cuco::static_map on Pascal ([#1640](https://github.com/rapidsai/cugraph/pull/1640)) [@seunghwak](https://github.com/seunghwak) +- Upgraded recipe and dev envs to NCCL 2.9.9 ([#1636](https://github.com/rapidsai/cugraph/pull/1636)) [@rlratzel](https://github.com/rlratzel) +- Use UCX-Py 0.20 ([#1634](https://github.com/rapidsai/cugraph/pull/1634)) [@jakirkham](https://github.com/jakirkham) +- Updated dependencies for CalVer ([#1629](https://github.com/rapidsai/cugraph/pull/1629)) [@rlratzel](https://github.com/rlratzel) +- MG WCC improvements ([#1628](https://github.com/rapidsai/cugraph/pull/1628)) [@seunghwak](https://github.com/seunghwak) +- Initialize force_atlas2 `old_forces` device_uvector, use new `rmm::exec_policy` ([#1625](https://github.com/rapidsai/cugraph/pull/1625)) [@trxcllnt](https://github.com/trxcllnt) +- Fix developer guide examples for device_buffer ([#1619](https://github.com/rapidsai/cugraph/pull/1619)) [@harrism](https://github.com/harrism) +- Pass rmm memory allocator to cuco::static_map ([#1617](https://github.com/rapidsai/cugraph/pull/1617)) [@seunghwak](https://github.com/seunghwak) +- Undo disabling MG C++ testing outputs for non-root processes ([#1615](https://github.com/rapidsai/cugraph/pull/1615)) [@seunghwak](https://github.com/seunghwak) +- WCC bindings ([#1612](https://github.com/rapidsai/cugraph/pull/1612)) [@Iroy30](https://github.com/Iroy30) +- address 'ValueError: Series contains NULL values' from from_cudf_edge… ([#1610](https://github.com/rapidsai/cugraph/pull/1610)) [@mattf](https://github.com/mattf) +- Fea rmm device buffer change ([#1609](https://github.com/rapidsai/cugraph/pull/1609)) [@ChuckHastings](https://github.com/ChuckHastings) +- Update `CHANGELOG.md` links for calver ([#1608](https://github.com/rapidsai/cugraph/pull/1608)) [@ajschmidt8](https://github.com/ajschmidt8) +- Handle int64 in force atlas wrapper and update to uvector ([#1607](https://github.com/rapidsai/cugraph/pull/1607)) [@hlinsen](https://github.com/hlinsen) +- Update docs build script ([#1606](https://github.com/rapidsai/cugraph/pull/1606)) [@ajschmidt8](https://github.com/ajschmidt8) +- WCC performance/memory footprint optimization ([#1605](https://github.com/rapidsai/cugraph/pull/1605)) [@seunghwak](https://github.com/seunghwak) +- adding test graphs - part 2 
([#1603](https://github.com/rapidsai/cugraph/pull/1603)) [@ChuckHastings](https://github.com/ChuckHastings) +- Update the Random Walk binding ([#1599](https://github.com/rapidsai/cugraph/pull/1599)) [@Iroy30](https://github.com/Iroy30) +- Add mnmg out degree ([#1592](https://github.com/rapidsai/cugraph/pull/1592)) [@Iroy30](https://github.com/Iroy30) +- Update `cugraph` to with newest CMake features, including CPM for dependencies ([#1585](https://github.com/rapidsai/cugraph/pull/1585)) [@robertmaynard](https://github.com/robertmaynard) +- Implement Graph Batching functionality ([#1580](https://github.com/rapidsai/cugraph/pull/1580)) [@aschaffer](https://github.com/aschaffer) +- add multi-column support in algorithms - part 2 ([#1571](https://github.com/rapidsai/cugraph/pull/1571)) [@Iroy30](https://github.com/Iroy30) # cuGraph 0.19.0 (21 Apr 2021) From 6b5079ca88590c8e2e81f45ef15d4b666cbef0b4 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 15 Jun 2021 00:47:07 +1000 Subject: [PATCH 295/343] Update to changed `rmm::device_scalar` API (#1637) After rapidsai/rmm/#789 is a breaking API change for `rmm::device_scalar`. This PR fixes a couple of uses of `rmm::device_scalar` to fix the build of cuGraph, and should be merged immediately after rapidsai/rmm/#789. Also fixes an unrelated narrowing conversion warning. Authors: - Mark Harris (https://github.com/harrism) - AJ Schmidt (https://github.com/ajschmidt8) Approvers: - Seunghwa Kang (https://github.com/seunghwak) - Chuck Hastings (https://github.com/ChuckHastings) - AJ Schmidt (https://github.com/ajschmidt8) URL: https://github.com/rapidsai/cugraph/pull/1637 --- ci/gpu/build.sh | 8 ++++---- cpp/src/components/weakly_connected_components.cu | 2 +- cpp/src/traversal/tsp.cu | 8 ++++++-- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 7c9c353f23c..a74eefffc8b 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -72,10 +72,6 @@ gpuci_conda_retry install -y \ # gpuci_conda_retry remove --force rapids-build-env rapids-notebook-env # gpuci_conda_retry install -y "your-pkg=1.0.0" -gpuci_logger "Install the master version of dask and distributed" -pip install "git+https://github.com/dask/distributed.git" --upgrade --no-deps -pip install "git+https://github.com/dask/dask.git" --upgrade --no-deps - gpuci_logger "Check versions" python --version $CC --version @@ -108,6 +104,10 @@ else echo "Installing $CONDA_FILE" conda install -c ${CONDA_ARTIFACT_PATH} "$CONDA_FILE" + gpuci_logger "Install the master version of dask and distributed" + pip install "git+https://github.com/dask/distributed.git" --upgrade --no-deps + pip install "git+https://github.com/dask/dask.git" --upgrade --no-deps + echo "Build cugraph..." 
$WORKSPACE/build.sh cugraph fi diff --git a/cpp/src/components/weakly_connected_components.cu b/cpp/src/components/weakly_connected_components.cu index 0c552ad24fc..241b7182fda 100644 --- a/cpp/src/components/weakly_connected_components.cu +++ b/cpp/src/components/weakly_connected_components.cu @@ -632,7 +632,7 @@ void weakly_connected_components_impl(raft::handle_t const &handle, edge_first, edge_first + new_num_edge_inserts); auto num_unique_edges = static_cast(thrust::distance(edge_first, unique_edge_last)); - num_edge_inserts.set_value(num_unique_edges, handle.get_stream_view()); + num_edge_inserts.set_value_async(num_unique_edges, handle.get_stream_view()); } vertex_frontier.get_bucket(static_cast(Bucket::cur)).clear(); diff --git a/cpp/src/traversal/tsp.cu b/cpp/src/traversal/tsp.cu index a28ddbbaa3f..17183d48f3d 100644 --- a/cpp/src/traversal/tsp.cu +++ b/cpp/src/traversal/tsp.cu @@ -19,6 +19,8 @@ #include +#include + #include "tsp.hpp" #include "tsp_solver.hpp" @@ -53,6 +55,8 @@ TSP::TSP(raft::handle_t const &handle, warp_size_(handle_.get_device_properties().warpSize), sm_count_(handle_.get_device_properties().multiProcessorCount), restart_batch_(8192), + mylock_scalar_(stream_), + best_cost_scalar_(stream_), neighbors_vec_((k_ + 1) * nodes_, stream_), work_vec_(restart_batch_ * ((4 * nodes_ + 3 + warp_size_ - 1) / warp_size_ * warp_size_), stream_), @@ -81,9 +85,9 @@ void TSP::setup() void TSP::reset_batch() { - mylock_scalar_.set_value_zero(stream_); + mylock_scalar_.set_value_to_zero_async(stream_); auto const max{std::numeric_limits::max()}; - best_cost_scalar_.set_value(max, stream_); + best_cost_scalar_.set_value_async(max, stream_); } void TSP::get_initial_solution(int const batch) From 93c6e03ed8110c910312257dfbf0d52ed78a9a77 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Tue, 15 Jun 2021 00:26:40 +0200 Subject: [PATCH 296/343] Update UCX-Py version to 0.21 (#1650) Authors: - Peter Andreas Entschev (https://github.com/pentschev) Approvers: - Rick Ratzel (https://github.com/rlratzel) - Brad Rees (https://github.com/BradReesWork) - AJ Schmidt (https://github.com/ajschmidt8) URL: https://github.com/rapidsai/cugraph/pull/1650 --- ci/benchmark/build.sh | 2 +- ci/gpu/build.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/benchmark/build.sh b/ci/benchmark/build.sh index f5cedae5e29..f8100222c12 100644 --- a/ci/benchmark/build.sh +++ b/ci/benchmark/build.sh @@ -74,7 +74,7 @@ gpuci_conda_retry install -c nvidia -c rapidsai -c rapidsai-nightly -c conda-for "cudatoolkit=$CUDA_REL" \ "dask-cudf=${MINOR_VERSION}" \ "dask-cuda=${MINOR_VERSION}" \ - "ucx-py=0.20.*" \ + "ucx-py=0.21.*" \ "ucx-proc=*=gpu" \ "rapids-build-env=${MINOR_VERSION}" \ rapids-pytest-benchmark diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index a74eefffc8b..02e139fc05e 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -62,7 +62,7 @@ gpuci_conda_retry install -y \ "cudatoolkit=$CUDA_REL" \ "dask-cudf=${MINOR_VERSION}" \ "dask-cuda=${MINOR_VERSION}" \ - "ucx-py=0.20.*" \ + "ucx-py=0.21.*" \ "ucx-proc=*=gpu" \ "rapids-build-env=$MINOR_VERSION.*" \ "rapids-notebook-env=$MINOR_VERSION.*" \ From 4f7dbfc3de1ac99567a0fa8ff636d4bff3aaf26b Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Tue, 15 Jun 2021 08:19:53 -0400 Subject: [PATCH 297/343] Rename include/cugraph/patterns to include/cugraph/prims (#1644) Rename the patterns directory to prims. [skip-ci] at this point to save the CI resource for the 21.06 code freeze. 
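For downstream code this is purely an include-path update; as a minimal hypothetical sketch (count_if_e.cuh is just one of the headers moved by this patch, and the caller shown here is illustrative, not part of the PR):

    // Hypothetical caller of an existing prim: only the include path changes
    // with this rename; the prim keeps the same name and signature.
    // Before: #include <cugraph/patterns/count_if_e.cuh>
    #include <cugraph/prims/count_if_e.cuh>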
Authors: - Seunghwa Kang (https://github.com/seunghwak) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - Andrei Schaffer (https://github.com/aschaffer) URL: https://github.com/rapidsai/cugraph/pull/1644 --- .../{patterns => prims}/any_of_adj_matrix_row.cuh | 0 .../copy_to_adj_matrix_row_col.cuh | 0 .../copy_v_transform_reduce_in_out_nbr.cuh | 4 ++-- ...y_v_transform_reduce_key_aggregated_out_nbr.cuh | 0 .../cugraph/{patterns => prims}/count_if_e.cuh | 4 ++-- .../cugraph/{patterns => prims}/count_if_v.cuh | 0 .../cugraph/{patterns => prims}/edge_op_utils.cuh | 0 .../cugraph/{patterns => prims}/reduce_op.cuh | 0 .../cugraph/{patterns => prims}/reduce_v.cuh | 0 ...ransform_reduce_by_adj_matrix_row_col_key_e.cuh | 2 +- .../{patterns => prims}/transform_reduce_e.cuh | 2 +- .../{patterns => prims}/transform_reduce_v.cuh | 0 .../transform_reduce_v_with_adj_matrix_row.cuh | 0 .../update_frontier_v_push_if_out_nbr.cuh | 4 ++-- .../{patterns => prims}/vertex_frontier.cuh | 0 cpp/src/components/weakly_connected_components.cu | 6 +++--- cpp/src/experimental/bfs.cu | 6 +++--- cpp/src/experimental/coarsen_graph.cu | 2 +- cpp/src/experimental/graph_view.cu | 2 +- cpp/src/experimental/katz_centrality.cu | 8 ++++---- cpp/src/experimental/louvain.cuh | 12 ++++++------ cpp/src/experimental/pagerank.cu | 14 +++++++------- cpp/src/experimental/relabel.cu | 2 +- cpp/src/experimental/sssp.cu | 12 ++++++------ 24 files changed, 40 insertions(+), 40 deletions(-) rename cpp/include/cugraph/{patterns => prims}/any_of_adj_matrix_row.cuh (100%) rename cpp/include/cugraph/{patterns => prims}/copy_to_adj_matrix_row_col.cuh (100%) rename cpp/include/cugraph/{patterns => prims}/copy_v_transform_reduce_in_out_nbr.cuh (99%) rename cpp/include/cugraph/{patterns => prims}/copy_v_transform_reduce_key_aggregated_out_nbr.cuh (100%) rename cpp/include/cugraph/{patterns => prims}/count_if_e.cuh (97%) rename cpp/include/cugraph/{patterns => prims}/count_if_v.cuh (100%) rename cpp/include/cugraph/{patterns => prims}/edge_op_utils.cuh (100%) rename cpp/include/cugraph/{patterns => prims}/reduce_op.cuh (100%) rename cpp/include/cugraph/{patterns => prims}/reduce_v.cuh (100%) rename cpp/include/cugraph/{patterns => prims}/transform_reduce_by_adj_matrix_row_col_key_e.cuh (99%) rename cpp/include/cugraph/{patterns => prims}/transform_reduce_e.cuh (99%) rename cpp/include/cugraph/{patterns => prims}/transform_reduce_v.cuh (100%) rename cpp/include/cugraph/{patterns => prims}/transform_reduce_v_with_adj_matrix_row.cuh (100%) rename cpp/include/cugraph/{patterns => prims}/update_frontier_v_push_if_out_nbr.cuh (99%) rename cpp/include/cugraph/{patterns => prims}/vertex_frontier.cuh (100%) diff --git a/cpp/include/cugraph/patterns/any_of_adj_matrix_row.cuh b/cpp/include/cugraph/prims/any_of_adj_matrix_row.cuh similarity index 100% rename from cpp/include/cugraph/patterns/any_of_adj_matrix_row.cuh rename to cpp/include/cugraph/prims/any_of_adj_matrix_row.cuh diff --git a/cpp/include/cugraph/patterns/copy_to_adj_matrix_row_col.cuh b/cpp/include/cugraph/prims/copy_to_adj_matrix_row_col.cuh similarity index 100% rename from cpp/include/cugraph/patterns/copy_to_adj_matrix_row_col.cuh rename to cpp/include/cugraph/prims/copy_to_adj_matrix_row_col.cuh diff --git a/cpp/include/cugraph/patterns/copy_v_transform_reduce_in_out_nbr.cuh b/cpp/include/cugraph/prims/copy_v_transform_reduce_in_out_nbr.cuh similarity index 99% rename from cpp/include/cugraph/patterns/copy_v_transform_reduce_in_out_nbr.cuh rename to 
cpp/include/cugraph/prims/copy_v_transform_reduce_in_out_nbr.cuh index 4284396370d..0bae6da71e6 100644 --- a/cpp/include/cugraph/patterns/copy_v_transform_reduce_in_out_nbr.cuh +++ b/cpp/include/cugraph/prims/copy_v_transform_reduce_in_out_nbr.cuh @@ -17,8 +17,8 @@ #include #include -#include -#include +#include +#include #include #include #include diff --git a/cpp/include/cugraph/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh b/cpp/include/cugraph/prims/copy_v_transform_reduce_key_aggregated_out_nbr.cuh similarity index 100% rename from cpp/include/cugraph/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh rename to cpp/include/cugraph/prims/copy_v_transform_reduce_key_aggregated_out_nbr.cuh diff --git a/cpp/include/cugraph/patterns/count_if_e.cuh b/cpp/include/cugraph/prims/count_if_e.cuh similarity index 97% rename from cpp/include/cugraph/patterns/count_if_e.cuh rename to cpp/include/cugraph/prims/count_if_e.cuh index 039be17252d..b8fa5dc9d8d 100644 --- a/cpp/include/cugraph/patterns/count_if_e.cuh +++ b/cpp/include/cugraph/prims/count_if_e.cuh @@ -16,8 +16,8 @@ #pragma once #include -#include -#include +#include +#include #include diff --git a/cpp/include/cugraph/patterns/count_if_v.cuh b/cpp/include/cugraph/prims/count_if_v.cuh similarity index 100% rename from cpp/include/cugraph/patterns/count_if_v.cuh rename to cpp/include/cugraph/prims/count_if_v.cuh diff --git a/cpp/include/cugraph/patterns/edge_op_utils.cuh b/cpp/include/cugraph/prims/edge_op_utils.cuh similarity index 100% rename from cpp/include/cugraph/patterns/edge_op_utils.cuh rename to cpp/include/cugraph/prims/edge_op_utils.cuh diff --git a/cpp/include/cugraph/patterns/reduce_op.cuh b/cpp/include/cugraph/prims/reduce_op.cuh similarity index 100% rename from cpp/include/cugraph/patterns/reduce_op.cuh rename to cpp/include/cugraph/prims/reduce_op.cuh diff --git a/cpp/include/cugraph/patterns/reduce_v.cuh b/cpp/include/cugraph/prims/reduce_v.cuh similarity index 100% rename from cpp/include/cugraph/patterns/reduce_v.cuh rename to cpp/include/cugraph/prims/reduce_v.cuh diff --git a/cpp/include/cugraph/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh b/cpp/include/cugraph/prims/transform_reduce_by_adj_matrix_row_col_key_e.cuh similarity index 99% rename from cpp/include/cugraph/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh rename to cpp/include/cugraph/prims/transform_reduce_by_adj_matrix_row_col_key_e.cuh index 58633fb1e22..6041c6da3e2 100644 --- a/cpp/include/cugraph/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh +++ b/cpp/include/cugraph/prims/transform_reduce_by_adj_matrix_row_col_key_e.cuh @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/cpp/include/cugraph/patterns/transform_reduce_e.cuh b/cpp/include/cugraph/prims/transform_reduce_e.cuh similarity index 99% rename from cpp/include/cugraph/patterns/transform_reduce_e.cuh rename to cpp/include/cugraph/prims/transform_reduce_e.cuh index 151fa1df0c7..f8252974f95 100644 --- a/cpp/include/cugraph/patterns/transform_reduce_e.cuh +++ b/cpp/include/cugraph/prims/transform_reduce_e.cuh @@ -17,7 +17,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/include/cugraph/patterns/transform_reduce_v.cuh b/cpp/include/cugraph/prims/transform_reduce_v.cuh similarity index 100% rename from cpp/include/cugraph/patterns/transform_reduce_v.cuh rename to cpp/include/cugraph/prims/transform_reduce_v.cuh diff --git 
a/cpp/include/cugraph/patterns/transform_reduce_v_with_adj_matrix_row.cuh b/cpp/include/cugraph/prims/transform_reduce_v_with_adj_matrix_row.cuh similarity index 100% rename from cpp/include/cugraph/patterns/transform_reduce_v_with_adj_matrix_row.cuh rename to cpp/include/cugraph/prims/transform_reduce_v_with_adj_matrix_row.cuh diff --git a/cpp/include/cugraph/patterns/update_frontier_v_push_if_out_nbr.cuh b/cpp/include/cugraph/prims/update_frontier_v_push_if_out_nbr.cuh similarity index 99% rename from cpp/include/cugraph/patterns/update_frontier_v_push_if_out_nbr.cuh rename to cpp/include/cugraph/prims/update_frontier_v_push_if_out_nbr.cuh index b1d63cc942a..abb1a7e18cb 100644 --- a/cpp/include/cugraph/patterns/update_frontier_v_push_if_out_nbr.cuh +++ b/cpp/include/cugraph/prims/update_frontier_v_push_if_out_nbr.cuh @@ -18,8 +18,8 @@ #include #include #include -#include -#include +#include +#include #include #include #include diff --git a/cpp/include/cugraph/patterns/vertex_frontier.cuh b/cpp/include/cugraph/prims/vertex_frontier.cuh similarity index 100% rename from cpp/include/cugraph/patterns/vertex_frontier.cuh rename to cpp/include/cugraph/prims/vertex_frontier.cuh diff --git a/cpp/src/components/weakly_connected_components.cu b/cpp/src/components/weakly_connected_components.cu index 241b7182fda..ff8fbb9d032 100644 --- a/cpp/src/components/weakly_connected_components.cu +++ b/cpp/src/components/weakly_connected_components.cu @@ -18,9 +18,9 @@ #include #include #include -#include -#include -#include +#include +#include +#include #include #include #include diff --git a/cpp/src/experimental/bfs.cu b/cpp/src/experimental/bfs.cu index 817e9cbd225..b75590f89b3 100644 --- a/cpp/src/experimental/bfs.cu +++ b/cpp/src/experimental/bfs.cu @@ -16,9 +16,9 @@ #include #include -#include -#include -#include +#include +#include +#include #include #include diff --git a/cpp/src/experimental/coarsen_graph.cu b/cpp/src/experimental/coarsen_graph.cu index e648691f8b1..834c41cbbf3 100644 --- a/cpp/src/experimental/coarsen_graph.cu +++ b/cpp/src/experimental/coarsen_graph.cu @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/cpp/src/experimental/graph_view.cu b/cpp/src/experimental/graph_view.cu index 3dc5dee4756..eacec51371d 100644 --- a/cpp/src/experimental/graph_view.cu +++ b/cpp/src/experimental/graph_view.cu @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include diff --git a/cpp/src/experimental/katz_centrality.cu b/cpp/src/experimental/katz_centrality.cu index ad62f5e9d68..515c49fda00 100644 --- a/cpp/src/experimental/katz_centrality.cu +++ b/cpp/src/experimental/katz_centrality.cu @@ -16,10 +16,10 @@ #include #include -#include -#include -#include -#include +#include +#include +#include +#include #include #include diff --git a/cpp/src/experimental/louvain.cuh b/cpp/src/experimental/louvain.cuh index 6205f13e94d..27e66c48086 100644 --- a/cpp/src/experimental/louvain.cuh +++ b/cpp/src/experimental/louvain.cuh @@ -20,12 +20,12 @@ #include #include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include #include #include diff --git a/cpp/src/experimental/pagerank.cu b/cpp/src/experimental/pagerank.cu index db54783453e..66cc416f91b 100644 --- a/cpp/src/experimental/pagerank.cu +++ b/cpp/src/experimental/pagerank.cu @@ -16,13 +16,13 @@ #include #include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include 
+#include +#include +#include +#include #include #include diff --git a/cpp/src/experimental/relabel.cu b/cpp/src/experimental/relabel.cu index 7e7a4d64b3e..2ba2fb751eb 100644 --- a/cpp/src/experimental/relabel.cu +++ b/cpp/src/experimental/relabel.cu @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include diff --git a/cpp/src/experimental/sssp.cu b/cpp/src/experimental/sssp.cu index c8e7f1eb7a0..06872ead17f 100644 --- a/cpp/src/experimental/sssp.cu +++ b/cpp/src/experimental/sssp.cu @@ -16,12 +16,12 @@ #include #include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include #include #include From e47c396e75d69af40444fbabb2a301449df90cde Mon Sep 17 00:00:00 2001 From: Rick Ratzel <3039903+rlratzel@users.noreply.github.com> Date: Tue, 15 Jun 2021 07:20:45 -0500 Subject: [PATCH 298/343] Added accidentally-removed cpp-mgtests target back to the valid args list (#1652) Added accidentally-removed cpp-mgtests target back to the valid args list. This was removed during an update to add `--buildfaiss`, likely due to a bad merge? Tested by running with the target and observing the MG tests being built and the MPI library being pulled in (still building, will update if something goes wrong) Authors: - Rick Ratzel (https://github.com/rlratzel) Approvers: - Brad Rees (https://github.com/BradReesWork) - Chuck Hastings (https://github.com/ChuckHastings) - Hugo Linsenmaier (https://github.com/hlinsen) URL: https://github.com/rapidsai/cugraph/pull/1652 --- build.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build.sh b/build.sh index 8437a32bf94..506cc482c59 100755 --- a/build.sh +++ b/build.sh @@ -19,7 +19,7 @@ ARGS=$* REPODIR=$(cd $(dirname $0); pwd) LIBCUGRAPH_BUILD_DIR=${LIBCUGRAPH_BUILD_DIR:=${REPODIR}/cpp/build} -VALIDARGS="clean uninstall libcugraph cugraph docs -v -g -n --allgpuarch --buildfaiss --show_depr_warn -h --help" +VALIDARGS="clean uninstall libcugraph cugraph cpp-mgtests docs -v -g -n --allgpuarch --buildfaiss --show_depr_warn -h --help" HELP="$0 [ ...] [ ...] 
where is: clean - remove all existing build artifacts and configuration (start over) @@ -99,7 +99,7 @@ if hasArg --allgpuarch; then BUILD_ALL_GPU_ARCH=1 fi if hasArg --buildfaiss; then - BUILD_STATIC_FAISS=ON + BUILD_STATIC_FAISS=ON fi if hasArg --show_depr_warn; then BUILD_DISABLE_DEPRECATION_WARNING=OFF From 40a57f5a253a9e309fa56273833a38b1ed802952 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Tue, 15 Jun 2021 07:22:57 -0500 Subject: [PATCH 299/343] Use nested include in destination of install headers to avoid docker permission issues (#1656) Authors: - Dante Gama Dessavre (https://github.com/dantegd) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - Brad Rees (https://github.com/BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1656 --- cpp/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 5d272bfe7f6..ae0b524689f 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -337,8 +337,8 @@ install(TARGETS cugraph DESTINATION lib EXPORT cugraph-exports) -install(DIRECTORY include/ - DESTINATION include) +install(DIRECTORY include/cugraph/ + DESTINATION include/cugraph) install(FILES ${CMAKE_CURRENT_BINARY_DIR}/include/cugraph/version_config.hpp DESTINATION include/cugraph) From 237c6cde5575f13ecb03e8e3275479d116ecc6ed Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Tue, 15 Jun 2021 14:23:29 +0200 Subject: [PATCH 300/343] Update pins to Dask/Distributed >= 2021.6.0 (#1666) Authors: - Peter Andreas Entschev (https://github.com/pentschev) Approvers: - AJ Schmidt (https://github.com/ajschmidt8) - https://github.com/jakirkham - Brad Rees (https://github.com/BradReesWork) - Chuck Hastings (https://github.com/ChuckHastings) URL: https://github.com/rapidsai/cugraph/pull/1666 --- conda/environments/cugraph_dev_cuda11.0.yml | 4 ++-- conda/environments/cugraph_dev_cuda11.1.yml | 4 ++-- conda/environments/cugraph_dev_cuda11.2.yml | 4 ++-- conda/recipes/cugraph/meta.yaml | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/conda/environments/cugraph_dev_cuda11.0.yml b/conda/environments/cugraph_dev_cuda11.0.yml index 7cf456aab97..91e640ea50b 100644 --- a/conda/environments/cugraph_dev_cuda11.0.yml +++ b/conda/environments/cugraph_dev_cuda11.0.yml @@ -10,8 +10,8 @@ dependencies: - libcudf=21.08.* - rmm=21.08.* - librmm=21.08.* -- dask>=2.12.0 -- distributed>=2.12.0 +- dask>=2021.6.0 +- distributed>=2021.6.0 - dask-cuda=21.08.* - dask-cudf=21.08.* - nccl>=2.9.9 diff --git a/conda/environments/cugraph_dev_cuda11.1.yml b/conda/environments/cugraph_dev_cuda11.1.yml index 5d6837c1f84..39fb0ca5811 100644 --- a/conda/environments/cugraph_dev_cuda11.1.yml +++ b/conda/environments/cugraph_dev_cuda11.1.yml @@ -10,8 +10,8 @@ dependencies: - libcudf=21.08.* - rmm=21.08.* - librmm=21.08.* -- dask>=2.12.0 -- distributed>=2.12.0 +- dask>=2021.6.0 +- distributed>=2021.6.0 - dask-cuda=21.08.* - dask-cudf=21.08.* - nccl>=2.9.9 diff --git a/conda/environments/cugraph_dev_cuda11.2.yml b/conda/environments/cugraph_dev_cuda11.2.yml index 8e2f5d9158b..80fdf63a8e9 100644 --- a/conda/environments/cugraph_dev_cuda11.2.yml +++ b/conda/environments/cugraph_dev_cuda11.2.yml @@ -10,8 +10,8 @@ dependencies: - libcudf=21.08.* - rmm=21.08.* - librmm=21.08.* -- dask>=2.12.0 -- distributed>=2.12.0 +- dask>=2021.6.0 +- distributed>=2021.6.0 - dask-cuda=21.08.* - dask-cudf=21.08.* - nccl>=2.9.9 diff --git a/conda/recipes/cugraph/meta.yaml b/conda/recipes/cugraph/meta.yaml index 
ef229c43179..834fbab5838 100644 --- a/conda/recipes/cugraph/meta.yaml +++ b/conda/recipes/cugraph/meta.yaml @@ -35,8 +35,8 @@ requirements: - cudf={{ minor_version }} - dask-cudf {{ minor_version }} - dask-cuda {{ minor_version }} - - dask>=2.12.0 - - distributed>=2.12.0 + - dask>=2021.6.0 + - distributed>=2021.6.0 - ucx-py 0.21 - ucx-proc=*=gpu From 27444ede9d861a68b8d2bd14f3a58eb7dfd84544 Mon Sep 17 00:00:00 2001 From: Chuck Hastings <45364586+ChuckHastings@users.noreply.github.com> Date: Tue, 15 Jun 2021 10:45:24 -0400 Subject: [PATCH 301/343] Fea cleanup stream part1 (#1653) Addresses part of #1538 and #1422 Breaking the work for these issues into smaller chunks to make it easier to test, review and approve. Authors: - Chuck Hastings (https://github.com/ChuckHastings) Approvers: - Seunghwa Kang (https://github.com/seunghwak) - Andrei Schaffer (https://github.com/aschaffer) URL: https://github.com/rapidsai/cugraph/pull/1653 --- cpp/include/cugraph/dendrogram.hpp | 5 +- .../cugraph/utilities/collect_comm.cuh | 58 ++--- .../cugraph/utilities/dataframe_buffer.cuh | 54 +++-- cpp/include/cugraph/utilities/device_comm.cuh | 163 ++++++------- .../cugraph/utilities/shuffle_comm.cuh | 85 +++---- cpp/src/centrality/betweenness_centrality.cu | 25 +- cpp/src/community/ecg.cu | 42 ++-- cpp/src/community/leiden.cuh | 25 +- cpp/src/community/louvain.cuh | 152 +++++++------ cpp/src/converters/COOtoCSR.cuh | 58 ++--- cpp/src/experimental/graph.cu | 101 +++++---- cpp/src/experimental/graph_view.cu | 64 +++--- cpp/src/experimental/induced_subgraph.cu | 47 ++-- cpp/src/experimental/louvain.cuh | 60 ++--- cpp/src/experimental/relabel.cu | 70 +++--- cpp/src/experimental/renumber_edgelist.cu | 108 +++++---- cpp/src/experimental/renumber_utils.cu | 83 ++++--- cpp/src/generators/generate_rmat_edgelist.cu | 10 +- cpp/src/layout/barnes_hut.hpp | 214 +++++++++--------- cpp/src/layout/exact_fa2.hpp | 58 ++--- cpp/src/layout/utils.hpp | 18 +- cpp/src/link_prediction/jaccard.cu | 17 +- cpp/src/sampling/random_walks.cuh | 90 ++++---- cpp/src/structure/graph.cu | 18 +- cpp/src/utilities/graph_utils.cuh | 165 +++----------- 25 files changed, 848 insertions(+), 942 deletions(-) diff --git a/cpp/include/cugraph/dendrogram.hpp b/cpp/include/cugraph/dendrogram.hpp index aa0802e80b3..2640944dc09 100644 --- a/cpp/include/cugraph/dendrogram.hpp +++ b/cpp/include/cugraph/dendrogram.hpp @@ -27,10 +27,11 @@ class Dendrogram { public: void add_level(vertex_t first_index, vertex_t num_verts, - cudaStream_t stream, + rmm::cuda_stream_view stream_view, rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) { - level_ptr_.push_back(std::make_unique>(num_verts, stream, mr)); + level_ptr_.push_back( + std::make_unique>(num_verts, stream_view, mr)); level_first_index_.push_back(first_index); } diff --git a/cpp/include/cugraph/utilities/collect_comm.cuh b/cpp/include/cugraph/utilities/collect_comm.cuh index ddc5621e929..76eff8afc71 100644 --- a/cpp/include/cugraph/utilities/collect_comm.cuh +++ b/cpp/include/cugraph/utilities/collect_comm.cuh @@ -50,7 +50,7 @@ collect_values_for_keys(raft::comms::comms_t const &comm, VertexIterator1 collect_key_first, VertexIterator1 collect_key_last, KeyToGPUIdOp key_to_gpu_id_op, - cudaStream_t stream) + rmm::cuda_stream_view stream_view) { using vertex_t = typename std::iterator_traits::value_type; static_assert( @@ -66,7 +66,7 @@ collect_values_for_keys(raft::comms::comms_t const &comm, // 1. build a cuco::static_map object for the map k, v pairs. 
auto poly_alloc = rmm::mr::polymorphic_allocator(rmm::mr::get_current_device_resource()); - auto stream_adapter = rmm::mr::make_stream_allocator_adaptor(poly_alloc, cudaStream_t{nullptr}); + auto stream_adapter = rmm::mr::make_stream_allocator_adaptor(poly_alloc, stream_view); auto kv_map_ptr = std::make_unique< cuco::static_map>( // cuco::static_map requires at least one empty slot @@ -84,37 +84,37 @@ collect_values_for_keys(raft::comms::comms_t const &comm, // 2. collect values for the unique keys in [collect_key_first, collect_key_last) rmm::device_uvector unique_keys(thrust::distance(collect_key_first, collect_key_last), - stream); + stream_view); thrust::copy( - rmm::exec_policy(stream)->on(stream), collect_key_first, collect_key_last, unique_keys.begin()); - thrust::sort(rmm::exec_policy(stream)->on(stream), unique_keys.begin(), unique_keys.end()); + rmm::exec_policy(stream_view), collect_key_first, collect_key_last, unique_keys.begin()); + thrust::sort(rmm::exec_policy(stream_view), unique_keys.begin(), unique_keys.end()); unique_keys.resize( thrust::distance( unique_keys.begin(), - thrust::unique(rmm::exec_policy(stream)->on(stream), unique_keys.begin(), unique_keys.end())), - stream); + thrust::unique(rmm::exec_policy(stream_view), unique_keys.begin(), unique_keys.end())), + stream_view); - rmm::device_uvector values_for_unique_keys(0, stream); + rmm::device_uvector values_for_unique_keys(0, stream_view); { - rmm::device_uvector rx_unique_keys(0, stream); + rmm::device_uvector rx_unique_keys(0, stream_view); std::vector rx_value_counts{}; std::tie(rx_unique_keys, rx_value_counts) = groupby_gpuid_and_shuffle_values( comm, unique_keys.begin(), unique_keys.end(), [key_to_gpu_id_op] __device__(auto val) { return key_to_gpu_id_op(val); }, - stream); + stream_view); - rmm::device_uvector values_for_rx_unique_keys(rx_unique_keys.size(), stream); + rmm::device_uvector values_for_rx_unique_keys(rx_unique_keys.size(), stream_view); - CUDA_TRY(cudaStreamSynchronize(stream)); // cuco::static_map currently does not take stream + stream_view.synchronize(); // cuco::static_map currently does not take stream kv_map_ptr->find( rx_unique_keys.begin(), rx_unique_keys.end(), values_for_rx_unique_keys.begin()); - rmm::device_uvector rx_values_for_unique_keys(0, stream); + rmm::device_uvector rx_values_for_unique_keys(0, stream_view); std::tie(rx_values_for_unique_keys, std::ignore) = - shuffle_values(comm, values_for_rx_unique_keys.begin(), rx_value_counts, stream); + shuffle_values(comm, values_for_rx_unique_keys.begin(), rx_value_counts, stream_view); values_for_unique_keys = std::move(rx_values_for_unique_keys); } @@ -122,7 +122,7 @@ collect_values_for_keys(raft::comms::comms_t const &comm, // 3. re-build a cuco::static_map object for the k, v pairs in unique_keys, // values_for_unique_keys. - CUDA_TRY(cudaStreamSynchronize(stream)); // cuco::static_map currently does not take stream + stream_view.synchronize(); // cuco::static_map currently does not take stream kv_map_ptr.reset(); @@ -143,7 +143,7 @@ collect_values_for_keys(raft::comms::comms_t const &comm, // 4. 
find values for [collect_key_first, collect_key_last) auto value_buffer = allocate_dataframe_buffer( - thrust::distance(collect_key_first, collect_key_last), stream); + thrust::distance(collect_key_first, collect_key_last), stream_view); kv_map_ptr->find( collect_key_first, collect_key_last, get_dataframe_buffer_begin(value_buffer)); @@ -165,7 +165,7 @@ collect_values_for_unique_keys(raft::comms::comms_t const &comm, VertexIterator1 collect_unique_key_first, VertexIterator1 collect_unique_key_last, KeyToGPUIdOp key_to_gpu_id_op, - cudaStream_t stream) + rmm::cuda_stream_view stream_view) { using vertex_t = typename std::iterator_traits::value_type; static_assert( @@ -181,7 +181,7 @@ collect_values_for_unique_keys(raft::comms::comms_t const &comm, // 1. build a cuco::static_map object for the map k, v pairs. auto poly_alloc = rmm::mr::polymorphic_allocator(rmm::mr::get_current_device_resource()); - auto stream_adapter = rmm::mr::make_stream_allocator_adaptor(poly_alloc, cudaStream_t{nullptr}); + auto stream_adapter = rmm::mr::make_stream_allocator_adaptor(poly_alloc, stream_view); auto kv_map_ptr = std::make_unique< cuco::static_map>( // cuco::static_map requires at least one empty slot @@ -199,33 +199,33 @@ collect_values_for_unique_keys(raft::comms::comms_t const &comm, // 2. collect values for the unique keys in [collect_unique_key_first, collect_unique_key_last) rmm::device_uvector unique_keys( - thrust::distance(collect_unique_key_first, collect_unique_key_last), stream); - thrust::copy(rmm::exec_policy(stream)->on(stream), + thrust::distance(collect_unique_key_first, collect_unique_key_last), stream_view); + thrust::copy(rmm::exec_policy(stream_view), collect_unique_key_first, collect_unique_key_last, unique_keys.begin()); - rmm::device_uvector values_for_unique_keys(0, stream); + rmm::device_uvector values_for_unique_keys(0, stream_view); { - rmm::device_uvector rx_unique_keys(0, stream); + rmm::device_uvector rx_unique_keys(0, stream_view); std::vector rx_value_counts{}; std::tie(rx_unique_keys, rx_value_counts) = groupby_gpuid_and_shuffle_values( comm, unique_keys.begin(), unique_keys.end(), [key_to_gpu_id_op] __device__(auto val) { return key_to_gpu_id_op(val); }, - stream); + stream_view); - rmm::device_uvector values_for_rx_unique_keys(rx_unique_keys.size(), stream); + rmm::device_uvector values_for_rx_unique_keys(rx_unique_keys.size(), stream_view); - CUDA_TRY(cudaStreamSynchronize(stream)); // cuco::static_map currently does not take stream + stream_view.synchronize(); // cuco::static_map currently does not take stream kv_map_ptr->find( rx_unique_keys.begin(), rx_unique_keys.end(), values_for_rx_unique_keys.begin()); - rmm::device_uvector rx_values_for_unique_keys(0, stream); + rmm::device_uvector rx_values_for_unique_keys(0, stream_view); std::tie(rx_values_for_unique_keys, std::ignore) = - shuffle_values(comm, values_for_rx_unique_keys.begin(), rx_value_counts, stream); + shuffle_values(comm, values_for_rx_unique_keys.begin(), rx_value_counts, stream_view); values_for_unique_keys = std::move(rx_values_for_unique_keys); } @@ -233,7 +233,7 @@ collect_values_for_unique_keys(raft::comms::comms_t const &comm, // 3. re-build a cuco::static_map object for the k, v pairs in unique_keys, // values_for_unique_keys. 
- CUDA_TRY(cudaStreamSynchronize(stream)); // cuco::static_map currently does not take stream + stream_view.synchronize(); // cuco::static_map currently does not take stream kv_map_ptr.reset(); @@ -254,7 +254,7 @@ collect_values_for_unique_keys(raft::comms::comms_t const &comm, // 4. find values for [collect_unique_key_first, collect_unique_key_last) auto value_buffer = allocate_dataframe_buffer( - thrust::distance(collect_unique_key_first, collect_unique_key_last), stream); + thrust::distance(collect_unique_key_first, collect_unique_key_last), stream_view); kv_map_ptr->find(collect_unique_key_first, collect_unique_key_last, get_dataframe_buffer_begin(value_buffer)); diff --git a/cpp/include/cugraph/utilities/dataframe_buffer.cuh b/cpp/include/cugraph/utilities/dataframe_buffer.cuh index beaf4cabe00..d730a3afcff 100644 --- a/cpp/include/cugraph/utilities/dataframe_buffer.cuh +++ b/cpp/include/cugraph/utilities/dataframe_buffer.cuh @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -31,49 +32,50 @@ namespace experimental { namespace detail { template -auto allocate_dataframe_buffer_tuple_element_impl(size_t buffer_size, cudaStream_t stream) +auto allocate_dataframe_buffer_tuple_element_impl(size_t buffer_size, + rmm::cuda_stream_view stream_view) { using element_t = typename thrust::tuple_element::type; - return rmm::device_uvector(buffer_size, stream); + return rmm::device_uvector(buffer_size, stream_view); } template auto allocate_dataframe_buffer_tuple_impl(std::index_sequence, size_t buffer_size, - cudaStream_t stream) + rmm::cuda_stream_view stream_view) { return std::make_tuple( - allocate_dataframe_buffer_tuple_element_impl(buffer_size, stream)...); + allocate_dataframe_buffer_tuple_element_impl(buffer_size, stream_view)...); } template struct resize_dataframe_buffer_tuple_iterator_element_impl { - void run(BufferType& buffer, size_t new_buffer_size, cudaStream_t stream) + void run(BufferType& buffer, size_t new_buffer_size, rmm::cuda_stream_view stream_view) { - std::get(buffer).resize(new_buffer_size, stream); + std::get(buffer).resize(new_buffer_size, stream_view); resize_dataframe_buffer_tuple_iterator_element_impl().run( - buffer, new_buffer_size, stream); + buffer, new_buffer_size, stream_view); } }; template struct resize_dataframe_buffer_tuple_iterator_element_impl { - void run(BufferType& buffer, size_t new_buffer_size, cudaStream_t stream) {} + void run(BufferType& buffer, size_t new_buffer_size, rmm::cuda_stream_view stream_view) {} }; template struct shrink_to_fit_dataframe_buffer_tuple_iterator_element_impl { - void run(BufferType& buffer, cudaStream_t stream) + void run(BufferType& buffer, rmm::cuda_stream_view stream_view) { - std::get(buffer).shrink_to_fit(stream); + std::get(buffer).shrink_to_fit(stream_view); shrink_to_fit_dataframe_buffer_tuple_iterator_element_impl() - .run(buffer, stream); + .run(buffer, stream_view); } }; template struct shrink_to_fit_dataframe_buffer_tuple_iterator_element_impl { - void run(BufferType& buffer, cudaStream_t stream) {} + void run(BufferType& buffer, rmm::cuda_stream_view stream_view) {} }; template @@ -108,57 +110,61 @@ auto get_dataframe_buffer_end_tuple_impl(std::index_sequence, BufferType& } // namespace detail template ::value>* = nullptr> -auto allocate_dataframe_buffer(size_t buffer_size, cudaStream_t stream) +auto allocate_dataframe_buffer(size_t buffer_size, rmm::cuda_stream_view stream_view) { - return rmm::device_uvector(buffer_size, stream); + return rmm::device_uvector(buffer_size, stream_view); } 
template ::value>* = nullptr> -auto allocate_dataframe_buffer(size_t buffer_size, cudaStream_t stream) +auto allocate_dataframe_buffer(size_t buffer_size, rmm::cuda_stream_view stream_view) { size_t constexpr tuple_size = thrust::tuple_size::value; return detail::allocate_dataframe_buffer_tuple_impl( - std::make_index_sequence(), buffer_size, stream); + std::make_index_sequence(), buffer_size, stream_view); } template ::value>* = nullptr> -void resize_dataframe_buffer(BufferType& buffer, size_t new_buffer_size, cudaStream_t stream) +void resize_dataframe_buffer(BufferType& buffer, + size_t new_buffer_size, + rmm::cuda_stream_view stream_view) { - buffer.resize(new_buffer_size, stream); + buffer.resize(new_buffer_size, stream_view); } template ::value>* = nullptr> -void resize_dataframe_buffer(BufferType& buffer, size_t new_buffer_size, cudaStream_t stream) +void resize_dataframe_buffer(BufferType& buffer, + size_t new_buffer_size, + rmm::cuda_stream_view stream_view) { size_t constexpr tuple_size = thrust::tuple_size::value; detail:: resize_dataframe_buffer_tuple_iterator_element_impl() - .run(buffer, new_buffer_size, stream); + .run(buffer, new_buffer_size, stream_view); } template ::value>* = nullptr> -void shrink_to_fit_dataframe_buffer(BufferType& buffer, cudaStream_t stream) +void shrink_to_fit_dataframe_buffer(BufferType& buffer, rmm::cuda_stream_view stream_view) { - buffer.shrink_to_fit(stream); + buffer.shrink_to_fit(stream_view); } template ::value>* = nullptr> -void shrink_to_fit_dataframe_buffer(BufferType& buffer, cudaStream_t stream) +void shrink_to_fit_dataframe_buffer(BufferType& buffer, rmm::cuda_stream_view stream_view) { size_t constexpr tuple_size = thrust::tuple_size::value; detail::shrink_to_fit_dataframe_buffer_tuple_iterator_element_impl() - .run(buffer, stream); + .run(buffer, stream_view); } template #include +#include #include #include @@ -175,7 +176,7 @@ device_sendrecv_impl(raft::comms::comms_t const& comm, OutputIterator output_first, size_t rx_count, int src, - cudaStream_t stream) + rmm::cuda_stream_view stream_view) { // no-op } @@ -191,7 +192,7 @@ device_sendrecv_impl(raft::comms::comms_t const& comm, OutputIterator output_first, size_t rx_count, int src, - cudaStream_t stream) + rmm::cuda_stream_view stream_view) { using value_type = typename std::iterator_traits::value_type; static_assert( @@ -202,7 +203,7 @@ device_sendrecv_impl(raft::comms::comms_t const& comm, iter_to_raw_ptr(output_first), rx_count, src, - stream); + stream_view.value()); } template @@ -214,7 +215,7 @@ struct device_sendrecv_tuple_iterator_element_impl { OutputIterator output_first, size_t rx_count, int src, - cudaStream_t stream) const + rmm::cuda_stream_view stream_view) const { using output_value_t = typename thrust:: tuple_element::value_type>::type; @@ -228,9 +229,9 @@ struct device_sendrecv_tuple_iterator_element_impl { tuple_element_output_first, rx_count, src, - stream); + stream_view.value()); device_sendrecv_tuple_iterator_element_impl().run( - comm, input_first, tx_count, dst, output_first, rx_count, src, stream); + comm, input_first, tx_count, dst, output_first, rx_count, src, stream_view); } }; @@ -243,7 +244,7 @@ struct device_sendrecv_tuple_iterator_element_impl const& rx_counts, std::vector const& rx_offsets, std::vector const& rx_src_ranks, - cudaStream_t stream) + rmm::cuda_stream_view stream_view) { // no-op } @@ -277,7 +278,7 @@ device_multicast_sendrecv_impl(raft::comms::comms_t const& comm, std::vector const& rx_counts, std::vector const& rx_offsets, std::vector 
const& rx_src_ranks, - cudaStream_t stream) + rmm::cuda_stream_view stream_view) { using value_type = typename std::iterator_traits::value_type; static_assert( @@ -290,7 +291,7 @@ device_multicast_sendrecv_impl(raft::comms::comms_t const& comm, rx_counts, rx_offsets, rx_src_ranks, - stream); + stream_view.value()); } template @@ -304,7 +305,7 @@ struct device_multicast_sendrecv_tuple_iterator_element_impl { std::vector const& rx_counts, std::vector const& rx_offsets, std::vector const& rx_src_ranks, - cudaStream_t stream) const + rmm::cuda_stream_view stream_view) const { using output_value_t = typename thrust:: tuple_element::value_type>::type; @@ -320,7 +321,7 @@ struct device_multicast_sendrecv_tuple_iterator_element_impl { rx_counts, rx_offsets, rx_src_ranks, - stream); + stream_view); device_multicast_sendrecv_tuple_iterator_element_impl() .run(comm, input_first, @@ -331,7 +332,7 @@ struct device_multicast_sendrecv_tuple_iterator_element_impl { rx_counts, rx_offsets, rx_src_ranks, - stream); + stream_view); } }; @@ -346,7 +347,7 @@ struct device_multicast_sendrecv_tuple_iterator_element_impl const& rx_counts, std::vector const& rx_offsets, std::vector const& rx_src_ranks, - cudaStream_t stream) const + rmm::cuda_stream_view stream_view) const { } }; @@ -358,7 +359,7 @@ device_bcast_impl(raft::comms::comms_t const& comm, OutputIterator output_first, size_t count, int root, - cudaStream_t stream) + rmm::cuda_stream_view stream_view) { // no-op } @@ -372,14 +373,14 @@ device_bcast_impl(raft::comms::comms_t const& comm, OutputIterator output_first, size_t count, int root, - cudaStream_t stream) + rmm::cuda_stream_view stream_view) { static_assert(std::is_same::value_type, typename std::iterator_traits::value_type>::value); if (comm.get_rank() == root) { - comm.bcast(iter_to_raw_ptr(input_first), count, root, stream); + comm.bcast(iter_to_raw_ptr(input_first), count, root, stream_view.value()); } else { - comm.bcast(iter_to_raw_ptr(output_first), count, root, stream); + comm.bcast(iter_to_raw_ptr(output_first), count, root, stream_view.value()); } } @@ -390,16 +391,16 @@ struct device_bcast_tuple_iterator_element_impl { OutputIterator output_first, size_t count, int root, - cudaStream_t stream) const + rmm::cuda_stream_view stream_view) const { device_bcast_impl(comm, thrust::get(input_first.get_iterator_tuple()), thrust::get(output_first.get_iterator_tuple()), count, root, - stream); + stream_view); device_bcast_tuple_iterator_element_impl().run( - comm, input_first, output_first, count, root, stream); + comm, input_first, output_first, count, root, stream_view); } }; @@ -410,7 +411,7 @@ struct device_bcast_tuple_iterator_element_impl::value_type, typename std::iterator_traits::value_type>::value); - comm.allreduce(iter_to_raw_ptr(input_first), iter_to_raw_ptr(output_first), count, op, stream); + comm.allreduce( + iter_to_raw_ptr(input_first), iter_to_raw_ptr(output_first), count, op, stream_view.value()); } template @@ -450,16 +452,16 @@ struct device_allreduce_tuple_iterator_element_impl { OutputIterator output_first, size_t count, raft::comms::op_t op, - cudaStream_t stream) const + rmm::cuda_stream_view stream_view) const { device_allreduce_impl(comm, thrust::get(input_first.get_iterator_tuple()), thrust::get(output_first.get_iterator_tuple()), count, op, - stream); + stream_view); device_allreduce_tuple_iterator_element_impl().run( - comm, input_first, output_first, count, op, stream); + comm, input_first, output_first, count, op, stream_view); } }; @@ -470,7 +472,7 @@ struct 
device_allreduce_tuple_iterator_element_impl::value_type, typename std::iterator_traits::value_type>::value); - comm.reduce(iter_to_raw_ptr(input_first), iter_to_raw_ptr(output_first), count, op, root, stream); + comm.reduce(iter_to_raw_ptr(input_first), + iter_to_raw_ptr(output_first), + count, + op, + root, + stream_view.value()); } template @@ -513,7 +520,7 @@ struct device_reduce_tuple_iterator_element_impl { size_t count, raft::comms::op_t op, int root, - cudaStream_t stream) const + rmm::cuda_stream_view stream_view) const { device_reduce_impl(comm, thrust::get(input_first.get_iterator_tuple()), @@ -521,9 +528,9 @@ struct device_reduce_tuple_iterator_element_impl { count, op, root, - stream); + stream_view); device_reduce_tuple_iterator_element_impl().run( - comm, input_first, output_first, count, op, root, stream); + comm, input_first, output_first, count, op, root, stream_view); } }; @@ -535,7 +542,7 @@ struct device_reduce_tuple_iterator_element_impl const& recvcounts, std::vector const& displacements, - cudaStream_t stream) + rmm::cuda_stream_view stream_view) { // no-op } @@ -561,7 +568,7 @@ device_allgatherv_impl(raft::comms::comms_t const& comm, OutputIterator output_first, std::vector const& recvcounts, std::vector const& displacements, - cudaStream_t stream) + rmm::cuda_stream_view stream_view) { static_assert(std::is_same::value_type, typename std::iterator_traits::value_type>::value); @@ -569,7 +576,7 @@ device_allgatherv_impl(raft::comms::comms_t const& comm, iter_to_raw_ptr(output_first), recvcounts.data(), displacements.data(), - stream); + stream_view.value()); } template @@ -579,16 +586,16 @@ struct device_allgatherv_tuple_iterator_element_impl { OutputIterator output_first, std::vector const& recvcounts, std::vector const& displacements, - cudaStream_t stream) const + rmm::cuda_stream_view stream_view) const { device_allgatherv_impl(comm, thrust::get(input_first.get_iterator_tuple()), thrust::get(output_first.get_iterator_tuple()), recvcounts, displacements, - stream); + stream_view); device_allgatherv_tuple_iterator_element_impl().run( - comm, input_first, output_first, recvcounts, displacements, stream); + comm, input_first, output_first, recvcounts, displacements, stream_view); } }; @@ -599,7 +606,7 @@ struct device_allgatherv_tuple_iterator_element_impl const& recvcounts, std::vector const& displacements, - cudaStream_t stream) const + rmm::cuda_stream_view stream_view) const { } }; @@ -613,7 +620,7 @@ device_gatherv_impl(raft::comms::comms_t const& comm, std::vector const& recvcounts, std::vector const& displacements, int root, - cudaStream_t stream) + rmm::cuda_stream_view stream_view) { // no-op } @@ -629,7 +636,7 @@ device_gatherv_impl(raft::comms::comms_t const& comm, std::vector const& recvcounts, std::vector const& displacements, int root, - cudaStream_t stream) + rmm::cuda_stream_view stream_view) { static_assert(std::is_same::value_type, typename std::iterator_traits::value_type>::value); @@ -639,7 +646,7 @@ device_gatherv_impl(raft::comms::comms_t const& comm, recvcounts.data(), displacements.data(), root, - stream); + stream_view.value()); } template @@ -651,7 +658,7 @@ struct device_gatherv_tuple_iterator_element_impl { std::vector const& recvcounts, std::vector const& displacements, int root, - cudaStream_t stream) const + rmm::cuda_stream_view stream_view) const { device_gatherv_impl(comm, thrust::get(input_first.get_iterator_tuple()), @@ -660,9 +667,9 @@ struct device_gatherv_tuple_iterator_element_impl { recvcounts, displacements, root, - stream); 
+ stream_view); device_gatherv_tuple_iterator_element_impl().run( - comm, input_first, output_first, sendcount, recvcounts, displacements, root, stream); + comm, input_first, output_first, sendcount, recvcounts, displacements, root, stream_view); } }; @@ -675,7 +682,7 @@ struct device_gatherv_tuple_iterator_element_impl const& recvcounts, std::vector const& displacements, int root, - cudaStream_t stream) const + rmm::cuda_stream_view stream_view) const { } }; @@ -772,10 +779,10 @@ device_sendrecv(raft::comms::comms_t const& comm, OutputIterator output_first, size_t rx_count, int src, - cudaStream_t stream) + rmm::cuda_stream_view stream_view) { detail::device_sendrecv_impl( - comm, input_first, tx_count, dst, output_first, rx_count, src, stream); + comm, input_first, tx_count, dst, output_first, rx_count, src, stream_view); } template @@ -790,7 +797,7 @@ device_sendrecv(raft::comms::comms_t const& comm, OutputIterator output_first, size_t rx_count, int src, - cudaStream_t stream) + rmm::cuda_stream_view stream_view) { static_assert( thrust::tuple_size::value_type>::value == @@ -806,7 +813,7 @@ device_sendrecv(raft::comms::comms_t const& comm, OutputIterator, size_t{0}, tuple_size>() - .run(comm, input_first, tx_count, dst, output_first, rx_count, src, stream); + .run(comm, input_first, tx_count, dst, output_first, rx_count, src, stream_view); } template @@ -822,7 +829,7 @@ device_multicast_sendrecv(raft::comms::comms_t const& comm, std::vector const& rx_counts, std::vector const& rx_offsets, std::vector const& rx_src_ranks, - cudaStream_t stream) + rmm::cuda_stream_view stream_view) { detail::device_multicast_sendrecv_impl(comm, input_first, @@ -833,7 +840,7 @@ device_multicast_sendrecv(raft::comms::comms_t const& comm, rx_counts, rx_offsets, rx_src_ranks, - stream); + stream_view); } template @@ -850,7 +857,7 @@ device_multicast_sendrecv(raft::comms::comms_t const& comm, std::vector const& rx_counts, std::vector const& rx_offsets, std::vector const& rx_src_ranks, - cudaStream_t stream) + rmm::cuda_stream_view stream_view) { static_assert( thrust::tuple_size::value_type>::value == @@ -875,7 +882,7 @@ device_multicast_sendrecv(raft::comms::comms_t const& comm, rx_counts, rx_offsets, rx_src_ranks, - stream); + stream_view); } template @@ -887,9 +894,9 @@ device_bcast(raft::comms::comms_t const& comm, OutputIterator output_first, size_t count, int root, - cudaStream_t stream) + rmm::cuda_stream_view stream_view) { - detail::device_bcast_impl(comm, input_first, output_first, count, root, stream); + detail::device_bcast_impl(comm, input_first, output_first, count, root, stream_view); } template @@ -902,7 +909,7 @@ device_bcast(raft::comms::comms_t const& comm, OutputIterator output_first, size_t count, int root, - cudaStream_t stream) + rmm::cuda_stream_view stream_view) { static_assert( thrust::tuple_size::value_type>::value == @@ -913,7 +920,7 @@ device_bcast(raft::comms::comms_t const& comm, detail:: device_bcast_tuple_iterator_element_impl() - .run(comm, input_first, output_first, count, root, stream); + .run(comm, input_first, output_first, count, root, stream_view); } template @@ -925,9 +932,9 @@ device_allreduce(raft::comms::comms_t const& comm, OutputIterator output_first, size_t count, raft::comms::op_t op, - cudaStream_t stream) + rmm::cuda_stream_view stream_view) { - detail::device_allreduce_impl(comm, input_first, output_first, count, op, stream); + detail::device_allreduce_impl(comm, input_first, output_first, count, op, stream_view); } template @@ -940,7 +947,7 @@ 
device_allreduce(raft::comms::comms_t const& comm, OutputIterator output_first, size_t count, raft::comms::op_t op, - cudaStream_t stream) + rmm::cuda_stream_view stream_view) { static_assert( thrust::tuple_size::value_type>::value == @@ -953,7 +960,7 @@ device_allreduce(raft::comms::comms_t const& comm, OutputIterator, size_t{0}, tuple_size>() - .run(comm, input_first, output_first, count, op, stream); + .run(comm, input_first, output_first, count, op, stream_view); } template @@ -966,9 +973,9 @@ device_reduce(raft::comms::comms_t const& comm, size_t count, raft::comms::op_t op, int root, - cudaStream_t stream) + rmm::cuda_stream_view stream_view) { - detail::device_reduce_impl(comm, input_first, output_first, count, op, root, stream); + detail::device_reduce_impl(comm, input_first, output_first, count, op, root, stream_view); } template @@ -982,7 +989,7 @@ device_reduce(raft::comms::comms_t const& comm, size_t count, raft::comms::op_t op, int root, - cudaStream_t stream) + rmm::cuda_stream_view stream_view) { static_assert( thrust::tuple_size::value_type>::value == @@ -995,7 +1002,7 @@ device_reduce(raft::comms::comms_t const& comm, OutputIterator, size_t{0}, tuple_size>() - .run(comm, input_first, output_first, count, op, root, stream); + .run(comm, input_first, output_first, count, op, root, stream_view); } template @@ -1007,10 +1014,10 @@ device_allgatherv(raft::comms::comms_t const& comm, OutputIterator output_first, std::vector const& recvcounts, std::vector const& displacements, - cudaStream_t stream) + rmm::cuda_stream_view stream_view) { detail::device_allgatherv_impl( - comm, input_first, output_first, recvcounts, displacements, stream); + comm, input_first, output_first, recvcounts, displacements, stream_view); } template @@ -1023,7 +1030,7 @@ device_allgatherv(raft::comms::comms_t const& comm, OutputIterator output_first, std::vector const& recvcounts, std::vector const& displacements, - cudaStream_t stream) + rmm::cuda_stream_view stream_view) { static_assert( thrust::tuple_size::value_type>::value == @@ -1036,7 +1043,7 @@ device_allgatherv(raft::comms::comms_t const& comm, OutputIterator, size_t{0}, tuple_size>() - .run(comm, input_first, output_first, recvcounts, displacements, stream); + .run(comm, input_first, output_first, recvcounts, displacements, stream_view); } template @@ -1050,10 +1057,10 @@ device_gatherv(raft::comms::comms_t const& comm, std::vector const& recvcounts, std::vector const& displacements, int root, - cudaStream_t stream) + rmm::cuda_stream_view stream_view) { detail::device_gatherv_impl( - comm, input_first, output_first, sendcount, recvcounts, displacements, root, stream); + comm, input_first, output_first, sendcount, recvcounts, displacements, root, stream_view); } template @@ -1068,7 +1075,7 @@ device_gatherv(raft::comms::comms_t const& comm, std::vector const& recvcounts, std::vector const& displacements, int root, - cudaStream_t stream) + rmm::cuda_stream_view stream_view) { static_assert( thrust::tuple_size::value_type>::value == @@ -1081,7 +1088,7 @@ device_gatherv(raft::comms::comms_t const& comm, OutputIterator, size_t{0}, tuple_size>() - .run(comm, input_first, output_first, sendcount, recvcounts, displacements, root, stream); + .run(comm, input_first, output_first, sendcount, recvcounts, displacements, root, stream_view); } } // namespace experimental diff --git a/cpp/include/cugraph/utilities/shuffle_comm.cuh b/cpp/include/cugraph/utilities/shuffle_comm.cuh index 009dde845b5..e4f7067cfdf 100644 --- 
a/cpp/include/cugraph/utilities/shuffle_comm.cuh +++ b/cpp/include/cugraph/utilities/shuffle_comm.cuh @@ -18,9 +18,9 @@ #include #include -#include #include #include +#include #include #include @@ -46,11 +46,11 @@ inline std::tuple, std::vector> compute_tx_rx_counts_offsets_ranks(raft::comms::comms_t const &comm, rmm::device_uvector const &d_tx_value_counts, - cudaStream_t stream) + rmm::cuda_stream_view stream_view) { auto const comm_size = comm.get_size(); - rmm::device_uvector d_rx_value_counts(comm_size, stream); + rmm::device_uvector d_rx_value_counts(comm_size, stream_view); // FIXME: this needs to be replaced with AlltoAll once NCCL 2.8 is released. std::vector tx_counts(comm_size, size_t{1}); @@ -72,12 +72,12 @@ compute_tx_rx_counts_offsets_ranks(raft::comms::comms_t const &comm, rx_counts, rx_offsets, rx_src_ranks, - stream); + stream_view); - raft::update_host(tx_counts.data(), d_tx_value_counts.data(), comm_size, stream); - raft::update_host(rx_counts.data(), d_rx_value_counts.data(), comm_size, stream); + raft::update_host(tx_counts.data(), d_tx_value_counts.data(), comm_size, stream_view.value()); + raft::update_host(rx_counts.data(), d_rx_value_counts.data(), comm_size, stream_view.value()); - CUDA_TRY(cudaStreamSynchronize(stream)); // rx_counts should be up-to-date + stream_view.synchronize(); std::partial_sum(tx_counts.begin(), tx_counts.end() - 1, tx_offsets.begin() + 1); std::partial_sum(rx_counts.begin(), rx_counts.end() - 1, rx_offsets.begin() + 1); @@ -115,9 +115,9 @@ rmm::device_uvector groupby_and_count(ValueIterator tx_value_first /* [I ValueIterator tx_value_last /* [INOUT */, ValueToGPUIdOp value_to_group_id_op, int num_groups, - cudaStream_t stream) + rmm::cuda_stream_view stream_view) { - thrust::sort(rmm::exec_policy(stream)->on(stream), + thrust::sort(rmm::exec_policy(stream_view), tx_value_first, tx_value_last, [value_to_group_id_op] __device__(auto lhs, auto rhs) { @@ -127,19 +127,19 @@ rmm::device_uvector groupby_and_count(ValueIterator tx_value_first /* [I auto group_id_first = thrust::make_transform_iterator( tx_value_first, [value_to_group_id_op] __device__(auto value) { return value_to_group_id_op(value); }); - rmm::device_uvector d_tx_dst_ranks(num_groups, stream); - rmm::device_uvector d_tx_value_counts(d_tx_dst_ranks.size(), stream); + rmm::device_uvector d_tx_dst_ranks(num_groups, stream_view); + rmm::device_uvector d_tx_value_counts(d_tx_dst_ranks.size(), stream_view); auto last = - thrust::reduce_by_key(rmm::exec_policy(stream)->on(stream), + thrust::reduce_by_key(rmm::exec_policy(stream_view), group_id_first, group_id_first + thrust::distance(tx_value_first, tx_value_last), thrust::make_constant_iterator(size_t{1}), d_tx_dst_ranks.begin(), d_tx_value_counts.begin()); if (thrust::distance(d_tx_dst_ranks.begin(), thrust::get<0>(last)) < num_groups) { - rmm::device_uvector d_counts(num_groups, stream); - thrust::fill(rmm::exec_policy(stream)->on(stream), d_counts.begin(), d_counts.end(), size_t{0}); - thrust::scatter(rmm::exec_policy(stream)->on(stream), + rmm::device_uvector d_counts(num_groups, stream_view); + thrust::fill(rmm::exec_policy(stream_view), d_counts.begin(), d_counts.end(), size_t{0}); + thrust::scatter(rmm::exec_policy(stream_view), d_tx_value_counts.begin(), thrust::get<1>(last), d_tx_dst_ranks.begin(), @@ -156,9 +156,9 @@ rmm::device_uvector groupby_and_count(VertexIterator tx_key_first /* [IN ValueIterator tx_value_first /* [INOUT */, KeyToGPUIdOp key_to_group_id_op, int num_groups, - cudaStream_t stream) + rmm::cuda_stream_view 
stream_view) { - thrust::sort_by_key(rmm::exec_policy(stream)->on(stream), + thrust::sort_by_key(rmm::exec_policy(stream_view), tx_key_first, tx_key_last, tx_value_first, @@ -168,18 +168,18 @@ rmm::device_uvector groupby_and_count(VertexIterator tx_key_first /* [IN auto group_id_first = thrust::make_transform_iterator( tx_key_first, [key_to_group_id_op] __device__(auto key) { return key_to_group_id_op(key); }); - rmm::device_uvector d_tx_dst_ranks(num_groups, stream); - rmm::device_uvector d_tx_value_counts(d_tx_dst_ranks.size(), stream); - auto last = thrust::reduce_by_key(rmm::exec_policy(stream)->on(stream), + rmm::device_uvector d_tx_dst_ranks(num_groups, stream_view); + rmm::device_uvector d_tx_value_counts(d_tx_dst_ranks.size(), stream_view); + auto last = thrust::reduce_by_key(rmm::exec_policy(stream_view), group_id_first, group_id_first + thrust::distance(tx_key_first, tx_key_last), thrust::make_constant_iterator(size_t{1}), d_tx_dst_ranks.begin(), d_tx_value_counts.begin()); if (thrust::distance(d_tx_dst_ranks.begin(), thrust::get<0>(last)) < num_groups) { - rmm::device_uvector d_counts(num_groups, stream); - thrust::fill(rmm::exec_policy(stream)->on(stream), d_counts.begin(), d_counts.end(), size_t{0}); - thrust::scatter(rmm::exec_policy(stream)->on(stream), + rmm::device_uvector d_counts(num_groups, stream_view); + thrust::fill(rmm::exec_policy(stream_view), d_counts.begin(), d_counts.end(), size_t{0}); + thrust::scatter(rmm::exec_policy(stream_view), d_tx_value_counts.begin(), thrust::get<1>(last), d_tx_dst_ranks.begin(), @@ -194,12 +194,13 @@ template auto shuffle_values(raft::comms::comms_t const &comm, TxValueIterator tx_value_first, std::vector const &tx_value_counts, - cudaStream_t stream) + rmm::cuda_stream_view stream_view) { auto const comm_size = comm.get_size(); - rmm::device_uvector d_tx_value_counts(comm_size, stream); - raft::update_device(d_tx_value_counts.data(), tx_value_counts.data(), comm_size, stream); + rmm::device_uvector d_tx_value_counts(comm_size, stream_view); + raft::update_device( + d_tx_value_counts.data(), tx_value_counts.data(), comm_size, stream_view.value()); std::vector tx_counts{}; std::vector tx_offsets{}; @@ -208,11 +209,11 @@ auto shuffle_values(raft::comms::comms_t const &comm, std::vector rx_offsets{}; std::vector rx_src_ranks{}; std::tie(tx_counts, tx_offsets, tx_dst_ranks, rx_counts, rx_offsets, rx_src_ranks) = - detail::compute_tx_rx_counts_offsets_ranks(comm, d_tx_value_counts, stream); + detail::compute_tx_rx_counts_offsets_ranks(comm, d_tx_value_counts, stream_view); auto rx_value_buffer = allocate_dataframe_buffer::value_type>( - rx_offsets.size() > 0 ? rx_offsets.back() + rx_counts.back() : size_t{0}, stream); + rx_offsets.size() > 0 ? rx_offsets.back() + rx_counts.back() : size_t{0}, stream_view); // FIXME: this needs to be replaced with AlltoAll once NCCL 2.8 is released // (if num_tx_dst_ranks == num_rx_src_ranks == comm_size). 
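The hunks in these communication and shuffle utilities all follow one conversion pattern: stream parameters become rmm::cuda_stream_view (implicitly constructible from a raw cudaStream_t), Thrust calls take rmm::exec_policy(stream_view) instead of rmm::exec_policy(stream)->on(stream), APIs that still expect the raw handle receive stream_view.value(), and host-side waits become stream_view.synchronize(). A minimal sketch of that calling pattern follows; the helper name and data are illustrative and not part of this patch.

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/sort.h>

#include <cuda_runtime.h>

#include <vector>

// Hypothetical helper: sort device data on the given stream and copy the result to the host.
std::vector<int> sorted_to_host(rmm::device_uvector<int>& d_values,
                                rmm::cuda_stream_view stream_view)
{
  // Thrust algorithms receive the stream through rmm::exec_policy(stream_view)
  // (previously rmm::exec_policy(stream)->on(stream)).
  thrust::sort(rmm::exec_policy(stream_view), d_values.begin(), d_values.end());

  // APIs that still take a raw cudaStream_t get stream_view.value().
  std::vector<int> h_values(d_values.size());
  cudaMemcpyAsync(h_values.data(),
                  d_values.data(),
                  d_values.size() * sizeof(int),
                  cudaMemcpyDeviceToHost,
                  stream_view.value());

  // Host-side waits use the view directly instead of CUDA_TRY(cudaStreamSynchronize(stream)).
  stream_view.synchronize();

  return h_values;
}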
@@ -227,7 +228,7 @@ auto shuffle_values(raft::comms::comms_t const &comm, rx_counts, rx_offsets, rx_src_ranks, - stream); + stream_view); if (rx_counts.size() < static_cast(comm_size)) { std::vector tmp_rx_counts(comm_size, size_t{0}); @@ -246,12 +247,12 @@ auto groupby_gpuid_and_shuffle_values(raft::comms::comms_t const &comm, ValueIterator tx_value_first /* [INOUT */, ValueIterator tx_value_last /* [INOUT */, ValueToGPUIdOp value_to_gpu_id_op, - cudaStream_t stream) + rmm::cuda_stream_view stream_view) { auto const comm_size = comm.get_size(); - auto d_tx_value_counts = - groupby_and_count(tx_value_first, tx_value_last, value_to_gpu_id_op, comm.get_size(), stream); + auto d_tx_value_counts = groupby_and_count( + tx_value_first, tx_value_last, value_to_gpu_id_op, comm.get_size(), stream_view); std::vector tx_counts{}; std::vector tx_offsets{}; @@ -260,11 +261,11 @@ auto groupby_gpuid_and_shuffle_values(raft::comms::comms_t const &comm, std::vector rx_offsets{}; std::vector rx_src_ranks{}; std::tie(tx_counts, tx_offsets, tx_dst_ranks, rx_counts, rx_offsets, rx_src_ranks) = - detail::compute_tx_rx_counts_offsets_ranks(comm, d_tx_value_counts, stream); + detail::compute_tx_rx_counts_offsets_ranks(comm, d_tx_value_counts, stream_view); auto rx_value_buffer = allocate_dataframe_buffer::value_type>( - rx_offsets.size() > 0 ? rx_offsets.back() + rx_counts.back() : size_t{0}, stream); + rx_offsets.size() > 0 ? rx_offsets.back() + rx_counts.back() : size_t{0}, stream_view); // FIXME: this needs to be replaced with AlltoAll once NCCL 2.8 is released // (if num_tx_dst_ranks == num_rx_src_ranks == comm_size). @@ -279,7 +280,7 @@ auto groupby_gpuid_and_shuffle_values(raft::comms::comms_t const &comm, rx_counts, rx_offsets, rx_src_ranks, - stream); + stream_view); if (rx_counts.size() < static_cast(comm_size)) { std::vector tmp_rx_counts(comm_size, size_t{0}); @@ -298,12 +299,12 @@ auto groupby_gpuid_and_shuffle_kv_pairs(raft::comms::comms_t const &comm, VertexIterator tx_key_last /* [INOUT */, ValueIterator tx_value_first /* [INOUT */, KeyToGPUIdOp key_to_gpu_id_op, - cudaStream_t stream) + rmm::cuda_stream_view stream_view) { auto const comm_size = comm.get_size(); auto d_tx_value_counts = groupby_and_count( - tx_key_first, tx_key_last, tx_value_first, key_to_gpu_id_op, comm.get_size(), stream); + tx_key_first, tx_key_last, tx_value_first, key_to_gpu_id_op, comm.get_size(), stream_view); std::vector tx_counts{}; std::vector tx_offsets{}; @@ -312,13 +313,13 @@ auto groupby_gpuid_and_shuffle_kv_pairs(raft::comms::comms_t const &comm, std::vector rx_offsets{}; std::vector rx_src_ranks{}; std::tie(tx_counts, tx_offsets, tx_dst_ranks, rx_counts, rx_offsets, rx_src_ranks) = - detail::compute_tx_rx_counts_offsets_ranks(comm, d_tx_value_counts, stream); + detail::compute_tx_rx_counts_offsets_ranks(comm, d_tx_value_counts, stream_view); rmm::device_uvector::value_type> rx_keys( - rx_offsets.size() > 0 ? rx_offsets.back() + rx_counts.back() : size_t{0}, stream); + rx_offsets.size() > 0 ? rx_offsets.back() + rx_counts.back() : size_t{0}, stream_view); auto rx_value_buffer = allocate_dataframe_buffer::value_type>( - rx_keys.size(), stream); + rx_keys.size(), stream_view); // FIXME: this needs to be replaced with AlltoAll once NCCL 2.8 is released // (if num_tx_dst_ranks == num_rx_src_ranks == comm_size). 
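groupby_and_count above pairs a sort with a keyed reduction: entries headed to the same GPU are first made contiguous, then a constant-1 sequence is reduced by destination rank to obtain per-rank send counts. Below is a small standalone sketch of that idea (a simplified variant of the kv-pair overload that materializes each value's rank); the payload type, the modulo placement functor, and the function name are hypothetical.

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/iterator/constant_iterator.h>
#include <thrust/reduce.h>
#include <thrust/sort.h>
#include <thrust/transform.h>

#include <cstddef>

// Hypothetical placement functor: route each value to rank (value % comm_size).
struct to_gpu_id_t {
  int comm_size{1};
  __host__ __device__ int operator()(int v) const { return v % comm_size; }
};

rmm::device_uvector<size_t> count_values_per_gpu(rmm::device_uvector<int>& values,
                                                 int comm_size,
                                                 rmm::cuda_stream_view stream_view)
{
  to_gpu_id_t op{comm_size};

  // 1) compute each value's destination rank and sort by it, so that values bound
  //    for the same GPU become contiguous
  rmmm_unused:
  rmm::device_uvector<int> group_ids(values.size(), stream_view);
  thrust::transform(
    rmm::exec_policy(stream_view), values.begin(), values.end(), group_ids.begin(), op);
  thrust::sort_by_key(
    rmm::exec_policy(stream_view), group_ids.begin(), group_ids.end(), values.begin());

  // 2) reduce a constant-1 sequence keyed by rank: one (rank, count) pair per rank
  //    that actually receives data (as in the patch, a scatter into a zero-filled
  //    vector is still needed when some ranks receive nothing)
  rmm::device_uvector<int> d_ranks(comm_size, stream_view);
  rmm::device_uvector<size_t> d_counts(comm_size, stream_view);
  thrust::reduce_by_key(rmm::exec_policy(stream_view),
                        group_ids.begin(),
                        group_ids.end(),
                        thrust::make_constant_iterator(size_t{1}),
                        d_ranks.begin(),
                        d_counts.begin());
  return d_counts;
}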
@@ -331,7 +332,7 @@ auto groupby_gpuid_and_shuffle_kv_pairs(raft::comms::comms_t const &comm, rx_counts, rx_offsets, rx_src_ranks, - stream); + stream_view); // FIXME: this needs to be replaced with AlltoAll once NCCL 2.8 is released // (if num_tx_dst_ranks == num_rx_src_ranks == comm_size). @@ -346,7 +347,7 @@ auto groupby_gpuid_and_shuffle_kv_pairs(raft::comms::comms_t const &comm, rx_counts, rx_offsets, rx_src_ranks, - stream); + stream_view); if (rx_counts.size() < static_cast(comm_size)) { std::vector tmp_rx_counts(comm_size, size_t{0}); diff --git a/cpp/src/centrality/betweenness_centrality.cu b/cpp/src/centrality/betweenness_centrality.cu index cdee2140382..32dddd203db 100644 --- a/cpp/src/centrality/betweenness_centrality.cu +++ b/cpp/src/centrality/betweenness_centrality.cu @@ -23,7 +23,9 @@ #include #include #include + #include +#include #include #include "betweenness_centrality.cuh" @@ -227,15 +229,13 @@ void BC::compute_single_source(vertex_t so // the traversal, this value is avalaible within the bfs implementation and // there could be a way to access it directly and avoid both replace and the // max - thrust::replace(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + thrust::replace(rmm::exec_policy(handle_.get_stream_view()), distances_, distances_ + number_of_vertices_, std::numeric_limits::max(), static_cast(-1)); - auto current_max_depth = - thrust::max_element(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), - distances_, - distances_ + number_of_vertices_); + auto current_max_depth = thrust::max_element( + rmm::exec_policy(handle_.get_stream_view()), distances_, distances_ + number_of_vertices_); vertex_t max_depth = 0; CUDA_TRY(cudaMemcpy(&max_depth, current_max_depth, sizeof(vertex_t), cudaMemcpyDeviceToHost)); // Step 2) Dependency accumulation @@ -265,7 +265,7 @@ void BC::accumulate(vertex_t source_vertex template void BC::initialize_dependencies() { - thrust::fill(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + thrust::fill(rmm::exec_policy(handle_.get_stream_view()), deltas_, deltas_ + number_of_vertices_, static_cast(0)); @@ -315,16 +315,13 @@ template ::add_reached_endpoints_to_source_betweenness( vertex_t source_vertex) { - vertex_t number_of_unvisited_vertices = - thrust::count(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), - distances_, - distances_ + number_of_vertices_, - -1); + vertex_t number_of_unvisited_vertices = thrust::count( + rmm::exec_policy(handle_.get_stream_view()), distances_, distances_ + number_of_vertices_, -1); vertex_t number_of_visited_vertices_except_source = number_of_vertices_ - number_of_unvisited_vertices - 1; rmm::device_vector buffer(1); buffer[0] = number_of_visited_vertices_except_source; - thrust::transform(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + thrust::transform(rmm::exec_policy(handle_.get_stream_view()), buffer.begin(), buffer.end(), betweenness_ + source_vertex, @@ -335,7 +332,7 @@ void BC::add_reached_endpoints_to_source_b template void BC::add_vertices_dependencies_to_betweenness() { - thrust::transform(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + thrust::transform(rmm::exec_policy(handle_.get_stream_view()), deltas_, deltas_ + number_of_vertices_, betweenness_, @@ -420,7 +417,7 @@ void BC::apply_rescale_factor_to_betweenne { size_t result_size = number_of_vertices_; if (is_edge_betweenness_) result_size = number_of_edges_; - 
thrust::transform(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + thrust::transform(rmm::exec_policy(handle_.get_stream_view()), betweenness_, betweenness_ + result_size, thrust::make_constant_iterator(rescale_factor), diff --git a/cpp/src/community/ecg.cu b/cpp/src/community/ecg.cu index ef171d127fe..b990055c16c 100644 --- a/cpp/src/community/ecg.cu +++ b/cpp/src/community/ecg.cu @@ -20,8 +20,8 @@ #include #include -#include #include +#include #include @@ -90,16 +90,15 @@ struct update_functor { * responsible for freeing the allocated memory using ALLOC_FREE_TRY(). */ template -void get_permutation_vector(T size, T seed, T *permutation, cudaStream_t stream) +void get_permutation_vector(T size, T seed, T *permutation, rmm::cuda_stream_view stream_view) { - rmm::device_uvector randoms_v(size, stream); + rmm::device_uvector randoms_v(size, stream_view); thrust::counting_iterator index(seed); - thrust::transform( - rmm::exec_policy(stream)->on(stream), index, index + size, randoms_v.begin(), prg()); - thrust::sequence(rmm::exec_policy(stream)->on(stream), permutation, permutation + size, 0); + thrust::transform(rmm::exec_policy(stream_view), index, index + size, randoms_v.begin(), prg()); + thrust::sequence(rmm::exec_policy(stream_view), permutation, permutation + size, 0); thrust::sort_by_key( - rmm::exec_policy(stream)->on(stream), randoms_v.begin(), randoms_v.end(), permutation); + rmm::exec_policy(stream_view), randoms_v.begin(), randoms_v.end(), permutation); } template @@ -117,10 +116,12 @@ class EcgLouvain : public cugraph::Louvain { void initialize_dendrogram_level(vertex_t num_vertices) override { - this->dendrogram_->add_level(0, num_vertices, this->stream_); + this->dendrogram_->add_level(0, num_vertices, this->handle_.get_stream_view()); - get_permutation_vector( - num_vertices, seed_, this->dendrogram_->current_level_begin(), this->stream_); + get_permutation_vector(num_vertices, + seed_, + this->dendrogram_->current_level_begin(), + this->handle_.get_stream_view()); } private: @@ -146,11 +147,9 @@ void ecg(raft::handle_t const &handle, "Invalid input argument: clustering is NULL, should be a device pointer to " "memory for storing the result"); - cudaStream_t stream{0}; + rmm::device_uvector ecg_weights_v(graph.number_of_edges, handle.get_stream_view()); - rmm::device_uvector ecg_weights_v(graph.number_of_edges, handle.get_stream()); - - thrust::copy(rmm::exec_policy(stream)->on(stream), + thrust::copy(rmm::exec_policy(handle.get_stream_view()), graph.edge_data, graph.edge_data + graph.number_of_edges, ecg_weights_v.data()); @@ -172,17 +171,18 @@ void ecg(raft::handle_t const &handle, dim3 grid, block; block.x = 512; grid.x = min(vertex_t{CUDA_MAX_BLOCKS}, (graph.number_of_edges / 512 + 1)); - match_check_kernel<<>>(graph.number_of_edges, - graph.number_of_vertices, - graph.offsets, - graph.indices, - runner.get_dendrogram().get_level_ptr_nocheck(0), - ecg_weights_v.data()); + match_check_kernel<<>>( + graph.number_of_edges, + graph.number_of_vertices, + graph.offsets, + graph.indices, + runner.get_dendrogram().get_level_ptr_nocheck(0), + ecg_weights_v.data()); } // Set weights = min_weight + (1 - min-weight)*sum/ensemble_size update_functor uf(min_weight, ensemble_size); - thrust::transform(rmm::exec_policy(stream)->on(stream), + thrust::transform(rmm::exec_policy(handle.get_stream_view()), ecg_weights_v.begin(), ecg_weights_v.end(), ecg_weights_v.begin(), diff --git a/cpp/src/community/leiden.cuh b/cpp/src/community/leiden.cuh index 4ffb7c20eb2..e7e358777a4 
100644 --- a/cpp/src/community/leiden.cuh +++ b/cpp/src/community/leiden.cuh @@ -42,10 +42,12 @@ class Leiden : public Louvain { this->timer_start("update_clustering_constrained"); rmm::device_uvector next_cluster_v(this->dendrogram_->current_level_size(), - this->stream_); - rmm::device_uvector delta_Q_v(graph.number_of_edges, this->stream_); - rmm::device_uvector cluster_hash_v(graph.number_of_edges, this->stream_); - rmm::device_uvector old_cluster_sum_v(graph.number_of_vertices, this->stream_); + this->handle_.get_stream_view()); + rmm::device_uvector delta_Q_v(graph.number_of_edges, this->handle_.get_stream_view()); + rmm::device_uvector cluster_hash_v(graph.number_of_edges, + this->handle_.get_stream_view()); + rmm::device_uvector old_cluster_sum_v(graph.number_of_vertices, + this->handle_.get_stream_view()); vertex_t const *d_src_indices = this->src_indices_v_.data(); vertex_t const *d_dst_indices = graph.indices; @@ -56,7 +58,7 @@ class Leiden : public Louvain { weight_t *d_delta_Q = delta_Q_v.data(); vertex_t *d_constraint = constraint_v_.data(); - thrust::copy(rmm::exec_policy(this->stream_)->on(this->stream_), + thrust::copy(rmm::exec_policy(this->handle_.get_stream_view()), this->dendrogram_->current_level_begin(), this->dendrogram_->current_level_end(), next_cluster_v.data()); @@ -79,7 +81,7 @@ class Leiden : public Louvain { // Filter out positive delta_Q values for nodes not in the same constraint group thrust::for_each( - rmm::exec_policy(this->stream_)->on(this->stream_), + rmm::exec_policy(this->handle_.get_stream_view()), thrust::make_counting_iterator(0), thrust::make_counting_iterator(graph.number_of_edges), [d_src_indices, d_dst_indices, d_constraint, d_delta_Q] __device__(vertex_t i) { @@ -95,14 +97,14 @@ class Leiden : public Louvain { new_Q = this->modularity(total_edge_weight, resolution, graph, next_cluster_v.data()); if (new_Q > cur_Q) { - thrust::copy(rmm::exec_policy(this->stream_)->on(this->stream_), + thrust::copy(rmm::exec_policy(this->handle_.get_stream_view()), next_cluster_v.begin(), next_cluster_v.end(), this->dendrogram_->current_level_begin()); } } - this->timer_stop(this->stream_); + this->timer_stop(this->handle_.get_stream_view()); return cur_Q; } @@ -110,7 +112,7 @@ class Leiden : public Louvain { { size_t num_level{0}; - weight_t total_edge_weight = thrust::reduce(rmm::exec_policy(this->stream_)->on(this->stream_), + weight_t total_edge_weight = thrust::reduce(rmm::exec_policy(this->handle_.get_stream_view()), this->weights_v_.begin(), this->weights_v_.end()); @@ -132,9 +134,10 @@ class Leiden : public Louvain { // // Initialize every cluster to reference each vertex to itself // - this->dendrogram_->add_level(0, current_graph.number_of_vertices, this->stream_); + this->dendrogram_->add_level( + 0, current_graph.number_of_vertices, this->handle_.get_stream_view()); - thrust::sequence(rmm::exec_policy(this->stream_)->on(this->stream_), + thrust::sequence(rmm::exec_policy(this->handle_.get_stream_view()), this->dendrogram_->current_level_begin(), this->dendrogram_->current_level_end()); diff --git a/cpp/src/community/louvain.cuh b/cpp/src/community/louvain.cuh index 8fa2b81783a..2c6cf31a61e 100644 --- a/cpp/src/community/louvain.cuh +++ b/cpp/src/community/louvain.cuh @@ -23,6 +23,7 @@ #include #include +#include //#define TIMING @@ -52,29 +53,28 @@ class Louvain { // to change the logic to populate this properly // in generate_superverticies_graph. 
// - offsets_v_(graph.number_of_vertices + 1, handle.get_stream()), - indices_v_(graph.number_of_edges, handle.get_stream()), - weights_v_(graph.number_of_edges, handle.get_stream()), - src_indices_v_(graph.number_of_edges, handle.get_stream()), - vertex_weights_v_(graph.number_of_vertices, handle.get_stream()), - cluster_weights_v_(graph.number_of_vertices, handle.get_stream()), - tmp_arr_v_(graph.number_of_vertices, handle.get_stream()), - cluster_inverse_v_(graph.number_of_vertices, handle.get_stream()), + offsets_v_(graph.number_of_vertices + 1, handle.get_stream_view()), + indices_v_(graph.number_of_edges, handle.get_stream_view()), + weights_v_(graph.number_of_edges, handle.get_stream_view()), + src_indices_v_(graph.number_of_edges, handle.get_stream_view()), + vertex_weights_v_(graph.number_of_vertices, handle.get_stream_view()), + cluster_weights_v_(graph.number_of_vertices, handle.get_stream_view()), + tmp_arr_v_(graph.number_of_vertices, handle.get_stream_view()), + cluster_inverse_v_(graph.number_of_vertices, handle.get_stream_view()), number_of_vertices_(graph.number_of_vertices), - number_of_edges_(graph.number_of_edges), - stream_(handle.get_stream()) + number_of_edges_(graph.number_of_edges) { - thrust::copy(rmm::exec_policy(stream_)->on(stream_), + thrust::copy(rmm::exec_policy(handle_.get_stream_view()), graph.offsets, graph.offsets + graph.number_of_vertices + 1, offsets_v_.begin()); - thrust::copy(rmm::exec_policy(stream_)->on(stream_), + thrust::copy(rmm::exec_policy(handle_.get_stream_view()), graph.indices, graph.indices + graph.number_of_edges, indices_v_.begin()); - thrust::copy(rmm::exec_policy(stream_)->on(stream_), + thrust::copy(rmm::exec_policy(handle_.get_stream_view()), graph.edge_data, graph.edge_data + graph.number_of_edges, weights_v_.begin()); @@ -89,17 +89,19 @@ class Louvain { { vertex_t n_verts = graph.number_of_vertices; - rmm::device_uvector inc(n_verts, stream_); - rmm::device_uvector deg(n_verts, stream_); + rmm::device_uvector inc(n_verts, handle_.get_stream_view()); + rmm::device_uvector deg(n_verts, handle_.get_stream_view()); - thrust::fill(rmm::exec_policy(stream_)->on(stream_), inc.begin(), inc.end(), weight_t{0.0}); - thrust::fill(rmm::exec_policy(stream_)->on(stream_), deg.begin(), deg.end(), weight_t{0.0}); + thrust::fill( + rmm::exec_policy(handle_.get_stream_view()), inc.begin(), inc.end(), weight_t{0.0}); + thrust::fill( + rmm::exec_policy(handle_.get_stream_view()), deg.begin(), deg.end(), weight_t{0.0}); // FIXME: Already have weighted degree computed in main loop, // could pass that in rather than computing d_deg... 
which // would save an atomicAdd (synchronization) // - thrust::for_each(rmm::exec_policy(stream_)->on(stream_), + thrust::for_each(rmm::exec_policy(handle_.get_stream_view()), thrust::make_counting_iterator(0), thrust::make_counting_iterator(graph.number_of_vertices), [d_inc = inc.data(), @@ -123,7 +125,7 @@ class Louvain { }); weight_t Q = thrust::transform_reduce( - rmm::exec_policy(stream_)->on(stream_), + rmm::exec_policy(handle_.get_stream_view()), thrust::make_counting_iterator(0), thrust::make_counting_iterator(graph.number_of_vertices), [d_deg = deg.data(), d_inc = inc.data(), total_edge_weight, resolution] __device__( @@ -146,8 +148,8 @@ class Louvain { virtual weight_t operator()(size_t max_level, weight_t resolution) { - weight_t total_edge_weight = - thrust::reduce(rmm::exec_policy(stream_)->on(stream_), weights_v_.begin(), weights_v_.end()); + weight_t total_edge_weight = thrust::reduce( + rmm::exec_policy(handle_.get_stream_view()), weights_v_.begin(), weights_v_.end()); weight_t best_modularity = weight_t{-1}; @@ -193,10 +195,10 @@ class Louvain { #endif } - void timer_stop(cudaStream_t stream) + void timer_stop(rmm::cuda_stream_view stream_view) { #ifdef TIMING - CUDA_TRY(cudaStreamSynchronize(stream)); + stream_view.synchronize(); hr_timer_.stop(); #endif } @@ -210,9 +212,9 @@ class Louvain { virtual void initialize_dendrogram_level(vertex_t num_vertices) { - dendrogram_->add_level(0, num_vertices, stream_); + dendrogram_->add_level(0, num_vertices, handle_.get_stream_view()); - thrust::sequence(rmm::exec_policy(stream_)->on(stream_), + thrust::sequence(rmm::exec_policy(handle_.get_stream_view()), dendrogram_->current_level_begin(), dendrogram_->current_level_end()); } @@ -232,7 +234,7 @@ class Louvain { // MNMG: copy_v_transform_reduce_out_nbr, then copy // thrust::for_each( - rmm::exec_policy(stream_)->on(stream_), + rmm::exec_policy(handle_.get_stream_view()), thrust::make_counting_iterator(0), thrust::make_counting_iterator(graph.number_of_vertices), [d_offsets, d_indices, d_weights, d_vertex_weights, d_cluster_weights] __device__( @@ -244,7 +246,7 @@ class Louvain { d_cluster_weights[src] = sum; }); - timer_stop(stream_); + timer_stop(handle_.get_stream_view()); } virtual weight_t update_clustering(weight_t total_edge_weight, @@ -253,17 +255,19 @@ class Louvain { { timer_start("update_clustering"); - rmm::device_uvector next_cluster_v(dendrogram_->current_level_size(), stream_); - rmm::device_uvector delta_Q_v(graph.number_of_edges, stream_); - rmm::device_uvector cluster_hash_v(graph.number_of_edges, stream_); - rmm::device_uvector old_cluster_sum_v(graph.number_of_vertices, stream_); + rmm::device_uvector next_cluster_v(dendrogram_->current_level_size(), + handle_.get_stream_view()); + rmm::device_uvector delta_Q_v(graph.number_of_edges, handle_.get_stream_view()); + rmm::device_uvector cluster_hash_v(graph.number_of_edges, handle_.get_stream_view()); + rmm::device_uvector old_cluster_sum_v(graph.number_of_vertices, + handle_.get_stream_view()); vertex_t *d_cluster = dendrogram_->current_level_begin(); weight_t const *d_vertex_weights = vertex_weights_v_.data(); weight_t *d_cluster_weights = cluster_weights_v_.data(); weight_t *d_delta_Q = delta_Q_v.data(); - thrust::copy(rmm::exec_policy(stream_)->on(stream_), + thrust::copy(rmm::exec_policy(handle_.get_stream_view()), dendrogram_->current_level_begin(), dendrogram_->current_level_end(), next_cluster_v.data()); @@ -291,14 +295,14 @@ class Louvain { new_Q = modularity(total_edge_weight, resolution, graph, 
next_cluster_v.data()); if (new_Q > cur_Q) { - thrust::copy(rmm::exec_policy(stream_)->on(stream_), + thrust::copy(rmm::exec_policy(handle_.get_stream_view()), next_cluster_v.begin(), next_cluster_v.end(), dendrogram_->current_level_begin()); } } - timer_stop(stream_); + timer_stop(handle_.get_stream_view()); return cur_Q; } @@ -320,18 +324,20 @@ class Louvain { weight_t *d_old_cluster_sum = old_cluster_sum_v.data(); weight_t *d_new_cluster_sum = d_delta_Q; - thrust::fill(rmm::exec_policy(stream_)->on(stream_), + thrust::fill(rmm::exec_policy(handle_.get_stream_view()), cluster_hash_v.begin(), cluster_hash_v.end(), vertex_t{-1}); - thrust::fill( - rmm::exec_policy(stream_)->on(stream_), delta_Q_v.begin(), delta_Q_v.end(), weight_t{0.0}); - thrust::fill(rmm::exec_policy(stream_)->on(stream_), + thrust::fill(rmm::exec_policy(handle_.get_stream_view()), + delta_Q_v.begin(), + delta_Q_v.end(), + weight_t{0.0}); + thrust::fill(rmm::exec_policy(handle_.get_stream_view()), old_cluster_sum_v.begin(), old_cluster_sum_v.end(), weight_t{0.0}); - thrust::for_each(rmm::exec_policy(stream_)->on(stream_), + thrust::for_each(rmm::exec_policy(handle_.get_stream_view()), thrust::make_counting_iterator(0), thrust::make_counting_iterator(graph.number_of_edges), [d_src_indices = src_indices_v_.data(), @@ -370,7 +376,7 @@ class Louvain { }); thrust::for_each( - rmm::exec_policy(stream_)->on(stream_), + rmm::exec_policy(handle_.get_stream_view()), thrust::make_counting_iterator(0), thrust::make_counting_iterator(graph.number_of_edges), [total_edge_weight, @@ -409,16 +415,19 @@ class Louvain { rmm::device_uvector &delta_Q_v, bool up_down) { - rmm::device_uvector temp_vertices_v(graph.number_of_vertices, stream_); - rmm::device_uvector temp_cluster_v(graph.number_of_vertices, stream_); - rmm::device_uvector temp_delta_Q_v(graph.number_of_vertices, stream_); - - thrust::fill(rmm::exec_policy(stream_)->on(stream_), + rmm::device_uvector temp_vertices_v(graph.number_of_vertices, + handle_.get_stream_view()); + rmm::device_uvector temp_cluster_v(graph.number_of_vertices, + handle_.get_stream_view()); + rmm::device_uvector temp_delta_Q_v(graph.number_of_vertices, + handle_.get_stream_view()); + + thrust::fill(rmm::exec_policy(handle_.get_stream_view()), temp_cluster_v.begin(), temp_cluster_v.end(), vertex_t{-1}); - thrust::fill(rmm::exec_policy(stream_)->on(stream_), + thrust::fill(rmm::exec_policy(handle_.get_stream_view()), temp_delta_Q_v.begin(), temp_delta_Q_v.end(), weight_t{0}); @@ -430,7 +439,7 @@ class Louvain { thrust::make_zip_iterator(thrust::make_tuple(temp_cluster_v.begin(), temp_delta_Q_v.begin())); auto cluster_reduce_end = - thrust::reduce_by_key(rmm::exec_policy(stream_)->on(stream_), + thrust::reduce_by_key(rmm::exec_policy(handle_.get_stream_view()), src_indices_v_.begin(), src_indices_v_.end(), cluster_reduce_iterator, @@ -449,7 +458,7 @@ class Louvain { vertex_t final_size = thrust::distance(temp_vertices_v.data(), cluster_reduce_end.first); - thrust::for_each(rmm::exec_policy(stream_)->on(stream_), + thrust::for_each(rmm::exec_policy(handle_.get_stream_view()), thrust::make_counting_iterator(0), thrust::make_counting_iterator(final_size), [up_down, @@ -480,12 +489,12 @@ class Louvain { // renumber the clusters to the range 0..(num_clusters-1) vertex_t num_clusters = renumber_clusters(); - cluster_weights_v_.resize(num_clusters, stream_); + cluster_weights_v_.resize(num_clusters, handle_.get_stream_view()); // shrink our graph to represent the graph of supervertices 
generate_superverticies_graph(graph, num_clusters); - timer_stop(stream_); + timer_stop(handle_.get_stream_view()); } vertex_t renumber_clusters() @@ -499,7 +508,7 @@ class Louvain { // // New technique. Initialize cluster_inverse_v_ to 0 // - thrust::fill(rmm::exec_policy(stream_)->on(stream_), + thrust::fill(rmm::exec_policy(handle_.get_stream_view()), cluster_inverse_v_.begin(), cluster_inverse_v_.end(), vertex_t{0}); @@ -510,7 +519,7 @@ class Louvain { auto first_1 = thrust::make_constant_iterator(1); auto last_1 = first_1 + old_num_clusters; - thrust::scatter(rmm::exec_policy(stream_)->on(stream_), + thrust::scatter(rmm::exec_policy(handle_.get_stream_view()), first_1, last_1, dendrogram_->current_level_begin(), @@ -520,47 +529,47 @@ class Louvain { // Now we'll copy all of the clusters that have a value of 1 into a temporary array // auto copy_end = thrust::copy_if( - rmm::exec_policy(stream_)->on(stream_), + rmm::exec_policy(handle_.get_stream_view()), thrust::make_counting_iterator(0), thrust::make_counting_iterator(old_num_clusters), tmp_arr_v_.begin(), [d_cluster_inverse] __device__(const vertex_t idx) { return d_cluster_inverse[idx] == 1; }); vertex_t new_num_clusters = thrust::distance(tmp_arr_v_.begin(), copy_end); - tmp_arr_v_.resize(new_num_clusters, stream_); + tmp_arr_v_.resize(new_num_clusters, handle_.get_stream_view()); // // Now we can set each value in cluster_inverse of a cluster to its index // - thrust::for_each(rmm::exec_policy(stream_)->on(stream_), + thrust::for_each(rmm::exec_policy(handle_.get_stream_view()), thrust::make_counting_iterator(0), thrust::make_counting_iterator(new_num_clusters), [d_cluster_inverse, d_tmp_array] __device__(const vertex_t idx) { d_cluster_inverse[d_tmp_array[idx]] = idx; }); - thrust::for_each(rmm::exec_policy(stream_)->on(stream_), + thrust::for_each(rmm::exec_policy(handle_.get_stream_view()), thrust::make_counting_iterator(0), thrust::make_counting_iterator(old_num_clusters), [d_cluster, d_cluster_inverse] __device__(vertex_t i) { d_cluster[i] = d_cluster_inverse[d_cluster[i]]; }); - cluster_inverse_v_.resize(new_num_clusters, stream_); + cluster_inverse_v_.resize(new_num_clusters, handle_.get_stream_view()); return new_num_clusters; } void generate_superverticies_graph(graph_t &graph, vertex_t num_clusters) { - rmm::device_uvector new_src_v(graph.number_of_edges, stream_); - rmm::device_uvector new_dst_v(graph.number_of_edges, stream_); - rmm::device_uvector new_weight_v(graph.number_of_edges, stream_); + rmm::device_uvector new_src_v(graph.number_of_edges, handle_.get_stream_view()); + rmm::device_uvector new_dst_v(graph.number_of_edges, handle_.get_stream_view()); + rmm::device_uvector new_weight_v(graph.number_of_edges, handle_.get_stream_view()); // // Renumber the COO // - thrust::for_each(rmm::exec_policy(stream_)->on(stream_), + thrust::for_each(rmm::exec_policy(handle_.get_stream_view()), thrust::make_counting_iterator(0), thrust::make_counting_iterator(graph.number_of_edges), [d_old_src = src_indices_v_.data(), @@ -576,12 +585,12 @@ class Louvain { }); thrust::stable_sort_by_key( - rmm::exec_policy(stream_)->on(stream_), + rmm::exec_policy(handle_.get_stream_view()), new_dst_v.begin(), new_dst_v.end(), thrust::make_zip_iterator(thrust::make_tuple(new_src_v.begin(), new_weight_v.begin()))); thrust::stable_sort_by_key( - rmm::exec_policy(stream_)->on(stream_), + rmm::exec_policy(handle_.get_stream_view()), new_src_v.begin(), new_src_v.end(), thrust::make_zip_iterator(thrust::make_tuple(new_dst_v.begin(), 
new_weight_v.begin()))); @@ -594,7 +603,7 @@ class Louvain { thrust::make_zip_iterator(thrust::make_tuple(new_src_v.begin(), new_dst_v.begin())); auto new_start = thrust::make_zip_iterator(thrust::make_tuple(src_indices_v_.data(), graph.indices)); - auto new_end = thrust::reduce_by_key(rmm::exec_policy(stream_)->on(stream_), + auto new_end = thrust::reduce_by_key(rmm::exec_policy(handle_.get_stream_view()), start, start + graph.number_of_edges, new_weight_v.begin(), @@ -606,20 +615,21 @@ class Louvain { graph.number_of_edges = thrust::distance(new_start, new_end.first); graph.number_of_vertices = num_clusters; - detail::fill_offset( - src_indices_v_.data(), graph.offsets, num_clusters, graph.number_of_edges, stream_); - CHECK_CUDA(stream_); + detail::fill_offset(src_indices_v_.data(), + graph.offsets, + num_clusters, + graph.number_of_edges, + handle_.get_stream_view()); - src_indices_v_.resize(graph.number_of_edges, stream_); - indices_v_.resize(graph.number_of_edges, stream_); - weights_v_.resize(graph.number_of_edges, stream_); + src_indices_v_.resize(graph.number_of_edges, handle_.get_stream_view()); + indices_v_.resize(graph.number_of_edges, handle_.get_stream_view()); + weights_v_.resize(graph.number_of_edges, handle_.get_stream_view()); } protected: raft::handle_t const &handle_; vertex_t number_of_vertices_; edge_t number_of_edges_; - cudaStream_t stream_; std::unique_ptr> dendrogram_; diff --git a/cpp/src/converters/COOtoCSR.cuh b/cpp/src/converters/COOtoCSR.cuh index 2876f1ccf52..7dcf28cbb0f 100644 --- a/cpp/src/converters/COOtoCSR.cuh +++ b/cpp/src/converters/COOtoCSR.cuh @@ -30,8 +30,8 @@ #include #include -#include #include +#include #include #include @@ -55,38 +55,38 @@ namespace detail { * @tparam WT Type of edge weights. Supported value : float or double. 
* * @param[in] graph The input graph object - * @param[in] stream The cuda stream for kernel calls + * @param[in] stream_view The cuda stream for kernel calls * * @param[out] result Total number of vertices */ template -VT sort(GraphCOOView &graph, cudaStream_t stream) +VT sort(GraphCOOView &graph, rmm::cuda_stream_view stream_view) { VT max_src_id; VT max_dst_id; if (graph.has_data()) { thrust::stable_sort_by_key( - rmm::exec_policy(stream)->on(stream), + rmm::exec_policy(stream_view), graph.dst_indices, graph.dst_indices + graph.number_of_edges, thrust::make_zip_iterator(thrust::make_tuple(graph.src_indices, graph.edge_data))); CUDA_TRY(cudaMemcpy( &max_dst_id, &(graph.dst_indices[graph.number_of_edges - 1]), sizeof(VT), cudaMemcpyDefault)); thrust::stable_sort_by_key( - rmm::exec_policy(stream)->on(stream), + rmm::exec_policy(stream_view), graph.src_indices, graph.src_indices + graph.number_of_edges, thrust::make_zip_iterator(thrust::make_tuple(graph.dst_indices, graph.edge_data))); CUDA_TRY(cudaMemcpy( &max_src_id, &(graph.src_indices[graph.number_of_edges - 1]), sizeof(VT), cudaMemcpyDefault)); } else { - thrust::stable_sort_by_key(rmm::exec_policy(stream)->on(stream), + thrust::stable_sort_by_key(rmm::exec_policy(stream_view), graph.dst_indices, graph.dst_indices + graph.number_of_edges, graph.src_indices); CUDA_TRY(cudaMemcpy( &max_dst_id, &(graph.dst_indices[graph.number_of_edges - 1]), sizeof(VT), cudaMemcpyDefault)); - thrust::stable_sort_by_key(rmm::exec_policy(stream)->on(stream), + thrust::stable_sort_by_key(rmm::exec_policy(stream_view), graph.src_indices, graph.src_indices + graph.number_of_edges, graph.dst_indices); @@ -97,14 +97,15 @@ VT sort(GraphCOOView &graph, cudaStream_t stream) } template -void fill_offset( - VT *source, ET *offsets, VT number_of_vertices, ET number_of_edges, cudaStream_t stream) +void fill_offset(VT *source, + ET *offsets, + VT number_of_vertices, + ET number_of_edges, + rmm::cuda_stream_view stream_view) { - thrust::fill(rmm::exec_policy(stream)->on(stream), - offsets, - offsets + number_of_vertices + 1, - number_of_edges); - thrust::for_each(rmm::exec_policy(stream)->on(stream), + thrust::fill( + rmm::exec_policy(stream_view), offsets, offsets + number_of_vertices + 1, number_of_edges); + thrust::for_each(rmm::exec_policy(stream_view), thrust::make_counting_iterator(1), thrust::make_counting_iterator(number_of_edges), [source, offsets] __device__(ET index) { @@ -116,7 +117,7 @@ void fill_offset( off[src[0]] = ET{0}; auto iter = thrust::make_reverse_iterator(offsets + number_of_vertices + 1); - thrust::inclusive_scan(rmm::exec_policy(stream)->on(stream), + thrust::inclusive_scan(rmm::exec_policy(stream_view), iter, iter + number_of_vertices + 1, iter, @@ -127,15 +128,15 @@ template rmm::device_buffer create_offset(VT *source, VT number_of_vertices, ET number_of_edges, - cudaStream_t stream, + rmm::cuda_stream_view stream_view, rmm::mr::device_memory_resource *mr) { // Offset array needs an extra element at the end to contain the ending offsets // of the last vertex - rmm::device_buffer offsets_buffer(sizeof(ET) * (number_of_vertices + 1), stream, mr); + rmm::device_buffer offsets_buffer(sizeof(ET) * (number_of_vertices + 1), stream_view, mr); ET *offsets = static_cast(offsets_buffer.data()); - fill_offset(source, offsets, number_of_vertices, number_of_edges, stream); + fill_offset(source, offsets, number_of_vertices, number_of_edges, stream_view); return offsets_buffer; } @@ -146,13 +147,13 @@ template std::unique_ptr> coo_to_csr(GraphCOOView const 
&graph, rmm::mr::device_memory_resource *mr) { - cudaStream_t stream{nullptr}; + rmm::cuda_stream_view stream_view; - GraphCOO temp_graph(graph, stream, mr); + GraphCOO temp_graph(graph, stream_view.value(), mr); GraphCOOView temp_graph_view = temp_graph.view(); - VT total_vertex_count = detail::sort(temp_graph_view, stream); + VT total_vertex_count = detail::sort(temp_graph_view, stream_view); rmm::device_buffer offsets = detail::create_offset( - temp_graph.src_indices(), total_vertex_count, temp_graph.number_of_edges(), stream, mr); + temp_graph.src_indices(), total_vertex_count, temp_graph.number_of_edges(), stream_view, mr); auto coo_contents = temp_graph.release(); GraphSparseContents csr_contents{ total_vertex_count, @@ -167,11 +168,14 @@ std::unique_ptr> coo_to_csr(GraphCOOView const template void coo_to_csr_inplace(GraphCOOView &graph, GraphCSRView &result) { - cudaStream_t stream{nullptr}; - - detail::sort(graph, stream); - detail::fill_offset( - graph.src_indices, result.offsets, graph.number_of_vertices, graph.number_of_edges, stream); + rmm::cuda_stream_view stream_view; + + detail::sort(graph, stream_view); + detail::fill_offset(graph.src_indices, + result.offsets, + graph.number_of_vertices, + graph.number_of_edges, + stream_view); CUDA_TRY(cudaMemcpy( result.indices, graph.dst_indices, sizeof(VT) * graph.number_of_edges, cudaMemcpyDefault)); diff --git a/cpp/src/experimental/graph.cu b/cpp/src/experimental/graph.cu index ad6f51d75fe..2a6a60e5280 100644 --- a/cpp/src/experimental/graph.cu +++ b/cpp/src/experimental/graph.cu @@ -20,10 +20,10 @@ #include #include -#include #include #include #include +#include #include #include @@ -68,13 +68,13 @@ std:: vertex_t minor_first, vertex_t minor_last, bool is_weighted, - cudaStream_t stream) + rmm::cuda_stream_view stream_view) { - rmm::device_uvector offsets((major_last - major_first) + 1, stream); - rmm::device_uvector indices(edgelist.number_of_edges, stream); - rmm::device_uvector weights(is_weighted ? edgelist.number_of_edges : 0, stream); - thrust::fill(rmm::exec_policy(stream)->on(stream), offsets.begin(), offsets.end(), edge_t{0}); - thrust::fill(rmm::exec_policy(stream)->on(stream), indices.begin(), indices.end(), vertex_t{0}); + rmm::device_uvector offsets((major_last - major_first) + 1, stream_view); + rmm::device_uvector indices(edgelist.number_of_edges, stream_view); + rmm::device_uvector weights(is_weighted ? edgelist.number_of_edges : 0, stream_view); + thrust::fill(rmm::exec_policy(stream_view), offsets.begin(), offsets.end(), edge_t{0}); + thrust::fill(rmm::exec_policy(stream_view), indices.begin(), indices.end(), vertex_t{0}); // FIXME: need to performance test this code with R-mat graphs having highly-skewed degree // distribution. If there is a small number of vertices with very large degrees, atomicAdd can @@ -91,7 +91,7 @@ std:: auto p_indices = indices.data(); auto p_weights = is_weighted ? weights.data() : static_cast(nullptr); - thrust::for_each(rmm::exec_policy(stream)->on(stream), + thrust::for_each(rmm::exec_policy(stream_view), store_transposed ? edgelist.p_dst_vertices : edgelist.p_src_vertices, store_transposed ? 
edgelist.p_dst_vertices + edgelist.number_of_edges : edgelist.p_src_vertices + edgelist.number_of_edges, @@ -100,12 +100,12 @@ std:: }); thrust::exclusive_scan( - rmm::exec_policy(stream)->on(stream), offsets.begin(), offsets.end(), offsets.begin()); + rmm::exec_policy(stream_view), offsets.begin(), offsets.end(), offsets.begin()); if (is_weighted) { auto edge_first = thrust::make_zip_iterator(thrust::make_tuple( edgelist.p_src_vertices, edgelist.p_dst_vertices, edgelist.p_edge_weights)); - thrust::for_each(rmm::exec_policy(stream)->on(stream), + thrust::for_each(rmm::exec_policy(stream_view), edge_first, edge_first + edgelist.number_of_edges, [p_offsets, p_indices, p_weights, major_first] __device__(auto e) { @@ -128,7 +128,7 @@ std:: } else { auto edge_first = thrust::make_zip_iterator( thrust::make_tuple(edgelist.p_src_vertices, edgelist.p_dst_vertices)); - thrust::for_each(rmm::exec_policy(stream)->on(stream), + thrust::for_each(rmm::exec_policy(stream_view), edge_first, edge_first + edgelist.number_of_edges, [p_offsets, p_indices, p_weights, major_first] __device__(auto e) { @@ -185,7 +185,7 @@ graph_tget_handle_ptr()->get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); auto const col_comm_rank = col_comm.get_rank(); auto const col_comm_size = col_comm.get_size(); - auto default_stream = this->get_handle_ptr()->get_stream(); + auto default_stream_view = this->get_handle_ptr()->get_stream_view(); CUGRAPH_EXPECTS(edgelists.size() > 0, "Invalid input argument: edgelists.size() should be non-zero."); @@ -226,7 +226,7 @@ graph_ton(default_stream), + CUGRAPH_EXPECTS(thrust::count_if(rmm::exec_policy(default_stream_view), edge_first, edge_first + edgelists[i].number_of_edges, out_of_range_t{ @@ -234,7 +234,7 @@ graph_tget_number_of_edges(), "Invalid input argument: the sum of local edge counts does not match with number_of_edges."); @@ -257,9 +257,9 @@ graph_t offsets(0, default_stream); - rmm::device_uvector indices(0, default_stream); - rmm::device_uvector weights(0, default_stream); + rmm::device_uvector offsets(0, default_stream_view); + rmm::device_uvector indices(0, default_stream_view); + rmm::device_uvector weights(0, default_stream_view); std::tie(offsets, indices, weights) = edgelist_to_compressed_sparse(edgelists[i], major_first, @@ -267,7 +267,7 @@ graph_tget_handle_ptr()->get_stream()); + default_stream_view); adj_matrix_partition_offsets_.push_back(std::move(offsets)); adj_matrix_partition_indices_.push_back(std::move(indices)); if (properties.is_weighted) { adj_matrix_partition_weights_.push_back(std::move(weights)); } @@ -282,7 +282,7 @@ graph_ton(default_stream), + CUGRAPH_EXPECTS(thrust::is_sorted(rmm::exec_policy(default_stream_view), degrees.begin(), degrees.end(), thrust::greater{}), @@ -294,26 +294,26 @@ graph_t::max())); rmm::device_uvector d_thresholds(detail::num_segments_per_vertex_partition - 1, - default_stream); + default_stream_view); std::vector h_thresholds = { static_cast(detail::mid_degree_threshold * col_comm_size), static_cast(detail::low_degree_threshold * col_comm_size)}; raft::update_device( - d_thresholds.data(), h_thresholds.data(), h_thresholds.size(), default_stream); + d_thresholds.data(), h_thresholds.data(), h_thresholds.size(), default_stream_view.value()); rmm::device_uvector segment_offsets(detail::num_segments_per_vertex_partition + 1, - default_stream); + default_stream_view); // temporaries are necessary because the &&-overload of device_uvector is deleted // Note that we must sync `default_stream` before these temporaries go out 
 of scope to
    // avoid use after free. (The syncs are at the end of this function)
    auto zero_vertex = vertex_t{0};
    auto vertex_count = static_cast(degrees.size());
-   segment_offsets.set_element_async(0, zero_vertex, default_stream);
+   segment_offsets.set_element_async(0, zero_vertex, default_stream_view);
    segment_offsets.set_element_async(
-     detail::num_segments_per_vertex_partition, vertex_count, default_stream);
+     detail::num_segments_per_vertex_partition, vertex_count, default_stream_view);
-   thrust::upper_bound(rmm::exec_policy(default_stream)->on(default_stream),
+   thrust::upper_bound(rmm::exec_policy(default_stream_view),
                        degrees.begin(),
                        degrees.end(),
                        d_thresholds.begin(),
@@ -322,23 +322,22 @@ graph_t{});
    rmm::device_uvector aggregate_segment_offsets(col_comm_size * segment_offsets.size(),
-                                                  default_stream);
+                                                  default_stream_view);
    col_comm.allgather(segment_offsets.data(),
                       aggregate_segment_offsets.data(),
                       segment_offsets.size(),
-                      default_stream);
+                      default_stream_view.value());
    adj_matrix_partition_segment_offsets_.resize(aggregate_segment_offsets.size());
    raft::update_host(adj_matrix_partition_segment_offsets_.data(),
                      aggregate_segment_offsets.data(),
                      aggregate_segment_offsets.size(),
-                     default_stream);
+                     default_stream_view.value());
-   auto status = col_comm.sync_stream(
-     default_stream);  // this is necessary as degrees, d_thresholds, and segment_offsets will
-                       // become out-of-scope once control flow exits this block and
-                       // adj_matrix_partition_segment_offsets_ can be used right after return.
-   CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure.");
+   default_stream_view
+     .synchronize();  // this is necessary as degrees, d_thresholds, and segment_offsets will
+                      // become out-of-scope once control flow exits this block and
+                      // adj_matrix_partition_segment_offsets_ can be used right after return.
} // optional expensive checks (part 3/3) @@ -366,13 +365,13 @@ graph_t( handle, number_of_vertices, edgelist.number_of_edges, properties), - offsets_(rmm::device_uvector(0, handle.get_stream())), - indices_(rmm::device_uvector(0, handle.get_stream())), - weights_(rmm::device_uvector(0, handle.get_stream())) + offsets_(rmm::device_uvector(0, handle.get_stream_view())), + indices_(rmm::device_uvector(0, handle.get_stream_view())), + weights_(rmm::device_uvector(0, handle.get_stream_view())) { // cheap error checks - auto default_stream = this->get_handle_ptr()->get_stream(); + auto default_stream_view = this->get_handle_ptr()->get_stream_view(); CUGRAPH_EXPECTS( ((edgelist.number_of_edges == 0) || (edgelist.p_src_vertices != nullptr)) && @@ -392,7 +391,7 @@ graph_ton(default_stream), + rmm::exec_policy(default_stream_view), edge_first, edge_first + edgelist.number_of_edges, out_of_range_t{ @@ -415,7 +414,7 @@ graph_tget_number_of_vertices(), properties.is_weighted, - this->get_handle_ptr()->get_stream()); + default_stream_view); // update degree-based segment offsets (to be used for graph analytics kernel optimization) @@ -428,7 +427,7 @@ graph_ton(default_stream), + thrust::is_sorted(rmm::exec_policy(default_stream_view), degree_first, degree_first + this->get_number_of_vertices(), thrust::greater{}), @@ -440,26 +439,26 @@ graph_t::max())); rmm::device_uvector d_thresholds(detail::num_segments_per_vertex_partition - 1, - default_stream); + default_stream_view); std::vector h_thresholds = {static_cast(detail::mid_degree_threshold), static_cast(detail::low_degree_threshold)}; raft::update_device( - d_thresholds.data(), h_thresholds.data(), h_thresholds.size(), default_stream); + d_thresholds.data(), h_thresholds.data(), h_thresholds.size(), default_stream_view.value()); rmm::device_uvector segment_offsets(detail::num_segments_per_vertex_partition + 1, - default_stream); + default_stream_view); // temporaries are necessary because the &&-overload of device_uvector is deleted // Note that we must sync `default_stream` before these temporaries go out of scope to // avoid use after free. (The syncs are at the end of this function) auto zero_vertex = vertex_t{0}; auto vertex_count = static_cast(this->get_number_of_vertices()); - segment_offsets.set_element_async(0, zero_vertex, default_stream); + segment_offsets.set_element_async(0, zero_vertex, default_stream_view); segment_offsets.set_element_async( - detail::num_segments_per_vertex_partition, vertex_count, default_stream); + detail::num_segments_per_vertex_partition, vertex_count, default_stream_view); - thrust::upper_bound(rmm::exec_policy(default_stream)->on(default_stream), + thrust::upper_bound(rmm::exec_policy(default_stream_view), degree_first, degree_first + this->get_number_of_vertices(), d_thresholds.begin(), @@ -468,11 +467,13 @@ graph_t{}); segment_offsets_.resize(segment_offsets.size()); - raft::update_host( - segment_offsets_.data(), segment_offsets.data(), segment_offsets.size(), default_stream); + raft::update_host(segment_offsets_.data(), + segment_offsets.data(), + segment_offsets.size(), + default_stream_view.value()); - CUDA_TRY(cudaStreamSynchronize( - default_stream)); // this is necessary as segment_offsets_ can be used right after return. + default_stream_view + .synchronize(); // this is necessary as segment_offsets_ can be used right after return. 
} // optional expensive checks (part 3/3) diff --git a/cpp/src/experimental/graph_view.cu b/cpp/src/experimental/graph_view.cu index eacec51371d..d5b4308c80e 100644 --- a/cpp/src/experimental/graph_view.cu +++ b/cpp/src/experimental/graph_view.cu @@ -22,9 +22,9 @@ #include #include -#include #include #include +#include #include #include @@ -210,7 +210,7 @@ graph_view_tget_handle_ptr()->get_stream(); + auto default_stream_view = this->get_handle_ptr()->get_stream_view(); auto const row_comm_rank = this->get_handle_ptr() ->get_subcomm(cugraph::partition_2d::key_naming_t().row_name()) @@ -228,7 +228,7 @@ graph_view_ton(default_stream), + thrust::is_sorted(rmm::exec_policy(default_stream_view), adj_matrix_partition_offsets[i], adj_matrix_partition_offsets[i] + (major_last - major_first + 1)), "Internal Error: adj_matrix_partition_offsets[] is not sorted."); @@ -236,20 +236,20 @@ graph_view_ton(default_stream), + thrust::count_if(rmm::exec_policy(default_stream_view), adj_matrix_partition_indices[i], adj_matrix_partition_indices[i] + number_of_local_edges, out_of_range_t{minor_first, minor_last}) == 0, "Internal Error: adj_matrix_partition_indices[] have out-of-range vertex IDs."); } number_of_local_edges_sum = host_scalar_allreduce( - this->get_handle_ptr()->get_comms(), number_of_local_edges_sum, default_stream); + this->get_handle_ptr()->get_comms(), number_of_local_edges_sum, default_stream_view.value()); CUGRAPH_EXPECTS(number_of_local_edges_sum == this->get_number_of_edges(), "Internal Error: the sum of local edges counts does not match with " "number_of_local_edges."); @@ -257,7 +257,7 @@ graph_view_ton(default_stream), + thrust::is_sorted(rmm::exec_policy(default_stream_view), degrees.begin(), degrees.end(), thrust::greater{}), @@ -332,16 +332,16 @@ graph_view_tget_handle_ptr()->get_stream(); + auto default_stream_view = this->get_handle_ptr()->get_stream_view(); - CUGRAPH_EXPECTS(thrust::is_sorted(rmm::exec_policy(default_stream)->on(default_stream), + CUGRAPH_EXPECTS(thrust::is_sorted(rmm::exec_policy(default_stream_view), offsets, offsets + (this->get_number_of_vertices() + 1)), "Internal Error: offsets is not sorted."); // better use thrust::any_of once https://github.com/thrust/thrust/issues/1016 is resolved CUGRAPH_EXPECTS( - thrust::count_if(rmm::exec_policy(default_stream)->on(default_stream), + thrust::count_if(rmm::exec_policy(default_stream_view), indices, indices + this->get_number_of_edges(), out_of_range_t{0, this->get_number_of_vertices()}) == 0, @@ -351,7 +351,7 @@ graph_view_t{offsets}); - CUGRAPH_EXPECTS(thrust::is_sorted(rmm::exec_policy(default_stream)->on(default_stream), + CUGRAPH_EXPECTS(thrust::is_sorted(rmm::exec_policy(default_stream_view), degree_first, degree_first + this->get_number_of_vertices(), thrust::greater{}), @@ -531,9 +531,8 @@ graph_view_ton(handle.get_stream()), - in_degrees.begin(), - in_degrees.end()); + auto it = thrust::max_element( + rmm::exec_policy(handle.get_stream_view()), in_degrees.begin(), in_degrees.end()); rmm::device_scalar ret(edge_t{0}, handle.get_stream()); device_allreduce(handle.get_comms(), it != in_degrees.end() ? 
it : ret.data(), @@ -558,9 +557,8 @@ edge_t graph_view_ton(handle.get_stream()), - in_degrees.begin(), - in_degrees.end()); + auto it = thrust::max_element( + rmm::exec_policy(handle.get_stream_view()), in_degrees.begin(), in_degrees.end()); edge_t ret{0}; if (it != in_degrees.end()) { raft::update_host(&ret, it, 1, handle.get_stream()); } handle.get_stream_view().synchronize(); @@ -577,9 +575,8 @@ graph_view_ton(handle.get_stream()), - out_degrees.begin(), - out_degrees.end()); + auto it = thrust::max_element( + rmm::exec_policy(handle.get_stream_view()), out_degrees.begin(), out_degrees.end()); rmm::device_scalar ret(edge_t{0}, handle.get_stream()); device_allreduce(handle.get_comms(), it != out_degrees.end() ? it : ret.data(), @@ -604,9 +601,8 @@ edge_t graph_view_ton(handle.get_stream()), - out_degrees.begin(), - out_degrees.end()); + auto it = thrust::max_element( + rmm::exec_policy(handle.get_stream_view()), out_degrees.begin(), out_degrees.end()); edge_t ret{0}; if (it != out_degrees.end()) { raft::update_host(&ret, it, 1, handle.get_stream()); } handle.get_stream_view().synchronize(); @@ -623,9 +619,8 @@ graph_view_ton(handle.get_stream()), - in_weight_sums.begin(), - in_weight_sums.end()); + auto it = thrust::max_element( + rmm::exec_policy(handle.get_stream_view()), in_weight_sums.begin(), in_weight_sums.end()); rmm::device_scalar ret(weight_t{0.0}, handle.get_stream()); device_allreduce(handle.get_comms(), it != in_weight_sums.end() ? it : ret.data(), @@ -650,9 +645,8 @@ weight_t graph_view_ton(handle.get_stream()), - in_weight_sums.begin(), - in_weight_sums.end()); + auto it = thrust::max_element( + rmm::exec_policy(handle.get_stream_view()), in_weight_sums.begin(), in_weight_sums.end()); weight_t ret{0.0}; if (it != in_weight_sums.end()) { raft::update_host(&ret, it, 1, handle.get_stream()); } handle.get_stream_view().synchronize(); @@ -669,9 +663,8 @@ graph_view_ton(handle.get_stream()), - out_weight_sums.begin(), - out_weight_sums.end()); + auto it = thrust::max_element( + rmm::exec_policy(handle.get_stream_view()), out_weight_sums.begin(), out_weight_sums.end()); rmm::device_scalar ret(weight_t{0.0}, handle.get_stream()); device_allreduce(handle.get_comms(), it != out_weight_sums.end() ? 
it : ret.data(), @@ -696,9 +689,8 @@ weight_t graph_view_t< std::enable_if_t>::compute_max_out_weight_sum(raft::handle_t const& handle) const { auto out_weight_sums = compute_out_weight_sums(handle); - auto it = thrust::max_element(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - out_weight_sums.begin(), - out_weight_sums.end()); + auto it = thrust::max_element( + rmm::exec_policy(handle.get_stream_view()), out_weight_sums.begin(), out_weight_sums.end()); weight_t ret{0.0}; if (it != out_weight_sums.end()) { raft::update_host(&ret, it, 1, handle.get_stream()); } handle.get_stream_view().synchronize(); diff --git a/cpp/src/experimental/induced_subgraph.cu b/cpp/src/experimental/induced_subgraph.cu index 062bf18cd95..af96103c486 100644 --- a/cpp/src/experimental/induced_subgraph.cu +++ b/cpp/src/experimental/induced_subgraph.cu @@ -20,9 +20,9 @@ #include #include -#include #include #include +#include #include #include @@ -73,18 +73,17 @@ extract_induced_subgraphs( raft::update_host(&should_be_zero, subgraph_offsets, 1, handle.get_stream()); raft::update_host( &num_aggregate_subgraph_vertices, subgraph_offsets + num_subgraphs, 1, handle.get_stream()); - CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + handle.get_stream_view().synchronize(); CUGRAPH_EXPECTS(should_be_zero == 0, "Invalid input argument: subgraph_offsets[0] should be 0."); - CUGRAPH_EXPECTS( - thrust::is_sorted(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - subgraph_offsets, - subgraph_offsets + (num_subgraphs + 1)), - "Invalid input argument: subgraph_offsets is not sorted."); + CUGRAPH_EXPECTS(thrust::is_sorted(rmm::exec_policy(handle.get_stream_view()), + subgraph_offsets, + subgraph_offsets + (num_subgraphs + 1)), + "Invalid input argument: subgraph_offsets is not sorted."); vertex_partition_device_t> vertex_partition(graph_view); - CUGRAPH_EXPECTS(thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + CUGRAPH_EXPECTS(thrust::count_if(rmm::exec_policy(handle.get_stream_view()), subgraph_vertices, subgraph_vertices + num_aggregate_subgraph_vertices, [vertex_partition] __device__(auto v) { @@ -95,7 +94,7 @@ extract_induced_subgraphs( CUGRAPH_EXPECTS( thrust::count_if( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rmm::exec_policy(handle.get_stream_view()), thrust::make_counting_iterator(size_t{0}), thrust::make_counting_iterator(num_subgraphs), [subgraph_offsets, subgraph_vertices] __device__(auto i) { @@ -119,28 +118,28 @@ extract_induced_subgraphs( if (multi_gpu) { CUGRAPH_FAIL("Unimplemented."); - return std::make_tuple(rmm::device_uvector(0, handle.get_stream()), - rmm::device_uvector(0, handle.get_stream()), - rmm::device_uvector(0, handle.get_stream()), - rmm::device_uvector(0, handle.get_stream())); + return std::make_tuple(rmm::device_uvector(0, handle.get_stream_view()), + rmm::device_uvector(0, handle.get_stream_view()), + rmm::device_uvector(0, handle.get_stream_view()), + rmm::device_uvector(0, handle.get_stream_view())); } else { // 2-1. 
 Phase 1: calculate memory requirements
    size_t num_aggregate_subgraph_vertices{};
    raft::update_host(
      &num_aggregate_subgraph_vertices, subgraph_offsets + num_subgraphs, 1, handle.get_stream());
-   CUDA_TRY(cudaStreamSynchronize(handle.get_stream()));
+   handle.get_stream_view().synchronize();
    rmm::device_uvector subgraph_vertex_output_offsets(
      num_aggregate_subgraph_vertices + 1,
-     handle.get_stream());  // for each element of subgraph_vertices
+     handle.get_stream_view());  // for each element of subgraph_vertices
    matrix_partition_device_t> matrix_partition(graph_view, 0);
    // count the numbers of the induced subgraph edges for each vertex in the aggregate subgraph
    // vertex list.
    thrust::transform(
-     rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
+     rmm::exec_policy(handle.get_stream_view()),
      thrust::make_counting_iterator(size_t{0}),
      thrust::make_counting_iterator(num_aggregate_subgraph_vertices),
      subgraph_vertex_output_offsets.begin(),
@@ -166,7 +165,7 @@ extract_induced_subgraphs(
            return thrust::binary_search(thrust::seq, vertex_first, vertex_last, nbr);
          });
        });
-   thrust::exclusive_scan(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
+   thrust::exclusive_scan(rmm::exec_policy(handle.get_stream_view()),
                           subgraph_vertex_output_offsets.begin(),
                           subgraph_vertex_output_offsets.end(),
                           subgraph_vertex_output_offsets.begin());
@@ -176,19 +175,19 @@ extract_induced_subgraphs(
      subgraph_vertex_output_offsets.data() + num_aggregate_subgraph_vertices,
      1,
      handle.get_stream());
-   CUDA_TRY(cudaStreamSynchronize(handle.get_stream()));
+   handle.get_stream_view().synchronize();
    // 2-2. Phase 2: find the edges in the induced subgraphs
-   rmm::device_uvector edge_majors(num_aggregate_edges, handle.get_stream());
-   rmm::device_uvector edge_minors(num_aggregate_edges, handle.get_stream());
+   rmm::device_uvector edge_majors(num_aggregate_edges, handle.get_stream_view());
+   rmm::device_uvector edge_minors(num_aggregate_edges, handle.get_stream_view());
    rmm::device_uvector edge_weights(
-     graph_view.is_weighted() ?
num_aggregate_edges : size_t{0}, handle.get_stream_view()); // fill the edge list buffer (to be returned) for each vetex in the aggregate subgraph vertex // list (use the offsets computed in the Phase 1) thrust::for_each( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rmm::exec_policy(handle.get_stream_view()), thrust::make_counting_iterator(size_t{0}), thrust::make_counting_iterator(num_aggregate_subgraph_vertices), [subgraph_offsets, @@ -244,8 +243,8 @@ extract_induced_subgraphs( } }); - rmm::device_uvector subgraph_edge_offsets(num_subgraphs + 1, handle.get_stream()); - thrust::gather(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rmm::device_uvector subgraph_edge_offsets(num_subgraphs + 1, handle.get_stream_view()); + thrust::gather(rmm::exec_policy(handle.get_stream_view()), subgraph_offsets, subgraph_offsets + (num_subgraphs + 1), subgraph_vertex_output_offsets.begin(), diff --git a/cpp/src/experimental/louvain.cuh b/cpp/src/experimental/louvain.cuh index 27e66c48086..c9e01157628 100644 --- a/cpp/src/experimental/louvain.cuh +++ b/cpp/src/experimental/louvain.cuh @@ -61,12 +61,12 @@ class Louvain { handle_(handle), dendrogram_(std::make_unique>()), current_graph_view_(graph_view), - cluster_keys_v_(graph_view.get_number_of_local_vertices(), handle.get_stream()), - cluster_weights_v_(graph_view.get_number_of_local_vertices(), handle.get_stream()), - vertex_weights_v_(graph_view.get_number_of_local_vertices(), handle.get_stream()), - src_vertex_weights_cache_v_(0, handle.get_stream()), - src_cluster_cache_v_(0, handle.get_stream()), - dst_cluster_cache_v_(0, handle.get_stream()) + cluster_keys_v_(graph_view.get_number_of_local_vertices(), handle.get_stream_view()), + cluster_weights_v_(graph_view.get_number_of_local_vertices(), handle.get_stream_view()), + vertex_weights_v_(graph_view.get_number_of_local_vertices(), handle.get_stream_view()), + src_vertex_weights_cache_v_(0, handle.get_stream_view()), + src_cluster_cache_v_(0, handle.get_stream_view()), + dst_cluster_cache_v_(0, handle.get_stream_view()) { } @@ -122,16 +122,16 @@ class Louvain { #endif } - void timer_stop(cudaStream_t stream) + void timer_stop(rmm::cuda_stream_view stream_view) { #ifdef TIMING if (graph_view_t::is_multi_gpu) { if (handle.get_comms().get_rank() == 0) { - CUDA_TRY(cudaStreamSynchronize(stream)); + stream_view.synchronize(); hr_timer_.stop(); } } else { - CUDA_TRY(cudaStreamSynchronize(stream)); + stream_view.synchronize(); hr_timer_.stop(); } #endif @@ -152,9 +152,9 @@ class Louvain { void initialize_dendrogram_level(vertex_t num_vertices) { dendrogram_->add_level( - current_graph_view_.get_local_vertex_first(), num_vertices, handle_.get_stream()); + current_graph_view_.get_local_vertex_first(), num_vertices, handle_.get_stream_view()); - thrust::sequence(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + thrust::sequence(rmm::exec_policy(handle_.get_stream_view()), dendrogram_->current_level_begin(), dendrogram_->current_level_end(), current_graph_view_.get_local_vertex_first()); @@ -164,7 +164,7 @@ class Louvain { weight_t modularity(weight_t total_edge_weight, weight_t resolution) { weight_t sum_degree_squared = thrust::transform_reduce( - rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + rmm::exec_policy(handle_.get_stream_view()), cluster_weights_v_.begin(), cluster_weights_v_.end(), [] __device__(weight_t p) { return p * p; }, @@ -201,10 +201,10 @@ class Louvain { timer_start("compute_vertex_and_cluster_weights"); 
vertex_weights_v_ = current_graph_view_.compute_out_weight_sums(handle_); - cluster_keys_v_.resize(vertex_weights_v_.size(), handle_.get_stream()); - cluster_weights_v_.resize(vertex_weights_v_.size(), handle_.get_stream()); + cluster_keys_v_.resize(vertex_weights_v_.size(), handle_.get_stream_view()); + cluster_weights_v_.resize(vertex_weights_v_.size(), handle_.get_stream_view()); - thrust::sequence(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + thrust::sequence(rmm::exec_policy(handle_.get_stream_view()), cluster_keys_v_.begin(), cluster_keys_v_.end(), current_graph_view_.get_local_vertex_first()); @@ -219,8 +219,8 @@ class Louvain { if (graph_view_t::is_multi_gpu) { auto const comm_size = handle_.get_comms().get_size(); - rmm::device_uvector rx_keys_v(0, handle_.get_stream()); - rmm::device_uvector rx_weights_v(0, handle_.get_stream()); + rmm::device_uvector rx_keys_v(0, handle_.get_stream_view()); + rmm::device_uvector rx_weights_v(0, handle_.get_stream_view()); auto pair_first = thrust::make_zip_iterator( thrust::make_tuple(cluster_keys_v_.begin(), cluster_weights_v_.begin())); @@ -233,13 +233,13 @@ class Louvain { [key_func = cugraph::experimental::detail::compute_gpu_id_from_vertex_t{ comm_size}] __device__(auto val) { return key_func(thrust::get<0>(val)); }, - handle_.get_stream()); + handle_.get_stream_view()); cluster_keys_v_ = std::move(rx_keys_v); cluster_weights_v_ = std::move(rx_weights_v); } - timer_stop(handle_.get_stream()); + timer_stop(handle_.get_stream_view()); } template @@ -247,7 +247,7 @@ class Louvain { { if (graph_view_t::is_multi_gpu) { src_cache_v.resize(current_graph_view_.get_number_of_local_adj_matrix_partition_rows(), - handle_.get_stream()); + handle_.get_stream_view()); copy_to_adj_matrix_row(handle_, current_graph_view_, input.begin(), src_cache_v.begin()); return src_cache_v.begin(); } else { @@ -260,7 +260,7 @@ class Louvain { { if (graph_view_t::is_multi_gpu) { dst_cache_v.resize(current_graph_view_.get_number_of_local_adj_matrix_partition_cols(), - handle_.get_stream()); + handle_.get_stream_view()); copy_to_adj_matrix_col(handle_, current_graph_view_, input.begin(), dst_cache_v.begin()); return dst_cache_v.begin(); } else { @@ -273,7 +273,7 @@ class Louvain { timer_start("update_clustering"); rmm::device_uvector next_cluster_v(dendrogram_->current_level_size(), - handle_.get_stream()); + handle_.get_stream_view()); raft::copy(next_cluster_v.begin(), dendrogram_->current_level_begin(), @@ -308,7 +308,7 @@ class Louvain { } } - timer_stop(handle_.get_stream()); + timer_stop(handle_.get_stream_view()); return cur_Q; } @@ -317,7 +317,7 @@ class Louvain { { auto output_buffer = cugraph::experimental::allocate_dataframe_buffer>( - current_graph_view_.get_number_of_local_vertices(), handle_.get_stream()); + current_graph_view_.get_number_of_local_vertices(), handle_.get_stream_view()); experimental::copy_v_transform_reduce_out_nbr( handle_, @@ -340,7 +340,7 @@ class Louvain { output_buffer)); thrust::transform( - rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + rmm::exec_policy(handle_.get_stream_view()), cugraph::experimental::get_dataframe_buffer_begin>( output_buffer), cugraph::experimental::get_dataframe_buffer_begin>( @@ -350,7 +350,7 @@ class Louvain { [] __device__(auto p) { return thrust::get<1>(p); }); thrust::transform( - rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + rmm::exec_policy(handle_.get_stream_view()), cugraph::experimental::get_dataframe_buffer_begin>( output_buffer), 
cugraph::experimental::get_dataframe_buffer_begin>( @@ -400,12 +400,12 @@ class Louvain { map_key_last = cluster_keys_v_.end(); map_value_first = cluster_weights_v_.begin(); } else { - thrust::sort_by_key(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + thrust::sort_by_key(rmm::exec_policy(handle_.get_stream_view()), cluster_keys_v_.begin(), cluster_keys_v_.end(), cluster_weights_v_.begin()); - thrust::transform(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + thrust::transform(rmm::exec_policy(handle_.get_stream_view()), next_cluster_v.begin(), next_cluster_v.end(), src_cluster_weights_v.begin(), @@ -473,7 +473,7 @@ class Louvain { output_buffer)); thrust::transform( - rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + rmm::exec_policy(handle_.get_stream_view()), next_cluster_v.begin(), next_cluster_v.end(), cugraph::experimental::get_dataframe_buffer_begin>( @@ -514,7 +514,7 @@ class Louvain { current_graph_view_ = current_graph_->view(); rmm::device_uvector numbering_indices(numbering_map.size(), handle_.get_stream()); - thrust::sequence(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + thrust::sequence(rmm::exec_policy(handle_.get_stream_view()), numbering_indices.begin(), numbering_indices.end(), current_graph_view_.get_local_vertex_first()); diff --git a/cpp/src/experimental/relabel.cu b/cpp/src/experimental/relabel.cu index 2ba2fb751eb..caefe0be806 100644 --- a/cpp/src/experimental/relabel.cu +++ b/cpp/src/experimental/relabel.cu @@ -22,10 +22,10 @@ #include #include -#include #include #include #include +#include #include #include @@ -63,40 +63,41 @@ void relabel(raft::handle_t const& handle, // find unique old labels (to be relabeled) - rmm::device_uvector unique_old_labels(num_labels, handle.get_stream()); - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rmm::device_uvector unique_old_labels(num_labels, handle.get_stream_view()); + thrust::copy(rmm::exec_policy(handle.get_stream_view()), labels, labels + num_labels, unique_old_labels.data()); - thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::sort(rmm::exec_policy(handle.get_stream_view()), unique_old_labels.begin(), unique_old_labels.end()); unique_old_labels.resize( - thrust::distance( - unique_old_labels.begin(), - thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - unique_old_labels.begin(), - unique_old_labels.end())), - handle.get_stream()); - unique_old_labels.shrink_to_fit(handle.get_stream()); + thrust::distance(unique_old_labels.begin(), + thrust::unique(rmm::exec_policy(handle.get_stream_view()), + unique_old_labels.begin(), + unique_old_labels.end())), + handle.get_stream_view()); + unique_old_labels.shrink_to_fit(handle.get_stream_view()); // collect new labels for the unique old labels - rmm::device_uvector new_labels_for_unique_old_labels(0, handle.get_stream()); + rmm::device_uvector new_labels_for_unique_old_labels(0, handle.get_stream_view()); { // shuffle the old_new_label_pairs based on applying the compute_gpu_id_from_vertex_t functor // to the old labels - rmm::device_uvector rx_label_pair_old_labels(0, handle.get_stream()); - rmm::device_uvector rx_label_pair_new_labels(0, handle.get_stream()); + rmm::device_uvector rx_label_pair_old_labels(0, handle.get_stream_view()); + rmm::device_uvector rx_label_pair_new_labels(0, handle.get_stream_view()); { - rmm::device_uvector label_pair_old_labels(num_label_pairs, handle.get_stream()); - 
rmm::device_uvector label_pair_new_labels(num_label_pairs, handle.get_stream()); - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rmm::device_uvector label_pair_old_labels(num_label_pairs, + handle.get_stream_view()); + rmm::device_uvector label_pair_new_labels(num_label_pairs, + handle.get_stream_view()); + thrust::copy(rmm::exec_policy(handle.get_stream_view()), std::get<0>(old_new_label_pairs), std::get<0>(old_new_label_pairs) + num_label_pairs, label_pair_old_labels.begin()); - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::copy(rmm::exec_policy(handle.get_stream_view()), std::get<1>(old_new_label_pairs), std::get<1>(old_new_label_pairs) + num_label_pairs, label_pair_new_labels.begin()); @@ -109,13 +110,12 @@ void relabel(raft::handle_t const& handle, pair_first, pair_first + num_label_pairs, [key_func] __device__(auto val) { return key_func(thrust::get<0>(val)); }, - handle.get_stream()); + handle.get_stream_view()); } // update intermediate relabel map - CUDA_TRY(cudaStreamSynchronize( - handle.get_stream())); // cuco::static_map currently does not take stream + handle.get_stream_view().synchronize(); // cuco::static_map currently does not take stream auto poly_alloc = rmm::mr::polymorphic_allocator(rmm::mr::get_current_device_resource()); @@ -134,28 +134,27 @@ void relabel(raft::handle_t const& handle, thrust::make_tuple(rx_label_pair_old_labels.begin(), rx_label_pair_new_labels.begin())); relabel_map.insert(pair_first, pair_first + rx_label_pair_old_labels.size()); - rx_label_pair_old_labels.resize(0, handle.get_stream()); - rx_label_pair_new_labels.resize(0, handle.get_stream()); - rx_label_pair_old_labels.shrink_to_fit(handle.get_stream()); - rx_label_pair_new_labels.shrink_to_fit(handle.get_stream()); + rx_label_pair_old_labels.resize(0, handle.get_stream_view()); + rx_label_pair_new_labels.resize(0, handle.get_stream_view()); + rx_label_pair_old_labels.shrink_to_fit(handle.get_stream_view()); + rx_label_pair_new_labels.shrink_to_fit(handle.get_stream_view()); // shuffle unique_old_labels, relabel using the intermediate relabel map, and shuffle back { - rmm::device_uvector rx_unique_old_labels(0, handle.get_stream()); + rmm::device_uvector rx_unique_old_labels(0, handle.get_stream_view()); std::vector rx_value_counts{}; std::tie(rx_unique_old_labels, rx_value_counts) = groupby_gpuid_and_shuffle_values( handle.get_comms(), unique_old_labels.begin(), unique_old_labels.end(), [key_func] __device__(auto val) { return key_func(val); }, - handle.get_stream()); + handle.get_stream_view()); - CUDA_TRY(cudaStreamSynchronize( - handle.get_stream())); // cuco::static_map currently does not take stream + handle.get_stream_view().synchronize(); // cuco::static_map currently does not take stream if (skip_missing_labels) { - thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::transform(rmm::exec_policy(handle.get_stream_view()), rx_unique_old_labels.begin(), rx_unique_old_labels.end(), rx_unique_old_labels.begin(), @@ -173,8 +172,11 @@ void relabel(raft::handle_t const& handle, // corresponding old labels } - std::tie(new_labels_for_unique_old_labels, std::ignore) = shuffle_values( - handle.get_comms(), rx_unique_old_labels.begin(), rx_value_counts, handle.get_stream()); + std::tie(new_labels_for_unique_old_labels, std::ignore) = + shuffle_values(handle.get_comms(), + rx_unique_old_labels.begin(), + rx_value_counts, + handle.get_stream_view()); } } @@ -211,7 +213,7 @@ void 
relabel(raft::handle_t const& handle, thrust::make_tuple(std::get<0>(old_new_label_pairs), std::get<1>(old_new_label_pairs))); relabel_map.insert(pair_first, pair_first + num_label_pairs); if (skip_missing_labels) { - thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::transform(rmm::exec_policy(handle.get_stream_view()), labels, labels + num_labels, labels, @@ -228,7 +230,7 @@ void relabel(raft::handle_t const& handle, if (do_expensive_check && !skip_missing_labels) { CUGRAPH_EXPECTS( - thrust::count(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::count(rmm::exec_policy(handle.get_stream_view()), labels, labels + num_labels, invalid_vertex_id::value) == 0, diff --git a/cpp/src/experimental/renumber_edgelist.cu b/cpp/src/experimental/renumber_edgelist.cu index d6e3f8c93f6..afd7bce772e 100644 --- a/cpp/src/experimental/renumber_edgelist.cu +++ b/cpp/src/experimental/renumber_edgelist.cu @@ -23,10 +23,10 @@ #include #include -#include #include #include #include +#include #include #include @@ -83,16 +83,16 @@ rmm::device_uvector compute_renumber_map( { rmm::device_uvector sorted_major_labels(edgelist_edge_counts[i], handle.get_stream()); - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::copy(rmm::exec_policy(handle.get_stream_view()), edgelist_major_vertices[i], edgelist_major_vertices[i] + edgelist_edge_counts[i], sorted_major_labels.begin()); // FIXME: better refactor this sort-count_if-reduce_by_key routine for reuse - thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::sort(rmm::exec_policy(handle.get_stream_view()), sorted_major_labels.begin(), sorted_major_labels.end()); auto num_unique_labels = - thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::count_if(rmm::exec_policy(handle.get_stream_view()), thrust::make_counting_iterator(size_t{0}), thrust::make_counting_iterator(sorted_major_labels.size()), [labels = sorted_major_labels.data()] __device__(auto i) { @@ -100,7 +100,7 @@ rmm::device_uvector compute_renumber_map( }); tmp_major_labels.resize(num_unique_labels, handle.get_stream()); tmp_major_counts.resize(tmp_major_labels.size(), handle.get_stream()); - thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::reduce_by_key(rmm::exec_policy(handle.get_stream_view()), sorted_major_labels.begin(), sorted_major_labels.end(), thrust::make_constant_iterator(edge_t{1}), @@ -146,20 +146,19 @@ rmm::device_uvector compute_renumber_map( } if (multi_gpu) { // FIXME: better refactor this sort-count_if-reduce_by_key routine for reuse - thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::sort_by_key(rmm::exec_policy(handle.get_stream_view()), major_labels.begin(), major_labels.end(), major_counts.begin()); - auto num_unique_labels = - thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - thrust::make_counting_iterator(size_t{0}), - thrust::make_counting_iterator(major_labels.size()), - [labels = major_labels.data()] __device__(auto i) { - return (i == 0) || (labels[i - 1] != labels[i]); - }); + auto num_unique_labels = thrust::count_if(rmm::exec_policy(handle.get_stream_view()), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(major_labels.size()), + [labels = major_labels.data()] __device__(auto i) { + return (i == 0) || (labels[i - 1] != labels[i]); + }); rmm::device_uvector 
tmp_major_labels(num_unique_labels, handle.get_stream()); rmm::device_uvector tmp_major_counts(tmp_major_labels.size(), handle.get_stream()); - thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::reduce_by_key(rmm::exec_policy(handle.get_stream_view()), major_labels.begin(), major_labels.end(), major_counts.begin(), @@ -177,20 +176,18 @@ rmm::device_uvector compute_renumber_map( rmm::device_uvector minor_labels(minor_displs.back() + edgelist_edge_counts.back(), handle.get_stream()); for (size_t i = 0; i < edgelist_minor_vertices.size(); ++i) { - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::copy(rmm::exec_policy(handle.get_stream_view()), edgelist_minor_vertices[i], edgelist_minor_vertices[i] + edgelist_edge_counts[i], minor_labels.begin() + minor_displs[i]); } - thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - minor_labels.begin(), - minor_labels.end()); - minor_labels.resize( - thrust::distance(minor_labels.begin(), - thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - minor_labels.begin(), - minor_labels.end())), - handle.get_stream()); + thrust::sort( + rmm::exec_policy(handle.get_stream_view()), minor_labels.begin(), minor_labels.end()); + minor_labels.resize(thrust::distance(minor_labels.begin(), + thrust::unique(rmm::exec_policy(handle.get_stream_view()), + minor_labels.begin(), + minor_labels.end())), + handle.get_stream()); if (multi_gpu) { auto& comm = handle.get_comms(); auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); @@ -216,15 +213,13 @@ rmm::device_uvector compute_renumber_map( [key_func = detail::compute_gpu_id_from_vertex_t{row_comm_size}] __device__( auto val) { return key_func(val); }, handle.get_stream()); - thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - rx_minor_labels.begin(), - rx_minor_labels.end()); + thrust::sort( + rmm::exec_policy(handle.get_stream_view()), rx_minor_labels.begin(), rx_minor_labels.end()); rx_minor_labels.resize( - thrust::distance( - rx_minor_labels.begin(), - thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - rx_minor_labels.begin(), - rx_minor_labels.end())), + thrust::distance(rx_minor_labels.begin(), + thrust::unique(rmm::exec_policy(handle.get_stream_view()), + rx_minor_labels.begin(), + rx_minor_labels.end())), handle.get_stream()); minor_labels = std::move(rx_minor_labels); } @@ -241,14 +236,14 @@ rmm::device_uvector compute_renumber_map( comm.barrier(); // currently, this is ncclAllReduce #endif } - minor_labels.shrink_to_fit(handle.get_stream()); + minor_labels.shrink_to_fit(handle.get_stream_view()); // 3. 
merge major and minor labels and vertex labels rmm::device_uvector merged_labels(major_labels.size() + minor_labels.size(), - handle.get_stream()); - rmm::device_uvector merged_counts(merged_labels.size(), handle.get_stream()); - thrust::merge_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + handle.get_stream_view()); + rmm::device_uvector merged_counts(merged_labels.size(), handle.get_stream_view()); + thrust::merge_by_key(rmm::exec_policy(handle.get_stream_view()), major_labels.begin(), major_labels.end(), minor_labels.begin(), @@ -267,13 +262,12 @@ rmm::device_uvector compute_renumber_map( rmm::device_uvector labels(merged_labels.size(), handle.get_stream()); rmm::device_uvector counts(labels.size(), handle.get_stream()); - auto pair_it = - thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - merged_labels.begin(), - merged_labels.end(), - merged_counts.begin(), - labels.begin(), - counts.begin()); + auto pair_it = thrust::reduce_by_key(rmm::exec_policy(handle.get_stream_view()), + merged_labels.begin(), + merged_labels.end(), + merged_counts.begin(), + labels.begin(), + counts.begin()); merged_labels.resize(0, handle.get_stream()); merged_counts.resize(0, handle.get_stream()); merged_labels.shrink_to_fit(handle.get_stream()); @@ -289,14 +283,14 @@ rmm::device_uvector compute_renumber_map( if (optional_vertex_span) { auto [vertices, num_vertices] = *optional_vertex_span; auto num_isolated_vertices = thrust::count_if( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rmm::exec_policy(handle.get_stream_view()), vertices, vertices + num_vertices, [label_first = labels.begin(), label_last = labels.end()] __device__(auto v) { return !thrust::binary_search(thrust::seq, label_first, label_last, v); }); isolated_vertices.resize(num_isolated_vertices, handle.get_stream()); - thrust::copy_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::copy_if(rmm::exec_policy(handle.get_stream_view()), vertices, vertices + num_vertices, isolated_vertices.begin(), @@ -308,11 +302,11 @@ rmm::device_uvector compute_renumber_map( if (isolated_vertices.size() > 0) { labels.resize(labels.size() + isolated_vertices.size(), handle.get_stream()); counts.resize(labels.size(), handle.get_stream()); - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::copy(rmm::exec_policy(handle.get_stream_view()), isolated_vertices.begin(), isolated_vertices.end(), labels.end() - isolated_vertices.size()); - thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::fill(rmm::exec_policy(handle.get_stream_view()), counts.end() - isolated_vertices.size(), counts.end(), edge_t{0}); @@ -320,7 +314,7 @@ rmm::device_uvector compute_renumber_map( // 6. 
sort by degree - thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::sort_by_key(rmm::exec_policy(handle.get_stream_view()), counts.begin(), counts.end(), labels.begin(), @@ -341,16 +335,16 @@ void expensive_check_edgelist( if (optional_vertex_span) { auto [vertices, num_vertices] = *optional_vertex_span; sorted_local_vertices.resize(num_vertices, handle.get_stream()); - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::copy(rmm::exec_policy(handle.get_stream_view()), vertices, vertices + num_vertices, sorted_local_vertices.begin()); - thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::sort(rmm::exec_policy(handle.get_stream_view()), sorted_local_vertices.begin(), sorted_local_vertices.end()); CUGRAPH_EXPECTS(static_cast(thrust::distance( sorted_local_vertices.begin(), - thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::unique(rmm::exec_policy(handle.get_stream_view()), sorted_local_vertices.begin(), sorted_local_vertices.end()))) == sorted_local_vertices.size(), "Invalid input argument: local_vertices should not have duplicates."); @@ -375,7 +369,7 @@ void expensive_check_edgelist( auto [local_vertices, num_local_vertices] = *optional_vertex_span; CUGRAPH_EXPECTS( thrust::count_if( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rmm::exec_policy(handle.get_stream_view()), local_vertices, local_vertices + num_local_vertices, [comm_rank, @@ -390,7 +384,7 @@ void expensive_check_edgelist( thrust::make_tuple(edgelist_major_vertices[i], edgelist_minor_vertices[i])); CUGRAPH_EXPECTS( thrust::count_if( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rmm::exec_policy(handle.get_stream_view()), edge_first, edge_first + edgelist_edge_counts[i], [comm_size, @@ -442,7 +436,7 @@ void expensive_check_edgelist( recvcounts, displacements, handle.get_stream()); - thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::sort(rmm::exec_policy(handle.get_stream_view()), sorted_major_vertices.begin(), sorted_major_vertices.end()); } @@ -472,7 +466,7 @@ void expensive_check_edgelist( recvcounts, displacements, handle.get_stream()); - thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::sort(rmm::exec_policy(handle.get_stream_view()), sorted_minor_vertices.begin(), sorted_minor_vertices.end()); } @@ -492,7 +486,7 @@ void expensive_check_edgelist( thrust::make_tuple(edgelist_major_vertices[i], edgelist_minor_vertices[i])); CUGRAPH_EXPECTS( thrust::count_if( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rmm::exec_policy(handle.get_stream_view()), edge_first, edge_first + edgelist_edge_counts[i], [num_major_vertices = static_cast(sorted_major_vertices.size()), @@ -521,7 +515,7 @@ void expensive_check_edgelist( thrust::make_tuple(edgelist_major_vertices[0], edgelist_minor_vertices[0])); CUGRAPH_EXPECTS( thrust::count_if( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rmm::exec_policy(handle.get_stream_view()), edge_first, edge_first + edgelist_edge_counts[0], [sorted_local_vertices = sorted_local_vertices.data(), diff --git a/cpp/src/experimental/renumber_utils.cu b/cpp/src/experimental/renumber_utils.cu index 9cd2b9a1408..dc2d44a139a 100644 --- a/cpp/src/experimental/renumber_utils.cu +++ b/cpp/src/experimental/renumber_utils.cu @@ -50,17 +50,16 @@ void renumber_ext_vertices(raft::handle_t const& handle, if 
(do_expensive_check) { rmm::device_uvector labels(local_int_vertex_last - local_int_vertex_first, - handle.get_stream()); - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + handle.get_stream_view()); + thrust::copy(rmm::exec_policy(handle.get_stream_view()), renumber_map_labels, renumber_map_labels + labels.size(), labels.begin()); - thrust::sort( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), labels.begin(), labels.end()); - CUGRAPH_EXPECTS(thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - labels.begin(), - labels.end()) == labels.end(), - "Invalid input arguments: renumber_map_labels have duplicate elements."); + thrust::sort(rmm::exec_policy(handle.get_stream_view()), labels.begin(), labels.end()); + CUGRAPH_EXPECTS( + thrust::unique(rmm::exec_policy(handle.get_stream_view()), labels.begin(), labels.end()) == + labels.end(), + "Invalid input arguments: renumber_map_labels have duplicate elements."); } auto poly_alloc = rmm::mr::polymorphic_allocator(rmm::mr::get_current_device_resource()); @@ -75,26 +74,26 @@ void renumber_ext_vertices(raft::handle_t const& handle, auto& comm = handle.get_comms(); auto const comm_size = comm.get_size(); - rmm::device_uvector sorted_unique_ext_vertices(num_vertices, handle.get_stream()); + rmm::device_uvector sorted_unique_ext_vertices(num_vertices, + handle.get_stream_view()); sorted_unique_ext_vertices.resize( thrust::distance( sorted_unique_ext_vertices.begin(), - thrust::copy_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::copy_if(rmm::exec_policy(handle.get_stream_view()), vertices, vertices + num_vertices, sorted_unique_ext_vertices.begin(), [] __device__(auto v) { return v != invalid_vertex_id::value; })), - handle.get_stream()); - thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + handle.get_stream_view()); + thrust::sort(rmm::exec_policy(handle.get_stream_view()), sorted_unique_ext_vertices.begin(), sorted_unique_ext_vertices.end()); sorted_unique_ext_vertices.resize( - thrust::distance( - sorted_unique_ext_vertices.begin(), - thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - sorted_unique_ext_vertices.begin(), - sorted_unique_ext_vertices.end())), - handle.get_stream()); + thrust::distance(sorted_unique_ext_vertices.begin(), + thrust::unique(rmm::exec_policy(handle.get_stream_view()), + sorted_unique_ext_vertices.begin(), + sorted_unique_ext_vertices.end())), + handle.get_stream_view()); auto int_vertices_for_sorted_unique_ext_vertices = collect_values_for_unique_keys( comm, @@ -104,7 +103,7 @@ void renumber_ext_vertices(raft::handle_t const& handle, sorted_unique_ext_vertices.begin(), sorted_unique_ext_vertices.end(), detail::compute_gpu_id_from_vertex_t{comm_size}, - handle.get_stream()); + handle.get_stream_view()); handle.get_stream_view().synchronize(); // cuco::static_map currently does not take stream @@ -145,10 +144,10 @@ void renumber_ext_vertices(raft::handle_t const& handle, } if (do_expensive_check) { - rmm::device_uvector contains(num_vertices, handle.get_stream()); + rmm::device_uvector contains(num_vertices, handle.get_stream_view()); renumber_map_ptr->contains(vertices, vertices + num_vertices, contains.begin()); auto vc_pair_first = thrust::make_zip_iterator(thrust::make_tuple(vertices, contains.begin())); - CUGRAPH_EXPECTS(thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + 
CUGRAPH_EXPECTS(thrust::count_if(rmm::exec_policy(handle.get_stream_view()), vc_pair_first, vc_pair_first + num_vertices, [] __device__(auto pair) { @@ -177,7 +176,7 @@ void unrenumber_local_int_vertices( { if (do_expensive_check) { CUGRAPH_EXPECTS( - thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::count_if(rmm::exec_policy(handle.get_stream_view()), vertices, vertices + num_vertices, [local_int_vertex_first, local_int_vertex_last] __device__(auto v) { @@ -188,7 +187,7 @@ void unrenumber_local_int_vertices( "+ num_vertices)."); } - thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::transform(rmm::exec_policy(handle.get_stream_view()), vertices, vertices + num_vertices, vertices, @@ -213,7 +212,7 @@ void unrenumber_int_vertices(raft::handle_t const& handle, if (do_expensive_check) { CUGRAPH_EXPECTS( - thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::count_if(rmm::exec_policy(handle.get_stream_view()), vertices, vertices + num_vertices, [int_vertex_last = vertex_partition_lasts.back()] __device__(auto v) { @@ -228,36 +227,36 @@ void unrenumber_int_vertices(raft::handle_t const& handle, auto& comm = handle.get_comms(); auto const comm_size = comm.get_size(); - rmm::device_uvector sorted_unique_int_vertices(num_vertices, handle.get_stream()); + rmm::device_uvector sorted_unique_int_vertices(num_vertices, + handle.get_stream_view()); sorted_unique_int_vertices.resize( thrust::distance( sorted_unique_int_vertices.begin(), - thrust::copy_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::copy_if(rmm::exec_policy(handle.get_stream_view()), vertices, vertices + num_vertices, sorted_unique_int_vertices.begin(), [] __device__(auto v) { return v != invalid_vertex_id::value; })), - handle.get_stream()); - thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + handle.get_stream_view()); + thrust::sort(rmm::exec_policy(handle.get_stream_view()), sorted_unique_int_vertices.begin(), sorted_unique_int_vertices.end()); sorted_unique_int_vertices.resize( - thrust::distance( - sorted_unique_int_vertices.begin(), - thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - sorted_unique_int_vertices.begin(), - sorted_unique_int_vertices.end())), - handle.get_stream()); + thrust::distance(sorted_unique_int_vertices.begin(), + thrust::unique(rmm::exec_policy(handle.get_stream_view()), + sorted_unique_int_vertices.begin(), + sorted_unique_int_vertices.end())), + handle.get_stream_view()); rmm::device_uvector d_vertex_partition_lasts(vertex_partition_lasts.size(), - handle.get_stream()); + handle.get_stream_view()); raft::update_device(d_vertex_partition_lasts.data(), vertex_partition_lasts.data(), vertex_partition_lasts.size(), handle.get_stream()); rmm::device_uvector d_tx_int_vertex_offsets(d_vertex_partition_lasts.size(), - handle.get_stream()); - thrust::lower_bound(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + handle.get_stream_view()); + thrust::lower_bound(rmm::exec_policy(handle.get_stream_view()), sorted_unique_int_vertices.begin(), sorted_unique_int_vertices.end(), d_vertex_partition_lasts.begin(), @@ -272,13 +271,13 @@ void unrenumber_int_vertices(raft::handle_t const& handle, std::adjacent_difference( h_tx_int_vertex_counts.begin(), h_tx_int_vertex_counts.end(), h_tx_int_vertex_counts.begin()); - rmm::device_uvector rx_int_vertices(0, handle.get_stream()); + rmm::device_uvector 
rx_int_vertices(0, handle.get_stream_view()); std::vector rx_int_vertex_counts{}; std::tie(rx_int_vertices, rx_int_vertex_counts) = shuffle_values( - comm, sorted_unique_int_vertices.begin(), h_tx_int_vertex_counts, handle.get_stream()); + comm, sorted_unique_int_vertices.begin(), h_tx_int_vertex_counts, handle.get_stream_view()); auto tx_ext_vertices = std::move(rx_int_vertices); - thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::transform(rmm::exec_policy(handle.get_stream_view()), tx_ext_vertices.begin(), tx_ext_vertices.end(), tx_ext_vertices.begin(), @@ -287,9 +286,9 @@ void unrenumber_int_vertices(raft::handle_t const& handle, }); rmm::device_uvector rx_ext_vertices_for_sorted_unique_int_vertices( - 0, handle.get_stream()); + 0, handle.get_stream_view()); std::tie(rx_ext_vertices_for_sorted_unique_int_vertices, std::ignore) = - shuffle_values(comm, tx_ext_vertices.begin(), rx_int_vertex_counts, handle.get_stream()); + shuffle_values(comm, tx_ext_vertices.begin(), rx_int_vertex_counts, handle.get_stream_view()); handle.get_stream_view().synchronize(); // cuco::static_map currently does not take stream diff --git a/cpp/src/generators/generate_rmat_edgelist.cu b/cpp/src/generators/generate_rmat_edgelist.cu index 638d18b1831..40df2fa5568 100644 --- a/cpp/src/generators/generate_rmat_edgelist.cu +++ b/cpp/src/generators/generate_rmat_edgelist.cu @@ -17,10 +17,10 @@ #include #include -#include #include #include #include +#include #include #include @@ -53,10 +53,10 @@ std::tuple, rmm::device_uvector> generat auto max_edges_to_generate_per_iteration = static_cast(handle.get_device_properties().multiProcessorCount) * 1024; rmm::device_uvector rands( - std::min(num_edges, max_edges_to_generate_per_iteration) * 2 * scale, handle.get_stream()); + std::min(num_edges, max_edges_to_generate_per_iteration) * 2 * scale, handle.get_stream_view()); - rmm::device_uvector srcs(num_edges, handle.get_stream()); - rmm::device_uvector dsts(num_edges, handle.get_stream()); + rmm::device_uvector srcs(num_edges, handle.get_stream_view()); + rmm::device_uvector dsts(num_edges, handle.get_stream_view()); size_t num_edges_generated{0}; while (num_edges_generated < num_edges) { @@ -67,7 +67,7 @@ std::tuple, rmm::device_uvector> generat rng.uniform( rands.data(), num_edges_to_generate * 2 * scale, 0.0f, 1.0f, handle.get_stream()); thrust::transform( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rmm::exec_policy(handle.get_stream_view()), thrust::make_counting_iterator(size_t{0}), thrust::make_counting_iterator(num_edges_to_generate), pair_first, diff --git a/cpp/src/layout/barnes_hut.hpp b/cpp/src/layout/barnes_hut.hpp index ca62eda3716..4cbd8fbd668 100644 --- a/cpp/src/layout/barnes_hut.hpp +++ b/cpp/src/layout/barnes_hut.hpp @@ -16,8 +16,9 @@ #pragma once -#include -#include +#include "bh_kernels.hpp" +#include "fa2_kernels.hpp" +#include "utils.hpp" #include #include @@ -26,9 +27,10 @@ #include #include -#include "bh_kernels.hpp" -#include "fa2_kernels.hpp" -#include "utils.hpp" +#include + +#include +#include namespace cugraph { namespace detail { @@ -52,7 +54,7 @@ void barnes_hut(raft::handle_t const &handle, bool verbose = false, internals::GraphBasedDimRedCallback *callback = nullptr) { - rmm::cuda_stream_view stream(handle.get_stream()); + rmm::cuda_stream_view stream_view(handle.get_stream_view()); const edge_t e = graph.number_of_edges; const vertex_t n = graph.number_of_vertices; @@ -67,34 +69,34 @@ void barnes_hut(raft::handle_t const 
&handle, // Allocate more space //--------------------------------------------------- - rmm::device_uvector d_limiter(1, stream); - rmm::device_uvector d_maxdepthd(1, stream); - rmm::device_uvector d_bottomd(1, stream); - rmm::device_uvector d_radiusd(1, stream); + rmm::device_uvector d_limiter(1, stream_view); + rmm::device_uvector d_maxdepthd(1, stream_view); + rmm::device_uvector d_bottomd(1, stream_view); + rmm::device_uvector d_radiusd(1, stream_view); unsigned *limiter = d_limiter.data(); int *maxdepthd = d_maxdepthd.data(); int *bottomd = d_bottomd.data(); float *radiusd = d_radiusd.data(); - InitializationKernel<<<1, 1, 0, stream.value()>>>(limiter, maxdepthd, radiusd); - CHECK_CUDA(stream.value()); + InitializationKernel<<<1, 1, 0, stream_view.value()>>>(limiter, maxdepthd, radiusd); + CHECK_CUDA(stream_view.value()); const int FOUR_NNODES = 4 * nnodes; const int FOUR_N = 4 * n; const float theta_squared = theta * theta; const int NNODES = nnodes; - rmm::device_uvector d_startl(nnodes + 1, stream); - rmm::device_uvector d_childl((nnodes + 1) * 4, stream); + rmm::device_uvector d_startl(nnodes + 1, stream_view); + rmm::device_uvector d_childl((nnodes + 1) * 4, stream_view); // FA2 requires degree + 1 - rmm::device_uvector d_massl(nnodes + 1, stream); - thrust::fill(rmm::exec_policy(stream), d_massl.begin(), d_massl.end(), 1); + rmm::device_uvector d_massl(nnodes + 1, stream_view); + thrust::fill(rmm::exec_policy(stream_view), d_massl.begin(), d_massl.end(), 1); - rmm::device_uvector d_maxxl(blocks * FACTOR1, stream); - rmm::device_uvector d_maxyl(blocks * FACTOR1, stream); - rmm::device_uvector d_minxl(blocks * FACTOR1, stream); - rmm::device_uvector d_minyl(blocks * FACTOR1, stream); + rmm::device_uvector d_maxxl(blocks * FACTOR1, stream_view); + rmm::device_uvector d_maxyl(blocks * FACTOR1, stream_view); + rmm::device_uvector d_minxl(blocks * FACTOR1, stream_view); + rmm::device_uvector d_minyl(blocks * FACTOR1, stream_view); // Actual mallocs int *startl = d_startl.data(); @@ -107,21 +109,21 @@ void barnes_hut(raft::handle_t const &handle, float *minyl = d_minyl.data(); // SummarizationKernel - rmm::device_uvector d_countl(nnodes + 1, stream); + rmm::device_uvector d_countl(nnodes + 1, stream_view); int *countl = d_countl.data(); // SortKernel - rmm::device_uvector d_sortl(nnodes + 1, stream); + rmm::device_uvector d_sortl(nnodes + 1, stream_view); int *sortl = d_sortl.data(); // RepulsionKernel - rmm::device_uvector d_rep_forces((nnodes + 1) * 2, stream); + rmm::device_uvector d_rep_forces((nnodes + 1) * 2, stream_view); float *rep_forces = d_rep_forces.data(); - rmm::device_uvector d_radius_squared(1, stream); + rmm::device_uvector d_radius_squared(1, stream_view); float *radiusd_squared = d_radius_squared.data(); - rmm::device_uvector d_nodes_pos((nnodes + 1) * 2, stream); + rmm::device_uvector d_nodes_pos((nnodes + 1) * 2, stream_view); float *nodes_pos = d_nodes_pos.data(); // Initialize positions with random values @@ -129,10 +131,11 @@ void barnes_hut(raft::handle_t const &handle, // Copy start x and y positions. 
   if (x_start && y_start) {
-    raft::copy(nodes_pos, x_start, n, stream.value());
-    raft::copy(nodes_pos + nnodes + 1, y_start, n, stream.value());
+    raft::copy(nodes_pos, x_start, n, stream_view.value());
+    raft::copy(nodes_pos + nnodes + 1, y_start, n, stream_view.value());
   } else {
-    random_vector(nodes_pos, (nnodes + 1) * 2, random_state, stream.value());
+    raft::random::Rng rng(random_state);
+    rng.uniform(nodes_pos, (nnodes + 1) * 2, -100.0f, 100.0f, stream_view.value());
   }
   // Allocate arrays for force computation
@@ -141,24 +144,24 @@ void barnes_hut(raft::handle_t const &handle,
   float *attract{nullptr};
   float *old_forces{nullptr};
   float *swinging{nullptr};
   float *traction{nullptr};
-  rmm::device_uvector d_attract(n * 2, stream);
-  rmm::device_uvector d_old_forces(n * 2, stream);
-  rmm::device_uvector d_swinging(n, stream);
-  rmm::device_uvector d_traction(n, stream);
+  rmm::device_uvector d_attract(n * 2, stream_view);
+  rmm::device_uvector d_old_forces(n * 2, stream_view);
+  rmm::device_uvector d_swinging(n, stream_view);
+  rmm::device_uvector d_traction(n, stream_view);
   attract = d_attract.data();
   old_forces = d_old_forces.data();
   swinging = d_swinging.data();
   traction = d_traction.data();
-  thrust::fill(rmm::exec_policy(stream), d_old_forces.begin(), d_old_forces.end(), 0.f);
+  thrust::fill(rmm::exec_policy(stream_view), d_old_forces.begin(), d_old_forces.end(), 0.f);
   // Sort COO for coalesced memory access.
-  sort(graph, stream.value());
-  CHECK_CUDA(stream.value());
+  sort(graph, stream_view.value());
+  CHECK_CUDA(stream_view.value());
   graph.degree(massl, cugraph::DegreeDirection::OUT);
-  CHECK_CUDA(stream.value());
+  CHECK_CUDA(stream_view.value());
   const vertex_t *row = graph.src_indices;
   const vertex_t *col = graph.dst_indices;
@@ -172,7 +175,7 @@ void barnes_hut(raft::handle_t const &handle,
   // If outboundAttractionDistribution active, compensate.
if (outbound_attraction_distribution) { - int sum = thrust::reduce(rmm::exec_policy(stream), d_massl.begin(), d_massl.begin() + n); + int sum = thrust::reduce(rmm::exec_policy(stream_view), d_massl.begin(), d_massl.begin() + n); outbound_att_compensation = sum / (float)n; } @@ -195,70 +198,71 @@ void barnes_hut(raft::handle_t const &handle, for (int iter = 0; iter < max_iter; ++iter) { // Reset force values - thrust::fill(rmm::exec_policy(stream), d_rep_forces.begin(), d_rep_forces.end(), 0.f); - thrust::fill(rmm::exec_policy(stream), d_attract.begin(), d_attract.end(), 0.f); - thrust::fill(rmm::exec_policy(stream), d_swinging.begin(), d_swinging.end(), 0.f); - thrust::fill(rmm::exec_policy(stream), d_traction.begin(), d_traction.end(), 0.f); + thrust::fill(rmm::exec_policy(stream_view), d_rep_forces.begin(), d_rep_forces.end(), 0.f); + thrust::fill(rmm::exec_policy(stream_view), d_attract.begin(), d_attract.end(), 0.f); + thrust::fill(rmm::exec_policy(stream_view), d_swinging.begin(), d_swinging.end(), 0.f); + thrust::fill(rmm::exec_policy(stream_view), d_traction.begin(), d_traction.end(), 0.f); - ResetKernel<<<1, 1, 0, stream.value()>>>(radiusd_squared, bottomd, NNODES, radiusd); - CHECK_CUDA(stream.value()); + ResetKernel<<<1, 1, 0, stream_view.value()>>>(radiusd_squared, bottomd, NNODES, radiusd); + CHECK_CUDA(stream_view.value()); // Compute bounding box arround all bodies - BoundingBoxKernel<<>>(startl, - childl, - massl, - nodes_pos, - nodes_pos + nnodes + 1, - maxxl, - maxyl, - minxl, - minyl, - FOUR_NNODES, - NNODES, - n, - limiter, - radiusd); - CHECK_CUDA(stream.value()); - - ClearKernel1<<>>(childl, FOUR_NNODES, FOUR_N); - CHECK_CUDA(stream.value()); + BoundingBoxKernel<<>>( + startl, + childl, + massl, + nodes_pos, + nodes_pos + nnodes + 1, + maxxl, + maxyl, + minxl, + minyl, + FOUR_NNODES, + NNODES, + n, + limiter, + radiusd); + CHECK_CUDA(stream_view.value()); + + ClearKernel1<<>>(childl, FOUR_NNODES, FOUR_N); + CHECK_CUDA(stream_view.value()); // Build quadtree - TreeBuildingKernel<<>>( + TreeBuildingKernel<<>>( childl, nodes_pos, nodes_pos + nnodes + 1, NNODES, n, maxdepthd, bottomd, radiusd); - CHECK_CUDA(stream.value()); + CHECK_CUDA(stream_view.value()); - ClearKernel2<<>>(startl, massl, NNODES, bottomd); - CHECK_CUDA(stream.value()); + ClearKernel2<<>>(startl, massl, NNODES, bottomd); + CHECK_CUDA(stream_view.value()); // Summarizes mass and position for each cell, bottom up approach - SummarizationKernel<<>>( + SummarizationKernel<<>>( countl, childl, massl, nodes_pos, nodes_pos + nnodes + 1, NNODES, n, bottomd); - CHECK_CUDA(stream.value()); + CHECK_CUDA(stream_view.value()); // Group closed bodies together, used to speed up Repulsion kernel - SortKernel<<>>( + SortKernel<<>>( sortl, countl, startl, childl, NNODES, n, bottomd); - CHECK_CUDA(stream.value()); + CHECK_CUDA(stream_view.value()); // Force computation O(n . 
log(n)) - RepulsionKernel<<>>(scaling_ratio, - theta, - epssq, - sortl, - childl, - massl, - nodes_pos, - nodes_pos + nnodes + 1, - rep_forces, - rep_forces + nnodes + 1, - theta_squared, - NNODES, - FOUR_NNODES, - n, - radiusd_squared, - maxdepthd); - CHECK_CUDA(stream.value()); + RepulsionKernel<<>>(scaling_ratio, + theta, + epssq, + sortl, + childl, + massl, + nodes_pos, + nodes_pos + nnodes + 1, + rep_forces, + rep_forces + nnodes + 1, + theta_squared, + NNODES, + FOUR_NNODES, + n, + radiusd_squared, + maxdepthd); + CHECK_CUDA(stream_view.value()); apply_gravity(nodes_pos, nodes_pos + nnodes + 1, @@ -269,7 +273,7 @@ void barnes_hut(raft::handle_t const &handle, strong_gravity_mode, scaling_ratio, n, - stream.value()); + stream_view.value()); apply_attraction(row, col, @@ -284,7 +288,7 @@ void barnes_hut(raft::handle_t const &handle, lin_log_mode, edge_weight_influence, outbound_att_compensation, - stream.value()); + stream_view.value()); compute_local_speed(rep_forces, rep_forces + nnodes + 1, @@ -296,28 +300,30 @@ void barnes_hut(raft::handle_t const &handle, swinging, traction, n, - stream.value()); + stream_view.value()); // Compute global swinging and traction values - const float s = thrust::reduce(rmm::exec_policy(stream), d_swinging.begin(), d_swinging.end()); + const float s = + thrust::reduce(rmm::exec_policy(stream_view), d_swinging.begin(), d_swinging.end()); - const float t = thrust::reduce(rmm::exec_policy(stream), d_traction.begin(), d_traction.end()); + const float t = + thrust::reduce(rmm::exec_policy(stream_view), d_traction.begin(), d_traction.end()); // Compute global speed based on gloab and local swinging and traction. adapt_speed(jitter_tolerance, &jt, &speed, &speed_efficiency, s, t, n); // Update positions - apply_forces_bh<<>>(nodes_pos, - nodes_pos + nnodes + 1, - attract, - attract + n, - rep_forces, - rep_forces + nnodes + 1, - old_forces, - old_forces + n, - swinging, - speed, - n); + apply_forces_bh<<>>(nodes_pos, + nodes_pos + nnodes + 1, + attract, + attract + n, + rep_forces, + rep_forces + nnodes + 1, + old_forces, + old_forces + n, + swinging, + speed, + n); if (callback) callback->on_epoch_end(nodes_pos); @@ -329,8 +335,8 @@ void barnes_hut(raft::handle_t const &handle, } // Copy nodes positions into final output pos - raft::copy(pos, nodes_pos, n, stream.value()); - raft::copy(pos + n, nodes_pos + nnodes + 1, n, stream.value()); + raft::copy(pos, nodes_pos, n, stream_view.value()); + raft::copy(pos + n, nodes_pos + nnodes + 1, n, stream_view.value()); if (callback) callback->on_train_end(nodes_pos); } diff --git a/cpp/src/layout/exact_fa2.hpp b/cpp/src/layout/exact_fa2.hpp index a82b7a5faff..567aa8c90c6 100644 --- a/cpp/src/layout/exact_fa2.hpp +++ b/cpp/src/layout/exact_fa2.hpp @@ -24,6 +24,7 @@ #include #include #include +#include #include "exact_repulsion.hpp" #include "fa2_kernels.hpp" @@ -50,7 +51,7 @@ void exact_fa2(raft::handle_t const &handle, bool verbose = false, internals::GraphBasedDimRedCallback *callback = nullptr) { - rmm::cuda_stream_view stream(handle.get_stream()); + auto stream_view = handle.get_stream_view(); const edge_t e = graph.number_of_edges; const vertex_t n = graph.number_of_vertices; @@ -61,15 +62,15 @@ void exact_fa2(raft::handle_t const &handle, float *d_swinging{nullptr}; float *d_traction{nullptr}; - rmm::device_uvector repel(n * 2, stream); - rmm::device_uvector attract(n * 2, stream); - rmm::device_uvector old_forces(n * 2, stream); - thrust::fill(rmm::exec_policy(stream), old_forces.begin(), old_forces.end(), 
0.f); + rmm::device_uvector repel(n * 2, stream_view); + rmm::device_uvector attract(n * 2, stream_view); + rmm::device_uvector old_forces(n * 2, stream_view); + thrust::fill(rmm::exec_policy(stream_view), old_forces.begin(), old_forces.end(), 0.f); // FA2 requires degree + 1. - rmm::device_uvector mass(n, stream); - thrust::fill(rmm::exec_policy(stream), mass.begin(), mass.end(), 1); - rmm::device_uvector swinging(n, stream); - rmm::device_uvector traction(n, stream); + rmm::device_uvector mass(n, stream_view); + thrust::fill(rmm::exec_policy(stream_view), mass.begin(), mass.end(), 1); + rmm::device_uvector swinging(n, stream_view); + rmm::device_uvector traction(n, stream_view); d_repel = repel.data(); d_attract = attract.data(); @@ -78,20 +79,21 @@ void exact_fa2(raft::handle_t const &handle, d_swinging = swinging.data(); d_traction = traction.data(); - int random_state = 0; - random_vector(pos, n * 2, random_state, stream.value()); + int seed{0}; + raft::random::Rng rng(seed); + rng.uniform(pos, n * 2, -100.0f, 100.0f, handle.get_stream()); if (x_start && y_start) { - raft::copy(pos, x_start, n, stream.value()); - raft::copy(pos + n, y_start, n, stream.value()); + raft::copy(pos, x_start, n, stream_view.value()); + raft::copy(pos + n, y_start, n, stream_view.value()); } // Sort COO for coalesced memory access. - sort(graph, stream.value()); - CHECK_CUDA(stream.value()); + sort(graph, stream_view.value()); + CHECK_CUDA(stream_view.value()); graph.degree(d_mass, cugraph::DegreeDirection::OUT); - CHECK_CUDA(stream.value()); + CHECK_CUDA(stream_view.value()); const vertex_t *row = graph.src_indices; const vertex_t *col = graph.dst_indices; @@ -103,7 +105,7 @@ void exact_fa2(raft::handle_t const &handle, float jt = 0.f; if (outbound_attraction_distribution) { - int sum = thrust::reduce(rmm::exec_policy(stream), mass.begin(), mass.end()); + int sum = thrust::reduce(rmm::exec_policy(stream_view), mass.begin(), mass.end()); outbound_att_compensation = sum / (float)n; } @@ -114,14 +116,14 @@ void exact_fa2(raft::handle_t const &handle, for (int iter = 0; iter < max_iter; ++iter) { // Reset force arrays - thrust::fill(rmm::exec_policy(stream), repel.begin(), repel.end(), 0.f); - thrust::fill(rmm::exec_policy(stream), attract.begin(), attract.end(), 0.f); - thrust::fill(rmm::exec_policy(stream), swinging.begin(), swinging.end(), 0.f); - thrust::fill(rmm::exec_policy(stream), traction.begin(), traction.end(), 0.f); + thrust::fill(rmm::exec_policy(stream_view), repel.begin(), repel.end(), 0.f); + thrust::fill(rmm::exec_policy(stream_view), attract.begin(), attract.end(), 0.f); + thrust::fill(rmm::exec_policy(stream_view), swinging.begin(), swinging.end(), 0.f); + thrust::fill(rmm::exec_policy(stream_view), traction.begin(), traction.end(), 0.f); // Exact repulsion apply_repulsion( - pos, pos + n, d_repel, d_repel + n, d_mass, scaling_ratio, n, stream.value()); + pos, pos + n, d_repel, d_repel + n, d_mass, scaling_ratio, n, stream_view.value()); apply_gravity(pos, pos + n, @@ -132,7 +134,7 @@ void exact_fa2(raft::handle_t const &handle, strong_gravity_mode, scaling_ratio, n, - stream.value()); + stream_view.value()); apply_attraction(row, col, @@ -147,7 +149,7 @@ void exact_fa2(raft::handle_t const &handle, lin_log_mode, edge_weight_influence, outbound_att_compensation, - stream.value()); + stream_view.value()); compute_local_speed(d_repel, d_repel + n, @@ -159,11 +161,11 @@ void exact_fa2(raft::handle_t const &handle, d_swinging, d_traction, n, - stream.value()); + stream_view.value()); // Compute 
global swinging and traction values. - const float s = thrust::reduce(rmm::exec_policy(stream), swinging.begin(), swinging.end()); - const float t = thrust::reduce(rmm::exec_policy(stream), traction.begin(), traction.end()); + const float s = thrust::reduce(rmm::exec_policy(stream_view), swinging.begin(), swinging.end()); + const float t = thrust::reduce(rmm::exec_policy(stream_view), traction.begin(), traction.end()); adapt_speed(jitter_tolerance, &jt, &speed, &speed_efficiency, s, t, n); @@ -178,7 +180,7 @@ void exact_fa2(raft::handle_t const &handle, d_swinging, speed, n, - stream.value()); + stream_view.value()); if (callback) callback->on_epoch_end(pos); diff --git a/cpp/src/layout/utils.hpp b/cpp/src/layout/utils.hpp index 335b8ea986c..822459c7751 100644 --- a/cpp/src/layout/utils.hpp +++ b/cpp/src/layout/utils.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,22 +23,6 @@ namespace cugraph { namespace detail { -struct prg { - __host__ __device__ float operator()(int n) - { - thrust::default_random_engine rng; - thrust::uniform_real_distribution dist(-100.f, 100.f); - rng.discard(n); - return dist(rng); - } -}; - -void random_vector(float *vec, int n, int seed, cudaStream_t stream) -{ - thrust::counting_iterator index(seed); - thrust::transform(rmm::exec_policy(stream)->on(stream), index, index + n, vec, prg()); -} - /** helper method to get multi-processor count parameter */ inline int getMultiProcessorCount() { diff --git a/cpp/src/link_prediction/jaccard.cu b/cpp/src/link_prediction/jaccard.cu index b93ad0bd0b3..071302aed9a 100644 --- a/cpp/src/link_prediction/jaccard.cu +++ b/cpp/src/link_prediction/jaccard.cu @@ -19,11 +19,13 @@ * @file jaccard.cu * ---------------------------------------------------------------------------**/ -#include #include #include #include +#include +#include + namespace cugraph { namespace detail { @@ -208,6 +210,7 @@ int jaccard(vertex_t n, weight_t *weight_s, weight_t *weight_j) { + rmm::cuda_stream_view stream_view; dim3 nthreads, nblocks; int y = 4; @@ -221,9 +224,9 @@ int jaccard(vertex_t n, // launch kernel jaccard_row_sum - <<>>(n, csrPtr, csrInd, weight_in, work); - cudaDeviceSynchronize(); - fill(e, weight_i, weight_t{0.0}); + <<>>(n, csrPtr, csrInd, weight_in, work); + + thrust::fill(rmm::exec_policy(stream_view), weight_i, weight_i + e, weight_t{0.0}); // setup launch configuration nthreads.x = 32 / y; @@ -234,8 +237,8 @@ int jaccard(vertex_t n, nblocks.z = min((n + nthreads.z - 1) / nthreads.z, vertex_t{CUDA_MAX_BLOCKS}); // 1; // launch kernel - jaccard_is - <<>>(n, csrPtr, csrInd, weight_in, work, weight_i, weight_s); + jaccard_is<<>>( + n, csrPtr, csrInd, weight_in, work, weight_i, weight_s); // setup launch configuration nthreads.x = min(e, edge_t{CUDA_MAX_KERNEL_THREADS}); @@ -247,7 +250,7 @@ int jaccard(vertex_t n, // launch kernel jaccard_jw - <<>>(e, weight_i, weight_s, weight_j); + <<>>(e, weight_i, weight_s, weight_j); return 0; } diff --git a/cpp/src/sampling/random_walks.cuh b/cpp/src/sampling/random_walks.cuh index 44a6e9e83aa..81cad454a17 100644 --- a/cpp/src/sampling/random_walks.cuh +++ b/cpp/src/sampling/random_walks.cuh @@ -26,8 +26,8 @@ #include #include -#include #include +#include #include #include @@ -160,7 +160,7 @@ struct rrandom_gen_t { void generate_col_indices(device_vec_t& d_col_indx) const { 
thrust::transform_if( - rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + rmm::exec_policy(handle_.get_stream_view()), d_ptr_random_, d_ptr_random_ + num_paths_, // input1 d_ptr_out_degs_, // input2 @@ -264,7 +264,7 @@ struct col_indx_extract_ton(handle_.get_stream()), + rmm::exec_policy(handle_.get_stream_view()), thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_paths_), // input1 d_v_col_indx.begin(), // input2 @@ -376,7 +376,7 @@ struct random_walker_t { // intialize path sizes to 1, as they contain at least one vertex each: // the initial set: d_src_init_v; // - thrust::copy_n(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + thrust::copy_n(rmm::exec_policy(handle_.get_stream_view()), thrust::make_constant_iterator(1), num_paths_, d_sizes.begin()); @@ -390,7 +390,7 @@ struct random_walker_t { auto map_it_begin = thrust::make_transform_iterator(thrust::make_counting_iterator(0), dlambda); - thrust::scatter(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + thrust::scatter(rmm::exec_policy(handle_.get_stream_view()), d_src_init_v.begin(), d_src_init_v.end(), map_it_begin, @@ -485,7 +485,7 @@ struct random_walker_t { bool all_paths_stopped(device_vec_t const& d_crt_out_degs) const { auto how_many_stopped = - thrust::count_if(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + thrust::count_if(rmm::exec_policy(handle_.get_stream_view()), d_crt_out_degs.begin(), d_crt_out_degs.end(), [] __device__(auto crt_out_deg) { return crt_out_deg == 0; }); @@ -517,19 +517,17 @@ struct random_walker_t { return (col_indx >= ptr_d_sizes[row_indx] - 1); }; - auto new_end_v = - thrust::remove_if(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), - d_coalesced_v.begin(), - d_coalesced_v.end(), - thrust::make_counting_iterator(0), - predicate_v); + auto new_end_v = thrust::remove_if(rmm::exec_policy(handle_.get_stream_view()), + d_coalesced_v.begin(), + d_coalesced_v.end(), + thrust::make_counting_iterator(0), + predicate_v); - auto new_end_w = - thrust::remove_if(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), - d_coalesced_w.begin(), - d_coalesced_w.end(), - thrust::make_counting_iterator(0), - predicate_w); + auto new_end_w = thrust::remove_if(rmm::exec_policy(handle_.get_stream_view()), + d_coalesced_w.begin(), + d_coalesced_w.end(), + thrust::make_counting_iterator(0), + predicate_w); handle_.get_stream_view().synchronize(); @@ -565,7 +563,7 @@ struct random_walker_t { auto map_it_begin = thrust::make_transform_iterator(thrust::make_counting_iterator(0), dlambda); - thrust::gather(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + thrust::gather(rmm::exec_policy(handle_.get_stream_view()), map_it_begin, map_it_begin + nelems, d_src.begin(), @@ -612,7 +610,7 @@ struct random_walker_t { auto map_it_begin = thrust::make_transform_iterator(thrust::make_counting_iterator(0), dlambda); - thrust::scatter_if(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + thrust::scatter_if(rmm::exec_policy(handle_.get_stream_view()), d_src.begin(), d_src.end(), map_it_begin, @@ -651,7 +649,7 @@ struct random_walker_t { device_vec_t& d_sizes) const { thrust::transform_if( - rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + rmm::exec_policy(handle_.get_stream_view()), d_sizes.begin(), d_sizes.end(), // input d_crt_out_degs.begin(), // stencil @@ -669,12 +667,12 @@ struct random_walker_t { void init_padding(device_vec_t& d_coalesced_v, device_vec_t& 
d_coalesced_w) const { - thrust::fill(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + thrust::fill(rmm::exec_policy(handle_.get_stream_view()), d_coalesced_v.begin(), d_coalesced_v.end(), vertex_padding_value_); - thrust::fill(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + thrust::fill(rmm::exec_policy(handle_.get_stream_view()), d_coalesced_w.begin(), d_coalesced_w.end(), weight_padding_value_); @@ -742,13 +740,12 @@ random_walks_impl(raft::handle_t const& handle, vertex_t num_vertices = graph.get_number_of_vertices(); - auto how_many_valid = - thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - d_v_start.begin(), - d_v_start.end(), - [num_vertices] __device__(auto crt_vertex) { - return (crt_vertex >= 0) && (crt_vertex < num_vertices); - }); + auto how_many_valid = thrust::count_if(rmm::exec_policy(handle.get_stream_view()), + d_v_start.begin(), + d_v_start.end(), + [num_vertices] __device__(auto crt_vertex) { + return (crt_vertex >= 0) && (crt_vertex < num_vertices); + }); CUGRAPH_EXPECTS(static_cast(how_many_valid) == d_v_start.size(), "Invalid set of starting vertices."); @@ -912,12 +909,11 @@ struct coo_convertor_t { // and edge_paths_sz == 0 don't contribute // anything): // - auto new_end_it = - thrust::copy_if(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), - d_sizes.begin(), - d_sizes.end(), - d_sz_w_scan.begin(), - [] __device__(auto sz_value) { return sz_value > 1; }); + auto new_end_it = thrust::copy_if(rmm::exec_policy(handle_.get_stream_view()), + d_sizes.begin(), + d_sizes.end(), + d_sz_w_scan.begin(), + [] __device__(auto sz_value) { return sz_value > 1; }); // resize to new_end: // @@ -929,7 +925,7 @@ struct coo_convertor_t { // edge_path_sz = (vertex_path_sz-1): // thrust::transform_exclusive_scan( - rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + rmm::exec_policy(handle_.get_stream_view()), d_sz_w_scan.begin(), d_sz_w_scan.end(), d_sz_w_scan.begin(), @@ -944,10 +940,8 @@ struct coo_convertor_t { device_const_vector_view& d_sizes) const { device_vec_t d_scan(num_paths_, handle_.get_stream()); - thrust::inclusive_scan(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), - d_sizes.begin(), - d_sizes.end(), - d_scan.begin()); + thrust::inclusive_scan( + rmm::exec_policy(handle_.get_stream_view()), d_sizes.begin(), d_sizes.end(), d_scan.begin()); index_t total_sz{0}; CUDA_TRY(cudaMemcpy( @@ -957,7 +951,7 @@ struct coo_convertor_t { // initialize stencil to all 1's: // - thrust::copy_n(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + thrust::copy_n(rmm::exec_policy(handle_.get_stream_view()), thrust::make_constant_iterator(1), d_stencil.size(), d_stencil.begin()); @@ -967,7 +961,7 @@ struct coo_convertor_t { // and the next one starts, hence there cannot be an edge // between a path ending vertex and next path starting vertex; // - thrust::scatter(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + thrust::scatter(rmm::exec_policy(handle_.get_stream_view()), thrust::make_constant_iterator(0), thrust::make_constant_iterator(0) + num_paths_, d_scan.begin(), @@ -990,7 +984,7 @@ struct coo_convertor_t { // in stencil is not 0; (if it is, there's no "next" // or dst index, because the path has ended); // - thrust::copy_if(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + thrust::copy_if(rmm::exec_policy(handle_.get_stream_view()), thrust::make_counting_iterator(0), thrust::make_counting_iterator(total_sz_v - 
1), valid_src_indx.begin(), @@ -1009,7 +1003,7 @@ struct coo_convertor_t { // generated at the previous step; // thrust::transform( - rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + rmm::exec_policy(handle_.get_stream_view()), valid_src_indx.begin(), valid_src_indx.end(), thrust::make_zip_iterator(thrust::make_tuple(d_src_v.begin(), d_dst_v.begin())), // start_zip @@ -1134,12 +1128,12 @@ query_rw_sizes_offsets(raft::handle_t const& handle, index_t num_paths, index_t rmm::device_uvector d_weight_sizes(num_paths, handle.get_stream()); rmm::device_uvector d_weight_offsets(num_paths, handle.get_stream()); - thrust::exclusive_scan(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::exclusive_scan(rmm::exec_policy(handle.get_stream_view()), ptr_d_sizes, ptr_d_sizes + num_paths, d_vertex_offsets.begin()); - thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::transform(rmm::exec_policy(handle.get_stream_view()), ptr_d_sizes, ptr_d_sizes + num_paths, d_weight_sizes.begin(), @@ -1147,7 +1141,7 @@ query_rw_sizes_offsets(raft::handle_t const& handle, index_t num_paths, index_t handle.get_stream_view().synchronize(); - thrust::exclusive_scan(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::exclusive_scan(rmm::exec_policy(handle.get_stream_view()), d_weight_sizes.begin(), d_weight_sizes.end(), d_weight_offsets.begin()); diff --git a/cpp/src/structure/graph.cu b/cpp/src/structure/graph.cu index 9f683af8209..93bb0a69d23 100644 --- a/cpp/src/structure/graph.cu +++ b/cpp/src/structure/graph.cu @@ -19,6 +19,7 @@ #include #include +#include namespace { @@ -26,11 +27,11 @@ template void degree_from_offsets(vertex_t number_of_vertices, edge_t const *offsets, edge_t *degree, - cudaStream_t stream) + rmm::cuda_stream_view stream_view) { // Computes out-degree for x = 0 and x = 2 thrust::for_each( - rmm::exec_policy(stream)->on(stream), + rmm::exec_policy(stream_view), thrust::make_counting_iterator(0), thrust::make_counting_iterator(number_of_vertices), [offsets, degree] __device__(vertex_t v) { degree[v] = offsets[v + 1] - offsets[v]; }); @@ -42,15 +43,15 @@ void degree_from_vertex_ids(const raft::handle_t *handle, edge_t number_of_edges, vertex_t const *indices, edge_t *degree, - cudaStream_t stream) + rmm::cuda_stream_view stream_view) { - thrust::for_each(rmm::exec_policy(stream)->on(stream), + thrust::for_each(rmm::exec_policy(stream_view), thrust::make_counting_iterator(0), thrust::make_counting_iterator(number_of_edges), [indices, degree] __device__(edge_t e) { atomicAdd(degree + indices[e], 1); }); if ((handle != nullptr) && (handle->comms_initialized())) { auto &comm = handle->get_comms(); - comm.allreduce(degree, degree, number_of_vertices, raft::comms::op_t::SUM, stream); + comm.allreduce(degree, degree, number_of_vertices, raft::comms::op_t::SUM, stream_view.value()); } } @@ -118,7 +119,7 @@ void GraphCompressedSparseBaseView::degree(ET *degree, DegreeDirecti // (e.g. 
if you have a CSC and you want in-degree (x=1) then pass // the offsets/indices and request an out-degree (x=2)) // - cudaStream_t stream{nullptr}; + rmm::cuda_stream_view stream_view; if (direction != DegreeDirection::IN) { if ((GraphViewBase::handle != nullptr) && @@ -127,7 +128,8 @@ void GraphCompressedSparseBaseView::degree(ET *degree, DegreeDirecti // source indexing for // the allreduce to work } - degree_from_offsets(GraphViewBase::number_of_vertices, offsets, degree, stream); + degree_from_offsets( + GraphViewBase::number_of_vertices, offsets, degree, stream_view); } if (direction != DegreeDirection::OUT) { @@ -136,7 +138,7 @@ void GraphCompressedSparseBaseView::degree(ET *degree, DegreeDirecti GraphViewBase::number_of_edges, indices, degree, - stream); + stream_view); } } diff --git a/cpp/src/utilities/graph_utils.cuh b/cpp/src/utilities/graph_utils.cuh index 4eeab9376fa..76e8dc32611 100644 --- a/cpp/src/utilities/graph_utils.cuh +++ b/cpp/src/utilities/graph_utils.cuh @@ -16,8 +16,9 @@ #include #include -#include #include +#include +#include #include #include @@ -80,20 +81,6 @@ __inline__ __device__ value_t parallel_prefix_sum(count_t n, index_t const *ind, return last; } -// dot -template -T dot(size_t n, T *x, T *y) -{ - cudaStream_t stream{nullptr}; - T result = thrust::inner_product(rmm::exec_policy(stream)->on(stream), - thrust::device_pointer_cast(x), - thrust::device_pointer_cast(x + n), - thrust::device_pointer_cast(y), - 0.0f); - CHECK_CUDA(stream); - return result; -} - // axpy template struct axpy_functor : public thrust::binary_function { @@ -105,8 +92,8 @@ struct axpy_functor : public thrust::binary_function { template void axpy(size_t n, T a, T *x, T *y) { - cudaStream_t stream{nullptr}; - thrust::transform(rmm::exec_policy(stream)->on(stream), + rmm::cuda_stream_view stream_view; + thrust::transform(rmm::exec_policy(stream_view), thrust::device_pointer_cast(x), thrust::device_pointer_cast(x + n), thrust::device_pointer_cast(y), @@ -124,9 +111,9 @@ struct square { template T nrm2(size_t n, T *x) { - cudaStream_t stream{nullptr}; + rmm::cuda_stream_view stream_view; T init = 0; - T result = std::sqrt(thrust::transform_reduce(rmm::exec_policy(stream)->on(stream), + T result = std::sqrt(thrust::transform_reduce(rmm::exec_policy(stream_view), thrust::device_pointer_cast(x), thrust::device_pointer_cast(x + n), square(), @@ -139,8 +126,8 @@ T nrm2(size_t n, T *x) template T nrm1(size_t n, T *x) { - cudaStream_t stream{nullptr}; - T result = thrust::reduce(rmm::exec_policy(stream)->on(stream), + rmm::cuda_stream_view stream_view; + T result = thrust::reduce(rmm::exec_policy(stream_view), thrust::device_pointer_cast(x), thrust::device_pointer_cast(x + n)); CHECK_CUDA(stream); @@ -150,8 +137,8 @@ T nrm1(size_t n, T *x) template void scal(size_t n, T val, T *x) { - cudaStream_t stream{nullptr}; - thrust::transform(rmm::exec_policy(stream)->on(stream), + rmm::cuda_stream_view stream_view; + thrust::transform(rmm::exec_policy(stream_view), thrust::device_pointer_cast(x), thrust::device_pointer_cast(x + n), thrust::make_constant_iterator(val), @@ -163,8 +150,8 @@ void scal(size_t n, T val, T *x) template void addv(size_t n, T val, T *x) { - cudaStream_t stream{nullptr}; - thrust::transform(rmm::exec_policy(stream)->on(stream), + rmm::cuda_stream_view stream_view; + thrust::transform(rmm::exec_policy(stream_view), thrust::device_pointer_cast(x), thrust::device_pointer_cast(x + n), thrust::make_constant_iterator(val), @@ -176,19 +163,19 @@ void addv(size_t n, T val, T *x) template 
void fill(size_t n, T *x, T value) { - cudaStream_t stream{nullptr}; - thrust::fill(rmm::exec_policy(stream)->on(stream), + rmm::cuda_stream_view stream_view; + thrust::fill(rmm::exec_policy(stream_view), thrust::device_pointer_cast(x), thrust::device_pointer_cast(x + n), value); - CHECK_CUDA(stream); + CHECK_CUDA(stream_view.value()); } template void scatter(size_t n, T *src, T *dst, M *map) { - cudaStream_t stream{nullptr}; - thrust::scatter(rmm::exec_policy(stream)->on(stream), + rmm::cuda_stream_view stream_view; + thrust::scatter(rmm::exec_policy(stream_view), thrust::device_pointer_cast(src), thrust::device_pointer_cast(src + n), thrust::device_pointer_cast(map), @@ -216,8 +203,8 @@ void copy(size_t n, T *x, T *res) { thrust::device_ptr dev_ptr(x); thrust::device_ptr res_ptr(res); - cudaStream_t stream{nullptr}; - thrust::copy_n(rmm::exec_policy(stream)->on(stream), dev_ptr, n, res_ptr); + rmm::cuda_stream_view stream_view; + thrust::copy_n(rmm::exec_policy(stream_view), dev_ptr, n, res_ptr); CHECK_CUDA(stream); } @@ -236,8 +223,8 @@ struct dangling_functor : public thrust::unary_function { template void update_dangling_nodes(size_t n, T *dangling_nodes, T damping_factor) { - cudaStream_t stream{nullptr}; - thrust::transform_if(rmm::exec_policy(stream)->on(stream), + rmm::cuda_stream_view stream_view; + thrust::transform_if(rmm::exec_policy(stream_view), thrust::device_pointer_cast(dangling_nodes), thrust::device_pointer_cast(dangling_nodes + n), thrust::device_pointer_cast(dangling_nodes), @@ -332,8 +319,8 @@ void HT_matrix_csc_coo(const IndexType n, ValueType *val, ValueType *bookmark) { - cudaStream_t stream{nullptr}; - rmm::device_vector degree(n, 0); + rmm::cuda_stream_view stream_view; + rmm::device_uvector degree(n, stream_view); dim3 nthreads, nblocks; nthreads.x = min(e, CUDA_MAX_KERNEL_THREADS); @@ -343,8 +330,8 @@ void HT_matrix_csc_coo(const IndexType n, nblocks.y = 1; nblocks.z = 1; degree_coo - <<>>(n, e, csrInd, degree.data().get()); - CHECK_CUDA(stream); + <<>>(n, e, csrInd, degree.data()); + CHECK_CUDA(stream_view.value()); int y = 4; nthreads.x = 32 / y; @@ -354,8 +341,8 @@ void HT_matrix_csc_coo(const IndexType n, nblocks.y = 1; nblocks.z = min((n + nthreads.z - 1) / nthreads.z, CUDA_MAX_BLOCKS); // 1; equi_prob3 - <<>>(n, e, csrPtr, csrInd, val, degree.data().get()); - CHECK_CUDA(stream); + <<>>(n, e, csrPtr, csrInd, val, degree.data()); + CHECK_CUDA(stream.value()); ValueType a = 0.0; fill(n, bookmark, a); @@ -368,96 +355,8 @@ void HT_matrix_csc_coo(const IndexType n, nblocks.y = 1; nblocks.z = 1; flag_leafs_kernel - <<>>(n, degree.data().get(), bookmark); - CHECK_CUDA(stream); -} - -template -__global__ void permute_vals_kernel(const IndexType e, - IndexType *perm, - ValueType *in, - ValueType *out) -{ - for (int i = threadIdx.x + blockIdx.x * blockDim.x; i < e; i += gridDim.x * blockDim.x) - out[i] = in[perm[i]]; -} - -template -void permute_vals( - const IndexType e, IndexType *perm, ValueType *in, ValueType *out, cudaStream_t stream = nullptr) -{ - int nthreads = min(e, CUDA_MAX_KERNEL_THREADS); - int nblocks = min((e + nthreads - 1) / nthreads, CUDA_MAX_BLOCKS); - permute_vals_kernel<<>>(e, perm, in, out); -} - -// This will remove duplicate along with sorting -// This will sort the COO Matrix, row will be sorted and each column of same row will be sorted. 
-template -void remove_duplicate( - IndexType *src, IndexType *dest, ValueType *val, SizeT &nnz, cudaStream_t stream = nullptr) -{ - if (val != NULL) { - thrust::stable_sort_by_key(rmm::exec_policy(stream)->on(stream), - thrust::raw_pointer_cast(val), - thrust::raw_pointer_cast(val) + nnz, - thrust::make_zip_iterator(thrust::make_tuple( - thrust::raw_pointer_cast(src), thrust::raw_pointer_cast(dest)))); - thrust::stable_sort_by_key(rmm::exec_policy(stream)->on(stream), - thrust::raw_pointer_cast(dest), - thrust::raw_pointer_cast(dest + nnz), - thrust::make_zip_iterator(thrust::make_tuple( - thrust::raw_pointer_cast(src), thrust::raw_pointer_cast(val)))); - thrust::stable_sort_by_key(rmm::exec_policy(stream)->on(stream), - thrust::raw_pointer_cast(src), - thrust::raw_pointer_cast(src + nnz), - thrust::make_zip_iterator(thrust::make_tuple( - thrust::raw_pointer_cast(dest), thrust::raw_pointer_cast(val)))); - - typedef thrust::tuple IteratorTuple; - typedef thrust::zip_iterator ZipIterator; - typedef thrust::tuple ZipIteratorTuple; - typedef thrust::zip_iterator ZipZipIterator; - - ZipZipIterator newEnd = - thrust::unique(rmm::exec_policy(stream)->on(stream), - thrust::make_zip_iterator(thrust::make_tuple( - thrust::raw_pointer_cast(src), - thrust::make_zip_iterator(thrust::make_tuple( - thrust::raw_pointer_cast(dest), thrust::raw_pointer_cast(val))))), - thrust::make_zip_iterator(thrust::make_tuple( - thrust::raw_pointer_cast(src + nnz), - thrust::make_zip_iterator(thrust::make_tuple(dest + nnz, val + nnz))))); - - ZipIteratorTuple endTuple = newEnd.get_iterator_tuple(); - IndexType *row_end = thrust::get<0>(endTuple); - - nnz = ((size_t)row_end - (size_t)src) / sizeof(IndexType); - } else { - thrust::stable_sort_by_key(rmm::exec_policy(stream)->on(stream), - thrust::raw_pointer_cast(dest), - thrust::raw_pointer_cast(dest + nnz), - thrust::raw_pointer_cast(src)); - thrust::stable_sort_by_key(rmm::exec_policy(stream)->on(stream), - thrust::raw_pointer_cast(src), - thrust::raw_pointer_cast(src + nnz), - thrust::raw_pointer_cast(dest)); - - typedef thrust::tuple IteratorTuple; - typedef thrust::zip_iterator ZipIterator; - - ZipIterator newEnd = - thrust::unique(rmm::exec_policy(stream)->on(stream), - thrust::make_zip_iterator(thrust::make_tuple(thrust::raw_pointer_cast(src), - thrust::raw_pointer_cast(dest))), - thrust::make_zip_iterator(thrust::make_tuple( - thrust::raw_pointer_cast(src + nnz), thrust::raw_pointer_cast(dest + nnz)))); - - IteratorTuple endTuple = newEnd.get_iterator_tuple(); - IndexType *row_end = thrust::get<0>(endTuple); - - nnz = ((size_t)row_end - (size_t)src) / sizeof(IndexType); - } + <<>>(n, degree.data(), bookmark); + CHECK_CUDA(stream_view.value()); } template @@ -500,12 +399,12 @@ bool has_negative_val(DistType *arr, size_t n) { // custom kernel with boolean bitwise reduce may be // faster. 
- cudaStream_t stream{nullptr}; - DistType result = *thrust::min_element(rmm::exec_policy(stream)->on(stream), + rmm::cuda_stream_view stream_view; + DistType result = *thrust::min_element(rmm::exec_policy(stream_view), thrust::device_pointer_cast(arr), thrust::device_pointer_cast(arr + n)); - CHECK_CUDA(stream); + CHECK_CUDA(stream_view.value()); return (result < 0); } From 1030a49b2796fd5939ef6ba151d2d5546db3eed9 Mon Sep 17 00:00:00 2001 From: Chuck Hastings <45364586+ChuckHastings@users.noreply.github.com> Date: Wed, 16 Jun 2021 12:36:33 -0400 Subject: [PATCH 302/343] Fea hungarian expose precision (#1673) Closes #1645 Closes #1646 Expose the precision parameter (epsilon in the Date/Nagi implementation) of the Hungarian algorithm to be controllable by the user. Add support for rectangular matrices. Will be enabled for CI after https://github.com/rapidsai/raft/pull/275 is merged. Authors: - Chuck Hastings (https://github.com/ChuckHastings) Approvers: - Andrei Schaffer (https://github.com/aschaffer) - Brad Rees (https://github.com/BradReesWork) - Kumar Aatish (https://github.com/kaatish) URL: https://github.com/rapidsai/cugraph/pull/1673 --- cpp/include/cugraph/algorithms.hpp | 68 +++++- cpp/src/linear_assignment/hungarian.cu | 207 ++++++++++++---- cpp/tests/linear_assignment/hungarian_test.cu | 227 +++++++++++++----- 3 files changed, 387 insertions(+), 115 deletions(-) diff --git a/cpp/include/cugraph/algorithms.hpp b/cpp/include/cugraph/algorithms.hpp index 0b0dd88ce29..64f02d60fb4 100644 --- a/cpp/include/cugraph/algorithms.hpp +++ b/cpp/include/cugraph/algorithms.hpp @@ -606,6 +606,40 @@ weight_t hungarian(raft::handle_t const &handle, vertex_t const *workers, vertex_t *assignment); +/** + * @brief Compute Hungarian algorithm on a weighted bipartite graph + * + * The Hungarian algorithm computes an assigment of "jobs" to "workers". This function accepts + * a weighted graph and a vertex list identifying the "workers". The weights in the weighted + * graph identify the cost of assigning a particular job to a worker. The algorithm computes + * a minimum cost assignment and returns the cost as well as a vector identifying the assignment. + * + * @throws cugraph::logic_error when an error occurs. + * + * @tparam vertex_t Type of vertex identifiers. Supported value : int (signed, + * 32-bit) + * @tparam edge_t Type of edge identifiers. Supported value : int (signed, + * 32-bit) + * @tparam weight_t Type of edge weights. Supported values : float or double. + * + * @param[in] handle Library handle (RAFT). If a communicator is set in the handle, + * @param[in] graph cuGRAPH COO graph + * @param[in] num_workers number of vertices in the worker set + * @param[in] workers device pointer to an array of worker vertex ids + * @param[out] assignment device pointer to an array to which the assignment will be + * written. The array should be num_workers long, and will identify which vertex id (job) is + * assigned to that worker + * @param[in] precision parameter to define precision of comparisons + * in reducing weights to zero. 
+ */ +template +weight_t hungarian(raft::handle_t const &handle, + GraphCOOView const &graph, + vertex_t num_workers, + vertex_t const *workers, + vertex_t *assignment, + weight_t precision); + /** * @brief Louvain implementation * @@ -1052,6 +1086,38 @@ weight_t hungarian(raft::handle_t const &handle, vertex_t num_columns, vertex_t *assignment); +/** + * @brief Compute Hungarian algorithm on a weighted bipartite graph + * + * The Hungarian algorithm computes an assigment of "jobs" to "workers". This function accepts + * a weighted graph and a vertex list identifying the "workers". The weights in the weighted + * graph identify the cost of assigning a particular job to a worker. The algorithm computes + * a minimum cost assignment and returns the cost as well as a vector identifying the assignment. + * + * @throws cugraph::logic_error when an error occurs. + * + * @tparam vertex_t Type of vertex identifiers. Supported value : int (signed, + * 32-bit) + * @tparam weight_t Type of edge weights. Supported values : float or double. + * + * @param[in] handle Library handle (RAFT). If a communicator is set in the handle, + * @param[in] costs pointer to array of costs, stored in row major order + * @param[in] num_rows number of rows in dense matrix + * @param[in] num_cols number of cols in dense matrix + * @param[out] assignment device pointer to an array to which the assignment will be + * written. The array should be num_cols long, and will identify + * which vertex id (job) is assigned to that worker + * @param[in] precision parameter to define precision of comparisons + * in reducing weights to zero. + */ +template +weight_t hungarian(raft::handle_t const &handle, + weight_t const *costs, + vertex_t num_rows, + vertex_t num_columns, + vertex_t *assignment, + weight_t precision); + } // namespace dense namespace experimental { @@ -1325,4 +1391,4 @@ void weakly_connected_components( bool do_expensive_check = false); } // namespace experimental -} // namespace cugraph \ No newline at end of file +} // namespace cugraph diff --git a/cpp/src/linear_assignment/hungarian.cu b/cpp/src/linear_assignment/hungarian.cu index dfa1e43edad..77709d1e936 100644 --- a/cpp/src/linear_assignment/hungarian.cu +++ b/cpp/src/linear_assignment/hungarian.cu @@ -13,18 +13,20 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include -#include - -#include #include +#include + +#include + +#include +#include +#include #include #include -#include - -#include +#include +#include //#define TIMING @@ -35,28 +37,82 @@ namespace cugraph { namespace detail { +template +weight_t default_precision() +{ + return 0; +} + +template <> +float default_precision() +{ + return float{1e-6}; +} + +template <> +double default_precision() +{ + return double{1e-6}; +} + template weight_t hungarian(raft::handle_t const &handle, index_t num_rows, index_t num_cols, weight_t const *d_original_cost, index_t *d_assignment, - cudaStream_t stream) + weight_t precision) { - // - // TODO: Can Date/Nagi implementation in raft handle rectangular matrices? 
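  // Rectangular inputs are now handled below: the num_rows x num_cols costs are embedded
  // in an n x n matrix (n = max(num_rows, num_cols)) padded with the maximum cost, the
  // square problem is solved, and the max_cost contributed by each of the
  // |num_rows - num_cols| forced dummy assignments is subtracted from the returned
  // objective value.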
- // - CUGRAPH_EXPECTS(num_rows == num_cols, "Current implementation only supports square matrices"); - - rmm::device_vector col_assignments_v(num_rows); - - // Create an instance of LinearAssignmentProblem using problem size, number of subproblems - raft::lap::LinearAssignmentProblem lpx(handle, num_rows, 1); - - // Solve LAP(s) for given cost matrix - lpx.solve(d_original_cost, d_assignment, col_assignments_v.data().get()); - - return lpx.getPrimalObjectiveValue(0); + if (num_rows == num_cols) { + rmm::device_uvector col_assignments_v(num_rows, handle.get_stream_view()); + + // Create an instance of LinearAssignmentProblem using problem size, number of subproblems + raft::lap::LinearAssignmentProblem lpx(handle, num_rows, 1, precision); + + // Solve LAP(s) for given cost matrix + lpx.solve(d_original_cost, d_assignment, col_assignments_v.data()); + + return lpx.getPrimalObjectiveValue(0); + } else { + // + // Create a square matrix, copy d_original_cost into it. + // Fill the extra rows/columns with max(d_original_cost) + // + index_t n = std::max(num_rows, num_cols); + weight_t max_cost = thrust::reduce(rmm::exec_policy(handle.get_stream_view()), + d_original_cost, + d_original_cost + (num_rows * num_cols), + weight_t{0}, + thrust::maximum()); + + rmm::device_uvector tmp_cost(n * n, handle.get_stream_view()); + rmm::device_uvector tmp_row_assignment_v(n, handle.get_stream_view()); + rmm::device_uvector tmp_col_assignment_v(n, handle.get_stream_view()); + + thrust::transform(rmm::exec_policy(handle.get_stream_view()), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(n * n), + tmp_cost.begin(), + [max_cost, d_original_cost, n, num_rows, num_cols] __device__(index_t i) { + index_t row = i / n; + index_t col = i % n; + + return ((row < num_rows) && (col < num_cols)) + ? 
d_original_cost[row * num_cols + col] + : max_cost; + }); + + raft::lap::LinearAssignmentProblem lpx(handle, n, 1, precision); + + // Solve LAP(s) for given cost matrix + lpx.solve(tmp_cost.begin(), tmp_row_assignment_v.begin(), tmp_col_assignment_v.begin()); + + weight_t tmp_objective_value = lpx.getPrimalObjectiveValue(0); + + raft::copy(d_assignment, tmp_row_assignment_v.begin(), num_rows, handle.get_stream()); + + return tmp_objective_value - max_cost * std::abs(num_rows - num_cols); + } } template @@ -65,7 +121,7 @@ weight_t hungarian_sparse(raft::handle_t const &handle, vertex_t num_workers, vertex_t const *workers, vertex_t *assignment, - cudaStream_t stream) + weight_t precision) { CUGRAPH_EXPECTS(assignment != nullptr, "Invalid input argument: assignment pointer is NULL"); CUGRAPH_EXPECTS(graph.edge_data != nullptr, @@ -86,15 +142,16 @@ weight_t hungarian_sparse(raft::handle_t const &handle, vertex_t matrix_dimension = std::max(num_rows, num_cols); - rmm::device_vector cost_v(matrix_dimension * matrix_dimension); - rmm::device_vector tasks_v(num_cols); - rmm::device_vector temp_tasks_v(graph.number_of_vertices); - rmm::device_vector temp_workers_v(graph.number_of_vertices); + rmm::device_uvector cost_v(matrix_dimension * matrix_dimension, + handle.get_stream_view()); + rmm::device_uvector tasks_v(num_cols, handle.get_stream_view()); + rmm::device_uvector temp_tasks_v(graph.number_of_vertices, handle.get_stream_view()); + rmm::device_uvector temp_workers_v(graph.number_of_vertices, handle.get_stream_view()); - weight_t *d_cost = cost_v.data().get(); - vertex_t *d_tasks = tasks_v.data().get(); - vertex_t *d_temp_tasks = temp_tasks_v.data().get(); - vertex_t *d_temp_workers = temp_workers_v.data().get(); + weight_t *d_cost = cost_v.data(); + vertex_t *d_tasks = tasks_v.data(); + vertex_t *d_temp_tasks = temp_tasks_v.data(); + vertex_t *d_temp_workers = temp_workers_v.data(); vertex_t *d_src_indices = graph.src_indices; vertex_t *d_dst_indices = graph.dst_indices; weight_t *d_edge_data = graph.edge_data; @@ -103,46 +160,50 @@ weight_t hungarian_sparse(raft::handle_t const &handle, // Renumber vertices internally. 
Workers will become // rows, tasks will become columns // - thrust::sequence(rmm::exec_policy(stream)->on(stream), temp_tasks_v.begin(), temp_tasks_v.end()); + thrust::sequence( + rmm::exec_policy(handle.get_stream_view()), temp_tasks_v.begin(), temp_tasks_v.end()); - thrust::for_each(rmm::exec_policy(stream)->on(stream), + thrust::for_each(rmm::exec_policy(handle.get_stream_view()), workers, workers + num_workers, [d_temp_tasks] __device__(vertex_t v) { d_temp_tasks[v] = -1; }); - auto temp_end = thrust::copy_if(rmm::exec_policy(stream)->on(stream), + auto temp_end = thrust::copy_if(rmm::exec_policy(handle.get_stream_view()), temp_tasks_v.begin(), temp_tasks_v.end(), d_tasks, [] __device__(vertex_t v) { return v >= 0; }); vertex_t size = thrust::distance(d_tasks, temp_end); - tasks_v.resize(size); + tasks_v.resize(size, handle.get_stream_view()); // // Now we'll assign costs into the dense array // - thrust::fill(rmm::exec_policy(stream)->on(stream), + thrust::fill(rmm::exec_policy(handle.get_stream_view()), temp_workers_v.begin(), temp_workers_v.end(), vertex_t{-1}); + thrust::fill(rmm::exec_policy(handle.get_stream_view()), + temp_tasks_v.begin(), + temp_tasks_v.end(), + vertex_t{-1}); thrust::fill( - rmm::exec_policy(stream)->on(stream), temp_tasks_v.begin(), temp_tasks_v.end(), vertex_t{-1}); - thrust::fill(rmm::exec_policy(stream)->on(stream), cost_v.begin(), cost_v.end(), weight_t{0}); + rmm::exec_policy(handle.get_stream_view()), cost_v.begin(), cost_v.end(), weight_t{0}); thrust::for_each( - rmm::exec_policy(stream)->on(stream), + rmm::exec_policy(handle.get_stream_view()), thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_rows), [d_temp_workers, workers] __device__(vertex_t v) { d_temp_workers[workers[v]] = v; }); thrust::for_each( - rmm::exec_policy(stream)->on(stream), + rmm::exec_policy(handle.get_stream_view()), thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_cols), [d_temp_tasks, d_tasks] __device__(vertex_t v) { d_temp_tasks[d_tasks[v]] = v; }); - thrust::for_each(rmm::exec_policy(stream)->on(stream), + thrust::for_each(rmm::exec_policy(handle.get_stream_view()), thrust::make_counting_iterator(0), thrust::make_counting_iterator(graph.number_of_edges), [d_temp_workers, @@ -170,11 +231,11 @@ weight_t hungarian_sparse(raft::handle_t const &handle, // temp_assignment_v will hold the assignment in the dense // bipartite matrix numbering // - rmm::device_vector temp_assignment_v(matrix_dimension); - vertex_t *d_temp_assignment = temp_assignment_v.data().get(); + rmm::device_uvector temp_assignment_v(matrix_dimension, handle.get_stream_view()); + vertex_t *d_temp_assignment = temp_assignment_v.data(); weight_t min_cost = detail::hungarian( - handle, matrix_dimension, matrix_dimension, d_cost, d_temp_assignment, stream); + handle, matrix_dimension, matrix_dimension, d_cost, d_temp_assignment, precision); #ifdef TIMING hr_timer.stop(); @@ -185,7 +246,7 @@ weight_t hungarian_sparse(raft::handle_t const &handle, // // Translate the assignment back to the original vertex ids // - thrust::for_each(rmm::exec_policy(stream)->on(stream), + thrust::for_each(rmm::exec_policy(handle.get_stream_view()), thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_rows), [d_tasks, d_temp_assignment, assignment] __device__(vertex_t id) { @@ -210,17 +271,49 @@ weight_t hungarian(raft::handle_t const &handle, vertex_t const *workers, vertex_t *assignment) { - cudaStream_t stream{0}; + return detail::hungarian_sparse( + handle, graph, num_workers, 
workers, assignment, detail::default_precision()); +} - return detail::hungarian_sparse(handle, graph, num_workers, workers, assignment, stream); +template +weight_t hungarian(raft::handle_t const &handle, + GraphCOOView const &graph, + vertex_t num_workers, + vertex_t const *workers, + vertex_t *assignment, + weight_t precision) +{ + return detail::hungarian_sparse(handle, graph, num_workers, workers, assignment, precision); } +template int32_t hungarian( + raft::handle_t const &, + GraphCOOView const &, + int32_t, + int32_t const *, + int32_t *, + int32_t); + +template float hungarian(raft::handle_t const &, + GraphCOOView const &, + int32_t, + int32_t const *, + int32_t *, + float); +template double hungarian(raft::handle_t const &, + GraphCOOView const &, + int32_t, + int32_t const *, + int32_t *, + double); + template int32_t hungarian( raft::handle_t const &, GraphCOOView const &, int32_t, int32_t const *, int32_t *); + template float hungarian(raft::handle_t const &, GraphCOOView const &, int32_t, @@ -241,9 +334,19 @@ weight_t hungarian(raft::handle_t const &handle, index_t num_cols, index_t *assignment) { - cudaStream_t stream{0}; + return detail::hungarian( + handle, num_rows, num_cols, costs, assignment, detail::default_precision()); +} - return detail::hungarian(handle, num_rows, num_cols, costs, assignment, stream); +template +weight_t hungarian(raft::handle_t const &handle, + weight_t const *costs, + index_t num_rows, + index_t num_cols, + index_t *assignment, + weight_t precision) +{ + return detail::hungarian(handle, num_rows, num_cols, costs, assignment, precision); } template int32_t hungarian( @@ -252,6 +355,12 @@ template float hungarian( raft::handle_t const &, float const *, int32_t, int32_t, int32_t *); template double hungarian( raft::handle_t const &, double const *, int32_t, int32_t, int32_t *); +template int32_t hungarian( + raft::handle_t const &, int32_t const *, int32_t, int32_t, int32_t *, int32_t); +template float hungarian( + raft::handle_t const &, float const *, int32_t, int32_t, int32_t *, float); +template double hungarian( + raft::handle_t const &, double const *, int32_t, int32_t, int32_t *, double); } // namespace dense diff --git a/cpp/tests/linear_assignment/hungarian_test.cu b/cpp/tests/linear_assignment/hungarian_test.cu index 282524ffe0d..9698b5c3335 100644 --- a/cpp/tests/linear_assignment/hungarian_test.cu +++ b/cpp/tests/linear_assignment/hungarian_test.cu @@ -9,21 +9,23 @@ * */ -#include "cuda_profiler_api.h" -#include "gtest/gtest.h" - -#include -#include +#include #include #include #include -#include - #include +#include + +#include "cuda_profiler_api.h" +#include "gtest/gtest.h" + +#include +#include + __global__ void setup_generator(curandState *state) { int id = threadIdx.x + blockIdx.x * blockDim.x; @@ -64,29 +66,35 @@ TEST_F(HungarianTest, Bipartite4x4) int32_t workers[] = {0, 1, 2, 3}; - float min_cost = 18.0; - int32_t expected[] = {6, 7, 5, 4}; + float min_cost = 18.0; + std::vector expected({6, 7, 5, 4}); + std::vector assignment({0, 0, 0, 0}); int32_t length = sizeof(src_data) / sizeof(src_data[0]); int32_t length_workers = sizeof(workers) / sizeof(workers[0]); int32_t num_vertices = 1 + std::max(*std::max_element(src_data, src_data + length), *std::max_element(dst_data, dst_data + length)); - rmm::device_vector src_v(src_data, src_data + length); - rmm::device_vector dst_v(dst_data, dst_data + length); - rmm::device_vector cost_v(cost, cost + length); - rmm::device_vector workers_v(workers, workers + length_workers); - 
rmm::device_vector expected_v(expected, expected + length_workers); - rmm::device_vector assignment_v(length_workers); + rmm::device_uvector src_v(length, handle.get_stream_view()); + rmm::device_uvector dst_v(length, handle.get_stream_view()); + rmm::device_uvector cost_v(length, handle.get_stream_view()); + rmm::device_uvector workers_v(length_workers, handle.get_stream_view()); + rmm::device_uvector assignment_v(length_workers, handle.get_stream_view()); + + raft::update_device(src_v.begin(), src_data, length, handle.get_stream()); + raft::update_device(dst_v.begin(), dst_data, length, handle.get_stream()); + raft::update_device(cost_v.begin(), cost, length, handle.get_stream()); + raft::update_device(workers_v.begin(), workers, length_workers, handle.get_stream()); cugraph::GraphCOOView g( - src_v.data().get(), dst_v.data().get(), cost_v.data().get(), num_vertices, length); + src_v.data(), dst_v.data(), cost_v.data(), num_vertices, length); - float r = cugraph::hungarian( - handle, g, length_workers, workers_v.data().get(), assignment_v.data().get()); + float r = cugraph::hungarian(handle, g, length_workers, workers_v.data(), assignment_v.data()); + + raft::update_host(assignment.data(), assignment_v.begin(), length_workers, handle.get_stream()); EXPECT_EQ(min_cost, r); - EXPECT_EQ(expected_v, assignment_v); + EXPECT_EQ(assignment, expected); } TEST_F(HungarianTest, Bipartite5x5) @@ -100,29 +108,36 @@ TEST_F(HungarianTest, Bipartite5x5) int32_t workers[] = {0, 1, 2, 3, 4}; - float min_cost = 51.0; - int32_t expected[] = {5, 7, 8, 6, 9}; + float min_cost = 51.0; + std::vector expected({5, 7, 8, 6, 9}); + std::vector assignment({0, 0, 0, 0, 0}); int32_t length = sizeof(src_data) / sizeof(src_data[0]); int32_t length_workers = sizeof(workers) / sizeof(workers[0]); int32_t num_vertices = 1 + std::max(*std::max_element(src_data, src_data + length), *std::max_element(dst_data, dst_data + length)); - rmm::device_vector src_v(src_data, src_data + length); - rmm::device_vector dst_v(dst_data, dst_data + length); - rmm::device_vector cost_v(cost, cost + length); - rmm::device_vector workers_v(workers, workers + length_workers); - rmm::device_vector expected_v(expected, expected + length_workers); - rmm::device_vector assignment_v(length_workers); + rmm::device_uvector src_v(length, handle.get_stream_view()); + rmm::device_uvector dst_v(length, handle.get_stream_view()); + rmm::device_uvector cost_v(length, handle.get_stream_view()); + rmm::device_uvector workers_v(length_workers, handle.get_stream_view()); + rmm::device_uvector assignment_v(length_workers, handle.get_stream_view()); + + raft::update_device(src_v.begin(), src_data, length, handle.get_stream()); + raft::update_device(dst_v.begin(), dst_data, length, handle.get_stream()); + raft::update_device(cost_v.begin(), cost, length, handle.get_stream()); + raft::update_device(workers_v.begin(), workers, length_workers, handle.get_stream()); cugraph::GraphCOOView g( - src_v.data().get(), dst_v.data().get(), cost_v.data().get(), num_vertices, length); + src_v.data(), dst_v.data(), cost_v.data(), num_vertices, length); - float r = cugraph::hungarian( - handle, g, length_workers, workers_v.data().get(), assignment_v.data().get()); + float r = cugraph::hungarian(handle, g, length_workers, workers_v.data(), assignment_v.data()); + + raft::update_host( + assignment.data(), assignment_v.begin(), assignment_v.size(), handle.get_stream()); EXPECT_EQ(min_cost, r); - EXPECT_EQ(expected_v, assignment_v); + EXPECT_EQ(assignment, expected); } 
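The tests shown here exercise the five-argument overloads, which forward detail::default_precision(); a minimal sketch of the six-argument dense overload declared in algorithms.hpp, applied to a rectangular cost matrix, could look as follows. The cost values, the 3x5 shape, and the 1e-4f epsilon are illustrative only, and the includes, namespaces, and raft::handle_t are assumed to match the surrounding tests.

  raft::handle_t handle{};

  int32_t num_rows = 3;
  int32_t num_cols = 5;

  // Row-major costs; values are illustrative only.
  std::vector<float> h_cost{4, 2, 8, 7, 6, 1, 9, 3, 5, 2, 6, 4, 7, 2, 8};

  rmm::device_uvector<float> cost_v(num_rows * num_cols, handle.get_stream_view());
  rmm::device_uvector<int32_t> assignment_v(num_rows, handle.get_stream_view());

  raft::update_device(cost_v.data(), h_cost.data(), h_cost.size(), handle.get_stream());

  // Explicit epsilon used for the zero comparisons in the Date/Nagi solver.
  float objective = cugraph::dense::hungarian(
    handle, cost_v.data(), num_rows, num_cols, assignment_v.data(), 1e-4f);

  std::vector<int32_t> h_assignment(num_rows);
  raft::update_host(
    h_assignment.data(), assignment_v.data(), assignment_v.size(), handle.get_stream());

Because num_rows != num_cols, this call takes the max-cost padding path added to hungarian.cu, and assignment_v holds the column chosen for each of the num_rows rows.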
TEST_F(HungarianTest, Bipartite4x4_multiple_answers) @@ -135,40 +150,44 @@ TEST_F(HungarianTest, Bipartite4x4_multiple_answers) int32_t workers[] = {0, 1, 2, 3}; - float min_cost = 13.0; - int32_t expected1[] = {7, 6, 5, 4}; - int32_t expected2[] = {6, 7, 5, 4}; - int32_t expected3[] = {7, 6, 4, 5}; - int32_t expected4[] = {6, 7, 4, 5}; + float min_cost = 13.0; + + std::vector expected1({7, 6, 5, 4}); + std::vector expected2({6, 7, 5, 4}); + std::vector expected3({7, 6, 4, 5}); + std::vector expected4({6, 7, 4, 5}); + std::vector assignment({0, 0, 0, 0}); int32_t length = sizeof(src_data) / sizeof(src_data[0]); int32_t length_workers = sizeof(workers) / sizeof(workers[0]); int32_t num_vertices = 1 + std::max(*std::max_element(src_data, src_data + length), *std::max_element(dst_data, dst_data + length)); - rmm::device_vector src_v(src_data, src_data + length); - rmm::device_vector dst_v(dst_data, dst_data + length); - rmm::device_vector cost_v(cost, cost + length); - rmm::device_vector workers_v(workers, workers + length_workers); - rmm::device_vector assignment_v(length_workers); + rmm::device_uvector src_v(length, handle.get_stream_view()); + rmm::device_uvector dst_v(length, handle.get_stream_view()); + rmm::device_uvector cost_v(length, handle.get_stream_view()); + rmm::device_uvector workers_v(length_workers, handle.get_stream_view()); + rmm::device_uvector assignment_v(length_workers, handle.get_stream_view()); - rmm::device_vector expected1_v(expected1, expected1 + length_workers); - rmm::device_vector expected2_v(expected2, expected2 + length_workers); - rmm::device_vector expected3_v(expected3, expected3 + length_workers); - rmm::device_vector expected4_v(expected4, expected4 + length_workers); + raft::update_device(src_v.begin(), src_data, length, handle.get_stream()); + raft::update_device(dst_v.begin(), dst_data, length, handle.get_stream()); + raft::update_device(cost_v.begin(), cost, length, handle.get_stream()); + raft::update_device(workers_v.begin(), workers, length_workers, handle.get_stream()); cugraph::GraphCOOView g( - src_v.data().get(), dst_v.data().get(), cost_v.data().get(), num_vertices, length); + src_v.data(), dst_v.data(), cost_v.data(), num_vertices, length); - float r = cugraph::hungarian( - handle, g, length_workers, workers_v.data().get(), assignment_v.data().get()); + float r = cugraph::hungarian(handle, g, length_workers, workers_v.data(), assignment_v.data()); EXPECT_EQ(min_cost, r); - EXPECT_TRUE(thrust::equal(assignment_v.begin(), assignment_v.end(), expected1_v.begin()) || - thrust::equal(assignment_v.begin(), assignment_v.end(), expected2_v.begin()) || - thrust::equal(assignment_v.begin(), assignment_v.end(), expected3_v.begin()) || - thrust::equal(assignment_v.begin(), assignment_v.end(), expected4_v.begin())); + raft::update_host( + assignment.data(), assignment_v.data(), assignment_v.size(), handle.get_stream()); + + EXPECT_TRUE(std::equal(assignment.begin(), assignment.end(), expected1.begin()) || + std::equal(assignment.begin(), assignment.end(), expected2.begin()) || + std::equal(assignment.begin(), assignment.end(), expected3.begin()) || + std::equal(assignment.begin(), assignment.end(), expected4.begin())); } TEST_F(HungarianTest, May29InfLoop) @@ -181,13 +200,82 @@ TEST_F(HungarianTest, May29InfLoop) float min_cost = 2; - rmm::device_vector cost_v(cost, cost + num_rows * num_cols); - rmm::device_vector assignment_v(num_rows); + std::vector expected({3, 2, 1, 0}); + std::vector assignment({0, 0, 0, 0}); + + rmm::device_uvector cost_v(num_rows 
* num_cols, handle.get_stream_view()); + rmm::device_uvector assignment_v(num_rows, handle.get_stream_view()); + + raft::update_device(cost_v.begin(), cost, num_rows * num_cols, handle.get_stream()); - float r = cugraph::dense::hungarian( - handle, cost_v.data().get(), num_rows, num_cols, assignment_v.data().get()); + float r = + cugraph::dense::hungarian(handle, cost_v.data(), num_rows, num_cols, assignment_v.data()); + + raft::update_host( + assignment.data(), assignment_v.data(), assignment_v.size(), handle.get_stream()); EXPECT_EQ(min_cost, r); + EXPECT_EQ(assignment, expected); +} + +TEST_F(HungarianTest, Dense4x6) +{ + raft::handle_t handle{}; + + int32_t num_rows = 4; + int32_t num_cols = 6; + float cost[] = {0, 16, 1, 0, 90, 100, 33, 45, 0, 4, 90, 100, + 22, 0, 1000, 2000, 90, 100, 2, 0, 3000, 4000, 90, 100}; + + float min_cost = 2; + + std::vector expected({3, 2, 1, 0}); + std::vector assignment({0, 0, 0, 0}); + + rmm::device_uvector cost_v(num_rows * num_cols, handle.get_stream_view()); + rmm::device_uvector assignment_v(num_rows, handle.get_stream_view()); + + raft::update_device(cost_v.begin(), cost, num_rows * num_cols, handle.get_stream()); + + float r = + cugraph::dense::hungarian(handle, cost_v.data(), num_rows, num_cols, assignment_v.data()); + + raft::update_host( + assignment.data(), assignment_v.data(), assignment_v.size(), handle.get_stream()); + + EXPECT_EQ(min_cost, r); + EXPECT_EQ(assignment, expected); +} + +TEST_F(HungarianTest, Dense6x4) +{ + raft::handle_t handle{}; + + int32_t num_rows = 6; + int32_t num_cols = 4; + float cost[] = {0, 16, 1, 0, 33, 45, 0, 4, 90, 100, 110, 120, + 22, 0, 1000, 2000, 90, 100, 110, 120, 2, 0, 3000, 4000}; + + float min_cost = 2; + + std::vector expected1({3, 2, 4, 1, 5, 0}); + std::vector expected2({3, 2, 5, 1, 4, 0}); + std::vector assignment({0, 0, 0, 0, 0, 0}); + + rmm::device_uvector cost_v(num_rows * num_cols, handle.get_stream_view()); + rmm::device_uvector assignment_v(num_rows, handle.get_stream_view()); + + raft::update_device(cost_v.begin(), cost, num_rows * num_cols, handle.get_stream()); + + float r = + cugraph::dense::hungarian(handle, cost_v.data(), num_rows, num_cols, assignment_v.data()); + + raft::update_host( + assignment.data(), assignment_v.data(), assignment_v.size(), handle.get_stream()); + + EXPECT_EQ(min_cost, r); + EXPECT_TRUE(std::equal(assignment.begin(), assignment.end(), expected1.begin()) || + std::equal(assignment.begin(), assignment.end(), expected2.begin())); } TEST_F(HungarianTest, PythonTestFailure) @@ -229,13 +317,22 @@ TEST_F(HungarianTest, PythonTestFailure) float min_cost = 16; - rmm::device_vector cost_v(cost, cost + num_rows * num_cols); - rmm::device_vector assignment_v(num_rows); + std::vector expected({0, 2, 1, 4, 3}); + std::vector assignment({0, 0, 0, 0, 0}); + + rmm::device_uvector cost_v(num_rows * num_cols, handle.get_stream_view()); + rmm::device_uvector assignment_v(num_rows, handle.get_stream_view()); + + raft::update_device(cost_v.begin(), cost, num_rows * num_cols, handle.get_stream()); + + float r = + cugraph::dense::hungarian(handle, cost_v.data(), num_rows, num_cols, assignment_v.data()); - float r = cugraph::dense::hungarian( - handle, cost_v.data().get(), num_rows, num_cols, assignment_v.data().get()); + raft::update_host( + assignment.data(), assignment_v.data(), assignment_v.size(), handle.get_stream()); EXPECT_EQ(min_cost, r); + EXPECT_EQ(assignment, expected); } // FIXME: Need to have tests with nxm (e.g. 
4x5 and 5x4) to test those conditions @@ -249,16 +346,16 @@ void random_test(int32_t num_rows, int32_t num_cols, int32_t upper_bound, int re HighResTimer hr_timer; - rmm::device_vector data_v(num_rows * num_cols); - rmm::device_vector state_vals_v(num_threads); - rmm::device_vector assignment_v(num_rows); + rmm::device_uvector data_v(num_rows * num_cols, handle.get_stream_view()); + rmm::device_uvector state_vals_v(num_threads, handle.get_stream_view()); + rmm::device_uvector assignment_v(num_rows, handle.get_stream_view()); std::vector validate(num_cols); hr_timer.start("initialization"); cudaStream_t stream{0}; - int32_t *d_data = data_v.data().get(); + int32_t *d_data = data_v.data(); //int64_t seed{85}; int64_t seed{time(nullptr)}; @@ -280,7 +377,7 @@ void random_test(int32_t num_rows, int32_t num_cols, int32_t upper_bound, int re for (int i = 0 ; i < repetitions ; ++i) { hr_timer.start("hungarian"); - r = cugraph::hungarian_dense(cost_v.data().get(), num_rows, num_cols, assignment_v.data().get()); + r = cugraph::hungarian_dense(cost_v.data(), num_rows, num_cols, assignment_v.data()); hr_timer.stop(); } From 7119b2fc3ff25182fdfae222a43ed8b7c26ae2fe Mon Sep 17 00:00:00 2001 From: Chuck Hastings <45364586+ChuckHastings@users.noreply.github.com> Date: Fri, 18 Jun 2021 10:57:19 -0400 Subject: [PATCH 303/343] Expose epsilon parameter (precision) through python layer (#1674) Completes the work of exposing the precision parameter to the caller in python as requested by #1645 Fixes a few PR comments from PR #1673 that were deferred as that PR required merging to fix the build. Authors: - Chuck Hastings (https://github.com/ChuckHastings) Approvers: - Seunghwa Kang (https://github.com/seunghwak) - Brad Rees (https://github.com/BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1674 --- cpp/include/cugraph/algorithms.hpp | 24 ++++++------- cpp/src/linear_assignment/hungarian.cu | 34 +++++++++---------- python/cugraph/linear_assignment/lap.pxd | 19 +++++++++-- python/cugraph/linear_assignment/lap.py | 18 +++++++--- .../cugraph/linear_assignment/lap_wrapper.pyx | 23 +++++++++---- 5 files changed, 76 insertions(+), 42 deletions(-) diff --git a/cpp/include/cugraph/algorithms.hpp b/cpp/include/cugraph/algorithms.hpp index 64f02d60fb4..36b4b15e13d 100644 --- a/cpp/include/cugraph/algorithms.hpp +++ b/cpp/include/cugraph/algorithms.hpp @@ -595,7 +595,7 @@ void bfs(raft::handle_t const &handle, * @param[in] graph cuGRAPH COO graph * @param[in] num_workers number of vertices in the worker set * @param[in] workers device pointer to an array of worker vertex ids - * @param[out] assignment device pointer to an array to which the assignment will be + * @param[out] assignments device pointer to an array to which the assignment will be * written. 
The array should be num_workers long, and will identify which vertex id (job) is * assigned to that worker */ @@ -604,7 +604,7 @@ weight_t hungarian(raft::handle_t const &handle, GraphCOOView const &graph, vertex_t num_workers, vertex_t const *workers, - vertex_t *assignment); + vertex_t *assignments); /** * @brief Compute Hungarian algorithm on a weighted bipartite graph @@ -626,10 +626,10 @@ weight_t hungarian(raft::handle_t const &handle, * @param[in] graph cuGRAPH COO graph * @param[in] num_workers number of vertices in the worker set * @param[in] workers device pointer to an array of worker vertex ids - * @param[out] assignment device pointer to an array to which the assignment will be + * @param[out] assignments device pointer to an array to which the assignment will be * written. The array should be num_workers long, and will identify which vertex id (job) is * assigned to that worker - * @param[in] precision parameter to define precision of comparisons + * @param[in] epsilon parameter to define precision of comparisons * in reducing weights to zero. */ template @@ -637,8 +637,8 @@ weight_t hungarian(raft::handle_t const &handle, GraphCOOView const &graph, vertex_t num_workers, vertex_t const *workers, - vertex_t *assignment, - weight_t precision); + vertex_t *assignments, + weight_t epsilon); /** * @brief Louvain implementation @@ -1075,7 +1075,7 @@ namespace dense { * @param[in] costs pointer to array of costs, stored in row major order * @param[in] num_rows number of rows in dense matrix * @param[in] num_cols number of cols in dense matrix - * @param[out] assignment device pointer to an array to which the assignment will be + * @param[out] assignments device pointer to an array to which the assignment will be * written. The array should be num_cols long, and will identify * which vertex id (job) is assigned to that worker */ @@ -1084,7 +1084,7 @@ weight_t hungarian(raft::handle_t const &handle, weight_t const *costs, vertex_t num_rows, vertex_t num_columns, - vertex_t *assignment); + vertex_t *assignments); /** * @brief Compute Hungarian algorithm on a weighted bipartite graph @@ -1104,10 +1104,10 @@ weight_t hungarian(raft::handle_t const &handle, * @param[in] costs pointer to array of costs, stored in row major order * @param[in] num_rows number of rows in dense matrix * @param[in] num_cols number of cols in dense matrix - * @param[out] assignment device pointer to an array to which the assignment will be + * @param[out] assignments device pointer to an array to which the assignment will be * written. The array should be num_cols long, and will identify * which vertex id (job) is assigned to that worker - * @param[in] precision parameter to define precision of comparisons + * @param[in] epsilon parameter to define precision of comparisons * in reducing weights to zero. 
*/ template @@ -1115,8 +1115,8 @@ weight_t hungarian(raft::handle_t const &handle, weight_t const *costs, vertex_t num_rows, vertex_t num_columns, - vertex_t *assignment, - weight_t precision); + vertex_t *assignments, + weight_t epsilon); } // namespace dense diff --git a/cpp/src/linear_assignment/hungarian.cu b/cpp/src/linear_assignment/hungarian.cu index 77709d1e936..b7c7ee84b7f 100644 --- a/cpp/src/linear_assignment/hungarian.cu +++ b/cpp/src/linear_assignment/hungarian.cu @@ -38,19 +38,19 @@ namespace cugraph { namespace detail { template -weight_t default_precision() +weight_t default_epsilon() { return 0; } template <> -float default_precision() +float default_epsilon() { return float{1e-6}; } template <> -double default_precision() +double default_epsilon() { return double{1e-6}; } @@ -61,13 +61,13 @@ weight_t hungarian(raft::handle_t const &handle, index_t num_cols, weight_t const *d_original_cost, index_t *d_assignment, - weight_t precision) + weight_t epsilon) { if (num_rows == num_cols) { rmm::device_uvector col_assignments_v(num_rows, handle.get_stream_view()); // Create an instance of LinearAssignmentProblem using problem size, number of subproblems - raft::lap::LinearAssignmentProblem lpx(handle, num_rows, 1, precision); + raft::lap::LinearAssignmentProblem lpx(handle, num_rows, 1, epsilon); // Solve LAP(s) for given cost matrix lpx.solve(d_original_cost, d_assignment, col_assignments_v.data()); @@ -85,14 +85,14 @@ weight_t hungarian(raft::handle_t const &handle, weight_t{0}, thrust::maximum()); - rmm::device_uvector tmp_cost(n * n, handle.get_stream_view()); + rmm::device_uvector tmp_cost_v(n * n, handle.get_stream_view()); rmm::device_uvector tmp_row_assignment_v(n, handle.get_stream_view()); rmm::device_uvector tmp_col_assignment_v(n, handle.get_stream_view()); thrust::transform(rmm::exec_policy(handle.get_stream_view()), thrust::make_counting_iterator(0), thrust::make_counting_iterator(n * n), - tmp_cost.begin(), + tmp_cost_v.begin(), [max_cost, d_original_cost, n, num_rows, num_cols] __device__(index_t i) { index_t row = i / n; index_t col = i % n; @@ -102,10 +102,10 @@ weight_t hungarian(raft::handle_t const &handle, : max_cost; }); - raft::lap::LinearAssignmentProblem lpx(handle, n, 1, precision); + raft::lap::LinearAssignmentProblem lpx(handle, n, 1, epsilon); // Solve LAP(s) for given cost matrix - lpx.solve(tmp_cost.begin(), tmp_row_assignment_v.begin(), tmp_col_assignment_v.begin()); + lpx.solve(tmp_cost_v.begin(), tmp_row_assignment_v.begin(), tmp_col_assignment_v.begin()); weight_t tmp_objective_value = lpx.getPrimalObjectiveValue(0); @@ -121,7 +121,7 @@ weight_t hungarian_sparse(raft::handle_t const &handle, vertex_t num_workers, vertex_t const *workers, vertex_t *assignment, - weight_t precision) + weight_t epsilon) { CUGRAPH_EXPECTS(assignment != nullptr, "Invalid input argument: assignment pointer is NULL"); CUGRAPH_EXPECTS(graph.edge_data != nullptr, @@ -235,7 +235,7 @@ weight_t hungarian_sparse(raft::handle_t const &handle, vertex_t *d_temp_assignment = temp_assignment_v.data(); weight_t min_cost = detail::hungarian( - handle, matrix_dimension, matrix_dimension, d_cost, d_temp_assignment, precision); + handle, matrix_dimension, matrix_dimension, d_cost, d_temp_assignment, epsilon); #ifdef TIMING hr_timer.stop(); @@ -272,7 +272,7 @@ weight_t hungarian(raft::handle_t const &handle, vertex_t *assignment) { return detail::hungarian_sparse( - handle, graph, num_workers, workers, assignment, detail::default_precision()); + handle, graph, num_workers, workers, 
assignment, detail::default_epsilon()); } template @@ -281,9 +281,9 @@ weight_t hungarian(raft::handle_t const &handle, vertex_t num_workers, vertex_t const *workers, vertex_t *assignment, - weight_t precision) + weight_t epsilon) { - return detail::hungarian_sparse(handle, graph, num_workers, workers, assignment, precision); + return detail::hungarian_sparse(handle, graph, num_workers, workers, assignment, epsilon); } template int32_t hungarian( @@ -335,7 +335,7 @@ weight_t hungarian(raft::handle_t const &handle, index_t *assignment) { return detail::hungarian( - handle, num_rows, num_cols, costs, assignment, detail::default_precision()); + handle, num_rows, num_cols, costs, assignment, detail::default_epsilon()); } template @@ -344,9 +344,9 @@ weight_t hungarian(raft::handle_t const &handle, index_t num_rows, index_t num_cols, index_t *assignment, - weight_t precision) + weight_t epsilon) { - return detail::hungarian(handle, num_rows, num_cols, costs, assignment, precision); + return detail::hungarian(handle, num_rows, num_cols, costs, assignment, epsilon); } template int32_t hungarian( diff --git a/python/cugraph/linear_assignment/lap.pxd b/python/cugraph/linear_assignment/lap.pxd index 84f5050744d..9f65e215891 100644 --- a/python/cugraph/linear_assignment/lap.pxd +++ b/python/cugraph/linear_assignment/lap.pxd @@ -25,8 +25,15 @@ cdef extern from "cugraph/algorithms.hpp" namespace "cugraph": const GraphCOOView[vertex_t,edge_t,weight_t] &graph, vertex_t num_workers, const vertex_t *workers, - vertex_t *assignment) except + + vertex_t *assignments, + weight_t epsilon) except + + cdef weight_t hungarian[vertex_t,edge_t,weight_t]( + const handle_t &handle, + const GraphCOOView[vertex_t,edge_t,weight_t] &graph, + vertex_t num_workers, + const vertex_t *workers, + vertex_t *assignments) except + cdef extern from "cugraph/algorithms.hpp": @@ -35,4 +42,12 @@ cdef extern from "cugraph/algorithms.hpp": const weight_t *costs, vertex_t num_rows, vertex_t num_columns, - vertex_t *assignment) except + + vertex_t *assignments, + weight_t epsilon) except + + + cdef weight_t dense_hungarian "cugraph::dense::hungarian" [vertex_t,weight_t]( + const handle_t &handle, + const weight_t *costs, + vertex_t num_rows, + vertex_t num_columns, + vertex_t *assignments) except + diff --git a/python/cugraph/linear_assignment/lap.py b/python/cugraph/linear_assignment/lap.py index d6f02efe77e..ed40e96fb47 100644 --- a/python/cugraph/linear_assignment/lap.py +++ b/python/cugraph/linear_assignment/lap.py @@ -15,7 +15,7 @@ from cugraph.linear_assignment import lap_wrapper -def hungarian(G, workers): +def hungarian(G, workers, epsilon=None): """ Execute the Hungarian algorithm against a symmetric, weighted, bipartite graph. @@ -46,6 +46,11 @@ def hungarian(G, workers): cudf.DataFrame. All vertices in G that are not in the workers set are implicitly assigned to the jobs set. + epsilon : float or double (matching weight type in graph) + Used for determining when value is close enough to zero to consider 0. + Defaults (if not specified) to 1e-6 in the C++ code. Unused for + integer weight types. 
+ Returns ------- cost : matches costs.dtype @@ -77,7 +82,7 @@ def hungarian(G, workers): else: local_workers = workers - cost, df = lap_wrapper.sparse_hungarian(G, local_workers) + cost, df = lap_wrapper.sparse_hungarian(G, local_workers, epsilon) if G.renumbered: df = G.unrenumber(df, 'vertex') @@ -85,7 +90,7 @@ def hungarian(G, workers): return cost, df -def dense_hungarian(costs, num_rows, num_columns): +def dense_hungarian(costs, num_rows, num_columns, epsilon=None): """ Execute the Hungarian algorithm against a dense bipartite graph representation. @@ -107,7 +112,10 @@ def dense_hungarian(costs, num_rows, num_columns): Number of rows in the matrix num_columns : int Number of columns in the matrix - + epsilon : float or double (matching weight type in graph) + Used for determining when value is close enough to zero to consider 0. + Defaults (if not specified) to 1e-6 in the C++ code. Unused for + integer weight types. Returns ------- @@ -121,4 +129,4 @@ def dense_hungarian(costs, num_rows, num_columns): """ - return lap_wrapper.dense_hungarian(costs, num_rows, num_columns) + return lap_wrapper.dense_hungarian(costs, num_rows, num_columns, epsilon) diff --git a/python/cugraph/linear_assignment/lap_wrapper.pyx b/python/cugraph/linear_assignment/lap_wrapper.pyx index 7cd2124b8d9..c173f45fa3f 100644 --- a/python/cugraph/linear_assignment/lap_wrapper.pyx +++ b/python/cugraph/linear_assignment/lap_wrapper.pyx @@ -25,7 +25,7 @@ import cudf import numpy as np -def sparse_hungarian(input_graph, workers): +def sparse_hungarian(input_graph, workers, epsilon): """ Call the hungarian algorithm """ @@ -62,6 +62,9 @@ def sparse_hungarian(input_graph, workers): df['vertex'] = workers df['assignment'] = cudf.Series(np.zeros(len(workers), dtype=np.int32)) + if epsilon == None: + epsilon = 1e-6 + cdef uintptr_t c_src = src.__cuda_array_interface__['data'][0] cdef uintptr_t c_dst = dst.__cuda_array_interface__['data'][0] cdef uintptr_t c_weights = weights.__cuda_array_interface__['data'][0] @@ -69,6 +72,8 @@ def sparse_hungarian(input_graph, workers): cdef uintptr_t c_identifier = df['vertex'].__cuda_array_interface__['data'][0]; cdef uintptr_t c_assignment = df['assignment'].__cuda_array_interface__['data'][0]; + cdef float c_epsilon_float = epsilon + cdef double c_epsilon_double = epsilon cdef GraphCOOView[int,int,float] g_float cdef GraphCOOView[int,int,double] g_double @@ -76,16 +81,16 @@ def sparse_hungarian(input_graph, workers): if weights.dtype == np.float32: g_float = GraphCOOView[int,int,float](c_src, c_dst, c_weights, num_verts, num_edges) - cost = c_hungarian[int,int,float](handle_[0], g_float, len(workers), c_workers, c_assignment) + cost = c_hungarian[int,int,float](handle_[0], g_float, len(workers), c_workers, c_assignment, c_epsilon_float) else: g_double = GraphCOOView[int,int,double](c_src, c_dst, c_weights, num_verts, num_edges) - cost = c_hungarian[int,int,double](handle_[0], g_double, len(workers), c_workers, c_assignment) + cost = c_hungarian[int,int,double](handle_[0], g_double, len(workers), c_workers, c_assignment, c_epsilon_double) return cost, df -def dense_hungarian(costs, num_rows, num_columns): +def dense_hungarian(costs, num_rows, num_columns, epsilon): """ Call the dense hungarian algorithm """ @@ -98,13 +103,19 @@ def dense_hungarian(costs, num_rows, num_columns): assignment = cudf.Series(np.zeros(num_rows, dtype=np.int32)) + if epsilon == None: + epsilon = 1e-6 + cdef uintptr_t c_costs = costs.__cuda_array_interface__['data'][0] cdef uintptr_t c_assignment = 
assignment.__cuda_array_interface__['data'][0] - + cdef float c_epsilon_float = epsilon + cdef double c_epsilon_double = epsilon if costs.dtype == np.float32: - cost = c_dense_hungarian[int,float](handle_[0], c_costs, num_rows, num_columns, c_assignment) + cost = c_dense_hungarian[int,float](handle_[0], c_costs, num_rows, num_columns, c_assignment, c_epsilon_float) elif costs.dtype == np.float64: + cost = c_dense_hungarian[int,double](handle_[0], c_costs, num_rows, num_columns, c_assignment, c_epsilon_double) + elif costs.dtype == np.int32: cost = c_dense_hungarian[int,double](handle_[0], c_costs, num_rows, num_columns, c_assignment) else: raise("unsported type: ", costs.dtype) From d01da40f0a436289bdb922f5804a7e1e7d6ed3dd Mon Sep 17 00:00:00 2001 From: Chuck Hastings <45364586+ChuckHastings@users.noreply.github.com> Date: Mon, 21 Jun 2021 14:53:56 -0400 Subject: [PATCH 304/343] Migrate old graph to legacy directory/namespace (#1675) Code cleanup. We want to migrate the new graph object into the cugraph namespace. Moving the legacy graph objects into a legacy namespace to clear the way. Authors: - Chuck Hastings (https://github.com/ChuckHastings) Approvers: - Seunghwa Kang (https://github.com/seunghwak) - Kumar Aatish (https://github.com/kaatish) - Andrei Schaffer (https://github.com/aschaffer) - Brad Rees (https://github.com/BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1675 --- cpp/include/cugraph/algorithms.hpp | 70 ++++++------ cpp/include/cugraph/functions.hpp | 6 +- .../cugraph/{ => legacy}/eidecl_graph.hpp | 2 + .../cugraph/{ => legacy}/eidir_graph.hpp | 2 + cpp/include/cugraph/{ => legacy}/graph.hpp | 3 + cpp/include/cugraph/utilities/cython.hpp | 14 +-- cpp/src/centrality/betweenness_centrality.cu | 33 +++--- cpp/src/centrality/betweenness_centrality.cuh | 10 +- cpp/src/centrality/katz_centrality.cu | 6 +- cpp/src/community/ecg.cu | 28 ++--- cpp/src/community/egonet.cu | 2 +- .../community/extract_subgraph_by_vertex.cu | 30 +++-- cpp/src/community/ktruss.cu | 30 ++--- cpp/src/community/leiden.cu | 13 ++- cpp/src/community/leiden.cuh | 10 +- cpp/src/community/louvain.cu | 17 +-- cpp/src/community/louvain.cuh | 12 +- cpp/src/community/spectral_clustering.cu | 83 ++++++-------- cpp/src/community/triangles_counting.cu | 6 +- cpp/src/components/connectivity.cu | 10 +- cpp/src/converters/COOtoCSR.cu | 50 +++++---- cpp/src/converters/COOtoCSR.cuh | 68 ++++++----- cpp/src/converters/permute_graph.cuh | 8 +- cpp/src/cores/core_number.cu | 53 ++++----- cpp/src/layout/barnes_hut.hpp | 6 +- cpp/src/layout/exact_fa2.hpp | 6 +- cpp/src/layout/force_atlas2.cu | 6 +- cpp/src/linear_assignment/hungarian.cu | 60 +++++----- cpp/src/link_analysis/gunrock_hits.cpp | 6 +- cpp/src/link_prediction/jaccard.cu | 78 ++++++------- cpp/src/link_prediction/overlap.cu | 78 ++++++------- cpp/src/structure/graph.cu | 4 +- cpp/src/traversal/bfs.cu | 106 +++++++++--------- cpp/src/traversal/bfs_kernels.cuh | 8 +- cpp/src/traversal/mg/bfs.cuh | 8 +- cpp/src/traversal/mg/common_utils.cuh | 44 ++++---- cpp/src/traversal/mg/frontier_expand.cuh | 6 +- .../traversal/mg/frontier_expand_kernels.cuh | 8 +- cpp/src/traversal/sssp.cu | 10 +- cpp/src/traversal/two_hop_neighbors.cu | 16 +-- cpp/src/tree/mst.cu | 26 ++--- cpp/src/utilities/cython.cu | 65 ++++++----- .../centrality/betweenness_centrality_test.cu | 12 +- .../edge_betweenness_centrality_test.cu | 8 +- cpp/tests/centrality/katz_centrality_test.cu | 13 ++- cpp/tests/community/balanced_edge_test.cpp | 2 +- cpp/tests/community/ecg_test.cpp | 6 +- 
cpp/tests/community/leiden_test.cpp | 4 +- cpp/tests/community/louvain_test.cpp | 4 +- cpp/tests/community/triangle_test.cu | 6 +- cpp/tests/components/con_comp_test.cu | 9 +- cpp/tests/components/scc_test.cu | 39 ++++--- cpp/tests/experimental/bfs_test.cpp | 6 +- cpp/tests/experimental/ms_bfs_test.cpp | 1 - cpp/tests/experimental/sssp_test.cpp | 6 +- cpp/tests/layout/force_atlas2_test.cu | 4 +- cpp/tests/linear_assignment/hungarian_test.cu | 8 +- cpp/tests/sampling/random_walks_profiling.cu | 1 - cpp/tests/sampling/random_walks_test.cu | 1 - cpp/tests/sampling/random_walks_utils.cuh | 1 - cpp/tests/sampling/rw_low_level_test.cu | 1 - cpp/tests/traversal/bfs_test.cu | 6 +- cpp/tests/traversal/sssp_test.cu | 8 +- cpp/tests/traversal/tsp_test.cu | 2 +- cpp/tests/tree/mst_test.cu | 15 +-- .../utilities/matrix_market_file_utilities.cu | 20 ++-- cpp/tests/utilities/test_utilities.hpp | 4 +- python/cugraph/structure/graph_primtypes.pxd | 14 +-- 68 files changed, 678 insertions(+), 630 deletions(-) rename cpp/include/cugraph/{ => legacy}/eidecl_graph.hpp (99%) rename cpp/include/cugraph/{ => legacy}/eidir_graph.hpp (98%) rename cpp/include/cugraph/{ => legacy}/graph.hpp (99%) diff --git a/cpp/include/cugraph/algorithms.hpp b/cpp/include/cugraph/algorithms.hpp index 36b4b15e13d..1d9d964fc1c 100644 --- a/cpp/include/cugraph/algorithms.hpp +++ b/cpp/include/cugraph/algorithms.hpp @@ -19,8 +19,8 @@ #include #include -#include #include +#include #include @@ -45,7 +45,7 @@ namespace cugraph { * caller */ template -void jaccard(GraphCSRView const &graph, WT const *weights, WT *result); +void jaccard(legacy::GraphCSRView const &graph, WT const *weights, WT *result); /** * @brief Compute jaccard similarity coefficient for selected vertex pairs @@ -69,7 +69,7 @@ void jaccard(GraphCSRView const &graph, WT const *weights, WT *resul * caller */ template -void jaccard_list(GraphCSRView const &graph, +void jaccard_list(legacy::GraphCSRView const &graph, WT const *weights, ET num_pairs, VT const *first, @@ -95,7 +95,7 @@ void jaccard_list(GraphCSRView const &graph, * caller */ template -void overlap(GraphCSRView const &graph, WT const *weights, WT *result); +void overlap(legacy::GraphCSRView const &graph, WT const *weights, WT *result); /** * @brief Compute overlap coefficient for select pairs of vertices @@ -119,7 +119,7 @@ void overlap(GraphCSRView const &graph, WT const *weights, WT *resul * caller */ template -void overlap_list(GraphCSRView const &graph, +void overlap_list(legacy::GraphCSRView const &graph, WT const *weights, ET num_pairs, VT const *first, @@ -181,7 +181,7 @@ void overlap_list(GraphCSRView const &graph, */ template void force_atlas2(raft::handle_t const &handle, - GraphCOOView &graph, + legacy::GraphCOOView &graph, float *pos, const int max_iter = 500, float *x_start = nullptr, @@ -276,7 +276,7 @@ float traveling_salesperson(raft::handle_t const &handle, */ template void betweenness_centrality(const raft::handle_t &handle, - GraphCSRView const &graph, + legacy::GraphCSRView const &graph, result_t *result, bool normalized = true, bool endpoints = false, @@ -320,7 +320,7 @@ void betweenness_centrality(const raft::handle_t &handle, */ template void edge_betweenness_centrality(const raft::handle_t &handle, - GraphCSRView const &graph, + legacy::GraphCSRView const &graph, result_t *result, bool normalized = true, weight_t const *weight = nullptr, @@ -363,7 +363,7 @@ enum class cugraph_cc_t { * associated with vertex id i. 
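 *
 * A minimal sketch of a call after the namespace move (illustrative; the CSR arrays,
 * vertex/edge counts, and `d_labels` are placeholders for caller-owned device data, and
 * CUGRAPH_WEAK is assumed to be the weak-connectivity enumerator of cugraph_cc_t):
 * @code
 *   cugraph::legacy::GraphCSRView<int32_t, int32_t, float> csr_view(
 *     d_offsets, d_indices, d_weights, num_vertices, num_edges);
 *   cugraph::connected_components(csr_view, cugraph::cugraph_cc_t::CUGRAPH_WEAK, d_labels);
 * @endcode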
*/ template -void connected_components(GraphCSRView const &graph, +void connected_components(legacy::GraphCSRView const &graph, cugraph_cc_t connectivity_type, VT *labels); @@ -392,8 +392,8 @@ void connected_components(GraphCSRView const &graph, * */ template -std::unique_ptr> k_truss_subgraph( - GraphCOOView const &graph, +std::unique_ptr> k_truss_subgraph( + legacy::GraphCOOView const &graph, int k, rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); @@ -430,7 +430,7 @@ std::unique_ptr> k_truss_subgraph( * @param[in] normalized If True normalize the resulting katz centrality values */ template -void katz_centrality(GraphCSRView const &graph, +void katz_centrality(legacy::GraphCSRView const &graph, result_t *result, double alpha, int max_iter, @@ -448,7 +448,7 @@ void katz_centrality(GraphCSRView const &graph, */ /* ----------------------------------------------------------------------------*/ template -void core_number(GraphCSRView const &graph, VT *core_number); +void core_number(legacy::GraphCSRView const &graph, VT *core_number); /** * @brief Compute K Core of the graph G @@ -472,8 +472,8 @@ void core_number(GraphCSRView const &graph, VT *core_number); * @param[out] out_graph Unique pointer to K Core subgraph in COO format */ template -std::unique_ptr> k_core( - GraphCOOView const &graph, +std::unique_ptr> k_core( + legacy::GraphCOOView const &graph, int k, VT const *vertex_id, VT const *core_number, @@ -498,7 +498,8 @@ std::unique_ptr> k_core( * @return Graph in COO format */ template -std::unique_ptr> get_two_hop_neighbors(GraphCSRView const &graph); +std::unique_ptr> get_two_hop_neighbors( + legacy::GraphCSRView const &graph); /** * @Synopsis Performs a single source shortest path traversal of a graph starting from a vertex. @@ -525,13 +526,13 @@ std::unique_ptr> get_two_hop_neighbors(GraphCSRView -void sssp(GraphCSRView const &graph, +void sssp(legacy::GraphCSRView const &graph, WT *distances, VT *predecessors, const VT source_vertex); // FIXME: Internally distances is of int (signed 32-bit) data type, but current -// template uses data from VT, ET, WT from he GraphCSR View even if weights +// template uses data from VT, ET, WT from the legacy::GraphCSR View even if weights // are not considered /** * @Synopsis Performs a breadth first search traversal of a graph starting from a vertex. 
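
// Illustration of what a caller looks like after this move: only the graph-view type and
// its header change namespace/path, while the algorithm signatures above are untouched.
// A self-contained sketch (the tiny 3-vertex chain and all names here are made up for the
// example) using sssp, whose relocated signature appears just above:

#include <cugraph/algorithms.hpp>
#include <cugraph/legacy/graph.hpp>  // previously <cugraph/graph.hpp>

#include <raft/handle.hpp>
#include <rmm/device_uvector.hpp>

#include <vector>

void sssp_with_legacy_view_sketch()
{
  raft::handle_t handle;

  // CSR for the chain 0 -> 1 -> 2 with unit edge weights.
  std::vector<int32_t> h_offsets{0, 1, 2, 2};
  std::vector<int32_t> h_indices{1, 2};
  std::vector<float> h_weights{1.0f, 1.0f};

  rmm::device_uvector<int32_t> offsets(h_offsets.size(), handle.get_stream_view());
  rmm::device_uvector<int32_t> indices(h_indices.size(), handle.get_stream_view());
  rmm::device_uvector<float> weights(h_weights.size(), handle.get_stream_view());
  rmm::device_uvector<float> distances(3, handle.get_stream_view());
  rmm::device_uvector<int32_t> predecessors(3, handle.get_stream_view());

  raft::update_device(offsets.data(), h_offsets.data(), h_offsets.size(), handle.get_stream());
  raft::update_device(indices.data(), h_indices.data(), h_indices.size(), handle.get_stream());
  raft::update_device(weights.data(), h_weights.data(), h_weights.size(), handle.get_stream());

  // The only caller-side change is the legacy:: qualification on the view type.
  cugraph::legacy::GraphCSRView<int32_t, int32_t, float> csr_view(
    offsets.data(), indices.data(), weights.data(), int32_t{3}, int32_t{2});

  cugraph::sssp(csr_view, distances.data(), predecessors.data(), int32_t{0});
}
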
@@ -567,7 +568,7 @@ void sssp(GraphCSRView const &graph, */ template void bfs(raft::handle_t const &handle, - GraphCSRView const &graph, + legacy::GraphCSRView const &graph, VT *distances, VT *predecessors, double *sp_counters, @@ -601,7 +602,7 @@ void bfs(raft::handle_t const &handle, */ template weight_t hungarian(raft::handle_t const &handle, - GraphCOOView const &graph, + legacy::GraphCOOView const &graph, vertex_t num_workers, vertex_t const *workers, vertex_t *assignments); @@ -634,7 +635,7 @@ weight_t hungarian(raft::handle_t const &handle, */ template weight_t hungarian(raft::handle_t const &handle, - GraphCOOView const &graph, + legacy::GraphCOOView const &graph, vertex_t num_workers, vertex_t const *workers, vertex_t *assignments, @@ -775,7 +776,7 @@ void flatten_dendrogram(raft::handle_t const &handle, */ template std::pair leiden(raft::handle_t const &handle, - GraphCSRView const &graph, + legacy::GraphCSRView const &graph, vertex_t *clustering, size_t max_iter = 100, weight_t resolution = weight_t{1}); @@ -807,7 +808,7 @@ std::pair leiden(raft::handle_t const &handle, */ template void ecg(raft::handle_t const &handle, - GraphCSRView const &graph, + legacy::GraphCSRView const &graph, weight_t min_weight, vertex_t ensemble_size, vertex_t *clustering); @@ -833,9 +834,9 @@ void ecg(raft::handle_t const &handle, * @return out_graph Unique pointer to MSF subgraph in COO format */ template -std::unique_ptr> minimum_spanning_tree( +std::unique_ptr> minimum_spanning_tree( raft::handle_t const &handle, - GraphCSRView const &graph, + legacy::GraphCSRView const &graph, rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); namespace triangle { @@ -855,7 +856,7 @@ namespace triangle { * @return The number of triangles */ template -uint64_t triangle_count(GraphCSRView const &graph); +uint64_t triangle_count(legacy::GraphCSRView const &graph); } // namespace triangle namespace subgraph { @@ -880,9 +881,8 @@ namespace subgraph { * @param[out] result a graph in COO format containing the edges in the subgraph */ template -std::unique_ptr> extract_subgraph_vertex(GraphCOOView const &graph, - VT const *vertices, - VT num_vertices); +std::unique_ptr> extract_subgraph_vertex( + legacy::GraphCOOView const &graph, VT const *vertices, VT num_vertices); } // namespace subgraph /** @@ -909,7 +909,7 @@ std::unique_ptr> extract_subgraph_vertex(GraphCOOView -void balancedCutClustering(GraphCSRView const &graph, +void balancedCutClustering(legacy::GraphCSRView const &graph, VT num_clusters, VT num_eigen_vects, WT evs_tolerance, @@ -940,7 +940,7 @@ void balancedCutClustering(GraphCSRView const &graph, * be stored */ template -void spectralModularityMaximization(GraphCSRView const &graph, +void spectralModularityMaximization(legacy::GraphCSRView const &graph, VT n_clusters, VT n_eig_vects, WT evs_tolerance, @@ -966,7 +966,7 @@ void spectralModularityMaximization(GraphCSRView const &graph, * @param[out] score Pointer to a float in which the result will be written */ template -void analyzeClustering_modularity(GraphCSRView const &graph, +void analyzeClustering_modularity(legacy::GraphCSRView const &graph, int n_clusters, VT const *clustering, WT *score); @@ -988,7 +988,7 @@ void analyzeClustering_modularity(GraphCSRView const &graph, * @param[out] score Pointer to a float in which the result will be written */ template -void analyzeClustering_edge_cut(GraphCSRView const &graph, +void analyzeClustering_edge_cut(legacy::GraphCSRView const &graph, int n_clusters, VT const *clustering, WT 
*score); @@ -1010,7 +1010,7 @@ void analyzeClustering_edge_cut(GraphCSRView const &graph, * @param[out] score Pointer to a float in which the result will be written */ template -void analyzeClustering_ratio_cut(GraphCSRView const &graph, +void analyzeClustering_ratio_cut(legacy::GraphCSRView const &graph, int n_clusters, VT const *clustering, WT *score); @@ -1046,7 +1046,7 @@ namespace gunrock { * */ template -void hits(GraphCSRView const &graph, +void hits(legacy::GraphCSRView const &graph, int max_iter, WT tolerance, WT const *starting_value, diff --git a/cpp/include/cugraph/functions.hpp b/cpp/include/cugraph/functions.hpp index 00e8648b156..a88d6cd88c7 100644 --- a/cpp/include/cugraph/functions.hpp +++ b/cpp/include/cugraph/functions.hpp @@ -18,7 +18,7 @@ #include #include -#include +#include namespace cugraph { @@ -40,8 +40,8 @@ namespace cugraph { * */ template -std::unique_ptr> coo_to_csr( - GraphCOOView const &graph, +std::unique_ptr> coo_to_csr( + legacy::GraphCOOView const &graph, rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); /** diff --git a/cpp/include/cugraph/eidecl_graph.hpp b/cpp/include/cugraph/legacy/eidecl_graph.hpp similarity index 99% rename from cpp/include/cugraph/eidecl_graph.hpp rename to cpp/include/cugraph/legacy/eidecl_graph.hpp index 3e3d9ac5b31..d636b7fba5b 100644 --- a/cpp/include/cugraph/eidecl_graph.hpp +++ b/cpp/include/cugraph/legacy/eidecl_graph.hpp @@ -16,6 +16,7 @@ #pragma once namespace cugraph { +namespace legacy { extern template class GraphViewBase; extern template class GraphViewBase; extern template class GraphViewBase; @@ -88,4 +89,5 @@ extern template class GraphCSC; extern template class GraphCSC; extern template class GraphCSC; extern template class GraphCSC; +} // namespace legacy } // namespace cugraph diff --git a/cpp/include/cugraph/eidir_graph.hpp b/cpp/include/cugraph/legacy/eidir_graph.hpp similarity index 98% rename from cpp/include/cugraph/eidir_graph.hpp rename to cpp/include/cugraph/legacy/eidir_graph.hpp index 5bd6c233641..df9f6eb8f71 100644 --- a/cpp/include/cugraph/eidir_graph.hpp +++ b/cpp/include/cugraph/legacy/eidir_graph.hpp @@ -16,6 +16,7 @@ #pragma once namespace cugraph { +namespace legacy { template class GraphViewBase; template class GraphViewBase; template class GraphViewBase; @@ -70,4 +71,5 @@ template class GraphCSC; template class GraphCSC; template class GraphCSC; template class GraphCSC; +} // namespace legacy } // namespace cugraph diff --git a/cpp/include/cugraph/graph.hpp b/cpp/include/cugraph/legacy/graph.hpp similarity index 99% rename from cpp/include/cugraph/graph.hpp rename to cpp/include/cugraph/legacy/graph.hpp index 8ea58546ce1..4b009bc7a25 100644 --- a/cpp/include/cugraph/graph.hpp +++ b/cpp/include/cugraph/legacy/graph.hpp @@ -23,6 +23,7 @@ #include namespace cugraph { +namespace legacy { enum class PropType { PROP_UNDEF, PROP_FALSE, PROP_TRUE }; @@ -672,6 +673,8 @@ struct invalid_vertex_id : invalid_idx { template struct invalid_edge_id : invalid_idx { }; + +} // namespace legacy } // namespace cugraph #include "eidecl_graph.hpp" diff --git a/cpp/include/cugraph/utilities/cython.hpp b/cpp/include/cugraph/utilities/cython.hpp index 273e55bae25..7435daffb3e 100644 --- a/cpp/include/cugraph/utilities/cython.hpp +++ b/cpp/include/cugraph/utilities/cython.hpp @@ -16,8 +16,8 @@ #pragma once #include -#include #include +#include #include #include @@ -67,12 +67,12 @@ struct graph_container_t { ~graphPtrUnion() {} void* null; - std::unique_ptr> GraphCSRViewFloatPtr; - 
std::unique_ptr> GraphCSRViewDoublePtr; - std::unique_ptr> GraphCSCViewFloatPtr; - std::unique_ptr> GraphCSCViewDoublePtr; - std::unique_ptr> GraphCOOViewFloatPtr; - std::unique_ptr> GraphCOOViewDoublePtr; + std::unique_ptr> GraphCSRViewFloatPtr; + std::unique_ptr> GraphCSRViewDoublePtr; + std::unique_ptr> GraphCSCViewFloatPtr; + std::unique_ptr> GraphCSCViewDoublePtr; + std::unique_ptr> GraphCOOViewFloatPtr; + std::unique_ptr> GraphCOOViewDoublePtr; }; graph_container_t() : graph_ptr_union{nullptr}, graph_type{graphTypeEnum::null} {} diff --git a/cpp/src/centrality/betweenness_centrality.cu b/cpp/src/centrality/betweenness_centrality.cu index 32dddd203db..2af0710d1ec 100644 --- a/cpp/src/centrality/betweenness_centrality.cu +++ b/cpp/src/centrality/betweenness_centrality.cu @@ -21,7 +21,7 @@ #include #include -#include +#include #include #include @@ -36,7 +36,7 @@ namespace detail { namespace { template void betweenness_centrality_impl(raft::handle_t const &handle, - GraphCSRView const &graph, + legacy::GraphCSRView const &graph, result_t *result, bool normalize, bool endpoints, @@ -60,7 +60,7 @@ void betweenness_centrality_impl(raft::handle_t const &handle, template void edge_betweenness_centrality_impl(raft::handle_t const &handle, - GraphCSRView const &graph, + legacy::GraphCSRView const &graph, result_t *result, bool normalize, weight_t const *weight, @@ -449,7 +449,7 @@ void BC::rescale_by_total_sources_used( template void betweenness_centrality(raft::handle_t const &handle, - GraphCSRView const &graph, + legacy::GraphCSRView const &graph, result_t *result, bool normalize, bool endpoints, @@ -488,17 +488,18 @@ void betweenness_centrality(raft::handle_t const &handle, } } -template void betweenness_centrality(const raft::handle_t &, - GraphCSRView const &, - float *, - bool, - bool, - float const *, - int, - int const *); +template void betweenness_centrality( + const raft::handle_t &, + legacy::GraphCSRView const &, + float *, + bool, + bool, + float const *, + int, + int const *); template void betweenness_centrality( const raft::handle_t &, - GraphCSRView const &, + legacy::GraphCSRView const &, double *, bool, bool, @@ -508,7 +509,7 @@ template void betweenness_centrality( template void edge_betweenness_centrality(raft::handle_t const &handle, - GraphCSRView const &graph, + legacy::GraphCSRView const &graph, result_t *result, bool normalize, weight_t const *weight, @@ -540,7 +541,7 @@ void edge_betweenness_centrality(raft::handle_t const &handle, template void edge_betweenness_centrality( const raft::handle_t &, - GraphCSRView const &, + legacy::GraphCSRView const &, float *, bool, float const *, @@ -549,7 +550,7 @@ template void edge_betweenness_centrality( template void edge_betweenness_centrality( raft::handle_t const &handle, - GraphCSRView const &, + legacy::GraphCSRView const &, double *, bool, double const *, diff --git a/cpp/src/centrality/betweenness_centrality.cuh b/cpp/src/centrality/betweenness_centrality.cuh index 418ac06faa4..9e3abca3e78 100644 --- a/cpp/src/centrality/betweenness_centrality.cuh +++ b/cpp/src/centrality/betweenness_centrality.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -23,7 +23,7 @@ namespace cugraph { namespace detail { template void betweenness_centrality(raft::handle_t const &handle, - GraphCSRView const &graph, + legacy::GraphCSRView const &graph, result_t *result, bool normalize, bool endpoints, @@ -32,7 +32,7 @@ void betweenness_centrality(raft::handle_t const &handle, vertex_t const *sources); template -void edge_betweenness_centrality(GraphCSRView const &graph, +void edge_betweenness_centrality(legacy::GraphCSRView const &graph, result_t *result, bool normalize, weight_t const *weight, @@ -53,7 +53,7 @@ class BC { public: virtual ~BC(void) {} BC(raft::handle_t const &handle, - GraphCSRView const &graph, + legacy::GraphCSRView const &graph, cudaStream_t stream = 0) : handle_(handle), graph_(graph) { @@ -79,7 +79,7 @@ class BC { // --- RAFT handle --- raft::handle_t const &handle_; // --- Information concerning the graph --- - const GraphCSRView &graph_; + const legacy::GraphCSRView &graph_; // --- These information are extracted on setup --- vertex_t number_of_vertices_; // Number of vertices in the graph vertex_t number_of_edges_; // Number of edges in the graph diff --git a/cpp/src/centrality/katz_centrality.cu b/cpp/src/centrality/katz_centrality.cu index 0622193670e..608e617e3af 100644 --- a/cpp/src/centrality/katz_centrality.cu +++ b/cpp/src/centrality/katz_centrality.cu @@ -23,13 +23,13 @@ #include #include -#include +#include #include namespace cugraph { template -void katz_centrality(GraphCSRView const &graph, +void katz_centrality(legacy::GraphCSRView const &graph, result_t *result, double alpha, int max_iter, @@ -52,6 +52,6 @@ void katz_centrality(GraphCSRView const &graph, } template void katz_centrality( - GraphCSRView const &, double *, double, int, double, bool, bool); + legacy::GraphCSRView const &, double *, double, int, double, bool, bool); } // namespace cugraph diff --git a/cpp/src/community/ecg.cu b/cpp/src/community/ecg.cu index b990055c16c..196998b38c1 100644 --- a/cpp/src/community/ecg.cu +++ b/cpp/src/community/ecg.cu @@ -134,12 +134,12 @@ namespace cugraph { template void ecg(raft::handle_t const &handle, - GraphCSRView const &graph, + legacy::GraphCSRView const &graph, weight_t min_weight, vertex_t ensemble_size, vertex_t *clustering) { - using graph_type = GraphCSRView; + using graph_type = legacy::GraphCSRView; CUGRAPH_EXPECTS(graph.edge_data != nullptr, "Invalid input argument: ecg expects a weighted graph"); @@ -190,7 +190,7 @@ void ecg(raft::handle_t const &handle, // Run Louvain on the original graph using the computed weights // (pass max_level = 100 for a "full run") - GraphCSRView louvain_graph; + legacy::GraphCSRView louvain_graph; louvain_graph.indices = graph.indices; louvain_graph.offsets = graph.offsets; louvain_graph.edge_data = ecg_weights_v.data(); @@ -201,14 +201,16 @@ void ecg(raft::handle_t const &handle, } // Explicit template instantiations. 
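
// Example of a call to the entry point above (illustrative only; `handle` is a
// raft::handle_t, `csr_view` a legacy::GraphCSRView<int32_t, int32_t, float> built over
// device CSR arrays, `d_clustering` a device array with one slot per vertex, and the
// min_weight / ensemble_size values are arbitrary):
//
//   cugraph::ecg(handle, csr_view, float{0.05}, int32_t{16}, d_clustering);
//
// The explicit instantiations that follow fix the supported vertex/weight type
// combinations.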
-template void ecg(raft::handle_t const &, - GraphCSRView const &graph, - float min_weight, - int32_t ensemble_size, - int32_t *clustering); -template void ecg(raft::handle_t const &, - GraphCSRView const &graph, - double min_weight, - int32_t ensemble_size, - int32_t *clustering); +template void ecg( + raft::handle_t const &, + legacy::GraphCSRView const &graph, + float min_weight, + int32_t ensemble_size, + int32_t *clustering); +template void ecg( + raft::handle_t const &, + legacy::GraphCSRView const &graph, + double min_weight, + int32_t ensemble_size, + int32_t *clustering); } // namespace cugraph diff --git a/cpp/src/community/egonet.cu b/cpp/src/community/egonet.cu index 6b93f561a45..d4f10f991b7 100644 --- a/cpp/src/community/egonet.cu +++ b/cpp/src/community/egonet.cu @@ -28,7 +28,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/src/community/extract_subgraph_by_vertex.cu b/cpp/src/community/extract_subgraph_by_vertex.cu index 4bfe57c2c50..305bb566af6 100644 --- a/cpp/src/community/extract_subgraph_by_vertex.cu +++ b/cpp/src/community/extract_subgraph_by_vertex.cu @@ -15,7 +15,7 @@ */ #include -#include +#include #include #include @@ -24,8 +24,8 @@ namespace { template -std::unique_ptr> extract_subgraph_by_vertices( - cugraph::GraphCOOView const &graph, +std::unique_ptr> extract_subgraph_by_vertices( + cugraph::legacy::GraphCOOView const &graph, vertex_t const *vertices, vertex_t num_vertices, cudaStream_t stream) @@ -70,7 +70,7 @@ std::unique_ptr> extract_subgraph_ }); if (count > 0) { - auto result = std::make_unique>( + auto result = std::make_unique>( num_vertices, count, has_weight); vertex_t *d_new_src = result->src_indices(); @@ -106,7 +106,8 @@ std::unique_ptr> extract_subgraph_ return result; } else { - return std::make_unique>(0, 0, has_weight); + return std::make_unique>( + 0, 0, has_weight); } } } // namespace @@ -115,9 +116,8 @@ namespace cugraph { namespace subgraph { template -std::unique_ptr> extract_subgraph_vertex(GraphCOOView const &graph, - VT const *vertices, - VT num_vertices) +std::unique_ptr> extract_subgraph_vertex( + legacy::GraphCOOView const &graph, VT const *vertices, VT num_vertices) { CUGRAPH_EXPECTS(vertices != nullptr, "Invalid input argument: vertices must be non null"); @@ -130,14 +130,12 @@ std::unique_ptr> extract_subgraph_vertex(GraphCOOView> -extract_subgraph_vertex(GraphCOOView const &, - int32_t const *, - int32_t); -template std::unique_ptr> -extract_subgraph_vertex(GraphCOOView const &, - int32_t const *, - int32_t); +template std::unique_ptr> +extract_subgraph_vertex( + legacy::GraphCOOView const &, int32_t const *, int32_t); +template std::unique_ptr> +extract_subgraph_vertex( + legacy::GraphCOOView const &, int32_t const *, int32_t); } // namespace subgraph } // namespace cugraph diff --git a/cpp/src/community/ktruss.cu b/cpp/src/community/ktruss.cu index 224f84f6718..9297e416287 100644 --- a/cpp/src/community/ktruss.cu +++ b/cpp/src/community/ktruss.cu @@ -35,9 +35,8 @@ namespace cugraph { namespace detail { template -std::unique_ptr> ktruss_subgraph_impl(GraphCOOView const &graph, - int k, - rmm::mr::device_memory_resource *mr) +std::unique_ptr> ktruss_subgraph_impl( + legacy::GraphCOOView const &graph, int k, rmm::mr::device_memory_resource *mr) { using HornetGraph = hornet::gpu::Hornet; using UpdatePtr = hornet::BatchUpdatePtr; @@ -68,7 +67,7 @@ std::unique_ptr> ktruss_subgraph_impl(GraphCOOView>( + auto out_graph = std::make_unique>( graph.number_of_vertices, kt.getGraphEdgeCount(), graph.has_data(), 
stream, mr); kt.copyGraph(out_graph->src_indices(), out_graph->dst_indices()); @@ -79,8 +78,8 @@ std::unique_ptr> ktruss_subgraph_impl(GraphCOOView -std::unique_ptr> weighted_ktruss_subgraph_impl( - GraphCOOView const &graph, int k, rmm::mr::device_memory_resource *mr) +std::unique_ptr> weighted_ktruss_subgraph_impl( + legacy::GraphCOOView const &graph, int k, rmm::mr::device_memory_resource *mr) { using HornetGraph = hornet::gpu::Hornet>; using UpdatePtr = hornet::BatchUpdatePtr, hornet::DeviceType::DEVICE>; @@ -111,7 +110,7 @@ std::unique_ptr> weighted_ktruss_subgraph_impl( kt.runForK(k); CUGRAPH_EXPECTS(cudaPeekAtLastError() == cudaSuccess, "KTruss : Failed to run"); - auto out_graph = std::make_unique>( + auto out_graph = std::make_unique>( graph.number_of_vertices, kt.getGraphEdgeCount(), graph.has_data(), stream, mr); kt.copyGraph(out_graph->src_indices(), out_graph->dst_indices(), out_graph->edge_data()); @@ -125,9 +124,8 @@ std::unique_ptr> weighted_ktruss_subgraph_impl( } // namespace detail template -std::unique_ptr> k_truss_subgraph(GraphCOOView const &graph, - int k, - rmm::mr::device_memory_resource *mr) +std::unique_ptr> k_truss_subgraph( + legacy::GraphCOOView const &graph, int k, rmm::mr::device_memory_resource *mr) { CUGRAPH_EXPECTS(graph.src_indices != nullptr, "Graph source indices cannot be a nullptr"); CUGRAPH_EXPECTS(graph.dst_indices != nullptr, "Graph destination indices cannot be a nullptr"); @@ -139,10 +137,14 @@ std::unique_ptr> k_truss_subgraph(GraphCOOView } } -template std::unique_ptr> k_truss_subgraph( - GraphCOOView const &, int, rmm::mr::device_memory_resource *); +template std::unique_ptr> +k_truss_subgraph(legacy::GraphCOOView const &, + int, + rmm::mr::device_memory_resource *); -template std::unique_ptr> k_truss_subgraph( - GraphCOOView const &, int, rmm::mr::device_memory_resource *); +template std::unique_ptr> +k_truss_subgraph(legacy::GraphCOOView const &, + int, + rmm::mr::device_memory_resource *); } // namespace cugraph diff --git a/cpp/src/community/leiden.cu b/cpp/src/community/leiden.cu index 427e62d3286..703738fc190 100644 --- a/cpp/src/community/leiden.cu +++ b/cpp/src/community/leiden.cu @@ -23,7 +23,7 @@ namespace cugraph { template std::pair leiden(raft::handle_t const &handle, - GraphCSRView const &graph, + legacy::GraphCSRView const &graph, vertex_t *clustering, size_t max_level, weight_t resolution) @@ -34,7 +34,7 @@ std::pair leiden(raft::handle_t const &handle, "Invalid input argument: clustering is null, should be a device pointer to " "memory for storing the result"); - Leiden> runner(handle, graph); + Leiden> runner(handle, graph); weight_t wt = runner(max_level, resolution); rmm::device_uvector vertex_ids_v(graph.number_of_vertices, handle.get_stream()); @@ -56,11 +56,14 @@ std::pair leiden(raft::handle_t const &handle, } // Explicit template instantations -template std::pair leiden( - raft::handle_t const &, GraphCSRView const &, int32_t *, size_t, float); +template std::pair leiden(raft::handle_t const &, + legacy::GraphCSRView const &, + int32_t *, + size_t, + float); template std::pair leiden(raft::handle_t const &, - GraphCSRView const &, + legacy::GraphCSRView const &, int32_t *, size_t, double); diff --git a/cpp/src/community/leiden.cuh b/cpp/src/community/leiden.cuh index e7e358777a4..6f0a0b6e8b0 100644 --- a/cpp/src/community/leiden.cuh +++ b/cpp/src/community/leiden.cuh @@ -122,11 +122,11 @@ class Leiden : public Louvain { // Our copy of the graph. Each iteration of the outer loop will // shrink this copy of the graph. 
// - GraphCSRView current_graph(this->offsets_v_.data(), - this->indices_v_.data(), - this->weights_v_.data(), - this->number_of_vertices_, - this->number_of_edges_); + legacy::GraphCSRView current_graph(this->offsets_v_.data(), + this->indices_v_.data(), + this->weights_v_.data(), + this->number_of_vertices_, + this->number_of_edges_); current_graph.get_source_indices(this->src_indices_v_.data()); diff --git a/cpp/src/community/louvain.cu b/cpp/src/community/louvain.cu index 842a7f39750..3fee1f58577 100644 --- a/cpp/src/community/louvain.cu +++ b/cpp/src/community/louvain.cu @@ -31,14 +31,14 @@ namespace detail { template std::pair>, weight_t> louvain( raft::handle_t const &handle, - GraphCSRView const &graph_view, + legacy::GraphCSRView const &graph_view, size_t max_level, weight_t resolution) { CUGRAPH_EXPECTS(graph_view.edge_data != nullptr, "Invalid input argument: louvain expects a weighted graph"); - Louvain> runner(handle, graph_view); + Louvain> runner(handle, graph_view); weight_t wt = runner(max_level, resolution); return std::make_pair(runner.move_dendrogram(), wt); @@ -61,7 +61,7 @@ std::pair>, weight_t> louvain( template void flatten_dendrogram(raft::handle_t const &handle, - GraphCSRView const &graph_view, + legacy::GraphCSRView const &graph_view, Dendrogram const &dendrogram, vertex_t *clustering) { @@ -202,10 +202,13 @@ template std::pair>, double> louvain( size_t, double); -template std::pair louvain( - raft::handle_t const &, GraphCSRView const &, int32_t *, size_t, float); +template std::pair louvain(raft::handle_t const &, + legacy::GraphCSRView const &, + int32_t *, + size_t, + float); template std::pair louvain(raft::handle_t const &, - GraphCSRView const &, + legacy::GraphCSRView const &, int32_t *, size_t, double); @@ -287,4 +290,4 @@ template std::pair louvain( } // namespace cugraph -#include +#include diff --git a/cpp/src/community/louvain.cuh b/cpp/src/community/louvain.cuh index 2c6cf31a61e..a09f648b37a 100644 --- a/cpp/src/community/louvain.cuh +++ b/cpp/src/community/louvain.cuh @@ -15,7 +15,7 @@ */ #pragma once -#include +#include #include #include @@ -157,11 +157,11 @@ class Louvain { // Our copy of the graph. Each iteration of the outer loop will // shrink this copy of the graph. 
// - GraphCSRView current_graph(offsets_v_.data(), - indices_v_.data(), - weights_v_.data(), - number_of_vertices_, - number_of_edges_); + legacy::GraphCSRView current_graph(offsets_v_.data(), + indices_v_.data(), + weights_v_.data(), + number_of_vertices_, + number_of_edges_); current_graph.get_source_indices(src_indices_v_.data()); diff --git a/cpp/src/community/spectral_clustering.cu b/cpp/src/community/spectral_clustering.cu index 06b62c5019d..7fc52d6ed5b 100644 --- a/cpp/src/community/spectral_clustering.cu +++ b/cpp/src/community/spectral_clustering.cu @@ -26,7 +26,7 @@ #include #include -#include +#include #include #include @@ -39,7 +39,7 @@ namespace ext_raft { namespace detail { template -void balancedCutClustering_impl(GraphCSRView const &graph, +void balancedCutClustering_impl(legacy::GraphCSRView const &graph, vertex_t n_clusters, vertex_t n_eig_vects, weight_t evs_tolerance, @@ -109,16 +109,17 @@ void balancedCutClustering_impl(GraphCSRView const & } template -void spectralModularityMaximization_impl(GraphCSRView const &graph, - vertex_t n_clusters, - vertex_t n_eig_vects, - weight_t evs_tolerance, - int evs_max_iter, - weight_t kmean_tolerance, - int kmean_max_iter, - vertex_t *clustering, - weight_t *eig_vals, - weight_t *eig_vects) +void spectralModularityMaximization_impl( + legacy::GraphCSRView const &graph, + vertex_t n_clusters, + vertex_t n_eig_vects, + weight_t evs_tolerance, + int evs_max_iter, + weight_t kmean_tolerance, + int kmean_max_iter, + vertex_t *clustering, + weight_t *eig_vals, + weight_t *eig_vects) { RAFT_EXPECTS(graph.edge_data != nullptr, "API error, graph must have weights"); RAFT_EXPECTS(evs_tolerance >= weight_t{0.0}, @@ -186,7 +187,7 @@ void spectralModularityMaximization_impl(GraphCSRView -void analyzeModularityClustering_impl(GraphCSRView const &graph, +void analyzeModularityClustering_impl(legacy::GraphCSRView const &graph, int n_clusters, vertex_t const *clustering, weight_t *modularity) @@ -207,7 +208,7 @@ void analyzeModularityClustering_impl(GraphCSRView c } template -void analyzeBalancedCut_impl(GraphCSRView const &graph, +void analyzeBalancedCut_impl(legacy::GraphCSRView const &graph, vertex_t n_clusters, vertex_t const *clustering, weight_t *edgeCut, @@ -240,7 +241,7 @@ void analyzeBalancedCut_impl(GraphCSRView const &gra } // namespace detail template -void balancedCutClustering(GraphCSRView const &graph, +void balancedCutClustering(legacy::GraphCSRView const &graph, VT num_clusters, VT num_eigen_vects, WT evs_tolerance, @@ -265,7 +266,7 @@ void balancedCutClustering(GraphCSRView const &graph, } template -void spectralModularityMaximization(GraphCSRView const &graph, +void spectralModularityMaximization(legacy::GraphCSRView const &graph, VT n_clusters, VT n_eigen_vects, WT evs_tolerance, @@ -290,7 +291,7 @@ void spectralModularityMaximization(GraphCSRView const &graph, } template -void analyzeClustering_modularity(GraphCSRView const &graph, +void analyzeClustering_modularity(legacy::GraphCSRView const &graph, int n_clusters, VT const *clustering, WT *score) @@ -299,7 +300,7 @@ void analyzeClustering_modularity(GraphCSRView const &graph, } template -void analyzeClustering_edge_cut(GraphCSRView const &graph, +void analyzeClustering_edge_cut(legacy::GraphCSRView const &graph, int n_clusters, VT const *clustering, WT *score) @@ -309,7 +310,7 @@ void analyzeClustering_edge_cut(GraphCSRView const &graph, } template -void analyzeClustering_ratio_cut(GraphCSRView const &graph, +void analyzeClustering_ratio_cut(legacy::GraphCSRView const &graph, 
int n_clusters, VT const *clustering, WT *score) @@ -319,37 +320,25 @@ void analyzeClustering_ratio_cut(GraphCSRView const &graph, } template void balancedCutClustering( - GraphCSRView const &, int, int, float, int, float, int, int *); + legacy::GraphCSRView const &, int, int, float, int, float, int, int *); template void balancedCutClustering( - GraphCSRView const &, int, int, double, int, double, int, int *); + legacy::GraphCSRView const &, int, int, double, int, double, int, int *); template void spectralModularityMaximization( - GraphCSRView const &, int, int, float, int, float, int, int *); + legacy::GraphCSRView const &, int, int, float, int, float, int, int *); template void spectralModularityMaximization( - GraphCSRView const &, int, int, double, int, double, int, int *); -template void analyzeClustering_modularity(GraphCSRView const &, - int, - int const *, - float *); -template void analyzeClustering_modularity(GraphCSRView const &, - int, - int const *, - double *); -template void analyzeClustering_edge_cut(GraphCSRView const &, - int, - int const *, - float *); -template void analyzeClustering_edge_cut(GraphCSRView const &, - int, - int const *, - double *); -template void analyzeClustering_ratio_cut(GraphCSRView const &, - int, - int const *, - float *); -template void analyzeClustering_ratio_cut(GraphCSRView const &, - int, - int const *, - double *); + legacy::GraphCSRView const &, int, int, double, int, double, int, int *); +template void analyzeClustering_modularity( + legacy::GraphCSRView const &, int, int const *, float *); +template void analyzeClustering_modularity( + legacy::GraphCSRView const &, int, int const *, double *); +template void analyzeClustering_edge_cut( + legacy::GraphCSRView const &, int, int const *, float *); +template void analyzeClustering_edge_cut( + legacy::GraphCSRView const &, int, int const *, double *); +template void analyzeClustering_ratio_cut( + legacy::GraphCSRView const &, int, int const *, float *); +template void analyzeClustering_ratio_cut( + legacy::GraphCSRView const &, int, int const *, double *); } // namespace ext_raft } // namespace cugraph diff --git a/cpp/src/community/triangles_counting.cu b/cpp/src/community/triangles_counting.cu index cd5b8bc6614..9aaf79490cb 100644 --- a/cpp/src/community/triangles_counting.cu +++ b/cpp/src/community/triangles_counting.cu @@ -18,7 +18,7 @@ #include #include -#include +#include #include @@ -841,7 +841,7 @@ void TrianglesCount::count() } // namespace template -uint64_t triangle_count(GraphCSRView const &graph) +uint64_t triangle_count(legacy::GraphCSRView const &graph) { TrianglesCount counter( graph.number_of_vertices, graph.number_of_edges, graph.offsets, graph.indices); @@ -851,7 +851,7 @@ uint64_t triangle_count(GraphCSRView const &graph) } template uint64_t triangle_count( - GraphCSRView const &); + legacy::GraphCSRView const &); } // namespace triangle } // namespace cugraph diff --git a/cpp/src/components/connectivity.cu b/cpp/src/components/connectivity.cu index d5768c7f09f..0c0021d9e43 100644 --- a/cpp/src/components/connectivity.cu +++ b/cpp/src/components/connectivity.cu @@ -21,7 +21,7 @@ #include #include -#include +#include #include #include #include @@ -57,7 +57,7 @@ namespace detail { */ template std::enable_if_t::value> connected_components_impl( - GraphCSRView const &graph, + legacy::GraphCSRView const &graph, cugraph_cc_t connectivity_type, VT *labels, cudaStream_t stream) @@ -84,7 +84,7 @@ std::enable_if_t::value> connected_components_impl( } // namespace detail template 
-void connected_components(GraphCSRView const &graph, +void connected_components(legacy::GraphCSRView const &graph, cugraph_cc_t connectivity_type, VT *labels) { @@ -96,8 +96,8 @@ void connected_components(GraphCSRView const &graph, } template void connected_components( - GraphCSRView const &, cugraph_cc_t, int32_t *); + legacy::GraphCSRView const &, cugraph_cc_t, int32_t *); template void connected_components( - GraphCSRView const &, cugraph_cc_t, int64_t *); + legacy::GraphCSRView const &, cugraph_cc_t, int64_t *); } // namespace cugraph diff --git a/cpp/src/converters/COOtoCSR.cu b/cpp/src/converters/COOtoCSR.cu index 9164d7b9562..2f6eac8ce8f 100644 --- a/cpp/src/converters/COOtoCSR.cu +++ b/cpp/src/converters/COOtoCSR.cu @@ -20,55 +20,65 @@ namespace cugraph { // Explicit instantiation for uint32_t + float -template std::unique_ptr> coo_to_csr( - GraphCOOView const &graph, rmm::mr::device_memory_resource *); +template std::unique_ptr> +coo_to_csr(legacy::GraphCOOView const &graph, + rmm::mr::device_memory_resource *); // Explicit instantiation for uint32_t + double -template std::unique_ptr> -coo_to_csr(GraphCOOView const &graph, - rmm::mr::device_memory_resource *); +template std::unique_ptr> +coo_to_csr( + legacy::GraphCOOView const &graph, rmm::mr::device_memory_resource *); // Explicit instantiation for int + float -template std::unique_ptr> coo_to_csr( - GraphCOOView const &graph, rmm::mr::device_memory_resource *); +template std::unique_ptr> +coo_to_csr(legacy::GraphCOOView const &graph, + rmm::mr::device_memory_resource *); // Explicit instantiation for int + double -template std::unique_ptr> coo_to_csr( - GraphCOOView const &graph, rmm::mr::device_memory_resource *); +template std::unique_ptr> +coo_to_csr(legacy::GraphCOOView const &graph, + rmm::mr::device_memory_resource *); // Explicit instantiation for int64_t + float -template std::unique_ptr> coo_to_csr( - GraphCOOView const &graph, rmm::mr::device_memory_resource *); +template std::unique_ptr> +coo_to_csr(legacy::GraphCOOView const &graph, + rmm::mr::device_memory_resource *); // Explicit instantiation for int64_t + double -template std::unique_ptr> coo_to_csr( - GraphCOOView const &graph, rmm::mr::device_memory_resource *); +template std::unique_ptr> +coo_to_csr(legacy::GraphCOOView const &graph, + rmm::mr::device_memory_resource *); // in-place versions: // // Explicit instantiation for uint32_t + float template void coo_to_csr_inplace( - GraphCOOView &graph, GraphCSRView &result); + legacy::GraphCOOView &graph, + legacy::GraphCSRView &result); // Explicit instantiation for uint32_t + double template void coo_to_csr_inplace( - GraphCOOView &graph, - GraphCSRView &result); + legacy::GraphCOOView &graph, + legacy::GraphCSRView &result); // Explicit instantiation for int + float template void coo_to_csr_inplace( - GraphCOOView &graph, GraphCSRView &result); + legacy::GraphCOOView &graph, + legacy::GraphCSRView &result); // Explicit instantiation for int + double template void coo_to_csr_inplace( - GraphCOOView &graph, GraphCSRView &result); + legacy::GraphCOOView &graph, + legacy::GraphCSRView &result); // Explicit instantiation for int64_t + float template void coo_to_csr_inplace( - GraphCOOView &graph, GraphCSRView &result); + legacy::GraphCOOView &graph, + legacy::GraphCSRView &result); // Explicit instantiation for int64_t + double template void coo_to_csr_inplace( - GraphCOOView &graph, GraphCSRView &result); + legacy::GraphCOOView &graph, + legacy::GraphCSRView &result); } // namespace cugraph diff --git 
a/cpp/src/converters/COOtoCSR.cuh b/cpp/src/converters/COOtoCSR.cuh index 7dcf28cbb0f..10899230b2a 100644 --- a/cpp/src/converters/COOtoCSR.cuh +++ b/cpp/src/converters/COOtoCSR.cuh @@ -38,7 +38,7 @@ #include -#include +#include namespace cugraph { namespace detail { @@ -60,7 +60,7 @@ namespace detail { * @param[out] result Total number of vertices */ template -VT sort(GraphCOOView &graph, rmm::cuda_stream_view stream_view) +VT sort(legacy::GraphCOOView &graph, rmm::cuda_stream_view stream_view) { VT max_src_id; VT max_dst_id; @@ -144,29 +144,30 @@ rmm::device_buffer create_offset(VT *source, } // namespace detail template -std::unique_ptr> coo_to_csr(GraphCOOView const &graph, - rmm::mr::device_memory_resource *mr) +std::unique_ptr> coo_to_csr( + legacy::GraphCOOView const &graph, rmm::mr::device_memory_resource *mr) { rmm::cuda_stream_view stream_view; - GraphCOO temp_graph(graph, stream_view.value(), mr); - GraphCOOView temp_graph_view = temp_graph.view(); - VT total_vertex_count = detail::sort(temp_graph_view, stream_view); - rmm::device_buffer offsets = detail::create_offset( + legacy::GraphCOO temp_graph(graph, stream_view.value(), mr); + legacy::GraphCOOView temp_graph_view = temp_graph.view(); + VT total_vertex_count = detail::sort(temp_graph_view, stream_view); + rmm::device_buffer offsets = detail::create_offset( temp_graph.src_indices(), total_vertex_count, temp_graph.number_of_edges(), stream_view, mr); auto coo_contents = temp_graph.release(); - GraphSparseContents csr_contents{ + legacy::GraphSparseContents csr_contents{ total_vertex_count, coo_contents.number_of_edges, std::make_unique(std::move(offsets)), std::move(coo_contents.dst_indices), std::move(coo_contents.edge_data)}; - return std::make_unique>(std::move(csr_contents)); + return std::make_unique>(std::move(csr_contents)); } template -void coo_to_csr_inplace(GraphCOOView &graph, GraphCSRView &result) +void coo_to_csr_inplace(legacy::GraphCOOView &graph, + legacy::GraphCSRView &result) { rmm::cuda_stream_view stream_view; @@ -188,60 +189,65 @@ void coo_to_csr_inplace(GraphCOOView &graph, GraphCSRView> -coo_to_csr(GraphCOOView const &graph, +extern template std::unique_ptr> +coo_to_csr(legacy::GraphCOOView const &graph, rmm::mr::device_memory_resource *); // EIDecl for uint32_t + double -extern template std::unique_ptr> -coo_to_csr(GraphCOOView const &graph, - rmm::mr::device_memory_resource *); +extern template std::unique_ptr> +coo_to_csr( + legacy::GraphCOOView const &graph, rmm::mr::device_memory_resource *); // EIDecl for int + float -extern template std::unique_ptr> -coo_to_csr(GraphCOOView const &graph, +extern template std::unique_ptr> +coo_to_csr(legacy::GraphCOOView const &graph, rmm::mr::device_memory_resource *); // EIDecl for int + double -extern template std::unique_ptr> -coo_to_csr(GraphCOOView const &graph, +extern template std::unique_ptr> +coo_to_csr(legacy::GraphCOOView const &graph, rmm::mr::device_memory_resource *); // EIDecl for int64_t + float -extern template std::unique_ptr> -coo_to_csr(GraphCOOView const &graph, +extern template std::unique_ptr> +coo_to_csr(legacy::GraphCOOView const &graph, rmm::mr::device_memory_resource *); // EIDecl for int64_t + double -extern template std::unique_ptr> -coo_to_csr(GraphCOOView const &graph, +extern template std::unique_ptr> +coo_to_csr(legacy::GraphCOOView const &graph, rmm::mr::device_memory_resource *); // in-place versions: // // EIDecl for uint32_t + float extern template void coo_to_csr_inplace( - GraphCOOView &graph, GraphCSRView &result); + 
legacy::GraphCOOView &graph, + legacy::GraphCSRView &result); // EIDecl for uint32_t + double extern template void coo_to_csr_inplace( - GraphCOOView &graph, - GraphCSRView &result); + legacy::GraphCOOView &graph, + legacy::GraphCSRView &result); // EIDecl for int + float extern template void coo_to_csr_inplace( - GraphCOOView &graph, GraphCSRView &result); + legacy::GraphCOOView &graph, + legacy::GraphCSRView &result); // EIDecl for int + double extern template void coo_to_csr_inplace( - GraphCOOView &graph, GraphCSRView &result); + legacy::GraphCOOView &graph, + legacy::GraphCSRView &result); // EIDecl for int64_t + float extern template void coo_to_csr_inplace( - GraphCOOView &graph, GraphCSRView &result); + legacy::GraphCOOView &graph, + legacy::GraphCSRView &result); // EIDecl for int64_t + double extern template void coo_to_csr_inplace( - GraphCOOView &graph, GraphCSRView &result); + legacy::GraphCOOView &graph, + legacy::GraphCSRView &result); } // namespace cugraph diff --git a/cpp/src/converters/permute_graph.cuh b/cpp/src/converters/permute_graph.cuh index aa64cf5ae11..5f9cd8d7d7f 100644 --- a/cpp/src/converters/permute_graph.cuh +++ b/cpp/src/converters/permute_graph.cuh @@ -14,7 +14,7 @@ * limitations under the License. */ #include -#include +#include #include #include #include "converters/COOtoCSR.cuh" @@ -42,9 +42,9 @@ struct permutation_functor { * @return The permuted graph. */ template -void permute_graph(GraphCSRView const &graph, +void permute_graph(legacy::GraphCSRView const &graph, vertex_t const *permutation, - GraphCSRView result, + legacy::GraphCSRView result, cudaStream_t stream = 0) { // Create a COO out of the CSR @@ -76,7 +76,7 @@ void permute_graph(GraphCSRView const &graph, d_dst, pf); - GraphCOOView graph_coo; + legacy::GraphCOOView graph_coo; graph_coo.number_of_vertices = graph.number_of_vertices; graph_coo.number_of_edges = graph.number_of_edges; diff --git a/cpp/src/cores/core_number.cu b/cpp/src/cores/core_number.cu index 419232e8deb..74b3070ca8e 100644 --- a/cpp/src/cores/core_number.cu +++ b/cpp/src/cores/core_number.cu @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include //#include @@ -25,7 +25,7 @@ namespace cugraph { namespace detail { template -void core_number(GraphCSRView const &graph, int *core_number) +void core_number(legacy::GraphCSRView const &graph, int *core_number) { using HornetGraph = hornet::gpu::HornetStatic; using HornetInit = hornet::HornetInit; @@ -52,8 +52,8 @@ struct FilterEdges { }; template -void extract_edges(GraphCOOView const &i_graph, - GraphCOOView &o_graph, +void extract_edges(legacy::GraphCOOView const &i_graph, + legacy::GraphCOOView &o_graph, VT *d_core, int k) { @@ -96,8 +96,8 @@ void extract_edges(GraphCOOView const &i_graph, // i.e. All edges (s,d,w) in in_graph are copied over to out_graph // if core_num[s] and core_num[d] are greater than or equal to k. 
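The comment above summarizes the k-core edge filter that extract_subgraph applies on the device with thrust::count_if/copy_if over a zip iterator of (src_indices, dst_indices). A minimal host-side sketch of that predicate, using hypothetical names (src, dst, w, core_num) rather than the patch's FilterEdges functor, is:

#include <cstddef>
#include <tuple>
#include <vector>

// Keep edge (s, d, w) only when both endpoints have core number >= k.
std::vector<std::tuple<int, int, float>> filter_kcore_edges(std::vector<int> const& src,
                                                            std::vector<int> const& dst,
                                                            std::vector<float> const& w,
                                                            std::vector<int> const& core_num,
                                                            int k)
{
  std::vector<std::tuple<int, int, float>> out;
  for (std::size_t e = 0; e < src.size(); ++e) {
    if (core_num[src[e]] >= k && core_num[dst[e]] >= k) { out.emplace_back(src[e], dst[e], w[e]); }
  }
  return out;
}

The extract_edges/extract_subgraph code below applies the same endpoint test in parallel, first counting the surviving edges to size the output GraphCOO and then copying them.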
template -std::unique_ptr> extract_subgraph( - GraphCOOView const &in_graph, +std::unique_ptr> extract_subgraph( + legacy::GraphCOOView const &in_graph, int const *vid, int const *core_num, int k, @@ -119,7 +119,7 @@ std::unique_ptr> extract_subgraph( auto edge = thrust::make_zip_iterator(thrust::make_tuple(in_graph.src_indices, in_graph.dst_indices)); - auto out_graph = std::make_unique>( + auto out_graph = std::make_unique>( in_graph.number_of_vertices, thrust::count_if(rmm::exec_policy(stream)->on(stream), edge, @@ -129,7 +129,7 @@ std::unique_ptr> extract_subgraph( stream, mr); - GraphCOOView out_graph_view = out_graph->view(); + legacy::GraphCOOView out_graph_view = out_graph->view(); extract_edges(in_graph, out_graph_view, d_sorted_core_num, k); return out_graph; @@ -138,18 +138,19 @@ std::unique_ptr> extract_subgraph( } // namespace detail template -void core_number(GraphCSRView const &graph, VT *core_number) +void core_number(legacy::GraphCSRView const &graph, VT *core_number) { return detail::core_number(graph, core_number); } template -std::unique_ptr> k_core(GraphCOOView const &in_graph, - int k, - VT const *vertex_id, - VT const *core_number, - VT num_vertex_ids, - rmm::mr::device_memory_resource *mr) +std::unique_ptr> k_core( + legacy::GraphCOOView const &in_graph, + int k, + VT const *vertex_id, + VT const *core_number, + VT num_vertex_ids, + rmm::mr::device_memory_resource *mr) { CUGRAPH_EXPECTS(vertex_id != nullptr, "Invalid input argument: vertex_id is NULL"); CUGRAPH_EXPECTS(core_number != nullptr, "Invalid input argument: core_number is NULL"); @@ -158,21 +159,21 @@ std::unique_ptr> k_core(GraphCOOView const &in_ return detail::extract_subgraph(in_graph, vertex_id, core_number, k, num_vertex_ids, mr); } -template void core_number(GraphCSRView const &, - int32_t *core_number); -template std::unique_ptr> k_core( - GraphCOOView const &, - int, - int32_t const *, - int32_t const *, - int32_t, - rmm::mr::device_memory_resource *); -template std::unique_ptr> k_core( - GraphCOOView const &, +template void core_number( + legacy::GraphCSRView const &, int32_t *core_number); +template std::unique_ptr> k_core( + legacy::GraphCOOView const &, int, int32_t const *, int32_t const *, int32_t, rmm::mr::device_memory_resource *); +template std::unique_ptr> +k_core(legacy::GraphCOOView const &, + int, + int32_t const *, + int32_t const *, + int32_t, + rmm::mr::device_memory_resource *); } // namespace cugraph diff --git a/cpp/src/layout/barnes_hut.hpp b/cpp/src/layout/barnes_hut.hpp index 4cbd8fbd668..2d9265be2d3 100644 --- a/cpp/src/layout/barnes_hut.hpp +++ b/cpp/src/layout/barnes_hut.hpp @@ -23,8 +23,8 @@ #include #include -#include #include +#include #include #include @@ -37,7 +37,7 @@ namespace detail { template void barnes_hut(raft::handle_t const &handle, - GraphCOOView &graph, + legacy::GraphCOOView &graph, float *pos, const int max_iter = 500, float *x_start = nullptr, @@ -160,7 +160,7 @@ void barnes_hut(raft::handle_t const &handle, sort(graph, stream_view.value()); CHECK_CUDA(stream_view.value()); - graph.degree(massl, cugraph::DegreeDirection::OUT); + graph.degree(massl, cugraph::legacy::DegreeDirection::OUT); CHECK_CUDA(stream_view.value()); const vertex_t *row = graph.src_indices; diff --git a/cpp/src/layout/exact_fa2.hpp b/cpp/src/layout/exact_fa2.hpp index 567aa8c90c6..172ac5b8000 100644 --- a/cpp/src/layout/exact_fa2.hpp +++ b/cpp/src/layout/exact_fa2.hpp @@ -21,8 +21,8 @@ #include -#include #include +#include #include #include @@ -35,7 +35,7 @@ namespace detail { 
template void exact_fa2(raft::handle_t const &handle, - GraphCOOView &graph, + legacy::GraphCOOView &graph, float *pos, const int max_iter = 500, float *x_start = nullptr, @@ -92,7 +92,7 @@ void exact_fa2(raft::handle_t const &handle, sort(graph, stream_view.value()); CHECK_CUDA(stream_view.value()); - graph.degree(d_mass, cugraph::DegreeDirection::OUT); + graph.degree(d_mass, cugraph::legacy::DegreeDirection::OUT); CHECK_CUDA(stream_view.value()); const vertex_t *row = graph.src_indices; diff --git a/cpp/src/layout/force_atlas2.cu b/cpp/src/layout/force_atlas2.cu index 86c95cc883e..50f01408775 100644 --- a/cpp/src/layout/force_atlas2.cu +++ b/cpp/src/layout/force_atlas2.cu @@ -21,7 +21,7 @@ namespace cugraph { template void force_atlas2(raft::handle_t const &handle, - GraphCOOView &graph, + legacy::GraphCOOView &graph, float *pos, const int max_iter, float *x_start, @@ -81,7 +81,7 @@ void force_atlas2(raft::handle_t const &handle, } template void force_atlas2(raft::handle_t const &handle, - GraphCOOView &graph, + legacy::GraphCOOView &graph, float *pos, const int max_iter, float *x_start, @@ -100,7 +100,7 @@ template void force_atlas2(raft::handle_t const &handle, internals::GraphBasedDimRedCallback *callback); template void force_atlas2(raft::handle_t const &handle, - GraphCOOView &graph, + legacy::GraphCOOView &graph, float *pos, const int max_iter, float *x_start, diff --git a/cpp/src/linear_assignment/hungarian.cu b/cpp/src/linear_assignment/hungarian.cu index b7c7ee84b7f..e7db5082ca1 100644 --- a/cpp/src/linear_assignment/hungarian.cu +++ b/cpp/src/linear_assignment/hungarian.cu @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include +#include #include #include @@ -117,7 +117,7 @@ weight_t hungarian(raft::handle_t const &handle, template weight_t hungarian_sparse(raft::handle_t const &handle, - GraphCOOView const &graph, + legacy::GraphCOOView const &graph, vertex_t num_workers, vertex_t const *workers, vertex_t *assignment, @@ -266,7 +266,7 @@ weight_t hungarian_sparse(raft::handle_t const &handle, template weight_t hungarian(raft::handle_t const &handle, - GraphCOOView const &graph, + legacy::GraphCOOView const &graph, vertex_t num_workers, vertex_t const *workers, vertex_t *assignment) @@ -277,7 +277,7 @@ weight_t hungarian(raft::handle_t const &handle, template weight_t hungarian(raft::handle_t const &handle, - GraphCOOView const &graph, + legacy::GraphCOOView const &graph, vertex_t num_workers, vertex_t const *workers, vertex_t *assignment, @@ -288,42 +288,46 @@ weight_t hungarian(raft::handle_t const &handle, template int32_t hungarian( raft::handle_t const &, - GraphCOOView const &, + legacy::GraphCOOView const &, int32_t, int32_t const *, int32_t *, int32_t); -template float hungarian(raft::handle_t const &, - GraphCOOView const &, - int32_t, - int32_t const *, - int32_t *, - float); -template double hungarian(raft::handle_t const &, - GraphCOOView const &, - int32_t, - int32_t const *, - int32_t *, - double); +template float hungarian( + raft::handle_t const &, + legacy::GraphCOOView const &, + int32_t, + int32_t const *, + int32_t *, + float); +template double hungarian( + raft::handle_t const &, + legacy::GraphCOOView const &, + int32_t, + int32_t const *, + int32_t *, + double); template int32_t hungarian( raft::handle_t const &, - GraphCOOView const &, + legacy::GraphCOOView const &, int32_t, int32_t const *, int32_t *); -template float hungarian(raft::handle_t const &, - GraphCOOView const &, 
- int32_t, - int32_t const *, - int32_t *); -template double hungarian(raft::handle_t const &, - GraphCOOView const &, - int32_t, - int32_t const *, - int32_t *); +template float hungarian( + raft::handle_t const &, + legacy::GraphCOOView const &, + int32_t, + int32_t const *, + int32_t *); +template double hungarian( + raft::handle_t const &, + legacy::GraphCOOView const &, + int32_t, + int32_t const *, + int32_t *); namespace dense { diff --git a/cpp/src/link_analysis/gunrock_hits.cpp b/cpp/src/link_analysis/gunrock_hits.cpp index ffaec16c6a8..6b95418e6b6 100644 --- a/cpp/src/link_analysis/gunrock_hits.cpp +++ b/cpp/src/link_analysis/gunrock_hits.cpp @@ -20,7 +20,7 @@ * --------------------------------------------------------------------------*/ #include -#include +#include #include @@ -34,7 +34,7 @@ const int HOST{1}; // gunrock should expose the device constant at the API le const int DEVICE{2}; // gunrock should expose the device constant at the API level. template -void hits(cugraph::GraphCSRView const &graph, +void hits(cugraph::legacy::GraphCSRView const &graph, int max_iter, weight_t tolerance, weight_t const *starting_value, @@ -61,7 +61,7 @@ void hits(cugraph::GraphCSRView const &graph, DEVICE); } -template void hits(cugraph::GraphCSRView const &, +template void hits(cugraph::legacy::GraphCSRView const &, int, float, float const *, diff --git a/cpp/src/link_prediction/jaccard.cu b/cpp/src/link_prediction/jaccard.cu index 071302aed9a..2e10cd7e8c3 100644 --- a/cpp/src/link_prediction/jaccard.cu +++ b/cpp/src/link_prediction/jaccard.cu @@ -19,7 +19,7 @@ * @file jaccard.cu * ---------------------------------------------------------------------------**/ -#include +#include #include #include @@ -316,7 +316,7 @@ int jaccard_pairs(vertex_t n, } // namespace detail template -void jaccard(GraphCSRView const &graph, WT const *weights, WT *result) +void jaccard(legacy::GraphCSRView const &graph, WT const *weights, WT *result) { CUGRAPH_EXPECTS(result != nullptr, "Invalid input argument: result pointer is NULL"); @@ -348,7 +348,7 @@ void jaccard(GraphCSRView const &graph, WT const *weights, WT *resul } template -void jaccard_list(GraphCSRView const &graph, +void jaccard_list(legacy::GraphCSRView const &graph, WT const *weights, ET num_pairs, VT const *first, @@ -390,41 +390,41 @@ void jaccard_list(GraphCSRView const &graph, } } -template void jaccard(GraphCSRView const &, - float const *, - float *); -template void jaccard(GraphCSRView const &, - double const *, - double *); -template void jaccard(GraphCSRView const &, - float const *, - float *); -template void jaccard(GraphCSRView const &, - double const *, - double *); -template void jaccard_list(GraphCSRView const &, - float const *, - int32_t, - int32_t const *, - int32_t const *, - float *); -template void jaccard_list(GraphCSRView const &, - double const *, - int32_t, - int32_t const *, - int32_t const *, - double *); -template void jaccard_list(GraphCSRView const &, - float const *, - int64_t, - int64_t const *, - int64_t const *, - float *); -template void jaccard_list(GraphCSRView const &, - double const *, - int64_t, - int64_t const *, - int64_t const *, - double *); +template void jaccard( + legacy::GraphCSRView const &, float const *, float *); +template void jaccard( + legacy::GraphCSRView const &, double const *, double *); +template void jaccard( + legacy::GraphCSRView const &, float const *, float *); +template void jaccard( + legacy::GraphCSRView const &, double const *, double *); +template void jaccard_list( + 
legacy::GraphCSRView const &, + float const *, + int32_t, + int32_t const *, + int32_t const *, + float *); +template void jaccard_list( + legacy::GraphCSRView const &, + double const *, + int32_t, + int32_t const *, + int32_t const *, + double *); +template void jaccard_list( + legacy::GraphCSRView const &, + float const *, + int64_t, + int64_t const *, + int64_t const *, + float *); +template void jaccard_list( + legacy::GraphCSRView const &, + double const *, + int64_t, + int64_t const *, + int64_t const *, + double *); } // namespace cugraph diff --git a/cpp/src/link_prediction/overlap.cu b/cpp/src/link_prediction/overlap.cu index 915b2c8bd52..f38c8326ca2 100644 --- a/cpp/src/link_prediction/overlap.cu +++ b/cpp/src/link_prediction/overlap.cu @@ -20,7 +20,7 @@ * ---------------------------------------------------------------------------**/ #include -#include +#include #include #include @@ -314,7 +314,7 @@ int overlap_pairs(vertex_t n, } // namespace detail template -void overlap(GraphCSRView const &graph, WT const *weights, WT *result) +void overlap(legacy::GraphCSRView const &graph, WT const *weights, WT *result) { CUGRAPH_EXPECTS(result != nullptr, "Invalid input argument: result pointer is NULL"); @@ -346,7 +346,7 @@ void overlap(GraphCSRView const &graph, WT const *weights, WT *resul } template -void overlap_list(GraphCSRView const &graph, +void overlap_list(legacy::GraphCSRView const &graph, WT const *weights, ET num_pairs, VT const *first, @@ -388,41 +388,41 @@ void overlap_list(GraphCSRView const &graph, } } -template void overlap(GraphCSRView const &, - float const *, - float *); -template void overlap(GraphCSRView const &, - double const *, - double *); -template void overlap(GraphCSRView const &, - float const *, - float *); -template void overlap(GraphCSRView const &, - double const *, - double *); -template void overlap_list(GraphCSRView const &, - float const *, - int32_t, - int32_t const *, - int32_t const *, - float *); -template void overlap_list(GraphCSRView const &, - double const *, - int32_t, - int32_t const *, - int32_t const *, - double *); -template void overlap_list(GraphCSRView const &, - float const *, - int64_t, - int64_t const *, - int64_t const *, - float *); -template void overlap_list(GraphCSRView const &, - double const *, - int64_t, - int64_t const *, - int64_t const *, - double *); +template void overlap( + legacy::GraphCSRView const &, float const *, float *); +template void overlap( + legacy::GraphCSRView const &, double const *, double *); +template void overlap( + legacy::GraphCSRView const &, float const *, float *); +template void overlap( + legacy::GraphCSRView const &, double const *, double *); +template void overlap_list( + legacy::GraphCSRView const &, + float const *, + int32_t, + int32_t const *, + int32_t const *, + float *); +template void overlap_list( + legacy::GraphCSRView const &, + double const *, + int32_t, + int32_t const *, + int32_t const *, + double *); +template void overlap_list( + legacy::GraphCSRView const &, + float const *, + int64_t, + int64_t const *, + int64_t const *, + float *); +template void overlap_list( + legacy::GraphCSRView const &, + double const *, + int64_t, + int64_t const *, + int64_t const *, + double *); } // namespace cugraph diff --git a/cpp/src/structure/graph.cu b/cpp/src/structure/graph.cu index 93bb0a69d23..192234380d9 100644 --- a/cpp/src/structure/graph.cu +++ b/cpp/src/structure/graph.cu @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include +#include #include #include @@ -58,6 +58,7 @@ void degree_from_vertex_ids(const raft::handle_t *handle, } // namespace namespace cugraph { +namespace legacy { template void GraphViewBase::get_vertex_identifiers(VT *identifiers) const @@ -149,6 +150,7 @@ template class GraphCOOView; template class GraphCOOView; template class GraphCompressedSparseBaseView; template class GraphCompressedSparseBaseView; +} // namespace legacy } // namespace cugraph #include diff --git a/cpp/src/traversal/bfs.cu b/cpp/src/traversal/bfs.cu index 8b62fbfecee..b4416b907e5 100644 --- a/cpp/src/traversal/bfs.cu +++ b/cpp/src/traversal/bfs.cu @@ -14,7 +14,7 @@ #include #include "bfs.cuh" -#include +#include #include #include @@ -474,7 +474,7 @@ template class BFS; // It can easily reach 1e40~1e70 on GAP-road.mtx template void bfs(raft::handle_t const &handle, - GraphCSRView const &graph, + legacy::GraphCSRView const &graph, VT *distances, VT *predecessors, double *sp_counters, @@ -511,63 +511,69 @@ void bfs(raft::handle_t const &handle, } // Explicit Instantiation -template void bfs(raft::handle_t const &handle, - GraphCSRView const &graph, - uint32_t *distances, - uint32_t *predecessors, - double *sp_counters, - const uint32_t source_vertex, - bool directed, - bool mg_batch); +template void bfs( + raft::handle_t const &handle, + legacy::GraphCSRView const &graph, + uint32_t *distances, + uint32_t *predecessors, + double *sp_counters, + const uint32_t source_vertex, + bool directed, + bool mg_batch); // Explicit Instantiation -template void bfs(raft::handle_t const &handle, - GraphCSRView const &graph, - uint32_t *distances, - uint32_t *predecessors, - double *sp_counters, - const uint32_t source_vertex, - bool directed, - bool mg_batch); +template void bfs( + raft::handle_t const &handle, + legacy::GraphCSRView const &graph, + uint32_t *distances, + uint32_t *predecessors, + double *sp_counters, + const uint32_t source_vertex, + bool directed, + bool mg_batch); // Explicit Instantiation -template void bfs(raft::handle_t const &handle, - GraphCSRView const &graph, - int32_t *distances, - int32_t *predecessors, - double *sp_counters, - const int32_t source_vertex, - bool directed, - bool mg_batch); +template void bfs( + raft::handle_t const &handle, + legacy::GraphCSRView const &graph, + int32_t *distances, + int32_t *predecessors, + double *sp_counters, + const int32_t source_vertex, + bool directed, + bool mg_batch); // Explicit Instantiation -template void bfs(raft::handle_t const &handle, - GraphCSRView const &graph, - int32_t *distances, - int32_t *predecessors, - double *sp_counters, - const int32_t source_vertex, - bool directed, - bool mg_batch); +template void bfs( + raft::handle_t const &handle, + legacy::GraphCSRView const &graph, + int32_t *distances, + int32_t *predecessors, + double *sp_counters, + const int32_t source_vertex, + bool directed, + bool mg_batch); // Explicit Instantiation -template void bfs(raft::handle_t const &handle, - GraphCSRView const &graph, - int64_t *distances, - int64_t *predecessors, - double *sp_counters, - const int64_t source_vertex, - bool directed, - bool mg_batch); +template void bfs( + raft::handle_t const &handle, + legacy::GraphCSRView const &graph, + int64_t *distances, + int64_t *predecessors, + double *sp_counters, + const int64_t source_vertex, + bool directed, + bool mg_batch); // Explicit Instantiation -template void bfs(raft::handle_t const &handle, - GraphCSRView const &graph, - int64_t *distances, - int64_t *predecessors, - double *sp_counters, - 
const int64_t source_vertex, - bool directed, - bool mg_batch); +template void bfs( + raft::handle_t const &handle, + legacy::GraphCSRView const &graph, + int64_t *distances, + int64_t *predecessors, + double *sp_counters, + const int64_t source_vertex, + bool directed, + bool mg_batch); } // namespace cugraph diff --git a/cpp/src/traversal/bfs_kernels.cuh b/cpp/src/traversal/bfs_kernels.cuh index 78ce646d3c6..e55abaa8c1d 100644 --- a/cpp/src/traversal/bfs_kernels.cuh +++ b/cpp/src/traversal/bfs_kernels.cuh @@ -18,7 +18,7 @@ #include #include -#include +#include #include "traversal_common.cuh" namespace cugraph { @@ -292,7 +292,7 @@ __global__ void main_bottomup_kernel(const IndexType *unvisited, // When this kernel is converted to support different VT and ET, this // will likely split into invalid_vid and invalid_eid // This is equivalent to ~IndexType(0) (i.e., all bits set to 1) - constexpr IndexType invalid_idx = cugraph::invalid_idx::value; + constexpr IndexType invalid_idx = cugraph::legacy::invalid_idx::value; // we will call __syncthreads inside the loop // we need to keep complete block active @@ -550,7 +550,7 @@ __global__ void bottom_up_large_degree_kernel(IndexType *left_unvisited, // When this kernel is converted to support different VT and ET, this // will likely split into invalid_vid and invalid_eid // This is equivalent to ~IndexType(0) (i.e., all bits set to 1) - constexpr IndexType invalid_idx = cugraph::invalid_idx::value; + constexpr IndexType invalid_idx = cugraph::legacy::invalid_idx::value; // Inactive threads are not a pb for __ballot (known behaviour) for (IndexType idx = logical_warps_per_block * blockIdx.x + logical_warp_id; @@ -728,7 +728,7 @@ __global__ void topdown_expand_kernel( // When this kernel is converted to support different VT and ET, this // will likely split into invalid_vid and invalid_eid // This is equivalent to ~IndexType(0) (i.e., all bits set to 1) - constexpr IndexType invalid_idx = cugraph::invalid_idx::value; + constexpr IndexType invalid_idx = cugraph::legacy::invalid_idx::value; IndexType n_items_per_thread_left = (totaldegree > block_offset) diff --git a/cpp/src/traversal/mg/bfs.cuh b/cpp/src/traversal/mg/bfs.cuh index b053a6ff75a..129c1a554e4 100644 --- a/cpp/src/traversal/mg/bfs.cuh +++ b/cpp/src/traversal/mg/bfs.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -29,7 +29,7 @@ namespace detail { template void bfs_traverse(raft::handle_t const &handle, - cugraph::GraphCSRView const &graph, + cugraph::legacy::GraphCSRView const &graph, const vertex_t start_vertex, rmm::device_vector &visited_bmap, rmm::device_vector &output_frontier_bmap, @@ -111,7 +111,7 @@ void bfs_traverse(raft::handle_t const &handle, template void bfs(raft::handle_t const &handle, - cugraph::GraphCSRView const &graph, + cugraph::legacy::GraphCSRView const &graph, vertex_t *distances, vertex_t *predecessors, const vertex_t start_vertex) @@ -132,7 +132,7 @@ void bfs(raft::handle_t const &handle, thrust::fill(rmm::exec_policy(stream)->on(stream), predecessors, predecessors + global_number_of_vertices, - cugraph::invalid_idx::value); + cugraph::legacy::invalid_idx::value); if (distances == nullptr) { detail::BFSStepNoDist bfs_op( diff --git a/cpp/src/traversal/mg/common_utils.cuh b/cpp/src/traversal/mg/common_utils.cuh index d922636e740..7c36bc2c139 100644 --- a/cpp/src/traversal/mg/common_utils.cuh +++ b/cpp/src/traversal/mg/common_utils.cuh @@ -137,9 +137,10 @@ struct BFSStep { }; template -vertex_t populate_isolated_vertices(raft::handle_t const &handle, - cugraph::GraphCSRView const &graph, - rmm::device_vector &isolated_vertex_ids) +vertex_t populate_isolated_vertices( + raft::handle_t const &handle, + cugraph::legacy::GraphCSRView const &graph, + rmm::device_vector &isolated_vertex_ids) { bool is_mg = (handle.comms_initialized() && (graph.local_vertices != nullptr) && (graph.local_offsets != nullptr)); @@ -218,7 +219,7 @@ void add_to_bitmap(raft::handle_t const &handle, // ith bit of isolated_bmap to 1 template void create_isolated_bitmap(raft::handle_t const &handle, - cugraph::GraphCSRView const &graph, + cugraph::legacy::GraphCSRView const &graph, rmm::device_vector &local_isolated_ids, rmm::device_vector &global_isolated_ids, rmm::device_vector &temp_buffer_len, @@ -383,13 +384,14 @@ return_t remove_duplicates(raft::handle_t const &handle, } template -vertex_t preprocess_input_frontier(raft::handle_t const &handle, - cugraph::GraphCSRView const &graph, - rmm::device_vector &bmap, - rmm::device_vector &isolated_bmap, - rmm::device_vector &input_frontier, - vertex_t input_frontier_len, - rmm::device_vector &output_frontier) +vertex_t preprocess_input_frontier( + raft::handle_t const &handle, + cugraph::legacy::GraphCSRView const &graph, + rmm::device_vector &bmap, + rmm::device_vector &isolated_bmap, + rmm::device_vector &input_frontier, + vertex_t input_frontier_len, + rmm::device_vector &output_frontier) { cudaStream_t stream = handle.get_stream(); @@ -415,12 +417,13 @@ vertex_t preprocess_input_frontier(raft::handle_t const &handle, } template -vertex_t preprocess_input_frontier(raft::handle_t const &handle, - cugraph::GraphCSRView const &graph, - rmm::device_vector &bmap, - rmm::device_vector &input_frontier, - vertex_t input_frontier_len, - rmm::device_vector &output_frontier) +vertex_t preprocess_input_frontier( + raft::handle_t const &handle, + cugraph::legacy::GraphCSRView const &graph, + rmm::device_vector &bmap, + rmm::device_vector &input_frontier, + vertex_t input_frontier_len, + rmm::device_vector &output_frontier) { cudaStream_t stream = handle.get_stream(); @@ -458,7 +461,7 @@ __global__ void fill_kernel(vertex_t *distances, vertex_t count, vertex_t start_ template void fill_max_dist(raft::handle_t const &handle, - cugraph::GraphCSRView const &graph, + cugraph::legacy::GraphCSRView const &graph, vertex_t start_vertex, vertex_t global_number_of_vertices, 
vertex_t *distances) @@ -471,8 +474,9 @@ void fill_max_dist(raft::handle_t const &handle, } template -vertex_t get_global_vertex_count(raft::handle_t const &handle, - cugraph::GraphCSRView const &graph) +vertex_t get_global_vertex_count( + raft::handle_t const &handle, + cugraph::legacy::GraphCSRView const &graph) { rmm::device_vector id(1); id[0] = *thrust::max_element(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), diff --git a/cpp/src/traversal/mg/frontier_expand.cuh b/cpp/src/traversal/mg/frontier_expand.cuh index 5436c060e18..4c4d794f067 100644 --- a/cpp/src/traversal/mg/frontier_expand.cuh +++ b/cpp/src/traversal/mg/frontier_expand.cuh @@ -16,7 +16,7 @@ #pragma once -#include +#include #include "frontier_expand_kernels.cuh" #include "vertex_binning.cuh" @@ -29,7 +29,7 @@ namespace detail { template class FrontierExpand { raft::handle_t const &handle_; - cugraph::GraphCSRView const &graph_; + cugraph::legacy::GraphCSRView const &graph_; VertexBinner dist_; rmm::device_vector reorganized_vertices_; edge_t vertex_begin_; @@ -38,7 +38,7 @@ class FrontierExpand { public: FrontierExpand(raft::handle_t const &handle, - cugraph::GraphCSRView const &graph) + cugraph::legacy::GraphCSRView const &graph) : handle_(handle), graph_(graph) { bool is_mg = (handle.comms_initialized() && (graph.local_vertices != nullptr) && diff --git a/cpp/src/traversal/mg/frontier_expand_kernels.cuh b/cpp/src/traversal/mg/frontier_expand_kernels.cuh index 00884e01755..28ba6b19dbc 100644 --- a/cpp/src/traversal/mg/frontier_expand_kernels.cuh +++ b/cpp/src/traversal/mg/frontier_expand_kernels.cuh @@ -16,7 +16,7 @@ #pragma once -#include +#include #include "vertex_binning.cuh" namespace cugraph { @@ -171,7 +171,7 @@ __global__ void kernel_per_vertex(edge_t const *offsets, } template -void large_vertex_lb(cugraph::GraphCSRView const &graph, +void large_vertex_lb(cugraph::legacy::GraphCSRView const &graph, DegreeBucket &bucket, operator_t op, vertex_t vertex_begin, @@ -196,7 +196,7 @@ void large_vertex_lb(cugraph::GraphCSRView const &gr } template -void medium_vertex_lb(cugraph::GraphCSRView const &graph, +void medium_vertex_lb(cugraph::legacy::GraphCSRView const &graph, DegreeBucket &bucket, operator_t op, vertex_t vertex_begin, @@ -223,7 +223,7 @@ void medium_vertex_lb(cugraph::GraphCSRView const &g } template -void small_vertex_lb(cugraph::GraphCSRView const &graph, +void small_vertex_lb(cugraph::legacy::GraphCSRView const &graph, DegreeBucket &bucket, operator_t op, vertex_t vertex_begin, diff --git a/cpp/src/traversal/sssp.cu b/cpp/src/traversal/sssp.cu index 8dcaffd953a..ac2ab5b2063 100644 --- a/cpp/src/traversal/sssp.cu +++ b/cpp/src/traversal/sssp.cu @@ -19,7 +19,7 @@ #include #include -#include +#include #include "sssp.cuh" #include "sssp_kernels.cuh" @@ -242,7 +242,7 @@ void SSSP::clean() * @file sssp.cu * --------------------------------------------------------------------------*/ template -void sssp(GraphCSRView const &graph, +void sssp(legacy::GraphCSRView const &graph, WT *distances, VT *predecessors, const VT source_vertex) @@ -281,7 +281,7 @@ void sssp(GraphCSRView const &graph, } else { // SSSP is not defined for graphs with negative weight cycles // Warn user about any negative edges - if (graph.prop.has_negative_edges == PropType::PROP_TRUE) + if (graph.prop.has_negative_edges == legacy::PropType::PROP_TRUE) std::cerr << "WARN: The graph has negative weight edges. 
SSSP will not " "converge if the graph has negative weight cycles\n"; edge_weights_ptr = graph.edge_data; @@ -293,11 +293,11 @@ void sssp(GraphCSRView const &graph, } // explicit instantiation -template void sssp(GraphCSRView const &graph, +template void sssp(legacy::GraphCSRView const &graph, float *distances, int *predecessors, const int source_vertex); -template void sssp(GraphCSRView const &graph, +template void sssp(legacy::GraphCSRView const &graph, double *distances, int *predecessors, const int source_vertex); diff --git a/cpp/src/traversal/two_hop_neighbors.cu b/cpp/src/traversal/two_hop_neighbors.cu index 770e618637b..eff91f03127 100644 --- a/cpp/src/traversal/two_hop_neighbors.cu +++ b/cpp/src/traversal/two_hop_neighbors.cu @@ -21,7 +21,7 @@ #include #include -#include +#include #include #include "two_hop_neighbors.cuh" @@ -32,7 +32,8 @@ namespace cugraph { template -std::unique_ptr> get_two_hop_neighbors(GraphCSRView const &graph) +std::unique_ptr> get_two_hop_neighbors( + legacy::GraphCSRView const &graph) { cudaStream_t stream{nullptr}; @@ -108,7 +109,8 @@ std::unique_ptr> get_two_hop_neighbors(GraphCSRView>(graph.number_of_vertices, outputSize, false); + auto result = + std::make_unique>(graph.number_of_vertices, outputSize, false); cudaMemcpy(result->src_indices(), d_first_pair, sizeof(VT) * outputSize, cudaMemcpyDefault); cudaMemcpy(result->dst_indices(), d_second_pair, sizeof(VT) * outputSize, cudaMemcpyDefault); @@ -116,10 +118,10 @@ std::unique_ptr> get_two_hop_neighbors(GraphCSRView> get_two_hop_neighbors( - GraphCSRView const &); +template std::unique_ptr> get_two_hop_neighbors( + legacy::GraphCSRView const &); -template std::unique_ptr> get_two_hop_neighbors( - GraphCSRView const &); +template std::unique_ptr> get_two_hop_neighbors( + legacy::GraphCSRView const &); } // namespace cugraph diff --git a/cpp/src/tree/mst.cu b/cpp/src/tree/mst.cu index 54698b588a4..0fae4f2bef7 100644 --- a/cpp/src/tree/mst.cu +++ b/cpp/src/tree/mst.cu @@ -28,7 +28,7 @@ #include #include -#include +#include #include #include @@ -38,9 +38,9 @@ namespace cugraph { namespace detail { template -std::unique_ptr> mst_impl( +std::unique_ptr> mst_impl( raft::handle_t const &handle, - GraphCSRView const &graph, + legacy::GraphCSRView const &graph, rmm::mr::device_memory_resource *mr) { @@ -55,33 +55,33 @@ std::unique_ptr> mst_impl( colors.data(), stream); - GraphCOOContents coo_contents{ + legacy::GraphCOOContents coo_contents{ graph.number_of_vertices, mst_edges.n_edges, std::make_unique(mst_edges.src.release()), std::make_unique(mst_edges.dst.release()), std::make_unique(mst_edges.weights.release())}; - return std::make_unique>(std::move(coo_contents)); + return std::make_unique>(std::move(coo_contents)); } } // namespace detail template -std::unique_ptr> minimum_spanning_tree( +std::unique_ptr> minimum_spanning_tree( raft::handle_t const &handle, - GraphCSRView const &graph, + legacy::GraphCSRView const &graph, rmm::mr::device_memory_resource *mr) { return detail::mst_impl(handle, graph, mr); } -template std::unique_ptr> minimum_spanning_tree( +template std::unique_ptr> minimum_spanning_tree( raft::handle_t const &handle, - GraphCSRView const &graph, - rmm::mr::device_memory_resource *mr); -template std::unique_ptr> minimum_spanning_tree( - raft::handle_t const &handle, - GraphCSRView const &graph, + legacy::GraphCSRView const &graph, rmm::mr::device_memory_resource *mr); +template std::unique_ptr> +minimum_spanning_tree(raft::handle_t const &handle, + legacy::GraphCSRView const &graph, + 
rmm::mr::device_memory_resource *mr); } // namespace cugraph diff --git a/cpp/src/utilities/cython.cu b/cpp/src/utilities/cython.cu index e9bf9ffe031..cf6bbf6322f 100644 --- a/cpp/src/utilities/cython.cu +++ b/cpp/src/utilities/cython.cu @@ -18,8 +18,8 @@ #include #include #include -#include #include +#include #include #include #include @@ -281,11 +281,11 @@ void populate_graph_container_legacy(graph_container_t& graph_container, switch (legacyType) { case graphTypeEnum::LegacyCSR: { graph_container.graph_ptr_union.GraphCSRViewFloatPtr = - std::make_unique>(reinterpret_cast(offsets), - reinterpret_cast(indices), - reinterpret_cast(weights), - num_global_vertices, - num_global_edges); + std::make_unique>(reinterpret_cast(offsets), + reinterpret_cast(indices), + reinterpret_cast(weights), + num_global_vertices, + num_global_edges); graph_container.graph_type = graphTypeEnum::GraphCSRViewFloat; (graph_container.graph_ptr_union.GraphCSRViewFloatPtr) ->set_local_data(local_vertices, local_edges, local_offsets); @@ -294,11 +294,11 @@ void populate_graph_container_legacy(graph_container_t& graph_container, } break; case graphTypeEnum::LegacyCSC: { graph_container.graph_ptr_union.GraphCSCViewFloatPtr = - std::make_unique>(reinterpret_cast(offsets), - reinterpret_cast(indices), - reinterpret_cast(weights), - num_global_vertices, - num_global_edges); + std::make_unique>(reinterpret_cast(offsets), + reinterpret_cast(indices), + reinterpret_cast(weights), + num_global_vertices, + num_global_edges); graph_container.graph_type = graphTypeEnum::GraphCSCViewFloat; (graph_container.graph_ptr_union.GraphCSCViewFloatPtr) ->set_local_data(local_vertices, local_edges, local_offsets); @@ -307,11 +307,11 @@ void populate_graph_container_legacy(graph_container_t& graph_container, } break; case graphTypeEnum::LegacyCOO: { graph_container.graph_ptr_union.GraphCOOViewFloatPtr = - std::make_unique>(reinterpret_cast(offsets), - reinterpret_cast(indices), - reinterpret_cast(weights), - num_global_vertices, - num_global_edges); + std::make_unique>(reinterpret_cast(offsets), + reinterpret_cast(indices), + reinterpret_cast(weights), + num_global_vertices, + num_global_edges); graph_container.graph_type = graphTypeEnum::GraphCOOViewFloat; (graph_container.graph_ptr_union.GraphCOOViewFloatPtr) ->set_local_data(local_vertices, local_edges, local_offsets); @@ -325,11 +325,12 @@ void populate_graph_container_legacy(graph_container_t& graph_container, switch (legacyType) { case graphTypeEnum::LegacyCSR: { graph_container.graph_ptr_union.GraphCSRViewDoublePtr = - std::make_unique>(reinterpret_cast(offsets), - reinterpret_cast(indices), - reinterpret_cast(weights), - num_global_vertices, - num_global_edges); + std::make_unique>( + reinterpret_cast(offsets), + reinterpret_cast(indices), + reinterpret_cast(weights), + num_global_vertices, + num_global_edges); graph_container.graph_type = graphTypeEnum::GraphCSRViewDouble; (graph_container.graph_ptr_union.GraphCSRViewDoublePtr) ->set_local_data(local_vertices, local_edges, local_offsets); @@ -338,11 +339,12 @@ void populate_graph_container_legacy(graph_container_t& graph_container, } break; case graphTypeEnum::LegacyCSC: { graph_container.graph_ptr_union.GraphCSCViewDoublePtr = - std::make_unique>(reinterpret_cast(offsets), - reinterpret_cast(indices), - reinterpret_cast(weights), - num_global_vertices, - num_global_edges); + std::make_unique>( + reinterpret_cast(offsets), + reinterpret_cast(indices), + reinterpret_cast(weights), + num_global_vertices, + num_global_edges); 
graph_container.graph_type = graphTypeEnum::GraphCSCViewDouble; (graph_container.graph_ptr_union.GraphCSCViewDoublePtr) ->set_local_data(local_vertices, local_edges, local_offsets); @@ -351,11 +353,12 @@ void populate_graph_container_legacy(graph_container_t& graph_container, } break; case graphTypeEnum::LegacyCOO: { graph_container.graph_ptr_union.GraphCOOViewDoublePtr = - std::make_unique>(reinterpret_cast(offsets), - reinterpret_cast(indices), - reinterpret_cast(weights), - num_global_vertices, - num_global_edges); + std::make_unique>( + reinterpret_cast(offsets), + reinterpret_cast(indices), + reinterpret_cast(weights), + num_global_vertices, + num_global_edges); graph_container.graph_type = graphTypeEnum::GraphCOOViewDouble; (graph_container.graph_ptr_union.GraphCOOViewDoublePtr) ->set_local_data(local_vertices, local_edges, local_offsets); diff --git a/cpp/tests/centrality/betweenness_centrality_test.cu b/cpp/tests/centrality/betweenness_centrality_test.cu index 4cafab68986..c3883d9694f 100644 --- a/cpp/tests/centrality/betweenness_centrality_test.cu +++ b/cpp/tests/centrality/betweenness_centrality_test.cu @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include @@ -195,7 +195,7 @@ void reference_rescale(result_t *result, template void reference_betweenness_centrality( - cugraph::GraphCSRView const &graph, + cugraph::legacy::GraphCSRView const &graph, result_t *result, bool normalize, bool endpoints, // This is not yet implemented @@ -228,14 +228,14 @@ void reference_betweenness_centrality( // Explicit instantiation /* FIXME!!! template void reference_betweenness_centrality( - cugraph::GraphCSRView const &, + cugraph::legacy::GraphCSRView const &, float *, bool, bool, const int, int const *); template void reference_betweenness_centrality( - cugraph::GraphCSRView const &, + cugraph::legacy::GraphCSRView const &, double *, bool, bool, @@ -308,8 +308,8 @@ class Tests_BC : public ::testing::TestWithParam { auto csr = cugraph::test::generate_graph_csr_from_mm( is_directed, configuration.file_path_); cudaDeviceSynchronize(); - cugraph::GraphCSRView G = csr->view(); - G.prop.directed = is_directed; + cugraph::legacy::GraphCSRView G = csr->view(); + G.prop.directed = is_directed; CUDA_TRY(cudaGetLastError()); std::vector result(G.number_of_vertices, 0); std::vector expected(G.number_of_vertices, 0); diff --git a/cpp/tests/centrality/edge_betweenness_centrality_test.cu b/cpp/tests/centrality/edge_betweenness_centrality_test.cu index e31af4dba77..5f832475b3b 100644 --- a/cpp/tests/centrality/edge_betweenness_centrality_test.cu +++ b/cpp/tests/centrality/edge_betweenness_centrality_test.cu @@ -27,7 +27,7 @@ #include #include -#include +#include #include #include @@ -155,7 +155,7 @@ void reference_rescale(result_t *result, template void reference_edge_betweenness_centrality( - cugraph::GraphCSRView const &graph, + cugraph::legacy::GraphCSRView const &graph, result_t *result, bool normalize, vertex_t const number_of_sources, @@ -244,8 +244,8 @@ class Tests_EdgeBC : public ::testing::TestWithParam { auto csr = cugraph::test::generate_graph_csr_from_mm( is_directed, configuration.file_path_); cudaDeviceSynchronize(); - cugraph::GraphCSRView G = csr->view(); - G.prop.directed = is_directed; + cugraph::legacy::GraphCSRView G = csr->view(); + G.prop.directed = is_directed; CUDA_TRY(cudaGetLastError()); std::vector result(G.number_of_edges, 0); std::vector expected(G.number_of_edges, 0); diff --git a/cpp/tests/centrality/katz_centrality_test.cu 
b/cpp/tests/centrality/katz_centrality_test.cu index 44e52a7626f..e4951dd9098 100644 --- a/cpp/tests/centrality/katz_centrality_test.cu +++ b/cpp/tests/centrality/katz_centrality_test.cu @@ -21,7 +21,7 @@ #include #include -#include +#include #include #include @@ -56,13 +56,13 @@ std::vector getTopKIds(double* p_katz, int count, int k = 10) } template -int getMaxDegree(cugraph::GraphCSRView const& g) +int getMaxDegree(cugraph::legacy::GraphCSRView const& g) { cudaStream_t stream{nullptr}; rmm::device_vector degree_vector(g.number_of_vertices); ET* p_degree = degree_vector.data().get(); - g.degree(p_degree, cugraph::DegreeDirection::OUT); + g.degree(p_degree, cugraph::legacy::DegreeDirection::OUT); ET max_out_degree = thrust::reduce(rmm::exec_policy(stream)->on(stream), p_degree, p_degree + g.number_of_vertices, @@ -137,9 +137,10 @@ class Tests_Katz : public ::testing::TestWithParam { << "\n"; ASSERT_EQ(fclose(fpin), 0); - cugraph::GraphCOOView cooview(&cooColInd[0], &cooRowInd[0], nullptr, m, nnz); - auto csr = cugraph::coo_to_csr(cooview); - cugraph::GraphCSRView G = csr->view(); + cugraph::legacy::GraphCOOView cooview( + &cooColInd[0], &cooRowInd[0], nullptr, m, nnz); + auto csr = cugraph::coo_to_csr(cooview); + cugraph::legacy::GraphCSRView G = csr->view(); rmm::device_vector katz_vector(m); double* d_katz = thrust::raw_pointer_cast(katz_vector.data()); diff --git a/cpp/tests/community/balanced_edge_test.cpp b/cpp/tests/community/balanced_edge_test.cpp index a4bd8de769f..d4c5edf3f35 100644 --- a/cpp/tests/community/balanced_edge_test.cpp +++ b/cpp/tests/community/balanced_edge_test.cpp @@ -48,7 +48,7 @@ TEST(balanced_edge, success) rmm::device_vector weights_v(w_h); rmm::device_vector result_v(cluster_id); - cugraph::GraphCSRView G( + cugraph::legacy::GraphCSRView G( offsets_v.data().get(), indices_v.data().get(), weights_v.data().get(), num_verts, num_edges); int num_clusters{8}; diff --git a/cpp/tests/community/ecg_test.cpp b/cpp/tests/community/ecg_test.cpp index 0f0960b0abb..15c2fb118c9 100644 --- a/cpp/tests/community/ecg_test.cpp +++ b/cpp/tests/community/ecg_test.cpp @@ -11,7 +11,7 @@ #include #include -#include +#include #include @@ -47,7 +47,7 @@ TEST(ecg, success) rmm::device_vector weights_v(w_h); rmm::device_vector result_v(cluster_id); - cugraph::GraphCSRView graph_csr( + cugraph::legacy::GraphCSRView graph_csr( offsets_v.data().get(), indices_v.data().get(), weights_v.data().get(), num_verts, num_edges); raft::handle_t handle; @@ -118,7 +118,7 @@ TEST(ecg, dolphin) raft::update_device(indices_v.data(), ind_h.data(), ind_h.size(), stream); raft::update_device(weights_v.data(), w_h.data(), w_h.size(), stream); - cugraph::GraphCSRView graph_csr( + cugraph::legacy::GraphCSRView graph_csr( offsets_v.data(), indices_v.data(), weights_v.data(), num_verts, num_edges); // "FIXME": remove this check once we drop support for Pascal diff --git a/cpp/tests/community/leiden_test.cpp b/cpp/tests/community/leiden_test.cpp index a586810b6b6..13e139666f6 100644 --- a/cpp/tests/community/leiden_test.cpp +++ b/cpp/tests/community/leiden_test.cpp @@ -11,7 +11,7 @@ #include #include -#include +#include #include @@ -59,7 +59,7 @@ TEST(leiden_karate, success) raft::update_device(indices_v.data(), ind_h.data(), ind_h.size(), stream); raft::update_device(weights_v.data(), w_h.data(), w_h.size(), stream); - cugraph::GraphCSRView G( + cugraph::legacy::GraphCSRView G( offsets_v.data(), indices_v.data(), weights_v.data(), num_verts, num_edges); float modularity{0.0}; diff --git 
a/cpp/tests/community/louvain_test.cpp b/cpp/tests/community/louvain_test.cpp index 821e8651d70..bcf7dddc459 100644 --- a/cpp/tests/community/louvain_test.cpp +++ b/cpp/tests/community/louvain_test.cpp @@ -204,7 +204,7 @@ TEST(louvain_legacy, success) raft::update_device(indices_v.data(), ind_h.data(), ind_h.size(), stream); raft::update_device(weights_v.data(), w_h.data(), w_h.size(), stream); - cugraph::GraphCSRView G( + cugraph::legacy::GraphCSRView G( offsets_v.data(), indices_v.data(), weights_v.data(), num_verts, num_edges); float modularity{0.0}; @@ -275,7 +275,7 @@ TEST(louvain_legacy_renumbered, success) raft::update_device(indices_v.data(), ind_h.data(), ind_h.size(), stream); raft::update_device(weights_v.data(), w_h.data(), w_h.size(), stream); - cugraph::GraphCSRView G( + cugraph::legacy::GraphCSRView G( offsets_v.data(), indices_v.data(), weights_v.data(), num_verts, num_edges); float modularity{0.0}; diff --git a/cpp/tests/community/triangle_test.cu b/cpp/tests/community/triangle_test.cu index b40c4734a14..4c51e15b111 100644 --- a/cpp/tests/community/triangle_test.cu +++ b/cpp/tests/community/triangle_test.cu @@ -11,7 +11,7 @@ #include #include -#include +#include #include @@ -49,7 +49,7 @@ TEST(triangle, dolphin) rmm::device_vector indices_v(ind_h); rmm::device_vector weights_v(w_h); - cugraph::GraphCSRView graph_csr( + cugraph::legacy::GraphCSRView graph_csr( offsets_v.data().get(), indices_v.data().get(), weights_v.data().get(), num_verts, num_edges); uint64_t count{0}; @@ -92,7 +92,7 @@ TEST(triangle, karate) rmm::device_vector indices_v(ind_h); rmm::device_vector weights_v(w_h); - cugraph::GraphCSRView graph_csr( + cugraph::legacy::GraphCSRView graph_csr( offsets_v.data().get(), indices_v.data().get(), weights_v.data().get(), num_verts, num_edges); uint64_t count{0}; diff --git a/cpp/tests/components/con_comp_test.cu b/cpp/tests/components/con_comp_test.cu index e394d5fc97c..2db7235c299 100644 --- a/cpp/tests/components/con_comp_test.cu +++ b/cpp/tests/components/con_comp_test.cu @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include @@ -113,9 +113,10 @@ struct Tests_Weakly_CC : ::testing::TestWithParam { << "\n"; ASSERT_EQ(fclose(fpin), 0); - cugraph::GraphCOOView G_coo(&cooRowInd[0], &cooColInd[0], nullptr, m, nnz); - auto G_unique = cugraph::coo_to_csr(G_coo); - cugraph::GraphCSRView G = G_unique->view(); + cugraph::legacy::GraphCOOView G_coo( + &cooRowInd[0], &cooColInd[0], nullptr, m, nnz); + auto G_unique = cugraph::coo_to_csr(G_coo); + cugraph::legacy::GraphCSRView G = G_unique->view(); rmm::device_vector d_labels(m); diff --git a/cpp/tests/components/scc_test.cu b/cpp/tests/components/scc_test.cu index 0d2e87c40a2..5448cf79cce 100644 --- a/cpp/tests/components/scc_test.cu +++ b/cpp/tests/components/scc_test.cu @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include @@ -176,9 +176,10 @@ struct Tests_Strongly_CC : ::testing::TestWithParam { << "\n"; ASSERT_EQ(fclose(fpin), 0); - cugraph::GraphCOOView G_coo(&cooRowInd[0], &cooColInd[0], nullptr, nrows, nnz); - auto G_unique = cugraph::coo_to_csr(G_coo); - cugraph::GraphCSRView G = G_unique->view(); + cugraph::legacy::GraphCOOView G_coo( + &cooRowInd[0], &cooColInd[0], nullptr, nrows, nnz); + auto G_unique = cugraph::coo_to_csr(G_coo); + cugraph::legacy::GraphCSRView G = G_unique->view(); rmm::device_vector d_labels(nrows); @@ -246,9 +247,10 @@ TEST_F(SCCSmallTest, CustomGraphSimpleLoops) EXPECT_EQ(nnz, cooColInd.size()); - cugraph::GraphCOOView G_coo(&cooRowInd[0], 
&cooColInd[0], nullptr, nrows, nnz); - auto G_unique = cugraph::coo_to_csr(G_coo); - cugraph::GraphCSRView G = G_unique->view(); + cugraph::legacy::GraphCOOView G_coo( + &cooRowInd[0], &cooColInd[0], nullptr, nrows, nnz); + auto G_unique = cugraph::coo_to_csr(G_coo); + cugraph::legacy::GraphCSRView G = G_unique->view(); rmm::device_vector d_labels(nrows); @@ -296,9 +298,10 @@ TEST_F(SCCSmallTest, /*DISABLED_*/ CustomGraphWithSelfLoops) EXPECT_EQ(nnz, cooColInd.size()); - cugraph::GraphCOOView G_coo(&cooRowInd[0], &cooColInd[0], nullptr, nrows, nnz); - auto G_unique = cugraph::coo_to_csr(G_coo); - cugraph::GraphCSRView G = G_unique->view(); + cugraph::legacy::GraphCOOView G_coo( + &cooRowInd[0], &cooColInd[0], nullptr, nrows, nnz); + auto G_unique = cugraph::coo_to_csr(G_coo); + cugraph::legacy::GraphCSRView G = G_unique->view(); rmm::device_vector d_labels(nrows); @@ -341,9 +344,10 @@ TEST_F(SCCSmallTest, SmallGraphWithSelfLoops1) EXPECT_EQ(nnz, cooColInd.size()); - cugraph::GraphCOOView G_coo(&cooRowInd[0], &cooColInd[0], nullptr, nrows, nnz); - auto G_unique = cugraph::coo_to_csr(G_coo); - cugraph::GraphCSRView G = G_unique->view(); + cugraph::legacy::GraphCOOView G_coo( + &cooRowInd[0], &cooColInd[0], nullptr, nrows, nnz); + auto G_unique = cugraph::coo_to_csr(G_coo); + cugraph::legacy::GraphCSRView G = G_unique->view(); rmm::device_vector d_labels(nrows); @@ -381,10 +385,9 @@ TEST_F(SCCSmallTest, SmallGraphWithIsolated) // Note: there seems to be a BUG in coo_to_csr() or view() // COO format doesn't account for isolated vertices; // - // cugraph::GraphCOOView G_coo(&cooRowInd[0], &cooColInd[0], nullptr, nrows, - // nnz); - // auto G_unique = cugraph::coo_to_csr(G_coo); - // cugraph::GraphCSRView G = G_unique->view(); + // cugraph::legacy::GraphCOOView G_coo(&cooRowInd[0], &cooColInd[0], nullptr, + // nrows, nnz); auto G_unique = cugraph::coo_to_csr(G_coo); + // cugraph::legacy::GraphCSRView G = G_unique->view(); // // // size_t num_vertices = G.number_of_vertices; @@ -401,7 +404,7 @@ TEST_F(SCCSmallTest, SmallGraphWithIsolated) thrust::device_vector d_ro(ro); thrust::device_vector d_ci(ci); - cugraph::GraphCSRView G{ + cugraph::legacy::GraphCSRView G{ d_ro.data().get(), d_ci.data().get(), nullptr, static_cast(nrows), static_cast(nnz)}; size_t num_vertices = G.number_of_vertices; diff --git a/cpp/tests/experimental/bfs_test.cpp b/cpp/tests/experimental/bfs_test.cpp index 3fea9f371e0..4114b695e49 100644 --- a/cpp/tests/experimental/bfs_test.cpp +++ b/cpp/tests/experimental/bfs_test.cpp @@ -54,7 +54,9 @@ void bfs_reference(edge_t const* offsets, vertex_t depth{0}; std::fill(distances, distances + num_vertices, std::numeric_limits::max()); - std::fill(predecessors, predecessors + num_vertices, cugraph::invalid_vertex_id::value); + std::fill(predecessors, + predecessors + num_vertices, + cugraph::experimental::invalid_vertex_id::value); *(distances + source) = depth; std::vector cur_frontier_rows{source}; @@ -249,7 +251,7 @@ class Tests_BFS : public ::testing::TestWithParam::value) { + if (*it == cugraph::experimental::invalid_vertex_id::value) { ASSERT_TRUE(h_reference_predecessors[i] == *it) << "vertex reachability does not match with the reference."; } else { diff --git a/cpp/tests/experimental/ms_bfs_test.cpp b/cpp/tests/experimental/ms_bfs_test.cpp index b8d1e43f81e..a6fb306f1d8 100644 --- a/cpp/tests/experimental/ms_bfs_test.cpp +++ b/cpp/tests/experimental/ms_bfs_test.cpp @@ -21,7 +21,6 @@ #include #include #include -#include #include #include diff --git 
a/cpp/tests/experimental/sssp_test.cpp b/cpp/tests/experimental/sssp_test.cpp index 9a50553a114..91f0edcbf47 100644 --- a/cpp/tests/experimental/sssp_test.cpp +++ b/cpp/tests/experimental/sssp_test.cpp @@ -58,7 +58,9 @@ void sssp_reference(edge_t const* offsets, using queue_item_t = std::tuple; std::fill(distances, distances + num_vertices, std::numeric_limits::max()); - std::fill(predecessors, predecessors + num_vertices, cugraph::invalid_vertex_id::value); + std::fill(predecessors, + predecessors + num_vertices, + cugraph::experimental::invalid_vertex_id::value); *(distances + source) = weight_t{0.0}; std::priority_queue, std::greater> queue{}; @@ -264,7 +266,7 @@ class Tests_SSSP : public ::testing::TestWithParam::value) { + if (*it == cugraph::experimental::invalid_vertex_id::value) { ASSERT_TRUE(h_reference_predecessors[i] == *it) << "vertex reachability do not match with the reference."; } else { diff --git a/cpp/tests/layout/force_atlas2_test.cu b/cpp/tests/layout/force_atlas2_test.cu index f2f5561a7d8..1a259418bc7 100644 --- a/cpp/tests/layout/force_atlas2_test.cu +++ b/cpp/tests/layout/force_atlas2_test.cu @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include @@ -144,7 +144,7 @@ class Tests_Force_Atlas2 : public ::testing::TestWithParam CUDA_TRY(cudaMemcpy(srcs, &cooRowInd[0], sizeof(int) * nnz, cudaMemcpyDefault)); CUDA_TRY(cudaMemcpy(dests, &cooColInd[0], sizeof(int) * nnz, cudaMemcpyDefault)); CUDA_TRY(cudaMemcpy(weights, &cooVal[0], sizeof(T) * nnz, cudaMemcpyDefault)); - cugraph::GraphCOOView G(srcs, dests, weights, m, nnz); + cugraph::legacy::GraphCOOView G(srcs, dests, weights, m, nnz); const int max_iter = 500; float* x_start = nullptr; diff --git a/cpp/tests/linear_assignment/hungarian_test.cu b/cpp/tests/linear_assignment/hungarian_test.cu index 9698b5c3335..26496e95271 100644 --- a/cpp/tests/linear_assignment/hungarian_test.cu +++ b/cpp/tests/linear_assignment/hungarian_test.cu @@ -12,7 +12,7 @@ #include #include -#include +#include #include @@ -86,7 +86,7 @@ TEST_F(HungarianTest, Bipartite4x4) raft::update_device(cost_v.begin(), cost, length, handle.get_stream()); raft::update_device(workers_v.begin(), workers, length_workers, handle.get_stream()); - cugraph::GraphCOOView g( + cugraph::legacy::GraphCOOView g( src_v.data(), dst_v.data(), cost_v.data(), num_vertices, length); float r = cugraph::hungarian(handle, g, length_workers, workers_v.data(), assignment_v.data()); @@ -128,7 +128,7 @@ TEST_F(HungarianTest, Bipartite5x5) raft::update_device(cost_v.begin(), cost, length, handle.get_stream()); raft::update_device(workers_v.begin(), workers, length_workers, handle.get_stream()); - cugraph::GraphCOOView g( + cugraph::legacy::GraphCOOView g( src_v.data(), dst_v.data(), cost_v.data(), num_vertices, length); float r = cugraph::hungarian(handle, g, length_workers, workers_v.data(), assignment_v.data()); @@ -174,7 +174,7 @@ TEST_F(HungarianTest, Bipartite4x4_multiple_answers) raft::update_device(cost_v.begin(), cost, length, handle.get_stream()); raft::update_device(workers_v.begin(), workers, length_workers, handle.get_stream()); - cugraph::GraphCOOView g( + cugraph::legacy::GraphCOOView g( src_v.data(), dst_v.data(), cost_v.data(), num_vertices, length); float r = cugraph::hungarian(handle, g, length_workers, workers_v.data(), assignment_v.data()); diff --git a/cpp/tests/sampling/random_walks_profiling.cu b/cpp/tests/sampling/random_walks_profiling.cu index 355d62e8141..46a3cf120be 100644 --- a/cpp/tests/sampling/random_walks_profiling.cu +++ 
b/cpp/tests/sampling/random_walks_profiling.cu @@ -19,7 +19,6 @@ #include #include -#include #include #include diff --git a/cpp/tests/sampling/random_walks_test.cu b/cpp/tests/sampling/random_walks_test.cu index 186c45109e9..983a0ce23d8 100644 --- a/cpp/tests/sampling/random_walks_test.cu +++ b/cpp/tests/sampling/random_walks_test.cu @@ -24,7 +24,6 @@ #include #include -#include #include #include diff --git a/cpp/tests/sampling/random_walks_utils.cuh b/cpp/tests/sampling/random_walks_utils.cuh index f2db29eb23b..5e7d95e7ceb 100644 --- a/cpp/tests/sampling/random_walks_utils.cuh +++ b/cpp/tests/sampling/random_walks_utils.cuh @@ -16,7 +16,6 @@ #pragma once #include -#include #include #include diff --git a/cpp/tests/sampling/rw_low_level_test.cu b/cpp/tests/sampling/rw_low_level_test.cu index 77c5b18499a..2d95a3672e2 100644 --- a/cpp/tests/sampling/rw_low_level_test.cu +++ b/cpp/tests/sampling/rw_low_level_test.cu @@ -24,7 +24,6 @@ #include #include -#include #include #include diff --git a/cpp/tests/traversal/bfs_test.cu b/cpp/tests/traversal/bfs_test.cu index 8cbfe0081d6..03ca1ec9455 100644 --- a/cpp/tests/traversal/bfs_test.cu +++ b/cpp/tests/traversal/bfs_test.cu @@ -95,8 +95,8 @@ class Tests_BFS : public ::testing::TestWithParam { auto csr = cugraph::test::generate_graph_csr_from_mm(directed, configuration.file_path_); cudaDeviceSynchronize(); - cugraph::GraphCSRView G = csr->view(); - G.prop.directed = directed; + cugraph::legacy::GraphCSRView G = csr->view(); + G.prop.directed = directed; ASSERT_TRUE(configuration.source_ >= 0 && (VT)configuration.source_ < G.number_of_vertices) << "Starting sources should be >= 0 and" @@ -174,7 +174,7 @@ class Tests_BFS : public ::testing::TestWithParam { // that the predecessor obtained with the GPU implementation is one of the // predecessors obtained during the C++ BFS traversal VT pred = cugraph_pred[i]; // It could be equal to -1 if the node is never reached - constexpr VT invalid_vid = cugraph::invalid_vertex_id::value; + constexpr VT invalid_vid = cugraph::legacy::invalid_vertex_id::value; if (pred == invalid_vid) { EXPECT_TRUE(ref_bfs_pred[i].empty()) << "[MISMATCH][PREDECESSOR] vaid = " << i << " cugraph had not predecessor," diff --git a/cpp/tests/traversal/sssp_test.cu b/cpp/tests/traversal/sssp_test.cu index 1903f9ad302..e221e7e3445 100644 --- a/cpp/tests/traversal/sssp_test.cu +++ b/cpp/tests/traversal/sssp_test.cu @@ -15,7 +15,7 @@ #include #include -#include +#include #include @@ -255,14 +255,14 @@ class Tests_SSSP : public ::testing::TestWithParam { ASSERT_TRUE(0); } - cugraph::GraphCOOView G_coo( + cugraph::legacy::GraphCOOView G_coo( &cooRowInd[0], &cooColInd[0], (DoRandomWeights ? 
&cooVal[0] : nullptr), num_vertices, num_edges); - auto G_unique = cugraph::coo_to_csr(G_coo); - cugraph::GraphCSRView G = G_unique->view(); + auto G_unique = cugraph::coo_to_csr(G_coo); + cugraph::legacy::GraphCSRView G = G_unique->view(); cudaDeviceSynchronize(); std::vector dist_vec; diff --git a/cpp/tests/traversal/tsp_test.cu b/cpp/tests/traversal/tsp_test.cu index 806d9dea51a..e00f2949af5 100644 --- a/cpp/tests/traversal/tsp_test.cu +++ b/cpp/tests/traversal/tsp_test.cu @@ -31,7 +31,7 @@ #include #include -#include +#include #include diff --git a/cpp/tests/tree/mst_test.cu b/cpp/tests/tree/mst_test.cu index aee88d981c1..ffbddd96eb0 100644 --- a/cpp/tests/tree/mst_test.cu +++ b/cpp/tests/tree/mst_test.cu @@ -22,7 +22,7 @@ #include #include -#include +#include #include #include @@ -105,13 +105,14 @@ class Tests_Mst : public ::testing::TestWithParam { raft::handle_t handle; std::cout << std::endl; - cugraph::GraphCOOView G_coo(&cooRowInd[0], &cooColInd[0], &cooVal[0], m, nnz); + cugraph::legacy::GraphCOOView G_coo( + &cooRowInd[0], &cooColInd[0], &cooVal[0], m, nnz); auto G_unique = cugraph::coo_to_csr(G_coo); - cugraph::GraphCSRView G(G_unique->view().offsets, - G_unique->view().indices, - G_unique->view().edge_data, - G_unique->view().number_of_vertices, - G_unique->view().number_of_edges); + cugraph::legacy::GraphCSRView G(G_unique->view().offsets, + G_unique->view().indices, + G_unique->view().edge_data, + G_unique->view().number_of_vertices, + G_unique->view().number_of_edges); cudaDeviceSynchronize(); diff --git a/cpp/tests/utilities/matrix_market_file_utilities.cu b/cpp/tests/utilities/matrix_market_file_utilities.cu index 0457cbcc918..347712d1ac3 100644 --- a/cpp/tests/utilities/matrix_market_file_utilities.cu +++ b/cpp/tests/utilities/matrix_market_file_utilities.cu @@ -219,7 +219,7 @@ int mm_to_coo(FILE* f, * @tparam */ template -std::unique_ptr> generate_graph_csr_from_mm( +std::unique_ptr> generate_graph_csr_from_mm( bool& directed, std::string mm_file) { vertex_t number_of_vertices; @@ -253,7 +253,7 @@ std::unique_ptr> generate_graph_cs "file read failure."); CUGRAPH_EXPECTS(fclose(fpin) == 0, "fclose failure."); - cugraph::GraphCOOView cooview( + cugraph::legacy::GraphCOOView cooview( &coo_row_ind[0], &coo_col_ind[0], &coo_val[0], number_of_vertices, number_of_edges); return cugraph::coo_to_csr(cooview); @@ -448,17 +448,17 @@ template int32_t mm_to_coo(FILE* f, float* cooRVal, float* cooIVal); -template std::unique_ptr> generate_graph_csr_from_mm( - bool& directed, std::string mm_file); +template std::unique_ptr> +generate_graph_csr_from_mm(bool& directed, std::string mm_file); -template std::unique_ptr> generate_graph_csr_from_mm( - bool& directed, std::string mm_file); +template std::unique_ptr> +generate_graph_csr_from_mm(bool& directed, std::string mm_file); -template std::unique_ptr> generate_graph_csr_from_mm( - bool& directed, std::string mm_file); +template std::unique_ptr> +generate_graph_csr_from_mm(bool& directed, std::string mm_file); -template std::unique_ptr> generate_graph_csr_from_mm( - bool& directed, std::string mm_file); +template std::unique_ptr> +generate_graph_csr_from_mm(bool& directed, std::string mm_file); template std::tuple, rmm::device_uvector> diff --git a/cpp/tests/utilities/test_utilities.hpp b/cpp/tests/utilities/test_utilities.hpp index 09da0556e44..4ad450dde0a 100644 --- a/cpp/tests/utilities/test_utilities.hpp +++ b/cpp/tests/utilities/test_utilities.hpp @@ -16,7 +16,7 @@ #pragma once #include -#include +#include #include #include @@ 
-89,7 +89,7 @@ int mm_to_coo(FILE* f, * @tparam */ template -std::unique_ptr> generate_graph_csr_from_mm( +std::unique_ptr> generate_graph_csr_from_mm( bool& directed, std::string mm_file); // Define RAPIDS_DATASET_ROOT_DIR using a preprocessor variable to diff --git a/python/cugraph/structure/graph_primtypes.pxd b/python/cugraph/structure/graph_primtypes.pxd index e0db6c31fca..bb43cc8674b 100644 --- a/python/cugraph/structure/graph_primtypes.pxd +++ b/python/cugraph/structure/graph_primtypes.pxd @@ -23,17 +23,17 @@ from libcpp.vector cimport vector from cugraph.raft.common.handle cimport * from rmm._lib.device_buffer cimport device_buffer -cdef extern from "cugraph/graph.hpp" namespace "cugraph": +cdef extern from "cugraph/legacy/graph.hpp" namespace "cugraph::legacy": ctypedef enum PropType: - PROP_UNDEF "cugraph::PROP_UNDEF" - PROP_FALSE "cugraph::PROP_FALSE" - PROP_TRUE "cugraph::PROP_TRUE" + PROP_UNDEF "cugraph::legacy::PROP_UNDEF" + PROP_FALSE "cugraph::legacy::PROP_FALSE" + PROP_TRUE "cugraph::legacy::PROP_TRUE" ctypedef enum DegreeDirection: - DIRECTION_IN_PLUS_OUT "cugraph::DegreeDirection::IN_PLUS_OUT" - DIRECTION_IN "cugraph::DegreeDirection::IN" - DIRECTION_OUT "cugraph::DegreeDirection::OUT" + DIRECTION_IN_PLUS_OUT "cugraph::legacy::DegreeDirection::IN_PLUS_OUT" + DIRECTION_IN "cugraph::legacy::DegreeDirection::IN" + DIRECTION_OUT "cugraph::legacy::DegreeDirection::OUT" struct GraphProperties: bool directed From 2988ed5b5bfb0b7aa9eac94a6651865d4fcc828c Mon Sep 17 00:00:00 2001 From: Iroy30 <41401566+Iroy30@users.noreply.github.com> Date: Tue, 22 Jun 2021 12:34:29 -0500 Subject: [PATCH 305/343] Fix vertex partition offsets (#1680) Authors: - https://github.com/Iroy30 - Mark Harris (https://github.com/harrism) - AJ Schmidt (https://github.com/ajschmidt8) Approvers: - Brad Rees (https://github.com/BradReesWork) - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/1680 --- python/cugraph/dask/common/input_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cugraph/dask/common/input_utils.py b/python/cugraph/dask/common/input_utils.py index 0248f429a09..f7f866e1211 100644 --- a/python/cugraph/dask/common/input_utils.py +++ b/python/cugraph/dask/common/input_utils.py @@ -191,9 +191,9 @@ def _workers_to_parts(futures): :param futures: list of (worker, part) tuples :return: """ - w_to_p_map = OrderedDict() + w_to_p_map = OrderedDict.fromkeys(Comms.get_workers()) for w, p in futures: - if w not in w_to_p_map: + if w_to_p_map[w] is None: w_to_p_map[w] = [] w_to_p_map[w].append(p) return w_to_p_map From 0cbbdd8625b9c0452c18f082777e97eea401bb8f Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Wed, 23 Jun 2021 08:30:52 -0400 Subject: [PATCH 306/343] Use the 21.08 branch of rapids-cmake as rmm requires it (#1683) Now that rmm uses rapids-cmake we need to update to the 21.08 branch to get the new `rapids_cmake_write_version_file` function Authors: - Robert Maynard (https://github.com/robertmaynard) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) URL: https://github.com/rapidsai/cugraph/pull/1683 --- cpp/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index ae0b524689f..5db9a939945 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -19,7 +19,7 @@ include(FetchContent) FetchContent_Declare( rapids-cmake GIT_REPOSITORY https://github.com/rapidsai/rapids-cmake.git - GIT_TAG origin/branch-21.06 + GIT_TAG origin/branch-21.08 ) 
FetchContent_MakeAvailable(rapids-cmake) From 4787a98f5b6e7f9b6ecad2459f256d15a255d2f2 Mon Sep 17 00:00:00 2001 From: Chuck Hastings <45364586+ChuckHastings@users.noreply.github.com> Date: Thu, 24 Jun 2021 13:47:31 -0400 Subject: [PATCH 307/343] Delete legacy renumbering implementation (#1681) This code is no longer used. Authors: - Chuck Hastings (https://github.com/ChuckHastings) Approvers: - Seunghwa Kang (https://github.com/seunghwak) - Brad Rees (https://github.com/BradReesWork) - Andrei Schaffer (https://github.com/aschaffer) - https://github.com/Iroy30 URL: https://github.com/rapidsai/cugraph/pull/1681 --- cpp/CMakeLists.txt | 1 - cpp/include/cugraph/functions.hpp | 34 - cpp/src/converters/renumber.cu | 68 -- cpp/src/converters/renumber.cuh | 321 ---------- cpp/src/sort/bitonic.cuh | 546 ----------------- cpp/src/utilities/heap.cuh | 222 ------- cpp/tests/CMakeLists.txt | 4 - cpp/tests/renumber/renumber_test.cu | 579 ------------------ python/cugraph/structure/graph_primtypes.pxd | 10 - .../structure/graph_primtypes_wrapper.pyx | 40 -- 10 files changed, 1825 deletions(-) delete mode 100644 cpp/src/converters/renumber.cu delete mode 100644 cpp/src/converters/renumber.cuh delete mode 100644 cpp/src/sort/bitonic.cuh delete mode 100644 cpp/src/utilities/heap.cuh delete mode 100644 cpp/tests/renumber/renumber_test.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 5db9a939945..ea1be5d01b8 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -169,7 +169,6 @@ add_library(cugraph SHARED src/link_prediction/jaccard.cu src/link_prediction/overlap.cu src/layout/force_atlas2.cu - src/converters/renumber.cu src/converters/COOtoCSR.cu src/community/spectral_clustering.cu src/community/louvain.cu diff --git a/cpp/include/cugraph/functions.hpp b/cpp/include/cugraph/functions.hpp index a88d6cd88c7..bb1adcf818b 100644 --- a/cpp/include/cugraph/functions.hpp +++ b/cpp/include/cugraph/functions.hpp @@ -44,40 +44,6 @@ std::unique_ptr> coo_to_csr( legacy::GraphCOOView const &graph, rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); -/** - * @brief Renumber source and destination indices - * - * Renumber source and destination indexes to be a dense numbering, - * using contiguous values between 0 and number of vertices minus 1. - * - * @throws cugraph::logic_error when an error occurs. - * - * @tparam VT_IN type of vertex index input - * @tparam VT_OUT type of vertex index output - * @tparam ET type of edge index - * - * @param[in] number_of_edges number of edges in the graph - * @param[in] src Pointer to device memory containing source vertex ids - * @param[in] dst Pointer to device memory containing destination vertex ids - * @param[out] src_renumbered Pointer to device memory containing the output source vertices. - * @param[out] dst_renumbered Pointer to device memory containing the output destination vertices. 
- * @param[out] map_size Pointer to local memory containing the number of elements in the - * renumbering map - * @param[in] mr Memory resource used to allocate the returned graph - * - * @return Unique pointer to renumbering map - * - */ -template -std::unique_ptr renumber_vertices( - ET number_of_edges, - VT_IN const *src, - VT_IN const *dst, - VT_OUT *src_renumbered, - VT_OUT *dst_renumbered, - ET *map_size, - rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); - /** * @brief Broadcast using handle communicator * diff --git a/cpp/src/converters/renumber.cu b/cpp/src/converters/renumber.cu deleted file mode 100644 index 9aedbc70e8b..00000000000 --- a/cpp/src/converters/renumber.cu +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "renumber.cuh" - -namespace cugraph { - -template -std::unique_ptr renumber_vertices( - ET number_of_edges, - VT_IN const *src, - VT_IN const *dst, - VT_OUT *src_renumbered, - VT_OUT *dst_renumbered, - ET *map_size, - rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) - -{ - // - // For now, let's just specify a default value of the hash size. - // This should be configurable. - // - // FIXME: cudf has a hash table implementation (moving to cuCollections) - // that is dynamic. We should use it instead, it will be faster - // and dynamically adjust to data sizes. - // - int hash_size = 8191; - - return cugraph::detail::renumber_vertices(number_of_edges, - src, - dst, - src_renumbered, - dst_renumbered, - map_size, - cugraph::detail::HashFunctionObjectInt(hash_size), - thrust::less(), - mr); -} - -template std::unique_ptr renumber_vertices(int32_t, - int64_t const *, - int64_t const *, - int32_t *, - int32_t *, - int32_t *, - rmm::mr::device_memory_resource *); -template std::unique_ptr renumber_vertices(int32_t, - int32_t const *, - int32_t const *, - int32_t *, - int32_t *, - int32_t *, - rmm::mr::device_memory_resource *); - -} // namespace cugraph diff --git a/cpp/src/converters/renumber.cuh b/cpp/src/converters/renumber.cuh deleted file mode 100644 index ccf4e6f62c2..00000000000 --- a/cpp/src/converters/renumber.cuh +++ /dev/null @@ -1,321 +0,0 @@ -/* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#define CUB_STDERR - -#include - -#include - -#include -#include -#include -#include - -#include -#include - -#include -#include -#include "sort/bitonic.cuh" - -namespace cugraph { -namespace detail { - -namespace renumber { -typedef uint32_t hash_type; -typedef uint32_t index_type; -} // namespace renumber - -class HashFunctionObjectInt { - public: - HashFunctionObjectInt(renumber::hash_type hash_size) : hash_size_(hash_size) {} - - template - __device__ __inline__ renumber::hash_type operator()(const VertexIdType &vertex_id) const - { - return ((vertex_id % hash_size_) + hash_size_) % hash_size_; - } - - renumber::hash_type getHashSize() const { return hash_size_; } - - private: - renumber::hash_type hash_size_; -}; - -/** - * @brief Renumber vertices to a dense numbering (0..vertex_size-1) - * - * This is a templated function so it can take 32 or 64 bit integers. The - * intention is to take source and destination vertex ids that might be - * sparsely scattered across the range and push things down to a dense - * numbering. - * - * Arrays src, dst, src_renumbered, dst_renumbered and numbering_map are - * assumed to be pre-allocated. numbering_map is best safely allocated - * to store 2 * size vertices. - * - * @param[in] size Number of edges - * @param[in] src List of source vertices - * @param[in] dst List of dest vertices - * @param[out] src_renumbered List of source vertices, renumbered - * @param[out] dst_renumbered List of dest vertices, renumbered - * @param[out] vertex_size Number of unique vertices - * @param[out] numbering_map Map of new vertex id to original vertex id. numbering_map[newId] - * = oldId - * - */ -template -std::unique_ptr renumber_vertices(T_size size, - const T_in *src, - const T_in *dst, - T_out *src_renumbered, - T_out *dst_renumbered, - T_size *map_size, - Hash_t hash, - Compare_t compare, - rmm::mr::device_memory_resource *mr) -{ - // - // This function will allocate numbering_map to be the exact size needed - // (user doesn't know a priori how many unique vertices there are. - // - // Here's the idea: Create a hash table. Since we're dealing with integers, - // we can take the integer modulo some prime p to create hash buckets. Then - // we dedupe the hash buckets to create a deduped set of entries. This hash - // table can then be used to renumber everything. - // - // We need 2 arrays for hash indexes, and one array for data - // - cudaStream_t stream = nullptr; - - renumber::hash_type hash_size = hash.getHashSize(); - - rmm::device_vector hash_data_v(2 * size); - rmm::device_vector hash_bins_start_v(1 + hash_size, - renumber::index_type{0}); - rmm::device_vector hash_bins_end_v(1 + hash_size); - - T_in *hash_data = hash_data_v.data().get(); - renumber::index_type *hash_bins_start = hash_bins_start_v.data().get(); - renumber::index_type *hash_bins_end = hash_bins_end_v.data().get(); - - // - // Pass 1: count how many vertex ids end up in each hash bin - // - thrust::for_each(rmm::exec_policy(stream)->on(stream), - src, - src + size, - [hash_bins_start, hash] __device__(T_in vid) { - atomicAdd(hash_bins_start + hash(vid), renumber::index_type{1}); - }); - - thrust::for_each(rmm::exec_policy(stream)->on(stream), - dst, - dst + size, - [hash_bins_start, hash] __device__(T_in vid) { - atomicAdd(hash_bins_start + hash(vid), renumber::index_type{1}); - }); - - // - // Compute exclusive sum and copy it into both hash_bins_start and - // hash_bins_end. 
hash_bins_end will be used to populate the - // hash_data array and at the end will identify the end of - // each range. - // - thrust::exclusive_scan(rmm::exec_policy(stream)->on(stream), - hash_bins_start, - hash_bins_start + hash_size + 1, - hash_bins_end); - - CUDA_TRY(cudaMemcpy(hash_bins_start, - hash_bins_end, - (hash_size + 1) * sizeof(renumber::hash_type), - cudaMemcpyDeviceToDevice)); - - // - // Pass 2: Populate hash_data with data from the hash bins. - // - thrust::for_each(rmm::exec_policy(stream)->on(stream), - src, - src + size, - [hash_bins_end, hash_data, hash] __device__(T_in vid) { - uint32_t hash_index = hash(vid); - renumber::index_type hash_offset = atomicAdd(&hash_bins_end[hash_index], 1); - hash_data[hash_offset] = vid; - }); - - thrust::for_each(rmm::exec_policy(stream)->on(stream), - dst, - dst + size, - [hash_bins_end, hash_data, hash] __device__(T_in vid) { - uint32_t hash_index = hash(vid); - renumber::index_type hash_offset = atomicAdd(&hash_bins_end[hash_index], 1); - hash_data[hash_offset] = vid; - }); - - // - // Now that we have data in hash bins, we'll do a segmented sort of the has bins - // to sort each bin. This will allow us to identify duplicates (all duplicates - // are in the same hash bin so they will end up sorted consecutively). - // - renumber::index_type size_as_int = size; - cugraph::sort::bitonic::segmented_sort( - hash_size, size_as_int, hash_bins_start, hash_bins_end, hash_data, compare, stream); - - // - // Now we rinse and repeat. hash_data contains the data organized into sorted - // hash bins. This allows us to identify duplicates. We'll start over but - // we'll skip the duplicates when we repopulate the hash table. - // - - // - // Pass 3: count how many vertex ids end up in each hash bin after deduping - // - CUDA_TRY(cudaMemset(hash_bins_start, 0, (1 + hash_size) * sizeof(renumber::index_type))); - - thrust::for_each( - rmm::exec_policy(stream)->on(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(2 * size), - [hash_data, hash_bins_start, hash, compare, size] __device__(renumber::index_type idx) { - // - // Two items (a and b) are equal if - // compare(a,b) is false and compare(b,a) - // is also false. If either is true then - // a and b are not equal. - // - // Note that if there are k duplicate - // instances of an entry, only the LAST - // entry will be counted - // - bool unique = ((idx + 1) == (2 * size)) || compare(hash_data[idx], hash_data[idx + 1]) || - compare(hash_data[idx + 1], hash_data[idx]); - - if (unique) atomicAdd(hash_bins_start + hash(hash_data[idx]), renumber::index_type{1}); - }); - - // - // Compute exclusive sum and copy it into both hash_bins_start and - // hash bins end. 
- // - thrust::exclusive_scan(rmm::exec_policy(stream)->on(stream), - hash_bins_start, - hash_bins_start + hash_size + 1, - hash_bins_end); - - CUDA_TRY(cudaMemcpy(hash_bins_start, - hash_bins_end, - (hash_size + 1) * sizeof(renumber::hash_type), - cudaMemcpyDeviceToDevice)); - - // - // The last entry in the array (hash_bins_end[hash_size]) is the - // total number of unique vertices - // - renumber::index_type temp = 0; - CUDA_TRY(cudaMemcpy( - &temp, hash_bins_end + hash_size, sizeof(renumber::index_type), cudaMemcpyDeviceToHost)); - *map_size = temp; - - rmm::device_buffer numbering_map(temp * sizeof(T_in), stream, mr); - T_in *local_numbering_map = static_cast(numbering_map.data()); - - // - // Pass 4: Populate hash_data with data from the hash bins after deduping - // - thrust::for_each(rmm::exec_policy(stream)->on(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(2 * size), - [hash_bins_end, hash_data, local_numbering_map, hash, compare, size] __device__( - renumber::index_type idx) { - bool unique = ((idx + 1) == (2 * size)) || - compare(hash_data[idx], hash_data[idx + 1]) || - compare(hash_data[idx + 1], hash_data[idx]); - - if (unique) { - uint32_t hash_index = hash(hash_data[idx]); - renumber::index_type hash_offset = atomicAdd(&hash_bins_end[hash_index], 1); - local_numbering_map[hash_offset] = hash_data[idx]; - } - }); - - // - // At this point, hash_bins_start and numbering_map partition the - // unique data into a hash table. - // - - // - // If we do a segmented sort now, we can do the final lookups. - // - size_as_int = size; - cugraph::sort::bitonic::segmented_sort( - hash_size, size_as_int, hash_bins_start, hash_bins_end, local_numbering_map, compare, stream); - - // - // Renumber the input. For each vertex, identify the - // hash bin, and then search the hash bin for the - // record that matches, the relative offset between that - // element and the beginning of the array is the vertex - // id in the renumbered map. - // - thrust::for_each(rmm::exec_policy(stream)->on(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(size), - [local_numbering_map, - hash_bins_start, - hash_bins_end, - hash, - src, - src_renumbered, - compare] __device__(renumber::index_type idx) { - renumber::hash_type tmp = hash(src[idx]); - const T_in *id = - thrust::lower_bound(thrust::seq, - local_numbering_map + hash_bins_start[tmp], - local_numbering_map + hash_bins_end[tmp], - src[idx], - compare); - src_renumbered[idx] = id - local_numbering_map; - }); - - thrust::for_each(rmm::exec_policy(stream)->on(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(size), - [local_numbering_map, - hash_bins_start, - hash_bins_end, - hash, - dst, - dst_renumbered, - compare] __device__(renumber::index_type idx) { - renumber::hash_type tmp = hash(dst[idx]); - const T_in *id = - thrust::lower_bound(thrust::seq, - local_numbering_map + hash_bins_start[tmp], - local_numbering_map + hash_bins_end[tmp], - dst[idx], - compare); - dst_renumbered[idx] = id - local_numbering_map; - }); - - return std::make_unique(std::move(numbering_map)); -} - -} // namespace detail -} // namespace cugraph diff --git a/cpp/src/sort/bitonic.cuh b/cpp/src/sort/bitonic.cuh deleted file mode 100644 index b1b19bafdf0..00000000000 --- a/cpp/src/sort/bitonic.cuh +++ /dev/null @@ -1,546 +0,0 @@ -// -*-c++-*- - -/* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Bitonic sort implementation -// Author: Chuck Hastings charlesh@nvidia.com - -// TODO: Read a paper (Hagen Peters 2011) that suggests some -// ways to optimize this. Need to shift into a kernel -// and then organize to support multiple passes in -// a single kernel call. This should reduce kernel -// launch overhead and the number of memory references, -// which should drive down the overall time. -// - -#ifndef BITONIC_SORT_H -#define BITONIC_SORT_H - -#include -#include - -#include -#include - -namespace cugraph { -namespace sort { - -namespace bitonic { -/* - * This implementation is based upon the bitonic sort technique. - * This should be pretty efficient in a SIMT environment. - */ -namespace detail { -/** - * @brief Compare two items, if the compare functor returns true - * then swap them. - * - * @param a - reference to the first item - * @param b - reference to the second item - * @param compare - reference to a comparison functor - */ -template -inline void __device__ compareAndSwap(ValueT &a, ValueT &b, CompareT &compare) -{ - if (!compare(a, b)) { thrust::swap(a, b); } -} - -/* - * @brief perform repartitioning of two sorted partitions. This - * is analagous to the bitonic merge step. But it only - * performs the compare and swap portion of the bitonic - * merge. The subsequent sorts are handled externally. - * - * The repartition assumes that the data is segregated - * into partitions of binSize. So if there are 8 elements - * and a bin size of 2 then the array will be partitioned - * into 4 bins of size 2. Each bin is assumed to be - * sorted. The repartition takes consecutive bins and - * repartitions them so that the first bin contains the - * low elements and the second bin contains the high elements. - * - * @param array - the array containing the data we need to repartition - * @param count - the number of elements in the array - * @param binSize - the size of the bin - * @param compare - comparison functor - */ -template -void repartition(ValueT *array, int count, int binSize, CompareT &compare) -{ - thrust::for_each(thrust::make_counting_iterator(0), - thrust::make_counting_iterator(count / 2), - - [array, count, binSize, compare] __device__(int idx) { - // - // Identify which elements in which partition - // we are responsible for comparing and swapping - // - // We're running count/2 iterations. Each iteration - // needs to operate on a pair of elements. Consider - // the pairs of partitions, this will let us determine - // which elements we compare. - // - int bi_partition = idx / binSize; - - // - // bi_partition identifies which pair of partitions - // we're operating on. Out of each bin we're only - // going to do binSize comparisons, so the first - // element in the comparison will be based on - // idx % binSize. - // - int offset = idx % binSize; - - // - // First element is easy. - // Second element is "easy" but we'll fix - // special cases below. 
- // - int i = bi_partition * (binSize * 2) + offset; - int j = (bi_partition + 1) * (binSize * 2) - 1 - offset; - - // - // The last partition pair is the problem. - // There are several cases: - // 1) Both partitions are full. This - // is the easy case, we can just - // compare and swap elements - // 2) First partition is full, the second - // partition is not full (possibly - // empty). In this case, we only - // compare some of the elements. - // 3) First partition is not full, there - // is no second partition. In this - // case we actually don't have any - // work to do. - // - // This should be a simple check. If the - // second element is beyond the end of - // the array then there is nothing to compare - // and swap. Note that if the first - // element is beyond the end of the array - // there is also nothing to compare and swap, - // but if the first element is beyond the - // end of the array then the second element - // will also be beyond the end of the array. - // - if (j < count) compareAndSwap(array[i], array[j], compare); - }); -} - -/* - * @brief perform shuffles. After the repartition we need - * to perform shuffles of the halves to get things in - * order. - * - * @param array - the array containing the data we need to repartition - * @param count - the number of elements in the array - * @param binSize - the size of the bin - * @param compare - comparison functor - */ -template -void shuffles(ValueT *array, int count, int binSize, CompareT &compare) -{ - thrust::for_each(thrust::make_counting_iterator(0), - thrust::make_counting_iterator((count + 1) / 2), - [array, count, binSize, compare] __device__(int idx) { - // - // Identify which elements in which partition - // we are responsible for comparing and swapping - // - // We're running count/2 iterations. Each iteration - // needs to operate on a pair of elements. Consider - // the pairs of partitions, this will let us determine - // which elements we compare. - // - int bi_partition = idx / binSize; - - // - // bi_partition identifies which pair of partitions - // we're operating on. Out of each bin we're only - // going to do binSize comparisons, so the first - // element in the comparison will be based on - // idx % binSize. - // - int offset = idx % binSize; - - // - // First element is easy. - // Second element is "easy" i + binSize. - // - int i = bi_partition * (binSize * 2) + offset; - int j = i + binSize; - - // - // If the second element is beyond the end of - // the array then there is nothing to compare - // and swap. - // - if (j < count) compareAndSwap(array[i], array[j], compare); - }); -} - -/* - * @brief perform repartitioning of two sorted partitions in the - * segmented sort case. - * - * The repartition assumes that the data is segregated - * into partitions of binSize. So if there are 8 elements - * and a bin size of 2 then the array will be partitioned - * into 4 bins of size 2. Each bin is assumed to be - * sorted. The repartition takes consecutive bins and - * repartitions them so that the first bin contains the - * low elements and the second bin contains the high elements. 
- * - * @param array - the array containing the data we need to repartition - * @param count - the number of elements in the array - * @param binSize - the size of the bin - * @param compare - comparison functor - */ -template -void repartition_segmented(const IndexT *d_begin_offsets, - const IndexT *d_end_offsets, - ValueT *d_items, - IndexT start, - IndexT stop, - IndexT *d_grouped_bins, - int binSize, - int max_count, - int bin_pairs, - CompareT &compare) -{ - thrust::for_each(thrust::device, - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(max_count / 2), - [d_begin_offsets, - d_end_offsets, - d_items, - start, - stop, - d_grouped_bins, - bin_pairs, - binSize, - compare] __device__(int idx) { - // - // idx needs to be mapped into the correct place - // - int entry = idx / bin_pairs; - int entry_idx = idx % bin_pairs; - int base = d_begin_offsets[d_grouped_bins[start + entry]]; - int count = d_end_offsets[d_grouped_bins[start + entry]] - base; - - // - // Identify which elements in which partition - // we are responsible for comparing and swapping - // - // We're running count/2 iterations. Each iteration - // needs to operate on a pair of elements. Consider - // the pairs of partitions, this will let us determine - // which elements we compare. - // - int bi_partition = entry_idx / binSize; - - // - // bi_partition identifies which pair of partitions - // we're operating on. Out of each bin we're only - // going to do binSize comparisons, so the first - // element in the comparison will be based on - // idx % binSize. - // - int offset = entry_idx % binSize; - - // - // First element is easy. - // Second element is "easy" but we'll fix - // special cases below. - // - int i = bi_partition * (binSize * 2) + offset; - int j = (bi_partition + 1) * (binSize * 2) - 1 - offset; - - // - // The last partition pair is the problem. - // There are several cases: - // 1) Both partitions are full. This - // is the easy case, we can just - // compare and swap elements - // 2) First partition is full, the second - // partition is not full (possibly - // empty). In this case, we only - // compare some of the elements. - // 3) First partition is not full, there - // is no second partition. In this - // case we actually don't have any - // work to do. - // - // This should be a simple check. If the - // second element is beyond the end of - // the array then there is nothing to compare - // and swap. Note that if the first - // element is beyond the end of the array - // there is also nothing to compare and swap, - // but if the first element is beyond the - // end of the array then the second element - // will also be beyond the end of the array. - // - if (j < count) { - compareAndSwap(d_items[base + i], d_items[base + j], compare); - } - }); -} - -/* - * @brief perform shuffles. After the repartition we need - * to perform shuffles of the halves to get things in - * order. - * - * @param rowOffsets - the row offsets identifying the segments - * @param colIndices - the values to sort within the segments - * @param start - position within the grouped bins where we - * start this pass - * @param stop - position within the grouped bins where we stop - * this pass - * @param d_grouped_bins - lrb grouped bins. 
All bins between - * start and stop are in the same lrb bin - * @param binSize - the bitonic bin size for this pass of the shuffles - * @param max_count - maximum number of elements possible for - * this call - * @param bin_pairs - the number of bin pairs - * @param compare - the comparison functor - */ -template -void shuffles_segmented(const IndexT *d_begin_offsets, - const IndexT *d_end_offsets, - ValueT *d_items, - IndexT start, - IndexT stop, - IndexT *d_grouped_bins, - int binSize, - long max_count, - int bin_pairs, - CompareT &compare) -{ - thrust::for_each(thrust::make_counting_iterator(0), - thrust::make_counting_iterator(max_count / 2), - [d_begin_offsets, - d_end_offsets, - d_items, - start, - stop, - d_grouped_bins, - compare, - max_count, - bin_pairs, - binSize] __device__(int idx) { - // - // idx needs to be mapped into the correct place - // - int entry = idx / bin_pairs; - int entry_idx = idx % bin_pairs; - int base = d_begin_offsets[d_grouped_bins[start + entry]]; - int count = d_end_offsets[d_grouped_bins[start + entry]] - base; - - // - // Identify which elements in which partition - // we are responsible for comparing and swapping - // - // We're running count/2 iterations. Each iteration - // needs to operate on a pair of elements. Consider - // the pairs of partitions, this will let us determine - // which elements we compare. - // - int bi_partition = entry_idx / binSize; - - // - // bi_partition identifies which pair of partitions - // we're operating on. Out of each bin we're only - // going to do binSize comparisons, so the first - // element in the comparison will be based on - // idx % binSize. - // - int offset = entry_idx % binSize; - - // - // First element is easy. - // Second element is "easy" i + binSize. - // - int i = bi_partition * (binSize * 2) + offset; - int j = i + binSize; - - // - // If the second element is beyond the end of - // the array then there is nothing to compare - // and swap. - // - if (j < count) compareAndSwap(d_items[base + i], d_items[base + j], compare); - }); -} -} // namespace detail - -template -void sort(ValueT *array, int count, CompareT &compare) -{ - for (int i = 1; i < count; i *= 2) { - detail::repartition(array, count, i, compare); - - for (int j = i / 2; j > 0; j /= 2) { detail::shuffles(array, count, j, compare); } - } -} - -/** - * @brief Perform a segmented sort. This function performs a sort - * on each segment of the specified input. This sort is done - * in place, so the d_items array is modified during this call. - * Sort is done according to the (optionally) specified - * comparison function. - * - * Note that this function uses O(num_segments) temporary - * memory during execution. - * - * @param [in] num_segments - the number of segments that the items array is divided into - * @param [in] num_items - the number of items in the array - * @param [in] d_begin_offsets - device array containing the offset denoting the start - * of each segment - * @param [in] d_end_offsets - device array containing the offset denoting the end - * of each segment. - * @param [in/out] d_items - device array containing the items to sort - * @param [in] compare - [optional] comparison function. Default is thrust::less. - * @param [in] stream - [optional] CUDA stream to launch kernels with. Default is stream 0. 
- * - * @return error code - */ -template -void segmented_sort(IndexT num_segments, - IndexT num_items, - const IndexT *d_begin_offsets, - const IndexT *d_end_offsets, - ValueT *d_items, - CompareT compare = thrust::less(), - cudaStream_t stream = nullptr) -{ - // - // NOTE: This should probably be computed somehow. At the moment - // we are limited to 32 bits because of memory sizes. - // - int lrb_size = 32; - IndexT lrb[lrb_size + 1]; - - rmm::device_vector lrb_v(lrb_size + 1); - rmm::device_vector grouped_bins_v(num_segments + 1); - - IndexT *d_lrb = lrb_v.data().get(); - IndexT *d_grouped_bins = grouped_bins_v.data().get(); - - CUDA_TRY(cudaMemset(d_lrb, 0, (lrb_size + 1) * sizeof(IndexT))); - - // - // First we'll count how many entries go in each bin - // - thrust::for_each(thrust::make_counting_iterator(0), - thrust::make_counting_iterator(num_segments), - [d_begin_offsets, d_end_offsets, d_lrb] __device__(int idx) { - int size = d_end_offsets[idx] - d_begin_offsets[idx]; - // - // NOTE: If size is 0 or 1 then no - // sorting is required, so we'll - // eliminate those bins here - // - if (size > 1) atomicAdd(d_lrb + __clz(size), 1); - }); - - // - // Exclusive sum will identify where each bin begins - // - thrust::exclusive_scan( - rmm::exec_policy(stream)->on(stream), d_lrb, d_lrb + (lrb_size + 1), d_lrb); - - // - // Copy the start of each bin to local memory - // - CUDA_TRY(cudaMemcpy(lrb, d_lrb, (lrb_size + 1) * sizeof(IndexT), cudaMemcpyDeviceToHost)); - - // - // Now we'll populate grouped_bins. This will corrupt - // d_lrb, but we've already copied it locally. - // - thrust::for_each(thrust::make_counting_iterator(0), - thrust::make_counting_iterator(num_segments), - [d_begin_offsets, d_end_offsets, d_lrb, d_grouped_bins] __device__(int idx) { - int size = d_end_offsets[idx] - d_begin_offsets[idx]; - if (size > 1) { - int pos = atomicAdd(d_lrb + __clz(size), 1); - d_grouped_bins[pos] = idx; - } - }); - - // - // At this point, d_grouped_bins contains the index of the - // different segments, ordered into log2 bins. - // - - // - // Now we're ready to go. - // - // For simplicity (at least for now), let's just - // iterate over each lrb bin. Note that the larger - // the index i, the smaller the size of each bin... but - // there will likely be many more inhabitants of that bin. - // - for (int i = 0; i < lrb_size; ++i) { - int size = lrb[i + 1] - lrb[i]; - if (size > 0) { - // - // There are inhabitants of this lrb range - // - // max_count will be used to drive the bitonic - // passes (1, 2, 4, 8, ... up to max_count) - // - int max_count = 1 << (lrb_size - i); - - for (int j = 1; j < max_count; j *= 2) { - detail::repartition_segmented(d_begin_offsets, - d_end_offsets, - d_items, - lrb[i], - lrb[i + 1], - d_grouped_bins, - j, - size * max_count, - max_count / 2, - compare); - - for (int k = j / 2; k > 0; k /= 2) { - detail::shuffles_segmented(d_begin_offsets, - d_end_offsets, - d_items, - lrb[i], - lrb[i + 1], - d_grouped_bins, - k, - size * max_count, - max_count / 2, - compare); - } - } - } - } -} - -} // namespace bitonic -} // namespace sort -} // namespace cugraph - -#endif diff --git a/cpp/src/utilities/heap.cuh b/cpp/src/utilities/heap.cuh deleted file mode 100644 index 0747a658324..00000000000 --- a/cpp/src/utilities/heap.cuh +++ /dev/null @@ -1,222 +0,0 @@ -// -*-c++-*- - -/* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Utilities to treat array as a heap -// Author: Chuck Hastings charlesh@nvidia.com - -#ifndef HEAP_H -#define HEAP_H - -namespace cugraph { -namespace detail { - -namespace heap { -/* - * Our goal here is to treat a C-style array indexed - * from 0 to n-1 as a heap. The heap is a binary tress - * structure where the root of each tree is the smallest - * (or largest) value in that subtree. - * - * This is a completely serial implementation. The intention - * from a parallelism perspective would be to use this on - * a block of data assigned to a particular GPU (or CPU) thread. - * - * These functions will allow you to use an existing - * c-style array (host or device side) and manipulate - * it as a heap. - * - * Note, the heap will be represented like this - the - * shape indicates the binary tree structure, the element - * indicates the index of the array that is associated - * with the element. This diagram will help understand - * the parent/child calculations defined below. - * - * 0 - * 1 2 - * 3 4 5 6 - * 7 8 9 10 11 12 13 14 - * - * So element 0 is the root of the tree, element 1 is the - * left child of 0, element 2 is the right child of 0, etc. - */ - -namespace detail { -/** - * @brief Identify the parent index of the specified index. - * NOTE: This function does no bounds checking, so - * the parent of 0 is 0. - * - * See the above documentation for a picture to describe - * the tree. - * - * IndexT is a templated integer type of the index - * - * @param[in] index - the current array index - * @return the index of the parent of the current index - */ -template -inline IndexT __host__ __device__ parent(IndexT index) -{ - static_assert(std::is_integral::value, "Index must be of an integral type"); - - return ((index + 1) / 2) - 1; -} - -/** - * @brief Identify the left child index of the specified index. - * NOTE: This function does no bounds checking, so - * the left child computed might be out of bounds. - * - * See the above documentation for a picture to describe - * the tree. - * - * IndexT is a templated integer type of the index - * - * @param[in] index - the current array index - * @return the index of the left child of the current index - */ -template -inline IndexT __host__ __device__ left_child(IndexT index) -{ - static_assert(std::is_integral::value, "Index must be of an integral type"); - - return ((index + 1) * 2 - 1); -} - -/** - * @brief Identify the right child index of the specified index. - * NOTE: This function does no bounds checking, so - * the right child computed might be out of bounds. - * - * See the above documentation for a picture to describe - * the tree. 
- * - * IndexT is a templated integer type of the index - * - * @param[in] index - the current array index - * @return the index of the right child of the current index - */ -template -inline IndexT __host__ __device__ right_child(IndexT index) -{ - static_assert(std::is_integral::value, "Index must be of an integral type"); - - return (index + 1) * 2; -} -} // namespace detail - -/** - * @brief Reorder an existing array of elements into a heap - * - * ArrayT is a templated type of the array elements - * IndexT is a templated integer type of the index - * CompareT is a templated compare function - * - * @param[in, out] array - the existing array - * @param[in] size - the number of elements in the existing array - * @param[in] compare - the comparison function to use - * - */ -template -inline void __host__ __device__ heapify(ArrayT *array, IndexT size, CompareT compare) -{ - static_assert(std::is_integral::value, "Index must be of an integral type"); - - // - // We want to order ourselves as a heap. This is accomplished by starting - // at the end and for each element, compare with its parent and - // swap if necessary. We repeat this until there are no more swaps - // (should take no more than log2(size) iterations). - // - IndexT count_swaps = 1; - while (count_swaps > 0) { - count_swaps = 0; - for (IndexT i = size - 1; i > 0; --i) { - IndexT p = detail::parent(i); - - if (compare(array[i], array[p])) { - thrust::swap(array[i], array[p]); - ++count_swaps; - } - } - } -} - -/** - * @brief Pop the top element off of the heap. Note that the caller - * should decrement the size - the last element in the - * array is no longer used. - * - * ArrayT is a templated type of the array elements - * IndexT is a templated integer type of the index - * CompareT is a templated compare function - * - * @return - the top of the heap. - */ -template -inline ArrayT __host__ __device__ heap_pop(ArrayT *array, IndexT size, CompareT compare) -{ - static_assert(std::is_integral::value, "Index must be of an integral type"); - - // - // Swap the top of the array with the last element - // - --size; - thrust::swap(array[0], array[size]); - - // - // Now top element is no longer the smallest (largest), so we need - // to sift it down to the proper location. - // - for (IndexT i = 0; i < size;) { - IndexT lc = detail::left_child(i); - IndexT rc = detail::right_child(i); - IndexT smaller = i; - - // - // We can go out of bounds, let's check the simple cases - // - if (rc < size) { - // - // Both children exist in tree, pick the smaller (lerger) - // one. - // - smaller = (compare(array[lc], array[rc])) ? 
lc : rc; - } else if (lc < size) { - smaller = lc; - } - - if ((smaller != i) && (compare(array[smaller], array[i]))) { - thrust::swap(array[i], array[smaller]); - i = smaller; - } else { - // - // If we don't swap then we can stop checking, break out of the loop - // - i = size; - } - } - - return array[size]; -} -} // namespace heap - -} // namespace detail -} // namespace cugraph - -#endif diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index ec18640bc11..524b681601f 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -297,10 +297,6 @@ ConfigureTest(TRIANGLE_TEST community/triangle_test.cu) # - EGO tests -------------------------------------------------------------------------------- ConfigureTest(EGO_TEST community/egonet_test.cu) -################################################################################################### -# - RENUMBERING tests ----------------------------------------------------------------------------- -ConfigureTest(RENUMBERING_TEST renumber/renumber_test.cu) - ################################################################################################### # - FORCE ATLAS 2 tests -------------------------------------------------------------------------- ConfigureTest(FA2_TEST layout/force_atlas2_test.cu) diff --git a/cpp/tests/renumber/renumber_test.cu b/cpp/tests/renumber/renumber_test.cu deleted file mode 100644 index a7102402acf..00000000000 --- a/cpp/tests/renumber/renumber_test.cu +++ /dev/null @@ -1,579 +0,0 @@ -// -*-c++-*- - -/* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -//#include "gmock/gmock.h" - -#include - -#include - -#include - -#include -#include - -#include - -struct RenumberingTest : public ::testing::Test { -}; - -__global__ void display_list(const char *label, uint32_t *verts, size_t length) -{ - printf("%s\n", label); - - for (size_t i = 0; i < length; ++i) { printf(" %u\n", verts[i]); } -} - -__global__ void setup_generator(curandState *state) -{ - int id = threadIdx.x + blockIdx.x * blockDim.x; - curand_init(43, id, 0, &state[id]); -} - -__global__ void generate_sources(curandState *state, int n, uint32_t *verts) -{ - int first = threadIdx.x + blockIdx.x * blockDim.x; - int stride = blockDim.x * gridDim.x; - - curandState local_state = state[first]; - for (int id = first; id < n; id += stride) { verts[id] = curand(&local_state); } - - state[first] = local_state; -} - -__global__ void generate_destinations(curandState *state, - int n, - const uint32_t *sources, - uint32_t *destinations) -{ - int first = threadIdx.x + blockIdx.x * blockDim.x; - int stride = blockDim.x * gridDim.x; - - curandState local_state = state[first]; - for (int id = first; id < n; id += stride) { - destinations[id] = sources[curand(&local_state) % n]; - } - - state[first] = local_state; -} - -TEST_F(RenumberingTest, SmallFixedVertexList) -{ - uint32_t src_data[] = {4U, 6U, 8U, 20U, 1U}; - uint32_t dst_data[] = {1U, 29U, 35U, 0U, 77U}; - - uint32_t src_expected[] = {2U, 3U, 4U, 5U, 1U}; - uint32_t dst_expected[] = {1U, 6U, 7U, 0U, 8U}; - - size_t length = sizeof(src_data) / sizeof(src_data[0]); - - uint32_t *src_d; - uint32_t *dst_d; - - uint32_t tmp_results[length]; - uint32_t tmp_map[2 * length]; - - rmm::device_vector src(length); - rmm::device_vector dst(length); - src_d = src.data().get(); - dst_d = dst.data().get(); - - EXPECT_EQ(cudaMemcpy(src_d, src_data, sizeof(uint32_t) * length, cudaMemcpyHostToDevice), - cudaSuccess); - EXPECT_EQ(cudaMemcpy(dst_d, dst_data, sizeof(uint32_t) * length, cudaMemcpyHostToDevice), - cudaSuccess); - - size_t unique_verts = 0; - - auto number_map = cugraph::detail::renumber_vertices(length, - src_d, - dst_d, - src_d, - dst_d, - &unique_verts, - cugraph::detail::HashFunctionObjectInt(511), - thrust::less(), - rmm::mr::get_current_device_resource()); - - EXPECT_EQ(cudaMemcpy( - tmp_map, number_map->data(), sizeof(uint32_t) * unique_verts, cudaMemcpyDeviceToHost), - cudaSuccess); - EXPECT_EQ(cudaMemcpy(tmp_results, src_d, sizeof(uint32_t) * length, cudaMemcpyDeviceToHost), - cudaSuccess); - - for (size_t i = 0; i < length; ++i) { - EXPECT_EQ(tmp_results[i], src_expected[i]); - EXPECT_EQ(tmp_map[tmp_results[i]], src_data[i]); - } - - EXPECT_EQ(cudaMemcpy(tmp_results, dst_d, sizeof(uint32_t) * length, cudaMemcpyDeviceToHost), - cudaSuccess); - for (size_t i = 0; i < length; ++i) { - EXPECT_EQ(tmp_results[i], dst_expected[i]); - EXPECT_EQ(tmp_map[tmp_results[i]], dst_data[i]); - } -} - -TEST_F(RenumberingTest, SmallFixedVertexListNegative) -{ - int64_t src_data[] = {4, 6, 8, -20, 1}; - int64_t dst_data[] = {1, 29, 35, 0, 77}; - - int64_t src_expected[] = {2, 3, 4, 8, 1}; - int64_t dst_expected[] = {1, 5, 6, 0, 7}; - - size_t length = sizeof(src_data) / sizeof(src_data[0]); - - int64_t *src_d; - int64_t *dst_d; - - int64_t tmp_results[length]; - int64_t tmp_map[2 * length]; - - rmm::device_vector src(length); - rmm::device_vector dst(length); - src_d = src.data().get(); - dst_d = dst.data().get(); - - EXPECT_EQ(cudaMemcpy(src_d, src_data, sizeof(int64_t) * length, cudaMemcpyHostToDevice), - cudaSuccess); - 
EXPECT_EQ(cudaMemcpy(dst_d, dst_data, sizeof(int64_t) * length, cudaMemcpyHostToDevice), - cudaSuccess); - - size_t unique_verts = 0; - - auto number_map = cugraph::detail::renumber_vertices(length, - src_d, - dst_d, - src_d, - dst_d, - &unique_verts, - cugraph::detail::HashFunctionObjectInt(511), - thrust::less(), - rmm::mr::get_current_device_resource()); - - EXPECT_EQ( - cudaMemcpy(tmp_map, number_map->data(), sizeof(int64_t) * unique_verts, cudaMemcpyDeviceToHost), - cudaSuccess); - EXPECT_EQ(cudaMemcpy(tmp_results, src_d, sizeof(int64_t) * length, cudaMemcpyDeviceToHost), - cudaSuccess); - - for (size_t i = 0; i < length; ++i) { - EXPECT_EQ(tmp_results[i], src_expected[i]); - EXPECT_EQ(tmp_map[tmp_results[i]], src_data[i]); - } - - EXPECT_EQ(cudaMemcpy(tmp_results, dst_d, sizeof(int64_t) * length, cudaMemcpyDeviceToHost), - cudaSuccess); - for (size_t i = 0; i < length; ++i) { - EXPECT_EQ(tmp_results[i], dst_expected[i]); - EXPECT_EQ(tmp_map[tmp_results[i]], dst_data[i]); - } -} - -TEST_F(RenumberingTest, SmallFixedVertexList64Bit) -{ - uint64_t src_data[] = {4U, 6U, 8U, 20U, 1U}; - uint64_t dst_data[] = {1U, 29U, 35U, 0U, 77U}; - - uint64_t src_expected[] = {2U, 3U, 4U, 5U, 1U}; - uint64_t dst_expected[] = {1U, 6U, 7U, 0U, 8U}; - - size_t length = sizeof(src_data) / sizeof(src_data[0]); - - uint64_t *src_d; - uint64_t *dst_d; - - uint64_t tmp_results[length]; - uint64_t tmp_map[2 * length]; - - rmm::device_vector src(length); - rmm::device_vector dst(length); - src_d = src.data().get(); - dst_d = dst.data().get(); - - EXPECT_EQ(cudaMemcpy(src_d, src_data, sizeof(uint64_t) * length, cudaMemcpyHostToDevice), - cudaSuccess); - EXPECT_EQ(cudaMemcpy(dst_d, dst_data, sizeof(uint64_t) * length, cudaMemcpyHostToDevice), - cudaSuccess); - - size_t unique_verts = 0; - - auto number_map = cugraph::detail::renumber_vertices(length, - src_d, - dst_d, - src_d, - dst_d, - &unique_verts, - cugraph::detail::HashFunctionObjectInt(511), - thrust::less(), - rmm::mr::get_current_device_resource()); - - EXPECT_EQ(cudaMemcpy( - tmp_map, number_map->data(), sizeof(uint64_t) * unique_verts, cudaMemcpyDeviceToHost), - cudaSuccess); - EXPECT_EQ(cudaMemcpy(tmp_results, src_d, sizeof(uint64_t) * length, cudaMemcpyDeviceToHost), - cudaSuccess); - - for (size_t i = 0; i < length; ++i) { - EXPECT_EQ(tmp_results[i], src_expected[i]); - EXPECT_EQ(tmp_map[tmp_results[i]], src_data[i]); - } - - EXPECT_EQ(cudaMemcpy(tmp_results, dst_d, sizeof(uint64_t) * length, cudaMemcpyDeviceToHost), - cudaSuccess); - for (size_t i = 0; i < length; ++i) { - EXPECT_EQ(tmp_results[i], dst_expected[i]); - EXPECT_EQ(tmp_map[tmp_results[i]], dst_data[i]); - } -} - -TEST_F(RenumberingTest, SmallFixedVertexList64BitTo32Bit) -{ - uint64_t src_data[] = {4U, 6U, 8U, 20U, 1U}; - uint64_t dst_data[] = {1U, 29U, 35U, 0U, 77U}; - - uint32_t src_expected[] = {2U, 3U, 4U, 5U, 1U}; - uint32_t dst_expected[] = {1U, 6U, 7U, 0U, 8U}; - - size_t length = sizeof(src_data) / sizeof(src_data[0]); - - uint64_t *src_d; - uint64_t *dst_d; - uint32_t *src_renumbered_d; - uint32_t *dst_renumbered_d; - - uint32_t tmp_results[length]; - uint64_t tmp_map[2 * length]; - - rmm::device_vector src(length); - rmm::device_vector dst(length); - src_d = src.data().get(); - dst_d = dst.data().get(); - rmm::device_vector src_renumbered(length); - rmm::device_vector dst_renumbered(length); - src_renumbered_d = src_renumbered.data().get(); - dst_renumbered_d = dst_renumbered.data().get(); - - EXPECT_EQ(cudaMemcpy(src_d, src_data, sizeof(uint64_t) * length, 
cudaMemcpyHostToDevice), - cudaSuccess); - EXPECT_EQ(cudaMemcpy(dst_d, dst_data, sizeof(uint64_t) * length, cudaMemcpyHostToDevice), - cudaSuccess); - - size_t unique_verts = 0; - - auto number_map = cugraph::detail::renumber_vertices(length, - src_d, - dst_d, - src_renumbered_d, - dst_renumbered_d, - &unique_verts, - cugraph::detail::HashFunctionObjectInt(511), - thrust::less(), - rmm::mr::get_current_device_resource()); - - EXPECT_EQ(cudaMemcpy( - tmp_map, number_map->data(), sizeof(uint64_t) * unique_verts, cudaMemcpyDeviceToHost), - cudaSuccess); - EXPECT_EQ( - cudaMemcpy(tmp_results, src_renumbered_d, sizeof(uint32_t) * length, cudaMemcpyDeviceToHost), - cudaSuccess); - - for (size_t i = 0; i < length; ++i) { - EXPECT_EQ(tmp_results[i], src_expected[i]); - EXPECT_EQ(tmp_map[tmp_results[i]], src_data[i]); - } - - EXPECT_EQ( - cudaMemcpy(tmp_results, dst_renumbered_d, sizeof(uint32_t) * length, cudaMemcpyDeviceToHost), - cudaSuccess); - for (size_t i = 0; i < length; ++i) { - EXPECT_EQ(tmp_results[i], dst_expected[i]); - EXPECT_EQ(tmp_map[tmp_results[i]], dst_data[i]); - } -} - -TEST_F(RenumberingTest, Random100KVertexSet) -{ - const int num_verts = 100000; - - uint64_t *src_d; - uint64_t *dst_d; - - std::vector src_data_vec(num_verts); - std::vector dst_data_vec(num_verts); - std::vector tmp_results_vec(num_verts); - std::vector tmp_map_vec(2 * num_verts); - - uint64_t *src_data = src_data_vec.data(); - uint64_t *dst_data = dst_data_vec.data(); - uint64_t *tmp_results = tmp_results_vec.data(); - uint64_t *tmp_map = tmp_map_vec.data(); - rmm::device_vector src(num_verts); - rmm::device_vector dst(num_verts); - src_d = src.data().get(); - dst_d = dst.data().get(); - - // - // Generate random source and vertex values - // - srand(43); - - for (int i = 0; i < num_verts; ++i) { src_data[i] = (uint64_t)rand(); } - - for (int i = 0; i < num_verts; ++i) { dst_data[i] = (uint64_t)rand(); } - - EXPECT_EQ(cudaMemcpy(src_d, src_data, sizeof(uint64_t) * num_verts, cudaMemcpyHostToDevice), - cudaSuccess); - EXPECT_EQ(cudaMemcpy(dst_d, dst_data, sizeof(uint64_t) * num_verts, cudaMemcpyHostToDevice), - cudaSuccess); - - // - // Renumber everything - // - size_t unique_verts = 0; - size_t n_verts{num_verts}; - - auto start = std::chrono::system_clock::now(); - - auto number_map = cugraph::detail::renumber_vertices(n_verts, - src_d, - dst_d, - src_d, - dst_d, - &unique_verts, - cugraph::detail::HashFunctionObjectInt(511), - thrust::less(), - rmm::mr::get_current_device_resource()); - - auto end = std::chrono::system_clock::now(); - std::chrono::duration elapsed_seconds = end - start; - - std::cout << "Renumber kernel elapsed time (ms): " << elapsed_seconds.count() * 1000 << std::endl; - - EXPECT_EQ(cudaMemcpy( - tmp_map, number_map->data(), sizeof(uint64_t) * unique_verts, cudaMemcpyDeviceToHost), - cudaSuccess); - EXPECT_EQ(cudaMemcpy(tmp_results, src_d, sizeof(uint64_t) * num_verts, cudaMemcpyDeviceToHost), - cudaSuccess); - - size_t min_id = unique_verts; - size_t max_id = 0; - - size_t cnt = 0; - for (size_t i = 0; i < num_verts; ++i) { - min_id = min(min_id, tmp_results[i]); - max_id = max(max_id, tmp_results[i]); - if (tmp_map[tmp_results[i]] != src_data[i]) ++cnt; - - if (cnt < 20) EXPECT_EQ(tmp_map[tmp_results[i]], src_data[i]); - } - - if (cnt > 0) printf(" src error count = %ld out of %d\n", cnt, num_verts); - - EXPECT_EQ(cudaMemcpy(tmp_results, dst_d, sizeof(uint64_t) * num_verts, cudaMemcpyDeviceToHost), - cudaSuccess); - for (size_t i = 0; i < num_verts; ++i) { - min_id = min(min_id, 
tmp_results[i]); - max_id = max(max_id, tmp_results[i]); - if (tmp_map[tmp_results[i]] != dst_data[i]) ++cnt; - - if (cnt < 20) EXPECT_EQ(tmp_map[tmp_results[i]], dst_data[i]); - } - - if (cnt > 0) printf(" src error count = %ld out of %d\n", cnt, num_verts); - - EXPECT_EQ(min_id, 0); - EXPECT_EQ(max_id, (unique_verts - 1)); -} - -TEST_F(RenumberingTest, Random10MVertexSet) -{ - const int num_verts = 10000000; - - // A sampling of performance on single Quadro GV100 - // const int hash_size = 32767; // 238 ms - // const int hash_size = 8191; // 224 ms - const int hash_size = 511; // 224 ms - - uint32_t *src_d; - uint32_t *dst_d; - - rmm::device_vector src(num_verts); - rmm::device_vector dst(num_verts); - src_d = src.data().get(); - dst_d = dst.data().get(); - - // - // Init the random number generate - // - const int num_threads{64}; - curandState *state; - - rmm::device_vector state_vals(num_threads); - state = state_vals.data().get(); - setup_generator<<>>(state); - generate_sources<<>>(state, num_verts, src_d); - generate_destinations<<>>(state, num_verts, src_d, dst_d); - - std::cout << "done with initialization" << std::endl; - - // - // Renumber everything - // - size_t unique_verts = 0; - size_t n_verts{num_verts}; - - auto start = std::chrono::system_clock::now(); - auto number_map = - cugraph::detail::renumber_vertices(n_verts, - src_d, - dst_d, - src_d, - dst_d, - &unique_verts, - cugraph::detail::HashFunctionObjectInt(hash_size), - thrust::less(), - rmm::mr::get_current_device_resource()); - auto end = std::chrono::system_clock::now(); - std::chrono::duration elapsed_seconds = end - start; - - std::cout << "Renumber kernel elapsed time (ms): " << elapsed_seconds.count() * 1000 << std::endl; - std::cout << " unique verts = " << unique_verts << std::endl; - std::cout << " hash size = " << hash_size << std::endl; -} - -TEST_F(RenumberingTest, Random100MVertexSet) -{ - const int num_verts = 100000000; - - // A sampling of performance on single Quadro GV100 - // const int hash_size = 8192; // 1811 ms - // const int hash_size = 16384; // 1746 ms - // const int hash_size = 32768; // 1662 ms - // const int hash_size = 65536; // 1569 ms - // const int hash_size = 16777216; // 1328 ms - const int hash_size = 511; - - uint32_t *src_d; - uint32_t *dst_d; - - rmm::device_vector src(num_verts); - rmm::device_vector dst(num_verts); - src_d = src.data().get(); - dst_d = dst.data().get(); - - // - // Init the random number generate - // - const int num_threads{64}; - curandState *state; - - rmm::device_vector state_vals(num_threads); - state = state_vals.data().get(); - setup_generator<<>>(state); - generate_sources<<>>(state, num_verts, src_d); - generate_destinations<<>>(state, num_verts, src_d, dst_d); - - std::cout << "done with initialization" << std::endl; - - // - // Renumber everything - // - size_t unique_verts = 0; - size_t n_verts{num_verts}; - - auto start = std::chrono::system_clock::now(); - auto number_map = - cugraph::detail::renumber_vertices(n_verts, - src_d, - dst_d, - src_d, - dst_d, - &unique_verts, - cugraph::detail::HashFunctionObjectInt(hash_size), - thrust::less(), - rmm::mr::get_current_device_resource()); - auto end = std::chrono::system_clock::now(); - std::chrono::duration elapsed_seconds = end - start; - - std::cout << "Renumber kernel elapsed time (ms): " << elapsed_seconds.count() * 1000 << std::endl; - std::cout << " unique verts = " << unique_verts << std::endl; - std::cout << " hash size = " << hash_size << std::endl; -} - -TEST_F(RenumberingTest, 
Random500MVertexSet) -{ - const int num_verts = 500000000; - - // A sampling of performance on single Quadro GV100 - // const int hash_size = 8192; // 9918 ms - // const int hash_size = 16384; // 9550 ms - // const int hash_size = 32768; // 9146 ms - // const int hash_size = 131072; // 8537 ms - const int hash_size = 1048576; // 7335 ms - // const int hash_size = 511; // 7335 ms - - uint32_t *src_d; - uint32_t *dst_d; - - rmm::device_vector src(num_verts); - rmm::device_vector dst(num_verts); - src_d = src.data().get(); - dst_d = dst.data().get(); - - // - // Init the random number generate - // - const int num_threads{64}; - curandState *state; - - rmm::device_vector state_vals(num_threads); - state = state_vals.data().get(); - setup_generator<<>>(state); - generate_sources<<>>(state, num_verts, src_d); - generate_destinations<<>>(state, num_verts, src_d, dst_d); - - std::cout << "done with initialization" << std::endl; - - // - // Renumber everything - // - size_t unique_verts = 0; - size_t n_verts{num_verts}; - - auto start = std::chrono::system_clock::now(); - auto number_map = - cugraph::detail::renumber_vertices(n_verts, - src_d, - dst_d, - src_d, - dst_d, - &unique_verts, - cugraph::detail::HashFunctionObjectInt(hash_size), - thrust::less(), - rmm::mr::get_current_device_resource()); - auto end = std::chrono::system_clock::now(); - std::chrono::duration elapsed_seconds = end - start; - - std::cout << "Renumber kernel elapsed time (ms): " << elapsed_seconds.count() * 1000 << std::endl; - std::cout << " unique verts = " << unique_verts << std::endl; - std::cout << " hash size = " << hash_size << std::endl; -} - -CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/python/cugraph/structure/graph_primtypes.pxd b/python/cugraph/structure/graph_primtypes.pxd index bb43cc8674b..20581a8ecc0 100644 --- a/python/cugraph/structure/graph_primtypes.pxd +++ b/python/cugraph/structure/graph_primtypes.pxd @@ -128,16 +128,6 @@ cdef extern from "cugraph/algorithms.hpp" namespace "cugraph": cdef unique_ptr[GraphCOO[VT, ET, WT]] get_two_hop_neighbors[VT,ET,WT]( const GraphCSRView[VT, ET, WT] &graph) except + -cdef extern from "cugraph/functions.hpp" namespace "cugraph": - - cdef unique_ptr[device_buffer] renumber_vertices[VT_IN,VT_OUT,ET]( - ET number_of_edges, - const VT_IN *src, - const VT_IN *dst, - VT_OUT *src_renumbered, - VT_OUT *dst_renumbered, - ET *map_size) except + - cdef extern from "" namespace "std" nogil: cdef unique_ptr[GraphCOO[int,int,float]] move(unique_ptr[GraphCOO[int,int,float]]) diff --git a/python/cugraph/structure/graph_primtypes_wrapper.pyx b/python/cugraph/structure/graph_primtypes_wrapper.pyx index 91af28380c3..95de1d70732 100644 --- a/python/cugraph/structure/graph_primtypes_wrapper.pyx +++ b/python/cugraph/structure/graph_primtypes_wrapper.pyx @@ -18,7 +18,6 @@ from cugraph.structure.graph_primtypes cimport * from cugraph.structure.graph_primtypes cimport get_two_hop_neighbors as c_get_two_hop_neighbors -from cugraph.structure.graph_primtypes cimport renumber_vertices as c_renumber_vertices from cugraph.structure.utils_wrapper import * from libcpp cimport bool import enum @@ -52,45 +51,6 @@ class Direction(enum.Enum): OUT = 2 -def renumber(source_col, dest_col): - num_edges = len(source_col) - - src_renumbered = cudf.Series(np.zeros(num_edges), dtype=np.int32) - dst_renumbered = cudf.Series(np.zeros(num_edges), dtype=np.int32) - - cdef uintptr_t c_src = source_col.__cuda_array_interface__['data'][0] - cdef uintptr_t c_dst = dest_col.__cuda_array_interface__['data'][0] - cdef uintptr_t 
c_src_renumbered = src_renumbered.__cuda_array_interface__['data'][0] - cdef uintptr_t c_dst_renumbered = dst_renumbered.__cuda_array_interface__['data'][0] - cdef int map_size = 0 - cdef int n_edges = num_edges - - cdef unique_ptr[device_buffer] numbering_map - - if (source_col.dtype == np.int32): - numbering_map = move(c_renumber_vertices[int,int,int](n_edges, - c_src, - c_dst, - c_src_renumbered, - c_dst_renumbered, - &map_size)) - else: - numbering_map = move(c_renumber_vertices[long,int,int](n_edges, - c_src, - c_dst, - c_src_renumbered, - c_dst_renumbered, - &map_size)) - - - map = DeviceBuffer.c_from_unique_ptr(move(numbering_map)) - map = Buffer(map) - - output_map = cudf.Series(data=map, dtype=source_col.dtype) - - return src_renumbered, dst_renumbered, output_map - - def view_adj_list(input_graph): if input_graph.adjlist is None: From a13ed2fc2704805537990843fa5f2a25a512c9bc Mon Sep 17 00:00:00 2001 From: Andrei Schaffer <37386037+aschaffer@users.noreply.github.com> Date: Mon, 28 Jun 2021 17:27:43 -0500 Subject: [PATCH 308/343] Improve Random Walks performance (#1685) This tracks work on adding additional Random Walks traversal strategy: horizontal. This strategy is more efficient, but requires more GPU memory. The previous strategy (vertical) is only used when not enough GPU memory is available to support the horizontal one. The decision on which to use is based on GPU memory availability: if enough memory is available then the more performant (horizontal) strategy is used. Authors: - Andrei Schaffer (https://github.com/aschaffer) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - Kumar Aatish (https://github.com/kaatish) URL: https://github.com/rapidsai/cugraph/pull/1685 --- cpp/src/sampling/random_walks.cuh | 287 ++++++++++++------ cpp/src/sampling/rw_traversals.hpp | 297 +++++++++++++++++++ cpp/tests/sampling/random_walks_profiling.cu | 54 +++- cpp/tests/sampling/random_walks_test.cu | 66 +++-- 4 files changed, 573 insertions(+), 131 deletions(-) create mode 100644 cpp/src/sampling/rw_traversals.hpp diff --git a/cpp/src/sampling/random_walks.cuh b/cpp/src/sampling/random_walks.cuh index 81cad454a17..570a7ec971f 100644 --- a/cpp/src/sampling/random_walks.cuh +++ b/cpp/src/sampling/random_walks.cuh @@ -46,72 +46,18 @@ #include #include +#include // FIXME: requirement for temporary std::getenv() #include #include #include +#include "rw_traversals.hpp" + namespace cugraph { namespace experimental { namespace detail { -template -using device_vec_t = rmm::device_uvector; - -template -using device_v_it = typename device_vec_t::iterator; - -template -value_t* raw_ptr(device_vec_t& dv) -{ - return dv.data(); -} - -template -value_t const* raw_const_ptr(device_vec_t const& dv) -{ - return dv.data(); -} - -template -struct device_const_vector_view { - device_const_vector_view(value_t const* d_buffer, index_t size) : d_buffer_(d_buffer), size_(size) - { - } - - device_const_vector_view(device_const_vector_view const& other) = delete; - device_const_vector_view& operator=(device_const_vector_view const& other) = delete; - - device_const_vector_view(device_const_vector_view&& other) - { - d_buffer_ = other.d_buffer_; - size_ = other.size_; - } - device_const_vector_view& operator=(device_const_vector_view&& other) - { - d_buffer_ = other.d_buffer_; - size_ = other.size_; - - return *this; - } - - value_t const* begin(void) const { return d_buffer_; } - - value_t const* end() const { return d_buffer_ + size_; } - - index_t size(void) const { return size_; } - - private: - 
value_t const* d_buffer_{nullptr}; - index_t size_; -}; - -template -value_t const* raw_const_ptr(device_const_vector_view& dv) -{ - return dv.begin(); -} - // raft random generator: // (using upper-bound cached "map" // giving out_deg(v) for each v in [0, |V|); @@ -127,6 +73,9 @@ struct rrandom_gen_t { using seed_type = seed_t; using real_type = real_t; + // cnstr. version that provides step-wise in-place + // rnd generation: + // rrandom_gen_t(raft::handle_t const& handle, index_t num_paths, device_vec_t& d_random, // scratch-pad, non-coalesced @@ -147,9 +96,23 @@ struct rrandom_gen_t { // this must be done at each step, // but this object is constructed at each step; // - raft::random::Rng rng(seed_); - rng.uniform( - d_ptr_random_, num_paths, real_t{0.0}, real_t{1.0}, handle.get_stream()); + generate_random(handle, d_ptr_random_, num_paths, seed_); + } + + // cnstr. version for the case when the + // random vector is provided by the caller: + // + rrandom_gen_t(raft::handle_t const& handle, + index_t num_paths, + real_t* ptr_d_rnd, // supplied + device_vec_t const& d_crt_out_deg, // non-coalesced + seed_t seed = seed_t{}) + : handle_(handle), + seed_(seed), + num_paths_(num_paths), + d_ptr_out_degs_(raw_const_ptr(d_crt_out_deg)), + d_ptr_random_(ptr_d_rnd) + { } // in place: @@ -175,6 +138,14 @@ struct rrandom_gen_t { [] __device__(auto crt_out_deg) { return crt_out_deg > 0; }); } + // abstracts away the random values generation: + // + static void generate_random(raft::handle_t const& handle, real_t* p_d_rnd, size_t sz, seed_t seed) + { + raft::random::Rng rng(seed); + rng.uniform(p_d_rnd, sz, real_t{0.0}, real_t{1.0}, handle.get_stream()); + } + private: raft::handle_t const& handle_; index_t num_paths_; @@ -345,11 +316,12 @@ template , typename index_t = typename graph_t::edge_type> struct random_walker_t { - using vertex_t = typename graph_t::vertex_type; - using edge_t = typename graph_t::edge_type; - using weight_t = typename graph_t::weight_type; - using seed_t = typename random_engine_t::seed_type; - using real_t = typename random_engine_t::real_type; + using vertex_t = typename graph_t::vertex_type; + using edge_t = typename graph_t::edge_type; + using weight_t = typename graph_t::weight_type; + using seed_t = typename random_engine_t::seed_type; + using real_t = typename random_engine_t::real_type; + using rnd_engine_t = random_engine_t; random_walker_t(raft::handle_t const& handle, graph_t const& graph, @@ -480,6 +452,71 @@ struct random_walker_t { scatter_weights(d_next_w, d_coalesced_w, d_crt_out_degs, d_paths_sz); } + // step() version that doesn't update the random vector: + // (the caller supplies it) + // + void step_only( + graph_t const& graph, + device_vec_t& d_coalesced_v, // crt coalesced vertex set + device_vec_t& d_coalesced_w, // crt coalesced weight set + device_vec_t& d_paths_sz, // crt paths sizes + device_vec_t& d_crt_out_degs, // crt out-degs for current set of vertices + real_t* ptr_d_random, // crt set of random real values (supplied) + device_vec_t& d_col_indx, // crt col col indices to be used for retrieving next step + device_vec_t& d_next_v, // crt set of destination vertices, for next step + device_vec_t& d_next_w) + const // set of weights between src and destination vertices, for next step + { + // update crt snapshot of out-degs, + // from cached out degs, using + // latest vertex in each path as source: + // + gather_from_coalesced( + d_coalesced_v, d_cached_out_degs_, d_paths_sz, d_crt_out_degs, max_depth_, num_paths_); + + // generate random 
destination indices: + // + random_engine_t rgen(handle_, num_paths_, ptr_d_random, d_crt_out_degs); + + rgen.generate_col_indices(d_col_indx); + + // dst extraction from dst indices: + // + col_indx_extract_t col_extractor(handle_, + graph, + raw_const_ptr(d_crt_out_degs), + raw_const_ptr(d_paths_sz), + num_paths_, + max_depth_); + + // The following steps update the next entry in each path, + // except the paths that reached sinks; + // + // for each indx in [0..num_paths) { + // v_indx = d_v_rnd_n_indx[indx]; + // + // -- get the `v_indx`-th out-vertex of d_v_paths_v_set[indx] vertex: + // -- also, note the size deltas increased by 1 in dst (d_sizes[]): + // + // d_coalesced_v[indx*num_paths + d_sizes[indx]] = + // get_out_vertex(graph, d_coalesced_v[indx*num_paths + d_sizes[indx] -1)], v_indx); + // d_coalesced_w[indx*(num_paths-1) + d_sizes[indx] - 1] = + // get_out_edge_weight(graph, d_coalesced_v[indx*num_paths + d_sizes[indx]-2], v_indx); + // + // (1) generate actual vertex destinations: + // + col_extractor(d_coalesced_v, d_col_indx, d_next_v, d_next_w); + + // (2) update path sizes: + // + update_path_sizes(d_crt_out_degs, d_paths_sz); + + // (3) actual coalesced updates: + // + scatter_vertices(d_next_v, d_coalesced_v, d_crt_out_degs, d_paths_sz); + scatter_weights(d_next_w, d_coalesced_w, d_crt_out_degs, d_paths_sz); + } + // returns true if all paths reached sinks: // bool all_paths_stopped(device_vec_t const& d_crt_out_degs) const @@ -678,6 +715,8 @@ struct random_walker_t { weight_padding_value_); } + decltype(auto) get_handle(void) const { return handle_; } + private: raft::handle_t const& handle_; index_t num_paths_; @@ -692,7 +731,12 @@ struct random_walker_t { * length. Single-GPU specialization. * * @tparam graph_t Type of graph (view). + * @tparam traversal_t Traversal policy. Either horizontal (faster but requires more memory) or + * vertical. Defaults to horizontal. * @tparam random_engine_t Type of random engine used to generate RW. + * @tparam seeding_policy_t Random engine seeding policy: variable or fixed (for reproducibility). + * Defaults to variable, clock dependent. + * @tparam index_t Indexing type. Defaults to edge_type. * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. * @param graph Graph object to generate RW on. 
@@ -716,6 +760,7 @@ struct random_walker_t { * entries; */ template , typename seeding_policy_t = clock_seeding_t, @@ -762,12 +807,22 @@ random_walks_impl(raft::handle_t const& handle, device_vec_t d_coalesced_v(coalesced_sz, stream); // coalesced vertex set device_vec_t d_coalesced_w(coalesced_sz, stream); // coalesced weight set device_vec_t d_paths_sz(num_paths, stream); // paths sizes - device_vec_t d_crt_out_degs(num_paths, stream); // out-degs for current set of vertices - device_vec_t d_random(num_paths, stream); - device_vec_t d_col_indx(num_paths, stream); - device_vec_t d_next_v(num_paths, stream); - device_vec_t d_next_w(num_paths, stream); + // traversal policy: + // + traversal_t traversor(num_paths, max_depth); + + auto tmp_buff_sz = traversor.get_tmp_buff_sz(); + + device_vec_t d_crt_out_degs(tmp_buff_sz, stream); // crt vertex set out-degs + device_vec_t d_col_indx(tmp_buff_sz, stream); // \in {0,..,out-deg(v)} + device_vec_t d_next_v(tmp_buff_sz, stream); // crt set of next vertices + device_vec_t d_next_w(tmp_buff_sz, stream); // crt set of next weights + + // random data handling: + // + auto rnd_data_sz = traversor.get_random_buff_sz(); + device_vec_t d_random(rnd_data_sz, stream); // abstracted out seed initialization: // seed_t seed0 = static_cast(seeder()); @@ -780,26 +835,19 @@ random_walks_impl(raft::handle_t const& handle, // rand_walker.start(d_v_start, d_coalesced_v, d_paths_sz); - // start from 1, as 0-th was initialized above: + // traverse paths: // - for (decltype(max_depth) step_indx = 1; step_indx < max_depth; ++step_indx) { - // take one-step in-sync for each path in parallel: - // - rand_walker.step(graph, - seed0 + static_cast(step_indx), - d_coalesced_v, - d_coalesced_w, - d_paths_sz, - d_crt_out_degs, - d_random, - d_col_indx, - d_next_v, - d_next_w); - - // early exit: all paths have reached sinks: - // - if (rand_walker.all_paths_stopped(d_crt_out_degs)) break; - } + traversor(graph, + rand_walker, + seed0, + d_coalesced_v, + d_coalesced_w, + d_paths_sz, + d_crt_out_degs, + d_random, + d_col_indx, + d_next_v, + d_next_w); // wrap-up, post-process: // truncate v_set, w_set to actual space used @@ -829,7 +877,12 @@ random_walks_impl(raft::handle_t const& handle, * length. Multi-GPU specialization. * * @tparam graph_t Type of graph (view). + * @tparam traversal_t Traversal policy. Either horizontal (faster but requires more memory) or + * vertical. Defaults to horizontal. * @tparam random_engine_t Type of random engine used to generate RW. + * @tparam seeding_policy_t Random engine seeding policy: variable or fixed (for reproducibility). + * Defaults to variable, clock dependent. + * @tparam index_t Indexing type. Defaults to edge_type. * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. * @param graph Graph object to generate RW on. 
@@ -853,6 +906,7 @@ random_walks_impl(raft::handle_t const& handle, * entries; */ template , typename seeding_policy_t = clock_seeding_t, @@ -1058,19 +1112,60 @@ random_walks(raft::handle_t const& handle, index_t max_depth, bool use_padding) { - using vertex_t = typename graph_t::vertex_type; + using vertex_t = typename graph_t::vertex_type; + using edge_t = typename graph_t::edge_type; + using weight_t = typename graph_t::weight_type; + using rnd_engine_t = float; // 0-copy const device view: // detail::device_const_vector_view d_v_start{ptr_d_start, num_paths}; - auto quad_tuple = detail::random_walks_impl(handle, graph, d_v_start, max_depth, use_padding); - // ignore last element of the quad, seed, - // since it's meant for testing / debugging, only: + // GPU memory availability: // - return std::make_tuple(std::move(std::get<0>(quad_tuple)), - std::move(std::get<1>(quad_tuple)), - std::move(std::get<2>(quad_tuple))); + size_t free_mem_sp_bytes{0}; + size_t total_mem_sp_bytes{0}; + cudaMemGetInfo(&free_mem_sp_bytes, &total_mem_sp_bytes); + + // GPU memory requirements: + // + size_t coalesced_v_count = num_paths * max_depth; + auto coalesced_e_count = coalesced_v_count - num_paths; + size_t req_mem_common = sizeof(vertex_t) * coalesced_v_count + + sizeof(weight_t) * coalesced_e_count + // coalesced_v + coalesced_w + (sizeof(vertex_t) + sizeof(index_t)) * num_paths; // start_v + sizes + + size_t req_mem_horizontal = + req_mem_common + sizeof(rnd_engine_t) * coalesced_e_count; // + rnd_buff + size_t req_mem_vertical = req_mem_common + (sizeof(edge_t) + 2 * sizeof(vertex_t) + + sizeof(weight_t) + sizeof(rnd_engine_t)) * + num_paths; // + smaller_rnd_buff + tmp_buffs + + bool use_vertical_strategy{false}; + if (req_mem_horizontal > req_mem_vertical && req_mem_horizontal > free_mem_sp_bytes) { + use_vertical_strategy = true; + std::cerr + << "WARNING: Due to GPU memory availability, slower vertical traversal will be used.\n"; + } + + if (use_vertical_strategy) { + auto quad_tuple = detail::random_walks_impl( + handle, graph, d_v_start, max_depth, use_padding); + // ignore last element of the quad, seed, + // since it's meant for testing / debugging, only: + // + return std::make_tuple(std::move(std::get<0>(quad_tuple)), + std::move(std::get<1>(quad_tuple)), + std::move(std::get<2>(quad_tuple))); + } else { + auto quad_tuple = detail::random_walks_impl(handle, graph, d_v_start, max_depth, use_padding); + // ignore last element of the quad, seed, + // since it's meant for testing / debugging, only: + // + return std::make_tuple(std::move(std::get<0>(quad_tuple)), + std::move(std::get<1>(quad_tuple)), + std::move(std::get<2>(quad_tuple))); + } } /** diff --git a/cpp/src/sampling/rw_traversals.hpp b/cpp/src/sampling/rw_traversals.hpp new file mode 100644 index 00000000000..f7aeb0b6dc1 --- /dev/null +++ b/cpp/src/sampling/rw_traversals.hpp @@ -0,0 +1,297 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// Andrei Schaffer, aschaffer@nvidia.com +// +#pragma once + +#include + +#include + +#include +#include +#include + +#include +#include + +#include + +#include +#include +#include + +namespace cugraph { +namespace experimental { + +namespace detail { + +template +using device_vec_t = rmm::device_uvector; + +template +using device_v_it = typename device_vec_t::iterator; + +template +value_t* raw_ptr(device_vec_t& dv) +{ + return dv.data(); +} + +template +value_t const* raw_const_ptr(device_vec_t const& dv) +{ + return dv.data(); +} + +template +struct device_const_vector_view { + device_const_vector_view(value_t const* d_buffer, index_t size) : d_buffer_(d_buffer), size_(size) + { + } + + device_const_vector_view(device_const_vector_view const& other) = delete; + device_const_vector_view& operator=(device_const_vector_view const& other) = delete; + + device_const_vector_view(device_const_vector_view&& other) + { + d_buffer_ = other.d_buffer_; + size_ = other.size_; + } + device_const_vector_view& operator=(device_const_vector_view&& other) + { + d_buffer_ = other.d_buffer_; + size_ = other.size_; + + return *this; + } + + value_t const* begin(void) const { return d_buffer_; } + + value_t const* end() const { return d_buffer_ + size_; } + + index_t size(void) const { return size_; } + + private: + value_t const* d_buffer_{nullptr}; + index_t size_; +}; + +template +value_t const* raw_const_ptr(device_const_vector_view& dv) +{ + return dv.begin(); +} + +// classes abstracting the way the random walks path are generated: +// + +// vertical traversal proxy: +// a device vector of next vertices is generated for each path; +// when a vertex is a sink the corresponding path doesn't advance anymore; +// +// smaller memory footprint; +// +struct vertical_traversal_t { + vertical_traversal_t(size_t num_paths, size_t max_depth) + : num_paths_(num_paths), max_depth_(max_depth) + { + } + + template + void operator()( + graph_t const& graph, // graph being traversed + random_walker_t const& rand_walker, // random walker object for which traversal is driven + seed_t seed0, // initial seed value + device_vec_t& d_coalesced_v, // crt coalesced vertex set + device_vec_t& d_coalesced_w, // crt coalesced weight set + device_vec_t& d_paths_sz, // crt paths sizes + device_vec_t& + d_crt_out_degs, // crt out-degs for current set of vertices + device_vec_t& d_random, // crt set of random real values + device_vec_t& + d_col_indx, // crt col col indices to be used for retrieving next step + device_vec_t& + d_next_v, // crt set of destination vertices, for next step + device_vec_t& + d_next_w) // set of weights between src and destination vertices, for next step + const + { + // start from 1, as 0-th was initialized above: + // + for (decltype(max_depth_) step_indx = 1; step_indx < max_depth_; ++step_indx) { + // take one-step in-sync for each path in parallel: + // + rand_walker.step(graph, + seed0 + static_cast(step_indx), + d_coalesced_v, + d_coalesced_w, + d_paths_sz, + d_crt_out_degs, + d_random, + d_col_indx, + d_next_v, + d_next_w); + + // early exit: all paths have reached sinks: + // + if (rand_walker.all_paths_stopped(d_crt_out_degs)) break; + } + } + + size_t get_random_buff_sz(void) const { return num_paths_; } + size_t get_tmp_buff_sz(void) const { return num_paths_; } + + private: + size_t num_paths_; + size_t max_depth_; +}; + +// horizontal traversal proxy: +// each path is generated independently from start to finish; +// when a vertex is a sink the corresponding path doesn't advance 
anymore; +// requires (num_paths x max_depth) precomputed real random values in [0,1]; +// +// larger memory footprint, but potentially more efficient; +// +struct horizontal_traversal_t { + horizontal_traversal_t(size_t num_paths, size_t max_depth) + : num_paths_(num_paths), max_depth_(max_depth) + { + } + + template + void operator()( + graph_t const& graph, // graph being traversed + random_walker_t const& rand_walker, // random walker object for which traversal is driven + seed_t seed0, // initial seed value + device_vec_t& d_coalesced_v, // crt coalesced vertex set + device_vec_t& d_coalesced_w, // crt coalesced weight set + device_vec_t& d_paths_sz, // crt paths sizes + device_vec_t& + d_crt_out_degs, // ignored: out-degs for the current set of vertices + device_vec_t& d_random, // _entire_ set of random real values + device_vec_t& + d_col_indx, // ignored: crt col indices to be used for retrieving next step + device_vec_t& + d_next_v, // ignored: crt set of destination vertices, for next step (coalesced set + // updated directly, instead) + device_vec_t& + d_next_w) // ignored: set of weights between src and destination vertices, for next step + // (coalesced set updated directly, instead) + const + { + using vertex_t = typename graph_t::vertex_type; + using edge_t = typename graph_t::edge_type; + using weight_t = typename graph_t::weight_type; + using random_engine_t = typename random_walker_t::rnd_engine_t; + + auto const& handle = rand_walker.get_handle(); + auto* ptr_d_random = raw_ptr(d_random); + + random_engine_t::generate_random(handle, ptr_d_random, d_random.size(), seed0); + + auto const* col_indices = graph.indices(); + auto const* row_offsets = graph.offsets(); + auto const* values = graph.weights(); + auto* ptr_d_sizes = raw_ptr(d_paths_sz); + auto const& d_cached_out_degs = rand_walker.get_out_degs(); + + auto rnd_to_indx_convertor = [] __device__(real_t rnd_vindx, edge_t crt_out_deg) { + real_t max_ub = static_cast(crt_out_deg - 1); + auto interp_vindx = rnd_vindx * max_ub + real_t{.5}; + vertex_t v_indx = static_cast(interp_vindx); + return (v_indx >= crt_out_deg ? crt_out_deg - 1 : v_indx); + }; + + auto next_vw = + [row_offsets, + col_indices, + values] __device__(auto v_indx, // src vertex to find dst from + auto col_indx) { // column index, in {0,...,out_deg(v_indx)-1}, + // extracted from random value in [0..1] + auto start_row = row_offsets[v_indx]; + + auto weight_value = + (values == nullptr ? 
weight_t{1} + : values[start_row + col_indx]); // account for un-weighted graphs + return thrust::make_tuple(col_indices[start_row + col_indx], weight_value); + }; + + // start from 1, as 0-th was initialized above: + // + thrust::for_each(rmm::exec_policy(handle.get_stream_view()), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_paths_), + [max_depth = max_depth_, + ptr_d_cache_out_degs = raw_const_ptr(d_cached_out_degs), + ptr_coalesced_v = raw_ptr(d_coalesced_v), + ptr_coalesced_w = raw_ptr(d_coalesced_w), + ptr_d_random, + ptr_d_sizes, + rnd_to_indx_convertor, + next_vw] __device__(auto path_index) { + auto chunk_offset = path_index * max_depth; + vertex_t src_vertex = ptr_coalesced_v[chunk_offset]; + + for (index_t step_indx = 1; step_indx < max_depth; ++step_indx) { + auto crt_out_deg = ptr_d_cache_out_degs[src_vertex]; + if (crt_out_deg == 0) break; + + // indexing into coalesced arrays of size num_paths x (max_depth -1): + // (d_random, d_coalesced_w) + // + auto stepping_index = chunk_offset - path_index + step_indx - 1; + + auto real_rnd_indx = ptr_d_random[stepping_index]; + + auto col_indx = rnd_to_indx_convertor(real_rnd_indx, crt_out_deg); + auto pair_vw = next_vw(src_vertex, col_indx); + + src_vertex = thrust::get<0>(pair_vw); + auto crt_weight = thrust::get<1>(pair_vw); + + ptr_coalesced_v[chunk_offset + step_indx] = src_vertex; + ptr_coalesced_w[stepping_index] = crt_weight; + ptr_d_sizes[path_index]++; + } + }); + } + + size_t get_random_buff_sz(void) const { return num_paths_ * (max_depth_ - 1); } + size_t get_tmp_buff_sz(void) const + { + return 0; + } // no need for tmp buffers + //(see "ignored" above) + + private: + size_t num_paths_; + size_t max_depth_; +}; // namespace detail + +} // namespace detail +} // namespace experimental +} // namespace cugraph diff --git a/cpp/tests/sampling/random_walks_profiling.cu b/cpp/tests/sampling/random_walks_profiling.cu index 46a3cf120be..6049230e21a 100644 --- a/cpp/tests/sampling/random_walks_profiling.cu +++ b/cpp/tests/sampling/random_walks_profiling.cu @@ -55,13 +55,19 @@ void fill_start(raft::handle_t const& handle, [num_vertices] __device__(auto indx) { return indx % num_vertices; }); } +namespace impl_details = cugraph::experimental::detail; + +enum class traversal_id_t : int { HORIZONTAL = 0, VERTICAL }; + /** * @internal - * @brief Calls the random_walks algorithm and displays the time metrics (total - * time for all requested paths, average time for each path). + * @brief Calls the random_walks algorithm with specified traversal strategy and displays the time + * metrics (total time for all requested paths, average time for each path). 
*/ template -void output_random_walks_time(graph_vt const& graph_view, typename graph_vt::edge_type num_paths) +void output_random_walks_time(graph_vt const& graph_view, + typename graph_vt::edge_type num_paths, + traversal_id_t trv_id) { using vertex_t = typename graph_vt::vertex_type; using edge_t = typename graph_vt::edge_type; @@ -75,19 +81,31 @@ void output_random_walks_time(graph_vt const& graph_view, typename graph_vt::edg // 0-copy const device view: // - cugraph::experimental::detail::device_const_vector_view d_start_view{ - d_start.data(), num_paths}; + impl_details::device_const_vector_view d_start_view{d_start.data(), num_paths}; edge_t max_depth{10}; HighResTimer hr_timer; - std::string label("RandomWalks"); - hr_timer.start(label); - cudaProfilerStart(); - auto ret_tuple = - cugraph::experimental::detail::random_walks_impl(handle, graph_view, d_start_view, max_depth); - cudaProfilerStop(); - hr_timer.stop(); + std::string label{}; + + if (trv_id == traversal_id_t::HORIZONTAL) { + label = std::string("RandomWalks; Horizontal traversal"); + hr_timer.start(label); + cudaProfilerStart(); + auto ret_tuple = + impl_details::random_walks_impl( + handle, graph_view, d_start_view, max_depth); + cudaProfilerStop(); + hr_timer.stop(); + } else { + label = std::string("RandomWalks; Vertical traversal"); + hr_timer.start(label); + cudaProfilerStart(); + auto ret_tuple = impl_details::random_walks_impl( + handle, graph_view, d_start_view, max_depth); + cudaProfilerStop(); + hr_timer.stop(); + } try { auto runtime = hr_timer.get_average_runtime(label); @@ -146,9 +164,10 @@ struct RandomWalks_Usecase { * * @param[in] configuration RandomWalks_Usecase instance containing the input * file to read for constructing the graph_t. + * @param[in] trv_id traversal strategy. */ template -void run(RandomWalks_Usecase const& configuration) +void run(RandomWalks_Usecase const& configuration, traversal_id_t trv_id) { raft::handle_t handle{}; @@ -162,7 +181,7 @@ void run(RandomWalks_Usecase const& configuration) // FIXME: the num_paths vector might be better specified via the // configuration input instead of hardcoding here. std::vector v_np{1, 10, 100}; - for (auto&& num_paths : v_np) { output_random_walks_time(graph_view, num_paths); } + for (auto&& num_paths : v_np) { output_random_walks_time(graph_view, num_paths, trv_id); } } /** @@ -206,7 +225,12 @@ int main(int argc, char** argv) // Run benchmarks std::cout << "Using dataset: " << dataset << std::endl; - run(RandomWalks_Usecase(dataset, true)); + + std::cout << "Horizontal traversal strategy:\n"; + run(RandomWalks_Usecase(dataset, true), traversal_id_t::HORIZONTAL); + + std::cout << "Vertical traversal strategy:\n"; + run(RandomWalks_Usecase(dataset, true), traversal_id_t::VERTICAL); // FIXME: consider returning non-zero for situations that warrant it (eg. 
if // the algo ran but the results are invalid, if a benchmark threshold is diff --git a/cpp/tests/sampling/random_walks_test.cu b/cpp/tests/sampling/random_walks_test.cu index 983a0ce23d8..dc73c474356 100644 --- a/cpp/tests/sampling/random_walks_test.cu +++ b/cpp/tests/sampling/random_walks_test.cu @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -55,6 +56,10 @@ void fill_start(raft::handle_t const& handle, } } // namespace +namespace impl_details = cugraph::experimental::detail; + +enum class traversal_id_t : int { HORIZONTAL = 0, VERTICAL }; + struct RandomWalks_Usecase { std::string graph_file_full_path{}; bool test_weighted{false}; @@ -70,7 +75,8 @@ struct RandomWalks_Usecase { }; }; -class Tests_RandomWalks : public ::testing::TestWithParam { +class Tests_RandomWalks + : public ::testing::TestWithParam> { public: Tests_RandomWalks() {} static void SetupTestCase() {} @@ -80,7 +86,7 @@ class Tests_RandomWalks : public ::testing::TestWithParam { virtual void TearDown() {} template - void run_current_test(RandomWalks_Usecase const& configuration) + void run_current_test(std::tuple const& configuration) { raft::handle_t handle{}; @@ -88,19 +94,21 @@ class Tests_RandomWalks : public ::testing::TestWithParam { // // std::cout << "read graph file: " << configuration.graph_file_full_path << std::endl; + traversal_id_t trv_id = std::get<0>(configuration); + auto const& target = std::get<1>(configuration); cugraph::experimental::graph_t graph(handle); std::tie(graph, std::ignore) = cugraph::test::read_graph_from_matrix_market_file( - handle, configuration.graph_file_full_path, configuration.test_weighted, false); + handle, target.graph_file_full_path, target.test_weighted, false); auto graph_view = graph.view(); // call random_walks: - start_random_walks(graph_view); + start_random_walks(graph_view, trv_id); } template - void start_random_walks(graph_vt const& graph_view) + void start_random_walks(graph_vt const& graph_view, traversal_id_t trv_id) { using vertex_t = typename graph_vt::vertex_type; using edge_t = typename graph_vt::edge_type; @@ -115,23 +123,40 @@ class Tests_RandomWalks : public ::testing::TestWithParam { // 0-copy const device view: // - cugraph::experimental::detail::device_const_vector_view d_start_view{ - d_start.data(), num_paths}; + impl_details::device_const_vector_view d_start_view{d_start.data(), + num_paths}; edge_t max_depth{10}; - auto ret_tuple = - cugraph::experimental::detail::random_walks_impl(handle, graph_view, d_start_view, max_depth); + if (trv_id == traversal_id_t::HORIZONTAL) { + auto ret_tuple = + impl_details::random_walks_impl( + handle, graph_view, d_start_view, max_depth); - // check results: - // - bool test_all_paths = cugraph::test::host_check_rw_paths( - handle, graph_view, std::get<0>(ret_tuple), std::get<1>(ret_tuple), std::get<2>(ret_tuple)); + // check results: + // + bool test_all_paths = cugraph::test::host_check_rw_paths( + handle, graph_view, std::get<0>(ret_tuple), std::get<1>(ret_tuple), std::get<2>(ret_tuple)); + + if (!test_all_paths) + std::cout << "starting seed on failure: " << std::get<3>(ret_tuple) << '\n'; - if (!test_all_paths) - std::cout << "starting seed on failure: " << std::get<3>(ret_tuple) << '\n'; + ASSERT_TRUE(test_all_paths); + } else { // VERTICAL + auto ret_tuple = + impl_details::random_walks_impl( + handle, graph_view, d_start_view, max_depth); - ASSERT_TRUE(test_all_paths); + // check results: + // + bool test_all_paths = cugraph::test::host_check_rw_paths( + handle, graph_view, 
std::get<0>(ret_tuple), std::get<1>(ret_tuple), std::get<2>(ret_tuple)); + + if (!test_all_paths) + std::cout << "starting seed on failure: " << std::get<3>(ret_tuple) << '\n'; + + ASSERT_TRUE(test_all_paths); + } } }; @@ -143,9 +168,10 @@ TEST_P(Tests_RandomWalks, Initialize_i32_i32_f) INSTANTIATE_TEST_SUITE_P( simple_test, Tests_RandomWalks, - ::testing::Values(RandomWalks_Usecase("test/datasets/karate.mtx", true), - RandomWalks_Usecase("test/datasets/web-Google.mtx", true), - RandomWalks_Usecase("test/datasets/ljournal-2008.mtx", true), - RandomWalks_Usecase("test/datasets/webbase-1M.mtx", true))); + ::testing::Combine(::testing::Values(traversal_id_t::HORIZONTAL, traversal_id_t::VERTICAL), + ::testing::Values(RandomWalks_Usecase("test/datasets/karate.mtx", true), + RandomWalks_Usecase("test/datasets/web-Google.mtx", true), + RandomWalks_Usecase("test/datasets/ljournal-2008.mtx", true), + RandomWalks_Usecase("test/datasets/webbase-1M.mtx", true)))); CUGRAPH_TEST_PROGRAM_MAIN() From f714e3eac187865b568a3d8e4a3fa5e8b6eb65da Mon Sep 17 00:00:00 2001 From: Paul Taylor Date: Mon, 28 Jun 2021 17:29:10 -0500 Subject: [PATCH 309/343] `CHECK_CUDA` macros in debug builds (#1687) Authors: - Paul Taylor (https://github.com/trxcllnt) Approvers: - Seunghwa Kang (https://github.com/seunghwak) - Kumar Aatish (https://github.com/kaatish) - Chuck Hastings (https://github.com/ChuckHastings) URL: https://github.com/rapidsai/cugraph/pull/1687 --- cpp/src/traversal/mg/common_utils.cuh | 8 ++++++-- cpp/src/utilities/graph_utils.cuh | 20 ++++++++++---------- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/cpp/src/traversal/mg/common_utils.cuh b/cpp/src/traversal/mg/common_utils.cuh index 7c36bc2c139..8ceaf43c827 100644 --- a/cpp/src/traversal/mg/common_utils.cuh +++ b/cpp/src/traversal/mg/common_utils.cuh @@ -16,11 +16,15 @@ #pragma once -#include +#include "../traversal_common.cuh" + #include #include #include -#include "../traversal_common.cuh" + +#include +#include +#include namespace cugraph { diff --git a/cpp/src/utilities/graph_utils.cuh b/cpp/src/utilities/graph_utils.cuh index 76e8dc32611..14275f52b9b 100644 --- a/cpp/src/utilities/graph_utils.cuh +++ b/cpp/src/utilities/graph_utils.cuh @@ -99,7 +99,7 @@ void axpy(size_t n, T a, T *x, T *y) thrust::device_pointer_cast(y), thrust::device_pointer_cast(y), axpy_functor(a)); - CHECK_CUDA(stream); + CHECK_CUDA(stream_view.value()); } // norm @@ -119,7 +119,7 @@ T nrm2(size_t n, T *x) square(), init, thrust::plus())); - CHECK_CUDA(stream); + CHECK_CUDA(stream_view.value()); return result; } @@ -130,7 +130,7 @@ T nrm1(size_t n, T *x) T result = thrust::reduce(rmm::exec_policy(stream_view), thrust::device_pointer_cast(x), thrust::device_pointer_cast(x + n)); - CHECK_CUDA(stream); + CHECK_CUDA(stream_view.value()); return result; } @@ -144,7 +144,7 @@ void scal(size_t n, T val, T *x) thrust::make_constant_iterator(val), thrust::device_pointer_cast(x), thrust::multiplies()); - CHECK_CUDA(stream); + CHECK_CUDA(stream_view.value()); } template @@ -157,7 +157,7 @@ void addv(size_t n, T val, T *x) thrust::make_constant_iterator(val), thrust::device_pointer_cast(x), thrust::plus()); - CHECK_CUDA(stream); + CHECK_CUDA(stream_view.value()); } template @@ -180,7 +180,7 @@ void scatter(size_t n, T *src, T *dst, M *map) thrust::device_pointer_cast(src + n), thrust::device_pointer_cast(map), thrust::device_pointer_cast(dst)); - CHECK_CUDA(stream); + CHECK_CUDA(stream_view.value()); } template @@ -205,7 +205,7 @@ void copy(size_t n, T *x, T *res) 
thrust::device_ptr res_ptr(res); rmm::cuda_stream_view stream_view; thrust::copy_n(rmm::exec_policy(stream_view), dev_ptr, n, res_ptr); - CHECK_CUDA(stream); + CHECK_CUDA(stream_view.value()); } template @@ -230,7 +230,7 @@ void update_dangling_nodes(size_t n, T *dangling_nodes, T damping_factor) thrust::device_pointer_cast(dangling_nodes), dangling_functor(1.0 - damping_factor), is_zero()); - CHECK_CUDA(stream); + CHECK_CUDA(stream_view.value()); } // google matrix kernels @@ -342,11 +342,11 @@ void HT_matrix_csc_coo(const IndexType n, nblocks.z = min((n + nthreads.z - 1) / nthreads.z, CUDA_MAX_BLOCKS); // 1; equi_prob3 <<>>(n, e, csrPtr, csrInd, val, degree.data()); - CHECK_CUDA(stream.value()); + CHECK_CUDA(stream_view.value()); ValueType a = 0.0; fill(n, bookmark, a); - CHECK_CUDA(stream); + CHECK_CUDA(stream_view.value()); nthreads.x = min(n, CUDA_MAX_KERNEL_THREADS); nthreads.y = 1; From dfeb73332c031cd8a0e67428c0866a06d5c4fe20 Mon Sep 17 00:00:00 2001 From: Iroy30 <41401566+Iroy30@users.noreply.github.com> Date: Tue, 29 Jun 2021 17:20:14 -0500 Subject: [PATCH 310/343] remove hardcoded dtype (#1689) this PRs removes legacy hardcoded int32 types Authors: - https://github.com/Iroy30 Approvers: - Brad Rees (https://github.com/BradReesWork) - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/1689 --- python/cugraph/dask/traversal/bfs.py | 4 ++-- python/cugraph/dask/traversal/sssp.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/python/cugraph/dask/traversal/bfs.py b/python/cugraph/dask/traversal/bfs.py index 03b9844bf6c..11283a1cb78 100644 --- a/python/cugraph/dask/traversal/bfs.py +++ b/python/cugraph/dask/traversal/bfs.py @@ -110,8 +110,8 @@ def bfs(graph, compute() start = start.iloc[0] else: - start = graph.lookup_internal_vertex_id(cudf.Series([start], - dtype='int32')).compute() + start = graph.lookup_internal_vertex_id(cudf.Series([start]) + ).compute() start = start.iloc[0] result = [client.submit( diff --git a/python/cugraph/dask/traversal/sssp.py b/python/cugraph/dask/traversal/sssp.py index 32e7401023a..f6c259caf77 100644 --- a/python/cugraph/dask/traversal/sssp.py +++ b/python/cugraph/dask/traversal/sssp.py @@ -98,8 +98,8 @@ def sssp(graph, data = get_distributed_data(ddf) if graph.renumbered: - source = graph.lookup_internal_vertex_id(cudf.Series([source], - dtype='int32')).compute() + source = graph.lookup_internal_vertex_id(cudf.Series([source]) + ).compute() source = source.iloc[0] result = [client.submit( From e2388f7582cfb4336feaca7b3d048853c59e2e52 Mon Sep 17 00:00:00 2001 From: Iroy30 <41401566+Iroy30@users.noreply.github.com> Date: Wed, 30 Jun 2021 06:50:41 -0500 Subject: [PATCH 311/343] Fix int64 vertex_t (#1691) Authors: - https://github.com/Iroy30 Approvers: - Brad Rees (https://github.com/BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1691 --- .../centrality/mg_katz_centrality_wrapper.pyx | 21 +++++--- .../dask/community/louvain_wrapper.pyx | 2 +- .../components/mg_connectivity_wrapper.pyx | 2 +- .../link_analysis/mg_pagerank_wrapper.pyx | 22 ++++++--- .../dask/traversal/mg_sssp_wrapper.pyx | 49 ++++++++++++------- 5 files changed, 62 insertions(+), 34 deletions(-) diff --git a/python/cugraph/dask/centrality/mg_katz_centrality_wrapper.pyx b/python/cugraph/dask/centrality/mg_katz_centrality_wrapper.pyx index c072d5ec143..acad3a29440 100644 --- a/python/cugraph/dask/centrality/mg_katz_centrality_wrapper.pyx +++ b/python/cugraph/dask/centrality/mg_katz_centrality_wrapper.pyx @@ -48,7 +48,7 
@@ def mg_katz_centrality(input_df, if num_global_edges > (2**31 - 1): edge_t = np.dtype("int64") else: - edge_t = np.dtype("int32") + edge_t = vertex_t if "value" in input_df.columns: weights = input_df['value'] weight_t = weights.dtype @@ -105,11 +105,18 @@ def mg_katz_centrality(input_df, cdef uintptr_t c_identifier = df['vertex'].__cuda_array_interface__['data'][0] cdef uintptr_t c_katz_centralities = df['katz_centrality'].__cuda_array_interface__['data'][0] - if (df['katz_centrality'].dtype == np.float32): - c_katz_centrality.call_katz_centrality[int, float](handle_[0], graph_container, c_identifier, c_katz_centralities, - alpha, beta, tol, max_iter, 0, normalize) + if vertex_t == np.int32: + if (df['katz_centrality'].dtype == np.float32): + c_katz_centrality.call_katz_centrality[int, float](handle_[0], graph_container, c_identifier, c_katz_centralities, + alpha, beta, tol, max_iter, 0, normalize) + else: + c_katz_centrality.call_katz_centrality[int, double](handle_[0], graph_container, c_identifier, c_katz_centralities, + alpha, beta, tol, max_iter, 0, normalize) else: - c_katz_centrality.call_katz_centrality[int, double](handle_[0], graph_container, c_identifier, c_katz_centralities, - alpha, beta, tol, max_iter, 0, normalize) - + if (df['katz_centrality'].dtype == np.float32): + c_katz_centrality.call_katz_centrality[long, float](handle_[0], graph_container, c_identifier, c_katz_centralities, + alpha, beta, tol, max_iter, 0, normalize) + else: + c_katz_centrality.call_katz_centrality[long, double](handle_[0], graph_container, c_identifier, c_katz_centralities, + alpha, beta, tol, max_iter, 0, normalize) return df diff --git a/python/cugraph/dask/community/louvain_wrapper.pyx b/python/cugraph/dask/community/louvain_wrapper.pyx index 4585270c879..57e643b6f1d 100644 --- a/python/cugraph/dask/community/louvain_wrapper.pyx +++ b/python/cugraph/dask/community/louvain_wrapper.pyx @@ -67,7 +67,7 @@ def louvain(input_df, if num_global_edges > (2**31 - 1): edge_t = np.dtype("int64") else: - edge_t = np.dtype("int32") + edge_t = vertex_t weight_t = weights.dtype # COO diff --git a/python/cugraph/dask/components/mg_connectivity_wrapper.pyx b/python/cugraph/dask/components/mg_connectivity_wrapper.pyx index 156d29a9794..32b499cd44a 100644 --- a/python/cugraph/dask/components/mg_connectivity_wrapper.pyx +++ b/python/cugraph/dask/components/mg_connectivity_wrapper.pyx @@ -40,7 +40,7 @@ def mg_wcc(input_df, if num_global_edges > (2**31 - 1): edge_t = np.dtype("int64") else: - edge_t = np.dtype("int32") + edge_t = vertex_t weights = None weight_t = np.dtype("float32") diff --git a/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx b/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx index 6b8e18c119c..8af7ccf4266 100644 --- a/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx +++ b/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx @@ -47,7 +47,7 @@ def mg_pagerank(input_df, if num_global_edges > (2**31 - 1): edge_t = np.dtype("int64") else: - edge_t = np.dtype("int32") + edge_t = vertex_t if "value" in input_df.columns: weights = input_df['value'] weight_t = weights.dtype @@ -112,11 +112,19 @@ def mg_pagerank(input_df, c_pers_vtx = personalization['vertex'].__cuda_array_interface__['data'][0] c_pers_val = personalization['values'].__cuda_array_interface__['data'][0] - if (df['pagerank'].dtype == np.float32): - c_pagerank.call_pagerank[int, float](handle_[0], graph_container, c_identifier, c_pagerank_val, sz, c_pers_vtx, c_pers_val, - alpha, tol, max_iter, 0) + if 
vertex_t == np.int32: + if (df['pagerank'].dtype == np.float32): + c_pagerank.call_pagerank[int, float](handle_[0], graph_container, c_identifier, c_pagerank_val, sz, c_pers_vtx, c_pers_val, + alpha, tol, max_iter, 0) + else: + c_pagerank.call_pagerank[int, double](handle_[0], graph_container, c_identifier, c_pagerank_val, sz, c_pers_vtx, c_pers_val, + alpha, tol, max_iter, 0) else: - c_pagerank.call_pagerank[int, double](handle_[0], graph_container, c_identifier, c_pagerank_val, sz, c_pers_vtx, c_pers_val, - alpha, tol, max_iter, 0) - + if (df['pagerank'].dtype == np.float32): + c_pagerank.call_pagerank[long, float](handle_[0], graph_container, c_identifier, c_pagerank_val, sz, c_pers_vtx, c_pers_val, + alpha, tol, max_iter, 0) + else: + c_pagerank.call_pagerank[long, double](handle_[0], graph_container, c_identifier, c_pagerank_val, sz, c_pers_vtx, c_pers_val, + alpha, tol, max_iter, 0) + return df diff --git a/python/cugraph/dask/traversal/mg_sssp_wrapper.pyx b/python/cugraph/dask/traversal/mg_sssp_wrapper.pyx index c11ec967e05..26928331273 100644 --- a/python/cugraph/dask/traversal/mg_sssp_wrapper.pyx +++ b/python/cugraph/dask/traversal/mg_sssp_wrapper.pyx @@ -42,7 +42,7 @@ def mg_sssp(input_df, if num_global_edges > (2**31 - 1): edge_t = np.dtype("int64") else: - edge_t = np.dtype("int32") + edge_t = vertex_t if "value" in input_df.columns: weights = input_df['value'] weight_t = weights.dtype @@ -99,21 +99,34 @@ def mg_sssp(input_df, cdef uintptr_t c_distance_ptr = df['distance'].__cuda_array_interface__['data'][0] # MG BFS path assumes directed is true - if weight_t == np.float32: - c_sssp.call_sssp[int, float](handle_[0], - graph_container, - NULL, - c_distance_ptr, - c_predecessor_ptr, - start) - elif weight_t == np.float64: - c_sssp.call_sssp[int, double](handle_[0], - graph_container, - NULL, - c_distance_ptr, - c_predecessor_ptr, - start) - else: # This case should not happen - raise NotImplementedError - + if vertex_t == np.int32: + if weight_t == np.float32: + c_sssp.call_sssp[int, float](handle_[0], + graph_container, + NULL, + c_distance_ptr, + c_predecessor_ptr, + start) + elif weight_t == np.float64: + c_sssp.call_sssp[int, double](handle_[0], + graph_container, + NULL, + c_distance_ptr, + c_predecessor_ptr, + start) + else: + if weight_t == np.float32: + c_sssp.call_sssp[long, float](handle_[0], + graph_container, + NULL, + c_distance_ptr, + c_predecessor_ptr, + start) + elif weight_t == np.float64: + c_sssp.call_sssp[long, double](handle_[0], + graph_container, + NULL, + c_distance_ptr, + c_predecessor_ptr, + start) return df From 187f2bc5c5e63ac1cb5e7bec857e7fc02a9cced2 Mon Sep 17 00:00:00 2001 From: Joseph Nke <76006812+jnke2016@users.noreply.github.com> Date: Wed, 30 Jun 2021 08:09:37 -0500 Subject: [PATCH 312/343] fixing symmetrize_ddf (#1686) fix the function symmetrizing the dask dataframe Authors: - Joseph Nke (https://github.com/jnke2016) Approvers: - Brad Rees (https://github.com/BradReesWork) - Chuck Hastings (https://github.com/ChuckHastings) - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/1686 --- python/cugraph/structure/symmetrize.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/python/cugraph/structure/symmetrize.py b/python/cugraph/structure/symmetrize.py index 442701f6508..13116eabb07 100644 --- a/python/cugraph/structure/symmetrize.py +++ b/python/cugraph/structure/symmetrize.py @@ -14,6 +14,7 @@ from cugraph.structure import graph_classes as csg import cudf import dask_cudf 
+from cugraph.comms import comms as Comms def symmetrize_df(df, src_name, dst_name, multi=False, symmetrize=True): @@ -136,13 +137,13 @@ def symmetrize_ddf(df, src_name, dst_name, weight_name=None): else: ddf2 = df[[dst_name, src_name]] ddf2.columns = [src_name, dst_name] - + worker_list = Comms.get_workers() + num_workers = len(worker_list) ddf = df.append(ddf2).reset_index(drop=True) - result = ( - ddf.groupby(by=[src_name, dst_name], as_index=False) - .min() - .reset_index() - ) + result = ddf.shuffle(on=[ + src_name, dst_name], ignore_index=True, npartitions=num_workers) + result = result.map_partitions(lambda x: x.groupby( + by=[src_name, dst_name], as_index=False).min().reset_index(drop=True)) return result From 1778d43a2ad64c437547ed83fc651abe4a0b2df1 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Thu, 1 Jul 2021 21:32:32 -0400 Subject: [PATCH 313/343] Ues std::optional (or thrust::optional) for optional parameters & first part of DCSR (DCSC) implementation. (#1676) - Update graph data structure to optionally use DCSR (DCSC) for hypersparse segments - Use std::optional (or thrust::optional) for optional parameters Touches many files but a big chunk of the updates are related to std::optional support. Currently, DCSR (DCSC) is disabled (and primitives are not updated to support the format). Splitting DCSR (DCSC) support in two PRs to avoid the (already) big PR getting even bigger. Authors: - Seunghwa Kang (https://github.com/seunghwak) - Mark Harris (https://github.com/harrism) - Chuck Hastings (https://github.com/ChuckHastings) - AJ Schmidt (https://github.com/ajschmidt8) Approvers: - Andrei Schaffer (https://github.com/aschaffer) - Chuck Hastings (https://github.com/ChuckHastings) - Kumar Aatish (https://github.com/kaatish) - Brad Rees (https://github.com/BradReesWork) - https://github.com/Iroy30 URL: https://github.com/rapidsai/cugraph/pull/1676 --- cpp/include/cugraph/algorithms.hpp | 26 +- cpp/include/cugraph/experimental/graph.hpp | 70 ++-- .../cugraph/experimental/graph_functions.hpp | 44 +- .../cugraph/experimental/graph_view.hpp | 186 ++++----- .../cugraph/matrix_partition_device.cuh | 262 ------------ .../cugraph/matrix_partition_device_view.cuh | 234 +++++++++++ cpp/include/cugraph/matrix_partition_view.hpp | 136 +++++++ .../prims/copy_to_adj_matrix_row_col.cuh | 24 +- .../copy_v_transform_reduce_in_out_nbr.cuh | 95 +++-- ...ransform_reduce_key_aggregated_out_nbr.cuh | 19 +- ...orm_reduce_by_adj_matrix_row_col_key_e.cuh | 17 +- .../cugraph/prims/transform_reduce_e.cuh | 130 +++--- ...transform_reduce_v_with_adj_matrix_row.cuh | 8 +- .../update_frontier_v_push_if_out_nbr.cuh | 253 ++++++------ .../cugraph/serialization/serializer.hpp | 14 +- cpp/include/cugraph/utilities/cython.hpp | 24 +- .../cugraph/vertex_partition_device.cuh | 112 ----- .../cugraph/vertex_partition_device_view.cuh | 111 +++++ cpp/include/cugraph/vertex_partition_view.hpp | 81 ++++ cpp/src/community/egonet.cu | 19 +- .../components/weakly_connected_components.cu | 48 ++- cpp/src/experimental/bfs.cu | 5 +- cpp/src/experimental/coarsen_graph.cu | 270 ++++++------- cpp/src/experimental/graph.cu | 381 ++++++++---------- cpp/src/experimental/graph_view.cu | 163 +++++--- cpp/src/experimental/induced_subgraph.cu | 57 +-- cpp/src/experimental/pagerank.cu | 199 ++++----- cpp/src/experimental/renumber_edgelist.cu | 188 ++++++--- cpp/src/experimental/sssp.cu | 5 +- cpp/src/sampling/random_walks.cuh | 18 +- cpp/src/sampling/rw_traversals.hpp | 8 +- 
cpp/src/serialization/serializer.cu | 60 ++- .../structure/create_graph_from_edgelist.cu | 159 ++++---- cpp/src/utilities/cython.cu | 204 ++++++---- cpp/tests/bcast/mg_graph_bcast.cpp | 10 +- cpp/tests/community/egonet_test.cu | 35 +- cpp/tests/community/mg_louvain_helper.cu | 114 +++--- cpp/tests/community/mg_louvain_test.cpp | 24 +- .../mg_weakly_connected_components_test.cpp | 6 +- cpp/tests/components/wcc_graphs.cu | 11 +- cpp/tests/components/wcc_graphs.hpp | 2 +- .../weakly_connected_components_test.cpp | 15 +- cpp/tests/experimental/bfs_test.cpp | 28 +- cpp/tests/experimental/coarsen_graph_test.cpp | 12 +- cpp/tests/experimental/degree_test.cpp | 4 +- cpp/tests/experimental/graph_test.cpp | 71 ++-- .../experimental/induced_subgraph_test.cpp | 80 ++-- .../experimental/katz_centrality_test.cpp | 36 +- cpp/tests/experimental/mg_bfs_test.cpp | 14 +- .../experimental/mg_katz_centrality_test.cpp | 11 +- cpp/tests/experimental/mg_sssp_test.cpp | 16 +- cpp/tests/experimental/pagerank_test.cpp | 288 +++++++------ cpp/tests/experimental/sssp_test.cpp | 31 +- cpp/tests/experimental/weight_sum_test.cpp | 6 +- cpp/tests/pagerank/mg_pagerank_test.cpp | 161 +++++--- cpp/tests/sampling/random_walks_utils.cuh | 10 +- cpp/tests/sampling/rw_low_level_test.cu | 112 ++--- cpp/tests/serialization/un_serialize_test.cpp | 15 +- .../utilities/matrix_market_file_utilities.cu | 80 ++-- cpp/tests/utilities/rmat_utilities.cu | 98 ++--- cpp/tests/utilities/test_graphs.hpp | 78 ++-- cpp/tests/utilities/test_utilities.hpp | 111 +++-- python/cugraph/community/egonet_wrapper.pyx | 3 +- .../components/connectivity_wrapper.pyx | 3 +- .../dask/centrality/katz_centrality.py | 6 + .../centrality/mg_katz_centrality_wrapper.pyx | 15 +- python/cugraph/dask/community/louvain.py | 12 +- .../dask/community/louvain_wrapper.pyx | 16 +- .../cugraph/dask/components/connectivity.py | 10 +- .../components/mg_connectivity_wrapper.pyx | 16 +- .../link_analysis/mg_pagerank_wrapper.pyx | 14 +- python/cugraph/dask/link_analysis/pagerank.py | 7 + python/cugraph/dask/traversal/bfs.py | 6 + .../cugraph/dask/traversal/mg_bfs_wrapper.pyx | 14 +- .../dask/traversal/mg_sssp_wrapper.pyx | 14 +- python/cugraph/dask/traversal/sssp.py | 6 + .../link_analysis/pagerank_wrapper.pyx | 3 +- .../cugraph/sampling/random_walks_wrapper.pyx | 3 +- .../simpleDistributedGraph.py | 13 +- python/cugraph/structure/graph_utilities.pxd | 13 +- python/cugraph/structure/number_map.py | 54 ++- python/cugraph/structure/renumber_wrapper.pyx | 162 +++++--- python/cugraph/tests/test_graph.py | 2 +- python/cugraph/tests/test_hypergraph.py | 2 +- python/cugraph/traversal/bfs_wrapper.pyx | 3 +- 85 files changed, 3060 insertions(+), 2446 deletions(-) delete mode 100644 cpp/include/cugraph/matrix_partition_device.cuh create mode 100644 cpp/include/cugraph/matrix_partition_device_view.cuh create mode 100644 cpp/include/cugraph/matrix_partition_view.hpp delete mode 100644 cpp/include/cugraph/vertex_partition_device.cuh create mode 100644 cpp/include/cugraph/vertex_partition_device_view.cuh create mode 100644 cpp/include/cugraph/vertex_partition_view.hpp diff --git a/cpp/include/cugraph/algorithms.hpp b/cpp/include/cugraph/algorithms.hpp index 1d9d964fc1c..12776f1f8c7 100644 --- a/cpp/include/cugraph/algorithms.hpp +++ b/cpp/include/cugraph/algorithms.hpp @@ -1214,17 +1214,17 @@ void sssp(raft::handle_t const &handle, * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. 
* @param graph_view Graph view object. - * @param adj_matrix_row_out_weight_sums Pointer to an array storing sums of out-going edge weights - * for the vertices in the rows of the graph adjacency matrix (for re-use) or `nullptr`. If - * `nullptr`, these values are freshly computed. Computing these values outsid this function reduces - * the number of memoray allocations/deallocations and computing if a user repeatedly computes - * PageRank scores using the same graph with different personalization vectors. + * @param precomputed_vertex_out_weight_sums Pointer to an array storing sums of out-going edge + * weights for the vertices (for re-use) or `std::nullopt`. If `std::nullopt`, these values are + * freshly computed. Computing these values outside this function reduces the number of memory + * allocations/deallocations and computing if a user repeatedly computes PageRank scores using the + * same graph with different personalization vectors. * @param personalization_vertices Pointer to an array storing personalization vertex identifiers - * (compute personalized PageRank) or `nullptr` (compute general PageRank). + * (compute personalized PageRank) or `std::nullopt` (compute general PageRank). * @param personalization_values Pointer to an array storing personalization values for the vertices - * in the personalization set. Relevant only if @p personalization_vertices is not `nullptr`. + * in the personalization set. Relevant only if @p personalization_vertices is not `std::nullopt`. * @param personalization_vector_size Size of the personalization set. If @personalization_vertices - * is not `nullptr`, the sizes of the arrays pointed by @p personalization_vertices and @p + * is not `std::nullopt`, the sizes of the arrays pointed by @p personalization_vertices and @p * personalization_values should be @p personalization_vector_size. * @param pageranks Pointer to the output PageRank score array. * @param alpha PageRank damping factor. 
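// [Editorial sketch, not part of this patch] The next hunk changes the
// optional pagerank arguments from raw pointers to std::optional, so a
// general (non-personalized) call passes std::nullopt instead of nullptr and
// a dummy size. The wrapper function below is hypothetical; it assumes a
// float-valued, transposed graph_view, omits namespace qualifiers, and leaves
// the defaulted trailing parameters (max_iterations, has_initial_guess,
// do_expensive_check) at their defaults.
template <typename vertex_t, typename edge_t, bool multi_gpu>
void example_general_pagerank(
  raft::handle_t const& handle,
  graph_view_t<vertex_t, edge_t, float, true, multi_gpu> const& graph_view,
  float* pageranks)  // pre-allocated, one entry per local vertex
{
  pagerank(handle,
           graph_view,
           std::nullopt,  // precomputed_vertex_out_weight_sums
           std::nullopt,  // personalization_vertices
           std::nullopt,  // personalization_values
           std::nullopt,  // personalization_vector_size
           pageranks,
           0.85f,   // alpha
           1e-6f);  // epsilon
}
// [End of editorial sketch]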
@@ -1240,10 +1240,10 @@ void sssp(raft::handle_t const &handle, template void pagerank(raft::handle_t const &handle, graph_view_t const &graph_view, - weight_t const *adj_matrix_row_out_weight_sums, - vertex_t const *personalization_vertices, - result_t const *personalization_values, - vertex_t personalization_vector_size, + std::optional precomputed_vertex_out_weight_sums, + std::optional personalization_vertices, + std::optional personalization_values, + std::optional personalization_vector_size, result_t *pageranks, result_t alpha, result_t epsilon, @@ -1322,7 +1322,7 @@ void katz_centrality(raft::handle_t const &handle, template std::tuple, rmm::device_uvector, - rmm::device_uvector, + std::optional>, rmm::device_uvector> extract_ego(raft::handle_t const &handle, graph_view_t const &graph_view, diff --git a/cpp/include/cugraph/experimental/graph.hpp b/cpp/include/cugraph/experimental/graph.hpp index 1c829016516..0f93abb1635 100644 --- a/cpp/include/cugraph/experimental/graph.hpp +++ b/cpp/include/cugraph/experimental/graph.hpp @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -33,7 +34,7 @@ template struct edgelist_t { vertex_t const *p_src_vertices{nullptr}; vertex_t const *p_dst_vertices{nullptr}; - weight_t const *p_edge_weights{nullptr}; + std::optional p_edge_weights{std::nullopt}; edge_t number_of_edges{0}; }; @@ -69,18 +70,36 @@ class graph_t> const &segment_offsets, bool do_expensive_check = false); + bool is_weighted() const { return adj_matrix_partition_weights_.has_value(); } + graph_view_t view() const { std::vector offsets(adj_matrix_partition_offsets_.size(), nullptr); std::vector indices(adj_matrix_partition_indices_.size(), nullptr); - std::vector weights(adj_matrix_partition_weights_.size(), nullptr); + auto weights = adj_matrix_partition_weights_ + ? std::make_optional>( + (*adj_matrix_partition_weights_).size(), nullptr) + : std::nullopt; + auto dcs_nzd_vertices = adj_matrix_partition_dcs_nzd_vertices_ + ? std::make_optional>( + (*adj_matrix_partition_dcs_nzd_vertices_).size(), nullptr) + : std::nullopt; + auto dcs_nzd_vertex_counts = + adj_matrix_partition_dcs_nzd_vertex_counts_ + ? 
std::make_optional>( + (*adj_matrix_partition_dcs_nzd_vertex_counts_).size(), vertex_t{0}) + : std::nullopt; for (size_t i = 0; i < offsets.size(); ++i) { offsets[i] = adj_matrix_partition_offsets_[i].data(); indices[i] = adj_matrix_partition_indices_[i].data(); - if (weights.size() > 0) { weights[i] = adj_matrix_partition_weights_[i].data(); } + if (weights) { (*weights)[i] = (*adj_matrix_partition_weights_)[i].data(); } + if (dcs_nzd_vertices) { + (*dcs_nzd_vertices)[i] = (*adj_matrix_partition_dcs_nzd_vertices_)[i].data(); + (*dcs_nzd_vertex_counts)[i] = (*adj_matrix_partition_dcs_nzd_vertex_counts_)[i]; + } } return graph_view_t( @@ -88,27 +107,31 @@ class graph_tget_number_of_vertices(), this->get_number_of_edges(), this->get_graph_properties(), - adj_matrix_partition_segment_offsets_.size() > 0, + adj_matrix_partition_segment_offsets_, false); } private: std::vector> adj_matrix_partition_offsets_{}; std::vector> adj_matrix_partition_indices_{}; - std::vector> adj_matrix_partition_weights_{}; + std::optional>> adj_matrix_partition_weights_{ + std::nullopt}; + // nzd: nonzero (local) degree, relevant only if segment_offsets.size() > 0 + std::optional>> adj_matrix_partition_dcs_nzd_vertices_{ + std::nullopt}; + std::optional> adj_matrix_partition_dcs_nzd_vertex_counts_{std::nullopt}; partition_t partition_{}; - std::vector - adj_matrix_partition_segment_offsets_{}; // segment offsets within the vertex partition based - // on vertex degree, relevant only if - // sorted_by_global_degree_within_vertex_partition is - // true + // segment offsets within the vertex partition based on vertex degree, relevant only if + // segment_offsets.size() > 0 + std::optional> adj_matrix_partition_segment_offsets_{std::nullopt}; }; // single-GPU version @@ -129,17 +152,16 @@ class graph_t(), offsets_(0, handle.get_stream()), - indices_(0, handle.get_stream()), - weights_(0, handle.get_stream()){}; + indices_(0, handle.get_stream()){}; graph_t(raft::handle_t const &handle, edgelist_t const &edgelist, vertex_t number_of_vertices, graph_properties_t properties, - bool sorted_by_degree, + std::optional> const &segment_offsets, bool do_expensive_check = false); - vertex_t get_number_of_local_vertices() const { return this->get_number_of_vertices(); } + bool is_weighted() const { return weights_.has_value(); } graph_view_t view() const { @@ -147,12 +169,11 @@ class graph_tget_handle_ptr()), offsets_.data(), indices_.data(), - weights_.data(), - segment_offsets_, + weights_ ? 
std::optional{(*weights_).data()} : std::nullopt, this->get_number_of_vertices(), this->get_number_of_edges(), this->get_graph_properties(), - segment_offsets_.size() > 0, + segment_offsets_, false); } @@ -167,8 +188,8 @@ class graph_t &&offsets, rmm::device_uvector &&indices, - rmm::device_uvector &&weights, - std::vector &&segment_offsets) + std::optional> &&weights, + std::optional> &&segment_offsets) : detail::graph_base_t( handle, number_of_vertices, number_of_edges, properties), offsets_(std::move(offsets)), @@ -180,9 +201,10 @@ class graph_t offsets_; rmm::device_uvector indices_; - rmm::device_uvector weights_; - std::vector segment_offsets_{}; // segment offsets based on vertex degree, relevant - // only if sorted_by_global_degree is true + std::optional> weights_{std::nullopt}; + + // segment offsets based on vertex degree, relevant only if sorted_by_global_degree is true + std::optional> segment_offsets_{}; }; template diff --git a/cpp/include/cugraph/experimental/graph_functions.hpp b/cpp/include/cugraph/experimental/graph_functions.hpp index 8f5dbb1138b..10d3f6d2216 100644 --- a/cpp/include/cugraph/experimental/graph_functions.hpp +++ b/cpp/include/cugraph/experimental/graph_functions.hpp @@ -65,14 +65,19 @@ namespace experimental { * @param edgelist_edge_counts Edge counts (one count per local graph adjacency matrix partition * assigned to this process). * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). - * @return std::tuple, partition_t, vertex_t, edge_t> - * Quadruplet of labels (vertex IDs before renumbering) for the entire set of vertices (assigned to - * this process in multi-GPU), partition_t object storing graph partitioning information, total - * number of vertices, and total number of edges. + * @return std::tuple, partition_t, vertex_t, edge_t, + * std::vector> Tuple of labels (vertex IDs before renumbering) for the entire set of + * vertices (assigned to this process in multi-GPU), partition_t object storing graph partitioning + * information, total number of vertices, total number of edges, and vertex partition segment + * offsets (a vertex partition is partitioned to multiple segments based on vertex degrees). */ template std::enable_if_t, partition_t, vertex_t, edge_t>> + std::tuple, + partition_t, + vertex_t, + edge_t, + std::vector>> renumber_edgelist(raft::handle_t const& handle, std::optional> optional_local_vertex_span, std::vector const& edgelist_major_vertices /* [INOUT] */, @@ -102,17 +107,18 @@ renumber_edgelist(raft::handle_t const& handle, * Vertex IDs are updated in-place ([INOUT] parameter). * @param num_edgelist_edges Number of edges in the edgelist. * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). - * @return rmm::device_uvector Labels (vertex IDs before renumbering) for the entire set - * of vertices. + * @return std::tuple, std::vector> Tuple of abels (vertex + * IDs before renumbering) for the entire set of vertices and vertex partition segment offsets (a + * vertex partition is partitioned to multiple segments based on vertex degrees). 
*/ template -std::enable_if_t> renumber_edgelist( - raft::handle_t const& handle, - std::optional> optional_vertex_span, - vertex_t* edgelist_major_vertices /* [INOUT] */, - vertex_t* edgelist_minor_vertices /* [INOUT] */, - edge_t num_edgelist_edges, - bool do_expensive_check = false); +std::enable_if_t, std::vector>> +renumber_edgelist(raft::handle_t const& handle, + std::optional> optional_vertex_span, + vertex_t* edgelist_major_vertices /* [INOUT] */, + vertex_t* edgelist_minor_vertices /* [INOUT] */, + edge_t num_edgelist_edges, + bool do_expensive_check = false); /** * @brief Renumber external vertices to internal vertices based on the provoided @p @@ -313,7 +319,7 @@ template std::tuple, rmm::device_uvector, - rmm::device_uvector, + std::optional>, rmm::device_uvector> extract_induced_subgraphs( raft::handle_t const& handle, @@ -347,8 +353,8 @@ extract_induced_subgraphs( * and) edge list. * @param renumber Flag indicating whether to renumber vertices or not. * @return std::tuple, rmm::device_uvector> Pair of the generated graph and the renumber map. The - * szie of the renumber map is 0 if @p renumber is false. + * multi_gpu>, rmm::device_uvector> Pair of the generated graph and the renumber map (if + * @p renumber is true) or std::nullopt (if @p renumber is false). */ template std::tuple, - rmm::device_uvector> + std::optional>> create_graph_from_edgelist( raft::handle_t const& handle, std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, - rmm::device_uvector&& edgelist_weights, + std::optional>&& edgelist_weights, graph_properties_t graph_properties, bool renumber); diff --git a/cpp/include/cugraph/experimental/graph_view.hpp b/cpp/include/cugraph/experimental/graph_view.hpp index 45e716c3647..e243bddca1c 100644 --- a/cpp/include/cugraph/experimental/graph_view.hpp +++ b/cpp/include/cugraph/experimental/graph_view.hpp @@ -15,7 +15,9 @@ */ #pragma once +#include #include +#include #include #include @@ -212,15 +214,17 @@ class partition_t { struct graph_properties_t { bool is_symmetric{false}; bool is_multigraph{false}; - bool is_weighted{false}; }; namespace detail { // FIXME: threshold values require tuning +// use the hypersparse format (currently, DCSR or DCSC) for the vertices with their degrees smaller +// than col_comm_size * hypersparse_threshold_ratio, should be less than 1.0 +double constexpr hypersparse_threshold_ratio = 0.0; size_t constexpr low_degree_threshold{raft::warp_size()}; size_t constexpr mid_degree_threshold{1024}; -size_t constexpr num_segments_per_vertex_partition{3}; +size_t constexpr num_sparse_segments_per_vertex_partition{3}; // Common for both graph_view_t & graph_t and both single-GPU & multi-GPU versions template @@ -254,7 +258,6 @@ class graph_base_t { bool is_symmetric() const { return properties_.is_symmetric; } bool is_multigraph() const { return properties_.is_multigraph; } - bool is_weighted() const { return properties_.is_weighted; } protected: friend class cugraph::serializer::serializer_t; @@ -302,17 +305,21 @@ class graph_view_t const& adj_matrix_partition_offsets, - std::vector const& adj_matrix_partition_indices, - std::vector const& adj_matrix_partition_weights, - std::vector const& adj_matrix_partition_segment_offsets, - partition_t const& partition, - vertex_t number_of_vertices, - edge_t number_of_edges, - graph_properties_t properties, - bool sorted_by_global_degree_within_vertex_partition, - bool do_expensive_check = false); + graph_view_t( + raft::handle_t const& handle, + 
std::vector const& adj_matrix_partition_offsets, + std::vector const& adj_matrix_partition_indices, + std::optional> const& adj_matrix_partition_weights, + std::optional> const& adj_matrix_partition_dcs_nzd_vertices, + std::optional> const& adj_matrix_partition_dcs_nzd_vertex_counts, + partition_t const& partition, + vertex_t number_of_vertices, + edge_t number_of_edges, + graph_properties_t properties, + std::optional> const& adj_matrix_partition_segment_offsets, + bool do_expensive_check = false); + + bool is_weighted() const { return adj_matrix_partition_weights_.has_value(); } vertex_t get_number_of_local_vertices() const { @@ -472,62 +479,57 @@ class graph_view_t get_local_adj_matrix_partition_segment_offsets(size_t partition_idx) const - { - return adj_matrix_partition_segment_offsets_.size() > 0 - ? std::vector( - adj_matrix_partition_segment_offsets_.begin() + - partition_idx * (detail::num_segments_per_vertex_partition + 1), - adj_matrix_partition_segment_offsets_.begin() + - (partition_idx + 1) * (detail::num_segments_per_vertex_partition + 1)) - : std::vector{}; - } - - // FIXME: this function is not part of the public stable API. This function is mainly for pattern - // accelerator implementation. This function is currently public to support the legacy - // implementations directly accessing CSR/CSC data, but this function will eventually become - // private or even disappear if we switch to CSR + DCSR (or CSC + DCSC). - edge_t const* offsets() const { return offsets(0); } - - // FIXME: this function is not part of the public stable API. This function is mainly for pattern - // accelerator implementation. This function is currently public to support the legacy - // implementations directly accessing CSR/CSC data, but this function will eventually become - // private or even disappear if we switch to CSR + DCSR (or CSC + DCSC). - vertex_t const* indices() const { return indices(0); } - - // FIXME: this function is not part of the public stable API. This function is mainly for pattern - // accelerator implementation. This function is currently public to support the legacy - // implementations directly accessing CSR/CSC data, but this function will eventually become - // private or even disappear if we switch to CSR + DCSR (or CSC + DCSC). - weight_t const* weights() const { return weights(0); } - - // FIXME: this function is not part of the public stable API. This function is mainly for pattern - // accelerator implementation. This function is currently public to support the legacy - // implementations directly accessing CSR/CSC data, but this function will eventually become - // private or even disappear if we switch to CSR + DCSR (or CSC + DCSC). - edge_t const* offsets(size_t adj_matrix_partition_idx) const + std::optional> get_local_adj_matrix_partition_segment_offsets( + size_t partition_idx) const { - return adj_matrix_partition_offsets_[adj_matrix_partition_idx]; + if (adj_matrix_partition_segment_offsets_) { + auto size_per_partition = + (*adj_matrix_partition_segment_offsets_).size() / partition_.get_col_size(); + return std::vector( + (*adj_matrix_partition_segment_offsets_).begin() + partition_idx * size_per_partition, + (*adj_matrix_partition_segment_offsets_).begin() + + (partition_idx + 1) * size_per_partition); + } else { + return std::nullopt; + } } - // FIXME: this function is not part of the public stable API. This function is mainly for pattern - // accelerator implementation. 
This function is currently public to support the legacy - // implementations directly accessing CSR/CSC data, but this function will eventually become - // private or even disappear if we switch to CSR + DCSR (or CSC + DCSC). - vertex_t const* indices(size_t adj_matrix_partition_idx) const + vertex_partition_view_t get_vertex_partition_view() const { - return adj_matrix_partition_indices_[adj_matrix_partition_idx]; + return vertex_partition_view_t(this->get_number_of_vertices(), + this->get_local_vertex_first(), + this->get_local_vertex_last()); } - // FIXME: this function is not part of the public stable API. This function is mainly for pattern - // accelerator implementation. This function is currently public to support the legacy - // implementations directly accessing CSR/CSC data, but this function will eventually become - // private or even disappear if we switch to CSR + DCSR (or CSC + DCSC). - weight_t const* weights(size_t adj_matrix_partition_idx) const + matrix_partition_view_t get_matrix_partition_view( + size_t adj_matrix_partition_idx) const { - return adj_matrix_partition_weights_.size() > 0 - ? adj_matrix_partition_weights_[adj_matrix_partition_idx] - : static_cast(nullptr); + return matrix_partition_view_t( + adj_matrix_partition_offsets_[adj_matrix_partition_idx], + adj_matrix_partition_indices_[adj_matrix_partition_idx], + adj_matrix_partition_weights_ + ? std::optional{(*adj_matrix_partition_weights_)[adj_matrix_partition_idx]} + : std::nullopt, + adj_matrix_partition_dcs_nzd_vertices_ + ? std::optional{( + *adj_matrix_partition_dcs_nzd_vertices_)[adj_matrix_partition_idx]} + : std::nullopt, + adj_matrix_partition_dcs_nzd_vertex_counts_ + ? std::optional{( + *adj_matrix_partition_dcs_nzd_vertex_counts_)[adj_matrix_partition_idx]} + : std::nullopt, + this->get_number_of_local_adj_matrix_partition_edges(adj_matrix_partition_idx), + store_transposed ? this->get_local_adj_matrix_partition_col_first(adj_matrix_partition_idx) + : this->get_local_adj_matrix_partition_row_first(adj_matrix_partition_idx), + store_transposed ? this->get_local_adj_matrix_partition_col_last(adj_matrix_partition_idx) + : this->get_local_adj_matrix_partition_row_last(adj_matrix_partition_idx), + store_transposed ? this->get_local_adj_matrix_partition_row_first(adj_matrix_partition_idx) + : this->get_local_adj_matrix_partition_col_first(adj_matrix_partition_idx), + store_transposed ? this->get_local_adj_matrix_partition_row_last(adj_matrix_partition_idx) + : this->get_local_adj_matrix_partition_col_last(adj_matrix_partition_idx), + store_transposed + ? 
this->get_local_adj_matrix_partition_col_value_start_offset(adj_matrix_partition_idx) + : this->get_local_adj_matrix_partition_row_value_start_offset(adj_matrix_partition_idx)); } rmm::device_uvector compute_in_degrees(raft::handle_t const& handle) const; @@ -545,16 +547,18 @@ class graph_view_t adj_matrix_partition_offsets_{}; std::vector adj_matrix_partition_indices_{}; - std::vector adj_matrix_partition_weights_{}; + std::optional> adj_matrix_partition_weights_{}; + + // relevant only if we use the CSR + DCSR (or CSC + DCSC) hybrid format + std::optional> adj_matrix_partition_dcs_nzd_vertices_{}; + std::optional> adj_matrix_partition_dcs_nzd_vertex_counts_{}; + std::vector adj_matrix_partition_number_of_edges_{}; partition_t partition_{}; - std::vector - adj_matrix_partition_segment_offsets_{}; // segment offsets within the vertex partition based - // on vertex degree, relevant only if - // sorted_by_global_degree_within_vertex_partition is - // true + // segment offsets based on vertex degree, relevant only if vertex IDs are renumbered + std::optional> adj_matrix_partition_segment_offsets_{}; }; // single-GPU version @@ -580,14 +584,15 @@ class graph_view_t const& segment_offsets, + std::optional weights, vertex_t number_of_vertices, edge_t number_of_edges, graph_properties_t properties, - bool sorted_by_degree, + std::optional> const& segment_offsets, bool do_expensive_check = false); + bool is_weighted() const { return weights_.has_value(); } + vertex_t get_number_of_local_vertices() const { return this->get_number_of_vertices(); } constexpr vertex_t get_local_vertex_first() const { return vertex_t{0}; } @@ -701,30 +706,25 @@ class graph_view_t get_local_adj_matrix_partition_segment_offsets( + std::optional> get_local_adj_matrix_partition_segment_offsets( size_t adj_matrix_partition_idx) const { assert(adj_matrix_partition_idx == 0); - return segment_offsets_.size() > 0 ? segment_offsets_ : std::vector{}; + return segment_offsets_; } - // FIXME: this function is not part of the public stable API.This function is mainly for pattern - // accelerator implementation. This function is currently public to support the legacy - // implementations directly accessing CSR/CSC data, but this function will eventually become - // private. - edge_t const* offsets() const { return offsets_; } - - // FIXME: this function is not part of the public stable API.This function is mainly for pattern - // accelerator implementation. This function is currently public to support the legacy - // implementations directly accessing CSR/CSC data, but this function will eventually become - // private. - vertex_t const* indices() const { return indices_; } + vertex_partition_view_t get_vertex_partition_view() const + { + return vertex_partition_view_t(this->get_number_of_vertices()); + } - // FIXME: this function is not part of the public stable API.This function is mainly for pattern - // accelerator implementation. This function is currently public to support the legacy - // implementations directly accessing CSR/CSC data, but this function will eventually become - // private. 
- weight_t const* weights() const { return weights_; } + matrix_partition_view_t get_matrix_partition_view( + size_t adj_matrix_partition_idx = 0) const + { + assert(adj_matrix_partition_idx == 0); // there is only one matrix partition in single-GPU + return matrix_partition_view_t( + offsets_, indices_, weights_, this->get_number_of_vertices(), this->get_number_of_edges()); + } rmm::device_uvector compute_in_degrees(raft::handle_t const& handle) const; rmm::device_uvector compute_out_degrees(raft::handle_t const& handle) const; @@ -741,10 +741,10 @@ class graph_view_t weights_{std::nullopt}; - std::vector segment_offsets_{}; // segment offsets based on vertex degree, relevant - // only if sorted_by_global_degree is true + // segment offsets based on vertex degree, relevant only if vertex IDs are renumbered + std::optional> segment_offsets_{std::nullopt}; }; } // namespace experimental diff --git a/cpp/include/cugraph/matrix_partition_device.cuh b/cpp/include/cugraph/matrix_partition_device.cuh deleted file mode 100644 index 8951e4269bd..00000000000 --- a/cpp/include/cugraph/matrix_partition_device.cuh +++ /dev/null @@ -1,262 +0,0 @@ -/* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include -#include - -#include - -#include - -namespace cugraph { -namespace experimental { - -template -class matrix_partition_device_base_t { - public: - matrix_partition_device_base_t(edge_t const* offsets, - vertex_t const* indices, - weight_t const* weights, - edge_t number_of_edges) - : offsets_(offsets), indices_(indices), weights_(weights), number_of_edges_(number_of_edges) - { - } - - __host__ __device__ edge_t get_number_of_edges() const { return number_of_edges_; } - - __host__ __device__ vertex_t const* get_indices() const { return indices_; } - __host__ __device__ weight_t const* get_weights() const { return weights_; } - - __device__ thrust::tuple get_local_edges( - vertex_t major_offset) const noexcept - { - auto edge_offset = *(offsets_ + major_offset); - auto local_degree = *(offsets_ + (major_offset + 1)) - edge_offset; - auto indices = indices_ + edge_offset; - auto weights = weights_ != nullptr ? 
weights_ + edge_offset : nullptr; - return thrust::make_tuple(indices, weights, local_degree); - } - - __device__ edge_t get_local_degree(vertex_t major_offset) const noexcept - { - return *(offsets_ + (major_offset + 1)) - *(offsets_ + major_offset); - } - - __device__ edge_t get_local_offset(vertex_t major_offset) const noexcept - { - return *(offsets_ + major_offset); - } - - private: - // should be trivially copyable to device - edge_t const* offsets_{nullptr}; - vertex_t const* indices_{nullptr}; - weight_t const* weights_{nullptr}; - edge_t number_of_edges_{0}; -}; - -template -class matrix_partition_device_t; - -// multi-GPU version -template -class matrix_partition_device_t> - : public matrix_partition_device_base_t { - public: - matrix_partition_device_t(GraphViewType const& graph_view, size_t partition_idx) - : matrix_partition_device_base_t( - graph_view.offsets(partition_idx), - graph_view.indices(partition_idx), - graph_view.weights(partition_idx), - graph_view.get_number_of_local_adj_matrix_partition_edges(partition_idx)), - major_first_(GraphViewType::is_adj_matrix_transposed - ? graph_view.get_local_adj_matrix_partition_col_first(partition_idx) - : graph_view.get_local_adj_matrix_partition_row_first(partition_idx)), - major_last_(GraphViewType::is_adj_matrix_transposed - ? graph_view.get_local_adj_matrix_partition_col_last(partition_idx) - : graph_view.get_local_adj_matrix_partition_row_last(partition_idx)), - minor_first_(GraphViewType::is_adj_matrix_transposed - ? graph_view.get_local_adj_matrix_partition_row_first(partition_idx) - : graph_view.get_local_adj_matrix_partition_col_first(partition_idx)), - minor_last_(GraphViewType::is_adj_matrix_transposed - ? graph_view.get_local_adj_matrix_partition_row_last(partition_idx) - : graph_view.get_local_adj_matrix_partition_col_last(partition_idx)), - major_value_start_offset_( - GraphViewType::is_adj_matrix_transposed - ? 
graph_view.get_local_adj_matrix_partition_col_value_start_offset(partition_idx) - : graph_view.get_local_adj_matrix_partition_row_value_start_offset(partition_idx)) - { - } - - __host__ __device__ typename GraphViewType::vertex_type get_major_value_start_offset() const - { - return major_value_start_offset_; - } - - __host__ __device__ typename GraphViewType::vertex_type get_major_first() const noexcept - { - return major_first_; - } - - __host__ __device__ typename GraphViewType::vertex_type get_major_last() const noexcept - { - return major_last_; - } - - __host__ __device__ typename GraphViewType::vertex_type get_major_size() const noexcept - { - return major_last_ - major_first_; - } - - __host__ __device__ typename GraphViewType::vertex_type get_minor_first() const noexcept - { - return minor_first_; - } - - __host__ __device__ typename GraphViewType::vertex_type get_minor_last() const noexcept - { - return minor_last_; - } - - __host__ __device__ typename GraphViewType::vertex_type get_minor_size() const noexcept - { - return minor_last_ - minor_first_; - } - - __host__ __device__ typename GraphViewType::vertex_type get_major_offset_from_major_nocheck( - typename GraphViewType::vertex_type major) const noexcept - { - return major - major_first_; - } - - __host__ __device__ typename GraphViewType::vertex_type get_minor_offset_from_minor_nocheck( - typename GraphViewType::vertex_type minor) const noexcept - { - return minor - minor_first_; - } - - __host__ __device__ typename GraphViewType::vertex_type get_major_from_major_offset_nocheck( - typename GraphViewType::vertex_type major_offset) const noexcept - { - return major_first_ + major_offset; - } - - __host__ __device__ typename GraphViewType::vertex_type get_minor_from_minor_offset_nocheck( - typename GraphViewType::vertex_type minor_offset) const noexcept - { - return minor_first_ + minor_offset; - } - - private: - // should be trivially copyable to device - typename GraphViewType::vertex_type major_first_{0}; - typename GraphViewType::vertex_type major_last_{0}; - typename GraphViewType::vertex_type minor_first_{0}; - typename GraphViewType::vertex_type minor_last_{0}; - - typename GraphViewType::vertex_type major_value_start_offset_{0}; -}; - -// single-GPU version -template -class matrix_partition_device_t> - : public matrix_partition_device_base_t { - public: - matrix_partition_device_t(GraphViewType const& graph_view, size_t partition_idx) - : matrix_partition_device_base_t( - graph_view.offsets(), - graph_view.indices(), - graph_view.weights(), - graph_view.get_number_of_edges()), - number_of_vertices_(graph_view.get_number_of_vertices()) - { - assert(partition_idx == 0); - } - - __host__ __device__ typename GraphViewType::vertex_type get_major_value_start_offset() const - { - return typename GraphViewType::vertex_type{0}; - } - - __host__ __device__ constexpr typename GraphViewType::vertex_type get_major_first() const noexcept - { - return typename GraphViewType::vertex_type{0}; - } - - __host__ __device__ typename GraphViewType::vertex_type get_major_last() const noexcept - { - return number_of_vertices_; - } - - __host__ __device__ typename GraphViewType::vertex_type get_major_size() const noexcept - { - return number_of_vertices_; - } - - __host__ __device__ constexpr typename GraphViewType::vertex_type get_minor_first() const noexcept - { - return typename GraphViewType::vertex_type{0}; - } - - __host__ __device__ typename GraphViewType::vertex_type get_minor_last() const noexcept - { - return number_of_vertices_; - } - 
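// [Editorial note, not part of the patch] matrix_partition_device_t, removed
// here, is superseded by matrix_partition_device_view_t (new file below),
// which wraps the weight pointer in thrust::optional instead of using a
// possibly-null raw pointer. The updated prims later in this patch therefore
// read per-edge weights as
//
//   auto weight = weights ? (*weights)[i] : weight_t{1.0};
//
// i.e. an unweighted graph is represented by an empty optional, and a default
// weight of 1 is substituted at the point of use.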
- __host__ __device__ typename GraphViewType::vertex_type get_minor_size() const noexcept - { - return number_of_vertices_; - } - - __host__ __device__ typename GraphViewType::vertex_type get_major_offset_from_major_nocheck( - typename GraphViewType::vertex_type major) const noexcept - { - return major; - } - - __host__ __device__ typename GraphViewType::vertex_type get_minor_offset_from_minor_nocheck( - typename GraphViewType::vertex_type minor) const noexcept - { - return minor; - } - - __host__ __device__ typename GraphViewType::vertex_type get_major_from_major_offset_nocheck( - typename GraphViewType::vertex_type major_offset) const noexcept - { - return major_offset; - } - - __host__ __device__ typename GraphViewType::vertex_type get_minor_from_minor_offset_nocheck( - typename GraphViewType::vertex_type minor_offset) const noexcept - { - return minor_offset; - } - - private: - typename GraphViewType::vertex_type number_of_vertices_; -}; - -} // namespace experimental -} // namespace cugraph diff --git a/cpp/include/cugraph/matrix_partition_device_view.cuh b/cpp/include/cugraph/matrix_partition_device_view.cuh new file mode 100644 index 00000000000..aa7bb6f97ba --- /dev/null +++ b/cpp/include/cugraph/matrix_partition_device_view.cuh @@ -0,0 +1,234 @@ +/* + * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +#include +#include + +#include +#include + +namespace cugraph { +namespace experimental { + +namespace detail { + +template +class matrix_partition_device_view_base_t { + public: + matrix_partition_device_view_base_t(edge_t const* offsets, + vertex_t const* indices, + std::optional weights, + edge_t number_of_edges) + : offsets_(offsets), + indices_(indices), + weights_(weights ? thrust::optional(*weights) : thrust::nullopt), + number_of_edges_(number_of_edges) + { + } + + __host__ __device__ edge_t get_number_of_edges() const { return number_of_edges_; } + + __host__ __device__ edge_t const* get_offsets() const { return offsets_; } + __host__ __device__ vertex_t const* get_indices() const { return indices_; } + __host__ __device__ thrust::optional get_weights() const { return weights_; } + + __device__ thrust::tuple, edge_t> + get_local_edges(vertex_t major_offset) const noexcept + { + auto edge_offset = *(offsets_ + major_offset); + auto local_degree = *(offsets_ + (major_offset + 1)) - edge_offset; + auto indices = indices_ + edge_offset; + auto weights = + weights_ ? 
thrust::optional{*weights_ + edge_offset} : thrust::nullopt; + return thrust::make_tuple(indices, weights, local_degree); + } + + __device__ edge_t get_local_degree(vertex_t major_offset) const noexcept + { + return *(offsets_ + (major_offset + 1)) - *(offsets_ + major_offset); + } + + __device__ edge_t get_local_offset(vertex_t major_offset) const noexcept + { + return *(offsets_ + major_offset); + } + + private: + // should be trivially copyable to device + edge_t const* offsets_{nullptr}; + vertex_t const* indices_{nullptr}; + thrust::optional weights_{thrust::nullopt}; + edge_t number_of_edges_{0}; +}; + +} // namespace detail + +template +class matrix_partition_device_view_t; + +// multi-GPU version +template +class matrix_partition_device_view_t> + : public detail::matrix_partition_device_view_base_t { + public: + matrix_partition_device_view_t( + matrix_partition_view_t view) + : detail::matrix_partition_device_view_base_t( + view.get_offsets(), view.get_indices(), view.get_weights(), view.get_number_of_edges()), + dcs_nzd_vertices_(view.get_dcs_nzd_vertices() + ? thrust::optional{*(view.get_dcs_nzd_vertices())} + : thrust::nullopt), + dcs_nzd_vertex_count_(view.get_dcs_nzd_vertex_count() + ? thrust::optional{*(view.get_dcs_nzd_vertex_count())} + : thrust::nullopt), + major_first_(view.get_major_first()), + major_last_(view.get_major_last()), + minor_first_(view.get_minor_first()), + minor_last_(view.get_minor_last()), + major_value_start_offset_(view.get_major_value_start_offset()) + { + } + + __host__ __device__ vertex_t get_major_first() const noexcept { return major_first_; } + + __host__ __device__ vertex_t get_major_last() const noexcept { return major_last_; } + + __host__ __device__ vertex_t get_major_size() const noexcept + { + return major_last_ - major_first_; + } + + __host__ __device__ vertex_t get_minor_first() const noexcept { return minor_first_; } + + __host__ __device__ vertex_t get_minor_last() const noexcept { return minor_last_; } + + __host__ __device__ vertex_t get_minor_size() const noexcept + { + return minor_last_ - minor_first_; + } + + __host__ __device__ vertex_t get_major_offset_from_major_nocheck(vertex_t major) const noexcept + { + return major - major_first_; + } + + __host__ __device__ vertex_t get_minor_offset_from_minor_nocheck(vertex_t minor) const noexcept + { + return minor - minor_first_; + } + + __host__ __device__ vertex_t get_major_from_major_offset_nocheck(vertex_t major_offset) const + noexcept + { + return major_first_ + major_offset; + } + + __host__ __device__ vertex_t get_minor_from_minor_offset_nocheck(vertex_t minor_offset) const + noexcept + { + return minor_first_ + minor_offset; + } + + __host__ __device__ vertex_t get_major_value_start_offset() const + { + return major_value_start_offset_; + } + + private: + // should be trivially copyable to device + + thrust::optional dcs_nzd_vertices_{nullptr}; + thrust::optional dcs_nzd_vertex_count_{0}; + + vertex_t major_first_{0}; + vertex_t major_last_{0}; + vertex_t minor_first_{0}; + vertex_t minor_last_{0}; + + vertex_t major_value_start_offset_{0}; +}; + +// single-GPU version +template +class matrix_partition_device_view_t> + : public detail::matrix_partition_device_view_base_t { + public: + matrix_partition_device_view_t( + matrix_partition_view_t view) + : detail::matrix_partition_device_view_base_t( + view.get_offsets(), view.get_indices(), view.get_weights(), view.get_number_of_edges()), + number_of_vertices_(view.get_major_last()) + { + } + + __host__ __device__ vertex_t 
get_major_value_start_offset() const { return vertex_t{0}; } + + __host__ __device__ constexpr vertex_t get_major_first() const noexcept { return vertex_t{0}; } + + __host__ __device__ vertex_t get_major_last() const noexcept { return number_of_vertices_; } + + __host__ __device__ vertex_t get_major_size() const noexcept { return number_of_vertices_; } + + __host__ __device__ constexpr vertex_t get_minor_first() const noexcept { return vertex_t{0}; } + + __host__ __device__ vertex_t get_minor_last() const noexcept { return number_of_vertices_; } + + __host__ __device__ vertex_t get_minor_size() const noexcept { return number_of_vertices_; } + + __host__ __device__ vertex_t get_major_offset_from_major_nocheck(vertex_t major) const noexcept + { + return major; + } + + __host__ __device__ vertex_t get_minor_offset_from_minor_nocheck(vertex_t minor) const noexcept + { + return minor; + } + + __host__ __device__ vertex_t get_major_from_major_offset_nocheck(vertex_t major_offset) const + noexcept + { + return major_offset; + } + + __host__ __device__ vertex_t get_minor_from_minor_offset_nocheck(vertex_t minor_offset) const + noexcept + { + return minor_offset; + } + + private: + vertex_t number_of_vertices_; +}; + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/include/cugraph/matrix_partition_view.hpp b/cpp/include/cugraph/matrix_partition_view.hpp new file mode 100644 index 00000000000..85c366a5a74 --- /dev/null +++ b/cpp/include/cugraph/matrix_partition_view.hpp @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include +#include + +namespace cugraph { +namespace experimental { + +namespace detail { + +template +class matrix_partition_view_base_t { + public: + matrix_partition_view_base_t(edge_t const* offsets, + vertex_t const* indices, + std::optional weights, + edge_t number_of_edges) + : offsets_(offsets), indices_(indices), weights_(weights), number_of_edges_(number_of_edges) + { + } + + edge_t get_number_of_edges() const { return number_of_edges_; } + + edge_t const* get_offsets() const { return offsets_; } + vertex_t const* get_indices() const { return indices_; } + std::optional get_weights() const { return weights_; } + + private: + edge_t const* offsets_{nullptr}; + vertex_t const* indices_{nullptr}; + std::optional weights_{std::nullopt}; + edge_t number_of_edges_{0}; +}; + +} // namespace detail + +template +class matrix_partition_view_t; + +// multi-GPU version +template +class matrix_partition_view_t> + : public detail::matrix_partition_view_base_t { + public: + matrix_partition_view_t(edge_t const* offsets, + vertex_t const* indices, + std::optional weights, + std::optional dcs_nzd_vertices, + std::optional dcs_nzd_vertex_count, + edge_t number_of_matrix_partition_edges, + vertex_t major_first, + vertex_t major_last, + vertex_t minor_first, + vertex_t minor_last, + vertex_t major_value_start_offset) + : detail::matrix_partition_view_base_t( + offsets, indices, weights, number_of_matrix_partition_edges), + dcs_nzd_vertices_(dcs_nzd_vertices), + dcs_nzd_vertex_count_(dcs_nzd_vertex_count), + major_first_(major_first), + major_last_(major_last), + minor_first_(minor_first), + minor_last_(minor_first), + major_value_start_offset_(major_value_start_offset) + { + } + + std::optional get_dcs_nzd_vertices() const { return dcs_nzd_vertices_; } + std::optional get_dcs_nzd_vertex_count() const { return dcs_nzd_vertex_count_; } + + vertex_t get_major_first() const { return major_first_; } + vertex_t get_major_last() const { return major_last_; } + vertex_t get_minor_first() const { return minor_first_; } + vertex_t get_minor_last() const { return minor_last_; } + + vertex_t get_major_value_start_offset() const { return major_value_start_offset_; } + + private: + // relevant only if we use the CSR + DCSR (or CSC + DCSC) hybrid format + std::optional dcs_nzd_vertices_{}; + std::optional dcs_nzd_vertex_count_{}; + + vertex_t major_first_{0}; + vertex_t major_last_{0}; + vertex_t minor_first_{0}; + vertex_t minor_last_{0}; + + vertex_t major_value_start_offset_{0}; +}; + +// single-GPU version +template +class matrix_partition_view_t> + : public detail::matrix_partition_view_base_t { + public: + matrix_partition_view_t(edge_t const* offsets, + vertex_t const* indices, + std::optional weights, + vertex_t number_of_vertices, + edge_t number_of_edges) + : detail::matrix_partition_view_base_t( + offsets, indices, weights, number_of_edges), + number_of_vertices_(number_of_vertices) + { + } + + vertex_t get_major_first() const { return vertex_t{0}; } + vertex_t get_major_last() const { return number_of_vertices_; } + vertex_t get_minor_first() const { return vertex_t{0}; } + vertex_t get_minor_last() const { return number_of_vertices_; } + + private: + vertex_t number_of_vertices_{0}; +}; + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/include/cugraph/prims/copy_to_adj_matrix_row_col.cuh b/cpp/include/cugraph/prims/copy_to_adj_matrix_row_col.cuh index e2ab135691e..1aedd952cf6 100644 --- a/cpp/include/cugraph/prims/copy_to_adj_matrix_row_col.cuh +++ 
b/cpp/include/cugraph/prims/copy_to_adj_matrix_row_col.cuh @@ -16,7 +16,7 @@ #pragma once #include -#include +#include #include #include #include @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include @@ -118,6 +118,8 @@ void copy_to_matrix_major(raft::handle_t const& handle, MatrixMajorValueOutputIterator matrix_major_value_output_first) { using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; if (GraphViewType::is_multi_gpu) { auto& comm = handle.get_comms(); @@ -146,7 +148,9 @@ void copy_to_matrix_major(raft::handle_t const& handle, handle.get_stream()); for (int i = 0; i < col_comm_size; ++i) { - matrix_partition_device_t matrix_partition(graph_view, i); + auto matrix_partition = + matrix_partition_device_view_t( + graph_view.get_matrix_partition_view(i)); rmm::device_uvector rx_vertices(col_comm_rank == i ? size_t{0} : rx_counts[i], handle.get_stream()); @@ -157,7 +161,9 @@ void copy_to_matrix_major(raft::handle_t const& handle, typename std::iterator_traits::value_type>(rx_tmp_buffer); if (col_comm_rank == i) { - vertex_partition_device_t vertex_partition(graph_view); + auto vertex_partition = + vertex_partition_device_view_t( + graph_view.get_vertex_partition_view()); auto map_first = thrust::make_transform_iterator(vertex_first, [vertex_partition] __device__(auto v) { return vertex_partition.get_local_vertex_offset_from_vertex_nocheck(v); @@ -304,6 +310,8 @@ void copy_to_matrix_minor(raft::handle_t const& handle, MatrixMinorValueOutputIterator matrix_minor_value_output_first) { using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; if (GraphViewType::is_multi_gpu) { auto& comm = handle.get_comms(); @@ -331,7 +339,9 @@ void copy_to_matrix_minor(raft::handle_t const& handle, static_cast(thrust::distance(vertex_first, vertex_last)), handle.get_stream()); - matrix_partition_device_t matrix_partition(graph_view, 0); + auto matrix_partition = + matrix_partition_device_view_t( + graph_view.get_matrix_partition_view(size_t{0})); for (int i = 0; i < row_comm_size; ++i) { rmm::device_uvector rx_vertices(row_comm_rank == i ? 
size_t{0} : rx_counts[i], handle.get_stream()); @@ -342,7 +352,9 @@ void copy_to_matrix_minor(raft::handle_t const& handle, typename std::iterator_traits::value_type>(rx_tmp_buffer); if (row_comm_rank == i) { - vertex_partition_device_t vertex_partition(graph_view); + auto vertex_partition = + vertex_partition_device_view_t( + graph_view.get_vertex_partition_view()); auto map_first = thrust::make_transform_iterator(vertex_first, [vertex_partition] __device__(auto v) { return vertex_partition.get_local_vertex_offset_from_vertex_nocheck(v); diff --git a/cpp/include/cugraph/prims/copy_v_transform_reduce_in_out_nbr.cuh b/cpp/include/cugraph/prims/copy_v_transform_reduce_in_out_nbr.cuh index 0bae6da71e6..9ef67563772 100644 --- a/cpp/include/cugraph/prims/copy_v_transform_reduce_in_out_nbr.cuh +++ b/cpp/include/cugraph/prims/copy_v_transform_reduce_in_out_nbr.cuh @@ -16,7 +16,7 @@ #pragma once #include -#include +#include #include #include #include @@ -53,7 +53,10 @@ template __global__ void for_all_major_for_all_nbr_low_degree( - matrix_partition_device_t matrix_partition, + matrix_partition_device_view_t matrix_partition, typename GraphViewType::vertex_type major_first, typename GraphViewType::vertex_type major_last, AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, @@ -74,7 +77,7 @@ __global__ void for_all_major_for_all_nbr_low_degree( while (idx < static_cast(major_last - major_first)) { auto major_offset = major_start_offset + idx; vertex_t const* indices{nullptr}; - weight_t const* weights{nullptr}; + thrust::optional weights{nullptr}; edge_t local_degree{}; thrust::tie(indices, weights, local_degree) = matrix_partition.get_local_edges(static_cast(major_offset)); @@ -86,7 +89,7 @@ __global__ void for_all_major_for_all_nbr_low_degree( indices, weights] __device__(auto i) { auto minor = indices[i]; - auto weight = weights != nullptr ? weights[i] : weight_t{1.0}; + auto weight = weights ? (*weights)[i] : weight_t{1.0}; auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); auto row = GraphViewType::is_adj_matrix_transposed ? minor @@ -145,7 +148,10 @@ template __global__ void for_all_major_for_all_nbr_mid_degree( - matrix_partition_device_t matrix_partition, + matrix_partition_device_view_t matrix_partition, typename GraphViewType::vertex_type major_first, typename GraphViewType::vertex_type major_last, AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, @@ -168,14 +174,14 @@ __global__ void for_all_major_for_all_nbr_mid_degree( while (idx < static_cast(major_last - major_first)) { auto major_offset = major_start_offset + idx; vertex_t const* indices{nullptr}; - weight_t const* weights{nullptr}; + thrust::optional weights{nullptr}; edge_t local_degree{}; thrust::tie(indices, weights, local_degree) = matrix_partition.get_local_edges(major_offset); auto e_op_result_sum = lane_id == 0 ? init : e_op_result_t{}; // relevent only if update_major == true for (edge_t i = lane_id; i < local_degree; i += raft::warp_size()) { auto minor = indices[i]; - auto weight = weights != nullptr ? weights[i] : weight_t{1.0}; + auto weight = weights ? (*weights)[i] : weight_t{1.0}; auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); auto row = GraphViewType::is_adj_matrix_transposed ? 
minor @@ -223,7 +229,10 @@ template __global__ void for_all_major_for_all_nbr_high_degree( - matrix_partition_device_t matrix_partition, + matrix_partition_device_view_t matrix_partition, typename GraphViewType::vertex_type major_first, typename GraphViewType::vertex_type major_last, AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, @@ -243,14 +252,14 @@ __global__ void for_all_major_for_all_nbr_high_degree( while (idx < static_cast(major_last - major_first)) { auto major_offset = major_start_offset + idx; vertex_t const* indices{nullptr}; - weight_t const* weights{nullptr}; + thrust::optional weights{nullptr}; edge_t local_degree{}; thrust::tie(indices, weights, local_degree) = matrix_partition.get_local_edges(major_offset); auto e_op_result_sum = threadIdx.x == 0 ? init : e_op_result_t{}; // relevent only if update_major == true for (edge_t i = threadIdx.x; i < local_degree; i += blockDim.x) { auto minor = indices[i]; - auto weight = weights != nullptr ? weights[i] : weight_t{1.0}; + auto weight = weights ? (*weights)[i] : weight_t{1.0}; auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); auto row = GraphViewType::is_adj_matrix_transposed ? minor @@ -309,6 +318,8 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, { constexpr auto update_major = (in == GraphViewType::is_adj_matrix_transposed); using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; static_assert(is_arithmetic_or_thrust_tuple_of_arithmetic::value); @@ -345,7 +356,9 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, } for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { - matrix_partition_device_t matrix_partition(graph_view, i); + auto matrix_partition = + matrix_partition_device_view_t( + graph_view.get_matrix_partition_view(i)); auto major_tmp_buffer_size = GraphViewType::is_multi_gpu && update_major ? matrix_partition.get_major_size() : vertex_t{0}; @@ -371,34 +384,34 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, ? matrix_partition.get_major_value_start_offset() : vertex_t{0}; auto segment_offsets = graph_view.get_local_adj_matrix_partition_segment_offsets(i); - if (segment_offsets.size() > 0) { + if (segment_offsets) { // FIXME: we may further improve performance by 1) concurrently running kernels on different // segments; 2) individually tuning block sizes for different segments; and 3) adding one more // segment for very high degree vertices and running segmented reduction - static_assert(detail::num_segments_per_vertex_partition == 3); - if (segment_offsets[1] > 0) { - raft::grid_1d_block_t update_grid(segment_offsets[1], + static_assert(detail::num_sparse_segments_per_vertex_partition == 3); + if ((*segment_offsets)[1] > 0) { + raft::grid_1d_block_t update_grid((*segment_offsets)[1], detail::copy_v_transform_reduce_nbr_for_all_block_size, handle.get_device_properties().maxGridSize[0]); // FIXME: with C++17 we can collapse the if-else statement below with a functor with "if // constexpr" that returns either a multi-GPU output buffer or a single-GPU output buffer. 
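// [Editorial note, not part of the patch] (*segment_offsets)[1], [2] and [3]
// bound the high-, mid- and low-degree segments of the degree-sorted major
// range; the three dispatches in this function map them to block-per-vertex,
// warp-per-vertex and thread-per-vertex kernels respectively, which is why
// each bound check and grid size is expressed in terms of these offsets.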
if (GraphViewType::is_multi_gpu) { - detail::for_all_major_for_all_nbr_high_degree + detail::for_all_major_for_all_nbr_high_degree <<>>( matrix_partition, matrix_partition.get_major_first(), - matrix_partition.get_major_first() + segment_offsets[1], + matrix_partition.get_major_first() + (*segment_offsets)[1], adj_matrix_row_value_input_first + row_value_input_offset, adj_matrix_col_value_input_first + col_value_input_offset, update_major ? major_buffer_first : minor_buffer_first, e_op, major_init); } else { - detail::for_all_major_for_all_nbr_high_degree + detail::for_all_major_for_all_nbr_high_degree <<>>( matrix_partition, matrix_partition.get_major_first(), - matrix_partition.get_major_first() + segment_offsets[1], + matrix_partition.get_major_first() + (*segment_offsets)[1], adj_matrix_row_value_input_first + row_value_input_offset, adj_matrix_col_value_input_first + col_value_input_offset, vertex_value_output_first, @@ -406,62 +419,62 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, major_init); } } - if (segment_offsets[2] - segment_offsets[1] > 0) { - raft::grid_1d_warp_t update_grid(segment_offsets[2] - segment_offsets[1], + if ((*segment_offsets)[2] - (*segment_offsets)[1] > 0) { + raft::grid_1d_warp_t update_grid((*segment_offsets)[2] - (*segment_offsets)[1], detail::copy_v_transform_reduce_nbr_for_all_block_size, handle.get_device_properties().maxGridSize[0]); // FIXME: with C++17 we can collapse the if-else statement below with a functor with "if // constexpr" that returns either a multi-GPU output buffer or a single-GPU output buffer. if (GraphViewType::is_multi_gpu) { - detail::for_all_major_for_all_nbr_mid_degree + detail::for_all_major_for_all_nbr_mid_degree <<>>( matrix_partition, - matrix_partition.get_major_first() + segment_offsets[1], - matrix_partition.get_major_first() + segment_offsets[2], + matrix_partition.get_major_first() + (*segment_offsets)[1], + matrix_partition.get_major_first() + (*segment_offsets)[2], adj_matrix_row_value_input_first + row_value_input_offset, adj_matrix_col_value_input_first + col_value_input_offset, - update_major ? major_buffer_first + segment_offsets[1] : minor_buffer_first, + update_major ? major_buffer_first + (*segment_offsets)[1] : minor_buffer_first, e_op, major_init); } else { - detail::for_all_major_for_all_nbr_mid_degree + detail::for_all_major_for_all_nbr_mid_degree <<>>( matrix_partition, - matrix_partition.get_major_first() + segment_offsets[1], - matrix_partition.get_major_first() + segment_offsets[2], + matrix_partition.get_major_first() + (*segment_offsets)[1], + matrix_partition.get_major_first() + (*segment_offsets)[2], adj_matrix_row_value_input_first + row_value_input_offset, adj_matrix_col_value_input_first + col_value_input_offset, - vertex_value_output_first + (update_major ? segment_offsets[1] : vertex_t{0}), + vertex_value_output_first + (update_major ? (*segment_offsets)[1] : vertex_t{0}), e_op, major_init); } } - if (segment_offsets[3] - segment_offsets[2] > 0) { - raft::grid_1d_thread_t update_grid(segment_offsets[3] - segment_offsets[2], + if ((*segment_offsets)[3] - (*segment_offsets)[2] > 0) { + raft::grid_1d_thread_t update_grid((*segment_offsets)[3] - (*segment_offsets)[2], detail::copy_v_transform_reduce_nbr_for_all_block_size, handle.get_device_properties().maxGridSize[0]); // FIXME: with C++17 we can collapse the if-else statement below with a functor with "if // constexpr" that returns either a multi-GPU output buffer or a single-GPU output buffer. 
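For orientation, the degree-segment dispatch being rewritten in this hunk (and repeated below in transform_reduce_e and update_frontier_v_push_if_out_nbr) has the following general shape now that get_local_adj_matrix_partition_segment_offsets() returns an optional. This is a minimal sketch: launch_high/mid/low_degree_kernel stand in for the raft::grid_1d_* setup plus the corresponding kernel launches and are not functions in this patch.

// Sketch of the per-degree-segment dispatch over an optional segment-offset vector.
auto segment_offsets = graph_view.get_local_adj_matrix_partition_segment_offsets(i);
if (segment_offsets) {  // majors segmented by degree: [0, s1) high, [s1, s2) mid, [s2, s3) low
  auto major_first = matrix_partition.get_major_first();
  if ((*segment_offsets)[1] > 0) {
    launch_high_degree_kernel(major_first, major_first + (*segment_offsets)[1]);
  }
  if ((*segment_offsets)[2] - (*segment_offsets)[1] > 0) {
    launch_mid_degree_kernel(major_first + (*segment_offsets)[1], major_first + (*segment_offsets)[2]);
  }
  if ((*segment_offsets)[3] - (*segment_offsets)[2] > 0) {
    launch_low_degree_kernel(major_first + (*segment_offsets)[2], major_first + (*segment_offsets)[3]);
  }
} else if (matrix_partition.get_major_size() > 0) {
  // no segmentation metadata: a single low-degree kernel covers every local major
  launch_low_degree_kernel(matrix_partition.get_major_first(), matrix_partition.get_major_last());
}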
if (GraphViewType::is_multi_gpu) { - detail::for_all_major_for_all_nbr_low_degree + detail::for_all_major_for_all_nbr_low_degree <<>>( matrix_partition, - matrix_partition.get_major_first() + segment_offsets[2], - matrix_partition.get_major_last(), + matrix_partition.get_major_first() + (*segment_offsets)[2], + matrix_partition.get_major_first() + (*segment_offsets)[3], adj_matrix_row_value_input_first + row_value_input_offset, adj_matrix_col_value_input_first + col_value_input_offset, - update_major ? major_buffer_first + segment_offsets[2] : minor_buffer_first, + update_major ? major_buffer_first + (*segment_offsets)[2] : minor_buffer_first, e_op, major_init); } else { - detail::for_all_major_for_all_nbr_low_degree + detail::for_all_major_for_all_nbr_low_degree <<>>( matrix_partition, - matrix_partition.get_major_first() + segment_offsets[2], - matrix_partition.get_major_last(), + matrix_partition.get_major_first() + (*segment_offsets)[2], + matrix_partition.get_major_first() + (*segment_offsets)[3], adj_matrix_row_value_input_first + row_value_input_offset, adj_matrix_col_value_input_first + col_value_input_offset, - vertex_value_output_first + (update_major ? segment_offsets[2] : vertex_t{0}), + vertex_value_output_first + (update_major ? (*segment_offsets)[2] : vertex_t{0}), e_op, major_init); } @@ -474,7 +487,7 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, // FIXME: with C++17 we can collapse the if-else statement below with a functor with "if // constexpr" that returns either a multi-GPU output buffer or a single-GPU output buffer. if (GraphViewType::is_multi_gpu) { - detail::for_all_major_for_all_nbr_low_degree + detail::for_all_major_for_all_nbr_low_degree <<>>( matrix_partition, matrix_partition.get_major_first(), @@ -485,7 +498,7 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, e_op, major_init); } else { - detail::for_all_major_for_all_nbr_low_degree + detail::for_all_major_for_all_nbr_low_degree <<>>( matrix_partition, matrix_partition.get_major_first(), diff --git a/cpp/include/cugraph/prims/copy_v_transform_reduce_key_aggregated_out_nbr.cuh b/cpp/include/cugraph/prims/copy_v_transform_reduce_key_aggregated_out_nbr.cuh index f9c6fed059b..4aa80956745 100644 --- a/cpp/include/cugraph/prims/copy_v_transform_reduce_key_aggregated_out_nbr.cuh +++ b/cpp/include/cugraph/prims/copy_v_transform_reduce_key_aggregated_out_nbr.cuh @@ -16,16 +16,15 @@ #pragma once #include -#include #include -#include +#include #include #include #include #include #include #include -#include +#include #include #include @@ -243,7 +242,9 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( rmm::device_uvector major_vertices(0, handle.get_stream()); auto e_op_result_buffer = allocate_dataframe_buffer(0, handle.get_stream()); for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { - matrix_partition_device_t matrix_partition(graph_view, i); + auto matrix_partition = + matrix_partition_device_view_t( + graph_view.get_matrix_partition_view(i)); rmm::device_uvector tmp_major_vertices(matrix_partition.get_number_of_edges(), handle.get_stream()); @@ -264,8 +265,8 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( tmp_minor_keys.begin()); if (graph_view.is_weighted()) { thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - matrix_partition.get_weights(), - matrix_partition.get_weights() + matrix_partition.get_number_of_edges(), + *(matrix_partition.get_weights()), + *(matrix_partition.get_weights()) + 
matrix_partition.get_number_of_edges(), tmp_key_aggregated_edge_weights.begin()); } // FIXME: This is highly inefficient for graphs with high-degree vertices. If we renumber @@ -526,8 +527,10 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( vertex_value_output_first, thrust::make_transform_iterator( unique_major_vertices.begin(), - [vertex_partition = vertex_partition_device_t(graph_view)] __device__( - auto v) { return vertex_partition.get_local_vertex_offset_from_vertex_nocheck(v); })), + [vertex_partition = vertex_partition_device_view_t( + graph_view.get_vertex_partition_view())] __device__(auto v) { + return vertex_partition.get_local_vertex_offset_from_vertex_nocheck(v); + })), thrust::equal_to{}, reduce_op); diff --git a/cpp/include/cugraph/prims/transform_reduce_by_adj_matrix_row_col_key_e.cuh b/cpp/include/cugraph/prims/transform_reduce_by_adj_matrix_row_col_key_e.cuh index 6041c6da3e2..3753c4ad4bd 100644 --- a/cpp/include/cugraph/prims/transform_reduce_by_adj_matrix_row_col_key_e.cuh +++ b/cpp/include/cugraph/prims/transform_reduce_by_adj_matrix_row_col_key_e.cuh @@ -17,7 +17,7 @@ #include #include -#include +#include #include #include #include @@ -43,7 +43,10 @@ template __global__ void for_all_major_for_all_nbr_low_degree( - matrix_partition_device_t matrix_partition, + matrix_partition_device_view_t matrix_partition, typename GraphViewType::vertex_type major_first, typename GraphViewType::vertex_type major_last, AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, @@ -64,7 +67,7 @@ __global__ void for_all_major_for_all_nbr_low_degree( while (idx < static_cast(major_last - major_first)) { auto major_offset = major_start_offset + idx; vertex_t const* indices{nullptr}; - weight_t const* weights{nullptr}; + thrust::optional weights{nullptr}; edge_t local_degree{}; thrust::tie(indices, weights, local_degree) = matrix_partition.get_local_edges(static_cast(major_offset)); @@ -78,7 +81,7 @@ __global__ void for_all_major_for_all_nbr_low_degree( indices, weights] __device__(auto i) { auto minor = indices[i]; - auto weight = weights != nullptr ? weights[i] : weight_t{1.0}; + auto weight = weights ? (*weights)[i] : weight_t{1.0}; auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); auto row = GraphViewType::is_adj_matrix_transposed ? minor @@ -183,7 +186,9 @@ transform_reduce_by_adj_matrix_row_col_key_e( rmm::device_uvector keys(0, handle.get_stream()); auto value_buffer = allocate_dataframe_buffer(0, handle.get_stream()); for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { - matrix_partition_device_t matrix_partition(graph_view, i); + auto matrix_partition = + matrix_partition_device_view_t( + graph_view.get_matrix_partition_view(i)); int comm_root_rank = 0; if (GraphViewType::is_multi_gpu) { @@ -224,7 +229,7 @@ transform_reduce_by_adj_matrix_row_col_key_e( // FIXME: This is highly inefficient for graphs with high-degree vertices. If we renumber // vertices to insure that rows within a partition are sorted by their out-degree in // decreasing order, we will apply this kernel only to low out-degree vertices. 
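The mechanical change that recurs across every file in this patch is visible in the hunk above: device-side partition helpers are no longer constructed from the whole graph view type but from small view structs it hands out. Side by side, with the template parameters spelled out as they appear in these hunks:

// Before: device-side helpers templated on (and constructed from) the graph view itself.
//   matrix_partition_device_t<GraphViewType> matrix_partition(graph_view, i);
//   vertex_partition_device_t<GraphViewType> vertex_partition(graph_view);

// After: the graph view returns trivially copyable *_view_t structs, and the device-side
// wrappers are parameterized only on vertex/edge/weight types and multi-GPU-ness.
auto matrix_partition =
  matrix_partition_device_view_t<vertex_t, edge_t, weight_t, GraphViewType::is_multi_gpu>(
    graph_view.get_matrix_partition_view(i));
auto vertex_partition =
  vertex_partition_device_view_t<vertex_t, GraphViewType::is_multi_gpu>(
    graph_view.get_vertex_partition_view());

This keeps the objects captured by device lambdas and kernels independent of the full GraphViewType, matching the "should be trivially copyable to device" notes in the new view headers added later in this patch.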
- detail::for_all_major_for_all_nbr_low_degree + detail::for_all_major_for_all_nbr_low_degree <<>>( matrix_partition, graph_view.get_vertex_partition_first(comm_root_rank), diff --git a/cpp/include/cugraph/prims/transform_reduce_e.cuh b/cpp/include/cugraph/prims/transform_reduce_e.cuh index f8252974f95..8eac2ac4f07 100644 --- a/cpp/include/cugraph/prims/transform_reduce_e.cuh +++ b/cpp/include/cugraph/prims/transform_reduce_e.cuh @@ -16,7 +16,6 @@ #pragma once #include -#include #include #include #include @@ -44,7 +43,10 @@ template __global__ void for_all_major_for_all_nbr_low_degree( - matrix_partition_device_t matrix_partition, + matrix_partition_device_view_t matrix_partition, typename GraphViewType::vertex_type major_first, typename GraphViewType::vertex_type major_last, AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, @@ -65,7 +67,7 @@ __global__ void for_all_major_for_all_nbr_low_degree( while (idx < static_cast(major_last - major_first)) { auto major_offset = major_start_offset + idx; vertex_t const* indices{nullptr}; - weight_t const* weights{nullptr}; + thrust::optional weights{nullptr}; edge_t local_degree{}; thrust::tie(indices, weights, local_degree) = matrix_partition.get_local_edges(major_offset); auto sum = thrust::transform_reduce( @@ -80,7 +82,7 @@ __global__ void for_all_major_for_all_nbr_low_degree( indices, weights] __device__(auto i) { auto minor = indices[i]; - auto weight = weights != nullptr ? weights[i] : weight_t{1.0}; + auto weight = weights ? (*weights)[i] : weight_t{1.0}; auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); auto row = GraphViewType::is_adj_matrix_transposed ? minor @@ -125,7 +127,10 @@ template __global__ void for_all_major_for_all_nbr_mid_degree( - matrix_partition_device_t matrix_partition, + matrix_partition_device_view_t matrix_partition, typename GraphViewType::vertex_type major_first, typename GraphViewType::vertex_type major_last, AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, @@ -148,12 +153,12 @@ __global__ void for_all_major_for_all_nbr_mid_degree( while (idx < static_cast(major_last - major_first)) { auto major_offset = major_start_offset + idx; vertex_t const* indices{nullptr}; - weight_t const* weights{nullptr}; + thrust::optional weights{nullptr}; edge_t local_degree{}; thrust::tie(indices, weights, local_degree) = matrix_partition.get_local_edges(major_offset); for (edge_t i = lane_id; i < local_degree; i += raft::warp_size()) { auto minor = indices[i]; - auto weight = weights != nullptr ? weights[i] : weight_t{1.0}; + auto weight = weights ? (*weights)[i] : weight_t{1.0}; auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); auto row = GraphViewType::is_adj_matrix_transposed ? 
minor @@ -195,7 +200,10 @@ template __global__ void for_all_major_for_all_nbr_high_degree( - matrix_partition_device_t matrix_partition, + matrix_partition_device_view_t matrix_partition, typename GraphViewType::vertex_type major_first, typename GraphViewType::vertex_type major_last, AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, @@ -215,12 +223,12 @@ __global__ void for_all_major_for_all_nbr_high_degree( while (idx < static_cast(major_last - major_first)) { auto major_offset = major_start_offset + idx; vertex_t const* indices{nullptr}; - weight_t const* weights{nullptr}; + thrust::optional weights{nullptr}; edge_t local_degree{}; thrust::tie(indices, weights, local_degree) = matrix_partition.get_local_edges(major_offset); for (edge_t i = threadIdx.x; i < local_degree; i += blockDim.x) { auto minor = indices[i]; - auto weight = weights != nullptr ? weights[i] : weight_t{1.0}; + auto weight = weights ? (*weights)[i] : weight_t{1.0}; auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); auto row = GraphViewType::is_adj_matrix_transposed ? minor @@ -303,6 +311,8 @@ T transform_reduce_e(raft::handle_t const& handle, static_assert(is_arithmetic_or_thrust_tuple_of_arithmetic::value); using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; auto result_buffer = allocate_dataframe_buffer(1, handle.get_stream()); thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), @@ -311,7 +321,9 @@ T transform_reduce_e(raft::handle_t const& handle, T{}); for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { - matrix_partition_device_t matrix_partition(graph_view, i); + auto matrix_partition = + matrix_partition_device_view_t( + graph_view.get_matrix_partition_view(i)); auto row_value_input_offset = GraphViewType::is_adj_matrix_transposed ? vertex_t{0} @@ -320,61 +332,55 @@ T transform_reduce_e(raft::handle_t const& handle, ? 
matrix_partition.get_major_value_start_offset() : vertex_t{0}; auto segment_offsets = graph_view.get_local_adj_matrix_partition_segment_offsets(i); - if (segment_offsets.size() > 0) { + if (segment_offsets) { // FIXME: we may further improve performance by 1) concurrently running kernels on different // segments; 2) individually tuning block sizes for different segments; and 3) adding one more // segment for very high degree vertices and running segmented reduction - static_assert(detail::num_segments_per_vertex_partition == 3); - if (segment_offsets[1] > 0) { - raft::grid_1d_block_t update_grid(segment_offsets[1], + static_assert(detail::num_sparse_segments_per_vertex_partition == 3); + if ((*segment_offsets)[1] > 0) { + raft::grid_1d_block_t update_grid((*segment_offsets)[1], detail::transform_reduce_e_for_all_block_size, handle.get_device_properties().maxGridSize[0]); - detail::for_all_major_for_all_nbr_high_degree<<>>( - matrix_partition, - matrix_partition.get_major_first(), - matrix_partition.get_major_first() + segment_offsets[1], - adj_matrix_row_value_input_first + row_value_input_offset, - adj_matrix_col_value_input_first + col_value_input_offset, - get_dataframe_buffer_begin(result_buffer), - e_op); + detail::for_all_major_for_all_nbr_high_degree + <<>>( + matrix_partition, + matrix_partition.get_major_first(), + matrix_partition.get_major_first() + (*segment_offsets)[1], + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + get_dataframe_buffer_begin(result_buffer), + e_op); } - if (segment_offsets[2] - segment_offsets[1] > 0) { - raft::grid_1d_warp_t update_grid(segment_offsets[2] - segment_offsets[1], + if ((*segment_offsets)[2] - (*segment_offsets)[1] > 0) { + raft::grid_1d_warp_t update_grid((*segment_offsets)[2] - (*segment_offsets)[1], detail::transform_reduce_e_for_all_block_size, handle.get_device_properties().maxGridSize[0]); - detail::for_all_major_for_all_nbr_mid_degree<<>>( - matrix_partition, - matrix_partition.get_major_first() + segment_offsets[1], - matrix_partition.get_major_first() + segment_offsets[2], - adj_matrix_row_value_input_first + row_value_input_offset, - adj_matrix_col_value_input_first + col_value_input_offset, - get_dataframe_buffer_begin(result_buffer), - e_op); + detail::for_all_major_for_all_nbr_mid_degree + <<>>( + matrix_partition, + matrix_partition.get_major_first() + (*segment_offsets)[1], + matrix_partition.get_major_first() + (*segment_offsets)[2], + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + get_dataframe_buffer_begin(result_buffer), + e_op); } - if (segment_offsets[3] - segment_offsets[2] > 0) { - raft::grid_1d_thread_t update_grid(segment_offsets[3] - segment_offsets[2], + if ((*segment_offsets)[3] - (*segment_offsets)[2] > 0) { + raft::grid_1d_thread_t update_grid((*segment_offsets)[3] - (*segment_offsets)[2], detail::transform_reduce_e_for_all_block_size, handle.get_device_properties().maxGridSize[0]); - detail::for_all_major_for_all_nbr_low_degree<<>>( - matrix_partition, - matrix_partition.get_major_first() + segment_offsets[2], - matrix_partition.get_major_last(), - adj_matrix_row_value_input_first + row_value_input_offset, - adj_matrix_col_value_input_first + col_value_input_offset, - get_dataframe_buffer_begin(result_buffer), - e_op); + detail::for_all_major_for_all_nbr_low_degree + <<>>( + matrix_partition, + matrix_partition.get_major_first() + (*segment_offsets)[2], + 
matrix_partition.get_major_first() + (*segment_offsets)[3], + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + get_dataframe_buffer_begin(result_buffer), + e_op); } } else { if (matrix_partition.get_major_size() > 0) { @@ -382,17 +388,15 @@ T transform_reduce_e(raft::handle_t const& handle, detail::transform_reduce_e_for_all_block_size, handle.get_device_properties().maxGridSize[0]); - detail::for_all_major_for_all_nbr_low_degree<<>>( - matrix_partition, - matrix_partition.get_major_first(), - matrix_partition.get_major_last(), - adj_matrix_row_value_input_first + row_value_input_offset, - adj_matrix_col_value_input_first + col_value_input_offset, - get_dataframe_buffer_begin(result_buffer), - e_op); + detail::for_all_major_for_all_nbr_low_degree + <<>>( + matrix_partition, + matrix_partition.get_major_first(), + matrix_partition.get_major_last(), + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + get_dataframe_buffer_begin(result_buffer), + e_op); } } } diff --git a/cpp/include/cugraph/prims/transform_reduce_v_with_adj_matrix_row.cuh b/cpp/include/cugraph/prims/transform_reduce_v_with_adj_matrix_row.cuh index 59830222a9c..bfb6f296075 100644 --- a/cpp/include/cugraph/prims/transform_reduce_v_with_adj_matrix_row.cuh +++ b/cpp/include/cugraph/prims/transform_reduce_v_with_adj_matrix_row.cuh @@ -70,6 +70,10 @@ T transform_reduce_v_with_adj_matrix_row( VertexOp v_op, T init) { + using vertex_t = GraphViewtype::vertex_type; + using edge_t = GraphViewtype::edge_type; + using weight_t = GraphViewtype::weight_type; + T ret{}; auto vertex_first = graph_view.get_local_vertex_first(); @@ -82,7 +86,9 @@ T transform_reduce_v_with_adj_matrix_row( auto range_last = std::min(vertex_last, row_last); if (range_last > range_first) { - matrix_partition_device_t matrix_partition(graph_view, i); + auto matrix_partition = + matrix_partition_device_view_t( + graph_view.get_matrix_partition_view(i)); auto row_value_input_offset = GraphViewType::is_adj_matrix_transposed ? 
0 : matrix_partition.get_major_value_start_offset(); diff --git a/cpp/include/cugraph/prims/update_frontier_v_push_if_out_nbr.cuh b/cpp/include/cugraph/prims/update_frontier_v_push_if_out_nbr.cuh index abb1a7e18cb..e148739dadc 100644 --- a/cpp/include/cugraph/prims/update_frontier_v_push_if_out_nbr.cuh +++ b/cpp/include/cugraph/prims/update_frontier_v_push_if_out_nbr.cuh @@ -16,7 +16,7 @@ #pragma once #include -#include +#include #include #include #include @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include @@ -127,19 +127,20 @@ auto get_optional_payload_buffer_begin = [](auto& optional_payload_buffer) { // FIXME: a temporary workaround for cudaErrorInvalidDeviceFunction error when device lambda is used // in the else part in if constexpr else statement that involves device lambda -template + typename key_t, + bool multi_gpu> struct call_v_op_t { VertexValueInputIterator vertex_value_input_first{}; VertexValueOutputIterator vertex_value_output_first{}; VertexOp v_op{}; - vertex_partition_device_t vertex_partition{}; + vertex_partition_device_view_t vertex_partition{}; size_t invalid_bucket_idx; - template + template __device__ std::enable_if_t, uint8_t> operator()( key_t key) const { @@ -154,7 +155,7 @@ struct call_v_op_t { } } - template + template __device__ std::enable_if_t, uint8_t> operator()( key_t key) const { @@ -188,7 +189,10 @@ template __device__ void push_if_buffer_element( - matrix_partition_device_t& matrix_partition, + matrix_partition_device_view_t& matrix_partition, typename std::iterator_traits::value_type key, typename GraphViewType::vertex_type row_offset, typename GraphViewType::vertex_type col, @@ -244,7 +248,10 @@ template __global__ void for_all_frontier_row_for_all_nbr_low_degree( - matrix_partition_device_t matrix_partition, + matrix_partition_device_view_t matrix_partition, KeyIterator key_first, KeyIterator key_last, AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, @@ -279,21 +286,21 @@ __global__ void for_all_frontier_row_for_all_nbr_low_degree( } auto row_offset = matrix_partition.get_major_offset_from_major_nocheck(row); vertex_t const* indices{nullptr}; - weight_t const* weights{nullptr}; + thrust::optional weights{nullptr}; edge_t local_out_degree{}; thrust::tie(indices, weights, local_out_degree) = matrix_partition.get_local_edges(row_offset); for (edge_t i = 0; i < local_out_degree; ++i) { - push_if_buffer_element(matrix_partition, - key, - row_offset, - indices[i], - weights != nullptr ? weights[i] : weight_t{1.0}, - adj_matrix_row_value_input_first, - adj_matrix_col_value_input_first, - buffer_key_output_first, - buffer_payload_output_first, - buffer_idx_ptr, - e_op); + push_if_buffer_element(matrix_partition, + key, + row_offset, + indices[i], + weights ? 
(*weights)[i] : weight_t{1.0}, + adj_matrix_row_value_input_first, + adj_matrix_col_value_input_first, + buffer_key_output_first, + buffer_payload_output_first, + buffer_idx_ptr, + e_op); } idx += gridDim.x * blockDim.x; } @@ -307,7 +314,10 @@ template __global__ void for_all_frontier_row_for_all_nbr_mid_degree( - matrix_partition_device_t matrix_partition, + matrix_partition_device_view_t matrix_partition, KeyIterator key_first, KeyIterator key_last, AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, @@ -344,21 +354,21 @@ __global__ void for_all_frontier_row_for_all_nbr_mid_degree( } auto row_offset = matrix_partition.get_major_offset_from_major_nocheck(row); vertex_t const* indices{nullptr}; - weight_t const* weights{nullptr}; + thrust::optional weights{nullptr}; edge_t local_out_degree{}; thrust::tie(indices, weights, local_out_degree) = matrix_partition.get_local_edges(row_offset); for (edge_t i = lane_id; i < local_out_degree; i += raft::warp_size()) { - push_if_buffer_element(matrix_partition, - key, - row_offset, - indices[i], - weights != nullptr ? weights[i] : weight_t{1.0}, - adj_matrix_row_value_input_first, - adj_matrix_col_value_input_first, - buffer_key_output_first, - buffer_payload_output_first, - buffer_idx_ptr, - e_op); + push_if_buffer_element(matrix_partition, + key, + row_offset, + indices[i], + weights ? (*weights)[i] : weight_t{1.0}, + adj_matrix_row_value_input_first, + adj_matrix_col_value_input_first, + buffer_key_output_first, + buffer_payload_output_first, + buffer_idx_ptr, + e_op); } idx += gridDim.x * (blockDim.x / raft::warp_size()); @@ -373,7 +383,10 @@ template __global__ void for_all_frontier_row_for_all_nbr_high_degree( - matrix_partition_device_t matrix_partition, + matrix_partition_device_view_t matrix_partition, KeyIterator key_first, KeyIterator key_last, AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, @@ -407,21 +420,21 @@ __global__ void for_all_frontier_row_for_all_nbr_high_degree( } auto row_offset = matrix_partition.get_major_offset_from_major_nocheck(row); vertex_t const* indices{nullptr}; - weight_t const* weights{nullptr}; + thrust::optional weights{nullptr}; edge_t local_out_degree{}; thrust::tie(indices, weights, local_out_degree) = matrix_partition.get_local_edges(row_offset); for (edge_t i = threadIdx.x; i < local_out_degree; i += blockDim.x) { - push_if_buffer_element(matrix_partition, - key, - row_offset, - indices[i], - weights != nullptr ? weights[i] : weight_t{1.0}, - adj_matrix_row_value_input_first, - adj_matrix_col_value_input_first, - buffer_key_output_first, - buffer_payload_output_first, - buffer_idx_ptr, - e_op); + push_if_buffer_element(matrix_partition, + key, + row_offset, + indices[i], + weights ? 
(*weights)[i] : weight_t{1.0}, + adj_matrix_row_value_input_first, + adj_matrix_col_value_input_first, + buffer_key_output_first, + buffer_payload_output_first, + buffer_idx_ptr, + e_op); } idx += gridDim.x; @@ -517,6 +530,7 @@ typename GraphViewType::edge_type compute_num_out_nbrs_from_frontier( using vertex_t = typename GraphViewType::vertex_type; using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; using key_t = typename VertexFrontierType::key_type; edge_t ret{0}; @@ -555,7 +569,9 @@ typename GraphViewType::edge_type compute_num_out_nbrs_from_frontier( local_frontier_sizes = std::vector{static_cast(cur_frontier_bucket.size())}; } for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { - matrix_partition_device_t matrix_partition(graph_view, i); + auto matrix_partition = + matrix_partition_device_view_t( + graph_view.get_matrix_partition_view(i)); if (GraphViewType::is_multi_gpu) { auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); @@ -755,7 +771,9 @@ void update_frontier_v_push_if_out_nbr( static_cast(thrust::distance(frontier_key_first, frontier_key_last)))}; } for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { - matrix_partition_device_t matrix_partition(graph_view, i); + auto matrix_partition = + matrix_partition_device_view_t( + graph_view.get_matrix_partition_view(i)); auto matrix_partition_frontier_key_buffer = allocate_dataframe_buffer(size_t{0}, handle.get_stream()); @@ -841,11 +859,11 @@ void update_frontier_v_push_if_out_nbr( ? vertex_t{0} : matrix_partition.get_major_value_start_offset(); auto segment_offsets = graph_view.get_local_adj_matrix_partition_segment_offsets(i); - if (segment_offsets.size() > 0) { - static_assert(detail::num_segments_per_vertex_partition == 3); - std::vector h_thresholds(detail::num_segments_per_vertex_partition - 1); - h_thresholds[0] = matrix_partition.get_major_first() + segment_offsets[1]; - h_thresholds[1] = matrix_partition.get_major_first() + segment_offsets[2]; + if (segment_offsets) { + static_assert(detail::num_sparse_segments_per_vertex_partition == 3); + std::vector h_thresholds(detail::num_sparse_segments_per_vertex_partition - 1); + h_thresholds[0] = matrix_partition.get_major_first() + (*segment_offsets)[1]; + h_thresholds[1] = matrix_partition.get_major_first() + (*segment_offsets)[2]; rmm::device_uvector d_thresholds(h_thresholds.size(), handle.get_stream()); raft::update_device( d_thresholds.data(), h_thresholds.data(), h_thresholds.size(), handle.get_stream()); @@ -868,19 +886,17 @@ void update_frontier_v_push_if_out_nbr( detail::update_frontier_v_push_if_out_nbr_for_all_block_size, handle.get_device_properties().maxGridSize[0]); - detail::for_all_frontier_row_for_all_nbr_high_degree<<>>( - matrix_partition, - get_dataframe_buffer_begin(matrix_partition_frontier_key_buffer), - get_dataframe_buffer_begin(matrix_partition_frontier_key_buffer) + h_offsets[0], - adj_matrix_row_value_input_first + row_value_input_offset, - adj_matrix_col_value_input_first, - get_dataframe_buffer_begin(key_buffer), - detail::get_optional_payload_buffer_begin(payload_buffer), - buffer_idx.data(), - e_op); + detail::for_all_frontier_row_for_all_nbr_high_degree + <<>>( + matrix_partition, + get_dataframe_buffer_begin(matrix_partition_frontier_key_buffer), + get_dataframe_buffer_begin(matrix_partition_frontier_key_buffer) + h_offsets[0], + adj_matrix_row_value_input_first + row_value_input_offset, + 
adj_matrix_col_value_input_first, + get_dataframe_buffer_begin(key_buffer), + detail::get_optional_payload_buffer_begin(payload_buffer), + buffer_idx.data(), + e_op); } if (h_offsets[1] - h_offsets[0] > 0) { raft::grid_1d_warp_t update_grid( @@ -888,19 +904,17 @@ void update_frontier_v_push_if_out_nbr( detail::update_frontier_v_push_if_out_nbr_for_all_block_size, handle.get_device_properties().maxGridSize[0]); - detail::for_all_frontier_row_for_all_nbr_mid_degree<<>>( - matrix_partition, - get_dataframe_buffer_begin(matrix_partition_frontier_key_buffer) + h_offsets[0], - get_dataframe_buffer_begin(matrix_partition_frontier_key_buffer) + h_offsets[1], - adj_matrix_row_value_input_first + row_value_input_offset, - adj_matrix_col_value_input_first, - get_dataframe_buffer_begin(key_buffer), - detail::get_optional_payload_buffer_begin(payload_buffer), - buffer_idx.data(), - e_op); + detail::for_all_frontier_row_for_all_nbr_mid_degree + <<>>( + matrix_partition, + get_dataframe_buffer_begin(matrix_partition_frontier_key_buffer) + h_offsets[0], + get_dataframe_buffer_begin(matrix_partition_frontier_key_buffer) + h_offsets[1], + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first, + get_dataframe_buffer_begin(key_buffer), + detail::get_optional_payload_buffer_begin(payload_buffer), + buffer_idx.data(), + e_op); } if (matrix_partition_frontier_size - h_offsets[1] > 0) { raft::grid_1d_thread_t update_grid( @@ -908,19 +922,17 @@ void update_frontier_v_push_if_out_nbr( detail::update_frontier_v_push_if_out_nbr_for_all_block_size, handle.get_device_properties().maxGridSize[0]); - detail::for_all_frontier_row_for_all_nbr_low_degree<<>>( - matrix_partition, - get_dataframe_buffer_begin(matrix_partition_frontier_key_buffer) + h_offsets[1], - get_dataframe_buffer_end(matrix_partition_frontier_key_buffer), - adj_matrix_row_value_input_first + row_value_input_offset, - adj_matrix_col_value_input_first, - get_dataframe_buffer_begin(key_buffer), - detail::get_optional_payload_buffer_begin(payload_buffer), - buffer_idx.data(), - e_op); + detail::for_all_frontier_row_for_all_nbr_low_degree + <<>>( + matrix_partition, + get_dataframe_buffer_begin(matrix_partition_frontier_key_buffer) + h_offsets[1], + get_dataframe_buffer_end(matrix_partition_frontier_key_buffer), + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first, + get_dataframe_buffer_begin(key_buffer), + detail::get_optional_payload_buffer_begin(payload_buffer), + buffer_idx.data(), + e_op); } } else { if (matrix_partition_frontier_size > 0) { @@ -929,19 +941,17 @@ void update_frontier_v_push_if_out_nbr( detail::update_frontier_v_push_if_out_nbr_for_all_block_size, handle.get_device_properties().maxGridSize[0]); - detail::for_all_frontier_row_for_all_nbr_low_degree<<>>( - matrix_partition, - get_dataframe_buffer_begin(matrix_partition_frontier_key_buffer), - get_dataframe_buffer_end(matrix_partition_frontier_key_buffer), - adj_matrix_row_value_input_first + row_value_input_offset, - adj_matrix_col_value_input_first, - get_dataframe_buffer_begin(key_buffer), - detail::get_optional_payload_buffer_begin(payload_buffer), - buffer_idx.data(), - e_op); + detail::for_all_frontier_row_for_all_nbr_low_degree + <<>>( + matrix_partition, + get_dataframe_buffer_begin(matrix_partition_frontier_key_buffer), + get_dataframe_buffer_end(matrix_partition_frontier_key_buffer), + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first, + 
get_dataframe_buffer_begin(key_buffer), + detail::get_optional_payload_buffer_begin(payload_buffer), + buffer_idx.data(), + e_op); } } } @@ -1061,7 +1071,8 @@ void update_frontier_v_push_if_out_nbr( static_assert(VertexFrontierType::kNumBuckets <= std::numeric_limits::max()); rmm::device_uvector bucket_indices(num_buffer_elements, handle.get_stream()); - vertex_partition_device_t vertex_partition(graph_view); + auto vertex_partition = vertex_partition_device_view_t( + graph_view.get_vertex_partition_view()); if constexpr (!std::is_same_v) { auto key_payload_pair_first = thrust::make_zip_iterator( @@ -1099,19 +1110,21 @@ void update_frontier_v_push_if_out_nbr( resize_dataframe_buffer(payload_buffer, size_t{0}, handle.get_stream()); shrink_to_fit_dataframe_buffer(payload_buffer, handle.get_stream()); } else { - thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - get_dataframe_buffer_begin(key_buffer), - get_dataframe_buffer_begin(key_buffer) + num_buffer_elements, - bucket_indices.begin(), - detail::call_v_op_t{vertex_value_input_first, - vertex_value_output_first, - v_op, - vertex_partition, - VertexFrontierType::kInvalidBucketIdx}); + thrust::transform( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + get_dataframe_buffer_begin(key_buffer), + get_dataframe_buffer_begin(key_buffer) + num_buffer_elements, + bucket_indices.begin(), + detail::call_v_op_t{vertex_value_input_first, + vertex_value_output_first, + v_op, + vertex_partition, + VertexFrontierType::kInvalidBucketIdx}); } auto bucket_key_pair_first = thrust::make_zip_iterator( diff --git a/cpp/include/cugraph/serialization/serializer.hpp b/cpp/include/cugraph/serialization/serializer.hpp index 666ee81e98f..240df1d304a 100644 --- a/cpp/include/cugraph/serialization/serializer.hpp +++ b/cpp/include/cugraph/serialization/serializer.hpp @@ -76,6 +76,7 @@ class serializer_t { : num_vertices_(graph.get_number_of_vertices()), num_edges_(graph.get_number_of_edges()), properties_(graph.get_graph_properties()), + is_weighted_(graph.is_weighted()), segment_offsets_(graph.view().get_local_adj_matrix_partition_segment_offsets(0)) { } @@ -83,10 +84,12 @@ class serializer_t { graph_meta_t(size_t num_vertices, size_t num_edges, graph_properties_t const& properties, - std::vector const& segment_offsets) + bool is_weighted, + std::optional> const& segment_offsets) : num_vertices_(num_vertices), num_edges_(num_edges), properties_(properties), + is_weighted_(is_weighted), segment_offsets_(segment_offsets) { } @@ -94,11 +97,13 @@ class serializer_t { size_t num_vertices_; size_t num_edges_; graph_properties_t properties_{}; - std::vector segment_offsets_{}; + bool is_weighted_{}; + std::optional> segment_offsets_{}; size_t get_device_sz_bytes(void) const { - return 2 * sizeof(size_t) + segment_offsets_.size() * sizeof(vertex_t) + + return 2 * sizeof(size_t) + + (segment_offsets_ ? (*segment_offsets_).size() : size_t{0}) * sizeof(vertex_t) + 3 * sizeof(bool_ser_t); } }; @@ -151,8 +156,7 @@ class serializer_t { size_t num_vertices = graph_meta.num_vertices_; size_t num_edges = graph_meta.num_edges_; - size_t weight_storage_sz = - graph_meta.properties_.is_weighted ? num_edges * sizeof(weight_t) : 0; + size_t weight_storage_sz = graph_meta.is_weighted_ ? 
num_edges * sizeof(weight_t) : 0; size_t device_ser_sz = (num_vertices + 1) * sizeof(edge_t) + num_edges * sizeof(vertex_t) + weight_storage_sz; diff --git a/cpp/include/cugraph/utilities/cython.hpp b/cpp/include/cugraph/utilities/cython.hpp index 7435daffb3e..2bd4e86aa7e 100644 --- a/cpp/include/cugraph/utilities/cython.hpp +++ b/cpp/include/cugraph/utilities/cython.hpp @@ -94,7 +94,10 @@ struct graph_container_t { void* src_vertices; void* dst_vertices; void* weights; + bool is_weighted; void* vertex_partition_offsets; + void* segment_offsets; + size_t num_segments; size_t num_local_edges; size_t num_global_vertices; @@ -104,7 +107,6 @@ struct graph_container_t { numberTypeEnum weightType; bool transposed; bool is_multi_gpu; - bool sorted_by_degree; bool do_expensive_check; int row_comm_size; int col_comm_size; @@ -246,8 +248,8 @@ struct random_walk_coo_t { // (unrenumbering maps, etc.) // template -struct renum_quad_t { - explicit renum_quad_t(raft::handle_t const& handle) : dv_(0, handle.get_stream()), part_() {} +struct renum_tuple_t { + explicit renum_tuple_t(raft::handle_t const& handle) : dv_(0, handle.get_stream()), part_() {} rmm::device_uvector& get_dv(void) { return dv_; } @@ -261,6 +263,13 @@ struct renum_quad_t { vertex_t& get_num_vertices(void) { return nv_; } edge_t& get_num_edges(void) { return ne_; } + std::vector& get_segment_offsets(void) { return segment_offsets_; } + + std::unique_ptr> get_segment_offsets_wrap() + { // const + return std::make_unique>(segment_offsets_); + } + // `partition_t` pass-through getters // int get_part_row_size() const { return part_.get_row_size(); } @@ -271,7 +280,7 @@ struct renum_quad_t { // FIXME: part_.get_vertex_partition_offsets() returns a std::vector // - std::unique_ptr> get_partition_offsets(void) // const + std::unique_ptr> get_partition_offsets_wrap(void) // const { return std::make_unique>(part_.get_vertex_partition_offsets()); } @@ -354,7 +363,9 @@ struct renum_quad_t { cugraph::experimental::partition_t part_; vertex_t nv_{0}; edge_t ne_{0}; + std::vector segment_offsets_; }; + // FIXME: finish description for vertex_partition_offsets // // Factory function for populating an empty graph container with a new graph @@ -412,13 +423,14 @@ void populate_graph_container(graph_container_t& graph_container, void* dst_vertices, void* weights, void* vertex_partition_offsets, + void* segment_offsets, + size_t num_segments, numberTypeEnum vertexType, numberTypeEnum edgeType, numberTypeEnum weightType, size_t num_local_edges, size_t num_global_vertices, size_t num_global_edges, - bool sorted_by_degree, bool is_weighted, bool is_symmetric, bool transposed, @@ -572,7 +584,7 @@ std::unique_ptr> call_shuffle( // Wrapper for calling renumber_edeglist() inplace: // template -std::unique_ptr> call_renumber( +std::unique_ptr> call_renumber( raft::handle_t const& handle, vertex_t* shuffled_edgelist_major_vertices /* [INOUT] */, vertex_t* shuffled_edgelist_minor_vertices /* [INOUT] */, diff --git a/cpp/include/cugraph/vertex_partition_device.cuh b/cpp/include/cugraph/vertex_partition_device.cuh deleted file mode 100644 index b57efd115eb..00000000000 --- a/cpp/include/cugraph/vertex_partition_device.cuh +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include -#include - -#include - -namespace cugraph { -namespace experimental { - -template -class vertex_partition_device_base_t { - public: - vertex_partition_device_base_t(vertex_t number_of_vertices) - : number_of_vertices_(number_of_vertices) - { - } - - template - __host__ __device__ std::enable_if_t::value, bool> is_valid_vertex( - vertex_type v) const noexcept - { - return ((v >= 0) && (v < number_of_vertices_)); - } - - template - __host__ __device__ std::enable_if_t::value, bool> is_valid_vertex( - vertex_type v) const noexcept - { - return (v < number_of_vertices_); - } - - private: - // should be trivially copyable to device - vertex_t number_of_vertices_{0}; -}; - -template -class vertex_partition_device_t; - -// multi-GPU version -template -class vertex_partition_device_t> - : public vertex_partition_device_base_t { - public: - vertex_partition_device_t(GraphViewType const& graph_view) - : vertex_partition_device_base_t( - graph_view.get_number_of_vertices()), - first_(graph_view.get_local_vertex_first()), - last_(graph_view.get_local_vertex_last()) - { - } - - __host__ __device__ bool is_local_vertex_nocheck(typename GraphViewType::vertex_type v) const - noexcept - { - return (v >= first_) && (v < last_); - } - - __host__ __device__ typename GraphViewType::vertex_type - get_local_vertex_offset_from_vertex_nocheck(typename GraphViewType::vertex_type v) const noexcept - { - return v - first_; - } - - private: - // should be trivially copyable to device - typename GraphViewType::vertex_type first_{0}; - typename GraphViewType::vertex_type last_{0}; -}; - -// single-GPU version -template -class vertex_partition_device_t> - : public vertex_partition_device_base_t { - public: - vertex_partition_device_t(GraphViewType const& graph_view) - : vertex_partition_device_base_t( - graph_view.get_number_of_vertices()) - { - } - - __host__ __device__ constexpr bool is_local_vertex_nocheck( - typename GraphViewType::vertex_type v) const noexcept - { - return true; - } - - __host__ __device__ constexpr typename GraphViewType::vertex_type - get_local_vertex_offset_from_vertex_nocheck(typename GraphViewType::vertex_type v) const noexcept - { - return v; - } -}; - -} // namespace experimental -} // namespace cugraph diff --git a/cpp/include/cugraph/vertex_partition_device_view.cuh b/cpp/include/cugraph/vertex_partition_device_view.cuh new file mode 100644 index 00000000000..046c89e62a2 --- /dev/null +++ b/cpp/include/cugraph/vertex_partition_device_view.cuh @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include + +#include + +namespace cugraph { +namespace experimental { + +namespace detail { + +template +class vertex_partition_device_view_base_t { + public: + vertex_partition_device_view_base_t(vertex_t number_of_vertices) + : number_of_vertices_(number_of_vertices) + { + } + + template + __host__ __device__ std::enable_if_t::value, bool> is_valid_vertex( + vertex_type v) const noexcept + { + return ((v >= 0) && (v < number_of_vertices_)); + } + + template + __host__ __device__ std::enable_if_t::value, bool> is_valid_vertex( + vertex_type v) const noexcept + { + return (v < number_of_vertices_); + } + + private: + // should be trivially copyable to device + vertex_t number_of_vertices_{0}; +}; + +} // namespace detail + +template +class vertex_partition_device_view_t; + +// multi-GPU version +template +class vertex_partition_device_view_t> + : public detail::vertex_partition_device_view_base_t { + public: + vertex_partition_device_view_t(vertex_partition_view_t view) + : detail::vertex_partition_device_view_base_t(view.get_number_of_vertices()), + local_vertex_first_(view.get_local_vertex_first()), + local_vertex_last_(view.get_local_vertex_last()) + { + } + + __host__ __device__ bool is_local_vertex_nocheck(vertex_t v) const noexcept + { + return (v >= local_vertex_first_) && (v < local_vertex_last_); + } + + __host__ __device__ vertex_t get_local_vertex_offset_from_vertex_nocheck(vertex_t v) const + noexcept + { + return v - local_vertex_first_; + } + + private: + // should be trivially copyable to device + vertex_t local_vertex_first_{0}; + vertex_t local_vertex_last_{0}; +}; + +// single-GPU version +template +class vertex_partition_device_view_t> + : public detail::vertex_partition_device_view_base_t { + public: + vertex_partition_device_view_t(vertex_partition_view_t view) + : detail::vertex_partition_device_view_base_t(view.get_number_of_vertices()) + { + } + + __host__ __device__ constexpr bool is_local_vertex_nocheck(vertex_t v) const noexcept + { + return true; + } + + __host__ __device__ constexpr vertex_t get_local_vertex_offset_from_vertex_nocheck( + vertex_t v) const noexcept + { + return v; + } +}; + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/include/cugraph/vertex_partition_view.hpp b/cpp/include/cugraph/vertex_partition_view.hpp new file mode 100644 index 00000000000..51badf162eb --- /dev/null +++ b/cpp/include/cugraph/vertex_partition_view.hpp @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include + +namespace cugraph { +namespace experimental { + +namespace detail { + +template +class vertex_partition_view_base_t { + public: + vertex_partition_view_base_t(vertex_t number_of_vertices) + : number_of_vertices_(number_of_vertices) + { + } + + vertex_t get_number_of_vertices() const { return number_of_vertices_; } + + private: + vertex_t number_of_vertices_{0}; +}; + +} // namespace detail + +template +class vertex_partition_view_t; + +// multi-GPU version +template +class vertex_partition_view_t> + : public detail::vertex_partition_view_base_t { + public: + vertex_partition_view_t(vertex_t number_of_vertices, + vertex_t local_vertex_first, + vertex_t local_vertex_last) + : detail::vertex_partition_view_base_t(number_of_vertices), + local_vertex_first_(local_vertex_first), + local_vertex_last_(local_vertex_last) + { + } + + vertex_t get_local_vertex_first() const { return local_vertex_first_; } + vertex_t get_local_vertex_last() const { return local_vertex_last_; } + + private: + vertex_t local_vertex_first_{0}; + vertex_t local_vertex_last_{0}; +}; + +// single-GPU version +template +class vertex_partition_view_t> + : public detail::vertex_partition_view_base_t { + public: + vertex_partition_view_t(vertex_t number_of_vertices) + : detail::vertex_partition_view_base_t(number_of_vertices) + { + } + + vertex_t get_local_vertex_first() const { return vertex_t{0}; } + vertex_t get_local_vertex_last() const { return this->get_number_of_vertices(); } +}; + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/src/community/egonet.cu b/cpp/src/community/egonet.cu index d4f10f991b7..72dc0eebc42 100644 --- a/cpp/src/community/egonet.cu +++ b/cpp/src/community/egonet.cu @@ -57,7 +57,7 @@ cutoff and subgraph extraction template std::tuple, rmm::device_uvector, - rmm::device_uvector, + std::optional>, rmm::device_uvector> extract( raft::handle_t const &handle, @@ -176,13 +176,16 @@ extract( return cugraph::experimental::extract_induced_subgraphs( handle, csr_view, neighbors_offsets.data().get(), neighbors.data().get(), n_subgraphs); } + } // namespace + namespace cugraph { namespace experimental { + template std::tuple, rmm::device_uvector, - rmm::device_uvector, + std::optional>, rmm::device_uvector> extract_ego(raft::handle_t const &handle, graph_view_t const &graph_view, @@ -211,7 +214,7 @@ extract_ego(raft::handle_t const &handle, // SG FP32 template std::tuple, rmm::device_uvector, - rmm::device_uvector, + std::optional>, rmm::device_uvector> extract_ego(raft::handle_t const &, graph_view_t const &, @@ -220,7 +223,7 @@ extract_ego(raft::handle_t const &, int32_t); template std::tuple, rmm::device_uvector, - rmm::device_uvector, + std::optional>, rmm::device_uvector> extract_ego(raft::handle_t const &, graph_view_t const &, @@ -229,7 +232,7 @@ extract_ego(raft::handle_t const &, int32_t); template std::tuple, rmm::device_uvector, - rmm::device_uvector, + std::optional>, rmm::device_uvector> extract_ego(raft::handle_t const &, graph_view_t const &, @@ -240,7 +243,7 @@ extract_ego(raft::handle_t const &, // SG FP64 template std::tuple, rmm::device_uvector, - rmm::device_uvector, + std::optional>, rmm::device_uvector> extract_ego(raft::handle_t const &, graph_view_t const &, @@ -249,7 +252,7 @@ extract_ego(raft::handle_t const &, int32_t); template std::tuple, rmm::device_uvector, - rmm::device_uvector, + std::optional>, rmm::device_uvector> extract_ego(raft::handle_t const &, graph_view_t const &, @@ -258,7 +261,7 @@ extract_ego(raft::handle_t const 
&, int32_t); template std::tuple, rmm::device_uvector, - rmm::device_uvector, + std::optional>, rmm::device_uvector> extract_ego(raft::handle_t const &, graph_view_t const &, diff --git a/cpp/src/components/weakly_connected_components.cu b/cpp/src/components/weakly_connected_components.cu index ff8fbb9d032..848a6d9e615 100644 --- a/cpp/src/components/weakly_connected_components.cu +++ b/cpp/src/components/weakly_connected_components.cu @@ -24,7 +24,6 @@ #include #include #include -#include #include #include @@ -41,6 +40,7 @@ #include #include +#include #include #include #include @@ -60,7 +60,8 @@ std::tuple, typename GraphViewType::vertex_type, typename GraphViewType::edge_type> accumulate_new_roots(raft::handle_t const &handle, - vertex_partition_device_t vertex_partition, + vertex_partition_device_view_t vertex_partition, typename GraphViewType::vertex_type const *components, typename GraphViewType::edge_type const *degrees, typename GraphViewType::vertex_type const *candidate_first, @@ -170,7 +171,8 @@ template struct v_op_t { using vertex_type = typename GraphViewType::vertex_type; - vertex_partition_device_t vertex_partition{}; + vertex_partition_device_view_t + vertex_partition{}; vertex_type *level_components{}; decltype(thrust::make_zip_iterator(thrust::make_tuple( static_cast(nullptr), static_cast(nullptr)))) edge_buffer_first{}; @@ -267,7 +269,8 @@ void weakly_connected_components_impl(raft::handle_t const &handle, std::vector level_local_vertex_first_vectors{}; while (true) { auto level_graph_view = num_levels == 0 ? push_graph_view : level_graph.view(); - vertex_partition_device_t vertex_partition(level_graph_view); + auto vertex_partition = vertex_partition_device_view_t( + level_graph_view.get_vertex_partition_view()); level_component_vectors.push_back(rmm::device_uvector( num_levels == 0 ? vertex_t{0} : level_graph_view.get_number_of_local_vertices(), handle.get_stream_view())); @@ -471,15 +474,15 @@ void weakly_connected_components_impl(raft::handle_t const &handle, while (true) { if ((edge_count < degree_sum_threshold) && (next_candidate_offset < static_cast(new_root_candidates.size()))) { - auto [new_roots, num_scanned, degree_sum] = - accumulate_new_roots(handle, - vertex_partition, - level_components, - degrees.data(), - new_root_candidates.data() + next_candidate_offset, - new_root_candidates.data() + new_root_candidates.size(), - iter == 0 ? init_max_new_roots : max_new_roots, - degree_sum_threshold - edge_count); + auto [new_roots, num_scanned, degree_sum] = accumulate_new_roots( + handle, + vertex_partition, + level_components, + degrees.data(), + new_root_candidates.data() + next_candidate_offset, + new_root_candidates.data() + new_root_candidates.size(), + iter == 0 ? 
init_max_new_roots : max_new_roots, + degree_sum_threshold - edge_count); next_candidate_offset += num_scanned; edge_count += degree_sum; @@ -710,19 +713,20 @@ void weakly_connected_components_impl(raft::handle_t const &handle, handle.get_stream()); } - std::tie(level_graph, level_renumber_map) = + std::optional> tmp_renumber_map{std::nullopt}; + std::tie(level_graph, tmp_renumber_map) = create_graph_from_edgelist( - handle, - std::nullopt, - std::move(std::get<0>(edge_buffer)), - std::move(std::get<1>(edge_buffer)), - rmm::device_uvector(size_t{0}, handle.get_stream_view()), - graph_properties_t{true, false, false}, - true); + GraphViewType::is_multi_gpu>(handle, + std::nullopt, + std::move(std::get<0>(edge_buffer)), + std::move(std::get<1>(edge_buffer)), + std::nullopt, + graph_properties_t{true, false}, + true); + level_renumber_map = std::move(*tmp_renumber_map); } else { break; } diff --git a/cpp/src/experimental/bfs.cu b/cpp/src/experimental/bfs.cu index b75590f89b3..24c86fea79f 100644 --- a/cpp/src/experimental/bfs.cu +++ b/cpp/src/experimental/bfs.cu @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include @@ -109,7 +109,8 @@ void bfs(raft::handle_t const &handle, if (direction_optimizing) { CUGRAPH_FAIL("unimplemented."); } else { - vertex_partition_device_t vertex_partition(push_graph_view); + auto vertex_partition = vertex_partition_device_view_t( + push_graph_view.get_vertex_partition_view()); update_frontier_v_push_if_out_nbr( handle, diff --git a/cpp/src/experimental/coarsen_graph.cu b/cpp/src/experimental/coarsen_graph.cu index 834c41cbbf3..573e7818c82 100644 --- a/cpp/src/experimental/coarsen_graph.cu +++ b/cpp/src/experimental/coarsen_graph.cu @@ -43,47 +43,45 @@ namespace cugraph { namespace experimental { namespace detail { -template -std:: - tuple, rmm::device_uvector, rmm::device_uvector> - compressed_sparse_to_edgelist(edge_t const *compressed_sparse_offsets, - vertex_t const *compressed_sparse_indices, - weight_t const *compressed_sparse_weights, - vertex_t major_first, - vertex_t major_last, - bool is_weighted, - cudaStream_t stream) +template +std::tuple, + rmm::device_uvector, + std::optional>> +decompress_matrix_partition_to_edgelist( + matrix_partition_device_view_t const matrix_partition, + cudaStream_t stream) { - edge_t number_of_edges{0}; - raft::update_host( - &number_of_edges, compressed_sparse_offsets + (major_last - major_first), 1, stream); - CUDA_TRY(cudaStreamSynchronize(stream)); + auto number_of_edges = matrix_partition.get_number_of_edges(); rmm::device_uvector edgelist_major_vertices(number_of_edges, stream); rmm::device_uvector edgelist_minor_vertices(number_of_edges, stream); - rmm::device_uvector edgelist_weights(is_weighted ? number_of_edges : 0, stream); + auto edgelist_weights = + matrix_partition.get_weights() + ? 
std::make_optional>(number_of_edges, stream) + : std::nullopt; + auto major_first = matrix_partition.get_major_first(); + auto major_last = matrix_partition.get_major_last(); // FIXME: this is highly inefficient for very high-degree vertices, for better performance, we can // fill high-degree vertices using one CUDA block per vertex, mid-degree vertices using one CUDA // warp per vertex, and low-degree vertices using one CUDA thread per block - thrust::for_each(rmm::exec_policy(stream)->on(stream), - thrust::make_counting_iterator(major_first), - thrust::make_counting_iterator(major_last), - [compressed_sparse_offsets, - major_first, - p_majors = edgelist_major_vertices.begin()] __device__(auto v) { - auto first = compressed_sparse_offsets[v - major_first]; - auto last = compressed_sparse_offsets[v - major_first + 1]; - thrust::fill(thrust::seq, p_majors + first, p_majors + last, v); - }); + thrust::for_each( + rmm::exec_policy(stream)->on(stream), + thrust::make_counting_iterator(major_first), + thrust::make_counting_iterator(major_last), + [matrix_partition, major_first, p_majors = edgelist_major_vertices.begin()] __device__(auto v) { + auto first = matrix_partition.get_local_offset(v - major_first); + auto last = first + matrix_partition.get_local_degree(v - major_first); + thrust::fill(thrust::seq, p_majors + first, p_majors + last, v); + }); thrust::copy(rmm::exec_policy(stream)->on(stream), - compressed_sparse_indices, - compressed_sparse_indices + number_of_edges, + matrix_partition.get_indices(), + matrix_partition.get_indices() + number_of_edges, edgelist_minor_vertices.begin()); - if (is_weighted) { + if (edgelist_weights) { thrust::copy(rmm::exec_policy(stream)->on(stream), - compressed_sparse_weights, - compressed_sparse_weights + number_of_edges, - edgelist_weights.data()); + *(matrix_partition.get_weights()), + *(matrix_partition.get_weights()) + number_of_edges, + (*edgelist_weights).data()); } return std::make_tuple(std::move(edgelist_major_vertices), @@ -94,19 +92,18 @@ std:: template edge_t groupby_e_and_coarsen_edgelist(vertex_t *edgelist_major_vertices /* [INOUT] */, vertex_t *edgelist_minor_vertices /* [INOUT] */, - weight_t *edgelist_weights /* [INOUT] */, + std::optional edgelist_weights /* [INOUT] */, edge_t number_of_edges, - bool is_weighted, cudaStream_t stream) { auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(edgelist_major_vertices, edgelist_minor_vertices)); - if (is_weighted) { + if (edgelist_weights) { thrust::sort_by_key(rmm::exec_policy(stream)->on(stream), pair_first, pair_first + number_of_edges, - edgelist_weights); + *edgelist_weights); rmm::device_uvector tmp_edgelist_major_vertices(number_of_edges, stream); rmm::device_uvector tmp_edgelist_minor_vertices(tmp_edgelist_major_vertices.size(), @@ -116,7 +113,7 @@ edge_t groupby_e_and_coarsen_edgelist(vertex_t *edgelist_major_vertices /* [INOU rmm::exec_policy(stream)->on(stream), pair_first, pair_first + number_of_edges, - edgelist_weights, + (*edgelist_weights), thrust::make_zip_iterator(thrust::make_tuple(tmp_edgelist_major_vertices.begin(), tmp_edgelist_minor_vertices.begin())), tmp_edgelist_weights.begin()); @@ -131,7 +128,7 @@ edge_t groupby_e_and_coarsen_edgelist(vertex_t *edgelist_major_vertices /* [INOU edge_first, edge_first + ret, thrust::make_zip_iterator(thrust::make_tuple( - edgelist_major_vertices, edgelist_minor_vertices, edgelist_weights))); + edgelist_major_vertices, edgelist_minor_vertices, *edgelist_weights))); return ret; } else { @@ -143,63 +140,49 @@ edge_t 
groupby_e_and_coarsen_edgelist(vertex_t *edgelist_major_vertices /* [INOU } } -template -std:: - tuple, rmm::device_uvector, rmm::device_uvector> - compressed_sparse_to_relabeled_and_grouped_and_coarsened_edgelist( - edge_t const *compressed_sparse_offsets, - vertex_t const *compressed_sparse_indices, - weight_t const *compressed_sparse_weights, - vertex_t const *p_major_labels, - vertex_t const *p_minor_labels, - vertex_t major_first, - vertex_t major_last, - vertex_t minor_first, - vertex_t minor_last, - bool is_weighted, - cudaStream_t stream) +template +std::tuple, + rmm::device_uvector, + std::optional>> +decompress_matrix_partition_to_relabeled_and_grouped_and_coarsened_edgelist( + matrix_partition_device_view_t const matrix_partition, + vertex_t const *p_major_labels, + vertex_t const *p_minor_labels, + cudaStream_t stream) { // FIXME: it might be possible to directly create relabled & coarsened edgelist from the // compressed sparse format to save memory - rmm::device_uvector edgelist_major_vertices(0, stream); - rmm::device_uvector edgelist_minor_vertices(0, stream); - rmm::device_uvector edgelist_weights(0, stream); - std::tie(edgelist_major_vertices, edgelist_minor_vertices, edgelist_weights) = - compressed_sparse_to_edgelist(compressed_sparse_offsets, - compressed_sparse_indices, - compressed_sparse_weights, - major_first, - major_last, - is_weighted, - stream); + auto [edgelist_major_vertices, edgelist_minor_vertices, edgelist_weights] = + decompress_matrix_partition_to_edgelist(matrix_partition, stream); auto pair_first = thrust::make_zip_iterator( thrust::make_tuple(edgelist_major_vertices.begin(), edgelist_minor_vertices.begin())); - thrust::transform( - rmm::exec_policy(stream)->on(stream), - pair_first, - pair_first + edgelist_major_vertices.size(), - pair_first, - [p_major_labels, p_minor_labels, major_first, minor_first] __device__(auto val) { - return thrust::make_tuple(p_major_labels[thrust::get<0>(val) - major_first], - p_minor_labels[thrust::get<1>(val) - minor_first]); - }); - - auto number_of_edges = - groupby_e_and_coarsen_edgelist(edgelist_major_vertices.data(), - edgelist_minor_vertices.data(), - edgelist_weights.data(), - static_cast(edgelist_major_vertices.size()), - is_weighted, - stream); + thrust::transform(rmm::exec_policy(stream)->on(stream), + pair_first, + pair_first + edgelist_major_vertices.size(), + pair_first, + [p_major_labels, + p_minor_labels, + major_first = matrix_partition.get_major_first(), + minor_first = matrix_partition.get_minor_first()] __device__(auto val) { + return thrust::make_tuple(p_major_labels[thrust::get<0>(val) - major_first], + p_minor_labels[thrust::get<1>(val) - minor_first]); + }); + + auto number_of_edges = groupby_e_and_coarsen_edgelist( + edgelist_major_vertices.data(), + edgelist_minor_vertices.data(), + edgelist_weights ? 
std::optional{(*edgelist_weights).data()} : std::nullopt, + static_cast(edgelist_major_vertices.size()), + stream); edgelist_major_vertices.resize(number_of_edges, stream); edgelist_major_vertices.shrink_to_fit(stream); edgelist_minor_vertices.resize(number_of_edges, stream); edgelist_minor_vertices.shrink_to_fit(stream); - if (is_weighted) { - edgelist_weights.resize(number_of_edges, stream); - edgelist_weights.shrink_to_fit(stream); + if (edgelist_weights) { + (*edgelist_weights).resize(number_of_edges, stream); + (*edgelist_weights).shrink_to_fit(stream); } return std::make_tuple(std::move(edgelist_major_vertices), @@ -251,16 +234,19 @@ coarsen_graph( std::vector> coarsened_edgelist_major_vertices{}; std::vector> coarsened_edgelist_minor_vertices{}; - std::vector> coarsened_edgelist_weights{}; + auto coarsened_edgelist_weights = + graph_view.is_weighted() ? std::make_optional>>({}) + : std::nullopt; coarsened_edgelist_major_vertices.reserve(graph_view.get_number_of_local_adj_matrix_partitions()); coarsened_edgelist_minor_vertices.reserve(coarsened_edgelist_major_vertices.size()); - coarsened_edgelist_weights.reserve( - graph_view.is_weighted() ? coarsened_edgelist_major_vertices.size() : size_t{0}); + if (coarsened_edgelist_weights) { + (*coarsened_edgelist_weights).reserve(coarsened_edgelist_major_vertices.size()); + } for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { coarsened_edgelist_major_vertices.emplace_back(0, handle.get_stream()); coarsened_edgelist_minor_vertices.emplace_back(0, handle.get_stream()); - if (graph_view.is_weighted()) { - coarsened_edgelist_weights.emplace_back(0, handle.get_stream()); + if (coarsened_edgelist_weights) { + (*coarsened_edgelist_weights).emplace_back(0, handle.get_stream()); } } // FIXME: we may compare performance/memory footprint with the hash_based approach especially when @@ -309,25 +295,12 @@ coarsen_graph( comm.barrier(); // currently, this is ncclAllReduce #endif - rmm::device_uvector edgelist_major_vertices(0, handle.get_stream()); - rmm::device_uvector edgelist_minor_vertices(0, handle.get_stream()); - rmm::device_uvector edgelist_weights(0, handle.get_stream()); - std::tie(edgelist_major_vertices, edgelist_minor_vertices, edgelist_weights) = - compressed_sparse_to_relabeled_and_grouped_and_coarsened_edgelist( - graph_view.offsets(i), - graph_view.indices(i), - graph_view.weights(i), + auto [edgelist_major_vertices, edgelist_minor_vertices, edgelist_weights] = + decompress_matrix_partition_to_relabeled_and_grouped_and_coarsened_edgelist( + matrix_partition_device_view_t( + graph_view.get_matrix_partition_view(i)), major_labels.data(), adj_matrix_minor_labels.data(), - store_transposed ? graph_view.get_local_adj_matrix_partition_col_first(i) - : graph_view.get_local_adj_matrix_partition_row_first(i), - store_transposed ? graph_view.get_local_adj_matrix_partition_col_last(i) - : graph_view.get_local_adj_matrix_partition_row_last(i), - store_transposed ? graph_view.get_local_adj_matrix_partition_row_first(i) - : graph_view.get_local_adj_matrix_partition_col_first(i), - store_transposed ? graph_view.get_local_adj_matrix_partition_row_last(i) - : graph_view.get_local_adj_matrix_partition_col_last(i), - graph_view.is_weighted(), handle.get_stream()); // 1-2. 
globaly shuffle @@ -335,14 +308,16 @@ coarsen_graph( { rmm::device_uvector rx_edgelist_major_vertices(0, handle.get_stream()); rmm::device_uvector rx_edgelist_minor_vertices(0, handle.get_stream()); - rmm::device_uvector rx_edgelist_weights(0, handle.get_stream()); - if (graph_view.is_weighted()) { + auto rx_edgelist_weights = + edgelist_weights ? std::make_optional>(0, handle.get_stream()) + : std::nullopt; + if (edgelist_weights) { auto edge_first = thrust::make_zip_iterator(thrust::make_tuple(edgelist_major_vertices.begin(), edgelist_minor_vertices.begin(), - edgelist_weights.begin())); + (*edgelist_weights).begin())); std::forward_as_tuple( - std::tie(rx_edgelist_major_vertices, rx_edgelist_minor_vertices, rx_edgelist_weights), + std::tie(rx_edgelist_major_vertices, rx_edgelist_minor_vertices, *rx_edgelist_weights), std::ignore) = groupby_gpuid_and_shuffle_values( handle.get_comms(), @@ -391,10 +366,10 @@ coarsen_graph( }; auto pair_first = thrust::make_zip_iterator( thrust::make_tuple(edgelist_major_vertices.begin(), edgelist_minor_vertices.begin())); - auto counts = graph_view.is_weighted() + auto counts = edgelist_weights ? groupby_and_count(pair_first, pair_first + edgelist_major_vertices.size(), - edgelist_weights.begin(), + (*edgelist_weights).begin(), local_partition_id_op, graph_view.get_number_of_local_adj_matrix_partitions(), handle.get_stream()) @@ -415,10 +390,10 @@ coarsen_graph( auto number_of_partition_edges = groupby_e_and_coarsen_edgelist( edgelist_major_vertices.begin() + h_displacements[j], edgelist_minor_vertices.begin() + h_displacements[j], - graph_view.is_weighted() ? edgelist_weights.begin() + h_displacements[j] - : static_cast(nullptr), + edgelist_weights + ? std::optional{(*edgelist_weights).data() + h_displacements[j]} + : std::nullopt, h_counts[j], - graph_view.is_weighted(), handle.get_stream()); auto cur_size = coarsened_edgelist_major_vertices[j].size(); @@ -429,19 +404,19 @@ coarsen_graph( handle.get_stream()); coarsened_edgelist_minor_vertices[j].resize(coarsened_edgelist_major_vertices[j].size(), handle.get_stream()); - if (graph_view.is_weighted()) { - coarsened_edgelist_weights[j].resize(coarsened_edgelist_major_vertices[j].size(), - handle.get_stream()); + if (coarsened_edgelist_weights) { + (*coarsened_edgelist_weights)[j].resize(coarsened_edgelist_major_vertices[j].size(), + handle.get_stream()); auto src_edge_first = thrust::make_zip_iterator(thrust::make_tuple(edgelist_major_vertices.begin(), edgelist_minor_vertices.begin(), - edgelist_weights.begin())) + + (*edgelist_weights).begin())) + h_displacements[j]; auto dst_edge_first = thrust::make_zip_iterator(thrust::make_tuple(coarsened_edgelist_major_vertices[j].begin(), coarsened_edgelist_minor_vertices[j].begin(), - coarsened_edgelist_weights[j].begin())) + + (*coarsened_edgelist_weights)[j].begin())) + cur_size; thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), src_edge_first, @@ -467,18 +442,18 @@ coarsen_graph( auto number_of_partition_edges = groupby_e_and_coarsen_edgelist( coarsened_edgelist_major_vertices[i].data(), coarsened_edgelist_minor_vertices[i].data(), - graph_view.is_weighted() ? coarsened_edgelist_weights[i].data() - : static_cast(nullptr), + coarsened_edgelist_weights + ? 
std::optional{(*coarsened_edgelist_weights)[i].data()} + : std::nullopt, static_cast(coarsened_edgelist_major_vertices[i].size()), - graph_view.is_weighted(), handle.get_stream()); coarsened_edgelist_major_vertices[i].resize(number_of_partition_edges, handle.get_stream()); coarsened_edgelist_major_vertices[i].shrink_to_fit(handle.get_stream()); coarsened_edgelist_minor_vertices[i].resize(number_of_partition_edges, handle.get_stream()); coarsened_edgelist_minor_vertices[i].shrink_to_fit(handle.get_stream()); - if (coarsened_edgelist_weights.size() > 0) { - coarsened_edgelist_weights[i].resize(number_of_partition_edges, handle.get_stream()); - coarsened_edgelist_weights[i].shrink_to_fit(handle.get_stream()); + if (coarsened_edgelist_weights) { + (*coarsened_edgelist_weights)[i].resize(number_of_partition_edges, handle.get_stream()); + (*coarsened_edgelist_weights)[i].shrink_to_fit(handle.get_stream()); } } @@ -531,6 +506,7 @@ coarsen_graph( col_comm_rank); vertex_t number_of_vertices{}; edge_t number_of_edges{}; + std::optional> segment_offsets{}; { std::vector major_ptrs(coarsened_edgelist_major_vertices.size()); std::vector minor_ptrs(major_ptrs.size()); @@ -540,7 +516,7 @@ coarsen_graph( minor_ptrs[i] = coarsened_edgelist_minor_vertices[i].data(); counts[i] = static_cast(coarsened_edgelist_major_vertices[i].size()); } - std::tie(renumber_map_labels, partition, number_of_vertices, number_of_edges) = + std::tie(renumber_map_labels, partition, number_of_vertices, number_of_edges, segment_offsets) = renumber_edgelist( handle, std::optional>{ @@ -560,8 +536,10 @@ coarsen_graph( : coarsened_edgelist_major_vertices[i].data(); edgelists[i].p_dst_vertices = store_transposed ? coarsened_edgelist_major_vertices[i].data() : coarsened_edgelist_minor_vertices[i].data(); - edgelists[i].p_edge_weights = graph_view.is_weighted() ? coarsened_edgelist_weights[i].data() - : static_cast(nullptr); + edgelists[i].p_edge_weights = + coarsened_edgelist_weights + ? 
std::optional{(*coarsened_edgelist_weights)[i].data()} + : std::nullopt, edgelists[i].number_of_edges = static_cast(coarsened_edgelist_major_vertices[i].size()); } @@ -572,8 +550,8 @@ coarsen_graph( partition, number_of_vertices, number_of_edges, - graph_properties_t{graph_view.is_symmetric(), false, graph_view.is_weighted()}, - true), + graph_properties_t{graph_view.is_symmetric(), false}, + segment_offsets), std::move(renumber_map_labels)); } @@ -597,23 +575,14 @@ coarsen_graph( // currently, nothing to do } - rmm::device_uvector coarsened_edgelist_major_vertices(0, handle.get_stream()); - rmm::device_uvector coarsened_edgelist_minor_vertices(0, handle.get_stream()); - rmm::device_uvector coarsened_edgelist_weights(0, handle.get_stream()); - std::tie(coarsened_edgelist_major_vertices, - coarsened_edgelist_minor_vertices, - coarsened_edgelist_weights) = - compressed_sparse_to_relabeled_and_grouped_and_coarsened_edgelist( - graph_view.offsets(), - graph_view.indices(), - graph_view.weights(), + auto [coarsened_edgelist_major_vertices, + coarsened_edgelist_minor_vertices, + coarsened_edgelist_weights] = + decompress_matrix_partition_to_relabeled_and_grouped_and_coarsened_edgelist( + matrix_partition_device_view_t( + graph_view.get_matrix_partition_view()), labels, labels, - vertex_t{0}, - graph_view.get_number_of_vertices(), - vertex_t{0}, - graph_view.get_number_of_vertices(), - graph_view.is_weighted(), handle.get_stream()); rmm::device_uvector unique_labels(graph_view.get_number_of_vertices(), @@ -632,7 +601,7 @@ coarsen_graph( unique_labels.end())), handle.get_stream()); - auto renumber_map_labels = renumber_edgelist( + auto [renumber_map_labels, segment_offsets] = renumber_edgelist( handle, std::optional>{ std::make_tuple(unique_labels.data(), static_cast(unique_labels.size()))}, @@ -646,7 +615,10 @@ coarsen_graph( : coarsened_edgelist_major_vertices.data(); edgelist.p_dst_vertices = store_transposed ? coarsened_edgelist_major_vertices.data() : coarsened_edgelist_minor_vertices.data(); - edgelist.p_edge_weights = coarsened_edgelist_weights.data(); + edgelist.p_edge_weights = + coarsened_edgelist_weights + ? 
std::optional{(*coarsened_edgelist_weights).data()} + : std::nullopt; edgelist.number_of_edges = static_cast(coarsened_edgelist_major_vertices.size()); return std::make_tuple( @@ -654,8 +626,8 @@ coarsen_graph( handle, edgelist, static_cast(renumber_map_labels.size()), - graph_properties_t{graph_view.is_symmetric(), false, graph_view.is_weighted()}, - true), + graph_properties_t{graph_view.is_symmetric(), false}, + segment_offsets), std::move(renumber_map_labels)); } diff --git a/cpp/src/experimental/graph.cu b/cpp/src/experimental/graph.cu index 2a6a60e5280..8e51723fddf 100644 --- a/cpp/src/experimental/graph.cu +++ b/cpp/src/experimental/graph.cu @@ -59,38 +59,29 @@ struct out_of_range_t { } }; +// compress edge list (COO) to CSR (or CSC) or CSR + DCSR (CSC + DCSC) hybrid template -std:: - tuple, rmm::device_uvector, rmm::device_uvector> - edgelist_to_compressed_sparse(edgelist_t const &edgelist, - vertex_t major_first, - vertex_t major_last, - vertex_t minor_first, - vertex_t minor_last, - bool is_weighted, - rmm::cuda_stream_view stream_view) +std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>> +compress_edgelist(edgelist_t const &edgelist, + vertex_t major_first, + vertex_t major_hypersparse_first, + vertex_t major_last, + vertex_t minor_first, + vertex_t minor_last, + rmm::cuda_stream_view stream_view) { rmm::device_uvector offsets((major_last - major_first) + 1, stream_view); rmm::device_uvector indices(edgelist.number_of_edges, stream_view); - rmm::device_uvector weights(is_weighted ? edgelist.number_of_edges : 0, stream_view); + auto weights = edgelist.p_edge_weights ? std::make_optional>( + edgelist.number_of_edges, stream_view) + : std::nullopt; thrust::fill(rmm::exec_policy(stream_view), offsets.begin(), offsets.end(), edge_t{0}); thrust::fill(rmm::exec_policy(stream_view), indices.begin(), indices.end(), vertex_t{0}); - // FIXME: need to performance test this code with R-mat graphs having highly-skewed degree - // distribution. If there is a small number of vertices with very large degrees, atomicAdd can - // sequentialize execution. CUDA9+ & Kepler+ provide complier/architectural optimizations to - // mitigate this impact - // (https://developer.nvidia.com/blog/cuda-pro-tip-optimized-filtering-warp-aggregated-atomics/), - // and we need to check this thrust::for_each based approach delivers the expected performance. - - // FIXME: also need to verify this approach is at least not significantly slower than the sorting - // based approach (this approach does not use extra memory, so better stick to this approach - // unless performance is significantly worse). - auto p_offsets = offsets.data(); - auto p_indices = indices.data(); - auto p_weights = is_weighted ? weights.data() : static_cast(nullptr); - thrust::for_each(rmm::exec_policy(stream_view), store_transposed ? edgelist.p_dst_vertices : edgelist.p_src_vertices, store_transposed ? 
edgelist.p_dst_vertices + edgelist.number_of_edges @@ -98,13 +89,15 @@ std:: [p_offsets, major_first] __device__(auto v) { atomicAdd(p_offsets + (v - major_first), edge_t{1}); }); - thrust::exclusive_scan( rmm::exec_policy(stream_view), offsets.begin(), offsets.end(), offsets.begin()); - if (is_weighted) { + auto p_indices = indices.data(); + if (edgelist.p_edge_weights) { + auto p_weights = (*weights).data(); + auto edge_first = thrust::make_zip_iterator(thrust::make_tuple( - edgelist.p_src_vertices, edgelist.p_dst_vertices, edgelist.p_edge_weights)); + edgelist.p_src_vertices, edgelist.p_dst_vertices, *(edgelist.p_edge_weights))); thrust::for_each(rmm::exec_policy(stream_view), edge_first, edge_first + edgelist.number_of_edges, @@ -131,7 +124,7 @@ std:: thrust::for_each(rmm::exec_policy(stream_view), edge_first, edge_first + edgelist.number_of_edges, - [p_offsets, p_indices, p_weights, major_first] __device__(auto e) { + [p_offsets, p_indices, major_first] __device__(auto e) { auto s = thrust::get<0>(e); auto d = thrust::get<1>(e); auto major = store_transposed ? d : s; @@ -148,9 +141,51 @@ std:: }); } + auto dcs_nzd_vertices = (major_hypersparse_first < major_last) + ? std::make_optional>( + major_last - major_hypersparse_first, stream_view) + : std::nullopt; + if (dcs_nzd_vertices) { + auto constexpr invalid_vertex = invalid_vertex_id::value; + + thrust::transform( + rmm::exec_policy(stream_view), + thrust::make_counting_iterator(major_hypersparse_first), + thrust::make_counting_iterator(major_last), + (*dcs_nzd_vertices).begin(), + [major_first, offsets = offsets.data()] __device__(auto major) { + auto major_offset = major - major_first; + return offsets[major_offset + 1] - offsets[major_offset] > 0 ? major : invalid_vertex; + }); + + auto pair_first = thrust::make_zip_iterator(thrust::make_tuple( + (*dcs_nzd_vertices).begin(), offsets.begin() + (major_hypersparse_first - major_first))); + (*dcs_nzd_vertices) + .resize(thrust::distance(pair_first, + thrust::remove_if(rmm::exec_policy(stream_view), + pair_first, + pair_first + (*dcs_nzd_vertices).size(), + [] __device__(auto pair) { + return thrust::get<0>(pair) == invalid_vertex; + })), + stream_view); + (*dcs_nzd_vertices).shrink_to_fit(stream_view); + if (static_cast((*dcs_nzd_vertices).size()) < major_last - major_hypersparse_first) { + thrust::copy( + rmm::exec_policy(stream_view), + offsets.begin() + (major_last - major_first), + offsets.end(), + offsets.begin() + (major_hypersparse_first - major_first) + (*dcs_nzd_vertices).size()); + offsets.resize((major_hypersparse_first - major_first) + (*dcs_nzd_vertices).size() + 1, + stream_view); + offsets.shrink_to_fit(stream_view); + } + } + // FIXME: need to add an option to sort neighbor lists - return std::make_tuple(std::move(offsets), std::move(indices), std::move(weights)); + return std::make_tuple( + std::move(offsets), std::move(indices), std::move(weights), std::move(dcs_nzd_vertices)); } } // namespace @@ -167,7 +202,7 @@ graph_t> const &segment_offsets, bool do_expensive_check) : detail::graph_base_t( handle, number_of_vertices, number_of_edges, properties), @@ -187,38 +222,40 @@ graph_tget_handle_ptr()->get_stream_view(); - CUGRAPH_EXPECTS(edgelists.size() > 0, - "Invalid input argument: edgelists.size() should be non-zero."); + CUGRAPH_EXPECTS(edgelists.size() == static_cast(col_comm_size), + "Invalid input argument: errneous edgelists.size()."); + CUGRAPH_EXPECTS( + !segment_offsets.has_value() || + ((*segment_offsets).size() == 
(detail::num_sparse_segments_per_vertex_partition + 1)), + "Invalid input argument: segment_offsets.size() returns an invalid value."); + + auto is_weighted = edgelists[0].p_edge_weights.has_value(); + auto use_dcs = + segment_offsets + ? ((*segment_offsets).size() > (detail::num_sparse_segments_per_vertex_partition + 1)) + : false; CUGRAPH_EXPECTS( - std::any_of(edgelists.begin() + 1, + std::any_of(edgelists.begin(), edgelists.end(), - [is_weighted = properties.is_weighted](auto edgelist) { + [is_weighted](auto edgelist) { return ((edgelist.number_of_edges > 0) && (edgelist.p_src_vertices == nullptr)) || ((edgelist.number_of_edges > 0) && (edgelist.p_dst_vertices == nullptr)) || (is_weighted && (edgelist.number_of_edges > 0) && - (edgelist.p_edge_weights == nullptr)) || - (!is_weighted && (edgelist.p_edge_weights != nullptr)); + ((edgelist.p_edge_weights.has_value() == false) || + (*(edgelist.p_edge_weights) == nullptr))); }) == false, "Invalid input argument: edgelists[].p_src_vertices and edgelists[].p_dst_vertices should not " "be nullptr if edgelists[].number_of_edges > 0 and edgelists[].p_edge_weights should be " - "nullptr if unweighted or should not be nullptr if weighted and edgelists[].number_of_edges > " - "0."); + "neither std::nullopt nor nullptr if weighted and edgelists[].number_of_edges > 0."); - CUGRAPH_EXPECTS(edgelists.size() == static_cast(col_comm_size), - "Invalid input argument: errneous edgelists.size()."); - - // optional expensive checks (part 1/3) + // optional expensive checks (part 1/2) if (do_expensive_check) { edge_t number_of_local_edges_sum{}; for (size_t i = 0; i < edgelists.size(); ++i) { - vertex_t major_first{}; - vertex_t major_last{}; - vertex_t minor_first{}; - vertex_t minor_last{}; - std::tie(major_first, major_last) = partition.get_matrix_partition_major_range(i); - std::tie(minor_first, minor_last) = partition.get_matrix_partition_minor_range(); + auto [major_first, major_last] = partition.get_matrix_partition_major_range(i); + auto [minor_first, minor_last] = partition.get_matrix_partition_minor_range(); number_of_local_edges_sum += edgelists[i].number_of_edges; @@ -244,103 +281,77 @@ graph_t d_segment_offsets((*segment_offsets).size(), default_stream_view); + raft::update_device(d_segment_offsets.data(), + (*segment_offsets).data(), + (*segment_offsets).size(), + default_stream_view.value()); + rmm::device_uvector d_aggregate_segment_offsets( + col_comm_size * d_segment_offsets.size(), default_stream_view); + col_comm.allgather(d_segment_offsets.data(), + d_aggregate_segment_offsets.data(), + d_segment_offsets.size(), + default_stream_view.value()); + + adj_matrix_partition_segment_offsets_ = + std::vector(d_aggregate_segment_offsets.size(), vertex_t{0}); + raft::update_host((*adj_matrix_partition_segment_offsets_).data(), + d_aggregate_segment_offsets.data(), + d_aggregate_segment_offsets.size(), + default_stream_view.value()); + + default_stream_view + .synchronize(); // this is necessary as adj_matrix_partition_segment_offsets_ can be used + // right after return. + } + + // compress edge list (COO) to CSR (or CSC) or CSR + DCSR (CSC + DCSC) hybrid adj_matrix_partition_offsets_.reserve(edgelists.size()); adj_matrix_partition_indices_.reserve(edgelists.size()); - adj_matrix_partition_weights_.reserve(properties.is_weighted ? 
edgelists.size() : 0); + if (is_weighted) { + adj_matrix_partition_weights_ = std::vector>{}; + (*adj_matrix_partition_weights_).reserve(edgelists.size()); + } + if (use_dcs) { + adj_matrix_partition_dcs_nzd_vertices_ = std::vector>{}; + adj_matrix_partition_dcs_nzd_vertex_counts_ = std::vector{}; + (*adj_matrix_partition_dcs_nzd_vertices_).reserve(edgelists.size()); + (*adj_matrix_partition_dcs_nzd_vertex_counts_).reserve(edgelists.size()); + } for (size_t i = 0; i < edgelists.size(); ++i) { - vertex_t major_first{}; - vertex_t major_last{}; - vertex_t minor_first{}; - vertex_t minor_last{}; - std::tie(major_first, major_last) = partition.get_matrix_partition_major_range(i); - std::tie(minor_first, minor_last) = partition.get_matrix_partition_minor_range(); - - rmm::device_uvector offsets(0, default_stream_view); - rmm::device_uvector indices(0, default_stream_view); - rmm::device_uvector weights(0, default_stream_view); - std::tie(offsets, indices, weights) = - edgelist_to_compressed_sparse(edgelists[i], - major_first, - major_last, - minor_first, - minor_last, - properties.is_weighted, - default_stream_view); + auto [major_first, major_last] = partition.get_matrix_partition_major_range(i); + auto [minor_first, minor_last] = partition.get_matrix_partition_minor_range(); + auto major_hypersparse_first = + use_dcs + ? major_first + + (*adj_matrix_partition_segment_offsets_) + [(*segment_offsets).size() * i + detail::num_sparse_segments_per_vertex_partition] + : major_last; + auto [offsets, indices, weights, dcs_nzd_vertices] = + compress_edgelist(edgelists[i], + major_first, + major_hypersparse_first, + major_last, + minor_first, + minor_last, + default_stream_view); + adj_matrix_partition_offsets_.push_back(std::move(offsets)); adj_matrix_partition_indices_.push_back(std::move(indices)); - if (properties.is_weighted) { adj_matrix_partition_weights_.push_back(std::move(weights)); } - } - - // update degree-based segment offsets (to be used for graph analytics kernel optimization) - - if (sorted_by_global_degree_within_vertex_partition) { - auto degrees = detail::compute_major_degrees( - *(this->get_handle_ptr()), adj_matrix_partition_offsets_, partition_); - - // optional expensive checks (part 2/3) - - if (do_expensive_check) { - CUGRAPH_EXPECTS(thrust::is_sorted(rmm::exec_policy(default_stream_view), - degrees.begin(), - degrees.end(), - thrust::greater{}), - "Invalid input argument: sorted_by_global_degree_within_vertex_partition is " - "set to true, but degrees are not non-ascending."); + if (is_weighted) { (*adj_matrix_partition_weights_).push_back(std::move(*weights)); } + if (use_dcs) { + auto dcs_nzd_vertex_count = static_cast((*dcs_nzd_vertices).size()); + (*adj_matrix_partition_dcs_nzd_vertices_).push_back(std::move(*dcs_nzd_vertices)); + (*adj_matrix_partition_dcs_nzd_vertex_counts_).push_back(dcs_nzd_vertex_count); } - - static_assert(detail::num_segments_per_vertex_partition == 3); - static_assert((detail::low_degree_threshold <= detail::mid_degree_threshold) && - (detail::mid_degree_threshold <= std::numeric_limits::max())); - rmm::device_uvector d_thresholds(detail::num_segments_per_vertex_partition - 1, - default_stream_view); - std::vector h_thresholds = { - static_cast(detail::mid_degree_threshold * col_comm_size), - static_cast(detail::low_degree_threshold * col_comm_size)}; - raft::update_device( - d_thresholds.data(), h_thresholds.data(), h_thresholds.size(), default_stream_view.value()); - - rmm::device_uvector segment_offsets(detail::num_segments_per_vertex_partition 
+ 1, - default_stream_view); - - // temporaries are necessary because the &&-overload of device_uvector is deleted - // Note that we must sync `default_stream` before these temporaries go out of scope to - // avoid use after free. (The syncs are at the end of this function) - auto zero_vertex = vertex_t{0}; - auto vertex_count = static_cast(degrees.size()); - segment_offsets.set_element_async(0, zero_vertex, default_stream_view); - segment_offsets.set_element_async( - detail::num_segments_per_vertex_partition, vertex_count, default_stream_view); - - thrust::upper_bound(rmm::exec_policy(default_stream_view), - degrees.begin(), - degrees.end(), - d_thresholds.begin(), - d_thresholds.end(), - segment_offsets.begin() + 1, - thrust::greater{}); - - rmm::device_uvector aggregate_segment_offsets(col_comm_size * segment_offsets.size(), - default_stream_view); - col_comm.allgather(segment_offsets.data(), - aggregate_segment_offsets.data(), - segment_offsets.size(), - default_stream_view.value()); - - adj_matrix_partition_segment_offsets_.resize(aggregate_segment_offsets.size()); - raft::update_host(adj_matrix_partition_segment_offsets_.data(), - aggregate_segment_offsets.data(), - aggregate_segment_offsets.size(), - default_stream_view.value()); - - default_stream_view - .synchronize(); // this is necessary as degrees, d_thresholds, and segment_offsets will - // become out-of-scope once control flow exits this block and - // adj_matrix_partition_segment_offsets_ can be used right after return. } - // optional expensive checks (part 3/3) + // optional expensive checks (part 2/2) if (do_expensive_check) { // FIXME: check for symmetricity may better be implemetned with transpose(). @@ -361,27 +372,33 @@ graph_t const &edgelist, vertex_t number_of_vertices, graph_properties_t properties, - bool sorted_by_degree, + std::optional> const &segment_offsets, bool do_expensive_check) : detail::graph_base_t( handle, number_of_vertices, edgelist.number_of_edges, properties), offsets_(rmm::device_uvector(0, handle.get_stream_view())), indices_(rmm::device_uvector(0, handle.get_stream_view())), - weights_(rmm::device_uvector(0, handle.get_stream_view())) + segment_offsets_(segment_offsets) { // cheap error checks auto default_stream_view = this->get_handle_ptr()->get_stream_view(); + auto is_weighted = edgelist.p_edge_weights.has_value(); + CUGRAPH_EXPECTS( ((edgelist.number_of_edges == 0) || (edgelist.p_src_vertices != nullptr)) && ((edgelist.number_of_edges == 0) || (edgelist.p_dst_vertices != nullptr)) && - ((properties.is_weighted && - ((edgelist.number_of_edges == 0) || (edgelist.p_edge_weights != nullptr))) || - (!properties.is_weighted && (edgelist.p_edge_weights == nullptr))), - "Invalid input argument: edgelist.p_src_vertices and edgelist.p_dst_vertices should " - "not be nullptr if edgelist.number_of_edges > 0 and edgelist.p_edge_weights should be nullptr " - "if unweighted or should not be nullptr if weighted and edgelist.number_of_edges > 0."); + (!is_weighted || (is_weighted && ((edgelist.number_of_edges == 0) || + (*(edgelist.p_edge_weights) != nullptr)))), + "Invalid input argument: edgelist.p_src_vertices and edgelist.p_dst_vertices should not be " + "nullptr if edgelist.number_of_edges > 0 and edgelist.p_edge_weights should be neither " + "std::nullopt nor nullptr if weighted and edgelist.number_of_edges > 0."); + + CUGRAPH_EXPECTS( + !segment_offsets.has_value() || + ((*segment_offsets).size() == (detail::num_sparse_segments_per_vertex_partition + 1)), + "Invalid input argument: 
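The hunk above removes the in-constructor segmentation: graph_t now simply allgathers the caller-supplied segment_offsets (produced, e.g., by renumber_edgelist()) instead of recomputing them from sorted degrees. As a rough host-side illustration of the removed logic only (placeholder names and thresholds, not the detail:: constants or the thrust::upper_bound/allgather path used here), degree-based segment offsets can be derived as follows:

  #include <algorithm>
  #include <cstddef>
  #include <functional>
  #include <iterator>
  #include <vector>

  // degrees must be sorted in descending order; thresholds must also be descending
  // (conceptually {mid_degree_threshold, low_degree_threshold}).
  std::vector<size_t> compute_segment_offsets(std::vector<size_t> const& degrees,
                                              std::vector<size_t> const& thresholds)
  {
    std::vector<size_t> offsets(thresholds.size() + 2);
    offsets.front() = 0;
    offsets.back()  = degrees.size();
    for (size_t i = 0; i < thresholds.size(); ++i) {
      // offsets[i + 1] = index of the first vertex whose degree drops below thresholds[i]
      offsets[i + 1] = static_cast<size_t>(std::distance(
        degrees.begin(),
        std::upper_bound(
          degrees.begin(), degrees.end(), thresholds[i], std::greater<size_t>{})));
    }
    return offsets;
  }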
segment_offsets.size() returns an invalid value."); // optional expensive checks (part 1/2) @@ -407,74 +424,14 @@ graph_t(edgelist, - vertex_t{0}, - this->get_number_of_vertices(), - vertex_t{0}, - this->get_number_of_vertices(), - properties.is_weighted, - default_stream_view); - - // update degree-based segment offsets (to be used for graph analytics kernel optimization) - - if (sorted_by_degree) { - auto degree_first = thrust::make_transform_iterator( - thrust::make_counting_iterator(vertex_t{0}), - detail::degree_from_offsets_t{offsets_.data()}); - - // optional expensive checks (part 2/2) - - if (do_expensive_check) { - CUGRAPH_EXPECTS( - thrust::is_sorted(rmm::exec_policy(default_stream_view), - degree_first, - degree_first + this->get_number_of_vertices(), - thrust::greater{}), - "Invalid input argument: sorted_by_degree is set to true, but degrees are not " - "non-ascending."); - } - - static_assert(detail::num_segments_per_vertex_partition == 3); - static_assert((detail::low_degree_threshold <= detail::mid_degree_threshold) && - (detail::mid_degree_threshold <= std::numeric_limits::max())); - rmm::device_uvector d_thresholds(detail::num_segments_per_vertex_partition - 1, - default_stream_view); - std::vector h_thresholds = {static_cast(detail::mid_degree_threshold), - static_cast(detail::low_degree_threshold)}; - raft::update_device( - d_thresholds.data(), h_thresholds.data(), h_thresholds.size(), default_stream_view.value()); - - rmm::device_uvector segment_offsets(detail::num_segments_per_vertex_partition + 1, - default_stream_view); - - // temporaries are necessary because the &&-overload of device_uvector is deleted - // Note that we must sync `default_stream` before these temporaries go out of scope to - // avoid use after free. (The syncs are at the end of this function) - auto zero_vertex = vertex_t{0}; - auto vertex_count = static_cast(this->get_number_of_vertices()); - segment_offsets.set_element_async(0, zero_vertex, default_stream_view); - - segment_offsets.set_element_async( - detail::num_segments_per_vertex_partition, vertex_count, default_stream_view); - - thrust::upper_bound(rmm::exec_policy(default_stream_view), - degree_first, - degree_first + this->get_number_of_vertices(), - d_thresholds.begin(), - d_thresholds.end(), - segment_offsets.begin() + 1, - thrust::greater{}); - - segment_offsets_.resize(segment_offsets.size()); - raft::update_host(segment_offsets_.data(), - segment_offsets.data(), - segment_offsets.size(), - default_stream_view.value()); - - default_stream_view - .synchronize(); // this is necessary as segment_offsets_ can be used right after return. 
- } + std::tie(offsets_, indices_, weights_, std::ignore) = + compress_edgelist(edgelist, + vertex_t{0}, + this->get_number_of_vertices(), + this->get_number_of_vertices(), + vertex_t{0}, + this->get_number_of_vertices(), + default_stream_view); // optional expensive checks (part 3/3) diff --git a/cpp/src/experimental/graph_view.cu b/cpp/src/experimental/graph_view.cu index d5b4308c80e..3fb5560ddb7 100644 --- a/cpp/src/experimental/graph_view.cu +++ b/cpp/src/experimental/graph_view.cu @@ -54,17 +54,23 @@ struct out_of_range_t { template std::vector update_adj_matrix_partition_edge_counts( std::vector const& adj_matrix_partition_offsets, + std::optional> const& adj_matrix_partition_dcs_nzd_vertex_counts, partition_t const& partition, + std::optional> const& adj_matrix_partition_segment_offsets, cudaStream_t stream) { std::vector adj_matrix_partition_edge_counts(partition.get_number_of_matrix_partitions(), 0); + auto use_dcs = adj_matrix_partition_dcs_nzd_vertex_counts.has_value(); for (size_t i = 0; i < adj_matrix_partition_offsets.size(); ++i) { - vertex_t major_first{}; - vertex_t major_last{}; - std::tie(major_first, major_last) = partition.get_matrix_partition_major_range(i); + auto [major_first, major_last] = partition.get_matrix_partition_major_range(i); raft::update_host(&(adj_matrix_partition_edge_counts[i]), - adj_matrix_partition_offsets[i] + (major_last - major_first), + adj_matrix_partition_offsets[i] + + (use_dcs ? ((*adj_matrix_partition_segment_offsets) + [(detail::num_sparse_segments_per_vertex_partition + 2) * i + + detail::num_sparse_segments_per_vertex_partition] - + major_first + (*adj_matrix_partition_dcs_nzd_vertex_counts)[i]) + : (major_last - major_first)), 1, stream); } @@ -157,24 +163,32 @@ template graph_view_t>:: - graph_view_t(raft::handle_t const& handle, - std::vector const& adj_matrix_partition_offsets, - std::vector const& adj_matrix_partition_indices, - std::vector const& adj_matrix_partition_weights, - std::vector const& adj_matrix_partition_segment_offsets, - partition_t const& partition, - vertex_t number_of_vertices, - edge_t number_of_edges, - graph_properties_t properties, - bool sorted_by_global_degree_within_vertex_partition, - bool do_expensive_check) + graph_view_t( + raft::handle_t const& handle, + std::vector const& adj_matrix_partition_offsets, + std::vector const& adj_matrix_partition_indices, + std::optional> const& adj_matrix_partition_weights, + std::optional> const& adj_matrix_partition_dcs_nzd_vertices, + std::optional> const& adj_matrix_partition_dcs_nzd_vertex_counts, + partition_t const& partition, + vertex_t number_of_vertices, + edge_t number_of_edges, + graph_properties_t properties, + std::optional> const& adj_matrix_partition_segment_offsets, + bool do_expensive_check) : detail::graph_base_t( handle, number_of_vertices, number_of_edges, properties), adj_matrix_partition_offsets_(adj_matrix_partition_offsets), adj_matrix_partition_indices_(adj_matrix_partition_indices), adj_matrix_partition_weights_(adj_matrix_partition_weights), - adj_matrix_partition_number_of_edges_(update_adj_matrix_partition_edge_counts( - adj_matrix_partition_offsets, partition, handle.get_stream())), + adj_matrix_partition_dcs_nzd_vertices_(adj_matrix_partition_dcs_nzd_vertices), + adj_matrix_partition_dcs_nzd_vertex_counts_(adj_matrix_partition_dcs_nzd_vertex_counts), + adj_matrix_partition_number_of_edges_( + update_adj_matrix_partition_edge_counts(adj_matrix_partition_offsets, + adj_matrix_partition_dcs_nzd_vertex_counts, + partition, + 
adj_matrix_partition_segment_offsets, + handle.get_stream())), partition_(partition), adj_matrix_partition_segment_offsets_(adj_matrix_partition_segment_offsets) { @@ -188,24 +202,36 @@ graph_view_tget_subcomm(cugraph::partition_2d::key_naming_t().col_name()) .get_size(); + auto is_weighted = adj_matrix_partition_weights.has_value(); + auto use_dcs = adj_matrix_partition_dcs_nzd_vertices.has_value(); + CUGRAPH_EXPECTS(adj_matrix_partition_offsets.size() == adj_matrix_partition_indices.size(), "Internal Error: adj_matrix_partition_offsets.size() and " "adj_matrix_partition_indices.size() should coincide."); - CUGRAPH_EXPECTS((adj_matrix_partition_weights.size() == adj_matrix_partition_offsets.size()) || - (adj_matrix_partition_weights.size() == 0), - "Internal Error: adj_matrix_partition_weights.size() should coincide with " - "adj_matrix_partition_offsets.size() (if weighted) or 0 (if unweighted)."); + CUGRAPH_EXPECTS( + !is_weighted || ((*adj_matrix_partition_weights).size() == adj_matrix_partition_offsets.size()), + "Internal Error: adj_matrix_partition_weights.size() should coincide with " + "adj_matrix_partition_offsets.size() (if weighted)."); + CUGRAPH_EXPECTS(adj_matrix_partition_dcs_nzd_vertex_counts.has_value() == use_dcs, + "adj_matrix_partition_dcs_nzd_vertices.has_value() and " + "adj_matrix_partition_dcs_nzd_vertex_counts.has_value() should coincide"); + CUGRAPH_EXPECTS(!use_dcs || ((*adj_matrix_partition_dcs_nzd_vertices).size() == + (*adj_matrix_partition_dcs_nzd_vertex_counts).size()), + "Internal Error: adj_matrix_partition_dcs_nzd_vertices.size() and " + "adj_matrix_partition_dcs_nzd_vertex_counts.size() should coincide (if used)."); + CUGRAPH_EXPECTS(!use_dcs || ((*adj_matrix_partition_dcs_nzd_vertices).size() == + adj_matrix_partition_offsets.size()), + "Internal Error: adj_matrix_partition_dcs_nzd_vertices.size() should coincide " + "with adj_matrix_partition_offsets.size() (if used)."); CUGRAPH_EXPECTS(adj_matrix_partition_offsets.size() == static_cast(col_comm_size), "Internal Error: erroneous adj_matrix_partition_offsets.size()."); - CUGRAPH_EXPECTS((sorted_by_global_degree_within_vertex_partition && - (adj_matrix_partition_segment_offsets.size() == - col_comm_size * (detail::num_segments_per_vertex_partition + 1))) || - (!sorted_by_global_degree_within_vertex_partition && - (adj_matrix_partition_segment_offsets.size() == 0)), - "Internal Error: adj_matrix_partition_segment_offsets.size() does not match " - "with sorted_by_global_degree_within_vertex_partition."); + CUGRAPH_EXPECTS( + !adj_matrix_partition_segment_offsets.has_value() || + ((*adj_matrix_partition_segment_offsets).size() == + col_comm_size * (detail::num_sparse_segments_per_vertex_partition + (use_dcs ? 
2 : 1))), + "Internal Error: invalid adj_matrix_partition_segment_offsets.size()."); // optional expensive checks @@ -221,20 +247,23 @@ graph_view_t>::graph_view_t(raft::handle_t const& handle, edge_t const* offsets, vertex_t const* indices, - weight_t const* weights, - std::vector const& - segment_offsets, + std::optional weights, vertex_t number_of_vertices, edge_t number_of_edges, graph_properties_t properties, - bool sorted_by_degree, + std::optional> const& + segment_offsets, bool do_expensive_check) : detail::graph_base_t( handle, number_of_vertices, number_of_edges, properties), @@ -324,10 +353,10 @@ graph_view_t{0, this->get_number_of_vertices()}) == 0, "Internal Error: adj_matrix_partition_indices[] have out-of-range vertex IDs."); - if (sorted_by_degree) { + if (segment_offsets) { auto degree_first = thrust::make_transform_iterator(thrust::make_counting_iterator(vertex_t{0}), detail::degree_from_offsets_t{offsets}); - CUGRAPH_EXPECTS(thrust::is_sorted(rmm::exec_policy(default_stream_view), - degree_first, - degree_first + this->get_number_of_vertices(), - thrust::greater{}), - "Internal Error: sorted_by_degree is set to true, but degrees are not " - "in ascending order."); + CUGRAPH_EXPECTS( + thrust::is_sorted(rmm::exec_policy(default_stream_view), + degree_first, + degree_first + this->get_number_of_vertices(), + thrust::greater{}), + "Internal Error: segment_offsets are provided, but degrees are not in descending order."); - CUGRAPH_EXPECTS(std::is_sorted(segment_offsets.begin(), segment_offsets.end()), + CUGRAPH_EXPECTS(std::is_sorted((*segment_offsets).begin(), (*segment_offsets).end()), "Internal Error: erroneous segment_offsets."); - CUGRAPH_EXPECTS(segment_offsets[0] == 0, "Invalid input argument segment_offsets."); - CUGRAPH_EXPECTS(segment_offsets.back() == this->get_number_of_vertices(), + CUGRAPH_EXPECTS((*segment_offsets)[0] == 0, "Invalid input argument segment_offsets."); + CUGRAPH_EXPECTS((*segment_offsets).back() == this->get_number_of_vertices(), "Invalid input argument: segment_offsets."); } diff --git a/cpp/src/experimental/induced_subgraph.cu b/cpp/src/experimental/induced_subgraph.cu index af96103c486..fe390690aa6 100644 --- a/cpp/src/experimental/induced_subgraph.cu +++ b/cpp/src/experimental/induced_subgraph.cu @@ -16,9 +16,9 @@ #include #include -#include +#include #include -#include +#include #include #include @@ -44,7 +44,7 @@ template std::tuple, rmm::device_uvector, - rmm::device_uvector, + std::optional>, rmm::device_uvector> extract_induced_subgraphs( raft::handle_t const &handle, @@ -81,8 +81,8 @@ extract_induced_subgraphs( subgraph_offsets, subgraph_offsets + (num_subgraphs + 1)), "Invalid input argument: subgraph_offsets is not sorted."); - vertex_partition_device_t> - vertex_partition(graph_view); + auto vertex_partition = + vertex_partition_device_view_t(graph_view.get_vertex_partition_view()); CUGRAPH_EXPECTS(thrust::count_if(rmm::exec_policy(handle.get_stream_view()), subgraph_vertices, subgraph_vertices + num_aggregate_subgraph_vertices, @@ -134,8 +134,8 @@ extract_induced_subgraphs( num_aggregate_subgraph_vertices + 1, handle.get_stream_view()); // for each element of subgraph_vertices - matrix_partition_device_t> - matrix_partition(graph_view, 0); + auto matrix_partition = matrix_partition_device_view_t( + graph_view.get_matrix_partition_view()); // count the numbers of the induced subgraph edges for each vertex in the aggregate subgraph // vertex list. 
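The lambdas below pull a vertex's local edges through the new matrix_partition_device_view_t, receiving the weight pointer as a thrust::optional that stays empty for unweighted graphs. A condensed, illustrative device-side sketch of that access pattern (assuming the template parameter order matrix_partition_device_view_t<vertex_t, edge_t, weight_t, multi_gpu> and the member functions used in this file; weighted_degree is a hypothetical helper, not part of this patch):

  template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
  __device__ weight_t weighted_degree(
    matrix_partition_device_view_t<vertex_t, edge_t, weight_t, multi_gpu> const& matrix_partition,
    vertex_t major)
  {
    vertex_t const* indices{nullptr};
    thrust::optional<weight_t const*> weights{thrust::nullopt};  // stays nullopt for unweighted graphs
    edge_t local_degree{};
    auto major_offset = matrix_partition.get_major_offset_from_major_nocheck(major);
    thrust::tie(indices, weights, local_degree) = matrix_partition.get_local_edges(major_offset);
    (void)indices;  // neighbor IDs are not needed in this sketch
    weight_t sum{0};
    for (edge_t j = 0; j < local_degree; ++j) {
      sum += weights ? (*weights)[j] : weight_t{1};  // dereference the optional only when engaged
    }
    return sum;
  }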
thrust::transform( @@ -148,7 +148,7 @@ extract_induced_subgraphs( subgraph_offsets + 1, thrust::upper_bound(thrust::seq, subgraph_offsets, subgraph_offsets + num_subgraphs, i)); vertex_t const *indices{nullptr}; - weight_t const *weights{nullptr}; + thrust::optional weights{thrust::nullopt}; edge_t local_degree{}; auto major_offset = matrix_partition.get_major_offset_from_major_nocheck(subgraph_vertices[i]); @@ -181,8 +181,10 @@ extract_induced_subgraphs( rmm::device_uvector edge_majors(num_aggregate_edges, handle.get_stream_view()); rmm::device_uvector edge_minors(num_aggregate_edges, handle.get_stream_view()); - rmm::device_uvector edge_weights( - graph_view.is_weighted() ? num_aggregate_edges : size_t{0}, handle.get_stream_view()); + auto edge_weights = graph_view.is_weighted() + ? std::make_optional>( + num_aggregate_edges, handle.get_stream_view()) + : std::nullopt; // fill the edge list buffer (to be returned) for each vetex in the aggregate subgraph vertex // list (use the offsets computed in the Phase 1) @@ -197,27 +199,28 @@ extract_induced_subgraphs( subgraph_vertex_output_offsets = subgraph_vertex_output_offsets.data(), edge_majors = edge_majors.data(), edge_minors = edge_minors.data(), - edge_weights = edge_weights.data()] __device__(auto i) { + edge_weights = edge_weights ? thrust::optional{(*edge_weights).data()} + : thrust::nullopt] __device__(auto i) { auto subgraph_idx = thrust::distance( subgraph_offsets + 1, thrust::upper_bound( thrust::seq, subgraph_offsets, subgraph_offsets + num_subgraphs, size_t{i})); vertex_t const *indices{nullptr}; - weight_t const *weights{nullptr}; + thrust::optional weights{thrust::nullopt}; edge_t local_degree{}; auto major_offset = matrix_partition.get_major_offset_from_major_nocheck(subgraph_vertices[i]); thrust::tie(indices, weights, local_degree) = matrix_partition.get_local_edges(major_offset); - if (weights != nullptr) { + if (weights) { auto triplet_first = thrust::make_zip_iterator(thrust::make_tuple( - thrust::make_constant_iterator(subgraph_vertices[i]), indices, weights)); + thrust::make_constant_iterator(subgraph_vertices[i]), indices, *weights)); // FIXME: this is inefficient for high local degree vertices thrust::copy_if( thrust::seq, triplet_first, triplet_first + local_degree, - thrust::make_zip_iterator(thrust::make_tuple(edge_majors, edge_minors, edge_weights)) + + thrust::make_zip_iterator(thrust::make_tuple(edge_majors, edge_minors, *edge_weights)) + subgraph_vertex_output_offsets[i], [vertex_first = subgraph_vertices + subgraph_offsets[subgraph_idx], vertex_last = @@ -264,7 +267,7 @@ extract_induced_subgraphs( template std::tuple, rmm::device_uvector, - rmm::device_uvector, + std::optional>, rmm::device_uvector> extract_induced_subgraphs(raft::handle_t const &handle, graph_view_t const &graph_view, @@ -275,7 +278,7 @@ extract_induced_subgraphs(raft::handle_t const &handle, template std::tuple, rmm::device_uvector, - rmm::device_uvector, + std::optional>, rmm::device_uvector> extract_induced_subgraphs(raft::handle_t const &handle, graph_view_t const &graph_view, @@ -286,7 +289,7 @@ extract_induced_subgraphs(raft::handle_t const &handle, template std::tuple, rmm::device_uvector, - rmm::device_uvector, + std::optional>, rmm::device_uvector> extract_induced_subgraphs(raft::handle_t const &handle, graph_view_t const &graph_view, @@ -297,7 +300,7 @@ extract_induced_subgraphs(raft::handle_t const &handle, template std::tuple, rmm::device_uvector, - rmm::device_uvector, + std::optional>, rmm::device_uvector> 
extract_induced_subgraphs(raft::handle_t const &handle, graph_view_t const &graph_view, @@ -308,7 +311,7 @@ extract_induced_subgraphs(raft::handle_t const &handle, template std::tuple, rmm::device_uvector, - rmm::device_uvector, + std::optional>, rmm::device_uvector> extract_induced_subgraphs(raft::handle_t const &handle, graph_view_t const &graph_view, @@ -319,7 +322,7 @@ extract_induced_subgraphs(raft::handle_t const &handle, template std::tuple, rmm::device_uvector, - rmm::device_uvector, + std::optional>, rmm::device_uvector> extract_induced_subgraphs(raft::handle_t const &handle, graph_view_t const &graph_view, @@ -330,7 +333,7 @@ extract_induced_subgraphs(raft::handle_t const &handle, template std::tuple, rmm::device_uvector, - rmm::device_uvector, + std::optional>, rmm::device_uvector> extract_induced_subgraphs(raft::handle_t const &handle, graph_view_t const &graph_view, @@ -341,7 +344,7 @@ extract_induced_subgraphs(raft::handle_t const &handle, template std::tuple, rmm::device_uvector, - rmm::device_uvector, + std::optional>, rmm::device_uvector> extract_induced_subgraphs(raft::handle_t const &handle, graph_view_t const &graph_view, @@ -352,7 +355,7 @@ extract_induced_subgraphs(raft::handle_t const &handle, template std::tuple, rmm::device_uvector, - rmm::device_uvector, + std::optional>, rmm::device_uvector> extract_induced_subgraphs(raft::handle_t const &handle, graph_view_t const &graph_view, @@ -363,7 +366,7 @@ extract_induced_subgraphs(raft::handle_t const &handle, template std::tuple, rmm::device_uvector, - rmm::device_uvector, + std::optional>, rmm::device_uvector> extract_induced_subgraphs(raft::handle_t const &handle, graph_view_t const &graph_view, @@ -374,7 +377,7 @@ extract_induced_subgraphs(raft::handle_t const &handle, template std::tuple, rmm::device_uvector, - rmm::device_uvector, + std::optional>, rmm::device_uvector> extract_induced_subgraphs(raft::handle_t const &handle, graph_view_t const &graph_view, @@ -385,7 +388,7 @@ extract_induced_subgraphs(raft::handle_t const &handle, template std::tuple, rmm::device_uvector, - rmm::device_uvector, + std::optional>, rmm::device_uvector> extract_induced_subgraphs(raft::handle_t const &handle, graph_view_t const &graph_view, diff --git a/cpp/src/experimental/pagerank.cu b/cpp/src/experimental/pagerank.cu index 66cc416f91b..64b80c14b2e 100644 --- a/cpp/src/experimental/pagerank.cu +++ b/cpp/src/experimental/pagerank.cu @@ -24,7 +24,6 @@ #include #include #include -#include #include #include @@ -42,18 +41,19 @@ namespace detail { // FIXME: personalization_vector_size is confusing in OPG (local or aggregate?) 
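pagerank() below switches its raw-pointer inputs (the precomputed out-weight sums and the personalization triplet) to std::optional, so absent inputs are expressed as std::nullopt rather than nullptr plus a zero size. A hypothetical single-GPU call site, assuming cugraph::experimental::pagerank as instantiated at the end of this file, a graph_view_t<int32_t, int32_t, float, true, false> named graph_view, and an rmm::device_uvector<float> pageranks sized to the local vertex count:

  cugraph::experimental::pagerank(handle,
                                  graph_view,
                                  std::optional<float const*>{std::nullopt},    // compute out-weight sums internally
                                  std::optional<int32_t const*>{std::nullopt},  // no personalization vertices
                                  std::optional<float const*>{std::nullopt},    // no personalization values
                                  std::optional<int32_t>{std::nullopt},         // no personalization vector size
                                  pageranks.data(),
                                  0.85f,         // alpha (damping factor)
                                  1e-6f,         // epsilon (convergence threshold)
                                  size_t{500},   // max_iterations
                                  false,         // has_initial_guess
                                  false);        // do_expensive_check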
template -void pagerank(raft::handle_t const& handle, - GraphViewType const& pull_graph_view, - typename GraphViewType::weight_type const* precomputed_vertex_out_weight_sums, - typename GraphViewType::vertex_type const* personalization_vertices, - result_t const* personalization_values, - typename GraphViewType::vertex_type personalization_vector_size, - result_t* pageranks, - result_t alpha, - result_t epsilon, - size_t max_iterations, - bool has_initial_guess, - bool do_expensive_check) +void pagerank( + raft::handle_t const& handle, + GraphViewType const& pull_graph_view, + std::optional precomputed_vertex_out_weight_sums, + std::optional personalization_vertices, + std::optional personalization_values, + std::optional personalization_vector_size, + result_t* pageranks, + result_t alpha, + result_t epsilon, + size_t max_iterations, + bool has_initial_guess, + bool do_expensive_check) { using vertex_t = typename GraphViewType::vertex_type; using weight_t = typename GraphViewType::weight_type; @@ -69,27 +69,28 @@ void pagerank(raft::handle_t const& handle, if (num_vertices == 0) { return; } auto aggregate_personalization_vector_size = - GraphViewType::is_multi_gpu - ? host_scalar_allreduce(handle.get_comms(), personalization_vector_size, handle.get_stream()) - : personalization_vector_size; + personalization_vertices + ? GraphViewType::is_multi_gpu + ? host_scalar_allreduce( + handle.get_comms(), *personalization_vector_size, handle.get_stream()) + : *personalization_vector_size + : vertex_t{0}; // 1. check input arguments - CUGRAPH_EXPECTS( - ((personalization_vector_size > 0) && (personalization_vertices != nullptr) && - (personalization_values != nullptr)) || - ((personalization_vector_size == 0) && (personalization_vertices == nullptr) && - (personalization_values == nullptr)), - "Invalid input argument: if personalization_vector_size is non-zero, personalization verties " - "and personalization values should be provided. 
Otherwise, they should not be provided."); + CUGRAPH_EXPECTS((personalization_vertices.has_value() == false) || + (personalization_values.has_value() && personalization_vector_size.has_value()), + "Invalid input argument: if personalization_vertices.has_value() is true, " + "personalization_values.has_value() and personalization_vector_size.has_value() " + "should be true as well."); CUGRAPH_EXPECTS((alpha >= 0.0) && (alpha <= 1.0), "Invalid input argument: alpha should be in [0.0, 1.0]."); CUGRAPH_EXPECTS(epsilon >= 0.0, "Invalid input argument: epsilon should be non-negative."); if (do_expensive_check) { - if (precomputed_vertex_out_weight_sums != nullptr) { + if (precomputed_vertex_out_weight_sums) { auto num_negative_precomputed_vertex_out_weight_sums = count_if_v( - handle, pull_graph_view, precomputed_vertex_out_weight_sums, [] __device__(auto val) { + handle, pull_graph_view, *precomputed_vertex_out_weight_sums, [] __device__(auto val) { return val < result_t{0.0}; }); CUGRAPH_EXPECTS( @@ -118,12 +119,13 @@ void pagerank(raft::handle_t const& handle, } if (aggregate_personalization_vector_size > 0) { - vertex_partition_device_t vertex_partition(pull_graph_view); + auto vertex_partition = vertex_partition_device_view_t( + pull_graph_view.get_vertex_partition_view()); auto num_invalid_vertices = count_if_v(handle, pull_graph_view, - personalization_vertices, - personalization_vertices + personalization_vector_size, + *personalization_vertices, + *personalization_vertices + *personalization_vector_size, [vertex_partition] __device__(auto val) { return !(vertex_partition.is_valid_vertex(val) && vertex_partition.is_local_vertex_nocheck(val)); @@ -132,8 +134,8 @@ void pagerank(raft::handle_t const& handle, "Invalid input argument: peresonalization vertices have invalid vertex IDs."); auto num_negative_values = count_if_v(handle, pull_graph_view, - personalization_values, - personalization_values + personalization_vector_size, + *personalization_values, + *personalization_values + *personalization_vector_size, [] __device__(auto val) { return val < 0.0; }); CUGRAPH_EXPECTS(num_negative_values == 0, "Invalid input argument: peresonalization values should be non-negative."); @@ -142,20 +144,21 @@ void pagerank(raft::handle_t const& handle, // 2. compute the sums of the out-going edge weights (if not provided) - auto tmp_vertex_out_weight_sums = precomputed_vertex_out_weight_sums == nullptr - ? pull_graph_view.compute_out_weight_sums(handle) - : rmm::device_uvector(0, handle.get_stream()); - auto vertex_out_weight_sums = precomputed_vertex_out_weight_sums != nullptr - ? precomputed_vertex_out_weight_sums - : tmp_vertex_out_weight_sums.data(); + auto tmp_vertex_out_weight_sums = precomputed_vertex_out_weight_sums + ? std::nullopt + : std::optional>{ + pull_graph_view.compute_out_weight_sums(handle)}; + auto vertex_out_weight_sums = precomputed_vertex_out_weight_sums + ? *precomputed_vertex_out_weight_sums + : (*tmp_vertex_out_weight_sums).data(); // 3. 
initialize pagerank values if (has_initial_guess) { auto sum = reduce_v(handle, pull_graph_view, pageranks, result_t{0.0}); - CUGRAPH_EXPECTS( - sum > 0.0, - "Invalid input argument: sum of the PageRank initial guess values should be positive."); + CUGRAPH_EXPECTS(sum > 0.0, + "Invalid input argument: sum of the PageRank initial " + "guess values should be positive."); thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), pageranks, pageranks + pull_graph_view.get_number_of_local_vertices(), @@ -174,11 +177,12 @@ void pagerank(raft::handle_t const& handle, if (aggregate_personalization_vector_size > 0) { personalization_sum = reduce_v(handle, pull_graph_view, - personalization_values, - personalization_values + personalization_vector_size, + *personalization_values, + *personalization_values + *personalization_vector_size, result_t{0.0}); CUGRAPH_EXPECTS(personalization_sum > 0.0, - "Invalid input argument: sum of personalization valuese should be positive."); + "Invalid input argument: sum of personalization valuese " + "should be positive."); } // 5. pagerank iteration @@ -240,13 +244,14 @@ void pagerank(raft::handle_t const& handle, pageranks); if (aggregate_personalization_vector_size > 0) { - vertex_partition_device_t vertex_partition(pull_graph_view); + auto vertex_partition = vertex_partition_device_view_t( + pull_graph_view.get_vertex_partition_view()); auto val_first = thrust::make_zip_iterator( - thrust::make_tuple(personalization_vertices, personalization_values)); + thrust::make_tuple(*personalization_vertices, *personalization_values)); thrust::for_each( rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), val_first, - val_first + personalization_vector_size, + val_first + *personalization_vector_size, [vertex_partition, pageranks, dangling_sum, personalization_sum, alpha] __device__( auto val) { auto v = thrust::get<0>(val); @@ -279,10 +284,10 @@ void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - weight_t const* precomputed_vertex_out_weight_sums, - vertex_t const* personalization_vertices, - result_t const* personalization_values, - vertex_t personalization_vector_size, + std::optional precomputed_vertex_out_weight_sums, + std::optional personalization_vertices, + std::optional personalization_values, + std::optional personalization_vector_size, result_t* pageranks, result_t alpha, result_t epsilon, @@ -308,10 +313,10 @@ void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - float const* precomputed_vertex_out_weight_sums, - int32_t const* personalization_vertices, - float const* personalization_values, - int32_t personalization_vector_size, + std::optional precomputed_vertex_out_weight_sums, + std::optional personalization_vertices, + std::optional personalization_values, + std::optional personalization_vector_size, float* pageranks, float alpha, float epsilon, @@ -321,10 +326,10 @@ template void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - double const* precomputed_vertex_out_weight_sums, - int32_t const* personalization_vertices, - double const* personalization_values, - int32_t personalization_vector_size, + std::optional precomputed_vertex_out_weight_sums, + std::optional personalization_vertices, + std::optional personalization_values, + std::optional personalization_vector_size, double* pageranks, 
double alpha, double epsilon, @@ -334,10 +339,10 @@ template void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - float const* precomputed_vertex_out_weight_sums, - int32_t const* personalization_vertices, - float const* personalization_values, - int32_t personalization_vector_size, + std::optional precomputed_vertex_out_weight_sums, + std::optional personalization_vertices, + std::optional personalization_values, + std::optional personalization_vector_size, float* pageranks, float alpha, float epsilon, @@ -347,10 +352,10 @@ template void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - double const* precomputed_vertex_out_weight_sums, - int32_t const* personalization_vertices, - double const* personalization_values, - int32_t personalization_vector_size, + std::optional precomputed_vertex_out_weight_sums, + std::optional personalization_vertices, + std::optional personalization_values, + std::optional personalization_vector_size, double* pageranks, double alpha, double epsilon, @@ -360,10 +365,10 @@ template void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - float const* precomputed_vertex_out_weight_sums, - int64_t const* personalization_vertices, - float const* personalization_values, - int64_t personalization_vector_size, + std::optional precomputed_vertex_out_weight_sums, + std::optional personalization_vertices, + std::optional personalization_values, + std::optional personalization_vector_size, float* pageranks, float alpha, float epsilon, @@ -373,10 +378,10 @@ template void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - double const* precomputed_vertex_out_weight_sums, - int64_t const* personalization_vertices, - double const* personalization_values, - int64_t personalization_vector_size, + std::optional precomputed_vertex_out_weight_sums, + std::optional personalization_vertices, + std::optional personalization_values, + std::optional personalization_vector_size, double* pageranks, double alpha, double epsilon, @@ -386,10 +391,10 @@ template void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - float const* precomputed_vertex_out_weight_sums, - int32_t const* personalization_vertices, - float const* personalization_values, - int32_t personalization_vector_size, + std::optional precomputed_vertex_out_weight_sums, + std::optional personalization_vertices, + std::optional personalization_values, + std::optional personalization_vector_size, float* pageranks, float alpha, float epsilon, @@ -399,10 +404,10 @@ template void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - double const* precomputed_vertex_out_weight_sums, - int32_t const* personalization_vertices, - double const* personalization_values, - int32_t personalization_vector_size, + std::optional precomputed_vertex_out_weight_sums, + std::optional personalization_vertices, + std::optional personalization_values, + std::optional personalization_vector_size, double* pageranks, double alpha, double epsilon, @@ -412,10 +417,10 @@ template void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - float 
const* precomputed_vertex_out_weight_sums, - int32_t const* personalization_vertices, - float const* personalization_values, - int32_t personalization_vector_size, + std::optional precomputed_vertex_out_weight_sums, + std::optional personalization_vertices, + std::optional personalization_values, + std::optional personalization_vector_size, float* pageranks, float alpha, float epsilon, @@ -425,10 +430,10 @@ template void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - double const* precomputed_vertex_out_weight_sums, - int32_t const* personalization_vertices, - double const* personalization_values, - int32_t personalization_vector_size, + std::optional precomputed_vertex_out_weight_sums, + std::optional personalization_vertices, + std::optional personalization_values, + std::optional personalization_vector_size, double* pageranks, double alpha, double epsilon, @@ -438,10 +443,10 @@ template void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - float const* precomputed_vertex_out_weight_sums, - int64_t const* personalization_vertices, - float const* personalization_values, - int64_t personalization_vector_size, + std::optional precomputed_vertex_out_weight_sums, + std::optional personalization_vertices, + std::optional personalization_values, + std::optional personalization_vector_size, float* pageranks, float alpha, float epsilon, @@ -451,10 +456,10 @@ template void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - double const* precomputed_vertex_out_weight_sums, - int64_t const* personalization_vertices, - double const* personalization_values, - int64_t personalization_vector_size, + std::optional precomputed_vertex_out_weight_sums, + std::optional personalization_vertices, + std::optional personalization_values, + std::optional personalization_vector_size, double* pageranks, double alpha, double epsilon, diff --git a/cpp/src/experimental/renumber_edgelist.cu b/cpp/src/experimental/renumber_edgelist.cu index afd7bce772e..6dd48c326ec 100644 --- a/cpp/src/experimental/renumber_edgelist.cu +++ b/cpp/src/experimental/renumber_edgelist.cu @@ -47,7 +47,7 @@ namespace experimental { namespace detail { template -rmm::device_uvector compute_renumber_map( +std::tuple, std::vector> compute_renumber_map( raft::handle_t const& handle, std::optional> optional_vertex_span, std::vector const& edgelist_major_vertices, @@ -320,7 +320,61 @@ rmm::device_uvector compute_renumber_map( labels.begin(), thrust::greater()); - return labels; + // 7. 
compute segment_offsets + + static_assert(detail::num_sparse_segments_per_vertex_partition == 3); + static_assert((detail::low_degree_threshold <= detail::mid_degree_threshold) && + (detail::mid_degree_threshold <= std::numeric_limits::max())); + size_t mid_degree_threshold{detail::mid_degree_threshold}; + size_t low_degree_threshold{detail::low_degree_threshold}; + size_t hypersparse_degree_threshold{0}; + if (multi_gpu) { + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_size = col_comm.get_size(); + mid_degree_threshold *= col_comm_size; + low_degree_threshold *= col_comm_size; + hypersparse_degree_threshold = + static_cast(col_comm_size * detail::hypersparse_threshold_ratio); + } + auto num_segments_per_vertex_partition = + detail::num_sparse_segments_per_vertex_partition + + (hypersparse_degree_threshold > 0 ? size_t{1} : size_t{0}); + rmm::device_uvector d_thresholds(num_segments_per_vertex_partition - 1, + handle.get_stream()); + auto h_thresholds = hypersparse_degree_threshold > 0 + ? std::vector{static_cast(mid_degree_threshold), + static_cast(low_degree_threshold), + static_cast(hypersparse_degree_threshold)} + : std::vector{static_cast(mid_degree_threshold), + static_cast(low_degree_threshold)}; + raft::update_device( + d_thresholds.data(), h_thresholds.data(), h_thresholds.size(), handle.get_stream()); + + rmm::device_uvector d_segment_offsets(num_segments_per_vertex_partition + 1, + handle.get_stream()); + + auto zero_vertex = vertex_t{0}; + auto vertex_count = static_cast(counts.size()); + d_segment_offsets.set_element_async(0, zero_vertex, handle.get_stream()); + d_segment_offsets.set_element_async( + num_segments_per_vertex_partition, vertex_count, handle.get_stream()); + + thrust::upper_bound(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + counts.begin(), + counts.end(), + d_thresholds.begin(), + d_thresholds.end(), + d_segment_offsets.begin() + 1, + thrust::greater{}); + + std::vector h_segment_offsets(d_segment_offsets.size()); + raft::update_host(h_segment_offsets.data(), + d_segment_offsets.data(), + d_segment_offsets.size(), + handle.get_stream()); + handle.get_stream_view().synchronize(); + + return std::make_tuple(std::move(labels), h_segment_offsets); } template @@ -538,7 +592,11 @@ void expensive_check_edgelist( template std::enable_if_t, partition_t, vertex_t, edge_t>> + std::tuple, + partition_t, + vertex_t, + edge_t, + std::vector>> renumber_edgelist(raft::handle_t const& handle, std::optional> optional_local_vertex_span, std::vector const& edgelist_major_vertices /* [INOUT] */, @@ -573,7 +631,7 @@ renumber_edgelist(raft::handle_t const& handle, // 1. 
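// Host-side analogue of the segment_offsets computation in the hunk above: with per-vertex
// degrees sorted in descending order, upper_bound with greater<> yields, for each threshold,
// the index of the first vertex whose degree falls below that threshold. The degree values and
// thresholds below are made up; the real thresholds come from detail::mid/low_degree_threshold
// (scaled by col_comm_size) and the hypersparse ratio.
#include <algorithm>
#include <cstdint>
#include <functional>
#include <iostream>
#include <vector>

int main()
{
  std::vector<int64_t> degrees{40, 33, 12, 9, 7, 3, 2, 1, 0, 0};  // sorted descending
  std::vector<int64_t> thresholds{16, 4, 1};  // high/mid, mid/low, low/hypersparse boundaries

  std::vector<int64_t> offsets{0};
  for (auto t : thresholds) {
    auto it = std::upper_bound(degrees.begin(), degrees.end(), t, std::greater<int64_t>{});
    offsets.push_back(static_cast<int64_t>(it - degrees.begin()));
  }
  offsets.push_back(static_cast<int64_t>(degrees.size()));

  for (auto o : offsets) { std::cout << o << ' '; }  // prints: 0 2 5 8 10
  std::cout << '\n';
  return 0;
}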
compute renumber map - auto renumber_map_labels = + auto [renumber_map_labels, segment_offsets] = detail::compute_renumber_map(handle, optional_local_vertex_span, edgelist_const_major_vertices, @@ -712,18 +770,21 @@ renumber_edgelist(raft::handle_t const& handle, comm.barrier(); // currently, this is ncclAllReduce #endif - return std::make_tuple( - std::move(renumber_map_labels), partition, number_of_vertices, number_of_edges); + return std::make_tuple(std::move(renumber_map_labels), + partition, + number_of_vertices, + number_of_edges, + segment_offsets); } template -std::enable_if_t> renumber_edgelist( - raft::handle_t const& handle, - std::optional> optional_vertex_span, - vertex_t* edgelist_major_vertices /* [INOUT] */, - vertex_t* edgelist_minor_vertices /* [INOUT] */, - edge_t num_edgelist_edges, - bool do_expensive_check) +std::enable_if_t, std::vector>> +renumber_edgelist(raft::handle_t const& handle, + std::optional> optional_vertex_span, + vertex_t* edgelist_major_vertices /* [INOUT] */, + vertex_t* edgelist_minor_vertices /* [INOUT] */, + edge_t num_edgelist_edges, + bool do_expensive_check) { if (do_expensive_check) { expensive_check_edgelist( @@ -734,12 +795,13 @@ std::enable_if_t> renumber_edgelist( std::vector{num_edgelist_edges}); } - auto renumber_map_labels = detail::compute_renumber_map( - handle, - optional_vertex_span, - std::vector{edgelist_major_vertices}, - std::vector{edgelist_minor_vertices}, - std::vector{num_edgelist_edges}); + auto [renumber_map_labels, segment_offsets] = + detail::compute_renumber_map( + handle, + optional_vertex_span, + std::vector{edgelist_major_vertices}, + std::vector{edgelist_minor_vertices}, + std::vector{num_edgelist_edges}); double constexpr load_factor = 0.7; @@ -764,14 +826,18 @@ std::enable_if_t> renumber_edgelist( renumber_map.find( edgelist_minor_vertices, edgelist_minor_vertices + num_edgelist_edges, edgelist_minor_vertices); - return renumber_map_labels; + return std::make_tuple(std::move(renumber_map_labels), segment_offsets); } } // namespace detail template std::enable_if_t, partition_t, vertex_t, edge_t>> + std::tuple, + partition_t, + vertex_t, + edge_t, + std::vector>> renumber_edgelist(raft::handle_t const& handle, std::optional> optional_local_vertex_span, std::vector const& edgelist_major_vertices /* [INOUT] */, @@ -788,13 +854,13 @@ renumber_edgelist(raft::handle_t const& handle, } template -std::enable_if_t> renumber_edgelist( - raft::handle_t const& handle, - std::optional> optional_vertex_span, - vertex_t* edgelist_major_vertices /* [INOUT] */, - vertex_t* edgelist_minor_vertices /* [INOUT] */, - edge_t num_edgelist_edges, - bool do_expensive_check) +std::enable_if_t, std::vector>> +renumber_edgelist(raft::handle_t const& handle, + std::optional> optional_vertex_span, + vertex_t* edgelist_major_vertices /* [INOUT] */, + vertex_t* edgelist_minor_vertices /* [INOUT] */, + edge_t num_edgelist_edges, + bool do_expensive_check) { return detail::renumber_edgelist(handle, optional_vertex_span, @@ -809,16 +875,18 @@ std::enable_if_t> renumber_edgelist( // instantiations for // -template std::tuple, partition_t, int32_t, int32_t> -renumber_edgelist( - raft::handle_t const& handle, - std::optional> optional_local_vertex_span, - std::vector const& edgelist_major_vertices /* [INOUT] */, - std::vector const& edgelist_minor_vertices /* [INOUT] */, - std::vector const& edgelist_edge_counts, - bool do_expensive_check); - -template rmm::device_uvector renumber_edgelist( +template std:: + tuple, partition_t, int32_t, int32_t, 
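// A small sketch of the return convention adopted above: a move-only device buffer bundled with
// host-side segment offsets in a std::tuple and unpacked with structured bindings at the call
// site. std::unique_ptr<std::vector<int>> stands in for rmm::device_uvector here; build_labels()
// is illustrative, not a cuGraph function.
#include <cstdio>
#include <memory>
#include <tuple>
#include <utility>
#include <vector>

std::tuple<std::unique_ptr<std::vector<int>>, std::vector<int>> build_labels()
{
  auto labels = std::make_unique<std::vector<int>>(std::vector<int>{5, 3, 1, 0});
  std::vector<int> offsets{0, 2, 4};
  // the move-only member has to be moved into the tuple, as with rmm::device_uvector above
  return std::make_tuple(std::move(labels), offsets);
}

int main()
{
  auto [labels, segment_offsets] = build_labels();  // structured bindings, as in the hunk
  std::printf("%zu labels, %zu offsets\n", labels->size(), segment_offsets.size());
  return 0;
}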
std::vector> + renumber_edgelist( + raft::handle_t const& handle, + std::optional> optional_local_vertex_span, + std::vector const& edgelist_major_vertices /* [INOUT] */, + std::vector const& edgelist_minor_vertices /* [INOUT] */, + std::vector const& edgelist_edge_counts, + bool do_expensive_check); + +template std::tuple, std::vector> +renumber_edgelist( raft::handle_t const& handle, std::optional> optional_vertex_span, int32_t* edgelist_major_vertices /* [INOUT] */, @@ -828,16 +896,18 @@ template rmm::device_uvector renumber_edgelist // instantiations for // -template std::tuple, partition_t, int32_t, int64_t> -renumber_edgelist( - raft::handle_t const& handle, - std::optional> optional_local_vertex_span, - std::vector const& edgelist_major_vertices /* [INOUT] */, - std::vector const& edgelist_minor_vertices /* [INOUT] */, - std::vector const& edgelist_edge_counts, - bool do_expensive_check); - -template rmm::device_uvector renumber_edgelist( +template std:: + tuple, partition_t, int32_t, int64_t, std::vector> + renumber_edgelist( + raft::handle_t const& handle, + std::optional> optional_local_vertex_span, + std::vector const& edgelist_major_vertices /* [INOUT] */, + std::vector const& edgelist_minor_vertices /* [INOUT] */, + std::vector const& edgelist_edge_counts, + bool do_expensive_check); + +template std::tuple, std::vector> +renumber_edgelist( raft::handle_t const& handle, std::optional> optional_vertex_span, int32_t* edgelist_major_vertices /* [INOUT] */, @@ -847,16 +917,18 @@ template rmm::device_uvector renumber_edgelist // instantiations for // -template std::tuple, partition_t, int64_t, int64_t> -renumber_edgelist( - raft::handle_t const& handle, - std::optional> optional_local_vertex_span, - std::vector const& edgelist_major_vertices /* [INOUT] */, - std::vector const& edgelist_minor_vertices /* [INOUT] */, - std::vector const& edgelist_edge_counts, - bool do_expensive_check); - -template rmm::device_uvector renumber_edgelist( +template std:: + tuple, partition_t, int64_t, int64_t, std::vector> + renumber_edgelist( + raft::handle_t const& handle, + std::optional> optional_local_vertex_span, + std::vector const& edgelist_major_vertices /* [INOUT] */, + std::vector const& edgelist_minor_vertices /* [INOUT] */, + std::vector const& edgelist_edge_counts, + bool do_expensive_check); + +template std::tuple, std::vector> +renumber_edgelist( raft::handle_t const& handle, std::optional> optional_vertex_span, int64_t* edgelist_major_vertices /* [INOUT] */, diff --git a/cpp/src/experimental/sssp.cu b/cpp/src/experimental/sssp.cu index 06872ead17f..4c5dac6065f 100644 --- a/cpp/src/experimental/sssp.cu +++ b/cpp/src/experimental/sssp.cu @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include @@ -168,7 +168,8 @@ void sssp(raft::handle_t const &handle, row_distances); } - vertex_partition_device_t vertex_partition(push_graph_view); + auto vertex_partition = vertex_partition_device_view_t( + push_graph_view.get_vertex_partition_view()); update_frontier_v_push_if_out_nbr( handle, diff --git a/cpp/src/sampling/random_walks.cuh b/cpp/src/sampling/random_walks.cuh index 570a7ec971f..361726acb56 100644 --- a/cpp/src/sampling/random_walks.cuh +++ b/cpp/src/sampling/random_walks.cuh @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -48,6 +49,7 @@ #include #include // FIXME: requirement for temporary std::getenv() #include +#include #include #include @@ -201,9 +203,9 @@ struct col_indx_extract_t{*values_} + : thrust::nullopt] 
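// Sketch of the "optional weights, default to 1" pattern used in col_indx_extract_t above, in
// plain host C++ (the patch itself uses thrust::optional so the value can be captured in a
// __device__ lambda). edge_weight() is an illustrative helper, not part of the random-walk code.
#include <cassert>
#include <optional>

using weight_t = float;

weight_t edge_weight(std::optional<weight_t const*> values, int edge_index)
{
  // un-weighted graphs carry no weight array; every edge then counts as 1
  return values ? (*values)[edge_index] : weight_t{1};
}

int main()
{
  weight_t w[] = {0.5f, 2.0f};
  assert(edge_weight(w, 1) == 2.0f);
  assert(edge_weight(std::nullopt, 1) == 1.0f);
  return 0;
}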
__device__(auto indx, auto col_indx) { auto delta = ptr_d_sizes[indx] - 1; auto v_indx = ptr_d_coalesced_v[indx * max_depth + delta]; auto start_row = row_offsets[v_indx]; - auto weight_value = - (values == nullptr ? weight_t{1} - : values[start_row + col_indx]); // account for un-weighted graphs + auto weight_value = (values ? (*values)[start_row + col_indx] + : weight_t{1}); // account for un-weighted graphs return thrust::make_tuple(col_indices[start_row + col_indx], weight_value); }, [] __device__(auto crt_out_deg) { return crt_out_deg > 0; }); @@ -264,7 +266,7 @@ struct col_indx_extract_t values_; edge_t const* out_degs_; index_t const* sizes_; diff --git a/cpp/src/sampling/rw_traversals.hpp b/cpp/src/sampling/rw_traversals.hpp index f7aeb0b6dc1..c86f76fcc72 100644 --- a/cpp/src/sampling/rw_traversals.hpp +++ b/cpp/src/sampling/rw_traversals.hpp @@ -213,9 +213,11 @@ struct horizontal_traversal_t { random_engine_t::generate_random(handle, ptr_d_random, d_random.size(), seed0); - auto const* col_indices = graph.indices(); - auto const* row_offsets = graph.offsets(); - auto const* values = graph.weights(); + auto const* col_indices = graph.get_matrix_partition_view().get_indices(); + auto const* row_offsets = graph.get_matrix_partition_view().get_offsets(); + auto const* values = graph.get_matrix_partition_view().get_weights() + ? *(graph.get_matrix_partition_view().get_weights()) + : static_cast(nullptr); auto* ptr_d_sizes = raw_ptr(d_paths_sz); auto const& d_cached_out_degs = rand_walker.get_out_degs(); diff --git a/cpp/src/serialization/serializer.cu b/cpp/src/serialization/serializer.cu index 1950ed780c5..28529c9f3ed 100644 --- a/cpp/src/serialization/serializer.cu +++ b/cpp/src/serialization/serializer.cu @@ -33,6 +33,7 @@ namespace cugraph { namespace serializer { + template void serializer_t::serialize(value_t val) { @@ -98,14 +99,15 @@ void serializer_t::serialize(serializer_t::graph_meta_t const& gmeta) serialize(gmeta.num_edges_); serialize(static_cast(gmeta.properties_.is_symmetric)); serialize(static_cast(gmeta.properties_.is_multigraph)); - serialize(static_cast(gmeta.properties_.is_weighted)); + serialize(static_cast(gmeta.is_weighted_)); - auto seg_off_sz_bytes = gmeta.segment_offsets_.size() * sizeof(vertex_t); + auto seg_off_sz_bytes = + (gmeta.segment_offsets_ ? 
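// Host-side sketch of the serializer change above: an absent std::optional<std::vector<...>>
// simply contributes zero bytes. A plain std::vector<char> stands in for the device buffer that
// the real serializer fills via raft::update_device; the offset values are made up.
#include <cstdint>
#include <cstring>
#include <iostream>
#include <optional>
#include <vector>

void append_segment_offsets(std::vector<char>& buf,
                            std::optional<std::vector<int32_t>> const& segment_offsets)
{
  auto sz_bytes = (segment_offsets ? segment_offsets->size() : size_t{0}) * sizeof(int32_t);
  if (sz_bytes > 0) {
    auto old = buf.size();
    buf.resize(old + sz_bytes);
    std::memcpy(buf.data() + old, segment_offsets->data(), sz_bytes);
  }
}

int main()
{
  std::vector<char> buf;
  append_segment_offsets(buf, std::nullopt);                          // writes nothing
  std::cout << buf.size() << '\n';                                    // 0
  append_segment_offsets(buf, std::vector<int32_t>{0, 2, 5, 8, 10});  // writes 5 * 4 bytes
  std::cout << buf.size() << '\n';                                    // 20
  return 0;
}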
(*(gmeta.segment_offsets_)).size() : size_t{0}) * sizeof(vertex_t); if (seg_off_sz_bytes > 0) { auto it_end = begin_ + seg_off_sz_bytes; raft::update_device(begin_, - reinterpret_cast(gmeta.segment_offsets_.data()), + reinterpret_cast((*(gmeta.segment_offsets_)).data()), seg_off_sz_bytes, handle_.get_stream()); @@ -141,15 +143,15 @@ serializer_t::graph_meta_t serializer_t::unserialize( bool_t is_weighted = unserialize(); graph_properties_t properties{static_cast(is_symmetric), - static_cast(is_multigraph), - static_cast(is_weighted)}; + static_cast(is_multigraph)}; - std::vector segment_offsets{}; + std::optional> segment_offsets{std::nullopt}; size_t seg_off_sz_bytes = graph_meta_sz_bytes - 2 * sizeof(size_t) - 3 * sizeof(bool_t); if (seg_off_sz_bytes > 0) { - raft::update_host(segment_offsets.data(), + segment_offsets = std::vector(seg_off_sz_bytes / sizeof(vertex_t), vertex_t{0}); + raft::update_host((*segment_offsets).data(), reinterpret_cast(cbegin_), seg_off_sz_bytes, handle_.get_stream()); @@ -157,7 +159,8 @@ serializer_t::graph_meta_t serializer_t::unserialize( cbegin_ += seg_off_sz_bytes; } - return graph_meta_t{num_vertices, num_edges, properties, segment_offsets}; + return graph_meta_t{ + num_vertices, num_edges, properties, static_cast(is_weighted), segment_offsets}; } else { CUGRAPH_FAIL("Unsupported graph type for unserialization."); @@ -182,9 +185,9 @@ void serializer_t::serialize(graph_t const& graph, serializer_t::graph_meta_t{graph}; - edge_t const* offsets = gview.offsets(); - vertex_t const* indices = gview.indices(); - weight_t const* weights = gview.weights(); + auto offsets = gview.get_matrix_partition_view().get_offsets(); + auto indices = gview.get_matrix_partition_view().get_indices(); + auto weights = gview.get_matrix_partition_view().get_weights(); // FIXME: remove when host_bcast() becomes available for vectors; // @@ -197,7 +200,7 @@ void serializer_t::serialize(graph_t const& graph, serializer_t::graph_meta_t(num_vertices + 1); auto d_indices = unserialize(num_edges); - if (g_props.is_weighted) { - auto d_weights = unserialize(num_edges); - - return graph_t(handle_, - num_vertices, - num_edges, - g_props, - std::move(d_offsets), - std::move(d_indices), - std::move(d_weights), - std::move(seg_offsets)); // RVO-ed - } else { - return graph_t(handle_, - num_vertices, - num_edges, - g_props, - std::move(d_offsets), - std::move(d_indices), - rmm::device_uvector(0, handle_.get_stream()), - std::move(seg_offsets)); // RVO-ed - } - + return graph_t( + handle_, + num_vertices, + num_edges, + g_props, + std::move(d_offsets), + std::move(d_indices), + is_weighted ? 
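// The matching read side of the sketch above: the element count is recovered from the byte count
// left over after the fixed-size fields, and std::nullopt is returned when nothing was written,
// mirroring the unserialize() change for graph_meta_t. read_segment_offsets() is illustrative.
#include <cstdint>
#include <cstring>
#include <iostream>
#include <optional>
#include <vector>

std::optional<std::vector<int32_t>> read_segment_offsets(char const* bytes, size_t sz_bytes)
{
  if (sz_bytes == 0) { return std::nullopt; }
  std::vector<int32_t> offsets(sz_bytes / sizeof(int32_t), 0);
  std::memcpy(offsets.data(), bytes, sz_bytes);
  return offsets;
}

int main()
{
  int32_t src[] = {0, 2, 5, 8, 10};
  auto maybe = read_segment_offsets(reinterpret_cast<char const*>(src), sizeof(src));
  std::cout << (maybe ? maybe->size() : size_t{0}) << '\n';                        // 5
  std::cout << (read_segment_offsets(nullptr, 0).has_value() ? "set" : "nullopt")  // nullopt
            << '\n';
  return 0;
}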
std::optional>{unserialize(num_edges)} + : std::nullopt, + std::move(seg_offsets)); // RVO-ed } else { CUGRAPH_FAIL("Unsupported graph type for unserialization."); diff --git a/cpp/src/structure/create_graph_from_edgelist.cu b/cpp/src/structure/create_graph_from_edgelist.cu index 27764ead0f0..a39ea3b87e4 100644 --- a/cpp/src/structure/create_graph_from_edgelist.cu +++ b/cpp/src/structure/create_graph_from_edgelist.cu @@ -39,13 +39,13 @@ std::enable_if_t< multi_gpu, std::tuple< cugraph::experimental::graph_t, - rmm::device_uvector>> + std::optional>>> create_graph_from_edgelist_impl( raft::handle_t const& handle, std::optional> optional_local_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, - rmm::device_uvector&& edgelist_weights, + std::optional>&& edgelist_weights, graph_properties_t graph_properties, bool renumber) { @@ -70,10 +70,10 @@ create_graph_from_edgelist_impl( store_transposed ? thrust::make_zip_iterator(thrust::make_tuple(edgelist_cols.begin(), edgelist_rows.begin())) : thrust::make_zip_iterator(thrust::make_tuple(edgelist_rows.begin(), edgelist_cols.begin())); - auto edge_counts = graph_properties.is_weighted + auto edge_counts = edgelist_weights ? cugraph::experimental::groupby_and_count(pair_first, pair_first + edgelist_rows.size(), - edgelist_weights.begin(), + (*edgelist_weights).begin(), local_partition_id_op, col_comm_size, handle.get_stream()) @@ -97,6 +97,7 @@ create_graph_from_edgelist_impl( cugraph::experimental::partition_t partition{}; vertex_t number_of_vertices{}; edge_t number_of_edges{}; + auto segment_offsets = std::make_optional>(0); { std::vector major_ptrs(h_edge_counts.size()); std::vector minor_ptrs(major_ptrs.size()); @@ -108,7 +109,8 @@ create_graph_from_edgelist_impl( (store_transposed ? edgelist_rows.begin() : edgelist_cols.begin()) + h_displacements[i]; counts[i] = static_cast(h_edge_counts[i]); } - std::tie(renumber_map_labels, partition, number_of_vertices, number_of_edges) = + std::tie( + renumber_map_labels, partition, number_of_vertices, number_of_edges, *segment_offsets) = cugraph::experimental::renumber_edgelist( handle, optional_local_vertex_span, major_ptrs, minor_ptrs, counts); } @@ -121,15 +123,22 @@ create_graph_from_edgelist_impl( edgelists[i] = cugraph::experimental::edgelist_t{ edgelist_rows.data() + h_displacements[i], edgelist_cols.data() + h_displacements[i], - graph_properties.is_weighted ? edgelist_weights.data() + h_displacements[i] - : static_cast(nullptr), + edgelist_weights + ? std::optional{(*edgelist_weights).data() + h_displacements[i]} + : std::nullopt, static_cast(h_edge_counts[i])}; } return std::make_tuple( cugraph::experimental::graph_t( - handle, edgelists, partition, number_of_vertices, number_of_edges, graph_properties, true), - std::move(renumber_map_labels)); + handle, + edgelists, + partition, + number_of_vertices, + number_of_edges, + graph_properties, + std::optional>{segment_offsets}), + std::optional>{std::move(renumber_map_labels)}); } template , - rmm::device_uvector>> + std::optional>>> create_graph_from_edgelist_impl( raft::handle_t const& handle, std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, - rmm::device_uvector&& edgelist_weights, + std::optional>&& edgelist_weights, graph_properties_t graph_properties, bool renumber) { auto renumber_map_labels = - renumber ? cugraph::experimental::renumber_edgelist( - handle, - optional_vertex_span, - store_transposed ? 
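// Sketch of how the per-partition slices above are formed: an exclusive scan over the
// per-partition edge counts gives the displacement of each partition's first edge, and an
// optional weight array is sliced into optional per-partition pointers. The counts below are
// made up; only the standard library is assumed.
#include <cstddef>
#include <iostream>
#include <numeric>
#include <optional>
#include <vector>

int main()
{
  std::vector<std::size_t> edge_counts{4, 0, 3, 5};
  std::vector<std::size_t> displacements(edge_counts.size(), 0);
  std::exclusive_scan(
    edge_counts.begin(), edge_counts.end(), displacements.begin(), std::size_t{0});

  std::vector<float> weights(12, 1.0f);  // stand-in for the (optional) edge weight array
  std::optional<float*> maybe_weights{weights.data()};

  for (std::size_t i = 0; i < edge_counts.size(); ++i) {
    auto part_weights =
      maybe_weights ? std::optional<float*>{*maybe_weights + displacements[i]} : std::nullopt;
    std::cout << "partition " << i << ": offset " << displacements[i] << ", " << edge_counts[i]
              << " edges, weights " << (part_weights ? "set" : "nullopt") << '\n';
  }
  return 0;
}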
edgelist_cols.data() : edgelist_rows.data(), - store_transposed ? edgelist_rows.data() : edgelist_cols.data(), - static_cast(edgelist_rows.size())) - : rmm::device_uvector(0, handle.get_stream()); + renumber ? std::make_optional>(0, handle.get_stream()) + : std::nullopt; + std::optional> segment_offsets{std::nullopt}; + if (renumber) { + segment_offsets = std::vector{}; + std::tie(*renumber_map_labels, *segment_offsets) = + cugraph::experimental::renumber_edgelist( + handle, + optional_vertex_span, + store_transposed ? edgelist_cols.data() : edgelist_rows.data(), + store_transposed ? edgelist_rows.data() : edgelist_cols.data(), + static_cast(edgelist_rows.size())); + } + vertex_t num_vertices{}; if (renumber) { - num_vertices = static_cast(renumber_map_labels.size()); + num_vertices = static_cast((*renumber_map_labels).size()); } else { if (optional_vertex_span) { num_vertices = std::get<1>(*optional_vertex_span); @@ -186,11 +202,12 @@ create_graph_from_edgelist_impl( cugraph::experimental::edgelist_t{ edgelist_rows.data(), edgelist_cols.data(), - graph_properties.is_weighted ? edgelist_weights.data() : static_cast(nullptr), + edgelist_weights ? std::optional{(*edgelist_weights).data()} + : std::nullopt, static_cast(edgelist_rows.size())}, num_vertices, graph_properties, - renumber ? true : false), + std::optional>{segment_offsets}), std::move(renumber_map_labels)); } @@ -202,13 +219,13 @@ template std::tuple, - rmm::device_uvector> + std::optional>> create_graph_from_edgelist( raft::handle_t const& handle, std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, - rmm::device_uvector&& edgelist_weights, + std::optional>&& edgelist_weights, graph_properties_t graph_properties, bool renumber) { @@ -225,266 +242,266 @@ create_graph_from_edgelist( // explicit instantiations template std::tuple, - rmm::device_uvector> + std::optional>> create_graph_from_edgelist( raft::handle_t const& handle, std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, - rmm::device_uvector&& edgelist_weights, + std::optional>&& edgelist_weights, graph_properties_t graph_properties, bool renumber); template std::tuple, - rmm::device_uvector> + std::optional>> create_graph_from_edgelist( raft::handle_t const& handle, std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, - rmm::device_uvector&& edgelist_weights, + std::optional>&& edgelist_weights, graph_properties_t graph_properties, bool renumber); template std::tuple, - rmm::device_uvector> + std::optional>> create_graph_from_edgelist( raft::handle_t const& handle, std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, - rmm::device_uvector&& edgelist_weights, + std::optional>&& edgelist_weights, graph_properties_t graph_properties, bool renumber); template std::tuple, - rmm::device_uvector> + std::optional>> create_graph_from_edgelist( raft::handle_t const& handle, std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, - rmm::device_uvector&& edgelist_weights, + std::optional>&& edgelist_weights, graph_properties_t graph_properties, bool renumber); template std::tuple, - rmm::device_uvector> + std::optional>> create_graph_from_edgelist( raft::handle_t const& handle, std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, - 
rmm::device_uvector&& edgelist_weights, + std::optional>&& edgelist_weights, graph_properties_t graph_properties, bool renumber); template std::tuple, - rmm::device_uvector> + std::optional>> create_graph_from_edgelist( raft::handle_t const& handle, std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, - rmm::device_uvector&& edgelist_weights, + std::optional>&& edgelist_weights, graph_properties_t graph_properties, bool renumber); template std::tuple, - rmm::device_uvector> + std::optional>> create_graph_from_edgelist( raft::handle_t const& handle, std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, - rmm::device_uvector&& edgelist_weights, + std::optional>&& edgelist_weights, graph_properties_t graph_properties, bool renumber); template std::tuple, - rmm::device_uvector> + std::optional>> create_graph_from_edgelist( raft::handle_t const& handle, std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, - rmm::device_uvector&& edgelist_weights, + std::optional>&& edgelist_weights, graph_properties_t graph_properties, bool renumber); template std::tuple, - rmm::device_uvector> + std::optional>> create_graph_from_edgelist( raft::handle_t const& handle, std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, - rmm::device_uvector&& edgelist_weights, + std::optional>&& edgelist_weights, graph_properties_t graph_properties, bool renumber); template std::tuple, - rmm::device_uvector> + std::optional>> create_graph_from_edgelist( raft::handle_t const& handle, std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, - rmm::device_uvector&& edgelist_weights, + std::optional>&& edgelist_weights, graph_properties_t graph_properties, bool renumber); template std::tuple, - rmm::device_uvector> + std::optional>> create_graph_from_edgelist( raft::handle_t const& handle, std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, - rmm::device_uvector&& edgelist_weights, + std::optional>&& edgelist_weights, graph_properties_t graph_properties, bool renumber); template std::tuple, - rmm::device_uvector> + std::optional>> create_graph_from_edgelist( raft::handle_t const& handle, std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, - rmm::device_uvector&& edgelist_weights, + std::optional>&& edgelist_weights, graph_properties_t graph_properties, bool renumber); template std::tuple, - rmm::device_uvector> + std::optional>> create_graph_from_edgelist( raft::handle_t const& handle, std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, - rmm::device_uvector&& edgelist_weights, + std::optional>&& edgelist_weights, graph_properties_t graph_properties, bool renumber); template std::tuple, - rmm::device_uvector> + std::optional>> create_graph_from_edgelist( raft::handle_t const& handle, std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, - rmm::device_uvector&& edgelist_weights, + std::optional>&& edgelist_weights, graph_properties_t graph_properties, bool renumber); template std::tuple, - rmm::device_uvector> + std::optional>> create_graph_from_edgelist( raft::handle_t const& handle, std::optional> optional_vertex_span, 
rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, - rmm::device_uvector&& edgelist_weights, + std::optional>&& edgelist_weights, graph_properties_t graph_properties, bool renumber); template std::tuple, - rmm::device_uvector> + std::optional>> create_graph_from_edgelist( raft::handle_t const& handle, std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, - rmm::device_uvector&& edgelist_weights, + std::optional>&& edgelist_weights, graph_properties_t graph_properties, bool renumber); template std::tuple, - rmm::device_uvector> + std::optional>> create_graph_from_edgelist( raft::handle_t const& handle, std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, - rmm::device_uvector&& edgelist_weights, + std::optional>&& edgelist_weights, graph_properties_t graph_properties, bool renumber); template std::tuple, - rmm::device_uvector> + std::optional>> create_graph_from_edgelist( raft::handle_t const& handle, std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, - rmm::device_uvector&& edgelist_weights, + std::optional>&& edgelist_weights, graph_properties_t graph_properties, bool renumber); template std::tuple, - rmm::device_uvector> + std::optional>> create_graph_from_edgelist( raft::handle_t const& handle, std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, - rmm::device_uvector&& edgelist_weights, + std::optional>&& edgelist_weights, graph_properties_t graph_properties, bool renumber); template std::tuple, - rmm::device_uvector> + std::optional>> create_graph_from_edgelist( raft::handle_t const& handle, std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, - rmm::device_uvector&& edgelist_weights, + std::optional>&& edgelist_weights, graph_properties_t graph_properties, bool renumber); template std::tuple, - rmm::device_uvector> + std::optional>> create_graph_from_edgelist( raft::handle_t const& handle, std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, - rmm::device_uvector&& edgelist_weights, + std::optional>&& edgelist_weights, graph_properties_t graph_properties, bool renumber); template std::tuple, - rmm::device_uvector> + std::optional>> create_graph_from_edgelist( raft::handle_t const& handle, std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, - rmm::device_uvector&& edgelist_weights, + std::optional>&& edgelist_weights, graph_properties_t graph_properties, bool renumber); template std::tuple, - rmm::device_uvector> + std::optional>> create_graph_from_edgelist( raft::handle_t const& handle, std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, - rmm::device_uvector&& edgelist_weights, + std::optional>&& edgelist_weights, graph_properties_t graph_properties, bool renumber); template std::tuple, - rmm::device_uvector> + std::optional>> create_graph_from_edgelist( raft::handle_t const& handle, std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, - rmm::device_uvector&& edgelist_weights, + std::optional>&& edgelist_weights, graph_properties_t graph_properties, bool renumber); diff --git a/cpp/src/utilities/cython.cu b/cpp/src/utilities/cython.cu index 
cf6bbf6322f..08402b38c1e 100644 --- a/cpp/src/utilities/cython.cu +++ b/cpp/src/utilities/cython.cu @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -146,9 +147,10 @@ create_graph(raft::handle_t const& handle, graph_container_t const& graph_contai edgelists[i] = cugraph::experimental::edgelist_t{ reinterpret_cast(graph_container.src_vertices) + displacements[i], reinterpret_cast(graph_container.dst_vertices) + displacements[i], - graph_container.graph_props.is_weighted - ? reinterpret_cast(graph_container.weights) + displacements[i] - : static_cast(nullptr), + graph_container.is_weighted + ? std::optional( + {static_cast(graph_container.weights) + displacements[i]}) + : std::nullopt, edge_counts[i]}; } @@ -165,7 +167,12 @@ create_graph(raft::handle_t const& handle, graph_container_t const& graph_contai static_cast(graph_container.num_global_vertices), static_cast(graph_container.num_global_edges), graph_container.graph_props, - true, + graph_container.segment_offsets != nullptr + ? std::make_optional>( + static_cast(graph_container.segment_offsets), + static_cast(graph_container.segment_offsets) + + graph_container.num_segments + 1) + : std::nullopt, graph_container.do_expensive_check); } @@ -181,14 +188,21 @@ create_graph(raft::handle_t const& handle, graph_container_t const& graph_contai experimental::edgelist_t edgelist{ reinterpret_cast(graph_container.src_vertices), reinterpret_cast(graph_container.dst_vertices), - reinterpret_cast(graph_container.weights), + graph_container.is_weighted + ? std::optional{reinterpret_cast(graph_container.weights)} + : std::nullopt, static_cast(graph_container.num_local_edges)}; return std::make_unique>( handle, edgelist, static_cast(graph_container.num_global_vertices), graph_container.graph_props, - graph_container.sorted_by_degree, + graph_container.segment_offsets != nullptr + ? 
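// Sketch of the segment_offsets handling above: a raw pointer handed across the Cython/C
// boundary is either null (no offsets) or points at num_segments + 1 values, and is converted
// into a std::optional<std::vector<...>> before reaching the C++ graph constructor.
// to_optional_offsets() is an illustrative helper, not the cython.cu code.
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <optional>
#include <vector>

std::optional<std::vector<int32_t>> to_optional_offsets(void const* segment_offsets,
                                                        std::size_t num_segments)
{
  return segment_offsets != nullptr
           ? std::make_optional<std::vector<int32_t>>(
               static_cast<int32_t const*>(segment_offsets),
               static_cast<int32_t const*>(segment_offsets) + num_segments + 1)
           : std::nullopt;
}

int main()
{
  int32_t offs[] = {0, 2, 5, 8, 10};
  auto a = to_optional_offsets(offs, 4);  // 4 segments -> 5 offsets
  auto b = to_optional_offsets(nullptr, 0);
  std::cout << (a ? a->size() : std::size_t{0}) << ' ' << (b ? b->size() : std::size_t{0})
            << '\n';  // prints: 5 0
  return 0;
}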
std::make_optional>( + static_cast(graph_container.segment_offsets), + static_cast(graph_container.segment_offsets) + + graph_container.num_segments + 1) + : std::nullopt, graph_container.do_expensive_check); } @@ -203,13 +217,14 @@ void populate_graph_container(graph_container_t& graph_container, void* dst_vertices, void* weights, void* vertex_partition_offsets, + void* segment_offsets, + size_t num_segments, numberTypeEnum vertexType, numberTypeEnum edgeType, numberTypeEnum weightType, size_t num_local_edges, size_t num_global_vertices, size_t num_global_edges, - bool sorted_by_degree, bool is_weighted, bool is_symmetric, bool transposed, @@ -233,10 +248,13 @@ void populate_graph_container(graph_container_t& graph_container, graph_container.col_comm_rank = col_comm_rank; } - graph_container.vertex_partition_offsets = vertex_partition_offsets; graph_container.src_vertices = src_vertices; graph_container.dst_vertices = dst_vertices; graph_container.weights = weights; + graph_container.is_weighted = is_weighted; + graph_container.vertex_partition_offsets = vertex_partition_offsets; + graph_container.segment_offsets = segment_offsets; + graph_container.num_segments = num_segments; graph_container.num_local_edges = num_local_edges; graph_container.num_global_vertices = num_global_vertices; graph_container.num_global_edges = num_global_edges; @@ -245,11 +263,10 @@ void populate_graph_container(graph_container_t& graph_container, graph_container.weightType = weightType; graph_container.transposed = transposed; graph_container.is_multi_gpu = multi_gpu; - graph_container.sorted_by_degree = sorted_by_degree; graph_container.do_expensive_check = do_expensive_check; - experimental::graph_properties_t graph_props{ - .is_symmetric = is_symmetric, .is_multigraph = false, .is_weighted = is_weighted}; + experimental::graph_properties_t graph_props{.is_symmetric = is_symmetric, + .is_multigraph = false}; graph_container.graph_props = graph_props; graph_container.graph_type = graphTypeEnum::graph_t; @@ -566,68 +583,95 @@ void call_pagerank(raft::handle_t const& handle, bool has_guess) { if (graph_container.is_multi_gpu) { + auto& comm = handle.get_comms(); + auto aggregate_personalization_subset_size = cugraph::experimental::host_scalar_allreduce( + comm, personalization_subset_size, handle.get_stream()); + if (graph_container.edgeType == numberTypeEnum::int32Type) { auto graph = detail::create_graph(handle, graph_container); - cugraph::experimental::pagerank(handle, - graph->view(), - static_cast(nullptr), - reinterpret_cast(personalization_subset), - reinterpret_cast(personalization_values), - static_cast(personalization_subset_size), - reinterpret_cast(p_pagerank), - static_cast(alpha), - static_cast(tolerance), - max_iter, - has_guess, - true); + cugraph::experimental::pagerank( + handle, + graph->view(), + std::nullopt, + aggregate_personalization_subset_size > 0 + ? std::optional{reinterpret_cast(personalization_subset)} + : std::nullopt, + aggregate_personalization_subset_size > 0 + ? std::optional{personalization_values} + : std::nullopt, + aggregate_personalization_subset_size > 0 + ? 
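// Sketch of the reasoning behind the host_scalar_allreduce above, using plain MPI as a stand-in
// for the raft communicator: whether personalization is used has to be decided identically on
// every rank, including ranks whose local personalization subset is empty, so the local sizes
// are summed first. Compile with an MPI wrapper (e.g. mpicxx); this is not the cuGraph call path.
#include <mpi.h>
#include <cstdio>

int main(int argc, char** argv)
{
  MPI_Init(&argc, &argv);
  int rank{0};
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  unsigned long local_size  = (rank == 0) ? 3ul : 0ul;  // only rank 0 holds personalization vertices
  unsigned long global_size = 0ul;
  MPI_Allreduce(&local_size, &global_size, 1, MPI_UNSIGNED_LONG, MPI_SUM, MPI_COMM_WORLD);

  bool use_personalization = (global_size > 0);  // identical on every rank
  std::printf("rank %d: local %lu, aggregate %lu, personalized run: %d\n",
              rank,
              local_size,
              global_size,
              static_cast<int>(use_personalization));

  MPI_Finalize();
  return 0;
}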
std::optional{static_cast(personalization_subset_size)} + : std::nullopt, + reinterpret_cast(p_pagerank), + static_cast(alpha), + static_cast(tolerance), + max_iter, + has_guess, + true); } else if (graph_container.edgeType == numberTypeEnum::int64Type) { auto graph = detail::create_graph(handle, graph_container); - cugraph::experimental::pagerank(handle, - graph->view(), - static_cast(nullptr), - reinterpret_cast(personalization_subset), - reinterpret_cast(personalization_values), - static_cast(personalization_subset_size), - reinterpret_cast(p_pagerank), - static_cast(alpha), - static_cast(tolerance), - max_iter, - has_guess, - true); + cugraph::experimental::pagerank( + handle, + graph->view(), + std::nullopt, + aggregate_personalization_subset_size > 0 + ? std::optional{personalization_subset} + : std::nullopt, + aggregate_personalization_subset_size > 0 + ? std::optional{personalization_values} + : std::nullopt, + aggregate_personalization_subset_size > 0 + ? std::optional{personalization_subset_size} + : std::nullopt, + reinterpret_cast(p_pagerank), + static_cast(alpha), + static_cast(tolerance), + max_iter, + has_guess, + true); } } else { if (graph_container.edgeType == numberTypeEnum::int32Type) { auto graph = detail::create_graph(handle, graph_container); - cugraph::experimental::pagerank(handle, - graph->view(), - static_cast(nullptr), - reinterpret_cast(personalization_subset), - reinterpret_cast(personalization_values), - static_cast(personalization_subset_size), - reinterpret_cast(p_pagerank), - static_cast(alpha), - static_cast(tolerance), - max_iter, - has_guess, - true); + cugraph::experimental::pagerank( + handle, + graph->view(), + std::nullopt, + personalization_subset_size > 0 + ? std::optional{reinterpret_cast(personalization_subset)} + : std::nullopt, + personalization_subset_size > 0 ? std::optional{personalization_values} + : std::nullopt, + personalization_subset_size > 0 ? std::optional{personalization_subset_size} + : std::nullopt, + reinterpret_cast(p_pagerank), + static_cast(alpha), + static_cast(tolerance), + max_iter, + has_guess, + true); } else if (graph_container.edgeType == numberTypeEnum::int64Type) { auto graph = detail::create_graph(handle, graph_container); - cugraph::experimental::pagerank(handle, - graph->view(), - static_cast(nullptr), - reinterpret_cast(personalization_subset), - reinterpret_cast(personalization_values), - static_cast(personalization_subset_size), - reinterpret_cast(p_pagerank), - static_cast(alpha), - static_cast(tolerance), - max_iter, - has_guess, - true); + cugraph::experimental::pagerank( + handle, + graph->view(), + std::nullopt, + personalization_subset_size > 0 ? std::optional{personalization_subset} + : std::nullopt, + personalization_subset_size > 0 ? std::optional{personalization_values} + : std::nullopt, + personalization_subset_size > 0 ? std::optional{personalization_subset_size} + : std::nullopt, + reinterpret_cast(p_pagerank), + static_cast(alpha), + static_cast(tolerance), + max_iter, + has_guess, + true); } } } @@ -772,7 +816,9 @@ std::unique_ptr call_egonet(raft::handle_t const& handle, static_cast(n_subgraphs), std::make_unique(std::get<0>(g).release()), std::make_unique(std::get<1>(g).release()), - std::make_unique(std::get<2>(g).release()), + std::make_unique( + std::get<2>(g) ? 
(*std::get<2>(g)).release() + : rmm::device_buffer(size_t{0}, handle.get_stream_view())), std::make_unique(std::get<3>(g).release())}; return std::make_unique(std::move(coo_contents)); } else if (graph_container.edgeType == numberTypeEnum::int64Type) { @@ -789,7 +835,9 @@ std::unique_ptr call_egonet(raft::handle_t const& handle, static_cast(n_subgraphs), std::make_unique(std::get<0>(g).release()), std::make_unique(std::get<1>(g).release()), - std::make_unique(std::get<2>(g).release()), + std::make_unique( + std::get<2>(g) ? (*std::get<2>(g)).release() + : rmm::device_buffer(size_t{0}, handle.get_stream_view())), std::make_unique(std::get<3>(g).release())}; return std::make_unique(std::move(coo_contents)); } else { @@ -1143,7 +1191,7 @@ std::unique_ptr> call_shuffle( // TODO: check if return type needs further handling... // template -std::unique_ptr> call_renumber( +std::unique_ptr> call_renumber( raft::handle_t const& handle, vertex_t* shuffled_edgelist_major_vertices /* [INOUT] */, vertex_t* shuffled_edgelist_minor_vertices /* [INOUT] */, @@ -1153,8 +1201,8 @@ std::unique_ptr> call_renumber( { // caveat: return values have different types on the 2 branches below: // - std::unique_ptr> p_ret = - std::make_unique>(handle); + std::unique_ptr> p_ret = + std::make_unique>(handle); if (multi_gpu) { std::vector displacements(edge_counts.size(), edge_t{0}); @@ -1166,18 +1214,22 @@ std::unique_ptr> call_renumber( minor_ptrs[i] = shuffled_edgelist_minor_vertices + displacements[i]; } - std::tie( - p_ret->get_dv(), p_ret->get_partition(), p_ret->get_num_vertices(), p_ret->get_num_edges()) = + std::tie(p_ret->get_dv(), + p_ret->get_partition(), + p_ret->get_num_vertices(), + p_ret->get_num_edges(), + p_ret->get_segment_offsets()) = cugraph::experimental::renumber_edgelist( handle, std::nullopt, major_ptrs, minor_ptrs, edge_counts, do_expensive_check); } else { - p_ret->get_dv() = cugraph::experimental::renumber_edgelist( - handle, - std::nullopt, - shuffled_edgelist_major_vertices, - shuffled_edgelist_minor_vertices, - edge_counts[0], - do_expensive_check); + std::tie(p_ret->get_dv(), p_ret->get_segment_offsets()) = + cugraph::experimental::renumber_edgelist( + handle, + std::nullopt, + shuffled_edgelist_major_vertices, + shuffled_edgelist_minor_vertices, + edge_counts[0], + do_expensive_check); p_ret->get_partition() = cugraph::experimental::partition_t{}; // dummy @@ -1494,7 +1546,7 @@ template std::unique_ptr> call_s // TODO: add the remaining relevant EIDIr's: // -template std::unique_ptr> call_renumber( +template std::unique_ptr> call_renumber( raft::handle_t const& handle, int32_t* shuffled_edgelist_major_vertices /* [INOUT] */, int32_t* shuffled_edgelist_minor_vertices /* [INOUT] */, @@ -1502,7 +1554,7 @@ template std::unique_ptr> call_renumber( bool do_expensive_check, bool multi_gpu); -template std::unique_ptr> call_renumber( +template std::unique_ptr> call_renumber( raft::handle_t const& handle, int32_t* shuffled_edgelist_major_vertices /* [INOUT] */, int32_t* shuffled_edgelist_minor_vertices /* [INOUT] */, @@ -1510,7 +1562,7 @@ template std::unique_ptr> call_renumber( bool do_expensive_check, bool multi_gpu); -template std::unique_ptr> call_renumber( +template std::unique_ptr> call_renumber( raft::handle_t const& handle, int64_t* shuffled_edgelist_major_vertices /* [INOUT] */, int64_t* shuffled_edgelist_minor_vertices /* [INOUT] */, diff --git a/cpp/tests/bcast/mg_graph_bcast.cpp b/cpp/tests/bcast/mg_graph_bcast.cpp index dbb2460abf4..1a0d4c558c9 100644 --- 
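// Sketch of the std::tie pattern used in call_renumber() above: when the destinations already
// exist (members reached through getters returning references), std::tie assigns the tuple
// result element-wise instead of introducing new bindings. result_t and fake_renumber() are
// illustrative stand-ins for the renumbering return wrapper.
#include <iostream>
#include <tuple>
#include <vector>

struct result_t {
  std::vector<int>& get_dv() { return dv_; }
  std::vector<int>& get_segment_offsets() { return segment_offsets_; }
  std::vector<int> dv_{};
  std::vector<int> segment_offsets_{};
};

std::tuple<std::vector<int>, std::vector<int>> fake_renumber()
{
  return std::make_tuple(std::vector<int>{7, 3, 1}, std::vector<int>{0, 2, 3});
}

int main()
{
  result_t ret;
  std::tie(ret.get_dv(), ret.get_segment_offsets()) = fake_renumber();  // assigns through the refs
  std::cout << ret.dv_.size() << ' ' << ret.segment_offsets_.size() << '\n';  // prints: 3 3
  return 0;
}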
a/cpp/tests/bcast/mg_graph_bcast.cpp +++ b/cpp/tests/bcast/mg_graph_bcast.cpp @@ -84,22 +84,14 @@ class GraphBcast_MG_Testfixture : public ::testing::TestWithParam d_renumber_map_labels(0, stream); - - std::tie(sg_graph, d_renumber_map_labels) = + auto [sg_graph, d_renumber_map_labels] = cugraph::test::read_graph_from_matrix_market_file( handle, param.graph_file_full_path, true, /*renumber=*/false); if (comm_rank == 0) { graph_broadcast(handle, &sg_graph); - ; } else { sg_graph_t* g_ignore{nullptr}; auto graph_copy = graph_broadcast(handle, g_ignore); diff --git a/cpp/tests/community/egonet_test.cu b/cpp/tests/community/egonet_test.cu index 6f1ca4eb374..29be3508de7 100644 --- a/cpp/tests/community/egonet_test.cu +++ b/cpp/tests/community/egonet_test.cu @@ -87,14 +87,10 @@ class Tests_InducedEgo : public ::testing::TestWithParam { configuration.ego_sources.size(), handle.get_stream()); - rmm::device_uvector d_ego_edgelist_src(0, handle.get_stream()); - rmm::device_uvector d_ego_edgelist_dst(0, handle.get_stream()); - rmm::device_uvector d_ego_edgelist_weights(0, handle.get_stream()); - rmm::device_uvector d_ego_edge_offsets(0, handle.get_stream()); HighResTimer hr_timer; hr_timer.start("egonet"); cudaProfilerStart(); - std::tie(d_ego_edgelist_src, d_ego_edgelist_dst, d_ego_edgelist_weights, d_ego_edge_offsets) = + auto [d_ego_edgelist_src, d_ego_edgelist_dst, d_ego_edgelist_weights, d_ego_edge_offsets] = cugraph::experimental::extract_ego(handle, graph_view, d_ego_sources.data(), @@ -121,7 +117,7 @@ class Tests_InducedEgo : public ::testing::TestWithParam { ASSERT_TRUE(d_ego_edge_offsets.size() == (configuration.ego_sources.size() + 1)); ASSERT_TRUE(d_ego_edgelist_src.size() == d_ego_edgelist_dst.size()); if (configuration.test_weighted) - ASSERT_TRUE(d_ego_edgelist_src.size() == d_ego_edgelist_weights.size()); + ASSERT_TRUE(d_ego_edgelist_src.size() == (*d_ego_edgelist_weights).size()); ASSERT_TRUE(h_cugraph_ego_edge_offsets[configuration.ego_sources.size()] == d_ego_edgelist_src.size()); for (size_t i = 0; i < configuration.ego_sources.size(); i++) @@ -133,33 +129,6 @@ class Tests_InducedEgo : public ::testing::TestWithParam { ASSERT_TRUE( cugraph::experimental::is_valid_vertex(n_vertices, h_cugraph_ego_edgelist_dst[i])); } - - /* - // For inspecting data - std::vector h_cugraph_ego_edgelist_weights(d_ego_edgelist_weights.size()); - if (configuration.test_weighted) { - raft::update_host(h_cugraph_ego_edgelist_weights.data(), - d_ego_edgelist_weights.data(), - d_ego_edgelist_weights.size(), - handle.get_stream()); - } - raft::print_host_vector("offsets", - &h_cugraph_ego_edge_offsets[0], - h_cugraph_ego_edge_offsets.size(), - std::cout); - raft::print_host_vector("src", - &h_cugraph_ego_edgelist_src[0], - h_cugraph_ego_edgelist_src.size(), - std::cout); - raft::print_host_vector("dst", - &h_cugraph_ego_edgelist_dst[0], - h_cugraph_ego_edgelist_dst.size(), - std::cout); - raft::print_host_vector("weights", - &h_cugraph_ego_edgelist_weights[0], - h_cugraph_ego_edgelist_weights.size(), - std::cout); - */ } }; diff --git a/cpp/tests/community/mg_louvain_helper.cu b/cpp/tests/community/mg_louvain_helper.cu index 935c36c9232..e5b99c5cbdd 100644 --- a/cpp/tests/community/mg_louvain_helper.cu +++ b/cpp/tests/community/mg_louvain_helper.cu @@ -60,14 +60,15 @@ void single_gpu_renumber_edgelist_given_number_map(raft::handle_t const &handle, } template -std:: - tuple, rmm::device_uvector, rmm::device_uvector> - compressed_sparse_to_edgelist(edge_t const *compressed_sparse_offsets, - vertex_t const 
*compressed_sparse_indices, - weight_t const *compressed_sparse_weights, - vertex_t major_first, - vertex_t major_last, - cudaStream_t stream) +std::tuple, + rmm::device_uvector, + std::optional>> +compressed_sparse_to_edgelist(edge_t const *compressed_sparse_offsets, + vertex_t const *compressed_sparse_indices, + std::optional compressed_sparse_weights, + vertex_t major_first, + vertex_t major_last, + cudaStream_t stream) { edge_t number_of_edges{0}; raft::update_host( @@ -75,8 +76,10 @@ std:: CUDA_TRY(cudaStreamSynchronize(stream)); rmm::device_uvector edgelist_major_vertices(number_of_edges, stream); rmm::device_uvector edgelist_minor_vertices(number_of_edges, stream); - rmm::device_uvector edgelist_weights( - compressed_sparse_weights != nullptr ? number_of_edges : 0, stream); + auto edgelist_weights = + compressed_sparse_weights + ? std::make_optional>(number_of_edges, stream) + : std::nullopt; // FIXME: this is highly inefficient for very high-degree vertices, for better performance, we can // fill high-degree vertices using one CUDA block per vertex, mid-degree vertices using one CUDA @@ -95,11 +98,11 @@ std:: compressed_sparse_indices, compressed_sparse_indices + number_of_edges, edgelist_minor_vertices.begin()); - if (compressed_sparse_weights != nullptr) { + if (compressed_sparse_weights) { thrust::copy(rmm::exec_policy(stream)->on(stream), - compressed_sparse_weights, - compressed_sparse_weights + number_of_edges, - edgelist_weights.data()); + (*compressed_sparse_weights), + (*compressed_sparse_weights) + number_of_edges, + (*edgelist_weights).data()); } return std::make_tuple(std::move(edgelist_major_vertices), @@ -108,20 +111,21 @@ std:: } template -void sort_and_coarsen_edgelist(rmm::device_uvector &edgelist_major_vertices /* [INOUT] */, - rmm::device_uvector &edgelist_minor_vertices /* [INOUT] */, - rmm::device_uvector &edgelist_weights /* [INOUT] */, - cudaStream_t stream) +void sort_and_coarsen_edgelist( + rmm::device_uvector &edgelist_major_vertices /* [INOUT] */, + rmm::device_uvector &edgelist_minor_vertices /* [INOUT] */, + std::optional> &edgelist_weights /* [INOUT] */, + cudaStream_t stream) { auto pair_first = thrust::make_zip_iterator( thrust::make_tuple(edgelist_major_vertices.begin(), edgelist_minor_vertices.begin())); size_t number_of_edges{0}; - if (edgelist_weights.size() > 0) { + if (edgelist_weights) { thrust::sort_by_key(rmm::exec_policy(stream)->on(stream), pair_first, pair_first + edgelist_major_vertices.size(), - edgelist_weights.begin()); + (*edgelist_weights).begin()); rmm::device_uvector tmp_edgelist_major_vertices(edgelist_major_vertices.size(), stream); @@ -132,7 +136,7 @@ void sort_and_coarsen_edgelist(rmm::device_uvector &edgelist_major_ver rmm::exec_policy(stream)->on(stream), pair_first, pair_first + edgelist_major_vertices.size(), - edgelist_weights.begin(), + (*edgelist_weights).begin(), thrust::make_zip_iterator(thrust::make_tuple(tmp_edgelist_major_vertices.begin(), tmp_edgelist_minor_vertices.begin())), tmp_edgelist_weights.begin()); @@ -140,7 +144,7 @@ void sort_and_coarsen_edgelist(rmm::device_uvector &edgelist_major_ver edgelist_major_vertices = std::move(tmp_edgelist_major_vertices); edgelist_minor_vertices = std::move(tmp_edgelist_minor_vertices); - edgelist_weights = std::move(tmp_edgelist_weights); + (*edgelist_weights) = std::move(tmp_edgelist_weights); } else { thrust::sort(rmm::exec_policy(stream)->on(stream), pair_first, @@ -153,34 +157,34 @@ void sort_and_coarsen_edgelist(rmm::device_uvector &edgelist_major_ver 
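// Host-side sketch of compressed_sparse_to_edgelist() above: each row index is repeated
// offsets[v+1] - offsets[v] times to form the major-vertex column of the edge list, the CSR
// indices become the minor vertices as-is, and a weight column is produced only when a weight
// array is present. The values below are made up.
#include <cassert>
#include <optional>
#include <tuple>
#include <vector>

using vertex_t = int;
using edge_t   = int;
using weight_t = float;

std::tuple<std::vector<vertex_t>, std::vector<vertex_t>, std::optional<std::vector<weight_t>>>
csr_to_edgelist(std::vector<edge_t> const& offsets,
                std::vector<vertex_t> const& indices,
                std::optional<std::vector<weight_t>> const& weights)
{
  std::vector<vertex_t> majors{};
  for (vertex_t v = 0; v + 1 < static_cast<vertex_t>(offsets.size()); ++v) {
    for (edge_t e = offsets[v]; e < offsets[v + 1]; ++e) { majors.push_back(v); }
  }
  auto minors       = indices;  // minor vertices are the CSR indices, unchanged
  auto edge_weights = weights;  // copied through only if present
  return std::make_tuple(std::move(majors), std::move(minors), std::move(edge_weights));
}

int main()
{
  std::vector<edge_t> offsets{0, 2, 2, 3};  // 3 vertices, 3 edges
  std::vector<vertex_t> indices{1, 2, 0};
  auto [majors, minors, w] = csr_to_edgelist(offsets, indices, std::nullopt);
  assert((majors == std::vector<vertex_t>{0, 0, 2}));
  assert((minors == indices));
  assert(!w.has_value());
  return 0;
}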
edgelist_major_vertices.resize(number_of_edges, stream); edgelist_minor_vertices.resize(number_of_edges, stream); - edgelist_weights.resize(number_of_edges, stream); edgelist_major_vertices.shrink_to_fit(stream); edgelist_minor_vertices.shrink_to_fit(stream); - edgelist_weights.shrink_to_fit(stream); + if (edgelist_weights) { + (*edgelist_weights).resize(number_of_edges, stream); + (*edgelist_weights).shrink_to_fit(stream); + } } template -std:: - tuple, rmm::device_uvector, rmm::device_uvector> - compressed_sparse_to_relabeled_and_sorted_and_coarsened_edgelist( - edge_t const *compressed_sparse_offsets, - vertex_t const *compressed_sparse_indices, - weight_t const *compressed_sparse_weights, - vertex_t const *p_major_labels, - vertex_t const *p_minor_labels, - vertex_t major_first, - vertex_t major_last, - vertex_t minor_first, - vertex_t minor_last, - cudaStream_t stream) +std::tuple, + rmm::device_uvector, + std::optional>> +compressed_sparse_to_relabeled_and_sorted_and_coarsened_edgelist( + edge_t const *compressed_sparse_offsets, + vertex_t const *compressed_sparse_indices, + std::optional compressed_sparse_weights, + vertex_t const *p_major_labels, + vertex_t const *p_minor_labels, + vertex_t major_first, + vertex_t major_last, + vertex_t minor_first, + vertex_t minor_last, + cudaStream_t stream) { // FIXME: it might be possible to directly create relabled & coarsened edgelist from the // compressed sparse format to save memory - rmm::device_uvector edgelist_major_vertices(0, stream); - rmm::device_uvector edgelist_minor_vertices(0, stream); - rmm::device_uvector edgelist_weights(0, stream); - std::tie(edgelist_major_vertices, edgelist_minor_vertices, edgelist_weights) = + auto [edgelist_major_vertices, edgelist_minor_vertices, edgelist_weights] = compressed_sparse_to_edgelist(compressed_sparse_offsets, compressed_sparse_indices, compressed_sparse_weights, @@ -208,7 +212,8 @@ std:: std::move(edgelist_weights)); } -// single-GPU version +// FIXME: better add "bool renumber" (which must be false in MG) to the coarsen_grpah function +// instead of replicating the code here. single-GPU version template std::unique_ptr> coarsen_graph( @@ -217,16 +222,13 @@ coarsen_graph( &graph_view, vertex_t const *labels) { - rmm::device_uvector coarsened_edgelist_major_vertices(0, handle.get_stream()); - rmm::device_uvector coarsened_edgelist_minor_vertices(0, handle.get_stream()); - rmm::device_uvector coarsened_edgelist_weights(0, handle.get_stream()); - std::tie(coarsened_edgelist_major_vertices, - coarsened_edgelist_minor_vertices, - coarsened_edgelist_weights) = + auto [coarsened_edgelist_major_vertices, + coarsened_edgelist_minor_vertices, + coarsened_edgelist_weights] = compressed_sparse_to_relabeled_and_sorted_and_coarsened_edgelist( - graph_view.offsets(), - graph_view.indices(), - graph_view.weights(), + graph_view.get_matrix_partition_view().get_offsets(), + graph_view.get_matrix_partition_view().get_indices(), + graph_view.get_matrix_partition_view().get_weights(), labels, labels, vertex_t{0}, @@ -240,7 +242,10 @@ coarsen_graph( : coarsened_edgelist_major_vertices.data(); edgelist.p_dst_vertices = store_transposed ? coarsened_edgelist_major_vertices.data() : coarsened_edgelist_minor_vertices.data(); - edgelist.p_edge_weights = coarsened_edgelist_weights.data(); + edgelist.p_edge_weights = + coarsened_edgelist_weights + ? 
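// Host-side sketch of what sort_and_coarsen_edgelist() above computes: after relabeling,
// parallel edges with the same (major, minor) pair are merged and their weights summed. The
// device code does this with thrust::sort_by_key + thrust::reduce_by_key; a std::map is used
// here only to keep the sketch short, and the edges and weights are made up.
#include <cstddef>
#include <iostream>
#include <map>
#include <utility>
#include <vector>

int main()
{
  std::vector<std::pair<int, int>> edges{{0, 1}, {0, 1}, {1, 2}, {0, 1}};
  std::vector<float> weights{1.0f, 2.0f, 0.5f, 1.0f};

  std::map<std::pair<int, int>, float> coarsened;
  for (std::size_t i = 0; i < edges.size(); ++i) { coarsened[edges[i]] += weights[i]; }

  for (auto const& [edge, w] : coarsened) {
    std::cout << edge.first << " -> " << edge.second << " : " << w << '\n';
  }
  // prints: 0 -> 1 : 4   and   1 -> 2 : 0.5
  return 0;
}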
std::optional{(*coarsened_edgelist_weights).data()} + : std::nullopt; edgelist.number_of_edges = static_cast(coarsened_edgelist_major_vertices.size()); vertex_t new_number_of_vertices = @@ -255,9 +260,8 @@ coarsen_graph( handle, edgelist, new_number_of_vertices, - cugraph::experimental::graph_properties_t{ - graph_view.is_symmetric(), false, graph_view.is_weighted()}, - true); + cugraph::experimental::graph_properties_t{graph_view.is_symmetric(), false}, + std::nullopt); } // explicit instantiation diff --git a/cpp/tests/community/mg_louvain_test.cpp b/cpp/tests/community/mg_louvain_test.cpp index 9c6d7bb4491..971685d7537 100644 --- a/cpp/tests/community/mg_louvain_test.cpp +++ b/cpp/tests/community/mg_louvain_test.cpp @@ -105,14 +105,12 @@ class Louvain_MG_Testfixture : public ::testing::TestWithParam if (rank == 0) { // Create initial SG graph, renumbered according to the MNMG renumber map - rmm::device_uvector d_edgelist_rows(0, handle.get_stream()); - rmm::device_uvector d_edgelist_cols(0, handle.get_stream()); - rmm::device_uvector d_edgelist_weights(0, handle.get_stream()); - vertex_t number_of_vertices{}; - bool is_symmetric{}; - - std::tie( - d_edgelist_rows, d_edgelist_cols, d_edgelist_weights, number_of_vertices, is_symmetric) = + + auto [d_edgelist_rows, + d_edgelist_cols, + d_edgelist_weights, + number_of_vertices, + is_symmetric] = cugraph::test::read_edgelist_from_matrix_market_file( handle, graph_filename, true); @@ -137,7 +135,7 @@ class Louvain_MG_Testfixture : public ::testing::TestWithParam std::move(d_edgelist_rows), std::move(d_edgelist_cols), std::move(d_edgelist_weights), - cugraph::experimental::graph_properties_t{is_symmetric, false, true}, + cugraph::experimental::graph_properties_t{is_symmetric, false}, false); } @@ -191,11 +189,7 @@ class Louvain_MG_Testfixture : public ::testing::TestWithParam cudaStream_t stream = handle.get_stream(); - cugraph::experimental::graph_t mg_graph(handle); - - rmm::device_uvector d_renumber_map_labels(0, handle.get_stream()); - - std::tie(mg_graph, d_renumber_map_labels) = + auto [mg_graph, d_renumber_map_labels] = cugraph::test::read_graph_from_matrix_market_file( handle, param.graph_file_full_path, true, true); @@ -210,7 +204,7 @@ class Louvain_MG_Testfixture : public ::testing::TestWithParam SCOPED_TRACE("compare modularity input: " + param.graph_file_full_path); auto d_renumber_map_gathered_v = cugraph::test::device_gatherv( - handle, d_renumber_map_labels.data(), d_renumber_map_labels.size()); + handle, (*d_renumber_map_labels).data(), (*d_renumber_map_labels).size()); compare_sg_results(handle, param.graph_file_full_path, diff --git a/cpp/tests/components/mg_weakly_connected_components_test.cpp b/cpp/tests/components/mg_weakly_connected_components_test.cpp index a64919c4f92..9285b3ed668 100644 --- a/cpp/tests/components/mg_weakly_connected_components_test.cpp +++ b/cpp/tests/components/mg_weakly_connected_components_test.cpp @@ -87,9 +87,7 @@ class Tests_MGWeaklyConnectedComponents hr_clock.start(); } - cugraph::experimental::graph_t mg_graph(handle); - rmm::device_uvector d_mg_renumber_map_labels(0, handle.get_stream()); - std::tie(mg_graph, d_mg_renumber_map_labels) = + auto [mg_graph, d_mg_renumber_map_labels] = input_usecase.template construct_graph( handle, false, true); @@ -131,7 +129,7 @@ class Tests_MGWeaklyConnectedComponents // 4-1. 
aggregate MG results auto d_mg_aggregate_renumber_map_labels = cugraph::test::device_gatherv( - handle, d_mg_renumber_map_labels.data(), d_mg_renumber_map_labels.size()); + handle, (*d_mg_renumber_map_labels).data(), (*d_mg_renumber_map_labels).size()); auto d_mg_aggregate_components = cugraph::test::device_gatherv(handle, d_mg_components.data(), d_mg_components.size()); diff --git a/cpp/tests/components/wcc_graphs.cu b/cpp/tests/components/wcc_graphs.cu index 8cbe18ed71e..ff1681076f6 100644 --- a/cpp/tests/components/wcc_graphs.cu +++ b/cpp/tests/components/wcc_graphs.cu @@ -29,7 +29,7 @@ template std::tuple, - rmm::device_uvector> + std::optional>> LineGraph_Usecase::construct_graph(raft::handle_t const& handle, bool test_weighted, bool renumber) const @@ -43,7 +43,6 @@ LineGraph_Usecase::construct_graph(raft::handle_t const& handle, rmm::device_uvector src_v(num_edges, handle.get_stream()); rmm::device_uvector dst_v(num_edges, handle.get_stream()); rmm::device_uvector order_v(num_vertices_, handle.get_stream()); - rmm::device_uvector weights_v(edge_t{0}, handle.get_stream()); thrust::sequence( rmm::exec_policy(handle.get_stream()), vertices_v.begin(), vertices_v.end(), vertex_t{0}); @@ -77,13 +76,13 @@ LineGraph_Usecase::construct_graph(raft::handle_t const& handle, std::make_tuple(vertices_v.data(), static_cast(vertices_v.size()))}, std::move(src_v), std::move(dst_v), - std::move(weights_v), - cugraph::experimental::graph_properties_t{true, false, false}, + std::nullopt, + cugraph::experimental::graph_properties_t{true, false}, false); } -template std::tuple, - rmm::device_uvector> +template std::tuple, + std::optional>> LineGraph_Usecase::construct_graph(raft::handle_t const&, bool, bool) const; } // namespace test diff --git a/cpp/tests/components/wcc_graphs.hpp b/cpp/tests/components/wcc_graphs.hpp index 18989b9b46f..cea82cb95bc 100644 --- a/cpp/tests/components/wcc_graphs.hpp +++ b/cpp/tests/components/wcc_graphs.hpp @@ -29,7 +29,7 @@ class LineGraph_Usecase { bool multi_gpu> std::tuple< cugraph::experimental::graph_t, - rmm::device_uvector> + std::optional>> construct_graph(raft::handle_t const& handle, bool test_weighted, bool renumber = true) const; private: diff --git a/cpp/tests/components/weakly_connected_components_test.cpp b/cpp/tests/components/weakly_connected_components_test.cpp index 6523b6a280a..fe87e806687 100644 --- a/cpp/tests/components/weakly_connected_components_test.cpp +++ b/cpp/tests/components/weakly_connected_components_test.cpp @@ -118,9 +118,7 @@ class Tests_WeaklyConnectedComponent hr_clock.start(); } - cugraph::experimental::graph_t graph(handle); - rmm::device_uvector d_renumber_map_labels(0, handle.get_stream()); - std::tie(graph, d_renumber_map_labels) = + auto [graph, d_renumber_map_labels] = input_usecase.template construct_graph( handle, false, renumber); @@ -165,11 +163,11 @@ class Tests_WeaklyConnectedComponent std::vector h_offsets(unrenumbered_graph_view.get_number_of_vertices() + 1); std::vector h_indices(unrenumbered_graph_view.get_number_of_edges()); raft::update_host(h_offsets.data(), - unrenumbered_graph_view.offsets(), + unrenumbered_graph_view.get_matrix_partition_view().get_offsets(), unrenumbered_graph_view.get_number_of_vertices() + 1, handle.get_stream()); raft::update_host(h_indices.data(), - unrenumbered_graph_view.indices(), + unrenumbered_graph_view.get_matrix_partition_view().get_indices(), unrenumbered_graph_view.get_number_of_edges(), handle.get_stream()); @@ -187,8 +185,11 @@ class Tests_WeaklyConnectedComponent if (renumber) 
{ rmm::device_uvector d_unrenumbered_components(size_t{0}, handle.get_stream_view()); - std::tie(std::ignore, d_unrenumbered_components) = cugraph::test::sort_by_key( - handle, d_renumber_map_labels.data(), d_components.data(), d_renumber_map_labels.size()); + std::tie(std::ignore, d_unrenumbered_components) = + cugraph::test::sort_by_key(handle, + (*d_renumber_map_labels).data(), + d_components.data(), + (*d_renumber_map_labels).size()); raft::update_host(h_cugraph_components.data(), d_unrenumbered_components.data(), d_unrenumbered_components.size(), diff --git a/cpp/tests/experimental/bfs_test.cpp b/cpp/tests/experimental/bfs_test.cpp index 4114b695e49..8510d6698ca 100644 --- a/cpp/tests/experimental/bfs_test.cpp +++ b/cpp/tests/experimental/bfs_test.cpp @@ -113,9 +113,8 @@ class Tests_BFS : public ::testing::TestWithParam graph(handle); - rmm::device_uvector d_renumber_map_labels(0, handle.get_stream()); - std::tie(graph, d_renumber_map_labels) = + + auto [graph, d_renumber_map_labels] = input_usecase.template construct_graph( handle, true, renumber); @@ -169,11 +168,11 @@ class Tests_BFS : public ::testing::TestWithParam h_offsets(unrenumbered_graph_view.get_number_of_vertices() + 1); std::vector h_indices(unrenumbered_graph_view.get_number_of_edges()); raft::update_host(h_offsets.data(), - unrenumbered_graph_view.offsets(), + unrenumbered_graph_view.get_matrix_partition_view().get_offsets(), unrenumbered_graph_view.get_number_of_vertices() + 1, handle.get_stream()); raft::update_host(h_indices.data(), - unrenumbered_graph_view.indices(), + unrenumbered_graph_view.get_matrix_partition_view().get_indices(), unrenumbered_graph_view.get_number_of_edges(), handle.get_stream()); @@ -181,10 +180,10 @@ class Tests_BFS : public ::testing::TestWithParam(bfs_usecase.source); if (renumber) { - std::vector h_renumber_map_labels(d_renumber_map_labels.size()); + std::vector h_renumber_map_labels((*d_renumber_map_labels).size()); raft::update_host(h_renumber_map_labels.data(), - d_renumber_map_labels.data(), - d_renumber_map_labels.size(), + (*d_renumber_map_labels).data(), + (*d_renumber_map_labels).size(), handle.get_stream()); handle.get_stream_view().synchronize(); @@ -210,20 +209,23 @@ class Tests_BFS : public ::testing::TestWithParam d_unrenumbered_distances(size_t{0}, handle.get_stream()); - std::tie(std::ignore, d_unrenumbered_distances) = cugraph::test::sort_by_key( - handle, d_renumber_map_labels.data(), d_distances.data(), d_renumber_map_labels.size()); + std::tie(std::ignore, d_unrenumbered_distances) = + cugraph::test::sort_by_key(handle, + (*d_renumber_map_labels).data(), + d_distances.data(), + (*d_renumber_map_labels).size()); rmm::device_uvector d_unrenumbered_predecessors(size_t{0}, handle.get_stream()); std::tie(std::ignore, d_unrenumbered_predecessors) = cugraph::test::sort_by_key(handle, - d_renumber_map_labels.data(), + (*d_renumber_map_labels).data(), d_predecessors.data(), - d_renumber_map_labels.size()); + (*d_renumber_map_labels).size()); raft::update_host(h_cugraph_distances.data(), d_unrenumbered_distances.data(), d_unrenumbered_distances.size(), diff --git a/cpp/tests/experimental/coarsen_graph_test.cpp b/cpp/tests/experimental/coarsen_graph_test.cpp index 7f76094fa0f..aaee198a4b0 100644 --- a/cpp/tests/experimental/coarsen_graph_test.cpp +++ b/cpp/tests/experimental/coarsen_graph_test.cpp @@ -301,17 +301,17 @@ class Tests_CoarsenGraph : public ::testing::TestWithParam std::vector h_org_indices(graph_view.get_number_of_edges()); std::vector h_org_weights{}; 
raft::update_host(h_org_offsets.data(), - graph_view.offsets(), + graph_view.get_matrix_partition_view().get_offsets(), graph_view.get_number_of_vertices() + 1, handle.get_stream()); raft::update_host(h_org_indices.data(), - graph_view.indices(), + graph_view.get_matrix_partition_view().get_indices(), graph_view.get_number_of_edges(), handle.get_stream()); if (graph_view.is_weighted()) { h_org_weights.assign(graph_view.get_number_of_edges(), weight_t{0.0}); raft::update_host(h_org_weights.data(), - graph_view.weights(), + *(graph_view.get_matrix_partition_view().get_weights()), graph_view.get_number_of_edges(), handle.get_stream()); } @@ -322,17 +322,17 @@ class Tests_CoarsenGraph : public ::testing::TestWithParam std::vector h_coarse_indices(coarse_graph_view.get_number_of_edges()); std::vector h_coarse_weights{}; raft::update_host(h_coarse_offsets.data(), - coarse_graph_view.offsets(), + coarse_graph_view.get_matrix_partition_view().get_offsets(), coarse_graph_view.get_number_of_vertices() + 1, handle.get_stream()); raft::update_host(h_coarse_indices.data(), - coarse_graph_view.indices(), + coarse_graph_view.get_matrix_partition_view().get_indices(), coarse_graph_view.get_number_of_edges(), handle.get_stream()); if (graph_view.is_weighted()) { h_coarse_weights.resize(coarse_graph_view.get_number_of_edges()); raft::update_host(h_coarse_weights.data(), - coarse_graph_view.weights(), + *(coarse_graph_view.get_matrix_partition_view().get_weights()), coarse_graph_view.get_number_of_edges(), handle.get_stream()); } diff --git a/cpp/tests/experimental/degree_test.cpp b/cpp/tests/experimental/degree_test.cpp index 80f1b51f80c..94134e3426f 100644 --- a/cpp/tests/experimental/degree_test.cpp +++ b/cpp/tests/experimental/degree_test.cpp @@ -93,11 +93,11 @@ class Tests_Degree : public ::testing::TestWithParam { std::vector h_offsets(graph_view.get_number_of_vertices() + 1); std::vector h_indices(graph_view.get_number_of_edges()); raft::update_host(h_offsets.data(), - graph_view.offsets(), + graph_view.get_matrix_partition_view().get_offsets(), graph_view.get_number_of_vertices() + 1, handle.get_stream()); raft::update_host(h_indices.data(), - graph_view.indices(), + graph_view.get_matrix_partition_view().get_indices(), graph_view.get_number_of_edges(), handle.get_stream()); CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); diff --git a/cpp/tests/experimental/graph_test.cpp b/cpp/tests/experimental/graph_test.cpp index ae899c3ba33..dff841a5b73 100644 --- a/cpp/tests/experimental/graph_test.cpp +++ b/cpp/tests/experimental/graph_test.cpp @@ -34,16 +34,18 @@ #include template -std::tuple, std::vector, std::vector> graph_reference( - vertex_t const* p_src_vertices, - vertex_t const* p_dst_vertices, - weight_t const* p_edge_weights, - vertex_t number_of_vertices, - edge_t number_of_edges) +std::tuple, std::vector, std::optional>> +graph_reference(vertex_t const* p_src_vertices, + vertex_t const* p_dst_vertices, + std::optional p_edge_weights, + vertex_t number_of_vertices, + edge_t number_of_edges) { std::vector offsets(number_of_vertices + 1, edge_t{0}); std::vector indices(number_of_edges, vertex_t{0}); - std::vector weights(p_edge_weights != nullptr ? number_of_edges : 0, weight_t{0.0}); + auto weights = p_edge_weights + ? std::make_optional>(number_of_edges, weight_t{0.0}) + : std::nullopt; for (size_t i = 0; i < number_of_edges; ++i) { auto major = store_transposed ? 
p_dst_vertices[i] : p_src_vertices[i]; @@ -58,7 +60,7 @@ std::tuple, std::vector, std::vector> gr auto degree = offsets[major + 1] - start; auto idx = indices[start + degree - 1]++; indices[start + idx] = minor; - if (p_edge_weights != nullptr) { weights[start + idx] = p_edge_weights[i]; } + if (p_edge_weights) { (*weights)[start + idx] = (*p_edge_weights)[i]; } } return std::make_tuple(std::move(offsets), std::move(indices), std::move(weights)); @@ -93,43 +95,36 @@ class Tests_Graph : public ::testing::TestWithParam { { raft::handle_t handle{}; - rmm::device_uvector d_rows(0, handle.get_stream()); - rmm::device_uvector d_cols(0, handle.get_stream()); - rmm::device_uvector d_weights(0, handle.get_stream()); - vertex_t number_of_vertices{}; - bool is_symmetric{}; - std::tie(d_rows, d_cols, d_weights, number_of_vertices, is_symmetric) = + auto [d_rows, d_cols, d_weights, number_of_vertices, is_symmetric] = cugraph::test::read_edgelist_from_matrix_market_file( handle, configuration.graph_file_full_path, configuration.test_weighted); edge_t number_of_edges = static_cast(d_rows.size()); std::vector h_rows(number_of_edges); std::vector h_cols(number_of_edges); - std::vector h_weights(configuration.test_weighted ? number_of_edges : edge_t{0}); + auto h_weights = + d_weights ? std::make_optional>(number_of_edges) : std::nullopt; raft::update_host(h_rows.data(), d_rows.data(), number_of_edges, handle.get_stream()); raft::update_host(h_cols.data(), d_cols.data(), number_of_edges, handle.get_stream()); - if (configuration.test_weighted) { - raft::update_host(h_weights.data(), d_weights.data(), number_of_edges, handle.get_stream()); + if (h_weights) { + raft::update_host( + (*h_weights).data(), (*d_weights).data(), number_of_edges, handle.get_stream()); } CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); - std::vector h_reference_offsets{}; - std::vector h_reference_indices{}; - std::vector h_reference_weights{}; - - std::tie(h_reference_offsets, h_reference_indices, h_reference_weights) = + auto [h_reference_offsets, h_reference_indices, h_reference_weights] = graph_reference( h_rows.data(), h_cols.data(), - configuration.test_weighted ? h_weights.data() : static_cast(nullptr), + h_weights ? std::optional{(*h_weights).data()} : std::nullopt, number_of_vertices, number_of_edges); cugraph::experimental::edgelist_t edgelist{ d_rows.data(), d_cols.data(), - configuration.test_weighted ? d_weights.data() : nullptr, + d_weights ? std::optional{(*d_weights).data()} : std::nullopt, number_of_edges}; CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -139,8 +134,8 @@ class Tests_Graph : public ::testing::TestWithParam { handle, edgelist, number_of_vertices, - cugraph::experimental::graph_properties_t{is_symmetric, false, configuration.test_weighted}, - false, + cugraph::experimental::graph_properties_t{is_symmetric, false}, + std::nullopt, true); auto graph_view = graph.view(); @@ -152,20 +147,21 @@ class Tests_Graph : public ::testing::TestWithParam { std::vector h_cugraph_offsets(graph_view.get_number_of_vertices() + 1); std::vector h_cugraph_indices(graph_view.get_number_of_edges()); - std::vector h_cugraph_weights( - configuration.test_weighted ? graph_view.get_number_of_edges() : 0); + auto h_cugraph_weights = + graph.is_weighted() ? 
std::optional>(graph_view.get_number_of_edges()) + : std::nullopt; raft::update_host(h_cugraph_offsets.data(), - graph_view.offsets(), + graph_view.get_matrix_partition_view().get_offsets(), graph_view.get_number_of_vertices() + 1, handle.get_stream()); raft::update_host(h_cugraph_indices.data(), - graph_view.indices(), + graph_view.get_matrix_partition_view().get_indices(), graph_view.get_number_of_edges(), handle.get_stream()); - if (configuration.test_weighted) { - raft::update_host(h_cugraph_weights.data(), - graph_view.weights(), + if (h_cugraph_weights) { + raft::update_host((*h_cugraph_weights).data(), + *(graph_view.get_matrix_partition_view().get_weights()), graph_view.get_number_of_edges(), handle.get_stream()); } @@ -175,7 +171,10 @@ class Tests_Graph : public ::testing::TestWithParam { ASSERT_TRUE( std::equal(h_reference_offsets.begin(), h_reference_offsets.end(), h_cugraph_offsets.begin())) << "Graph compressed sparse format offsets do not match with the reference values."; - ASSERT_EQ(h_reference_weights.size(), h_cugraph_weights.size()); + ASSERT_EQ(h_reference_weights.has_value(), h_cugraph_weights.has_value()); + if (h_reference_weights) { + ASSERT_EQ((*h_reference_weights).size(), (*h_cugraph_weights).size()); + } for (vertex_t i = 0; i < number_of_vertices; ++i) { auto start = h_reference_offsets[i]; auto degree = h_reference_offsets[i + 1] - start; @@ -184,9 +183,9 @@ class Tests_Graph : public ::testing::TestWithParam { std::vector> cugraph_pairs(degree); for (edge_t j = 0; j < degree; ++j) { reference_pairs[j] = - std::make_tuple(h_reference_indices[start + j], h_reference_weights[start + j]); + std::make_tuple(h_reference_indices[start + j], (*h_reference_weights)[start + j]); cugraph_pairs[j] = - std::make_tuple(h_cugraph_indices[start + j], h_cugraph_weights[start + j]); + std::make_tuple(h_cugraph_indices[start + j], (*h_cugraph_weights)[start + j]); } std::sort(reference_pairs.begin(), reference_pairs.end()); std::sort(cugraph_pairs.begin(), cugraph_pairs.end()); diff --git a/cpp/tests/experimental/induced_subgraph_test.cpp b/cpp/tests/experimental/induced_subgraph_test.cpp index 8a69da1475a..3e2dbf4fe3c 100644 --- a/cpp/tests/experimental/induced_subgraph_test.cpp +++ b/cpp/tests/experimental/induced_subgraph_test.cpp @@ -37,10 +37,13 @@ #include template -std::tuple, std::vector, std::vector, std::vector> +std::tuple, + std::vector, + std::optional>, + std::vector> extract_induced_subgraph_reference(edge_t const* offsets, vertex_t const* indices, - weight_t const* weights, + std::optional weights, size_t const* subgraph_offsets, vertex_t const* subgraph_vertices, vertex_t num_vertices, @@ -48,7 +51,7 @@ extract_induced_subgraph_reference(edge_t const* offsets, { std::vector edgelist_majors{}; std::vector edgelist_minors{}; - std::vector edgelist_weights{}; + auto edgelist_weights = weights ? 
std::make_optional>(0) : std::nullopt; std::vector subgraph_edge_offsets{0}; for (size_t i = 0; i < num_subgraphs; ++i) { @@ -71,7 +74,7 @@ extract_induced_subgraph_reference(edge_t const* offsets, indices[j])) { edgelist_majors.push_back(v); edgelist_minors.push_back(indices[j]); - if (weights != nullptr) { edgelist_weights.push_back(weights[j]); } + if (weights) { (*edgelist_weights).push_back((*weights)[j]); } } } }); @@ -122,19 +125,20 @@ class Tests_InducedSubgraph : public ::testing::TestWithParam h_offsets(graph_view.get_number_of_vertices() + 1); std::vector h_indices(graph_view.get_number_of_edges()); - std::vector h_weights{}; + auto h_weights = graph_view.is_weighted() ? std::make_optional>( + graph_view.get_number_of_edges(), weight_t{0.0}) + : std::nullopt; raft::update_host(h_offsets.data(), - graph_view.offsets(), + graph_view.get_matrix_partition_view().get_offsets(), graph_view.get_number_of_vertices() + 1, handle.get_stream()); raft::update_host(h_indices.data(), - graph_view.indices(), + graph_view.get_matrix_partition_view().get_indices(), graph_view.get_number_of_edges(), handle.get_stream()); - if (graph_view.is_weighted()) { - h_weights.assign(graph_view.get_number_of_edges(), weight_t{0.0}); - raft::update_host(h_weights.data(), - graph_view.weights(), + if (h_weights) { + raft::update_host((*h_weights).data(), + *(graph_view.get_matrix_partition_view().get_weights()), graph_view.get_number_of_edges(), handle.get_stream()); } @@ -177,35 +181,26 @@ class Tests_InducedSubgraph : public ::testing::TestWithParam h_reference_subgraph_edgelist_majors{}; - std::vector h_reference_subgraph_edgelist_minors{}; - std::vector h_reference_subgraph_edgelist_weights{}; - std::vector h_reference_subgraph_edge_offsets{}; - std::tie(h_reference_subgraph_edgelist_majors, - h_reference_subgraph_edgelist_minors, - h_reference_subgraph_edgelist_weights, - h_reference_subgraph_edge_offsets) = + auto [h_reference_subgraph_edgelist_majors, + h_reference_subgraph_edgelist_minors, + h_reference_subgraph_edgelist_weights, + h_reference_subgraph_edge_offsets] = extract_induced_subgraph_reference( h_offsets.data(), h_indices.data(), - h_weights.size() > 0 ? h_weights.data() : static_cast(nullptr), + h_weights ? std::optional{(*h_weights).data()} : std::nullopt, h_subgraph_offsets.data(), h_subgraph_vertices.data(), graph_view.get_number_of_vertices(), configuration.subgraph_sizes.size()); - rmm::device_uvector d_subgraph_edgelist_majors(0, handle.get_stream()); - rmm::device_uvector d_subgraph_edgelist_minors(0, handle.get_stream()); - rmm::device_uvector d_subgraph_edgelist_weights(0, handle.get_stream()); - rmm::device_uvector d_subgraph_edge_offsets(0, handle.get_stream()); - CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement // FIXME: turn-off do_expensive_check once verified. 
- std::tie(d_subgraph_edgelist_majors, - d_subgraph_edgelist_minors, - d_subgraph_edgelist_weights, - d_subgraph_edge_offsets) = + auto [d_subgraph_edgelist_majors, + d_subgraph_edgelist_minors, + d_subgraph_edgelist_weights, + d_subgraph_edge_offsets] = cugraph::experimental::extract_induced_subgraphs(handle, graph_view, d_subgraph_offsets.data(), @@ -217,7 +212,10 @@ class Tests_InducedSubgraph : public ::testing::TestWithParam h_cugraph_subgraph_edgelist_majors(d_subgraph_edgelist_majors.size()); std::vector h_cugraph_subgraph_edgelist_minors(d_subgraph_edgelist_minors.size()); - std::vector h_cugraph_subgraph_edgelist_weights(d_subgraph_edgelist_weights.size()); + auto h_cugraph_subgraph_edgelist_weights = + d_subgraph_edgelist_weights + ? std::make_optional>((*d_subgraph_edgelist_weights).size()) + : std::nullopt; std::vector h_cugraph_subgraph_edge_offsets(d_subgraph_edge_offsets.size()); raft::update_host(h_cugraph_subgraph_edgelist_majors.data(), @@ -228,10 +226,10 @@ class Tests_InducedSubgraph : public ::testing::TestWithParam> reference_tuples(last - start); std::vector> cugraph_tuples(last - start); for (auto j = start; j < last; ++j) { - reference_tuples[j - start] = std::make_tuple(h_reference_subgraph_edgelist_majors[j], - h_reference_subgraph_edgelist_minors[j], - h_reference_subgraph_edgelist_weights[j]); - cugraph_tuples[j - start] = std::make_tuple(h_cugraph_subgraph_edgelist_majors[j], + reference_tuples[j - start] = + std::make_tuple(h_reference_subgraph_edgelist_majors[j], + h_reference_subgraph_edgelist_minors[j], + (*h_reference_subgraph_edgelist_weights)[j]); + cugraph_tuples[j - start] = std::make_tuple(h_cugraph_subgraph_edgelist_majors[j], h_cugraph_subgraph_edgelist_minors[j], - h_cugraph_subgraph_edgelist_weights[j]); + (*h_cugraph_subgraph_edgelist_weights)[j]); } ASSERT_TRUE( std::equal(reference_tuples.begin(), reference_tuples.end(), cugraph_tuples.begin())) diff --git a/cpp/tests/experimental/katz_centrality_test.cpp b/cpp/tests/experimental/katz_centrality_test.cpp index aa66e69d4f7..0a4fba9acd1 100644 --- a/cpp/tests/experimental/katz_centrality_test.cpp +++ b/cpp/tests/experimental/katz_centrality_test.cpp @@ -46,7 +46,7 @@ static int PERF = 0; template void katz_centrality_reference(edge_t const* offsets, vertex_t const* indices, - weight_t const* weights, + std::optional weights, result_t const* betas, result_t* katz_centralities, vertex_t num_vertices, @@ -71,7 +71,7 @@ void katz_centrality_reference(edge_t const* offsets, katz_centralities[i] = betas != nullptr ? betas[i] : beta; for (auto j = *(offsets + i); j < *(offsets + i + 1); ++j) { auto nbr = indices[j]; - auto w = weights != nullptr ? weights[j] : result_t{1.0}; + auto w = weights ? 
(*weights)[j] : result_t{1.0}; katz_centralities[i] += alpha * old_katz_centralities[nbr] * w; } } @@ -126,11 +126,10 @@ class Tests_KatzCentrality CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement hr_clock.start(); } - cugraph::experimental::graph_t graph(handle); - rmm::device_uvector d_renumber_map_labels(0, handle.get_stream()); - std::tie(graph, d_renumber_map_labels) = + + auto [graph, d_renumber_map_labels] = input_usecase.template construct_graph( - handle, true, renumber); + handle, katz_usecase.test_weighted, renumber); if (PERF) { CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -138,6 +137,7 @@ class Tests_KatzCentrality hr_clock.stop(&elapsed_time); std::cout << "construct_graph took " << elapsed_time * 1e-6 << " s.\n"; } + auto graph_view = graph.view(); auto degrees = graph_view.compute_in_degrees(handle); @@ -182,25 +182,27 @@ class Tests_KatzCentrality if (renumber) { std::tie(unrenumbered_graph, std::ignore) = input_usecase.template construct_graph( - handle, true, false); + handle, katz_usecase.test_weighted, false); } auto unrenumbered_graph_view = renumber ? unrenumbered_graph.view() : graph_view; std::vector h_offsets(unrenumbered_graph_view.get_number_of_vertices() + 1); std::vector h_indices(unrenumbered_graph_view.get_number_of_edges()); - std::vector h_weights{}; + auto h_weights = unrenumbered_graph_view.is_weighted() + ? std::make_optional>( + unrenumbered_graph_view.get_number_of_edges(), weight_t{0.0}) + : std::nullopt; raft::update_host(h_offsets.data(), - unrenumbered_graph_view.offsets(), + unrenumbered_graph_view.get_matrix_partition_view().get_offsets(), unrenumbered_graph_view.get_number_of_vertices() + 1, handle.get_stream()); raft::update_host(h_indices.data(), - unrenumbered_graph_view.indices(), + unrenumbered_graph_view.get_matrix_partition_view().get_indices(), unrenumbered_graph_view.get_number_of_edges(), handle.get_stream()); - if (unrenumbered_graph_view.is_weighted()) { - h_weights.assign(unrenumbered_graph_view.get_number_of_edges(), weight_t{0.0}); - raft::update_host(h_weights.data(), - unrenumbered_graph_view.weights(), + if (h_weights) { + raft::update_host((*h_weights).data(), + *(unrenumbered_graph_view.get_matrix_partition_view().get_weights()), unrenumbered_graph_view.get_number_of_edges(), handle.get_stream()); } @@ -213,7 +215,7 @@ class Tests_KatzCentrality katz_centrality_reference( h_offsets.data(), h_indices.data(), - h_weights.size() > 0 ? h_weights.data() : static_cast(nullptr), + h_weights ? 
std::optional{(*h_weights).data()} : std::nullopt, static_cast(nullptr), h_reference_katz_centralities.data(), unrenumbered_graph_view.get_number_of_vertices(), @@ -230,9 +232,9 @@ class Tests_KatzCentrality handle.get_stream()); std::tie(std::ignore, d_unrenumbered_katz_centralities) = cugraph::test::sort_by_key(handle, - d_renumber_map_labels.data(), + (*d_renumber_map_labels).data(), d_katz_centralities.data(), - d_renumber_map_labels.size()); + (*d_renumber_map_labels).size()); raft::update_host(h_cugraph_katz_centralities.data(), d_unrenumbered_katz_centralities.data(), d_unrenumbered_katz_centralities.size(), diff --git a/cpp/tests/experimental/mg_bfs_test.cpp b/cpp/tests/experimental/mg_bfs_test.cpp index 04eb1bf7b43..75d4d4ce6a3 100644 --- a/cpp/tests/experimental/mg_bfs_test.cpp +++ b/cpp/tests/experimental/mg_bfs_test.cpp @@ -86,9 +86,7 @@ class Tests_MGBFS : public ::testing::TestWithParam mg_graph(handle); - rmm::device_uvector d_mg_renumber_map_labels(0, handle.get_stream()); - std::tie(mg_graph, d_mg_renumber_map_labels) = + auto [mg_graph, d_mg_renumber_map_labels] = input_usecase.template construct_graph( handle, false, true); @@ -141,7 +139,7 @@ class Tests_MGBFS : public ::testing::TestWithParam h_sg_offsets(sg_graph_view.get_number_of_vertices() + 1); std::vector h_sg_indices(sg_graph_view.get_number_of_edges()); raft::update_host(h_sg_offsets.data(), - sg_graph_view.offsets(), + sg_graph_view.get_matrix_partition_view().get_offsets(), sg_graph_view.get_number_of_vertices() + 1, handle.get_stream()); raft::update_host(h_sg_indices.data(), - sg_graph_view.indices(), + sg_graph_view.get_matrix_partition_view().get_indices(), sg_graph_view.get_number_of_edges(), handle.get_stream()); @@ -237,14 +235,14 @@ class Tests_MGBFS : public ::testing::TestWithParam::value) { + if (h_mg_aggregate_predecessors[i] == + cugraph::experimental::invalid_vertex_id::value) { ASSERT_TRUE(h_sg_predecessors[i] == h_mg_aggregate_predecessors[i]) << "vertex reachability does not match with the SG result."; } else { diff --git a/cpp/tests/experimental/mg_katz_centrality_test.cpp b/cpp/tests/experimental/mg_katz_centrality_test.cpp index 27ef64d124e..d0120e3a2e8 100644 --- a/cpp/tests/experimental/mg_katz_centrality_test.cpp +++ b/cpp/tests/experimental/mg_katz_centrality_test.cpp @@ -82,11 +82,10 @@ class Tests_MGKatzCentrality handle.get_comms().barrier(); hr_clock.start(); } - cugraph::experimental::graph_t mg_graph(handle); - rmm::device_uvector d_mg_renumber_map_labels(0, handle.get_stream()); - std::tie(mg_graph, d_mg_renumber_map_labels) = + + auto [mg_graph, d_mg_renumber_map_labels] = input_usecase.template construct_graph( - handle, true, true); + handle, katz_usecase.test_weighted, true); if (PERF) { CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -141,7 +140,7 @@ class Tests_MGKatzCentrality // 5-1. 
aggregate MG results auto d_mg_aggregate_renumber_map_labels = cugraph::test::device_gatherv( - handle, d_mg_renumber_map_labels.data(), d_mg_renumber_map_labels.size()); + handle, (*d_mg_renumber_map_labels).data(), (*d_mg_renumber_map_labels).size()); auto d_mg_aggregate_katz_centralities = cugraph::test::device_gatherv( handle, d_mg_katz_centralities.data(), d_mg_katz_centralities.size()); @@ -159,7 +158,7 @@ class Tests_MGKatzCentrality cugraph::experimental::graph_t sg_graph(handle); std::tie(sg_graph, std::ignore) = input_usecase.template construct_graph( - handle, true, false); + handle, katz_usecase.test_weighted, false); auto sg_graph_view = sg_graph.view(); diff --git a/cpp/tests/experimental/mg_sssp_test.cpp b/cpp/tests/experimental/mg_sssp_test.cpp index da5120163df..efc40bd6b97 100644 --- a/cpp/tests/experimental/mg_sssp_test.cpp +++ b/cpp/tests/experimental/mg_sssp_test.cpp @@ -82,9 +82,8 @@ class Tests_MGSSSP : public ::testing::TestWithParam mg_graph(handle); - rmm::device_uvector d_mg_renumber_map_labels(0, handle.get_stream()); - std::tie(mg_graph, d_mg_renumber_map_labels) = + + auto [mg_graph, d_mg_renumber_map_labels] = input_usecase.template construct_graph( handle, true, true); @@ -136,7 +135,7 @@ class Tests_MGSSSP : public ::testing::TestWithParam h_sg_indices(sg_graph_view.get_number_of_edges()); std::vector h_sg_weights(sg_graph_view.get_number_of_edges()); raft::update_host(h_sg_offsets.data(), - sg_graph_view.offsets(), + sg_graph_view.get_matrix_partition_view().get_offsets(), sg_graph_view.get_number_of_vertices() + 1, handle.get_stream()); raft::update_host(h_sg_indices.data(), - sg_graph_view.indices(), + sg_graph_view.get_matrix_partition_view().get_indices(), sg_graph_view.get_number_of_edges(), handle.get_stream()); raft::update_host(h_sg_weights.data(), - sg_graph_view.weights(), + *(sg_graph_view.get_matrix_partition_view().get_weights()), sg_graph_view.get_number_of_edges(), handle.get_stream()); @@ -249,7 +248,8 @@ class Tests_MGSSSP : public ::testing::TestWithParam::value) { + if (h_mg_aggregate_predecessors[i] == + cugraph::experimental::invalid_vertex_id::value) { ASSERT_TRUE(h_sg_predecessors[i] == h_mg_aggregate_predecessors[i]) << "vertex reachability does not match with the SG result."; } else { diff --git a/cpp/tests/experimental/pagerank_test.cpp b/cpp/tests/experimental/pagerank_test.cpp index 5c0b0f288d4..105cf38acef 100644 --- a/cpp/tests/experimental/pagerank_test.cpp +++ b/cpp/tests/experimental/pagerank_test.cpp @@ -47,12 +47,12 @@ static int PERF = 0; template void pagerank_reference(edge_t const* offsets, vertex_t const* indices, - weight_t const* weights, - vertex_t const* personalization_vertices, - result_t const* personalization_values, + std::optional weights, + std::optional personalization_vertices, + std::optional personalization_values, + std::optional personalization_vector_size, result_t* pageranks, vertex_t num_vertices, - vertex_t personalization_vector_size, result_t alpha, result_t epsilon, size_t max_iterations, @@ -75,12 +75,14 @@ void pagerank_reference(edge_t const* offsets, } result_t personalization_sum{0.0}; - if (personalization_vertices != nullptr) { + if (personalization_vertices) { // use a double type counter (instead of result_t) to accumulate as std::accumulate is // inaccurate in adding a large number of comparably sized numbers. In C++17 or later, // std::reduce may be a better option. 
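The comment directly above explains why the reference implementation accumulates in a double even though the values are result_t: when many comparably sized terms are summed, a float-width running sum loses accuracy. A small standalone example of that effect, independent of this patch (taking result_t to be float for concreteness; the vector size and value are arbitrary, chosen only to make the effect visible):

#include <cstdio>
#include <numeric>
#include <vector>

int main()
{
  std::vector<float> values(1'000'000, 0.1f);  // exact sum is very close to 100000

  // running sum kept in float: rounding error grows once the sum dwarfs the individual terms
  auto float_sum = std::accumulate(values.begin(), values.end(), float{0.0});

  // running sum kept in double, cast back once at the end (the pattern used in the code below)
  auto double_sum =
    static_cast<float>(std::accumulate(values.begin(), values.end(), double{0.0}));

  std::printf("float accumulator: %f\ndouble accumulator: %f\n", float_sum, double_sum);
  return 0;
}

With default round-to-nearest arithmetic the float accumulator can drift by around one percent here while the double accumulator stays essentially exact, which is why this reference code (and the comparable comment in the test fixture further down) casts back from a double sum only at the end.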
- personalization_sum = static_cast(std::accumulate( - personalization_values, personalization_values + personalization_vector_size, double{0.0})); + personalization_sum = + static_cast(std::accumulate(*personalization_values, + *personalization_values + *personalization_vector_size, + double{0.0})); ASSERT_TRUE(personalization_sum > 0.0); } @@ -88,7 +90,7 @@ void pagerank_reference(edge_t const* offsets, for (vertex_t i = 0; i < num_vertices; ++i) { for (auto j = *(offsets + i); j < *(offsets + i + 1); ++j) { auto nbr = indices[j]; - auto w = weights != nullptr ? weights[j] : 1.0; + auto w = weights ? (*weights)[j] : weight_t{1.0}; out_weight_sums[nbr] += w; } } @@ -105,19 +107,19 @@ void pagerank_reference(edge_t const* offsets, pageranks[i] = result_t{0.0}; for (auto j = *(offsets + i); j < *(offsets + i + 1); ++j) { auto nbr = indices[j]; - auto w = weights != nullptr ? weights[j] : result_t{1.0}; + auto w = weights ? (*weights)[j] : result_t{1.0}; pageranks[i] += alpha * old_pageranks[nbr] * (w / out_weight_sums[nbr]); } - if (personalization_vertices == nullptr) { + if (!personalization_vertices) { pageranks[i] += (dangling_sum * alpha + (1.0 - alpha)) / static_cast(num_vertices); } } - if (personalization_vertices != nullptr) { - for (vertex_t i = 0; i < personalization_vector_size; ++i) { - auto v = personalization_vertices[i]; + if (personalization_vertices) { + for (vertex_t i = 0; i < *personalization_vector_size; ++i) { + auto v = (*personalization_vertices)[i]; pageranks[v] += (dangling_sum * alpha + (1.0 - alpha)) * - (personalization_values[i] / personalization_sum); + ((*personalization_values)[i] / personalization_sum); } } result_t diff_sum{0.0}; @@ -162,61 +164,67 @@ class Tests_PageRank CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement hr_clock.start(); } - cugraph::experimental::graph_t graph(handle); - rmm::device_uvector d_renumber_map_labels(0, handle.get_stream()); - std::tie(graph, d_renumber_map_labels) = + + auto [graph, d_renumber_map_labels] = input_usecase.template construct_graph( - handle, true, renumber); + handle, pagerank_usecase.test_weighted, renumber); + if (PERF) { CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement double elapsed_time{0.0}; hr_clock.stop(&elapsed_time); std::cout << "construct_graph took " << elapsed_time * 1e-6 << " s.\n"; } + auto graph_view = graph.view(); - std::vector h_personalization_vertices{}; - std::vector h_personalization_values{}; + std::optional> h_personalization_vertices{std::nullopt}; + std::optional> h_personalization_values{std::nullopt}; if (pagerank_usecase.personalization_ratio > 0.0) { std::default_random_engine generator{}; std::uniform_real_distribution distribution{0.0, 1.0}; - h_personalization_vertices.resize(graph_view.get_number_of_local_vertices()); - std::iota(h_personalization_vertices.begin(), - h_personalization_vertices.end(), + h_personalization_vertices = std::vector(graph_view.get_number_of_local_vertices()); + std::iota((*h_personalization_vertices).begin(), + (*h_personalization_vertices).end(), graph_view.get_local_vertex_first()); - h_personalization_vertices.erase( - std::remove_if(h_personalization_vertices.begin(), - h_personalization_vertices.end(), - [&generator, &distribution, pagerank_usecase](auto v) { - return distribution(generator) >= pagerank_usecase.personalization_ratio; - }), - h_personalization_vertices.end()); - h_personalization_values.resize(h_personalization_vertices.size()); - 
std::for_each(h_personalization_values.begin(), - h_personalization_values.end(), + (*h_personalization_vertices) + .erase(std::remove_if((*h_personalization_vertices).begin(), + (*h_personalization_vertices).end(), + [&generator, &distribution, pagerank_usecase](auto v) { + return distribution(generator) >= + pagerank_usecase.personalization_ratio; + }), + (*h_personalization_vertices).end()); + h_personalization_values = std::vector((*h_personalization_vertices).size()); + std::for_each((*h_personalization_values).begin(), + (*h_personalization_values).end(), [&distribution, &generator](auto& val) { val = distribution(generator); }); // use a double type counter (instead of result_t) to accumulate as std::accumulate is // inaccurate in adding a large number of comparably sized numbers. In C++17 or later, // std::reduce may be a better option. auto sum = static_cast(std::accumulate( - h_personalization_values.begin(), h_personalization_values.end(), double{0.0})); - std::for_each(h_personalization_values.begin(), - h_personalization_values.end(), + (*h_personalization_values).begin(), (*h_personalization_values).end(), double{0.0})); + std::for_each((*h_personalization_values).begin(), + (*h_personalization_values).end(), [sum](auto& val) { val /= sum; }); } - rmm::device_uvector d_personalization_vertices(h_personalization_vertices.size(), - handle.get_stream()); - rmm::device_uvector d_personalization_values(d_personalization_vertices.size(), - handle.get_stream()); - if (d_personalization_vertices.size() > 0) { - raft::update_device(d_personalization_vertices.data(), - h_personalization_vertices.data(), - h_personalization_vertices.size(), + auto d_personalization_vertices = + h_personalization_vertices ? std::make_optional>( + (*h_personalization_vertices).size(), handle.get_stream()) + : std::nullopt; + auto d_personalization_values = h_personalization_values + ? std::make_optional>( + (*d_personalization_vertices).size(), handle.get_stream()) + : std::nullopt; + if (d_personalization_vertices) { + raft::update_device((*d_personalization_vertices).data(), + (*h_personalization_vertices).data(), + (*h_personalization_vertices).size(), handle.get_stream()); - raft::update_device(d_personalization_values.data(), - h_personalization_values.data(), - h_personalization_values.size(), + raft::update_device((*d_personalization_values).data(), + (*h_personalization_values).data(), + (*h_personalization_values).size(), handle.get_stream()); } @@ -231,18 +239,23 @@ class Tests_PageRank hr_clock.start(); } - cugraph::experimental::pagerank(handle, - graph_view, - static_cast(nullptr), - d_personalization_vertices.data(), - d_personalization_values.data(), - static_cast(d_personalization_vertices.size()), - d_pageranks.data(), - alpha, - epsilon, - std::numeric_limits::max(), - false, - false); + cugraph::experimental::pagerank( + handle, + graph_view, + std::nullopt, + d_personalization_vertices + ? std::optional{(*d_personalization_vertices).data()} + : std::nullopt, + d_personalization_values ? std::optional{(*d_personalization_values).data()} + : std::nullopt, + d_personalization_vertices ? 
std::optional{(*d_personalization_vertices).size()} + : std::nullopt, + d_pageranks.data(), + alpha, + epsilon, + std::numeric_limits::max(), + false, + false); if (PERF) { CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -257,100 +270,119 @@ class Tests_PageRank if (renumber) { std::tie(unrenumbered_graph, std::ignore) = input_usecase.template construct_graph( - handle, true, false); + handle, pagerank_usecase.test_weighted, false); } auto unrenumbered_graph_view = renumber ? unrenumbered_graph.view() : graph_view; std::vector h_offsets(unrenumbered_graph_view.get_number_of_vertices() + 1); std::vector h_indices(unrenumbered_graph_view.get_number_of_edges()); - std::vector h_weights{}; + auto h_weights = unrenumbered_graph_view.is_weighted() + ? std::make_optional>( + unrenumbered_graph_view.get_number_of_edges(), weight_t{0.0}) + : std::nullopt; raft::update_host(h_offsets.data(), - unrenumbered_graph_view.offsets(), + unrenumbered_graph_view.get_matrix_partition_view().get_offsets(), unrenumbered_graph_view.get_number_of_vertices() + 1, handle.get_stream()); raft::update_host(h_indices.data(), - unrenumbered_graph_view.indices(), + unrenumbered_graph_view.get_matrix_partition_view().get_indices(), unrenumbered_graph_view.get_number_of_edges(), handle.get_stream()); - if (unrenumbered_graph_view.is_weighted()) { - h_weights.assign(unrenumbered_graph_view.get_number_of_edges(), weight_t{0.0}); - raft::update_host(h_weights.data(), - unrenumbered_graph_view.weights(), + if (h_weights) { + raft::update_host((*h_weights).data(), + *(unrenumbered_graph_view.get_matrix_partition_view().get_weights()), unrenumbered_graph_view.get_number_of_edges(), handle.get_stream()); } - std::vector h_unrenumbered_personalization_vertices( - d_personalization_vertices.size()); - std::vector h_unrenumbered_personalization_values( - h_unrenumbered_personalization_vertices.size()); - if (renumber) { - rmm::device_uvector d_unrenumbered_personalization_vertices( - d_personalization_vertices.size(), handle.get_stream()); - rmm::device_uvector d_unrenumbered_personalization_values( - d_unrenumbered_personalization_vertices.size(), handle.get_stream()); - raft::copy_async(d_unrenumbered_personalization_vertices.data(), - d_personalization_vertices.data(), - d_personalization_vertices.size(), - handle.get_stream()); - raft::copy_async(d_unrenumbered_personalization_values.data(), - d_personalization_values.data(), - d_personalization_values.size(), - handle.get_stream()); - cugraph::experimental::unrenumber_local_int_vertices( - handle, - d_unrenumbered_personalization_vertices.data(), - d_unrenumbered_personalization_vertices.size(), - d_renumber_map_labels.data(), - vertex_t{0}, - graph_view.get_number_of_vertices()); - std::tie(d_unrenumbered_personalization_vertices, d_unrenumbered_personalization_values) = - cugraph::test::sort_by_key(handle, - d_unrenumbered_personalization_vertices.data(), - d_unrenumbered_personalization_values.data(), - d_unrenumbered_personalization_vertices.size()); - - raft::update_host(h_unrenumbered_personalization_vertices.data(), - d_unrenumbered_personalization_vertices.data(), - d_unrenumbered_personalization_vertices.size(), - handle.get_stream()); - raft::update_host(h_unrenumbered_personalization_values.data(), - d_unrenumbered_personalization_values.data(), - d_unrenumbered_personalization_values.size(), - handle.get_stream()); - } else { - raft::update_host(h_unrenumbered_personalization_vertices.data(), - d_personalization_vertices.data(), - 
d_personalization_vertices.size(), - handle.get_stream()); - raft::update_host(h_unrenumbered_personalization_values.data(), - d_personalization_values.data(), - d_personalization_values.size(), - handle.get_stream()); + auto h_unrenumbered_personalization_vertices = + d_personalization_vertices + ? std::make_optional>((*d_personalization_vertices).size()) + : std::nullopt; + auto h_unrenumbered_personalization_values = + d_personalization_vertices + ? std::make_optional>((*d_personalization_vertices).size()) + : std::nullopt; + if (h_unrenumbered_personalization_vertices) { + if (renumber) { + rmm::device_uvector d_unrenumbered_personalization_vertices( + (*d_personalization_vertices).size(), handle.get_stream()); + rmm::device_uvector d_unrenumbered_personalization_values( + d_unrenumbered_personalization_vertices.size(), handle.get_stream()); + raft::copy_async(d_unrenumbered_personalization_vertices.data(), + (*d_personalization_vertices).data(), + (*d_personalization_vertices).size(), + handle.get_stream()); + raft::copy_async(d_unrenumbered_personalization_values.data(), + (*d_personalization_values).data(), + (*d_personalization_values).size(), + handle.get_stream()); + cugraph::experimental::unrenumber_local_int_vertices( + handle, + d_unrenumbered_personalization_vertices.data(), + d_unrenumbered_personalization_vertices.size(), + (*d_renumber_map_labels).data(), + vertex_t{0}, + graph_view.get_number_of_vertices()); + std::tie(d_unrenumbered_personalization_vertices, d_unrenumbered_personalization_values) = + cugraph::test::sort_by_key(handle, + d_unrenumbered_personalization_vertices.data(), + d_unrenumbered_personalization_values.data(), + d_unrenumbered_personalization_vertices.size()); + + raft::update_host((*h_unrenumbered_personalization_vertices).data(), + d_unrenumbered_personalization_vertices.data(), + d_unrenumbered_personalization_vertices.size(), + handle.get_stream()); + raft::update_host((*h_unrenumbered_personalization_values).data(), + d_unrenumbered_personalization_values.data(), + d_unrenumbered_personalization_values.size(), + handle.get_stream()); + } else { + raft::update_host((*h_unrenumbered_personalization_vertices).data(), + (*d_personalization_vertices).data(), + (*d_personalization_vertices).size(), + handle.get_stream()); + raft::update_host((*h_unrenumbered_personalization_values).data(), + (*d_personalization_values).data(), + (*d_personalization_values).size(), + handle.get_stream()); + } } handle.get_stream_view().synchronize(); std::vector h_reference_pageranks(unrenumbered_graph_view.get_number_of_vertices()); - pagerank_reference(h_offsets.data(), - h_indices.data(), - h_weights.size() > 0 ? h_weights.data() : static_cast(nullptr), - h_unrenumbered_personalization_vertices.data(), - h_unrenumbered_personalization_values.data(), - h_reference_pageranks.data(), - unrenumbered_graph_view.get_number_of_vertices(), - static_cast(h_personalization_vertices.size()), - alpha, - epsilon, - std::numeric_limits::max(), - false); + pagerank_reference( + h_offsets.data(), + h_indices.data(), + h_weights ? std::optional{(*h_weights).data()} : std::nullopt, + h_unrenumbered_personalization_vertices + ? std::optional{(*h_unrenumbered_personalization_vertices).data()} + : std::nullopt, + h_unrenumbered_personalization_values + ? std::optional{(*h_unrenumbered_personalization_values).data()} + : std::nullopt, + h_unrenumbered_personalization_vertices + ? 
std::optional{static_cast( + (*h_unrenumbered_personalization_vertices).size())} + : std::nullopt, + h_reference_pageranks.data(), + unrenumbered_graph_view.get_number_of_vertices(), + alpha, + epsilon, + std::numeric_limits::max(), + false); std::vector h_cugraph_pageranks(graph_view.get_number_of_vertices()); if (renumber) { rmm::device_uvector d_unrenumbered_pageranks(size_t{0}, handle.get_stream()); - std::tie(std::ignore, d_unrenumbered_pageranks) = cugraph::test::sort_by_key( - handle, d_renumber_map_labels.data(), d_pageranks.data(), d_renumber_map_labels.size()); + std::tie(std::ignore, d_unrenumbered_pageranks) = + cugraph::test::sort_by_key(handle, + (*d_renumber_map_labels).data(), + d_pageranks.data(), + (*d_renumber_map_labels).size()); raft::update_host(h_cugraph_pageranks.data(), d_unrenumbered_pageranks.data(), d_unrenumbered_pageranks.size(), diff --git a/cpp/tests/experimental/sssp_test.cpp b/cpp/tests/experimental/sssp_test.cpp index 91f0edcbf47..07947a7a059 100644 --- a/cpp/tests/experimental/sssp_test.cpp +++ b/cpp/tests/experimental/sssp_test.cpp @@ -116,11 +116,11 @@ class Tests_SSSP : public ::testing::TestWithParam graph(handle); - rmm::device_uvector d_renumber_map_labels(0, handle.get_stream()); - std::tie(graph, d_renumber_map_labels) = + + auto [graph, d_renumber_map_labels] = input_usecase.template construct_graph( handle, true, renumber); + if (PERF) { CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement double elapsed_time{0.0}; @@ -172,15 +172,15 @@ class Tests_SSSP : public ::testing::TestWithParam h_indices(unrenumbered_graph_view.get_number_of_edges()); std::vector h_weights(unrenumbered_graph_view.get_number_of_edges()); raft::update_host(h_offsets.data(), - unrenumbered_graph_view.offsets(), + unrenumbered_graph_view.get_matrix_partition_view().get_offsets(), unrenumbered_graph_view.get_number_of_vertices() + 1, handle.get_stream()); raft::update_host(h_indices.data(), - unrenumbered_graph_view.indices(), + unrenumbered_graph_view.get_matrix_partition_view().get_indices(), unrenumbered_graph_view.get_number_of_edges(), handle.get_stream()); raft::update_host(h_weights.data(), - unrenumbered_graph_view.weights(), + *(unrenumbered_graph_view.get_matrix_partition_view().get_weights()), unrenumbered_graph_view.get_number_of_edges(), handle.get_stream()); @@ -188,10 +188,10 @@ class Tests_SSSP : public ::testing::TestWithParam(sssp_usecase.source); if (renumber) { - std::vector h_renumber_map_labels(d_renumber_map_labels.size()); + std::vector h_renumber_map_labels((*d_renumber_map_labels).size()); raft::update_host(h_renumber_map_labels.data(), - d_renumber_map_labels.data(), - d_renumber_map_labels.size(), + (*d_renumber_map_labels).data(), + (*d_renumber_map_labels).size(), handle.get_stream()); handle.get_stream_view().synchronize(); @@ -218,20 +218,23 @@ class Tests_SSSP : public ::testing::TestWithParam d_unrenumbered_distances(size_t{0}, handle.get_stream()); - std::tie(std::ignore, d_unrenumbered_distances) = cugraph::test::sort_by_key( - handle, d_renumber_map_labels.data(), d_distances.data(), d_renumber_map_labels.size()); + std::tie(std::ignore, d_unrenumbered_distances) = + cugraph::test::sort_by_key(handle, + (*d_renumber_map_labels).data(), + d_distances.data(), + (*d_renumber_map_labels).size()); rmm::device_uvector d_unrenumbered_predecessors(size_t{0}, handle.get_stream()); std::tie(std::ignore, d_unrenumbered_predecessors) = cugraph::test::sort_by_key(handle, - d_renumber_map_labels.data(), + 
(*d_renumber_map_labels).data(), d_predecessors.data(), - d_renumber_map_labels.size()); + (*d_renumber_map_labels).size()); raft::update_host(h_cugraph_distances.data(), d_unrenumbered_distances.data(), diff --git a/cpp/tests/experimental/weight_sum_test.cpp b/cpp/tests/experimental/weight_sum_test.cpp index 0320438c9a6..70c42da6136 100644 --- a/cpp/tests/experimental/weight_sum_test.cpp +++ b/cpp/tests/experimental/weight_sum_test.cpp @@ -96,15 +96,15 @@ class Tests_WeightSum : public ::testing::TestWithParam { std::vector h_indices(graph_view.get_number_of_edges()); std::vector h_weights(graph_view.get_number_of_edges()); raft::update_host(h_offsets.data(), - graph_view.offsets(), + graph_view.get_matrix_partition_view().get_offsets(), graph_view.get_number_of_vertices() + 1, handle.get_stream()); raft::update_host(h_indices.data(), - graph_view.indices(), + graph_view.get_matrix_partition_view().get_indices(), graph_view.get_number_of_edges(), handle.get_stream()); raft::update_host(h_weights.data(), - graph_view.weights(), + *(graph_view.get_matrix_partition_view().get_weights()), graph_view.get_number_of_edges(), handle.get_stream()); CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); diff --git a/cpp/tests/pagerank/mg_pagerank_test.cpp b/cpp/tests/pagerank/mg_pagerank_test.cpp index 6370c7b7758..27d9bd1cbd1 100644 --- a/cpp/tests/pagerank/mg_pagerank_test.cpp +++ b/cpp/tests/pagerank/mg_pagerank_test.cpp @@ -85,10 +85,10 @@ class Tests_MGPageRank handle.get_comms().barrier(); hr_clock.start(); } - cugraph::experimental::graph_t mg_graph(handle); - rmm::device_uvector d_mg_renumber_map_labels(0, handle.get_stream()); - std::tie(mg_graph, d_mg_renumber_map_labels) = - input_usecase.template construct_graph(handle, true); + + auto [mg_graph, d_mg_renumber_map_labels] = + input_usecase.template construct_graph( + handle, pagerank_usecase.test_weighted, true); if (PERF) { CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -102,41 +102,48 @@ class Tests_MGPageRank // 3. 
generate personalization vertex/value pairs - std::vector h_mg_personalization_vertices{}; - std::vector h_mg_personalization_values{}; + std::optional> h_mg_personalization_vertices{std::nullopt}; + std::optional> h_mg_personalization_values{std::nullopt}; if (pagerank_usecase.personalization_ratio > 0.0) { std::default_random_engine generator{ static_cast(comm.get_rank()) /* seed */}; std::uniform_real_distribution distribution{0.0, 1.0}; - h_mg_personalization_vertices.resize(mg_graph_view.get_number_of_local_vertices()); - std::iota(h_mg_personalization_vertices.begin(), - h_mg_personalization_vertices.end(), + h_mg_personalization_vertices = + std::vector(mg_graph_view.get_number_of_local_vertices()); + std::iota((*h_mg_personalization_vertices).begin(), + (*h_mg_personalization_vertices).end(), mg_graph_view.get_local_vertex_first()); - h_mg_personalization_vertices.erase( - std::remove_if(h_mg_personalization_vertices.begin(), - h_mg_personalization_vertices.end(), - [&generator, &distribution, pagerank_usecase](auto v) { - return distribution(generator) >= pagerank_usecase.personalization_ratio; - }), - h_mg_personalization_vertices.end()); - h_mg_personalization_values.resize(h_mg_personalization_vertices.size()); - std::for_each(h_mg_personalization_values.begin(), - h_mg_personalization_values.end(), + (*h_mg_personalization_vertices) + .erase(std::remove_if((*h_mg_personalization_vertices).begin(), + (*h_mg_personalization_vertices).end(), + [&generator, &distribution, pagerank_usecase](auto v) { + return distribution(generator) >= + pagerank_usecase.personalization_ratio; + }), + (*h_mg_personalization_vertices).end()); + h_mg_personalization_values = std::vector((*h_mg_personalization_vertices).size()); + std::for_each((*h_mg_personalization_values).begin(), + (*h_mg_personalization_values).end(), [&distribution, &generator](auto& val) { val = distribution(generator); }); } - rmm::device_uvector d_mg_personalization_vertices( - h_mg_personalization_vertices.size(), handle.get_stream()); - rmm::device_uvector d_mg_personalization_values(d_mg_personalization_vertices.size(), - handle.get_stream()); - if (d_mg_personalization_vertices.size() > 0) { - raft::update_device(d_mg_personalization_vertices.data(), - h_mg_personalization_vertices.data(), - h_mg_personalization_vertices.size(), + auto d_mg_personalization_vertices = + h_mg_personalization_vertices + ? std::make_optional>((*h_mg_personalization_vertices).size(), + handle.get_stream()) + : std::nullopt; + auto d_mg_personalization_values = + h_mg_personalization_values ? 
std::make_optional>( + (*d_mg_personalization_vertices).size(), handle.get_stream()) + : std::nullopt; + if (d_mg_personalization_vertices) { + raft::update_device((*d_mg_personalization_vertices).data(), + (*h_mg_personalization_vertices).data(), + (*h_mg_personalization_vertices).size(), handle.get_stream()); - raft::update_device(d_mg_personalization_values.data(), - h_mg_personalization_values.data(), - h_mg_personalization_values.size(), + raft::update_device((*d_mg_personalization_values).data(), + (*h_mg_personalization_values).data(), + (*h_mg_personalization_values).size(), handle.get_stream()); } @@ -154,17 +161,24 @@ class Tests_MGPageRank hr_clock.start(); } - cugraph::experimental::pagerank(handle, - mg_graph_view, - static_cast(nullptr), - d_mg_personalization_vertices.data(), - d_mg_personalization_values.data(), - static_cast(d_mg_personalization_vertices.size()), - d_mg_pageranks.data(), - alpha, - epsilon, - std::numeric_limits::max(), - false); + cugraph::experimental::pagerank( + handle, + mg_graph_view, + std::nullopt, + d_mg_personalization_vertices + ? std::optional{(*d_mg_personalization_vertices).data()} + : std::nullopt, + d_mg_personalization_values + ? std::optional{(*d_mg_personalization_values).data()} + : std::nullopt, + d_mg_personalization_vertices + ? std::optional{static_cast((*d_mg_personalization_vertices).size())} + : std::nullopt, + d_mg_pageranks.data(), + alpha, + epsilon, + std::numeric_limits::max(), + false); if (PERF) { CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -180,30 +194,40 @@ class Tests_MGPageRank // 5-1. aggregate MG results auto d_mg_aggregate_renumber_map_labels = cugraph::test::device_gatherv( - handle, d_mg_renumber_map_labels.data(), d_mg_renumber_map_labels.size()); - auto d_mg_aggregate_personalization_vertices = cugraph::test::device_gatherv( - handle, d_mg_personalization_vertices.data(), d_mg_personalization_vertices.size()); - auto d_mg_aggregate_personalization_values = cugraph::test::device_gatherv( - handle, d_mg_personalization_values.data(), d_mg_personalization_values.size()); + handle, (*d_mg_renumber_map_labels).data(), (*d_mg_renumber_map_labels).size()); + auto d_mg_aggregate_personalization_vertices = + d_mg_personalization_vertices + ? std::optional>{cugraph::test::device_gatherv( + handle, + (*d_mg_personalization_vertices).data(), + (*d_mg_personalization_vertices).size())} + : std::nullopt; + auto d_mg_aggregate_personalization_values = + d_mg_personalization_values + ? std::optional>{cugraph::test::device_gatherv( + handle, (*d_mg_personalization_values).data(), (*d_mg_personalization_values).size())} + : std::nullopt; auto d_mg_aggregate_pageranks = cugraph::test::device_gatherv(handle, d_mg_pageranks.data(), d_mg_pageranks.size()); if (handle.get_comms().get_rank() == int{0}) { // 5-2. 
unrenumbr MG results - cugraph::experimental::unrenumber_int_vertices( - handle, - d_mg_aggregate_personalization_vertices.data(), - d_mg_aggregate_personalization_vertices.size(), - d_mg_aggregate_renumber_map_labels.data(), - vertex_t{0}, - mg_graph_view.get_number_of_vertices(), - std::vector{mg_graph_view.get_number_of_vertices()}); - std::tie(d_mg_aggregate_personalization_vertices, d_mg_aggregate_personalization_values) = - cugraph::test::sort_by_key(handle, - d_mg_aggregate_personalization_vertices.data(), - d_mg_aggregate_personalization_values.data(), - d_mg_aggregate_personalization_vertices.size()); + if (d_mg_aggregate_personalization_vertices) { + cugraph::experimental::unrenumber_int_vertices( + handle, + (*d_mg_aggregate_personalization_vertices).data(), + (*d_mg_aggregate_personalization_vertices).size(), + d_mg_aggregate_renumber_map_labels.data(), + vertex_t{0}, + mg_graph_view.get_number_of_vertices(), + std::vector{mg_graph_view.get_number_of_vertices()}); + std::tie(d_mg_aggregate_personalization_vertices, d_mg_aggregate_personalization_values) = + cugraph::test::sort_by_key(handle, + (*d_mg_aggregate_personalization_vertices).data(), + (*d_mg_aggregate_personalization_values).data(), + (*d_mg_aggregate_personalization_vertices).size()); + } std::tie(std::ignore, d_mg_aggregate_pageranks) = cugraph::test::sort_by_key(handle, d_mg_aggregate_renumber_map_labels.data(), @@ -215,7 +239,7 @@ class Tests_MGPageRank cugraph::experimental::graph_t sg_graph(handle); std::tie(sg_graph, std::ignore) = input_usecase.template construct_graph( - handle, true, false); + handle, pagerank_usecase.test_weighted, false); auto sg_graph_view = sg_graph.view(); @@ -226,13 +250,20 @@ class Tests_MGPageRank rmm::device_uvector d_sg_pageranks(sg_graph_view.get_number_of_vertices(), handle.get_stream()); - cugraph::experimental::pagerank( + cugraph::experimental::pagerank( handle, sg_graph_view, - static_cast(nullptr), - d_mg_aggregate_personalization_vertices.data(), - d_mg_aggregate_personalization_values.data(), - static_cast(d_mg_aggregate_personalization_vertices.size()), + std::nullopt, + d_mg_aggregate_personalization_vertices + ? std::optional{(*d_mg_aggregate_personalization_vertices).data()} + : std::nullopt, + d_mg_aggregate_personalization_values + ? std::optional{(*d_mg_aggregate_personalization_values).data()} + : std::nullopt, + d_mg_aggregate_personalization_vertices + ? 
std::optional{static_cast( + (*d_mg_aggregate_personalization_vertices).size())} + : std::nullopt, d_sg_pageranks.data(), alpha, epsilon, diff --git a/cpp/tests/sampling/random_walks_utils.cuh b/cpp/tests/sampling/random_walks_utils.cuh index 5e7d95e7ceb..df42242e6fe 100644 --- a/cpp/tests/sampling/random_walks_utils.cuh +++ b/cpp/tests/sampling/random_walks_utils.cuh @@ -96,9 +96,9 @@ bool host_check_rw_paths( edge_t num_edges = graph_view.get_number_of_edges(); vertex_t num_vertices = graph_view.get_number_of_vertices(); - edge_t const* offsets = graph_view.offsets(); - vertex_t const* indices = graph_view.indices(); - weight_t const* values = graph_view.weights(); + auto offsets = graph_view.get_matrix_partition_view().get_offsets(); + auto indices = graph_view.get_matrix_partition_view().get_indices(); + auto values = graph_view.get_matrix_partition_view().get_weights(); std::vector v_ro(num_vertices + 1); std::vector v_ci(num_edges); @@ -108,9 +108,7 @@ bool host_check_rw_paths( raft::update_host(v_ro.data(), offsets, v_ro.size(), handle.get_stream()); raft::update_host(v_ci.data(), indices, v_ci.size(), handle.get_stream()); - if (graph_view.is_weighted()) { - raft::update_host(v_vals.data(), values, v_vals.size(), handle.get_stream()); - } + if (values) { raft::update_host(v_vals.data(), *values, v_vals.size(), handle.get_stream()); } std::vector v_coalesced(d_coalesced_v.size()); std::vector w_coalesced(d_coalesced_w.size()); diff --git a/cpp/tests/sampling/rw_low_level_test.cu b/cpp/tests/sampling/rw_low_level_test.cu index 2d95a3672e2..3b2779a5814 100644 --- a/cpp/tests/sampling/rw_low_level_test.cu +++ b/cpp/tests/sampling/rw_low_level_test.cu @@ -93,13 +93,14 @@ TEST_F(RandomWalksPrimsTest, SimpleGraphRWStart) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = cugraph::test::make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); + auto graph = cugraph::test::make_graph( + handle, v_src, v_dst, std::optional>{v_w}, num_vertices, num_edges); auto graph_view = graph.view(); - edge_t const* offsets = graph_view.offsets(); - vertex_t const* indices = graph_view.indices(); - weight_t const* values = graph_view.weights(); + edge_t const* offsets = graph_view.get_matrix_partition_view().get_offsets(); + vertex_t const* indices = graph_view.get_matrix_partition_view().get_indices(); + weight_t const* values = *(graph_view.get_matrix_partition_view().get_weights()); std::vector v_ro(num_vertices + 1); std::vector v_ci(num_edges); @@ -173,13 +174,14 @@ TEST_F(RandomWalksPrimsTest, SimpleGraphCoalesceExperiments) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = cugraph::test::make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); + auto graph = cugraph::test::make_graph( + handle, v_src, v_dst, std::optional>{v_w}, num_vertices, num_edges); auto graph_view = graph.view(); - edge_t const* offsets = graph_view.offsets(); - vertex_t const* indices = graph_view.indices(); - weight_t const* values = graph_view.weights(); + edge_t const* offsets = graph_view.get_matrix_partition_view().get_offsets(); + vertex_t const* indices = graph_view.get_matrix_partition_view().get_indices(); + weight_t const* values = *(graph_view.get_matrix_partition_view().get_weights()); index_t num_paths = 4; index_t max_depth = 3; @@ -249,13 +251,14 @@ TEST_F(RandomWalksPrimsTest, SimpleGraphColExtraction) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector 
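// In these test changes the raw offsets()/indices()/weights() accessors are replaced
// by get_matrix_partition_view(), whose get_weights() returns an optional that is
// disengaged for unweighted graphs. A self-contained sketch of host-side CSR
// extraction under that convention (function name and include paths are assumptions):

#include <optional>
#include <vector>

#include <raft/cudart_utils.h>
#include <raft/handle.hpp>

// Copy a (possibly unweighted) CSR from a single-GPU graph view to host vectors.
template <typename GraphViewT, typename vertex_t, typename edge_t, typename weight_t>
void csr_to_host(raft::handle_t const& handle,
                 GraphViewT const& graph_view,
                 std::vector<edge_t>& h_offsets,
                 std::vector<vertex_t>& h_indices,
                 std::optional<std::vector<weight_t>>& h_weights)
{
  auto mpv = graph_view.get_matrix_partition_view();
  h_offsets.resize(graph_view.get_number_of_vertices() + 1);
  h_indices.resize(graph_view.get_number_of_edges());
  raft::update_host(h_offsets.data(), mpv.get_offsets(), h_offsets.size(), handle.get_stream());
  raft::update_host(h_indices.data(), mpv.get_indices(), h_indices.size(), handle.get_stream());
  if (mpv.get_weights()) {
    h_weights = std::vector<weight_t>(h_indices.size());
    raft::update_host(
      (*h_weights).data(), *(mpv.get_weights()), (*h_weights).size(), handle.get_stream());
  } else {
    h_weights = std::nullopt;
  }
  handle.get_stream_view().synchronize();
}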
v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = cugraph::test::make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); + auto graph = cugraph::test::make_graph( + handle, v_src, v_dst, std::optional>{v_w}, num_vertices, num_edges); auto graph_view = graph.view(); - edge_t const* offsets = graph_view.offsets(); - vertex_t const* indices = graph_view.indices(); - weight_t const* values = graph_view.weights(); + edge_t const* offsets = graph_view.get_matrix_partition_view().get_offsets(); + vertex_t const* indices = graph_view.get_matrix_partition_view().get_indices(); + weight_t const* values = *(graph_view.get_matrix_partition_view().get_weights()); index_t num_paths = 4; index_t max_depth = 3; @@ -345,13 +348,14 @@ TEST_F(RandomWalksPrimsTest, SimpleGraphRndGenColIndx) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = cugraph::test::make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); + auto graph = cugraph::test::make_graph( + handle, v_src, v_dst, std::optional>{v_w}, num_vertices, num_edges); auto graph_view = graph.view(); - edge_t const* offsets = graph_view.offsets(); - vertex_t const* indices = graph_view.indices(); - weight_t const* values = graph_view.weights(); + edge_t const* offsets = graph_view.get_matrix_partition_view().get_offsets(); + vertex_t const* indices = graph_view.get_matrix_partition_view().get_indices(); + weight_t const* values = *(graph_view.get_matrix_partition_view().get_weights()); index_t num_paths = 4; index_t max_depth = 3; @@ -423,13 +427,14 @@ TEST_F(RandomWalksPrimsTest, SimpleGraphUpdatePathSizes) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = cugraph::test::make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); + auto graph = cugraph::test::make_graph( + handle, v_src, v_dst, std::optional>{v_w}, num_vertices, num_edges); auto graph_view = graph.view(); - edge_t const* offsets = graph_view.offsets(); - vertex_t const* indices = graph_view.indices(); - weight_t const* values = graph_view.weights(); + edge_t const* offsets = graph_view.get_matrix_partition_view().get_offsets(); + vertex_t const* indices = graph_view.get_matrix_partition_view().get_indices(); + weight_t const* values = *(graph_view.get_matrix_partition_view().get_weights()); index_t num_paths = 4; index_t max_depth = 3; @@ -495,13 +500,14 @@ TEST_F(RandomWalksPrimsTest, SimpleGraphScatterUpdate) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = cugraph::test::make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); + auto graph = cugraph::test::make_graph( + handle, v_src, v_dst, std::optional>{v_w}, num_vertices, num_edges); auto graph_view = graph.view(); - edge_t const* offsets = graph_view.offsets(); - vertex_t const* indices = graph_view.indices(); - weight_t const* values = graph_view.weights(); + edge_t const* offsets = graph_view.get_matrix_partition_view().get_offsets(); + vertex_t const* indices = graph_view.get_matrix_partition_view().get_indices(); + weight_t const* values = *(graph_view.get_matrix_partition_view().get_weights()); index_t num_paths = 4; index_t max_depth = 3; @@ -640,13 +646,14 @@ TEST_F(RandomWalksPrimsTest, SimpleGraphCoalesceDefragment) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = cugraph::test::make_graph(handle, v_src, 
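// The tests above build weighted graphs, so dereferencing the optional returned by
// get_weights() with operator* is safe. Where a test may also see unweighted graphs,
// a null-tolerant read is possible (sketch only, not part of this patch):

weight_t const* values =
  graph_view.get_matrix_partition_view().get_weights().value_or(nullptr);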
v_dst, v_w, num_vertices, num_edges, true); + auto graph = cugraph::test::make_graph( + handle, v_src, v_dst, std::optional>{v_w}, num_vertices, num_edges); auto graph_view = graph.view(); - edge_t const* offsets = graph_view.offsets(); - vertex_t const* indices = graph_view.indices(); - weight_t const* values = graph_view.weights(); + edge_t const* offsets = graph_view.get_matrix_partition_view().get_offsets(); + vertex_t const* indices = graph_view.get_matrix_partition_view().get_indices(); + weight_t const* values = *(graph_view.get_matrix_partition_view().get_weights()); index_t num_paths = 4; index_t max_depth = 3; @@ -715,13 +722,14 @@ TEST_F(RandomWalksPrimsTest, SimpleGraphRandomWalk) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = cugraph::test::make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); + auto graph = cugraph::test::make_graph( + handle, v_src, v_dst, std::optional>{v_w}, num_vertices, num_edges); auto graph_view = graph.view(); - edge_t const* offsets = graph_view.offsets(); - vertex_t const* indices = graph_view.indices(); - weight_t const* values = graph_view.weights(); + edge_t const* offsets = graph_view.get_matrix_partition_view().get_offsets(); + vertex_t const* indices = graph_view.get_matrix_partition_view().get_indices(); + weight_t const* values = *(graph_view.get_matrix_partition_view().get_weights()); std::vector v_ro(num_vertices + 1); std::vector v_ci(num_edges); @@ -772,13 +780,14 @@ TEST(RandomWalksQuery, GraphRWQueryOffsets) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = cugraph::test::make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); + auto graph = cugraph::test::make_graph( + handle, v_src, v_dst, std::optional>{v_w}, num_vertices, num_edges); auto graph_view = graph.view(); - edge_t const* offsets = graph_view.offsets(); - vertex_t const* indices = graph_view.indices(); - weight_t const* values = graph_view.weights(); + edge_t const* offsets = graph_view.get_matrix_partition_view().get_offsets(); + vertex_t const* indices = graph_view.get_matrix_partition_view().get_indices(); + weight_t const* values = *(graph_view.get_matrix_partition_view().get_weights()); std::vector v_ro(num_vertices + 1); std::vector v_ci(num_edges); @@ -833,13 +842,14 @@ TEST(RandomWalksSpecialCase, SingleRandomWalk) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = cugraph::test::make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); + auto graph = cugraph::test::make_graph( + handle, v_src, v_dst, std::optional>{v_w}, num_vertices, num_edges); auto graph_view = graph.view(); - edge_t const* offsets = graph_view.offsets(); - vertex_t const* indices = graph_view.indices(); - weight_t const* values = graph_view.weights(); + edge_t const* offsets = graph_view.get_matrix_partition_view().get_offsets(); + vertex_t const* indices = graph_view.get_matrix_partition_view().get_indices(); + weight_t const* values = *(graph_view.get_matrix_partition_view().get_weights()); std::vector v_ro(num_vertices + 1); std::vector v_ci(num_edges); @@ -888,18 +898,15 @@ TEST(RandomWalksSpecialCase, UnweightedGraph) std::vector v_src{0, 1, 1, 2, 2, 2, 3, 4}; std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; - std::vector v_w; - auto graph = cugraph::test::make_graph( - handle, v_src, v_dst, v_w, num_vertices, num_edges, false); // un-weighted + auto graph = 
cugraph::test::make_graph( + handle, v_src, v_dst, std::nullopt, num_vertices, num_edges); // un-weighted auto graph_view = graph.view(); - edge_t const* offsets = graph_view.offsets(); - vertex_t const* indices = graph_view.indices(); - weight_t const* values = graph_view.weights(); - - ASSERT_TRUE(values == nullptr); + edge_t const* offsets = graph_view.get_matrix_partition_view().get_offsets(); + vertex_t const* indices = graph_view.get_matrix_partition_view().get_indices(); + ASSERT_TRUE(graph_view.get_matrix_partition_view().get_weights().has_value() == false); std::vector v_ro(num_vertices + 1); std::vector v_ci(num_edges); @@ -948,13 +955,14 @@ TEST(RandomWalksPadded, SimpleGraph) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = cugraph::test::make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); + auto graph = cugraph::test::make_graph( + handle, v_src, v_dst, std::optional>{v_w}, num_vertices, num_edges); auto graph_view = graph.view(); - edge_t const* offsets = graph_view.offsets(); - vertex_t const* indices = graph_view.indices(); - weight_t const* values = graph_view.weights(); + edge_t const* offsets = graph_view.get_matrix_partition_view().get_offsets(); + vertex_t const* indices = graph_view.get_matrix_partition_view().get_indices(); + weight_t const* values = *(graph_view.get_matrix_partition_view().get_weights()); std::vector v_ro(num_vertices + 1); std::vector v_ci(num_edges); diff --git a/cpp/tests/serialization/un_serialize_test.cpp b/cpp/tests/serialization/un_serialize_test.cpp index 9f11a9aaa27..e65d37fd77a 100644 --- a/cpp/tests/serialization/un_serialize_test.cpp +++ b/cpp/tests/serialization/un_serialize_test.cpp @@ -42,7 +42,8 @@ TEST(SerializationTest, GraphSerUnser) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = cugraph::test::make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); + auto graph = cugraph::test::make_graph( + handle, v_src, v_dst, std::optional>{v_w}, num_vertices, num_edges); auto pair_sz = serializer_t::get_device_graph_sz_bytes(graph); auto total_ser_sz = pair_sz.first + pair_sz.second; @@ -82,7 +83,8 @@ TEST(SerializationTest, GraphDecoupledSerUnser) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = cugraph::test::make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); + auto graph = cugraph::test::make_graph( + handle, v_src, v_dst, std::optional>{v_w}, num_vertices, num_edges); auto pair_sz = serializer_t::get_device_graph_sz_bytes(graph); auto total_ser_sz = pair_sz.first + pair_sz.second; @@ -134,12 +136,11 @@ TEST(SerializationTest, UnweightedGraphDecoupledSerUnser) std::vector v_src{0, 1, 1, 2, 2, 2, 3, 4}; std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; - std::vector v_w{}; - auto graph = cugraph::test::make_graph( - handle, v_src, v_dst, v_w, num_vertices, num_edges, /*weighted=*/false); + auto graph = cugraph::test::make_graph( + handle, v_src, v_dst, std::nullopt, num_vertices, num_edges); - ASSERT_TRUE(graph.view().weights() == nullptr); + ASSERT_TRUE(graph.view().get_matrix_partition_view().get_weights().has_value() == false); auto pair_sz = serializer_t::get_device_graph_sz_bytes(graph); auto total_ser_sz = pair_sz.first + pair_sz.second; @@ -168,7 +169,7 @@ TEST(SerializationTest, UnweightedGraphDecoupledSerUnser) auto graph_copy = ser.unserialize(pair_sz.first, pair_sz.second); - 
ASSERT_TRUE(graph_copy.view().weights() == nullptr); + ASSERT_TRUE(graph_copy.view().get_matrix_partition_view().get_weights().has_value() == false); auto pair = cugraph::test::compare_graphs(handle, graph, graph_copy); if (pair.first == false) std::cerr << "Test failed with " << pair.second << ".\n"; diff --git a/cpp/tests/utilities/matrix_market_file_utilities.cu b/cpp/tests/utilities/matrix_market_file_utilities.cu index 347712d1ac3..711f332ae2f 100644 --- a/cpp/tests/utilities/matrix_market_file_utilities.cu +++ b/cpp/tests/utilities/matrix_market_file_utilities.cu @@ -262,7 +262,7 @@ std::unique_ptr> generate_ template std::tuple, rmm::device_uvector, - rmm::device_uvector, + std::optional>, vertex_t, bool> read_edgelist_from_matrix_market_file(raft::handle_t const& handle, @@ -300,14 +300,15 @@ read_edgelist_from_matrix_market_file(raft::handle_t const& handle, rmm::device_uvector d_edgelist_rows(h_rows.size(), handle.get_stream()); rmm::device_uvector d_edgelist_cols(h_cols.size(), handle.get_stream()); - rmm::device_uvector d_edgelist_weights(test_weighted ? h_weights.size() : size_t{0}, - handle.get_stream()); + auto d_edgelist_weights = test_weighted ? std::make_optional>( + h_weights.size(), handle.get_stream()) + : std::nullopt; raft::update_device(d_edgelist_rows.data(), h_rows.data(), h_rows.size(), handle.get_stream()); raft::update_device(d_edgelist_cols.data(), h_cols.data(), h_cols.size(), handle.get_stream()); - if (test_weighted) { + if (d_edgelist_weights) { raft::update_device( - d_edgelist_weights.data(), h_weights.data(), h_weights.size(), handle.get_stream()); + (*d_edgelist_weights).data(), h_weights.data(), h_weights.size(), handle.get_stream()); } return std::make_tuple(std::move(d_edgelist_rows), @@ -323,18 +324,13 @@ template std::tuple, - rmm::device_uvector> + std::optional>> read_graph_from_matrix_market_file(raft::handle_t const& handle, std::string const& graph_file_full_path, bool test_weighted, bool renumber) { - rmm::device_uvector d_edgelist_rows(0, handle.get_stream()); - rmm::device_uvector d_edgelist_cols(0, handle.get_stream()); - rmm::device_uvector d_edgelist_weights(0, handle.get_stream()); - vertex_t number_of_vertices{}; - bool is_symmetric{}; - std::tie(d_edgelist_rows, d_edgelist_cols, d_edgelist_weights, number_of_vertices, is_symmetric) = + auto [d_edgelist_rows, d_edgelist_cols, d_edgelist_weights, number_of_vertices, is_symmetric] = read_edgelist_from_matrix_market_file( handle, graph_file_full_path, test_weighted); @@ -371,9 +367,9 @@ read_graph_from_matrix_market_file(raft::handle_t const& handle, auto edge_key_func = cugraph::experimental::detail::compute_gpu_id_from_edge_t{ comm_size, row_comm_size, col_comm_size}; size_t number_of_local_edges{}; - if (test_weighted) { + if (d_edgelist_weights) { auto edge_first = thrust::make_zip_iterator(thrust::make_tuple( - d_edgelist_rows.begin(), d_edgelist_cols.begin(), d_edgelist_weights.begin())); + d_edgelist_rows.begin(), d_edgelist_cols.begin(), (*d_edgelist_weights).begin())); number_of_local_edges = thrust::distance( edge_first, thrust::remove_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), @@ -403,9 +399,9 @@ read_graph_from_matrix_market_file(raft::handle_t const& handle, d_edgelist_rows.shrink_to_fit(handle.get_stream()); d_edgelist_cols.resize(number_of_local_edges, handle.get_stream()); d_edgelist_cols.shrink_to_fit(handle.get_stream()); - if (test_weighted) { - d_edgelist_weights.resize(number_of_local_edges, handle.get_stream()); - 
d_edgelist_weights.shrink_to_fit(handle.get_stream()); + if (d_edgelist_weights) { + (*d_edgelist_weights).resize(number_of_local_edges, handle.get_stream()); + (*d_edgelist_weights).shrink_to_fit(handle.get_stream()); } } @@ -418,7 +414,7 @@ read_graph_from_matrix_market_file(raft::handle_t const& handle, std::move(d_edgelist_rows), std::move(d_edgelist_cols), std::move(d_edgelist_weights), - cugraph::experimental::graph_properties_t{is_symmetric, false, test_weighted}, + cugraph::experimental::graph_properties_t{is_symmetric, false}, renumber); } @@ -461,7 +457,7 @@ template std::unique_ptr> generate_graph_csr_from_mm(bool& directed, std::string mm_file); template std::tuple, - rmm::device_uvector> + std::optional>> read_graph_from_matrix_market_file( raft::handle_t const& handle, std::string const& graph_file_full_path, @@ -469,7 +465,7 @@ read_graph_from_matrix_market_file( bool renumber); template std::tuple, - rmm::device_uvector> + std::optional>> read_graph_from_matrix_market_file( raft::handle_t const& handle, std::string const& graph_file_full_path, @@ -477,7 +473,7 @@ read_graph_from_matrix_market_file( bool renumber); template std::tuple, - rmm::device_uvector> + std::optional>> read_graph_from_matrix_market_file( raft::handle_t const& handle, std::string const& graph_file_full_path, @@ -485,7 +481,7 @@ read_graph_from_matrix_market_file( bool renumber); template std::tuple, - rmm::device_uvector> + std::optional>> read_graph_from_matrix_market_file( raft::handle_t const& handle, std::string const& graph_file_full_path, @@ -493,7 +489,7 @@ read_graph_from_matrix_market_file( bool renumber); template std::tuple, - rmm::device_uvector> + std::optional>> read_graph_from_matrix_market_file( raft::handle_t const& handle, std::string const& graph_file_full_path, @@ -501,7 +497,7 @@ read_graph_from_matrix_market_file( bool renumber); template std::tuple, - rmm::device_uvector> + std::optional>> read_graph_from_matrix_market_file( raft::handle_t const& handle, std::string const& graph_file_full_path, @@ -509,7 +505,7 @@ read_graph_from_matrix_market_file( bool renumber); template std::tuple, - rmm::device_uvector> + std::optional>> read_graph_from_matrix_market_file( raft::handle_t const& handle, std::string const& graph_file_full_path, @@ -517,7 +513,7 @@ read_graph_from_matrix_market_file( bool renumber); template std::tuple, - rmm::device_uvector> + std::optional>> read_graph_from_matrix_market_file( raft::handle_t const& handle, std::string const& graph_file_full_path, @@ -525,7 +521,7 @@ read_graph_from_matrix_market_file( bool renumber); template std::tuple, - rmm::device_uvector> + std::optional>> read_graph_from_matrix_market_file( raft::handle_t const& handle, std::string const& graph_file_full_path, @@ -533,7 +529,7 @@ read_graph_from_matrix_market_file( bool renumber); template std::tuple, - rmm::device_uvector> + std::optional>> read_graph_from_matrix_market_file( raft::handle_t const& handle, std::string const& graph_file_full_path, @@ -541,7 +537,7 @@ read_graph_from_matrix_market_file( bool renumber); template std::tuple, - rmm::device_uvector> + std::optional>> read_graph_from_matrix_market_file( raft::handle_t const& handle, std::string const& graph_file_full_path, @@ -549,7 +545,7 @@ read_graph_from_matrix_market_file( bool renumber); template std::tuple, - rmm::device_uvector> + std::optional>> read_graph_from_matrix_market_file( raft::handle_t const& handle, std::string const& graph_file_full_path, @@ -557,7 +553,7 @@ read_graph_from_matrix_market_file( bool 
renumber); template std::tuple, - rmm::device_uvector> + std::optional>> read_graph_from_matrix_market_file( raft::handle_t const& handle, std::string const& graph_file_full_path, @@ -565,7 +561,7 @@ read_graph_from_matrix_market_file( bool renumber); template std::tuple, - rmm::device_uvector> + std::optional>> read_graph_from_matrix_market_file( raft::handle_t const& handle, std::string const& graph_file_full_path, @@ -573,7 +569,7 @@ read_graph_from_matrix_market_file( bool renumber); template std::tuple, - rmm::device_uvector> + std::optional>> read_graph_from_matrix_market_file( raft::handle_t const& handle, std::string const& graph_file_full_path, @@ -581,7 +577,7 @@ read_graph_from_matrix_market_file( bool renumber); template std::tuple, - rmm::device_uvector> + std::optional>> read_graph_from_matrix_market_file( raft::handle_t const& handle, std::string const& graph_file_full_path, @@ -589,7 +585,7 @@ read_graph_from_matrix_market_file( bool renumber); template std::tuple, - rmm::device_uvector> + std::optional>> read_graph_from_matrix_market_file( raft::handle_t const& handle, std::string const& graph_file_full_path, @@ -597,7 +593,7 @@ read_graph_from_matrix_market_file( bool renumber); template std::tuple, - rmm::device_uvector> + std::optional>> read_graph_from_matrix_market_file( raft::handle_t const& handle, std::string const& graph_file_full_path, @@ -605,7 +601,7 @@ read_graph_from_matrix_market_file( bool renumber); template std::tuple, - rmm::device_uvector> + std::optional>> read_graph_from_matrix_market_file( raft::handle_t const& handle, std::string const& graph_file_full_path, @@ -613,7 +609,7 @@ read_graph_from_matrix_market_file( bool renumber); template std::tuple, - rmm::device_uvector> + std::optional>> read_graph_from_matrix_market_file( raft::handle_t const& handle, std::string const& graph_file_full_path, @@ -621,7 +617,7 @@ read_graph_from_matrix_market_file( bool renumber); template std::tuple, - rmm::device_uvector> + std::optional>> read_graph_from_matrix_market_file( raft::handle_t const& handle, std::string const& graph_file_full_path, @@ -629,7 +625,7 @@ read_graph_from_matrix_market_file( bool renumber); template std::tuple, - rmm::device_uvector> + std::optional>> read_graph_from_matrix_market_file( raft::handle_t const& handle, std::string const& graph_file_full_path, @@ -637,7 +633,7 @@ read_graph_from_matrix_market_file( bool renumber); template std::tuple, - rmm::device_uvector> + std::optional>> read_graph_from_matrix_market_file( raft::handle_t const& handle, std::string const& graph_file_full_path, @@ -645,7 +641,7 @@ read_graph_from_matrix_market_file( bool renumber); template std::tuple, - rmm::device_uvector> + std::optional>> read_graph_from_matrix_market_file( raft::handle_t const& handle, std::string const& graph_file_full_path, diff --git a/cpp/tests/utilities/rmat_utilities.cu b/cpp/tests/utilities/rmat_utilities.cu index fda72fc9054..51867bc733d 100644 --- a/cpp/tests/utilities/rmat_utilities.cu +++ b/cpp/tests/utilities/rmat_utilities.cu @@ -39,7 +39,7 @@ template std::tuple, - rmm::device_uvector> + std::optional>> generate_graph_from_rmat_params(raft::handle_t const& handle, size_t scale, size_t edge_factor, @@ -87,7 +87,9 @@ generate_graph_from_rmat_params(raft::handle_t const& handle, rmm::device_uvector d_edgelist_rows(0, handle.get_stream()); rmm::device_uvector d_edgelist_cols(0, handle.get_stream()); - rmm::device_uvector d_edgelist_weights(0, handle.get_stream()); + auto d_edgelist_weights = + test_weighted ? 
std::make_optional>(0, handle.get_stream()) + : std::nullopt; for (size_t i = 0; i < partition_ids.size(); ++i) { auto id = partition_ids[i]; @@ -103,17 +105,18 @@ generate_graph_from_rmat_params(raft::handle_t const& handle, base_seed + id, undirected ? true : false); - rmm::device_uvector d_tmp_weights(0, handle.get_stream()); - if (test_weighted) { + std::optional> d_tmp_weights{std::nullopt}; + if (d_edgelist_weights) { if (i == 0) { - d_edgelist_weights.resize(d_edgelist_rows.size(), handle.get_stream()); + (*d_edgelist_weights).resize(d_edgelist_rows.size(), handle.get_stream()); } else { - d_tmp_weights.resize(d_tmp_rows.size(), handle.get_stream()); + d_tmp_weights = + std::make_optional>(d_tmp_rows.size(), handle.get_stream()); } raft::random::Rng rng(base_seed + num_partitions + id); - rng.uniform(i == 0 ? d_edgelist_weights.data() : d_tmp_weights.data(), - i == 0 ? d_edgelist_weights.size() : d_tmp_weights.size(), + rng.uniform(i == 0 ? (*d_edgelist_weights).data() : (*d_tmp_weights).data(), + i == 0 ? (*d_edgelist_weights).size() : (*d_tmp_weights).size(), weight_t{0.0}, weight_t{1.0}, handle.get_stream()); @@ -131,12 +134,12 @@ generate_graph_from_rmat_params(raft::handle_t const& handle, d_tmp_cols.begin(), d_tmp_cols.end(), d_edgelist_cols.begin() + start_offset); - if (test_weighted) { - d_edgelist_weights.resize(d_edgelist_rows.size(), handle.get_stream()); + if (d_edgelist_weights) { + (*d_edgelist_weights).resize(d_edgelist_rows.size(), handle.get_stream()); thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - d_tmp_weights.begin(), - d_tmp_weights.end(), - d_edgelist_weights.begin() + start_offset); + (*d_tmp_weights).begin(), + (*d_tmp_weights).end(), + (*d_edgelist_weights).begin() + start_offset); } } } @@ -148,8 +151,9 @@ generate_graph_from_rmat_params(raft::handle_t const& handle, auto offset = d_edgelist_rows.size(); d_edgelist_rows.resize(offset * 2, handle.get_stream()); d_edgelist_cols.resize(d_edgelist_rows.size(), handle.get_stream()); - d_edgelist_weights.resize(test_weighted ? d_edgelist_rows.size() : size_t{0}, - handle.get_stream()); + if (d_edgelist_weights) { + (*d_edgelist_weights).resize(d_edgelist_rows.size(), handle.get_stream()); + } thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), d_edgelist_cols.begin(), d_edgelist_cols.begin() + offset, @@ -158,11 +162,11 @@ generate_graph_from_rmat_params(raft::handle_t const& handle, d_edgelist_rows.begin(), d_edgelist_rows.begin() + offset, d_edgelist_cols.begin() + offset); - if (test_weighted) { + if (d_edgelist_weights) { thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - d_edgelist_weights.begin(), - d_edgelist_weights.begin() + offset, - d_edgelist_weights.begin() + offset); + (*d_edgelist_weights).begin(), + (*d_edgelist_weights).begin() + offset, + (*d_edgelist_weights).begin() + offset); } #endif } @@ -177,12 +181,12 @@ generate_graph_from_rmat_params(raft::handle_t const& handle, rmm::device_uvector d_rx_edgelist_rows(0, handle.get_stream()); rmm::device_uvector d_rx_edgelist_cols(0, handle.get_stream()); - rmm::device_uvector d_rx_edgelist_weights(0, handle.get_stream()); - if (test_weighted) { + std::optional> d_rx_edgelist_weights{std::nullopt}; + if (d_edgelist_weights) { auto edge_first = thrust::make_zip_iterator( thrust::make_tuple(store_transposed ? d_edgelist_cols.begin() : d_edgelist_rows.begin(), store_transposed ? 
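// The R-mat path above generates edge weights only when the graph is weighted,
// drawing them uniformly per partition. The core of that pattern, isolated into a
// self-contained helper (name, seed handling, and include paths are illustrative
// assumptions):

#include <cstddef>
#include <cstdint>
#include <optional>

#include <raft/handle.hpp>
#include <raft/random/rng.cuh>
#include <rmm/device_uvector.hpp>

// Allocate and fill an optional device weight buffer only for weighted graphs.
template <typename weight_t>
std::optional<rmm::device_uvector<weight_t>> maybe_generate_weights(
  raft::handle_t const& handle, std::size_t num_edges, bool test_weighted, std::uint64_t seed)
{
  if (!test_weighted) { return std::nullopt; }
  rmm::device_uvector<weight_t> d_weights(num_edges, handle.get_stream());
  raft::random::Rng rng(seed);
  rng.uniform(
    d_weights.data(), d_weights.size(), weight_t{0.0}, weight_t{1.0}, handle.get_stream());
  return std::optional<rmm::device_uvector<weight_t>>{std::move(d_weights)};
}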
d_edgelist_rows.begin() : d_edgelist_cols.begin(), - d_edgelist_weights.begin())); + (*d_edgelist_weights).begin())); std::forward_as_tuple(std::tie(store_transposed ? d_rx_edgelist_cols : d_rx_edgelist_rows, store_transposed ? d_rx_edgelist_rows : d_rx_edgelist_cols, @@ -260,14 +264,14 @@ generate_graph_from_rmat_params(raft::handle_t const& handle, std::move(d_edgelist_rows), std::move(d_edgelist_cols), std::move(d_edgelist_weights), - cugraph::experimental::graph_properties_t{undirected, true, test_weighted}, + cugraph::experimental::graph_properties_t{undirected, true}, renumber); } // explicit instantiations template std::tuple, - rmm::device_uvector> + std::optional>> generate_graph_from_rmat_params( raft::handle_t const& handle, size_t scale, @@ -284,7 +288,7 @@ generate_graph_from_rmat_params( size_t num_partitions); template std::tuple, - rmm::device_uvector> + std::optional>> generate_graph_from_rmat_params( raft::handle_t const& handle, size_t scale, @@ -301,7 +305,7 @@ generate_graph_from_rmat_params( size_t num_partitions); template std::tuple, - rmm::device_uvector> + std::optional>> generate_graph_from_rmat_params( raft::handle_t const& handle, size_t scale, @@ -318,7 +322,7 @@ generate_graph_from_rmat_params( size_t num_partitions); template std::tuple, - rmm::device_uvector> + std::optional>> generate_graph_from_rmat_params( raft::handle_t const& handle, size_t scale, @@ -335,7 +339,7 @@ generate_graph_from_rmat_params( size_t num_partitions); template std::tuple, - rmm::device_uvector> + std::optional>> generate_graph_from_rmat_params( raft::handle_t const& handle, size_t scale, @@ -352,7 +356,7 @@ generate_graph_from_rmat_params( size_t num_partitions); template std::tuple, - rmm::device_uvector> + std::optional>> generate_graph_from_rmat_params( raft::handle_t const& handle, size_t scale, @@ -369,7 +373,7 @@ generate_graph_from_rmat_params( size_t num_partitions); template std::tuple, - rmm::device_uvector> + std::optional>> generate_graph_from_rmat_params( raft::handle_t const& handle, size_t scale, @@ -386,7 +390,7 @@ generate_graph_from_rmat_params( size_t num_partitions); template std::tuple, - rmm::device_uvector> + std::optional>> generate_graph_from_rmat_params( raft::handle_t const& handle, size_t scale, @@ -403,7 +407,7 @@ generate_graph_from_rmat_params( size_t num_partitions); template std::tuple, - rmm::device_uvector> + std::optional>> generate_graph_from_rmat_params( raft::handle_t const& handle, size_t scale, @@ -420,7 +424,7 @@ generate_graph_from_rmat_params( size_t num_partitions); template std::tuple, - rmm::device_uvector> + std::optional>> generate_graph_from_rmat_params( raft::handle_t const& handle, size_t scale, @@ -437,7 +441,7 @@ generate_graph_from_rmat_params( size_t num_partitions); template std::tuple, - rmm::device_uvector> + std::optional>> generate_graph_from_rmat_params( raft::handle_t const& handle, size_t scale, @@ -454,7 +458,7 @@ generate_graph_from_rmat_params( size_t num_partitions); template std::tuple, - rmm::device_uvector> + std::optional>> generate_graph_from_rmat_params( raft::handle_t const& handle, size_t scale, @@ -471,7 +475,7 @@ generate_graph_from_rmat_params( size_t num_partitions); template std::tuple, - rmm::device_uvector> + std::optional>> generate_graph_from_rmat_params( raft::handle_t const& handle, size_t scale, @@ -488,7 +492,7 @@ generate_graph_from_rmat_params( size_t num_partitions); template std::tuple, - rmm::device_uvector> + std::optional>> generate_graph_from_rmat_params( raft::handle_t const& handle, 
size_t scale, @@ -505,7 +509,7 @@ generate_graph_from_rmat_params( size_t num_partitions); template std::tuple, - rmm::device_uvector> + std::optional>> generate_graph_from_rmat_params( raft::handle_t const& handle, size_t scale, @@ -522,7 +526,7 @@ generate_graph_from_rmat_params( size_t num_partitions); template std::tuple, - rmm::device_uvector> + std::optional>> generate_graph_from_rmat_params( raft::handle_t const& handle, size_t scale, @@ -539,7 +543,7 @@ generate_graph_from_rmat_params( size_t num_partitions); template std::tuple, - rmm::device_uvector> + std::optional>> generate_graph_from_rmat_params( raft::handle_t const& handle, size_t scale, @@ -556,7 +560,7 @@ generate_graph_from_rmat_params( size_t num_partitions); template std::tuple, - rmm::device_uvector> + std::optional>> generate_graph_from_rmat_params( raft::handle_t const& handle, size_t scale, @@ -573,7 +577,7 @@ generate_graph_from_rmat_params( size_t num_partitions); template std::tuple, - rmm::device_uvector> + std::optional>> generate_graph_from_rmat_params( raft::handle_t const& handle, size_t scale, @@ -590,7 +594,7 @@ generate_graph_from_rmat_params( size_t num_partitions); template std::tuple, - rmm::device_uvector> + std::optional>> generate_graph_from_rmat_params( raft::handle_t const& handle, size_t scale, @@ -607,7 +611,7 @@ generate_graph_from_rmat_params( size_t num_partitions); template std::tuple, - rmm::device_uvector> + std::optional>> generate_graph_from_rmat_params( raft::handle_t const& handle, size_t scale, @@ -624,7 +628,7 @@ generate_graph_from_rmat_params( size_t num_partitions); template std::tuple, - rmm::device_uvector> + std::optional>> generate_graph_from_rmat_params( raft::handle_t const& handle, size_t scale, @@ -641,7 +645,7 @@ generate_graph_from_rmat_params( size_t num_partitions); template std::tuple, - rmm::device_uvector> + std::optional>> generate_graph_from_rmat_params( raft::handle_t const& handle, size_t scale, @@ -658,7 +662,7 @@ generate_graph_from_rmat_params( size_t num_partitions); template std::tuple, - rmm::device_uvector> + std::optional>> generate_graph_from_rmat_params( raft::handle_t const& handle, size_t scale, diff --git a/cpp/tests/utilities/test_graphs.hpp b/cpp/tests/utilities/test_graphs.hpp index b8ee8f024b0..150f2790277 100644 --- a/cpp/tests/utilities/test_graphs.hpp +++ b/cpp/tests/utilities/test_graphs.hpp @@ -67,18 +67,12 @@ class File_Usecase : public detail::TranslateGraph_Usecase { bool multi_gpu> std::tuple, rmm::device_uvector, - rmm::device_uvector, + std::optional>, vertex_t, bool> construct_edgelist(raft::handle_t const& handle, bool test_weighted) const { - rmm::device_uvector d_src_v(0, handle.get_stream()); - rmm::device_uvector d_dst_v(0, handle.get_stream()); - rmm::device_uvector d_weights_v(0, handle.get_stream()); - vertex_t num_vertices; - bool is_symmetric; - - std::tie(d_src_v, d_dst_v, d_weights_v, num_vertices, is_symmetric) = + auto [d_src_v, d_dst_v, d_weights_v, num_vertices, is_symmetric] = read_edgelist_from_matrix_market_file( handle, graph_file_full_path_, test_weighted); @@ -105,16 +99,10 @@ class File_Usecase : public detail::TranslateGraph_Usecase { bool multi_gpu> std::tuple< cugraph::experimental::graph_t, - rmm::device_uvector> + std::optional>> construct_graph(raft::handle_t const& handle, bool test_weighted, bool renumber = true) const { - rmm::device_uvector d_src_v(0, handle.get_stream()); - rmm::device_uvector d_dst_v(0, handle.get_stream()); - rmm::device_uvector d_weights_v(0, handle.get_stream()); - vertex_t 
num_vertices; - bool is_symmetric; - - std::tie(d_src_v, d_dst_v, d_weights_v, num_vertices, is_symmetric) = + auto [d_src_v, d_dst_v, d_weights_v, num_vertices, is_symmetric] = this->template construct_edgelist( handle, test_weighted); @@ -170,7 +158,7 @@ class Rmat_Usecase : public detail::TranslateGraph_Usecase { bool multi_gpu> std::tuple, rmm::device_uvector, - rmm::device_uvector, + std::optional>, vertex_t, bool> construct_edgelist(raft::handle_t const& handle, bool test_weighted) const @@ -189,7 +177,7 @@ class Rmat_Usecase : public detail::TranslateGraph_Usecase { bool multi_gpu> std::tuple< cugraph::experimental::graph_t, - rmm::device_uvector> + std::optional>> construct_graph(raft::handle_t const& handle, bool test_weighted, bool renumber = true) const { std::vector partition_ids(1); @@ -259,13 +247,11 @@ class PathGraph_Usecase { bool multi_gpu> std::tuple, rmm::device_uvector, - rmm::device_uvector, + std::optional>, vertex_t, bool> construct_edgelist(raft::handle_t const& handle, bool test_weighted) const { - rmm::device_uvector weights_v(0, handle.get_stream()); - constexpr bool symmetric{true}; std::vector> converted_parms(parms_.size()); @@ -275,21 +261,17 @@ class PathGraph_Usecase { static_cast(std::get<1>(p))); }); - rmm::device_uvector src_v(0, handle.get_stream()); - rmm::device_uvector dst_v(0, handle.get_stream()); - - std::tie(src_v, dst_v) = - cugraph::generate_path_graph_edgelist(handle, converted_parms); + auto [src_v, dst_v] = cugraph::generate_path_graph_edgelist(handle, converted_parms); std::tie(src_v, dst_v, std::ignore) = cugraph::symmetrize_edgelist( handle, std::move(src_v), std::move(dst_v), std::nullopt); - if (test_weighted) { - auto length = src_v.size(); - weights_v.resize(length, handle.get_stream()); - } - - return std::make_tuple( - std::move(src_v), std::move(dst_v), std::move(weights_v), num_vertices_, symmetric); + return std::make_tuple(std::move(src_v), + std::move(dst_v), + test_weighted ? 
std::make_optional>( + src_v.size(), handle.get_stream()) + : std::nullopt, + num_vertices_, + symmetric); } template std::tuple< cugraph::experimental::graph_t, - rmm::device_uvector> + std::optional>> construct_graph(raft::handle_t const& handle, bool test_weighted, bool renumber = true) const { CUGRAPH_FAIL("not implemented"); @@ -327,7 +309,7 @@ class Mesh2DGraph_Usecase { bool multi_gpu> std::tuple, rmm::device_uvector, - rmm::device_uvector, + std::optional>, vertex_t, bool> construct_edgelist(raft::handle_t const& handle, bool test_weighted) const @@ -341,7 +323,7 @@ class Mesh2DGraph_Usecase { bool multi_gpu> std::tuple< cugraph::experimental::graph_t, - rmm::device_uvector> + std::optional>> construct_graph(raft::handle_t const& handle, bool test_weighted, bool renumber = true) const; private: @@ -366,7 +348,7 @@ class Mesh3DGraph_Usecase { bool multi_gpu> std::tuple, rmm::device_uvector, - rmm::device_uvector, + std::optional>, vertex_t, bool> construct_edgelist(raft::handle_t const& handle, bool test_weighted) const; @@ -378,7 +360,7 @@ class Mesh3DGraph_Usecase { bool multi_gpu> std::tuple< cugraph::experimental::graph_t, - rmm::device_uvector> + std::optional>> construct_graph(raft::handle_t const& handle, bool test_weighted, bool renumber = true) const; private: @@ -402,7 +384,7 @@ class CompleteGraph_Usecase { bool multi_gpu> std::tuple, rmm::device_uvector, - rmm::device_uvector, + std::optional>, vertex_t, bool> construct_edgelist(raft::handle_t const& handle, bool test_weighted) const; @@ -414,7 +396,7 @@ class CompleteGraph_Usecase { bool multi_gpu> std::tuple< cugraph::experimental::graph_t, - rmm::device_uvector> + std::optional>> construct_graph(raft::handle_t const& handle, bool test_weighted, bool renumber = true) const; private: @@ -485,7 +467,7 @@ class CombinedGenerator_Usecase { bool multi_gpu> std::tuple, rmm::device_uvector, - rmm::device_uvector, + std::optional>, vertex_t, bool> construct_edgelist(raft::handle_t const& handle, bool test_weighted) const @@ -507,7 +489,7 @@ class CombinedGenerator_Usecase { bool multi_gpu> std::tuple< cugraph::experimental::graph_t, - rmm::device_uvector> + std::optional>> construct_graph(raft::handle_t const& handle, bool test_weighted, bool renumber = true) const { // Call construct_edgelist to get tuple of edge lists @@ -526,19 +508,13 @@ template std::tuple, - rmm::device_uvector> + std::optional>> construct_graph(raft::handle_t const& handle, input_usecase_t const& input_usecase, bool test_weighted, bool renumber = true) { - rmm::device_uvector d_src_v(0, handle.get_stream()); - rmm::device_uvector d_dst_v(0, handle.get_stream()); - rmm::device_uvector d_weights_v(0, handle.get_stream()); - vertex_t num_vertices{0}; - bool is_symmetric{false}; - - std::tie(d_src_v, d_dst_v, d_weights_v, num_vertices, is_symmetric) = + auto [d_src_v, d_dst_v, d_weights_v, num_vertices, is_symmetric] = input_usecase .template construct_edgelist( handle, test_weighted); @@ -550,7 +526,7 @@ construct_graph(raft::handle_t const& handle, std::move(d_src_v), std::move(d_dst_v), std::move(d_weights_v), - cugraph::experimental::graph_properties_t{is_symmetric, false, test_weighted}, + cugraph::experimental::graph_properties_t{is_symmetric, false}, renumber); } diff --git a/cpp/tests/utilities/test_utilities.hpp b/cpp/tests/utilities/test_utilities.hpp index 4ad450dde0a..b452ff9a95f 100644 --- a/cpp/tests/utilities/test_utilities.hpp +++ b/cpp/tests/utilities/test_utilities.hpp @@ -16,6 +16,7 @@ #pragma once #include +#include #include #include @@ 
-24,6 +25,7 @@ #include #include +#include #include #include #include @@ -114,7 +116,7 @@ static const std::string& get_rapids_dataset_root_dir() template std::tuple, rmm::device_uvector, - rmm::device_uvector, + std::optional>, vertex_t, bool> read_edgelist_from_matrix_market_file(raft::handle_t const& handle, @@ -128,7 +130,7 @@ template std::tuple, - rmm::device_uvector> + std::optional>> read_graph_from_matrix_market_file(raft::handle_t const& handle, std::string const& graph_file_full_path, bool test_weighted, @@ -140,7 +142,7 @@ template std::tuple, - rmm::device_uvector> + std::optional>> generate_graph_from_rmat_params(raft::handle_t const& handle, size_t scale, size_t edge_factor, @@ -164,32 +166,33 @@ template decltype(auto) make_graph(raft::handle_t const& handle, std::vector const& v_src, std::vector const& v_dst, - std::vector const& v_w, + std::optional> const& v_w, vertex_t num_vertices, - edge_t num_edges, - bool is_weighted) + edge_t num_edges) { using namespace cugraph::experimental; vector_test_t d_src(num_edges, handle.get_stream()); vector_test_t d_dst(num_edges, handle.get_stream()); - vector_test_t d_weights(num_edges, handle.get_stream()); + auto d_w = v_w ? std::make_optional>(num_edges, handle.get_stream()) + : std::nullopt; raft::update_device(d_src.data(), v_src.data(), d_src.size(), handle.get_stream()); raft::update_device(d_dst.data(), v_dst.data(), d_dst.size(), handle.get_stream()); - - weight_t* ptr_d_weights{nullptr}; - if (is_weighted) { - raft::update_device(d_weights.data(), v_w.data(), d_weights.size(), handle.get_stream()); - - ptr_d_weights = d_weights.data(); + if (d_w) { + raft::update_device((*d_w).data(), (*v_w).data(), (*d_w).size(), handle.get_stream()); } - edgelist_t edgelist{ - d_src.data(), d_dst.data(), ptr_d_weights, num_edges}; - - graph_t graph( - handle, edgelist, num_vertices, graph_properties_t{false, false, is_weighted}, false); + cugraph::experimental::graph_t graph(handle); + std::tie(graph, std::ignore) = + cugraph::experimental::create_graph_from_edgelist( + handle, + std::nullopt, + std::move(d_src), + std::move(d_dst), + std::move(d_w), + cugraph::experimental::graph_properties_t{false, false}, + false); return graph; } @@ -246,27 +249,71 @@ std::pair compare_graphs(raft::handle_t const& handle, std::vector lv_ro(num_vertices + 1); std::vector lv_ci(num_edges); - raft::update_host(lv_ro.data(), lgraph_view.offsets(), num_vertices + 1, handle.get_stream()); - raft::update_host(lv_ci.data(), lgraph_view.indices(), num_edges, handle.get_stream()); + raft::update_host(lv_ro.data(), + lgraph_view.get_matrix_partition_view().get_offsets(), + num_vertices + 1, + handle.get_stream()); + raft::update_host(lv_ci.data(), + lgraph_view.get_matrix_partition_view().get_indices(), + num_edges, + handle.get_stream()); std::vector rv_ro(num_vertices + 1); std::vector rv_ci(num_edges); - raft::update_host(rv_ro.data(), rgraph_view.offsets(), num_vertices + 1, handle.get_stream()); - raft::update_host(rv_ci.data(), rgraph_view.indices(), num_edges, handle.get_stream()); - - if (lv_ro != rv_ro) return std::make_pair(false, std::string("offsets")); - - if (lv_ci != rv_ci) return std::make_pair(false, std::string("indices")); - + raft::update_host(rv_ro.data(), + rgraph_view.get_matrix_partition_view().get_offsets(), + num_vertices + 1, + handle.get_stream()); + raft::update_host(rv_ci.data(), + rgraph_view.get_matrix_partition_view().get_indices(), + num_edges, + handle.get_stream()); + + auto lv_vs = is_weighted ? 
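// The rewritten compare_graphs just below no longer requires the two CSRs to list
// neighbors in the same order within a row: each row is canonicalized by sorting
// before comparison, and weights are compared only when present. The same idea as a
// standalone host-side check (function and parameter names are illustrative; the
// index vectors are taken by value so each row can be sorted locally):

#include <algorithm>
#include <cstddef>
#include <optional>
#include <tuple>
#include <vector>

// Row-wise CSR equality that ignores neighbor order within each row.
template <typename vertex_t, typename edge_t, typename weight_t>
bool csr_rows_equal(std::vector<edge_t> const& offsets,  // shared row offsets, size V + 1
                    std::vector<vertex_t> lhs_indices,
                    std::vector<vertex_t> rhs_indices,
                    std::optional<std::vector<weight_t>> const& lhs_weights,
                    std::optional<std::vector<weight_t>> const& rhs_weights)
{
  if (lhs_weights.has_value() != rhs_weights.has_value()) { return false; }
  for (std::size_t i = 0; i + 1 < offsets.size(); ++i) {
    auto first = offsets[i];
    auto last  = offsets[i + 1];
    if (lhs_weights) {  // weighted: compare sorted (column, weight) pairs
      std::vector<std::tuple<vertex_t, weight_t>> lhs_row(last - first);
      std::vector<std::tuple<vertex_t, weight_t>> rhs_row(last - first);
      for (edge_t j = first; j < last; ++j) {
        lhs_row[j - first] = std::make_tuple(lhs_indices[j], (*lhs_weights)[j]);
        rhs_row[j - first] = std::make_tuple(rhs_indices[j], (*rhs_weights)[j]);
      }
      std::sort(lhs_row.begin(), lhs_row.end());
      std::sort(rhs_row.begin(), rhs_row.end());
      if (lhs_row != rhs_row) { return false; }
    } else {  // unweighted: compare sorted column indices only
      std::sort(lhs_indices.begin() + first, lhs_indices.begin() + last);
      std::sort(rhs_indices.begin() + first, rhs_indices.begin() + last);
      if (!std::equal(
            lhs_indices.begin() + first, lhs_indices.begin() + last, rhs_indices.begin() + first)) {
        return false;
      }
    }
  }
  return true;
}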
std::make_optional>(num_edges) : std::nullopt; + auto rv_vs = is_weighted ? std::make_optional>(num_edges) : std::nullopt; if (is_weighted) { - std::vector lv_vs(num_edges); - raft::update_host(lv_vs.data(), lgraph_view.weights(), num_edges, handle.get_stream()); + raft::update_host((*lv_vs).data(), + *(lgraph_view.get_matrix_partition_view().get_weights()), + num_edges, + handle.get_stream()); + + raft::update_host((*rv_vs).data(), + *(rgraph_view.get_matrix_partition_view().get_weights()), + num_edges, + handle.get_stream()); + } - std::vector rv_vs(num_edges); - raft::update_host(rv_vs.data(), rgraph_view.weights(), num_edges, handle.get_stream()); + handle.get_stream_view().synchronize(); + + if (lv_ro != rv_ro) return std::make_pair(false, std::string("offsets")); - if (lv_vs != rv_vs) return std::make_pair(false, std::string("values")); + for (size_t i = 0; i < num_vertices; ++i) { + auto first = lv_ro[i]; + auto last = lv_ro[i + 1]; + if (is_weighted) { + std::vector> lv_pairs(last - first); + std::vector> rv_pairs(last - first); + for (edge_t j = first; j < last; ++j) { + lv_pairs[j - first] = std::make_tuple(lv_ci[j], (*lv_vs)[j]); + rv_pairs[j - first] = std::make_tuple(rv_ci[j], (*rv_vs)[j]); + } + std::sort(lv_pairs.begin(), lv_pairs.end()); + std::sort(rv_pairs.begin(), rv_pairs.end()); + if (!std::equal(lv_pairs.begin(), lv_pairs.end(), rv_pairs.begin(), [](auto lhs, auto rhs) { + return std::get<0>(lhs) == std::get<0>(rhs); + })) + return std::make_pair(false, std::string("indices")); + if (!std::equal(lv_pairs.begin(), lv_pairs.end(), rv_pairs.begin(), [](auto lhs, auto rhs) { + return std::get<1>(lhs) == std::get<1>(rhs); + })) + return std::make_pair(false, std::string("values")); + } else { + std::sort(lv_ci.begin() + first, lv_ci.begin() + last); + std::sort(rv_ci.begin() + first, rv_ci.begin() + last); + if (!std::equal(lv_ci.begin() + first, lv_ci.begin() + last, rv_ci.begin() + first)) + return std::make_pair(false, std::string("indices")); + } } if (lgraph_view.get_local_adj_matrix_partition_segment_offsets(0) != diff --git a/python/cugraph/community/egonet_wrapper.pyx b/python/cugraph/community/egonet_wrapper.pyx index eb62c2aa56c..418fc50b712 100644 --- a/python/cugraph/community/egonet_wrapper.pyx +++ b/python/cugraph/community/egonet_wrapper.pyx @@ -73,13 +73,14 @@ def egonet(input_graph, vertices, radius=1): handle_[0], c_src_vertices, c_dst_vertices, c_edge_weights, NULL, + NULL, + 0, ((numberTypeMap[vertex_t])), ((numberTypeMap[edge_t])), ((numberTypeMap[weight_t])), num_local_edges, num_verts, num_edges, - False, is_weighted, is_symmetric, False, False) diff --git a/python/cugraph/components/connectivity_wrapper.pyx b/python/cugraph/components/connectivity_wrapper.pyx index 588595644ed..28227bd1c07 100644 --- a/python/cugraph/components/connectivity_wrapper.pyx +++ b/python/cugraph/components/connectivity_wrapper.pyx @@ -70,13 +70,14 @@ def weakly_connected_components(input_graph): handle_[0], c_src_vertices, c_dst_vertices, c_edge_weights, NULL, + NULL, + 0, ((numberTypeEnum.int32Type)), ((numberTypeEnum.int32Type)), ((numberTypeMap[weight_t])), num_edges, num_verts, num_edges, False, - False, True, False, False) diff --git a/python/cugraph/dask/centrality/katz_centrality.py b/python/cugraph/dask/centrality/katz_centrality.py index cd6af8e7906..61c1869f974 100644 --- a/python/cugraph/dask/centrality/katz_centrality.py +++ b/python/cugraph/dask/centrality/katz_centrality.py @@ -27,6 +27,7 @@ def call_katz_centrality(sID, num_verts, num_edges, 
vertex_partition_offsets, + aggregate_segment_offsets, alpha, beta, max_iter, @@ -35,12 +36,16 @@ def call_katz_centrality(sID, normalized): wid = Comms.get_worker_id(sID) handle = Comms.get_handle(sID) + local_size = len(aggregate_segment_offsets) // Comms.get_n_workers(sID) + segment_offsets = \ + aggregate_segment_offsets[local_size * wid: local_size * (wid + 1)] return mg_katz_centrality.mg_katz_centrality(data[0], num_verts, num_edges, vertex_partition_offsets, wid, handle, + segment_offsets, alpha, beta, max_iter, @@ -148,6 +153,7 @@ def katz_centrality(input_graph, num_verts, num_edges, vertex_partition_offsets, + input_graph.aggregate_segment_offsets, alpha, beta, max_iter, diff --git a/python/cugraph/dask/centrality/mg_katz_centrality_wrapper.pyx b/python/cugraph/dask/centrality/mg_katz_centrality_wrapper.pyx index acad3a29440..6160d13507f 100644 --- a/python/cugraph/dask/centrality/mg_katz_centrality_wrapper.pyx +++ b/python/cugraph/dask/centrality/mg_katz_centrality_wrapper.pyx @@ -30,6 +30,7 @@ def mg_katz_centrality(input_df, vertex_partition_offsets, rank, handle, + segment_offsets, alpha=None, beta=None, max_iter=100, @@ -83,17 +84,29 @@ def mg_katz_centrality(input_df, vertex_partition_offsets_host = vertex_partition_offsets.values_host cdef uintptr_t c_vertex_partition_offsets = vertex_partition_offsets_host.__array_interface__['data'][0] + cdef vector[int] v_segment_offsets_32 + cdef vector[long] v_segment_offsets_64 + cdef uintptr_t c_segment_offsets + if (vertex_t == np.dtype("int32")): + v_segment_offsets_32 = segment_offsets + c_segment_offsets = v_segment_offsets_32.data() + else: + v_segment_offsets_64 = segment_offsets + c_segment_offsets = v_segment_offsets_64.data() + cdef graph_container_t graph_container + populate_graph_container(graph_container, handle_[0], c_src_vertices, c_dst_vertices, c_edge_weights, c_vertex_partition_offsets, + c_segment_offsets, + len(segment_offsets) - 1, ((numberTypeMap[vertex_t])), ((numberTypeMap[edge_t])), ((numberTypeMap[weight_t])), num_local_edges, num_global_verts, num_global_edges, - True, is_weighted, False, True, True) diff --git a/python/cugraph/dask/community/louvain.py b/python/cugraph/dask/community/louvain.py index c9af0f526c9..c4db00ab27a 100644 --- a/python/cugraph/dask/community/louvain.py +++ b/python/cugraph/dask/community/louvain.py @@ -29,20 +29,21 @@ def call_louvain(sID, num_verts, num_edges, vertex_partition_offsets, - sorted_by_degree, + aggregate_segment_offsets, max_level, resolution): - wid = Comms.get_worker_id(sID) handle = Comms.get_handle(sID) - + local_size = len(aggregate_segment_offsets) // Comms.get_n_workers(sID) + segment_offsets = \ + aggregate_segment_offsets[local_size * wid: local_size * (wid + 1)] return c_mg_louvain.louvain(data[0], num_verts, num_edges, vertex_partition_offsets, wid, handle, - sorted_by_degree, + segment_offsets, max_level, resolution) @@ -84,7 +85,6 @@ def louvain(input_graph, max_iter=100, resolution=1.0): client = default_client() # Calling renumbering results in data that is sorted by degree input_graph.compute_renumber_edge_list(transposed=False) - sorted_by_degree = True ddf = input_graph.edgelist.edgelist_df vertex_partition_offsets = get_vertex_partition_offsets(input_graph) @@ -98,7 +98,7 @@ def louvain(input_graph, max_iter=100, resolution=1.0): num_verts, num_edges, vertex_partition_offsets, - sorted_by_degree, + input_graph.aggregate_segment_offsets, max_iter, resolution, workers=[wf[0]]) diff --git a/python/cugraph/dask/community/louvain_wrapper.pyx 
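# Each dask wrapper now receives a per-worker slice of the aggregated segment
# offsets: the aggregate list is assumed to hold one equal-length block per worker,
# and worker `wid` takes the half-open block
# [local_size * wid, local_size * (wid + 1)). A standalone sketch of that slicing
# (the function name is illustrative only):

def per_worker_segment_offsets(aggregate_segment_offsets, n_workers, wid):
    local_size = len(aggregate_segment_offsets) // n_workers
    return aggregate_segment_offsets[local_size * wid: local_size * (wid + 1)]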
b/python/cugraph/dask/community/louvain_wrapper.pyx index 57e643b6f1d..c3c3f1ad373 100644 --- a/python/cugraph/dask/community/louvain_wrapper.pyx +++ b/python/cugraph/dask/community/louvain_wrapper.pyx @@ -17,6 +17,7 @@ # cython: language_level = 3 from libc.stdint cimport uintptr_t +from libcpp.vector cimport vector from cugraph.dask.community cimport louvain as c_louvain from cugraph.structure.graph_utilities cimport * @@ -37,7 +38,7 @@ def louvain(input_df, vertex_partition_offsets, rank, handle, - sorted_by_degree, + segment_offsets, max_level, resolution): """ @@ -82,6 +83,16 @@ def louvain(input_df, num_local_verts = vertex_partition_offsets_host[rank+1] - vertex_partition_offsets_host[rank] + cdef vector[int] v_segment_offsets_32 + cdef vector[long] v_segment_offsets_64 + cdef uintptr_t c_segment_offsets + if (vertex_t == np.dtype("int32")): + v_segment_offsets_32 = segment_offsets + c_segment_offsets = v_segment_offsets_32.data() + else: + v_segment_offsets_64 = segment_offsets + c_segment_offsets = v_segment_offsets_64.data() + cdef graph_container_t graph_container # FIXME: The excessive casting for the enum arg is needed to make cython @@ -91,12 +102,13 @@ def louvain(input_df, handle_[0], c_src_vertices, c_dst_vertices, c_edge_weights, c_vertex_partition_offsets, + c_segment_offsets, + len(segment_offsets) - 1, ((numberTypeMap[vertex_t])), ((numberTypeMap[edge_t])), ((numberTypeMap[weight_t])), num_local_edges, num_global_verts, num_global_edges, - sorted_by_degree, True, False, False, True) # store_transposed, multi_gpu diff --git a/python/cugraph/dask/components/connectivity.py b/python/cugraph/dask/components/connectivity.py index 7f3a647a0d9..81200e7383e 100644 --- a/python/cugraph/dask/components/connectivity.py +++ b/python/cugraph/dask/components/connectivity.py @@ -23,15 +23,20 @@ def call_wcc(sID, data, num_verts, num_edges, - vertex_partition_offsets): + vertex_partition_offsets, + aggregate_segment_offsets): wid = Comms.get_worker_id(sID) handle = Comms.get_handle(sID) + local_size = len(aggregate_segment_offsets) // Comms.get_n_workers(sID) + segment_offsets = \ + aggregate_segment_offsets[local_size * wid: local_size * (wid + 1)] return mg_connectivity.mg_wcc(data[0], num_verts, num_edges, vertex_partition_offsets, wid, - handle) + handle, + segment_offsets) def weakly_connected_components(input_graph): @@ -52,6 +57,7 @@ def weakly_connected_components(input_graph): num_verts, num_edges, vertex_partition_offsets, + input_graph.aggregate_segment_offsets, workers=[wf[0]]) for idx, wf in enumerate(data.worker_to_parts.items())] wait(result) diff --git a/python/cugraph/dask/components/mg_connectivity_wrapper.pyx b/python/cugraph/dask/components/mg_connectivity_wrapper.pyx index 32b499cd44a..b7bad1b6277 100644 --- a/python/cugraph/dask/components/mg_connectivity_wrapper.pyx +++ b/python/cugraph/dask/components/mg_connectivity_wrapper.pyx @@ -29,7 +29,8 @@ def mg_wcc(input_df, num_global_edges, vertex_partition_offsets, rank, - handle): + handle, + segment_offsets): cdef size_t handle_size_t = handle.getHandle() handle_ = handle_size_t @@ -64,18 +65,29 @@ def mg_wcc(input_df, vertex_partition_offsets_host = vertex_partition_offsets.values_host cdef uintptr_t c_vertex_partition_offsets = vertex_partition_offsets_host.__array_interface__['data'][0] + cdef vector[int] v_segment_offsets_32 + cdef vector[long] v_segment_offsets_64 + cdef uintptr_t c_segment_offsets + if (vertex_t == np.dtype("int32")): + v_segment_offsets_32 = segment_offsets + c_segment_offsets = 
v_segment_offsets_32.data() + else: + v_segment_offsets_64 = segment_offsets + c_segment_offsets = v_segment_offsets_64.data() + cdef graph_container_t graph_container populate_graph_container(graph_container, handle_[0], c_src_vertices, c_dst_vertices, c_edge_weights, c_vertex_partition_offsets, + c_segment_offsets, + len(segment_offsets) - 1, ((numberTypeMap[vertex_t])), ((numberTypeMap[edge_t])), ((numberTypeMap[weight_t])), num_local_edges, num_global_verts, num_global_edges, - True, is_weighted, True, False, diff --git a/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx b/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx index 8af7ccf4266..43ef3d7e31f 100644 --- a/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx +++ b/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx @@ -30,6 +30,7 @@ def mg_pagerank(input_df, vertex_partition_offsets, rank, handle, + segment_offsets, alpha=0.85, max_iter=100, tol=1.0e-5, @@ -78,18 +79,29 @@ def mg_pagerank(input_df, vertex_partition_offsets_host = vertex_partition_offsets.values_host cdef uintptr_t c_vertex_partition_offsets = vertex_partition_offsets_host.__array_interface__['data'][0] + cdef vector[int] v_segment_offsets_32 + cdef vector[long] v_segment_offsets_64 + cdef uintptr_t c_segment_offsets + if (vertex_t == np.dtype("int32")): + v_segment_offsets_32 = segment_offsets + c_segment_offsets = v_segment_offsets_32.data() + else: + v_segment_offsets_64 = segment_offsets + c_segment_offsets = v_segment_offsets_64.data() + cdef graph_container_t graph_container populate_graph_container(graph_container, handle_[0], c_src_vertices, c_dst_vertices, c_edge_weights, c_vertex_partition_offsets, + c_segment_offsets, + len(segment_offsets) - 1, ((numberTypeMap[vertex_t])), ((numberTypeMap[edge_t])), ((numberTypeMap[weight_t])), num_local_edges, num_global_verts, num_global_edges, - True, is_weighted, False, True, True) diff --git a/python/cugraph/dask/link_analysis/pagerank.py b/python/cugraph/dask/link_analysis/pagerank.py index f90e5c72231..87ef94d1600 100644 --- a/python/cugraph/dask/link_analysis/pagerank.py +++ b/python/cugraph/dask/link_analysis/pagerank.py @@ -26,6 +26,7 @@ def call_pagerank(sID, num_verts, num_edges, vertex_partition_offsets, + aggregate_segment_offsets, alpha, max_iter, tol, @@ -33,12 +34,16 @@ def call_pagerank(sID, nstart): wid = Comms.get_worker_id(sID) handle = Comms.get_handle(sID) + local_size = len(aggregate_segment_offsets) // Comms.get_n_workers(sID) + segment_offsets = \ + aggregate_segment_offsets[local_size * wid: local_size * (wid + 1)] return mg_pagerank.mg_pagerank(data[0], num_verts, num_edges, vertex_partition_offsets, wid, handle, + segment_offsets, alpha, max_iter, tol, @@ -148,6 +153,7 @@ def pagerank(input_graph, num_verts, num_edges, vertex_partition_offsets, + input_graph.aggregate_segment_offsets, alpha, max_iter, tol, @@ -162,6 +168,7 @@ def pagerank(input_graph, num_verts, num_edges, vertex_partition_offsets, + input_graph.aggregate_segment_offsets, alpha, max_iter, tol, diff --git a/python/cugraph/dask/traversal/bfs.py b/python/cugraph/dask/traversal/bfs.py index 11283a1cb78..06cbf64782a 100644 --- a/python/cugraph/dask/traversal/bfs.py +++ b/python/cugraph/dask/traversal/bfs.py @@ -27,17 +27,22 @@ def call_bfs(sID, num_verts, num_edges, vertex_partition_offsets, + aggregate_segment_offsets, start, depth_limit, return_distances): wid = Comms.get_worker_id(sID) handle = Comms.get_handle(sID) + local_size = len(aggregate_segment_offsets) // Comms.get_n_workers(sID) + 
segment_offsets = \ + aggregate_segment_offsets[local_size * wid: local_size * (wid + 1)] return mg_bfs.mg_bfs(data[0], num_verts, num_edges, vertex_partition_offsets, wid, handle, + segment_offsets, start, depth_limit, return_distances) @@ -121,6 +126,7 @@ def bfs(graph, num_verts, num_edges, vertex_partition_offsets, + graph.aggregate_segment_offsets, start, depth_limit, return_distances, diff --git a/python/cugraph/dask/traversal/mg_bfs_wrapper.pyx b/python/cugraph/dask/traversal/mg_bfs_wrapper.pyx index f0a9f2a81db..a15d6704ac8 100644 --- a/python/cugraph/dask/traversal/mg_bfs_wrapper.pyx +++ b/python/cugraph/dask/traversal/mg_bfs_wrapper.pyx @@ -27,6 +27,7 @@ def mg_bfs(input_df, vertex_partition_offsets, rank, handle, + segment_offsets, start, depth_limit, return_distances=False): @@ -69,18 +70,29 @@ def mg_bfs(input_df, vertex_partition_offsets_host = vertex_partition_offsets.values_host cdef uintptr_t c_vertex_partition_offsets = vertex_partition_offsets_host.__array_interface__['data'][0] + cdef vector[int] v_segment_offsets_32 + cdef vector[long] v_segment_offsets_64 + cdef uintptr_t c_segment_offsets + if (vertex_t == np.dtype("int32")): + v_segment_offsets_32 = segment_offsets + c_segment_offsets = v_segment_offsets_32.data() + else: + v_segment_offsets_64 = segment_offsets + c_segment_offsets = v_segment_offsets_64.data() + cdef graph_container_t graph_container populate_graph_container(graph_container, handle_[0], c_src_vertices, c_dst_vertices, c_edge_weights, c_vertex_partition_offsets, + c_segment_offsets, + len(segment_offsets) - 1, ((numberTypeMap[vertex_t])), ((numberTypeMap[edge_t])), ((numberTypeMap[weight_t])), num_local_edges, num_global_verts, num_global_edges, - True, False, # BFS runs on unweighted graphs False, False, True) diff --git a/python/cugraph/dask/traversal/mg_sssp_wrapper.pyx b/python/cugraph/dask/traversal/mg_sssp_wrapper.pyx index 26928331273..63ac2942cfa 100644 --- a/python/cugraph/dask/traversal/mg_sssp_wrapper.pyx +++ b/python/cugraph/dask/traversal/mg_sssp_wrapper.pyx @@ -27,6 +27,7 @@ def mg_sssp(input_df, vertex_partition_offsets, rank, handle, + segment_offsets, start): """ Call sssp @@ -72,18 +73,29 @@ def mg_sssp(input_df, vertex_partition_offsets_host = vertex_partition_offsets.values_host cdef uintptr_t c_vertex_partition_offsets = vertex_partition_offsets_host.__array_interface__['data'][0] + cdef vector[int] v_segment_offsets_32 + cdef vector[long] v_segment_offsets_64 + cdef uintptr_t c_segment_offsets + if (vertex_t == np.dtype("int32")): + v_segment_offsets_32 = segment_offsets + c_segment_offsets = v_segment_offsets_32.data() + else: + v_segment_offsets_64 = segment_offsets + c_segment_offsets = v_segment_offsets_64.data() + cdef graph_container_t graph_container populate_graph_container(graph_container, handle_[0], c_src_vertices, c_dst_vertices, c_edge_weights, c_vertex_partition_offsets, + c_segment_offsets, + len(segment_offsets) - 1, ((numberTypeMap[vertex_t])), ((numberTypeMap[edge_t])), ((numberTypeMap[weight_t])), num_local_edges, num_global_verts, num_global_edges, - True, is_weighted, False, False, True) diff --git a/python/cugraph/dask/traversal/sssp.py b/python/cugraph/dask/traversal/sssp.py index f6c259caf77..fbaee901d65 100644 --- a/python/cugraph/dask/traversal/sssp.py +++ b/python/cugraph/dask/traversal/sssp.py @@ -27,15 +27,20 @@ def call_sssp(sID, num_verts, num_edges, vertex_partition_offsets, + aggregate_segment_offsets, start): wid = Comms.get_worker_id(sID) handle = Comms.get_handle(sID) + local_size = 
len(aggregate_segment_offsets) // Comms.get_n_workers(sID) + segment_offsets = \ + aggregate_segment_offsets[local_size * wid: local_size * (wid + 1)] return mg_sssp.mg_sssp(data[0], num_verts, num_edges, vertex_partition_offsets, wid, handle, + segment_offsets, start) @@ -109,6 +114,7 @@ def sssp(graph, num_verts, num_edges, vertex_partition_offsets, + graph.aggregate_segment_offsets, source, workers=[wf[0]]) for idx, wf in enumerate(data.worker_to_parts.items())] diff --git a/python/cugraph/link_analysis/pagerank_wrapper.pyx b/python/cugraph/link_analysis/pagerank_wrapper.pyx index 5a312f4f633..7198ccabc9e 100644 --- a/python/cugraph/link_analysis/pagerank_wrapper.pyx +++ b/python/cugraph/link_analysis/pagerank_wrapper.pyx @@ -97,12 +97,13 @@ def pagerank(input_graph, alpha=0.85, personalization=None, max_iter=100, tol=1. handle_[0], c_src_vertices, c_dst_vertices, c_edge_weights, NULL, + NULL, + 0, ((numberTypeEnum.int32Type)), ((numberTypeEnum.int32Type)), ((numberTypeMap[weight_t])), num_local_edges, num_verts, num_edges, - False, is_weighted, is_symmetric, True, diff --git a/python/cugraph/sampling/random_walks_wrapper.pyx b/python/cugraph/sampling/random_walks_wrapper.pyx index 64194976e87..688ece9595b 100644 --- a/python/cugraph/sampling/random_walks_wrapper.pyx +++ b/python/cugraph/sampling/random_walks_wrapper.pyx @@ -75,13 +75,14 @@ def random_walks(input_graph, start_vertices, max_depth, use_padding): handle_[0], c_src_vertices, c_dst_vertices, c_edge_weights, NULL, + NULL, + 0, ((numberTypeMap[vertex_t])), ((numberTypeMap[edge_t])), ((numberTypeMap[weight_t])), num_partition_edges, num_verts, num_edges, - False, is_weighted, is_symmetric, False, False) diff --git a/python/cugraph/structure/graph_implementation/simpleDistributedGraph.py b/python/cugraph/structure/graph_implementation/simpleDistributedGraph.py index 951ea8add5b..10d46cc3fed 100644 --- a/python/cugraph/structure/graph_implementation/simpleDistributedGraph.py +++ b/python/cugraph/structure/graph_implementation/simpleDistributedGraph.py @@ -47,6 +47,7 @@ def __init__(self, properties): # Structure self.edgelist = None self.renumber_map = None + self.aggregate_segment_offsets = None self.properties = simpleDistributedGraphImpl.Properties(properties) self.source_columns = None self.destination_columns = None @@ -463,14 +464,14 @@ def compute_renumber_edge_list(self, transposed=False): del self.edgelist - renumbered_ddf, number_map = NumberMap.renumber( - self.input_df, - self.source_columns, - self.destination_columns, - store_transposed=transposed, - ) + renumbered_ddf, number_map, aggregate_segment_offsets = \ + NumberMap.renumber_and_segment(self.input_df, + self.source_columns, + self.destination_columns, + store_transposed=transposed) self.edgelist = self.EdgeList(renumbered_ddf) self.renumber_map = number_map + self.aggregate_segment_offsets = aggregate_segment_offsets self.properties.store_transposed = transposed def vertex_column_size(self): diff --git a/python/cugraph/structure/graph_utilities.pxd b/python/cugraph/structure/graph_utilities.pxd index 2d5b081dd0c..b701ba8b400 100644 --- a/python/cugraph/structure/graph_utilities.pxd +++ b/python/cugraph/structure/graph_utilities.pxd @@ -43,13 +43,14 @@ cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": void *dst_vertices, void *weights, void *vertex_partition_offsets, + void *segment_offsets, + size_t num_segments, numberTypeEnum vertexType, numberTypeEnum edgeType, numberTypeEnum weightType, size_t num_local_edges, size_t 
num_global_vertices, size_t num_global_edges, - bool sorted_by_degree, bool is_weighted, bool is_symmetric, bool transposed, @@ -144,15 +145,17 @@ ctypedef fused shuffled_vertices_t: # cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": - cdef cppclass renum_quad_t[vertex_t, edge_t]: - renum_quad_t(const handle_t &handle) + cdef cppclass renum_tuple_t[vertex_t, edge_t]: + renum_tuple_t(const handle_t &handle) pair[unique_ptr[device_buffer], size_t] get_dv_wrap() vertex_t& get_num_vertices() edge_t& get_num_edges() + vector[vertex_t]& get_segment_offsets() + unique_ptr[vector[vertex_t]] get_segment_offsets_wrap() int get_part_row_size() int get_part_col_size() int get_part_comm_rank() - unique_ptr[vector[vertex_t]] get_partition_offsets() + unique_ptr[vector[vertex_t]] get_partition_offsets_wrap() pair[vertex_t, vertex_t] get_part_local_vertex_range() vertex_t get_part_local_vertex_first() vertex_t get_part_local_vertex_last() @@ -183,7 +186,7 @@ cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": # cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": - cdef unique_ptr[renum_quad_t[vertex_t, edge_t]] call_renumber[vertex_t, edge_t]( + cdef unique_ptr[renum_tuple_t[vertex_t, edge_t]] call_renumber[vertex_t, edge_t]( const handle_t &handle, vertex_t *edgelist_major_vertices, vertex_t *edgelist_minor_vertices, diff --git a/python/cugraph/structure/number_map.py b/python/cugraph/structure/number_map.py index a1ff2f47abf..1e3281ca33b 100644 --- a/python/cugraph/structure/number_map.py +++ b/python/cugraph/structure/number_map.py @@ -16,7 +16,6 @@ from dask.distributed import wait, default_client from cugraph.dask.common.input_utils import get_distributed_data from cugraph.structure import renumber_wrapper as c_renumber -from cugraph.utilities.utils import is_device_version_less_than import cugraph.comms.comms as Comms import dask_cudf import numpy as np @@ -469,15 +468,15 @@ def from_internal_vertex_id( return output_df - def renumber(df, src_col_names, dst_col_names, preserve_order=False, - store_transposed=False): + def renumber_and_segment( + df, src_col_names, dst_col_names, preserve_order=False, + store_transposed=False + ): if isinstance(src_col_names, list): renumber_type = 'legacy' elif not (df[src_col_names].dtype == np.int32 or df[src_col_names].dtype == np.int64): renumber_type = 'legacy' - elif is_device_version_less_than((7, 0)): - renumber_type = 'legacy' else: renumber_type = 'experimental' @@ -538,14 +537,27 @@ def renumber(df, src_col_names, dst_col_names, preserve_order=False, def get_renumber_map(data): return data[0] - def get_renumbered_df(data): + def get_segment_offsets(data): return data[1] + def get_renumbered_df(data): + return data[2] + renumbering_map = dask_cudf.from_delayed( [client.submit(get_renumber_map, data, workers=[wf]) for (data, wf) in result]) + + list_of_segment_offsets = client.gather( + [client.submit(get_segment_offsets, + data, + workers=[wf]) + for (data, wf) in result]) + aggregate_segment_offsets = [] + for segment_offsets in list_of_segment_offsets: + aggregate_segment_offsets.extend(segment_offsets) + renumbered_df = dask_cudf.from_delayed( [client.submit(get_renumbered_df, data, @@ -562,22 +574,16 @@ def get_renumbered_df(data): renumber_map.implementation.ddf = renumbering_map.rename( columns={'original_ids': '0', 'new_ids': 'global_id'}) renumber_map.implementation.numbered = True - return renumbered_df, renumber_map + return renumbered_df, renumber_map, 
aggregate_segment_offsets else: - if is_device_version_less_than((7, 0)): - renumbered_df = df - renumber_map.implementation.df = indirection_map - renumber_map.implementation.numbered = True - return renumbered_df, renumber_map - - renumbering_map, renumbered_df = c_renumber.renumber( - df, - num_edges, - 0, - Comms.get_default_handle(), - is_mnmg, - store_transposed) + renumbering_map, segment_offsets, renumbered_df = \ + c_renumber.renumber(df, + num_edges, + 0, + Comms.get_default_handle(), + is_mnmg, + store_transposed) if renumber_type == 'legacy': renumber_map.implementation.df = indirection_map.\ merge(renumbering_map, @@ -589,7 +595,13 @@ def get_renumbered_df(data): columns={'original_ids': '0', 'new_ids': 'id'}, copy=False) renumber_map.implementation.numbered = True - return renumbered_df, renumber_map + return renumbered_df, renumber_map, segment_offsets + + def renumber(df, src_col_names, dst_col_names, preserve_order=False, + store_transposed=False): + return NumberMap.renumber_and_segment( + df, src_col_names, dst_col_names, + preserve_order, store_transposed)[0:2] def unrenumber(self, df, column_name, preserve_order=False, get_column_names=False): diff --git a/python/cugraph/structure/renumber_wrapper.pyx b/python/cugraph/structure/renumber_wrapper.pyx index 1c302d21220..4433bbb09cb 100644 --- a/python/cugraph/structure/renumber_wrapper.pyx +++ b/python/cugraph/structure/renumber_wrapper.pyx @@ -117,7 +117,6 @@ def renumber(input_df, # maybe use cpdef ? cdef bool mg_flag = is_multi_gpu # run Single-GPU or MNMG cdef pair[unique_ptr[device_buffer], size_t] pair_original - cdef pair[unique_ptr[device_buffer], size_t] pair_partition # tparams: vertex_t, edge_t, weight_t: # @@ -130,9 +129,9 @@ def renumber(input_df, # maybe use cpdef ? # tparams: vertex_t, edge_t: # - cdef unique_ptr[renum_quad_t[int, int]] ptr_renum_quad_32_32 - cdef unique_ptr[renum_quad_t[int, long]] ptr_renum_quad_32_64 - cdef unique_ptr[renum_quad_t[long, long]] ptr_renum_quad_64_64 + cdef unique_ptr[renum_tuple_t[int, int]] ptr_renum_tuple_32_32 + cdef unique_ptr[renum_tuple_t[int, long]] ptr_renum_tuple_32_64 + cdef unique_ptr[renum_tuple_t[long, long]] ptr_renum_tuple_64_64 # tparam: vertex_t: # @@ -144,6 +143,11 @@ def renumber(input_df, # maybe use cpdef ? cdef unique_ptr[vector[int]] uniq_partition_vector_32 cdef unique_ptr[vector[long]] uniq_partition_vector_64 + # tparam: vertex_t: + # + cdef unique_ptr[vector[int]] uniq_segment_vector_32 + cdef unique_ptr[vector[long]] uniq_segment_vector_64 + cdef size_t rank_indx = rank if (vertex_t == np.dtype("int32")): @@ -172,14 +176,14 @@ def renumber(input_df, # maybe use cpdef ? shuffled_major = major_vertices.__cuda_array_interface__['data'][0] shuffled_minor = minor_vertices.__cuda_array_interface__['data'][0] - ptr_renum_quad_32_32.reset(call_renumber[int, int](deref(handle_ptr), - shuffled_major, - shuffled_minor, - deref(edge_counts_32.get()), - do_check, - mg_flag).release()) + ptr_renum_tuple_32_32.reset(call_renumber[int, int](deref(handle_ptr), + shuffled_major, + shuffled_minor, + deref(edge_counts_32.get()), + do_check, + mg_flag).release()) - pair_original = ptr_renum_quad_32_32.get().get_dv_wrap() # original vertices: see helper + pair_original = ptr_renum_tuple_32_32.get().get_dv_wrap() # original vertices: see helper original_buffer = DeviceBuffer.c_from_unique_ptr(move(pair_original.first)) @@ -189,7 +193,7 @@ def renumber(input_df, # maybe use cpdef ? 
# extract unique_ptr[partition_offsets]: # - uniq_partition_vector_32 = move(ptr_renum_quad_32_32.get().get_partition_offsets()) + uniq_partition_vector_32 = move(ptr_renum_tuple_32_32.get().get_partition_offsets_wrap()) # create series out of a partition range from rank to rank+1: # @@ -198,7 +202,7 @@ def renumber(input_df, # maybe use cpdef ? uniq_partition_vector_32.get()[0].at(rank_indx+1)), dtype=vertex_t) else: - new_series = cudf.Series(np.arange(0, ptr_renum_quad_32_32.get().get_num_vertices()), + new_series = cudf.Series(np.arange(0, ptr_renum_tuple_32_32.get().get_num_vertices()), dtype=vertex_t) # create new cudf df # @@ -208,7 +212,12 @@ def renumber(input_df, # maybe use cpdef ? renumbered_map['original_ids'] = original_series renumbered_map['new_ids'] = new_series - return renumbered_map, shuffled_df + uniq_segment_vector_32 = move(ptr_renum_tuple_32_32.get().get_segment_offsets_wrap()) + segment_offsets = [None] * (deref(uniq_segment_vector_32).size()) + for i in range(len(segment_offsets)): + segment_offsets[i] = deref(uniq_segment_vector_32)[i] + + return renumbered_map, segment_offsets, shuffled_df elif( weight_t == np.dtype("float64")): if(is_multi_gpu): @@ -235,14 +244,14 @@ def renumber(input_df, # maybe use cpdef ? shuffled_major = major_vertices.__cuda_array_interface__['data'][0] shuffled_minor = minor_vertices.__cuda_array_interface__['data'][0] - ptr_renum_quad_32_32.reset(call_renumber[int, int](deref(handle_ptr), - shuffled_major, - shuffled_minor, - deref(edge_counts_32.get()), - do_check, - mg_flag).release()) + ptr_renum_tuple_32_32.reset(call_renumber[int, int](deref(handle_ptr), + shuffled_major, + shuffled_minor, + deref(edge_counts_32.get()), + do_check, + mg_flag).release()) - pair_original = ptr_renum_quad_32_32.get().get_dv_wrap() # original vertices: see helper + pair_original = ptr_renum_tuple_32_32.get().get_dv_wrap() # original vertices: see helper original_buffer = DeviceBuffer.c_from_unique_ptr(move(pair_original.first)) @@ -252,7 +261,7 @@ def renumber(input_df, # maybe use cpdef ? # extract unique_ptr[partition_offsets]: # - uniq_partition_vector_32 = move(ptr_renum_quad_32_32.get().get_partition_offsets()) + uniq_partition_vector_32 = move(ptr_renum_tuple_32_32.get().get_partition_offsets_wrap()) # create series out of a partition range from rank to rank+1: # @@ -261,7 +270,7 @@ def renumber(input_df, # maybe use cpdef ? uniq_partition_vector_32.get()[0].at(rank_indx+1)), dtype=vertex_t) else: - new_series = cudf.Series(np.arange(0, ptr_renum_quad_32_32.get().get_num_vertices()), + new_series = cudf.Series(np.arange(0, ptr_renum_tuple_32_32.get().get_num_vertices()), dtype=vertex_t) # create new cudf df @@ -272,7 +281,12 @@ def renumber(input_df, # maybe use cpdef ? renumbered_map['original_ids'] = original_series renumbered_map['new_ids'] = new_series - return renumbered_map, shuffled_df + uniq_segment_vector_32 = move(ptr_renum_tuple_32_32.get().get_segment_offsets_wrap()) + segment_offsets = [None] * (deref(uniq_segment_vector_32).size()) + for i in range(len(segment_offsets)): + segment_offsets[i] = deref(uniq_segment_vector_32)[i] + + return renumbered_map, segment_offsets, shuffled_df elif ( edge_t == np.dtype("int64")): if( weight_t == np.dtype("float32")): @@ -300,14 +314,14 @@ def renumber(input_df, # maybe use cpdef ? 
shuffled_major = major_vertices.__cuda_array_interface__['data'][0] shuffled_minor = minor_vertices.__cuda_array_interface__['data'][0] - ptr_renum_quad_32_64.reset(call_renumber[int, long](deref(handle_ptr), - shuffled_major, - shuffled_minor, - deref(edge_counts_64.get()), - do_check, - mg_flag).release()) + ptr_renum_tuple_32_64.reset(call_renumber[int, long](deref(handle_ptr), + shuffled_major, + shuffled_minor, + deref(edge_counts_64.get()), + do_check, + mg_flag).release()) - pair_original = ptr_renum_quad_32_64.get().get_dv_wrap() # original vertices: see helper + pair_original = ptr_renum_tuple_32_64.get().get_dv_wrap() # original vertices: see helper original_buffer = DeviceBuffer.c_from_unique_ptr(move(pair_original.first)) @@ -317,7 +331,7 @@ def renumber(input_df, # maybe use cpdef ? # extract unique_ptr[partition_offsets]: # - uniq_partition_vector_32 = move(ptr_renum_quad_32_64.get().get_partition_offsets()) + uniq_partition_vector_32 = move(ptr_renum_tuple_32_64.get().get_partition_offsets_wrap()) # create series out of a partition range from rank to rank+1: # @@ -326,7 +340,7 @@ def renumber(input_df, # maybe use cpdef ? uniq_partition_vector_32.get()[0].at(rank_indx+1)), dtype=vertex_t) else: - new_series = cudf.Series(np.arange(0, ptr_renum_quad_32_64.get().get_num_vertices()), + new_series = cudf.Series(np.arange(0, ptr_renum_tuple_32_64.get().get_num_vertices()), dtype=vertex_t) # create new cudf df @@ -337,7 +351,12 @@ def renumber(input_df, # maybe use cpdef ? renumbered_map['original_ids'] = original_series renumbered_map['new_ids'] = new_series - return renumbered_map, shuffled_df + uniq_segment_vector_32 = move(ptr_renum_tuple_32_64.get().get_segment_offsets_wrap()) + segment_offsets = [None] * (deref(uniq_segment_vector_32).size()) + for i in range(len(segment_offsets)): + segment_offsets[i] = deref(uniq_segment_vector_32)[i] + + return renumbered_map, segment_offsets, shuffled_df elif( weight_t == np.dtype("float64")): if(is_multi_gpu): ptr_shuffled_32_64_64.reset(call_shuffle[int, long, double](deref(handle_ptr), @@ -363,14 +382,14 @@ def renumber(input_df, # maybe use cpdef ? shuffled_major = major_vertices.__cuda_array_interface__['data'][0] shuffled_minor = minor_vertices.__cuda_array_interface__['data'][0] - ptr_renum_quad_32_64.reset(call_renumber[int, long](deref(handle_ptr), - shuffled_major, - shuffled_minor, - deref(edge_counts_64.get()), - do_check, - mg_flag).release()) + ptr_renum_tuple_32_64.reset(call_renumber[int, long](deref(handle_ptr), + shuffled_major, + shuffled_minor, + deref(edge_counts_64.get()), + do_check, + mg_flag).release()) - pair_original = ptr_renum_quad_32_64.get().get_dv_wrap() # original vertices: see helper + pair_original = ptr_renum_tuple_32_64.get().get_dv_wrap() # original vertices: see helper original_buffer = DeviceBuffer.c_from_unique_ptr(move(pair_original.first)) @@ -380,7 +399,7 @@ def renumber(input_df, # maybe use cpdef ? # extract unique_ptr[partition_offsets]: # - uniq_partition_vector_32 = move(ptr_renum_quad_32_64.get().get_partition_offsets()) + uniq_partition_vector_32 = move(ptr_renum_tuple_32_64.get().get_partition_offsets_wrap()) # create series out of a partition range from rank to rank+1: # @@ -389,7 +408,7 @@ def renumber(input_df, # maybe use cpdef ? 
uniq_partition_vector_32.get()[0].at(rank_indx+1)), dtype=vertex_t) else: - new_series = cudf.Series(np.arange(0, ptr_renum_quad_32_64.get().get_num_vertices()), + new_series = cudf.Series(np.arange(0, ptr_renum_tuple_32_64.get().get_num_vertices()), dtype=vertex_t) # create new cudf df # @@ -399,7 +418,12 @@ def renumber(input_df, # maybe use cpdef ? renumbered_map['original_ids'] = original_series renumbered_map['new_ids'] = new_series - return renumbered_map, shuffled_df + uniq_segment_vector_32 = move(ptr_renum_tuple_32_64.get().get_segment_offsets_wrap()) + segment_offsets = [None] * (deref(uniq_segment_vector_32).size()) + for i in range(len(segment_offsets)): + segment_offsets[i] = deref(uniq_segment_vector_32)[i] + + return renumbered_map, segment_offsets, shuffled_df elif (vertex_t == np.dtype("int64")): if ( edge_t == np.dtype("int64")): @@ -428,14 +452,14 @@ def renumber(input_df, # maybe use cpdef ? shuffled_major = major_vertices.__cuda_array_interface__['data'][0] shuffled_minor = minor_vertices.__cuda_array_interface__['data'][0] - ptr_renum_quad_64_64.reset(call_renumber[long, long](deref(handle_ptr), - shuffled_major, - shuffled_minor, - deref(edge_counts_64.get()), - do_check, - mg_flag).release()) + ptr_renum_tuple_64_64.reset(call_renumber[long, long](deref(handle_ptr), + shuffled_major, + shuffled_minor, + deref(edge_counts_64.get()), + do_check, + mg_flag).release()) - pair_original = ptr_renum_quad_64_64.get().get_dv_wrap() # original vertices: see helper + pair_original = ptr_renum_tuple_64_64.get().get_dv_wrap() # original vertices: see helper original_buffer = DeviceBuffer.c_from_unique_ptr(move(pair_original.first)) @@ -445,7 +469,7 @@ def renumber(input_df, # maybe use cpdef ? # extract unique_ptr[partition_offsets]: # - uniq_partition_vector_64 = move(ptr_renum_quad_64_64.get().get_partition_offsets()) + uniq_partition_vector_64 = move(ptr_renum_tuple_64_64.get().get_partition_offsets_wrap()) # create series out of a partition range from rank to rank+1: # @@ -454,7 +478,7 @@ def renumber(input_df, # maybe use cpdef ? uniq_partition_vector_64.get()[0].at(rank_indx+1)), dtype=vertex_t) else: - new_series = cudf.Series(np.arange(0, ptr_renum_quad_64_64.get().get_num_vertices()), + new_series = cudf.Series(np.arange(0, ptr_renum_tuple_64_64.get().get_num_vertices()), dtype=vertex_t) # create new cudf df @@ -465,7 +489,12 @@ def renumber(input_df, # maybe use cpdef ? renumbered_map['original_ids'] = original_series renumbered_map['new_ids'] = new_series - return renumbered_map, shuffled_df + uniq_segment_vector_64 = move(ptr_renum_tuple_64_64.get().get_segment_offsets_wrap()) + segment_offsets = [None] * (deref(uniq_segment_vector_64).size()) + for i in range(len(segment_offsets)): + segment_offsets[i] = deref(uniq_segment_vector_64)[i] + + return renumbered_map, segment_offsets, shuffled_df elif( weight_t == np.dtype("float64")): if(is_multi_gpu): @@ -492,14 +521,14 @@ def renumber(input_df, # maybe use cpdef ? 
shuffled_major = major_vertices.__cuda_array_interface__['data'][0] shuffled_minor = minor_vertices.__cuda_array_interface__['data'][0] - ptr_renum_quad_64_64.reset(call_renumber[long, long](deref(handle_ptr), - shuffled_major, - shuffled_minor, - deref(edge_counts_64.get()), - do_check, - mg_flag).release()) + ptr_renum_tuple_64_64.reset(call_renumber[long, long](deref(handle_ptr), + shuffled_major, + shuffled_minor, + deref(edge_counts_64.get()), + do_check, + mg_flag).release()) - pair_original = ptr_renum_quad_64_64.get().get_dv_wrap() # original vertices: see helper + pair_original = ptr_renum_tuple_64_64.get().get_dv_wrap() # original vertices: see helper original_buffer = DeviceBuffer.c_from_unique_ptr(move(pair_original.first)) @@ -509,7 +538,7 @@ def renumber(input_df, # maybe use cpdef ? # extract unique_ptr[partition_offsets]: # - uniq_partition_vector_64 = move(ptr_renum_quad_64_64.get().get_partition_offsets()) + uniq_partition_vector_64 = move(ptr_renum_tuple_64_64.get().get_partition_offsets_wrap()) # create series out of a partition range from rank to rank+1: # @@ -518,7 +547,7 @@ def renumber(input_df, # maybe use cpdef ? uniq_partition_vector_64.get()[0].at(rank_indx+1)), dtype=vertex_t) else: - new_series = cudf.Series(np.arange(0, ptr_renum_quad_64_64.get().get_num_vertices()), + new_series = cudf.Series(np.arange(0, ptr_renum_tuple_64_64.get().get_num_vertices()), dtype=vertex_t) # create new cudf df @@ -529,4 +558,9 @@ def renumber(input_df, # maybe use cpdef ? renumbered_map['original_ids'] = original_series renumbered_map['new_ids'] = new_series - return renumbered_map, shuffled_df + uniq_segment_vector_64 = move(ptr_renum_tuple_64_64.get().get_segment_offsets_wrap()) + segment_offsets = [None] * (deref(uniq_segment_vector_64).size()) + for i in range(len(segment_offsets)): + segment_offsets[i] = deref(uniq_segment_vector_64)[i] + + return renumbered_map, segment_offsets, shuffled_df diff --git a/python/cugraph/tests/test_graph.py b/python/cugraph/tests/test_graph.py index 933a34aef3c..71046f65cd8 100644 --- a/python/cugraph/tests/test_graph.py +++ b/python/cugraph/tests/test_graph.py @@ -20,7 +20,7 @@ import scipy import cudf -from cudf.tests.utils import assert_eq +from cudf.testing._utils import assert_eq import cugraph from cugraph.tests import utils diff --git a/python/cugraph/tests/test_hypergraph.py b/python/cugraph/tests/test_hypergraph.py index 43801be9fdc..79ae7139180 100644 --- a/python/cugraph/tests/test_hypergraph.py +++ b/python/cugraph/tests/test_hypergraph.py @@ -39,7 +39,7 @@ import pandas as pd import pytest import cudf -from cudf.tests.utils import assert_eq +from cudf.testing._utils import assert_eq import cugraph diff --git a/python/cugraph/traversal/bfs_wrapper.pyx b/python/cugraph/traversal/bfs_wrapper.pyx index 05c175c563f..1896415b1e3 100644 --- a/python/cugraph/traversal/bfs_wrapper.pyx +++ b/python/cugraph/traversal/bfs_wrapper.pyx @@ -85,13 +85,14 @@ def bfs(input_graph, start, depth_limit, direction_optimizing=False): handle_[0], c_src_vertices, c_dst_vertices, c_edge_weights, NULL, + NULL, + 0, ((numberTypeEnum.int32Type)), ((numberTypeEnum.int32Type)), ((numberTypeMap[weight_t])), num_edges, num_verts, num_edges, False, - False, is_symmetric, False, False) From e2ac46790a4272fceaf7bdd68794b23b5f79307d Mon Sep 17 00:00:00 2001 From: Conor Hoekstra <36027403+codereport@users.noreply.github.com> Date: Wed, 7 Jul 2021 10:13:50 -0400 Subject: [PATCH 314/343] Updating Clang Version to 11.0.0 (#1688) Follow up PR to: 
https://github.com/rapidsai/cudf/pull/6695. Performing the same changes but for `rapidsai/cugraph` Depends on: https://github.com/rapidsai/integration/pull/304 Authors: - Conor Hoekstra (https://github.com/codereport) Approvers: - Dillon Cullinan (https://github.com/dillon-cullinan) - Chuck Hastings (https://github.com/ChuckHastings) URL: https://github.com/rapidsai/cugraph/pull/1688 --- conda/environments/cugraph_dev_cuda11.0.yml | 4 +- conda/environments/cugraph_dev_cuda11.1.yml | 4 +- conda/environments/cugraph_dev_cuda11.2.yml | 4 +- cpp/.clang-format | 20 +- cpp/include/cugraph/algorithms.hpp | 262 +++++++++--------- cpp/include/cugraph/compute_partition.cuh | 18 +- cpp/include/cugraph/dendrogram.hpp | 14 +- .../experimental/detail/graph_utils.cuh | 28 +- cpp/include/cugraph/experimental/graph.hpp | 48 ++-- cpp/include/cugraph/functions.hpp | 6 +- cpp/include/cugraph/graph_generators.hpp | 46 +-- cpp/include/cugraph/internals.hpp | 6 +- cpp/include/cugraph/legacy/graph.hpp | 88 +++--- .../cugraph/matrix_partition_device_view.cuh | 16 +- .../copy_v_transform_reduce_in_out_nbr.cuh | 72 ++--- ...orm_reduce_by_adj_matrix_row_col_key_e.cuh | 22 +- .../cugraph/prims/transform_reduce_e.cuh | 72 ++--- .../update_frontier_v_push_if_out_nbr.cuh | 2 +- cpp/include/cugraph/prims/vertex_frontier.cuh | 4 +- .../cugraph/utilities/collect_comm.cuh | 4 +- .../cugraph/utilities/host_scalar_comm.cuh | 4 +- .../cugraph/utilities/path_retrieval.hpp | 20 +- .../cugraph/utilities/shuffle_comm.cuh | 12 +- .../cugraph/vertex_partition_device.cuh | 112 ++++++++ .../cugraph/vertex_partition_device_view.cuh | 4 +- cpp/scripts/run-clang-format.py | 5 +- cpp/src/centrality/betweenness_centrality.cu | 94 +++---- cpp/src/centrality/betweenness_centrality.cuh | 62 ++--- .../betweenness_centrality_kernels.cuh | 40 +-- cpp/src/centrality/katz_centrality.cu | 6 +- cpp/src/community/ecg.cu | 26 +- cpp/src/community/egonet.cu | 48 ++-- .../community/extract_subgraph_by_vertex.cu | 26 +- cpp/src/community/flatten_dendrogram.cuh | 12 +- cpp/src/community/ktruss.cu | 14 +- cpp/src/community/leiden.cu | 18 +- cpp/src/community/leiden.cuh | 20 +- cpp/src/community/louvain.cu | 174 ++++++------ cpp/src/community/louvain.cuh | 82 +++--- cpp/src/community/spectral_clustering.cu | 76 ++--- cpp/src/community/triangles_counting.cu | 104 +++---- cpp/src/components/connectivity.cu | 12 +- cpp/src/components/utils.h | 4 +- cpp/src/components/weak_cc.cuh | 54 ++-- .../components/weakly_connected_components.cu | 120 ++++---- cpp/src/converters/COOtoCSR.cu | 46 +-- cpp/src/converters/COOtoCSR.cuh | 64 ++--- cpp/src/converters/permute_graph.cuh | 14 +- cpp/src/cores/core_number.cu | 50 ++-- cpp/src/experimental/bfs.cu | 110 ++++---- cpp/src/experimental/coarsen_graph.cu | 215 +++++++------- cpp/src/experimental/graph.cu | 22 +- cpp/src/experimental/induced_subgraph.cu | 114 ++++---- cpp/src/experimental/katz_centrality.cu | 112 ++++---- cpp/src/experimental/louvain.cuh | 38 +-- cpp/src/experimental/pagerank.cu | 6 +- cpp/src/experimental/sssp.cu | 116 ++++---- cpp/src/generators/generator_tools.cu | 107 +++---- cpp/src/layout/barnes_hut.hpp | 61 ++-- cpp/src/layout/bh_kernels.hpp | 120 ++++---- cpp/src/layout/exact_fa2.hpp | 30 +- cpp/src/layout/exact_repulsion.hpp | 24 +- cpp/src/layout/fa2_kernels.hpp | 140 +++++----- cpp/src/layout/force_atlas2.cu | 36 +-- cpp/src/linear_assignment/hungarian.cu | 118 ++++---- cpp/src/link_analysis/gunrock_hits.cpp | 16 +- cpp/src/link_prediction/jaccard.cu | 134 ++++----- 
cpp/src/link_prediction/overlap.cu | 138 ++++----- cpp/src/sampling/random_walks.cuh | 2 +- cpp/src/sampling/rw_traversals.hpp | 10 +- cpp/src/structure/graph.cu | 20 +- cpp/src/traversal/bfs.cu | 84 +++--- cpp/src/traversal/bfs.cuh | 46 +-- cpp/src/traversal/bfs_kernels.cuh | 186 ++++++------- cpp/src/traversal/mg/bfs.cuh | 22 +- cpp/src/traversal/mg/common_utils.cuh | 128 ++++----- cpp/src/traversal/mg/frontier_expand.cuh | 12 +- .../traversal/mg/frontier_expand_kernels.cuh | 56 ++-- .../traversal/mg/vertex_binning_kernels.cuh | 38 +-- cpp/src/traversal/sssp.cu | 36 +-- cpp/src/traversal/traversal_common.cuh | 4 +- cpp/src/traversal/tsp.cu | 36 +-- cpp/src/traversal/tsp.hpp | 50 ++-- cpp/src/traversal/tsp_solver.hpp | 74 ++--- cpp/src/traversal/tsp_utils.hpp | 7 +- cpp/src/traversal/two_hop_neighbors.cu | 14 +- cpp/src/traversal/two_hop_neighbors.cuh | 26 +- cpp/src/tree/mst.cu | 24 +- cpp/src/utilities/cython.cu | 2 +- cpp/src/utilities/graph_utils.cuh | 76 ++--- cpp/src/utilities/high_res_timer.hpp | 8 +- cpp/src/utilities/path_retrieval.cu | 74 ++--- cpp/src/utilities/spmv_1D.cu | 20 +- .../centrality/betweenness_centrality_test.cu | 90 +++--- .../edge_betweenness_centrality_test.cu | 58 ++-- cpp/tests/centrality/katz_centrality_test.cu | 4 +- cpp/tests/community/ecg_test.cpp | 4 +- cpp/tests/community/mg_louvain_helper.cu | 67 +++-- cpp/tests/community/mg_louvain_helper.hpp | 28 +- cpp/tests/community/mg_louvain_test.cpp | 4 +- .../mg_weakly_connected_components_test.cpp | 4 +- cpp/tests/components/scc_test.cu | 3 +- cpp/tests/experimental/mg_bfs_test.cpp | 4 +- .../experimental/mg_katz_centrality_test.cpp | 14 +- cpp/tests/experimental/mg_sssp_test.cpp | 4 +- cpp/tests/generators/erdos_renyi_test.cpp | 2 +- cpp/tests/layout/force_atlas2_test.cu | 3 +- cpp/tests/layout/knn.h | 8 +- cpp/tests/layout/trust_worthiness.h | 15 +- cpp/tests/linear_assignment/hungarian_test.cu | 4 +- cpp/tests/pagerank/mg_pagerank_test.cpp | 4 +- cpp/tests/sampling/random_walks_profiling.cu | 4 +- cpp/tests/traversal/bfs_ref.h | 18 +- cpp/tests/traversal/bfs_test.cu | 8 +- cpp/tests/utilities/base_fixture.hpp | 14 +- cpp/tests/utilities/cxxopts.hpp | 26 +- cpp/tests/utilities/device_comm_wrapper.cu | 18 +- cpp/tests/utilities/device_comm_wrapper.hpp | 2 +- 118 files changed, 2697 insertions(+), 2488 deletions(-) create mode 100644 cpp/include/cugraph/vertex_partition_device.cuh diff --git a/conda/environments/cugraph_dev_cuda11.0.yml b/conda/environments/cugraph_dev_cuda11.0.yml index 91e640ea50b..7d38f73eb1f 100644 --- a/conda/environments/cugraph_dev_cuda11.0.yml +++ b/conda/environments/cugraph_dev_cuda11.0.yml @@ -19,8 +19,8 @@ dependencies: - ucx-proc=*=gpu - scipy - networkx>=2.5.1 -- clang=8.0.1 -- clang-tools=8.0.1 +- clang=11.0.0 +- clang-tools=11.0.0 - cmake>=3.20.1 - python>=3.6,<3.9 - notebook>=0.5.0 diff --git a/conda/environments/cugraph_dev_cuda11.1.yml b/conda/environments/cugraph_dev_cuda11.1.yml index 39fb0ca5811..26ecf04f0ea 100644 --- a/conda/environments/cugraph_dev_cuda11.1.yml +++ b/conda/environments/cugraph_dev_cuda11.1.yml @@ -19,8 +19,8 @@ dependencies: - ucx-proc=*=gpu - scipy - networkx>=2.5.1 -- clang=8.0.1 -- clang-tools=8.0.1 +- clang=11.0.0 +- clang-tools=11.0.0 - cmake>=3.20.1 - python>=3.6,<3.9 - notebook>=0.5.0 diff --git a/conda/environments/cugraph_dev_cuda11.2.yml b/conda/environments/cugraph_dev_cuda11.2.yml index 80fdf63a8e9..11e826d784d 100644 --- a/conda/environments/cugraph_dev_cuda11.2.yml +++ b/conda/environments/cugraph_dev_cuda11.2.yml @@ -19,8 +19,8 @@ 
dependencies: - ucx-proc=*=gpu - scipy - networkx>=2.5.1 -- clang=8.0.1 -- clang-tools=8.0.1 +- clang=11.0.0 +- clang-tools=11.0.0 - cmake>=3.20.1 - python>=3.6,<3.9 - notebook>=0.5.0 diff --git a/cpp/.clang-format b/cpp/.clang-format index 6f48df58b74..0c05436e922 100644 --- a/cpp/.clang-format +++ b/cpp/.clang-format @@ -6,16 +6,22 @@ Language: Cpp AccessModifierOffset: -1 AlignAfterOpenBracket: Align AlignConsecutiveAssignments: true +AlignConsecutiveBitFields: true AlignConsecutiveDeclarations: false +AlignConsecutiveMacros: true AlignEscapedNewlines: Left AlignOperands: true AlignTrailingComments: true +AllowAllArgumentsOnNextLine: true +AllowAllConstructorInitializersOnNextLine: true AllowAllParametersOfDeclarationOnNextLine: true AllowShortBlocksOnASingleLine: true AllowShortCaseLabelsOnASingleLine: true +AllowShortEnumsOnASingleLine: true AllowShortFunctionsOnASingleLine: All AllowShortIfStatementsOnASingleLine: true -AllowShortLoopsOnASingleLine: true +AllowShortLambdasOnASingleLine: true +AllowShortLoopsOnASingleLine: false # This is deprecated AlwaysBreakAfterDefinitionReturnType: None AlwaysBreakAfterReturnType: None @@ -40,14 +46,14 @@ BraceWrapping: SplitEmptyFunction: false SplitEmptyRecord: false SplitEmptyNamespace: false +BreakAfterJavaFieldAnnotations: false BreakBeforeBinaryOperators: None BreakBeforeBraces: WebKit BreakBeforeInheritanceComma: false -BreakInheritanceList: BeforeColon BreakBeforeTernaryOperators: true BreakConstructorInitializersBeforeComma: false BreakConstructorInitializers: BeforeColon -BreakAfterJavaFieldAnnotations: false +BreakInheritanceList: BeforeColon BreakStringLiterals: true ColumnLimit: 100 CommentPragmas: '^ IWYU pragma:' @@ -57,7 +63,7 @@ ConstructorInitializerAllOnOneLineOrOnePerLine: true ConstructorInitializerIndentWidth: 2 ContinuationIndentWidth: 2 Cpp11BracedListStyle: true -DerivePointerAlignment: true +DerivePointerAlignment: false DisableFormat: false ExperimentalAutoDetectBinPacking: false FixNamespaceComments: true @@ -139,18 +145,20 @@ SpaceBeforeCtorInitializerColon: true SpaceBeforeInheritanceColon: true SpaceBeforeParens: ControlStatements SpaceBeforeRangeBasedForLoopColon: true +SpaceBeforeSquareBrackets: false +SpaceInEmptyBlock: false SpaceInEmptyParentheses: false SpacesBeforeTrailingComments: 2 SpacesInAngles: false +SpacesInConditionalStatement: false SpacesInContainerLiterals: true SpacesInCStyleCastParentheses: false SpacesInParentheses: false SpacesInSquareBrackets: false -Standard: Cpp11 +Standard: c++17 StatementMacros: - Q_UNUSED - QT_REQUIRE_VERSION # Be consistent with indent-width, even for people who use tab for indentation! 
TabWidth: 2 UseTab: Never - diff --git a/cpp/include/cugraph/algorithms.hpp b/cpp/include/cugraph/algorithms.hpp index 12776f1f8c7..2e0d0f055c8 100644 --- a/cpp/include/cugraph/algorithms.hpp +++ b/cpp/include/cugraph/algorithms.hpp @@ -45,7 +45,7 @@ namespace cugraph { * caller */ template -void jaccard(legacy::GraphCSRView const &graph, WT const *weights, WT *result); +void jaccard(legacy::GraphCSRView const& graph, WT const* weights, WT* result); /** * @brief Compute jaccard similarity coefficient for selected vertex pairs @@ -69,12 +69,12 @@ void jaccard(legacy::GraphCSRView const &graph, WT const *weights, W * caller */ template -void jaccard_list(legacy::GraphCSRView const &graph, - WT const *weights, +void jaccard_list(legacy::GraphCSRView const& graph, + WT const* weights, ET num_pairs, - VT const *first, - VT const *second, - WT *result); + VT const* first, + VT const* second, + WT* result); /** * @brief Compute overlap coefficient for all vertices in the graph @@ -95,7 +95,7 @@ void jaccard_list(legacy::GraphCSRView const &graph, * caller */ template -void overlap(legacy::GraphCSRView const &graph, WT const *weights, WT *result); +void overlap(legacy::GraphCSRView const& graph, WT const* weights, WT* result); /** * @brief Compute overlap coefficient for select pairs of vertices @@ -119,12 +119,12 @@ void overlap(legacy::GraphCSRView const &graph, WT const *weights, W * caller */ template -void overlap_list(legacy::GraphCSRView const &graph, - WT const *weights, +void overlap_list(legacy::GraphCSRView const& graph, + WT const* weights, ET num_pairs, - VT const *first, - VT const *second, - WT *result); + VT const* first, + VT const* second, + WT* result); /** * @@ -180,12 +180,12 @@ void overlap_list(legacy::GraphCSRView const &graph, * */ template -void force_atlas2(raft::handle_t const &handle, - legacy::GraphCOOView &graph, - float *pos, +void force_atlas2(raft::handle_t const& handle, + legacy::GraphCOOView& graph, + float* pos, const int max_iter = 500, - float *x_start = nullptr, - float *y_start = nullptr, + float* x_start = nullptr, + float* y_start = nullptr, bool outbound_attraction_distribution = true, bool lin_log_mode = false, bool prevent_overlapping = false, @@ -197,7 +197,7 @@ void force_atlas2(raft::handle_t const &handle, bool strong_gravity_mode = false, const float gravity = 1.0, bool verbose = false, - internals::GraphBasedDimRedCallback *callback = nullptr); + internals::GraphBasedDimRedCallback* callback = nullptr); /** * @brief Finds an approximate solution to the traveling salesperson problem (TSP). @@ -225,17 +225,17 @@ void force_atlas2(raft::handle_t const &handle, * @param[out] route Device array containing the returned route. 
* */ -float traveling_salesperson(raft::handle_t const &handle, - int const *vtx_ptr, - float const *x_pos, - float const *y_pos, +float traveling_salesperson(raft::handle_t const& handle, + int const* vtx_ptr, + float const* x_pos, + float const* y_pos, int nodes, int restarts, bool beam_search, int k, int nstart, bool verbose, - int *route); + int* route); /** * @brief Compute betweenness centrality for a graph @@ -275,14 +275,14 @@ float traveling_salesperson(raft::handle_t const &handle, * */ template -void betweenness_centrality(const raft::handle_t &handle, - legacy::GraphCSRView const &graph, - result_t *result, +void betweenness_centrality(const raft::handle_t& handle, + legacy::GraphCSRView const& graph, + result_t* result, bool normalized = true, bool endpoints = false, - weight_t const *weight = nullptr, + weight_t const* weight = nullptr, vertex_t k = 0, - vertex_t const *vertices = nullptr); + vertex_t const* vertices = nullptr); /** * @brief Compute edge betweenness centrality for a graph @@ -319,13 +319,13 @@ void betweenness_centrality(const raft::handle_t &handle, * */ template -void edge_betweenness_centrality(const raft::handle_t &handle, - legacy::GraphCSRView const &graph, - result_t *result, +void edge_betweenness_centrality(const raft::handle_t& handle, + legacy::GraphCSRView const& graph, + result_t* result, bool normalized = true, - weight_t const *weight = nullptr, + weight_t const* weight = nullptr, vertex_t k = 0, - vertex_t const *vertices = nullptr); + vertex_t const* vertices = nullptr); enum class cugraph_cc_t { CUGRAPH_WEAK = 0, ///> Weakly Connected Components @@ -363,9 +363,9 @@ enum class cugraph_cc_t { * associated with vertex id i. */ template -void connected_components(legacy::GraphCSRView const &graph, +void connected_components(legacy::GraphCSRView const& graph, cugraph_cc_t connectivity_type, - VT *labels); + VT* labels); /** * @brief Compute k truss for a graph @@ -393,9 +393,9 @@ void connected_components(legacy::GraphCSRView const &graph, */ template std::unique_ptr> k_truss_subgraph( - legacy::GraphCOOView const &graph, + legacy::GraphCOOView const& graph, int k, - rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @brief Compute the Katz centrality for the nodes of the graph G @@ -430,8 +430,8 @@ std::unique_ptr> k_truss_subgraph( * @param[in] normalized If True normalize the resulting katz centrality values */ template -void katz_centrality(legacy::GraphCSRView const &graph, - result_t *result, +void katz_centrality(legacy::GraphCSRView const& graph, + result_t* result, double alpha, int max_iter, double tol, @@ -448,7 +448,7 @@ void katz_centrality(legacy::GraphCSRView const &graph, */ /* ----------------------------------------------------------------------------*/ template -void core_number(legacy::GraphCSRView const &graph, VT *core_number); +void core_number(legacy::GraphCSRView const& graph, VT* core_number); /** * @brief Compute K Core of the graph G @@ -473,12 +473,12 @@ void core_number(legacy::GraphCSRView const &graph, VT *core_number) */ template std::unique_ptr> k_core( - legacy::GraphCOOView const &graph, + legacy::GraphCOOView const& graph, int k, - VT const *vertex_id, - VT const *core_number, + VT const* vertex_id, + VT const* core_number, VT num_vertex_ids, - rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); + rmm::mr::device_memory_resource* mr = 
rmm::mr::get_current_device_resource()); /** * @brief Find all 2-hop neighbors in the graph @@ -499,7 +499,7 @@ std::unique_ptr> k_core( */ template std::unique_ptr> get_two_hop_neighbors( - legacy::GraphCSRView const &graph); + legacy::GraphCSRView const& graph); /** * @Synopsis Performs a single source shortest path traversal of a graph starting from a vertex. @@ -526,9 +526,9 @@ std::unique_ptr> get_two_hop_neighbors( * */ template -void sssp(legacy::GraphCSRView const &graph, - WT *distances, - VT *predecessors, +void sssp(legacy::GraphCSRView const& graph, + WT* distances, + VT* predecessors, const VT source_vertex); // FIXME: Internally distances is of int (signed 32-bit) data type, but current @@ -567,11 +567,11 @@ void sssp(legacy::GraphCSRView const &graph, * */ template -void bfs(raft::handle_t const &handle, - legacy::GraphCSRView const &graph, - VT *distances, - VT *predecessors, - double *sp_counters, +void bfs(raft::handle_t const& handle, + legacy::GraphCSRView const& graph, + VT* distances, + VT* predecessors, + double* sp_counters, const VT start_vertex, bool directed = true, bool mg_batch = false); @@ -601,11 +601,11 @@ void bfs(raft::handle_t const &handle, * assigned to that worker */ template -weight_t hungarian(raft::handle_t const &handle, - legacy::GraphCOOView const &graph, +weight_t hungarian(raft::handle_t const& handle, + legacy::GraphCOOView const& graph, vertex_t num_workers, - vertex_t const *workers, - vertex_t *assignments); + vertex_t const* workers, + vertex_t* assignments); /** * @brief Compute Hungarian algorithm on a weighted bipartite graph @@ -634,11 +634,11 @@ weight_t hungarian(raft::handle_t const &handle, * in reducing weights to zero. */ template -weight_t hungarian(raft::handle_t const &handle, - legacy::GraphCOOView const &graph, +weight_t hungarian(raft::handle_t const& handle, + legacy::GraphCOOView const& graph, vertex_t num_workers, - vertex_t const *workers, - vertex_t *assignments, + vertex_t const* workers, + vertex_t* assignments, weight_t epsilon); /** @@ -673,9 +673,9 @@ weight_t hungarian(raft::handle_t const &handle, */ template std::pair louvain( - raft::handle_t const &handle, - graph_view_t const &graph_view, - typename graph_view_t::vertex_type *clustering, + raft::handle_t const& handle, + graph_view_t const& graph_view, + typename graph_view_t::vertex_type* clustering, size_t max_level = 100, typename graph_view_t::weight_type resolution = typename graph_view_t::weight_type{1}); @@ -711,8 +711,8 @@ std::pair louvain( template std::pair>, typename graph_view_t::weight_type> -louvain(raft::handle_t const &handle, - graph_view_t const &graph_view, +louvain(raft::handle_t const& handle, + graph_view_t const& graph_view, size_t max_level = 100, typename graph_view_t::weight_type resolution = typename graph_view_t::weight_type{1}); @@ -735,10 +735,10 @@ louvain(raft::handle_t const &handle, * */ template -void flatten_dendrogram(raft::handle_t const &handle, - graph_view_t const &graph_view, - Dendrogram const &dendrogram, - typename graph_view_t::vertex_type *clustering); +void flatten_dendrogram(raft::handle_t const& handle, + graph_view_t const& graph_view, + Dendrogram const& dendrogram, + typename graph_view_t::vertex_type* clustering); /** * @brief Leiden implementation @@ -775,9 +775,9 @@ void flatten_dendrogram(raft::handle_t const &handle, * 2) modularity of the returned clustering */ template -std::pair leiden(raft::handle_t const &handle, - legacy::GraphCSRView const &graph, - vertex_t *clustering, +std::pair 
leiden(raft::handle_t const& handle, + legacy::GraphCSRView const& graph, + vertex_t* clustering, size_t max_iter = 100, weight_t resolution = weight_t{1}); @@ -807,11 +807,11 @@ std::pair leiden(raft::handle_t const &handle, * written */ template -void ecg(raft::handle_t const &handle, - legacy::GraphCSRView const &graph, +void ecg(raft::handle_t const& handle, + legacy::GraphCSRView const& graph, weight_t min_weight, vertex_t ensemble_size, - vertex_t *clustering); + vertex_t* clustering); /** * @brief Generate edges in a minimum spanning forest of an undirected weighted graph. @@ -835,9 +835,9 @@ void ecg(raft::handle_t const &handle, */ template std::unique_ptr> minimum_spanning_tree( - raft::handle_t const &handle, - legacy::GraphCSRView const &graph, - rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); + raft::handle_t const& handle, + legacy::GraphCSRView const& graph, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); namespace triangle { /** @@ -856,7 +856,7 @@ namespace triangle { * @return The number of triangles */ template -uint64_t triangle_count(legacy::GraphCSRView const &graph); +uint64_t triangle_count(legacy::GraphCSRView const& graph); } // namespace triangle namespace subgraph { @@ -882,7 +882,7 @@ namespace subgraph { */ template std::unique_ptr> extract_subgraph_vertex( - legacy::GraphCOOView const &graph, VT const *vertices, VT num_vertices); + legacy::GraphCOOView const& graph, VT const* vertices, VT num_vertices); } // namespace subgraph /** @@ -909,14 +909,14 @@ std::unique_ptr> extract_subgraph_vertex( namespace ext_raft { template -void balancedCutClustering(legacy::GraphCSRView const &graph, +void balancedCutClustering(legacy::GraphCSRView const& graph, VT num_clusters, VT num_eigen_vects, WT evs_tolerance, int evs_max_iter, WT kmean_tolerance, int kmean_max_iter, - VT *clustering); + VT* clustering); /** * @brief Wrapper function for Nvgraph spectral modularity maximization algorithm @@ -940,14 +940,14 @@ void balancedCutClustering(legacy::GraphCSRView const &graph, * be stored */ template -void spectralModularityMaximization(legacy::GraphCSRView const &graph, +void spectralModularityMaximization(legacy::GraphCSRView const& graph, VT n_clusters, VT n_eig_vects, WT evs_tolerance, int evs_max_iter, WT kmean_tolerance, int kmean_max_iter, - VT *clustering); + VT* clustering); /** * @brief Wrapper function for Nvgraph clustering modularity metric @@ -966,10 +966,10 @@ void spectralModularityMaximization(legacy::GraphCSRView const &grap * @param[out] score Pointer to a float in which the result will be written */ template -void analyzeClustering_modularity(legacy::GraphCSRView const &graph, +void analyzeClustering_modularity(legacy::GraphCSRView const& graph, int n_clusters, - VT const *clustering, - WT *score); + VT const* clustering, + WT* score); /** * @brief Wrapper function for Nvgraph clustering edge cut metric @@ -988,10 +988,10 @@ void analyzeClustering_modularity(legacy::GraphCSRView const &graph, * @param[out] score Pointer to a float in which the result will be written */ template -void analyzeClustering_edge_cut(legacy::GraphCSRView const &graph, +void analyzeClustering_edge_cut(legacy::GraphCSRView const& graph, int n_clusters, - VT const *clustering, - WT *score); + VT const* clustering, + WT* score); /** * @brief Wrapper function for Nvgraph clustering ratio cut metric @@ -1010,10 +1010,10 @@ void analyzeClustering_edge_cut(legacy::GraphCSRView const &graph, * @param[out] score Pointer to a 
float in which the result will be written */ template -void analyzeClustering_ratio_cut(legacy::GraphCSRView const &graph, +void analyzeClustering_ratio_cut(legacy::GraphCSRView const& graph, int n_clusters, - VT const *clustering, - WT *score); + VT const* clustering, + WT* score); } // namespace ext_raft @@ -1046,13 +1046,13 @@ namespace gunrock { * */ template -void hits(legacy::GraphCSRView const &graph, +void hits(legacy::GraphCSRView const& graph, int max_iter, WT tolerance, - WT const *starting_value, + WT const* starting_value, bool normalized, - WT *hubs, - WT *authorities); + WT* hubs, + WT* authorities); } // namespace gunrock @@ -1080,11 +1080,11 @@ namespace dense { * which vertex id (job) is assigned to that worker */ template -weight_t hungarian(raft::handle_t const &handle, - weight_t const *costs, +weight_t hungarian(raft::handle_t const& handle, + weight_t const* costs, vertex_t num_rows, vertex_t num_columns, - vertex_t *assignments); + vertex_t* assignments); /** * @brief Compute Hungarian algorithm on a weighted bipartite graph @@ -1111,11 +1111,11 @@ weight_t hungarian(raft::handle_t const &handle, * in reducing weights to zero. */ template -weight_t hungarian(raft::handle_t const &handle, - weight_t const *costs, +weight_t hungarian(raft::handle_t const& handle, + weight_t const* costs, vertex_t num_rows, vertex_t num_columns, - vertex_t *assignments, + vertex_t* assignments, weight_t epsilon); } // namespace dense @@ -1153,10 +1153,10 @@ namespace experimental { * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). */ template -void bfs(raft::handle_t const &handle, - graph_view_t const &graph_view, - vertex_t *distances, - vertex_t *predecessors, +void bfs(raft::handle_t const& handle, + graph_view_t const& graph_view, + vertex_t* distances, + vertex_t* predecessors, vertex_t source_vertex, bool direction_optimizing = false, vertex_t depth_limit = std::numeric_limits::max(), @@ -1188,10 +1188,10 @@ void bfs(raft::handle_t const &handle, * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). */ template -void sssp(raft::handle_t const &handle, - graph_view_t const &graph_view, - weight_t *distances, - vertex_t *predecessors, +void sssp(raft::handle_t const& handle, + graph_view_t const& graph_view, + weight_t* distances, + vertex_t* predecessors, vertex_t source_vertex, weight_t cutoff = std::numeric_limits::max(), bool do_expensive_check = false); @@ -1238,13 +1238,13 @@ void sssp(raft::handle_t const &handle, * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). */ template -void pagerank(raft::handle_t const &handle, - graph_view_t const &graph_view, - std::optional precomputed_vertex_out_weight_sums, - std::optional personalization_vertices, - std::optional personalization_values, +void pagerank(raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional precomputed_vertex_out_weight_sums, + std::optional personalization_vertices, + std::optional personalization_values, std::optional personalization_vector_size, - result_t *pageranks, + result_t* pageranks, result_t alpha, result_t epsilon, size_t max_iterations = 500, @@ -1288,10 +1288,10 @@ void pagerank(raft::handle_t const &handle, * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). 
*/ template -void katz_centrality(raft::handle_t const &handle, - graph_view_t const &graph_view, - result_t const *betas, - result_t *katz_centralities, +void katz_centrality(raft::handle_t const& handle, + graph_view_t const& graph_view, + result_t const* betas, + result_t* katz_centralities, result_t alpha, result_t beta, result_t epsilon, @@ -1324,9 +1324,9 @@ std::tuple, rmm::device_uvector, std::optional>, rmm::device_uvector> -extract_ego(raft::handle_t const &handle, - graph_view_t const &graph_view, - vertex_t *source_vertex, +extract_ego(raft::handle_t const& handle, + graph_view_t const& graph_view, + vertex_t* source_vertex, vertex_t n_subgraphs, vertex_t radius); @@ -1360,9 +1360,9 @@ template std::tuple, rmm::device_uvector, rmm::device_uvector> -random_walks(raft::handle_t const &handle, - graph_t const &graph, - typename graph_t::vertex_type const *ptr_d_start, +random_walks(raft::handle_t const& handle, + graph_t const& graph, + typename graph_t::vertex_type const* ptr_d_start, index_t num_paths, index_t max_depth, bool use_padding = false); @@ -1385,9 +1385,9 @@ random_walks(raft::handle_t const &handle, */ template void weakly_connected_components( - raft::handle_t const &handle, - graph_view_t const &graph_view, - vertex_t *components, + raft::handle_t const& handle, + graph_view_t const& graph_view, + vertex_t* components, bool do_expensive_check = false); } // namespace experimental diff --git a/cpp/include/cugraph/compute_partition.cuh b/cpp/include/cugraph/compute_partition.cuh index 6405d239adc..b8ad0fc19ab 100644 --- a/cpp/include/cugraph/compute_partition.cuh +++ b/cpp/include/cugraph/compute_partition.cuh @@ -39,20 +39,20 @@ class compute_partition_t { using graph_view_t = graph_view_type; using vertex_t = typename graph_view_type::vertex_type; - compute_partition_t(raft::handle_t const &handle, graph_view_t const &graph_view) + compute_partition_t(raft::handle_t const& handle, graph_view_t const& graph_view) : vertex_partition_offsets_v_(0, handle.get_stream()) { init(handle, graph_view); } private: - template * = nullptr> - void init(raft::handle_t const &handle, graph_view_t const &graph_view) + template * = nullptr> + void init(raft::handle_t const& handle, graph_view_t const& graph_view) { } - template * = nullptr> - void init(raft::handle_t const &handle, graph_view_t const &graph_view) + template * = nullptr> + void init(raft::handle_t const& handle, graph_view_t const& graph_view) { auto partition = graph_view.get_partition(); row_size_ = partition.get_row_size(); @@ -81,7 +81,7 @@ class compute_partition_t { */ class vertex_device_view_t { public: - vertex_device_view_t(vertex_t const *d_vertex_partition_offsets, int size) + vertex_device_view_t(vertex_t const* d_vertex_partition_offsets, int size) : d_vertex_partition_offsets_(d_vertex_partition_offsets), size_(size) { } @@ -106,13 +106,13 @@ class compute_partition_t { } private: - vertex_t const *d_vertex_partition_offsets_; + vertex_t const* d_vertex_partition_offsets_; int size_; }; class edge_device_view_t { public: - edge_device_view_t(vertex_t const *d_vertex_partition_offsets, + edge_device_view_t(vertex_t const* d_vertex_partition_offsets, int row_size, int col_size, int size) @@ -158,7 +158,7 @@ class compute_partition_t { } private: - vertex_t const *d_vertex_partition_offsets_; + vertex_t const* d_vertex_partition_offsets_; int row_size_; int col_size_; int size_; diff --git a/cpp/include/cugraph/dendrogram.hpp b/cpp/include/cugraph/dendrogram.hpp index 2640944dc09..beebec4fd3f 100644 --- 
a/cpp/include/cugraph/dendrogram.hpp +++ b/cpp/include/cugraph/dendrogram.hpp @@ -28,7 +28,7 @@ class Dendrogram { void add_level(vertex_t first_index, vertex_t num_verts, rmm::cuda_stream_view stream_view, - rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { level_ptr_.push_back( std::make_unique>(num_verts, stream_view, mr)); @@ -39,21 +39,21 @@ class Dendrogram { size_t num_levels() const { return level_ptr_.size(); } - vertex_t const *get_level_ptr_nocheck(size_t level) const { return level_ptr_[level]->data(); } + vertex_t const* get_level_ptr_nocheck(size_t level) const { return level_ptr_[level]->data(); } - vertex_t *get_level_ptr_nocheck(size_t level) { return level_ptr_[level]->data(); } + vertex_t* get_level_ptr_nocheck(size_t level) { return level_ptr_[level]->data(); } size_t get_level_size_nocheck(size_t level) const { return level_ptr_[level]->size(); } vertex_t get_level_first_index_nocheck(size_t level) const { return level_first_index_[level]; } - vertex_t const *current_level_begin() const { return get_level_ptr_nocheck(current_level()); } + vertex_t const* current_level_begin() const { return get_level_ptr_nocheck(current_level()); } - vertex_t const *current_level_end() const { return current_level_begin() + current_level_size(); } + vertex_t const* current_level_end() const { return current_level_begin() + current_level_size(); } - vertex_t *current_level_begin() { return get_level_ptr_nocheck(current_level()); } + vertex_t* current_level_begin() { return get_level_ptr_nocheck(current_level()); } - vertex_t *current_level_end() { return current_level_begin() + current_level_size(); } + vertex_t* current_level_end() { return current_level_begin() + current_level_size(); } size_t current_level_size() const { return get_level_size_nocheck(current_level()); } diff --git a/cpp/include/cugraph/experimental/detail/graph_utils.cuh b/cpp/include/cugraph/experimental/detail/graph_utils.cuh index e9f86eb9d62..c384e2842ab 100644 --- a/cpp/include/cugraph/experimental/detail/graph_utils.cuh +++ b/cpp/include/cugraph/experimental/detail/graph_utils.cuh @@ -41,14 +41,14 @@ namespace detail { // false) or columns (of the graph adjacency matrix, if store_transposed = true) template rmm::device_uvector compute_major_degrees( - raft::handle_t const &handle, - std::vector const &adj_matrix_partition_offsets, - partition_t const &partition) + raft::handle_t const& handle, + std::vector const& adj_matrix_partition_offsets, + partition_t const& partition) { - auto &row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); auto const row_comm_rank = row_comm.get_rank(); auto const row_comm_size = row_comm.get_size(); - auto &col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); auto const col_comm_rank = col_comm.get_rank(); auto const col_comm_size = col_comm.get_size(); @@ -75,7 +75,7 @@ rmm::device_uvector compute_major_degrees( local_degrees.data(), [p_offsets] __device__(auto i) { return p_offsets[i + 1] - p_offsets[i]; }); col_comm.reduce(local_degrees.data(), - i == col_comm_rank ? degrees.data() : static_cast(nullptr), + i == col_comm_rank ? 
degrees.data() : static_cast(nullptr), static_cast(major_last - major_first), raft::comms::op_t::SUM, i, @@ -89,24 +89,24 @@ rmm::device_uvector compute_major_degrees( // false) or columns (of the graph adjacency matrix, if store_transposed = true) template rmm::device_uvector compute_major_degrees( - raft::handle_t const &handle, - std::vector> const &adj_matrix_partition_offsets, - partition_t const &partition) + raft::handle_t const& handle, + std::vector> const& adj_matrix_partition_offsets, + partition_t const& partition) { // we can avoid creating this temporary with "if constexpr" supported from C++17 - std::vector tmp_offsets(adj_matrix_partition_offsets.size(), nullptr); + std::vector tmp_offsets(adj_matrix_partition_offsets.size(), nullptr); std::transform(adj_matrix_partition_offsets.begin(), adj_matrix_partition_offsets.end(), tmp_offsets.begin(), - [](auto const &offsets) { return offsets.data(); }); + [](auto const& offsets) { return offsets.data(); }); return compute_major_degrees(handle, tmp_offsets, partition); } // compute the numbers of nonzeros in rows (of the graph adjacency matrix, if store_transposed = // false) or columns (of the graph adjacency matrix, if store_transposed = true) template -rmm::device_uvector compute_major_degrees(raft::handle_t const &handle, - edge_t const *offsets, +rmm::device_uvector compute_major_degrees(raft::handle_t const& handle, + edge_t const* offsets, vertex_t number_of_vertices) { rmm::device_uvector degrees(number_of_vertices, handle.get_stream()); @@ -119,7 +119,7 @@ rmm::device_uvector compute_major_degrees(raft::handle_t const &handle, template struct degree_from_offsets_t { - edge_t const *offsets{nullptr}; + edge_t const* offsets{nullptr}; __device__ edge_t operator()(vertex_t v) { return offsets[v + 1] - offsets[v]; } }; diff --git a/cpp/include/cugraph/experimental/graph.hpp b/cpp/include/cugraph/experimental/graph.hpp index 0f93abb1635..9b0849c704f 100644 --- a/cpp/include/cugraph/experimental/graph.hpp +++ b/cpp/include/cugraph/experimental/graph.hpp @@ -32,9 +32,9 @@ namespace experimental { template struct edgelist_t { - vertex_t const *p_src_vertices{nullptr}; - vertex_t const *p_dst_vertices{nullptr}; - std::optional p_edge_weights{std::nullopt}; + vertex_t const* p_src_vertices{nullptr}; + vertex_t const* p_dst_vertices{nullptr}; + std::optional p_edge_weights{std::nullopt}; edge_t number_of_edges{0}; }; @@ -62,29 +62,29 @@ class graph_t() {} + graph_t(raft::handle_t const& handle) : detail::graph_base_t() {} - graph_t(raft::handle_t const &handle, - std::vector> const &edgelists, - partition_t const &partition, + graph_t(raft::handle_t const& handle, + std::vector> const& edgelists, + partition_t const& partition, vertex_t number_of_vertices, edge_t number_of_edges, graph_properties_t properties, - std::optional> const &segment_offsets, + std::optional> const& segment_offsets, bool do_expensive_check = false); bool is_weighted() const { return adj_matrix_partition_weights_.has_value(); } graph_view_t view() const { - std::vector offsets(adj_matrix_partition_offsets_.size(), nullptr); - std::vector indices(adj_matrix_partition_indices_.size(), nullptr); - auto weights = adj_matrix_partition_weights_ - ? std::make_optional>( + std::vector offsets(adj_matrix_partition_offsets_.size(), nullptr); + std::vector indices(adj_matrix_partition_indices_.size(), nullptr); + auto weights = adj_matrix_partition_weights_ + ? 
std::make_optional>( (*adj_matrix_partition_weights_).size(), nullptr) - : std::nullopt; + : std::nullopt; auto dcs_nzd_vertices = adj_matrix_partition_dcs_nzd_vertices_ - ? std::make_optional>( + ? std::make_optional>( (*adj_matrix_partition_dcs_nzd_vertices_).size(), nullptr) : std::nullopt; auto dcs_nzd_vertex_counts = @@ -149,16 +149,16 @@ class graph_t(), offsets_(0, handle.get_stream()), indices_(0, handle.get_stream()){}; - graph_t(raft::handle_t const &handle, - edgelist_t const &edgelist, + graph_t(raft::handle_t const& handle, + edgelist_t const& edgelist, vertex_t number_of_vertices, graph_properties_t properties, - std::optional> const &segment_offsets, + std::optional> const& segment_offsets, bool do_expensive_check = false); bool is_weighted() const { return weights_.has_value(); } @@ -169,7 +169,7 @@ class graph_tget_handle_ptr()), offsets_.data(), indices_.data(), - weights_ ? std::optional{(*weights_).data()} : std::nullopt, + weights_ ? std::optional{(*weights_).data()} : std::nullopt, this->get_number_of_vertices(), this->get_number_of_edges(), this->get_graph_properties(), @@ -182,14 +182,14 @@ class graph_t &&offsets, - rmm::device_uvector &&indices, - std::optional> &&weights, - std::optional> &&segment_offsets) + rmm::device_uvector&& offsets, + rmm::device_uvector&& indices, + std::optional>&& weights, + std::optional>&& segment_offsets) : detail::graph_base_t( handle, number_of_vertices, number_of_edges, properties), offsets_(std::move(offsets)), diff --git a/cpp/include/cugraph/functions.hpp b/cpp/include/cugraph/functions.hpp index bb1adcf818b..23edd204c3b 100644 --- a/cpp/include/cugraph/functions.hpp +++ b/cpp/include/cugraph/functions.hpp @@ -41,8 +41,8 @@ namespace cugraph { */ template std::unique_ptr> coo_to_csr( - legacy::GraphCOOView const &graph, - rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); + legacy::GraphCOOView const& graph, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @brief Broadcast using handle communicator @@ -60,7 +60,7 @@ std::unique_ptr> coo_to_csr( // FIXME: It would be better to expose it in RAFT template -void comms_bcast(const raft::handle_t &handle, value_t *value, size_t count) +void comms_bcast(const raft::handle_t& handle, value_t* value, size_t count) { handle.get_comms().bcast(value, count, 0, handle.get_stream()); } diff --git a/cpp/include/cugraph/graph_generators.hpp b/cpp/include/cugraph/graph_generators.hpp index 9bd002b4299..94ae5d2cf81 100644 --- a/cpp/include/cugraph/graph_generators.hpp +++ b/cpp/include/cugraph/graph_generators.hpp @@ -67,7 +67,7 @@ namespace cugraph { */ template std::tuple, rmm::device_uvector> generate_rmat_edgelist( - raft::handle_t const &handle, + raft::handle_t const& handle, size_t scale, size_t num_edges, double a = 0.57, @@ -114,7 +114,7 @@ enum class generator_distribution_t { POWER_LAW = 0, UNIFORM }; template std::vector, rmm::device_uvector>> generate_rmat_edgelists( - raft::handle_t const &handle, + raft::handle_t const& handle, size_t n_edgelists, size_t min_scale, size_t max_scale, @@ -150,8 +150,8 @@ generate_rmat_edgelists( template std::tuple, rmm::device_uvector> generate_path_graph_edgelist( - raft::handle_t const &handle, - std::vector> const &component_parameters_v); + raft::handle_t const& handle, + std::vector> const& component_parameters_v); /** * @brief generate an edge list for a 2D Mesh Graph @@ -175,8 +175,8 @@ generate_path_graph_edgelist( template std::tuple, rmm::device_uvector> 
generate_2d_mesh_graph_edgelist( - raft::handle_t const &handle, - std::vector> const &component_parameters_v); + raft::handle_t const& handle, + std::vector> const& component_parameters_v); /** * @brief generate an edge list for a 3D Mesh Graph @@ -200,8 +200,8 @@ generate_2d_mesh_graph_edgelist( template std::tuple, rmm::device_uvector> generate_3d_mesh_graph_edgelist( - raft::handle_t const &handle, - std::vector> const &component_parameters_v); + raft::handle_t const& handle, + std::vector> const& component_parameters_v); /** * @brief generate an edge lists for some complete graphs @@ -225,8 +225,8 @@ generate_3d_mesh_graph_edgelist( template std::tuple, rmm::device_uvector> generate_complete_graph_edgelist( - raft::handle_t const &handle, - std::vector> const &component_parameters_v); + raft::handle_t const& handle, + std::vector> const& component_parameters_v); /** * @brief generate an edge lists for an Erdos-Renyi graph @@ -249,7 +249,7 @@ generate_complete_graph_edgelist( */ template std::tuple, rmm::device_uvector> -generate_erdos_renyi_graph_edgelist_gnp(raft::handle_t const &handle, +generate_erdos_renyi_graph_edgelist_gnp(raft::handle_t const& handle, vertex_t num_vertices, float p, vertex_t base_vertex_id, @@ -276,7 +276,7 @@ generate_erdos_renyi_graph_edgelist_gnp(raft::handle_t const &handle, */ template std::tuple, rmm::device_uvector> -generate_erdos_renyi_graph_edgelist_gnm(raft::handle_t const &handle, +generate_erdos_renyi_graph_edgelist_gnm(raft::handle_t const& handle, vertex_t num_vertices, size_t m, vertex_t base_vertex_id, @@ -304,10 +304,10 @@ template std::tuple, rmm::device_uvector, std::optional>> -symmetrize_edgelist(raft::handle_t const &handle, - rmm::device_uvector &&d_src_v, - rmm::device_uvector &&d_dst_v, - std::optional> &&optional_d_weights_v); +symmetrize_edgelist(raft::handle_t const& handle, + rmm::device_uvector&& d_src_v, + rmm::device_uvector&& d_dst_v, + std::optional>&& optional_d_weights_v); /** * @brief scramble vertex ids in a graph @@ -327,9 +327,9 @@ symmetrize_edgelist(raft::handle_t const &handle, * @param seed Used to initialize random number generator */ template -void scramble_vertex_ids(raft::handle_t const &handle, - rmm::device_uvector &d_src_v, - rmm::device_uvector &d_dst_v, +void scramble_vertex_ids(raft::handle_t const& handle, + rmm::device_uvector& d_src_v, + rmm::device_uvector& d_dst_v, vertex_t vertex_id_offset, uint64_t seed = 0); @@ -355,10 +355,10 @@ template std::tuple, rmm::device_uvector, std::optional>> -combine_edgelists(raft::handle_t const &handle, - std::vector> &&d_sources, - std::vector> &&d_dests, - std::optional>> &&optional_d_weights, +combine_edgelists(raft::handle_t const& handle, + std::vector>&& d_sources, + std::vector>&& d_dests, + std::optional>>&& optional_d_weights, bool remove_multi_edges = true); } // namespace cugraph diff --git a/cpp/include/cugraph/internals.hpp b/cpp/include/cugraph/internals.hpp index cc6b3031079..1c311304ae4 100644 --- a/cpp/include/cugraph/internals.hpp +++ b/cpp/include/cugraph/internals.hpp @@ -35,9 +35,9 @@ class GraphBasedDimRedCallback : public Callback { this->n_components = n_components; this->isFloat = std::is_same::value; } - virtual void on_preprocess_end(void *positions) = 0; - virtual void on_epoch_end(void *positions) = 0; - virtual void on_train_end(void *positions) = 0; + virtual void on_preprocess_end(void* positions) = 0; + virtual void on_epoch_end(void* positions) = 0; + virtual void on_train_end(void* positions) = 0; protected: int n; diff --git 
a/cpp/include/cugraph/legacy/graph.hpp b/cpp/include/cugraph/legacy/graph.hpp index 4b009bc7a25..4345fd225c1 100644 --- a/cpp/include/cugraph/legacy/graph.hpp +++ b/cpp/include/cugraph/legacy/graph.hpp @@ -58,17 +58,17 @@ class GraphViewBase { using edge_type = edge_t; using weight_type = weight_t; - raft::handle_t *handle; - weight_t *edge_data; ///< edge weight + raft::handle_t* handle; + weight_t* edge_data; ///< edge weight GraphProperties prop; vertex_t number_of_vertices; edge_t number_of_edges; - vertex_t *local_vertices; - edge_t *local_edges; - vertex_t *local_offsets; + vertex_t* local_vertices; + edge_t* local_edges; + vertex_t* local_offsets; vertex_t get_number_of_vertices() const { return number_of_vertices; } @@ -80,18 +80,18 @@ class GraphViewBase { * @param[out] identifiers Pointer to device memory to store the vertex * identifiers */ - void get_vertex_identifiers(vertex_t *identifiers) const; + void get_vertex_identifiers(vertex_t* identifiers) const; - void set_local_data(vertex_t *vertices, edge_t *edges, vertex_t *offsets) + void set_local_data(vertex_t* vertices, edge_t* edges, vertex_t* offsets) { local_vertices = vertices; local_edges = edges; local_offsets = offsets; } - void set_handle(raft::handle_t *handle_in) { handle = handle_in; } + void set_handle(raft::handle_t* handle_in) { handle = handle_in; } - GraphViewBase(weight_t *edge_data, vertex_t number_of_vertices, edge_t number_of_edges) + GraphViewBase(weight_t* edge_data, vertex_t number_of_vertices, edge_t number_of_edges) : handle(nullptr), edge_data(edge_data), prop(), @@ -116,8 +116,8 @@ class GraphViewBase { template class GraphCOOView : public GraphViewBase { public: - vertex_t *src_indices{nullptr}; ///< rowInd - vertex_t *dst_indices{nullptr}; ///< colInd + vertex_t* src_indices{nullptr}; ///< rowInd + vertex_t* dst_indices{nullptr}; ///< colInd /** * @brief Computes degree(in, out, in+out) of all the nodes of a Graph @@ -129,7 +129,7 @@ class GraphCOOView : public GraphViewBase { * to zeros. Will contain the computed degree of every vertex. 
* @param[in] direction IN_PLUS_OUT, IN or OUT */ - void degree(edge_t *degree, DegreeDirection direction) const; + void degree(edge_t* degree, DegreeDirection direction) const; /** * @brief Default constructor @@ -156,9 +156,9 @@ class GraphCOOView : public GraphViewBase { * @param number_of_vertices The number of vertices in the graph * @param number_of_edges The number of edges in the graph */ - GraphCOOView(vertex_t *src_indices, - vertex_t *dst_indices, - weight_t *edge_data, + GraphCOOView(vertex_t* src_indices, + vertex_t* dst_indices, + weight_t* edge_data, vertex_t number_of_vertices, edge_t number_of_edges) : GraphViewBase(edge_data, number_of_vertices, number_of_edges), @@ -180,8 +180,8 @@ class GraphCOOView : public GraphViewBase { template class GraphCompressedSparseBaseView : public GraphViewBase { public: - edge_t *offsets{nullptr}; ///< CSR offsets - vertex_t *indices{nullptr}; ///< CSR indices + edge_t* offsets{nullptr}; ///< CSR offsets + vertex_t* indices{nullptr}; ///< CSR indices /** * @brief Fill the identifiers in the array with the source vertex @@ -190,7 +190,7 @@ class GraphCompressedSparseBaseView : public GraphViewBase(edge_data, number_of_vertices, number_of_edges), @@ -280,9 +280,9 @@ class GraphCSRView : public GraphCompressedSparseBaseView( @@ -331,9 +331,9 @@ class GraphCSCView : public GraphCompressedSparseBaseView( @@ -399,7 +399,7 @@ class GraphCOO { edge_t number_of_edges, bool has_data = false, cudaStream_t stream = nullptr, - rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) : number_of_vertices_p(number_of_vertices), number_of_edges_p(number_of_edges), src_indices_p(sizeof(vertex_t) * number_of_edges, stream, mr), @@ -408,9 +408,9 @@ class GraphCOO { { } - GraphCOO(GraphCOOView const &graph, + GraphCOO(GraphCOOView const& graph, cudaStream_t stream = nullptr, - rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) : number_of_vertices_p(graph.number_of_vertices), number_of_edges_p(graph.number_of_edges), src_indices_p(graph.src_indices, graph.number_of_edges * sizeof(vertex_t), stream, mr), @@ -421,7 +421,7 @@ class GraphCOO { rmm::device_buffer{graph.edge_data, graph.number_of_edges * sizeof(weight_t), stream, mr}; } } - GraphCOO(GraphCOOContents &&contents) + GraphCOO(GraphCOOContents&& contents) : number_of_vertices_p(contents.number_of_vertices), number_of_edges_p(contents.number_of_edges), src_indices_p(std::move(*(contents.src_indices.release()))), @@ -432,9 +432,9 @@ class GraphCOO { vertex_t number_of_vertices(void) { return number_of_vertices_p; } edge_t number_of_edges(void) { return number_of_edges_p; } - vertex_t *src_indices(void) { return static_cast(src_indices_p.data()); } - vertex_t *dst_indices(void) { return static_cast(dst_indices_p.data()); } - weight_t *edge_data(void) { return static_cast(edge_data_p.data()); } + vertex_t* src_indices(void) { return static_cast(src_indices_p.data()); } + vertex_t* dst_indices(void) { return static_cast(dst_indices_p.data()); } + weight_t* edge_data(void) { return static_cast(edge_data_p.data()); } GraphCOOContents release() noexcept { @@ -505,7 +505,7 @@ class GraphCompressedSparseBase { edge_t number_of_edges, bool has_data, cudaStream_t stream, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) : number_of_vertices_p(number_of_vertices), 
number_of_edges_p(number_of_edges), offsets_p(sizeof(edge_t) * (number_of_vertices + 1), stream, mr), @@ -514,7 +514,7 @@ class GraphCompressedSparseBase { { } - GraphCompressedSparseBase(GraphSparseContents &&contents) + GraphCompressedSparseBase(GraphSparseContents&& contents) : number_of_vertices_p(contents.number_of_vertices), number_of_edges_p(contents.number_of_edges), offsets_p(std::move(*contents.offsets.release())), @@ -525,9 +525,9 @@ class GraphCompressedSparseBase { vertex_t number_of_vertices(void) { return number_of_vertices_p; } edge_t number_of_edges(void) { return number_of_edges_p; } - edge_t *offsets(void) { return static_cast(offsets_p.data()); } - vertex_t *indices(void) { return static_cast(indices_p.data()); } - weight_t *edge_data(void) { return static_cast(edge_data_p.data()); } + edge_t* offsets(void) { return static_cast(offsets_p.data()); } + vertex_t* indices(void) { return static_cast(indices_p.data()); } + weight_t* edge_data(void) { return static_cast(edge_data_p.data()); } GraphSparseContents release() noexcept { @@ -575,13 +575,13 @@ class GraphCSR : public GraphCompressedSparseBase { edge_t number_of_edges_, bool has_data_ = false, cudaStream_t stream = nullptr, - rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) : GraphCompressedSparseBase( number_of_vertices_, number_of_edges_, has_data_, stream, mr) { } - GraphCSR(GraphSparseContents &&contents) + GraphCSR(GraphSparseContents&& contents) : GraphCompressedSparseBase(std::move(contents)) { } @@ -626,13 +626,13 @@ class GraphCSC : public GraphCompressedSparseBase { edge_t number_of_edges_in, bool has_data_in = false, cudaStream_t stream = nullptr, - rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) : GraphCompressedSparseBase( number_of_vertices_in, number_of_edges_in, has_data_in, stream, mr) { } - GraphCSC(GraphSparseContents &&contents) + GraphCSC(GraphSparseContents&& contents) : GraphCompressedSparseBase( std::forward>(contents)) { diff --git a/cpp/include/cugraph/matrix_partition_device_view.cuh b/cpp/include/cugraph/matrix_partition_device_view.cuh index aa7bb6f97ba..9653e5c94d9 100644 --- a/cpp/include/cugraph/matrix_partition_device_view.cuh +++ b/cpp/include/cugraph/matrix_partition_device_view.cuh @@ -142,14 +142,14 @@ class matrix_partition_device_view_t(major_offset); - auto col_offset = GraphViewType::is_adj_matrix_transposed - ? static_cast(major_offset) - : minor_offset; + ? minor + : matrix_partition.get_major_from_major_offset_nocheck(major_offset); + auto col = GraphViewType::is_adj_matrix_transposed + ? matrix_partition.get_major_from_major_offset_nocheck(major_offset) + : minor; + auto row_offset = GraphViewType::is_adj_matrix_transposed + ? minor_offset + : static_cast(major_offset); + auto col_offset = GraphViewType::is_adj_matrix_transposed + ? static_cast(major_offset) + : minor_offset; return evaluate_edge_op(major_offset); - auto col_offset = GraphViewType::is_adj_matrix_transposed - ? static_cast(major_offset) - : minor_offset; - auto e_op_result = evaluate_edge_op(major_offset); + auto col_offset = GraphViewType::is_adj_matrix_transposed + ? static_cast(major_offset) + : minor_offset; + auto e_op_result = evaluate_edge_op(major_offset); - auto col_offset = GraphViewType::is_adj_matrix_transposed - ? 
static_cast(major_offset) - : minor_offset; - auto e_op_result = evaluate_edge_op(major_offset); + auto col_offset = GraphViewType::is_adj_matrix_transposed + ? static_cast(major_offset) + : minor_offset; + auto e_op_result = evaluate_edge_op(major_offset); - auto col_offset = GraphViewType::is_adj_matrix_transposed - ? static_cast(major_offset) - : minor_offset; + ? minor + : matrix_partition.get_major_from_major_offset_nocheck(major_offset); + auto col = GraphViewType::is_adj_matrix_transposed + ? matrix_partition.get_major_from_major_offset_nocheck(major_offset) + : minor; + auto row_offset = GraphViewType::is_adj_matrix_transposed + ? minor_offset + : static_cast(major_offset); + auto col_offset = GraphViewType::is_adj_matrix_transposed + ? static_cast(major_offset) + : minor_offset; auto key = *(adj_matrix_row_col_key_first + diff --git a/cpp/include/cugraph/prims/transform_reduce_e.cuh b/cpp/include/cugraph/prims/transform_reduce_e.cuh index 8eac2ac4f07..9bc7fe1cdf1 100644 --- a/cpp/include/cugraph/prims/transform_reduce_e.cuh +++ b/cpp/include/cugraph/prims/transform_reduce_e.cuh @@ -85,17 +85,17 @@ __global__ void for_all_major_for_all_nbr_low_degree( auto weight = weights ? (*weights)[i] : weight_t{1.0}; auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); auto row = GraphViewType::is_adj_matrix_transposed - ? minor - : matrix_partition.get_major_from_major_offset_nocheck(major_offset); - auto col = GraphViewType::is_adj_matrix_transposed - ? matrix_partition.get_major_from_major_offset_nocheck(major_offset) - : minor; - auto row_offset = GraphViewType::is_adj_matrix_transposed - ? minor_offset - : static_cast(major_offset); - auto col_offset = GraphViewType::is_adj_matrix_transposed - ? static_cast(major_offset) - : minor_offset; + ? minor + : matrix_partition.get_major_from_major_offset_nocheck(major_offset); + auto col = GraphViewType::is_adj_matrix_transposed + ? matrix_partition.get_major_from_major_offset_nocheck(major_offset) + : minor; + auto row_offset = GraphViewType::is_adj_matrix_transposed + ? minor_offset + : static_cast(major_offset); + auto col_offset = GraphViewType::is_adj_matrix_transposed + ? static_cast(major_offset) + : minor_offset; return evaluate_edge_op(major_offset); - auto col_offset = GraphViewType::is_adj_matrix_transposed - ? static_cast(major_offset) - : minor_offset; - auto e_op_result = evaluate_edge_op(major_offset); + auto col_offset = GraphViewType::is_adj_matrix_transposed + ? static_cast(major_offset) + : minor_offset; + auto e_op_result = evaluate_edge_op(major_offset); - auto col_offset = GraphViewType::is_adj_matrix_transposed - ? static_cast(major_offset) - : minor_offset; - auto e_op_result = evaluate_edge_op(major_offset); + auto col_offset = GraphViewType::is_adj_matrix_transposed + ? 
static_cast(major_offset) + : minor_offset; + auto e_op_result = evaluate_edge_op h_thresholds(detail::num_sparse_segments_per_vertex_partition - 1); diff --git a/cpp/include/cugraph/prims/vertex_frontier.cuh b/cpp/include/cugraph/prims/vertex_frontier.cuh index bfe23882088..22c7ca867f5 100644 --- a/cpp/include/cugraph/prims/vertex_frontier.cuh +++ b/cpp/include/cugraph/prims/vertex_frontier.cuh @@ -300,7 +300,9 @@ class VertexFrontier { VertexFrontier(raft::handle_t const& handle) : handle_ptr_(&handle) { - for (size_t i = 0; i < num_buckets; ++i) { buckets_.emplace_back(handle); } + for (size_t i = 0; i < num_buckets; ++i) { + buckets_.emplace_back(handle); + } } SortedUniqueKeyBucket& get_bucket(size_t bucket_idx) diff --git a/cpp/include/cugraph/utilities/collect_comm.cuh b/cpp/include/cugraph/utilities/collect_comm.cuh index 76eff8afc71..1e15afea1e5 100644 --- a/cpp/include/cugraph/utilities/collect_comm.cuh +++ b/cpp/include/cugraph/utilities/collect_comm.cuh @@ -43,7 +43,7 @@ template decltype(allocate_dataframe_buffer::value_type>( 0, cudaStream_t{nullptr})) -collect_values_for_keys(raft::comms::comms_t const &comm, +collect_values_for_keys(raft::comms::comms_t const& comm, VertexIterator0 map_key_first, VertexIterator0 map_key_last, ValueIterator map_value_first, @@ -158,7 +158,7 @@ template decltype(allocate_dataframe_buffer::value_type>( 0, cudaStream_t{nullptr})) -collect_values_for_unique_keys(raft::comms::comms_t const &comm, +collect_values_for_unique_keys(raft::comms::comms_t const& comm, VertexIterator0 map_key_first, VertexIterator0 map_key_last, ValueIterator map_value_first, diff --git a/cpp/include/cugraph/utilities/host_scalar_comm.cuh b/cpp/include/cugraph/utilities/host_scalar_comm.cuh index 85994ed22bf..26994ebde14 100644 --- a/cpp/include/cugraph/utilities/host_scalar_comm.cuh +++ b/cpp/include/cugraph/utilities/host_scalar_comm.cuh @@ -274,7 +274,9 @@ host_scalar_allgather(raft::comms::comms_t const& comm, T input, cudaStream_t st size_t constexpr tuple_size = thrust::tuple_size::value; std::vector rx_counts(comm.get_size(), tuple_size); std::vector displacements(rx_counts.size(), size_t{0}); - for (size_t i = 0; i < displacements.size(); ++i) { displacements[i] = i * tuple_size; } + for (size_t i = 0; i < displacements.size(); ++i) { + displacements[i] = i * tuple_size; + } std::vector h_tuple_scalar_elements(tuple_size); rmm::device_uvector d_allgathered_tuple_scalar_elements(comm.get_size() * tuple_size, stream); diff --git a/cpp/include/cugraph/utilities/path_retrieval.hpp b/cpp/include/cugraph/utilities/path_retrieval.hpp index 3b2408d9037..b4789c14c4b 100644 --- a/cpp/include/cugraph/utilities/path_retrieval.hpp +++ b/cpp/include/cugraph/utilities/path_retrieval.hpp @@ -35,11 +35,11 @@ namespace cugraph { * @param num_vertices Number of vertices. 
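The other two recurring clang-format 11 reflows in this diff are visible just above: the is_adj_matrix_transposed conditional-operator chains in transform_reduce_e.cuh and the neighboring prims headers are re-indented, and a single-statement loop body is no longer kept on one line (vertex_frontier.cuh and host_scalar_comm.cuh). The loop case, restated from the host_scalar_comm.cuh hunk above with no new code:

  // before: clang-format 8 accepted the inline body
  for (size_t i = 0; i < displacements.size(); ++i) { displacements[i] = i * tuple_size; }

  // after: clang-format 11 expands the body onto its own line
  for (size_t i = 0; i < displacements.size(); ++i) {
    displacements[i] = i * tuple_size;
  }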
**/ template -void get_traversed_cost(raft::handle_t const &handle, - vertex_t const *vertices, - vertex_t const *preds, - weight_t const *info_weights, - weight_t *out, +void get_traversed_cost(raft::handle_t const& handle, + vertex_t const* vertices, + vertex_t const* preds, + weight_t const* info_weights, + weight_t* out, vertex_t stop_vertex, vertex_t num_vertices); @@ -62,11 +62,11 @@ namespace experimental { template std:: tuple, rmm::device_uvector, rmm::device_uvector> - convert_paths_to_coo(raft::handle_t const &handle, + convert_paths_to_coo(raft::handle_t const& handle, index_t coalesced_sz_v, index_t num_paths, - rmm::device_buffer &&d_coalesced_v, - rmm::device_buffer &&d_sizes); + rmm::device_buffer&& d_coalesced_v, + rmm::device_buffer&& d_sizes); /** * @brief returns additional RW information on vertex paths offsets and weight path sizes and @@ -82,7 +82,7 @@ std:: */ template std::tuple, rmm::device_uvector, rmm::device_uvector> -query_rw_sizes_offsets(raft::handle_t const &handle, index_t num_paths, index_t const *ptr_d_sizes); +query_rw_sizes_offsets(raft::handle_t const& handle, index_t num_paths, index_t const* ptr_d_sizes); } // namespace experimental namespace broadcast { @@ -96,7 +96,7 @@ namespace broadcast { * @return graph_t object that was sent/received */ template -graph_t graph_broadcast(raft::handle_t const &handle, graph_t *graph_ptr); +graph_t graph_broadcast(raft::handle_t const& handle, graph_t* graph_ptr); }; // namespace broadcast } // namespace cugraph diff --git a/cpp/include/cugraph/utilities/shuffle_comm.cuh b/cpp/include/cugraph/utilities/shuffle_comm.cuh index e4f7067cfdf..18752897a58 100644 --- a/cpp/include/cugraph/utilities/shuffle_comm.cuh +++ b/cpp/include/cugraph/utilities/shuffle_comm.cuh @@ -44,8 +44,8 @@ inline std::tuple, std::vector, std::vector, std::vector> -compute_tx_rx_counts_offsets_ranks(raft::comms::comms_t const &comm, - rmm::device_uvector const &d_tx_value_counts, +compute_tx_rx_counts_offsets_ranks(raft::comms::comms_t const& comm, + rmm::device_uvector const& d_tx_value_counts, rmm::cuda_stream_view stream_view) { auto const comm_size = comm.get_size(); @@ -191,9 +191,9 @@ rmm::device_uvector groupby_and_count(VertexIterator tx_key_first /* [IN } template -auto shuffle_values(raft::comms::comms_t const &comm, +auto shuffle_values(raft::comms::comms_t const& comm, TxValueIterator tx_value_first, - std::vector const &tx_value_counts, + std::vector const& tx_value_counts, rmm::cuda_stream_view stream_view) { auto const comm_size = comm.get_size(); @@ -243,7 +243,7 @@ auto shuffle_values(raft::comms::comms_t const &comm, } template -auto groupby_gpuid_and_shuffle_values(raft::comms::comms_t const &comm, +auto groupby_gpuid_and_shuffle_values(raft::comms::comms_t const& comm, ValueIterator tx_value_first /* [INOUT */, ValueIterator tx_value_last /* [INOUT */, ValueToGPUIdOp value_to_gpu_id_op, @@ -294,7 +294,7 @@ auto groupby_gpuid_and_shuffle_values(raft::comms::comms_t const &comm, } template -auto groupby_gpuid_and_shuffle_kv_pairs(raft::comms::comms_t const &comm, +auto groupby_gpuid_and_shuffle_kv_pairs(raft::comms::comms_t const& comm, VertexIterator tx_key_first /* [INOUT */, VertexIterator tx_key_last /* [INOUT */, ValueIterator tx_value_first /* [INOUT */, diff --git a/cpp/include/cugraph/vertex_partition_device.cuh b/cpp/include/cugraph/vertex_partition_device.cuh new file mode 100644 index 00000000000..f598c7d89d8 --- /dev/null +++ b/cpp/include/cugraph/vertex_partition_device.cuh @@ -0,0 +1,112 @@ +/* + * Copyright 
(c) 2020-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +#include + +namespace cugraph { +namespace experimental { + +template +class vertex_partition_device_base_t { + public: + vertex_partition_device_base_t(vertex_t number_of_vertices) + : number_of_vertices_(number_of_vertices) + { + } + + template + __host__ __device__ std::enable_if_t::value, bool> is_valid_vertex( + vertex_type v) const noexcept + { + return ((v >= 0) && (v < number_of_vertices_)); + } + + template + __host__ __device__ std::enable_if_t::value, bool> is_valid_vertex( + vertex_type v) const noexcept + { + return (v < number_of_vertices_); + } + + private: + // should be trivially copyable to device + vertex_t number_of_vertices_{0}; +}; + +template +class vertex_partition_device_t; + +// multi-GPU version +template +class vertex_partition_device_t> + : public vertex_partition_device_base_t { + public: + vertex_partition_device_t(GraphViewType const& graph_view) + : vertex_partition_device_base_t( + graph_view.get_number_of_vertices()), + first_(graph_view.get_local_vertex_first()), + last_(graph_view.get_local_vertex_last()) + { + } + + __host__ __device__ bool is_local_vertex_nocheck( + typename GraphViewType::vertex_type v) const noexcept + { + return (v >= first_) && (v < last_); + } + + __host__ __device__ typename GraphViewType::vertex_type + get_local_vertex_offset_from_vertex_nocheck(typename GraphViewType::vertex_type v) const noexcept + { + return v - first_; + } + + private: + // should be trivially copyable to device + typename GraphViewType::vertex_type first_{0}; + typename GraphViewType::vertex_type last_{0}; +}; + +// single-GPU version +template +class vertex_partition_device_t> + : public vertex_partition_device_base_t { + public: + vertex_partition_device_t(GraphViewType const& graph_view) + : vertex_partition_device_base_t( + graph_view.get_number_of_vertices()) + { + } + + __host__ __device__ constexpr bool is_local_vertex_nocheck( + typename GraphViewType::vertex_type v) const noexcept + { + return true; + } + + __host__ __device__ constexpr typename GraphViewType::vertex_type + get_local_vertex_offset_from_vertex_nocheck(typename GraphViewType::vertex_type v) const noexcept + { + return v; + } +}; + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/include/cugraph/vertex_partition_device_view.cuh b/cpp/include/cugraph/vertex_partition_device_view.cuh index 046c89e62a2..39eb3e7238a 100644 --- a/cpp/include/cugraph/vertex_partition_device_view.cuh +++ b/cpp/include/cugraph/vertex_partition_device_view.cuh @@ -73,8 +73,8 @@ class vertex_partition_device_view_t= local_vertex_first_) && (v < local_vertex_last_); } - __host__ __device__ vertex_t get_local_vertex_offset_from_vertex_nocheck(vertex_t v) const - noexcept + __host__ __device__ vertex_t + get_local_vertex_offset_from_vertex_nocheck(vertex_t v) const noexcept { return v - local_vertex_first_; } diff --git a/cpp/scripts/run-clang-format.py 
b/cpp/scripts/run-clang-format.py index 9bd3c364329..02434278343 100644 --- a/cpp/scripts/run-clang-format.py +++ b/cpp/scripts/run-clang-format.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -22,7 +22,7 @@ import tempfile -EXPECTED_VERSION = "8.0.1" +EXPECTED_VERSION = "11.0.0" VERSION_REGEX = re.compile(r"clang-format version ([0-9.]+)") # NOTE: populate this list with more top-level dirs as we add more of them to the cugraph repo DEFAULT_DIRS = ["cpp/include", @@ -139,4 +139,3 @@ def main(): if __name__ == "__main__": main() - diff --git a/cpp/src/centrality/betweenness_centrality.cu b/cpp/src/centrality/betweenness_centrality.cu index 2af0710d1ec..6949399b4b7 100644 --- a/cpp/src/centrality/betweenness_centrality.cu +++ b/cpp/src/centrality/betweenness_centrality.cu @@ -35,14 +35,14 @@ namespace cugraph { namespace detail { namespace { template -void betweenness_centrality_impl(raft::handle_t const &handle, - legacy::GraphCSRView const &graph, - result_t *result, +void betweenness_centrality_impl(raft::handle_t const& handle, + legacy::GraphCSRView const& graph, + result_t* result, bool normalize, bool endpoints, - weight_t const *weight, + weight_t const* weight, vertex_t number_of_sources, - vertex_t const *sources, + vertex_t const* sources, vertex_t total_number_of_sources) { // Current Implementation relies on BFS @@ -59,13 +59,13 @@ void betweenness_centrality_impl(raft::handle_t const &handle, } template -void edge_betweenness_centrality_impl(raft::handle_t const &handle, - legacy::GraphCSRView const &graph, - result_t *result, +void edge_betweenness_centrality_impl(raft::handle_t const& handle, + legacy::GraphCSRView const& graph, + result_t* result, bool normalize, - weight_t const *weight, + weight_t const* weight, vertex_t number_of_sources, - vertex_t const *sources, + vertex_t const* sources, vertex_t total_number_of_sources) { // Current Implementation relies on BFS @@ -84,7 +84,7 @@ void edge_betweenness_centrality_impl(raft::handle_t const &handle, // bc.rescale_by_total_sources_used(total_number_of_sources); } template -vertex_t get_total_number_of_sources(raft::handle_t const &handle, vertex_t local_number_of_sources) +vertex_t get_total_number_of_sources(raft::handle_t const& handle, vertex_t local_number_of_sources) { vertex_t total_number_of_sources_used = local_number_of_sources; if (handle.comms_initialized()) { @@ -103,13 +103,13 @@ vertex_t get_total_number_of_sources(raft::handle_t const &handle, vertex_t loca } // namespace template -void verify_betweenness_centrality_input(result_t *result, +void verify_betweenness_centrality_input(result_t* result, bool is_edge_betweenness, bool normalize, bool endpoints, - weight_t const *weights, + weight_t const* weights, vertex_t const number_of_sources, - vertex_t const *sources) + vertex_t const* sources) { static_assert(std::is_same::value, "vertex_t should be int"); static_assert(std::is_same::value, "edge_t should be int"); @@ -139,12 +139,12 @@ void BC::setup() } template -void BC::configure(result_t *betweenness, +void BC::configure(result_t* betweenness, bool is_edge_betweenness, bool normalized, bool endpoints, - weight_t const *weights, - vertex_t const *sources, + weight_t const* weights, + vertex_t const* sources, vertex_t number_of_sources) { // --- Bind betweenness output vector to internal --- @@ 
-448,14 +448,14 @@ void BC::rescale_by_total_sources_used( } // namespace detail template -void betweenness_centrality(raft::handle_t const &handle, - legacy::GraphCSRView const &graph, - result_t *result, +void betweenness_centrality(raft::handle_t const& handle, + legacy::GraphCSRView const& graph, + result_t* result, bool normalize, bool endpoints, - weight_t const *weight, + weight_t const* weight, vertex_t k, - vertex_t const *vertices) + vertex_t const* vertices) { vertex_t total_number_of_sources_used = detail::get_total_number_of_sources(handle, k); if (handle.comms_initialized()) { @@ -489,32 +489,32 @@ void betweenness_centrality(raft::handle_t const &handle, } template void betweenness_centrality( - const raft::handle_t &, - legacy::GraphCSRView const &, - float *, + const raft::handle_t&, + legacy::GraphCSRView const&, + float*, bool, bool, - float const *, + float const*, int, - int const *); + int const*); template void betweenness_centrality( - const raft::handle_t &, - legacy::GraphCSRView const &, - double *, + const raft::handle_t&, + legacy::GraphCSRView const&, + double*, bool, bool, - double const *, + double const*, int, - int const *); + int const*); template -void edge_betweenness_centrality(raft::handle_t const &handle, - legacy::GraphCSRView const &graph, - result_t *result, +void edge_betweenness_centrality(raft::handle_t const& handle, + legacy::GraphCSRView const& graph, + result_t* result, bool normalize, - weight_t const *weight, + weight_t const* weight, vertex_t k, - vertex_t const *vertices) + vertex_t const* vertices) { vertex_t total_number_of_sources_used = detail::get_total_number_of_sources(handle, k); if (handle.comms_initialized()) { @@ -540,20 +540,20 @@ void edge_betweenness_centrality(raft::handle_t const &handle, } template void edge_betweenness_centrality( - const raft::handle_t &, - legacy::GraphCSRView const &, - float *, + const raft::handle_t&, + legacy::GraphCSRView const&, + float*, bool, - float const *, + float const*, int, - int const *); + int const*); template void edge_betweenness_centrality( - raft::handle_t const &handle, - legacy::GraphCSRView const &, - double *, + raft::handle_t const& handle, + legacy::GraphCSRView const&, + double*, bool, - double const *, + double const*, int, - int const *); + int const*); } // namespace cugraph diff --git a/cpp/src/centrality/betweenness_centrality.cuh b/cpp/src/centrality/betweenness_centrality.cuh index 9e3abca3e78..706b8bfebac 100644 --- a/cpp/src/centrality/betweenness_centrality.cuh +++ b/cpp/src/centrality/betweenness_centrality.cuh @@ -22,69 +22,69 @@ namespace cugraph { namespace detail { template -void betweenness_centrality(raft::handle_t const &handle, - legacy::GraphCSRView const &graph, - result_t *result, +void betweenness_centrality(raft::handle_t const& handle, + legacy::GraphCSRView const& graph, + result_t* result, bool normalize, bool endpoints, - weight_t const *weight, + weight_t const* weight, vertex_t const number_of_sources, - vertex_t const *sources); + vertex_t const* sources); template -void edge_betweenness_centrality(legacy::GraphCSRView const &graph, - result_t *result, +void edge_betweenness_centrality(legacy::GraphCSRView const& graph, + result_t* result, bool normalize, - weight_t const *weight, + weight_t const* weight, vertex_t const number_of_sources, - vertex_t const *sources); + vertex_t const* sources); template -void verify_betweenness_centrality_input(result_t *result, +void verify_betweenness_centrality_input(result_t* result, bool 
is_edge_betweenness, bool normalize, bool endpoints, - weight_t const *weights, + weight_t const* weights, vertex_t const number_of_sources, - vertex_t const *sources); + vertex_t const* sources); template class BC { public: virtual ~BC(void) {} - BC(raft::handle_t const &handle, - legacy::GraphCSRView const &graph, + BC(raft::handle_t const& handle, + legacy::GraphCSRView const& graph, cudaStream_t stream = 0) : handle_(handle), graph_(graph) { setup(); } - void configure(result_t *betweenness, + void configure(result_t* betweenness, bool is_edge_betweenness, bool normalize, bool endpoints, - weight_t const *weight, - vertex_t const *sources, + weight_t const* weight, + vertex_t const* sources, vertex_t const number_of_sources); - void configure_edge(result_t *betweenness, + void configure_edge(result_t* betweenness, bool normalize, - weight_t const *weight, - vertex_t const *sources, + weight_t const* weight, + vertex_t const* sources, vertex_t const number_of_sources); void compute(); void rescale_by_total_sources_used(vertex_t total_number_of_sources_used); private: // --- RAFT handle --- - raft::handle_t const &handle_; + raft::handle_t const& handle_; // --- Information concerning the graph --- - const legacy::GraphCSRView &graph_; + const legacy::GraphCSRView& graph_; // --- These information are extracted on setup --- vertex_t number_of_vertices_; // Number of vertices in the graph vertex_t number_of_edges_; // Number of edges in the graph - edge_t const *offsets_ptr_; // Pointer to the offsets - vertex_t const *indices_ptr_; // Pointers to the indices + edge_t const* offsets_ptr_; // Pointer to the offsets + vertex_t const* indices_ptr_; // Pointers to the indices // --- Information from configuration --- bool configured_ = false; // Flag to ensure configuration was called @@ -92,14 +92,14 @@ class BC { bool is_edge_betweenness_ = false; // If True compute edge_betweeness // FIXME: For weighted version - weight_t const *edge_weights_ptr_ = nullptr; // Pointer to the weights + weight_t const* edge_weights_ptr_ = nullptr; // Pointer to the weights bool endpoints_ = false; // If True normalize the betweenness - vertex_t const *sources_ = nullptr; // Subset of vertices to gather information from + vertex_t const* sources_ = nullptr; // Subset of vertices to gather information from vertex_t number_of_sources_; // Number of vertices in sources // --- Output ---- // betweenness is set/read by users - using Vectors - result_t *betweenness_ = nullptr; + result_t* betweenness_ = nullptr; // --- Data required to perform computation ---- rmm::device_vector distances_vec_; @@ -107,13 +107,13 @@ class BC { rmm::device_vector sp_counters_vec_; rmm::device_vector deltas_vec_; - vertex_t *distances_ = + vertex_t* distances_ = nullptr; // array(|V|) stores the distances gathered by the latest SSSP - vertex_t *predecessors_ = + vertex_t* predecessors_ = nullptr; // array(|V|) stores the predecessors of the latest SSSP - double *sp_counters_ = + double* sp_counters_ = nullptr; // array(|V|) stores the shortest path counter for the latest SSSP - double *deltas_ = nullptr; // array(|V|) stores the dependencies for the latest SSSP + double* deltas_ = nullptr; // array(|V|) stores the dependencies for the latest SSSP int max_grid_dim_1D_ = 0; int max_block_dim_1D_ = 0; diff --git a/cpp/src/centrality/betweenness_centrality_kernels.cuh b/cpp/src/centrality/betweenness_centrality_kernels.cuh index 3cb5add8ad6..27666095375 100644 --- a/cpp/src/centrality/betweenness_centrality_kernels.cuh +++ 
b/cpp/src/centrality/betweenness_centrality_kernels.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,13 +25,13 @@ namespace detail { // Should look into forAllEdge type primitive for different // load balancing template -__global__ void edges_accumulation_kernel(result_t *betweenness, +__global__ void edges_accumulation_kernel(result_t* betweenness, vertex_t number_vertices, - vertex_t const *indices, - edge_t const *offsets, - vertex_t *distances, - double *sp_counters, - double *deltas, + vertex_t const* indices, + edge_t const* offsets, + vertex_t* distances, + double* sp_counters, + double* deltas, vertex_t depth) { for (int thread_idx = blockIdx.x * blockDim.x + threadIdx.x; thread_idx < number_vertices; @@ -58,13 +58,13 @@ __global__ void edges_accumulation_kernel(result_t *betweenness, } template -__global__ void endpoints_accumulation_kernel(result_t *betweenness, +__global__ void endpoints_accumulation_kernel(result_t* betweenness, vertex_t number_vertices, - vertex_t const *indices, - edge_t const *offsets, - vertex_t *distances, - double *sp_counters, - double *deltas, + vertex_t const* indices, + edge_t const* offsets, + vertex_t* distances, + double* sp_counters, + double* deltas, vertex_t depth) { for (int thread_idx = blockIdx.x * blockDim.x + threadIdx.x; thread_idx < number_vertices; @@ -88,13 +88,13 @@ __global__ void endpoints_accumulation_kernel(result_t *betweenness, } } template -__global__ void accumulation_kernel(result_t *betweenness, +__global__ void accumulation_kernel(result_t* betweenness, vertex_t number_vertices, - vertex_t const *indices, - edge_t const *offsets, - vertex_t *distances, - double *sp_counters, - double *deltas, + vertex_t const* indices, + edge_t const* offsets, + vertex_t* distances, + double* sp_counters, + double* deltas, vertex_t depth) { for (int thread_idx = blockIdx.x * blockDim.x + threadIdx.x; thread_idx < number_vertices; @@ -117,4 +117,4 @@ __global__ void accumulation_kernel(result_t *betweenness, } } } // namespace detail -} // namespace cugraph \ No newline at end of file +} // namespace cugraph diff --git a/cpp/src/centrality/katz_centrality.cu b/cpp/src/centrality/katz_centrality.cu index 608e617e3af..320d76e5c03 100644 --- a/cpp/src/centrality/katz_centrality.cu +++ b/cpp/src/centrality/katz_centrality.cu @@ -29,8 +29,8 @@ namespace cugraph { template -void katz_centrality(legacy::GraphCSRView const &graph, - result_t *result, +void katz_centrality(legacy::GraphCSRView const& graph, + result_t* result, double alpha, int max_iter, double tol, @@ -52,6 +52,6 @@ void katz_centrality(legacy::GraphCSRView const &graph, } template void katz_centrality( - legacy::GraphCSRView const &, double *, double, int, double, bool, bool); + legacy::GraphCSRView const&, double*, double, int, double, bool, bool); } // namespace cugraph diff --git a/cpp/src/community/ecg.cu b/cpp/src/community/ecg.cu index 196998b38c1..ca0f50c4801 100644 --- a/cpp/src/community/ecg.cu +++ b/cpp/src/community/ecg.cu @@ -28,7 +28,7 @@ namespace { template __device__ IndexType -binsearch_maxle(const IndexType *vec, const IndexType val, IndexType low, IndexType high) +binsearch_maxle(const IndexType* vec, const IndexType val, IndexType low, IndexType high) { while (true) { if (low == high) return low; // we know it exists @@ -47,7 +47,7 @@ binsearch_maxle(const 
IndexType *vec, const IndexType val, IndexType low, IndexT // seems like it should just be a thrust::transform template __global__ void match_check_kernel( - IdxT size, IdxT num_verts, IdxT *offsets, IdxT *indices, IdxT *parts, ValT *weights) + IdxT size, IdxT num_verts, IdxT* offsets, IdxT* indices, IdxT* parts, ValT* weights) { IdxT tid = blockIdx.x * blockDim.x + threadIdx.x; while (tid < size) { @@ -90,7 +90,7 @@ struct update_functor { * responsible for freeing the allocated memory using ALLOC_FREE_TRY(). */ template -void get_permutation_vector(T size, T seed, T *permutation, rmm::cuda_stream_view stream_view) +void get_permutation_vector(T size, T seed, T* permutation, rmm::cuda_stream_view stream_view) { rmm::device_uvector randoms_v(size, stream_view); @@ -109,7 +109,7 @@ class EcgLouvain : public cugraph::Louvain { using edge_t = typename graph_type::edge_type; using weight_t = typename graph_type::weight_type; - EcgLouvain(raft::handle_t const &handle, graph_type const &graph, vertex_t seed) + EcgLouvain(raft::handle_t const& handle, graph_type const& graph, vertex_t seed) : cugraph::Louvain(handle, graph), seed_(seed) { } @@ -133,11 +133,11 @@ class EcgLouvain : public cugraph::Louvain { namespace cugraph { template -void ecg(raft::handle_t const &handle, - legacy::GraphCSRView const &graph, +void ecg(raft::handle_t const& handle, + legacy::GraphCSRView const& graph, weight_t min_weight, vertex_t ensemble_size, - vertex_t *clustering) + vertex_t* clustering) { using graph_type = legacy::GraphCSRView; @@ -202,15 +202,15 @@ void ecg(raft::handle_t const &handle, // Explicit template instantiations. template void ecg( - raft::handle_t const &, - legacy::GraphCSRView const &graph, + raft::handle_t const&, + legacy::GraphCSRView const& graph, float min_weight, int32_t ensemble_size, - int32_t *clustering); + int32_t* clustering); template void ecg( - raft::handle_t const &, - legacy::GraphCSRView const &graph, + raft::handle_t const&, + legacy::GraphCSRView const& graph, double min_weight, int32_t ensemble_size, - int32_t *clustering); + int32_t* clustering); } // namespace cugraph diff --git a/cpp/src/community/egonet.cu b/cpp/src/community/egonet.cu index 72dc0eebc42..1e4569715af 100644 --- a/cpp/src/community/egonet.cu +++ b/cpp/src/community/egonet.cu @@ -60,9 +60,9 @@ std::tuple, std::optional>, rmm::device_uvector> extract( - raft::handle_t const &handle, - cugraph::experimental::graph_view_t const &csr_view, - vertex_t *source_vertex, + raft::handle_t const& handle, + cugraph::experimental::graph_view_t const& csr_view, + vertex_t* source_vertex, vertex_t n_subgraphs, vertex_t radius) { @@ -187,9 +187,9 @@ std::tuple, rmm::device_uvector, std::optional>, rmm::device_uvector> -extract_ego(raft::handle_t const &handle, - graph_view_t const &graph_view, - vertex_t *source_vertex, +extract_ego(raft::handle_t const& handle, + graph_view_t const& graph_view, + vertex_t* source_vertex, vertex_t n_subgraphs, vertex_t radius) { @@ -216,27 +216,27 @@ template std::tuple, rmm::device_uvector, std::optional>, rmm::device_uvector> -extract_ego(raft::handle_t const &, - graph_view_t const &, - int32_t *, +extract_ego(raft::handle_t const&, + graph_view_t const&, + int32_t*, int32_t, int32_t); template std::tuple, rmm::device_uvector, std::optional>, rmm::device_uvector> -extract_ego(raft::handle_t const &, - graph_view_t const &, - int32_t *, +extract_ego(raft::handle_t const&, + graph_view_t const&, + int32_t*, int32_t, int32_t); template std::tuple, rmm::device_uvector, std::optional>, 
rmm::device_uvector> -extract_ego(raft::handle_t const &, - graph_view_t const &, - int64_t *, +extract_ego(raft::handle_t const&, + graph_view_t const&, + int64_t*, int64_t, int64_t); @@ -245,27 +245,27 @@ template std::tuple, rmm::device_uvector, std::optional>, rmm::device_uvector> -extract_ego(raft::handle_t const &, - graph_view_t const &, - int32_t *, +extract_ego(raft::handle_t const&, + graph_view_t const&, + int32_t*, int32_t, int32_t); template std::tuple, rmm::device_uvector, std::optional>, rmm::device_uvector> -extract_ego(raft::handle_t const &, - graph_view_t const &, - int32_t *, +extract_ego(raft::handle_t const&, + graph_view_t const&, + int32_t*, int32_t, int32_t); template std::tuple, rmm::device_uvector, std::optional>, rmm::device_uvector> -extract_ego(raft::handle_t const &, - graph_view_t const &, - int64_t *, +extract_ego(raft::handle_t const&, + graph_view_t const&, + int64_t*, int64_t, int64_t); } // namespace experimental diff --git a/cpp/src/community/extract_subgraph_by_vertex.cu b/cpp/src/community/extract_subgraph_by_vertex.cu index 305bb566af6..224a3417caf 100644 --- a/cpp/src/community/extract_subgraph_by_vertex.cu +++ b/cpp/src/community/extract_subgraph_by_vertex.cu @@ -25,8 +25,8 @@ namespace { template std::unique_ptr> extract_subgraph_by_vertices( - cugraph::legacy::GraphCOOView const &graph, - vertex_t const *vertices, + cugraph::legacy::GraphCOOView const& graph, + vertex_t const* vertices, vertex_t num_vertices, cudaStream_t stream) { @@ -35,8 +35,8 @@ std::unique_ptr> extract_s rmm::device_vector error_count_v{1, 0}; rmm::device_vector vertex_used_v{graph_num_verts, num_vertices}; - vertex_t *d_vertex_used = vertex_used_v.data().get(); - int64_t *d_error_count = error_count_v.data().get(); + vertex_t* d_vertex_used = vertex_used_v.data().get(); + int64_t* d_error_count = error_count_v.data().get(); thrust::for_each( rmm::exec_policy(stream)->on(stream), @@ -54,9 +54,9 @@ std::unique_ptr> extract_s CUGRAPH_EXPECTS(error_count_v[0] == 0, "Input error... vertices specifies vertex id out of range"); - vertex_t *graph_src = graph.src_indices; - vertex_t *graph_dst = graph.dst_indices; - weight_t *graph_weight = graph.edge_data; + vertex_t* graph_src = graph.src_indices; + vertex_t* graph_dst = graph.dst_indices; + weight_t* graph_weight = graph.edge_data; // iterate over the edges and count how many make it into the output int64_t count = thrust::count_if( @@ -73,9 +73,9 @@ std::unique_ptr> extract_s auto result = std::make_unique>( num_vertices, count, has_weight); - vertex_t *d_new_src = result->src_indices(); - vertex_t *d_new_dst = result->dst_indices(); - weight_t *d_new_weight = result->edge_data(); + vertex_t* d_new_src = result->src_indices(); + vertex_t* d_new_dst = result->dst_indices(); + weight_t* d_new_weight = result->edge_data(); // reusing error_count as a vertex counter... 
thrust::for_each(rmm::exec_policy(stream)->on(stream), @@ -117,7 +117,7 @@ namespace subgraph { template std::unique_ptr> extract_subgraph_vertex( - legacy::GraphCOOView const &graph, VT const *vertices, VT num_vertices) + legacy::GraphCOOView const& graph, VT const* vertices, VT num_vertices) { CUGRAPH_EXPECTS(vertices != nullptr, "Invalid input argument: vertices must be non null"); @@ -132,10 +132,10 @@ std::unique_ptr> extract_subgraph_vertex( template std::unique_ptr> extract_subgraph_vertex( - legacy::GraphCOOView const &, int32_t const *, int32_t); + legacy::GraphCOOView const&, int32_t const*, int32_t); template std::unique_ptr> extract_subgraph_vertex( - legacy::GraphCOOView const &, int32_t const *, int32_t); + legacy::GraphCOOView const&, int32_t const*, int32_t); } // namespace subgraph } // namespace cugraph diff --git a/cpp/src/community/flatten_dendrogram.cuh b/cpp/src/community/flatten_dendrogram.cuh index ff6446b0e5f..9a8d214f883 100644 --- a/cpp/src/community/flatten_dendrogram.cuh +++ b/cpp/src/community/flatten_dendrogram.cuh @@ -24,10 +24,10 @@ namespace cugraph { template -void partition_at_level(raft::handle_t const &handle, - Dendrogram const &dendrogram, - vertex_t const *d_vertex_ids, - vertex_t *d_partition, +void partition_at_level(raft::handle_t const& handle, + Dendrogram const& dendrogram, + vertex_t const* d_vertex_ids, + vertex_t* d_partition, size_t level) { vertex_t local_num_verts = dendrogram.get_level_size_nocheck(0); @@ -47,8 +47,8 @@ void partition_at_level(raft::handle_t const &handle, cugraph::experimental::relabel( handle, - std::tuple(local_vertex_ids_v.data(), - dendrogram.get_level_ptr_nocheck(l)), + std::tuple(local_vertex_ids_v.data(), + dendrogram.get_level_ptr_nocheck(l)), dendrogram.get_level_size_nocheck(l), d_partition, local_num_verts, diff --git a/cpp/src/community/ktruss.cu b/cpp/src/community/ktruss.cu index 9297e416287..2216278add8 100644 --- a/cpp/src/community/ktruss.cu +++ b/cpp/src/community/ktruss.cu @@ -36,7 +36,7 @@ namespace detail { template std::unique_ptr> ktruss_subgraph_impl( - legacy::GraphCOOView const &graph, int k, rmm::mr::device_memory_resource *mr) + legacy::GraphCOOView const& graph, int k, rmm::mr::device_memory_resource* mr) { using HornetGraph = hornet::gpu::Hornet; using UpdatePtr = hornet::BatchUpdatePtr; @@ -79,7 +79,7 @@ std::unique_ptr> ktruss_subgraph_impl( } template std::unique_ptr> weighted_ktruss_subgraph_impl( - legacy::GraphCOOView const &graph, int k, rmm::mr::device_memory_resource *mr) + legacy::GraphCOOView const& graph, int k, rmm::mr::device_memory_resource* mr) { using HornetGraph = hornet::gpu::Hornet>; using UpdatePtr = hornet::BatchUpdatePtr, hornet::DeviceType::DEVICE>; @@ -125,7 +125,7 @@ std::unique_ptr> weighted_ktruss_subgraph_impl( template std::unique_ptr> k_truss_subgraph( - legacy::GraphCOOView const &graph, int k, rmm::mr::device_memory_resource *mr) + legacy::GraphCOOView const& graph, int k, rmm::mr::device_memory_resource* mr) { CUGRAPH_EXPECTS(graph.src_indices != nullptr, "Graph source indices cannot be a nullptr"); CUGRAPH_EXPECTS(graph.dst_indices != nullptr, "Graph destination indices cannot be a nullptr"); @@ -138,13 +138,13 @@ std::unique_ptr> k_truss_subgraph( } template std::unique_ptr> -k_truss_subgraph(legacy::GraphCOOView const &, +k_truss_subgraph(legacy::GraphCOOView const&, int, - rmm::mr::device_memory_resource *); + rmm::mr::device_memory_resource*); template std::unique_ptr> -k_truss_subgraph(legacy::GraphCOOView const &, 
+k_truss_subgraph(legacy::GraphCOOView const&, int, - rmm::mr::device_memory_resource *); + rmm::mr::device_memory_resource*); } // namespace cugraph diff --git a/cpp/src/community/leiden.cu b/cpp/src/community/leiden.cu index 703738fc190..f55321dbebb 100644 --- a/cpp/src/community/leiden.cu +++ b/cpp/src/community/leiden.cu @@ -22,9 +22,9 @@ namespace cugraph { template -std::pair leiden(raft::handle_t const &handle, - legacy::GraphCSRView const &graph, - vertex_t *clustering, +std::pair leiden(raft::handle_t const& handle, + legacy::GraphCSRView const& graph, + vertex_t* clustering, size_t max_level, weight_t resolution) { @@ -56,15 +56,15 @@ std::pair leiden(raft::handle_t const &handle, } // Explicit template instantations -template std::pair leiden(raft::handle_t const &, - legacy::GraphCSRView const &, - int32_t *, +template std::pair leiden(raft::handle_t const&, + legacy::GraphCSRView const&, + int32_t*, size_t, float); -template std::pair leiden(raft::handle_t const &, - legacy::GraphCSRView const &, - int32_t *, +template std::pair leiden(raft::handle_t const&, + legacy::GraphCSRView const&, + int32_t*, size_t, double); diff --git a/cpp/src/community/leiden.cuh b/cpp/src/community/leiden.cuh index 6f0a0b6e8b0..252fdbf60a7 100644 --- a/cpp/src/community/leiden.cuh +++ b/cpp/src/community/leiden.cuh @@ -29,7 +29,7 @@ class Leiden : public Louvain { using edge_t = typename graph_type::edge_type; using weight_t = typename graph_type::weight_type; - Leiden(raft::handle_t const &handle, graph_type const &graph) + Leiden(raft::handle_t const& handle, graph_type const& graph) : Louvain(handle, graph), constraint_v_(graph.number_of_vertices, handle.get_stream()) { @@ -37,7 +37,7 @@ class Leiden : public Louvain { weight_t update_clustering_constrained(weight_t total_edge_weight, weight_t resolution, - graph_type const &graph) + graph_type const& graph) { this->timer_start("update_clustering_constrained"); @@ -49,14 +49,14 @@ class Leiden : public Louvain { rmm::device_uvector old_cluster_sum_v(graph.number_of_vertices, this->handle_.get_stream_view()); - vertex_t const *d_src_indices = this->src_indices_v_.data(); - vertex_t const *d_dst_indices = graph.indices; - vertex_t *d_cluster_hash = cluster_hash_v.data(); - vertex_t *d_cluster = this->dendrogram_->current_level_begin(); - weight_t const *d_vertex_weights = this->vertex_weights_v_.data(); - weight_t *d_cluster_weights = this->cluster_weights_v_.data(); - weight_t *d_delta_Q = delta_Q_v.data(); - vertex_t *d_constraint = constraint_v_.data(); + vertex_t const* d_src_indices = this->src_indices_v_.data(); + vertex_t const* d_dst_indices = graph.indices; + vertex_t* d_cluster_hash = cluster_hash_v.data(); + vertex_t* d_cluster = this->dendrogram_->current_level_begin(); + weight_t const* d_vertex_weights = this->vertex_weights_v_.data(); + weight_t* d_cluster_weights = this->cluster_weights_v_.data(); + weight_t* d_delta_Q = delta_Q_v.data(); + vertex_t* d_constraint = constraint_v_.data(); thrust::copy(rmm::exec_policy(this->handle_.get_stream_view()), this->dendrogram_->current_level_begin(), diff --git a/cpp/src/community/louvain.cu b/cpp/src/community/louvain.cu index 3fee1f58577..c3df4207283 100644 --- a/cpp/src/community/louvain.cu +++ b/cpp/src/community/louvain.cu @@ -30,8 +30,8 @@ namespace detail { template std::pair>, weight_t> louvain( - raft::handle_t const &handle, - legacy::GraphCSRView const &graph_view, + raft::handle_t const& handle, + legacy::GraphCSRView const& graph_view, size_t max_level, weight_t resolution) { 
@@ -46,8 +46,8 @@ std::pair>, weight_t> louvain( template std::pair>, weight_t> louvain( - raft::handle_t const &handle, - experimental::graph_view_t const &graph_view, + raft::handle_t const& handle, + experimental::graph_view_t const& graph_view, size_t max_level, weight_t resolution) { @@ -60,10 +60,10 @@ std::pair>, weight_t> louvain( } template -void flatten_dendrogram(raft::handle_t const &handle, - legacy::GraphCSRView const &graph_view, - Dendrogram const &dendrogram, - vertex_t *clustering) +void flatten_dendrogram(raft::handle_t const& handle, + legacy::GraphCSRView const& graph_view, + Dendrogram const& dendrogram, + vertex_t* clustering) { rmm::device_uvector vertex_ids_v(graph_view.number_of_vertices, handle.get_stream()); @@ -78,10 +78,10 @@ void flatten_dendrogram(raft::handle_t const &handle, template void flatten_dendrogram( - raft::handle_t const &handle, - experimental::graph_view_t const &graph_view, - Dendrogram const &dendrogram, - vertex_t *clustering) + raft::handle_t const& handle, + experimental::graph_view_t const& graph_view, + Dendrogram const& dendrogram, + vertex_t* clustering) { rmm::device_uvector vertex_ids_v(graph_view.get_number_of_vertices(), handle.get_stream()); @@ -100,8 +100,8 @@ void flatten_dendrogram( template std::pair>, typename graph_view_t::weight_type> -louvain(raft::handle_t const &handle, - graph_view_t const &graph_view, +louvain(raft::handle_t const& handle, + graph_view_t const& graph_view, size_t max_level, typename graph_view_t::weight_type resolution) { @@ -109,19 +109,19 @@ louvain(raft::handle_t const &handle, } template -void flatten_dendrogram(raft::handle_t const &handle, - graph_view_t const &graph_view, - Dendrogram const &dendrogram, - typename graph_view_t::vertex_type *clustering) +void flatten_dendrogram(raft::handle_t const& handle, + graph_view_t const& graph_view, + Dendrogram const& dendrogram, + typename graph_view_t::vertex_type* clustering) { detail::flatten_dendrogram(handle, graph_view, dendrogram, clustering); } template std::pair louvain( - raft::handle_t const &handle, - graph_view_t const &graph_view, - typename graph_view_t::vertex_type *clustering, + raft::handle_t const& handle, + graph_view_t const& graph_view, + typename graph_view_t::vertex_type* clustering, size_t max_level, typename graph_view_t::weight_type resolution) { @@ -142,149 +142,149 @@ std::pair louvain( // Explicit template instantations template std::pair>, float> louvain( - raft::handle_t const &, - experimental::graph_view_t const &, + raft::handle_t const&, + experimental::graph_view_t const&, size_t, float); template std::pair>, float> louvain( - raft::handle_t const &, - experimental::graph_view_t const &, + raft::handle_t const&, + experimental::graph_view_t const&, size_t, float); template std::pair>, float> louvain( - raft::handle_t const &, - experimental::graph_view_t const &, + raft::handle_t const&, + experimental::graph_view_t const&, size_t, float); template std::pair>, double> louvain( - raft::handle_t const &, - experimental::graph_view_t const &, + raft::handle_t const&, + experimental::graph_view_t const&, size_t, double); template std::pair>, double> louvain( - raft::handle_t const &, - experimental::graph_view_t const &, + raft::handle_t const&, + experimental::graph_view_t const&, size_t, double); template std::pair>, double> louvain( - raft::handle_t const &, - experimental::graph_view_t const &, + raft::handle_t const&, + experimental::graph_view_t const&, size_t, double); template std::pair>, float> louvain( - 
raft::handle_t const &, - experimental::graph_view_t const &, + raft::handle_t const&, + experimental::graph_view_t const&, size_t, float); template std::pair>, float> louvain( - raft::handle_t const &, - experimental::graph_view_t const &, + raft::handle_t const&, + experimental::graph_view_t const&, size_t, float); template std::pair>, float> louvain( - raft::handle_t const &, - experimental::graph_view_t const &, + raft::handle_t const&, + experimental::graph_view_t const&, size_t, float); template std::pair>, double> louvain( - raft::handle_t const &, - experimental::graph_view_t const &, + raft::handle_t const&, + experimental::graph_view_t const&, size_t, double); template std::pair>, double> louvain( - raft::handle_t const &, - experimental::graph_view_t const &, + raft::handle_t const&, + experimental::graph_view_t const&, size_t, double); template std::pair>, double> louvain( - raft::handle_t const &, - experimental::graph_view_t const &, + raft::handle_t const&, + experimental::graph_view_t const&, size_t, double); -template std::pair louvain(raft::handle_t const &, - legacy::GraphCSRView const &, - int32_t *, +template std::pair louvain(raft::handle_t const&, + legacy::GraphCSRView const&, + int32_t*, size_t, float); -template std::pair louvain(raft::handle_t const &, - legacy::GraphCSRView const &, - int32_t *, +template std::pair louvain(raft::handle_t const&, + legacy::GraphCSRView const&, + int32_t*, size_t, double); template std::pair louvain( - raft::handle_t const &, - experimental::graph_view_t const &, - int32_t *, + raft::handle_t const&, + experimental::graph_view_t const&, + int32_t*, size_t, float); template std::pair louvain( - raft::handle_t const &, - experimental::graph_view_t const &, - int32_t *, + raft::handle_t const&, + experimental::graph_view_t const&, + int32_t*, size_t, double); template std::pair louvain( - raft::handle_t const &, - experimental::graph_view_t const &, - int32_t *, + raft::handle_t const&, + experimental::graph_view_t const&, + int32_t*, size_t, float); template std::pair louvain( - raft::handle_t const &, - experimental::graph_view_t const &, - int32_t *, + raft::handle_t const&, + experimental::graph_view_t const&, + int32_t*, size_t, double); template std::pair louvain( - raft::handle_t const &, - experimental::graph_view_t const &, - int64_t *, + raft::handle_t const&, + experimental::graph_view_t const&, + int64_t*, size_t, float); template std::pair louvain( - raft::handle_t const &, - experimental::graph_view_t const &, - int64_t *, + raft::handle_t const&, + experimental::graph_view_t const&, + int64_t*, size_t, double); // instantations with multi_gpu = true template std::pair louvain( - raft::handle_t const &, - experimental::graph_view_t const &, - int32_t *, + raft::handle_t const&, + experimental::graph_view_t const&, + int32_t*, size_t, float); template std::pair louvain( - raft::handle_t const &, - experimental::graph_view_t const &, - int32_t *, + raft::handle_t const&, + experimental::graph_view_t const&, + int32_t*, size_t, double); template std::pair louvain( - raft::handle_t const &, - experimental::graph_view_t const &, - int32_t *, + raft::handle_t const&, + experimental::graph_view_t const&, + int32_t*, size_t, float); template std::pair louvain( - raft::handle_t const &, - experimental::graph_view_t const &, - int32_t *, + raft::handle_t const&, + experimental::graph_view_t const&, + int32_t*, size_t, double); template std::pair louvain( - raft::handle_t const &, - experimental::graph_view_t const &, - int64_t *, 
+ raft::handle_t const&, + experimental::graph_view_t const&, + int64_t*, size_t, float); template std::pair louvain( - raft::handle_t const &, - experimental::graph_view_t const &, - int64_t *, + raft::handle_t const&, + experimental::graph_view_t const&, + int64_t*, size_t, double); diff --git a/cpp/src/community/louvain.cuh b/cpp/src/community/louvain.cuh index a09f648b37a..31c5a2281ad 100644 --- a/cpp/src/community/louvain.cuh +++ b/cpp/src/community/louvain.cuh @@ -41,7 +41,7 @@ class Louvain { using edge_t = typename graph_type::edge_type; using weight_t = typename graph_type::weight_type; - Louvain(raft::handle_t const &handle, graph_type const &graph) + Louvain(raft::handle_t const& handle, graph_type const& graph) : #ifdef TIMING hr_timer_(), @@ -84,8 +84,8 @@ class Louvain { weight_t modularity(weight_t total_edge_weight, weight_t resolution, - graph_t const &graph, - vertex_t const *d_cluster) + graph_t const& graph, + vertex_t const* d_cluster) { vertex_t n_verts = graph.number_of_vertices; @@ -140,9 +140,9 @@ class Louvain { return Q; } - Dendrogram const &get_dendrogram() const { return *dendrogram_; } + Dendrogram const& get_dendrogram() const { return *dendrogram_; } - Dendrogram &get_dendrogram() { return *dendrogram_; } + Dendrogram& get_dendrogram() { return *dendrogram_; } std::unique_ptr> move_dendrogram() { return std::move(dendrogram_); } @@ -188,7 +188,7 @@ class Louvain { } protected: - void timer_start(std::string const ®ion) + void timer_start(std::string const& region) { #ifdef TIMING hr_timer_.start(region); @@ -203,7 +203,7 @@ class Louvain { #endif } - void timer_display(std::ostream &os) + void timer_display(std::ostream& os) { #ifdef TIMING hr_timer_.display(os); @@ -220,15 +220,15 @@ class Louvain { } public: - void compute_vertex_and_cluster_weights(graph_type const &graph) + void compute_vertex_and_cluster_weights(graph_type const& graph) { timer_start("compute_vertex_and_cluster_weights"); - edge_t const *d_offsets = graph.offsets; - vertex_t const *d_indices = graph.indices; - weight_t const *d_weights = graph.edge_data; - weight_t *d_vertex_weights = vertex_weights_v_.data(); - weight_t *d_cluster_weights = cluster_weights_v_.data(); + edge_t const* d_offsets = graph.offsets; + vertex_t const* d_indices = graph.indices; + weight_t const* d_weights = graph.edge_data; + weight_t* d_vertex_weights = vertex_weights_v_.data(); + weight_t* d_cluster_weights = cluster_weights_v_.data(); // // MNMG: copy_v_transform_reduce_out_nbr, then copy @@ -251,7 +251,7 @@ class Louvain { virtual weight_t update_clustering(weight_t total_edge_weight, weight_t resolution, - graph_type const &graph) + graph_type const& graph) { timer_start("update_clustering"); @@ -262,10 +262,10 @@ class Louvain { rmm::device_uvector old_cluster_sum_v(graph.number_of_vertices, handle_.get_stream_view()); - vertex_t *d_cluster = dendrogram_->current_level_begin(); - weight_t const *d_vertex_weights = vertex_weights_v_.data(); - weight_t *d_cluster_weights = cluster_weights_v_.data(); - weight_t *d_delta_Q = delta_Q_v.data(); + vertex_t* d_cluster = dendrogram_->current_level_begin(); + weight_t const* d_vertex_weights = vertex_weights_v_.data(); + weight_t* d_cluster_weights = cluster_weights_v_.data(); + weight_t* d_delta_Q = delta_Q_v.data(); thrust::copy(rmm::exec_policy(handle_.get_stream_view()), dendrogram_->current_level_begin(), @@ -308,21 +308,21 @@ class Louvain { void compute_delta_modularity(weight_t total_edge_weight, weight_t resolution, - graph_type const &graph, - 
rmm::device_uvector &cluster_hash_v, - rmm::device_uvector &old_cluster_sum_v, - rmm::device_uvector &delta_Q_v) + graph_type const& graph, + rmm::device_uvector& cluster_hash_v, + rmm::device_uvector& old_cluster_sum_v, + rmm::device_uvector& delta_Q_v) { - edge_t const *d_offsets = graph.offsets; - weight_t const *d_weights = graph.edge_data; - vertex_t const *d_cluster = dendrogram_->current_level_begin(); - weight_t const *d_vertex_weights = vertex_weights_v_.data(); - weight_t const *d_cluster_weights = cluster_weights_v_.data(); + edge_t const* d_offsets = graph.offsets; + weight_t const* d_weights = graph.edge_data; + vertex_t const* d_cluster = dendrogram_->current_level_begin(); + weight_t const* d_vertex_weights = vertex_weights_v_.data(); + weight_t const* d_cluster_weights = cluster_weights_v_.data(); - vertex_t *d_cluster_hash = cluster_hash_v.data(); - weight_t *d_delta_Q = delta_Q_v.data(); - weight_t *d_old_cluster_sum = old_cluster_sum_v.data(); - weight_t *d_new_cluster_sum = d_delta_Q; + vertex_t* d_cluster_hash = cluster_hash_v.data(); + weight_t* d_delta_Q = delta_Q_v.data(); + weight_t* d_old_cluster_sum = old_cluster_sum_v.data(); + weight_t* d_new_cluster_sum = d_delta_Q; thrust::fill(rmm::exec_policy(handle_.get_stream_view()), cluster_hash_v.begin(), @@ -409,10 +409,10 @@ class Louvain { }); } - void assign_nodes(graph_type const &graph, - rmm::device_uvector &cluster_hash_v, - rmm::device_uvector &next_cluster_v, - rmm::device_uvector &delta_Q_v, + void assign_nodes(graph_type const& graph, + rmm::device_uvector& cluster_hash_v, + rmm::device_uvector& next_cluster_v, + rmm::device_uvector& delta_Q_v, bool up_down) { rmm::device_uvector temp_vertices_v(graph.number_of_vertices, @@ -483,7 +483,7 @@ class Louvain { }); } - void shrink_graph(graph_t &graph) + void shrink_graph(graph_t& graph) { timer_start("shrinking graph"); @@ -499,9 +499,9 @@ class Louvain { vertex_t renumber_clusters() { - vertex_t *d_tmp_array = tmp_arr_v_.data(); - vertex_t *d_cluster_inverse = cluster_inverse_v_.data(); - vertex_t *d_cluster = dendrogram_->current_level_begin(); + vertex_t* d_tmp_array = tmp_arr_v_.data(); + vertex_t* d_cluster_inverse = cluster_inverse_v_.data(); + vertex_t* d_cluster = dendrogram_->current_level_begin(); vertex_t old_num_clusters = dendrogram_->current_level_size(); @@ -560,7 +560,7 @@ class Louvain { return new_num_clusters; } - void generate_superverticies_graph(graph_t &graph, vertex_t num_clusters) + void generate_superverticies_graph(graph_t& graph, vertex_t num_clusters) { rmm::device_uvector new_src_v(graph.number_of_edges, handle_.get_stream_view()); rmm::device_uvector new_dst_v(graph.number_of_edges, handle_.get_stream_view()); @@ -627,7 +627,7 @@ class Louvain { } protected: - raft::handle_t const &handle_; + raft::handle_t const& handle_; vertex_t number_of_vertices_; edge_t number_of_edges_; diff --git a/cpp/src/community/spectral_clustering.cu b/cpp/src/community/spectral_clustering.cu index 7fc52d6ed5b..4dd27a56b70 100644 --- a/cpp/src/community/spectral_clustering.cu +++ b/cpp/src/community/spectral_clustering.cu @@ -39,16 +39,16 @@ namespace ext_raft { namespace detail { template -void balancedCutClustering_impl(legacy::GraphCSRView const &graph, +void balancedCutClustering_impl(legacy::GraphCSRView const& graph, vertex_t n_clusters, vertex_t n_eig_vects, weight_t evs_tolerance, int evs_max_iter, weight_t kmean_tolerance, int kmean_max_iter, - vertex_t *clustering, - weight_t *eig_vals, - weight_t *eig_vects) + vertex_t* clustering, + 
weight_t* eig_vals, + weight_t* eig_vects) { RAFT_EXPECTS(graph.edge_data != nullptr, "API error, graph must have weights"); RAFT_EXPECTS(evs_tolerance >= weight_t{0.0}, @@ -110,16 +110,16 @@ void balancedCutClustering_impl(legacy::GraphCSRView template void spectralModularityMaximization_impl( - legacy::GraphCSRView const &graph, + legacy::GraphCSRView const& graph, vertex_t n_clusters, vertex_t n_eig_vects, weight_t evs_tolerance, int evs_max_iter, weight_t kmean_tolerance, int kmean_max_iter, - vertex_t *clustering, - weight_t *eig_vals, - weight_t *eig_vects) + vertex_t* clustering, + weight_t* eig_vals, + weight_t* eig_vects) { RAFT_EXPECTS(graph.edge_data != nullptr, "API error, graph must have weights"); RAFT_EXPECTS(evs_tolerance >= weight_t{0.0}, @@ -187,10 +187,10 @@ void spectralModularityMaximization_impl( } template -void analyzeModularityClustering_impl(legacy::GraphCSRView const &graph, +void analyzeModularityClustering_impl(legacy::GraphCSRView const& graph, int n_clusters, - vertex_t const *clustering, - weight_t *modularity) + vertex_t const* clustering, + weight_t* modularity) { raft::handle_t handle; auto stream = handle.get_stream(); @@ -208,11 +208,11 @@ void analyzeModularityClustering_impl(legacy::GraphCSRView -void analyzeBalancedCut_impl(legacy::GraphCSRView const &graph, +void analyzeBalancedCut_impl(legacy::GraphCSRView const& graph, vertex_t n_clusters, - vertex_t const *clustering, - weight_t *edgeCut, - weight_t *ratioCut) + vertex_t const* clustering, + weight_t* edgeCut, + weight_t* ratioCut) { raft::handle_t handle; auto stream = handle.get_stream(); @@ -241,14 +241,14 @@ void analyzeBalancedCut_impl(legacy::GraphCSRView co } // namespace detail template -void balancedCutClustering(legacy::GraphCSRView const &graph, +void balancedCutClustering(legacy::GraphCSRView const& graph, VT num_clusters, VT num_eigen_vects, WT evs_tolerance, int evs_max_iter, WT kmean_tolerance, int kmean_max_iter, - VT *clustering) + VT* clustering) { rmm::device_vector eig_vals(num_eigen_vects); rmm::device_vector eig_vects(num_eigen_vects * graph.number_of_vertices); @@ -266,14 +266,14 @@ void balancedCutClustering(legacy::GraphCSRView const &graph, } template -void spectralModularityMaximization(legacy::GraphCSRView const &graph, +void spectralModularityMaximization(legacy::GraphCSRView const& graph, VT n_clusters, VT n_eigen_vects, WT evs_tolerance, int evs_max_iter, WT kmean_tolerance, int kmean_max_iter, - VT *clustering) + VT* clustering) { rmm::device_vector eig_vals(n_eigen_vects); rmm::device_vector eig_vects(n_eigen_vects * graph.number_of_vertices); @@ -291,54 +291,54 @@ void spectralModularityMaximization(legacy::GraphCSRView const &grap } template -void analyzeClustering_modularity(legacy::GraphCSRView const &graph, +void analyzeClustering_modularity(legacy::GraphCSRView const& graph, int n_clusters, - VT const *clustering, - WT *score) + VT const* clustering, + WT* score) { detail::analyzeModularityClustering_impl(graph, n_clusters, clustering, score); } template -void analyzeClustering_edge_cut(legacy::GraphCSRView const &graph, +void analyzeClustering_edge_cut(legacy::GraphCSRView const& graph, int n_clusters, - VT const *clustering, - WT *score) + VT const* clustering, + WT* score) { WT dummy{0.0}; detail::analyzeBalancedCut_impl(graph, n_clusters, clustering, score, &dummy); } template -void analyzeClustering_ratio_cut(legacy::GraphCSRView const &graph, +void analyzeClustering_ratio_cut(legacy::GraphCSRView const& graph, int n_clusters, - VT const *clustering, - 
WT *score) + VT const* clustering, + WT* score) { WT dummy{0.0}; detail::analyzeBalancedCut_impl(graph, n_clusters, clustering, &dummy, score); } template void balancedCutClustering( - legacy::GraphCSRView const &, int, int, float, int, float, int, int *); + legacy::GraphCSRView const&, int, int, float, int, float, int, int*); template void balancedCutClustering( - legacy::GraphCSRView const &, int, int, double, int, double, int, int *); + legacy::GraphCSRView const&, int, int, double, int, double, int, int*); template void spectralModularityMaximization( - legacy::GraphCSRView const &, int, int, float, int, float, int, int *); + legacy::GraphCSRView const&, int, int, float, int, float, int, int*); template void spectralModularityMaximization( - legacy::GraphCSRView const &, int, int, double, int, double, int, int *); + legacy::GraphCSRView const&, int, int, double, int, double, int, int*); template void analyzeClustering_modularity( - legacy::GraphCSRView const &, int, int const *, float *); + legacy::GraphCSRView const&, int, int const*, float*); template void analyzeClustering_modularity( - legacy::GraphCSRView const &, int, int const *, double *); + legacy::GraphCSRView const&, int, int const*, double*); template void analyzeClustering_edge_cut( - legacy::GraphCSRView const &, int, int const *, float *); + legacy::GraphCSRView const&, int, int const*, float*); template void analyzeClustering_edge_cut( - legacy::GraphCSRView const &, int, int const *, double *); + legacy::GraphCSRView const&, int, int const*, double*); template void analyzeClustering_ratio_cut( - legacy::GraphCSRView const &, int, int const *, float *); + legacy::GraphCSRView const&, int, int const*, float*); template void analyzeClustering_ratio_cut( - legacy::GraphCSRView const &, int, int const *, double *); + legacy::GraphCSRView const&, int, int const*, double*); } // namespace ext_raft } // namespace cugraph diff --git a/cpp/src/community/triangles_counting.cu b/cpp/src/community/triangles_counting.cu index 9aaf79490cb..97543d28c62 100644 --- a/cpp/src/community/triangles_counting.cu +++ b/cpp/src/community/triangles_counting.cu @@ -31,8 +31,8 @@ #include "cub/cub.cuh" #define TH_CENT_K_LOCLEN (34) -#define WP_LEN_TH1 (24) -#define WP_LEN_TH2 (2) +#define WP_LEN_TH1 (24) +#define WP_LEN_TH2 (2) #if WP_LEN_TH1 > 32 #error WP_LEN_TH1 must be <= 32! @@ -41,9 +41,9 @@ #define MIN(x, y) (((x) < (y)) ? (x) : (y)) #define MAX(x, y) (((x) > (y)) ? (x) : (y)) -#define THREADS (128) +#define THREADS (128) #define DIV_UP(a, b) (((a) + ((b)-1)) / (b)) -#define BITSOF(x) (sizeof(*x) * 8) +#define BITSOF(x) (sizeof(*x) * 8) #define BLK_BWL0 (128) @@ -73,9 +73,9 @@ struct spmat_t { T N; T nnz; T nrows; - const T *roff_d; - const T *rows_d; - const T *cols_d; + const T* roff_d; + const T* rows_d; + const T* cols_d; bool is_lower_triangular; }; @@ -173,7 +173,9 @@ __device__ __forceinline__ T block_sum(T v) const int wid = threadIdx.x / 32 + ((BDIM_Y > 1) ? 
threadIdx.y * (BDIM_X / 32) : 0); #pragma unroll - for (int i = WSIZE / 2; i; i >>= 1) { v += __shfl_down_sync(raft::warp_full_mask(), v, i); } + for (int i = WSIZE / 2; i; i >>= 1) { + v += __shfl_down_sync(raft::warp_full_mask(), v, i); + } if (lid == 0) sh[wid] = v; __syncthreads(); @@ -197,13 +199,13 @@ template __global__ void tricnt_b2b_k(const ROW_T ner, - const ROW_T *__restrict__ rows, - const OFF_T *__restrict__ roff, - const ROW_T *__restrict__ cols, - CNT_T *__restrict__ ocnt, - MAP_T *__restrict__ bmapL0, + const ROW_T* __restrict__ rows, + const OFF_T* __restrict__ roff, + const ROW_T* __restrict__ cols, + CNT_T* __restrict__ ocnt, + MAP_T* __restrict__ bmapL0, const size_t bmldL0, - MAP_T *__restrict__ bmapL1, + MAP_T* __restrict__ bmapL1, const size_t bmldL1) { CNT_T __cnt = 0; @@ -277,11 +279,11 @@ __global__ void tricnt_b2b_k(const ROW_T ner, template void tricnt_b2b(T nblock, - spmat_t *m, - uint64_t *ocnt_d, - unsigned int *bmapL0_d, + spmat_t* m, + uint64_t* ocnt_d, + unsigned int* bmapL0_d, size_t bmldL0, - unsigned int *bmapL1_d, + unsigned int* bmapL1_d, size_t bmldL1, cudaStream_t stream) { @@ -294,13 +296,15 @@ void tricnt_b2b(T nblock, ////////////////////////////////////////////////////////////////////////////////////////// template -__device__ __forceinline__ T block_sum_sh(T v, T *sh) +__device__ __forceinline__ T block_sum_sh(T v, T* sh) { const int lid = threadIdx.x % 32; const int wid = threadIdx.x / 32 + ((BDIM_Y > 1) ? threadIdx.y * (BDIM_X / 32) : 0); #pragma unroll - for (int i = WSIZE / 2; i; i >>= 1) { v += __shfl_down_sync(raft::warp_full_mask(), v, i); } + for (int i = WSIZE / 2; i; i >>= 1) { + v += __shfl_down_sync(raft::warp_full_mask(), v, i); + } if (lid == 0) sh[wid] = v; __syncthreads(); @@ -317,10 +321,10 @@ __device__ __forceinline__ T block_sum_sh(T v, T *sh) template __global__ void tricnt_bsh_k(const ROW_T ner, - const ROW_T *__restrict__ rows, - const OFF_T *__restrict__ roff, - const ROW_T *__restrict__ cols, - CNT_T *__restrict__ ocnt, + const ROW_T* __restrict__ rows, + const OFF_T* __restrict__ roff, + const ROW_T* __restrict__ cols, + CNT_T* __restrict__ ocnt, const size_t bmld) { CNT_T __cnt = 0; @@ -374,7 +378,7 @@ __global__ void tricnt_bsh_k(const ROW_T ner, __syncthreads(); if (lastcol - firstcol < rend - rbeg) { for (int i = firstcol; i <= lastcol; i += BDIM) { - if (i + threadIdx.x <= lastcol) { ((unsigned long long *)shm)[i + threadIdx.x] = 0ull; } + if (i + threadIdx.x <= lastcol) { ((unsigned long long*)shm)[i + threadIdx.x] = 0ull; } } } else { for (int i = rbeg; i < rend; i += BDIM) { @@ -383,14 +387,14 @@ __global__ void tricnt_bsh_k(const ROW_T ner, } __syncthreads(); } - __cnt = block_sum_sh(__cnt, (uint64_t *)shm); + __cnt = block_sum_sh(__cnt, (uint64_t*)shm); if (threadIdx.x == 0) ocnt[blockIdx.x] = __cnt; return; } template -void tricnt_bsh(T nblock, spmat_t *m, uint64_t *ocnt_d, size_t bmld, cudaStream_t stream) +void tricnt_bsh(T nblock, spmat_t* m, uint64_t* ocnt_d, size_t bmld, cudaStream_t stream) { tricnt_bsh_k<<>>( m->nrows, m->rows_d, m->roff_d, m->cols_d, ocnt_d, bmld); @@ -408,11 +412,11 @@ template __global__ void tricnt_wrp_ps_k(const ROW_T ner, - const ROW_T *__restrict__ rows, - const OFF_T *__restrict__ roff, - const ROW_T *__restrict__ cols, - CNT_T *__restrict__ ocnt, - MAP_T *__restrict__ bmap, + const ROW_T* __restrict__ rows, + const OFF_T* __restrict__ roff, + const ROW_T* __restrict__ cols, + CNT_T* __restrict__ ocnt, + MAP_T* __restrict__ bmap, const size_t bmld) { __shared__ OFF_T 
sho[NWARP][WSIZE]; @@ -520,7 +524,7 @@ __global__ void tricnt_wrp_ps_k(const ROW_T ner, if (lastcol - firstcol < rend - rbeg) { for (int i = firstcol; i <= lastcol; i += WSIZE) { - if (i + threadIdx.x <= lastcol) { ((unsigned long long *)bmap)[i + threadIdx.x] = 0ull; } + if (i + threadIdx.x <= lastcol) { ((unsigned long long*)bmap)[i + threadIdx.x] = 0ull; } } } else { for (int i = rbeg; i < rend; i += WSIZE) { @@ -537,7 +541,7 @@ __global__ void tricnt_wrp_ps_k(const ROW_T ner, template void tricnt_wrp( - T nblock, spmat_t *m, uint64_t *ocnt_d, unsigned int *bmap_d, size_t bmld, cudaStream_t stream) + T nblock, spmat_t* m, uint64_t* ocnt_d, unsigned int* bmap_d, size_t bmld, cudaStream_t stream) { dim3 block(32, THREADS / 32); tricnt_wrp_ps_k<32, THREADS / 32, WP_LEN_TH1, WP_LEN_TH2> @@ -549,10 +553,10 @@ void tricnt_wrp( ////////////////////////////////////////////////////////////////////////////////////////// template __global__ void tricnt_thr_k(const ROW_T ner, - const ROW_T *__restrict__ rows, - const OFF_T *__restrict__ roff, - const ROW_T *__restrict__ cols, - CNT_T *__restrict__ ocnt) + const ROW_T* __restrict__ rows, + const OFF_T* __restrict__ roff, + const ROW_T* __restrict__ cols, + CNT_T* __restrict__ ocnt) { CNT_T __cnt = 0; const ROW_T tid = blockIdx.x * BDIM + threadIdx.x; @@ -619,7 +623,7 @@ __global__ void tricnt_thr_k(const ROW_T ner, } template -void tricnt_thr(T nblock, spmat_t *m, uint64_t *ocnt_d, cudaStream_t stream) +void tricnt_thr(T nblock, spmat_t* m, uint64_t* ocnt_d, cudaStream_t stream) { cudaFuncSetCacheConfig(tricnt_thr_k *m, uint64_t *ocnt_d, cudaStream_t stream) ///////////////////////////////////////////////////////////////// template struct NonEmptyRow { - const IndexType *p_roff; - __host__ __device__ NonEmptyRow(const IndexType *roff) : p_roff(roff) {} - __host__ __device__ __forceinline__ bool operator()(const IndexType &a) const + const IndexType* p_roff; + __host__ __device__ NonEmptyRow(const IndexType* roff) : p_roff(roff) {} + __host__ __device__ __forceinline__ bool operator()(const IndexType& a) const { return (p_roff[a] < p_roff[a + 1]); } @@ -647,7 +651,7 @@ struct NonEmptyRow { template void create_nondangling_vector( - const T *roff, T *p_nonempty, T *n_nonempty, size_t n, cudaStream_t stream) + const T* roff, T* p_nonempty, T* n_nonempty, size_t n, cudaStream_t stream) { if (n <= 0) return; thrust::counting_iterator it(0); @@ -660,7 +664,7 @@ void create_nondangling_vector( } template -uint64_t reduce(uint64_t *v_d, T n, cudaStream_t stream) +uint64_t reduce(uint64_t* v_d, T n, cudaStream_t stream) { rmm::device_vector tmp(1); @@ -694,8 +698,8 @@ class TrianglesCount { // Simple constructor TrianglesCount(IndexType num_vertices, IndexType num_edges, - IndexType const *row_offsets, - IndexType const *col_indices, + IndexType const* row_offsets, + IndexType const* col_indices, cudaStream_t stream = NULL); void count(); @@ -705,8 +709,8 @@ class TrianglesCount { template TrianglesCount::TrianglesCount(IndexType num_vertices, IndexType num_edges, - IndexType const *row_offsets, - IndexType const *col_indices, + IndexType const* row_offsets, + IndexType const* col_indices, cudaStream_t stream) : m_mat{num_vertices, num_edges, num_vertices, row_offsets, nullptr, col_indices}, m_stream{stream}, @@ -841,7 +845,7 @@ void TrianglesCount::count() } // namespace template -uint64_t triangle_count(legacy::GraphCSRView const &graph) +uint64_t triangle_count(legacy::GraphCSRView const& graph) { TrianglesCount counter( graph.number_of_vertices, 
graph.number_of_edges, graph.offsets, graph.indices); @@ -851,7 +855,7 @@ uint64_t triangle_count(legacy::GraphCSRView const &graph) } template uint64_t triangle_count( - legacy::GraphCSRView const &); + legacy::GraphCSRView const&); } // namespace triangle } // namespace cugraph diff --git a/cpp/src/components/connectivity.cu b/cpp/src/components/connectivity.cu index 0c0021d9e43..85134c1ad67 100644 --- a/cpp/src/components/connectivity.cu +++ b/cpp/src/components/connectivity.cu @@ -57,9 +57,9 @@ namespace detail { */ template std::enable_if_t::value> connected_components_impl( - legacy::GraphCSRView const &graph, + legacy::GraphCSRView const& graph, cugraph_cc_t connectivity_type, - VT *labels, + VT* labels, cudaStream_t stream) { using ByteT = unsigned char; // minimum addressable unit @@ -84,9 +84,9 @@ std::enable_if_t::value> connected_components_impl( } // namespace detail template -void connected_components(legacy::GraphCSRView const &graph, +void connected_components(legacy::GraphCSRView const& graph, cugraph_cc_t connectivity_type, - VT *labels) + VT* labels) { cudaStream_t stream{nullptr}; @@ -96,8 +96,8 @@ void connected_components(legacy::GraphCSRView const &graph, } template void connected_components( - legacy::GraphCSRView const &, cugraph_cc_t, int32_t *); + legacy::GraphCSRView const&, cugraph_cc_t, int32_t*); template void connected_components( - legacy::GraphCSRView const &, cugraph_cc_t, int64_t *); + legacy::GraphCSRView const&, cugraph_cc_t, int64_t*); } // namespace cugraph diff --git a/cpp/src/components/utils.h b/cpp/src/components/utils.h index 7b0e3042a97..4e7dbe075da 100644 --- a/cpp/src/components/utils.h +++ b/cpp/src/components/utils.h @@ -73,7 +73,9 @@ class Exception : public std::exception { return; } ///@todo: support for demangling of C++ symbol names - for (int i = 0; i < depth; ++i) { oss << "#" << i << " in " << strings.get()[i] << std::endl; } + for (int i = 0; i < depth; ++i) { + oss << "#" << i << " in " << strings.get()[i] << std::endl; + } msg += oss.str(); #endif // __GNUC__ } diff --git a/cpp/src/components/weak_cc.cuh b/cpp/src/components/weak_cc.cuh index d644a988117..e0da23c2ae8 100644 --- a/cpp/src/components/weak_cc.cuh +++ b/cpp/src/components/weak_cc.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -47,22 +47,22 @@ namespace Sparse { class WeakCCState { public: - bool *xa; - bool *fa; - bool *m; + bool* xa; + bool* fa; + bool* m; bool owner; - WeakCCState(bool *xa, bool *fa, bool *m) : xa(xa), fa(fa), m(m) {} + WeakCCState(bool* xa, bool* fa, bool* m) : xa(xa), fa(fa), m(m) {} }; template -__global__ void weak_cc_label_device(vertex_t *labels, - edge_t const *offsets, - vertex_t const *indices, +__global__ void weak_cc_label_device(vertex_t* labels, + edge_t const* offsets, + vertex_t const* indices, edge_t nnz, - bool *fa, - bool *xa, - bool *m, + bool* fa, + bool* xa, + bool* m, vertex_t startVertexId, vertex_t batchSize) { @@ -115,7 +115,7 @@ __global__ void weak_cc_label_device(vertex_t *labels, } template -__global__ void weak_cc_init_label_kernel(vertex_t *labels, +__global__ void weak_cc_init_label_kernel(vertex_t* labels, vertex_t startVertexId, vertex_t batchSize, vertex_t MAX_LABEL, @@ -132,7 +132,7 @@ __global__ void weak_cc_init_label_kernel(vertex_t *labels, template __global__ void weak_cc_init_all_kernel( - vertex_t *labels, bool *fa, bool *xa, vertex_t N, vertex_t MAX_LABEL) + vertex_t* labels, bool* fa, bool* xa, vertex_t N, vertex_t MAX_LABEL) { vertex_t tid = threadIdx.x + blockIdx.x * TPB_X; if (tid < N) { @@ -143,12 +143,12 @@ __global__ void weak_cc_init_all_kernel( } template -void weak_cc_label_batched(vertex_t *labels, - edge_t const *offsets, - vertex_t const *indices, +void weak_cc_label_batched(vertex_t* labels, + edge_t const* offsets, + vertex_t const* indices, edge_t nnz, vertex_t N, - WeakCCState &state, + WeakCCState& state, vertex_t startVertexId, vertex_t batchSize, cudaStream_t stream, @@ -217,14 +217,14 @@ template bool> -void weak_cc_batched(vertex_t *labels, - edge_t const *offsets, - vertex_t const *indices, +void weak_cc_batched(vertex_t* labels, + edge_t const* offsets, + vertex_t const* indices, edge_t nnz, vertex_t N, vertex_t startVertexId, vertex_t batchSize, - WeakCCState &state, + WeakCCState& state, cudaStream_t stream, Lambda filter_op) { @@ -273,9 +273,9 @@ template bool> -void weak_cc(vertex_t *labels, - edge_t const *offsets, - vertex_t const *indices, +void weak_cc(vertex_t* labels, + edge_t const* offsets, + vertex_t const* indices, edge_t nnz, vertex_t N, cudaStream_t stream, @@ -315,9 +315,9 @@ void weak_cc(vertex_t *labels, * @param stream Cuda stream to use */ template -void weak_cc_entry(vertex_t *labels, - edge_t const *offsets, - vertex_t const *indices, +void weak_cc_entry(vertex_t* labels, + edge_t const* offsets, + vertex_t const* indices, edge_t nnz, vertex_t N, cudaStream_t stream) diff --git a/cpp/src/components/weakly_connected_components.cu b/cpp/src/components/weakly_connected_components.cu index 848a6d9e615..57441cb9b4f 100644 --- a/cpp/src/components/weakly_connected_components.cu +++ b/cpp/src/components/weakly_connected_components.cu @@ -59,13 +59,13 @@ template std::tuple, typename GraphViewType::vertex_type, typename GraphViewType::edge_type> -accumulate_new_roots(raft::handle_t const &handle, +accumulate_new_roots(raft::handle_t const& handle, vertex_partition_device_view_t vertex_partition, - typename GraphViewType::vertex_type const *components, - typename GraphViewType::edge_type const *degrees, - typename GraphViewType::vertex_type const *candidate_first, - typename GraphViewType::vertex_type const *candidate_last, + typename GraphViewType::vertex_type const* components, + typename GraphViewType::edge_type const* degrees, + typename GraphViewType::vertex_type const* candidate_first, + typename 
GraphViewType::vertex_type const* candidate_last, typename GraphViewType::vertex_type max_new_roots, typename GraphViewType::edge_type degree_sum_threshold) { @@ -173,12 +173,12 @@ struct v_op_t { vertex_partition_device_view_t vertex_partition{}; - vertex_type *level_components{}; + vertex_type* level_components{}; decltype(thrust::make_zip_iterator(thrust::make_tuple( - static_cast(nullptr), static_cast(nullptr)))) edge_buffer_first{}; + static_cast(nullptr), static_cast(nullptr)))) edge_buffer_first{}; // FIXME: we can use cuda::atomic instead but currently on a system with x86 + GPU, this requires // placing the atomic barrier on managed memory and this adds additional complication. - size_t *num_edge_inserts{}; + size_t* num_edge_inserts{}; size_t next_bucket_idx{}; size_t conflict_bucket_idx{}; // relevant only if GraphViewType::is_multi_gpu is true @@ -214,9 +214,9 @@ struct v_op_t { }; template -void weakly_connected_components_impl(raft::handle_t const &handle, - GraphViewType const &push_graph_view, - typename GraphViewType::vertex_type *components, +void weakly_connected_components_impl(raft::handle_t const& handle, + GraphViewType const& push_graph_view, + typename GraphViewType::vertex_type* components, bool do_expensive_check) { using vertex_t = typename GraphViewType::vertex_type; @@ -346,7 +346,7 @@ void weakly_connected_components_impl(raft::handle_t const &handle, auto init_max_new_roots = max_new_roots; if (GraphViewType::is_multi_gpu) { - auto &comm = handle.get_comms(); + auto& comm = handle.get_comms(); auto const comm_rank = comm.get_rank(); auto const comm_size = comm.get_size(); @@ -401,7 +401,9 @@ void weakly_connected_components_impl(raft::handle_t const &handle, std::shuffle(gpuids.begin(), gpuids.end(), std::mt19937(rd())); gpuids.resize( std::max(static_cast(gpuids.size() * max_new_roots_ratio), vertex_t{1})); - for (size_t i = 0; i < gpuids.size(); ++i) { ++init_max_new_root_counts[gpuids[i]]; } + for (size_t i = 0; i < gpuids.size(); ++i) { + ++init_max_new_root_counts[gpuids[i]]; + } } else { std::fill(init_max_new_root_counts.begin(), init_max_new_root_counts.end(), @@ -560,7 +562,7 @@ void weakly_connected_components_impl(raft::handle_t const &handle, atomicCAS(col_components + col_offset, invalid_component_id::value, tag); if (old != invalid_component_id::value && old != tag) { // conflict static_assert(sizeof(unsigned long long int) == sizeof(size_t)); - auto edge_idx = atomicAdd(reinterpret_cast(num_edge_inserts), + auto edge_idx = atomicAdd(reinterpret_cast(num_edge_inserts), static_cast(1)); // keep only the edges in the lower triangular part *(edge_buffer_first + edge_idx) = @@ -582,7 +584,7 @@ void weakly_connected_components_impl(raft::handle_t const &handle, if (GraphViewType::is_multi_gpu) { auto cur_num_edge_inserts = num_edge_inserts.value(handle.get_stream_view()); - auto &conflict_bucket = vertex_frontier.get_bucket(static_cast(Bucket::conflict)); + auto& conflict_bucket = vertex_frontier.get_bucket(static_cast(Bucket::conflict)); resize_dataframe_buffer>( edge_buffer, cur_num_edge_inserts + conflict_bucket.size(), handle.get_stream()); thrust::for_each( @@ -599,7 +601,7 @@ void weakly_connected_components_impl(raft::handle_t const &handle, auto old = *(level_components + v_offset); auto tag = thrust::get<1>(tagged_v); static_assert(sizeof(unsigned long long int) == sizeof(size_t)); - auto edge_idx = atomicAdd(reinterpret_cast(num_edge_inserts), + auto edge_idx = atomicAdd(reinterpret_cast(num_edge_inserts), static_cast(1)); // keep only the 
edges in the lower triangular part *(edge_buffer_first + edge_idx) = @@ -663,7 +665,7 @@ void weakly_connected_components_impl(raft::handle_t const &handle, auto num_inserts = num_edge_inserts.value(handle.get_stream_view()); auto aggregate_num_inserts = num_inserts; if (GraphViewType::is_multi_gpu) { - auto &comm = handle.get_comms(); + auto& comm = handle.get_comms(); aggregate_num_inserts = host_scalar_allreduce(comm, num_inserts, handle.get_stream()); } @@ -681,11 +683,11 @@ void weakly_connected_components_impl(raft::handle_t const &handle, output_first); if (GraphViewType::is_multi_gpu) { - auto &comm = handle.get_comms(); + auto& comm = handle.get_comms(); auto const comm_size = comm.get_size(); - auto &row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); auto const row_comm_size = row_comm.get_size(); - auto &col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); auto const col_comm_size = col_comm.get_size(); std::tie(edge_buffer, std::ignore) = @@ -767,9 +769,9 @@ void weakly_connected_components_impl(raft::handle_t const &handle, template void weakly_connected_components( - raft::handle_t const &handle, - graph_view_t const &graph_view, - vertex_t *components, + raft::handle_t const& handle, + graph_view_t const& graph_view, + vertex_t* components, bool do_expensive_check) { weakly_connected_components_impl(handle, graph_view, components, do_expensive_check); @@ -778,75 +780,75 @@ void weakly_connected_components( // explicit instantiation template void weakly_connected_components( - raft::handle_t const &handle, - graph_view_t const &graph_view, - int32_t *components, + raft::handle_t const& handle, + graph_view_t const& graph_view, + int32_t* components, bool do_expensive_check); template void weakly_connected_components( - raft::handle_t const &handle, - graph_view_t const &graph_view, - int32_t *components, + raft::handle_t const& handle, + graph_view_t const& graph_view, + int32_t* components, bool do_expensive_check); template void weakly_connected_components( - raft::handle_t const &handle, - graph_view_t const &graph_view, - int32_t *components, + raft::handle_t const& handle, + graph_view_t const& graph_view, + int32_t* components, bool do_expensive_check); template void weakly_connected_components( - raft::handle_t const &handle, - graph_view_t const &graph_view, - int32_t *components, + raft::handle_t const& handle, + graph_view_t const& graph_view, + int32_t* components, bool do_expensive_check); template void weakly_connected_components( - raft::handle_t const &handle, - graph_view_t const &graph_view, - int32_t *components, + raft::handle_t const& handle, + graph_view_t const& graph_view, + int32_t* components, bool do_expensive_check); template void weakly_connected_components( - raft::handle_t const &handle, - graph_view_t const &graph_view, - int32_t *components, + raft::handle_t const& handle, + graph_view_t const& graph_view, + int32_t* components, bool do_expensive_check); template void weakly_connected_components( - raft::handle_t const &handle, - graph_view_t const &graph_view, - int32_t *components, + raft::handle_t const& handle, + graph_view_t const& graph_view, + int32_t* components, bool do_expensive_check); template void weakly_connected_components( - raft::handle_t const &handle, - graph_view_t const &graph_view, - int32_t 
*components, + raft::handle_t const& handle, + graph_view_t const& graph_view, + int32_t* components, bool do_expensive_check); template void weakly_connected_components( - raft::handle_t const &handle, - graph_view_t const &graph_view, - int64_t *components, + raft::handle_t const& handle, + graph_view_t const& graph_view, + int64_t* components, bool do_expensive_check); template void weakly_connected_components( - raft::handle_t const &handle, - graph_view_t const &graph_view, - int64_t *components, + raft::handle_t const& handle, + graph_view_t const& graph_view, + int64_t* components, bool do_expensive_check); template void weakly_connected_components( - raft::handle_t const &handle, - graph_view_t const &graph_view, - int64_t *components, + raft::handle_t const& handle, + graph_view_t const& graph_view, + int64_t* components, bool do_expensive_check); template void weakly_connected_components( - raft::handle_t const &handle, - graph_view_t const &graph_view, - int64_t *components, + raft::handle_t const& handle, + graph_view_t const& graph_view, + int64_t* components, bool do_expensive_check); } // namespace experimental diff --git a/cpp/src/converters/COOtoCSR.cu b/cpp/src/converters/COOtoCSR.cu index 2f6eac8ce8f..49986810539 100644 --- a/cpp/src/converters/COOtoCSR.cu +++ b/cpp/src/converters/COOtoCSR.cu @@ -21,64 +21,64 @@ namespace cugraph { // Explicit instantiation for uint32_t + float template std::unique_ptr> -coo_to_csr(legacy::GraphCOOView const &graph, - rmm::mr::device_memory_resource *); +coo_to_csr(legacy::GraphCOOView const& graph, + rmm::mr::device_memory_resource*); // Explicit instantiation for uint32_t + double template std::unique_ptr> coo_to_csr( - legacy::GraphCOOView const &graph, rmm::mr::device_memory_resource *); + legacy::GraphCOOView const& graph, rmm::mr::device_memory_resource*); // Explicit instantiation for int + float template std::unique_ptr> -coo_to_csr(legacy::GraphCOOView const &graph, - rmm::mr::device_memory_resource *); +coo_to_csr(legacy::GraphCOOView const& graph, + rmm::mr::device_memory_resource*); // Explicit instantiation for int + double template std::unique_ptr> -coo_to_csr(legacy::GraphCOOView const &graph, - rmm::mr::device_memory_resource *); +coo_to_csr(legacy::GraphCOOView const& graph, + rmm::mr::device_memory_resource*); // Explicit instantiation for int64_t + float template std::unique_ptr> -coo_to_csr(legacy::GraphCOOView const &graph, - rmm::mr::device_memory_resource *); +coo_to_csr(legacy::GraphCOOView const& graph, + rmm::mr::device_memory_resource*); // Explicit instantiation for int64_t + double template std::unique_ptr> -coo_to_csr(legacy::GraphCOOView const &graph, - rmm::mr::device_memory_resource *); +coo_to_csr(legacy::GraphCOOView const& graph, + rmm::mr::device_memory_resource*); // in-place versions: // // Explicit instantiation for uint32_t + float template void coo_to_csr_inplace( - legacy::GraphCOOView &graph, - legacy::GraphCSRView &result); + legacy::GraphCOOView& graph, + legacy::GraphCSRView& result); // Explicit instantiation for uint32_t + double template void coo_to_csr_inplace( - legacy::GraphCOOView &graph, - legacy::GraphCSRView &result); + legacy::GraphCOOView& graph, + legacy::GraphCSRView& result); // Explicit instantiation for int + float template void coo_to_csr_inplace( - legacy::GraphCOOView &graph, - legacy::GraphCSRView &result); + legacy::GraphCOOView& graph, + legacy::GraphCSRView& result); // Explicit instantiation for int + double template void coo_to_csr_inplace( - legacy::GraphCOOView 
&graph, - legacy::GraphCSRView &result); + legacy::GraphCOOView& graph, + legacy::GraphCSRView& result); // Explicit instantiation for int64_t + float template void coo_to_csr_inplace( - legacy::GraphCOOView &graph, - legacy::GraphCSRView &result); + legacy::GraphCOOView& graph, + legacy::GraphCSRView& result); // Explicit instantiation for int64_t + double template void coo_to_csr_inplace( - legacy::GraphCOOView &graph, - legacy::GraphCSRView &result); + legacy::GraphCOOView& graph, + legacy::GraphCSRView& result); } // namespace cugraph diff --git a/cpp/src/converters/COOtoCSR.cuh b/cpp/src/converters/COOtoCSR.cuh index 10899230b2a..641b037efdd 100644 --- a/cpp/src/converters/COOtoCSR.cuh +++ b/cpp/src/converters/COOtoCSR.cuh @@ -60,7 +60,7 @@ namespace detail { * @param[out] result Total number of vertices */ template -VT sort(legacy::GraphCOOView &graph, rmm::cuda_stream_view stream_view) +VT sort(legacy::GraphCOOView& graph, rmm::cuda_stream_view stream_view) { VT max_src_id; VT max_dst_id; @@ -97,8 +97,8 @@ VT sort(legacy::GraphCOOView &graph, rmm::cuda_stream_view stream_vi } template -void fill_offset(VT *source, - ET *offsets, +void fill_offset(VT* source, + ET* offsets, VT number_of_vertices, ET number_of_edges, rmm::cuda_stream_view stream_view) @@ -125,16 +125,16 @@ void fill_offset(VT *source, } template -rmm::device_buffer create_offset(VT *source, +rmm::device_buffer create_offset(VT* source, VT number_of_vertices, ET number_of_edges, rmm::cuda_stream_view stream_view, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) { // Offset array needs an extra element at the end to contain the ending offsets // of the last vertex rmm::device_buffer offsets_buffer(sizeof(ET) * (number_of_vertices + 1), stream_view, mr); - ET *offsets = static_cast(offsets_buffer.data()); + ET* offsets = static_cast(offsets_buffer.data()); fill_offset(source, offsets, number_of_vertices, number_of_edges, stream_view); @@ -145,7 +145,7 @@ rmm::device_buffer create_offset(VT *source, template std::unique_ptr> coo_to_csr( - legacy::GraphCOOView const &graph, rmm::mr::device_memory_resource *mr) + legacy::GraphCOOView const& graph, rmm::mr::device_memory_resource* mr) { rmm::cuda_stream_view stream_view; @@ -166,8 +166,8 @@ std::unique_ptr> coo_to_csr( } template -void coo_to_csr_inplace(legacy::GraphCOOView &graph, - legacy::GraphCSRView &result) +void coo_to_csr_inplace(legacy::GraphCOOView& graph, + legacy::GraphCSRView& result) { rmm::cuda_stream_view stream_view; @@ -190,64 +190,64 @@ void coo_to_csr_inplace(legacy::GraphCOOView &graph, // // EIDecl for uint32_t + float extern template std::unique_ptr> -coo_to_csr(legacy::GraphCOOView const &graph, - rmm::mr::device_memory_resource *); +coo_to_csr(legacy::GraphCOOView const& graph, + rmm::mr::device_memory_resource*); // EIDecl for uint32_t + double extern template std::unique_ptr> coo_to_csr( - legacy::GraphCOOView const &graph, rmm::mr::device_memory_resource *); + legacy::GraphCOOView const& graph, rmm::mr::device_memory_resource*); // EIDecl for int + float extern template std::unique_ptr> -coo_to_csr(legacy::GraphCOOView const &graph, - rmm::mr::device_memory_resource *); +coo_to_csr(legacy::GraphCOOView const& graph, + rmm::mr::device_memory_resource*); // EIDecl for int + double extern template std::unique_ptr> -coo_to_csr(legacy::GraphCOOView const &graph, - rmm::mr::device_memory_resource *); +coo_to_csr(legacy::GraphCOOView const& graph, + rmm::mr::device_memory_resource*); // EIDecl for int64_t + float extern 
template std::unique_ptr> -coo_to_csr(legacy::GraphCOOView const &graph, - rmm::mr::device_memory_resource *); +coo_to_csr(legacy::GraphCOOView const& graph, + rmm::mr::device_memory_resource*); // EIDecl for int64_t + double extern template std::unique_ptr> -coo_to_csr(legacy::GraphCOOView const &graph, - rmm::mr::device_memory_resource *); +coo_to_csr(legacy::GraphCOOView const& graph, + rmm::mr::device_memory_resource*); // in-place versions: // // EIDecl for uint32_t + float extern template void coo_to_csr_inplace( - legacy::GraphCOOView &graph, - legacy::GraphCSRView &result); + legacy::GraphCOOView& graph, + legacy::GraphCSRView& result); // EIDecl for uint32_t + double extern template void coo_to_csr_inplace( - legacy::GraphCOOView &graph, - legacy::GraphCSRView &result); + legacy::GraphCOOView& graph, + legacy::GraphCSRView& result); // EIDecl for int + float extern template void coo_to_csr_inplace( - legacy::GraphCOOView &graph, - legacy::GraphCSRView &result); + legacy::GraphCOOView& graph, + legacy::GraphCSRView& result); // EIDecl for int + double extern template void coo_to_csr_inplace( - legacy::GraphCOOView &graph, - legacy::GraphCSRView &result); + legacy::GraphCOOView& graph, + legacy::GraphCSRView& result); // EIDecl for int64_t + float extern template void coo_to_csr_inplace( - legacy::GraphCOOView &graph, - legacy::GraphCSRView &result); + legacy::GraphCOOView& graph, + legacy::GraphCSRView& result); // EIDecl for int64_t + double extern template void coo_to_csr_inplace( - legacy::GraphCOOView &graph, - legacy::GraphCSRView &result); + legacy::GraphCOOView& graph, + legacy::GraphCSRView& result); } // namespace cugraph diff --git a/cpp/src/converters/permute_graph.cuh b/cpp/src/converters/permute_graph.cuh index 5f9cd8d7d7f..024dfc2f3a7 100644 --- a/cpp/src/converters/permute_graph.cuh +++ b/cpp/src/converters/permute_graph.cuh @@ -24,8 +24,8 @@ namespace detail { template struct permutation_functor { - IdxT const *permutation; - permutation_functor(IdxT const *p) : permutation(p) {} + IdxT const* permutation; + permutation_functor(IdxT const* p) : permutation(p) {} __host__ __device__ IdxT operator()(IdxT in) const { return permutation[in]; } }; @@ -42,8 +42,8 @@ struct permutation_functor { * @return The permuted graph. 
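 * Vertex ids are remapped as v -> permutation[v], i.e. the mapping implemented by the
 * permutation_functor defined above, when the permuted graph is assembled.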
*/ template -void permute_graph(legacy::GraphCSRView const &graph, - vertex_t const *permutation, +void permute_graph(legacy::GraphCSRView const& graph, + vertex_t const* permutation, legacy::GraphCSRView result, cudaStream_t stream = 0) { @@ -52,9 +52,9 @@ void permute_graph(legacy::GraphCSRView const &graph rmm::device_vector dst_vertices_v(graph.number_of_edges); rmm::device_vector weights_v(graph.number_of_edges); - vertex_t *d_src = src_vertices_v.data().get(); - vertex_t *d_dst = dst_vertices_v.data().get(); - weight_t *d_weights = weights_v.data().get(); + vertex_t* d_src = src_vertices_v.data().get(); + vertex_t* d_dst = dst_vertices_v.data().get(); + weight_t* d_weights = weights_v.data().get(); graph.get_source_indices(d_src); diff --git a/cpp/src/cores/core_number.cu b/cpp/src/cores/core_number.cu index 74b3070ca8e..b23e7a25405 100644 --- a/cpp/src/cores/core_number.cu +++ b/cpp/src/cores/core_number.cu @@ -25,7 +25,7 @@ namespace cugraph { namespace detail { template -void core_number(legacy::GraphCSRView const &graph, int *core_number) +void core_number(legacy::GraphCSRView const& graph, int* core_number) { using HornetGraph = hornet::gpu::HornetStatic; using HornetInit = hornet::HornetInit; @@ -38,9 +38,9 @@ void core_number(legacy::GraphCSRView const &graph, int *core_number struct FilterEdges { int k; - int *core_number; + int* core_number; - FilterEdges(int _k, int *d_core_num) : k(_k), core_number(d_core_num) {} + FilterEdges(int _k, int* d_core_num) : k(_k), core_number(d_core_num) {} template __host__ __device__ bool operator()(T t) @@ -52,9 +52,9 @@ struct FilterEdges { }; template -void extract_edges(legacy::GraphCOOView const &i_graph, - legacy::GraphCOOView &o_graph, - VT *d_core, +void extract_edges(legacy::GraphCOOView const& i_graph, + legacy::GraphCOOView& o_graph, + VT* d_core, int k) { cudaStream_t stream{nullptr}; @@ -97,12 +97,12 @@ void extract_edges(legacy::GraphCOOView const &i_graph, // if core_num[s] and core_num[d] are greater than or equal to k. template std::unique_ptr> extract_subgraph( - legacy::GraphCOOView const &in_graph, - int const *vid, - int const *core_num, + legacy::GraphCOOView const& in_graph, + int const* vid, + int const* core_num, int k, int len, - rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { cudaStream_t stream{nullptr}; @@ -112,7 +112,7 @@ std::unique_ptr> extract_subgraph( thrust::scatter( rmm::exec_policy(stream)->on(stream), core_num, core_num + len, vid, sorted_core_num.begin()); - VT *d_sorted_core_num = sorted_core_num.data().get(); + VT* d_sorted_core_num = sorted_core_num.data().get(); // Count number of edges in the input graph that satisfy kcore conditions // i.e. 
core_num[src] and core_num[dst] are both greater than or equal to k @@ -138,19 +138,19 @@ std::unique_ptr> extract_subgraph( } // namespace detail template -void core_number(legacy::GraphCSRView const &graph, VT *core_number) +void core_number(legacy::GraphCSRView const& graph, VT* core_number) { return detail::core_number(graph, core_number); } template std::unique_ptr> k_core( - legacy::GraphCOOView const &in_graph, + legacy::GraphCOOView const& in_graph, int k, - VT const *vertex_id, - VT const *core_number, + VT const* vertex_id, + VT const* core_number, VT num_vertex_ids, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) { CUGRAPH_EXPECTS(vertex_id != nullptr, "Invalid input argument: vertex_id is NULL"); CUGRAPH_EXPECTS(core_number != nullptr, "Invalid input argument: core_number is NULL"); @@ -160,20 +160,20 @@ std::unique_ptr> k_core( } template void core_number( - legacy::GraphCSRView const &, int32_t *core_number); + legacy::GraphCSRView const&, int32_t* core_number); template std::unique_ptr> k_core( - legacy::GraphCOOView const &, + legacy::GraphCOOView const&, int, - int32_t const *, - int32_t const *, + int32_t const*, + int32_t const*, int32_t, - rmm::mr::device_memory_resource *); + rmm::mr::device_memory_resource*); template std::unique_ptr> -k_core(legacy::GraphCOOView const &, +k_core(legacy::GraphCOOView const&, int, - int32_t const *, - int32_t const *, + int32_t const*, + int32_t const*, int32_t, - rmm::mr::device_memory_resource *); + rmm::mr::device_memory_resource*); } // namespace cugraph diff --git a/cpp/src/experimental/bfs.cu b/cpp/src/experimental/bfs.cu index 24c86fea79f..903228a79f6 100644 --- a/cpp/src/experimental/bfs.cu +++ b/cpp/src/experimental/bfs.cu @@ -42,9 +42,9 @@ namespace experimental { namespace detail { template -void bfs(raft::handle_t const &handle, - GraphViewType const &push_graph_view, - typename GraphViewType::vertex_type *distances, +void bfs(raft::handle_t const& handle, + GraphViewType const& push_graph_view, + typename GraphViewType::vertex_type* distances, PredecessorIterator predecessor_first, typename GraphViewType::vertex_type source_vertex, bool direction_optimizing, @@ -164,10 +164,10 @@ void bfs(raft::handle_t const &handle, } // namespace detail template -void bfs(raft::handle_t const &handle, - graph_view_t const &graph_view, - vertex_t *distances, - vertex_t *predecessors, +void bfs(raft::handle_t const& handle, + graph_view_t const& graph_view, + vertex_t* distances, + vertex_t* predecessors, vertex_t source_vertex, bool direction_optimizing, vertex_t depth_limit, @@ -196,109 +196,109 @@ void bfs(raft::handle_t const &handle, // explicit instantiation -template void bfs(raft::handle_t const &handle, - graph_view_t const &graph_view, - int32_t *distances, - int32_t *predecessors, +template void bfs(raft::handle_t const& handle, + graph_view_t const& graph_view, + int32_t* distances, + int32_t* predecessors, int32_t source_vertex, bool direction_optimizing, int32_t depth_limit, bool do_expensive_check); -template void bfs(raft::handle_t const &handle, - graph_view_t const &graph_view, - int32_t *distances, - int32_t *predecessors, +template void bfs(raft::handle_t const& handle, + graph_view_t const& graph_view, + int32_t* distances, + int32_t* predecessors, int32_t source_vertex, bool direction_optimizing, int32_t depth_limit, bool do_expensive_check); -template void bfs(raft::handle_t const &handle, - graph_view_t const &graph_view, - int32_t *distances, - int32_t *predecessors, +template void 
bfs(raft::handle_t const& handle, + graph_view_t const& graph_view, + int32_t* distances, + int32_t* predecessors, int32_t source_vertex, bool direction_optimizing, int32_t depth_limit, bool do_expensive_check); -template void bfs(raft::handle_t const &handle, - graph_view_t const &graph_view, - int32_t *distances, - int32_t *predecessors, +template void bfs(raft::handle_t const& handle, + graph_view_t const& graph_view, + int32_t* distances, + int32_t* predecessors, int32_t source_vertex, bool direction_optimizing, int32_t depth_limit, bool do_expensive_check); -template void bfs(raft::handle_t const &handle, - graph_view_t const &graph_view, - int64_t *distances, - int64_t *predecessors, +template void bfs(raft::handle_t const& handle, + graph_view_t const& graph_view, + int64_t* distances, + int64_t* predecessors, int64_t source_vertex, bool direction_optimizing, int64_t depth_limit, bool do_expensive_check); -template void bfs(raft::handle_t const &handle, - graph_view_t const &graph_view, - int64_t *distances, - int64_t *predecessors, +template void bfs(raft::handle_t const& handle, + graph_view_t const& graph_view, + int64_t* distances, + int64_t* predecessors, int64_t source_vertex, bool direction_optimizing, int64_t depth_limit, bool do_expensive_check); -template void bfs(raft::handle_t const &handle, - graph_view_t const &graph_view, - int32_t *distances, - int32_t *predecessors, +template void bfs(raft::handle_t const& handle, + graph_view_t const& graph_view, + int32_t* distances, + int32_t* predecessors, int32_t source_vertex, bool direction_optimizing, int32_t depth_limit, bool do_expensive_check); -template void bfs(raft::handle_t const &handle, - graph_view_t const &graph_view, - int32_t *distances, - int32_t *predecessors, +template void bfs(raft::handle_t const& handle, + graph_view_t const& graph_view, + int32_t* distances, + int32_t* predecessors, int32_t source_vertex, bool direction_optimizing, int32_t depth_limit, bool do_expensive_check); -template void bfs(raft::handle_t const &handle, - graph_view_t const &graph_view, - int32_t *distances, - int32_t *predecessors, +template void bfs(raft::handle_t const& handle, + graph_view_t const& graph_view, + int32_t* distances, + int32_t* predecessors, int32_t source_vertex, bool direction_optimizing, int32_t depth_limit, bool do_expensive_check); -template void bfs(raft::handle_t const &handle, - graph_view_t const &graph_view, - int32_t *distances, - int32_t *predecessors, +template void bfs(raft::handle_t const& handle, + graph_view_t const& graph_view, + int32_t* distances, + int32_t* predecessors, int32_t source_vertex, bool direction_optimizing, int32_t depth_limit, bool do_expensive_check); -template void bfs(raft::handle_t const &handle, - graph_view_t const &graph_view, - int64_t *distances, - int64_t *predecessors, +template void bfs(raft::handle_t const& handle, + graph_view_t const& graph_view, + int64_t* distances, + int64_t* predecessors, int64_t source_vertex, bool direction_optimizing, int64_t depth_limit, bool do_expensive_check); -template void bfs(raft::handle_t const &handle, - graph_view_t const &graph_view, - int64_t *distances, - int64_t *predecessors, +template void bfs(raft::handle_t const& handle, + graph_view_t const& graph_view, + int64_t* distances, + int64_t* predecessors, int64_t source_vertex, bool direction_optimizing, int64_t depth_limit, diff --git a/cpp/src/experimental/coarsen_graph.cu b/cpp/src/experimental/coarsen_graph.cu index 573e7818c82..e785b770ff7 100644 --- 
a/cpp/src/experimental/coarsen_graph.cu +++ b/cpp/src/experimental/coarsen_graph.cu @@ -90,9 +90,9 @@ decompress_matrix_partition_to_edgelist( } template -edge_t groupby_e_and_coarsen_edgelist(vertex_t *edgelist_major_vertices /* [INOUT] */, - vertex_t *edgelist_minor_vertices /* [INOUT] */, - std::optional edgelist_weights /* [INOUT] */, +edge_t groupby_e_and_coarsen_edgelist(vertex_t* edgelist_major_vertices /* [INOUT] */, + vertex_t* edgelist_minor_vertices /* [INOUT] */, + std::optional edgelist_weights /* [INOUT] */, edge_t number_of_edges, cudaStream_t stream) { @@ -146,8 +146,8 @@ std::tuple, std::optional>> decompress_matrix_partition_to_relabeled_and_grouped_and_coarsened_edgelist( matrix_partition_device_view_t const matrix_partition, - vertex_t const *p_major_labels, - vertex_t const *p_minor_labels, + vertex_t const* p_major_labels, + vertex_t const* p_minor_labels, cudaStream_t stream) { // FIXME: it might be possible to directly create relabled & coarsened edgelist from the @@ -173,7 +173,7 @@ decompress_matrix_partition_to_relabeled_and_grouped_and_coarsened_edgelist( auto number_of_edges = groupby_e_and_coarsen_edgelist( edgelist_major_vertices.data(), edgelist_minor_vertices.data(), - edgelist_weights ? std::optional{(*edgelist_weights).data()} : std::nullopt, + edgelist_weights ? std::optional{(*edgelist_weights).data()} : std::nullopt, static_cast(edgelist_major_vertices.size()), stream); edgelist_major_vertices.resize(number_of_edges, stream); @@ -201,18 +201,18 @@ std::enable_if_t< std::tuple>, rmm::device_uvector>> coarsen_graph( - raft::handle_t const &handle, - graph_view_t const &graph_view, - vertex_t const *labels, + raft::handle_t const& handle, + graph_view_t const& graph_view, + vertex_t const* labels, bool do_expensive_check) { - auto &comm = handle.get_comms(); + auto& comm = handle.get_comms(); auto const comm_size = comm.get_size(); auto const comm_rank = comm.get_rank(); - auto &row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); auto const row_comm_size = row_comm.get_size(); auto const row_comm_rank = row_comm.get_rank(); - auto &col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); auto const col_comm_size = col_comm.get_size(); auto const col_comm_rank = col_comm.get_rank(); @@ -390,9 +390,8 @@ coarsen_graph( auto number_of_partition_edges = groupby_e_and_coarsen_edgelist( edgelist_major_vertices.begin() + h_displacements[j], edgelist_minor_vertices.begin() + h_displacements[j], - edgelist_weights - ? std::optional{(*edgelist_weights).data() + h_displacements[j]} - : std::nullopt, + edgelist_weights ? std::optional{(*edgelist_weights).data() + h_displacements[j]} + : std::nullopt, h_counts[j], handle.get_stream()); @@ -442,9 +441,8 @@ coarsen_graph( auto number_of_partition_edges = groupby_e_and_coarsen_edgelist( coarsened_edgelist_major_vertices[i].data(), coarsened_edgelist_minor_vertices[i].data(), - coarsened_edgelist_weights - ? std::optional{(*coarsened_edgelist_weights)[i].data()} - : std::nullopt, + coarsened_edgelist_weights ? 
std::optional{(*coarsened_edgelist_weights)[i].data()} + : std::nullopt, static_cast(coarsened_edgelist_major_vertices[i].size()), handle.get_stream()); coarsened_edgelist_major_vertices[i].resize(number_of_partition_edges, handle.get_stream()); @@ -508,8 +506,8 @@ coarsen_graph( edge_t number_of_edges{}; std::optional> segment_offsets{}; { - std::vector major_ptrs(coarsened_edgelist_major_vertices.size()); - std::vector minor_ptrs(major_ptrs.size()); + std::vector major_ptrs(coarsened_edgelist_major_vertices.size()); + std::vector minor_ptrs(major_ptrs.size()); std::vector counts(major_ptrs.size()); for (size_t i = 0; i < coarsened_edgelist_major_vertices.size(); ++i) { major_ptrs[i] = coarsened_edgelist_major_vertices[i].data(); @@ -519,7 +517,7 @@ coarsen_graph( std::tie(renumber_map_labels, partition, number_of_vertices, number_of_edges, segment_offsets) = renumber_edgelist( handle, - std::optional>{ + std::optional>{ std::make_tuple(unique_labels.data(), static_cast(unique_labels.size()))}, major_ptrs, minor_ptrs, @@ -538,7 +536,7 @@ coarsen_graph( : coarsened_edgelist_minor_vertices[i].data(); edgelists[i].p_edge_weights = coarsened_edgelist_weights - ? std::optional{(*coarsened_edgelist_weights)[i].data()} + ? std::optional{(*coarsened_edgelist_weights)[i].data()} : std::nullopt, edgelists[i].number_of_edges = static_cast(coarsened_edgelist_major_vertices[i].size()); } @@ -566,9 +564,9 @@ std::enable_if_t< std::tuple>, rmm::device_uvector>> coarsen_graph( - raft::handle_t const &handle, - graph_view_t const &graph_view, - vertex_t const *labels, + raft::handle_t const& handle, + graph_view_t const& graph_view, + vertex_t const* labels, bool do_expensive_check) { if (do_expensive_check) { @@ -603,7 +601,7 @@ coarsen_graph( auto [renumber_map_labels, segment_offsets] = renumber_edgelist( handle, - std::optional>{ + std::optional>{ std::make_tuple(unique_labels.data(), static_cast(unique_labels.size()))}, coarsened_edgelist_major_vertices.data(), coarsened_edgelist_minor_vertices.data(), @@ -611,14 +609,13 @@ coarsen_graph( do_expensive_check); edgelist_t edgelist{}; - edgelist.p_src_vertices = store_transposed ? coarsened_edgelist_minor_vertices.data() - : coarsened_edgelist_major_vertices.data(); - edgelist.p_dst_vertices = store_transposed ? coarsened_edgelist_major_vertices.data() - : coarsened_edgelist_minor_vertices.data(); - edgelist.p_edge_weights = - coarsened_edgelist_weights - ? std::optional{(*coarsened_edgelist_weights).data()} - : std::nullopt; + edgelist.p_src_vertices = store_transposed ? coarsened_edgelist_minor_vertices.data() + : coarsened_edgelist_major_vertices.data(); + edgelist.p_dst_vertices = store_transposed ? coarsened_edgelist_major_vertices.data() + : coarsened_edgelist_minor_vertices.data(); + edgelist.p_edge_weights = coarsened_edgelist_weights + ? 
std::optional{(*coarsened_edgelist_weights).data()} + : std::nullopt; edgelist.number_of_edges = static_cast(coarsened_edgelist_major_vertices.size()); return std::make_tuple( @@ -641,9 +638,9 @@ template >, rmm::device_uvector> coarsen_graph( - raft::handle_t const &handle, - graph_view_t const &graph_view, - vertex_t const *labels, + raft::handle_t const& handle, + graph_view_t const& graph_view, + vertex_t const* labels, bool do_expensive_check) { return detail::coarsen_graph(handle, graph_view, labels, do_expensive_check); @@ -653,170 +650,170 @@ coarsen_graph( template std::tuple>, rmm::device_uvector> -coarsen_graph(raft::handle_t const &handle, - graph_view_t const &graph_view, - int32_t const *labels, +coarsen_graph(raft::handle_t const& handle, + graph_view_t const& graph_view, + int32_t const* labels, bool do_expensive_check); template std::tuple>, rmm::device_uvector> -coarsen_graph(raft::handle_t const &handle, - graph_view_t const &graph_view, - int32_t const *labels, +coarsen_graph(raft::handle_t const& handle, + graph_view_t const& graph_view, + int32_t const* labels, bool do_expensive_check); template std::tuple>, rmm::device_uvector> -coarsen_graph(raft::handle_t const &handle, - graph_view_t const &graph_view, - int32_t const *labels, +coarsen_graph(raft::handle_t const& handle, + graph_view_t const& graph_view, + int32_t const* labels, bool do_expensive_check); template std::tuple>, rmm::device_uvector> -coarsen_graph(raft::handle_t const &handle, - graph_view_t const &graph_view, - int32_t const *labels, +coarsen_graph(raft::handle_t const& handle, + graph_view_t const& graph_view, + int32_t const* labels, bool do_expensive_check); template std::tuple>, rmm::device_uvector> -coarsen_graph(raft::handle_t const &handle, - graph_view_t const &graph_view, - int32_t const *labels, +coarsen_graph(raft::handle_t const& handle, + graph_view_t const& graph_view, + int32_t const* labels, bool do_expensive_check); template std::tuple>, rmm::device_uvector> -coarsen_graph(raft::handle_t const &handle, - graph_view_t const &graph_view, - int32_t const *labels, +coarsen_graph(raft::handle_t const& handle, + graph_view_t const& graph_view, + int32_t const* labels, bool do_expensive_check); template std::tuple>, rmm::device_uvector> -coarsen_graph(raft::handle_t const &handle, - graph_view_t const &graph_view, - int32_t const *labels, +coarsen_graph(raft::handle_t const& handle, + graph_view_t const& graph_view, + int32_t const* labels, bool do_expensive_check); template std::tuple>, rmm::device_uvector> -coarsen_graph(raft::handle_t const &handle, - graph_view_t const &graph_view, - int32_t const *labels, +coarsen_graph(raft::handle_t const& handle, + graph_view_t const& graph_view, + int32_t const* labels, bool do_expensive_check); template std::tuple>, rmm::device_uvector> -coarsen_graph(raft::handle_t const &handle, - graph_view_t const &graph_view, - int64_t const *labels, +coarsen_graph(raft::handle_t const& handle, + graph_view_t const& graph_view, + int64_t const* labels, bool do_expensive_check); template std::tuple>, rmm::device_uvector> -coarsen_graph(raft::handle_t const &handle, - graph_view_t const &graph_view, - int64_t const *labels, +coarsen_graph(raft::handle_t const& handle, + graph_view_t const& graph_view, + int64_t const* labels, bool do_expensive_check); template std::tuple>, rmm::device_uvector> -coarsen_graph(raft::handle_t const &handle, - graph_view_t const &graph_view, - int64_t const *labels, +coarsen_graph(raft::handle_t const& handle, + graph_view_t 
const& graph_view, + int64_t const* labels, bool do_expensive_check); template std::tuple>, rmm::device_uvector> -coarsen_graph(raft::handle_t const &handle, - graph_view_t const &graph_view, - int64_t const *labels, +coarsen_graph(raft::handle_t const& handle, + graph_view_t const& graph_view, + int64_t const* labels, bool do_expensive_check); template std::tuple>, rmm::device_uvector> -coarsen_graph(raft::handle_t const &handle, - graph_view_t const &graph_view, - int32_t const *labels, +coarsen_graph(raft::handle_t const& handle, + graph_view_t const& graph_view, + int32_t const* labels, bool do_expensive_check); template std::tuple>, rmm::device_uvector> -coarsen_graph(raft::handle_t const &handle, - graph_view_t const &graph_view, - int32_t const *labels, +coarsen_graph(raft::handle_t const& handle, + graph_view_t const& graph_view, + int32_t const* labels, bool do_expensive_check); template std::tuple>, rmm::device_uvector> -coarsen_graph(raft::handle_t const &handle, - graph_view_t const &graph_view, - int32_t const *labels, +coarsen_graph(raft::handle_t const& handle, + graph_view_t const& graph_view, + int32_t const* labels, bool do_expensive_check); template std::tuple>, rmm::device_uvector> -coarsen_graph(raft::handle_t const &handle, - graph_view_t const &graph_view, - int32_t const *labels, +coarsen_graph(raft::handle_t const& handle, + graph_view_t const& graph_view, + int32_t const* labels, bool do_expensive_check); template std::tuple>, rmm::device_uvector> -coarsen_graph(raft::handle_t const &handle, - graph_view_t const &graph_view, - int32_t const *labels, +coarsen_graph(raft::handle_t const& handle, + graph_view_t const& graph_view, + int32_t const* labels, bool do_expensive_check); template std::tuple>, rmm::device_uvector> -coarsen_graph(raft::handle_t const &handle, - graph_view_t const &graph_view, - int32_t const *labels, +coarsen_graph(raft::handle_t const& handle, + graph_view_t const& graph_view, + int32_t const* labels, bool do_expensive_check); template std::tuple>, rmm::device_uvector> -coarsen_graph(raft::handle_t const &handle, - graph_view_t const &graph_view, - int32_t const *labels, +coarsen_graph(raft::handle_t const& handle, + graph_view_t const& graph_view, + int32_t const* labels, bool do_expensive_check); template std::tuple>, rmm::device_uvector> -coarsen_graph(raft::handle_t const &handle, - graph_view_t const &graph_view, - int32_t const *labels, +coarsen_graph(raft::handle_t const& handle, + graph_view_t const& graph_view, + int32_t const* labels, bool do_expensive_check); template std::tuple>, rmm::device_uvector> -coarsen_graph(raft::handle_t const &handle, - graph_view_t const &graph_view, - int64_t const *labels, +coarsen_graph(raft::handle_t const& handle, + graph_view_t const& graph_view, + int64_t const* labels, bool do_expensive_check); template std::tuple>, rmm::device_uvector> -coarsen_graph(raft::handle_t const &handle, - graph_view_t const &graph_view, - int64_t const *labels, +coarsen_graph(raft::handle_t const& handle, + graph_view_t const& graph_view, + int64_t const* labels, bool do_expensive_check); template std::tuple>, rmm::device_uvector> -coarsen_graph(raft::handle_t const &handle, - graph_view_t const &graph_view, - int64_t const *labels, +coarsen_graph(raft::handle_t const& handle, + graph_view_t const& graph_view, + int64_t const* labels, bool do_expensive_check); template std::tuple>, rmm::device_uvector> -coarsen_graph(raft::handle_t const &handle, - graph_view_t const &graph_view, - int64_t const *labels, 
+coarsen_graph(raft::handle_t const& handle, + graph_view_t const& graph_view, + int64_t const* labels, bool do_expensive_check); } // namespace experimental diff --git a/cpp/src/experimental/graph.cu b/cpp/src/experimental/graph.cu index 8e51723fddf..a01537355df 100644 --- a/cpp/src/experimental/graph.cu +++ b/cpp/src/experimental/graph.cu @@ -65,7 +65,7 @@ std::tuple, rmm::device_uvector, std::optional>, std::optional>> -compress_edgelist(edgelist_t const &edgelist, +compress_edgelist(edgelist_t const& edgelist, vertex_t major_first, vertex_t major_hypersparse_first, vertex_t major_last, @@ -196,13 +196,13 @@ template graph_t>:: - graph_t(raft::handle_t const &handle, - std::vector> const &edgelists, - partition_t const &partition, + graph_t(raft::handle_t const& handle, + std::vector> const& edgelists, + partition_t const& partition, vertex_t number_of_vertices, edge_t number_of_edges, graph_properties_t properties, - std::optional> const &segment_offsets, + std::optional> const& segment_offsets, bool do_expensive_check) : detail::graph_base_t( handle, number_of_vertices, number_of_edges, properties), @@ -210,13 +210,13 @@ graph_tget_handle_ptr()->get_comms(); + auto& comm = this->get_handle_ptr()->get_comms(); auto const comm_size = comm.get_size(); - auto &row_comm = + auto& row_comm = this->get_handle_ptr()->get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); auto const row_comm_rank = row_comm.get_rank(); auto const row_comm_size = row_comm.get_size(); - auto &col_comm = + auto& col_comm = this->get_handle_ptr()->get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); auto const col_comm_rank = col_comm.get_rank(); auto const col_comm_size = col_comm.get_size(); @@ -368,11 +368,11 @@ template graph_t>:: - graph_t(raft::handle_t const &handle, - edgelist_t const &edgelist, + graph_t(raft::handle_t const& handle, + edgelist_t const& edgelist, vertex_t number_of_vertices, graph_properties_t properties, - std::optional> const &segment_offsets, + std::optional> const& segment_offsets, bool do_expensive_check) : detail::graph_base_t( handle, number_of_vertices, edgelist.number_of_edges, properties), diff --git a/cpp/src/experimental/induced_subgraph.cu b/cpp/src/experimental/induced_subgraph.cu index fe390690aa6..c43f81bbf39 100644 --- a/cpp/src/experimental/induced_subgraph.cu +++ b/cpp/src/experimental/induced_subgraph.cu @@ -47,10 +47,10 @@ std::tuple, std::optional>, rmm::device_uvector> extract_induced_subgraphs( - raft::handle_t const &handle, - graph_view_t const &graph_view, - size_t const *subgraph_offsets /* size == num_subgraphs + 1 */, - vertex_t const *subgraph_vertices /* size == subgraph_offsets[num_subgraphs] */, + raft::handle_t const& handle, + graph_view_t const& graph_view, + size_t const* subgraph_offsets /* size == num_subgraphs + 1 */, + vertex_t const* subgraph_vertices /* size == subgraph_offsets[num_subgraphs] */, size_t num_subgraphs, bool do_expensive_check) { @@ -147,8 +147,8 @@ extract_induced_subgraphs( auto subgraph_idx = thrust::distance( subgraph_offsets + 1, thrust::upper_bound(thrust::seq, subgraph_offsets, subgraph_offsets + num_subgraphs, i)); - vertex_t const *indices{nullptr}; - thrust::optional weights{thrust::nullopt}; + vertex_t const* indices{nullptr}; + thrust::optional weights{thrust::nullopt}; edge_t local_degree{}; auto major_offset = matrix_partition.get_major_offset_from_major_nocheck(subgraph_vertices[i]); @@ -199,14 +199,14 @@ extract_induced_subgraphs( subgraph_vertex_output_offsets = 
subgraph_vertex_output_offsets.data(), edge_majors = edge_majors.data(), edge_minors = edge_minors.data(), - edge_weights = edge_weights ? thrust::optional{(*edge_weights).data()} + edge_weights = edge_weights ? thrust::optional{(*edge_weights).data()} : thrust::nullopt] __device__(auto i) { auto subgraph_idx = thrust::distance( subgraph_offsets + 1, thrust::upper_bound( thrust::seq, subgraph_offsets, subgraph_offsets + num_subgraphs, size_t{i})); - vertex_t const *indices{nullptr}; - thrust::optional weights{thrust::nullopt}; + vertex_t const* indices{nullptr}; + thrust::optional weights{thrust::nullopt}; edge_t local_degree{}; auto major_offset = matrix_partition.get_major_offset_from_major_nocheck(subgraph_vertices[i]); @@ -269,10 +269,10 @@ template std::tuple, rmm::device_uvector, std::optional>, rmm::device_uvector> -extract_induced_subgraphs(raft::handle_t const &handle, - graph_view_t const &graph_view, - size_t const *subgraph_offsets, - int32_t const *subgraph_vertices, +extract_induced_subgraphs(raft::handle_t const& handle, + graph_view_t const& graph_view, + size_t const* subgraph_offsets, + int32_t const* subgraph_vertices, size_t num_subgraphs, bool do_expensive_check); @@ -280,10 +280,10 @@ template std::tuple, rmm::device_uvector, std::optional>, rmm::device_uvector> -extract_induced_subgraphs(raft::handle_t const &handle, - graph_view_t const &graph_view, - size_t const *subgraph_offsets, - int32_t const *subgraph_vertices, +extract_induced_subgraphs(raft::handle_t const& handle, + graph_view_t const& graph_view, + size_t const* subgraph_offsets, + int32_t const* subgraph_vertices, size_t num_subgraphs, bool do_expensive_check); @@ -291,10 +291,10 @@ template std::tuple, rmm::device_uvector, std::optional>, rmm::device_uvector> -extract_induced_subgraphs(raft::handle_t const &handle, - graph_view_t const &graph_view, - size_t const *subgraph_offsets, - int32_t const *subgraph_vertices, +extract_induced_subgraphs(raft::handle_t const& handle, + graph_view_t const& graph_view, + size_t const* subgraph_offsets, + int32_t const* subgraph_vertices, size_t num_subgraphs, bool do_expensive_check); @@ -302,10 +302,10 @@ template std::tuple, rmm::device_uvector, std::optional>, rmm::device_uvector> -extract_induced_subgraphs(raft::handle_t const &handle, - graph_view_t const &graph_view, - size_t const *subgraph_offsets, - int32_t const *subgraph_vertices, +extract_induced_subgraphs(raft::handle_t const& handle, + graph_view_t const& graph_view, + size_t const* subgraph_offsets, + int32_t const* subgraph_vertices, size_t num_subgraphs, bool do_expensive_check); @@ -313,10 +313,10 @@ template std::tuple, rmm::device_uvector, std::optional>, rmm::device_uvector> -extract_induced_subgraphs(raft::handle_t const &handle, - graph_view_t const &graph_view, - size_t const *subgraph_offsets, - int32_t const *subgraph_vertices, +extract_induced_subgraphs(raft::handle_t const& handle, + graph_view_t const& graph_view, + size_t const* subgraph_offsets, + int32_t const* subgraph_vertices, size_t num_subgraphs, bool do_expensive_check); @@ -324,10 +324,10 @@ template std::tuple, rmm::device_uvector, std::optional>, rmm::device_uvector> -extract_induced_subgraphs(raft::handle_t const &handle, - graph_view_t const &graph_view, - size_t const *subgraph_offsets, - int32_t const *subgraph_vertices, +extract_induced_subgraphs(raft::handle_t const& handle, + graph_view_t const& graph_view, + size_t const* subgraph_offsets, + int32_t const* subgraph_vertices, size_t num_subgraphs, bool 
do_expensive_check); @@ -335,10 +335,10 @@ template std::tuple, rmm::device_uvector, std::optional>, rmm::device_uvector> -extract_induced_subgraphs(raft::handle_t const &handle, - graph_view_t const &graph_view, - size_t const *subgraph_offsets, - int32_t const *subgraph_vertices, +extract_induced_subgraphs(raft::handle_t const& handle, + graph_view_t const& graph_view, + size_t const* subgraph_offsets, + int32_t const* subgraph_vertices, size_t num_subgraphs, bool do_expensive_check); @@ -346,10 +346,10 @@ template std::tuple, rmm::device_uvector, std::optional>, rmm::device_uvector> -extract_induced_subgraphs(raft::handle_t const &handle, - graph_view_t const &graph_view, - size_t const *subgraph_offsets, - int32_t const *subgraph_vertices, +extract_induced_subgraphs(raft::handle_t const& handle, + graph_view_t const& graph_view, + size_t const* subgraph_offsets, + int32_t const* subgraph_vertices, size_t num_subgraphs, bool do_expensive_check); @@ -357,10 +357,10 @@ template std::tuple, rmm::device_uvector, std::optional>, rmm::device_uvector> -extract_induced_subgraphs(raft::handle_t const &handle, - graph_view_t const &graph_view, - size_t const *subgraph_offsets, - int64_t const *subgraph_vertices, +extract_induced_subgraphs(raft::handle_t const& handle, + graph_view_t const& graph_view, + size_t const* subgraph_offsets, + int64_t const* subgraph_vertices, size_t num_subgraphs, bool do_expensive_check); @@ -368,10 +368,10 @@ template std::tuple, rmm::device_uvector, std::optional>, rmm::device_uvector> -extract_induced_subgraphs(raft::handle_t const &handle, - graph_view_t const &graph_view, - size_t const *subgraph_offsets, - int64_t const *subgraph_vertices, +extract_induced_subgraphs(raft::handle_t const& handle, + graph_view_t const& graph_view, + size_t const* subgraph_offsets, + int64_t const* subgraph_vertices, size_t num_subgraphs, bool do_expensive_check); @@ -379,10 +379,10 @@ template std::tuple, rmm::device_uvector, std::optional>, rmm::device_uvector> -extract_induced_subgraphs(raft::handle_t const &handle, - graph_view_t const &graph_view, - size_t const *subgraph_offsets, - int64_t const *subgraph_vertices, +extract_induced_subgraphs(raft::handle_t const& handle, + graph_view_t const& graph_view, + size_t const* subgraph_offsets, + int64_t const* subgraph_vertices, size_t num_subgraphs, bool do_expensive_check); @@ -390,10 +390,10 @@ template std::tuple, rmm::device_uvector, std::optional>, rmm::device_uvector> -extract_induced_subgraphs(raft::handle_t const &handle, - graph_view_t const &graph_view, - size_t const *subgraph_offsets, - int64_t const *subgraph_vertices, +extract_induced_subgraphs(raft::handle_t const& handle, + graph_view_t const& graph_view, + size_t const* subgraph_offsets, + int64_t const* subgraph_vertices, size_t num_subgraphs, bool do_expensive_check); diff --git a/cpp/src/experimental/katz_centrality.cu b/cpp/src/experimental/katz_centrality.cu index 515c49fda00..b8ab45277fc 100644 --- a/cpp/src/experimental/katz_centrality.cu +++ b/cpp/src/experimental/katz_centrality.cu @@ -36,10 +36,10 @@ namespace experimental { namespace detail { template -void katz_centrality(raft::handle_t const &handle, - GraphViewType const &pull_graph_view, - result_t const *betas, - result_t *katz_centralities, +void katz_centrality(raft::handle_t const& handle, + GraphViewType const& pull_graph_view, + result_t const* betas, + result_t* katz_centralities, result_t alpha, result_t beta, // relevant only if betas == nullptr result_t epsilon, @@ -171,10 +171,10 @@ void 
katz_centrality(raft::handle_t const &handle, } // namespace detail template -void katz_centrality(raft::handle_t const &handle, - graph_view_t const &graph_view, - result_t const *betas, - result_t *katz_centralities, +void katz_centrality(raft::handle_t const& handle, + graph_view_t const& graph_view, + result_t const* betas, + result_t* katz_centralities, result_t alpha, result_t beta, // relevant only if beta == nullptr result_t epsilon, @@ -198,10 +198,10 @@ void katz_centrality(raft::handle_t const &handle, // explicit instantiation -template void katz_centrality(raft::handle_t const &handle, - graph_view_t const &graph_view, - float const *betas, - float *katz_centralities, +template void katz_centrality(raft::handle_t const& handle, + graph_view_t const& graph_view, + float const* betas, + float* katz_centralities, float alpha, float beta, float epsilon, @@ -210,10 +210,10 @@ template void katz_centrality(raft::handle_t const &handle, bool normalize, bool do_expensive_check); -template void katz_centrality(raft::handle_t const &handle, - graph_view_t const &graph_view, - double const *betas, - double *katz_centralities, +template void katz_centrality(raft::handle_t const& handle, + graph_view_t const& graph_view, + double const* betas, + double* katz_centralities, double alpha, double beta, double epsilon, @@ -222,10 +222,10 @@ template void katz_centrality(raft::handle_t const &handle, bool normalize, bool do_expensive_check); -template void katz_centrality(raft::handle_t const &handle, - graph_view_t const &graph_view, - float const *betas, - float *katz_centralities, +template void katz_centrality(raft::handle_t const& handle, + graph_view_t const& graph_view, + float const* betas, + float* katz_centralities, float alpha, float beta, float epsilon, @@ -234,10 +234,10 @@ template void katz_centrality(raft::handle_t const &handle, bool normalize, bool do_expensive_check); -template void katz_centrality(raft::handle_t const &handle, - graph_view_t const &graph_view, - double const *betas, - double *katz_centralities, +template void katz_centrality(raft::handle_t const& handle, + graph_view_t const& graph_view, + double const* betas, + double* katz_centralities, double alpha, double beta, double epsilon, @@ -246,10 +246,10 @@ template void katz_centrality(raft::handle_t const &handle, bool normalize, bool do_expensive_check); -template void katz_centrality(raft::handle_t const &handle, - graph_view_t const &graph_view, - float const *betas, - float *katz_centralities, +template void katz_centrality(raft::handle_t const& handle, + graph_view_t const& graph_view, + float const* betas, + float* katz_centralities, float alpha, float beta, float epsilon, @@ -258,10 +258,10 @@ template void katz_centrality(raft::handle_t const &handle, bool normalize, bool do_expensive_check); -template void katz_centrality(raft::handle_t const &handle, - graph_view_t const &graph_view, - double const *betas, - double *katz_centralities, +template void katz_centrality(raft::handle_t const& handle, + graph_view_t const& graph_view, + double const* betas, + double* katz_centralities, double alpha, double beta, double epsilon, @@ -270,10 +270,10 @@ template void katz_centrality(raft::handle_t const &handle, bool normalize, bool do_expensive_check); -template void katz_centrality(raft::handle_t const &handle, - graph_view_t const &graph_view, - float const *betas, - float *katz_centralities, +template void katz_centrality(raft::handle_t const& handle, + graph_view_t const& graph_view, + float const* betas, + 
float* katz_centralities, float alpha, float beta, float epsilon, @@ -282,10 +282,10 @@ template void katz_centrality(raft::handle_t const &handle, bool normalize, bool do_expensive_check); -template void katz_centrality(raft::handle_t const &handle, - graph_view_t const &graph_view, - double const *betas, - double *katz_centralities, +template void katz_centrality(raft::handle_t const& handle, + graph_view_t const& graph_view, + double const* betas, + double* katz_centralities, double alpha, double beta, double epsilon, @@ -294,10 +294,10 @@ template void katz_centrality(raft::handle_t const &handle, bool normalize, bool do_expensive_check); -template void katz_centrality(raft::handle_t const &handle, - graph_view_t const &graph_view, - float const *betas, - float *katz_centralities, +template void katz_centrality(raft::handle_t const& handle, + graph_view_t const& graph_view, + float const* betas, + float* katz_centralities, float alpha, float beta, float epsilon, @@ -306,10 +306,10 @@ template void katz_centrality(raft::handle_t const &handle, bool normalize, bool do_expensive_check); -template void katz_centrality(raft::handle_t const &handle, - graph_view_t const &graph_view, - double const *betas, - double *katz_centralities, +template void katz_centrality(raft::handle_t const& handle, + graph_view_t const& graph_view, + double const* betas, + double* katz_centralities, double alpha, double beta, double epsilon, @@ -318,10 +318,10 @@ template void katz_centrality(raft::handle_t const &handle, bool normalize, bool do_expensive_check); -template void katz_centrality(raft::handle_t const &handle, - graph_view_t const &graph_view, - float const *betas, - float *katz_centralities, +template void katz_centrality(raft::handle_t const& handle, + graph_view_t const& graph_view, + float const* betas, + float* katz_centralities, float alpha, float beta, float epsilon, @@ -330,10 +330,10 @@ template void katz_centrality(raft::handle_t const &handle, bool normalize, bool do_expensive_check); -template void katz_centrality(raft::handle_t const &handle, - graph_view_t const &graph_view, - double const *betas, - double *katz_centralities, +template void katz_centrality(raft::handle_t const& handle, + graph_view_t const& graph_view, + double const* betas, + double* katz_centralities, double alpha, double beta, double epsilon, diff --git a/cpp/src/experimental/louvain.cuh b/cpp/src/experimental/louvain.cuh index c9e01157628..e3b5bf91ccc 100644 --- a/cpp/src/experimental/louvain.cuh +++ b/cpp/src/experimental/louvain.cuh @@ -53,7 +53,7 @@ class Louvain { graph_view_t::is_adj_matrix_transposed, graph_view_t::is_multi_gpu>; - Louvain(raft::handle_t const &handle, graph_view_t const &graph_view) + Louvain(raft::handle_t const& handle, graph_view_t const& graph_view) : #ifdef TIMING hr_timer_(), @@ -70,9 +70,9 @@ class Louvain { { } - Dendrogram const &get_dendrogram() const { return *dendrogram_; } + Dendrogram const& get_dendrogram() const { return *dendrogram_; } - Dendrogram &get_dendrogram() { return *dendrogram_; } + Dendrogram& get_dendrogram() { return *dendrogram_; } std::unique_ptr> move_dendrogram() { return std::move(dendrogram_); } @@ -111,7 +111,7 @@ class Louvain { } protected: - void timer_start(std::string const ®ion) + void timer_start(std::string const& region) { #ifdef TIMING if (graph_view_t::is_multi_gpu) { @@ -137,7 +137,7 @@ class Louvain { #endif } - void timer_display(std::ostream &os) + void timer_display(std::ostream& os) { #ifdef TIMING if (graph_view_t::is_multi_gpu) { @@ 
-243,7 +243,7 @@ class Louvain { } template - T *cache_src_vertex_properties(rmm::device_uvector &input, rmm::device_uvector &src_cache_v) + T* cache_src_vertex_properties(rmm::device_uvector& input, rmm::device_uvector& src_cache_v) { if (graph_view_t::is_multi_gpu) { src_cache_v.resize(current_graph_view_.get_number_of_local_adj_matrix_partition_rows(), @@ -256,7 +256,7 @@ class Louvain { } template - T *cache_dst_vertex_properties(rmm::device_uvector &input, rmm::device_uvector &dst_cache_v) + T* cache_dst_vertex_properties(rmm::device_uvector& input, rmm::device_uvector& dst_cache_v) { if (graph_view_t::is_multi_gpu) { dst_cache_v.resize(current_graph_view_.get_number_of_local_adj_matrix_partition_cols(), @@ -312,8 +312,8 @@ class Louvain { return cur_Q; } - void compute_cluster_sum_and_subtract(rmm::device_uvector &old_cluster_sum_v, - rmm::device_uvector &cluster_subtract_v) + void compute_cluster_sum_and_subtract(rmm::device_uvector& old_cluster_sum_v, + rmm::device_uvector& cluster_subtract_v) { auto output_buffer = cugraph::experimental::allocate_dataframe_buffer>( @@ -362,7 +362,7 @@ class Louvain { void update_by_delta_modularity(weight_t total_edge_weight, weight_t resolution, - rmm::device_uvector &next_cluster_v, + rmm::device_uvector& next_cluster_v, bool up_down) { rmm::device_uvector old_cluster_sum_v( @@ -378,9 +378,9 @@ class Louvain { cugraph::experimental::allocate_dataframe_buffer>( current_graph_view_.get_number_of_local_vertices(), handle_.get_stream()); - vertex_t *map_key_first; - vertex_t *map_key_last; - weight_t *map_value_first; + vertex_t* map_key_first; + vertex_t* map_key_last; + weight_t* map_value_first; if (graph_t::is_multi_gpu) { cugraph::experimental::detail::compute_gpu_id_from_vertex_t vertex_to_gpu_id_op{ @@ -521,8 +521,8 @@ class Louvain { relabel( handle_, - std::make_tuple(static_cast(numbering_map.begin()), - static_cast(numbering_indices.begin())), + std::make_tuple(static_cast(numbering_map.begin()), + static_cast(numbering_indices.begin())), current_graph_view_.get_number_of_local_vertices(), dendrogram_->current_level_begin(), dendrogram_->current_level_size(), @@ -532,7 +532,7 @@ class Louvain { } protected: - raft::handle_t const &handle_; + raft::handle_t const& handle_; std::unique_ptr> dendrogram_; @@ -551,9 +551,9 @@ class Louvain { rmm::device_uvector cluster_keys_v_; rmm::device_uvector cluster_weights_v_; - weight_t *d_src_vertex_weights_cache_; - vertex_t *d_src_cluster_cache_; - vertex_t *d_dst_cluster_cache_; + weight_t* d_src_vertex_weights_cache_; + vertex_t* d_src_cluster_cache_; + vertex_t* d_dst_cluster_cache_; #ifdef TIMING HighResTimer hr_timer_; diff --git a/cpp/src/experimental/pagerank.cu b/cpp/src/experimental/pagerank.cu index 64b80c14b2e..999a25b01c9 100644 --- a/cpp/src/experimental/pagerank.cu +++ b/cpp/src/experimental/pagerank.cu @@ -148,9 +148,9 @@ void pagerank( ? std::nullopt : std::optional>{ pull_graph_view.compute_out_weight_sums(handle)}; - auto vertex_out_weight_sums = precomputed_vertex_out_weight_sums - ? *precomputed_vertex_out_weight_sums - : (*tmp_vertex_out_weight_sums).data(); + auto vertex_out_weight_sums = precomputed_vertex_out_weight_sums + ? *precomputed_vertex_out_weight_sums + : (*tmp_vertex_out_weight_sums).data(); // 3. 
initialize pagerank values diff --git a/cpp/src/experimental/sssp.cu b/cpp/src/experimental/sssp.cu index 4c5dac6065f..b6f85ddb3c2 100644 --- a/cpp/src/experimental/sssp.cu +++ b/cpp/src/experimental/sssp.cu @@ -43,9 +43,9 @@ namespace experimental { namespace detail { template -void sssp(raft::handle_t const &handle, - GraphViewType const &push_graph_view, - typename GraphViewType::weight_type *distances, +void sssp(raft::handle_t const& handle, + GraphViewType const& push_graph_view, + typename GraphViewType::weight_type* distances, PredecessorIterator predecessor_first, typename GraphViewType::vertex_type source_vertex, typename GraphViewType::weight_type cutoff, @@ -201,9 +201,9 @@ void sssp(raft::handle_t const &handle, [near_far_threshold] __device__(auto v, auto v_val, auto pushed_val) { auto new_dist = thrust::get<0>(pushed_val); auto idx = new_dist < v_val - ? (new_dist < near_far_threshold ? static_cast(Bucket::next_near) - : static_cast(Bucket::far)) - : VertexFrontier::kInvalidBucketIdx; + ? (new_dist < near_far_threshold ? static_cast(Bucket::next_near) + : static_cast(Bucket::far)) + : VertexFrontier::kInvalidBucketIdx; return new_dist < v_val ? thrust::optional>{thrust::make_tuple( static_cast(new_dist < near_far_threshold ? Bucket::next_near @@ -261,10 +261,10 @@ void sssp(raft::handle_t const &handle, } // namespace detail template -void sssp(raft::handle_t const &handle, - graph_view_t const &graph_view, - weight_t *distances, - vertex_t *predecessors, +void sssp(raft::handle_t const& handle, + graph_view_t const& graph_view, + weight_t* distances, + vertex_t* predecessors, vertex_t source_vertex, weight_t cutoff, bool do_expensive_check) @@ -285,98 +285,98 @@ void sssp(raft::handle_t const &handle, // explicit instantiation -template void sssp(raft::handle_t const &handle, - graph_view_t const &graph_view, - float *distances, - int32_t *predecessors, +template void sssp(raft::handle_t const& handle, + graph_view_t const& graph_view, + float* distances, + int32_t* predecessors, int32_t source_vertex, float cutoff, bool do_expensive_check); -template void sssp(raft::handle_t const &handle, - graph_view_t const &graph_view, - double *distances, - int32_t *predecessors, +template void sssp(raft::handle_t const& handle, + graph_view_t const& graph_view, + double* distances, + int32_t* predecessors, int32_t source_vertex, double cutoff, bool do_expensive_check); -template void sssp(raft::handle_t const &handle, - graph_view_t const &graph_view, - float *distances, - int32_t *predecessors, +template void sssp(raft::handle_t const& handle, + graph_view_t const& graph_view, + float* distances, + int32_t* predecessors, int32_t source_vertex, float cutoff, bool do_expensive_check); -template void sssp(raft::handle_t const &handle, - graph_view_t const &graph_view, - double *distances, - int32_t *predecessors, +template void sssp(raft::handle_t const& handle, + graph_view_t const& graph_view, + double* distances, + int32_t* predecessors, int32_t source_vertex, double cutoff, bool do_expensive_check); -template void sssp(raft::handle_t const &handle, - graph_view_t const &graph_view, - float *distances, - int64_t *predecessors, +template void sssp(raft::handle_t const& handle, + graph_view_t const& graph_view, + float* distances, + int64_t* predecessors, int64_t source_vertex, float cutoff, bool do_expensive_check); -template void sssp(raft::handle_t const &handle, - graph_view_t const &graph_view, - double *distances, - int64_t *predecessors, +template void sssp(raft::handle_t 
const& handle, + graph_view_t const& graph_view, + double* distances, + int64_t* predecessors, int64_t source_vertex, double cutoff, bool do_expensive_check); -template void sssp(raft::handle_t const &handle, - graph_view_t const &graph_view, - float *distances, - int32_t *predecessors, +template void sssp(raft::handle_t const& handle, + graph_view_t const& graph_view, + float* distances, + int32_t* predecessors, int32_t source_vertex, float cutoff, bool do_expensive_check); -template void sssp(raft::handle_t const &handle, - graph_view_t const &graph_view, - double *distances, - int32_t *predecessors, +template void sssp(raft::handle_t const& handle, + graph_view_t const& graph_view, + double* distances, + int32_t* predecessors, int32_t source_vertex, double cutoff, bool do_expensive_check); -template void sssp(raft::handle_t const &handle, - graph_view_t const &graph_view, - float *distances, - int32_t *predecessors, +template void sssp(raft::handle_t const& handle, + graph_view_t const& graph_view, + float* distances, + int32_t* predecessors, int32_t source_vertex, float cutoff, bool do_expensive_check); -template void sssp(raft::handle_t const &handle, - graph_view_t const &graph_view, - double *distances, - int32_t *predecessors, +template void sssp(raft::handle_t const& handle, + graph_view_t const& graph_view, + double* distances, + int32_t* predecessors, int32_t source_vertex, double cutoff, bool do_expensive_check); -template void sssp(raft::handle_t const &handle, - graph_view_t const &graph_view, - float *distances, - int64_t *predecessors, +template void sssp(raft::handle_t const& handle, + graph_view_t const& graph_view, + float* distances, + int64_t* predecessors, int64_t source_vertex, float cutoff, bool do_expensive_check); -template void sssp(raft::handle_t const &handle, - graph_view_t const &graph_view, - double *distances, - int64_t *predecessors, +template void sssp(raft::handle_t const& handle, + graph_view_t const& graph_view, + double* distances, + int64_t* predecessors, int64_t source_vertex, double cutoff, bool do_expensive_check); diff --git a/cpp/src/generators/generator_tools.cu b/cpp/src/generators/generator_tools.cu index 3ebef13f3b1..375d96b0d99 100644 --- a/cpp/src/generators/generator_tools.cu +++ b/cpp/src/generators/generator_tools.cu @@ -33,17 +33,18 @@ namespace cugraph { namespace detail { template -rmm::device_uvector append_all(raft::handle_t const &handle, - std::vector> &&input) +rmm::device_uvector append_all(raft::handle_t const& handle, + std::vector>&& input) { size_t size{0}; // for (size_t i = 0; i < input.size(); ++i) size += input[i].size(); - for (auto &element : input) size += element.size(); + for (auto& element : input) + size += element.size(); rmm::device_uvector output(size, handle.get_stream()); auto output_iter = output.begin(); - for (auto &element : input) { + for (auto& element : input) { raft::copy(output_iter, element.begin(), element.size(), handle.get_stream()); output_iter += element.size(); } @@ -61,9 +62,9 @@ for (size_t i = 0; i < input.size(); ++i) { } // namespace detail template -void scramble_vertex_ids(raft::handle_t const &handle, - rmm::device_uvector &d_src_v, - rmm::device_uvector &d_dst_v, +void scramble_vertex_ids(raft::handle_t const& handle, + rmm::device_uvector& d_src_v, + rmm::device_uvector& d_dst_v, vertex_t vertex_id_offset, uint64_t seed) { @@ -85,10 +86,10 @@ template std::tuple, rmm::device_uvector, std::optional>> -combine_edgelists(raft::handle_t const &handle, - std::vector> &&sources, - 
std::vector> &&dests, - std::optional>> &&optional_d_weights, +combine_edgelists(raft::handle_t const& handle, + std::vector>&& sources, + std::vector>&& dests, + std::optional>>&& optional_d_weights, bool remove_multi_edges) { CUGRAPH_EXPECTS(sources.size() == dests.size(), @@ -190,10 +191,10 @@ template std::tuple, rmm::device_uvector, std::optional>> -symmetrize_edgelist(raft::handle_t const &handle, - rmm::device_uvector &&d_src_v, - rmm::device_uvector &&d_dst_v, - std::optional> &&optional_d_weights_v) +symmetrize_edgelist(raft::handle_t const& handle, + rmm::device_uvector&& d_src_v, + rmm::device_uvector&& d_dst_v, + std::optional>&& optional_d_weights_v) { auto offset = d_src_v.size(); d_src_v.resize(offset * 2, handle.get_stream_view()); @@ -220,82 +221,82 @@ symmetrize_edgelist(raft::handle_t const &handle, optional_d_weights_v ? std::move(optional_d_weights_v) : std::nullopt); } -template void scramble_vertex_ids(raft::handle_t const &handle, - rmm::device_uvector &d_src_v, - rmm::device_uvector &d_dst_v, +template void scramble_vertex_ids(raft::handle_t const& handle, + rmm::device_uvector& d_src_v, + rmm::device_uvector& d_dst_v, int32_t vertex_id_offset, uint64_t seed); -template void scramble_vertex_ids(raft::handle_t const &handle, - rmm::device_uvector &d_src_v, - rmm::device_uvector &d_dst_v, +template void scramble_vertex_ids(raft::handle_t const& handle, + rmm::device_uvector& d_src_v, + rmm::device_uvector& d_dst_v, int64_t vertex_id_offset, uint64_t seed); template std::tuple, rmm::device_uvector, std::optional>> -combine_edgelists(raft::handle_t const &handle, - std::vector> &&sources, - std::vector> &&dests, - std::optional>> &&optional_d_weights, +combine_edgelists(raft::handle_t const& handle, + std::vector>&& sources, + std::vector>&& dests, + std::optional>>&& optional_d_weights, bool remove_multi_edges); template std::tuple, rmm::device_uvector, std::optional>> -combine_edgelists(raft::handle_t const &handle, - std::vector> &&sources, - std::vector> &&dests, - std::optional>> &&optional_d_weights, +combine_edgelists(raft::handle_t const& handle, + std::vector>&& sources, + std::vector>&& dests, + std::optional>>&& optional_d_weights, bool remove_multi_edges); template std::tuple, rmm::device_uvector, std::optional>> -combine_edgelists(raft::handle_t const &handle, - std::vector> &&sources, - std::vector> &&dests, - std::optional>> &&optional_d_weights, +combine_edgelists(raft::handle_t const& handle, + std::vector>&& sources, + std::vector>&& dests, + std::optional>>&& optional_d_weights, bool remove_multi_edges); template std::tuple, rmm::device_uvector, std::optional>> -combine_edgelists(raft::handle_t const &handle, - std::vector> &&sources, - std::vector> &&dests, - std::optional>> &&optional_d_weights, +combine_edgelists(raft::handle_t const& handle, + std::vector>&& sources, + std::vector>&& dests, + std::optional>>&& optional_d_weights, bool remove_multi_edges); template std::tuple, rmm::device_uvector, std::optional>> -symmetrize_edgelist(raft::handle_t const &handle, - rmm::device_uvector &&d_src_v, - rmm::device_uvector &&d_dst_v, - std::optional> &&optional_d_weights_v); +symmetrize_edgelist(raft::handle_t const& handle, + rmm::device_uvector&& d_src_v, + rmm::device_uvector&& d_dst_v, + std::optional>&& optional_d_weights_v); template std::tuple, rmm::device_uvector, std::optional>> -symmetrize_edgelist(raft::handle_t const &handle, - rmm::device_uvector &&d_src_v, - rmm::device_uvector &&d_dst_v, - std::optional> &&optional_d_weights_v); 
+symmetrize_edgelist(raft::handle_t const& handle, + rmm::device_uvector&& d_src_v, + rmm::device_uvector&& d_dst_v, + std::optional>&& optional_d_weights_v); template std::tuple, rmm::device_uvector, std::optional>> -symmetrize_edgelist(raft::handle_t const &handle, - rmm::device_uvector &&d_src_v, - rmm::device_uvector &&d_dst_v, - std::optional> &&optional_d_weights_v); +symmetrize_edgelist(raft::handle_t const& handle, + rmm::device_uvector&& d_src_v, + rmm::device_uvector&& d_dst_v, + std::optional>&& optional_d_weights_v); template std::tuple, rmm::device_uvector, std::optional>> -symmetrize_edgelist(raft::handle_t const &handle, - rmm::device_uvector &&d_src_v, - rmm::device_uvector &&d_dst_v, - std::optional> &&optional_d_weights_v); +symmetrize_edgelist(raft::handle_t const& handle, + rmm::device_uvector&& d_src_v, + rmm::device_uvector&& d_dst_v, + std::optional>&& optional_d_weights_v); } // namespace cugraph diff --git a/cpp/src/layout/barnes_hut.hpp b/cpp/src/layout/barnes_hut.hpp index 2d9265be2d3..57abde2262d 100644 --- a/cpp/src/layout/barnes_hut.hpp +++ b/cpp/src/layout/barnes_hut.hpp @@ -36,12 +36,12 @@ namespace cugraph { namespace detail { template -void barnes_hut(raft::handle_t const &handle, - legacy::GraphCOOView &graph, - float *pos, +void barnes_hut(raft::handle_t const& handle, + legacy::GraphCOOView& graph, + float* pos, const int max_iter = 500, - float *x_start = nullptr, - float *y_start = nullptr, + float* x_start = nullptr, + float* y_start = nullptr, bool outbound_attraction_distribution = true, bool lin_log_mode = false, bool prevent_overlapping = false, @@ -52,7 +52,7 @@ void barnes_hut(raft::handle_t const &handle, bool strong_gravity_mode = false, const float gravity = 1.0, bool verbose = false, - internals::GraphBasedDimRedCallback *callback = nullptr) + internals::GraphBasedDimRedCallback* callback = nullptr) { rmm::cuda_stream_view stream_view(handle.get_stream_view()); const edge_t e = graph.number_of_edges; @@ -64,7 +64,8 @@ void barnes_hut(raft::handle_t const &handle, // We use the same array for nodes and cells. 
int nnodes = n * 2; if (nnodes < 1024 * blocks) nnodes = 1024 * blocks; - while ((nnodes & (32 - 1)) != 0) nnodes++; + while ((nnodes & (32 - 1)) != 0) + nnodes++; nnodes--; // Allocate more space @@ -74,10 +75,10 @@ void barnes_hut(raft::handle_t const &handle, rmm::device_uvector d_bottomd(1, stream_view); rmm::device_uvector d_radiusd(1, stream_view); - unsigned *limiter = d_limiter.data(); - int *maxdepthd = d_maxdepthd.data(); - int *bottomd = d_bottomd.data(); - float *radiusd = d_radiusd.data(); + unsigned* limiter = d_limiter.data(); + int* maxdepthd = d_maxdepthd.data(); + int* bottomd = d_bottomd.data(); + float* radiusd = d_radiusd.data(); InitializationKernel<<<1, 1, 0, stream_view.value()>>>(limiter, maxdepthd, radiusd); CHECK_CUDA(stream_view.value()); @@ -99,32 +100,32 @@ void barnes_hut(raft::handle_t const &handle, rmm::device_uvector d_minyl(blocks * FACTOR1, stream_view); // Actual mallocs - int *startl = d_startl.data(); - int *childl = d_childl.data(); - int *massl = d_massl.data(); + int* startl = d_startl.data(); + int* childl = d_childl.data(); + int* massl = d_massl.data(); - float *maxxl = d_maxxl.data(); - float *maxyl = d_maxyl.data(); - float *minxl = d_minxl.data(); - float *minyl = d_minyl.data(); + float* maxxl = d_maxxl.data(); + float* maxyl = d_maxyl.data(); + float* minxl = d_minxl.data(); + float* minyl = d_minyl.data(); // SummarizationKernel rmm::device_uvector d_countl(nnodes + 1, stream_view); - int *countl = d_countl.data(); + int* countl = d_countl.data(); // SortKernel rmm::device_uvector d_sortl(nnodes + 1, stream_view); - int *sortl = d_sortl.data(); + int* sortl = d_sortl.data(); // RepulsionKernel rmm::device_uvector d_rep_forces((nnodes + 1) * 2, stream_view); - float *rep_forces = d_rep_forces.data(); + float* rep_forces = d_rep_forces.data(); rmm::device_uvector d_radius_squared(1, stream_view); - float *radiusd_squared = d_radius_squared.data(); + float* radiusd_squared = d_radius_squared.data(); rmm::device_uvector d_nodes_pos((nnodes + 1) * 2, stream_view); - float *nodes_pos = d_nodes_pos.data(); + float* nodes_pos = d_nodes_pos.data(); // Initialize positions with random values int random_state = 0; @@ -139,10 +140,10 @@ void barnes_hut(raft::handle_t const &handle, } // Allocate arrays for force computation - float *attract{nullptr}; - float *old_forces{nullptr}; - float *swinging{nullptr}; - float *traction{nullptr}; + float* attract{nullptr}; + float* old_forces{nullptr}; + float* swinging{nullptr}; + float* traction{nullptr}; rmm::device_uvector d_attract(n * 2, stream_view); rmm::device_uvector d_old_forces(n * 2, stream_view); @@ -163,9 +164,9 @@ void barnes_hut(raft::handle_t const &handle, graph.degree(massl, cugraph::legacy::DegreeDirection::OUT); CHECK_CUDA(stream_view.value()); - const vertex_t *row = graph.src_indices; - const vertex_t *col = graph.dst_indices; - const weight_t *v = graph.edge_data; + const vertex_t* row = graph.src_indices; + const vertex_t* col = graph.dst_indices; + const weight_t* v = graph.edge_data; // Scalars used to adapt global speed. float speed = 1.f; diff --git a/cpp/src/layout/bh_kernels.hpp b/cpp/src/layout/bh_kernels.hpp index 5c170e147c9..8dbc08a6440 100644 --- a/cpp/src/layout/bh_kernels.hpp +++ b/cpp/src/layout/bh_kernels.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -42,9 +42,9 @@ namespace detail { /** * Intializes the states of objects. This speeds the overall kernel up. */ -__global__ void InitializationKernel(unsigned *restrict limiter, - int *restrict maxdepthd, - float *restrict radiusd) +__global__ void InitializationKernel(unsigned* restrict limiter, + int* restrict maxdepthd, + float* restrict radiusd) { maxdepthd[0] = 1; limiter[0] = 0; @@ -54,10 +54,10 @@ __global__ void InitializationKernel(unsigned *restrict limiter, /** * Reset root. */ -__global__ void ResetKernel(float *restrict radiusd_squared, - int *restrict bottomd, +__global__ void ResetKernel(float* restrict radiusd_squared, + int* restrict bottomd, const int NNODES, - const float *restrict radiusd) + const float* restrict radiusd) { radiusd_squared[0] = radiusd[0] * radiusd[0]; // create root node @@ -67,20 +67,20 @@ __global__ void ResetKernel(float *restrict radiusd_squared, /** * Figures the bounding boxes for every point in the embedding. */ -__global__ __launch_bounds__(THREADS1, FACTOR1) void BoundingBoxKernel(int *restrict startd, - int *restrict childd, - int *restrict massd, - float *restrict posxd, - float *restrict posyd, - float *restrict maxxd, - float *restrict maxyd, - float *restrict minxd, - float *restrict minyd, +__global__ __launch_bounds__(THREADS1, FACTOR1) void BoundingBoxKernel(int* restrict startd, + int* restrict childd, + int* restrict massd, + float* restrict posxd, + float* restrict posyd, + float* restrict maxxd, + float* restrict maxyd, + float* restrict minxd, + float* restrict minyd, const int FOUR_NNODES, const int NNODES, const int N, - unsigned *restrict limiter, - float *restrict radiusd) + unsigned* restrict limiter, + float* restrict radiusd) { float val, minx, maxx, miny, maxy; __shared__ float sminx[THREADS1], smaxx[THREADS1], sminy[THREADS1], smaxy[THREADS1]; @@ -150,14 +150,15 @@ __global__ __launch_bounds__(THREADS1, FACTOR1) void BoundingBoxKernel(int *rest posyd[NNODES] = (miny + maxy) * 0.5f; #pragma unroll - for (int a = 0; a < 4; a++) childd[FOUR_NNODES + a] = -1; + for (int a = 0; a < 4; a++) + childd[FOUR_NNODES + a] = -1; } } /** * Clear some of the state vectors up. */ -__global__ __launch_bounds__(1024, 1) void ClearKernel1(int *restrict childd, +__global__ __launch_bounds__(1024, 1) void ClearKernel1(int* restrict childd, const int FOUR_NNODES, const int FOUR_N) { @@ -167,21 +168,22 @@ __global__ __launch_bounds__(1024, 1) void ClearKernel1(int *restrict childd, // iterate over all cells assigned to thread #pragma unroll - for (; k < FOUR_NNODES; k += inc) childd[k] = -1; + for (; k < FOUR_NNODES; k += inc) + childd[k] = -1; } /** * Build the actual KD Tree. */ __global__ __launch_bounds__(THREADS2, - FACTOR2) void TreeBuildingKernel(int *restrict childd, - const float *restrict posxd, - const float *restrict posyd, + FACTOR2) void TreeBuildingKernel(int* restrict childd, + const float* restrict posxd, + const float* restrict posyd, const int NNODES, const int N, - int *restrict maxdepthd, - int *restrict bottomd, - const float *restrict radiusd) + int* restrict maxdepthd, + int* restrict bottomd, + const float* restrict radiusd) { int j, depth; float x, y, r; @@ -294,10 +296,10 @@ __global__ __launch_bounds__(THREADS2, /** * Clean more state vectors. 
*/ -__global__ __launch_bounds__(1024, 1) void ClearKernel2(int *restrict startd, - int *restrict massd, +__global__ __launch_bounds__(1024, 1) void ClearKernel2(int* restrict startd, + int* restrict massd, const int NNODES, - const int *restrict bottomd) + const int* restrict bottomd) { const int bottom = bottomd[0]; const int inc = blockDim.x * gridDim.x; @@ -316,14 +318,14 @@ __global__ __launch_bounds__(1024, 1) void ClearKernel2(int *restrict startd, * Summarize the KD Tree via cell gathering */ __global__ __launch_bounds__(THREADS3, - FACTOR3) void SummarizationKernel(int *restrict countd, - const int *restrict childd, - volatile int *restrict massd, - float *restrict posxd, - float *restrict posyd, + FACTOR3) void SummarizationKernel(int* restrict countd, + const int* restrict childd, + volatile int* restrict massd, + float* restrict posxd, + float* restrict posyd, const int NNODES, const int N, - const int *restrict bottomd) + const int* restrict bottomd) { bool flag = 0; float cm, px, py; @@ -451,13 +453,13 @@ __global__ __launch_bounds__(THREADS3, /** * Sort the cells */ -__global__ __launch_bounds__(THREADS4, FACTOR4) void SortKernel(int *restrict sortd, - const int *restrict countd, - volatile int *restrict startd, - int *restrict childd, +__global__ __launch_bounds__(THREADS4, FACTOR4) void SortKernel(int* restrict sortd, + const int* restrict countd, + volatile int* restrict startd, + int* restrict childd, const int NNODES, const int N, - const int *restrict bottomd) + const int* restrict bottomd) { const int bottom = bottomd[0]; const int dec = blockDim.x * gridDim.x; @@ -505,19 +507,19 @@ __global__ __launch_bounds__( const float scaling_ratio, const float theta, const float epssqd, // correction for zero distance - const int *restrict sortd, - const int *restrict childd, - const int *restrict massd, - const float *restrict posxd, - const float *restrict posyd, - float *restrict velxd, - float *restrict velyd, + const int* restrict sortd, + const int* restrict childd, + const int* restrict massd, + const float* restrict posxd, + const float* restrict posyd, + float* restrict velxd, + float* restrict velyd, const float theta_squared, const int NNODES, const int FOUR_NNODES, const int N, - const float *restrict radiusd_squared, - const int *restrict maxdepthd) + const float* restrict radiusd_squared, + const int* restrict maxdepthd) { __shared__ int pos[THREADS5], node[THREADS5]; __shared__ float dq[THREADS5]; @@ -611,15 +613,15 @@ __global__ __launch_bounds__( } __global__ __launch_bounds__(THREADS6, - FACTOR6) void apply_forces_bh(float *restrict Y_x, - float *restrict Y_y, - const float *restrict attract_x, - const float *restrict attract_y, - const float *restrict repel_x, - const float *restrict repel_y, - float *restrict old_dx, - float *restrict old_dy, - const float *restrict swinging, + FACTOR6) void apply_forces_bh(float* restrict Y_x, + float* restrict Y_y, + const float* restrict attract_x, + const float* restrict attract_y, + const float* restrict repel_x, + const float* restrict repel_y, + float* restrict old_dx, + float* restrict old_dy, + const float* restrict swinging, const float speed, const int n) { diff --git a/cpp/src/layout/exact_fa2.hpp b/cpp/src/layout/exact_fa2.hpp index 172ac5b8000..b908a8033e7 100644 --- a/cpp/src/layout/exact_fa2.hpp +++ b/cpp/src/layout/exact_fa2.hpp @@ -34,12 +34,12 @@ namespace cugraph { namespace detail { template -void exact_fa2(raft::handle_t const &handle, - legacy::GraphCOOView &graph, - float *pos, +void 
exact_fa2(raft::handle_t const& handle, + legacy::GraphCOOView& graph, + float* pos, const int max_iter = 500, - float *x_start = nullptr, - float *y_start = nullptr, + float* x_start = nullptr, + float* y_start = nullptr, bool outbound_attraction_distribution = true, bool lin_log_mode = false, bool prevent_overlapping = false, @@ -49,18 +49,18 @@ void exact_fa2(raft::handle_t const &handle, bool strong_gravity_mode = false, const float gravity = 1.0, bool verbose = false, - internals::GraphBasedDimRedCallback *callback = nullptr) + internals::GraphBasedDimRedCallback* callback = nullptr) { auto stream_view = handle.get_stream_view(); const edge_t e = graph.number_of_edges; const vertex_t n = graph.number_of_vertices; - float *d_repel{nullptr}; - float *d_attract{nullptr}; - float *d_old_forces{nullptr}; - int *d_mass{nullptr}; - float *d_swinging{nullptr}; - float *d_traction{nullptr}; + float* d_repel{nullptr}; + float* d_attract{nullptr}; + float* d_old_forces{nullptr}; + int* d_mass{nullptr}; + float* d_swinging{nullptr}; + float* d_traction{nullptr}; rmm::device_uvector repel(n * 2, stream_view); rmm::device_uvector attract(n * 2, stream_view); @@ -95,9 +95,9 @@ void exact_fa2(raft::handle_t const &handle, graph.degree(d_mass, cugraph::legacy::DegreeDirection::OUT); CHECK_CUDA(stream_view.value()); - const vertex_t *row = graph.src_indices; - const vertex_t *col = graph.dst_indices; - const weight_t *v = graph.edge_data; + const vertex_t* row = graph.src_indices; + const vertex_t* col = graph.dst_indices; + const weight_t* v = graph.edge_data; float speed = 1.f; float speed_efficiency = 1.f; diff --git a/cpp/src/layout/exact_repulsion.hpp b/cpp/src/layout/exact_repulsion.hpp index 583d5c81e30..49b4f46c5c3 100644 --- a/cpp/src/layout/exact_repulsion.hpp +++ b/cpp/src/layout/exact_repulsion.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -15,18 +15,18 @@ */ #pragma once -#define restrict __restrict__ +#define restrict __restrict__ #define CUDA_MAX_BLOCKS_2D 256 namespace cugraph { namespace detail { template -__global__ void repulsion_kernel(const float *restrict x_pos, - const float *restrict y_pos, - float *restrict repel_x, - float *restrict repel_y, - const int *restrict mass, +__global__ void repulsion_kernel(const float* restrict x_pos, + const float* restrict y_pos, + float* restrict repel_x, + float* restrict repel_y, + const int* restrict mass, const float scaling_ratio, const vertex_t n) { @@ -50,11 +50,11 @@ __global__ void repulsion_kernel(const float *restrict x_pos, } template -void apply_repulsion(const float *restrict x_pos, - const float *restrict y_pos, - float *restrict repel_x, - float *restrict repel_y, - const int *restrict mass, +void apply_repulsion(const float* restrict x_pos, + const float* restrict y_pos, + float* restrict repel_x, + float* restrict repel_y, + const int* restrict mass, const float scaling_ratio, const vertex_t n, cudaStream_t stream) diff --git a/cpp/src/layout/fa2_kernels.hpp b/cpp/src/layout/fa2_kernels.hpp index 9aec348cec5..c46b249628b 100644 --- a/cpp/src/layout/fa2_kernels.hpp +++ b/cpp/src/layout/fa2_kernels.hpp @@ -23,15 +23,15 @@ namespace cugraph { namespace detail { template -__global__ void attraction_kernel(const vertex_t *restrict row, - const vertex_t *restrict col, - const weight_t *restrict v, +__global__ void attraction_kernel(const vertex_t* restrict row, + const vertex_t* restrict col, + const weight_t* restrict v, const edge_t e, - const float *restrict x_pos, - const float *restrict y_pos, - float *restrict attract_x, - float *restrict attract_y, - const int *restrict mass, + const float* restrict x_pos, + const float* restrict y_pos, + float* restrict attract_x, + float* restrict attract_y, + const int* restrict mass, bool outbound_attraction_distribution, bool lin_log_mode, const float edge_weight_influence, @@ -71,15 +71,15 @@ __global__ void attraction_kernel(const vertex_t *restrict row, } template -void apply_attraction(const vertex_t *restrict row, - const vertex_t *restrict col, - const weight_t *restrict v, +void apply_attraction(const vertex_t* restrict row, + const vertex_t* restrict col, + const weight_t* restrict v, const edge_t e, - const float *restrict x_pos, - const float *restrict y_pos, - float *restrict attract_x, - float *restrict attract_y, - const int *restrict mass, + const float* restrict x_pos, + const float* restrict y_pos, + float* restrict attract_x, + float* restrict attract_y, + const int* restrict mass, bool outbound_attraction_distribution, bool lin_log_mode, const float edge_weight_influence, @@ -116,11 +116,11 @@ void apply_attraction(const vertex_t *restrict row, } template -__global__ void linear_gravity_kernel(const float *restrict x_pos, - const float *restrict y_pos, - float *restrict attract_x, - float *restrict attract_y, - const int *restrict mass, +__global__ void linear_gravity_kernel(const float* restrict x_pos, + const float* restrict y_pos, + float* restrict attract_x, + float* restrict attract_y, + const int* restrict mass, const float gravity, const vertex_t n) { @@ -136,11 +136,11 @@ __global__ void linear_gravity_kernel(const float *restrict x_pos, } template -__global__ void strong_gravity_kernel(const float *restrict x_pos, - const float *restrict y_pos, - float *restrict attract_x, - float *restrict attract_y, - const int *restrict mass, +__global__ void strong_gravity_kernel(const float* restrict 
x_pos, + const float* restrict y_pos, + float* restrict attract_x, + float* restrict attract_y, + const int* restrict mass, const float gravity, const float scaling_ratio, const vertex_t n) @@ -157,11 +157,11 @@ __global__ void strong_gravity_kernel(const float *restrict x_pos, } template -void apply_gravity(const float *restrict x_pos, - const float *restrict y_pos, - float *restrict attract_x, - float *restrict attract_y, - const int *restrict mass, +void apply_gravity(const float* restrict x_pos, + const float* restrict y_pos, + float* restrict attract_x, + float* restrict attract_y, + const int* restrict mass, const float gravity, bool strong_gravity_mode, const float scaling_ratio, @@ -187,15 +187,15 @@ void apply_gravity(const float *restrict x_pos, } template -__global__ void local_speed_kernel(const float *restrict repel_x, - const float *restrict repel_y, - const float *restrict attract_x, - const float *restrict attract_y, - const float *restrict old_dx, - const float *restrict old_dy, - const int *restrict mass, - float *restrict swinging, - float *restrict traction, +__global__ void local_speed_kernel(const float* restrict repel_x, + const float* restrict repel_y, + const float* restrict attract_x, + const float* restrict attract_y, + const float* restrict old_dx, + const float* restrict old_dy, + const int* restrict mass, + float* restrict swinging, + float* restrict traction, const vertex_t n) { // For every node. @@ -210,15 +210,15 @@ __global__ void local_speed_kernel(const float *restrict repel_x, } template -void compute_local_speed(const float *restrict repel_x, - const float *restrict repel_y, - const float *restrict attract_x, - const float *restrict attract_y, - float *restrict old_dx, - float *restrict old_dy, - const int *restrict mass, - float *restrict swinging, - float *restrict traction, +void compute_local_speed(const float* restrict repel_x, + const float* restrict repel_y, + const float* restrict attract_x, + const float* restrict attract_y, + float* restrict old_dx, + float* restrict old_dy, + const int* restrict mass, + float* restrict swinging, + float* restrict traction, const vertex_t n, cudaStream_t stream) { @@ -237,9 +237,9 @@ void compute_local_speed(const float *restrict repel_x, template void adapt_speed(const float jitter_tolerance, - float *restrict jt, - float *restrict speed, - float *restrict speed_efficiency, + float* restrict jt, + float* restrict speed, + float* restrict speed_efficiency, const float s, const float t, const vertex_t n) @@ -272,15 +272,15 @@ void adapt_speed(const float jitter_tolerance, } template -__global__ void update_positions_kernel(float *restrict x_pos, - float *restrict y_pos, - const float *restrict repel_x, - const float *restrict repel_y, - const float *restrict attract_x, - const float *restrict attract_y, - float *restrict old_dx, - float *restrict old_dy, - const float *restrict swinging, +__global__ void update_positions_kernel(float* restrict x_pos, + float* restrict y_pos, + const float* restrict repel_x, + const float* restrict repel_y, + const float* restrict attract_x, + const float* restrict attract_y, + float* restrict old_dx, + float* restrict old_dy, + const float* restrict swinging, const float speed, const vertex_t n) { @@ -298,15 +298,15 @@ __global__ void update_positions_kernel(float *restrict x_pos, } template -void apply_forces(float *restrict x_pos, - float *restrict y_pos, - const float *restrict repel_x, - const float *restrict repel_y, - const float *restrict attract_x, - const float 
*restrict attract_y, - float *restrict old_dx, - float *restrict old_dy, - const float *restrict swinging, +void apply_forces(float* restrict x_pos, + float* restrict y_pos, + const float* restrict repel_x, + const float* restrict repel_y, + const float* restrict attract_x, + const float* restrict attract_y, + float* restrict old_dx, + float* restrict old_dy, + const float* restrict swinging, const float speed, const vertex_t n, cudaStream_t stream) diff --git a/cpp/src/layout/force_atlas2.cu b/cpp/src/layout/force_atlas2.cu index 50f01408775..c2db4607b40 100644 --- a/cpp/src/layout/force_atlas2.cu +++ b/cpp/src/layout/force_atlas2.cu @@ -20,12 +20,12 @@ namespace cugraph { template -void force_atlas2(raft::handle_t const &handle, - legacy::GraphCOOView &graph, - float *pos, +void force_atlas2(raft::handle_t const& handle, + legacy::GraphCOOView& graph, + float* pos, const int max_iter, - float *x_start, - float *y_start, + float* x_start, + float* y_start, bool outbound_attraction_distribution, bool lin_log_mode, bool prevent_overlapping, @@ -37,7 +37,7 @@ void force_atlas2(raft::handle_t const &handle, bool strong_gravity_mode, const float gravity, bool verbose, - internals::GraphBasedDimRedCallback *callback) + internals::GraphBasedDimRedCallback* callback) { CUGRAPH_EXPECTS(pos != nullptr, "Invalid input argument: pos array should be of size 2 * V"); CUGRAPH_EXPECTS(graph.number_of_vertices != 0, "Invalid input: Graph is empty"); @@ -80,12 +80,12 @@ void force_atlas2(raft::handle_t const &handle, } } -template void force_atlas2(raft::handle_t const &handle, - legacy::GraphCOOView &graph, - float *pos, +template void force_atlas2(raft::handle_t const& handle, + legacy::GraphCOOView& graph, + float* pos, const int max_iter, - float *x_start, - float *y_start, + float* x_start, + float* y_start, bool outbound_attraction_distribution, bool lin_log_mode, bool prevent_overlapping, @@ -97,14 +97,14 @@ template void force_atlas2(raft::handle_t const &handle, bool strong_gravity_mode, const float gravity, bool verbose, - internals::GraphBasedDimRedCallback *callback); + internals::GraphBasedDimRedCallback* callback); -template void force_atlas2(raft::handle_t const &handle, - legacy::GraphCOOView &graph, - float *pos, +template void force_atlas2(raft::handle_t const& handle, + legacy::GraphCOOView& graph, + float* pos, const int max_iter, - float *x_start, - float *y_start, + float* x_start, + float* y_start, bool outbound_attraction_distribution, bool lin_log_mode, bool prevent_overlapping, @@ -116,6 +116,6 @@ template void force_atlas2(raft::handle_t const &handle, bool strong_gravity_mode, const float gravity, bool verbose, - internals::GraphBasedDimRedCallback *callback); + internals::GraphBasedDimRedCallback* callback); } // namespace cugraph diff --git a/cpp/src/linear_assignment/hungarian.cu b/cpp/src/linear_assignment/hungarian.cu index e7db5082ca1..368e119e93c 100644 --- a/cpp/src/linear_assignment/hungarian.cu +++ b/cpp/src/linear_assignment/hungarian.cu @@ -56,11 +56,11 @@ double default_epsilon() } template -weight_t hungarian(raft::handle_t const &handle, +weight_t hungarian(raft::handle_t const& handle, index_t num_rows, index_t num_cols, - weight_t const *d_original_cost, - index_t *d_assignment, + weight_t const* d_original_cost, + index_t* d_assignment, weight_t epsilon) { if (num_rows == num_cols) { @@ -116,11 +116,11 @@ weight_t hungarian(raft::handle_t const &handle, } template -weight_t hungarian_sparse(raft::handle_t const &handle, - legacy::GraphCOOView const &graph, 
+weight_t hungarian_sparse(raft::handle_t const& handle, + legacy::GraphCOOView const& graph, vertex_t num_workers, - vertex_t const *workers, - vertex_t *assignment, + vertex_t const* workers, + vertex_t* assignment, weight_t epsilon) { CUGRAPH_EXPECTS(assignment != nullptr, "Invalid input argument: assignment pointer is NULL"); @@ -148,13 +148,13 @@ weight_t hungarian_sparse(raft::handle_t const &handle, rmm::device_uvector temp_tasks_v(graph.number_of_vertices, handle.get_stream_view()); rmm::device_uvector temp_workers_v(graph.number_of_vertices, handle.get_stream_view()); - weight_t *d_cost = cost_v.data(); - vertex_t *d_tasks = tasks_v.data(); - vertex_t *d_temp_tasks = temp_tasks_v.data(); - vertex_t *d_temp_workers = temp_workers_v.data(); - vertex_t *d_src_indices = graph.src_indices; - vertex_t *d_dst_indices = graph.dst_indices; - weight_t *d_edge_data = graph.edge_data; + weight_t* d_cost = cost_v.data(); + vertex_t* d_tasks = tasks_v.data(); + vertex_t* d_temp_tasks = temp_tasks_v.data(); + vertex_t* d_temp_workers = temp_workers_v.data(); + vertex_t* d_src_indices = graph.src_indices; + vertex_t* d_dst_indices = graph.dst_indices; + weight_t* d_edge_data = graph.edge_data; // // Renumber vertices internally. Workers will become @@ -232,7 +232,7 @@ weight_t hungarian_sparse(raft::handle_t const &handle, // bipartite matrix numbering // rmm::device_uvector temp_assignment_v(matrix_dimension, handle.get_stream_view()); - vertex_t *d_temp_assignment = temp_assignment_v.data(); + vertex_t* d_temp_assignment = temp_assignment_v.data(); weight_t min_cost = detail::hungarian( handle, matrix_dimension, matrix_dimension, d_cost, d_temp_assignment, epsilon); @@ -265,106 +265,106 @@ weight_t hungarian_sparse(raft::handle_t const &handle, } // namespace detail template -weight_t hungarian(raft::handle_t const &handle, - legacy::GraphCOOView const &graph, +weight_t hungarian(raft::handle_t const& handle, + legacy::GraphCOOView const& graph, vertex_t num_workers, - vertex_t const *workers, - vertex_t *assignment) + vertex_t const* workers, + vertex_t* assignment) { return detail::hungarian_sparse( handle, graph, num_workers, workers, assignment, detail::default_epsilon()); } template -weight_t hungarian(raft::handle_t const &handle, - legacy::GraphCOOView const &graph, +weight_t hungarian(raft::handle_t const& handle, + legacy::GraphCOOView const& graph, vertex_t num_workers, - vertex_t const *workers, - vertex_t *assignment, + vertex_t const* workers, + vertex_t* assignment, weight_t epsilon) { return detail::hungarian_sparse(handle, graph, num_workers, workers, assignment, epsilon); } template int32_t hungarian( - raft::handle_t const &, - legacy::GraphCOOView const &, + raft::handle_t const&, + legacy::GraphCOOView const&, int32_t, - int32_t const *, - int32_t *, + int32_t const*, + int32_t*, int32_t); template float hungarian( - raft::handle_t const &, - legacy::GraphCOOView const &, + raft::handle_t const&, + legacy::GraphCOOView const&, int32_t, - int32_t const *, - int32_t *, + int32_t const*, + int32_t*, float); template double hungarian( - raft::handle_t const &, - legacy::GraphCOOView const &, + raft::handle_t const&, + legacy::GraphCOOView const&, int32_t, - int32_t const *, - int32_t *, + int32_t const*, + int32_t*, double); template int32_t hungarian( - raft::handle_t const &, - legacy::GraphCOOView const &, + raft::handle_t const&, + legacy::GraphCOOView const&, int32_t, - int32_t const *, - int32_t *); + int32_t const*, + int32_t*); template float hungarian( - raft::handle_t 
const &, - legacy::GraphCOOView const &, + raft::handle_t const&, + legacy::GraphCOOView const&, int32_t, - int32_t const *, - int32_t *); + int32_t const*, + int32_t*); template double hungarian( - raft::handle_t const &, - legacy::GraphCOOView const &, + raft::handle_t const&, + legacy::GraphCOOView const&, int32_t, - int32_t const *, - int32_t *); + int32_t const*, + int32_t*); namespace dense { template -weight_t hungarian(raft::handle_t const &handle, - weight_t const *costs, +weight_t hungarian(raft::handle_t const& handle, + weight_t const* costs, index_t num_rows, index_t num_cols, - index_t *assignment) + index_t* assignment) { return detail::hungarian( handle, num_rows, num_cols, costs, assignment, detail::default_epsilon()); } template -weight_t hungarian(raft::handle_t const &handle, - weight_t const *costs, +weight_t hungarian(raft::handle_t const& handle, + weight_t const* costs, index_t num_rows, index_t num_cols, - index_t *assignment, + index_t* assignment, weight_t epsilon) { return detail::hungarian(handle, num_rows, num_cols, costs, assignment, epsilon); } template int32_t hungarian( - raft::handle_t const &, int32_t const *, int32_t, int32_t, int32_t *); + raft::handle_t const&, int32_t const*, int32_t, int32_t, int32_t*); template float hungarian( - raft::handle_t const &, float const *, int32_t, int32_t, int32_t *); + raft::handle_t const&, float const*, int32_t, int32_t, int32_t*); template double hungarian( - raft::handle_t const &, double const *, int32_t, int32_t, int32_t *); + raft::handle_t const&, double const*, int32_t, int32_t, int32_t*); template int32_t hungarian( - raft::handle_t const &, int32_t const *, int32_t, int32_t, int32_t *, int32_t); + raft::handle_t const&, int32_t const*, int32_t, int32_t, int32_t*, int32_t); template float hungarian( - raft::handle_t const &, float const *, int32_t, int32_t, int32_t *, float); + raft::handle_t const&, float const*, int32_t, int32_t, int32_t*, float); template double hungarian( - raft::handle_t const &, double const *, int32_t, int32_t, int32_t *, double); + raft::handle_t const&, double const*, int32_t, int32_t, int32_t*, double); } // namespace dense diff --git a/cpp/src/link_analysis/gunrock_hits.cpp b/cpp/src/link_analysis/gunrock_hits.cpp index 6b95418e6b6..a86210cc521 100644 --- a/cpp/src/link_analysis/gunrock_hits.cpp +++ b/cpp/src/link_analysis/gunrock_hits.cpp @@ -34,13 +34,13 @@ const int HOST{1}; // gunrock should expose the device constant at the API le const int DEVICE{2}; // gunrock should expose the device constant at the API level. 
template -void hits(cugraph::legacy::GraphCSRView const &graph, +void hits(cugraph::legacy::GraphCSRView const& graph, int max_iter, weight_t tolerance, - weight_t const *starting_value, + weight_t const* starting_value, bool normalized, - weight_t *hubs, - weight_t *authorities) + weight_t* hubs, + weight_t* authorities) { CUGRAPH_EXPECTS(hubs != nullptr, "Invalid input argument: hubs array should be of size V"); CUGRAPH_EXPECTS(authorities != nullptr, @@ -61,13 +61,13 @@ void hits(cugraph::legacy::GraphCSRView const &graph DEVICE); } -template void hits(cugraph::legacy::GraphCSRView const &, +template void hits(cugraph::legacy::GraphCSRView const&, int, float, - float const *, + float const*, bool, - float *, - float *); + float*, + float*); } // namespace gunrock } // namespace cugraph diff --git a/cpp/src/link_prediction/jaccard.cu b/cpp/src/link_prediction/jaccard.cu index 2e10cd7e8c3..25186a6492b 100644 --- a/cpp/src/link_prediction/jaccard.cu +++ b/cpp/src/link_prediction/jaccard.cu @@ -32,7 +32,7 @@ namespace detail { // Volume of neighboors (*weight_s) template __global__ void jaccard_row_sum( - vertex_t n, edge_t const *csrPtr, vertex_t const *csrInd, weight_t const *v, weight_t *work) + vertex_t n, edge_t const* csrPtr, vertex_t const* csrInd, weight_t const* v, weight_t* work) { vertex_t row; edge_t start, end, length; @@ -56,12 +56,12 @@ __global__ void jaccard_row_sum( // Volume of intersections (*weight_i) and cumulated volume of neighboors (*weight_s) template __global__ void jaccard_is(vertex_t n, - edge_t const *csrPtr, - vertex_t const *csrInd, - weight_t const *v, - weight_t *work, - weight_t *weight_i, - weight_t *weight_s) + edge_t const* csrPtr, + vertex_t const* csrInd, + weight_t const* v, + weight_t* work, + weight_t* weight_i, + weight_t* weight_s) { edge_t i, j, Ni, Nj; vertex_t row, col; @@ -120,14 +120,14 @@ __global__ void jaccard_is(vertex_t n, // Using list of node pairs template __global__ void jaccard_is_pairs(edge_t num_pairs, - edge_t const *csrPtr, - vertex_t const *csrInd, - vertex_t const *first_pair, - vertex_t const *second_pair, - weight_t const *v, - weight_t *work, - weight_t *weight_i, - weight_t *weight_s) + edge_t const* csrPtr, + vertex_t const* csrInd, + vertex_t const* first_pair, + vertex_t const* second_pair, + weight_t const* v, + weight_t* work, + weight_t* weight_i, + weight_t* weight_s) { edge_t i, idx, Ni, Nj, match; vertex_t row, col, ref, cur, ref_col, cur_col; @@ -184,9 +184,9 @@ __global__ void jaccard_is_pairs(edge_t num_pairs, // Jaccard weights (*weight) template __global__ void jaccard_jw(edge_t e, - weight_t const *weight_i, - weight_t const *weight_s, - weight_t *weight_j) + weight_t const* weight_i, + weight_t const* weight_s, + weight_t* weight_j) { edge_t j; weight_t Wi, Ws, Wu; @@ -202,13 +202,13 @@ __global__ void jaccard_jw(edge_t e, template int jaccard(vertex_t n, edge_t e, - edge_t const *csrPtr, - vertex_t const *csrInd, - weight_t const *weight_in, - weight_t *work, - weight_t *weight_i, - weight_t *weight_s, - weight_t *weight_j) + edge_t const* csrPtr, + vertex_t const* csrInd, + weight_t const* weight_in, + weight_t* work, + weight_t* weight_i, + weight_t* weight_s, + weight_t* weight_j) { rmm::cuda_stream_view stream_view; dim3 nthreads, nblocks; @@ -258,15 +258,15 @@ int jaccard(vertex_t n, template int jaccard_pairs(vertex_t n, edge_t num_pairs, - edge_t const *csrPtr, - vertex_t const *csrInd, - vertex_t const *first_pair, - vertex_t const *second_pair, - weight_t const *weight_in, - weight_t *work, - 
weight_t *weight_i, - weight_t *weight_s, - weight_t *weight_j) + edge_t const* csrPtr, + vertex_t const* csrInd, + vertex_t const* first_pair, + vertex_t const* second_pair, + weight_t const* weight_in, + weight_t* work, + weight_t* weight_i, + weight_t* weight_s, + weight_t* weight_j) { dim3 nthreads, nblocks; int y = 4; @@ -316,7 +316,7 @@ int jaccard_pairs(vertex_t n, } // namespace detail template -void jaccard(legacy::GraphCSRView const &graph, WT const *weights, WT *result) +void jaccard(legacy::GraphCSRView const& graph, WT const* weights, WT* result) { CUGRAPH_EXPECTS(result != nullptr, "Invalid input argument: result pointer is NULL"); @@ -348,12 +348,12 @@ void jaccard(legacy::GraphCSRView const &graph, WT const *weights, W } template -void jaccard_list(legacy::GraphCSRView const &graph, - WT const *weights, +void jaccard_list(legacy::GraphCSRView const& graph, + WT const* weights, ET num_pairs, - VT const *first, - VT const *second, - WT *result) + VT const* first, + VT const* second, + WT* result) { CUGRAPH_EXPECTS(result != nullptr, "Invalid input argument: result pointer is NULL"); CUGRAPH_EXPECTS(first != nullptr, "Invalid input argument: first is NULL"); @@ -390,41 +390,43 @@ void jaccard_list(legacy::GraphCSRView const &graph, } } -template void jaccard( - legacy::GraphCSRView const &, float const *, float *); +template void jaccard(legacy::GraphCSRView const&, + float const*, + float*); template void jaccard( - legacy::GraphCSRView const &, double const *, double *); -template void jaccard( - legacy::GraphCSRView const &, float const *, float *); + legacy::GraphCSRView const&, double const*, double*); +template void jaccard(legacy::GraphCSRView const&, + float const*, + float*); template void jaccard( - legacy::GraphCSRView const &, double const *, double *); + legacy::GraphCSRView const&, double const*, double*); template void jaccard_list( - legacy::GraphCSRView const &, - float const *, + legacy::GraphCSRView const&, + float const*, int32_t, - int32_t const *, - int32_t const *, - float *); + int32_t const*, + int32_t const*, + float*); template void jaccard_list( - legacy::GraphCSRView const &, - double const *, + legacy::GraphCSRView const&, + double const*, int32_t, - int32_t const *, - int32_t const *, - double *); + int32_t const*, + int32_t const*, + double*); template void jaccard_list( - legacy::GraphCSRView const &, - float const *, + legacy::GraphCSRView const&, + float const*, int64_t, - int64_t const *, - int64_t const *, - float *); + int64_t const*, + int64_t const*, + float*); template void jaccard_list( - legacy::GraphCSRView const &, - double const *, + legacy::GraphCSRView const&, + double const*, int64_t, - int64_t const *, - int64_t const *, - double *); + int64_t const*, + int64_t const*, + double*); } // namespace cugraph diff --git a/cpp/src/link_prediction/overlap.cu b/cpp/src/link_prediction/overlap.cu index f38c8326ca2..7b7470da7fc 100644 --- a/cpp/src/link_prediction/overlap.cu +++ b/cpp/src/link_prediction/overlap.cu @@ -31,7 +31,7 @@ namespace detail { // TODO: Identical kernel to jaccard_row_sum!! template __global__ void overlap_row_sum( - vertex_t n, edge_t const *csrPtr, vertex_t const *csrInd, weight_t const *v, weight_t *work) + vertex_t n, edge_t const* csrPtr, vertex_t const* csrInd, weight_t const* v, weight_t* work) { vertex_t row; edge_t start, end, length; @@ -56,12 +56,12 @@ __global__ void overlap_row_sum( // TODO: Identical kernel to jaccard_row_sum!! 
template __global__ void overlap_is(vertex_t n, - edge_t const *csrPtr, - vertex_t const *csrInd, - weight_t const *v, - weight_t *work, - weight_t *weight_i, - weight_t *weight_s) + edge_t const* csrPtr, + vertex_t const* csrInd, + weight_t const* v, + weight_t* work, + weight_t* weight_i, + weight_t* weight_s) { edge_t i, j, Ni, Nj; vertex_t row, col; @@ -121,14 +121,14 @@ __global__ void overlap_is(vertex_t n, // NOTE: NOT the same as jaccard template __global__ void overlap_is_pairs(edge_t num_pairs, - edge_t const *csrPtr, - vertex_t const *csrInd, - vertex_t const *first_pair, - vertex_t const *second_pair, - weight_t const *v, - weight_t *work, - weight_t *weight_i, - weight_t *weight_s) + edge_t const* csrPtr, + vertex_t const* csrInd, + vertex_t const* first_pair, + vertex_t const* second_pair, + weight_t const* v, + weight_t* work, + weight_t* weight_i, + weight_t* weight_s) { edge_t i, idx, Ni, Nj, match; vertex_t row, col, ref, cur, ref_col, cur_col; @@ -185,11 +185,11 @@ __global__ void overlap_is_pairs(edge_t num_pairs, // Overlap weights (*weight) template __global__ void overlap_jw(edge_t e, - edge_t const *csrPtr, - vertex_t const *csrInd, - weight_t *weight_i, - weight_t *weight_s, - weight_t *weight_j) + edge_t const* csrPtr, + vertex_t const* csrInd, + weight_t* weight_i, + weight_t* weight_s, + weight_t* weight_j) { edge_t j; weight_t Wi, Wu; @@ -204,13 +204,13 @@ __global__ void overlap_jw(edge_t e, template int overlap(vertex_t n, edge_t e, - edge_t const *csrPtr, - vertex_t const *csrInd, - weight_t const *weight_in, - weight_t *work, - weight_t *weight_i, - weight_t *weight_s, - weight_t *weight_j) + edge_t const* csrPtr, + vertex_t const* csrInd, + weight_t const* weight_in, + weight_t* work, + weight_t* weight_i, + weight_t* weight_s, + weight_t* weight_j) { dim3 nthreads, nblocks; int y = 4; @@ -259,15 +259,15 @@ int overlap(vertex_t n, template int overlap_pairs(vertex_t n, edge_t num_pairs, - edge_t const *csrPtr, - vertex_t const *csrInd, - vertex_t const *first_pair, - vertex_t const *second_pair, - weight_t const *weight_in, - weight_t *work, - weight_t *weight_i, - weight_t *weight_s, - weight_t *weight_j) + edge_t const* csrPtr, + vertex_t const* csrInd, + vertex_t const* first_pair, + vertex_t const* second_pair, + weight_t const* weight_in, + weight_t* work, + weight_t* weight_i, + weight_t* weight_s, + weight_t* weight_j) { dim3 nthreads, nblocks; int y = 4; @@ -314,7 +314,7 @@ int overlap_pairs(vertex_t n, } // namespace detail template -void overlap(legacy::GraphCSRView const &graph, WT const *weights, WT *result) +void overlap(legacy::GraphCSRView const& graph, WT const* weights, WT* result) { CUGRAPH_EXPECTS(result != nullptr, "Invalid input argument: result pointer is NULL"); @@ -346,12 +346,12 @@ void overlap(legacy::GraphCSRView const &graph, WT const *weights, W } template -void overlap_list(legacy::GraphCSRView const &graph, - WT const *weights, +void overlap_list(legacy::GraphCSRView const& graph, + WT const* weights, ET num_pairs, - VT const *first, - VT const *second, - WT *result) + VT const* first, + VT const* second, + WT* result) { CUGRAPH_EXPECTS(result != nullptr, "Invalid input argument: result pointer is NULL"); CUGRAPH_EXPECTS(first != nullptr, "Invalid input argument: first column is NULL"); @@ -388,41 +388,43 @@ void overlap_list(legacy::GraphCSRView const &graph, } } -template void overlap( - legacy::GraphCSRView const &, float const *, float *); +template void overlap(legacy::GraphCSRView const&, + float const*, + float*); 
template void overlap( - legacy::GraphCSRView const &, double const *, double *); -template void overlap( - legacy::GraphCSRView const &, float const *, float *); + legacy::GraphCSRView const&, double const*, double*); +template void overlap(legacy::GraphCSRView const&, + float const*, + float*); template void overlap( - legacy::GraphCSRView const &, double const *, double *); + legacy::GraphCSRView const&, double const*, double*); template void overlap_list( - legacy::GraphCSRView const &, - float const *, + legacy::GraphCSRView const&, + float const*, int32_t, - int32_t const *, - int32_t const *, - float *); + int32_t const*, + int32_t const*, + float*); template void overlap_list( - legacy::GraphCSRView const &, - double const *, + legacy::GraphCSRView const&, + double const*, int32_t, - int32_t const *, - int32_t const *, - double *); + int32_t const*, + int32_t const*, + double*); template void overlap_list( - legacy::GraphCSRView const &, - float const *, + legacy::GraphCSRView const&, + float const*, int64_t, - int64_t const *, - int64_t const *, - float *); + int64_t const*, + int64_t const*, + float*); template void overlap_list( - legacy::GraphCSRView const &, - double const *, + legacy::GraphCSRView const&, + double const*, int64_t, - int64_t const *, - int64_t const *, - double *); + int64_t const*, + int64_t const*, + double*); } // namespace cugraph diff --git a/cpp/src/sampling/random_walks.cuh b/cpp/src/sampling/random_walks.cuh index 361726acb56..c67f981ecf5 100644 --- a/cpp/src/sampling/random_walks.cuh +++ b/cpp/src/sampling/random_walks.cuh @@ -250,7 +250,7 @@ struct col_indx_extract_t{*values_} - : thrust::nullopt] __device__(auto indx, auto col_indx) { + : thrust::nullopt] __device__(auto indx, auto col_indx) { auto delta = ptr_d_sizes[indx] - 1; auto v_indx = ptr_d_coalesced_v[indx * max_depth + delta]; auto start_row = row_offsets[v_indx]; diff --git a/cpp/src/sampling/rw_traversals.hpp b/cpp/src/sampling/rw_traversals.hpp index c86f76fcc72..b2ba74e97a2 100644 --- a/cpp/src/sampling/rw_traversals.hpp +++ b/cpp/src/sampling/rw_traversals.hpp @@ -213,11 +213,11 @@ struct horizontal_traversal_t { random_engine_t::generate_random(handle, ptr_d_random, d_random.size(), seed0); - auto const* col_indices = graph.get_matrix_partition_view().get_indices(); - auto const* row_offsets = graph.get_matrix_partition_view().get_offsets(); - auto const* values = graph.get_matrix_partition_view().get_weights() - ? *(graph.get_matrix_partition_view().get_weights()) - : static_cast(nullptr); + auto const* col_indices = graph.get_matrix_partition_view().get_indices(); + auto const* row_offsets = graph.get_matrix_partition_view().get_offsets(); + auto const* values = graph.get_matrix_partition_view().get_weights() + ? 
*(graph.get_matrix_partition_view().get_weights()) + : static_cast(nullptr); auto* ptr_d_sizes = raw_ptr(d_paths_sz); auto const& d_cached_out_degs = rand_walker.get_out_degs(); diff --git a/cpp/src/structure/graph.cu b/cpp/src/structure/graph.cu index 192234380d9..e3bdd1d5c67 100644 --- a/cpp/src/structure/graph.cu +++ b/cpp/src/structure/graph.cu @@ -25,8 +25,8 @@ namespace { template void degree_from_offsets(vertex_t number_of_vertices, - edge_t const *offsets, - edge_t *degree, + edge_t const* offsets, + edge_t* degree, rmm::cuda_stream_view stream_view) { // Computes out-degree for x = 0 and x = 2 @@ -38,11 +38,11 @@ void degree_from_offsets(vertex_t number_of_vertices, } template -void degree_from_vertex_ids(const raft::handle_t *handle, +void degree_from_vertex_ids(const raft::handle_t* handle, vertex_t number_of_vertices, edge_t number_of_edges, - vertex_t const *indices, - edge_t *degree, + vertex_t const* indices, + edge_t* degree, rmm::cuda_stream_view stream_view) { thrust::for_each(rmm::exec_policy(stream_view), @@ -50,7 +50,7 @@ void degree_from_vertex_ids(const raft::handle_t *handle, thrust::make_counting_iterator(number_of_edges), [indices, degree] __device__(edge_t e) { atomicAdd(degree + indices[e], 1); }); if ((handle != nullptr) && (handle->comms_initialized())) { - auto &comm = handle->get_comms(); + auto& comm = handle->get_comms(); comm.allreduce(degree, degree, number_of_vertices, raft::comms::op_t::SUM, stream_view.value()); } } @@ -61,13 +61,13 @@ namespace cugraph { namespace legacy { template -void GraphViewBase::get_vertex_identifiers(VT *identifiers) const +void GraphViewBase::get_vertex_identifiers(VT* identifiers) const { cugraph::detail::sequence(number_of_vertices, identifiers); } template -void GraphCompressedSparseBaseView::get_source_indices(VT *src_indices) const +void GraphCompressedSparseBaseView::get_source_indices(VT* src_indices) const { CUGRAPH_EXPECTS(offsets != nullptr, "No graph specified"); cugraph::detail::offsets_to_indices( @@ -75,7 +75,7 @@ void GraphCompressedSparseBaseView::get_source_indices(VT *src_indic } template -void GraphCOOView::degree(ET *degree, DegreeDirection direction) const +void GraphCOOView::degree(ET* degree, DegreeDirection direction) const { // // NOTE: We assume offsets/indices are a CSR. If a CSC is passed @@ -112,7 +112,7 @@ void GraphCOOView::degree(ET *degree, DegreeDirection direction) con } template -void GraphCompressedSparseBaseView::degree(ET *degree, DegreeDirection direction) const +void GraphCompressedSparseBaseView::degree(ET* degree, DegreeDirection direction) const { // // NOTE: We assume offsets/indices are a CSR. 
If a CSC is passed diff --git a/cpp/src/traversal/bfs.cu b/cpp/src/traversal/bfs.cu index b4416b907e5..74a94ba0670 100644 --- a/cpp/src/traversal/bfs.cu +++ b/cpp/src/traversal/bfs.cu @@ -96,7 +96,7 @@ void BFS::setup() // Lets use this int* for the next 3 lines // Its dereferenced value is not initialized - so we dont care about what we // put in it - IndexType *d_nisolated = d_new_frontier_cnt; + IndexType* d_nisolated = d_new_frontier_cnt; cudaMemsetAsync(d_nisolated, 0, sizeof(IndexType), stream); // Computing isolated_bmap @@ -114,10 +114,10 @@ void BFS::setup() } template -void BFS::configure(IndexType *_distances, - IndexType *_predecessors, - double *_sp_counters, - int *_edge_mask) +void BFS::configure(IndexType* _distances, + IndexType* _predecessors, + double* _sp_counters, + int* _edge_mask) { distances = _distances; predecessors = _predecessors; @@ -473,11 +473,11 @@ template class BFS; // NOTE: SP counter increase extremely fast on large graph // It can easily reach 1e40~1e70 on GAP-road.mtx template -void bfs(raft::handle_t const &handle, - legacy::GraphCSRView const &graph, - VT *distances, - VT *predecessors, - double *sp_counters, +void bfs(raft::handle_t const& handle, + legacy::GraphCSRView const& graph, + VT* distances, + VT* predecessors, + double* sp_counters, const VT start_vertex, bool directed, bool mg_batch) @@ -497,8 +497,8 @@ void bfs(raft::handle_t const &handle, VT number_of_vertices = graph.number_of_vertices; ET number_of_edges = graph.number_of_edges; - const VT *indices_ptr = graph.indices; - const ET *offsets_ptr = graph.offsets; + const VT* indices_ptr = graph.indices; + const ET* offsets_ptr = graph.offsets; int alpha = 15; int beta = 18; @@ -512,66 +512,66 @@ void bfs(raft::handle_t const &handle, // Explicit Instantiation template void bfs( - raft::handle_t const &handle, - legacy::GraphCSRView const &graph, - uint32_t *distances, - uint32_t *predecessors, - double *sp_counters, + raft::handle_t const& handle, + legacy::GraphCSRView const& graph, + uint32_t* distances, + uint32_t* predecessors, + double* sp_counters, const uint32_t source_vertex, bool directed, bool mg_batch); // Explicit Instantiation template void bfs( - raft::handle_t const &handle, - legacy::GraphCSRView const &graph, - uint32_t *distances, - uint32_t *predecessors, - double *sp_counters, + raft::handle_t const& handle, + legacy::GraphCSRView const& graph, + uint32_t* distances, + uint32_t* predecessors, + double* sp_counters, const uint32_t source_vertex, bool directed, bool mg_batch); // Explicit Instantiation template void bfs( - raft::handle_t const &handle, - legacy::GraphCSRView const &graph, - int32_t *distances, - int32_t *predecessors, - double *sp_counters, + raft::handle_t const& handle, + legacy::GraphCSRView const& graph, + int32_t* distances, + int32_t* predecessors, + double* sp_counters, const int32_t source_vertex, bool directed, bool mg_batch); // Explicit Instantiation template void bfs( - raft::handle_t const &handle, - legacy::GraphCSRView const &graph, - int32_t *distances, - int32_t *predecessors, - double *sp_counters, + raft::handle_t const& handle, + legacy::GraphCSRView const& graph, + int32_t* distances, + int32_t* predecessors, + double* sp_counters, const int32_t source_vertex, bool directed, bool mg_batch); // Explicit Instantiation template void bfs( - raft::handle_t const &handle, - legacy::GraphCSRView const &graph, - int64_t *distances, - int64_t *predecessors, - double *sp_counters, + raft::handle_t const& handle, + legacy::GraphCSRView const& 
graph, + int64_t* distances, + int64_t* predecessors, + double* sp_counters, const int64_t source_vertex, bool directed, bool mg_batch); // Explicit Instantiation template void bfs( - raft::handle_t const &handle, - legacy::GraphCSRView const &graph, - int64_t *distances, - int64_t *predecessors, - double *sp_counters, + raft::handle_t const& handle, + legacy::GraphCSRView const& graph, + int64_t* distances, + int64_t* predecessors, + double* sp_counters, const int64_t source_vertex, bool directed, bool mg_batch); diff --git a/cpp/src/traversal/bfs.cuh b/cpp/src/traversal/bfs.cuh index 6457665ec09..6bf8e0d0197 100644 --- a/cpp/src/traversal/bfs.cuh +++ b/cpp/src/traversal/bfs.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. * * NVIDIA CORPORATION and its licensors retain all intellectual property * and proprietary rights in and to this software, related documentation @@ -25,8 +25,8 @@ template class BFS { private: IndexType number_of_vertices, number_of_edges; - const IndexType *row_offsets = nullptr; - const IndexType *col_indices = nullptr; + const IndexType* row_offsets = nullptr; + const IndexType* col_indices = nullptr; bool directed; bool deterministic; @@ -36,10 +36,10 @@ class BFS { bool computeDistances; bool computePredecessors; rmm::device_vector distances_vals; - IndexType *distances = nullptr; - IndexType *predecessors = nullptr; - double *sp_counters = nullptr; - int *edge_mask = nullptr; + IndexType* distances = nullptr; + IndexType* predecessors = nullptr; + double* sp_counters = nullptr; + int* edge_mask = nullptr; rmm::device_vector original_frontier; rmm::device_vector visited_bmap; @@ -53,16 +53,16 @@ class BFS { // Working data // For complete description of each, go to bfs.cu IndexType nisolated; - IndexType *frontier = nullptr; - IndexType *new_frontier = nullptr; - IndexType *frontier_vertex_degree = nullptr; - IndexType *exclusive_sum_frontier_vertex_degree = nullptr; - IndexType *unvisited_queue = nullptr; - IndexType *left_unvisited_queue = nullptr; - IndexType *d_new_frontier_cnt = nullptr; - IndexType *d_mu = nullptr; - IndexType *d_unvisited_cnt = nullptr; - IndexType *d_left_unvisited_cnt = nullptr; + IndexType* frontier = nullptr; + IndexType* new_frontier = nullptr; + IndexType* frontier_vertex_degree = nullptr; + IndexType* exclusive_sum_frontier_vertex_degree = nullptr; + IndexType* unvisited_queue = nullptr; + IndexType* left_unvisited_queue = nullptr; + IndexType* d_new_frontier_cnt = nullptr; + IndexType* d_mu = nullptr; + IndexType* d_unvisited_cnt = nullptr; + IndexType* d_left_unvisited_cnt = nullptr; IndexType vertices_bmap_size; @@ -80,8 +80,8 @@ class BFS { BFS(IndexType _number_of_vertices, IndexType _number_of_edges, - const IndexType *_row_offsets, - const IndexType *_col_indices, + const IndexType* _row_offsets, + const IndexType* _col_indices, bool _directed, IndexType _alpha, IndexType _beta, @@ -98,10 +98,10 @@ class BFS { setup(); } - void configure(IndexType *distances, - IndexType *predecessors, - double *sp_counters, - int *edge_mask); + void configure(IndexType* distances, + IndexType* predecessors, + double* sp_counters, + int* edge_mask); void traverse(IndexType source_vertex); }; diff --git a/cpp/src/traversal/bfs_kernels.cuh b/cpp/src/traversal/bfs_kernels.cuh index e55abaa8c1d..4e482b446ba 100644 --- a/cpp/src/traversal/bfs_kernels.cuh +++ b/cpp/src/traversal/bfs_kernels.cuh @@ -40,11 +40,11 @@ namespace bfs_kernels { 
// visited_bmap_nints = the visited_bmap is made of that number of ints template -__global__ void fill_unvisited_queue_kernel(int *visited_bmap, +__global__ void fill_unvisited_queue_kernel(int* visited_bmap, IndexType visited_bmap_nints, IndexType n, - IndexType *unvisited, - IndexType *unvisited_cnt) + IndexType* unvisited, + IndexType* unvisited_cnt) { typedef cub::BlockScan BlockScan; __shared__ typename BlockScan::TempStorage scan_temp_storage; @@ -118,8 +118,8 @@ __global__ void fill_unvisited_queue_kernel(int *visited_bmap, vec_v.z = v_idx * INT_SIZE + traversal::getNextZeroBit(thread_visited_int); vec_v.w = v_idx * INT_SIZE + traversal::getNextZeroBit(thread_visited_int); - typename traversal::vec_t::vec4 *unvisited_i4 = - reinterpret_cast::vec4 *>( + typename traversal::vec_t::vec4* unvisited_i4 = + reinterpret_cast::vec4*>( &unvisited[current_unvisited_index]); *unvisited_i4 = vec_v; @@ -131,8 +131,8 @@ __global__ void fill_unvisited_queue_kernel(int *visited_bmap, vec_v.x = v_idx * INT_SIZE + traversal::getNextZeroBit(thread_visited_int); vec_v.y = v_idx * INT_SIZE + traversal::getNextZeroBit(thread_visited_int); - typename traversal::vec_t::vec2 *unvisited_i2 = - reinterpret_cast::vec2 *>( + typename traversal::vec_t::vec2* unvisited_i2 = + reinterpret_cast::vec2*>( &unvisited[current_unvisited_index]); *unvisited_i2 = vec_v; @@ -152,11 +152,11 @@ __global__ void fill_unvisited_queue_kernel(int *visited_bmap, // Wrapper template -void fill_unvisited_queue(int *visited_bmap, +void fill_unvisited_queue(int* visited_bmap, IndexType visited_bmap_nints, IndexType n, - IndexType *unvisited, - IndexType *unvisited_cnt, + IndexType* unvisited, + IndexType* unvisited_cnt, cudaStream_t m_stream, bool deterministic) { @@ -181,11 +181,11 @@ void fill_unvisited_queue(int *visited_bmap, // template -__global__ void count_unvisited_edges_kernel(const IndexType *potentially_unvisited, +__global__ void count_unvisited_edges_kernel(const IndexType* potentially_unvisited, const IndexType potentially_unvisited_size, - const int *visited_bmap, - IndexType *degree_vertices, - IndexType *mu) + const int* visited_bmap, + IndexType* degree_vertices, + IndexType* mu) { typedef cub::BlockReduce BlockReduce; __shared__ typename BlockReduce::TempStorage reduce_temp_storage; @@ -214,11 +214,11 @@ __global__ void count_unvisited_edges_kernel(const IndexType *potentially_unvisi // Wrapper template -void count_unvisited_edges(const IndexType *potentially_unvisited, +void count_unvisited_edges(const IndexType* potentially_unvisited, const IndexType potentially_unvisited_size, - const int *visited_bmap, - IndexType *node_degree, - IndexType *mu, + const int* visited_bmap, + IndexType* node_degree, + IndexType* mu, cudaStream_t m_stream) { dim3 grid, block; @@ -246,19 +246,19 @@ void count_unvisited_edges(const IndexType *potentially_unvisited, // template -__global__ void main_bottomup_kernel(const IndexType *unvisited, +__global__ void main_bottomup_kernel(const IndexType* unvisited, const IndexType unvisited_size, - IndexType *left_unvisited, - IndexType *left_unvisited_cnt, - int *visited_bmap, - const IndexType *row_ptr, - const IndexType *col_ind, + IndexType* left_unvisited, + IndexType* left_unvisited_cnt, + int* visited_bmap, + const IndexType* row_ptr, + const IndexType* col_ind, IndexType lvl, - IndexType *new_frontier, - IndexType *new_frontier_cnt, - IndexType *distances, - IndexType *predecessors, - int *edge_mask) + IndexType* new_frontier, + IndexType* new_frontier_cnt, + IndexType* distances, + 
IndexType* predecessors, + int* edge_mask) { typedef cub::BlockDiscontinuity BlockDiscontinuity; typedef cub::WarpReduce WarpReduce; @@ -487,19 +487,19 @@ __global__ void main_bottomup_kernel(const IndexType *unvisited, } template -void bottom_up_main(IndexType *unvisited, +void bottom_up_main(IndexType* unvisited, IndexType unvisited_size, - IndexType *left_unvisited, - IndexType *d_left_unvisited_idx, - int *visited, - const IndexType *row_ptr, - const IndexType *col_ind, + IndexType* left_unvisited, + IndexType* d_left_unvisited_idx, + int* visited, + const IndexType* row_ptr, + const IndexType* col_ind, IndexType lvl, - IndexType *new_frontier, - IndexType *new_frontier_idx, - IndexType *distances, - IndexType *predecessors, - int *edge_mask, + IndexType* new_frontier, + IndexType* new_frontier_idx, + IndexType* distances, + IndexType* predecessors, + int* edge_mask, cudaStream_t m_stream, bool deterministic) { @@ -531,17 +531,17 @@ void bottom_up_main(IndexType *unvisited, // MAIN_BOTTOMUP_MAX_EDGES && no parent found // template -__global__ void bottom_up_large_degree_kernel(IndexType *left_unvisited, +__global__ void bottom_up_large_degree_kernel(IndexType* left_unvisited, IndexType left_unvisited_size, - int *visited, - const IndexType *row_ptr, - const IndexType *col_ind, + int* visited, + const IndexType* row_ptr, + const IndexType* col_ind, IndexType lvl, - IndexType *new_frontier, - IndexType *new_frontier_cnt, - IndexType *distances, - IndexType *predecessors, - int *edge_mask) + IndexType* new_frontier, + IndexType* new_frontier_cnt, + IndexType* distances, + IndexType* predecessors, + int* edge_mask) { int logical_lane_id = threadIdx.x % BOTTOM_UP_LOGICAL_WARP_SIZE; int logical_warp_id = threadIdx.x / BOTTOM_UP_LOGICAL_WARP_SIZE; @@ -610,17 +610,17 @@ __global__ void bottom_up_large_degree_kernel(IndexType *left_unvisited, } template -void bottom_up_large(IndexType *left_unvisited, +void bottom_up_large(IndexType* left_unvisited, IndexType left_unvisited_size, - int *visited, - const IndexType *row_ptr, - const IndexType *col_ind, + int* visited, + const IndexType* row_ptr, + const IndexType* col_ind, IndexType lvl, - IndexType *new_frontier, - IndexType *new_frontier_idx, - IndexType *distances, - IndexType *predecessors, - int *edge_mask, + IndexType* new_frontier, + IndexType* new_frontier_idx, + IndexType* distances, + IndexType* predecessors, + int* edge_mask, cudaStream_t m_stream, bool deterministic) { @@ -680,24 +680,24 @@ void bottom_up_large(IndexType *left_unvisited, template __global__ void topdown_expand_kernel( - const IndexType *row_ptr, - const IndexType *col_ind, - const IndexType *frontier, + const IndexType* row_ptr, + const IndexType* col_ind, + const IndexType* frontier, const IndexType frontier_size, const IndexType totaldegree, const IndexType max_items_per_thread, const IndexType lvl, - IndexType *new_frontier, - IndexType *new_frontier_cnt, - const IndexType *frontier_degrees_exclusive_sum, - const IndexType *frontier_degrees_exclusive_sum_buckets_offsets, - int *previous_bmap, - int *bmap, - IndexType *distances, - IndexType *predecessors, - double *sp_counters, - const int *edge_mask, - const int *isolated_bmap, + IndexType* new_frontier, + IndexType* new_frontier_cnt, + const IndexType* frontier_degrees_exclusive_sum, + const IndexType* frontier_degrees_exclusive_sum_buckets_offsets, + int* previous_bmap, + int* bmap, + IndexType* distances, + IndexType* predecessors, + double* sp_counters, + const int* edge_mask, + const int* isolated_bmap, bool 
directed) { // BlockScan @@ -844,7 +844,7 @@ __global__ void topdown_expand_kernel( IndexType local_buf1[TOP_DOWN_BATCH_SIZE]; IndexType local_buf2[TOP_DOWN_BATCH_SIZE]; - IndexType *vec_frontier_degrees_exclusive_sum_index = &local_buf2[0]; + IndexType* vec_frontier_degrees_exclusive_sum_index = &local_buf2[0]; #pragma unroll for (IndexType iv = 0; iv < TOP_DOWN_BATCH_SIZE; ++iv) { @@ -869,7 +869,7 @@ __global__ void topdown_expand_kernel( } } - IndexType *vec_row_ptr_u = &local_buf1[0]; + IndexType* vec_row_ptr_u = &local_buf1[0]; #pragma unroll for (int iv = 0; iv < TOP_DOWN_BATCH_SIZE; ++iv) { IndexType u = vec_u[iv]; @@ -878,7 +878,7 @@ __global__ void topdown_expand_kernel( } // We won't need row_ptr after that, reusing pointer - IndexType *vec_dest_v = vec_row_ptr_u; + IndexType* vec_dest_v = vec_row_ptr_u; #pragma unroll for (int iv = 0; iv < TOP_DOWN_BATCH_SIZE; ++iv) { @@ -901,7 +901,7 @@ __global__ void topdown_expand_kernel( } // We don't need vec_frontier_degrees_exclusive_sum_index anymore - IndexType *vec_v_visited_bmap = vec_frontier_degrees_exclusive_sum_index; + IndexType* vec_v_visited_bmap = vec_frontier_degrees_exclusive_sum_index; // Visited bmap need to contain information about the previous // frontier if we actually process every edge (shortest path counting) @@ -916,7 +916,7 @@ __global__ void topdown_expand_kernel( // From now on we will consider v as a frontier candidate // If for some reason vec_candidate[iv] should be put in the // new_frontier Then set vec_candidate[iv] = -1 - IndexType *vec_frontier_candidate = vec_dest_v; + IndexType* vec_frontier_candidate = vec_dest_v; #pragma unroll for (int iv = 0; iv < TOP_DOWN_BATCH_SIZE; ++iv) { @@ -943,7 +943,7 @@ __global__ void topdown_expand_kernel( if (directed) { // vec_v_visited_bmap is available - IndexType *vec_is_isolated_bmap = vec_v_visited_bmap; + IndexType* vec_is_isolated_bmap = vec_v_visited_bmap; #pragma unroll for (int iv = 0; iv < TOP_DOWN_BATCH_SIZE; ++iv) { @@ -1021,7 +1021,7 @@ __global__ void topdown_expand_kernel( IndexType naccepted_vertices = 0; // We won't need vec_frontier_candidate after that - IndexType *vec_frontier_accepted_vertex = vec_frontier_candidate; + IndexType* vec_frontier_accepted_vertex = vec_frontier_candidate; #pragma unroll for (int iv = 0; iv < TOP_DOWN_BATCH_SIZE; ++iv) { @@ -1094,23 +1094,23 @@ __global__ void topdown_expand_kernel( } template -void frontier_expand(const IndexType *row_ptr, - const IndexType *col_ind, - const IndexType *frontier, +void frontier_expand(const IndexType* row_ptr, + const IndexType* col_ind, + const IndexType* frontier, const IndexType frontier_size, const IndexType totaldegree, const IndexType lvl, - IndexType *new_frontier, - IndexType *new_frontier_cnt, - const IndexType *frontier_degrees_exclusive_sum, - const IndexType *frontier_degrees_exclusive_sum_buckets_offsets, - int *previous_visited_bmap, - int *visited_bmap, - IndexType *distances, - IndexType *predecessors, - double *sp_counters, - const int *edge_mask, - const int *isolated_bmap, + IndexType* new_frontier, + IndexType* new_frontier_cnt, + const IndexType* frontier_degrees_exclusive_sum, + const IndexType* frontier_degrees_exclusive_sum_buckets_offsets, + int* previous_visited_bmap, + int* visited_bmap, + IndexType* distances, + IndexType* predecessors, + double* sp_counters, + const int* edge_mask, + const int* isolated_bmap, bool directed, cudaStream_t m_stream, bool deterministic) diff --git a/cpp/src/traversal/mg/bfs.cuh b/cpp/src/traversal/mg/bfs.cuh index 
129c1a554e4..e6c8c3bf700 100644 --- a/cpp/src/traversal/mg/bfs.cuh +++ b/cpp/src/traversal/mg/bfs.cuh @@ -28,12 +28,12 @@ namespace mg { namespace detail { template -void bfs_traverse(raft::handle_t const &handle, - cugraph::legacy::GraphCSRView const &graph, +void bfs_traverse(raft::handle_t const& handle, + cugraph::legacy::GraphCSRView const& graph, const vertex_t start_vertex, - rmm::device_vector &visited_bmap, - rmm::device_vector &output_frontier_bmap, - operator_t &bfs_op) + rmm::device_vector& visited_bmap, + rmm::device_vector& output_frontier_bmap, + operator_t& bfs_op) { // Frontiers required for BFS rmm::device_vector input_frontier(graph.number_of_vertices); @@ -47,8 +47,8 @@ void bfs_traverse(raft::handle_t const &handle, // Reusing buffers to create isolated bitmap { - rmm::device_vector &local_isolated_ids = input_frontier; - rmm::device_vector &global_isolated_ids = output_frontier; + rmm::device_vector& local_isolated_ids = input_frontier; + rmm::device_vector& global_isolated_ids = output_frontier; detail::create_isolated_bitmap( handle, graph, local_isolated_ids, global_isolated_ids, temp_buffer_len, isolated_bmap); } @@ -110,10 +110,10 @@ void bfs_traverse(raft::handle_t const &handle, } // namespace detail template -void bfs(raft::handle_t const &handle, - cugraph::legacy::GraphCSRView const &graph, - vertex_t *distances, - vertex_t *predecessors, +void bfs(raft::handle_t const& handle, + cugraph::legacy::GraphCSRView const& graph, + vertex_t* distances, + vertex_t* predecessors, const vertex_t start_vertex) { CUGRAPH_EXPECTS(handle.comms_initialized(), diff --git a/cpp/src/traversal/mg/common_utils.cuh b/cpp/src/traversal/mg/common_utils.cuh index 8ceaf43c827..9a95aba7901 100644 --- a/cpp/src/traversal/mg/common_utils.cuh +++ b/cpp/src/traversal/mg/common_utils.cuh @@ -46,25 +46,25 @@ constexpr inline return_t number_of_words(return_t number_of_bits) template struct isDegreeZero { - edge_t const *offset_; - isDegreeZero(edge_t const *offset) : offset_(offset) {} + edge_t const* offset_; + isDegreeZero(edge_t const* offset) : offset_(offset) {} - __device__ bool operator()(const edge_t &id) const { return (offset_[id + 1] == offset_[id]); } + __device__ bool operator()(const edge_t& id) const { return (offset_[id + 1] == offset_[id]); } }; struct set_nth_bit { - uint32_t *bmap_; - set_nth_bit(uint32_t *bmap) : bmap_(bmap) {} + uint32_t* bmap_; + set_nth_bit(uint32_t* bmap) : bmap_(bmap) {} template - __device__ void operator()(const return_t &id) + __device__ void operator()(const return_t& id) { atomicOr(bmap_ + (id / BitsPWrd), (uint32_t{1} << (id % BitsPWrd))); } }; template -bool is_vertex_isolated(rmm::device_vector &bmap, vertex_t id) +bool is_vertex_isolated(rmm::device_vector& bmap, vertex_t id) { uint32_t word = bmap[id / BitsPWrd]; uint32_t active_bit = static_cast(1) << (id % BitsPWrd); @@ -74,11 +74,11 @@ bool is_vertex_isolated(rmm::device_vector &bmap, vertex_t id) template struct BFSStepNoDist { - uint32_t *output_frontier_; - uint32_t *visited_; - vertex_t *predecessors_; + uint32_t* output_frontier_; + uint32_t* visited_; + vertex_t* predecessors_; - BFSStepNoDist(uint32_t *output_frontier, uint32_t *visited, vertex_t *predecessors) + BFSStepNoDist(uint32_t* output_frontier, uint32_t* visited, vertex_t* predecessors) : output_frontier_(output_frontier), visited_(visited), predecessors_(predecessors) { } @@ -105,13 +105,13 @@ struct BFSStepNoDist { template struct BFSStep { - uint32_t *output_frontier_; - uint32_t *visited_; - vertex_t *predecessors_; 
- vertex_t *distances_; + uint32_t* output_frontier_; + uint32_t* visited_; + vertex_t* predecessors_; + vertex_t* distances_; vertex_t level_; - BFSStep(uint32_t *output_frontier, uint32_t *visited, vertex_t *predecessors, vertex_t *distances) + BFSStep(uint32_t* output_frontier, uint32_t* visited, vertex_t* predecessors, vertex_t* distances) : output_frontier_(output_frontier), visited_(visited), predecessors_(predecessors), @@ -142,9 +142,9 @@ struct BFSStep { template vertex_t populate_isolated_vertices( - raft::handle_t const &handle, - cugraph::legacy::GraphCSRView const &graph, - rmm::device_vector &isolated_vertex_ids) + raft::handle_t const& handle, + cugraph::legacy::GraphCSRView const& graph, + rmm::device_vector& isolated_vertex_ids) { bool is_mg = (handle.comms_initialized() && (graph.local_vertices != nullptr) && (graph.local_offsets != nullptr)); @@ -170,11 +170,11 @@ vertex_t populate_isolated_vertices( } template -return_t collect_vectors(raft::handle_t const &handle, - rmm::device_vector &buffer_len, - rmm::device_vector &local, +return_t collect_vectors(raft::handle_t const& handle, + rmm::device_vector& buffer_len, + rmm::device_vector& local, return_t local_count, - rmm::device_vector &global) + rmm::device_vector& global) { CHECK_CUDA(handle.get_stream()); buffer_len.resize(handle.get_comms().get_size()); @@ -206,9 +206,9 @@ return_t collect_vectors(raft::handle_t const &handle, } template -void add_to_bitmap(raft::handle_t const &handle, - rmm::device_vector &bmap, - rmm::device_vector &id, +void add_to_bitmap(raft::handle_t const& handle, + rmm::device_vector& bmap, + rmm::device_vector& id, return_t count) { cudaStream_t stream = handle.get_stream(); @@ -222,12 +222,12 @@ void add_to_bitmap(raft::handle_t const &handle, // For all vertex ids i which are isolated (out degree is 0), set // ith bit of isolated_bmap to 1 template -void create_isolated_bitmap(raft::handle_t const &handle, - cugraph::legacy::GraphCSRView const &graph, - rmm::device_vector &local_isolated_ids, - rmm::device_vector &global_isolated_ids, - rmm::device_vector &temp_buffer_len, - rmm::device_vector &isolated_bmap) +void create_isolated_bitmap(raft::handle_t const& handle, + cugraph::legacy::GraphCSRView const& graph, + rmm::device_vector& local_isolated_ids, + rmm::device_vector& global_isolated_ids, + rmm::device_vector& temp_buffer_len, + rmm::device_vector& isolated_bmap) { size_t word_count = detail::number_of_words(graph.number_of_vertices); local_isolated_ids.resize(graph.number_of_vertices); @@ -242,8 +242,8 @@ void create_isolated_bitmap(raft::handle_t const &handle, } template -return_t remove_duplicates(raft::handle_t const &handle, - rmm::device_vector &data, +return_t remove_duplicates(raft::handle_t const& handle, + rmm::device_vector& data, return_t data_len) { cudaStream_t stream = handle.get_stream(); @@ -259,13 +259,13 @@ return_t remove_duplicates(raft::handle_t const &handle, // ids. 
bmap is expected to be of the length // id_end/BitsPWrd and is set to 0 initially template -__global__ void remove_duplicates_kernel(uint32_t *bmap, - return_t *in_id, +__global__ void remove_duplicates_kernel(uint32_t* bmap, + return_t* in_id, return_t id_begin, return_t id_end, return_t count, - return_t *out_id, - return_t *out_count) + return_t* out_id, + return_t* out_count) { return_t tid = blockIdx.x * blockDim.x + threadIdx.x; return_t id; @@ -307,14 +307,14 @@ __global__ void remove_duplicates_kernel(uint32_t *bmap, } template -__global__ void remove_duplicates_kernel(uint32_t *bmap, - uint32_t *isolated_bmap, - return_t *in_id, +__global__ void remove_duplicates_kernel(uint32_t* bmap, + uint32_t* isolated_bmap, + return_t* in_id, return_t id_begin, return_t id_end, return_t count, - return_t *out_id, - return_t *out_count) + return_t* out_id, + return_t* out_count) { return_t tid = blockIdx.x * blockDim.x + threadIdx.x; return_t id; @@ -360,13 +360,13 @@ __global__ void remove_duplicates_kernel(uint32_t *bmap, } template -return_t remove_duplicates(raft::handle_t const &handle, - rmm::device_vector &bmap, - rmm::device_vector &data, +return_t remove_duplicates(raft::handle_t const& handle, + rmm::device_vector& bmap, + rmm::device_vector& data, return_t data_len, return_t data_begin, return_t data_end, - rmm::device_vector &out_data) + rmm::device_vector& out_data) { cudaStream_t stream = handle.get_stream(); @@ -389,13 +389,13 @@ return_t remove_duplicates(raft::handle_t const &handle, template vertex_t preprocess_input_frontier( - raft::handle_t const &handle, - cugraph::legacy::GraphCSRView const &graph, - rmm::device_vector &bmap, - rmm::device_vector &isolated_bmap, - rmm::device_vector &input_frontier, + raft::handle_t const& handle, + cugraph::legacy::GraphCSRView const& graph, + rmm::device_vector& bmap, + rmm::device_vector& isolated_bmap, + rmm::device_vector& input_frontier, vertex_t input_frontier_len, - rmm::device_vector &output_frontier) + rmm::device_vector& output_frontier) { cudaStream_t stream = handle.get_stream(); @@ -422,12 +422,12 @@ vertex_t preprocess_input_frontier( template vertex_t preprocess_input_frontier( - raft::handle_t const &handle, - cugraph::legacy::GraphCSRView const &graph, - rmm::device_vector &bmap, - rmm::device_vector &input_frontier, + raft::handle_t const& handle, + cugraph::legacy::GraphCSRView const& graph, + rmm::device_vector& bmap, + rmm::device_vector& input_frontier, vertex_t input_frontier_len, - rmm::device_vector &output_frontier) + rmm::device_vector& output_frontier) { cudaStream_t stream = handle.get_stream(); @@ -452,7 +452,7 @@ vertex_t preprocess_input_frontier( } template -__global__ void fill_kernel(vertex_t *distances, vertex_t count, vertex_t start_vertex) +__global__ void fill_kernel(vertex_t* distances, vertex_t count, vertex_t start_vertex) { vertex_t tid = blockIdx.x * blockDim.x + threadIdx.x; if (tid >= count) { return; } @@ -464,11 +464,11 @@ __global__ void fill_kernel(vertex_t *distances, vertex_t count, vertex_t start_ } template -void fill_max_dist(raft::handle_t const &handle, - cugraph::legacy::GraphCSRView const &graph, +void fill_max_dist(raft::handle_t const& handle, + cugraph::legacy::GraphCSRView const& graph, vertex_t start_vertex, vertex_t global_number_of_vertices, - vertex_t *distances) + vertex_t* distances) { if (distances == nullptr) { return; } vertex_t array_size = global_number_of_vertices; @@ -479,8 +479,8 @@ void fill_max_dist(raft::handle_t const &handle, template vertex_t 
get_global_vertex_count( - raft::handle_t const &handle, - cugraph::legacy::GraphCSRView const &graph) + raft::handle_t const& handle, + cugraph::legacy::GraphCSRView const& graph) { rmm::device_vector id(1); id[0] = *thrust::max_element(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), diff --git a/cpp/src/traversal/mg/frontier_expand.cuh b/cpp/src/traversal/mg/frontier_expand.cuh index 4c4d794f067..078ab085724 100644 --- a/cpp/src/traversal/mg/frontier_expand.cuh +++ b/cpp/src/traversal/mg/frontier_expand.cuh @@ -28,8 +28,8 @@ namespace detail { template class FrontierExpand { - raft::handle_t const &handle_; - cugraph::legacy::GraphCSRView const &graph_; + raft::handle_t const& handle_; + cugraph::legacy::GraphCSRView const& graph_; VertexBinner dist_; rmm::device_vector reorganized_vertices_; edge_t vertex_begin_; @@ -37,8 +37,8 @@ class FrontierExpand { rmm::device_vector output_vertex_count_; public: - FrontierExpand(raft::handle_t const &handle, - cugraph::legacy::GraphCSRView const &graph) + FrontierExpand(raft::handle_t const& handle, + cugraph::legacy::GraphCSRView const& graph) : handle_(handle), graph_(graph) { bool is_mg = (handle.comms_initialized() && (graph.local_vertices != nullptr) && @@ -59,9 +59,9 @@ class FrontierExpand { // Return the size of the output_frontier template vertex_t operator()(operator_t op, - rmm::device_vector &input_frontier, + rmm::device_vector& input_frontier, vertex_t input_frontier_len, - rmm::device_vector &output_frontier) + rmm::device_vector& output_frontier) { if (input_frontier_len == 0) { return static_cast(0); } cudaStream_t stream = handle_.get_stream(); diff --git a/cpp/src/traversal/mg/frontier_expand_kernels.cuh b/cpp/src/traversal/mg/frontier_expand_kernels.cuh index 28ba6b19dbc..32b9310f02f 100644 --- a/cpp/src/traversal/mg/frontier_expand_kernels.cuh +++ b/cpp/src/traversal/mg/frontier_expand_kernels.cuh @@ -26,13 +26,13 @@ namespace mg { namespace detail { template -__device__ void write_to_frontier(vertex_t const *thread_frontier, +__device__ void write_to_frontier(vertex_t const* thread_frontier, int thread_frontier_count, - vertex_t *block_frontier, - int *block_frontier_count, - vertex_t *output_frontier, - edge_t *block_write_offset, - edge_t *output_frontier_count) + vertex_t* block_frontier, + int* block_frontier_count, + vertex_t* output_frontier, + edge_t* block_write_offset, + edge_t* output_frontier_count) { // Set frontier count for block to 0 if (threadIdx.x == 0) { *block_frontier_count = 0; } @@ -66,13 +66,13 @@ template -__global__ void block_per_vertex(edge_t const *offsets, - vertex_t const *indices, - vertex_t const *input_frontier, +__global__ void block_per_vertex(edge_t const* offsets, + vertex_t const* indices, + vertex_t const* input_frontier, vertex_t input_frontier_count, vertex_t vertex_begin, - vertex_t *output_frontier, - edge_t *output_frontier_count, + vertex_t* output_frontier, + edge_t* output_frontier_count, operator_t op) { if (blockIdx.x >= input_frontier_count) { return; } @@ -121,13 +121,13 @@ template -__global__ void kernel_per_vertex(edge_t const *offsets, - vertex_t const *indices, - vertex_t const *input_frontier, +__global__ void kernel_per_vertex(edge_t const* offsets, + vertex_t const* indices, + vertex_t const* input_frontier, vertex_t input_frontier_count, vertex_t vertex_begin, - vertex_t *output_frontier, - edge_t *output_frontier_count, + vertex_t* output_frontier, + edge_t* output_frontier_count, operator_t op) { vertex_t current_vertex_index = 0; @@ -171,12 +171,12 
@@ __global__ void kernel_per_vertex(edge_t const *offsets, } template -void large_vertex_lb(cugraph::legacy::GraphCSRView const &graph, - DegreeBucket &bucket, +void large_vertex_lb(cugraph::legacy::GraphCSRView const& graph, + DegreeBucket& bucket, operator_t op, vertex_t vertex_begin, - vertex_t *output_vertex_ids, - edge_t *output_vertex_ids_offset, + vertex_t* output_vertex_ids, + edge_t* output_vertex_ids_offset, cudaStream_t stream) { if (bucket.numberOfVertices != 0) { @@ -196,12 +196,12 @@ void large_vertex_lb(cugraph::legacy::GraphCSRView c } template -void medium_vertex_lb(cugraph::legacy::GraphCSRView const &graph, - DegreeBucket &bucket, +void medium_vertex_lb(cugraph::legacy::GraphCSRView const& graph, + DegreeBucket& bucket, operator_t op, vertex_t vertex_begin, - vertex_t *output_vertex_ids, - edge_t *output_vertex_ids_offset, + vertex_t* output_vertex_ids, + edge_t* output_vertex_ids_offset, cudaStream_t stream) { // Vertices with degrees 2^12 <= d < 2^16 are handled by this kernel @@ -223,12 +223,12 @@ void medium_vertex_lb(cugraph::legacy::GraphCSRView } template -void small_vertex_lb(cugraph::legacy::GraphCSRView const &graph, - DegreeBucket &bucket, +void small_vertex_lb(cugraph::legacy::GraphCSRView const& graph, + DegreeBucket& bucket, operator_t op, vertex_t vertex_begin, - vertex_t *output_vertex_ids, - edge_t *output_vertex_ids_offset, + vertex_t* output_vertex_ids, + edge_t* output_vertex_ids_offset, cudaStream_t stream) { int block_count = bucket.numberOfVertices; diff --git a/cpp/src/traversal/mg/vertex_binning_kernels.cuh b/cpp/src/traversal/mg/vertex_binning_kernels.cuh index dbb339fea05..57574965a3a 100644 --- a/cpp/src/traversal/mg/vertex_binning_kernels.cuh +++ b/cpp/src/traversal/mg/vertex_binning_kernels.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
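The load-balancing wrappers above dispatch frontier vertices to small/medium/large kernels by power-of-two degree ranges (the medium kernel's comment notes it handles 2^12 <= d < 2^16), and the binning hunks that follow count and scatter vertices into those degree buckets from the CSR offsets. As a rough, illustrative sketch of that bucketing idea only — the helper name, exact bin boundaries, and the sample degrees below are assumptions for illustration, not the truncated ceil-log helper or actual bins used by these kernels — a vertex's bin can be derived from the bit-width of its degree:

```cpp
// Illustrative sketch (hypothetical helper, not cuGraph's own): bucket a
// vertex by the bit-width of its out-degree, so that e.g. all degrees in
// [2^12, 2^16) land in the same group of bins that a "medium" kernel could own.
#include <cstddef>
#include <cstdint>
#include <vector>

// Number of bits needed to represent d; degree 0 maps to bin 0.
inline int degree_bin(std::int64_t d)
{
  int bin = 0;
  while (d > 0) {
    ++bin;
    d >>= 1;
  }
  return bin;
}

int main()
{
  // Hypothetical CSR offsets for 4 vertices with degrees 0, 3, 4096, 70000.
  std::vector<std::int64_t> offsets{0, 0, 3, 4099, 74099};
  for (std::size_t v = 0; v + 1 < offsets.size(); ++v) {
    // Under this convention: degree 0 -> bin 0, 3 -> bin 2,
    // 4096 (= 2^12) -> bin 13, 70000 (< 2^17) -> bin 17.
    int bin = degree_bin(offsets[v + 1] - offsets[v]);
    (void)bin;
  }
  return 0;
}
```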
@@ -40,7 +40,7 @@ __device__ inline typename std::enable_if<(sizeof(degree_t) == 8), int>::type ce } template -__global__ void simple_fill(return_t *bin0, return_t *bin1, return_t count) +__global__ void simple_fill(return_t* bin0, return_t* bin1, return_t count) { for (return_t i = 0; i < count; i++) { bin0[i] = 0; @@ -49,7 +49,7 @@ __global__ void simple_fill(return_t *bin0, return_t *bin1, return_t count) } template -__global__ void exclusive_scan(return_t *data, return_t *out) +__global__ void exclusive_scan(return_t* data, return_t* out) { constexpr int BinCount = NumberBins; return_t lData[BinCount]; @@ -71,9 +71,9 @@ __global__ void exclusive_scan(return_t *data, return_t *out) // In this function, any id in vertex_ids array is only acceptable as long // as its value is between vertex_begin and vertex_end template -__global__ void count_bin_sizes(edge_t *bins, - edge_t const *offsets, - vertex_t const *vertex_ids, +__global__ void count_bin_sizes(edge_t* bins, + edge_t const* offsets, + vertex_t const* vertex_ids, edge_t const vertex_id_count, vertex_t vertex_begin, vertex_t vertex_end) @@ -81,7 +81,9 @@ __global__ void count_bin_sizes(edge_t *bins, using cugraph::detail::traversal::atomicAdd; constexpr int BinCount = NumberBins; __shared__ edge_t lBin[BinCount]; - for (int i = threadIdx.x; i < BinCount; i += blockDim.x) { lBin[i] = 0; } + for (int i = threadIdx.x; i < BinCount; i += blockDim.x) { + lBin[i] = 0; + } __syncthreads(); for (vertex_t i = threadIdx.x + (blockIdx.x * blockDim.x); i < vertex_id_count; @@ -98,16 +100,18 @@ __global__ void count_bin_sizes(edge_t *bins, } __syncthreads(); - for (int i = threadIdx.x; i < BinCount; i += blockDim.x) { atomicAdd(bins + i, lBin[i]); } + for (int i = threadIdx.x; i < BinCount; i += blockDim.x) { + atomicAdd(bins + i, lBin[i]); + } } // Bin vertices to the appropriate bins by taking into account // the starting offsets calculated by count_bin_sizes template -__global__ void create_vertex_bins(vertex_t *out_vertex_ids, - edge_t *bin_offsets, - edge_t const *offsets, - vertex_t *in_vertex_ids, +__global__ void create_vertex_bins(vertex_t* out_vertex_ids, + edge_t* bin_offsets, + edge_t const* offsets, + vertex_t* in_vertex_ids, edge_t const vertex_id_count, vertex_t vertex_begin, vertex_t vertex_end) @@ -149,12 +153,12 @@ __global__ void create_vertex_bins(vertex_t *out_vertex_ids, } template -void bin_vertices(rmm::device_vector &input_vertex_ids, +void bin_vertices(rmm::device_vector& input_vertex_ids, vertex_t input_vertex_ids_len, - rmm::device_vector &reorganized_vertex_ids, - rmm::device_vector &bin_count_offsets, - rmm::device_vector &bin_count, - edge_t *offsets, + rmm::device_vector& reorganized_vertex_ids, + rmm::device_vector& bin_count_offsets, + rmm::device_vector& bin_count, + edge_t* offsets, vertex_t vertex_begin, vertex_t vertex_end, cudaStream_t stream) diff --git a/cpp/src/traversal/sssp.cu b/cpp/src/traversal/sssp.cu index ac2ab5b2063..61225dd7fd6 100644 --- a/cpp/src/traversal/sssp.cu +++ b/cpp/src/traversal/sssp.cu @@ -49,9 +49,9 @@ void SSSP::setup() iter_buffer_size = sizeof(int) * (edges_bmap_size + vertices_bmap_size) + sizeof(IndexType); iter_buffer.resize(iter_buffer_size, stream); // ith bit of relaxed_edges_bmap <=> ith edge was relaxed - relaxed_edges_bmap = static_cast(iter_buffer.data()); + relaxed_edges_bmap = static_cast(iter_buffer.data()); // ith bit of next_frontier_bmap <=> vertex is active in the next frontier - next_frontier_bmap = static_cast(iter_buffer.data()) + edges_bmap_size; + 
next_frontier_bmap = static_cast(iter_buffer.data()) + edges_bmap_size; // num vertices in the next frontier d_new_frontier_cnt = next_frontier_bmap + vertices_bmap_size; @@ -73,7 +73,7 @@ void SSSP::setup() exclusive_sum_frontier_vertex_buckets_offsets.resize(bucket_off_size); // Repurpose d_new_frontier_cnt temporarily - IndexType *d_nisolated = d_new_frontier_cnt; + IndexType* d_nisolated = d_new_frontier_cnt; cudaMemsetAsync(d_nisolated, 0, sizeof(IndexType), stream); // Computing isolated_bmap @@ -89,9 +89,9 @@ void SSSP::setup() } template -void SSSP::configure(DistType *_distances, - IndexType *_predecessors, - int *_edge_mask) +void SSSP::configure(DistType* _distances, + IndexType* _predecessors, + int* _edge_mask) { distances = _distances; predecessors = _predecessors; @@ -242,9 +242,9 @@ void SSSP::clean() * @file sssp.cu * --------------------------------------------------------------------------*/ template -void sssp(legacy::GraphCSRView const &graph, - WT *distances, - VT *predecessors, +void sssp(legacy::GraphCSRView const& graph, + WT* distances, + VT* predecessors, const VT source_vertex) { CUGRAPH_EXPECTS(distances || predecessors, "Invalid input argument, both outputs are nullptr"); @@ -257,9 +257,9 @@ void sssp(legacy::GraphCSRView const &graph, int num_vertices = graph.number_of_vertices; int num_edges = graph.number_of_edges; - const ET *offsets_ptr = graph.offsets; - const VT *indices_ptr = graph.indices; - const WT *edge_weights_ptr = nullptr; + const ET* offsets_ptr = graph.offsets; + const VT* indices_ptr = graph.indices; + const WT* edge_weights_ptr = nullptr; // Both if / else branch operate own calls due to // thrust::device_vector lifetime @@ -293,13 +293,13 @@ void sssp(legacy::GraphCSRView const &graph, } // explicit instantiation -template void sssp(legacy::GraphCSRView const &graph, - float *distances, - int *predecessors, +template void sssp(legacy::GraphCSRView const& graph, + float* distances, + int* predecessors, const int source_vertex); -template void sssp(legacy::GraphCSRView const &graph, - double *distances, - int *predecessors, +template void sssp(legacy::GraphCSRView const& graph, + double* distances, + int* predecessors, const int source_vertex); } // namespace cugraph diff --git a/cpp/src/traversal/traversal_common.cuh b/cpp/src/traversal/traversal_common.cuh index 64a21a89b04..ea77173870e 100644 --- a/cpp/src/traversal/traversal_common.cuh +++ b/cpp/src/traversal/traversal_common.cuh @@ -21,7 +21,7 @@ #define MAXBLOCKS 65535 #define WARP_SIZE 32 -#define INT_SIZE 32 +#define INT_SIZE 32 // // Bottom up macros @@ -31,7 +31,7 @@ #define COUNT_UNVISITED_EDGES_DIMX 256 -#define MAIN_BOTTOMUP_DIMX 256 +#define MAIN_BOTTOMUP_DIMX 256 #define MAIN_BOTTOMUP_NWARPS (MAIN_BOTTOMUP_DIMX / WARP_SIZE) #define LARGE_BOTTOMUP_DIMX 256 diff --git a/cpp/src/traversal/tsp.cu b/cpp/src/traversal/tsp.cu index 17183d48f3d..332ccb21834 100644 --- a/cpp/src/traversal/tsp.cu +++ b/cpp/src/traversal/tsp.cu @@ -27,17 +27,17 @@ namespace cugraph { namespace detail { -TSP::TSP(raft::handle_t const &handle, - int const *vtx_ptr, - float const *x_pos, - float const *y_pos, +TSP::TSP(raft::handle_t const& handle, + int const* vtx_ptr, + float const* x_pos, + float const* y_pos, int nodes, int restarts, bool beam_search, int k, int nstart, bool verbose, - int *route) + int* route) : handle_(handle), vtx_ptr_(vtx_ptr), x_pos_(x_pos), @@ -117,11 +117,11 @@ float TSP::compute() h_x_pos.reserve(nodes_ + 1); h_y_pos.reserve(nodes_ + 1); h_route.reserve(nodes_); - std::vector 
addr_best_x_pos(1); - std::vector addr_best_y_pos(1); - std::vector addr_best_route(1); + std::vector addr_best_x_pos(1); + std::vector addr_best_y_pos(1); + std::vector addr_best_route(1); HighResTimer hr_timer; - auto create_timer = [&hr_timer, this](char const *name) { + auto create_timer = [&hr_timer, this](char const* name) { return VerboseTimer(name, hr_timer, verbose_); }; @@ -213,18 +213,18 @@ void TSP::knn() bool row_major_order = false; rmm::device_uvector input(nodes_ * dim, stream_); - float *input_ptr = input.data(); + float* input_ptr = input.data(); raft::copy(input_ptr, x_pos_, nodes_, stream_); raft::copy(input_ptr + nodes_, y_pos_, nodes_, stream_); rmm::device_uvector search_data(nodes_ * dim, stream_); - float *search_data_ptr = search_data.data(); + float* search_data_ptr = search_data.data(); raft::copy(search_data_ptr, input_ptr, nodes_ * dim, stream_); rmm::device_uvector distances(nodes_ * (k_ + 1), stream_); - float *distances_ptr = distances.data(); + float* distances_ptr = distances.data(); - std::vector input_vec; + std::vector input_vec; std::vector sizes_vec; input_vec.push_back(input_ptr); sizes_vec.push_back(nodes_); @@ -246,17 +246,17 @@ void TSP::knn() } } // namespace detail -float traveling_salesperson(raft::handle_t const &handle, - int const *vtx_ptr, - float const *x_pos, - float const *y_pos, +float traveling_salesperson(raft::handle_t const& handle, + int const* vtx_ptr, + float const* x_pos, + float const* y_pos, int nodes, int restarts, bool beam_search, int k, int nstart, bool verbose, - int *route) + int* route) { RAFT_EXPECTS(route != nullptr, "route should equal the number of nodes"); RAFT_EXPECTS(nodes > 0, "nodes should be strictly positive"); diff --git a/cpp/src/traversal/tsp.hpp b/cpp/src/traversal/tsp.hpp index 8c6948f218c..6073f46ab28 100644 --- a/cpp/src/traversal/tsp.hpp +++ b/cpp/src/traversal/tsp.hpp @@ -29,25 +29,25 @@ namespace cugraph { namespace detail { struct TSPResults { - float **best_x_pos; - float **best_y_pos; - int **best_route; - int *best_cost; + float** best_x_pos; + float** best_y_pos; + int** best_route; + int* best_cost; }; class TSP { public: - TSP(raft::handle_t const &handle, - int const *vtx_ptr, - float const *x_pos, - float const *y_pos, + TSP(raft::handle_t const& handle, + int const* vtx_ptr, + float const* x_pos, + float const* y_pos, int nodes, int restarts, bool beam_search, int k, int nstart, bool verbose, - int *route); + int* route); void setup(); void reset_batch(); @@ -58,7 +58,7 @@ class TSP { private: // Config - raft::handle_t const &handle_; + raft::handle_t const& handle_; cudaStream_t stream_; int max_blocks_; int max_threads_; @@ -69,10 +69,10 @@ class TSP { int best_thread_num_; // TSP - int const *vtx_ptr_; - int *route_; - float const *x_pos_; - float const *y_pos_; + int const* vtx_ptr_; + int* route_; + float const* x_pos_; + float const* y_pos_; int nodes_; int restarts_; bool beam_search_; @@ -84,25 +84,25 @@ class TSP { rmm::device_scalar mylock_scalar_; rmm::device_scalar best_cost_scalar_; - int *mylock_; - int *best_cost_; + int* mylock_; + int* best_cost_; // Vectors rmm::device_uvector neighbors_vec_; rmm::device_uvector work_vec_; - rmm::device_uvector best_x_pos_vec_; - rmm::device_uvector best_y_pos_vec_; - rmm::device_uvector best_route_vec_; + rmm::device_uvector best_x_pos_vec_; + rmm::device_uvector best_y_pos_vec_; + rmm::device_uvector best_route_vec_; - int64_t *neighbors_; - int *work_; - int *work_route_; + int64_t* neighbors_; + int* work_; + int* work_route_; 
TSPResults results_; }; class VerboseTimer { public: - VerboseTimer(char const *name, HighResTimer &hr_timer, bool verbose) + VerboseTimer(char const* name, HighResTimer& hr_timer, bool verbose) : name_(name), hr_timer_(hr_timer), verbose_(verbose) { if (verbose_) hr_timer_.start(name_); @@ -114,8 +114,8 @@ class VerboseTimer { } private: - const char *name_; - HighResTimer &hr_timer_; + const char* name_; + HighResTimer& hr_timer_; bool verbose_; }; diff --git a/cpp/src/traversal/tsp_solver.hpp b/cpp/src/traversal/tsp_solver.hpp index c7b8cdaaf1d..5fb3ff1d449 100644 --- a/cpp/src/traversal/tsp_solver.hpp +++ b/cpp/src/traversal/tsp_solver.hpp @@ -29,19 +29,19 @@ namespace cugraph { namespace detail { -__global__ void random_init(int *work, - float const *posx, - float const *posy, - int const *vtx_ptr, +__global__ void random_init(int* work, + float const* posx, + float const* posy, + int const* vtx_ptr, int const nstart, int const nodes, int const batch, int const restart_batch) { - int *buf = &work[blockIdx.x * ((4 * nodes + 3 + 31) / 32 * 32)]; - float *px = (float *)(&buf[nodes]); - float *py = &px[nodes + 1]; - int *path = (int *)(&py[nodes + 1]); + int* buf = &work[blockIdx.x * ((4 * nodes + 3 + 31) / 32 * 32)]; + float* px = (float*)(&buf[nodes]); + float* py = &px[nodes + 1]; + int* path = (int*)(&py[nodes + 1]); // Fill values for (int i = threadIdx.x; i <= nodes; i += blockDim.x) { @@ -74,23 +74,24 @@ __global__ void random_init(int *work, } } -__global__ void knn_init(int *work, - float const *posx, - float const *posy, - int const *vtx_ptr, - int64_t const *neighbors, +__global__ void knn_init(int* work, + float const* posx, + float const* posy, + int const* vtx_ptr, + int64_t const* neighbors, int const nstart, int const nodes, int const K, int const batch, int const restart_batch) { - int *buf = &work[blockIdx.x * ((4 * nodes + 3 + 31) / 32 * 32)]; - float *px = (float *)(&buf[nodes]); - float *py = &px[nodes + 1]; - int *path = (int *)(&py[nodes + 1]); + int* buf = &work[blockIdx.x * ((4 * nodes + 3 + 31) / 32 * 32)]; + float* px = (float*)(&buf[nodes]); + float* py = &px[nodes + 1]; + int* path = (int*)(&py[nodes + 1]); - for (int i = threadIdx.x; i < nodes; i += blockDim.x) buf[i] = 0; + for (int i = threadIdx.x; i < nodes; i += blockDim.x) + buf[i] = 0; __syncthreads(); @@ -106,7 +107,8 @@ __global__ void knn_init(int *work, int v = 0; buf[head] = 1; while (progress < nodes - 1) { // beam search as starting point - for (int i = 1; i <= progress; i++) buf[i] = 0; + for (int i = 1; i <= progress; i++) + buf[i] = 0; progress = 0; // reset current location in path and visited array int randjumps = 0; while (progress < nodes - 1) { @@ -130,7 +132,9 @@ __global__ void knn_init(int *work, break; // give up on this traversal, we failed to find a next link randjumps += 1; int nr = (head + 1) % nodes; // jump to next node - while (buf[nr] == 1) { nr = (nr + 1) % nodes; } + while (buf[nr] == 1) { + nr = (nr + 1) % nodes; + } head = nr; progress += 1; buf[head] = 1; @@ -148,7 +152,7 @@ __global__ void knn_init(int *work, } __device__ void two_opt_search( - int *buf, float *px, float *py, int *shbuf, int *minchange, int *mini, int *minj, int const nodes) + int* buf, float* px, float* py, int* shbuf, int* minchange, int* mini, int* minj, int const nodes) { __shared__ float shmem_x[tilesize]; __shared__ float shmem_y[tilesize]; @@ -203,20 +207,20 @@ __device__ void two_opt_search( } __global__ __launch_bounds__(2048, 2) void search_solution(TSPResults results, - int *mylock, - int 
const *vtx_ptr, + int* mylock, + int const* vtx_ptr, bool beam_search, int const K, int nodes, - float const *posx, - float const *posy, - int *work, + float const* posx, + float const* posy, + int* work, int const nstart) { - int *buf = &work[blockIdx.x * ((4 * nodes + 3 + 31) / 32 * 32)]; - float *px = (float *)(&buf[nodes]); - float *py = &px[nodes + 1]; - int *path = (int *)(&py[nodes + 1]); + int* buf = &work[blockIdx.x * ((4 * nodes + 3 + 31) / 32 * 32)]; + float* px = (float*)(&buf[nodes]); + float* py = &px[nodes + 1]; + int* path = (int*)(&py[nodes + 1]); __shared__ int shbuf[tilesize]; __shared__ int best_change[kswaps]; @@ -339,14 +343,14 @@ __global__ __launch_bounds__(2048, 2) void search_solution(TSPResults results, } while (minchange < 0 && myswaps < 2 * nodes); } -__global__ void get_optimal_tour(TSPResults results, int *mylock, int *work, int const nodes) +__global__ void get_optimal_tour(TSPResults results, int* mylock, int* work, int const nodes) { extern __shared__ int accumulator[]; int climber_id = blockIdx.x; - int *buf = &work[climber_id * ((4 * nodes + 3 + 31) / 32 * 32)]; - float *px = (float *)(&buf[nodes]); - float *py = &px[nodes + 1]; - int *path = (int *)(&py[nodes + 1]); + int* buf = &work[climber_id * ((4 * nodes + 3 + 31) / 32 * 32)]; + float* px = (float*)(&buf[nodes]); + float* py = &px[nodes + 1]; + int* path = (int*)(&py[nodes + 1]); // Now find actual length of the last tour, result of the climb int term = 0; diff --git a/cpp/src/traversal/tsp_utils.hpp b/cpp/src/traversal/tsp_utils.hpp index 2a3445f1c81..48a3e702f09 100644 --- a/cpp/src/traversal/tsp_utils.hpp +++ b/cpp/src/traversal/tsp_utils.hpp @@ -17,7 +17,7 @@ #pragma once #define tilesize 128 -#define kswaps 4 +#define kswaps 4 #include #include @@ -26,7 +26,7 @@ namespace cugraph { namespace detail { -constexpr float euclidean_dist(float *px, float *py, int a, int b) +constexpr float euclidean_dist(float* px, float* py, int a, int b) { return sqrtf((px[a] - px[b]) * (px[a] - px[b]) + (py[a] - py[b]) * (py[a] - py[b])); } @@ -48,7 +48,8 @@ int best_thread_count(int nodes, int max_threads, int sm_count, int warp_size) blocks = (16384 * 2) / smem; if (blocks > sm_count) blocks = sm_count; thr = (threads + warp_size - 1) / warp_size * warp_size; - while (blocks * thr > max_threads_sm) blocks--; + while (blocks * thr > max_threads_sm) + blocks--; perf = threads * blocks; if (perf > best) { best = perf; diff --git a/cpp/src/traversal/two_hop_neighbors.cu b/cpp/src/traversal/two_hop_neighbors.cu index eff91f03127..e1fce911130 100644 --- a/cpp/src/traversal/two_hop_neighbors.cu +++ b/cpp/src/traversal/two_hop_neighbors.cu @@ -33,12 +33,12 @@ namespace cugraph { template std::unique_ptr> get_two_hop_neighbors( - legacy::GraphCSRView const &graph) + legacy::GraphCSRView const& graph) { cudaStream_t stream{nullptr}; rmm::device_vector exsum_degree(graph.number_of_edges + 1); - ET *d_exsum_degree = exsum_degree.data().get(); + ET* d_exsum_degree = exsum_degree.data().get(); // Find the degree of the out vertex of each edge degree_iterator deg_it(graph.offsets); @@ -63,14 +63,14 @@ std::unique_ptr> get_two_hop_neighbors( rmm::device_vector first_pair(output_size); rmm::device_vector second_pair(output_size); - VT *d_first_pair = first_pair.data().get(); - VT *d_second_pair = second_pair.data().get(); + VT* d_first_pair = first_pair.data().get(); + VT* d_second_pair = second_pair.data().get(); // Figure out number of blocks and allocate memory for block bucket offsets ET num_blocks = (output_size + 
TWO_HOP_BLOCK_SIZE - 1) / TWO_HOP_BLOCK_SIZE; rmm::device_vector block_bucket_offsets(num_blocks + 1); - ET *d_block_bucket_offsets = block_bucket_offsets.data().get(); + ET* d_block_bucket_offsets = block_bucket_offsets.data().get(); // Compute the block bucket offsets dim3 grid, block; @@ -119,9 +119,9 @@ std::unique_ptr> get_two_hop_neighbors( } template std::unique_ptr> get_two_hop_neighbors( - legacy::GraphCSRView const &); + legacy::GraphCSRView const&); template std::unique_ptr> get_two_hop_neighbors( - legacy::GraphCSRView const &); + legacy::GraphCSRView const&); } // namespace cugraph diff --git a/cpp/src/traversal/two_hop_neighbors.cuh b/cpp/src/traversal/two_hop_neighbors.cuh index 87d3b36b861..e830fb4a95f 100644 --- a/cpp/src/traversal/two_hop_neighbors.cuh +++ b/cpp/src/traversal/two_hop_neighbors.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,13 +21,13 @@ #include -#define MAXBLOCKS 65535 +#define MAXBLOCKS 65535 #define TWO_HOP_BLOCK_SIZE 512 template struct degree_iterator { - edge_t const *offsets; - degree_iterator(edge_t const *_offsets) : offsets(_offsets) {} + edge_t const* offsets; + degree_iterator(edge_t const* _offsets) : offsets(_offsets) {} __host__ __device__ edge_t operator[](edge_t place) { @@ -53,7 +53,7 @@ struct self_loop_flagger { }; template -__device__ edge_t binsearch_maxle(const edge_t *vec, const edge_t val, edge_t low, edge_t high) +__device__ edge_t binsearch_maxle(const edge_t* vec, const edge_t val, edge_t low, edge_t high) { while (true) { if (low == high) return low; // we know it exists @@ -69,8 +69,8 @@ __device__ edge_t binsearch_maxle(const edge_t *vec, const edge_t val, edge_t lo } template -__global__ void compute_bucket_offsets_kernel(const edge_t *frontier_degrees_exclusive_sum, - edge_t *bucket_offsets, +__global__ void compute_bucket_offsets_kernel(const edge_t* frontier_degrees_exclusive_sum, + edge_t* bucket_offsets, const edge_t frontier_size, edge_t total_degree) { @@ -86,15 +86,15 @@ __global__ void compute_bucket_offsets_kernel(const edge_t *frontier_degrees_exc } template -__global__ void scatter_expand_kernel(const edge_t *exsum_degree, - const vertex_t *indices, - const edge_t *offsets, - const edge_t *bucket_offsets, +__global__ void scatter_expand_kernel(const edge_t* exsum_degree, + const vertex_t* indices, + const edge_t* offsets, + const edge_t* bucket_offsets, vertex_t num_verts, edge_t max_item, edge_t max_block, - vertex_t *output_first, - vertex_t *output_second) + vertex_t* output_first, + vertex_t* output_second) { __shared__ edge_t blockRange[2]; for (edge_t bid = blockIdx.x; bid < max_block; bid += gridDim.x) { diff --git a/cpp/src/tree/mst.cu b/cpp/src/tree/mst.cu index 0fae4f2bef7..e6caa629cd1 100644 --- a/cpp/src/tree/mst.cu +++ b/cpp/src/tree/mst.cu @@ -39,9 +39,9 @@ namespace detail { template std::unique_ptr> mst_impl( - raft::handle_t const &handle, - legacy::GraphCSRView const &graph, - rmm::mr::device_memory_resource *mr) + raft::handle_t const& handle, + legacy::GraphCSRView const& graph, + rmm::mr::device_memory_resource* mr) { auto stream = handle.get_stream(); @@ -69,19 +69,19 @@ std::unique_ptr> mst_impl( template std::unique_ptr> minimum_spanning_tree( - raft::handle_t const &handle, - legacy::GraphCSRView const &graph, - rmm::mr::device_memory_resource *mr) + raft::handle_t const& 
handle, + legacy::GraphCSRView const& graph, + rmm::mr::device_memory_resource* mr) { return detail::mst_impl(handle, graph, mr); } template std::unique_ptr> minimum_spanning_tree( - raft::handle_t const &handle, - legacy::GraphCSRView const &graph, - rmm::mr::device_memory_resource *mr); + raft::handle_t const& handle, + legacy::GraphCSRView const& graph, + rmm::mr::device_memory_resource* mr); template std::unique_ptr> -minimum_spanning_tree(raft::handle_t const &handle, - legacy::GraphCSRView const &graph, - rmm::mr::device_memory_resource *mr); +minimum_spanning_tree(raft::handle_t const& handle, + legacy::GraphCSRView const& graph, + rmm::mr::device_memory_resource* mr); } // namespace cugraph diff --git a/cpp/src/utilities/cython.cu b/cpp/src/utilities/cython.cu index 08402b38c1e..909e3d5b31f 100644 --- a/cpp/src/utilities/cython.cu +++ b/cpp/src/utilities/cython.cu @@ -88,7 +88,7 @@ std::vector compute_edge_counts(raft::handle_t const& handle, auto major_vertices = transposed ? reinterpret_cast(graph_container.dst_vertices) : reinterpret_cast(graph_container.src_vertices); - auto key_first = thrust::make_transform_iterator( + auto key_first = thrust::make_transform_iterator( major_vertices, compute_local_partition_id_t{d_lasts.data(), num_local_partitions}); rmm::device_uvector d_local_partition_ids(num_local_partitions, handle.get_stream()); rmm::device_uvector d_edge_counts(d_local_partition_ids.size(), handle.get_stream()); diff --git a/cpp/src/utilities/graph_utils.cuh b/cpp/src/utilities/graph_utils.cuh index 14275f52b9b..4b7c2baab19 100644 --- a/cpp/src/utilities/graph_utils.cuh +++ b/cpp/src/utilities/graph_utils.cuh @@ -32,12 +32,12 @@ namespace cugraph { namespace detail { //#define DEBUG 1 -#define CUDA_MAX_BLOCKS 65535 +#define CUDA_MAX_BLOCKS 65535 #define CUDA_MAX_KERNEL_THREADS 256 // kernel will launch at most 256 threads per block #define US template -__inline__ __device__ value_t parallel_prefix_sum(count_t n, index_t const *ind, value_t const *w) +__inline__ __device__ value_t parallel_prefix_sum(count_t n, index_t const* ind, value_t const* w) { count_t i, j, mn; value_t v, last; @@ -86,11 +86,11 @@ template struct axpy_functor : public thrust::binary_function { const T a; axpy_functor(T _a) : a(_a) {} - __host__ __device__ T operator()(const T &x, const T &y) const { return a * x + y; } + __host__ __device__ T operator()(const T& x, const T& y) const { return a * x + y; } }; template -void axpy(size_t n, T a, T *x, T *y) +void axpy(size_t n, T a, T* x, T* y) { rmm::cuda_stream_view stream_view; thrust::transform(rmm::exec_policy(stream_view), @@ -105,11 +105,11 @@ void axpy(size_t n, T a, T *x, T *y) // norm template struct square { - __host__ __device__ T operator()(const T &x) const { return x * x; } + __host__ __device__ T operator()(const T& x) const { return x * x; } }; template -T nrm2(size_t n, T *x) +T nrm2(size_t n, T* x) { rmm::cuda_stream_view stream_view; T init = 0; @@ -124,7 +124,7 @@ T nrm2(size_t n, T *x) } template -T nrm1(size_t n, T *x) +T nrm1(size_t n, T* x) { rmm::cuda_stream_view stream_view; T result = thrust::reduce(rmm::exec_policy(stream_view), @@ -135,7 +135,7 @@ T nrm1(size_t n, T *x) } template -void scal(size_t n, T val, T *x) +void scal(size_t n, T val, T* x) { rmm::cuda_stream_view stream_view; thrust::transform(rmm::exec_policy(stream_view), @@ -148,7 +148,7 @@ void scal(size_t n, T val, T *x) } template -void addv(size_t n, T val, T *x) +void addv(size_t n, T val, T* x) { rmm::cuda_stream_view stream_view; 
thrust::transform(rmm::exec_policy(stream_view), @@ -161,7 +161,7 @@ void addv(size_t n, T val, T *x) } template -void fill(size_t n, T *x, T value) +void fill(size_t n, T* x, T value) { rmm::cuda_stream_view stream_view; thrust::fill(rmm::exec_policy(stream_view), @@ -172,7 +172,7 @@ void fill(size_t n, T *x, T value) } template -void scatter(size_t n, T *src, T *dst, M *map) +void scatter(size_t n, T* src, T* dst, M* map) { rmm::cuda_stream_view stream_view; thrust::scatter(rmm::exec_policy(stream_view), @@ -184,7 +184,7 @@ void scatter(size_t n, T *src, T *dst, M *map) } template -void printv(size_t n, T *vec, int offset) +void printv(size_t n, T* vec, int offset) { thrust::device_ptr dev_ptr(vec); std::cout.precision(15); @@ -199,7 +199,7 @@ void printv(size_t n, T *vec, int offset) } template -void copy(size_t n, T *x, T *res) +void copy(size_t n, T* x, T* res) { thrust::device_ptr dev_ptr(x); thrust::device_ptr res_ptr(res); @@ -217,11 +217,11 @@ template struct dangling_functor : public thrust::unary_function { const T val; dangling_functor(T _val) : val(_val) {} - __host__ __device__ T operator()(const T &x) const { return val + x; } + __host__ __device__ T operator()(const T& x) const { return val + x; } }; template -void update_dangling_nodes(size_t n, T *dangling_nodes, T damping_factor) +void update_dangling_nodes(size_t n, T* dangling_nodes, T damping_factor) { rmm::cuda_stream_view stream_view; thrust::transform_if(rmm::exec_policy(stream_view), @@ -237,15 +237,15 @@ void update_dangling_nodes(size_t n, T *dangling_nodes, T damping_factor) template __global__ void degree_coo(const IndexType n, const IndexType e, - const IndexType *ind, - ValueType *degree) + const IndexType* ind, + ValueType* degree) { for (int i = threadIdx.x + blockIdx.x * blockDim.x; i < e; i += gridDim.x * blockDim.x) atomicAdd(°ree[ind[i]], (ValueType)1.0); } template -__global__ void flag_leafs_kernel(const size_t n, const IndexType *degree, ValueType *bookmark) +__global__ void flag_leafs_kernel(const size_t n, const IndexType* degree, ValueType* bookmark) { for (auto i = threadIdx.x + blockIdx.x * blockDim.x; i < n; i += gridDim.x * blockDim.x) if (degree[i] == 0) bookmark[i] = 1.0; @@ -254,19 +254,19 @@ __global__ void flag_leafs_kernel(const size_t n, const IndexType *degree, Value template __global__ void degree_offsets(const IndexType n, const IndexType e, - const IndexType *ind, - ValueType *degree) + const IndexType* ind, + ValueType* degree) { for (int i = threadIdx.x + blockIdx.x * blockDim.x; i < n; i += gridDim.x * blockDim.x) degree[i] += ind[i + 1] - ind[i]; } template -__global__ void type_convert(FromType *array, int n) +__global__ void type_convert(FromType* array, int n) { for (int i = threadIdx.x + blockIdx.x * blockDim.x; i < n; i += gridDim.x * blockDim.x) { ToType val = array[i]; - ToType *vals = (ToType *)array; + ToType* vals = (ToType*)array; vals[i] = val; } } @@ -274,10 +274,10 @@ __global__ void type_convert(FromType *array, int n) template __global__ void equi_prob3(const IndexType n, const IndexType e, - const IndexType *csrPtr, - const IndexType *csrInd, - ValueType *val, - IndexType *degree) + const IndexType* csrPtr, + const IndexType* csrInd, + ValueType* val, + IndexType* degree) { int j, row, col; for (row = threadIdx.z + blockIdx.z * blockDim.z; row < n; row += gridDim.z * blockDim.z) { @@ -293,10 +293,10 @@ __global__ void equi_prob3(const IndexType n, template __global__ void equi_prob2(const IndexType n, const IndexType e, - const IndexType *csrPtr, - const 
IndexType *csrInd, - ValueType *val, - IndexType *degree) + const IndexType* csrPtr, + const IndexType* csrInd, + ValueType* val, + IndexType* degree) { int row = blockIdx.x * blockDim.x + threadIdx.x; if (row < n) { @@ -314,10 +314,10 @@ __global__ void equi_prob2(const IndexType n, template void HT_matrix_csc_coo(const IndexType n, const IndexType e, - const IndexType *csrPtr, - const IndexType *csrInd, - ValueType *val, - ValueType *bookmark) + const IndexType* csrPtr, + const IndexType* csrInd, + ValueType* val, + ValueType* bookmark) { rmm::cuda_stream_view stream_view; rmm::device_uvector degree(n, stream_view); @@ -360,7 +360,7 @@ void HT_matrix_csc_coo(const IndexType n, } template -__global__ void offsets_to_indices_kernel(const offsets_t *offsets, index_t v, index_t *indices) +__global__ void offsets_to_indices_kernel(const offsets_t* offsets, index_t v, index_t* indices) { auto tid{threadIdx.x}; auto ctaStart{blockIdx.x}; @@ -377,7 +377,7 @@ __global__ void offsets_to_indices_kernel(const offsets_t *offsets, index_t v, i } template -void offsets_to_indices(const offsets_t *offsets, index_t v, index_t *indices) +void offsets_to_indices(const offsets_t* offsets, index_t v, index_t* indices) { cudaStream_t stream{nullptr}; index_t nthreads = min(v, (index_t)CUDA_MAX_KERNEL_THREADS); @@ -387,7 +387,7 @@ void offsets_to_indices(const offsets_t *offsets, index_t v, index_t *indices) } template -void sequence(IndexType n, IndexType *vec, IndexType init = 0) +void sequence(IndexType n, IndexType* vec, IndexType init = 0) { thrust::sequence( thrust::device, thrust::device_pointer_cast(vec), thrust::device_pointer_cast(vec + n), init); @@ -395,7 +395,7 @@ void sequence(IndexType n, IndexType *vec, IndexType init = 0) } template -bool has_negative_val(DistType *arr, size_t n) +bool has_negative_val(DistType* arr, size_t n) { // custom kernel with boolean bitwise reduce may be // faster. diff --git a/cpp/src/utilities/high_res_timer.hpp b/cpp/src/utilities/high_res_timer.hpp index 807496c8f86..cf265991f21 100644 --- a/cpp/src/utilities/high_res_timer.hpp +++ b/cpp/src/utilities/high_res_timer.hpp @@ -54,7 +54,7 @@ class HighResTimer { it->second.second += stop_time.tv_sec * 1000000000 + stop_time.tv_nsec; } - double get_average_runtime(std::string const &label) + double get_average_runtime(std::string const& label) { auto it = timers.find(label); if (it != timers.end()) { @@ -70,7 +70,7 @@ class HighResTimer { // // Add display functions... 
specific label or entire structure // - void display(std::ostream &os) + void display(std::ostream& os) { os << "Timer Results (in ms):" << std::endl; for (auto it = timers.begin(); it != timers.end(); ++it) { @@ -80,7 +80,7 @@ class HighResTimer { } } - void display(std::ostream &os, std::string label) + void display(std::ostream& os, std::string label) { auto it = timers.find(label); os << it->first << " called " << it->second.first @@ -88,7 +88,7 @@ class HighResTimer { << std::endl; } - void display_and_clear(std::ostream &os) + void display_and_clear(std::ostream& os) { os << "Timer Results (in ms):" << std::endl; for (auto it = timers.begin(); it != timers.end(); ++it) { diff --git a/cpp/src/utilities/path_retrieval.cu b/cpp/src/utilities/path_retrieval.cu index 2d862f659e5..765cccc1916 100644 --- a/cpp/src/utilities/path_retrieval.cu +++ b/cpp/src/utilities/path_retrieval.cu @@ -26,11 +26,11 @@ namespace cugraph { namespace detail { template -__global__ void get_traversed_cost_kernel(vertex_t const *vertices, - vertex_t const *preds, - vertex_t const *vtx_map, - weight_t const *info_weights, - weight_t *out, +__global__ void get_traversed_cost_kernel(vertex_t const* vertices, + vertex_t const* preds, + vertex_t const* vtx_map, + weight_t const* info_weights, + weight_t* out, vertex_t stop_vertex, vertex_t num_vertices) { @@ -48,11 +48,11 @@ __global__ void get_traversed_cost_kernel(vertex_t const *vertices, } template -void get_traversed_cost_impl(raft::handle_t const &handle, - vertex_t const *vertices, - vertex_t const *preds, - weight_t const *info_weights, - weight_t *out, +void get_traversed_cost_impl(raft::handle_t const& handle, + vertex_t const* vertices, + vertex_t const* preds, + weight_t const* info_weights, + weight_t* out, vertex_t stop_vertex, vertex_t num_vertices) { @@ -70,8 +70,8 @@ void get_traversed_cost_impl(raft::handle_t const &handle, rmm::device_uvector vtx_map_v(num_vertices, stream); rmm::device_uvector vtx_keys_v(num_vertices, stream); - vertex_t *vtx_map = vtx_map_v.data(); - vertex_t *vtx_keys = vtx_keys_v.data(); + vertex_t* vtx_map = vtx_map_v.data(); + vertex_t* vtx_keys = vtx_keys_v.data(); raft::copy(vtx_keys, vertices, num_vertices, stream); thrust::sequence(rmm::exec_policy(stream)->on(stream), vtx_map, vtx_map + num_vertices); @@ -85,11 +85,11 @@ void get_traversed_cost_impl(raft::handle_t const &handle, } // namespace detail template -void get_traversed_cost(raft::handle_t const &handle, - vertex_t const *vertices, - vertex_t const *preds, - weight_t const *info_weights, - weight_t *out, +void get_traversed_cost(raft::handle_t const& handle, + vertex_t const* vertices, + vertex_t const* preds, + weight_t const* info_weights, + weight_t* out, vertex_t stop_vertex, vertex_t num_vertices) { @@ -99,35 +99,35 @@ void get_traversed_cost(raft::handle_t const &handle, handle, vertices, preds, info_weights, out, stop_vertex, num_vertices); } -template void get_traversed_cost(raft::handle_t const &handle, - int32_t const *vertices, - int32_t const *preds, - float const *info_weights, - float *out, +template void get_traversed_cost(raft::handle_t const& handle, + int32_t const* vertices, + int32_t const* preds, + float const* info_weights, + float* out, int32_t stop_vertex, int32_t num_vertices); -template void get_traversed_cost(raft::handle_t const &handle, - int32_t const *vertices, - int32_t const *preds, - double const *info_weights, - double *out, +template void get_traversed_cost(raft::handle_t const& handle, + int32_t const* vertices, + int32_t 
const* preds, + double const* info_weights, + double* out, int32_t stop_vertex, int32_t num_vertices); -template void get_traversed_cost(raft::handle_t const &handle, - int64_t const *vertices, - int64_t const *preds, - float const *info_weights, - float *out, +template void get_traversed_cost(raft::handle_t const& handle, + int64_t const* vertices, + int64_t const* preds, + float const* info_weights, + float* out, int64_t stop_vertex, int64_t num_vertices); -template void get_traversed_cost(raft::handle_t const &handle, - int64_t const *vertices, - int64_t const *preds, - double const *info_weights, - double *out, +template void get_traversed_cost(raft::handle_t const& handle, + int64_t const* vertices, + int64_t const* preds, + double const* info_weights, + double* out, int64_t stop_vertex, int64_t num_vertices); } // namespace cugraph diff --git a/cpp/src/utilities/spmv_1D.cu b/cpp/src/utilities/spmv_1D.cu index 8a7378e69d3..b4db219fb89 100644 --- a/cpp/src/utilities/spmv_1D.cu +++ b/cpp/src/utilities/spmv_1D.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,13 +19,13 @@ namespace cugraph { namespace mg { template -MGcsrmv::MGcsrmv(raft::handle_t const &handle, - vertex_t *local_vertices, - vertex_t *part_off, - edge_t *off, - vertex_t *ind, - weight_t *val, - weight_t *x) +MGcsrmv::MGcsrmv(raft::handle_t const& handle, + vertex_t* local_vertices, + vertex_t* part_off, + edge_t* off, + vertex_t* ind, + weight_t* val, + weight_t* x) : handle_(handle), local_vertices_(local_vertices), part_off_(part_off), @@ -49,7 +49,7 @@ MGcsrmv::~MGcsrmv() } template -void MGcsrmv::run(weight_t *x) +void MGcsrmv::run(weight_t* x) { using namespace raft::matrix; @@ -72,7 +72,7 @@ void MGcsrmv::run(weight_t *x) auto stream = handle_.get_stream(); - auto const &comm{handle_.get_comms()}; // local + auto const& comm{handle_.get_comms()}; // local std::vector recvbuf(comm.get_size()); std::vector displs(comm.get_size()); diff --git a/cpp/tests/centrality/betweenness_centrality_test.cu b/cpp/tests/centrality/betweenness_centrality_test.cu index c3883d9694f..a171b0010d1 100644 --- a/cpp/tests/centrality/betweenness_centrality_test.cu +++ b/cpp/tests/centrality/betweenness_centrality_test.cu @@ -50,68 +50,80 @@ // C++ Reference Implementation // ============================================================================ template -void ref_accumulation(result_t *result, +void ref_accumulation(result_t* result, vertex_t const number_of_vertices, - std::stack &S, - std::vector> &pred, - std::vector &sigmas, - std::vector &deltas, + std::stack& S, + std::vector>& pred, + std::vector& sigmas, + std::vector& deltas, vertex_t source) { - for (vertex_t v = 0; v < number_of_vertices; ++v) { deltas[v] = 0; } + for (vertex_t v = 0; v < number_of_vertices; ++v) { + deltas[v] = 0; + } while (!S.empty()) { vertex_t w = S.top(); S.pop(); - for (vertex_t v : pred[w]) { deltas[v] += (sigmas[v] / sigmas[w]) * (1.0 + deltas[w]); } + for (vertex_t v : pred[w]) { + deltas[v] += (sigmas[v] / sigmas[w]) * (1.0 + deltas[w]); + } if (w != source) { result[w] += deltas[w]; } } } template -void ref_endpoints_accumulation(result_t *result, +void ref_endpoints_accumulation(result_t* result, vertex_t const number_of_vertices, - std::stack &S, - std::vector> &pred, - std::vector &sigmas, - std::vector &deltas, + std::stack& S, + 
std::vector>& pred, + std::vector& sigmas, + std::vector& deltas, vertex_t source) { result[source] += S.size() - 1; - for (vertex_t v = 0; v < number_of_vertices; ++v) { deltas[v] = 0; } + for (vertex_t v = 0; v < number_of_vertices; ++v) { + deltas[v] = 0; + } while (!S.empty()) { vertex_t w = S.top(); S.pop(); - for (vertex_t v : pred[w]) { deltas[v] += (sigmas[v] / sigmas[w]) * (1.0 + deltas[w]); } + for (vertex_t v : pred[w]) { + deltas[v] += (sigmas[v] / sigmas[w]) * (1.0 + deltas[w]); + } if (w != source) { result[w] += deltas[w] + 1; } } } template -void ref_edge_accumulation(result_t *result, +void ref_edge_accumulation(result_t* result, vertex_t const number_of_vertices, - std::stack &S, - std::vector> &pred, - std::vector &sigmas, - std::vector &deltas, + std::stack& S, + std::vector>& pred, + std::vector& sigmas, + std::vector& deltas, vertex_t source) { - for (vertex_t v = 0; v < number_of_vertices; ++v) { deltas[v] = 0; } + for (vertex_t v = 0; v < number_of_vertices; ++v) { + deltas[v] = 0; + } while (!S.empty()) { vertex_t w = S.top(); S.pop(); - for (vertex_t v : pred[w]) { deltas[v] += (sigmas[v] / sigmas[w]) * (1.0 + deltas[w]); } + for (vertex_t v : pred[w]) { + deltas[v] += (sigmas[v] / sigmas[w]) * (1.0 + deltas[w]); + } if (w != source) { result[w] += deltas[w]; } } } // Algorithm 1: Shortest-path vertex betweenness, (Brandes, 2001) template -void reference_betweenness_centrality_impl(vertex_t *indices, - edge_t *offsets, +void reference_betweenness_centrality_impl(vertex_t* indices, + edge_t* offsets, vertex_t const number_of_vertices, - result_t *result, + result_t* result, bool endpoints, - vertex_t const *sources, + vertex_t const* sources, vertex_t const number_of_sources) { std::queue Q; @@ -159,7 +171,7 @@ void reference_betweenness_centrality_impl(vertex_t *indices, } template -void reference_rescale(result_t *result, +void reference_rescale(result_t* result, bool directed, bool normalize, bool endpoints, @@ -190,25 +202,27 @@ void reference_rescale(result_t *result, rescale_factor *= (casted_number_of_vertices / casted_number_of_sources); } } - for (auto idx = 0; idx < number_of_vertices; ++idx) { result[idx] *= rescale_factor; } + for (auto idx = 0; idx < number_of_vertices; ++idx) { + result[idx] *= rescale_factor; + } } template void reference_betweenness_centrality( - cugraph::legacy::GraphCSRView const &graph, - result_t *result, + cugraph::legacy::GraphCSRView const& graph, + result_t* result, bool normalize, bool endpoints, // This is not yet implemented vertex_t const number_of_sources, - vertex_t const *sources) + vertex_t const* sources) { vertex_t number_of_vertices = graph.number_of_vertices; edge_t number_of_edges = graph.number_of_edges; thrust::host_vector h_indices(number_of_edges); thrust::host_vector h_offsets(number_of_vertices + 1); - thrust::device_ptr d_indices((vertex_t *)&graph.indices[0]); - thrust::device_ptr d_offsets((edge_t *)&graph.offsets[0]); + thrust::device_ptr d_indices((vertex_t*)&graph.indices[0]); + thrust::device_ptr d_offsets((edge_t*)&graph.offsets[0]); thrust::copy(d_indices, d_indices + number_of_edges, h_indices.begin()); thrust::copy(d_offsets, d_offsets + (number_of_vertices + 1), h_offsets.begin()); @@ -249,7 +263,7 @@ template void reference_betweenness_centrality( // Compare while allowing relatie error of epsilon // zero_threshold indicates when we should drop comparison for small numbers template -bool compare_close(const T &a, const T &b, const precision_t epsilon, precision_t zero_threshold) +bool 
compare_close(const T& a, const T& b, const precision_t epsilon, precision_t zero_threshold) { return ((zero_threshold > a && zero_threshold > b)) || (a >= b * (1.0 - epsilon)) && (a <= b * (1.0 + epsilon)); @@ -265,12 +279,12 @@ typedef struct BC_Usecase_t { std::string config_; // Path to graph file std::string file_path_; // Complete path to graph using dataset_root_dir int number_of_sources_; // Starting point from the traversal - BC_Usecase_t(const std::string &config, int number_of_sources) + BC_Usecase_t(const std::string& config, int number_of_sources) : config_(config), number_of_sources_(number_of_sources) { // assume relative paths are relative to RAPIDS_DATASET_ROOT_DIR // FIXME: Use platform independent stuff from c++14/17 on compiler update - const std::string &rapidsDatasetRootDir = cugraph::test::get_rapids_dataset_root_dir(); + const std::string& rapidsDatasetRootDir = cugraph::test::get_rapids_dataset_root_dir(); if ((config_ != "") && (config_[0] != '/')) { file_path_ = rapidsDatasetRootDir + "/" + config_; } else { @@ -301,7 +315,7 @@ class Tests_BC : public ::testing::TestWithParam { typename result_t, bool normalize, bool endpoints> - void run_current_test(const BC_Usecase &configuration) + void run_current_test(const BC_Usecase& configuration) { // Step 1: Construction of the graph based on configuration bool is_directed = false; @@ -324,7 +338,7 @@ class Tests_BC : public ::testing::TestWithParam { std::vector sources(configuration.number_of_sources_); thrust::sequence(thrust::host, sources.begin(), sources.end(), 0); - vertex_t *sources_ptr = nullptr; + vertex_t* sources_ptr = nullptr; if (configuration.number_of_sources_ > 0) { sources_ptr = sources.data(); } reference_betweenness_centrality( @@ -339,7 +353,7 @@ class Tests_BC : public ::testing::TestWithParam { d_result.data().get(), normalize, endpoints, - static_cast(nullptr), + static_cast(nullptr), configuration.number_of_sources_, sources_ptr); cudaDeviceSynchronize(); diff --git a/cpp/tests/centrality/edge_betweenness_centrality_test.cu b/cpp/tests/centrality/edge_betweenness_centrality_test.cu index 5f832475b3b..67fdb22f953 100644 --- a/cpp/tests/centrality/edge_betweenness_centrality_test.cu +++ b/cpp/tests/centrality/edge_betweenness_centrality_test.cu @@ -53,8 +53,8 @@ template edge_t get_edge_index_from_source_and_destination(vertex_t source_vertex, vertex_t destination_vertex, - vertex_t const *indices, - edge_t const *offsets) + vertex_t const* indices, + edge_t const* offsets) { edge_t index = -1; edge_t first_edge_idx = offsets[source_vertex]; @@ -65,17 +65,19 @@ edge_t get_edge_index_from_source_and_destination(vertex_t source_vertex, } template -void ref_accumulation(result_t *result, - vertex_t const *indices, - edge_t const *offsets, +void ref_accumulation(result_t* result, + vertex_t const* indices, + edge_t const* offsets, vertex_t const number_of_vertices, - std::stack &S, - std::vector> &pred, - std::vector &sigmas, - std::vector &deltas, + std::stack& S, + std::vector>& pred, + std::vector& sigmas, + std::vector& deltas, vertex_t source) { - for (vertex_t v = 0; v < number_of_vertices; ++v) { deltas[v] = 0; } + for (vertex_t v = 0; v < number_of_vertices; ++v) { + deltas[v] = 0; + } while (!S.empty()) { vertex_t w = S.top(); S.pop(); @@ -93,11 +95,11 @@ void ref_accumulation(result_t *result, // Algorithm 1: Shortest-path vertex betweenness, (Brandes, 2001) template -void reference_edge_betweenness_centrality_impl(vertex_t *indices, - edge_t *offsets, +void 
reference_edge_betweenness_centrality_impl(vertex_t* indices, + edge_t* offsets, vertex_t const number_of_vertices, - result_t *result, - vertex_t const *sources, + result_t* result, + vertex_t const* sources, vertex_t const number_of_sources) { std::queue Q; @@ -135,7 +137,7 @@ void reference_edge_betweenness_centrality_impl(vertex_t *indices, } template -void reference_rescale(result_t *result, +void reference_rescale(result_t* result, bool directed, bool normalize, vertex_t const number_of_vertices, @@ -150,24 +152,26 @@ void reference_rescale(result_t *result, } else { if (!directed) { rescale_factor /= static_cast(2); } } - for (auto idx = 0; idx < number_of_edges; ++idx) { result[idx] *= rescale_factor; } + for (auto idx = 0; idx < number_of_edges; ++idx) { + result[idx] *= rescale_factor; + } } template void reference_edge_betweenness_centrality( - cugraph::legacy::GraphCSRView const &graph, - result_t *result, + cugraph::legacy::GraphCSRView const& graph, + result_t* result, bool normalize, vertex_t const number_of_sources, - vertex_t const *sources) + vertex_t const* sources) { vertex_t number_of_vertices = graph.number_of_vertices; edge_t number_of_edges = graph.number_of_edges; thrust::host_vector h_indices(number_of_edges); thrust::host_vector h_offsets(number_of_vertices + 1); - thrust::device_ptr d_indices((vertex_t *)&graph.indices[0]); - thrust::device_ptr d_offsets((edge_t *)&graph.offsets[0]); + thrust::device_ptr d_indices((vertex_t*)&graph.indices[0]); + thrust::device_ptr d_offsets((edge_t*)&graph.offsets[0]); thrust::copy(d_indices, d_indices + number_of_edges, h_indices.begin()); thrust::copy(d_offsets, d_offsets + (number_of_vertices + 1), h_offsets.begin()); @@ -186,7 +190,7 @@ void reference_edge_betweenness_centrality( // Compare while allowing relatie error of epsilon // zero_threshold indicates when we should drop comparison for small numbers template -bool compare_close(const T &a, const T &b, const precision_t epsilon, precision_t zero_threshold) +bool compare_close(const T& a, const T& b, const precision_t epsilon, precision_t zero_threshold) { return ((zero_threshold > a && zero_threshold > b)) || (a >= b * (1.0 - epsilon)) && (a <= b * (1.0 + epsilon)); @@ -202,12 +206,12 @@ typedef struct EdgeBC_Usecase_t { std::string config_; // Path to graph file std::string file_path_; // Complete path to graph using dataset_root_dir int number_of_sources_; // Starting point from the traversal - EdgeBC_Usecase_t(const std::string &config, int number_of_sources) + EdgeBC_Usecase_t(const std::string& config, int number_of_sources) : config_(config), number_of_sources_(number_of_sources) { // assume relative paths are relative to RAPIDS_DATASET_ROOT_DIR // FIXME: Use platform independent stuff from c++14/17 on compiler update - const std::string &rapidsDatasetRootDir = cugraph::test::get_rapids_dataset_root_dir(); + const std::string& rapidsDatasetRootDir = cugraph::test::get_rapids_dataset_root_dir(); if ((config_ != "") && (config_[0] != '/')) { file_path_ = rapidsDatasetRootDir + "/" + config_; } else { @@ -237,7 +241,7 @@ class Tests_EdgeBC : public ::testing::TestWithParam { typename weight_t, typename result_t, bool normalize> - void run_current_test(const EdgeBC_Usecase &configuration) + void run_current_test(const EdgeBC_Usecase& configuration) { // Step 1: Construction of the graph based on configuration bool is_directed = false; @@ -260,7 +264,7 @@ class Tests_EdgeBC : public ::testing::TestWithParam { std::vector sources(configuration.number_of_sources_); 
thrust::sequence(thrust::host, sources.begin(), sources.end(), 0); - vertex_t *sources_ptr = nullptr; + vertex_t* sources_ptr = nullptr; if (configuration.number_of_sources_ > 0) { sources_ptr = sources.data(); } reference_edge_betweenness_centrality( @@ -274,7 +278,7 @@ class Tests_EdgeBC : public ::testing::TestWithParam { G, d_result.data().get(), normalize, - static_cast(nullptr), + static_cast(nullptr), configuration.number_of_sources_, sources_ptr); CUDA_TRY(cudaMemcpy(result.data(), diff --git a/cpp/tests/centrality/katz_centrality_test.cu b/cpp/tests/centrality/katz_centrality_test.cu index e4951dd9098..ee2df5347fc 100644 --- a/cpp/tests/centrality/katz_centrality_test.cu +++ b/cpp/tests/centrality/katz_centrality_test.cu @@ -35,7 +35,9 @@ std::vector getGoldenTopKIds(std::ifstream& fs_result, int k = 10) std::vector vec; int val; int count = 0; - while (fs_result >> val && ((count++) < k)) { vec.push_back(val); } + while (fs_result >> val && ((count++) < k)) { + vec.push_back(val); + } vec.resize(k); return vec; } diff --git a/cpp/tests/community/ecg_test.cpp b/cpp/tests/community/ecg_test.cpp index 15c2fb118c9..f174d882937 100644 --- a/cpp/tests/community/ecg_test.cpp +++ b/cpp/tests/community/ecg_test.cpp @@ -143,7 +143,9 @@ TEST(ecg, dolphin) ASSERT_EQ((min >= 0), 1); std::set cluster_ids; - for (auto c : cluster_id) { cluster_ids.insert(c); } + for (auto c : cluster_id) { + cluster_ids.insert(c); + } ASSERT_EQ(cluster_ids.size(), size_t(max + 1)); diff --git a/cpp/tests/community/mg_louvain_helper.cu b/cpp/tests/community/mg_louvain_helper.cu index e5b99c5cbdd..b5347778b4c 100644 --- a/cpp/tests/community/mg_louvain_helper.cu +++ b/cpp/tests/community/mg_louvain_helper.cu @@ -32,10 +32,10 @@ namespace cugraph { namespace test { template -void single_gpu_renumber_edgelist_given_number_map(raft::handle_t const &handle, - rmm::device_uvector &edgelist_rows_v, - rmm::device_uvector &edgelist_cols_v, - rmm::device_uvector &renumber_map_gathered_v) +void single_gpu_renumber_edgelist_given_number_map(raft::handle_t const& handle, + rmm::device_uvector& edgelist_rows_v, + rmm::device_uvector& edgelist_cols_v, + rmm::device_uvector& renumber_map_gathered_v) { rmm::device_uvector index_v(renumber_map_gathered_v.size(), handle.get_stream()); @@ -63,9 +63,9 @@ template std::tuple, rmm::device_uvector, std::optional>> -compressed_sparse_to_edgelist(edge_t const *compressed_sparse_offsets, - vertex_t const *compressed_sparse_indices, - std::optional compressed_sparse_weights, +compressed_sparse_to_edgelist(edge_t const* compressed_sparse_offsets, + vertex_t const* compressed_sparse_indices, + std::optional compressed_sparse_weights, vertex_t major_first, vertex_t major_last, cudaStream_t stream) @@ -112,9 +112,9 @@ compressed_sparse_to_edgelist(edge_t const *compressed_sparse_offsets, template void sort_and_coarsen_edgelist( - rmm::device_uvector &edgelist_major_vertices /* [INOUT] */, - rmm::device_uvector &edgelist_minor_vertices /* [INOUT] */, - std::optional> &edgelist_weights /* [INOUT] */, + rmm::device_uvector& edgelist_major_vertices /* [INOUT] */, + rmm::device_uvector& edgelist_minor_vertices /* [INOUT] */, + std::optional>& edgelist_weights /* [INOUT] */, cudaStream_t stream) { auto pair_first = thrust::make_zip_iterator( @@ -170,11 +170,11 @@ std::tuple, rmm::device_uvector, std::optional>> compressed_sparse_to_relabeled_and_sorted_and_coarsened_edgelist( - edge_t const *compressed_sparse_offsets, - vertex_t const *compressed_sparse_indices, - std::optional 
compressed_sparse_weights, - vertex_t const *p_major_labels, - vertex_t const *p_minor_labels, + edge_t const* compressed_sparse_offsets, + vertex_t const* compressed_sparse_indices, + std::optional compressed_sparse_weights, + vertex_t const* p_major_labels, + vertex_t const* p_minor_labels, vertex_t major_first, vertex_t major_last, vertex_t minor_first, @@ -217,10 +217,10 @@ compressed_sparse_to_relabeled_and_sorted_and_coarsened_edgelist( template std::unique_ptr> coarsen_graph( - raft::handle_t const &handle, - cugraph::experimental::graph_view_t const - &graph_view, - vertex_t const *labels) + raft::handle_t const& handle, + cugraph::experimental::graph_view_t const& + graph_view, + vertex_t const* labels) { auto [coarsened_edgelist_major_vertices, coarsened_edgelist_minor_vertices, @@ -238,14 +238,13 @@ coarsen_graph( handle.get_stream()); cugraph::experimental::edgelist_t edgelist{}; - edgelist.p_src_vertices = store_transposed ? coarsened_edgelist_minor_vertices.data() - : coarsened_edgelist_major_vertices.data(); - edgelist.p_dst_vertices = store_transposed ? coarsened_edgelist_major_vertices.data() - : coarsened_edgelist_minor_vertices.data(); - edgelist.p_edge_weights = - coarsened_edgelist_weights - ? std::optional{(*coarsened_edgelist_weights).data()} - : std::nullopt; + edgelist.p_src_vertices = store_transposed ? coarsened_edgelist_minor_vertices.data() + : coarsened_edgelist_major_vertices.data(); + edgelist.p_dst_vertices = store_transposed ? coarsened_edgelist_major_vertices.data() + : coarsened_edgelist_minor_vertices.data(); + edgelist.p_edge_weights = coarsened_edgelist_weights + ? std::optional{(*coarsened_edgelist_weights).data()} + : std::nullopt; edgelist.number_of_edges = static_cast(coarsened_edgelist_major_vertices.size()); vertex_t new_number_of_vertices = @@ -267,16 +266,16 @@ coarsen_graph( // explicit instantiation template void single_gpu_renumber_edgelist_given_number_map( - raft::handle_t const &handle, - rmm::device_uvector &d_edgelist_rows, - rmm::device_uvector &d_edgelist_cols, - rmm::device_uvector &d_renumber_map_gathered_v); + raft::handle_t const& handle, + rmm::device_uvector& d_edgelist_rows, + rmm::device_uvector& d_edgelist_cols, + rmm::device_uvector& d_renumber_map_gathered_v); template std::unique_ptr> coarsen_graph( - raft::handle_t const &handle, - cugraph::experimental::graph_view_t const &graph_view, - int32_t const *labels); + raft::handle_t const& handle, + cugraph::experimental::graph_view_t const& graph_view, + int32_t const* labels); } // namespace test } // namespace cugraph diff --git a/cpp/tests/community/mg_louvain_helper.hpp b/cpp/tests/community/mg_louvain_helper.hpp index 5ed710b7417..7e03f435724 100644 --- a/cpp/tests/community/mg_louvain_helper.hpp +++ b/cpp/tests/community/mg_louvain_helper.hpp @@ -25,29 +25,29 @@ namespace cugraph { namespace test { template -bool compare_renumbered_vectors(raft::handle_t const &handle, - std::vector const &v1, - std::vector const &v2); +bool compare_renumbered_vectors(raft::handle_t const& handle, + std::vector const& v1, + std::vector const& v2); template -bool compare_renumbered_vectors(raft::handle_t const &handle, - rmm::device_uvector const &v1, - rmm::device_uvector const &v2); +bool compare_renumbered_vectors(raft::handle_t const& handle, + rmm::device_uvector const& v1, + rmm::device_uvector const& v2); template void single_gpu_renumber_edgelist_given_number_map( - raft::handle_t const &handle, - rmm::device_uvector &d_edgelist_rows, - rmm::device_uvector &d_edgelist_cols, - 
rmm::device_uvector &d_renumber_map_gathered_v); + raft::handle_t const& handle, + rmm::device_uvector& d_edgelist_rows, + rmm::device_uvector& d_edgelist_cols, + rmm::device_uvector& d_renumber_map_gathered_v); template std::unique_ptr> coarsen_graph( - raft::handle_t const &handle, - cugraph::experimental::graph_view_t const - &graph_view, - vertex_t const *labels); + raft::handle_t const& handle, + cugraph::experimental::graph_view_t const& + graph_view, + vertex_t const* labels); } // namespace test } // namespace cugraph diff --git a/cpp/tests/community/mg_louvain_test.cpp b/cpp/tests/community/mg_louvain_test.cpp index 971685d7537..1f7276fa116 100644 --- a/cpp/tests/community/mg_louvain_test.cpp +++ b/cpp/tests/community/mg_louvain_test.cpp @@ -183,7 +183,9 @@ class Louvain_MG_Testfixture : public ::testing::TestWithParam auto const comm_rank = comm.get_rank(); auto row_comm_size = static_cast(sqrt(static_cast(comm_size))); - while (comm_size % row_comm_size != 0) { --row_comm_size; } + while (comm_size % row_comm_size != 0) { + --row_comm_size; + } cugraph::partition_2d::subcomm_factory_t subcomm_factory(handle, row_comm_size); diff --git a/cpp/tests/components/mg_weakly_connected_components_test.cpp b/cpp/tests/components/mg_weakly_connected_components_test.cpp index 9285b3ed668..11e3f803b38 100644 --- a/cpp/tests/components/mg_weakly_connected_components_test.cpp +++ b/cpp/tests/components/mg_weakly_connected_components_test.cpp @@ -75,7 +75,9 @@ class Tests_MGWeaklyConnectedComponents auto const comm_rank = comm.get_rank(); auto row_comm_size = static_cast(sqrt(static_cast(comm_size))); - while (comm_size % row_comm_size != 0) { --row_comm_size; } + while (comm_size % row_comm_size != 0) { + --row_comm_size; + } cugraph::partition_2d::subcomm_factory_t subcomm_factory(handle, row_comm_size); diff --git a/cpp/tests/components/scc_test.cu b/cpp/tests/components/scc_test.cu index 5448cf79cce..eaf6daf2d29 100644 --- a/cpp/tests/components/scc_test.cu +++ b/cpp/tests/components/scc_test.cu @@ -115,7 +115,8 @@ struct Tests_Strongly_CC : ::testing::TestWithParam { } std::cout << "#iterations:\n"; - for (auto&& count : strongly_cc_counts) std::cout << count << std::endl; + for (auto&& count : strongly_cc_counts) + std::cout << count << std::endl; } } virtual void SetUp() {} diff --git a/cpp/tests/experimental/mg_bfs_test.cpp b/cpp/tests/experimental/mg_bfs_test.cpp index 75d4d4ce6a3..5257c728ce4 100644 --- a/cpp/tests/experimental/mg_bfs_test.cpp +++ b/cpp/tests/experimental/mg_bfs_test.cpp @@ -74,7 +74,9 @@ class Tests_MGBFS : public ::testing::TestWithParam(sqrt(static_cast(comm_size))); - while (comm_size % row_comm_size != 0) { --row_comm_size; } + while (comm_size % row_comm_size != 0) { + --row_comm_size; + } cugraph::partition_2d::subcomm_factory_t subcomm_factory(handle, row_comm_size); diff --git a/cpp/tests/experimental/mg_katz_centrality_test.cpp b/cpp/tests/experimental/mg_katz_centrality_test.cpp index d0120e3a2e8..9071701324e 100644 --- a/cpp/tests/experimental/mg_katz_centrality_test.cpp +++ b/cpp/tests/experimental/mg_katz_centrality_test.cpp @@ -57,8 +57,8 @@ class Tests_MGKatzCentrality // Compare the results of running Katz Centrality on multiple GPUs to that of a single-GPU run template - void run_current_test(KatzCentrality_Usecase const &katz_usecase, - input_usecase_t const &input_usecase) + void run_current_test(KatzCentrality_Usecase const& katz_usecase, + input_usecase_t const& input_usecase) { // 1. 
initialize handle @@ -66,12 +66,14 @@ class Tests_MGKatzCentrality HighResClock hr_clock{}; raft::comms::initialize_mpi_comms(&handle, MPI_COMM_WORLD); - auto &comm = handle.get_comms(); + auto& comm = handle.get_comms(); auto const comm_size = comm.get_size(); auto const comm_rank = comm.get_rank(); auto row_comm_size = static_cast(sqrt(static_cast(comm_size))); - while (comm_size % row_comm_size != 0) { --row_comm_size; } + while (comm_size % row_comm_size != 0) { + --row_comm_size; + } cugraph::partition_2d::subcomm_factory_t subcomm_factory(handle, row_comm_size); @@ -118,7 +120,7 @@ class Tests_MGKatzCentrality cugraph::experimental::katz_centrality(handle, mg_graph_view, - static_cast(nullptr), + static_cast(nullptr), d_mg_katz_centralities.data(), alpha, beta, @@ -173,7 +175,7 @@ class Tests_MGKatzCentrality cugraph::experimental::katz_centrality( handle, sg_graph_view, - static_cast(nullptr), + static_cast(nullptr), d_sg_katz_centralities.data(), alpha, beta, diff --git a/cpp/tests/experimental/mg_sssp_test.cpp b/cpp/tests/experimental/mg_sssp_test.cpp index efc40bd6b97..ce2556bed00 100644 --- a/cpp/tests/experimental/mg_sssp_test.cpp +++ b/cpp/tests/experimental/mg_sssp_test.cpp @@ -71,7 +71,9 @@ class Tests_MGSSSP : public ::testing::TestWithParam(sqrt(static_cast(comm_size))); - while (comm_size % row_comm_size != 0) { --row_comm_size; } + while (comm_size % row_comm_size != 0) { + --row_comm_size; + } cugraph::partition_2d::subcomm_factory_t subcomm_factory(handle, row_comm_size); diff --git a/cpp/tests/generators/erdos_renyi_test.cpp b/cpp/tests/generators/erdos_renyi_test.cpp index c91a9af7c41..3606ce2ddef 100644 --- a/cpp/tests/generators/erdos_renyi_test.cpp +++ b/cpp/tests/generators/erdos_renyi_test.cpp @@ -28,7 +28,7 @@ struct GenerateErdosRenyiTest : public ::testing::Test { }; template -void test_symmetric(std::vector &h_src_v, std::vector &h_dst_v) +void test_symmetric(std::vector& h_src_v, std::vector& h_dst_v) { std::vector reverse_src_v(h_src_v.size()); std::vector reverse_dst_v(h_dst_v.size()); diff --git a/cpp/tests/layout/force_atlas2_test.cu b/cpp/tests/layout/force_atlas2_test.cu index 1a259418bc7..e736f2d2db5 100644 --- a/cpp/tests/layout/force_atlas2_test.cu +++ b/cpp/tests/layout/force_atlas2_test.cu @@ -215,7 +215,8 @@ class Tests_Force_Atlas2 : public ::testing::TestWithParam // Transpose the data std::vector> C_contiguous_embedding(m, std::vector(2)); for (int i = 0; i < m; i++) { - for (int j = 0; j < 2; j++) C_contiguous_embedding[i][j] = h_pos[j * m + i]; + for (int j = 0; j < 2; j++) + C_contiguous_embedding[i][j] = h_pos[j * m + i]; } // Test trustworthiness diff --git a/cpp/tests/layout/knn.h b/cpp/tests/layout/knn.h index 07d07528769..26666794896 100644 --- a/cpp/tests/layout/knn.h +++ b/cpp/tests/layout/knn.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -44,7 +44,8 @@ double sq_euclid_dist(const point& x, const point& y) double total = 0; auto i = x.attributes.begin(); auto j = y.attributes.begin(); - for (; i != x.attributes.end() && j != y.attributes.end(); ++i, ++j) total += pow(*i - *j, 2); + for (; i != x.attributes.end() && j != y.attributes.end(); ++i, ++j) + total += pow(*i - *j, 2); return total; } @@ -63,6 +64,7 @@ std::vector knn_classify(std::list& dataframe, const point& c, const auto count = 0; auto j = distances.begin(); ++j; - for (; j != distances.end() && count < k; ++j, ++count) res.push_back(j->index); + for (; j != distances.end() && count < k; ++j, ++count) + res.push_back(j->index); return res; } diff --git a/cpp/tests/layout/trust_worthiness.h b/cpp/tests/layout/trust_worthiness.h index 40c9782a76e..5a112ea3c6b 100644 --- a/cpp/tests/layout/trust_worthiness.h +++ b/cpp/tests/layout/trust_worthiness.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,7 +25,8 @@ double euclidian_dist(const std::vector& x, const std::vector& y) double total = 0; auto i = x.begin(); auto j = y.begin(); - for (; i != x.end() && j != y.end(); ++i, ++j) total += pow(*i, 2) - 2 * *i * *j + pow(*j, 2); + for (; i != x.end() && j != y.end(); ++i, ++j) + total += pow(*i, 2) - 2 * *i * *j + pow(*j, 2); return sqrt(total); } @@ -62,7 +63,8 @@ std::vector argsort(Iter begin, Iter end, Compare comp) return comp(*prev.second, *next.second); }); - for (auto i : pairList) ret.push_back(i.first); + for (auto i : pairList) + ret.push_back(i.first); return ret; } @@ -109,7 +111,8 @@ double compute_rank(const std::vector>& ind_X, ranks[j] = idx; } } - for (auto& val : ranks) val -= k; + for (auto& val : ranks) + val -= k; for (const auto& val : ranks) if (val > 0) rank += val; @@ -122,7 +125,9 @@ void print_matrix(const std::vector>& matrix) { for (size_t i = 0; i < matrix.size(); ++i) { std::cout << "[ "; - for (size_t j = 0; j < matrix[i].size(); ++j) { std::cout << matrix[i][j] << ' '; } + for (size_t j = 0; j < matrix[i].size(); ++j) { + std::cout << matrix[i][j] << ' '; + } std::cout << "]\n"; } } diff --git a/cpp/tests/linear_assignment/hungarian_test.cu b/cpp/tests/linear_assignment/hungarian_test.cu index 26496e95271..f806a217a8f 100644 --- a/cpp/tests/linear_assignment/hungarian_test.cu +++ b/cpp/tests/linear_assignment/hungarian_test.cu @@ -26,14 +26,14 @@ #include #include -__global__ void setup_generator(curandState *state) +__global__ void setup_generator(curandState* state) { int id = threadIdx.x + blockIdx.x * blockDim.x; curand_init(43, id, 0, &state[id]); } template -__global__ void generate_random(curandState *state, int n, T *data, int32_t upper_bound) +__global__ void generate_random(curandState* state, int n, T* data, int32_t upper_bound) { int first = threadIdx.x + blockIdx.x * blockDim.x; int stride = blockDim.x * gridDim.x; diff --git a/cpp/tests/pagerank/mg_pagerank_test.cpp b/cpp/tests/pagerank/mg_pagerank_test.cpp index 27d9bd1cbd1..fca889c3299 100644 --- a/cpp/tests/pagerank/mg_pagerank_test.cpp +++ b/cpp/tests/pagerank/mg_pagerank_test.cpp @@ -74,7 +74,9 @@ class Tests_MGPageRank auto const comm_rank = comm.get_rank(); auto row_comm_size = static_cast(sqrt(static_cast(comm_size))); - while (comm_size % row_comm_size != 0) { --row_comm_size; } + while (comm_size % row_comm_size != 0) { + --row_comm_size; + } 
cugraph::partition_2d::subcomm_factory_t subcomm_factory(handle, row_comm_size); diff --git a/cpp/tests/sampling/random_walks_profiling.cu b/cpp/tests/sampling/random_walks_profiling.cu index 6049230e21a..595086a570e 100644 --- a/cpp/tests/sampling/random_walks_profiling.cu +++ b/cpp/tests/sampling/random_walks_profiling.cu @@ -181,7 +181,9 @@ void run(RandomWalks_Usecase const& configuration, traversal_id_t trv_id) // FIXME: the num_paths vector might be better specified via the // configuration input instead of hardcoding here. std::vector v_np{1, 10, 100}; - for (auto&& num_paths : v_np) { output_random_walks_time(graph_view, num_paths, trv_id); } + for (auto&& num_paths : v_np) { + output_random_walks_time(graph_view, num_paths, trv_id); + } } /** diff --git a/cpp/tests/traversal/bfs_ref.h b/cpp/tests/traversal/bfs_ref.h index a32b2f99787..5efdce818e7 100644 --- a/cpp/tests/traversal/bfs_ref.h +++ b/cpp/tests/traversal/bfs_ref.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,7 +21,7 @@ #include template -void populate_neighbors(VT *indices, ET *offsets, VT w, std::vector &neighbors) +void populate_neighbors(VT* indices, ET* offsets, VT w, std::vector& neighbors) { ET edge_start = offsets[w]; ET edge_end = offsets[w + 1]; @@ -31,14 +31,14 @@ void populate_neighbors(VT *indices, ET *offsets, VT w, std::vector &neighbo // This implements the BFS based on (Brandes, 2001) for shortest path counting template -void ref_bfs(VT *indices, - ET *offsets, +void ref_bfs(VT* indices, + ET* offsets, VT const number_of_vertices, - std::queue &Q, - std::stack &S, - std::vector &dist, - std::vector> &pred, - std::vector &sigmas, + std::queue& Q, + std::stack& S, + std::vector& dist, + std::vector>& pred, + std::vector& sigmas, VT source) { std::vector neighbors; diff --git a/cpp/tests/traversal/bfs_test.cu b/cpp/tests/traversal/bfs_test.cu index 03ca1ec9455..b0da605a0a0 100644 --- a/cpp/tests/traversal/bfs_test.cu +++ b/cpp/tests/traversal/bfs_test.cu @@ -46,7 +46,7 @@ // C++ Reference Implementation // ============================================================================ template -bool compare_close(const T &a, const T &b, const precision_t epsilon, precision_t zero_threshold) +bool compare_close(const T& a, const T& b, const precision_t epsilon, precision_t zero_threshold) { return ((zero_threshold > a && zero_threshold > b)) || (a >= b * (1.0 - epsilon)) && (a <= b * (1.0 + epsilon)); @@ -59,9 +59,9 @@ typedef struct BFS_Usecase_t { std::string config_; // Path to graph file std::string file_path_; // Complete path to graph using dataset_root_dir int source_; // Starting point from the traversal - BFS_Usecase_t(const std::string &config, int source) : config_(config), source_(source) + BFS_Usecase_t(const std::string& config, int source) : config_(config), source_(source) { - const std::string &rapidsDatasetRootDir = cugraph::test::get_rapids_dataset_root_dir(); + const std::string& rapidsDatasetRootDir = cugraph::test::get_rapids_dataset_root_dir(); if ((config_ != "") && (config_[0] != '/')) { file_path_ = rapidsDatasetRootDir + "/" + config_; } else { @@ -86,7 +86,7 @@ class Tests_BFS : public ::testing::TestWithParam { // WT edge weight data type // return_sp_counter should BFS return shortest path countner template - void run_current_test(const BFS_Usecase &configuration) + void 
run_current_test(const BFS_Usecase& configuration) { // Step 1: Construction of the graph based on configuration VT number_of_vertices; diff --git a/cpp/tests/utilities/base_fixture.hpp b/cpp/tests/utilities/base_fixture.hpp index 3121d3074d4..75570b2c467 100644 --- a/cpp/tests/utilities/base_fixture.hpp +++ b/cpp/tests/utilities/base_fixture.hpp @@ -46,14 +46,14 @@ namespace test { * ``` **/ class BaseFixture : public ::testing::Test { - rmm::mr::device_memory_resource *_mr{rmm::mr::get_current_device_resource()}; + rmm::mr::device_memory_resource* _mr{rmm::mr::get_current_device_resource()}; public: /** * @brief Returns pointer to `device_memory_resource` that should be used for all tests inheriting *from this fixture **/ - rmm::mr::device_memory_resource *mr() { return _mr; } + rmm::mr::device_memory_resource* mr() { return _mr; } }; /// MR factory functions @@ -90,7 +90,7 @@ inline auto make_binning() * @return Memory resource instance */ inline std::shared_ptr create_memory_resource( - std::string const &allocation_mode) + std::string const& allocation_mode) { if (allocation_mode == "binning") return make_binning(); if (allocation_mode == "cuda") return make_cuda(); @@ -110,7 +110,7 @@ inline std::shared_ptr create_memory_resource( * * @return Parsing results in the form of cxxopts::ParseResult */ -inline auto parse_test_options(int argc, char **argv) +inline auto parse_test_options(int argc, char** argv) { try { cxxopts::Options options(argv[0], " - cuGraph tests command line options"); @@ -118,7 +118,7 @@ inline auto parse_test_options(int argc, char **argv) "rmm_mode", "RMM allocation mode", cxxopts::value()->default_value("pool")); return options.parse(argc, argv); - } catch (const cxxopts::OptionException &e) { + } catch (const cxxopts::OptionException& e) { CUGRAPH_FAIL("Error parsing command line options"); } } @@ -133,7 +133,7 @@ inline auto parse_test_options(int argc, char **argv) * creating the default memory resource. */ #define CUGRAPH_TEST_PROGRAM_MAIN() \ - int main(int argc, char **argv) \ + int main(int argc, char** argv) \ { \ ::testing::InitGoogleTest(&argc, argv); \ auto const cmd_opts = parse_test_options(argc, argv); \ @@ -144,7 +144,7 @@ inline auto parse_test_options(int argc, char **argv) } #define CUGRAPH_MG_TEST_PROGRAM_MAIN() \ - int main(int argc, char **argv) \ + int main(int argc, char** argv) \ { \ MPI_TRY(MPI_Init(&argc, &argv)); \ int comm_rank{}; \ diff --git a/cpp/tests/utilities/cxxopts.hpp b/cpp/tests/utilities/cxxopts.hpp index 9a0b6e500d6..5aa77723a1f 100644 --- a/cpp/tests/utilities/cxxopts.hpp +++ b/cpp/tests/utilities/cxxopts.hpp @@ -17,6 +17,22 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + #ifndef CXXOPTS_HPP_INCLUDED #define CXXOPTS_HPP_INCLUDED @@ -89,7 +105,9 @@ inline String& stringAppend(String& s, String a) { return s.append(std::move(a)) inline String& stringAppend(String& s, int n, UChar32 c) { - for (int i = 0; i != n; ++i) { s.append(c); } + for (int i = 0; i != n; ++i) { + s.append(c); + } return s; } @@ -1449,7 +1467,9 @@ inline void Options::generate_all_groups_help(String& result) const std::vector all_groups; all_groups.reserve(m_help.size()); - for (auto& group : m_help) { all_groups.push_back(group.first); } + for (auto& group : m_help) { + all_groups.push_back(group.first); + } generate_group_help(result, all_groups); } @@ -1494,4 +1514,4 @@ inline const HelpGroupDetails& Options::group_help(const std::string& group) con } // namespace cxxopts -#endif // CXXOPTS_HPP_INCLUDED \ No newline at end of file +#endif // CXXOPTS_HPP_INCLUDED diff --git a/cpp/tests/utilities/device_comm_wrapper.cu b/cpp/tests/utilities/device_comm_wrapper.cu index 2d66e05c59f..2fee7719e36 100644 --- a/cpp/tests/utilities/device_comm_wrapper.cu +++ b/cpp/tests/utilities/device_comm_wrapper.cu @@ -26,7 +26,7 @@ namespace cugraph { namespace test { template -rmm::device_uvector device_gatherv(raft::handle_t const &handle, T const *d_input, size_t size) +rmm::device_uvector device_gatherv(raft::handle_t const& handle, T const* d_input, size_t size) { bool is_root = handle.get_comms().get_rank() == int{0}; auto rx_sizes = cugraph::experimental::host_scalar_gather( @@ -52,20 +52,20 @@ rmm::device_uvector device_gatherv(raft::handle_t const &handle, T const *d_i // explicit instantiation -template rmm::device_uvector device_gatherv(raft::handle_t const &handle, - int32_t const *d_input, +template rmm::device_uvector device_gatherv(raft::handle_t const& handle, + int32_t const* d_input, size_t size); -template rmm::device_uvector device_gatherv(raft::handle_t const &handle, - int64_t const *d_input, +template rmm::device_uvector device_gatherv(raft::handle_t const& handle, + int64_t const* d_input, size_t size); -template rmm::device_uvector device_gatherv(raft::handle_t const &handle, - float const *d_input, +template rmm::device_uvector device_gatherv(raft::handle_t const& handle, + float const* d_input, size_t size); -template rmm::device_uvector device_gatherv(raft::handle_t const &handle, - double const *d_input, +template rmm::device_uvector device_gatherv(raft::handle_t const& handle, + double const* d_input, size_t size); } // namespace test diff --git a/cpp/tests/utilities/device_comm_wrapper.hpp b/cpp/tests/utilities/device_comm_wrapper.hpp index f56f24248d6..55145edd71b 100644 --- a/cpp/tests/utilities/device_comm_wrapper.hpp +++ b/cpp/tests/utilities/device_comm_wrapper.hpp @@ -23,7 +23,7 @@ namespace cugraph { namespace test { template -rmm::device_uvector device_gatherv(raft::handle_t const &handle, T const *d_input, size_t size); +rmm::device_uvector device_gatherv(raft::handle_t const& handle, T const* d_input, size_t size); } // namespace test } // namespace cugraph From 7c41bb91e7fcf91fe79368ccec8eb98b94db42f1 Mon Sep 17 00:00:00 2001 From: AJ Schmidt Date: Thu, 8 Jul 2021 16:39:22 -0400 Subject: [PATCH 315/343] Update `conda` environment name for CI (#1699) The `gdf` conda environment has been replaced with the `rapids` environment. A symlink was put in place for `gdf` to continue to work, but the symlink will be removed in the near future. This PR updates all scripts to use the `rapids` environment name. 
Authors: - AJ Schmidt (https://github.com/ajschmidt8) Approvers: - Dillon Cullinan (https://github.com/dillon-cullinan) URL: https://github.com/rapidsai/cugraph/pull/1699 --- ci/checks/style.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ci/checks/style.sh b/ci/checks/style.sh index e590e4aafa7..81388fa7b20 100755 --- a/ci/checks/style.sh +++ b/ci/checks/style.sh @@ -18,7 +18,8 @@ ERRORCODE=0 PATH=/conda/bin:$PATH # Activate common conda env -source activate gdf +. /opt/conda/etc/profile.d/conda.sh +conda activate rapids # Run flake8 and get results/return code FLAKE=`flake8 --config=python/.flake8 python` From 6ad797ff8cfade3fa302dedff98b9e2839a3bc0e Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Fri, 9 Jul 2021 22:05:29 +0900 Subject: [PATCH 316/343] Fix the Louvain failure with 64 bit vertex IDs (#1696) Louvain currently crashes with 64 bit vertex ID (thrust::copy with thrust::make_transform_iterator and a device lambda returns cudaErrorInvalidDeviceFunction). This PR fixes this error (replaces the device lambda with a ```struct minor_to_key_t``` functor). Authors: - Seunghwa Kang (https://github.com/seunghwak) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) URL: https://github.com/rapidsai/cugraph/pull/1696 --- ...ransform_reduce_key_aggregated_out_nbr.cuh | 22 +++++++++++++++---- cpp/tests/community/louvain_test.cpp | 5 +++++ 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/cpp/include/cugraph/prims/copy_v_transform_reduce_key_aggregated_out_nbr.cuh b/cpp/include/cugraph/prims/copy_v_transform_reduce_key_aggregated_out_nbr.cuh index 4aa80956745..81913c3dab6 100644 --- a/cpp/include/cugraph/prims/copy_v_transform_reduce_key_aggregated_out_nbr.cuh +++ b/cpp/include/cugraph/prims/copy_v_transform_reduce_key_aggregated_out_nbr.cuh @@ -36,6 +36,22 @@ namespace cugraph { namespace experimental { +namespace detail { + +// a workaround for cudaErrorInvalidDeviceFunction error when device lambda is used +template +struct minor_to_key_t { + using vertex_t = typename std::iterator_traits::value_type; + VertexIterator adj_matrix_col_key_first{}; + vertex_t minor_first{}; + __device__ vertex_t operator()(vertex_t minor) + { + return *(adj_matrix_col_key_first + (minor - minor_first)); + } +}; + +} // namespace detail + /** * @brief Iterate over every vertex's key-aggregated outgoing edges to update vertex properties. 
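// Illustrative sketch with hypothetical names (minor_to_key_sketch_t, gather_edge_keys), not the
// library's API: it shows, in isolation, the pattern this fix relies on — passing a named functor
// with a __device__ operator() to thrust::make_transform_iterator instead of a __device__ lambda,
// which in this code path returned cudaErrorInvalidDeviceFunction for 64 bit vertex IDs.
// Compile with nvcc; the functor is only invoked on the device path of thrust::copy.

#include <thrust/copy.h>
#include <thrust/device_vector.h>
#include <thrust/iterator/transform_iterator.h>

#include <cstdint>

struct minor_to_key_sketch_t {
  int64_t const* col_keys{};  // key per local column, indexed by (minor - minor_first)
  int64_t minor_first{};
  __device__ int64_t operator()(int64_t minor) const { return col_keys[minor - minor_first]; }
};

// gathers the key of every edge's destination (minor) into edge_keys (pre-sized by the caller)
void gather_edge_keys(thrust::device_vector<int64_t> const& minors,
                      thrust::device_vector<int64_t> const& col_keys,
                      int64_t minor_first,
                      thrust::device_vector<int64_t>& edge_keys)
{
  auto key_first = thrust::make_transform_iterator(
    minors.begin(), minor_to_key_sketch_t{col_keys.data().get(), minor_first});
  thrust::copy(key_first, key_first + minors.size(), edge_keys.begin());
}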
* @@ -255,10 +271,8 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( if (matrix_partition.get_major_size() > 0) { auto minor_key_first = thrust::make_transform_iterator( matrix_partition.get_indices(), - [adj_matrix_col_key_first, matrix_partition] __device__(auto minor) { - return *(adj_matrix_col_key_first + - matrix_partition.get_minor_offset_from_minor_nocheck(minor)); - }); + detail::minor_to_key_t{adj_matrix_col_key_first, + matrix_partition.get_minor_first()}); thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), minor_key_first, minor_key_first + matrix_partition.get_number_of_edges(), diff --git a/cpp/tests/community/louvain_test.cpp b/cpp/tests/community/louvain_test.cpp index bcf7dddc459..4dca720483e 100644 --- a/cpp/tests/community/louvain_test.cpp +++ b/cpp/tests/community/louvain_test.cpp @@ -312,6 +312,11 @@ TEST_P(Tests_Louvain, CheckInt32Int32FloatFloat) run_current_test(GetParam()); } +TEST_P(Tests_Louvain, CheckInt64Int64FloatFloat) +{ + run_current_test(GetParam()); +} + // FIXME: Expand testing once we evaluate RMM memory use INSTANTIATE_TEST_SUITE_P( simple_test, From aba34455b6a508c99a2eb3058210fe98a37fcc4c Mon Sep 17 00:00:00 2001 From: Paul Taylor Date: Fri, 9 Jul 2021 08:11:42 -0500 Subject: [PATCH 317/343] Use `library_dirs` for cython linking, link cudatoolkit libs, allow setting UCX install location (#1698) This PR is a continuation of https://github.com/rapidsai/cugraph/pull/1694. Similar to https://github.com/rapidsai/cuml/pull/4015, this PR updates setup.py to: * Use `library_dirs` instead of `runtime_library_dirs` when linking Cython. * Allow overriding UCX lib and include dirs via a `UCX_HOME` envvar. * Link `cudart`, `cusparse`, and `cusolver`. These are necessary to compile the Cython via `pip` when not inside a conda environment and when UCX is installed to a location other than `/usr` or `/usr/local`. 
Authors: - Paul Taylor (https://github.com/trxcllnt) Approvers: - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/1698 --- cpp/cmake/thirdparty/get_faiss.cmake | 5 ++- python/setup.py | 55 +++++++++++++++++++--------- 2 files changed, 42 insertions(+), 18 deletions(-) diff --git a/cpp/cmake/thirdparty/get_faiss.cmake b/cpp/cmake/thirdparty/get_faiss.cmake index 4991990e379..a65401579cb 100644 --- a/cpp/cmake/thirdparty/get_faiss.cmake +++ b/cpp/cmake/thirdparty/get_faiss.cmake @@ -40,7 +40,10 @@ function(find_and_configure_faiss) if(FAISS_ADDED) set(FAISS_GPU_HEADERS ${FAISS_SOURCE_DIR} PARENT_SCOPE) - add_library(FAISS::FAISS ALIAS faiss) + endif() + + if(TARGET faiss AND NOT TARGET FAISS::FAISS) + add_library(FAISS::FAISS ALIAS faiss) endif() endfunction() diff --git a/python/setup.py b/python/setup.py index d614eef24d6..6009e56d7f1 100644 --- a/python/setup.py +++ b/python/setup.py @@ -13,6 +13,7 @@ import os import sys +import sysconfig import shutil from setuptools import setup, find_packages, Command @@ -29,13 +30,25 @@ INSTALL_REQUIRES = ['numba', 'cython'] +CYTHON_FILES = ['cugraph/**/*.pyx'] + +UCX_HOME = get_environment_option("UCX_HOME") +CUDA_HOME = get_environment_option('CUDA_HOME') +CONDA_PREFIX = get_environment_option('CONDA_PREFIX') conda_lib_dir = os.path.normpath(sys.prefix) + '/lib' conda_include_dir = os.path.normpath(sys.prefix) + '/include' -CYTHON_FILES = ['cugraph/**/*.pyx'] +if CONDA_PREFIX: + conda_include_dir = CONDA_PREFIX + '/include' + conda_lib_dir = CONDA_PREFIX + '/lib' + +if not UCX_HOME: + UCX_HOME = CONDA_PREFIX if CONDA_PREFIX else os.sys.prefix + +ucx_include_dir = os.path.join(UCX_HOME, "include") +ucx_lib_dir = os.path.join(UCX_HOME, "lib") -CUDA_HOME = os.environ.get("CUDA_HOME", False) if not CUDA_HOME: path_to_cuda_gdb = shutil.which("cuda-gdb") if path_to_cuda_gdb is None: @@ -53,11 +66,7 @@ ) cuda_include_dir = os.path.join(CUDA_HOME, "include") - -if (os.environ.get('CONDA_PREFIX', None)): - conda_prefix = os.environ.get('CONDA_PREFIX') - conda_include_dir = conda_prefix + '/include' - conda_lib_dir = conda_prefix + '/lib' +cuda_lib_dir = os.path.join(CUDA_HOME, "lib64") # Optional location of C++ build folder that can be configured by the user libcugraph_path = get_environment_option('CUGRAPH_BUILD_PATH') @@ -69,6 +78,9 @@ # https://github.com/rapidsai/raft/issues/83 raft_include_dir = use_raft_package(raft_path, libcugraph_path) +if not libcugraph_path: + libcugraph_path = conda_lib_dir + class CleanCommand(Command): """Custom clean command to tidy up the project root.""" @@ -101,16 +113,25 @@ def run(self): EXTENSIONS = [ Extension("*", sources=CYTHON_FILES, - include_dirs=[conda_include_dir, - '../cpp/include', - "../thirdparty/cub", - raft_include_dir, - os.path.join( - conda_include_dir, "libcudacxx"), - cuda_include_dir], - library_dirs=[get_python_lib()], - runtime_library_dirs=[conda_lib_dir], - libraries=['cugraph', 'nccl'], + include_dirs=[ + conda_include_dir, + ucx_include_dir, + '../cpp/include', + "../thirdparty/cub", + raft_include_dir, + os.path.join(conda_include_dir, "libcudacxx"), + cuda_include_dir, + os.path.dirname(sysconfig.get_path("include")) + ], + library_dirs=[ + get_python_lib(), + conda_lib_dir, + libcugraph_path, + ucx_lib_dir, + cuda_lib_dir, + os.path.join(os.sys.prefix, "lib") + ], + libraries=['cudart', 'cusparse', 'cusolver', 'cugraph', 'nccl'], language='c++', extra_compile_args=['-std=c++17']) ] From 1a636029db9cafcbb746837ac21ff1c30d1bc479 Mon Sep 17 00:00:00 2001 
From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Tue, 13 Jul 2021 00:39:45 +0900 Subject: [PATCH 318/343] Delete unused/out-dated primitives (#1704) any_of_adj_matrix_row & transform_reduce_v_with_adj_matrix_row (graph primitives) were initially added to be used in error checking but they are no longer used (and may not be used in the future as well). Deleting the dead code. Authors: - Seunghwa Kang (https://github.com/seunghwak) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) URL: https://github.com/rapidsai/cugraph/pull/1704 --- .../cugraph/prims/any_of_adj_matrix_row.cuh | 73 ----------- ...transform_reduce_v_with_adj_matrix_row.cuh | 120 ------------------ cpp/src/experimental/pagerank.cu | 1 - 3 files changed, 194 deletions(-) delete mode 100644 cpp/include/cugraph/prims/any_of_adj_matrix_row.cuh delete mode 100644 cpp/include/cugraph/prims/transform_reduce_v_with_adj_matrix_row.cuh diff --git a/cpp/include/cugraph/prims/any_of_adj_matrix_row.cuh b/cpp/include/cugraph/prims/any_of_adj_matrix_row.cuh deleted file mode 100644 index 94cdae1ec95..00000000000 --- a/cpp/include/cugraph/prims/any_of_adj_matrix_row.cuh +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include -#include -#include - -#include -#include - -#include -#include - -namespace cugraph { -namespace experimental { - -/** - * @brief Check any of graph adjacency matrix row properties satisfy the given predicate. - * - * Returns true if @p row_op returns true for at least once (in any process in multi-GPU), returns - * false otherwise. This function is inspired by thrust::any_of(). - * - * @tparam GraphViewType Type of the passed non-owning graph object. - * @tparam AdjMatrixRowValueInputIterator Type of the iterator for graph adjacency matrix row - * input properties. - * @tparam RowOp Type of the unary predicate operator. - * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and - * handles to various CUDA libraries) to run graph algorithms. - * @param graph_view Non-owning graph object. - * @param adj_matrix_row_value_input_first Iterator pointing to the adjacency matrix row properties - * for the first (inclusive) row (assigned to this process in multi-GPU). - * `adj_matrix_row_value_input_last` (exclusive) is deduced as @p adj_matrix_row_value_input_first + - * @p graph_view.get_number_of_local_adj_matrix_partition_rows(). - * @param row_op Unary predicate operator that takes *(@p adj_matrix_row_value_input_first + i) - * (where i = [0, @p graph_view.get_number_of_local_adj_matrix_partition_rows()) and returns either - * true or false. - * @return true If the predicate returns true at least once (in any process in multi-GPU). - * @return false If the predicate never returns true (in any process in multi-GPU). 
- */ -template -bool any_of_adj_matrix_row(raft::handle_t const& handle, - GraphViewType const& graph_view, - AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, - RowOp row_op) -{ - // better use thrust::any_of once https://github.com/thrust/thrust/issues/1016 is resolved - auto count = thrust::count_if( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - adj_matrix_row_value_input_first, - adj_matrix_row_value_input_first + graph_view.get_number_of_local_adj_matrix_partition_rows(), - row_op); - if (GraphViewType::is_multi_gpu) { - count = host_scalar_allreduce(handle.get_comms(), count, handle.get_stream()); - } - return (count > 0); -} - -} // namespace experimental -} // namespace cugraph diff --git a/cpp/include/cugraph/prims/transform_reduce_v_with_adj_matrix_row.cuh b/cpp/include/cugraph/prims/transform_reduce_v_with_adj_matrix_row.cuh deleted file mode 100644 index bfb6f296075..00000000000 --- a/cpp/include/cugraph/prims/transform_reduce_v_with_adj_matrix_row.cuh +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include -#include -#include - -#include - -#include -#include -#include - -namespace cugraph { -namespace experimental { - -/** - * @brief Apply an operator to the matching vertex and adjacency matrix row properties and reduce. - * - * i'th vertex matches with the i'th row in the graph adjacency matrix. @p v_op takes vertex - * properties and adjacency matrix row properties for the matching row, and @p v_op outputs are - * reduced. This function is inspired by thrust::transform_reduce(). - * - * @tparam GraphViewType Type of the passed non-owning graph object. - * @tparam VertexValueInputIterator Type of the iterator for vertex properties. - * @tparam AdjMatrixRowValueInputIterator Type of the iterator for graph adjacency matrix column - * input properties. - * @tparam VertexOp Type of the binary vertex operator. - * @tparam T Type of the initial value. - * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and - * handles to various CUDA libraries) to run graph algorithms. - * @param graph_view Non-owning graph object. - * @param vertex_value_input_first Iterator pointing to the vertex properties for the first - * (inclusive) vertex (assigned to this process in multi-GPU). `vertex_value_input_last` (exclusive) - * is deduced as @p vertex_value_input_first + @p graph_view.get_number_of_local_vertices(). - * @param adj_matrix_row_value_input_first Iterator pointing to the adjacency matrix row input - * properties for the first (inclusive) row (assigned to this process in multi-GPU). - * `adj_matrix_row_value_input_last` (exclusive) is deduced as @p adj_matrix_row_value_input_first + - * @p graph_view.get_number_of_local_adj_matrix_partition_rows(). 
- * @param v_op Binary operator takes *(@p vertex_value_input_first + i) and *(@p - * adj_matrix_row_value_input_first + j) (where i and j are set for a vertex and the matching row) - * and returns a transformed value to be reduced. - * @param init Initial value to be added to the transform-reduced input vertex properties. - * @return T Reduction of the @p v_op outputs. - */ -template -T transform_reduce_v_with_adj_matrix_row( - raft::handle_t const& handle, - GraphViewType const& graph_view, - VertexValueInputIterator vertex_value_input_first, - AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, - VertexOp v_op, - T init) -{ - using vertex_t = GraphViewtype::vertex_type; - using edge_t = GraphViewtype::edge_type; - using weight_t = GraphViewtype::weight_type; - - T ret{}; - - auto vertex_first = graph_view.get_local_vertex_first(); - auto vertex_last = graph_view.get_local_vertex_last(); - for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { - auto row_first = graph_view.get_local_adj_matrix_partition_row_first(i); - auto row_last = graph_view.get_local_adj_matrix_partition_row_last(i); - - auto range_first = std::max(vertex_first, row_first); - auto range_last = std::min(vertex_last, row_last); - - if (range_last > range_first) { - auto matrix_partition = - matrix_partition_device_view_t( - graph_view.get_matrix_partition_view(i)); - auto row_value_input_offset = GraphViewType::is_adj_matrix_transposed - ? 0 - : matrix_partition.get_major_value_start_offset(); - - auto input_first = thrust::make_zip_iterator(thrust::make_tuple( - vertex_value_input_first + (range_first - vertex_first), - adj_matrix_row_value_input_first + row_value_input_offset + (range_first - row_first))); - auto v_op_wrapper = [v_op] __device__(auto v_and_row_val) { - return v_op(thrust::get<0>(v_and_row_val), thrust::get<1>(v_and_row_val)); - }; - ret += - thrust::transform_reduce(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - input_first, - input_first + (range_last - range_first), - v_op_wrapper, - T{}, - thrust::plus()); - } - } - - if (GraphViewType::is_multi_gpu) { - ret = host_scalar_allreduce(handle.get_comms(), ret, handle.get_stream()); - } - - return init + ret; -} - -} // namespace experimental -} // namespace cugraph diff --git a/cpp/src/experimental/pagerank.cu b/cpp/src/experimental/pagerank.cu index 999a25b01c9..7c3e4b03e9e 100644 --- a/cpp/src/experimental/pagerank.cu +++ b/cpp/src/experimental/pagerank.cu @@ -16,7 +16,6 @@ #include #include -#include #include #include #include From 04f73b8bb9cccd4e771c10b58bf577cf4882f56c Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Wed, 14 Jul 2021 01:45:55 +0900 Subject: [PATCH 319/343] Update primitives to support DCSR (DCSC) segments (Part 1) (#1690) Update graph primitives to support DCSR (DCSC) segments (except for the ones used by Louvain, graph primitives used in Louvain will be updated in a separate PR with thread-divergence optimization & more testing). DCSR (DCSC) segment support is still disabled (as enabling this will break Louvain). 
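For readers unfamiliar with the layout, the sketch below is a simplified, host-side illustration (hypothetical names, the segment shown in isolation; not the library's API) of what a DCSR segment stores: an offsets array with one entry per vertex listed in `dcs_nzd_vertices` plus one, so zero-degree majors in the hypersparse range take no offset entries. Recovering per-vertex degrees then means scattering `offsets[i + 1] - offsets[i]` back to position `dcs_nzd_vertices[i]`, which mirrors the `thrust::fill` + `thrust::for_each` pair in the updated `compute_major_degrees`.

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Vertices absent from dcs_nzd_vertices implicitly have degree 0.
std::vector<int64_t> dcsr_segment_degrees(std::vector<int64_t> const& offsets,
                                          std::vector<int32_t> const& dcs_nzd_vertices,
                                          int32_t major_first,
                                          int32_t major_last)
{
  std::vector<int64_t> degrees(static_cast<size_t>(major_last - major_first), 0);
  for (size_t i = 0; i < dcs_nzd_vertices.size(); ++i) {
    degrees[dcs_nzd_vertices[i] - major_first] = offsets[i + 1] - offsets[i];
  }
  return degrees;
}

int main()
{
  // majors 10..19; only 12, 15, and 19 have outgoing edges (3, 1, and 2 of them)
  std::vector<int64_t> offsets{0, 3, 4, 6};
  std::vector<int32_t> dcs_nzd_vertices{12, 15, 19};
  for (auto d : dcsr_segment_degrees(offsets, dcs_nzd_vertices, 10, 20)) { std::cout << d << ' '; }
  std::cout << '\n';  // prints: 0 0 3 0 0 1 0 0 0 2
}
```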
Authors: - Seunghwa Kang (https://github.com/seunghwak) - Mark Harris (https://github.com/harrism) - Chuck Hastings (https://github.com/ChuckHastings) - AJ Schmidt (https://github.com/ajschmidt8) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - Brad Rees (https://github.com/BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1690 --- .../experimental/detail/graph_utils.cuh | 61 ++-- .../cugraph/matrix_partition_device_view.cuh | 85 +++++- cpp/include/cugraph/matrix_partition_view.hpp | 3 + .../copy_v_transform_reduce_in_out_nbr.cuh | 263 +++++++++++------- .../cugraph/prims/transform_reduce_e.cuh | 103 ++++++- .../update_frontier_v_push_if_out_nbr.cuh | 216 ++++++++++++-- .../components/weakly_connected_components.cu | 4 +- cpp/src/experimental/graph.cu | 3 +- cpp/src/experimental/graph_view.cu | 61 ++-- 9 files changed, 607 insertions(+), 192 deletions(-) diff --git a/cpp/include/cugraph/experimental/detail/graph_utils.cuh b/cpp/include/cugraph/experimental/detail/graph_utils.cuh index c384e2842ab..02da9a80854 100644 --- a/cpp/include/cugraph/experimental/detail/graph_utils.cuh +++ b/cpp/include/cugraph/experimental/detail/graph_utils.cuh @@ -43,7 +43,10 @@ template rmm::device_uvector compute_major_degrees( raft::handle_t const& handle, std::vector const& adj_matrix_partition_offsets, - partition_t const& partition) + std::optional> const& adj_matrix_partition_dcs_nzd_vertices, + std::optional> const& adj_matrix_partition_dcs_nzd_vertex_counts, + partition_t const& partition, + std::optional> const& adj_matrix_partition_segment_offsets) { auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); auto const row_comm_rank = row_comm.get_rank(); @@ -52,6 +55,8 @@ rmm::device_uvector compute_major_degrees( auto const col_comm_rank = col_comm.get_rank(); auto const col_comm_size = col_comm.get_size(); + auto use_dcs = adj_matrix_partition_dcs_nzd_vertices.has_value(); + rmm::device_uvector local_degrees(0, handle.get_stream()); rmm::device_uvector degrees(0, handle.get_stream()); @@ -69,11 +74,37 @@ rmm::device_uvector compute_major_degrees( vertex_t major_last{}; std::tie(major_first, major_last) = partition.get_vertex_partition_range(vertex_partition_idx); auto p_offsets = adj_matrix_partition_offsets[i]; + auto major_hypersparse_first = + use_dcs ? 
major_first + (*adj_matrix_partition_segment_offsets) + [(detail::num_sparse_segments_per_vertex_partition + 2) * i + + detail::num_sparse_segments_per_vertex_partition] + : major_last; thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), thrust::make_counting_iterator(vertex_t{0}), - thrust::make_counting_iterator(major_last - major_first), - local_degrees.data(), + thrust::make_counting_iterator(major_hypersparse_first - major_first), + local_degrees.begin(), [p_offsets] __device__(auto i) { return p_offsets[i + 1] - p_offsets[i]; }); + if (use_dcs) { + auto p_dcs_nzd_vertices = (*adj_matrix_partition_dcs_nzd_vertices)[i]; + auto dcs_nzd_vertex_count = (*adj_matrix_partition_dcs_nzd_vertex_counts)[i]; + thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + local_degrees.begin() + (major_hypersparse_first - major_first), + local_degrees.begin() + (major_last - major_first), + edge_t{0}); + thrust::for_each(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::make_counting_iterator(vertex_t{0}), + thrust::make_counting_iterator(dcs_nzd_vertex_count), + [p_offsets, + p_dcs_nzd_vertices, + major_first, + major_hypersparse_first, + local_degrees = local_degrees.data()] __device__(auto i) { + auto d = p_offsets[(major_hypersparse_first - major_first) + i + 1] - + p_offsets[(major_hypersparse_first - major_first) + i]; + auto v = p_dcs_nzd_vertices[i]; + local_degrees[v - major_first] = d; + }); + } col_comm.reduce(local_degrees.data(), i == col_comm_rank ? degrees.data() : static_cast(nullptr), static_cast(major_last - major_first), @@ -85,23 +116,6 @@ rmm::device_uvector compute_major_degrees( return degrees; } -// compute the numbers of nonzeros in rows (of the graph adjacency matrix, if store_transposed = -// false) or columns (of the graph adjacency matrix, if store_transposed = true) -template -rmm::device_uvector compute_major_degrees( - raft::handle_t const& handle, - std::vector> const& adj_matrix_partition_offsets, - partition_t const& partition) -{ - // we can avoid creating this temporary with "if constexpr" supported from C++17 - std::vector tmp_offsets(adj_matrix_partition_offsets.size(), nullptr); - std::transform(adj_matrix_partition_offsets.begin(), - adj_matrix_partition_offsets.end(), - tmp_offsets.begin(), - [](auto const& offsets) { return offsets.data(); }); - return compute_major_degrees(handle, tmp_offsets, partition); -} - // compute the numbers of nonzeros in rows (of the graph adjacency matrix, if store_transposed = // false) or columns (of the graph adjacency matrix, if store_transposed = true) template @@ -117,13 +131,6 @@ rmm::device_uvector compute_major_degrees(raft::handle_t const& handle, return degrees; } -template -struct degree_from_offsets_t { - edge_t const* offsets{nullptr}; - - __device__ edge_t operator()(vertex_t v) { return offsets[v + 1] - offsets[v]; } -}; - template struct compute_gpu_id_from_vertex_t { int comm_size{0}; diff --git a/cpp/include/cugraph/matrix_partition_device_view.cuh b/cpp/include/cugraph/matrix_partition_device_view.cuh index 9653e5c94d9..f96419f9600 100644 --- a/cpp/include/cugraph/matrix_partition_device_view.cuh +++ b/cpp/include/cugraph/matrix_partition_device_view.cuh @@ -15,12 +15,16 @@ */ #pragma once +#include #include #include +#include +#include #include #include +#include #include #include @@ -49,25 +53,28 @@ class matrix_partition_device_view_base_t { __host__ __device__ vertex_t const* get_indices() const { return indices_; } __host__ 
__device__ thrust::optional get_weights() const { return weights_; } + // major_idx == major offset if CSR/CSC, major_offset != major_idx if DCSR/DCSC __device__ thrust::tuple, edge_t> - get_local_edges(vertex_t major_offset) const noexcept + get_local_edges(vertex_t major_idx) const noexcept { - auto edge_offset = *(offsets_ + major_offset); - auto local_degree = *(offsets_ + (major_offset + 1)) - edge_offset; + auto edge_offset = *(offsets_ + major_idx); + auto local_degree = *(offsets_ + (major_idx + 1)) - edge_offset; auto indices = indices_ + edge_offset; auto weights = weights_ ? thrust::optional{*weights_ + edge_offset} : thrust::nullopt; return thrust::make_tuple(indices, weights, local_degree); } - __device__ edge_t get_local_degree(vertex_t major_offset) const noexcept + // major_idx == major offset if CSR/CSC, major_offset != major_idx if DCSR/DCSC + __device__ edge_t get_local_degree(vertex_t major_idx) const noexcept { - return *(offsets_ + (major_offset + 1)) - *(offsets_ + major_offset); + return *(offsets_ + (major_idx + 1)) - *(offsets_ + major_idx); } - __device__ edge_t get_local_offset(vertex_t major_offset) const noexcept + // major_idx == major offset if CSR/CSC, major_offset != major_idx if DCSR/DCSC + __device__ edge_t get_local_offset(vertex_t major_idx) const noexcept { - return *(offsets_ + major_offset); + return *(offsets_ + major_idx); } private: @@ -148,6 +155,34 @@ class matrix_partition_device_view_t get_major_hypersparse_idx_from_major_nocheck( + vertex_t major) const noexcept + { + if (dcs_nzd_vertices_) { + // we can avoid binary search (and potentially improve performance) if we add an auxiliary + // array or cuco::static_map (at the expense of additional memory) + auto it = thrust::lower_bound( + thrust::seq, *dcs_nzd_vertices_, *dcs_nzd_vertices_ + *dcs_nzd_vertex_count_, major); + return it != *dcs_nzd_vertices_ + *dcs_nzd_vertex_count_ + ? (*it == major ? thrust::optional{static_cast( + thrust::distance(*dcs_nzd_vertices_, it))} + : thrust::nullopt) + : thrust::nullopt; + } else { + return thrust::nullopt; + } + } + + // major_hypersparse_idx: index within the hypersparse segment + __host__ __device__ thrust::optional get_major_from_major_hypersparse_idx_nocheck( + vertex_t major_hypersparse_idx) const noexcept + { + return dcs_nzd_vertices_ + ? 
thrust::optional{(*dcs_nzd_vertices_)[major_hypersparse_idx]} + : thrust::nullopt; + } + __host__ __device__ vertex_t get_minor_from_minor_offset_nocheck(vertex_t minor_offset) const noexcept { @@ -159,6 +194,15 @@ class matrix_partition_device_view_t get_dcs_nzd_vertices() const + { + return dcs_nzd_vertices_; + } + __host__ __device__ thrust::optional get_dcs_nzd_vertex_count() const + { + return dcs_nzd_vertex_count_; + } + private: // should be trivially copyable to device @@ -220,12 +264,39 @@ class matrix_partition_device_view_t get_major_hypersparse_idx_from_major_nocheck( + vertex_t major) const noexcept + { + assert(false); + return thrust::nullopt; + } + + // major_hypersparse_idx: index within the hypersparse segment + __host__ __device__ thrust::optional get_major_from_major_hypersparse_idx_nocheck( + vertex_t major_hypersparse_idx) const noexcept + { + assert(false); + return thrust::nullopt; + } + __host__ __device__ vertex_t get_minor_from_minor_offset_nocheck(vertex_t minor_offset) const noexcept { return minor_offset; } + __host__ __device__ thrust::optional get_dcs_nzd_vertices() const + { + assert(false); + return thrust::nullopt; + } + __host__ __device__ thrust::optional get_dcs_nzd_vertex_count() const + { + assert(false); + return thrust::nullopt; + } + private: vertex_t number_of_vertices_; }; diff --git a/cpp/include/cugraph/matrix_partition_view.hpp b/cpp/include/cugraph/matrix_partition_view.hpp index 85c366a5a74..8a81bc7db4a 100644 --- a/cpp/include/cugraph/matrix_partition_view.hpp +++ b/cpp/include/cugraph/matrix_partition_view.hpp @@ -123,6 +123,9 @@ class matrix_partition_view_t get_dcs_nzd_vertices() const { return std::nullopt; } + std::optional get_dcs_nzd_vertex_count() const { return std::nullopt; } + vertex_t get_major_first() const { return vertex_t{0}; } vertex_t get_major_last() const { return number_of_vertices_; } vertex_t get_minor_first() const { return vertex_t{0}; } diff --git a/cpp/include/cugraph/prims/copy_v_transform_reduce_in_out_nbr.cuh b/cpp/include/cugraph/prims/copy_v_transform_reduce_in_out_nbr.cuh index cae0220e6bc..1a07bf98ec9 100644 --- a/cpp/include/cugraph/prims/copy_v_transform_reduce_in_out_nbr.cuh +++ b/cpp/include/cugraph/prims/copy_v_transform_reduce_in_out_nbr.cuh @@ -45,6 +45,102 @@ namespace detail { int32_t constexpr copy_v_transform_reduce_nbr_for_all_block_size = 512; +template +__global__ void for_all_major_for_all_nbr_hypersparse( + matrix_partition_device_view_t matrix_partition, + typename GraphViewType::vertex_type major_hypersparse_first, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, + ResultValueOutputIterator result_value_output_first, + EdgeOp e_op, + T init /* relevent only if update_major == true */) +{ + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; + + auto const tid = threadIdx.x + blockIdx.x * blockDim.x; + auto major_start_offset = + static_cast(major_hypersparse_first - matrix_partition.get_major_first()); + auto idx = static_cast(tid); + + auto dcs_nzd_vertex_count = *(matrix_partition.get_dcs_nzd_vertex_count()); + + while (idx < static_cast(dcs_nzd_vertex_count)) { + auto major = + *(matrix_partition.get_major_from_major_hypersparse_idx_nocheck(static_cast(idx))); + auto major_idx = + major_start_offset + idx; // major_offset != major_idx in the hypersparse region + vertex_t const* 
indices{nullptr}; + thrust::optional weights{nullptr}; + edge_t local_degree{}; + thrust::tie(indices, weights, local_degree) = + matrix_partition.get_local_edges(static_cast(major_idx)); + auto transform_op = [&matrix_partition, + &adj_matrix_row_value_input_first, + &adj_matrix_col_value_input_first, + &e_op, + major, + indices, + weights] __device__(auto i) { + auto major_offset = matrix_partition.get_major_offset_from_major_nocheck(major); + auto minor = indices[i]; + auto weight = weights ? (*weights)[i] : weight_t{1.0}; + auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); + auto row = GraphViewType::is_adj_matrix_transposed ? minor : major; + auto col = GraphViewType::is_adj_matrix_transposed ? major : minor; + auto row_offset = GraphViewType::is_adj_matrix_transposed + ? minor_offset + : static_cast(major_offset); + auto col_offset = GraphViewType::is_adj_matrix_transposed + ? static_cast(major_offset) + : minor_offset; + return evaluate_edge_op() + .compute(row, + col, + weight, + *(adj_matrix_row_value_input_first + row_offset), + *(adj_matrix_col_value_input_first + col_offset), + e_op); + }; + + if (update_major) { + *(result_value_output_first + (major - major_hypersparse_first)) = thrust::transform_reduce( + thrust::seq, + thrust::make_counting_iterator(edge_t{0}), + thrust::make_counting_iterator(local_degree), + transform_op, + init, + [] __device__(auto lhs, auto rhs) { return plus_edge_op_result(lhs, rhs); }); + } else { + thrust::for_each( + thrust::seq, + thrust::make_counting_iterator(edge_t{0}), + thrust::make_counting_iterator(local_degree), + [&matrix_partition, indices, &result_value_output_first, &transform_op] __device__(auto i) { + auto e_op_result = transform_op(i); + auto minor = indices[i]; + auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); + atomic_accumulate_edge_op_result(result_value_output_first + minor_offset, e_op_result); + }); + } + idx += gridDim.x * blockDim.x; + } +} + template (major_first - matrix_partition.get_major_first()); @@ -383,7 +478,17 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, auto col_value_input_offset = GraphViewType::is_adj_matrix_transposed ? matrix_partition.get_major_value_start_offset() : vertex_t{0}; - auto segment_offsets = graph_view.get_local_adj_matrix_partition_segment_offsets(i); + std::conditional_t< + GraphViewType::is_multi_gpu, + std::conditional_t, + VertexValueOutputIterator> + output_buffer_first{}; + if constexpr (GraphViewType::is_multi_gpu) { + output_buffer_first = update_major ? major_buffer_first : minor_buffer_first; + } else { + output_buffer_first = vertex_value_output_first; + } + auto segment_offsets = graph_view.get_local_adj_matrix_partition_segment_offsets(i); if (segment_offsets) { // FIXME: we may further improve performance by 1) concurrently running kernels on different // segments; 2) individually tuning block sizes for different segments; and 3) adding one more @@ -393,88 +498,67 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, raft::grid_1d_block_t update_grid((*segment_offsets)[1], detail::copy_v_transform_reduce_nbr_for_all_block_size, handle.get_device_properties().maxGridSize[0]); - // FIXME: with C++17 we can collapse the if-else statement below with a functor with "if - // constexpr" that returns either a multi-GPU output buffer or a single-GPU output buffer. 
- if (GraphViewType::is_multi_gpu) { - detail::for_all_major_for_all_nbr_high_degree - <<>>( - matrix_partition, - matrix_partition.get_major_first(), - matrix_partition.get_major_first() + (*segment_offsets)[1], - adj_matrix_row_value_input_first + row_value_input_offset, - adj_matrix_col_value_input_first + col_value_input_offset, - update_major ? major_buffer_first : minor_buffer_first, - e_op, - major_init); - } else { - detail::for_all_major_for_all_nbr_high_degree - <<>>( - matrix_partition, - matrix_partition.get_major_first(), - matrix_partition.get_major_first() + (*segment_offsets)[1], - adj_matrix_row_value_input_first + row_value_input_offset, - adj_matrix_col_value_input_first + col_value_input_offset, - vertex_value_output_first, - e_op, - major_init); - } + detail::for_all_major_for_all_nbr_high_degree + <<>>( + matrix_partition, + matrix_partition.get_major_first(), + matrix_partition.get_major_first() + (*segment_offsets)[1], + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + output_buffer_first, + e_op, + major_init); } if ((*segment_offsets)[2] - (*segment_offsets)[1] > 0) { raft::grid_1d_warp_t update_grid((*segment_offsets)[2] - (*segment_offsets)[1], detail::copy_v_transform_reduce_nbr_for_all_block_size, handle.get_device_properties().maxGridSize[0]); - // FIXME: with C++17 we can collapse the if-else statement below with a functor with "if - // constexpr" that returns either a multi-GPU output buffer or a single-GPU output buffer. - if (GraphViewType::is_multi_gpu) { - detail::for_all_major_for_all_nbr_mid_degree - <<>>( - matrix_partition, - matrix_partition.get_major_first() + (*segment_offsets)[1], - matrix_partition.get_major_first() + (*segment_offsets)[2], - adj_matrix_row_value_input_first + row_value_input_offset, - adj_matrix_col_value_input_first + col_value_input_offset, - update_major ? major_buffer_first + (*segment_offsets)[1] : minor_buffer_first, - e_op, - major_init); - } else { - detail::for_all_major_for_all_nbr_mid_degree - <<>>( - matrix_partition, - matrix_partition.get_major_first() + (*segment_offsets)[1], - matrix_partition.get_major_first() + (*segment_offsets)[2], - adj_matrix_row_value_input_first + row_value_input_offset, - adj_matrix_col_value_input_first + col_value_input_offset, - vertex_value_output_first + (update_major ? (*segment_offsets)[1] : vertex_t{0}), - e_op, - major_init); - } + detail::for_all_major_for_all_nbr_mid_degree + <<>>( + matrix_partition, + matrix_partition.get_major_first() + (*segment_offsets)[1], + matrix_partition.get_major_first() + (*segment_offsets)[2], + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + output_buffer_first + (update_major ? (*segment_offsets)[1] : vertex_t{0}), + e_op, + major_init); } if ((*segment_offsets)[3] - (*segment_offsets)[2] > 0) { raft::grid_1d_thread_t update_grid((*segment_offsets)[3] - (*segment_offsets)[2], detail::copy_v_transform_reduce_nbr_for_all_block_size, handle.get_device_properties().maxGridSize[0]); - // FIXME: with C++17 we can collapse the if-else statement below with a functor with "if - // constexpr" that returns either a multi-GPU output buffer or a single-GPU output buffer. 
- if (GraphViewType::is_multi_gpu) { - detail::for_all_major_for_all_nbr_low_degree - <<>>( - matrix_partition, - matrix_partition.get_major_first() + (*segment_offsets)[2], - matrix_partition.get_major_first() + (*segment_offsets)[3], - adj_matrix_row_value_input_first + row_value_input_offset, - adj_matrix_col_value_input_first + col_value_input_offset, - update_major ? major_buffer_first + (*segment_offsets)[2] : minor_buffer_first, - e_op, - major_init); - } else { - detail::for_all_major_for_all_nbr_low_degree + detail::for_all_major_for_all_nbr_low_degree + <<>>( + matrix_partition, + matrix_partition.get_major_first() + (*segment_offsets)[2], + matrix_partition.get_major_first() + (*segment_offsets)[3], + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + output_buffer_first + (update_major ? (*segment_offsets)[2] : vertex_t{0}), + e_op, + major_init); + } + if (matrix_partition.get_dcs_nzd_vertex_count()) { + if constexpr (update_major) { // this is necessary as we don't visit every vertex in the + // hypersparse segment in + // for_all_major_for_all_nbr_hypersparse + thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + output_buffer_first + (*segment_offsets)[3], + output_buffer_first + (*segment_offsets)[4], + major_init); + } + if (*(matrix_partition.get_dcs_nzd_vertex_count()) > 0) { + raft::grid_1d_thread_t update_grid(*(matrix_partition.get_dcs_nzd_vertex_count()), + detail::copy_v_transform_reduce_nbr_for_all_block_size, + handle.get_device_properties().maxGridSize[0]); + detail::for_all_major_for_all_nbr_hypersparse <<>>( matrix_partition, - matrix_partition.get_major_first() + (*segment_offsets)[2], matrix_partition.get_major_first() + (*segment_offsets)[3], adj_matrix_row_value_input_first + row_value_input_offset, adj_matrix_col_value_input_first + col_value_input_offset, - vertex_value_output_first + (update_major ? (*segment_offsets)[2] : vertex_t{0}), + output_buffer_first + (update_major ? (*segment_offsets)[3] : vertex_t{0}), e_op, major_init); } @@ -484,31 +568,16 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, raft::grid_1d_thread_t update_grid(matrix_partition.get_major_size(), detail::copy_v_transform_reduce_nbr_for_all_block_size, handle.get_device_properties().maxGridSize[0]); - // FIXME: with C++17 we can collapse the if-else statement below with a functor with "if - // constexpr" that returns either a multi-GPU output buffer or a single-GPU output buffer. - if (GraphViewType::is_multi_gpu) { - detail::for_all_major_for_all_nbr_low_degree - <<>>( - matrix_partition, - matrix_partition.get_major_first(), - matrix_partition.get_major_last(), - adj_matrix_row_value_input_first + row_value_input_offset, - adj_matrix_col_value_input_first + col_value_input_offset, - update_major ? 
major_buffer_first : minor_buffer_first, - e_op, - major_init); - } else { - detail::for_all_major_for_all_nbr_low_degree - <<>>( - matrix_partition, - matrix_partition.get_major_first(), - matrix_partition.get_major_last(), - adj_matrix_row_value_input_first + row_value_input_offset, - adj_matrix_col_value_input_first + col_value_input_offset, - vertex_value_output_first, - e_op, - major_init); - } + detail::for_all_major_for_all_nbr_low_degree + <<>>( + matrix_partition, + matrix_partition.get_major_first(), + matrix_partition.get_major_last(), + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + output_buffer_first, + e_op, + major_init); } } diff --git a/cpp/include/cugraph/prims/transform_reduce_e.cuh b/cpp/include/cugraph/prims/transform_reduce_e.cuh index 9bc7fe1cdf1..ae9413f7857 100644 --- a/cpp/include/cugraph/prims/transform_reduce_e.cuh +++ b/cpp/include/cugraph/prims/transform_reduce_e.cuh @@ -37,6 +37,92 @@ namespace detail { // FIXME: block size requires tuning int32_t constexpr transform_reduce_e_for_all_block_size = 128; +template +__global__ void for_all_major_for_all_nbr_hypersparse( + matrix_partition_device_view_t matrix_partition, + typename GraphViewType::vertex_type major_hypersparse_first, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, + ResultIterator result_iter /* size 1 */, + EdgeOp e_op) +{ + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; + using e_op_result_t = typename std::iterator_traits::value_type; + + auto const tid = threadIdx.x + blockIdx.x * blockDim.x; + auto major_start_offset = + static_cast(major_hypersparse_first - matrix_partition.get_major_first()); + size_t idx = static_cast(tid); + + auto dcs_nzd_vertex_count = *(matrix_partition.get_dcs_nzd_vertex_count()); + + e_op_result_t e_op_result_sum{}; + while (idx < static_cast(dcs_nzd_vertex_count)) { + auto major = + *(matrix_partition.get_major_from_major_hypersparse_idx_nocheck(static_cast(idx))); + auto major_idx = + major_start_offset + idx; // major_offset != major_idx in the hypersparse region + vertex_t const* indices{nullptr}; + thrust::optional weights{nullptr}; + edge_t local_degree{}; + thrust::tie(indices, weights, local_degree) = matrix_partition.get_local_edges(major_idx); + auto sum = thrust::transform_reduce( + thrust::seq, + thrust::make_counting_iterator(edge_t{0}), + thrust::make_counting_iterator(local_degree), + [&matrix_partition, + &adj_matrix_row_value_input_first, + &adj_matrix_col_value_input_first, + &e_op, + major, + indices, + weights] __device__(auto i) { + auto major_offset = matrix_partition.get_major_offset_from_major_nocheck(major); + auto minor = indices[i]; + auto weight = weights ? (*weights)[i] : weight_t{1.0}; + auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); + auto row = GraphViewType::is_adj_matrix_transposed ? minor : major; + auto col = GraphViewType::is_adj_matrix_transposed ? major : minor; + auto row_offset = GraphViewType::is_adj_matrix_transposed + ? minor_offset + : static_cast(major_offset); + auto col_offset = GraphViewType::is_adj_matrix_transposed + ? 
static_cast(major_offset) + : minor_offset; + return evaluate_edge_op() + .compute(row, + col, + weight, + *(adj_matrix_row_value_input_first + row_offset), + *(adj_matrix_col_value_input_first + col_offset), + e_op); + }, + e_op_result_t{}, + [] __device__(auto lhs, auto rhs) { return plus_edge_op_result(lhs, rhs); }); + + e_op_result_sum = plus_edge_op_result(e_op_result_sum, sum); + idx += gridDim.x * blockDim.x; + } + + e_op_result_sum = + block_reduce_edge_op_result().compute( + e_op_result_sum); + if (threadIdx.x == 0) { atomic_accumulate_edge_op_result(result_iter, e_op_result_sum); } +} + template <<>>( matrix_partition, @@ -356,7 +441,6 @@ T transform_reduce_e(raft::handle_t const& handle, raft::grid_1d_warp_t update_grid((*segment_offsets)[2] - (*segment_offsets)[1], detail::transform_reduce_e_for_all_block_size, handle.get_device_properties().maxGridSize[0]); - detail::for_all_major_for_all_nbr_mid_degree <<>>( matrix_partition, @@ -371,7 +455,6 @@ T transform_reduce_e(raft::handle_t const& handle, raft::grid_1d_thread_t update_grid((*segment_offsets)[3] - (*segment_offsets)[2], detail::transform_reduce_e_for_all_block_size, handle.get_device_properties().maxGridSize[0]); - detail::for_all_major_for_all_nbr_low_degree <<>>( matrix_partition, @@ -382,6 +465,20 @@ T transform_reduce_e(raft::handle_t const& handle, get_dataframe_buffer_begin(result_buffer), e_op); } + if (matrix_partition.get_dcs_nzd_vertex_count() && + (*(matrix_partition.get_dcs_nzd_vertex_count()) > 0)) { + raft::grid_1d_thread_t update_grid(*(matrix_partition.get_dcs_nzd_vertex_count()), + detail::transform_reduce_e_for_all_block_size, + handle.get_device_properties().maxGridSize[0]); + detail::for_all_major_for_all_nbr_hypersparse + <<>>( + matrix_partition, + matrix_partition.get_major_first() + (*segment_offsets)[3], + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + get_dataframe_buffer_begin(result_buffer), + e_op); + } } else { if (matrix_partition.get_major_size() > 0) { raft::grid_1d_thread_t update_grid(matrix_partition.get_major_size(), diff --git a/cpp/include/cugraph/prims/update_frontier_v_push_if_out_nbr.cuh b/cpp/include/cugraph/prims/update_frontier_v_push_if_out_nbr.cuh index 3caa760f498..88a30a45f03 100644 --- a/cpp/include/cugraph/prims/update_frontier_v_push_if_out_nbr.cuh +++ b/cpp/include/cugraph/prims/update_frontier_v_push_if_out_nbr.cuh @@ -240,6 +240,82 @@ __device__ void push_if_buffer_element( } } +template +__global__ void for_all_frontier_row_for_all_nbr_hypersparse( + matrix_partition_device_view_t matrix_partition, + typename GraphViewType::vertex_type major_hypersparse_first, + KeyIterator key_first, + KeyIterator key_last, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, + BufferKeyOutputIterator buffer_key_output_first, + BufferPayloadOutputIterator buffer_payload_output_first, + size_t* buffer_idx_ptr, + EdgeOp e_op) +{ + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; + using key_t = typename std::iterator_traits::value_type; + static_assert( + std::is_same_v::value_type>); + using payload_t = + typename optional_payload_buffer_value_type_t::value; + + static_assert(!GraphViewType::is_adj_matrix_transposed, + "GraphViewType should support the push model."); + + auto const tid = threadIdx.x + blockIdx.x * 
blockDim.x; + auto row_start_offset = + static_cast(major_hypersparse_first - matrix_partition.get_major_first()); + auto idx = static_cast(tid); + + auto dcs_nzd_vertices = *(matrix_partition.get_dcs_nzd_vertices()); + auto dcs_nzd_vertex_count = *(matrix_partition.get_dcs_nzd_vertex_count()); + + while (idx < static_cast(thrust::distance(key_first, key_last))) { + auto key = *(key_first + idx); + vertex_t row{}; + if constexpr (std::is_same_v) { + row = key; + } else { + row = thrust::get<0>(key); + } + auto row_hypersparse_idx = matrix_partition.get_major_hypersparse_idx_from_major_nocheck(row); + if (row_hypersparse_idx) { + auto row_offset = matrix_partition.get_major_offset_from_major_nocheck(row); + auto row_idx = row_start_offset + *row_hypersparse_idx; + vertex_t const* indices{nullptr}; + thrust::optional weights{nullptr}; + edge_t local_out_degree{}; + thrust::tie(indices, weights, local_out_degree) = matrix_partition.get_local_edges(row_idx); + for (edge_t i = 0; i < local_out_degree; ++i) { + push_if_buffer_element(matrix_partition, + key, + row_offset, + indices[i], + weights ? (*weights)[i] : weight_t{1.0}, + adj_matrix_row_value_input_first, + adj_matrix_col_value_input_first, + buffer_key_output_first, + buffer_payload_output_first, + buffer_idx_ptr, + e_op); + } + } + idx += gridDim.x * blockDim.x; + } +} + template (i), handle.get_stream()); - ret += thrust::transform_reduce( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - frontier_vertices.begin(), - frontier_vertices.end(), - [matrix_partition] __device__(auto major) { - auto major_offset = matrix_partition.get_major_offset_from_major_nocheck(major); - return matrix_partition.get_local_degree(major_offset); - }, - edge_t{0}, - thrust::plus()); + auto segment_offsets = graph_view.get_local_adj_matrix_partition_segment_offsets(i); + auto use_dcs = + segment_offsets + ? ((*segment_offsets).size() > (detail::num_sparse_segments_per_vertex_partition + 1)) + : false; + + ret += + use_dcs + ? thrust::transform_reduce( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + frontier_vertices.begin(), + frontier_vertices.end(), + [matrix_partition, + major_hypersparse_first = + matrix_partition.get_major_first() + + (*segment_offsets) + [detail::num_sparse_segments_per_vertex_partition]] __device__(auto major) { + if (major < major_hypersparse_first) { + auto major_offset = matrix_partition.get_major_offset_from_major_nocheck(major); + return matrix_partition.get_local_degree(major_offset); + } else { + auto major_hypersparse_idx = + matrix_partition.get_major_hypersparse_idx_from_major_nocheck(major); + return major_hypersparse_idx + ? 
matrix_partition.get_local_degree( + matrix_partition.get_major_offset_from_major_nocheck( + major_hypersparse_first) + + *major_hypersparse_idx) + : edge_t{0}; + } + }, + edge_t{0}, + thrust::plus()) + : thrust::transform_reduce( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + frontier_vertices.begin(), + frontier_vertices.end(), + [matrix_partition] __device__(auto major) { + auto major_offset = matrix_partition.get_major_offset_from_major_nocheck(major); + return matrix_partition.get_local_degree(major_offset); + }, + edge_t{0}, + thrust::plus()); } else { assert(i == 0); ret += thrust::transform_reduce( @@ -821,19 +930,49 @@ void update_frontier_v_push_if_out_nbr( matrix_partition_frontier_row_last = thrust::get<0>( get_dataframe_buffer_end(matrix_partition_frontier_key_buffer).get_iterator_tuple()); } + + auto segment_offsets = graph_view.get_local_adj_matrix_partition_segment_offsets(i); + auto use_dcs = + segment_offsets + ? ((*segment_offsets).size() > (detail::num_sparse_segments_per_vertex_partition + 1)) + : false; + auto max_pushes = - thrust::distance(matrix_partition_frontier_row_first, matrix_partition_frontier_row_last) > 0 - ? thrust::transform_reduce( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - matrix_partition_frontier_row_first, - matrix_partition_frontier_row_last, - [matrix_partition] __device__(auto row) { - auto row_offset = matrix_partition.get_major_offset_from_major_nocheck(row); - return matrix_partition.get_local_degree(row_offset); - }, - edge_t{0}, - thrust::plus()) - : edge_t{0}; + use_dcs ? thrust::transform_reduce( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + matrix_partition_frontier_row_first, + matrix_partition_frontier_row_last, + [matrix_partition, + major_hypersparse_first = + matrix_partition.get_major_first() + + (*segment_offsets) + [detail::num_sparse_segments_per_vertex_partition]] __device__(auto row) { + if (row < major_hypersparse_first) { + auto row_offset = matrix_partition.get_major_offset_from_major_nocheck(row); + return matrix_partition.get_local_degree(row_offset); + } else { + auto row_hypersparse_idx = + matrix_partition.get_major_hypersparse_idx_from_major_nocheck(row); + return row_hypersparse_idx + ? matrix_partition.get_local_degree( + matrix_partition.get_major_offset_from_major_nocheck( + major_hypersparse_first) + + *row_hypersparse_idx) + : edge_t{0}; + } + }, + edge_t{0}, + thrust::plus()) + : thrust::transform_reduce( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + matrix_partition_frontier_row_first, + matrix_partition_frontier_row_last, + [matrix_partition] __device__(auto row) { + auto row_offset = matrix_partition.get_major_offset_from_major_nocheck(row); + return matrix_partition.get_local_degree(row_offset); + }, + edge_t{0}, + thrust::plus()); // FIXME: This is highly pessimistic for single GPU (and multi-GPU as well if we maintain // additional per column data for filtering in e_op). If we can pause & resume execution if @@ -858,12 +997,13 @@ void update_frontier_v_push_if_out_nbr( auto row_value_input_offset = GraphViewType::is_adj_matrix_transposed ? 
vertex_t{0} : matrix_partition.get_major_value_start_offset(); - auto segment_offsets = graph_view.get_local_adj_matrix_partition_segment_offsets(i); if (segment_offsets) { static_assert(detail::num_sparse_segments_per_vertex_partition == 3); - std::vector h_thresholds(detail::num_sparse_segments_per_vertex_partition - 1); + std::vector h_thresholds(detail::num_sparse_segments_per_vertex_partition + + (use_dcs ? 1 : 0) - 1); h_thresholds[0] = matrix_partition.get_major_first() + (*segment_offsets)[1]; h_thresholds[1] = matrix_partition.get_major_first() + (*segment_offsets)[2]; + if (use_dcs) { h_thresholds[2] = matrix_partition.get_major_first() + (*segment_offsets)[3]; } rmm::device_uvector d_thresholds(h_thresholds.size(), handle.get_stream()); raft::update_device( d_thresholds.data(), h_thresholds.data(), h_thresholds.size(), handle.get_stream()); @@ -877,6 +1017,7 @@ void update_frontier_v_push_if_out_nbr( std::vector h_offsets(d_offsets.size()); raft::update_host(h_offsets.data(), d_offsets.data(), d_offsets.size(), handle.get_stream()); CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + h_offsets.push_back(matrix_partition_frontier_size); // FIXME: we may further improve performance by 1) concurrently running kernels on different // segments; 2) individually tuning block sizes for different segments; and 3) adding one more // segment for very high degree vertices and running segmented reduction @@ -885,7 +1026,6 @@ void update_frontier_v_push_if_out_nbr( h_offsets[0], detail::update_frontier_v_push_if_out_nbr_for_all_block_size, handle.get_device_properties().maxGridSize[0]); - detail::for_all_frontier_row_for_all_nbr_high_degree <<>>( matrix_partition, @@ -903,7 +1043,6 @@ void update_frontier_v_push_if_out_nbr( h_offsets[1] - h_offsets[0], detail::update_frontier_v_push_if_out_nbr_for_all_block_size, handle.get_device_properties().maxGridSize[0]); - detail::for_all_frontier_row_for_all_nbr_mid_degree <<>>( matrix_partition, @@ -916,17 +1055,34 @@ void update_frontier_v_push_if_out_nbr( buffer_idx.data(), e_op); } - if (matrix_partition_frontier_size - h_offsets[1] > 0) { + if (h_offsets[2] - h_offsets[1] > 0) { raft::grid_1d_thread_t update_grid( - matrix_partition_frontier_size - h_offsets[1], + h_offsets[2] - h_offsets[1], detail::update_frontier_v_push_if_out_nbr_for_all_block_size, handle.get_device_properties().maxGridSize[0]); - detail::for_all_frontier_row_for_all_nbr_low_degree <<>>( matrix_partition, get_dataframe_buffer_begin(matrix_partition_frontier_key_buffer) + h_offsets[1], - get_dataframe_buffer_end(matrix_partition_frontier_key_buffer), + get_dataframe_buffer_begin(matrix_partition_frontier_key_buffer) + h_offsets[2], + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first, + get_dataframe_buffer_begin(key_buffer), + detail::get_optional_payload_buffer_begin(payload_buffer), + buffer_idx.data(), + e_op); + } + if (matrix_partition.get_dcs_nzd_vertex_count() && (h_offsets[3] - h_offsets[2] > 0)) { + raft::grid_1d_thread_t update_grid( + h_offsets[3] - h_offsets[2], + detail::update_frontier_v_push_if_out_nbr_for_all_block_size, + handle.get_device_properties().maxGridSize[0]); + detail::for_all_frontier_row_for_all_nbr_hypersparse + <<>>( + matrix_partition, + matrix_partition.get_major_first() + (*segment_offsets)[3], + get_dataframe_buffer_begin(matrix_partition_frontier_key_buffer) + h_offsets[2], + get_dataframe_buffer_begin(matrix_partition_frontier_key_buffer) + h_offsets[3], adj_matrix_row_value_input_first + 
row_value_input_offset, adj_matrix_col_value_input_first, get_dataframe_buffer_begin(key_buffer), diff --git a/cpp/src/components/weakly_connected_components.cu b/cpp/src/components/weakly_connected_components.cu index 57441cb9b4f..da63161da33 100644 --- a/cpp/src/components/weakly_connected_components.cu +++ b/cpp/src/components/weakly_connected_components.cu @@ -422,7 +422,7 @@ void weakly_connected_components_impl(raft::handle_t const& handle, raft::update_host( &init_max_new_roots, d_counts.data() + comm_rank, size_t{1}, handle.get_stream()); #else - iinit_max_new_roots = + init_max_new_roots = host_scalar_scatter(comm, init_max_new_root_counts.data(), int{0}, handle.get_stream()); #endif } else { @@ -434,7 +434,7 @@ void weakly_connected_components_impl(raft::handle_t const& handle, raft::update_host( &init_max_new_roots, d_counts.data() + comm_rank, size_t{1}, handle.get_stream()); #else - iinit_max_new_roots = + init_max_new_roots = host_scalar_scatter(comm, init_max_new_root_counts.data(), int{0}, handle.get_stream()); #endif } diff --git a/cpp/src/experimental/graph.cu b/cpp/src/experimental/graph.cu index a01537355df..c5f21cfa4f7 100644 --- a/cpp/src/experimental/graph.cu +++ b/cpp/src/experimental/graph.cu @@ -226,7 +226,8 @@ graph_t update_adj_matrix_partition_edge_counts( (use_dcs ? ((*adj_matrix_partition_segment_offsets) [(detail::num_sparse_segments_per_vertex_partition + 2) * i + detail::num_sparse_segments_per_vertex_partition] - - major_first + (*adj_matrix_partition_dcs_nzd_vertex_counts)[i]) + (*adj_matrix_partition_dcs_nzd_vertex_counts)[i]) : (major_last - major_first)), 1, stream); @@ -251,9 +251,10 @@ graph_view_t{}), - "Invalid Invalid input argument: sorted_by_global_degree_within_vertex_partition is " - "set to true, but degrees are not non-ascending."); + auto degrees = detail::compute_major_degrees(handle, + adj_matrix_partition_offsets, + adj_matrix_partition_dcs_nzd_vertices, + adj_matrix_partition_dcs_nzd_vertex_counts, + partition, + adj_matrix_partition_segment_offsets); + CUGRAPH_EXPECTS(thrust::is_sorted(rmm::exec_policy(default_stream_view), + degrees.begin(), + degrees.end(), + thrust::greater{}), + "Invalid Invalid input argument: adj_matrix_partition_segment_offsets are " + "provided, but degrees are not in descending order."); auto num_segments_per_vertex_partition = detail::num_sparse_segments_per_vertex_partition + (use_dcs ? 
1 : 0); @@ -377,15 +382,13 @@ graph_view_t{offsets}); - CUGRAPH_EXPECTS( - thrust::is_sorted(rmm::exec_policy(default_stream_view), - degree_first, - degree_first + this->get_number_of_vertices(), - thrust::greater{}), - "Internal Error: segment_offsets are provided, but degrees are not in descending order."); + auto degrees = detail::compute_major_degrees(handle, offsets, number_of_vertices); + CUGRAPH_EXPECTS(thrust::is_sorted(rmm::exec_policy(default_stream_view), + degrees.begin(), + degrees.end(), + thrust::greater{}), + "Invalid Invalid input argument: segment_offsets are provided, but degrees " + "are not in descending order."); CUGRAPH_EXPECTS(std::is_sorted((*segment_offsets).begin(), (*segment_offsets).end()), "Internal Error: erroneous segment_offsets."); @@ -412,8 +415,12 @@ graph_view_tadj_matrix_partition_offsets_, this->partition_); + return detail::compute_major_degrees(handle, + this->adj_matrix_partition_offsets_, + this->adj_matrix_partition_dcs_nzd_vertices_, + this->adj_matrix_partition_dcs_nzd_vertex_counts_, + this->partition_, + this->adj_matrix_partition_segment_offsets_); } else { return compute_minor_degrees(handle, *this); } @@ -452,8 +459,12 @@ graph_view_tadj_matrix_partition_offsets_, this->partition_); + return detail::compute_major_degrees(handle, + this->adj_matrix_partition_offsets_, + this->adj_matrix_partition_dcs_nzd_vertices_, + this->adj_matrix_partition_dcs_nzd_vertex_counts_, + this->partition_, + this->adj_matrix_partition_segment_offsets_); } } From 2b7d02f8123494e2cfb4ecb61c56c6be6dbe5b1d Mon Sep 17 00:00:00 2001 From: Joseph Nke <76006812+jnke2016@users.noreply.github.com> Date: Tue, 13 Jul 2021 11:47:00 -0500 Subject: [PATCH 320/343] replace cudf assert_eq (#1693) replace `cudf assert_eq` by `assert_frame_equal` which is more stable Authors: - Joseph Nke (https://github.com/jnke2016) Approvers: - Brad Rees (https://github.com/BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1693 --- python/cugraph/tests/test_graph.py | 4 +- python/cugraph/tests/test_hypergraph.py | 51 +++++++++++-------------- 2 files changed, 25 insertions(+), 30 deletions(-) diff --git a/python/cugraph/tests/test_graph.py b/python/cugraph/tests/test_graph.py index 71046f65cd8..fa68a50c952 100644 --- a/python/cugraph/tests/test_graph.py +++ b/python/cugraph/tests/test_graph.py @@ -20,7 +20,7 @@ import scipy import cudf -from cudf.testing._utils import assert_eq +from cudf.testing.testing import assert_frame_equal import cugraph from cugraph.tests import utils @@ -327,7 +327,7 @@ def test_edges_for_Graph(graph_file): else: edges.append([edge[0], edge[1]]) nx_edge_list = cudf.DataFrame(list(edges), columns=['src', 'dst']) - assert_eq( + assert_frame_equal( nx_edge_list.sort_values(by=['src', 'dst']).reset_index(drop=True), cu_edge_list.sort_values(by=['src', 'dst']).reset_index(drop=True), check_dtype=False diff --git a/python/cugraph/tests/test_hypergraph.py b/python/cugraph/tests/test_hypergraph.py index 79ae7139180..be48168e834 100644 --- a/python/cugraph/tests/test_hypergraph.py +++ b/python/cugraph/tests/test_hypergraph.py @@ -39,8 +39,7 @@ import pandas as pd import pytest import cudf -from cudf.testing._utils import assert_eq - +from cudf.testing.testing import assert_frame_equal import cugraph @@ -109,11 +108,10 @@ def test_hyperedges(categorical_metadata): h = cugraph.hypergraph(simple_df, categorical_metadata=categorical_metadata) - assert_eq( - len(h.keys()), len(["entities", "nodes", "edges", "events", "graph"]) - ) + assert len(h.keys()) == len( + 
["entities", "nodes", "edges", "events", "graph"]) - edges = pd.DataFrame({ + edges = cudf.from_pandas(pd.DataFrame({ "event_id": [ "event_id::0", "event_id::1", @@ -160,25 +158,24 @@ def test_hyperedges(categorical_metadata): "a1": [1, 2, 3] * 4, "a2": ["red", "blue", "green"] * 4, "🙈": ["æski ēˈmōjē", "😋", "s"] * 4, - }) + })) if categorical_metadata: edges = edges.astype({"edge_type": "category"}) - assert_eq(edges, h["edges"]) - + assert_frame_equal(edges, h["edges"], check_dtype=False) for (k, v) in [ ("entities", 12), ("nodes", 15), ("edges", 12), ("events", 3) ]: - assert_eq(len(h[k]), v) + assert len(h[k]) == v def test_hyperedges_direct(): h = cugraph.hypergraph(hyper_df, direct=True) - assert_eq(len(h["edges"]), 9) - assert_eq(len(h["nodes"]), 9) + assert len(h["edges"]) == 9 + assert len(h["nodes"]) == 9 def test_hyperedges_direct_categories(): @@ -193,8 +190,8 @@ def test_hyperedges_direct_categories(): }, ) - assert_eq(len(h["edges"]), 9) - assert_eq(len(h["nodes"]), 6) + assert len(h["edges"]) == 9 + assert len(h["nodes"]) == 6 def test_hyperedges_direct_manual_shaping(): @@ -204,14 +201,14 @@ def test_hyperedges_direct_manual_shaping(): direct=True, EDGES={"aa": ["cc"], "cc": ["cc"]}, ) - assert_eq(len(h1["edges"]), 6) + assert len(h1["edges"]) == 6 h2 = cugraph.hypergraph( hyper_df, direct=True, EDGES={"aa": ["cc", "bb", "aa"], "cc": ["cc"]}, ) - assert_eq(len(h2["edges"]), 12) + assert len(h2["edges"]) == 12 @pytest.mark.parametrize("categorical_metadata", [False, True]) @@ -222,9 +219,8 @@ def test_drop_edge_attrs(categorical_metadata): drop_edge_attrs=True, categorical_metadata=categorical_metadata) - assert_eq( - len(h.keys()), len(["entities", "nodes", "edges", "events", "graph"]) - ) + assert len(h.keys()) == len( + ["entities", "nodes", "edges", "events", "graph"]) edges = cudf.DataFrame.from_pandas(pd.DataFrame({ "event_id": [ @@ -257,12 +253,12 @@ def test_drop_edge_attrs(categorical_metadata): if categorical_metadata: edges = edges.astype({"edge_type": "category"}) - assert_eq(edges, h["edges"]) + assert_frame_equal(edges, h["edges"], check_dtype=False) for (k, v) in [ ("entities", 9), ("nodes", 12), ("edges", 9), ("events", 3) ]: - assert_eq(len(h[k]), v) + assert len(h[k]) == v @pytest.mark.parametrize("categorical_metadata", [False, True]) @@ -277,9 +273,8 @@ def test_drop_edge_attrs_direct(categorical_metadata): categorical_metadata=categorical_metadata, ) - assert_eq( - len(h.keys()), len(["entities", "nodes", "edges", "events", "graph"]) - ) + assert len(h.keys()) == len( + ["entities", "nodes", "edges", "events", "graph"]) edges = cudf.DataFrame.from_pandas(pd.DataFrame({ "event_id": [ @@ -300,10 +295,10 @@ def test_drop_edge_attrs_direct(categorical_metadata): if categorical_metadata: edges = edges.astype({"edge_type": "category"}) - assert_eq(edges, h["edges"]) + assert_frame_equal(edges, h["edges"], check_dtype=False) for (k, v) in [("entities", 9), ("nodes", 9), ("edges", 6), ("events", 0)]: - assert_eq(len(h[k]), v) + assert len(h[k]) == v def test_skip_hyper(): @@ -399,10 +394,10 @@ def test_skip_na_hyperedge(): nans_df, drop_edge_attrs=True )["edges"] - assert_eq(len(skip_attr_h_edges), len(expected_hits)) + assert len(skip_attr_h_edges) == len(expected_hits) default_h_edges = cugraph.hypergraph(nans_df)["edges"] - assert_eq(len(default_h_edges), len(expected_hits)) + assert len(default_h_edges) == len(expected_hits) def test_hyper_to_pa_vanilla(): From 7ad8fc36254bbc7d1a1dc7d321a93b08a66c26ab Mon Sep 17 00:00:00 2001 From: Brad Rees 
<34135411+BradReesWork@users.noreply.github.com> Date: Tue, 13 Jul 2021 14:30:45 -0400 Subject: [PATCH 321/343] Removed depricated code (#1705) Removed BSP code Removed Unneeded environment Authors: - Brad Rees (https://github.com/BradReesWork) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - AJ Schmidt (https://github.com/ajschmidt8) - https://github.com/Iroy30 URL: https://github.com/rapidsai/cugraph/pull/1705 --- conda/environments/cugraph_dev_cuda11.1.yml | 47 ------- python/cugraph/__init__.py | 2 - python/cugraph/bsp/__init__.py | 15 -- python/cugraph/bsp/traversal/__init__.py | 15 -- python/cugraph/bsp/traversal/bfs_bsp.py | 147 -------------------- 5 files changed, 226 deletions(-) delete mode 100644 conda/environments/cugraph_dev_cuda11.1.yml delete mode 100644 python/cugraph/bsp/__init__.py delete mode 100644 python/cugraph/bsp/traversal/__init__.py delete mode 100644 python/cugraph/bsp/traversal/bfs_bsp.py diff --git a/conda/environments/cugraph_dev_cuda11.1.yml b/conda/environments/cugraph_dev_cuda11.1.yml deleted file mode 100644 index 26ecf04f0ea..00000000000 --- a/conda/environments/cugraph_dev_cuda11.1.yml +++ /dev/null @@ -1,47 +0,0 @@ -name: cugraph_dev -channels: -- rapidsai -- nvidia -- rapidsai-nightly -- conda-forge -dependencies: -- cudatoolkit=11.1 -- cudf=21.08.* -- libcudf=21.08.* -- rmm=21.08.* -- librmm=21.08.* -- dask>=2021.6.0 -- distributed>=2021.6.0 -- dask-cuda=21.08.* -- dask-cudf=21.08.* -- nccl>=2.9.9 -- ucx-py=0.21.* -- ucx-proc=*=gpu -- scipy -- networkx>=2.5.1 -- clang=11.0.0 -- clang-tools=11.0.0 -- cmake>=3.20.1 -- python>=3.6,<3.9 -- notebook>=0.5.0 -- boost -- cython>=0.29,<0.30 -- pytest -- libfaiss=1.7.0 -- faiss-proc=*=cuda -- scikit-learn>=0.23.1 -- sphinx -- sphinx_rtd_theme -- sphinxcontrib-websupport -- sphinx-markdown-tables -- sphinx-copybutton -- nbsphinx -- numpydoc -- ipython -- recommonmark -- pip -- rapids-pytest-benchmark -- doxygen -- pytest-cov -- gtest -- gmock diff --git a/python/cugraph/__init__.py b/python/cugraph/__init__.py index 55c35fa7b4b..9421bee6869 100644 --- a/python/cugraph/__init__.py +++ b/python/cugraph/__init__.py @@ -97,8 +97,6 @@ from cugraph.utilities import utils -from cugraph.bsp.traversal import bfs_df_pregel - from cugraph.proto.components import strong_connected_component from cugraph.proto.structure import find_bicliques diff --git a/python/cugraph/bsp/__init__.py b/python/cugraph/bsp/__init__.py deleted file mode 100644 index dbb94895cec..00000000000 --- a/python/cugraph/bsp/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from cugraph.bsp.traversal import bfs_df_pregel - diff --git a/python/cugraph/bsp/traversal/__init__.py b/python/cugraph/bsp/traversal/__init__.py deleted file mode 100644 index 061d1d7e3a1..00000000000 --- a/python/cugraph/bsp/traversal/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. 
-# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from cugraph.bsp.traversal.bfs_bsp import bfs_df_pregel - diff --git a/python/cugraph/bsp/traversal/bfs_bsp.py b/python/cugraph/bsp/traversal/bfs_bsp.py deleted file mode 100644 index 9a2fd48e201..00000000000 --- a/python/cugraph/bsp/traversal/bfs_bsp.py +++ /dev/null @@ -1,147 +0,0 @@ -# Copyright (c) 2019-2021, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import warnings -import cudf -from collections import OrderedDict - - -def bfs_df_pregel(_df, start, src_col='src', dst_col='dst', copy_data=True): - """ - This function executes an unwieghted Breadth-First-Search (BFS) traversal - to find the distances and predecessors from a specified starting vertex - - NOTE: Only reachable vertices are returned - NOTE: data is not sorted - - Parameters - ---------- - _df : cudf.dataframe - a dataframe containing the source and destination edge list - - start : same type as 'src' and 'dst' - The index of the graph vertex from which the traversal begins - - src : string - the source column name - - dst : string - the destination column name - - copy_data : Bool - whether we can manipulate the dataframe or if a copy should be made - - - Returns - ------- - df : cudf.DataFrame - df['vertex'][i] gives the vertex id of the i'th vertex - df['distance'][i] gives the path distance for the i'th vertex - from the starting vertex - df['predecessor'][i] gives for the i'th vertex the vertex it was - reached from in the traversal - - Examples - -------- - >>> data_df = - cudf.read_csv('datasets/karate.csv', delimiter=' ', header=None) - >>> df = cugraph.pregel_bfs(data_df, 1, '0', '1') - - """ - - warnings.warn( - "This feature is deprecated and will be" - "dropped from cuGraph in release 0.20.", - FutureWarning, - ) - - # extract the src and dst into a dataframe that can be modified - if copy_data: - coo_data = _df[[src_col, dst_col]] - else: - coo_data = _df - - coo_data.rename(columns={src_col: 'src', dst_col: 'dst'}, inplace=True) - - # convert the "start" vertex into a series - frontier = cudf.Series(start).to_frame('dst') - - # create the answer DF - answer = cudf.DataFrame() - answer['vertex'] = start - answer['distance'] = 0 - answer['predecessor'] = -1 - - # init some variables - distance = 0 - done = False - - while not done: - - # --------------------------------- - # update the distance and add it to the dataframe - distance = distance + 1 - frontier['distance'] = distance - - # ----------------------------------- - # Removed all instances 
of the frontier vertices from 'dst' side - # we do not want to hop to a vertex that has already been seen - coo_data = coo_data.merge(frontier, on=['dst'], how='left') - coo_data = coo_data[coo_data.distance.isnull()] - coo_data.drop('distance', inplace=True) - - # now update column names for finding source vertices - frontier.rename(columns={'dst': 'src'}, inplace=True) - - # --------------------------------- - # merge the list of vertices and distances with the COO list - # there are two sets of results that we get from the "hop_df" merge - # (A) the set of edges that start with a vertice in the frontier set - # - this goes into the answer set - # - this also forms the next frontier set - # (B) the set of edges that did not start with a frontier vertex - # - this form the new set of coo_data - hop_df = coo_data.merge(frontier, on=['src'], how='left') - - # --------------------------------- - # (A) get the data where the 'src' was in the frontier list - # create a new dataframe of vertices to hop out from (the 'dst') - one_hop = hop_df.query("distance == @distance") - frontier = one_hop['dst'].to_frame('dst') - - # --------------------------------- - # (B) get all the edges that where not touched - coo_data = hop_df[hop_df.distance.isnull()] - coo_data.drop('distance', inplace=True) - - # --------------------------------- - # update the answer - one_hop.rename( - columns={'dst': 'vertex', 'src': 'predecessor'}, inplace=True) - - # remote duplicates. smallest vertex wins - aggsOut = OrderedDict() - aggsOut['predecessor'] = 'min' - aggsOut['distance'] = 'min' - _a = one_hop.groupby(['vertex'], as_index=False).agg(aggsOut) - - answer = cudf.concat([answer, _a]) - - if len(coo_data) == 0: - done = True - - if not done and len(frontier) == 0: - done = True - - # all done, return the answer - return answer From 790a369dbdd3aadc01368d7278794164f8037642 Mon Sep 17 00:00:00 2001 From: Rick Ratzel <3039903+rlratzel@users.noreply.github.com> Date: Tue, 13 Jul 2021 21:22:48 -0500 Subject: [PATCH 322/343] Updated for CUDA-specific py packages (#1709) Updated cugraph recipe to include CUDA version in build string and depend on a specific cudatoolkit based on CUDA version. This pattern was copied from cuML's recipe. closes #1708 Authors: - Rick Ratzel (https://github.com/rlratzel) Approvers: - Ray Douglass (https://github.com/raydouglass) URL: https://github.com/rapidsai/cugraph/pull/1709 --- conda/recipes/cugraph/meta.yaml | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/conda/recipes/cugraph/meta.yaml b/conda/recipes/cugraph/meta.yaml index 834fbab5838..d6dba8e3f63 100644 --- a/conda/recipes/cugraph/meta.yaml +++ b/conda/recipes/cugraph/meta.yaml @@ -4,6 +4,7 @@ # conda build -c nvidia -c rapidsai -c conda-forge . {% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' 
+ version.split('.')[1] %} +{% set cuda_version='.'.join(environ.get('CUDA', 'unknown').split('.')[:2]) %} {% set py_version=environ.get('CONDA_PY', 36) %} package: name: cugraph @@ -14,7 +15,7 @@ source: build: number: {{ GIT_DESCRIBE_NUMBER }} - string: py{{ py_version }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} + string: cuda{{ cuda_version }}_py{{ py_version }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} script_env: - CC - CXX @@ -29,6 +30,7 @@ requirements: - cudf={{ minor_version }} - ucx-py 0.21 - ucx-proc=*=gpu + - cudatoolkit {{ cuda_version }}.* run: - python x.x - libcugraph={{ version }} @@ -39,14 +41,10 @@ requirements: - distributed>=2021.6.0 - ucx-py 0.21 - ucx-proc=*=gpu - -#test: -# commands: -# - test -f $PREFIX/include/cugraph.h - + - {{ pin_compatible('cudatoolkit', max_pin='x.x') }} about: home: http://rapids.ai/ license: Apache-2.0 license_file: ../../../LICENSE - summary: libcugraph library + summary: cuGraph library From 7059467876c84973f90c6b3c2a657b33fc8fce69 Mon Sep 17 00:00:00 2001 From: Kumar Aatish Date: Wed, 14 Jul 2021 11:46:01 -0400 Subject: [PATCH 323/343] Count if test (#1697) Added test for count_if primitive. Authors: - Kumar Aatish (https://github.com/kaatish) Approvers: - Seunghwa Kang (https://github.com/seunghwak) - Chuck Hastings (https://github.com/ChuckHastings) URL: https://github.com/rapidsai/cugraph/pull/1697 --- cpp/tests/CMakeLists.txt | 4 + cpp/tests/prims/mg_count_if_v.cu | 207 +++++++++++++++++++++++++++++++ 2 files changed, 211 insertions(+) create mode 100644 cpp/tests/prims/mg_count_if_v.cu diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 524b681601f..a8dda519feb 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -451,6 +451,10 @@ if(BUILD_CUGRAPH_MG_TESTS) ########################################################################################### # - MG GRAPH BROADCAST tests -------------------------------------------------------------- ConfigureTestMG(MG_GRAPH_BROADCAST_TEST bcast/mg_graph_bcast.cpp) + + ########################################################################################### + # - MG PRIMS COUNT_IF_V tests ------------------------------------------------------------- + ConfigureTestMG(MG_COUNT_IF_V_TEST prims/mg_count_if_v.cu) else() message(FATAL_ERROR "OpenMPI NOT found, cannot build MG tests.") endif() diff --git a/cpp/tests/prims/mg_count_if_v.cu b/cpp/tests/prims/mg_count_if_v.cu new file mode 100644 index 00000000000..c956067cae8 --- /dev/null +++ b/cpp/tests/prims/mg_count_if_v.cu @@ -0,0 +1,207 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include + +// do the perf measurements +// enabled by command line parameter s'--perf' +// +static int PERF = 0; + +template +struct test_predicate { + int mod{}; + test_predicate(int mod_count) : mod(mod_count) {} + __device__ bool operator()(const vertex_t& val) + { + cuco::detail::MurmurHash3_32 hash_func{}; + return (0 == (hash_func(val) % mod)); + } +}; + +struct Prims_Usecase { + bool check_correctness{true}; +}; + +template +class Tests_MG_CountIfV + : public ::testing::TestWithParam> { + public: + Tests_MG_CountIfV() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + // Compare the results of count_if_v primitive and thrust count_if on a single GPU + template + void run_current_test(Prims_Usecase const& prims_usecase, input_usecase_t const& input_usecase) + { + // 1. initialize handle + + raft::handle_t handle{}; + HighResClock hr_clock{}; + + raft::comms::initialize_mpi_comms(&handle, MPI_COMM_WORLD); + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + + auto row_comm_size = static_cast(sqrt(static_cast(comm_size))); + while (comm_size % row_comm_size != 0) { + --row_comm_size; + } + cugraph::partition_2d::subcomm_factory_t + subcomm_factory(handle, row_comm_size); + + // 2. create MG graph + + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle.get_comms().barrier(); + hr_clock.start(); + } + auto [mg_graph, d_mg_renumber_map_labels] = + input_usecase.template construct_graph( + handle, true, true); + + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle.get_comms().barrier(); + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "MG construct_graph took " << elapsed_time * 1e-6 << " s.\n"; + } + + auto mg_graph_view = mg_graph.view(); + + const int hash_bin_count = 5; + + // 3. run MG count if + + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle.get_comms().barrier(); + hr_clock.start(); + } + + vertex_t const* data = (*d_mg_renumber_map_labels).data(); + auto vertex_count = + count_if_v(handle, mg_graph_view, data, test_predicate(hash_bin_count)); + + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle.get_comms().barrier(); + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "MG count if took " << elapsed_time * 1e-6 << " s.\n"; + } + + // 4. 
compare SG & MG results + + if (prims_usecase.check_correctness) { + cugraph::experimental::graph_t sg_graph( + handle); + std::tie(sg_graph, std::ignore) = + input_usecase.template construct_graph( + handle, true, false); + auto sg_graph_view = sg_graph.view(); + auto expected_vertex_count = + thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::make_counting_iterator(sg_graph_view.get_local_vertex_first()), + thrust::make_counting_iterator(sg_graph_view.get_local_vertex_last()), + test_predicate(hash_bin_count)); + ASSERT_TRUE(expected_vertex_count == vertex_count); + } + } +}; + +using Tests_MG_CountIfV_File = Tests_MG_CountIfV; +using Tests_MG_CountIfV_Rmat = Tests_MG_CountIfV; + +TEST_P(Tests_MG_CountIfV_File, CheckInt32Int32FloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + +TEST_P(Tests_MG_CountIfV_Rmat, CheckInt32Int32FloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + +TEST_P(Tests_MG_CountIfV_File, CheckInt32Int32FloatTransposeTrue) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + +TEST_P(Tests_MG_CountIfV_Rmat, CheckInt32Int32FloatTransposeTrue) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + +INSTANTIATE_TEST_SUITE_P( + file_test, + Tests_MG_CountIfV_File, + ::testing::Combine( + ::testing::Values(Prims_Usecase{true}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), + cugraph::test::File_Usecase("test/datasets/web-Google.mtx"), + cugraph::test::File_Usecase("test/datasets/ljournal-2008.mtx"), + cugraph::test::File_Usecase("test/datasets/webbase-1M.mtx")))); + +INSTANTIATE_TEST_SUITE_P( + rmat_small_test, + Tests_MG_CountIfV_Rmat, + ::testing::Combine(::testing::Values(Prims_Usecase{true}), + ::testing::Values(cugraph::test::Rmat_Usecase( + 10, 16, 0.57, 0.19, 0.19, 0, false, false, 0, true)))); + +INSTANTIATE_TEST_SUITE_P( + rmat_large_test, + Tests_MG_CountIfV_Rmat, + ::testing::Combine(::testing::Values(Prims_Usecase{false}), + ::testing::Values(cugraph::test::Rmat_Usecase( + 20, 32, 0.57, 0.19, 0.19, 0, false, false, 0, true)))); + +CUGRAPH_MG_TEST_PROGRAM_MAIN() From 9bcfa146103a4d17803da9b7605c3368c9558e51 Mon Sep 17 00:00:00 2001 From: AJ Schmidt Date: Wed, 14 Jul 2021 12:34:25 -0400 Subject: [PATCH 324/343] Fix `conda` uploads (#1712) This PR removes some conditional statements & variables that prevent all the variants of the `cugraph` package from being uploaded to Anaconda.org. This is a continuation of #1709. 
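Looking back at the `count_if_v` test added by #1697 above, the call contract it exercises condenses to a few lines. The wrapper name, predicate, and include paths below are illustrative assumptions (the test's own include list is what actually defines them); this is a sketch of how the primitive is driven, not part of either patch:

```cpp
// Sketch only: count, across the ranks attached to `handle`, the vertices whose
// per-local-vertex value satisfies a device predicate. Mirrors the pattern in
// mg_count_if_v.cu; include paths assume the cugraph/prims layout used later in
// this series.
#include <cugraph/experimental/graph_view.hpp>
#include <cugraph/prims/count_if_v.cuh>

#include <raft/handle.hpp>

template <typename vertex_t>
struct is_multiple_of {
  vertex_t mod{};
  __device__ bool operator()(vertex_t v) const { return v % mod == 0; }
};

template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
auto count_multiples_of_five(
  raft::handle_t const& handle,
  cugraph::experimental::graph_view_t<vertex_t, edge_t, weight_t, false, multi_gpu> const&
    graph_view,
  vertex_t const* d_vertex_values)  // one value per local vertex, e.g. the renumber map
{
  // count_if_v applies the predicate to each local vertex's value and reduces
  // the count over all participating GPUs, which is what the test checks
  // against a single-GPU thrust::count_if.
  return cugraph::experimental::count_if_v(
    handle, graph_view, d_vertex_values, is_multiple_of<vertex_t>{vertex_t{5}});
}
```

In the MG test the values are the renumber-map labels and the predicate is a hash-based bucket test, but any device-callable predicate over the value type fits the same shape.
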
Authors: - AJ Schmidt (https://github.com/ajschmidt8) Approvers: - Ray Douglass (https://github.com/raydouglass) - Jordan Jacobelli (https://github.com/Ethyling) - Brad Rees (https://github.com/BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1712 --- ci/cpu/prebuild.sh | 6 ------ ci/cpu/upload.sh | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/ci/cpu/prebuild.sh b/ci/cpu/prebuild.sh index 6665757181d..9f2629d153c 100644 --- a/ci/cpu/prebuild.sh +++ b/ci/cpu/prebuild.sh @@ -18,12 +18,6 @@ if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then export BUILD_LIBCUGRAPH=1 fi -if [[ "$CUDA" == "11.0" ]]; then - export UPLOAD_CUGRAPH=1 -else - export UPLOAD_CUGRAPH=0 -fi - if [[ "$PYTHON" == "3.7" ]]; then export UPLOAD_LIBCUGRAPH=1 else diff --git a/ci/cpu/upload.sh b/ci/cpu/upload.sh index a333d8828d8..11f28366c5f 100644 --- a/ci/cpu/upload.sh +++ b/ci/cpu/upload.sh @@ -46,7 +46,7 @@ if [[ "$BUILD_LIBCUGRAPH" == "1" && "$UPLOAD_LIBCUGRAPH" == "1" ]]; then gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${LIBCUGRAPH_FILE} --no-progress fi -if [[ "$BUILD_CUGRAPH" == "1" && "$UPLOAD_CUGRAPH" == "1" ]]; then +if [[ "$BUILD_CUGRAPH" == "1" ]]; then test -e ${CUGRAPH_FILE} echo "Upload cugraph" echo ${CUGRAPH_FILE} From bc888a77fa3e516160d7e048c3aae4b9460c8944 Mon Sep 17 00:00:00 2001 From: Brad Rees <34135411+BradReesWork@users.noreply.github.com> Date: Tue, 20 Jul 2021 14:54:36 -0400 Subject: [PATCH 325/343] Doc updates (#1719) Added generator to API docs Removed reference to 11.2 environment Updated list of algorithms Authors: - Brad Rees (https://github.com/BradReesWork) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) URL: https://github.com/rapidsai/cugraph/pull/1719 --- README.md | 28 +++++++++++++--------------- SOURCEBUILD.md | 12 +----------- docs/cugraph/source/api.rst | 18 ++++++++++++++++++ 3 files changed, 32 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index 2e94bd87d34..76015f528f6 100644 --- a/README.md +++ b/README.md @@ -39,12 +39,12 @@ There are 3 ways to get cuGraph : --- # Currently Supported Features -As of Release 0.18 - including 0.18 nightly +As of Release 21.08 - including 21.08 nightly ## Supported Algorithms -| Category | Algorithm | Scale | Notes +| Category | Algorithm | Scale | Notes | | ------------ | -------------------------------------- | ------------ | ------------------- | | Centrality | | | | | | Katz | Multi-GPU | | @@ -61,7 +61,7 @@ As of Release 0.18 - including 0.18 nightly | | Triangle Counting | Single-GPU | | | | K-Truss | Single-GPU | | | Components | | | | -| | Weakly Connected Components | Single-GPU | | +| | Weakly Connected Components | Multi-GPU | | | | Strongly Connected Components | Single-GPU | | | Core | | | | | | K-Core | Single-GPU | | @@ -78,17 +78,18 @@ As of Release 0.18 - including 0.18 nightly | | Jaccard Similarity | Single-GPU | | | | Weighted Jaccard Similarity | Single-GPU | | | | Overlap Similarity | Single-GPU | | +| Sampling | | | | +| | Random Walks (RW) | Single-GPU | | | Traversal | | | | | | Breadth First Search (BFS) | Multi-GPU | with cutoff support
[C++ README](cpp/src/traversal/README.md#BFS) | | | Single Source Shortest Path (SSSP) | Multi-GPU | [C++ README](cpp/src/traversal/README.md#SSSP) | | | Traveling Salesperson Problem (TSP) | Single-GPU | | -| Sampling | Random Walks (RW) | Single-GPU | | -| Structure | | | | -| | Renumbering | Single-GPU | multiple columns, any data type | -| | Symmetrize | Multi-GPU | | -| Other | | | | +| Tree | | | | | | Minimum Spanning Tree | Single-GPU | | | | Maximum Spanning Tree | Single-GPU | | +| Other | | | | +| | Renumbering | Multi-GPU | multiple columns, any data type | +| | Symmetrize | Multi-GPU | | | | |

@@ -101,6 +102,9 @@ As of Release 0.18 - including 0.18 nightly | MultiDigraph | A Directed Graph with multiple edges between a vertex pair | | | | +ALL Algorithms support Graphs and MultiGraph (directed and undirected) + +

## Supported Data Types cuGraph supports graph creation with Source and Destination being expressed as: @@ -117,11 +121,8 @@ cuGraph tries to match the return type based on the input type. So a NetworkX i ## cuGraph Notice -The current version of cuGraph has some limitations: -- Vertex IDs are expected to be contiguous integers starting from 0. - -cuGraph provides the renumber function to mitigate this problem, which is by default automatically called when data is addted to a graph. Input vertex IDs for the renumber function can be any type, can be non-contiguous, can be multiple columns, and can start from an arbitrary number. The renumber function maps the provided input vertex IDs to 32-bit contiguous integers starting from 0. cuGraph still requires the renumbered vertex IDs to be representable in 32-bit integers. These limitations are being addressed and will be fixed soon. +Vertex IDs are expected to be contiguous integers starting from 0. If your data doesn't match that restriction, we have a solution. cuGraph provides the renumber function, which is by default automatically called when data is addted to a graph. Input vertex IDs for the renumber function can be any type, can be non-contiguous, can be multiple columns, and can start from an arbitrary number. The renumber function maps the provided input vertex IDs to either 32- or 64-bit contiguous integers starting from 0. Additionally, when using the auto-renumbering feature, vertices are automatically un-renumbered in results. @@ -154,9 +155,6 @@ Install and update cuGraph using the conda command: # CUDA 11.0 conda install -c nvidia -c rapidsai -c numba -c conda-forge cugraph cudatoolkit=11.0 -# CUDA 11.1 -conda install -c nvidia -c rapidsai -c numba -c conda-forge cugraph cudatoolkit=11.1 - # CUDA 11.2 conda install -c nvidia -c rapidsai -c numba -c conda-forge cugraph cudatoolkit=11.2 ``` diff --git a/SOURCEBUILD.md b/SOURCEBUILD.md index 0c825197cee..47b842a0ce6 100644 --- a/SOURCEBUILD.md +++ b/SOURCEBUILD.md @@ -9,17 +9,13 @@ The cuGraph package include both a C/C++ CUDA portion and a python portion. Bot __Compiler__: * `gcc` version 9.3+ * `nvcc` version 11.0+ -* `cmake` version 3.18+ +* `cmake` version 3.20.1+ __CUDA:__ * CUDA 11.0+ * NVIDIA driver 450.80.02+ * Pascal architecture or better -__Other__ -* `git` - - You can obtain CUDA from [https://developer.nvidia.com/cuda-downloads](https://developer.nvidia.com/cuda-downloads). @@ -50,9 +46,6 @@ __Create the conda development environment__ # for CUDA 11.0 conda env create --name cugraph_dev --file conda/environments/cugraph_dev_cuda11.0.yml -# for CUDA 11.1 -conda env create --name cugraph_dev --file conda/environments/cugraph_dev_cuda11.1.yml - # for CUDA 11.2 conda env create --name cugraph_dev --file conda/environments/cugraph_dev_cuda11.2.yml @@ -71,9 +64,6 @@ conda deactivate # for CUDA 11.0 conda env update --name cugraph_dev --file conda/environments/cugraph_dev_cuda11.0.yml -# for CUDA 11.1 -conda env update --name cugraph_dev --file conda/environments/cugraph_dev_cuda11.1.yml - # for CUDA 11.2 conda env update --name cugraph_dev --file conda/environments/cugraph_dev_cuda11.2.yml diff --git a/docs/cugraph/source/api.rst b/docs/cugraph/source/api.rst index adcf33d1191..a07044ecf3e 100644 --- a/docs/cugraph/source/api.rst +++ b/docs/cugraph/source/api.rst @@ -133,6 +133,12 @@ Connected Components :members: :undoc-members: +Connected Components (MG) +-------------------- + +.. 
automodule:: cugraph.dask.components.connectivity + :members: + :undoc-members: Cores ===== @@ -294,6 +300,18 @@ Maximum Spanning Tree :noindex: +Generator +========= + + RMAT + --------------------- + +.. automodule:: cugraph.generators +:members: rmat +:undoc-members: +:noindex: + + DASK MG Helper functions =========================== From 7cb60216dc27cc6bebafcc213c4c282e1efbdda0 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Wed, 21 Jul 2021 06:08:12 +0900 Subject: [PATCH 326/343] Update primitives to support DCSR (DCSC) segments (Part 2/2) (#1703) - [x] Update key-based primitives (used by Louvain) to reduce thread-divergence - [x] Update key-based primitives (used by Louvain) to support DCSR (DCSC) segments Authors: - Seunghwa Kang (https://github.com/seunghwak) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - Kumar Aatish (https://github.com/kaatish) URL: https://github.com/rapidsai/cugraph/pull/1703 --- .../cugraph/experimental/graph_view.hpp | 2 +- cpp/include/cugraph/matrix_partition_view.hpp | 2 +- ...ransform_reduce_key_aggregated_out_nbr.cuh | 170 ++++++- ...orm_reduce_by_adj_matrix_row_col_key_e.cuh | 419 ++++++++++++++---- cpp/src/experimental/coarsen_graph.cu | 57 ++- cpp/src/experimental/graph.cu | 28 +- cpp/src/experimental/graph_view.cu | 2 +- 7 files changed, 532 insertions(+), 148 deletions(-) diff --git a/cpp/include/cugraph/experimental/graph_view.hpp b/cpp/include/cugraph/experimental/graph_view.hpp index e243bddca1c..34905a7c596 100644 --- a/cpp/include/cugraph/experimental/graph_view.hpp +++ b/cpp/include/cugraph/experimental/graph_view.hpp @@ -221,7 +221,7 @@ namespace detail { // FIXME: threshold values require tuning // use the hypersparse format (currently, DCSR or DCSC) for the vertices with their degrees smaller // than col_comm_size * hypersparse_threshold_ratio, should be less than 1.0 -double constexpr hypersparse_threshold_ratio = 0.0; +double constexpr hypersparse_threshold_ratio = 0.5; size_t constexpr low_degree_threshold{raft::warp_size()}; size_t constexpr mid_degree_threshold{1024}; size_t constexpr num_sparse_segments_per_vertex_partition{3}; diff --git a/cpp/include/cugraph/matrix_partition_view.hpp b/cpp/include/cugraph/matrix_partition_view.hpp index 8a81bc7db4a..6036bd2af48 100644 --- a/cpp/include/cugraph/matrix_partition_view.hpp +++ b/cpp/include/cugraph/matrix_partition_view.hpp @@ -79,7 +79,7 @@ class matrix_partition_view_t struct minor_to_key_t { @@ -50,6 +53,151 @@ struct minor_to_key_t { } }; +template +__global__ void for_all_major_for_all_nbr_mid_degree( + matrix_partition_device_view_t matrix_partition, + vertex_t major_first, + vertex_t major_last, + vertex_t* majors) +{ + auto const tid = threadIdx.x + blockIdx.x * blockDim.x; + static_assert( + copy_v_transform_reduce_key_aggregated_out_nbr_for_all_block_size % raft::warp_size() == 0); + auto const lane_id = tid % raft::warp_size(); + auto major_start_offset = static_cast(major_first - matrix_partition.get_major_first()); + size_t idx = static_cast(tid / raft::warp_size()); + + while (idx < static_cast(major_last - major_first)) { + auto major_offset = major_start_offset + idx; + auto major = + matrix_partition.get_major_from_major_offset_nocheck(static_cast(major_offset)); + vertex_t const* indices{nullptr}; + thrust::optional weights{nullptr}; + edge_t local_degree{}; + thrust::tie(indices, weights, local_degree) = matrix_partition.get_local_edges(major_offset); + auto local_offset = 
matrix_partition.get_local_offset(major_offset); + for (edge_t i = lane_id; i < local_degree; i += raft::warp_size()) { + majors[local_offset + i] = major; + } + idx += gridDim.x * (blockDim.x / raft::warp_size()); + } +} + +template +__global__ void for_all_major_for_all_nbr_high_degree( + matrix_partition_device_view_t matrix_partition, + vertex_t major_first, + vertex_t major_last, + vertex_t* majors) +{ + auto major_start_offset = static_cast(major_first - matrix_partition.get_major_first()); + size_t idx = static_cast(blockIdx.x); + + while (idx < static_cast(major_last - major_first)) { + auto major_offset = major_start_offset + idx; + auto major = + matrix_partition.get_major_from_major_offset_nocheck(static_cast(major_offset)); + vertex_t const* indices{nullptr}; + thrust::optional weights{nullptr}; + edge_t local_degree{}; + thrust::tie(indices, weights, local_degree) = + matrix_partition.get_local_edges(static_cast(major_offset)); + auto local_offset = matrix_partition.get_local_offset(major_offset); + for (edge_t i = threadIdx.x; i < local_degree; i += blockDim.x) { + majors[local_offset + i] = major; + } + idx += gridDim.x; + } +} + +template +void decompress_matrix_partition_to_fill_edgelist_majors( + raft::handle_t const& handle, + matrix_partition_device_view_t matrix_partition, + vertex_t* majors, + std::optional> const& segment_offsets) +{ + if (segment_offsets) { + // FIXME: we may further improve performance by 1) concurrently running kernels on different + // segments; 2) individually tuning block sizes for different segments; and 3) adding one more + // segment for very high degree vertices and running segmented reduction + static_assert(detail::num_sparse_segments_per_vertex_partition == 3); + if ((*segment_offsets)[1] > 0) { + raft::grid_1d_block_t update_grid( + (*segment_offsets)[1], + detail::copy_v_transform_reduce_key_aggregated_out_nbr_for_all_block_size, + handle.get_device_properties().maxGridSize[0]); + + detail::for_all_major_for_all_nbr_high_degree<<>>( + matrix_partition, + matrix_partition.get_major_first(), + matrix_partition.get_major_first() + (*segment_offsets)[1], + majors); + } + if ((*segment_offsets)[2] - (*segment_offsets)[1] > 0) { + raft::grid_1d_warp_t update_grid( + (*segment_offsets)[2] - (*segment_offsets)[1], + detail::copy_v_transform_reduce_key_aggregated_out_nbr_for_all_block_size, + handle.get_device_properties().maxGridSize[0]); + + detail::for_all_major_for_all_nbr_mid_degree<<>>( + matrix_partition, + matrix_partition.get_major_first() + (*segment_offsets)[1], + matrix_partition.get_major_first() + (*segment_offsets)[2], + majors); + } + if ((*segment_offsets)[3] - (*segment_offsets)[2] > 0) { + thrust::for_each( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::make_counting_iterator(matrix_partition.get_major_first()) + (*segment_offsets)[2], + thrust::make_counting_iterator(matrix_partition.get_major_first()) + (*segment_offsets)[3], + [matrix_partition, majors] __device__(auto major) { + auto major_offset = matrix_partition.get_major_offset_from_major_nocheck(major); + auto local_degree = matrix_partition.get_local_degree(major_offset); + auto local_offset = matrix_partition.get_local_offset(major_offset); + thrust::fill( + thrust::seq, majors + local_offset, majors + local_offset + local_degree, major); + }); + } + if (matrix_partition.get_dcs_nzd_vertex_count() && + (*(matrix_partition.get_dcs_nzd_vertex_count()) > 0)) { + thrust::for_each( + 
rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::make_counting_iterator(vertex_t{0}), + thrust::make_counting_iterator(*(matrix_partition.get_dcs_nzd_vertex_count())), + [matrix_partition, major_start_offset = (*segment_offsets)[3], majors] __device__( + auto idx) { + auto major = *(matrix_partition.get_major_from_major_hypersparse_idx_nocheck(idx)); + auto major_idx = + major_start_offset + idx; // major_offset != major_idx in the hypersparse region + auto local_degree = matrix_partition.get_local_degree(major_idx); + auto local_offset = matrix_partition.get_local_offset(major_idx); + thrust::fill( + thrust::seq, majors + local_offset, majors + local_offset + local_degree, major); + }); + } + } else { + thrust::for_each( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::make_counting_iterator(matrix_partition.get_major_first()), + thrust::make_counting_iterator(matrix_partition.get_major_first()) + + matrix_partition.get_major_size(), + [matrix_partition, majors] __device__(auto major) { + auto major_offset = matrix_partition.get_major_offset_from_major_nocheck(major); + auto local_degree = matrix_partition.get_local_degree(major_offset); + auto local_offset = matrix_partition.get_local_offset(major_offset); + thrust::fill( + thrust::seq, majors + local_offset, majors + local_offset + local_degree, major); + }); + } +} + } // namespace detail /** @@ -283,23 +431,11 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( *(matrix_partition.get_weights()) + matrix_partition.get_number_of_edges(), tmp_key_aggregated_edge_weights.begin()); } - // FIXME: This is highly inefficient for graphs with high-degree vertices. If we renumber - // vertices to insure that rows within a partition are sorted by their out-degree in - // decreasing order, we will apply this kernel only to low out-degree vertices. 
- thrust::for_each( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - thrust::make_counting_iterator(matrix_partition.get_major_first()), - thrust::make_counting_iterator(matrix_partition.get_major_first()) + - matrix_partition.get_major_size(), - [matrix_partition, tmp_major_vertices = tmp_major_vertices.begin()] __device__(auto major) { - auto major_offset = matrix_partition.get_major_offset_from_major_nocheck(major); - auto local_degree = matrix_partition.get_local_degree(major_offset); - auto local_offset = matrix_partition.get_local_offset(major_offset); - thrust::fill(thrust::seq, - tmp_major_vertices + local_offset, - tmp_major_vertices + local_offset + local_degree, - major); - }); + detail::decompress_matrix_partition_to_fill_edgelist_majors( + handle, + matrix_partition, + tmp_major_vertices.data(), + graph_view.get_local_adj_matrix_partition_segment_offsets(i)); rmm::device_uvector reduced_major_vertices(tmp_major_vertices.size(), handle.get_stream()); rmm::device_uvector reduced_minor_keys(reduced_major_vertices.size(), diff --git a/cpp/include/cugraph/prims/transform_reduce_by_adj_matrix_row_col_key_e.cuh b/cpp/include/cugraph/prims/transform_reduce_by_adj_matrix_row_col_key_e.cuh index 257db0339cc..2d254991c26 100644 --- a/cpp/include/cugraph/prims/transform_reduce_by_adj_matrix_row_col_key_e.cuh +++ b/cpp/include/cugraph/prims/transform_reduce_by_adj_matrix_row_col_key_e.cuh @@ -33,7 +33,114 @@ namespace experimental { namespace detail { // FIXME: block size requires tuning -int32_t constexpr transform_reduce_by_key_e_for_all_block_size = 128; +int32_t constexpr transform_reduce_by_adj_matrix_row_col_key_e_for_all_block_size = 128; + +template +__device__ void update_buffer_element( + matrix_partition_device_view_t& matrix_partition, + typename GraphViewType::vertex_type major, + typename GraphViewType::vertex_type minor, + typename GraphViewType::weight_type weight, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, + VertexIterator adj_matrix_row_col_key_first, + EdgeOp e_op, + typename GraphViewType::vertex_type* key, + T* value) +{ + using vertex_t = typename GraphViewType::vertex_type; + + auto major_offset = matrix_partition.get_major_offset_from_major_nocheck(major); + auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); + auto row = GraphViewType::is_adj_matrix_transposed ? minor : major; + auto col = GraphViewType::is_adj_matrix_transposed ? major : minor; + auto row_offset = GraphViewType::is_adj_matrix_transposed ? minor_offset : major_offset; + auto col_offset = GraphViewType::is_adj_matrix_transposed ? major_offset : minor_offset; + + *key = *(adj_matrix_row_col_key_first + + ((GraphViewType::is_adj_matrix_transposed != adj_matrix_row_key) ? 
major_offset + : minor_offset)); + *value = evaluate_edge_op() + .compute(row, + col, + weight, + *(adj_matrix_row_value_input_first + row_offset), + *(adj_matrix_col_value_input_first + col_offset), + e_op); +} + +template +__global__ void for_all_major_for_all_nbr_hypersparse( + matrix_partition_device_view_t matrix_partition, + typename GraphViewType::vertex_type major_hypersparse_first, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, + VertexIterator adj_matrix_row_col_key_first, + EdgeOp e_op, + typename GraphViewType::vertex_type* keys, + T* values) +{ + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; + + auto const tid = threadIdx.x + blockIdx.x * blockDim.x; + auto major_start_offset = + static_cast(major_hypersparse_first - matrix_partition.get_major_first()); + auto idx = static_cast(tid); + + auto dcs_nzd_vertex_count = *(matrix_partition.get_dcs_nzd_vertex_count()); + + while (idx < static_cast(dcs_nzd_vertex_count)) { + auto major = + *(matrix_partition.get_major_from_major_hypersparse_idx_nocheck(static_cast(idx))); + auto major_idx = + major_start_offset + idx; // major_offset != major_idx in the hypersparse region + vertex_t const* indices{nullptr}; + thrust::optional weights{nullptr}; + edge_t local_degree{}; + thrust::tie(indices, weights, local_degree) = + matrix_partition.get_local_edges(static_cast(major_idx)); + auto local_offset = matrix_partition.get_local_offset(major_idx); + for (edge_t i = 0; i < local_degree; ++i) { + update_buffer_element( + matrix_partition, + major, + indices[i], + weights ? (*weights)[i] : weight_t{1.0}, + adj_matrix_row_value_input_first, + adj_matrix_col_value_input_first, + adj_matrix_row_col_key_first, + e_op, + keys + local_offset + i, + values + local_offset + i); + } + + idx += gridDim.x * blockDim.x; + } +} template (major_last - major_first)) { auto major_offset = major_start_offset + idx; + auto major = + matrix_partition.get_major_from_major_offset_nocheck(static_cast(major_offset)); vertex_t const* indices{nullptr}; thrust::optional weights{nullptr}; edge_t local_degree{}; thrust::tie(indices, weights, local_degree) = matrix_partition.get_local_edges(static_cast(major_offset)); - if (local_degree > 0) { - auto transform_op = [&matrix_partition, - &adj_matrix_row_value_input_first, - &adj_matrix_col_value_input_first, - &adj_matrix_row_col_key_first, - &e_op, - major_offset, - indices, - weights] __device__(auto i) { - auto minor = indices[i]; - auto weight = weights ? (*weights)[i] : weight_t{1.0}; - auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); - auto row = GraphViewType::is_adj_matrix_transposed - ? minor - : matrix_partition.get_major_from_major_offset_nocheck(major_offset); - auto col = GraphViewType::is_adj_matrix_transposed - ? matrix_partition.get_major_from_major_offset_nocheck(major_offset) - : minor; - auto row_offset = GraphViewType::is_adj_matrix_transposed - ? minor_offset - : static_cast(major_offset); - auto col_offset = GraphViewType::is_adj_matrix_transposed - ? static_cast(major_offset) - : minor_offset; - - auto key = - *(adj_matrix_row_col_key_first + - ((GraphViewType::is_adj_matrix_transposed != adj_matrix_row_key) ? 
major_offset - : minor_offset)); - auto e_op_result = evaluate_edge_op() - .compute(row, - col, - weight, - *(adj_matrix_row_value_input_first + row_offset), - *(adj_matrix_col_value_input_first + col_offset), - e_op); - - return thrust::make_tuple(key, e_op_result); - }; - - auto local_offset = matrix_partition.get_local_offset(major_offset); - thrust::transform( - thrust::seq, - thrust::make_counting_iterator(edge_t{0}), - thrust::make_counting_iterator(local_degree), - thrust::make_zip_iterator(thrust::make_tuple(keys + local_offset, values + local_offset)), - transform_op); + auto local_offset = matrix_partition.get_local_offset(major_offset); + for (edge_t i = 0; i < local_degree; ++i) { + update_buffer_element( + matrix_partition, + major, + indices[i], + weights ? (*weights)[i] : weight_t{1.0}, + adj_matrix_row_value_input_first, + adj_matrix_col_value_input_first, + adj_matrix_row_col_key_first, + e_op, + keys + local_offset + i, + values + local_offset + i); } idx += gridDim.x * blockDim.x; } } +template +__global__ void for_all_major_for_all_nbr_mid_degree( + matrix_partition_device_view_t matrix_partition, + typename GraphViewType::vertex_type major_first, + typename GraphViewType::vertex_type major_last, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, + VertexIterator adj_matrix_row_col_key_first, + EdgeOp e_op, + typename GraphViewType::vertex_type* keys, + T* values) +{ + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; + + auto const tid = threadIdx.x + blockIdx.x * blockDim.x; + static_assert( + transform_reduce_by_adj_matrix_row_col_key_e_for_all_block_size % raft::warp_size() == 0); + auto const lane_id = tid % raft::warp_size(); + auto major_start_offset = static_cast(major_first - matrix_partition.get_major_first()); + size_t idx = static_cast(tid / raft::warp_size()); + + while (idx < static_cast(major_last - major_first)) { + auto major_offset = major_start_offset + idx; + auto major = + matrix_partition.get_major_from_major_offset_nocheck(static_cast(major_offset)); + vertex_t const* indices{nullptr}; + thrust::optional weights{nullptr}; + edge_t local_degree{}; + thrust::tie(indices, weights, local_degree) = + matrix_partition.get_local_edges(static_cast(major_offset)); + auto local_offset = matrix_partition.get_local_offset(major_offset); + for (edge_t i = lane_id; i < local_degree; i += raft::warp_size()) { + update_buffer_element( + matrix_partition, + major, + indices[i], + weights ? 
(*weights)[i] : weight_t{1.0}, + adj_matrix_row_value_input_first, + adj_matrix_col_value_input_first, + adj_matrix_row_col_key_first, + e_op, + keys + local_offset + i, + values + local_offset + i); + } + + idx += gridDim.x * (blockDim.x / raft::warp_size()); + } +} + +template +__global__ void for_all_major_for_all_nbr_high_degree( + matrix_partition_device_view_t matrix_partition, + typename GraphViewType::vertex_type major_first, + typename GraphViewType::vertex_type major_last, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, + VertexIterator adj_matrix_row_col_key_first, + EdgeOp e_op, + typename GraphViewType::vertex_type* keys, + T* values) +{ + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; + + auto major_start_offset = static_cast(major_first - matrix_partition.get_major_first()); + auto idx = static_cast(blockIdx.x); + + while (idx < static_cast(major_last - major_first)) { + auto major_offset = major_start_offset + idx; + auto major = + matrix_partition.get_major_from_major_offset_nocheck(static_cast(major_offset)); + vertex_t const* indices{nullptr}; + thrust::optional weights{nullptr}; + edge_t local_degree{}; + thrust::tie(indices, weights, local_degree) = + matrix_partition.get_local_edges(static_cast(major_offset)); + auto local_offset = matrix_partition.get_local_offset(major_offset); + for (edge_t i = threadIdx.x; i < local_degree; i += blockDim.x) { + update_buffer_element( + matrix_partition, + major, + indices[i], + weights ? (*weights)[i] : weight_t{1.0}, + adj_matrix_row_value_input_first, + adj_matrix_col_value_input_first, + adj_matrix_row_col_key_first, + e_op, + keys + local_offset + i, + values + local_offset + i); + } + + idx += gridDim.x; + } +} + // FIXME: better derive value_t from BufferType template std::tuple, BufferType> reduce_to_unique_kv_pairs( @@ -200,47 +387,115 @@ transform_reduce_by_adj_matrix_row_col_key_e( comm_root_rank = i * row_comm_size + row_comm_rank; } - auto num_edges = thrust::transform_reduce( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - thrust::make_counting_iterator(graph_view.get_vertex_partition_first(comm_root_rank)), - thrust::make_counting_iterator(graph_view.get_vertex_partition_last(comm_root_rank)), - [matrix_partition] __device__(auto row) { - auto major_offset = matrix_partition.get_major_offset_from_major_nocheck(row); - return matrix_partition.get_local_degree(major_offset); - }, - edge_t{0}, - thrust::plus()); + auto num_edges = matrix_partition.get_number_of_edges(); rmm::device_uvector tmp_keys(num_edges, handle.get_stream()); auto tmp_value_buffer = allocate_dataframe_buffer(tmp_keys.size(), handle.get_stream()); if (graph_view.get_vertex_partition_size(comm_root_rank) > 0) { - raft::grid_1d_thread_t update_grid(graph_view.get_vertex_partition_size(comm_root_rank), - detail::transform_reduce_by_key_e_for_all_block_size, - handle.get_device_properties().maxGridSize[0]); - auto row_value_input_offset = GraphViewType::is_adj_matrix_transposed ? vertex_t{0} : matrix_partition.get_major_value_start_offset(); auto col_value_input_offset = GraphViewType::is_adj_matrix_transposed ? matrix_partition.get_major_value_start_offset() : vertex_t{0}; - - // FIXME: This is highly inefficient for graphs with high-degree vertices. 
If we renumber - // vertices to insure that rows within a partition are sorted by their out-degree in - // decreasing order, we will apply this kernel only to low out-degree vertices. - detail::for_all_major_for_all_nbr_low_degree - <<>>( - matrix_partition, - graph_view.get_vertex_partition_first(comm_root_rank), - graph_view.get_vertex_partition_last(comm_root_rank), - adj_matrix_row_value_input_first + row_value_input_offset, - adj_matrix_col_value_input_first + col_value_input_offset, - adj_matrix_row_col_key_first + - (adj_matrix_row_key ? row_value_input_offset : col_value_input_offset), - e_op, - tmp_keys.data(), - get_dataframe_buffer_begin(tmp_value_buffer)); + auto segment_offsets = graph_view.get_local_adj_matrix_partition_segment_offsets(i); + if (segment_offsets) { + // FIXME: we may further improve performance by 1) concurrently running kernels on different + // segments; 2) individually tuning block sizes for different segments; and 3) adding one + // more segment for very high degree vertices and running segmented reduction + static_assert(detail::num_sparse_segments_per_vertex_partition == 3); + if ((*segment_offsets)[1] > 0) { + raft::grid_1d_block_t update_grid( + (*segment_offsets)[1], + detail::transform_reduce_by_adj_matrix_row_col_key_e_for_all_block_size, + handle.get_device_properties().maxGridSize[0]); + detail::for_all_major_for_all_nbr_high_degree + <<>>( + matrix_partition, + matrix_partition.get_major_first(), + matrix_partition.get_major_first() + (*segment_offsets)[1], + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + adj_matrix_row_col_key_first + + (adj_matrix_row_key ? row_value_input_offset : col_value_input_offset), + e_op, + tmp_keys.data(), + get_dataframe_buffer_begin(tmp_value_buffer)); + } + if ((*segment_offsets)[2] - (*segment_offsets)[1] > 0) { + raft::grid_1d_warp_t update_grid( + (*segment_offsets)[2] - (*segment_offsets)[1], + detail::transform_reduce_by_adj_matrix_row_col_key_e_for_all_block_size, + handle.get_device_properties().maxGridSize[0]); + detail::for_all_major_for_all_nbr_mid_degree + <<>>( + matrix_partition, + matrix_partition.get_major_first() + (*segment_offsets)[1], + matrix_partition.get_major_first() + (*segment_offsets)[2], + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + adj_matrix_row_col_key_first + + (adj_matrix_row_key ? row_value_input_offset : col_value_input_offset), + e_op, + tmp_keys.data(), + get_dataframe_buffer_begin(tmp_value_buffer)); + } + if ((*segment_offsets)[3] - (*segment_offsets)[2] > 0) { + raft::grid_1d_thread_t update_grid( + (*segment_offsets)[3] - (*segment_offsets)[2], + detail::transform_reduce_by_adj_matrix_row_col_key_e_for_all_block_size, + handle.get_device_properties().maxGridSize[0]); + detail::for_all_major_for_all_nbr_low_degree + <<>>( + matrix_partition, + matrix_partition.get_major_first() + (*segment_offsets)[2], + matrix_partition.get_major_first() + (*segment_offsets)[3], + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + adj_matrix_row_col_key_first + + (adj_matrix_row_key ? 
row_value_input_offset : col_value_input_offset), + e_op, + tmp_keys.data(), + get_dataframe_buffer_begin(tmp_value_buffer)); + } + if (matrix_partition.get_dcs_nzd_vertex_count() && + (*(matrix_partition.get_dcs_nzd_vertex_count()) > 0)) { + raft::grid_1d_thread_t update_grid( + *(matrix_partition.get_dcs_nzd_vertex_count()), + detail::transform_reduce_by_adj_matrix_row_col_key_e_for_all_block_size, + handle.get_device_properties().maxGridSize[0]); + detail::for_all_major_for_all_nbr_hypersparse + <<>>( + matrix_partition, + matrix_partition.get_major_first() + (*segment_offsets)[3], + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + adj_matrix_row_col_key_first + + (adj_matrix_row_key ? row_value_input_offset : col_value_input_offset), + e_op, + tmp_keys.data(), + get_dataframe_buffer_begin(tmp_value_buffer)); + } + } else { + raft::grid_1d_thread_t update_grid( + matrix_partition.get_major_size(), + detail::transform_reduce_by_adj_matrix_row_col_key_e_for_all_block_size, + handle.get_device_properties().maxGridSize[0]); + + detail::for_all_major_for_all_nbr_low_degree + <<>>( + matrix_partition, + matrix_partition.get_major_first(), + matrix_partition.get_major_last(), + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + adj_matrix_row_col_key_first + + (adj_matrix_row_key ? row_value_input_offset : col_value_input_offset), + e_op, + tmp_keys.data(), + get_dataframe_buffer_begin(tmp_value_buffer)); + } } std::tie(tmp_keys, tmp_value_buffer) = reduce_to_unique_kv_pairs( std::move(tmp_keys), std::move(tmp_value_buffer), handle.get_stream()); diff --git a/cpp/src/experimental/coarsen_graph.cu b/cpp/src/experimental/coarsen_graph.cu index e785b770ff7..1b48411ac91 100644 --- a/cpp/src/experimental/coarsen_graph.cu +++ b/cpp/src/experimental/coarsen_graph.cu @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -48,37 +49,26 @@ std::tuple, rmm::device_uvector, std::optional>> decompress_matrix_partition_to_edgelist( + raft::handle_t const& handle, matrix_partition_device_view_t const matrix_partition, - cudaStream_t stream) + std::optional> const& segment_offsets) { auto number_of_edges = matrix_partition.get_number_of_edges(); - rmm::device_uvector edgelist_major_vertices(number_of_edges, stream); - rmm::device_uvector edgelist_minor_vertices(number_of_edges, stream); + rmm::device_uvector edgelist_major_vertices(number_of_edges, handle.get_stream()); + rmm::device_uvector edgelist_minor_vertices(number_of_edges, handle.get_stream()); auto edgelist_weights = matrix_partition.get_weights() - ? std::make_optional>(number_of_edges, stream) + ? 
std::make_optional>(number_of_edges, handle.get_stream()) : std::nullopt; - auto major_first = matrix_partition.get_major_first(); - auto major_last = matrix_partition.get_major_last(); - // FIXME: this is highly inefficient for very high-degree vertices, for better performance, we can - // fill high-degree vertices using one CUDA block per vertex, mid-degree vertices using one CUDA - // warp per vertex, and low-degree vertices using one CUDA thread per block - thrust::for_each( - rmm::exec_policy(stream)->on(stream), - thrust::make_counting_iterator(major_first), - thrust::make_counting_iterator(major_last), - [matrix_partition, major_first, p_majors = edgelist_major_vertices.begin()] __device__(auto v) { - auto first = matrix_partition.get_local_offset(v - major_first); - auto last = first + matrix_partition.get_local_degree(v - major_first); - thrust::fill(thrust::seq, p_majors + first, p_majors + last, v); - }); - thrust::copy(rmm::exec_policy(stream)->on(stream), + decompress_matrix_partition_to_fill_edgelist_majors( + handle, matrix_partition, edgelist_major_vertices.data(), segment_offsets); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), matrix_partition.get_indices(), matrix_partition.get_indices() + number_of_edges, edgelist_minor_vertices.begin()); if (edgelist_weights) { - thrust::copy(rmm::exec_policy(stream)->on(stream), + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), *(matrix_partition.get_weights()), *(matrix_partition.get_weights()) + number_of_edges, (*edgelist_weights).data()); @@ -145,20 +135,21 @@ std::tuple, rmm::device_uvector, std::optional>> decompress_matrix_partition_to_relabeled_and_grouped_and_coarsened_edgelist( + raft::handle_t const& handle, matrix_partition_device_view_t const matrix_partition, vertex_t const* p_major_labels, vertex_t const* p_minor_labels, - cudaStream_t stream) + std::optional> const& segment_offsets) { // FIXME: it might be possible to directly create relabled & coarsened edgelist from the // compressed sparse format to save memory auto [edgelist_major_vertices, edgelist_minor_vertices, edgelist_weights] = - decompress_matrix_partition_to_edgelist(matrix_partition, stream); + decompress_matrix_partition_to_edgelist(handle, matrix_partition, segment_offsets); auto pair_first = thrust::make_zip_iterator( thrust::make_tuple(edgelist_major_vertices.begin(), edgelist_minor_vertices.begin())); - thrust::transform(rmm::exec_policy(stream)->on(stream), + thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), pair_first, pair_first + edgelist_major_vertices.size(), pair_first, @@ -175,14 +166,14 @@ decompress_matrix_partition_to_relabeled_and_grouped_and_coarsened_edgelist( edgelist_minor_vertices.data(), edgelist_weights ? 
std::optional{(*edgelist_weights).data()} : std::nullopt, static_cast(edgelist_major_vertices.size()), - stream); - edgelist_major_vertices.resize(number_of_edges, stream); - edgelist_major_vertices.shrink_to_fit(stream); - edgelist_minor_vertices.resize(number_of_edges, stream); - edgelist_minor_vertices.shrink_to_fit(stream); + handle.get_stream()); + edgelist_major_vertices.resize(number_of_edges, handle.get_stream()); + edgelist_major_vertices.shrink_to_fit(handle.get_stream()); + edgelist_minor_vertices.resize(number_of_edges, handle.get_stream()); + edgelist_minor_vertices.shrink_to_fit(handle.get_stream()); if (edgelist_weights) { - (*edgelist_weights).resize(number_of_edges, stream); - (*edgelist_weights).shrink_to_fit(stream); + (*edgelist_weights).resize(number_of_edges, handle.get_stream()); + (*edgelist_weights).shrink_to_fit(handle.get_stream()); } return std::make_tuple(std::move(edgelist_major_vertices), @@ -297,11 +288,12 @@ coarsen_graph( auto [edgelist_major_vertices, edgelist_minor_vertices, edgelist_weights] = decompress_matrix_partition_to_relabeled_and_grouped_and_coarsened_edgelist( + handle, matrix_partition_device_view_t( graph_view.get_matrix_partition_view(i)), major_labels.data(), adj_matrix_minor_labels.data(), - handle.get_stream()); + graph_view.get_local_adj_matrix_partition_segment_offsets(i)); // 1-2. globaly shuffle @@ -577,11 +569,12 @@ coarsen_graph( coarsened_edgelist_minor_vertices, coarsened_edgelist_weights] = decompress_matrix_partition_to_relabeled_and_grouped_and_coarsened_edgelist( + handle, matrix_partition_device_view_t( graph_view.get_matrix_partition_view()), labels, labels, - handle.get_stream()); + graph_view.get_local_adj_matrix_partition_segment_offsets(0)); rmm::device_uvector unique_labels(graph_view.get_number_of_vertices(), handle.get_stream()); diff --git a/cpp/src/experimental/graph.cu b/cpp/src/experimental/graph.cu index c5f21cfa4f7..980356a96db 100644 --- a/cpp/src/experimental/graph.cu +++ b/cpp/src/experimental/graph.cu @@ -67,7 +67,7 @@ std::tuple, std::optional>> compress_edgelist(edgelist_t const& edgelist, vertex_t major_first, - vertex_t major_hypersparse_first, + std::optional major_hypersparse_first, vertex_t major_last, vertex_t minor_first, vertex_t minor_last, @@ -141,16 +141,16 @@ compress_edgelist(edgelist_t const& edgelist, }); } - auto dcs_nzd_vertices = (major_hypersparse_first < major_last) + auto dcs_nzd_vertices = major_hypersparse_first ? 
std::make_optional>( - major_last - major_hypersparse_first, stream_view) + major_last - *major_hypersparse_first, stream_view) : std::nullopt; if (dcs_nzd_vertices) { auto constexpr invalid_vertex = invalid_vertex_id::value; thrust::transform( rmm::exec_policy(stream_view), - thrust::make_counting_iterator(major_hypersparse_first), + thrust::make_counting_iterator(*major_hypersparse_first), thrust::make_counting_iterator(major_last), (*dcs_nzd_vertices).begin(), [major_first, offsets = offsets.data()] __device__(auto major) { @@ -159,7 +159,7 @@ compress_edgelist(edgelist_t const& edgelist, }); auto pair_first = thrust::make_zip_iterator(thrust::make_tuple( - (*dcs_nzd_vertices).begin(), offsets.begin() + (major_hypersparse_first - major_first))); + (*dcs_nzd_vertices).begin(), offsets.begin() + (*major_hypersparse_first - major_first))); (*dcs_nzd_vertices) .resize(thrust::distance(pair_first, thrust::remove_if(rmm::exec_policy(stream_view), @@ -170,13 +170,13 @@ compress_edgelist(edgelist_t const& edgelist, })), stream_view); (*dcs_nzd_vertices).shrink_to_fit(stream_view); - if (static_cast((*dcs_nzd_vertices).size()) < major_last - major_hypersparse_first) { + if (static_cast((*dcs_nzd_vertices).size()) < major_last - *major_hypersparse_first) { thrust::copy( rmm::exec_policy(stream_view), offsets.begin() + (major_last - major_first), offsets.end(), - offsets.begin() + (major_hypersparse_first - major_first) + (*dcs_nzd_vertices).size()); - offsets.resize((major_hypersparse_first - major_first) + (*dcs_nzd_vertices).size() + 1, + offsets.begin() + (*major_hypersparse_first - major_first) + (*dcs_nzd_vertices).size()); + offsets.resize((*major_hypersparse_first - major_first) + (*dcs_nzd_vertices).size() + 1, stream_view); offsets.shrink_to_fit(stream_view); } @@ -328,11 +328,11 @@ graph_t{major_first + + (*adj_matrix_partition_segment_offsets_) + [(*segment_offsets).size() * i + + detail::num_sparse_segments_per_vertex_partition]} + : std::nullopt; auto [offsets, indices, weights, dcs_nzd_vertices] = compress_edgelist(edgelists[i], major_first, @@ -428,7 +428,7 @@ graph_t(edgelist, vertex_t{0}, - this->get_number_of_vertices(), + std::optional{std::nullopt}, this->get_number_of_vertices(), vertex_t{0}, this->get_number_of_vertices(), diff --git a/cpp/src/experimental/graph_view.cu b/cpp/src/experimental/graph_view.cu index 9bfdaa0f2e0..70dd6a326ac 100644 --- a/cpp/src/experimental/graph_view.cu +++ b/cpp/src/experimental/graph_view.cu @@ -68,7 +68,7 @@ std::vector update_adj_matrix_partition_edge_counts( adj_matrix_partition_offsets[i] + (use_dcs ? ((*adj_matrix_partition_segment_offsets) [(detail::num_sparse_segments_per_vertex_partition + 2) * i + - detail::num_sparse_segments_per_vertex_partition] - + detail::num_sparse_segments_per_vertex_partition] + (*adj_matrix_partition_dcs_nzd_vertex_counts)[i]) : (major_last - major_first)), 1, From c8f3754d132100927bd76ca37f8ff2de3deab4fc Mon Sep 17 00:00:00 2001 From: Andrei Schaffer <37386037+aschaffer@users.noreply.github.com> Date: Tue, 20 Jul 2021 17:45:51 -0500 Subject: [PATCH 327/343] Cascaded dispatch for type-erased API (#1711) This enhancement PR provides the C++ infrastructure for a type-erased API to be exposed to C-only, Cython, and potentially other glue languages. This will be the first in a series of such API PRs to expose cugraph functionality to other languages. 
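As a concrete illustration of how the cascade is meant to be driven, here is a minimal usage sketch; the free function name and the particular enum choices are hypothetical, while `vertex_dispatcher`, the `DTypes`/`GTypes` enums, `erased_pack_t`, and the `graph_envelope_t::pair_uniques_t` it returns all come from the headers added below (the include prefix assumes the cpp/include/cugraph/visitors layout shown in the diffstat):

```cpp
// Sketch only: resolve runtime type tags into a fully typed graph_t via the
// cascaded dispatch, starting from a type-erased argument pack.
#include <cugraph/visitors/cascaded_dispatch.hpp>

auto make_erased_graph(cugraph::visitors::erased_pack_t& ep)
{
  using namespace cugraph::visitors;

  // Each dispatcher level peels one runtime tag into a template argument:
  // vertex_t = int32_t, edge_t = int32_t, weight_t = float, CSR
  // (store_transposed = false), single-GPU, with GTypes::GRAPH_T selecting
  // the concrete graph flavor. `ep` must already hold the type-erased
  // constructor arguments that graph_factory_t::make_graph() unpacks.
  return vertex_dispatcher(DTypes::INT32,    // vertex_t
                           DTypes::INT32,    // edge_t
                           DTypes::FLOAT32,  // weight_t
                           false,            // store_transposed
                           false,            // multi_gpu
                           GTypes::GRAPH_T,  // graph kind
                           ep);              // type-erased ctor arguments
}
```

The call shape above is what a C-only or Cython shim can expose directly, since every template decision is made from plain enums and bools rather than from C++ template parameters.
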
Authors: - Andrei Schaffer (https://github.com/aschaffer) - Mark Harris (https://github.com/harrism) - AJ Schmidt (https://github.com/ajschmidt8) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - Kumar Aatish (https://github.com/kaatish) URL: https://github.com/rapidsai/cugraph/pull/1711 --- cpp/CMakeLists.txt | 3 + .../cugraph/experimental/graph_view.hpp | 15 +- cpp/include/cugraph/visitors/bfs_visitor.hpp | 82 +++++ .../cugraph/visitors/cascaded_dispatch.hpp | 254 +++++++++++++++ cpp/include/cugraph/visitors/enum_mapping.hpp | 76 +++++ cpp/include/cugraph/visitors/erased_api.hpp | 46 +++ cpp/include/cugraph/visitors/erased_pack.hpp | 73 +++++ cpp/include/cugraph/visitors/graph_enum.hpp | 26 ++ .../cugraph/visitors/graph_enum_mapping.hpp | 44 +++ .../cugraph/visitors/graph_envelope.hpp | 208 +++++++++++++ .../cugraph/visitors/graph_factory.hpp | 211 +++++++++++++ cpp/include/cugraph/visitors/ret_terased.hpp | 101 ++++++ cpp/src/visitors/bfs_visitor.cpp | 142 +++++++++ cpp/src/visitors/graph_envelope.cpp | 52 ++++ cpp/src/visitors/visitors_factory.cpp | 103 +++++++ cpp/tests/CMakeLists.txt | 8 + cpp/tests/visitors/bfs_test.cpp | 290 ++++++++++++++++++ 17 files changed, 1732 insertions(+), 2 deletions(-) create mode 100644 cpp/include/cugraph/visitors/bfs_visitor.hpp create mode 100755 cpp/include/cugraph/visitors/cascaded_dispatch.hpp create mode 100755 cpp/include/cugraph/visitors/enum_mapping.hpp create mode 100644 cpp/include/cugraph/visitors/erased_api.hpp create mode 100644 cpp/include/cugraph/visitors/erased_pack.hpp create mode 100755 cpp/include/cugraph/visitors/graph_enum.hpp create mode 100755 cpp/include/cugraph/visitors/graph_enum_mapping.hpp create mode 100755 cpp/include/cugraph/visitors/graph_envelope.hpp create mode 100644 cpp/include/cugraph/visitors/graph_factory.hpp create mode 100644 cpp/include/cugraph/visitors/ret_terased.hpp create mode 100644 cpp/src/visitors/bfs_visitor.cpp create mode 100755 cpp/src/visitors/graph_envelope.cpp create mode 100644 cpp/src/visitors/visitors_factory.cpp create mode 100644 cpp/tests/visitors/bfs_test.cpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index ea1be5d01b8..a42635fd617 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -204,6 +204,9 @@ add_library(cugraph SHARED src/components/weakly_connected_components.cu src/structure/create_graph_from_edgelist.cu src/utilities/host_barrier.cpp + src/visitors/graph_envelope.cpp + src/visitors/visitors_factory.cpp + src/visitors/bfs_visitor.cpp ) set_target_properties(cugraph diff --git a/cpp/include/cugraph/experimental/graph_view.hpp b/cpp/include/cugraph/experimental/graph_view.hpp index 34905a7c596..272f02259e3 100644 --- a/cpp/include/cugraph/experimental/graph_view.hpp +++ b/cpp/include/cugraph/experimental/graph_view.hpp @@ -19,6 +19,10 @@ #include #include +// visitor logic: +// +#include + #include #include @@ -218,6 +222,8 @@ struct graph_properties_t { namespace detail { +using namespace cugraph::visitors; + // FIXME: threshold values require tuning // use the hypersparse format (currently, DCSR or DCSC) for the vertices with their degrees smaller // than col_comm_size * hypersparse_threshold_ratio, should be less than 1.0 @@ -228,9 +234,9 @@ size_t constexpr num_sparse_segments_per_vertex_partition{3}; // Common for both graph_view_t & graph_t and both single-GPU & multi-GPU versions template -class graph_base_t { +class graph_base_t : public graph_envelope_t::base_graph_t /*<- visitor logic*/ { public: - graph_base_t() = default; + graph_base_t() 
= default; // Note: required by visitor logic graph_base_t(raft::handle_t const& handle, vertex_t number_of_vertices, @@ -259,6 +265,11 @@ class graph_base_t { bool is_symmetric() const { return properties_.is_symmetric; } bool is_multigraph() const { return properties_.is_multigraph; } + void apply(visitor_t& v) const override // <- visitor logic + { + v.visit_graph(*this); + } + protected: friend class cugraph::serializer::serializer_t; diff --git a/cpp/include/cugraph/visitors/bfs_visitor.hpp b/cpp/include/cugraph/visitors/bfs_visitor.hpp new file mode 100644 index 00000000000..75b6d9169f0 --- /dev/null +++ b/cpp/include/cugraph/visitors/bfs_visitor.hpp @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Andrei Schaffer, aschaffer@nvidia.com +// + +#pragma once +#include "erased_pack.hpp" +#include "graph_envelope.hpp" +#include "ret_terased.hpp" + +namespace cugraph { +namespace visitors { + +using namespace cugraph::experimental; + +// macro option: MAKE_VISITOR(bfs) + +// primary empty template: +// +template +struct bfs_visitor; + +// dummy out non-candidate instantiation paths: +// +template +struct bfs_visitor::value)>> + : visitor_t { + void visit_graph(graph_envelope_t::base_graph_t const&) override + { + // purposely empty + } + return_t const& get_result(void) const override + { + static return_t r{}; + return r; + } +}; + +template +struct bfs_visitor::value>> : visitor_t { + bfs_visitor(erased_pack_t& ep) : ep_(ep) {} + + void visit_graph(graph_envelope_t::base_graph_t const&) override; + + return_t const& get_result(void) const override { return result_; } + + private: + erased_pack_t& ep_; + return_t result_; +}; + +} // namespace visitors +} // namespace cugraph diff --git a/cpp/include/cugraph/visitors/cascaded_dispatch.hpp b/cpp/include/cugraph/visitors/cascaded_dispatch.hpp new file mode 100755 index 00000000000..b513c364ce7 --- /dev/null +++ b/cpp/include/cugraph/visitors/cascaded_dispatch.hpp @@ -0,0 +1,254 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// Andrei Schaffer, aschaffer@nvidia.com +// + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "enum_mapping.hpp" +#include "graph_enum_mapping.hpp" + +#include +#include "graph_factory.hpp" + +namespace cugraph { +namespace visitors { + +using namespace cugraph::experimental; +using pair_uniques_t = graph_envelope_t::pair_uniques_t; + +// dummy-out non-candidate paths: +// +template ::value, void*> = nullptr> +constexpr pair_uniques_t graph_dispatcher(GTypes graph_type, erased_pack_t& ep) +{ + /// return nullptr; + return pair_uniques_t{nullptr, nullptr}; +} + +// final step of cascading: calls factory on erased pack: +// +template ::value, void*> = nullptr> +constexpr pair_uniques_t graph_dispatcher(GTypes graph_type, erased_pack_t& ep) +{ + switch (graph_type) { + case GTypes::GRAPH_T: { + using graph_t = typename GMapType::type; + graph_factory_t factory; + + pair_uniques_t p_uniques = + std::make_pair(factory.make_graph(ep), + std::make_unique>()); + + return p_uniques; + } break; + + default: { + std::stringstream ss; + ss << "ERROR: Unknown type enum:" << static_cast(graph_type); + throw std::runtime_error(ss.str()); + } + } +} + +// multi_gpu bool dispatcher: +// resolves bool `multi_gpu` +// and using template arguments vertex_t, edge_t, weight_t, store_transpose +// cascades into next level +// graph_dispatcher() +// +template +constexpr decltype(auto) multi_gpu_dispatcher(bool multi_gpu, GTypes graph_type, erased_pack_t& ep) +{ + switch (multi_gpu) { + case true: { + return graph_dispatcher(graph_type, ep); + } break; + case false: { + return graph_dispatcher(graph_type, ep); + } + } +} + +// transpose bool dispatcher: +// resolves bool `store_transpose` +// and using template arguments vertex_t, edge_t, weight_t +// cascades into next level +// multi_gpu_dispatcher() +// +template +constexpr decltype(auto) transp_dispatcher(bool store_transposed, + bool multi_gpu, + GTypes graph_type, + erased_pack_t& ep) +{ + switch (store_transposed) { + case true: { + return multi_gpu_dispatcher(multi_gpu, graph_type, ep); + } break; + case false: { + return multi_gpu_dispatcher(multi_gpu, graph_type, ep); + } + } +} + +// weight type dispatcher: +// resolves weigth_t from weight_type enum +// and using template arguments vertex_t, edge_t +// cascades into next level +// transp_dispatcher() +// +template +constexpr decltype(auto) weight_dispatcher( + DTypes weight_type, bool store_transposed, bool multi_gpu, GTypes graph_type, erased_pack_t& ep) +{ + switch (weight_type) { + case DTypes::INT32: { + using weight_t = typename DMapType::type; + return transp_dispatcher( + store_transposed, multi_gpu, graph_type, ep); + } break; + case DTypes::INT64: { + using weight_t = typename DMapType::type; + return transp_dispatcher( + store_transposed, multi_gpu, graph_type, ep); + } break; + case DTypes::FLOAT32: { + using weight_t = typename DMapType::type; + return transp_dispatcher( + store_transposed, multi_gpu, graph_type, ep); + } break; + case DTypes::FLOAT64: { + using weight_t = typename DMapType::type; + return transp_dispatcher( + store_transposed, multi_gpu, graph_type, ep); + } break; + default: { + std::stringstream ss; + ss << "ERROR: Unknown type enum:" << static_cast(weight_type); + throw std::runtime_error(ss.str()); + } + } +} + +// edge type dispatcher: +// resolves edge_t from edge_type enum +// and using template argument vertex_t +// cascades into the next level +// weight_dispatcher(); +// +template +constexpr 
decltype(auto) edge_dispatcher(DTypes edge_type, + DTypes weight_type, + bool store_transposed, + bool multi_gpu, + GTypes graph_type, + erased_pack_t& ep) +{ + switch (edge_type) { + case DTypes::INT32: { + using edge_t = typename DMapType::type; + return weight_dispatcher( + weight_type, store_transposed, multi_gpu, graph_type, ep); + } break; + case DTypes::INT64: { + using edge_t = typename DMapType::type; + return weight_dispatcher( + weight_type, store_transposed, multi_gpu, graph_type, ep); + } break; + case DTypes::FLOAT32: { + using edge_t = typename DMapType::type; + return weight_dispatcher( + weight_type, store_transposed, multi_gpu, graph_type, ep); + } break; + case DTypes::FLOAT64: { + using edge_t = typename DMapType::type; + return weight_dispatcher( + weight_type, store_transposed, multi_gpu, graph_type, ep); + } break; + default: { + std::stringstream ss; + ss << "ERROR: Unknown type enum:" << static_cast(edge_type); + throw std::runtime_error(ss.str()); + } + } +} + +// vertex type dispatcher: +// entry point, +// resolves vertex_t from vertex_type enum +// and cascades into the next level +// edge_dispatcher(); +// +inline decltype(auto) vertex_dispatcher(DTypes vertex_type, + DTypes edge_type, + DTypes weight_type, + bool store_transposed, + bool multi_gpu, + GTypes graph_type, + erased_pack_t& ep) +{ + switch (vertex_type) { + case DTypes::INT32: { + using vertex_t = typename DMapType::type; + return edge_dispatcher( + edge_type, weight_type, store_transposed, multi_gpu, graph_type, ep); + } break; + case DTypes::INT64: { + using vertex_t = typename DMapType::type; + return edge_dispatcher( + edge_type, weight_type, store_transposed, multi_gpu, graph_type, ep); + } break; + case DTypes::FLOAT32: { + using vertex_t = typename DMapType::type; + return edge_dispatcher( + edge_type, weight_type, store_transposed, multi_gpu, graph_type, ep); + } break; + case DTypes::FLOAT64: { + using vertex_t = typename DMapType::type; + return edge_dispatcher( + edge_type, weight_type, store_transposed, multi_gpu, graph_type, ep); + } break; + default: { + std::stringstream ss; + ss << "ERROR: Unknown type enum:" << static_cast(vertex_type); + throw std::runtime_error(ss.str()); + } + } +} + +} // namespace visitors +} // namespace cugraph diff --git a/cpp/include/cugraph/visitors/enum_mapping.hpp b/cpp/include/cugraph/visitors/enum_mapping.hpp new file mode 100755 index 00000000000..ab72f87bcab --- /dev/null +++ b/cpp/include/cugraph/visitors/enum_mapping.hpp @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// Andrei Schaffer, aschaffer@nvidia.com +// + +#pragma once + +#include + +namespace cugraph { +namespace visitors { + +enum class DTypes { INT32 = 0, INT64, FLOAT32, FLOAT64, NTYPES }; + +template +struct DMapType; + +template <> +struct DMapType { + using type = int32_t; +}; + +template <> +struct DMapType { + using type = int64_t; +}; + +template <> +struct DMapType { + using type = float; +}; + +template <> +struct DMapType { + using type = double; +}; + +template +struct reverse_dmap_t; + +template <> +struct reverse_dmap_t { + static constexpr DTypes type_id = DTypes::INT32; +}; + +template <> +struct reverse_dmap_t { + static constexpr DTypes type_id = DTypes::INT64; +}; + +template <> +struct reverse_dmap_t { + static constexpr DTypes type_id = DTypes::FLOAT32; +}; + +template <> +struct reverse_dmap_t { + static constexpr DTypes type_id = DTypes::FLOAT64; +}; + +} // namespace visitors +} // namespace cugraph diff --git a/cpp/include/cugraph/visitors/erased_api.hpp b/cpp/include/cugraph/visitors/erased_api.hpp new file mode 100644 index 00000000000..b85ee84bff6 --- /dev/null +++ b/cpp/include/cugraph/visitors/erased_api.hpp @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Andrei Schaffer, aschaffer@nvidia.com +// + +/** + * @brief Set of type-erased wrappers, following the (almost) universal general signature: + * graph_envelope reference; erased_pack_t pack of erased arguments, that the caller is responsible + * to set correctly (FIXME: handshake protocol must be put in place); return set; + */ + +#pragma once + +namespace cugraph { +namespace api { + +using namespace cugraph::visitors; + +/** + * @brief Type-erased BFS wrapper. + * + * @param[in] g graph_envelope reference; + * @param[in] ep erased_pack_t pack of erased arguments, that the caller is responsible to set + * correctly (FIXME: handshake protocol must be put in place); + * @return return set; + */ +return_t bfs(graph_envelope_t const& g, erased_pack_t& ep); + +// TODO: more to follow... + +} // namespace api +} // namespace cugraph diff --git a/cpp/include/cugraph/visitors/erased_pack.hpp b/cpp/include/cugraph/visitors/erased_pack.hpp new file mode 100644 index 00000000000..eab1310eb02 --- /dev/null +++ b/cpp/include/cugraph/visitors/erased_pack.hpp @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// Andrei Schaffer, aschaffer@nvidia.com +// + +#pragma once + +#ifdef _DEBUG_ +#include +#endif + +#include +#include + +namespace cugraph { +namespace visitors { + +struct erased_pack_t { + erased_pack_t(void** p_args, size_t n) + : args_{[](void** p, size_t n) { + std::vector v; + v.insert(v.begin(), p, p + n); + return v; + }(p_args, n)} + { + // args_.insert(args_.begin(), p_args, p_args + n); + } + + erased_pack_t(std::initializer_list args) : args_{args} {} + + std::vector const& get_args(void) const { return args_; } + + erased_pack_t(erased_pack_t const&) = delete; + erased_pack_t& operator=(erased_pack_t const&) = delete; + + erased_pack_t(erased_pack_t&& other) : args_(std::move(other.args_)) {} + + erased_pack_t& operator=(erased_pack_t&& other) + { + args_ = std::move(other.args_); + return *this; + } + +#ifdef _DEBUG_ + void print(void) const + { + std::cout << "list args addresses:\n"; + for (auto&& elem : args_) + std::cout << elem << ", "; + std::cout << '\n'; + } +#endif + + private: + std::vector args_; +}; + +} // namespace visitors +} // namespace cugraph diff --git a/cpp/include/cugraph/visitors/graph_enum.hpp b/cpp/include/cugraph/visitors/graph_enum.hpp new file mode 100755 index 00000000000..eca53035313 --- /dev/null +++ b/cpp/include/cugraph/visitors/graph_enum.hpp @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Andrei Schaffer, aschaffer@nvidia.com +// + +#pragma once + +namespace cugraph { +namespace visitors { +enum class GTypes { GRAPH_T = 0, GRAPH_VIEW_T, NTYPES }; +} // namespace visitors +} // namespace cugraph diff --git a/cpp/include/cugraph/visitors/graph_enum_mapping.hpp b/cpp/include/cugraph/visitors/graph_enum_mapping.hpp new file mode 100755 index 00000000000..dfde78b6b4b --- /dev/null +++ b/cpp/include/cugraph/visitors/graph_enum_mapping.hpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// Andrei Schaffer, aschaffer@nvidia.com +// + +#pragma once + +#include +#include "graph_enum.hpp" + +namespace cugraph { +namespace visitors { + +template +struct GMapType; // primary template, purposely empty + +// partial specializations: +// +template +struct GMapType { + using type = graph_t; +}; + +} // namespace visitors +} // namespace cugraph diff --git a/cpp/include/cugraph/visitors/graph_envelope.hpp b/cpp/include/cugraph/visitors/graph_envelope.hpp new file mode 100755 index 00000000000..d5701088783 --- /dev/null +++ b/cpp/include/cugraph/visitors/graph_envelope.hpp @@ -0,0 +1,208 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Andrei Schaffer, aschaffer@nvidia.com +// + +/** + * @brief Set of classes abstracting the type-erasure, templates, and template constraints + * to client code that must supply run-time type information (RTTI) and has no template +constructs. + * + * Goal: be able to call an algorithm (say. louvain() on a type erased graph created from RTTI: + * { + * auto graph = make_graph(flags...); + * auto res = louvain(graph, params...); + * } + * params will be also type-erased (or same type regardless of graph-type); and will + * be appropriately passed to the Factory and then converted and passed to Visitor constructor +*/ + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "enum_mapping.hpp" +#include "graph_enum.hpp" + +#include + +namespace cugraph { +namespace visitors { + +using namespace cugraph::experimental; + +class erased_pack_t; // forward... +class return_t; // forward... + +// visitor base, incomplete: +// +class visitor_t; // forward... 
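// For orientation, a condensed sketch of the intended client-side flow, written
// against the declarations in this PR (it mirrors cpp/tests/visitors/bfs_test.cpp
// further down in this patch; the variable names here are illustrative only):
//
//   // 1. pack the RTTI-style graph construction arguments (single-GPU order shown):
//   erased_pack_t ep_graph{&handle, &edgelist, &num_vertices, &graph_props, &sorted, &check};
//
//   // 2. build the type-erased envelope; the enums are cascaded into a concrete
//   //    graph_t<vertex_t, edge_t, weight_t, ...> by the dispatcher and factory:
//   graph_envelope_t envelope{DTypes::INT32, DTypes::INT32, DTypes::FLOAT32,
//                             false /*store_transposed*/, false /*multi_gpu*/,
//                             GTypes::GRAPH_T, ep_graph};
//
//   // 3. pack the algorithm arguments and call the type-erased wrapper:
//   erased_pack_t ep_bfs{&handle, d_distances.data(), d_predecessors.data(),
//                        &source, &direction_optimizing, &depth_limit, &check};
//   return_t ret = cugraph::api::bfs(envelope, ep_bfs);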
+ +// envelope class around all +// graph classes: +// +struct graph_envelope_t { + struct base_graph_t { // necessary to avoid circular dependency + // between graph_base_t and graph_envelope_t + virtual ~base_graph_t() {} + + /// virtual void print(void) const = 0; + + virtual void apply(visitor_t& v) const = 0; + }; + + // abstract factory: + // + struct visitor_factory_t { + virtual std::unique_ptr make_louvain_visitor(erased_pack_t&) const = 0; + + virtual std::unique_ptr make_bfs_visitor(erased_pack_t&) const = 0; + }; + + using pair_uniques_t = + std::pair, std::unique_ptr>; + + void apply(visitor_t& v) const + { + if (p_impl_fact_.first) + p_impl_fact_.first->apply(v); + else + throw std::runtime_error("ERROR: Implementation not allocated."); + } + + // void print(void) const + // { + // if (p_impl_fact_.first) + // p_impl_fact_.first->print(); + // else + // throw std::runtime_error("ERROR: Implementation not allocated."); + // } + + std::unique_ptr const& graph(void) const { return p_impl_fact_.first; } + + std::unique_ptr const& factory(void) const { return p_impl_fact_.second; } + + graph_envelope_t(DTypes vertex_tid, + DTypes edge_tid, + DTypes weight_tid, + bool, + bool, + GTypes graph_tid, + erased_pack_t&); + + private: + // need it to hide the parameterization of + // (graph implementation, factory implementation) + // by dependent types: vertex_t, edge_t, weight_t + // + pair_uniques_t p_impl_fact_; +}; + +// visitor base: +// +class visitor_t { + public: + virtual ~visitor_t(void) {} + + virtual void visit_graph(graph_envelope_t::base_graph_t const&) = 0; + + virtual return_t const& get_result(void) const = 0; +}; + +// convenience templatized base: +// +template +struct dependent_graph_t : graph_envelope_t::base_graph_t { + using vertex_type = vertex_t; + using edge_type = edge_t; + using weight_type = weight_t; +}; + +// primary empty template: +// +template +struct dependent_factory_t; + +// dummy out non-candidate instantiation paths: +// +template +struct dependent_factory_t::value>> + : graph_envelope_t::visitor_factory_t { + using vertex_type = vertex_t; + using edge_type = edge_t; + using weight_type = weight_t; + + std::unique_ptr make_louvain_visitor(erased_pack_t&) const override { return nullptr; } + + std::unique_ptr make_bfs_visitor(erased_pack_t&) const override { return nullptr; } +}; + +template +struct dependent_factory_t::value>> + : graph_envelope_t::visitor_factory_t { + using vertex_type = vertex_t; + using edge_type = edge_t; + using weight_type = weight_t; + + std::unique_ptr make_louvain_visitor(erased_pack_t&) const override; + + std::unique_ptr make_bfs_visitor(erased_pack_t&) const override; +}; + +// utility factory selector: +// +template +std::unique_ptr make_visitor( + graph_type const& tag, // necessary to extract dependent types + std::function(graph_envelope_t::visitor_factory_t const&, + erased_pack_t&)> + f, // selector functor that picks up the make memf of the visitor_factory and passes `ep` to it + erased_pack_t& ep) // erased pack of args to be passed to factory +{ + using vertex_t = typename graph_type::vertex_type; + using edge_t = typename graph_type::edge_type; + using weight_t = typename graph_type::weight_type; + constexpr bool st = graph_type::is_adj_matrix_transposed; + constexpr bool mg = graph_type::is_multi_gpu; + + dependent_factory_t factory; + + return f(factory, ep); +} + +} // namespace visitors +} // namespace cugraph diff --git a/cpp/include/cugraph/visitors/graph_factory.hpp 
b/cpp/include/cugraph/visitors/graph_factory.hpp new file mode 100644 index 00000000000..22f53b72b01 --- /dev/null +++ b/cpp/include/cugraph/visitors/graph_factory.hpp @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Andrei Schaffer, aschaffer@nvidia.com +// + +#pragma once + +#include +#include + +#include "graph_envelope.hpp" +// prevent clang-format to rearange order of headers +#include "erased_pack.hpp" +// +// not really needed here; +// just to make happy the clang-format policy +// of header inclusion to be order-independent... +// +#include +#include + +#define _DEBUG_ + +#ifdef _DEBUG_ +#include +#endif + +namespace cugraph { +namespace visitors { + +using namespace cugraph::experimental; + +struct graph_factory_base_t { + virtual ~graph_factory_base_t(void) {} + + virtual std::unique_ptr make_graph(erased_pack_t&) const = 0; +}; + +// argument unpacker (from `erased_pack_t`) +// for graph construction +// +template +struct graph_arg_unpacker_t { + using vertex_t = typename graph_type::vertex_type; + using edge_t = typename graph_type::edge_type; + using weight_t = typename graph_type::weight_type; + static constexpr bool st = graph_type::is_adj_matrix_transposed; + static constexpr bool mg = graph_type::is_multi_gpu; + + void operator()(erased_pack_t& ep, + std::tuple& t_args) const + { + } +}; + +// primary template factory; to be (partiallY) specialized; +// and explicitly instantiated for concrete graphs +// +template +struct graph_factory_t : graph_factory_base_t { + std::unique_ptr make_graph(erased_pack_t&) const override + { + throw std::runtime_error("Empty factory, not to be called..."); + } +}; + +// Linker PROBLEM (FIXED): +// dispatcher needs _ALL_ paths instantiated, +// not just the ones explicitly instantiated +// (EIDir) in `graph.cpp` +// +// Posiible SOLUTIONS: +// +// (1.) the _factory_ must provide "dummy" +// instantiations for paths not needed; +// +// or: +// +// (2.) (Adopted solution) +// the _dispatcher_ (graph_dispatcher()) +// must provide empty implementation +// for the instantiations that are not needed; (Done!) +// +template +struct graph_factory_t< + graph_t>> + : graph_factory_base_t { + std::unique_ptr make_graph(erased_pack_t& ep) const override + { + /// std::cout << "Multi-GPU factory.\n"; + std::vector const& v_args{ep.get_args()}; + + // invoke cnstr. using cython arg pack: + // + assert(v_args.size() == 9); + +#ifdef _DEBUG_ + std::cout << "Enter graph factory...\n"; +#endif + + // cnstr. 
args unpacking: + // + raft::handle_t const& handle = *static_cast(v_args[0]); + + vertex_t* src_vertices = static_cast(v_args[1]); + vertex_t* dst_vertices = static_cast(v_args[2]); + weight_t* weights = static_cast(v_args[3]); + vertex_t* vertex_partition_offsets = static_cast(v_args[4]); + edge_t num_partition_edges = *static_cast(v_args[5]); + vertex_t num_global_vertices = *static_cast(v_args[6]); + edge_t num_global_edges = *static_cast(v_args[7]); + bool sorted_by_degree = *static_cast(v_args[8]); + + // TODO: un-hardcode: have it passed int `ep` + // + graph_properties_t graph_props{.is_symmetric = false, .is_multigraph = false}; + bool do_expensive_check{false}; // FIXME: check what should this default to + + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_rank = row_comm.get_rank(); + auto const row_comm_size = row_comm.get_size(); // pcols + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_rank = col_comm.get_rank(); + auto const col_comm_size = col_comm.get_size(); // prows + + std::vector> edgelist( + {{src_vertices, dst_vertices, weights, num_partition_edges}}); + + std::vector partition_offsets_vector( + vertex_partition_offsets, vertex_partition_offsets + (row_comm_size * col_comm_size) + 1); + + partition_t partition( + partition_offsets_vector, row_comm_size, col_comm_size, row_comm_rank, col_comm_rank); + + std::optional> + opt_seg_off{}; // FIXME: may needd to pass/extract segment_offsets vector + + return std::make_unique>( + handle, + edgelist, + partition, + num_global_vertices, + num_global_edges, + graph_props, + opt_seg_off, + do_expensive_check); + } +}; + +template +struct graph_factory_t< + graph_t>> + : graph_factory_base_t { + std::unique_ptr make_graph(erased_pack_t& ep) const override + { + /// std::cout << "Single-GPU factory.\n"; + std::vector const& v_args{ep.get_args()}; + + assert(v_args.size() == 6); + + raft::handle_t const& handle = *static_cast(v_args[0]); + + auto const& elist = *static_cast const*>(v_args[1]); + + auto nv = *static_cast(v_args[2]); + + auto props = *static_cast(v_args[3]); + + bool sorted = *static_cast(v_args[4]); // FIXME: no need to pass this! + + bool check = *static_cast(v_args[5]); + + std::optional> opt_seg_off{}; // should not be needed for (!multi_gpu) + + return std::make_unique>( + handle, elist, nv, props, opt_seg_off, check); + } +}; + +} // namespace visitors +} // namespace cugraph diff --git a/cpp/include/cugraph/visitors/ret_terased.hpp b/cpp/include/cugraph/visitors/ret_terased.hpp new file mode 100644 index 00000000000..19a7920e81b --- /dev/null +++ b/cpp/include/cugraph/visitors/ret_terased.hpp @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// Andrei Schaffer, aschaffer@nvidia.com +// +#pragma once + +#include +#include + +namespace cugraph { +namespace visitors { + +struct return_t { + struct base_return_t { + virtual ~base_return_t(void) {} + + virtual void copy(return_t const&) = 0; + virtual std::unique_ptr clone(void) const = 0; + }; + + template + struct generic_return_t : base_return_t { + generic_return_t(T const& t) : return_(t) {} + + void copy(return_t const& r) override + { + base_return_t const* p_B = static_cast(r.p_impl_.get()); + return_ = *(dynamic_cast(p_B)); + } + + std::unique_ptr clone(void) const override + { + return std::make_unique>(return_); + } + + T const& get(void) const { return return_; } + + private: + T return_; + }; + + return_t(void) = default; + + template + return_t(T const& t) : p_impl_(std::make_unique>(t)) + { + } + + return_t(return_t const& r) : p_impl_{r.clone()} {} + + return_t& operator=(return_t const& r) + { + p_impl_ = r.clone(); + return *this; + } + + return_t(return_t&& other) : p_impl_(std::move(other.p_impl_)) {} + return_t& operator=(return_t&& other) + { + p_impl_ = std::move(other.p_impl_); + return *this; + } + + std::unique_ptr clone(void) const + { + if (p_impl_) + return p_impl_->clone(); + else + return nullptr; + } + + template + T get(void) const + { + if (p_impl_) { + generic_return_t const* p = static_cast const*>(p_impl_.get()); + return p->get(); + } else + throw std::runtime_error("ERROR: nullptr impl."); + } + + private: + std::unique_ptr p_impl_; +}; + +} // namespace visitors +} // namespace cugraph diff --git a/cpp/src/visitors/bfs_visitor.cpp b/cpp/src/visitors/bfs_visitor.cpp new file mode 100644 index 00000000000..672cc35f00b --- /dev/null +++ b/cpp/src/visitors/bfs_visitor.cpp @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Andrei Schaffer, aschaffer@nvidia.com +// + +#include +#include + +namespace cugraph { +namespace visitors { + +// +// wrapper code: +// +template +void bfs_visitor::value>>:: + visit_graph(graph_envelope_t::base_graph_t const& graph) +{ + // Note: this must be called only on: + // graph_view_t + // + if constexpr (st == false) { + // unless algorithms only call virtual graph methods + // under the hood, the algos require this conversion: + // + graph_t const* p_g = + static_cast const*>(&graph); + + auto gview = p_g->view(); + + auto const& v_args = ep_.get_args(); + + // unpack bfs() args: + // + assert(v_args.size() == 7); + + // cnstr. 
args unpacking: + // + raft::handle_t const& handle = *static_cast(v_args[0]); + + vertex_t* p_d_dist = static_cast(v_args[1]); + + vertex_t* p_d_predec = static_cast(v_args[2]); + + vertex_t src_v = *static_cast(v_args[3]); + + bool dir_opt = *static_cast(v_args[4]); + + auto depth_l = *static_cast(v_args[5]); + + bool check = *static_cast(v_args[6]); + + // call algorithm + // (no result; void) + // + bfs(handle, gview, p_d_dist, p_d_predec, src_v, dir_opt, depth_l, check); + } else { + CUGRAPH_FAIL("Unsupported BFS algorithm (store_transposed == true)."); + } +} + +// EIDir's: +// +template class bfs_visitor; +template class bfs_visitor; + +template class bfs_visitor; +template class bfs_visitor; + +template class bfs_visitor; +template class bfs_visitor; + +template class bfs_visitor; +template class bfs_visitor; + +//------ + +template class bfs_visitor; +template class bfs_visitor; + +template class bfs_visitor; +template class bfs_visitor; + +template class bfs_visitor; +template class bfs_visitor; + +template class bfs_visitor; +template class bfs_visitor; + +//------ + +template class bfs_visitor; +template class bfs_visitor; + +template class bfs_visitor; +template class bfs_visitor; + +template class bfs_visitor; +template class bfs_visitor; + +template class bfs_visitor; +template class bfs_visitor; + +} // namespace visitors + +namespace api { +using namespace cugraph::visitors; +// wrapper: +// macro option: MAKE_WRAPPER(bfs) +// +return_t bfs(graph_envelope_t const& g, erased_pack_t& ep) +{ + auto p_visitor = g.factory()->make_bfs_visitor(ep); + + g.apply(*p_visitor); + + return_t ret{p_visitor->get_result()}; + + return ret; // RVO-ed; +} + +} // namespace api +} // namespace cugraph diff --git a/cpp/src/visitors/graph_envelope.cpp b/cpp/src/visitors/graph_envelope.cpp new file mode 100755 index 00000000000..927c5060b1e --- /dev/null +++ b/cpp/src/visitors/graph_envelope.cpp @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// Andrei Schaffer, aschaffer@nvidia.com +// + +#include +#include + +namespace cugraph { +namespace visitors { + +// call cascaded dispatcher with factory and erased_pack_t +// +graph_envelope_t::graph_envelope_t(DTypes vertex_tid, + DTypes edge_tid, + DTypes weight_tid, + bool st, + bool mg, + GTypes graph_tid, + erased_pack_t& ep) + : p_impl_fact_(vertex_dispatcher(vertex_tid, edge_tid, weight_tid, st, mg, graph_tid, ep)) +{ +} + +template class graph_factory_t>; +template class graph_factory_t>; + +template class graph_factory_t>; +template class graph_factory_t>; + +template class graph_factory_t>; +template class graph_factory_t>; + +template class graph_factory_t>; +template class graph_factory_t>; + +} // namespace visitors +} // namespace cugraph diff --git a/cpp/src/visitors/visitors_factory.cpp b/cpp/src/visitors/visitors_factory.cpp new file mode 100644 index 00000000000..c4238166c6a --- /dev/null +++ b/cpp/src/visitors/visitors_factory.cpp @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Andrei Schaffer, aschaffer@nvidia.com +// + +#include +#include + +namespace cugraph { +namespace visitors { + +template +std::unique_ptr +dependent_factory_t::value>>:: + make_louvain_visitor(erased_pack_t& ep) const +{ + /// return std::unique_ptr( + /// static_cast(new louvain_visitor(ep))); + + return nullptr; // for now... +} + +template +std::unique_ptr +dependent_factory_t::value>>:: + make_bfs_visitor(erased_pack_t& ep) const +{ + // return nullptr; // for now... 
+ return std::make_unique>(ep); +} + +// EIDir's: +// +template class dependent_factory_t; +template class dependent_factory_t; + +template class dependent_factory_t; +template class dependent_factory_t; + +template class dependent_factory_t; +template class dependent_factory_t; + +template class dependent_factory_t; +template class dependent_factory_t; + +//------ + +template class dependent_factory_t; +template class dependent_factory_t; + +template class dependent_factory_t; +template class dependent_factory_t; + +template class dependent_factory_t; +template class dependent_factory_t; + +template class dependent_factory_t; +template class dependent_factory_t; + +//------ + +template class dependent_factory_t; +template class dependent_factory_t; + +template class dependent_factory_t; +template class dependent_factory_t; + +template class dependent_factory_t; +template class dependent_factory_t; + +template class dependent_factory_t; +template class dependent_factory_t; + +// Either use EIDir or specialization, can't have both; +// Prefer specialization when EIdir's are not enough +// because of cascaded-dispatcher exhaustive instantiations +// In this case EIDir above are enough; +} // namespace visitors +} // namespace cugraph diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index a8dda519feb..77526ce9225 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -403,6 +403,14 @@ set(SERIALIZATION_TEST_SRCS ConfigureTest(SERIALIZATION_TEST "${SERIALIZATION_TEST_SRCS}") +################################################################################################### +# - BFS Visitor tests ----------------------------------------------------------------------------- + +set(BFS_VISITOR_TEST_SRCS + "${CMAKE_CURRENT_SOURCE_DIR}/visitors/bfs_test.cpp") + +ConfigureTest(BFS_VISITOR_TEST "${BFS_VISITOR_TEST_SRCS}") + ################################################################################################### # - MG tests -------------------------------------------------------------------------------------- diff --git a/cpp/tests/visitors/bfs_test.cpp b/cpp/tests/visitors/bfs_test.cpp new file mode 100644 index 00000000000..0e216683b0c --- /dev/null +++ b/cpp/tests/visitors/bfs_test.cpp @@ -0,0 +1,290 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// Andrei Schaffer, aschaffer@nvidia.com +// + +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +// visitor artifacts: +// +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +template +void bfs_reference(edge_t* offsets, + vertex_t* indices, + vertex_t* distances, + vertex_t* predecessors, + vertex_t num_vertices, + vertex_t source, + vertex_t depth_limit = std::numeric_limits::max()) +{ + vertex_t depth{0}; + + std::fill(distances, distances + num_vertices, std::numeric_limits::max()); + std::fill(predecessors, + predecessors + num_vertices, + cugraph::experimental::invalid_vertex_id::value); + + *(distances + source) = depth; + std::vector cur_frontier_rows{source}; + std::vector new_frontier_rows{}; + + while (cur_frontier_rows.size() > 0) { + for (auto const row : cur_frontier_rows) { + auto nbr_offset_first = *(offsets + row); + auto nbr_offset_last = *(offsets + row + 1); + for (auto nbr_offset = nbr_offset_first; nbr_offset != nbr_offset_last; ++nbr_offset) { + auto nbr = *(indices + nbr_offset); + if (*(distances + nbr) == std::numeric_limits::max()) { + *(distances + nbr) = depth + 1; + *(predecessors + nbr) = row; + new_frontier_rows.push_back(nbr); + } + } + } + std::swap(cur_frontier_rows, new_frontier_rows); + new_frontier_rows.clear(); + ++depth; + if (depth >= depth_limit) { break; } + } + + return; +} + +typedef struct BFS_Usecase_t { + std::string graph_file_full_path{}; + size_t source{false}; + + BFS_Usecase_t(std::string const& graph_file_path, size_t source) : source(source) + { + if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { + graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; + } else { + graph_file_full_path = graph_file_path; + } + }; +} BFS_Usecase; + +class Tests_BFS : public ::testing::TestWithParam { + public: + Tests_BFS() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + template + void run_current_test(BFS_Usecase const& configuration) + { + using namespace cugraph::experimental; + using namespace cugraph::visitors; + + using weight_t = float; + + raft::handle_t handle{}; + + bool test_weighted = false; + + // extract graph data from graph matrix file: + // + auto&& [d_src, d_dst, opt_d_w, num_vertices, is_sym] = + cugraph::test::read_edgelist_from_matrix_market_file( + handle, configuration.graph_file_full_path, test_weighted); + + graph_properties_t graph_props{is_sym, false}; + edge_t num_edges = d_dst.size(); + + std::optional opt_ptr_w; + if (opt_d_w.has_value()) { opt_ptr_w = opt_d_w->data(); } + + // to be filled: + // + cugraph::experimental::edgelist_t edgelist{ + d_src.data(), d_dst.data(), opt_ptr_w, num_edges}; + bool sorted{false}; + bool check{false}; + + erased_pack_t ep_graph{&handle, &edgelist, &num_vertices, &graph_props, &sorted, &check}; + + DTypes vertex_tid = reverse_dmap_t::type_id; + DTypes edge_tid = reverse_dmap_t::type_id; + DTypes weight_tid = reverse_dmap_t::type_id; + bool st = false; + bool mg = false; + GTypes graph_tid = GTypes::GRAPH_T; + + graph_envelope_t graph_envelope{vertex_tid, edge_tid, weight_tid, st, mg, graph_tid, ep_graph}; + + auto const* p_graph = dynamic_cast const*>( + graph_envelope.graph().get()); + + auto graph_view = p_graph->view(); + + std::vector h_offsets(graph_view.get_number_of_vertices() + 1); + std::vector h_indices(graph_view.get_number_of_edges()); + 
raft::update_host(h_offsets.data(), + graph_view.get_matrix_partition_view().get_offsets(), + graph_view.get_number_of_vertices() + 1, + handle.get_stream()); + raft::update_host(h_indices.data(), + graph_view.get_matrix_partition_view().get_indices(), + graph_view.get_number_of_edges(), + handle.get_stream()); + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + + ASSERT_TRUE(configuration.source >= 0 && + configuration.source <= graph_view.get_number_of_vertices()) + << "Starting sources should be >= 0 and" + << " less than the number of vertices in the graph."; + + std::vector h_reference_distances(graph_view.get_number_of_vertices()); + std::vector h_reference_predecessors(graph_view.get_number_of_vertices()); + + bfs_reference(h_offsets.data(), + h_indices.data(), + h_reference_distances.data(), + h_reference_predecessors.data(), + graph_view.get_number_of_vertices(), + static_cast(configuration.source), + std::numeric_limits::max()); + + rmm::device_uvector d_distances(graph_view.get_number_of_vertices(), + handle.get_stream()); + rmm::device_uvector d_predecessors(graph_view.get_number_of_vertices(), + handle.get_stream()); + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + { + // visitors version: + // + // using namespace cugraph::experimental; + + // in a context where dependent types are known, + // type-erasing the graph is not necessary, + // hence the `_wrapper()` is not necessary; + // + + // packing visitor arguments = bfs algorithm arguments + // + vertex_t* p_d_dist = d_distances.begin(); + vertex_t* p_d_predec = d_predecessors.begin(); + auto src = static_cast(configuration.source); + bool dir_opt{false}; + auto depth_l = std::numeric_limits::max(); + bool check{false}; + erased_pack_t ep{ + &handle, p_d_dist, p_d_predec, &src, &dir_opt, &depth_l, &check}; // args for bfs() + + // several options to run the BFS algorithm: + // + // (1.) if a graph object already exists, + // we can use it to make the appropriate + // visitor: + // + // auto v_uniq_ptr = make_visitor( + // *p_graph, + // [](graph_envelope_t::visitor_factory_t const& vfact, erased_pack_t& parg) { + // return vfact.make_bfs_visitor(parg); + // }, + // ep); + // p_graph->apply(*v_uniq_ptr); + + // (2.) if a graph object already exists, alternatively we can + // explicitly instantiate the factory and call its make method: + // + // dependent_factory_t visitor_factory{}; // okay + // auto v_uniq_ptr = visitor_factory.make_bfs_visitor(ep); // okay + // p_graph->apply(*v_uniq_ptr); + + // (3.) 
if only the `graph_envelope_t` object exists, + // we can invoke the algorithm via the wrapper: + // + return_t ret = cugraph::api::bfs(graph_envelope, ep); + } + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + std::vector h_cugraph_distances(graph_view.get_number_of_vertices()); + std::vector h_cugraph_predecessors(graph_view.get_number_of_vertices()); + + raft::update_host( + h_cugraph_distances.data(), d_distances.data(), d_distances.size(), handle.get_stream()); + raft::update_host(h_cugraph_predecessors.data(), + d_predecessors.data(), + d_predecessors.size(), + handle.get_stream()); + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + + ASSERT_TRUE(std::equal( + h_reference_distances.begin(), h_reference_distances.end(), h_cugraph_distances.begin())) + << "distances do not match with the reference values."; + + for (auto it = h_cugraph_predecessors.begin(); it != h_cugraph_predecessors.end(); ++it) { + auto i = std::distance(h_cugraph_predecessors.begin(), it); + if (*it == cugraph::experimental::invalid_vertex_id::value) { + ASSERT_TRUE(h_reference_predecessors[i] == *it) + << "vertex reachability do not match with the reference."; + } else { + ASSERT_TRUE(h_reference_distances[*it] + 1 == h_reference_distances[i]) + << "distance to this vertex != distance to the predecessor vertex + 1."; + bool found{false}; + for (auto j = h_offsets[*it]; j < h_offsets[*it + 1]; ++j) { + if (h_indices[j] == i) { + found = true; + break; + } + } + ASSERT_TRUE(found) << "no edge from the predecessor vertex to this vertex."; + } + } + } +}; + +// FIXME: add tests for type combinations +TEST_P(Tests_BFS, CheckInt32Int32) { run_current_test(GetParam()); } + +INSTANTIATE_TEST_CASE_P(simple_test, + Tests_BFS, + ::testing::Values(BFS_Usecase("test/datasets/karate.mtx", 0), + BFS_Usecase("test/datasets/polbooks.mtx", 0), + BFS_Usecase("test/datasets/netscience.mtx", 0), + BFS_Usecase("test/datasets/netscience.mtx", 100), + BFS_Usecase("test/datasets/wiki2003.mtx", 1000), + BFS_Usecase("test/datasets/wiki-Talk.mtx", 1000))); + +CUGRAPH_TEST_PROGRAM_MAIN() From 0708601b690b6c978165a40aa805a9330c848c34 Mon Sep 17 00:00:00 2001 From: Chuck Hastings <45364586+ChuckHastings@users.noreply.github.com> Date: Wed, 21 Jul 2021 09:52:45 -0400 Subject: [PATCH 328/343] revert cuco to latest dev branch, issues should be fixed (#1721) We had an issue in CI that uncovered the fact that raft is referencing the `dev` branch of coco while `cugraph` has it pinned to a previous commit. CI is using the newer `dev` branch version, which doesn't match what happens when we do local builds. The original motivation for pinning was a temporary fix because of a bug introduced in `coco` which has since been resolved. This PR reverts us back to using the `dev` branch. 
Authors: - Chuck Hastings (https://github.com/ChuckHastings) Approvers: - Brad Rees (https://github.com/BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1721 --- cpp/cmake/thirdparty/get_cuco.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/cmake/thirdparty/get_cuco.cmake b/cpp/cmake/thirdparty/get_cuco.cmake index b9542a42f26..009a6642415 100644 --- a/cpp/cmake/thirdparty/get_cuco.cmake +++ b/cpp/cmake/thirdparty/get_cuco.cmake @@ -20,7 +20,7 @@ function(find_and_configure_cuco VERSION) GLOBAL_TARGETS cuco cuco::cuco CPM_ARGS GIT_REPOSITORY https://github.com/NVIDIA/cuCollections.git - GIT_TAG 0b672bbde7c85a79df4d7ca5f82e15e5b4a57700 + GIT_TAG dev OPTIONS "BUILD_TESTS OFF" "BUILD_BENCHMARKS OFF" "BUILD_EXAMPLES OFF" From 49aaff6fda6f21abcfa2749839d49a4e5528e146 Mon Sep 17 00:00:00 2001 From: Chuck Hastings <45364586+ChuckHastings@users.noreply.github.com> Date: Thu, 22 Jul 2021 10:37:21 -0400 Subject: [PATCH 329/343] Fea speedup compile (#1702) Some changes to speed up compilation of cugraph. With 8 CPUs, this speeds up the overall compile time on my workstation from 13 minutes to about 9 minutes. Should speed up C++ compilation in CI by about 10 minutes total. Authors: - Chuck Hastings (https://github.com/ChuckHastings) Approvers: - Brad Rees (https://github.com/BradReesWork) - Andrei Schaffer (https://github.com/aschaffer) - Kumar Aatish (https://github.com/kaatish) URL: https://github.com/rapidsai/cugraph/pull/1702 --- cpp/CMakeLists.txt | 4 +- .../cugraph/detail/shuffle_wrappers.hpp | 102 ++++++++ .../cugraph/detail/utility_wrappers.hpp | 89 +++++++ cpp/src/detail/shuffle_wrappers.cu | 229 ++++++++++++++++++ cpp/src/detail/utility_wrappers.cu | 97 ++++++++ cpp/src/experimental/coarsen_graph.cu | 90 +------ cpp/src/generators/generate_rmat_edgelist.cu | 10 +- .../layout/{barnes_hut.hpp => barnes_hut.cuh} | 11 +- .../layout/{bh_kernels.hpp => bh_kernels.cuh} | 0 .../layout/{exact_fa2.hpp => exact_fa2.cuh} | 10 +- ...xact_repulsion.hpp => exact_repulsion.cuh} | 0 .../{fa2_kernels.hpp => fa2_kernels.cuh} | 0 cpp/src/layout/force_atlas2.cu | 4 +- cpp/src/layout/utils.hpp | 2 - cpp/src/sampling/random_walks.cuh | 6 +- ...list.cu => create_graph_from_edgelist.cpp} | 48 +--- cpp/tests/CMakeLists.txt | 2 +- cpp/tests/components/wcc_graphs.cu | 7 +- .../{rmat_utilities.cu => rmat_utilities.cpp} | 156 +++--------- 19 files changed, 605 insertions(+), 262 deletions(-) create mode 100644 cpp/include/cugraph/detail/shuffle_wrappers.hpp create mode 100644 cpp/include/cugraph/detail/utility_wrappers.hpp create mode 100644 cpp/src/detail/shuffle_wrappers.cu create mode 100644 cpp/src/detail/utility_wrappers.cu rename cpp/src/layout/{barnes_hut.hpp => barnes_hut.cuh} (98%) rename cpp/src/layout/{bh_kernels.hpp => bh_kernels.cuh} (100%) rename cpp/src/layout/{exact_fa2.hpp => exact_fa2.cuh} (97%) rename cpp/src/layout/{exact_repulsion.hpp => exact_repulsion.cuh} (100%) rename cpp/src/layout/{fa2_kernels.hpp => fa2_kernels.cuh} (100%) rename cpp/src/structure/{create_graph_from_edgelist.cu => create_graph_from_edgelist.cpp} (90%) rename cpp/tests/utilities/{rmat_utilities.cu => rmat_utilities.cpp} (74%) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index a42635fd617..1ab227e01c1 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -156,6 +156,8 @@ endif() # - library targets ------------------------------------------------------------------------------- add_library(cugraph SHARED + src/detail/utility_wrappers.cu + src/detail/shuffle_wrappers.cu 
src/utilities/spmv_1D.cu src/utilities/cython.cu src/utilities/path_retrieval.cu @@ -202,7 +204,7 @@ add_library(cugraph SHARED src/serialization/serializer.cu src/tree/mst.cu src/components/weakly_connected_components.cu - src/structure/create_graph_from_edgelist.cu + src/structure/create_graph_from_edgelist.cpp src/utilities/host_barrier.cpp src/visitors/graph_envelope.cpp src/visitors/visitors_factory.cpp diff --git a/cpp/include/cugraph/detail/shuffle_wrappers.hpp b/cpp/include/cugraph/detail/shuffle_wrappers.hpp new file mode 100644 index 00000000000..fcfd98db447 --- /dev/null +++ b/cpp/include/cugraph/detail/shuffle_wrappers.hpp @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +namespace cugraph { +namespace detail { + +/** + * @brief Shuffle edgelist using the edge key function + * + * NOTE: d_edgelist_rows, d_edgelist_cols and d_edgelist_weights + * are modified within this function (data is sorted) + * But the actual output is returned. The exact contents + * of d_edgelist_rows, d_edgelist_cols and d_edgelist_weights + * after the function is undefined. + * + * @tparam vertex_t vertex type + * @tparam weight_t weight type + * + * @param[in] handle raft handle + * @param[in/out] d_edgelist_rows vertex ids for row + * @param[in/out] d_edgelist_cols vertex ids for column + * @param[in/out] d_edgelist_weights optional edge weights + * @param[in] store_transposed true if operating on + * transposed matrix + * + * @return tuple of shuffled rows, columns and optional weights + */ +template +std::tuple, + rmm::device_uvector, + std::optional>> +shuffle_edgelist_by_edge(raft::handle_t const& handle, + rmm::device_uvector& d_edgelist_rows, + rmm::device_uvector& d_edgelist_cols, + std::optional>& d_edgelist_weights, + bool store_transposed); + +/** + * @brief Shuffle vertices using the vertex key function + * + * NOTE: d_value is modified within this function + * (data is sorted). But the actual output is returned. + * The exact contents of d_value after the function is + * undefined. + * + * @tparam vertex_t vertex type + * + * @param[in] handle raft handle + * @param[in/out] d_vertices vertex ids to shuffle + * + * @return device vector of shuffled vertices + */ +template +rmm::device_uvector shuffle_vertices(raft::handle_t const& handle, + rmm::device_uvector& d_vertices); + +/** + * @brief Groupby and count edgelist using the edge key function + * + * NOTE: d_edgelist_rows, d_edgelist_cols and d_edgelist_weights + * are modified within this function (data is sorted) + * But the actual output is returned. The exact contents + * of d_edgelist_rows, d_edgelist_cols and d_edgelist_weights + * after the function is undefined. 
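 * A minimal illustrative call (a sketch only; assumes a multi-GPU raft::handle_t with
 * the 2D partitioning sub-communicators initialized, locally held edge lists, and
 * hypothetical variable names):
 *
 *   std::optional<rmm::device_uvector<float>> d_weights{std::nullopt};
 *   auto d_counts =  // per local adjacency matrix partition
 *     cugraph::detail::groupby_and_count_by_edge(
 *       handle, d_edgelist_rows, d_edgelist_cols, d_weights, num_local_partitions);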
+ * + * @tparam vertex_t vertex type + * @tparam weight_t weight type + * + * @param[in] handle raft handle + * @param[in/out] d_edgelist_rows vertex ids for row + * @param[in/out] d_edgelist_cols vertex ids for column + * @param[in/out] d_edgelist_weights optional edge weights + * + * @return tuple of shuffled rows, columns and optional weights + */ +template +rmm::device_uvector groupby_and_count_by_edge( + raft::handle_t const& handle, + rmm::device_uvector& d_edgelist_rows, + rmm::device_uvector& d_edgelist_cols, + std::optional>& d_edgelist_weights, + size_t number_of_local_adj_matrix_partitions); + +} // namespace detail +} // namespace cugraph diff --git a/cpp/include/cugraph/detail/utility_wrappers.hpp b/cpp/include/cugraph/detail/utility_wrappers.hpp new file mode 100644 index 00000000000..580ca00250a --- /dev/null +++ b/cpp/include/cugraph/detail/utility_wrappers.hpp @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +namespace cugraph { +namespace detail { + +/** + * @brief Fill a buffer with uniformly distributed random values + * + * Fills a buffer with uniformly distributed random values between + * the specified minimum and maximum values. 
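 * A minimal illustrative call (a sketch with hypothetical names), filling a device
 * buffer with random values between 0 and 1:
 *
 *   rmm::cuda_stream_view stream_view{handle.get_stream()};
 *   rmm::device_uvector<float> d_values(num_values, stream_view);
 *   cugraph::detail::uniform_random_fill(
 *     stream_view, d_values.data(), d_values.size(), float{0}, float{1}, uint64_t{seed});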
+ * + * @tparam value_t type of the value to operate on + * + * @param[in] stream_view stream view + * @param[out] d_value device array to fill + * @param[in] size number of elements in array + * @param[in] min_value minimum value + * @param[in] max_value maximum value + * @param[in] seed seed for initializing random number generator + * + */ +template +void uniform_random_fill(rmm::cuda_stream_view const& stream_view, + value_t* d_value, + size_t size, + value_t min_value, + value_t max_value, + uint64_t seed); + +/** + * @brief Fill a buffer with a sequence of values + * + * Fills the buffer with the sequence: + * {start_value, start_value+1, start_value+2, ..., start_value+size-1} + * + * Similar to the function std::iota, wraps the function thrust::sequence + * + * @tparam value_t type of the value to operate on + * + * @param[in] stream_view stream view + * @param[out] d_value device array to fill + * @param[in] size number of elements in array + * @param[in] start_value starting value for sequence + * + */ +template +void sequence_fill(rmm::cuda_stream_view const& stream_view, + value_t* d_value, + size_t size, + value_t start_value); + +/** + * @brief Compute the maximum vertex id of an edge list + * + * max(d_edgelist_rows.max(), d_edgelist_cols.max()) + * + * @tparam vertex_t vertex type + * + * @param[in] stream_view stream view + * @param[in] d_edgelist_rows device array to fill + * @param[in] d_edgelist_cols number of elements in array + * + * @param the maximum value occurring in the edge list + */ +template +vertex_t compute_maximum_vertex_id(rmm::cuda_stream_view const& stream_view, + rmm::device_uvector const& d_edgelist_rows, + rmm::device_uvector const& d_edgelist_cols); + +} // namespace detail +} // namespace cugraph diff --git a/cpp/src/detail/shuffle_wrappers.cu b/cpp/src/detail/shuffle_wrappers.cu new file mode 100644 index 00000000000..adf5fdfbc11 --- /dev/null +++ b/cpp/src/detail/shuffle_wrappers.cu @@ -0,0 +1,229 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include + +#include + +#include + +#include + +namespace cugraph { +namespace detail { + +template +std::tuple, + rmm::device_uvector, + std::optional>> +shuffle_edgelist_by_edge(raft::handle_t const& handle, + rmm::device_uvector& d_edgelist_rows, + rmm::device_uvector& d_edgelist_cols, + std::optional>& d_edgelist_weights, + bool store_transposed) +{ + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_size = row_comm.get_size(); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_size = col_comm.get_size(); + + // TODO: Make a shuffle_edges and shuffle_vertices out of these... 
+ rmm::device_uvector d_rx_edgelist_rows(0, handle.get_stream()); + rmm::device_uvector d_rx_edgelist_cols(0, handle.get_stream()); + std::optional> d_rx_edgelist_weights{std::nullopt}; + if (d_edgelist_weights) { + auto edge_first = thrust::make_zip_iterator( + thrust::make_tuple(store_transposed ? d_edgelist_cols.begin() : d_edgelist_rows.begin(), + store_transposed ? d_edgelist_rows.begin() : d_edgelist_cols.begin(), + (*d_edgelist_weights).begin())); + + std::forward_as_tuple(std::tie(store_transposed ? d_rx_edgelist_cols : d_rx_edgelist_rows, + store_transposed ? d_rx_edgelist_rows : d_rx_edgelist_cols, + d_rx_edgelist_weights), + std::ignore) = + cugraph::experimental::groupby_gpuid_and_shuffle_values( + comm, // handle.get_comms(), + edge_first, + edge_first + d_edgelist_rows.size(), + [key_func = + cugraph::experimental::detail::compute_gpu_id_from_edge_t{ + comm_size, row_comm_size, col_comm_size}] __device__(auto val) { + return key_func(thrust::get<0>(val), thrust::get<1>(val)); + }, + handle.get_stream()); + } else { + auto edge_first = thrust::make_zip_iterator( + thrust::make_tuple(store_transposed ? d_edgelist_cols.begin() : d_edgelist_rows.begin(), + store_transposed ? d_edgelist_rows.begin() : d_edgelist_cols.begin())); + + std::forward_as_tuple(std::tie(store_transposed ? d_rx_edgelist_cols : d_rx_edgelist_rows, + store_transposed ? d_rx_edgelist_rows : d_rx_edgelist_cols), + std::ignore) = + cugraph::experimental::groupby_gpuid_and_shuffle_values( + comm, // handle.get_comms(), + edge_first, + edge_first + d_edgelist_rows.size(), + [key_func = + cugraph::experimental::detail::compute_gpu_id_from_edge_t{ + comm_size, row_comm_size, col_comm_size}] __device__(auto val) { + return key_func(thrust::get<0>(val), thrust::get<1>(val)); + }, + handle.get_stream()); + } + + return std::make_tuple( + std::move(d_rx_edgelist_rows), std::move(d_rx_edgelist_cols), std::move(d_rx_edgelist_weights)); +} + +template std::tuple, + rmm::device_uvector, + std::optional>> +shuffle_edgelist_by_edge(raft::handle_t const& handle, + rmm::device_uvector& d_edgelist_rows, + rmm::device_uvector& d_edgelist_cols, + std::optional>& d_edgelist_weights, + bool store_transposed); + +template std::tuple, + rmm::device_uvector, + std::optional>> +shuffle_edgelist_by_edge(raft::handle_t const& handle, + rmm::device_uvector& d_edgelist_rows, + rmm::device_uvector& d_edgelist_cols, + std::optional>& d_edgelist_weights, + bool store_transposed); + +template std::tuple, + rmm::device_uvector, + std::optional>> +shuffle_edgelist_by_edge(raft::handle_t const& handle, + rmm::device_uvector& d_edgelist_rows, + rmm::device_uvector& d_edgelist_cols, + std::optional>& d_edgelist_weights, + bool store_transposed); + +template std::tuple, + rmm::device_uvector, + std::optional>> +shuffle_edgelist_by_edge(raft::handle_t const& handle, + rmm::device_uvector& d_edgelist_rows, + rmm::device_uvector& d_edgelist_cols, + std::optional>& d_edgelist_weights, + bool store_transposed); + +template +rmm::device_uvector shuffle_vertices(raft::handle_t const& handle, + rmm::device_uvector& d_vertices) +{ + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + + rmm::device_uvector d_rx_vertices(0, handle.get_stream()); + std::tie(d_rx_vertices, std::ignore) = cugraph::experimental::groupby_gpuid_and_shuffle_values( + comm, // handle.get_comms(), + d_vertices.begin(), + d_vertices.end(), + [key_func = + cugraph::experimental::detail::compute_gpu_id_from_vertex_t{ + comm_size}] __device__(auto val) { return 
key_func(val); }, + handle.get_stream()); + + return d_rx_vertices; +} + +template rmm::device_uvector shuffle_vertices(raft::handle_t const& handle, + rmm::device_uvector& d_vertices); + +template rmm::device_uvector shuffle_vertices(raft::handle_t const& handle, + rmm::device_uvector& d_vertices); + +template +rmm::device_uvector groupby_and_count_by_edge( + raft::handle_t const& handle, + rmm::device_uvector& d_edgelist_rows, + rmm::device_uvector& d_edgelist_cols, + std::optional>& d_edgelist_weights, + size_t number_of_local_adj_matrix_partitions) +{ + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_size = row_comm.get_size(); + auto const row_comm_rank = row_comm.get_rank(); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_size = col_comm.get_size(); + auto const col_comm_rank = col_comm.get_rank(); + + auto local_partition_id_op = + [comm_size, + key_func = cugraph::experimental::detail::compute_partition_id_from_edge_t{ + comm_size, row_comm_size, col_comm_size}] __device__(auto pair) { + return key_func(thrust::get<0>(pair), thrust::get<1>(pair)) / + comm_size; // global partition id to local partition id + }; + + auto pair_first = + thrust::make_zip_iterator(thrust::make_tuple(d_edgelist_rows.begin(), d_edgelist_cols.begin())); + + return d_edgelist_weights + ? cugraph::experimental::groupby_and_count(pair_first, + pair_first + d_edgelist_rows.size(), + d_edgelist_weights->begin(), + local_partition_id_op, + number_of_local_adj_matrix_partitions, + handle.get_stream()) + : cugraph::experimental::groupby_and_count(pair_first, + pair_first + d_edgelist_rows.size(), + local_partition_id_op, + number_of_local_adj_matrix_partitions, + handle.get_stream()); +} + +template rmm::device_uvector groupby_and_count_by_edge( + raft::handle_t const& handle, + rmm::device_uvector& d_edgelist_rows, + rmm::device_uvector& d_edgelist_cols, + std::optional>& d_edgelist_weights, + size_t number_of_local_adj_matrix_partitions); + +template rmm::device_uvector groupby_and_count_by_edge( + raft::handle_t const& handle, + rmm::device_uvector& d_edgelist_rows, + rmm::device_uvector& d_edgelist_cols, + std::optional>& d_edgelist_weights, + size_t number_of_local_adj_matrix_partitions); + +template rmm::device_uvector groupby_and_count_by_edge( + raft::handle_t const& handle, + rmm::device_uvector& d_edgelist_rows, + rmm::device_uvector& d_edgelist_cols, + std::optional>& d_edgelist_weights, + size_t number_of_local_adj_matrix_partitions); + +template rmm::device_uvector groupby_and_count_by_edge( + raft::handle_t const& handle, + rmm::device_uvector& d_edgelist_rows, + rmm::device_uvector& d_edgelist_cols, + std::optional>& d_edgelist_weights, + size_t number_of_local_adj_matrix_partitions); + +} // namespace detail +} // namespace cugraph diff --git a/cpp/src/detail/utility_wrappers.cu b/cpp/src/detail/utility_wrappers.cu new file mode 100644 index 00000000000..83a37d6b316 --- /dev/null +++ b/cpp/src/detail/utility_wrappers.cu @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include + +#include + +#include +#include + +namespace cugraph { +namespace detail { + +template +void uniform_random_fill(rmm::cuda_stream_view const& stream_view, + value_t* d_value, + size_t size, + value_t min_value, + value_t max_value, + uint64_t seed) +{ + raft::random::Rng rng(seed); + rng.uniform(d_value, size, min_value, max_value, stream_view.value()); +} + +template void uniform_random_fill(rmm::cuda_stream_view const& stream_view, + float* d_value, + size_t size, + float min_value, + float max_value, + uint64_t seed); + +template void uniform_random_fill(rmm::cuda_stream_view const& stream_view, + double* d_value, + size_t size, + double min_value, + double max_value, + uint64_t seed); + +template +void sequence_fill(rmm::cuda_stream_view const& stream_view, + value_t* d_value, + size_t size, + value_t start_value) +{ + thrust::sequence(rmm::exec_policy(stream_view), d_value, d_value + size, start_value); +} + +template void sequence_fill(rmm::cuda_stream_view const& stream_view, + int32_t* d_value, + size_t size, + int32_t start_value); + +template void sequence_fill(rmm::cuda_stream_view const& stream_view, + int64_t* d_value, + size_t size, + int64_t start_value); + +template +vertex_t compute_maximum_vertex_id(rmm::cuda_stream_view const& stream_view, + rmm::device_uvector const& d_edgelist_rows, + rmm::device_uvector const& d_edgelist_cols) +{ + auto edge_first = + thrust::make_zip_iterator(thrust::make_tuple(d_edgelist_rows.begin(), d_edgelist_cols.begin())); + + return thrust::transform_reduce( + rmm::exec_policy(stream_view), + edge_first, + edge_first + d_edgelist_rows.size(), + [] __device__(auto e) { return std::max(thrust::get<0>(e), thrust::get<1>(e)); }, + vertex_t{0}, + thrust::maximum()); +} + +template int32_t compute_maximum_vertex_id(rmm::cuda_stream_view const& stream_view, + rmm::device_uvector const& d_edgelist_rows, + rmm::device_uvector const& d_edgelist_cols); + +template int64_t compute_maximum_vertex_id(rmm::cuda_stream_view const& stream_view, + rmm::device_uvector const& d_edgelist_rows, + rmm::device_uvector const& d_edgelist_cols); + +} // namespace detail +} // namespace cugraph diff --git a/cpp/src/experimental/coarsen_graph.cu b/cpp/src/experimental/coarsen_graph.cu index 1b48411ac91..f34332464f2 100644 --- a/cpp/src/experimental/coarsen_graph.cu +++ b/cpp/src/experimental/coarsen_graph.cu @@ -14,6 +14,7 @@ * limitations under the License. */ +#include #include #include #include @@ -22,7 +23,6 @@ #include #include #include -#include #include #include @@ -295,53 +295,11 @@ coarsen_graph( adj_matrix_minor_labels.data(), graph_view.get_local_adj_matrix_partition_segment_offsets(i)); - // 1-2. globaly shuffle + // 1-2. globally shuffle - { - rmm::device_uvector rx_edgelist_major_vertices(0, handle.get_stream()); - rmm::device_uvector rx_edgelist_minor_vertices(0, handle.get_stream()); - auto rx_edgelist_weights = - edgelist_weights ? 
std::make_optional>(0, handle.get_stream()) - : std::nullopt; - if (edgelist_weights) { - auto edge_first = - thrust::make_zip_iterator(thrust::make_tuple(edgelist_major_vertices.begin(), - edgelist_minor_vertices.begin(), - (*edgelist_weights).begin())); - std::forward_as_tuple( - std::tie(rx_edgelist_major_vertices, rx_edgelist_minor_vertices, *rx_edgelist_weights), - std::ignore) = - groupby_gpuid_and_shuffle_values( - handle.get_comms(), - edge_first, - edge_first + edgelist_major_vertices.size(), - [key_func = - detail::compute_gpu_id_from_edge_t{ - comm_size, row_comm_size, col_comm_size}] __device__(auto val) { - return key_func(thrust::get<0>(val), thrust::get<1>(val)); - }, - handle.get_stream()); - } else { - auto edge_first = thrust::make_zip_iterator( - thrust::make_tuple(edgelist_major_vertices.begin(), edgelist_minor_vertices.begin())); - std::forward_as_tuple(std::tie(rx_edgelist_major_vertices, rx_edgelist_minor_vertices), - std::ignore) = - groupby_gpuid_and_shuffle_values( - handle.get_comms(), - edge_first, - edge_first + edgelist_major_vertices.size(), - [key_func = - detail::compute_gpu_id_from_edge_t{ - comm_size, row_comm_size, col_comm_size}] __device__(auto val) { - return key_func(thrust::get<0>(val), thrust::get<1>(val)); - }, - handle.get_stream()); - } - - edgelist_major_vertices = std::move(rx_edgelist_major_vertices); - edgelist_minor_vertices = std::move(rx_edgelist_minor_vertices); - edgelist_weights = std::move(rx_edgelist_weights); - } + std::tie(edgelist_major_vertices, edgelist_minor_vertices, edgelist_weights) = + cugraph::detail::shuffle_edgelist_by_edge( + handle, edgelist_major_vertices, edgelist_minor_vertices, edgelist_weights, false); // 1-3. append data to local adjacency matrix partitions @@ -349,27 +307,12 @@ coarsen_graph( // list based on the final matrix partition (maybe add // groupby_adj_matrix_partition_and_shuffle_values). - auto local_partition_id_op = - [comm_size, - key_func = detail::compute_partition_id_from_edge_t{ - comm_size, row_comm_size, col_comm_size}] __device__(auto pair) { - return key_func(thrust::get<0>(pair), thrust::get<1>(pair)) / - comm_size; // global partition id to local partition id - }; - auto pair_first = thrust::make_zip_iterator( - thrust::make_tuple(edgelist_major_vertices.begin(), edgelist_minor_vertices.begin())); - auto counts = edgelist_weights - ? 
groupby_and_count(pair_first, - pair_first + edgelist_major_vertices.size(), - (*edgelist_weights).begin(), - local_partition_id_op, - graph_view.get_number_of_local_adj_matrix_partitions(), - handle.get_stream()) - : groupby_and_count(pair_first, - pair_first + edgelist_major_vertices.size(), - local_partition_id_op, - graph_view.get_number_of_local_adj_matrix_partitions(), - handle.get_stream()); + auto counts = cugraph::detail::groupby_and_count_by_edge( + handle, + edgelist_major_vertices, + edgelist_minor_vertices, + edgelist_weights, + graph_view.get_number_of_local_adj_matrix_partitions()); std::vector h_counts(counts.size()); raft::update_host(h_counts.data(), counts.data(), counts.size(), handle.get_stream()); @@ -465,16 +408,7 @@ coarsen_graph( unique_labels.end())), handle.get_stream()); - rmm::device_uvector rx_unique_labels(0, handle.get_stream()); - std::tie(rx_unique_labels, std::ignore) = groupby_gpuid_and_shuffle_values( - handle.get_comms(), - unique_labels.begin(), - unique_labels.end(), - [key_func = detail::compute_gpu_id_from_vertex_t{comm.get_size()}] __device__( - auto val) { return key_func(val); }, - handle.get_stream()); - - unique_labels = std::move(rx_unique_labels); + unique_labels = cugraph::detail::shuffle_vertices(handle, unique_labels); thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), unique_labels.begin(), diff --git a/cpp/src/generators/generate_rmat_edgelist.cu b/cpp/src/generators/generate_rmat_edgelist.cu index 40df2fa5568..c7d8a5682bc 100644 --- a/cpp/src/generators/generate_rmat_edgelist.cu +++ b/cpp/src/generators/generate_rmat_edgelist.cu @@ -14,11 +14,11 @@ * limitations under the License. */ +#include #include #include #include -#include #include #include @@ -48,7 +48,6 @@ std::tuple, rmm::device_uvector> generat "Invalid input argument: a, b, c should be non-negative and a + b + c should not " "be larger than 1.0."); - raft::random::Rng rng(seed); // to limit memory footprint (1024 is a tuning parameter) auto max_edges_to_generate_per_iteration = static_cast(handle.get_device_properties().multiProcessorCount) * 1024; @@ -64,8 +63,11 @@ std::tuple, rmm::device_uvector> generat std::min(num_edges - num_edges_generated, max_edges_to_generate_per_iteration); auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(srcs.begin(), dsts.begin())) + num_edges_generated; - rng.uniform( - rands.data(), num_edges_to_generate * 2 * scale, 0.0f, 1.0f, handle.get_stream()); + + detail::uniform_random_fill( + handle.get_stream_view(), rands.data(), num_edges_to_generate * 2 * scale, 0.0f, 1.0f, seed); + seed += num_edges_to_generate * 2 * scale; + thrust::transform( rmm::exec_policy(handle.get_stream_view()), thrust::make_counting_iterator(size_t{0}), diff --git a/cpp/src/layout/barnes_hut.hpp b/cpp/src/layout/barnes_hut.cuh similarity index 98% rename from cpp/src/layout/barnes_hut.hpp rename to cpp/src/layout/barnes_hut.cuh index 57abde2262d..d05c6051d8b 100644 --- a/cpp/src/layout/barnes_hut.hpp +++ b/cpp/src/layout/barnes_hut.cuh @@ -16,19 +16,18 @@ #pragma once -#include "bh_kernels.hpp" -#include "fa2_kernels.hpp" +#include "bh_kernels.cuh" +#include "fa2_kernels.cuh" #include "utils.hpp" #include #include +#include #include #include #include -#include - #include #include @@ -135,8 +134,8 @@ void barnes_hut(raft::handle_t const& handle, raft::copy(nodes_pos, x_start, n, stream_view.value()); raft::copy(nodes_pos + nnodes + 1, y_start, n, stream_view.value()); } else { - raft::random::Rng rng(random_state); - 
rng.uniform(nodes_pos, (nnodes + 1) * 2, -100.0f, 100.0f, stream_view.value()); + uniform_random_fill( + handle.get_stream_view(), nodes_pos, (nnodes + 1) * 2, -100.0f, 100.0f, random_state); } // Allocate arrays for force computation diff --git a/cpp/src/layout/bh_kernels.hpp b/cpp/src/layout/bh_kernels.cuh similarity index 100% rename from cpp/src/layout/bh_kernels.hpp rename to cpp/src/layout/bh_kernels.cuh diff --git a/cpp/src/layout/exact_fa2.hpp b/cpp/src/layout/exact_fa2.cuh similarity index 97% rename from cpp/src/layout/exact_fa2.hpp rename to cpp/src/layout/exact_fa2.cuh index b908a8033e7..5b5c3f5e82e 100644 --- a/cpp/src/layout/exact_fa2.hpp +++ b/cpp/src/layout/exact_fa2.cuh @@ -21,13 +21,13 @@ #include +#include #include #include #include -#include -#include "exact_repulsion.hpp" -#include "fa2_kernels.hpp" +#include "exact_repulsion.cuh" +#include "fa2_kernels.cuh" #include "utils.hpp" namespace cugraph { @@ -79,9 +79,7 @@ void exact_fa2(raft::handle_t const& handle, d_swinging = swinging.data(); d_traction = traction.data(); - int seed{0}; - raft::random::Rng rng(seed); - rng.uniform(pos, n * 2, -100.0f, 100.0f, handle.get_stream()); + uniform_random_fill(handle.get_stream_view(), pos, n * 2, -100.0f, 100.0f, uint64_t{0}); if (x_start && y_start) { raft::copy(pos, x_start, n, stream_view.value()); diff --git a/cpp/src/layout/exact_repulsion.hpp b/cpp/src/layout/exact_repulsion.cuh similarity index 100% rename from cpp/src/layout/exact_repulsion.hpp rename to cpp/src/layout/exact_repulsion.cuh diff --git a/cpp/src/layout/fa2_kernels.hpp b/cpp/src/layout/fa2_kernels.cuh similarity index 100% rename from cpp/src/layout/fa2_kernels.hpp rename to cpp/src/layout/fa2_kernels.cuh diff --git a/cpp/src/layout/force_atlas2.cu b/cpp/src/layout/force_atlas2.cu index c2db4607b40..9dc4ee286b0 100644 --- a/cpp/src/layout/force_atlas2.cu +++ b/cpp/src/layout/force_atlas2.cu @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "barnes_hut.hpp" -#include "exact_fa2.hpp" +#include "barnes_hut.cuh" +#include "exact_fa2.cuh" namespace cugraph { diff --git a/cpp/src/layout/utils.hpp b/cpp/src/layout/utils.hpp index 822459c7751..ffbeb291e58 100644 --- a/cpp/src/layout/utils.hpp +++ b/cpp/src/layout/utils.hpp @@ -18,8 +18,6 @@ #include -#include - namespace cugraph { namespace detail { diff --git a/cpp/src/sampling/random_walks.cuh b/cpp/src/sampling/random_walks.cuh index c67f981ecf5..2286fa28697 100644 --- a/cpp/src/sampling/random_walks.cuh +++ b/cpp/src/sampling/random_walks.cuh @@ -18,13 +18,13 @@ // #pragma once +#include #include #include #include #include -#include #include #include @@ -144,8 +144,8 @@ struct rrandom_gen_t { // static void generate_random(raft::handle_t const& handle, real_t* p_d_rnd, size_t sz, seed_t seed) { - raft::random::Rng rng(seed); - rng.uniform(p_d_rnd, sz, real_t{0.0}, real_t{1.0}, handle.get_stream()); + cugraph::detail::uniform_random_fill( + handle.get_stream_view(), p_d_rnd, sz, real_t{0.0}, real_t{1.0}, seed); } private: diff --git a/cpp/src/structure/create_graph_from_edgelist.cu b/cpp/src/structure/create_graph_from_edgelist.cpp similarity index 90% rename from cpp/src/structure/create_graph_from_edgelist.cu rename to cpp/src/structure/create_graph_from_edgelist.cpp index a39ea3b87e4..6ce10c7ccdf 100644 --- a/cpp/src/structure/create_graph_from_edgelist.cu +++ b/cpp/src/structure/create_graph_from_edgelist.cpp @@ -13,10 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include +#include +#include #include +#include #include -#include #include @@ -24,6 +25,7 @@ #include #include +#include namespace cugraph { namespace experimental { @@ -59,29 +61,12 @@ create_graph_from_edgelist_impl( auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); auto const col_comm_size = col_comm.get_size(); - auto local_partition_id_op = - [comm_size, - key_func = cugraph::experimental::detail::compute_partition_id_from_edge_t{ - comm_size, row_comm_size, col_comm_size}] __device__(auto pair) { - return key_func(thrust::get<0>(pair), thrust::get<1>(pair)) / - comm_size; // global partition id to local partition id - }; - auto pair_first = - store_transposed - ? thrust::make_zip_iterator(thrust::make_tuple(edgelist_cols.begin(), edgelist_rows.begin())) - : thrust::make_zip_iterator(thrust::make_tuple(edgelist_rows.begin(), edgelist_cols.begin())); - auto edge_counts = edgelist_weights - ? cugraph::experimental::groupby_and_count(pair_first, - pair_first + edgelist_rows.size(), - (*edgelist_weights).begin(), - local_partition_id_op, - col_comm_size, - handle.get_stream()) - : cugraph::experimental::groupby_and_count(pair_first, - pair_first + edgelist_rows.size(), - local_partition_id_op, - col_comm_size, - handle.get_stream()); + auto edge_counts = + cugraph::detail::groupby_and_count_by_edge(handle, + store_transposed ? edgelist_cols : edgelist_rows, + store_transposed ? edgelist_rows : edgelist_cols, + edgelist_weights, + col_comm_size); std::vector h_edge_counts(edge_counts.size()); raft::update_host( @@ -182,17 +167,8 @@ create_graph_from_edgelist_impl( if (optional_vertex_span) { num_vertices = std::get<1>(*optional_vertex_span); } else { - auto edge_first = - thrust::make_zip_iterator(thrust::make_tuple(edgelist_rows.begin(), edgelist_cols.begin())); - num_vertices = - thrust::transform_reduce( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - edge_first, - edge_first + edgelist_rows.size(), - [] __device__(auto e) { return std::max(thrust::get<0>(e), thrust::get<1>(e)); }, - vertex_t{0}, - thrust::maximum()) + - 1; + num_vertices = 1 + cugraph::detail::compute_maximum_vertex_id( + handle.get_stream_view(), edgelist_rows, edgelist_cols); } } diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 77526ce9225..b0013db1dd6 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -21,7 +21,7 @@ add_library(cugraphtestutil STATIC utilities/matrix_market_file_utilities.cu - utilities/rmat_utilities.cu + utilities/rmat_utilities.cpp utilities/thrust_wrapper.cu utilities/misc_utilities.cpp components/wcc_graphs.cu diff --git a/cpp/tests/components/wcc_graphs.cu b/cpp/tests/components/wcc_graphs.cu index ff1681076f6..3ceebfd46a2 100644 --- a/cpp/tests/components/wcc_graphs.cu +++ b/cpp/tests/components/wcc_graphs.cu @@ -12,10 +12,9 @@ #include #include +#include #include -#include - #include #include @@ -35,7 +34,6 @@ LineGraph_Usecase::construct_graph(raft::handle_t const& handle, bool renumber) const { uint64_t seed{0}; - raft::random::Rng rng(seed); edge_t num_edges = 2 * (num_vertices_ - 1); @@ -47,7 +45,8 @@ LineGraph_Usecase::construct_graph(raft::handle_t const& handle, thrust::sequence( rmm::exec_policy(handle.get_stream()), vertices_v.begin(), vertices_v.end(), vertex_t{0}); - rng.uniform(order_v.data(), num_vertices_, 0.0f, 1.0f, handle.get_stream()); + cugraph::detail::uniform_random_fill( + handle.get_stream_view(), order_v.data(), num_vertices_, double{0.0}, double{1.0}, seed); 
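+  // the sort below turns vertices_v into a random permutation of the vertex ids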
thrust::sort_by_key( rmm::exec_policy(handle.get_stream()), order_v.begin(), order_v.end(), vertices_v.begin()); diff --git a/cpp/tests/utilities/rmat_utilities.cu b/cpp/tests/utilities/rmat_utilities.cpp similarity index 74% rename from cpp/tests/utilities/rmat_utilities.cu rename to cpp/tests/utilities/rmat_utilities.cpp index 51867bc733d..8502fe1b1c9 100644 --- a/cpp/tests/utilities/rmat_utilities.cu +++ b/cpp/tests/utilities/rmat_utilities.cpp @@ -16,17 +16,11 @@ #include -#include +#include +#include #include #include -#include #include -#include - -#include -#include - -#include #include @@ -114,32 +108,33 @@ generate_graph_from_rmat_params(raft::handle_t const& handle, std::make_optional>(d_tmp_rows.size(), handle.get_stream()); } - raft::random::Rng rng(base_seed + num_partitions + id); - rng.uniform(i == 0 ? (*d_edgelist_weights).data() : (*d_tmp_weights).data(), - i == 0 ? (*d_edgelist_weights).size() : (*d_tmp_weights).size(), - weight_t{0.0}, - weight_t{1.0}, - handle.get_stream()); + cugraph::detail::uniform_random_fill( + handle.get_stream_view(), + i == 0 ? (*d_edgelist_weights).data() : (*d_tmp_weights).data(), + i == 0 ? (*d_edgelist_weights).size() : (*d_tmp_weights).size(), + weight_t{0.0}, + weight_t{1.0}, + base_seed + num_partitions + id); } if (i > 0) { auto start_offset = d_edgelist_rows.size(); d_edgelist_rows.resize(start_offset + d_tmp_rows.size(), handle.get_stream()); d_edgelist_cols.resize(d_edgelist_rows.size(), handle.get_stream()); - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - d_tmp_rows.begin(), - d_tmp_rows.end(), - d_edgelist_rows.begin() + start_offset); - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - d_tmp_cols.begin(), - d_tmp_cols.end(), - d_edgelist_cols.begin() + start_offset); + raft::copy(d_edgelist_rows.begin() + start_offset, + d_tmp_rows.begin(), + d_tmp_rows.size(), + handle.get_stream()); + raft::copy(d_edgelist_cols.begin() + start_offset, + d_tmp_cols.begin(), + d_tmp_cols.size(), + handle.get_stream()); if (d_edgelist_weights) { (*d_edgelist_weights).resize(d_edgelist_rows.size(), handle.get_stream()); - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - (*d_tmp_weights).begin(), - (*d_tmp_weights).end(), - (*d_edgelist_weights).begin() + start_offset); + raft::copy(d_edgelist_weights->begin() + start_offset, + d_tmp_weights->begin(), + d_tmp_weights->size(), + handle.get_stream()); } } } @@ -148,83 +143,20 @@ generate_graph_from_rmat_params(raft::handle_t const& handle, // FIXME: may need to undo this and handle symmetrization elsewhere once the new test graph // generation API gets integrated #if 1 - auto offset = d_edgelist_rows.size(); - d_edgelist_rows.resize(offset * 2, handle.get_stream()); - d_edgelist_cols.resize(d_edgelist_rows.size(), handle.get_stream()); - if (d_edgelist_weights) { - (*d_edgelist_weights).resize(d_edgelist_rows.size(), handle.get_stream()); - } - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - d_edgelist_cols.begin(), - d_edgelist_cols.begin() + offset, - d_edgelist_rows.begin() + offset); - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - d_edgelist_rows.begin(), - d_edgelist_rows.begin() + offset, - d_edgelist_cols.begin() + offset); - if (d_edgelist_weights) { - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - (*d_edgelist_weights).begin(), - (*d_edgelist_weights).begin() + offset, - (*d_edgelist_weights).begin() + 
offset); - } + std::tie(d_edgelist_rows, d_edgelist_cols, d_edgelist_weights) = + cugraph::symmetrize_edgelist( + handle, + std::move(d_edgelist_rows), + std::move(d_edgelist_cols), + test_weighted ? std::optional>(std::move(d_edgelist_weights)) + : std::nullopt); #endif } if (multi_gpu) { - auto& comm = handle.get_comms(); - auto const comm_size = comm.get_size(); - auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto const row_comm_size = row_comm.get_size(); - auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - auto const col_comm_size = col_comm.get_size(); - - rmm::device_uvector d_rx_edgelist_rows(0, handle.get_stream()); - rmm::device_uvector d_rx_edgelist_cols(0, handle.get_stream()); - std::optional> d_rx_edgelist_weights{std::nullopt}; - if (d_edgelist_weights) { - auto edge_first = thrust::make_zip_iterator( - thrust::make_tuple(store_transposed ? d_edgelist_cols.begin() : d_edgelist_rows.begin(), - store_transposed ? d_edgelist_rows.begin() : d_edgelist_cols.begin(), - (*d_edgelist_weights).begin())); - - std::forward_as_tuple(std::tie(store_transposed ? d_rx_edgelist_cols : d_rx_edgelist_rows, - store_transposed ? d_rx_edgelist_rows : d_rx_edgelist_cols, - d_rx_edgelist_weights), - std::ignore) = - cugraph::experimental::groupby_gpuid_and_shuffle_values( - comm, // handle.get_comms(), - edge_first, - edge_first + d_edgelist_rows.size(), - [key_func = - cugraph::experimental::detail::compute_gpu_id_from_edge_t{ - comm_size, row_comm_size, col_comm_size}] __device__(auto val) { - return key_func(thrust::get<0>(val), thrust::get<1>(val)); - }, - handle.get_stream()); - } else { - auto edge_first = thrust::make_zip_iterator( - thrust::make_tuple(store_transposed ? d_edgelist_cols.begin() : d_edgelist_rows.begin(), - store_transposed ? d_edgelist_rows.begin() : d_edgelist_cols.begin())); - - std::forward_as_tuple(std::tie(store_transposed ? d_rx_edgelist_cols : d_rx_edgelist_rows, - store_transposed ? 
d_rx_edgelist_rows : d_rx_edgelist_cols), - std::ignore) = - cugraph::experimental::groupby_gpuid_and_shuffle_values( - comm, // handle.get_comms(), - edge_first, - edge_first + d_edgelist_rows.size(), - [key_func = - cugraph::experimental::detail::compute_gpu_id_from_edge_t{ - comm_size, row_comm_size, col_comm_size}] __device__(auto val) { - return key_func(thrust::get<0>(val), thrust::get<1>(val)); - }, - handle.get_stream()); - } - - d_edgelist_rows = std::move(d_rx_edgelist_rows); - d_edgelist_cols = std::move(d_rx_edgelist_cols); - d_edgelist_weights = std::move(d_rx_edgelist_weights); + std::tie(d_edgelist_rows, d_edgelist_cols, d_edgelist_weights) = + cugraph::detail::shuffle_edgelist_by_edge( + handle, d_edgelist_rows, d_edgelist_cols, d_edgelist_weights, store_transposed); } rmm::device_uvector d_vertices(0, handle.get_stream()); @@ -234,27 +166,13 @@ generate_graph_from_rmat_params(raft::handle_t const& handle, auto start_offset = d_vertices.size(); d_vertices.resize(start_offset + (partition_vertex_lasts[i] - partition_vertex_firsts[i]), handle.get_stream()); - thrust::sequence(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - d_vertices.begin() + start_offset, - d_vertices.end(), - partition_vertex_firsts[i]); + cugraph::detail::sequence_fill(handle.get_stream_view(), + d_vertices.begin() + start_offset, + d_vertices.size() - start_offset, + partition_vertex_firsts[i]); } - if (multi_gpu) { - auto& comm = handle.get_comms(); - auto const comm_size = comm.get_size(); - - rmm::device_uvector d_rx_vertices(0, handle.get_stream()); - std::tie(d_rx_vertices, std::ignore) = cugraph::experimental::groupby_gpuid_and_shuffle_values( - comm, // handle.get_comms(), - d_vertices.begin(), - d_vertices.end(), - [key_func = - cugraph::experimental::detail::compute_gpu_id_from_vertex_t{ - comm_size}] __device__(auto val) { return key_func(val); }, - handle.get_stream()); - d_vertices = std::move(d_rx_vertices); - } + if (multi_gpu) { d_vertices = cugraph::detail::shuffle_vertices(handle, d_vertices); } return cugraph::experimental:: create_graph_from_edgelist( @@ -266,7 +184,7 @@ generate_graph_from_rmat_params(raft::handle_t const& handle, std::move(d_edgelist_weights), cugraph::experimental::graph_properties_t{undirected, true}, renumber); -} +} // namespace test // explicit instantiations From f3f67e86ace61114b8a820cb2235b9a2e9f171cf Mon Sep 17 00:00:00 2001 From: Joseph Nke <76006812+jnke2016@users.noreply.github.com> Date: Thu, 22 Jul 2021 09:57:04 -0500 Subject: [PATCH 330/343] Fix MG_test bug (#1718) Fix `test_mg_batch_betweenness_centrality`, `test_mg_batch_edge_betweenness_centrality`, `test_mg_replication`, `test_mg_renumber` closes #1706 closes #1715 closes #1716 Authors: - Joseph Nke (https://github.com/jnke2016) Approvers: - Rick Ratzel (https://github.com/rlratzel) - Brad Rees (https://github.com/BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1718 --- python/cugraph/dask/common/input_utils.py | 13 +++++++++---- python/cugraph/dask/common/part_utils.py | 10 +++++++--- python/cugraph/dask/structure/replication.pyx | 6 +++--- python/cugraph/tests/dask/test_mg_renumber.py | 9 ++++++--- 4 files changed, 25 insertions(+), 13 deletions(-) diff --git a/python/cugraph/dask/common/input_utils.py b/python/cugraph/dask/common/input_utils.py index f7f866e1211..9d1c28b6c4e 100644 --- a/python/cugraph/dask/common/input_utils.py +++ b/python/cugraph/dask/common/input_utils.py @@ -66,7 +66,7 @@ def get_client(cls, client=None): """ Class methods for initalization """ 
@classmethod - def create(cls, data, client=None): + def create(cls, data, client=None, batch_enabled=False): """ Creates a distributed data handler instance with the given distributed data set(s). @@ -90,7 +90,8 @@ def create(cls, data, client=None): else: raise Exception("Graph data must be dask-cudf dataframe") - gpu_futures = client.sync(_extract_partitions, data, client) + gpu_futures = client.sync( + _extract_partitions, data, client, batch_enabled=batch_enabled) workers = tuple(OrderedDict.fromkeys(map(lambda x: x[0], gpu_futures))) return DistributedDataHandler(gpu_futures=gpu_futures, workers=workers, datatype=datatype, multiple=multiple, @@ -196,6 +197,9 @@ def _workers_to_parts(futures): if w_to_p_map[w] is None: w_to_p_map[w] = [] w_to_p_map[w].append(p) + keys_to_delete = [w for (w, p) in w_to_p_map.items() if p is None] + for k in keys_to_delete: + del w_to_p_map[k] return w_to_p_map @@ -205,8 +209,9 @@ def get_obj(x): return x[0] if multiple else x return total, reduce(lambda a, b: a + b, total) -def get_mg_batch_data(dask_cudf_data): - data = DistributedDataHandler.create(data=dask_cudf_data) +def get_mg_batch_data(dask_cudf_data, batch_enabled=False): + data = DistributedDataHandler.create( + data=dask_cudf_data, batch_enabled=batch_enabled) return data diff --git a/python/cugraph/dask/common/part_utils.py b/python/cugraph/dask/common/part_utils.py index ac0ff6a9a43..2bff490d35c 100644 --- a/python/cugraph/dask/common/part_utils.py +++ b/python/cugraph/dask/common/part_utils.py @@ -78,13 +78,17 @@ def persist_distributed_data(dask_df, client): return parts -async def _extract_partitions(dask_obj, client=None): - +async def _extract_partitions(dask_obj, client=None, batch_enabled=False): client = default_client() if client is None else client + worker_list = Comms.get_workers() # dask.dataframe or dask.array if isinstance(dask_obj, (daskDataFrame, daskArray, daskSeries)): # parts = persist_distributed_data(dask_obj, client) - persisted = client.persist(dask_obj) + # FIXME: persist data to the same worker when batch_enabled=True + if batch_enabled: + persisted = client.persist(dask_obj, workers=worker_list[0]) + else: + persisted = client.persist(dask_obj) parts = futures_of(persisted) # iterable of dask collections (need to colocate them) elif isinstance(dask_obj, collections.Sequence): diff --git a/python/cugraph/dask/structure/replication.pyx b/python/cugraph/dask/structure/replication.pyx index 6d579e126bf..417300f806f 100644 --- a/python/cugraph/dask/structure/replication.pyx +++ b/python/cugraph/dask/structure/replication.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -38,7 +38,7 @@ def replicate_cudf_dataframe(cudf_dataframe, client=None, comms=None): dask_cudf_df = dask_cudf.from_cudf(cudf_dataframe, npartitions=1) df_length = len(dask_cudf_df) - _df_data = get_mg_batch_data(dask_cudf_df) + _df_data = get_mg_batch_data(dask_cudf_df, batch_enabled=True) df_data = mg_utils.prepare_worker_to_parts(_df_data, client) workers_to_futures = {worker: client.submit(_replicate_cudf_dataframe, @@ -90,7 +90,7 @@ def replicate_cudf_series(cudf_series, client=None, comms=None): dask_cudf_series = dask_cudf.from_cudf(cudf_series, npartitions=1) series_length = len(dask_cudf_series) - _series_data = get_mg_batch_data(dask_cudf_series) + _series_data = get_mg_batch_data(dask_cudf_series, batch_enabled=True) series_data = mg_utils.prepare_worker_to_parts(_series_data) dtype = cudf_series.dtype diff --git a/python/cugraph/tests/dask/test_mg_renumber.py b/python/cugraph/tests/dask/test_mg_renumber.py index 68ec3de35f8..de6d1ea4587 100644 --- a/python/cugraph/tests/dask/test_mg_renumber.py +++ b/python/cugraph/tests/dask/test_mg_renumber.py @@ -28,7 +28,8 @@ from cugraph.structure.number_map import NumberMap from cugraph.dask.common.mg_utils import (is_single_gpu, setup_local_dask_cluster, - teardown_local_dask_cluster) + teardown_local_dask_cluster, + get_visible_devices) @pytest.fixture(scope="module") @@ -59,7 +60,8 @@ def test_mg_renumber(graph_file, client_connection): gdf["src"] = sources + translate gdf["dst"] = destinations + translate - ddf = dask.dataframe.from_pandas(gdf, npartitions=2) + ddf = dask.dataframe.from_pandas( + gdf, npartitions=len(get_visible_devices())) # preserve_order is not supported for MG renumbered_df, renumber_map = NumberMap.renumber(ddf, @@ -107,7 +109,8 @@ def test_mg_renumber_add_internal_vertex_id(graph_file, client_connection): gdf["dst"] = destinations + translate gdf["weight"] = gdf.index.astype(np.float) - ddf = dask.dataframe.from_pandas(gdf, npartitions=2) + ddf = dask.dataframe.from_pandas( + gdf, npartitions=len(get_visible_devices())) ren2, num2 = NumberMap.renumber( ddf, ["src", "src_old"], ["dst", "dst_old"] From e5b359970c5912b37a861e6baf67fe5fff3f07ae Mon Sep 17 00:00:00 2001 From: Jordan Jacobelli Date: Fri, 23 Jul 2021 00:10:01 +0200 Subject: [PATCH 331/343] Fix libfaiss dependency to not expressly depend on conda-forge (#1728) Authors: - Jordan Jacobelli (https://github.com/Ethyling) Approvers: - Ray Douglass (https://github.com/raydouglass) - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/1728 --- conda/recipes/libcugraph/meta.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conda/recipes/libcugraph/meta.yaml b/conda/recipes/libcugraph/meta.yaml index 144eb85ce9e..570a0ec09b2 100644 --- a/conda/recipes/libcugraph/meta.yaml +++ b/conda/recipes/libcugraph/meta.yaml @@ -40,13 +40,13 @@ requirements: - gtest - gmock - faiss-proc=*=cuda - - conda-forge::libfaiss=1.7.0 + - libfaiss 1.7.0 *_cuda run: - {{ pin_compatible('cudatoolkit', max_pin='x.x') }} - nccl>=2.9.9 - ucx-proc=*=gpu - faiss-proc=*=cuda - - conda-forge::libfaiss=1.7.0 + - libfaiss 1.7.0 *_cuda about: home: http://rapids.ai/ From e074f82ea0c9abe833793d026572183fc9cd3f52 Mon Sep 17 00:00:00 2001 From: Kumar Aatish Date: Wed, 28 Jul 2021 09:39:11 -0400 Subject: [PATCH 332/343] ReduceV test (#1710) Added test for reduce_v primitive. 
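For reference, a minimal sketch of the pattern the new test exercises
(graph_view, prop_first, num_vertices and init are placeholder names, not
symbols introduced by this change): reduce_v reduces a range of per-vertex
property values to a single value, and the test compares the multi-GPU result
against a plain thrust::reduce over the same properties on a single GPU.

    // illustrative only; float per-vertex properties assumed
    float init{10.0f};
    auto result = reduce_v(handle, graph_view,
                           prop_first, prop_first + num_vertices, init);
    auto expected = thrust::reduce(
      rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
      prop_first, prop_first + num_vertices, init,
      cugraph::experimental::property_add<float>());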
Authors: - Kumar Aatish (https://github.com/kaatish) Approvers: - Seunghwa Kang (https://github.com/seunghwak) - Chuck Hastings (https://github.com/ChuckHastings) URL: https://github.com/rapidsai/cugraph/pull/1710 --- .../copy_v_transform_reduce_in_out_nbr.cuh | 38 +- cpp/include/cugraph/prims/count_if_e.cuh | 2 +- ...dge_op_utils.cuh => property_op_utils.cuh} | 32 +- cpp/include/cugraph/prims/reduce_v.cuh | 17 +- ...orm_reduce_by_adj_matrix_row_col_key_e.cuh | 2 +- .../cugraph/prims/transform_reduce_e.cuh | 33 +- .../update_frontier_v_push_if_out_nbr.cuh | 2 +- .../cugraph/utilities/thrust_tuple_utils.cuh | 26 -- cpp/tests/CMakeLists.txt | 4 + cpp/tests/prims/mg_reduce_v.cu | 365 ++++++++++++++++++ 10 files changed, 445 insertions(+), 76 deletions(-) rename cpp/include/cugraph/prims/{edge_op_utils.cuh => property_op_utils.cuh} (90%) create mode 100644 cpp/tests/prims/mg_reduce_v.cu diff --git a/cpp/include/cugraph/prims/copy_v_transform_reduce_in_out_nbr.cuh b/cpp/include/cugraph/prims/copy_v_transform_reduce_in_out_nbr.cuh index 1a07bf98ec9..148549fa99a 100644 --- a/cpp/include/cugraph/prims/copy_v_transform_reduce_in_out_nbr.cuh +++ b/cpp/include/cugraph/prims/copy_v_transform_reduce_in_out_nbr.cuh @@ -17,7 +17,7 @@ #include #include -#include +#include #include #include #include @@ -75,6 +75,7 @@ __global__ void for_all_major_for_all_nbr_hypersparse( auto dcs_nzd_vertex_count = *(matrix_partition.get_dcs_nzd_vertex_count()); + property_add edge_property_add{}; while (idx < static_cast(dcs_nzd_vertex_count)) { auto major = *(matrix_partition.get_major_from_major_hypersparse_idx_nocheck(static_cast(idx))); @@ -118,13 +119,13 @@ __global__ void for_all_major_for_all_nbr_hypersparse( }; if (update_major) { - *(result_value_output_first + (major - major_hypersparse_first)) = thrust::transform_reduce( - thrust::seq, - thrust::make_counting_iterator(edge_t{0}), - thrust::make_counting_iterator(local_degree), - transform_op, - init, - [] __device__(auto lhs, auto rhs) { return plus_edge_op_result(lhs, rhs); }); + *(result_value_output_first + (major - major_hypersparse_first)) = + thrust::transform_reduce(thrust::seq, + thrust::make_counting_iterator(edge_t{0}), + thrust::make_counting_iterator(local_degree), + transform_op, + init, + edge_property_add); } else { thrust::for_each( thrust::seq, @@ -169,6 +170,7 @@ __global__ void for_all_major_for_all_nbr_low_degree( auto major_start_offset = static_cast(major_first - matrix_partition.get_major_first()); auto idx = static_cast(tid); + property_add edge_property_add{}; while (idx < static_cast(major_last - major_first)) { auto major_offset = major_start_offset + idx; vertex_t const* indices{nullptr}; @@ -212,13 +214,13 @@ __global__ void for_all_major_for_all_nbr_low_degree( }; if (update_major) { - *(result_value_output_first + idx) = thrust::transform_reduce( - thrust::seq, - thrust::make_counting_iterator(edge_t{0}), - thrust::make_counting_iterator(local_degree), - transform_op, - init, - [] __device__(auto lhs, auto rhs) { return plus_edge_op_result(lhs, rhs); }); + *(result_value_output_first + idx) = + thrust::transform_reduce(thrust::seq, + thrust::make_counting_iterator(edge_t{0}), + thrust::make_counting_iterator(local_degree), + transform_op, + init, + edge_property_add); } else { thrust::for_each( thrust::seq, @@ -266,6 +268,7 @@ __global__ void for_all_major_for_all_nbr_mid_degree( auto major_start_offset = static_cast(major_first - matrix_partition.get_major_first()); auto idx = static_cast(tid / raft::warp_size()); + property_add 
edge_property_add{}; while (idx < static_cast(major_last - major_first)) { auto major_offset = major_start_offset + idx; vertex_t const* indices{nullptr}; @@ -302,7 +305,7 @@ __global__ void for_all_major_for_all_nbr_mid_degree( *(adj_matrix_col_value_input_first + col_offset), e_op); if (update_major) { - e_op_result_sum = plus_edge_op_result(e_op_result_sum, e_op_result); + e_op_result_sum = edge_property_add(e_op_result_sum, e_op_result); } else { atomic_accumulate_edge_op_result(result_value_output_first + minor_offset, e_op_result); } @@ -344,6 +347,7 @@ __global__ void for_all_major_for_all_nbr_high_degree( auto major_start_offset = static_cast(major_first - matrix_partition.get_major_first()); auto idx = static_cast(blockIdx.x); + property_add edge_property_add{}; while (idx < static_cast(major_last - major_first)) { auto major_offset = major_start_offset + idx; vertex_t const* indices{nullptr}; @@ -380,7 +384,7 @@ __global__ void for_all_major_for_all_nbr_high_degree( *(adj_matrix_col_value_input_first + col_offset), e_op); if (update_major) { - e_op_result_sum = plus_edge_op_result(e_op_result_sum, e_op_result); + e_op_result_sum = edge_property_add(e_op_result_sum, e_op_result); } else { atomic_accumulate_edge_op_result(result_value_output_first + minor_offset, e_op_result); } diff --git a/cpp/include/cugraph/prims/count_if_e.cuh b/cpp/include/cugraph/prims/count_if_e.cuh index b8fa5dc9d8d..cfbb81d9bc9 100644 --- a/cpp/include/cugraph/prims/count_if_e.cuh +++ b/cpp/include/cugraph/prims/count_if_e.cuh @@ -16,7 +16,7 @@ #pragma once #include -#include +#include #include #include diff --git a/cpp/include/cugraph/prims/edge_op_utils.cuh b/cpp/include/cugraph/prims/property_op_utils.cuh similarity index 90% rename from cpp/include/cugraph/prims/edge_op_utils.cuh rename to cpp/include/cugraph/prims/property_op_utils.cuh index 23a66e8a0c1..ec3ed788cc1 100644 --- a/cpp/include/cugraph/prims/edge_op_utils.cuh +++ b/cpp/include/cugraph/prims/property_op_utils.cuh @@ -123,18 +123,28 @@ struct cast_edge_op_bool_to_integer { }; template -__host__ __device__ std::enable_if_t::value, T> plus_edge_op_result( - T const& lhs, T const& rhs) -{ - return lhs + rhs; -} +struct property_add : public thrust::plus { +}; -template -__host__ __device__ std::enable_if_t::value, T> plus_edge_op_result(T const& lhs, - T const& rhs) -{ - return plus_thrust_tuple()(lhs, rhs); -} +template +struct property_add> + : public thrust:: + binary_function, thrust::tuple, thrust::tuple> { + using Type = thrust::tuple; + + private: + template + __device__ constexpr auto sum_impl(T& t1, T& t2, std::index_sequence) + { + return thrust::make_tuple((thrust::get(t1) + thrust::get(t2))...); + } + + public: + __device__ constexpr auto operator()(const Type& t1, const Type& t2) + { + return sum_impl(t1, t2, std::make_index_sequence::value>()); + } +}; template __device__ std::enable_if_t::value, void> diff --git a/cpp/include/cugraph/prims/reduce_v.cuh b/cpp/include/cugraph/prims/reduce_v.cuh index d27a45e2737..c7c504942d1 100644 --- a/cpp/include/cugraph/prims/reduce_v.cuh +++ b/cpp/include/cugraph/prims/reduce_v.cuh @@ -16,6 +16,7 @@ #pragma once #include +#include #include #include @@ -51,10 +52,12 @@ T reduce_v(raft::handle_t const& handle, VertexValueInputIterator vertex_value_input_first, T init) { - auto ret = thrust::reduce(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - vertex_value_input_first, - vertex_value_input_first + graph_view.get_number_of_local_vertices(), - init); + auto ret = 
thrust::reduce( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertex_value_input_first, + vertex_value_input_first + graph_view.get_number_of_local_vertices(), + ((GraphViewType::is_multi_gpu) && (handle.get_comms().get_rank() == 0)) ? init : T{}, + property_add()); if (GraphViewType::is_multi_gpu) { ret = host_scalar_allreduce(handle.get_comms(), ret, handle.get_stream()); } @@ -87,7 +90,11 @@ T reduce_v(raft::handle_t const& handle, T init) { auto ret = thrust::reduce( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), input_first, input_last, init); + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + input_first, + input_last, + ((GraphViewType::is_multi_gpu) && (handle.get_comms().get_rank() == 0)) ? init : T{}, + property_add()); if (GraphViewType::is_multi_gpu) { ret = host_scalar_allreduce(handle.get_comms(), ret, handle.get_stream()); } diff --git a/cpp/include/cugraph/prims/transform_reduce_by_adj_matrix_row_col_key_e.cuh b/cpp/include/cugraph/prims/transform_reduce_by_adj_matrix_row_col_key_e.cuh index 2d254991c26..c1887433fd1 100644 --- a/cpp/include/cugraph/prims/transform_reduce_by_adj_matrix_row_col_key_e.cuh +++ b/cpp/include/cugraph/prims/transform_reduce_by_adj_matrix_row_col_key_e.cuh @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/cpp/include/cugraph/prims/transform_reduce_e.cuh b/cpp/include/cugraph/prims/transform_reduce_e.cuh index ae9413f7857..5ce40ea20cf 100644 --- a/cpp/include/cugraph/prims/transform_reduce_e.cuh +++ b/cpp/include/cugraph/prims/transform_reduce_e.cuh @@ -16,7 +16,7 @@ #pragma once #include -#include +#include #include #include @@ -65,6 +65,7 @@ __global__ void for_all_major_for_all_nbr_hypersparse( auto dcs_nzd_vertex_count = *(matrix_partition.get_dcs_nzd_vertex_count()); + property_add edge_property_add{}; e_op_result_t e_op_result_sum{}; while (idx < static_cast(dcs_nzd_vertex_count)) { auto major = @@ -111,9 +112,9 @@ __global__ void for_all_major_for_all_nbr_hypersparse( e_op); }, e_op_result_t{}, - [] __device__(auto lhs, auto rhs) { return plus_edge_op_result(lhs, rhs); }); + edge_property_add); - e_op_result_sum = plus_edge_op_result(e_op_result_sum, sum); + e_op_result_sum = edge_property_add(e_op_result_sum, sum); idx += gridDim.x * blockDim.x; } @@ -149,6 +150,7 @@ __global__ void for_all_major_for_all_nbr_low_degree( auto major_start_offset = static_cast(major_first - matrix_partition.get_major_first()); size_t idx = static_cast(tid); + property_add edge_property_add{}; e_op_result_t e_op_result_sum{}; while (idx < static_cast(major_last - major_first)) { auto major_offset = major_start_offset + idx; @@ -195,9 +197,9 @@ __global__ void for_all_major_for_all_nbr_low_degree( e_op); }, e_op_result_t{}, - [] __device__(auto lhs, auto rhs) { return plus_edge_op_result(lhs, rhs); }); + edge_property_add); - e_op_result_sum = plus_edge_op_result(e_op_result_sum, sum); + e_op_result_sum = edge_property_add(e_op_result_sum, sum); idx += gridDim.x * blockDim.x; } @@ -235,6 +237,7 @@ __global__ void for_all_major_for_all_nbr_mid_degree( auto major_start_offset = static_cast(major_first - matrix_partition.get_major_first()); size_t idx = static_cast(tid / raft::warp_size()); + property_add edge_property_add{}; e_op_result_t e_op_result_sum{}; while (idx < static_cast(major_last - major_first)) { auto major_offset = major_start_offset + idx; @@ -269,7 +272,7 @@ __global__ void for_all_major_for_all_nbr_mid_degree( 
*(adj_matrix_row_value_input_first + row_offset), *(adj_matrix_col_value_input_first + col_offset), e_op); - e_op_result_sum = plus_edge_op_result(e_op_result_sum, e_op_result); + e_op_result_sum = edge_property_add(e_op_result_sum, e_op_result); } idx += gridDim.x * (blockDim.x / raft::warp_size()); } @@ -305,6 +308,7 @@ __global__ void for_all_major_for_all_nbr_high_degree( auto major_start_offset = static_cast(major_first - matrix_partition.get_major_first()); size_t idx = static_cast(blockIdx.x); + property_add edge_property_add{}; e_op_result_t e_op_result_sum{}; while (idx < static_cast(major_last - major_first)) { auto major_offset = major_start_offset + idx; @@ -339,7 +343,7 @@ __global__ void for_all_major_for_all_nbr_high_degree( *(adj_matrix_row_value_input_first + row_offset), *(adj_matrix_col_value_input_first + col_offset), e_op); - e_op_result_sum = plus_edge_op_result(e_op_result_sum, e_op_result); + e_op_result_sum = edge_property_add(e_op_result_sum, e_op_result); } idx += gridDim.x; } @@ -400,6 +404,8 @@ T transform_reduce_e(raft::handle_t const& handle, using edge_t = typename GraphViewType::edge_type; using weight_t = typename GraphViewType::weight_type; + property_add edge_property_add{}; + auto result_buffer = allocate_dataframe_buffer(1, handle.get_stream()); thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), get_dataframe_buffer_begin(result_buffer), @@ -498,18 +504,17 @@ T transform_reduce_e(raft::handle_t const& handle, } } - auto result = - thrust::reduce(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - get_dataframe_buffer_begin(result_buffer), - get_dataframe_buffer_begin(result_buffer) + 1, - T{}, - [] __device__(T lhs, T rhs) { return plus_edge_op_result(lhs, rhs); }); + auto result = thrust::reduce(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + get_dataframe_buffer_begin(result_buffer), + get_dataframe_buffer_begin(result_buffer) + 1, + T{}, + edge_property_add); if (GraphViewType::is_multi_gpu) { result = host_scalar_allreduce(handle.get_comms(), result, handle.get_stream()); } - return plus_edge_op_result(init, result); + return edge_property_add(init, result); } } // namespace experimental diff --git a/cpp/include/cugraph/prims/update_frontier_v_push_if_out_nbr.cuh b/cpp/include/cugraph/prims/update_frontier_v_push_if_out_nbr.cuh index 88a30a45f03..e2f72c66d0b 100644 --- a/cpp/include/cugraph/prims/update_frontier_v_push_if_out_nbr.cuh +++ b/cpp/include/cugraph/prims/update_frontier_v_push_if_out_nbr.cuh @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/cpp/include/cugraph/utilities/thrust_tuple_utils.cuh b/cpp/include/cugraph/utilities/thrust_tuple_utils.cuh index d5ce6ff1a29..ddc325b6bbb 100644 --- a/cpp/include/cugraph/utilities/thrust_tuple_utils.cuh +++ b/cpp/include/cugraph/utilities/thrust_tuple_utils.cuh @@ -61,20 +61,6 @@ struct compute_thrust_tuple_element_sizes_impl { void compute(std::array::value>& arr) const {} }; -template -struct plus_thrust_tuple_impl { - __host__ __device__ constexpr void compute(TupleType& lhs, TupleType const& rhs) const - { - thrust::get(lhs) += thrust::get(rhs); - plus_thrust_tuple_impl().compute(lhs, rhs); - } -}; - -template -struct plus_thrust_tuple_impl { - __host__ __device__ constexpr void compute(TupleType& lhs, TupleType const& rhs) const {} -}; - template __device__ std::enable_if_t::value, void> atomic_accumulate_impl( thrust::detail::any_assign& /* dereferencing thrust::discard_iterator 
results in this type */ lhs, @@ -193,18 +179,6 @@ struct compute_thrust_tuple_element_sizes { } }; -template -struct plus_thrust_tuple { - __host__ __device__ constexpr TupleType operator()(TupleType const& lhs, - TupleType const& rhs) const - { - size_t constexpr tuple_size = thrust::tuple_size::value; - auto ret = lhs; - detail::plus_thrust_tuple_impl().compute(ret, rhs); - return ret; - } -}; - template struct atomic_accumulate_thrust_tuple { __device__ constexpr void operator()(Iterator iter, TupleType const& value) const diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index b0013db1dd6..2d13b46ac61 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -463,6 +463,10 @@ if(BUILD_CUGRAPH_MG_TESTS) ########################################################################################### # - MG PRIMS COUNT_IF_V tests ------------------------------------------------------------- ConfigureTestMG(MG_COUNT_IF_V_TEST prims/mg_count_if_v.cu) + + ########################################################################################### + # - MG PRIMS REDUCE_V tests --------------------------------------------------------------- + ConfigureTestMG(MG_REDUCE_V_TEST prims/mg_reduce_v.cu) else() message(FATAL_ERROR "OpenMPI NOT found, cannot build MG tests.") endif() diff --git a/cpp/tests/prims/mg_reduce_v.cu b/cpp/tests/prims/mg_reduce_v.cu new file mode 100644 index 00000000000..539e4e35ded --- /dev/null +++ b/cpp/tests/prims/mg_reduce_v.cu @@ -0,0 +1,365 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include + +// do the perf measurements +// enabled by command line parameter s'--perf' +// +static int PERF = 0; + +template +struct property_transform : public thrust::unary_function> { + int mod{}; + property_transform(int mod_count) : mod(mod_count) {} + __device__ auto operator()(const vertex_t& val) + { + cuco::detail::MurmurHash3_32 hash_func{}; + auto value = hash_func(val) % mod; + return thrust::make_tuple(static_cast(value)...); + } +}; +template typename Tuple, typename... 
T> +struct property_transform> : public property_transform { +}; + +template +auto make_iterator_tuple(Tuple& data, std::index_sequence) +{ + return thrust::make_tuple((std::get(data).begin())...); +} + +template +auto get_zip_iterator(std::tuple& data) +{ + return thrust::make_zip_iterator(make_iterator_tuple( + data, std::make_index_sequence>::value>())); +} + +template +auto get_property_iterator(std::tuple& data) +{ + return (std::get<0>(data)).begin(); +} + +template +auto get_property_iterator(std::tuple& data) +{ + return get_zip_iterator(data); +} + +template +struct generate_impl { + static thrust::tuple initial_value(int init) + { + return thrust::make_tuple(static_cast(init)...); + } + template + static std::tuple...> property(rmm::device_uvector& labels, + int hash_bin_count, + raft::handle_t const& handle) + { + auto data = std::make_tuple(rmm::device_uvector(labels.size(), handle.get_stream())...); + auto zip = get_zip_iterator(data); + thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + labels.begin(), + labels.end(), + zip, + property_transform(hash_bin_count)); + return data; + } + template + static std::tuple...> property(thrust::counting_iterator begin, + thrust::counting_iterator end, + int hash_bin_count, + raft::handle_t const& handle) + { + auto length = thrust::distance(begin, end); + auto data = std::make_tuple(rmm::device_uvector(length, handle.get_stream())...); + auto zip = get_zip_iterator(data); + thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + begin, + end, + zip, + property_transform(hash_bin_count)); + return data; + } +}; + +template +struct result_compare { + constexpr auto operator()(const T& t1, const T& t2) { return (t1 == t2); } +}; + +template +struct result_compare> { + static constexpr double threshold_ratio{1e-3}; + + private: + template + bool equal(T t1, T t2) + { + if constexpr (std::is_floating_point_v) { + return std::abs(t1 - t2) < (std::max(t1, t2) * threshold_ratio); + } + return t1 == t2; + } + template + constexpr auto equality_impl(T& t1, T& t2, std::index_sequence) + { + return (... && (equal(thrust::get(t1), thrust::get(t2)))); + } + + public: + using Type = thrust::tuple; + constexpr auto operator()(const Type& t1, const Type& t2) + { + return equality_impl(t1, t2, std::make_index_sequence::value>()); + } +}; + +template +struct generate : public generate_impl { + static T initial_value(int init) { return static_cast(init); } +}; +template +struct generate> : public generate_impl { +}; + +struct Prims_Usecase { + bool check_correctness{true}; +}; + +template +class Tests_MG_ReduceIfV + : public ::testing::TestWithParam> { + public: + Tests_MG_ReduceIfV() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + // Compare the results of reduce_if_v primitive and thrust reduce on a single GPU + template + void run_current_test(Prims_Usecase const& prims_usecase, input_usecase_t const& input_usecase) + { + // 1. initialize handle + + raft::handle_t handle{}; + HighResClock hr_clock{}; + + raft::comms::initialize_mpi_comms(&handle, MPI_COMM_WORLD); + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + + auto row_comm_size = static_cast(sqrt(static_cast(comm_size))); + while (comm_size % row_comm_size != 0) { + --row_comm_size; + } + cugraph::partition_2d::subcomm_factory_t + subcomm_factory(handle, row_comm_size); + + // 2. 
create MG graph + + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle.get_comms().barrier(); + hr_clock.start(); + } + auto [mg_graph, d_mg_renumber_map_labels] = + input_usecase.template construct_graph( + handle, true, true); + + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle.get_comms().barrier(); + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "MG construct_graph took " << elapsed_time * 1e-6 << " s.\n"; + } + + auto mg_graph_view = mg_graph.view(); + + // 3. run MG count if + + const int hash_bin_count = 5; + const int initial_value = 10; + + auto property_initial_value = generate::initial_value(initial_value); + auto property_data = + generate::property((*d_mg_renumber_map_labels), hash_bin_count, handle); + auto property_iter = get_property_iterator(property_data); + + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle.get_comms().barrier(); + hr_clock.start(); + } + + auto result = reduce_v(handle, + mg_graph_view, + property_iter, + property_iter + (*d_mg_renumber_map_labels).size(), + property_initial_value); + + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle.get_comms().barrier(); + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "MG count if took " << elapsed_time * 1e-6 << " s.\n"; + } + + //// 4. compare SG & MG results + + if (prims_usecase.check_correctness) { + cugraph::experimental::graph_t sg_graph( + handle); + std::tie(sg_graph, std::ignore) = + input_usecase.template construct_graph( + handle, true, false); + auto sg_graph_view = sg_graph.view(); + + auto sg_property_data = generate::property( + thrust::make_counting_iterator(sg_graph_view.get_local_vertex_first()), + thrust::make_counting_iterator(sg_graph_view.get_local_vertex_last()), + hash_bin_count, + handle); + auto sg_property_iter = get_property_iterator(sg_property_data); + using property_t = decltype(property_initial_value); + + auto expected_result = + thrust::reduce(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + sg_property_iter, + sg_property_iter + sg_graph_view.get_number_of_local_vertices(), + property_initial_value, + cugraph::experimental::property_add()); + result_compare compare; + ASSERT_TRUE(compare(expected_result, result)); + } + } +}; + +using Tests_MG_ReduceIfV_File = Tests_MG_ReduceIfV; +using Tests_MG_ReduceIfV_Rmat = Tests_MG_ReduceIfV; + +TEST_P(Tests_MG_ReduceIfV_File, CheckInt32Int32FloatTupleIntFloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test, false>(std::get<0>(param), + std::get<1>(param)); +} + +TEST_P(Tests_MG_ReduceIfV_Rmat, CheckInt32Int32FloatTupleIntFloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test, false>(std::get<0>(param), + std::get<1>(param)); +} + +TEST_P(Tests_MG_ReduceIfV_File, CheckInt32Int32FloatTupleIntFloatTransposeTrue) +{ + auto param = GetParam(); + run_current_test, true>(std::get<0>(param), + std::get<1>(param)); +} + +TEST_P(Tests_MG_ReduceIfV_Rmat, CheckInt32Int32FloatTupleIntFloatTransposeTrue) +{ + auto param = GetParam(); + run_current_test, true>(std::get<0>(param), + std::get<1>(param)); +} + +TEST_P(Tests_MG_ReduceIfV_File, CheckInt32Int32FloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + +TEST_P(Tests_MG_ReduceIfV_Rmat, CheckInt32Int32FloatTransposeFalse) +{ + auto param = 
GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + +TEST_P(Tests_MG_ReduceIfV_File, CheckInt32Int32FloatTransposeTrue) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + +TEST_P(Tests_MG_ReduceIfV_Rmat, CheckInt32Int32FloatTransposeTrue) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + +INSTANTIATE_TEST_SUITE_P( + file_test, + Tests_MG_ReduceIfV_File, + ::testing::Combine( + ::testing::Values(Prims_Usecase{true}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), + cugraph::test::File_Usecase("test/datasets/web-Google.mtx"), + cugraph::test::File_Usecase("test/datasets/ljournal-2008.mtx"), + cugraph::test::File_Usecase("test/datasets/webbase-1M.mtx")))); + +INSTANTIATE_TEST_SUITE_P( + rmat_small_test, + Tests_MG_ReduceIfV_Rmat, + ::testing::Combine(::testing::Values(Prims_Usecase{true}), + ::testing::Values(cugraph::test::Rmat_Usecase( + 10, 16, 0.57, 0.19, 0.19, 0, false, false, 0, true)))); + +INSTANTIATE_TEST_SUITE_P( + rmat_large_test, + Tests_MG_ReduceIfV_Rmat, + ::testing::Combine(::testing::Values(Prims_Usecase{false}), + ::testing::Values(cugraph::test::Rmat_Usecase( + 20, 32, 0.57, 0.19, 0.19, 0, false, false, 0, true)))); + +CUGRAPH_MG_TEST_PROGRAM_MAIN() From 7f17b27364230e5fb78ee224141b761a29a026f7 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Wed, 28 Jul 2021 08:42:54 -0500 Subject: [PATCH 333/343] Pin max version for `dask` & `distributed` (#1736) Changes to be in-line with: https://github.com/rapidsai/cudf/pull/8859 Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - AJ Schmidt (https://github.com/ajschmidt8) - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/1736 --- conda/environments/cugraph_dev_cuda11.0.yml | 4 ++-- conda/environments/cugraph_dev_cuda11.2.yml | 4 ++-- conda/recipes/cugraph/meta.yaml | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/conda/environments/cugraph_dev_cuda11.0.yml b/conda/environments/cugraph_dev_cuda11.0.yml index 7d38f73eb1f..d19ac1bd00e 100644 --- a/conda/environments/cugraph_dev_cuda11.0.yml +++ b/conda/environments/cugraph_dev_cuda11.0.yml @@ -10,8 +10,8 @@ dependencies: - libcudf=21.08.* - rmm=21.08.* - librmm=21.08.* -- dask>=2021.6.0 -- distributed>=2021.6.0 +- dask>=2021.6.0,<=2021.07.1 +- distributed>=2021.6.0,<=2021.07.1 - dask-cuda=21.08.* - dask-cudf=21.08.* - nccl>=2.9.9 diff --git a/conda/environments/cugraph_dev_cuda11.2.yml b/conda/environments/cugraph_dev_cuda11.2.yml index 11e826d784d..7d2f3d26ef5 100644 --- a/conda/environments/cugraph_dev_cuda11.2.yml +++ b/conda/environments/cugraph_dev_cuda11.2.yml @@ -10,8 +10,8 @@ dependencies: - libcudf=21.08.* - rmm=21.08.* - librmm=21.08.* -- dask>=2021.6.0 -- distributed>=2021.6.0 +- dask>=2021.6.0,<=2021.07.1 +- distributed>=2021.6.0,<=2021.07.1 - dask-cuda=21.08.* - dask-cudf=21.08.* - nccl>=2.9.9 diff --git a/conda/recipes/cugraph/meta.yaml b/conda/recipes/cugraph/meta.yaml index d6dba8e3f63..b335ec7753e 100644 --- a/conda/recipes/cugraph/meta.yaml +++ b/conda/recipes/cugraph/meta.yaml @@ -37,8 +37,8 @@ requirements: - cudf={{ minor_version }} - dask-cudf {{ minor_version }} - dask-cuda {{ minor_version }} - - dask>=2021.6.0 - - distributed>=2021.6.0 + - dask>=2021.6.0,<=2021.07.1 + - distributed>=2021.6.0,<=2021.07.1 - ucx-py 0.21 - ucx-proc=*=gpu - {{ pin_compatible('cudatoolkit', max_pin='x.x') }} From 7c603dd18d0ac69cb8a39404626010cb95e10501 Mon 
Sep 17 00:00:00 2001 From: Rick Ratzel <3039903+rlratzel@users.noreply.github.com> Date: Wed, 28 Jul 2021 14:58:05 -0500 Subject: [PATCH 334/343] Pinning cuco to a specific commit hash for release (#1741) Using the latest `cuco` commit hash as the `GIT_TAG` for release purposes. See also RAFT PR https://github.com/rapidsai/raft/pull/304 . Also updated a debug message. NOTE: using `skip-ci` until https://github.com/rapidsai/raft/pull/304 is merged. NOTE: The commit hash will likely be reverted to `dev` in 21.10 during development, then a new hash will be used for 21.10 release, and so on. Authors: - Rick Ratzel (https://github.com/rlratzel) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) URL: https://github.com/rapidsai/cugraph/pull/1741 --- cpp/cmake/thirdparty/get_cuco.cmake | 2 +- cpp/cmake/thirdparty/get_raft.cmake | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/cpp/cmake/thirdparty/get_cuco.cmake b/cpp/cmake/thirdparty/get_cuco.cmake index 009a6642415..e49722b4823 100644 --- a/cpp/cmake/thirdparty/get_cuco.cmake +++ b/cpp/cmake/thirdparty/get_cuco.cmake @@ -20,7 +20,7 @@ function(find_and_configure_cuco VERSION) GLOBAL_TARGETS cuco cuco::cuco CPM_ARGS GIT_REPOSITORY https://github.com/NVIDIA/cuCollections.git - GIT_TAG dev + GIT_TAG b1fea0cbe4c384160740af00f7c8760846539abb OPTIONS "BUILD_TESTS OFF" "BUILD_BENCHMARKS OFF" "BUILD_EXAMPLES OFF" diff --git a/cpp/cmake/thirdparty/get_raft.cmake b/cpp/cmake/thirdparty/get_raft.cmake index d8c9358e023..a819d7158e1 100644 --- a/cpp/cmake/thirdparty/get_raft.cmake +++ b/cpp/cmake/thirdparty/get_raft.cmake @@ -30,7 +30,7 @@ function(find_and_configure_raft) OPTIONS "BUILD_TESTS OFF" ) - message(VERBOSE "CUML: Using RAFT located in ${raft_SOURCE_DIR}") + message(VERBOSE "CUGRAPH: Using RAFT located in ${raft_SOURCE_DIR}") endfunction() @@ -45,4 +45,3 @@ find_and_configure_raft(VERSION ${CUGRAPH_MIN_VERSION_raft} FORK rapidsai PINNED_TAG branch-${CUGRAPH_BRANCH_VERSION_raft} ) - From aab2a5c7d1d1c863f9bd4f0fdc68e63d9f8d5638 Mon Sep 17 00:00:00 2001 From: Brad Rees <34135411+BradReesWork@users.noreply.github.com> Date: Thu, 29 Jul 2021 09:09:02 -0400 Subject: [PATCH 335/343] Docs for RMAT (#1735) Fixed indentations so that docs appear correctly Authors: - Brad Rees (https://github.com/BradReesWork) Approvers: - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/1735 --- docs/cugraph/source/api.rst | 11 +++--- python/cugraph/generators/rmat.py | 61 ++++++++++++++++++++----------- 2 files changed, 45 insertions(+), 27 deletions(-) diff --git a/docs/cugraph/source/api.rst b/docs/cugraph/source/api.rst index a07044ecf3e..81c5e3e4ee2 100644 --- a/docs/cugraph/source/api.rst +++ b/docs/cugraph/source/api.rst @@ -302,14 +302,13 @@ Maximum Spanning Tree Generator ========= - - RMAT - --------------------- + +RMAT +--------------------- .. 
automodule:: cugraph.generators -:members: rmat -:undoc-members: -:noindex: + :members: rmat + :undoc-members: DASK MG Helper functions diff --git a/python/cugraph/generators/rmat.py b/python/cugraph/generators/rmat.py index 46859ccd42d..d93ceb34cd1 100644 --- a/python/cugraph/generators/rmat.py +++ b/python/cugraph/generators/rmat.py @@ -243,49 +243,68 @@ def rmat( Parameters ---------- scale : int - Scale factor to set the number of verties in the graph Vertex IDs have - values in [0, V), where V = 1 << 'scale' + Scale factor to set the number of vertices in the graph Vertex IDs have + values in [0, V), where V = 1 << 'scale' num_edges : int - Number of edges to generate + Number of edges to generate a : float - Probability of the first partition + Probability of the first partition b : float - Probability of the second partition + Probability of the second partition c : float - Probability of the thrid partition + Probability of the thrid partition seed : int - Seed value for the random number generator + Seed value for the random number generator clip_and_flip : bool - Flag controlling whether to generate edges only in the lower triangular - part (including the diagonal) of the graph adjacency matrix - (if set to 'true') or not (if set to 'false). + Flag controlling whether to generate edges only in the lower triangular + part (including the diagonal) of the graph adjacency matrix + (if set to 'true') or not (if set to 'false). scramble_vertex_ids : bool - Flag controlling whether to scramble vertex ID bits (if set to `true`) or - not (if set to `false`); scrambling vertx ID bits breaks correlation - between vertex ID values and vertex degrees + Flag controlling whether to scramble vertex ID bits (if set to `true`) + or not (if set to `false`); scrambling vertex ID bits breaks + correlation between vertex ID values and vertex degrees. create_using : cugraph Graph type or None The graph type to construct - containing the generated edges and vertices. If None is specified, the - edgelist cuDF DataFrame (or dask_cudf DataFrame for MG) is returned as-is. - This is useful for benchmarking Graph construction steps that require raw - data that includes potential self-loops, isolated vertices, and duplicated - edges. Default is cugraph.DiGraph. NOTE: only the cugraph.DiGraph type is - supported for multi-GPU + containing the generated edges and vertices. If None is specified, the + edgelist cuDF DataFrame (or dask_cudf DataFrame for MG) is returned + as-is. This is useful for benchmarking Graph construction steps that + require raw data that includes potential self-loops, isolated vertices, + and duplicated edges. Default is cugraph.DiGraph. + NOTE: only the cugraph.DiGraph type is supported for multi-GPU mg : bool - If True, R-MAT generation occurs across multiple GPUs. If False, only a - single GPU is used. Default is False (single-GPU) + If True, R-MAT generation occurs across multiple GPUs. If False, only a + single GPU is used. 
Default is False (single-GPU) Returns ------- instance of cugraph.Graph + + Examples + -------- + import cugraph + from cugraph.generators import rmat + + df = rmat( + scale, + (2**scale)*edgefactor, + 0.1, + 0.2, + 0.3, + seed or 42, + clip_and_flip=False, + scramble_vertex_ids=True, + create_using=None, # return edgelist instead of Graph instance + mg=False + ) + """ _ensure_args_rmat(scale, num_edges, a, b, c, seed, clip_and_flip, scramble_vertex_ids, create_using, mg) From 421e4be813e1ea96362c1fcf19be53cbdb631acd Mon Sep 17 00:00:00 2001 From: Rick Ratzel <3039903+rlratzel@users.noreply.github.com> Date: Thu, 29 Jul 2021 16:52:39 -0500 Subject: [PATCH 336/343] Changed cuco cmake function to return early if cuco has already been added as a target (#1746) Changed cuco cmake function to return early if cuco has already been added as a target. This matches the technique used by raft [here](https://github.com/rapidsai/raft/blob/a3af3895410c19f3e713caa608ea2024f6008350/cpp/cmake/thirdparty/get_cuco.cmake#L19). Tested by doing a build and install of cuML, followed by a build of cuGraph and observing a CPM error about the alias target `cuco::cuco` already existing. Made the change to return early if cuco is already a target and observed the cuGraph `libcugraph.so` build succeed. Authors: - Rick Ratzel (https://github.com/rlratzel) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - Dillon Cullinan (https://github.com/dillon-cullinan) URL: https://github.com/rapidsai/cugraph/pull/1746 --- cpp/cmake/thirdparty/get_cuco.cmake | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/cpp/cmake/thirdparty/get_cuco.cmake b/cpp/cmake/thirdparty/get_cuco.cmake index e49722b4823..7b9ab17bef6 100644 --- a/cpp/cmake/thirdparty/get_cuco.cmake +++ b/cpp/cmake/thirdparty/get_cuco.cmake @@ -16,6 +16,10 @@ function(find_and_configure_cuco VERSION) + if(TARGET cuco::cuco) + return() + endif() + rapids_cpm_find(cuco ${VERSION} GLOBAL_TARGETS cuco cuco::cuco CPM_ARGS @@ -26,9 +30,7 @@ function(find_and_configure_cuco VERSION) "BUILD_EXAMPLES OFF" ) - if(NOT TARGET cuco::cuco) - add_library(cuco::cuco ALIAS cuco) - endif() + add_library(cuco::cuco ALIAS cuco) endfunction() From 96178f91d5f9fffc9babf58a194c7754a5180f68 Mon Sep 17 00:00:00 2001 From: Raymond Douglass Date: Wed, 4 Aug 2021 10:11:23 -0400 Subject: [PATCH 337/343] update changelog --- CHANGELOG.md | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 64 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 72776bea37b..601ac2fb4f8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,68 @@ -# cuGraph 21.08.00 (Date TBD) +# cuGraph 21.08.00 (4 Aug 2021) -Please see https://github.com/rapidsai/cugraph/releases/tag/v21.08.00a for the latest changes to this development branch. 
+## 🚨 Breaking Changes + +- Removed depricated code ([#1705](https://github.com/rapidsai/cugraph/pull/1705)) [@BradReesWork](https://github.com/BradReesWork) +- Delete legacy renumbering implementation ([#1681](https://github.com/rapidsai/cugraph/pull/1681)) [@ChuckHastings](https://github.com/ChuckHastings) +- Migrate old graph to legacy directory/namespace ([#1675](https://github.com/rapidsai/cugraph/pull/1675)) [@ChuckHastings](https://github.com/ChuckHastings) + +## 🐛 Bug Fixes + +- Changed cuco cmake function to return early if cuco has already been added as a target ([#1746](https://github.com/rapidsai/cugraph/pull/1746)) [@rlratzel](https://github.com/rlratzel) +- revert cuco to latest dev branch, issues should be fixed ([#1721](https://github.com/rapidsai/cugraph/pull/1721)) [@ChuckHastings](https://github.com/ChuckHastings) +- Fix `conda` uploads ([#1712](https://github.com/rapidsai/cugraph/pull/1712)) [@ajschmidt8](https://github.com/ajschmidt8) +- Updated for CUDA-specific py packages ([#1709](https://github.com/rapidsai/cugraph/pull/1709)) [@rlratzel](https://github.com/rlratzel) +- Use `library_dirs` for cython linking, link cudatoolkit libs, allow setting UCX install location ([#1698](https://github.com/rapidsai/cugraph/pull/1698)) [@trxcllnt](https://github.com/trxcllnt) +- Fix the Louvain failure with 64 bit vertex IDs ([#1696](https://github.com/rapidsai/cugraph/pull/1696)) [@seunghwak](https://github.com/seunghwak) +- Use nested include in destination of install headers to avoid docker permission issues ([#1656](https://github.com/rapidsai/cugraph/pull/1656)) [@dantegd](https://github.com/dantegd) +- Added accidentally-removed cpp-mgtests target back to the valid args list ([#1652](https://github.com/rapidsai/cugraph/pull/1652)) [@rlratzel](https://github.com/rlratzel) +- Update UCX-Py version to 0.21 ([#1650](https://github.com/rapidsai/cugraph/pull/1650)) [@pentschev](https://github.com/pentschev) + +## 📖 Documentation + +- Docs for RMAT ([#1735](https://github.com/rapidsai/cugraph/pull/1735)) [@BradReesWork](https://github.com/BradReesWork) +- Doc updates ([#1719](https://github.com/rapidsai/cugraph/pull/1719)) [@BradReesWork](https://github.com/BradReesWork) + +## 🚀 New Features + +- Fea cleanup stream part1 ([#1653](https://github.com/rapidsai/cugraph/pull/1653)) [@ChuckHastings](https://github.com/ChuckHastings) + +## 🛠️ Improvements + +- Pinning cuco to a specific commit hash for release ([#1741](https://github.com/rapidsai/cugraph/pull/1741)) [@rlratzel](https://github.com/rlratzel) +- Pin max version for `dask` & `distributed` ([#1736](https://github.com/rapidsai/cugraph/pull/1736)) [@galipremsagar](https://github.com/galipremsagar) +- Fix libfaiss dependency to not expressly depend on conda-forge ([#1728](https://github.com/rapidsai/cugraph/pull/1728)) [@Ethyling](https://github.com/Ethyling) +- Fix MG_test bug ([#1718](https://github.com/rapidsai/cugraph/pull/1718)) [@jnke2016](https://github.com/jnke2016) +- Cascaded dispatch for type-erased API ([#1711](https://github.com/rapidsai/cugraph/pull/1711)) [@aschaffer](https://github.com/aschaffer) +- ReduceV test ([#1710](https://github.com/rapidsai/cugraph/pull/1710)) [@kaatish](https://github.com/kaatish) +- Removed depricated code ([#1705](https://github.com/rapidsai/cugraph/pull/1705)) [@BradReesWork](https://github.com/BradReesWork) +- Delete unused/out-dated primitives ([#1704](https://github.com/rapidsai/cugraph/pull/1704)) [@seunghwak](https://github.com/seunghwak) +- Update primitives to support DCSR 
(DCSC) segments (Part 2/2) ([#1703](https://github.com/rapidsai/cugraph/pull/1703)) [@seunghwak](https://github.com/seunghwak) +- Fea speedup compile ([#1702](https://github.com/rapidsai/cugraph/pull/1702)) [@ChuckHastings](https://github.com/ChuckHastings) +- Update `conda` environment name for CI ([#1699](https://github.com/rapidsai/cugraph/pull/1699)) [@ajschmidt8](https://github.com/ajschmidt8) +- Count if test ([#1697](https://github.com/rapidsai/cugraph/pull/1697)) [@kaatish](https://github.com/kaatish) +- replace cudf assert_eq ([#1693](https://github.com/rapidsai/cugraph/pull/1693)) [@jnke2016](https://github.com/jnke2016) +- Fix int64 vertex_t ([#1691](https://github.com/rapidsai/cugraph/pull/1691)) [@Iroy30](https://github.com/Iroy30) +- Update primitives to support DCSR (DCSC) segments (Part 1) ([#1690](https://github.com/rapidsai/cugraph/pull/1690)) [@seunghwak](https://github.com/seunghwak) +- remove hardcoded dtype ([#1689](https://github.com/rapidsai/cugraph/pull/1689)) [@Iroy30](https://github.com/Iroy30) +- Updating Clang Version to 11.0.0 ([#1688](https://github.com/rapidsai/cugraph/pull/1688)) [@codereport](https://github.com/codereport) +- `CHECK_CUDA` macros in debug builds ([#1687](https://github.com/rapidsai/cugraph/pull/1687)) [@trxcllnt](https://github.com/trxcllnt) +- fixing symmetrize_ddf ([#1686](https://github.com/rapidsai/cugraph/pull/1686)) [@jnke2016](https://github.com/jnke2016) +- Improve Random Walks performance ([#1685](https://github.com/rapidsai/cugraph/pull/1685)) [@aschaffer](https://github.com/aschaffer) +- Use the 21.08 branch of rapids-cmake as rmm requires it ([#1683](https://github.com/rapidsai/cugraph/pull/1683)) [@robertmaynard](https://github.com/robertmaynard) +- Delete legacy renumbering implementation ([#1681](https://github.com/rapidsai/cugraph/pull/1681)) [@ChuckHastings](https://github.com/ChuckHastings) +- Fix vertex partition offsets ([#1680](https://github.com/rapidsai/cugraph/pull/1680)) [@Iroy30](https://github.com/Iroy30) +- Ues std::optional (or thrust::optional) for optional parameters & first part of DCSR (DCSC) implementation. 
([#1676](https://github.com/rapidsai/cugraph/pull/1676)) [@seunghwak](https://github.com/seunghwak)
+- Migrate old graph to legacy directory/namespace ([#1675](https://github.com/rapidsai/cugraph/pull/1675)) [@ChuckHastings](https://github.com/ChuckHastings)
+- Expose epsilon parameter (precision) through python layer ([#1674](https://github.com/rapidsai/cugraph/pull/1674)) [@ChuckHastings](https://github.com/ChuckHastings)
+- Fea hungarian expose precision ([#1673](https://github.com/rapidsai/cugraph/pull/1673)) [@ChuckHastings](https://github.com/ChuckHastings)
+- Branch 21.08 merge 21.06 ([#1672](https://github.com/rapidsai/cugraph/pull/1672)) [@BradReesWork](https://github.com/BradReesWork)
+- Update pins to Dask/Distributed >= 2021.6.0 ([#1666](https://github.com/rapidsai/cugraph/pull/1666)) [@pentschev](https://github.com/pentschev)
+- Fix conflicts in `1643` ([#1651](https://github.com/rapidsai/cugraph/pull/1651)) [@ajschmidt8](https://github.com/ajschmidt8)
+- Rename include/cugraph/patterns to include/cugraph/prims ([#1644](https://github.com/rapidsai/cugraph/pull/1644)) [@seunghwak](https://github.com/seunghwak)
+- Fix merge conflicts in 1631 ([#1639](https://github.com/rapidsai/cugraph/pull/1639)) [@ajschmidt8](https://github.com/ajschmidt8)
+- Update to changed `rmm::device_scalar` API ([#1637](https://github.com/rapidsai/cugraph/pull/1637)) [@harrism](https://github.com/harrism)
+- Fix merge conflicts ([#1614](https://github.com/rapidsai/cugraph/pull/1614)) [@ajschmidt8](https://github.com/ajschmidt8)

 # cuGraph 21.06.00 (9 Jun 2021)

From f7c22a6382d3637ba902e8a117e9480cf2215ae5 Mon Sep 17 00:00:00 2001
From: AJ Schmidt
Date: Fri, 6 Aug 2021 11:39:16 -0400
Subject: [PATCH 338/343] include cpack (#1760)

Co-authored-by: ptaylor
---
 cpp/CMakeLists.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 1ab227e01c1..9fcb40b1eba 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -337,6 +337,8 @@ endif(BUILD_TESTS)

 ###################################################################################################
 # - install targets -------------------------------------------------------------------------------
+include(CPack)
+
 install(TARGETS cugraph
         DESTINATION lib
         EXPORT cugraph-exports)

From 97bfeed5c706c44308cd2a69ed010025d4a1dd97 Mon Sep 17 00:00:00 2001
From: Rick Ratzel <3039903+rlratzel@users.noreply.github.com>
Date: Mon, 16 Aug 2021 11:28:30 -0500
Subject: [PATCH 339/343] Added guards for preventing k-truss execution on CUDA 11.4 systems (#1773)

Added guards for preventing k-truss execution on CUDA 11.4 systems and the
appropriate user-friendly messaging, along with a new unit test. Also updated
the k-truss notebook with a note about the unsupported CUDA version and a
comment to skip automated tests on CUDA 11.4, along with updates to the
notebook skip mechanism to also look at the CUDA version.

Tested on a CUDA 11.2 system by changing the unsupported version to 11.2 to
verify correct behavior.

Behavior on CUDA 11.4 systems should be to allow cugraph to be imported as
always, but if the user calls k_truss or ktruss_subgraph, a NotImplementedError
exception with "is not currently supported in CUDA 11.4 environments" message
is raised. If the user directly imports the module (i.e. from
cugraph.community.ktruss_subgraph import k_truss), the exception is raised
immediately. Behavior on non-CUDA 11.4 systems should be exactly the same as
prior to this change.
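For context, the guard is a lazy-placeholder pattern: the public names are
bound to an object that raises only when it is referenced or called, so
`import cugraph` keeps working everywhere. A minimal, self-contained sketch of
the idea (illustrative names only; the actual change, shown in the diff below,
detects the runtime version via numba.cuda at import time):

    class _UnsupportedFeature:
        """Stand-in that defers the error until the feature is actually used."""

        def __init__(self, exception):
            self._exception = exception

        def __getattr__(self, attr):
            raise self._exception

        def __call__(self, *args, **kwargs):
            raise self._exception

    # assumption: in the real code this tuple comes from a CUDA runtime query
    detected_cuda_version = (11, 4)

    if detected_cuda_version == (11, 4):
        k_truss = _UnsupportedFeature(NotImplementedError(
            "k_truss is not currently supported in CUDA 11.4 environments."))

    # Binding the placeholder is harmless; any attribute access or call on
    # k_truss raises the NotImplementedError above.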
--- ci/gpu/notebook_list.py | 8 ++++- notebooks/cores/ktruss.ipynb | 10 ++---- python/cugraph/community/__init__.py | 35 +++++++++++++++++-- python/cugraph/community/ktruss_subgraph.py | 15 ++++++++ python/cugraph/tests/test_k_truss_subgraph.py | 29 +++++++++++++++ 5 files changed, 87 insertions(+), 10 deletions(-) diff --git a/ci/gpu/notebook_list.py b/ci/gpu/notebook_list.py index 8748c434006..23a198830a8 100644 --- a/ci/gpu/notebook_list.py +++ b/ci/gpu/notebook_list.py @@ -17,12 +17,12 @@ from numba import cuda +cuda_version_string = ".".join([str(n) for n in cuda.runtime.get_version()]) # # Not strictly true... however what we mean is # Pascal or earlier # pascal = False - device = cuda.get_current_device() # check for the attribute using both pre and post numba 0.53 names cc = getattr(device, 'COMPUTE_CAPABILITY', None) or \ @@ -45,6 +45,12 @@ print(f'SKIPPING {filename} (does not run on Pascal)', file=sys.stderr) skip = True break; + elif re.search('# Does not run on CUDA ', line) and \ + (cuda_version_string in line): + print(f'SKIPPING {filename} (does not run on CUDA {cuda_version_string})', + file=sys.stderr) + skip = True + break; if not skip: print(filename) diff --git a/notebooks/cores/ktruss.ipynb b/notebooks/cores/ktruss.ipynb index e6470110666..3f283558f27 100644 --- a/notebooks/cores/ktruss.ipynb +++ b/notebooks/cores/ktruss.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# K-Truss\n", + "# Does not run on CUDA 11.4\n", "\n", "\n", "In this notebook, we will use cuGraph to identify the K-Truss clusters in a test graph \n", @@ -12,18 +13,13 @@ "Notebook Credits\n", "* Original Authors: Bradley Rees\n", "* Created: 10/28/2019\n", - "* Last Edit: 08/16/2020\n", - "\n", - "RAPIDS Versions: 0.13\n", - "\n", - "Test Hardware\n", - "* GV100 32G, CUDA 10.2\n", - "\n", + "* Last Edit: 08/13/2021\n", "\n", "\n", "## Introduction\n", "\n", "Compute the k-truss of the graph G. A K-Truss is a relaxed cliques where every vertex is supported by at least k-2 triangle.\n", + "NOTE: k-truss is currently not supported on CUDA 11.4 systems.\n", "\n", "Ref:\n", "\n", diff --git a/python/cugraph/community/__init__.py b/python/cugraph/community/__init__.py index 9cc92637e20..4e1115a71ee 100644 --- a/python/cugraph/community/__init__.py +++ b/python/cugraph/community/__init__.py @@ -23,7 +23,38 @@ ) from cugraph.community.subgraph_extraction import subgraph from cugraph.community.triangle_count import triangles -from cugraph.community.ktruss_subgraph import ktruss_subgraph -from cugraph.community.ktruss_subgraph import k_truss from cugraph.community.egonet import ego_graph from cugraph.community.egonet import batched_ego_graphs + +# FIXME: special case for ktruss on CUDA 11.4: an 11.4 bug causes ktruss to +# crash in that environment. Allow ktruss to import on non-11.4 systems, but +# replace ktruss with a __UnsupportedModule instance, which lazily raises an +# exception when referenced. 
+from numba import cuda +__cuda_version = cuda.runtime.get_version() +__ktruss_unsupported_cuda_version = (11, 4) + +class __UnsupportedModule: + def __init__(self, exception): + self.__exception = exception + + def __getattr__(self, attr): + raise self.__exception + + def __call__(self, *args, **kwargs): + raise self.__exception + + +if __cuda_version != __ktruss_unsupported_cuda_version: + from cugraph.community.ktruss_subgraph import ktruss_subgraph + from cugraph.community.ktruss_subgraph import k_truss +else: + __kuvs = ".".join([str(n) for n in __ktruss_unsupported_cuda_version]) + k_truss = __UnsupportedModule( + NotImplementedError("k_truss is not currently supported in CUDA" + f" {__kuvs} environments.") + ) + ktruss_subgraph = __UnsupportedModule( + NotImplementedError("ktruss_subgraph is not currently supported in CUDA" + f" {__kuvs} environments.") + ) diff --git a/python/cugraph/community/ktruss_subgraph.py b/python/cugraph/community/ktruss_subgraph.py index f4e4f7fb1cc..b9682e072c2 100644 --- a/python/cugraph/community/ktruss_subgraph.py +++ b/python/cugraph/community/ktruss_subgraph.py @@ -16,11 +16,24 @@ from cugraph.utilities import check_nx_graph from cugraph.utilities import cugraph_to_nx +# FIXME: special case for ktruss on CUDA 11.4: an 11.4 bug causes ktruss to +# crash in that environment. Allow ktruss to import on non-11.4 systems, but +# raise an exception if ktruss is directly imported on 11.4. +from numba import cuda +__cuda_version = cuda.runtime.get_version() +__ktruss_unsupported_cuda_version = (11, 4) +if __cuda_version == __ktruss_unsupported_cuda_version: + __kuvs = ".".join([str(n) for n in __ktruss_unsupported_cuda_version]) + raise NotImplementedError("k_truss is not currently supported in CUDA" + f" {__kuvs} environments.") + def k_truss(G, k): """ Returns the K-Truss subgraph of a graph for a specific k. + NOTE: this function is currently not available on CUDA 11.4 systems. + The k-truss of a graph is a subgraph where each edge is part of at least (k−2) triangles. K-trusses are used for finding tighlty knit groups of vertices in a graph. A k-truss is a relaxation of a k-clique in the graph @@ -60,6 +73,8 @@ def ktruss_subgraph(G, k, use_weights=True): """ Returns the K-Truss subgraph of a graph for a specific k. + NOTE: this function is currently not available on CUDA 11.4 systems. + The k-truss of a graph is a subgraph where each edge is part of at least (k−2) triangles. K-trusses are used for finding tighlty knit groups of vertices in a graph. A k-truss is a relaxation of a k-clique in the graph diff --git a/python/cugraph/tests/test_k_truss_subgraph.py b/python/cugraph/tests/test_k_truss_subgraph.py index a86490fb561..1a1f5c66693 100644 --- a/python/cugraph/tests/test_k_truss_subgraph.py +++ b/python/cugraph/tests/test_k_truss_subgraph.py @@ -19,6 +19,7 @@ from cugraph.tests import utils import numpy as np +from numba import cuda # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -73,6 +74,31 @@ def compare_k_truss(k_truss_cugraph, k, ground_truth_file): return True +__cuda_version = cuda.runtime.get_version() +__unsupported_cuda_version = (11, 4) + + +# FIXME: remove when ktruss is supported on CUDA 11.4 +def test_unsupported_cuda_version(): + """ + Ensures the proper exception is raised when ktruss is called in an + unsupported env, and not when called in a supported env. 
+ """ + k = 5 + cu_M = utils.read_csv_file(utils.DATASETS_KTRUSS[0][0]) + G = cugraph.Graph() + G.from_cudf_edgelist(cu_M, source="0", destination="1", edge_attr="2") + + if __cuda_version == __unsupported_cuda_version: + with pytest.raises(NotImplementedError): + cugraph.k_truss(G, k) + else: + cugraph.k_truss(G, k) + + +@pytest.mark.skipif((__cuda_version == __unsupported_cuda_version), + reason="skipping on unsupported CUDA " + f"{__unsupported_cuda_version} environment.") @pytest.mark.parametrize("graph_file, nx_ground_truth", utils.DATASETS_KTRUSS) def test_ktruss_subgraph_Graph(graph_file, nx_ground_truth): gc.collect() @@ -86,6 +112,9 @@ def test_ktruss_subgraph_Graph(graph_file, nx_ground_truth): compare_k_truss(k_subgraph, k, nx_ground_truth) +@pytest.mark.skipif((__cuda_version == __unsupported_cuda_version), + reason="skipping on unsupported CUDA " + f"{__unsupported_cuda_version} environment.") @pytest.mark.parametrize("graph_file, nx_ground_truth", utils.DATASETS_KTRUSS) def test_ktruss_subgraph_Graph_nx(graph_file, nx_ground_truth): gc.collect() From bf64c2c8e5cdbb4bc0d397702c335652aa31aa75 Mon Sep 17 00:00:00 2001 From: Rick Ratzel <3039903+rlratzel@users.noreply.github.com> Date: Tue, 17 Aug 2021 08:59:26 -0500 Subject: [PATCH 340/343] Updated to handle CUDA version check on machines with no GPUs (#1777) Updated to handle CUDA version check on machines with no GPUs. When no GPUs are present, cugraph can import and ktruss is available, which should match the original behavior prior to when the checks were added for disabling ktruss on CUDA 11.4 Tested by setting CUDA_VISIBLE_DEVICES to " ", verifying the expected exception was raised, made the change, then verified cugraph could be imported. --- python/cugraph/community/__init__.py | 6 +++++- python/cugraph/community/ktruss_subgraph.py | 7 ++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/python/cugraph/community/__init__.py b/python/cugraph/community/__init__.py index 4e1115a71ee..e8bea0cbaa0 100644 --- a/python/cugraph/community/__init__.py +++ b/python/cugraph/community/__init__.py @@ -31,7 +31,11 @@ # replace ktruss with a __UnsupportedModule instance, which lazily raises an # exception when referenced. from numba import cuda -__cuda_version = cuda.runtime.get_version() +try: + __cuda_version = cuda.runtime.get_version() +except cuda.cudadrv.runtime.CudaRuntimeAPIError: + __cuda_version = "n/a" + __ktruss_unsupported_cuda_version = (11, 4) class __UnsupportedModule: diff --git a/python/cugraph/community/ktruss_subgraph.py b/python/cugraph/community/ktruss_subgraph.py index b9682e072c2..c80f65c1608 100644 --- a/python/cugraph/community/ktruss_subgraph.py +++ b/python/cugraph/community/ktruss_subgraph.py @@ -20,8 +20,13 @@ # crash in that environment. Allow ktruss to import on non-11.4 systems, but # raise an exception if ktruss is directly imported on 11.4. 
from numba import cuda -__cuda_version = cuda.runtime.get_version() +try: + __cuda_version = cuda.runtime.get_version() +except cuda.cudadrv.runtime.CudaRuntimeAPIError: + __cuda_version = "n/a" + __ktruss_unsupported_cuda_version = (11, 4) + if __cuda_version == __ktruss_unsupported_cuda_version: __kuvs = ".".join([str(n) for n in __ktruss_unsupported_cuda_version]) raise NotImplementedError("k_truss is not currently supported in CUDA" From 8abf4ac6e3c5bdac42868caacd20ab365e665d02 Mon Sep 17 00:00:00 2001 From: Paul Taylor Date: Wed, 25 Aug 2021 13:16:07 -0500 Subject: [PATCH 341/343] Remove -g from cython compile commands (#1783) Removes `-g` from the compile commands generated by distutils to compile Cython files. This will make our container images, conda packages, and python wheels smaller. --- python/setup.py | 123 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 83 insertions(+), 40 deletions(-) diff --git a/python/setup.py b/python/setup.py index 6009e56d7f1..a2dde239cca 100644 --- a/python/setup.py +++ b/python/setup.py @@ -16,18 +16,27 @@ import sysconfig import shutil -from setuptools import setup, find_packages, Command -from setuptools.extension import Extension -from setuputils import use_raft_package, get_environment_option +# Must import in this order: +# setuptools -> Cython.Distutils.build_ext -> setuptools.command.build_ext +# Otherwise, setuptools.command.build_ext ends up inheriting from +# Cython.Distutils.old_build_ext which we do not want +import setuptools try: - from Cython.Distutils.build_ext import new_build_ext as build_ext + from Cython.Distutils.build_ext import new_build_ext as _build_ext except ImportError: - from setuptools.command.build_ext import build_ext + from setuptools.command.build_ext import build_ext as _build_ext -import versioneer from distutils.sysconfig import get_python_lib +import setuptools.command.build_ext +from setuptools import find_packages, setup, Command +from setuptools.extension import Extension + +from setuputils import use_raft_package, get_environment_option + +import versioneer + INSTALL_REQUIRES = ['numba', 'cython'] CYTHON_FILES = ['cugraph/**/*.pyx'] @@ -81,6 +90,32 @@ if not libcugraph_path: libcugraph_path = conda_lib_dir +extensions = [ + Extension("*", + sources=CYTHON_FILES, + include_dirs=[ + conda_include_dir, + ucx_include_dir, + '../cpp/include', + "../thirdparty/cub", + raft_include_dir, + os.path.join(conda_include_dir, "libcudacxx"), + cuda_include_dir, + os.path.dirname(sysconfig.get_path("include")) + ], + library_dirs=[ + get_python_lib(), + conda_lib_dir, + libcugraph_path, + ucx_lib_dir, + cuda_lib_dir, + os.path.join(os.sys.prefix, "lib") + ], + libraries=['cudart', 'cusparse', 'cusolver', 'cugraph', 'nccl'], + language='c++', + extra_compile_args=['-std=c++17']) +] + class CleanCommand(Command): """Custom clean command to tidy up the project root.""" @@ -105,42 +140,50 @@ def run(self): os.system('find . 
-name "*.cpython*.so" -type f -delete') +class build_ext_no_debug(_build_ext): + + def build_extensions(self): + def remove_flags(compiler, *flags): + for flag in flags: + try: + compiler.compiler_so = list( + filter((flag).__ne__, compiler.compiler_so) + ) + except Exception: + pass + # Full optimization + self.compiler.compiler_so.append("-O3") + # No debug symbols, full optimization, no '-Wstrict-prototypes' warning + remove_flags( + self.compiler, "-g", "-G", "-O1", "-O2", "-Wstrict-prototypes" + ) + super().build_extensions() + + def finalize_options(self): + if self.distribution.ext_modules: + # Delay import this to allow for Cython-less installs + from Cython.Build.Dependencies import cythonize + + nthreads = getattr(self, "parallel", None) # -j option in Py3.5+ + nthreads = int(nthreads) if nthreads else None + self.distribution.ext_modules = cythonize( + self.distribution.ext_modules, + nthreads=nthreads, + force=self.force, + gdb_debug=False, + compiler_directives=dict( + profile=False, language_level=3, embedsignature=True + ), + ) + # Skip calling super() and jump straight to setuptools + setuptools.command.build_ext.build_ext.finalize_options(self) + + cmdclass = dict() cmdclass.update(versioneer.get_cmdclass()) -cmdclass["build_ext"] = build_ext +cmdclass["build_ext"] = build_ext_no_debug cmdclass["clean"] = CleanCommand -EXTENSIONS = [ - Extension("*", - sources=CYTHON_FILES, - include_dirs=[ - conda_include_dir, - ucx_include_dir, - '../cpp/include', - "../thirdparty/cub", - raft_include_dir, - os.path.join(conda_include_dir, "libcudacxx"), - cuda_include_dir, - os.path.dirname(sysconfig.get_path("include")) - ], - library_dirs=[ - get_python_lib(), - conda_lib_dir, - libcugraph_path, - ucx_lib_dir, - cuda_lib_dir, - os.path.join(os.sys.prefix, "lib") - ], - libraries=['cudart', 'cusparse', 'cusolver', 'cugraph', 'nccl'], - language='c++', - extra_compile_args=['-std=c++17']) -] - -for e in EXTENSIONS: - e.cython_directives = dict( - profile=False, language_level=3, embedsignature=True - ) - setup(name='cugraph', description="cuGraph - GPU Graph Analytics", version=versioneer.get_version(), @@ -154,8 +197,8 @@ def run(self): ], # Include the separately-compiled shared library author="NVIDIA Corporation", - setup_requires=['cython'], - ext_modules=EXTENSIONS, + setup_requires=['Cython>=0.29,<0.30'], + ext_modules=extensions, packages=find_packages(include=['cugraph', 'cugraph.*']), install_requires=INSTALL_REQUIRES, license="Apache", From b681f73665fc6576341974c8dd377b401a1bdd32 Mon Sep 17 00:00:00 2001 From: Chuck Hastings <45364586+ChuckHastings@users.noreply.github.com> Date: Thu, 16 Sep 2021 11:48:29 -0400 Subject: [PATCH 342/343] update TSP with Texas State Copyright (#1814) --- cpp/src/traversal/tsp.cu | 1 + cpp/src/traversal/tsp.hpp | 1 + cpp/src/traversal/tsp_solver.hpp | 1 + cpp/src/traversal/tsp_utils.hpp | 1 + .../LICENSES/LICENSE.texas_state_university | 24 +++++++++++++++++++ 5 files changed, 28 insertions(+) create mode 100644 thirdparty/LICENSES/LICENSE.texas_state_university diff --git a/cpp/src/traversal/tsp.cu b/cpp/src/traversal/tsp.cu index 332ccb21834..9be4f4f3767 100644 --- a/cpp/src/traversal/tsp.cu +++ b/cpp/src/traversal/tsp.cu @@ -1,5 +1,6 @@ /* * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2014-2020, Texas State University. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/cpp/src/traversal/tsp.hpp b/cpp/src/traversal/tsp.hpp index 6073f46ab28..f052462156f 100644 --- a/cpp/src/traversal/tsp.hpp +++ b/cpp/src/traversal/tsp.hpp @@ -1,5 +1,6 @@ /* * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2014-2020, Texas State University. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/traversal/tsp_solver.hpp b/cpp/src/traversal/tsp_solver.hpp index 5fb3ff1d449..9d36357046f 100644 --- a/cpp/src/traversal/tsp_solver.hpp +++ b/cpp/src/traversal/tsp_solver.hpp @@ -1,5 +1,6 @@ /* * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2014-2020, Texas State University. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/traversal/tsp_utils.hpp b/cpp/src/traversal/tsp_utils.hpp index 48a3e702f09..eab5c09eb2f 100644 --- a/cpp/src/traversal/tsp_utils.hpp +++ b/cpp/src/traversal/tsp_utils.hpp @@ -1,5 +1,6 @@ /* * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2014-2020, Texas State University. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/thirdparty/LICENSES/LICENSE.texas_state_university b/thirdparty/LICENSES/LICENSE.texas_state_university new file mode 100644 index 00000000000..7862557ac87 --- /dev/null +++ b/thirdparty/LICENSES/LICENSE.texas_state_university @@ -0,0 +1,24 @@ +Copyright (c) 2014-2020, Texas State University. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Texas State University nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL TEXAS STATE UNIVERSITY BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
From fa6f0f1581e072fdcfdeb43a8a8db1a7c698a266 Mon Sep 17 00:00:00 2001 From: Rick Ratzel <3039903+rlratzel@users.noreply.github.com> Date: Thu, 16 Sep 2021 10:49:53 -0500 Subject: [PATCH 343/343] back porting ktruss CUDA 11.4 guard to 21.08 (#1813) --- python/cugraph/community/__init__.py | 39 ++------------------- python/cugraph/community/ktruss_subgraph.py | 27 ++++++++------ 2 files changed, 19 insertions(+), 47 deletions(-) diff --git a/python/cugraph/community/__init__.py b/python/cugraph/community/__init__.py index e8bea0cbaa0..9cc92637e20 100644 --- a/python/cugraph/community/__init__.py +++ b/python/cugraph/community/__init__.py @@ -23,42 +23,7 @@ ) from cugraph.community.subgraph_extraction import subgraph from cugraph.community.triangle_count import triangles +from cugraph.community.ktruss_subgraph import ktruss_subgraph +from cugraph.community.ktruss_subgraph import k_truss from cugraph.community.egonet import ego_graph from cugraph.community.egonet import batched_ego_graphs - -# FIXME: special case for ktruss on CUDA 11.4: an 11.4 bug causes ktruss to -# crash in that environment. Allow ktruss to import on non-11.4 systems, but -# replace ktruss with a __UnsupportedModule instance, which lazily raises an -# exception when referenced. -from numba import cuda -try: - __cuda_version = cuda.runtime.get_version() -except cuda.cudadrv.runtime.CudaRuntimeAPIError: - __cuda_version = "n/a" - -__ktruss_unsupported_cuda_version = (11, 4) - -class __UnsupportedModule: - def __init__(self, exception): - self.__exception = exception - - def __getattr__(self, attr): - raise self.__exception - - def __call__(self, *args, **kwargs): - raise self.__exception - - -if __cuda_version != __ktruss_unsupported_cuda_version: - from cugraph.community.ktruss_subgraph import ktruss_subgraph - from cugraph.community.ktruss_subgraph import k_truss -else: - __kuvs = ".".join([str(n) for n in __ktruss_unsupported_cuda_version]) - k_truss = __UnsupportedModule( - NotImplementedError("k_truss is not currently supported in CUDA" - f" {__kuvs} environments.") - ) - ktruss_subgraph = __UnsupportedModule( - NotImplementedError("ktruss_subgraph is not currently supported in CUDA" - f" {__kuvs} environments.") - ) diff --git a/python/cugraph/community/ktruss_subgraph.py b/python/cugraph/community/ktruss_subgraph.py index c80f65c1608..afa7d66d31d 100644 --- a/python/cugraph/community/ktruss_subgraph.py +++ b/python/cugraph/community/ktruss_subgraph.py @@ -16,21 +16,24 @@ from cugraph.utilities import check_nx_graph from cugraph.utilities import cugraph_to_nx +from numba import cuda + + # FIXME: special case for ktruss on CUDA 11.4: an 11.4 bug causes ktruss to # crash in that environment. Allow ktruss to import on non-11.4 systems, but # raise an exception if ktruss is directly imported on 11.4. 
-from numba import cuda -try: - __cuda_version = cuda.runtime.get_version() -except cuda.cudadrv.runtime.CudaRuntimeAPIError: - __cuda_version = "n/a" +def _ensure_compatible_cuda_version(): + try: + cuda_version = cuda.runtime.get_version() + except cuda.cudadrv.runtime.CudaRuntimeAPIError: + cuda_version = "n/a" -__ktruss_unsupported_cuda_version = (11, 4) + unsupported_cuda_version = (11, 4) -if __cuda_version == __ktruss_unsupported_cuda_version: - __kuvs = ".".join([str(n) for n in __ktruss_unsupported_cuda_version]) - raise NotImplementedError("k_truss is not currently supported in CUDA" - f" {__kuvs} environments.") + if cuda_version == unsupported_cuda_version: + ver_string = ".".join([str(n) for n in unsupported_cuda_version]) + raise NotImplementedError("k_truss is not currently supported in CUDA" + f" {ver_string} environments.") def k_truss(G, k): @@ -62,6 +65,8 @@ def k_truss(G, k): The networkx graph will NOT have all attributes copied over """ + _ensure_compatible_cuda_version() + G, isNx = check_nx_graph(G) if isNx is True: @@ -137,6 +142,8 @@ def ktruss_subgraph(G, k, use_weights=True): >>> k_subgraph = cugraph.ktruss_subgraph(G, 3) """ + _ensure_compatible_cuda_version() + KTrussSubgraph = Graph() if type(G) is not Graph: raise Exception("input graph must be undirected")
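A brief illustrative footnote on the user-visible effect of this backported
guard (the tiny edge list below is made up purely for the example; only cudf
and cugraph APIs already shown in these patches are used): importing cugraph
succeeds on every runtime, and the version check only fires when one of the
guarded entry points is called.

    import cudf
    import cugraph

    # a minimal undirected triangle graph, invented for illustration
    edges = cudf.DataFrame({"src": [0, 1, 2], "dst": [1, 2, 0]})
    G = cugraph.Graph()
    G.from_cudf_edgelist(edges, source="src", destination="dst")

    try:
        subgraph = cugraph.ktruss_subgraph(G, k=3)
    except NotImplementedError as err:
        # raised only on the unsupported CUDA 11.4 runtime; on other
        # runtimes the call simply returns the k-truss subgraph
        print(err)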