From 6fc3eaf30961b995aa156689c68cdaaced3ac197 Mon Sep 17 00:00:00 2001
From: Malte Foerster
Date: Wed, 25 Sep 2024 14:15:10 +0000
Subject: [PATCH] review suggestions

---
 cpp/src/svm/results.cuh   | 27 ++++++-----
 cpp/src/svm/smosolver.cuh | 12 ++---
 cpp/src/svm/smosolver.h   | 10 ++---
 cpp/src/svm/svc_impl.cuh  | 17 ++++---
 cpp/src/svm/svm_api.cpp   | 94 +++++++++++++++------------------
 cpp/src/svm/svr_impl.cuh  | 10 ++---
 cpp/test/sg/svc_test.cu   | 32 ++++++-------
 7 files changed, 94 insertions(+), 108 deletions(-)

diff --git a/cpp/src/svm/results.cuh b/cpp/src/svm/results.cuh
index 782823d528..7a7cf32632 100644
--- a/cpp/src/svm/results.cuh
+++ b/cpp/src/svm/results.cuh
@@ -118,16 +118,16 @@ class Results {
   void Get(const math_t* alpha,
            const math_t* f,
            rmm::device_buffer& dual_coefs,
-           int* n_support,
+           int& n_support,
            rmm::device_buffer& idx,
            SupportStorage& support_matrix,
-           math_t* b)
+           math_t& b)
   {
     CombineCoefs(alpha, val_tmp.data());
     GetDualCoefs(val_tmp.data(), dual_coefs, n_support);
-    *b = CalcB(alpha, f, *n_support);
-    GetSupportVectorIndices(idx, val_tmp.data(), *n_support);
-    CollectSupportVectorMatrix(support_matrix, idx, *n_support);
+    b = CalcB(alpha, f, n_support);
+    GetSupportVectorIndices(idx, val_tmp.data(), n_support);
+    CollectSupportVectorMatrix(support_matrix, idx, n_support);
     // Make sure that all pending GPU calculations finished before we return
     handle.sync_stream(stream);
   }
@@ -151,8 +151,11 @@ class Results {
       support_matrix.indices.resize(0, stream);
       support_matrix.data.resize(n_support * n_cols * sizeof(math_t), stream);
       if (n_support > 0) {
-        ML::SVM::extractRows(
-          matrix, (math_t*)support_matrix.data.data(), (int*)idx.data(), n_support, handle);
+        ML::SVM::extractRows(matrix,
+                             reinterpret_cast<math_t*>(support_matrix.data.data()),
+                             reinterpret_cast<int*>(idx.data()),
+                             n_support,
+                             handle);
       }
     } else {
       ML::SVM::extractRows(matrix,
@@ -160,7 +163,7 @@ class Results {
                            support_matrix.indptr,
                            support_matrix.indices,
                            support_matrix.data,
                            &(support_matrix.nnz),
-                           (int*)idx.data(),
+                           reinterpret_cast<int*>(idx.data()),
                            n_support,
                            handle);
     }
@@ -202,13 +205,13 @@ class Results {
    *   unallocated on entry, on exit size [n_support]
    * @param [out] n_support number of support vectors
    */
-  void GetDualCoefs(const math_t* val_tmp, rmm::device_buffer& dual_coefs, int* n_support)
+  void GetDualCoefs(const math_t* val_tmp, rmm::device_buffer& dual_coefs, int& n_support)
   {
     // Return only the non-zero coefficients
     auto select_op = [] __device__(math_t a) { return 0 != a; };
-    *n_support = SelectByCoef(val_tmp, n_rows, val_tmp, select_op, val_selected.data());
-    dual_coefs.resize(*n_support * sizeof(math_t), stream);
-    raft::copy((math_t*)dual_coefs.data(), val_selected.data(), *n_support, stream);
+    n_support = SelectByCoef(val_tmp, n_rows, val_tmp, select_op, val_selected.data());
+    dual_coefs.resize(n_support * sizeof(math_t), stream);
+    raft::copy((math_t*)dual_coefs.data(), val_selected.data(), n_support, stream);
     handle.sync_stream(stream);
   }
diff --git a/cpp/src/svm/smosolver.cuh b/cpp/src/svm/smosolver.cuh
index 34a96dd068..933b1042f6 100644
--- a/cpp/src/svm/smosolver.cuh
+++ b/cpp/src/svm/smosolver.cuh
@@ -103,11 +103,11 @@ void SmoSolver<math_t>::Solve(MatrixViewType matrix,
                               int n_cols,
                               math_t* y,
                               const math_t* sample_weight,
-                              rmm::device_buffer* dual_coefs,
-                              int* n_support,
-                              SupportStorage* support_matrix,
-                              rmm::device_buffer* idx,
-                              math_t* b,
+                              rmm::device_buffer& dual_coefs,
+                              int& n_support,
+                              SupportStorage& support_matrix,
+                              rmm::device_buffer& idx,
+                              math_t& b,
                               int max_outer_iter,
                               int max_inner_iter)
 {
@@ -210,7 +210,7 @@ void SmoSolver<math_t>::Solve(MatrixViewType matrix,
               diff_prev);

   Results<math_t> res(handle, matrix, n_rows, n_cols, y, C_vec.data(), svmType);
-  res.Get(alpha.data(), f.data(), *dual_coefs, n_support, *idx, *support_matrix, b);
+  res.Get(alpha.data(), f.data(), dual_coefs, n_support, idx, support_matrix, b);

   ReleaseBuffers();
 }
diff --git a/cpp/src/svm/smosolver.h b/cpp/src/svm/smosolver.h
index 36c540a3dc..c3c6df3216 100644
--- a/cpp/src/svm/smosolver.h
+++ b/cpp/src/svm/smosolver.h
@@ -124,11 +124,11 @@ class SmoSolver {
              int n_cols,
              math_t* y,
              const math_t* sample_weight,
-             rmm::device_buffer* dual_coefs,
-             int* n_support,
-             SupportStorage* support_matrix,
-             rmm::device_buffer* idx,
-             math_t* b,
+             rmm::device_buffer& dual_coefs,
+             int& n_support,
+             SupportStorage& support_matrix,
+             rmm::device_buffer& idx,
+             math_t& b,
              int max_outer_iter = -1,
              int max_inner_iter = 10000);
diff --git a/cpp/src/svm/svc_impl.cuh b/cpp/src/svm/svc_impl.cuh
index e60767a73a..beb72d376b 100644
--- a/cpp/src/svm/svc_impl.cuh
+++ b/cpp/src/svm/svc_impl.cuh
@@ -91,11 +91,11 @@ void svcFitX(const raft::handle_t& handle,
             n_cols,
             y.data(),
             sample_weight,
-            &(model.dual_coefs),
-            &(model.n_support),
-            &(model.support_matrix),
-            &(model.support_idx),
-            &(model.b),
+            model.dual_coefs,
+            model.n_support,
+            model.support_matrix,
+            model.support_idx,
+            model.b,
             param.max_iter);
   model.n_cols = n_cols;
   handle_impl.sync_stream(stream);
@@ -356,17 +356,22 @@ void svmFreeBuffers(const raft::handle_t& handle, SvmModel<math_t>& m)
 {
   cudaStream_t stream = handle.get_stream();

-  // Note that the underlying allocations are not *freed* but rather reset
   m.n_support = 0;
   m.n_cols    = 0;
   m.b         = (math_t)0;
   m.dual_coefs.resize(0, stream);
+  m.dual_coefs.shrink_to_fit(stream);
   m.support_idx.resize(0, stream);
+  m.support_idx.shrink_to_fit(stream);
   m.support_matrix.indptr.resize(0, stream);
+  m.support_matrix.indptr.shrink_to_fit(stream);
   m.support_matrix.indices.resize(0, stream);
+  m.support_matrix.indices.shrink_to_fit(stream);
   m.support_matrix.data.resize(0, stream);
+  m.support_matrix.data.shrink_to_fit(stream);
   m.support_matrix.nnz = -1;
   m.unique_labels.resize(0, stream);
+  m.unique_labels.shrink_to_fit(stream);
 }

 };  // end namespace SVM
diff --git a/cpp/src/svm/svm_api.cpp b/cpp/src/svm/svm_api.cpp
index e92d207c8a..b61caf863a 100644
--- a/cpp/src/svm/svm_api.cpp
+++ b/cpp/src/svm/svm_api.cpp
@@ -87,41 +87,34 @@ cumlError_t cumlSpSvcFit(cumlHandle_t handle,
   if (model.dual_coefs.size() > 0) {
     *dual_coefs = (float*)rmm_alloc.allocate_async(
       model.dual_coefs.size(), rmm::CUDA_ALLOCATION_ALIGNMENT, stream);
-    RAFT_CUDA_TRY(cudaMemcpyAsync(
-      dual_coefs, model.dual_coefs.data(), model.dual_coefs.size(), cudaMemcpyDefault, stream));
+    raft::copy(
+      *dual_coefs, reinterpret_cast<float*>(model.dual_coefs.data()), *n_support, stream);
   } else {
     *dual_coefs = nullptr;
   }
   if (model.support_matrix.data.size() > 0) {
     *x_support = (float*)rmm_alloc.allocate_async(
       model.support_matrix.data.size(), rmm::CUDA_ALLOCATION_ALIGNMENT, stream);
-    RAFT_CUDA_TRY(cudaMemcpyAsync(x_support,
-                                  model.support_matrix.data.data(),
-                                  model.support_matrix.data.size(),
-                                  cudaMemcpyDefault,
-                                  stream));
+    raft::copy(*x_support,
+               reinterpret_cast<float*>(model.support_matrix.data.data()),
+               *n_support * n_cols,
+               stream);
   } else {
     *x_support = nullptr;
   }
   if (model.support_idx.size() > 0) {
     *support_idx = (int*)rmm_alloc.allocate_async(
       model.support_idx.size(), rmm::CUDA_ALLOCATION_ALIGNMENT, stream);
-    RAFT_CUDA_TRY(cudaMemcpyAsync(support_idx,
-                                  model.support_idx.data(),
-                                  model.support_idx.size(),
-                                  cudaMemcpyDefault,
-                                  stream));
+    raft::copy(
+      *support_idx, reinterpret_cast<int*>(model.support_idx.data()), *n_support, stream);
   } else {
     *support_idx = nullptr;
   }
   if (model.unique_labels.size() > 0) {
     *unique_labels = (float*)rmm_alloc.allocate_async(
       model.unique_labels.size(), rmm::CUDA_ALLOCATION_ALIGNMENT, stream);
-    RAFT_CUDA_TRY(cudaMemcpyAsync(unique_labels,
-                                  model.unique_labels.data(),
-                                  model.unique_labels.size(),
-                                  cudaMemcpyDefault,
-                                  stream));
+    raft::copy(
+      *unique_labels, reinterpret_cast<float*>(model.unique_labels.data()), *n_classes, stream);
   } else {
     *unique_labels = nullptr;
   }
@@ -203,41 +196,36 @@ cumlError_t cumlDpSvcFit(cumlHandle_t handle,
   if (model.dual_coefs.size() > 0) {
     *dual_coefs = (double*)rmm_alloc.allocate_async(
       model.dual_coefs.size(), rmm::CUDA_ALLOCATION_ALIGNMENT, stream);
-    RAFT_CUDA_TRY(cudaMemcpyAsync(
-      dual_coefs, model.dual_coefs.data(), model.dual_coefs.size(), cudaMemcpyDefault, stream));
+    raft::copy(
+      *dual_coefs, reinterpret_cast<double*>(model.dual_coefs.data()), *n_support, stream);
   } else {
     *dual_coefs = nullptr;
   }
   if (model.support_matrix.data.size() > 0) {
     *x_support = (double*)rmm_alloc.allocate_async(
       model.support_matrix.data.size(), rmm::CUDA_ALLOCATION_ALIGNMENT, stream);
-    RAFT_CUDA_TRY(cudaMemcpyAsync(x_support,
-                                  model.support_matrix.data.data(),
-                                  model.support_matrix.data.size(),
-                                  cudaMemcpyDefault,
-                                  stream));
+    raft::copy(*x_support,
+               reinterpret_cast<double*>(model.support_matrix.data.data()),
+               *n_support * n_cols,
+               stream);
   } else {
     *x_support = nullptr;
   }
   if (model.support_idx.size() > 0) {
     *support_idx = (int*)rmm_alloc.allocate_async(
       model.support_idx.size(), rmm::CUDA_ALLOCATION_ALIGNMENT, stream);
-    RAFT_CUDA_TRY(cudaMemcpyAsync(support_idx,
-                                  model.support_idx.data(),
-                                  model.support_idx.size(),
-                                  cudaMemcpyDefault,
-                                  stream));
+    raft::copy(
+      *support_idx, reinterpret_cast<int*>(model.support_idx.data()), *n_support, stream);
   } else {
     *support_idx = nullptr;
   }
   if (model.unique_labels.size() > 0) {
     *unique_labels = (double*)rmm_alloc.allocate_async(
       model.unique_labels.size(), rmm::CUDA_ALLOCATION_ALIGNMENT, stream);
-    RAFT_CUDA_TRY(cudaMemcpyAsync(unique_labels,
-                                  model.unique_labels.data(),
-                                  model.unique_labels.size(),
-                                  cudaMemcpyDefault,
-                                  stream));
+    raft::copy(*unique_labels,
+               reinterpret_cast<double*>(model.unique_labels.data()),
+               *n_classes,
+               stream);
   } else {
     *unique_labels = nullptr;
   }
@@ -291,24 +279,19 @@ cumlError_t cumlSpSvcPredict(cumlHandle_t handle,
   model.n_classes = n_classes;
   if (n_support > 0) {
     model.dual_coefs.resize(n_support * sizeof(float), stream);
-    RAFT_CUDA_TRY(cudaMemcpyAsync(
-      model.dual_coefs.data(), dual_coefs, n_support * sizeof(float), cudaMemcpyDefault, stream));
+    raft::copy(reinterpret_cast<float*>(model.dual_coefs.data()), dual_coefs, n_support, stream);

     model.support_matrix.data.resize(n_support * n_cols * sizeof(float), stream);
-    RAFT_CUDA_TRY(cudaMemcpyAsync(model.support_matrix.data.data(),
-                                  x_support,
-                                  n_support * n_cols * sizeof(float),
-                                  cudaMemcpyDefault,
-                                  stream));
+    raft::copy(reinterpret_cast<float*>(model.support_matrix.data.data()),
+               x_support,
+               n_support * n_cols,
+               stream);
   }
   if (n_classes > 0) {
     model.unique_labels.resize(n_classes * sizeof(float), stream);
-    RAFT_CUDA_TRY(cudaMemcpyAsync(model.unique_labels.data(),
-                                  unique_labels,
-                                  n_classes * sizeof(float),
-                                  cudaMemcpyDefault,
-                                  stream));
+    raft::copy(
+      reinterpret_cast<float*>(model.unique_labels.data()), unique_labels, n_classes, stream);
   }

   if (status == CUML_SUCCESS) {
@@ -364,24 +347,19 @@ cumlError_t cumlDpSvcPredict(cumlHandle_t handle,
   model.n_classes = n_classes;
   if (n_support > 0) {
     model.dual_coefs.resize(n_support * sizeof(double), stream);
-    RAFT_CUDA_TRY(cudaMemcpyAsync(
-      model.dual_coefs.data(), dual_coefs, n_support * sizeof(double), cudaMemcpyDefault, stream));
+    raft::copy(reinterpret_cast<double*>(model.dual_coefs.data()), dual_coefs, n_support, stream);

     model.support_matrix.data.resize(n_support * n_cols * sizeof(double), stream);
-    RAFT_CUDA_TRY(cudaMemcpyAsync(model.support_matrix.data.data(),
-                                  x_support,
-                                  n_support * n_cols * sizeof(double),
-                                  cudaMemcpyDefault,
-                                  stream));
+    raft::copy(reinterpret_cast<double*>(model.support_matrix.data.data()),
+               x_support,
+               n_support * n_cols,
+               stream);
   }
   if (n_classes > 0) {
     model.unique_labels.resize(n_classes * sizeof(double), stream);
-    RAFT_CUDA_TRY(cudaMemcpyAsync(model.unique_labels.data(),
-                                  unique_labels,
-                                  n_classes * sizeof(double),
-                                  cudaMemcpyDefault,
-                                  stream));
+    raft::copy(
+      reinterpret_cast<double*>(model.unique_labels.data()), unique_labels, n_classes, stream);
   }

   if (status == CUML_SUCCESS) {
diff --git a/cpp/src/svm/svr_impl.cuh b/cpp/src/svm/svr_impl.cuh
index 1ff62a03c7..3c7cc64923 100644
--- a/cpp/src/svm/svr_impl.cuh
+++ b/cpp/src/svm/svr_impl.cuh
@@ -71,11 +71,11 @@ void svrFitX(const raft::handle_t& handle,
             n_cols,
             y,
             sample_weight,
-            &(model.dual_coefs),
-            &(model.n_support),
-            &(model.support_matrix),
-            &(model.support_idx),
-            &(model.b),
+            model.dual_coefs,
+            model.n_support,
+            model.support_matrix,
+            model.support_idx,
+            model.b,
             param.max_iter);
   model.n_cols = n_cols;
   delete kernel;
diff --git a/cpp/test/sg/svc_test.cu b/cpp/test/sg/svc_test.cu
index 0497b3d260..ec9ee3701a 100644
--- a/cpp/test/sg/svc_test.cu
+++ b/cpp/test/sg/svc_test.cu
@@ -528,10 +528,10 @@ class GetResultsTest : public ::testing::Test {
     res.Get(alpha_dev.data(),
             f_dev.data(),
             model.dual_coefs,
-            &(model.n_support),
+            model.n_support,
             model.support_idx,
             model.support_matrix,
-            &(model.b));
+            model.b);

     ASSERT_EQ(model.n_support, 7);
@@ -563,10 +563,10 @@ class GetResultsTest : public ::testing::Test {
     res.Get(alpha_dev.data(),
             f_dev.data(),
             model.dual_coefs,
-            &(model.n_support),
+            model.n_support,
             model.support_idx,
             model.support_matrix,
-            &(model.b));
+            model.b);
     FreeDenseSupport();
     EXPECT_FLOAT_EQ(model.b, -5.5f);
   }
@@ -1130,11 +1130,11 @@ TYPED_TEST(SmoSolverTest, SmoSolveTest)
               this->n_cols,
               this->y_dev.data(),
               nullptr,
-              &model1.dual_coefs,
-              &model1.n_support,
-              &model1.support_matrix,
-              &model1.support_idx,
-              &model1.b,
+              model1.dual_coefs,
+              model1.n_support,
+              model1.support_matrix,
+              model1.support_idx,
+              model1.b,
               p.max_iter,
               p.max_inner_iter);
     checkResults(model1, exp, stream);
@@ -1155,11 +1155,11 @@ TYPED_TEST(SmoSolverTest, SmoSolveTest)
               this->n_cols,
               this->y_dev.data(),
               nullptr,
-              &model2.dual_coefs,
-              &model2.n_support,
-              &model2.support_matrix,
-              &model2.support_idx,
-              &model2.b,
+              model2.dual_coefs,
+              model2.n_support,
+              model2.support_matrix,
+              model2.support_idx,
+              model2.b,
               p.max_iter,
               p.max_inner_iter);
     checkResults(model2, exp, stream);
@@ -1929,10 +1929,10 @@ class SvrTest : public ::testing::Test {
     res.Get(alpha.data(),
             f.data(),
             model.dual_coefs,
-            &model.n_support,
+            model.n_support,
             model.support_idx,
             model.support_matrix,
-            &model.b);
+            model.b);
     ASSERT_EQ(model.n_support, 5);
     math_t dc_exp[] = {0.1, 0.3, -0.4, 0.9, -0.9};
     EXPECT_TRUE(devArrMatchHost(dc_exp,
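
Illustrative note, not part of the patch above: the changes switch the output parameters of SmoSolver::Solve and Results::Get from pointers to references, so call sites pass the SvmModel members directly instead of taking their addresses, and svmFreeBuffers now also shrinks the buffers it clears. A minimal sketch of the new call convention follows; it mirrors the updated call in cpp/src/svm/svc_impl.cuh, and the solver smo, matrix view matrix, labels y, sample_weight, n_rows, n_cols, and param are assumed to be set up as in that file.

  // Sketch only; mirrors the by-reference call sites introduced in this patch.
  // Everything except `model` is assumed to exist as in svc_impl.cuh.
  ML::SVM::SvmModel<float> model;
  smo.Solve(matrix,
            n_rows,
            n_cols,
            y,
            sample_weight,
            model.dual_coefs,      // rmm::device_buffer&  (was rmm::device_buffer*)
            model.n_support,       // int&                 (was int*)
            model.support_matrix,  // SupportStorage&      (was SupportStorage*)
            model.support_idx,     // rmm::device_buffer&  (was rmm::device_buffer*)
            model.b,               // math_t&              (was math_t*)
            param.max_iter);

The same pattern applies to Results::Get in the tests, where &(model.n_support) and &(model.b) become model.n_support and model.b.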