fix move construct performance and conversion MagmaQueue->CudaStream
gbalduzz committed Aug 13, 2020
1 parent c9e8f79 commit 1a2c079
Showing 4 changed files with 16 additions and 15 deletions.
21 changes: 11 additions & 10 deletions include/dca/linalg/util/magma_queue.hpp

@@ -32,19 +32,20 @@ class MagmaQueue {
     cusparseCreate(&cusparse_handle_);
     int device;
     cudaGetDevice(&device);
-    magma_queue_create_from_cuda(device, stream_, cublas_handle_,
-                                 cusparse_handle_, &queue_);
+    magma_queue_create_from_cuda(device, stream_, cublas_handle_, cusparse_handle_, &queue_);
   }
 
   MagmaQueue(const MagmaQueue& rhs) = delete;
   MagmaQueue& operator=(const MagmaQueue& rhs) = delete;
 
-  MagmaQueue(MagmaQueue&& rhs) noexcept {
-    swapMembers(rhs);
+  MagmaQueue(MagmaQueue&& rhs) noexcept : stream_(std::move(rhs.stream_)) {
+    std::swap(cublas_handle_, rhs.cublas_handle_);
+    std::swap(cusparse_handle_, rhs.cusparse_handle_);
+    std::swap(queue_, rhs.queue_);
   }
 
   MagmaQueue& operator=(MagmaQueue&& rhs) noexcept {
-    swapMembers(rhs);
+    swap(rhs);
     return *this;
   }

@@ -62,21 +63,21 @@ class MagmaQueue {
   // take a MagmaQueue, this makes all this code less intelligible
   // but less verbose. Consider this carefully.
   operator cudaStream_t() const {
-    return stream_;
+    return static_cast<cudaStream_t>(stream_);
   }
 
   const CudaStream& getStream() const {
     return stream_;
   }
 
-private:
-  void swapMembers(MagmaQueue& rhs) noexcept {
+  void swap(MagmaQueue& rhs) noexcept {
     std::swap(stream_, rhs.stream_);
     std::swap(cublas_handle_, rhs.cublas_handle_);
     std::swap(cusparse_handle_, rhs.cusparse_handle_);
     std::swap(queue_, rhs.queue_);
   }
 
+private:
   CudaStream stream_;
   magma_queue_t queue_ = nullptr;
   cublasHandle_t cublas_handle_ = nullptr;
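
The magma_queue.hpp hunks carry the "move construct performance" half of the commit message: the old move constructor default-constructed every member and then called swapMembers(rhs), so each move paid for a brand-new cudaStream_t created by CudaStream's default constructor, only to hand it to the moved-from object. Move-constructing stream_ in the initializer list transfers the existing stream and leaves only cheap pointer swaps in the body. A minimal sketch of the idea, assuming a wrapper that creates its cudaStream_t in the default constructor and releases it in the destructor; StreamSketch is illustrative and is not the DCA++ CudaStream implementation:

// Illustrative only: a stream wrapper whose default constructor is expensive
// (it creates a CUDA stream) and whose move operations are cheap.
#include <cuda_runtime.h>
#include <utility>

class StreamSketch {
public:
  StreamSketch() {
    cudaStreamCreate(&stream_);  // costly: allocates a new CUDA stream
  }
  ~StreamSketch() {
    if (stream_)
      cudaStreamDestroy(stream_);
  }

  StreamSketch(const StreamSketch&) = delete;
  StreamSketch& operator=(const StreamSketch&) = delete;

  // Moves transfer ownership of the existing stream; no CUDA call is made.
  StreamSketch(StreamSketch&& rhs) noexcept : stream_(std::exchange(rhs.stream_, nullptr)) {}
  StreamSketch& operator=(StreamSketch&& rhs) noexcept {
    std::swap(stream_, rhs.stream_);
    return *this;
  }

  operator cudaStream_t() const {
    return stream_;
  }

private:
  cudaStream_t stream_ = nullptr;
};

With a member of this kind, a move constructor written as "default-construct, then swap" still triggers one cudaStreamCreate per move; initializing the member with std::move(rhs.stream_), as the new MagmaQueue move constructor does, avoids that call entirely.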

@@ -100,7 +100,7 @@ class CtintAccumulator {
   MatrixConfiguration configuration_;
   int sign_ = 0;
 
-  std::vector<linalg::util::CudaStream*> streams_;
+  std::vector<const linalg::util::CudaStream*> streams_;
   linalg::util::CudaEvent event_;
 
   util::Accumulator<int> accumulated_sign_;

@@ -123,8 +123,8 @@ class TpAccumulator<Parameters, linalg::CPU, DT> {
     return 0;
   }
 
-  linalg::util::CudaStream* get_stream() const {
-    static dca::linalg::util::CudaStream mock_stream;
+  const linalg::util::CudaStream* get_stream() const {
+    static const dca::linalg::util::CudaStream mock_stream;
     return &mock_stream;
   }

@@ -87,8 +87,8 @@ class TpAccumulator<Parameters, linalg::GPU> : public TpAccumulator<Parameters,
   // other_acc.
   void sumTo(this_type& other_acc);
 
-  linalg::util::CudaStream* get_stream() {
-    return &queues_[0];
+  const linalg::util::CudaStream* get_stream() {
+    return &queues_[0].getStream();
   }
 
   void synchronizeCopy() {
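
The three accumulator hunks carry the "conversion MagmaQueue->CudaStream" half of the commit message: instead of treating a queue object as a stream pointer, callers now obtain a const linalg::util::CudaStream* through MagmaQueue::getStream(), and the stream containers and the CPU mock stream become const accordingly. Because operator cudaStream_t() is const-qualified, a const stream can still be handed to asynchronous CUDA calls. A hypothetical caller sketch; StreamHandle, copyBack and the buffer parameters are made up for illustration, and only the const conversion mirrors this commit:

#include <cstddef>
#include <cuda_runtime.h>

// Stand-in for linalg::util::CudaStream: only the const-qualified conversion
// operator matters for this example.
class StreamHandle {
public:
  operator cudaStream_t() const {
    return stream_;
  }

private:
  cudaStream_t stream_ = nullptr;
};

// A caller holding only a const StreamHandle* (as get_stream() now returns)
// can still enqueue asynchronous work, since the conversion is const.
void copyBack(void* dst, const void* src, std::size_t bytes, const StreamHandle* stream) {
  cudaMemcpyAsync(dst, src, bytes, cudaMemcpyDeviceToHost, *stream);
}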
