Skip to content

Commit

Permalink
Enable use of cudaMemcpyAsync for thrust::copy
Browse files Browse the repository at this point in the history
In case of contigous ranges of trivially relocatable types we can directly utilize `cudaMemcpyAsync` instead of going through transform.

Fixes NVIDIA#210
  • Loading branch information
miscco committed Jul 12, 2023
1 parent 25b7a06 commit de055e0
Showing 1 changed file with 44 additions and 4 deletions.
48 changes: 44 additions & 4 deletions thrust/thrust/system/cuda/detail/internal/copy_device_to_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,14 @@
#include <thrust/system/cuda/config.h>
#include <thrust/system/cuda/detail/execution_policy.h>
#include <thrust/system/cuda/detail/transform.h>
#include <thrust/system/cuda/detail/util.h>
#include <thrust/functional.h>
#include <thrust/type_traits/is_trivially_relocatable.h>

THRUST_NAMESPACE_BEGIN
namespace cuda_cub {

namespace __copy {

template <class Derived,
class InputIt,
class OutputIt>
Expand All @@ -49,12 +50,51 @@ namespace __copy {
InputIt last,
OutputIt result)
{
typedef typename thrust::iterator_traits<InputIt>::value_type InputTy;
return cuda_cub::transform(policy,
return device_to_device(policy,
first,
last,
result,
thrust::identity<InputTy>());
typename is_indirectly_trivially_relocatable_to<InputIt, OutputIt>::type());
}

template <class Derived,
class InputIt,
class OutputIt>
OutputIt THRUST_RUNTIME_FUNCTION
device_to_device(execution_policy<Derived>& policy,
InputIt first,
InputIt last,
OutputIt result,
thrust::detail::true_type)
{
typedef typename thrust::iterator_traits<InputIt>::value_type InputTy;
const auto n = thrust::distance(first, last);
if (n > 0) {
trivial_copy_device_to_device(policy,
reinterpret_cast<InputTy*>(thrust::raw_pointer_cast(&*result)),
reinterpret_cast<InputTy const*>(thrust::raw_pointer_cast(&*first)),
n);
}

return result + n;
}

template <class Derived,
class InputIt,
class OutputIt>
OutputIt THRUST_RUNTIME_FUNCTION
device_to_device(execution_policy<Derived>& policy,
InputIt first,
InputIt last,
OutputIt result,
thrust::detail::false_type)
{
typedef typename thrust::iterator_traits<InputIt>::value_type InputTy;
return cuda_cub::transform(policy,
first,
last,
result,
thrust::identity<InputTy>());
}

} // namespace __copy
Expand Down

0 comments on commit de055e0

Please sign in to comment.