Skip to content

Commit

Permalink
Apply feedback from ahendriksen
Browse files Browse the repository at this point in the history
  • Loading branch information
bernhardmgruber committed Oct 28, 2024
1 parent df0b6c3 commit a4a823b
Showing 1 changed file with 5 additions and 3 deletions.
8 changes: 5 additions & 3 deletions cub/cub/device/dispatch/dispatch_transform.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ template <typename T>
_CCCL_HOST_DEVICE _CCCL_FORCEINLINE const char* round_down_ptr(const T* ptr, unsigned alignment)
{
#if _CCCL_STD_VER > 2011
_LIBCUDACXX_ASSERT(::cuda::std::has_single_bit(alignment), "");
_CCCL_ASSERT(::cuda::std::has_single_bit(alignment), "");
#endif // _CCCL_STD_VER > 2011
return reinterpret_cast<const char*>(
reinterpret_cast<::cuda::std::uintptr_t>(ptr) & ~::cuda::std::uintptr_t{alignment - 1});
Expand All @@ -161,7 +161,7 @@ template <typename T>
// Issues a PTX `prefetch.global.L2` instruction for the memory location that `addr` points to.
// NOTE(review): this listing is a rendered diff hunk without +/- markers, so the two asm statements below look like
// the removed and the added version of the same line rather than two consecutive prefetches -- confirm against the
// actual file before relying on either.
_CCCL_DEVICE _CCCL_FORCEINLINE void prefetch(const T* addr)
{
// TODO(bgruber): prefetch to L1 may be even better
// Variant that hands the pointer to the instruction unchanged ("l" = 64-bit register operand).
asm volatile("prefetch.global.L2 [%0];" : : "l"(addr) : "memory");
// Variant that first passes the pointer through __cvta_generic_to_global; presumably needed because the `.global`
// form of the instruction expects a global state space address -- TODO confirm against the PTX ISA.
asm volatile("prefetch.global.L2 [%0];" : : "l"(__cvta_generic_to_global(addr)) : "memory");
}

template <int BlockDim, typename T>
Expand All @@ -184,7 +184,9 @@ template <int, typename It, ::cuda::std::__enable_if_t<!::cuda::std::is_pointer<
_CCCL_DEVICE _CCCL_FORCEINLINE void prefetch_tile(It, int)
{}

// this kernel guarantees stable addresses for the parameters of the user provided function
// This kernel guarantees that objects passed as arguments to the user-provided transformation function f reside in
// global memory. No intermediate copies are taken. If the parameter type of f is a reference, taking the address of the
// parameter yields a global memory address.
template <typename PrefetchPolicy,
typename Offset,
typename F,
Expand Down

0 comments on commit a4a823b

Please sign in to comment.