Skip to content

Commit

Permalink
Enable support for asynchronous memory in ROCm 5.3 and later
Browse files Browse the repository at this point in the history
Include protections for 0-sized allocations in ROCm 5.3-5.5 .
  • Loading branch information
fwyzard authored and psychocoderHPC committed Dec 19, 2023
1 parent 8d04265 commit ccb2c35
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 11 deletions.
30 changes: 25 additions & 5 deletions include/alpaka/core/ApiHipRt.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,9 +181,17 @@ namespace alpaka

static inline Error_t freeAsync([[maybe_unused]] void* devPtr, [[maybe_unused]] Stream_t stream)
{
// hipFreeAsync is implemented only in ROCm 5.2.0 and later.
# if HIP_VERSION >= 50'200'000
return ::hipFreeAsync(devPtr, stream);
// stream-ordered memory operations are fully implemented only in ROCm 5.3.0 and later.
# if HIP_VERSION >= 50'300'000
// hipFreeAsync fails on a null pointer deallocation
if(devPtr)
{
return ::hipFreeAsync(devPtr, stream);
}
else
{
return ::hipSuccess;
}
# else
// Not implemented.
return errorUnknown;
Expand Down Expand Up @@ -289,9 +297,21 @@ namespace alpaka
[[maybe_unused]] size_t size,
[[maybe_unused]] Stream_t stream)
{
// hipMallocAsync is implemented only in ROCm 5.2.0 and later.
# if HIP_VERSION >= 50'200'000
// stream-ordered memory operations are fully implemented only in ROCm 5.3.0 and later.
# if HIP_VERSION >= 50'600'000
return ::hipMallocAsync(devPtr, size, stream);
# elif HIP_VERSION >= 50'300'000
// before ROCm 5.6.0, hipMallocAsync fails for an allocation of 0 bytes
if(size > 0)
{
return ::hipMallocAsync(devPtr, size, stream);
}
else
{
// make sure the pointer can safely be passed to hipFreeAsync
*devPtr = nullptr;
return ::hipSuccess;
}
# else
// Not implemented.
return errorUnknown;
Expand Down
18 changes: 12 additions & 6 deletions include/alpaka/mem/buf/BufUniformCudaHipRt.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -280,8 +280,8 @@ namespace alpaka
# endif
# if defined(ALPAKA_ACC_GPU_HIP_ENABLED)
static_assert(
!std::is_same_v<TApi, ApiHipRt>,
"HIP devices do not support stream-ordered memory buffers.");
std::is_same_v<TApi, ApiHipRt> && TApi::version >= BOOST_VERSION_NUMBER(5, 3, 0),
"Support for stream-ordered memory buffers requires HIP/ROCm 5.3 or higher.");
# endif
static_assert(
TDim::value <= 1,
Expand Down Expand Up @@ -317,16 +317,22 @@ namespace alpaka
}
};

# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
//! The CUDA/HIP stream-ordered memory allocation capability trait specialization.
template<typename TApi, typename TDim>
struct HasAsyncBufSupport<TDim, DevUniformCudaHipRt<TApi>>
: std::bool_constant<
std::is_same_v<TApi, ApiCudaRt> && TApi::version >= BOOST_VERSION_NUMBER(11, 2, 0)
&& TDim::value <= 1>
TDim::value <= 1
&& (
# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
std::is_same_v<TApi, ApiCudaRt> && TApi::version >= BOOST_VERSION_NUMBER(11, 2, 0)
# elif defined(ALPAKA_ACC_GPU_HIP_ENABLED)
std::is_same_v<TApi, ApiHipRt> && TApi::version >= BOOST_VERSION_NUMBER(5, 3, 0)
# else
false
# endif
)>
{
};
# endif

//! The pinned/mapped memory allocation trait specialization for the CUDA/HIP devices.
template<typename TApi, typename TElem, typename TDim, typename TIdx>
Expand Down

0 comments on commit ccb2c35

Please sign in to comment.