Skip to content

Commit

Permalink
workaround clang compiler errors
Browse files Browse the repository at this point in the history
Clang for HIP as problems if we have all atomic tests within one kernel
and throws the compiler error: `error: stack size limit exceeded (155632) in _ZN6alpaka16uniform_cuda_hip6de`, therefore we split the tests into multiple kernel to workaround the issue.
  • Loading branch information
psychocoderHPC committed Aug 5, 2022
1 parent 48bca6a commit 5e233b9
Showing 1 changed file with 120 additions and 12 deletions.
132 changes: 120 additions & 12 deletions test/unit/atomic/src/AtomicTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,20 +184,16 @@ class AtomicTestKernel
{
testAtomicHierarchies<Add>(acc, success, operandOrig);
testAtomicHierarchies<Sub>(acc, success, operandOrig);
testAtomicHierarchies<Min>(acc, success, operandOrig);
testAtomicHierarchies<Max>(acc, success, operandOrig);
testAtomicHierarchies<Exch>(acc, success, operandOrig);

testAtomicHierarchies<Inc>(acc, success, operandOrig);
testAtomicHierarchies<Dec>(acc, success, operandOrig);
testAtomicHierarchies<And>(acc, success, operandOrig);
testAtomicHierarchies<Or>(acc, success, operandOrig);
testAtomicHierarchies<Xor>(acc, success, operandOrig);

testAtomicCasHierarchies<alpaka::hierarchy::Threads>(acc, success, operandOrig);
}
};


template<typename TAcc, typename T>
class AtomicTestKernel<TAcc, T, std::enable_if_t<std::is_floating_point_v<T>>>
{
Expand All @@ -207,16 +203,51 @@ class AtomicTestKernel<TAcc, T, std::enable_if_t<std::is_floating_point_v<T>>>
{
testAtomicHierarchies<Add>(acc, success, operandOrig);
testAtomicHierarchies<Sub>(acc, success, operandOrig);
testAtomicHierarchies<Min>(acc, success, operandOrig);
testAtomicHierarchies<Max>(acc, success, operandOrig);
testAtomicHierarchies<Exch>(acc, success, operandOrig);

// Inc, Dec, Or, And, Xor are not supported on float/double types
// Inc, Dec are not supported on float/double types

testAtomicCasHierarchies<alpaka::hierarchy::Threads>(acc, success, operandOrig);
}
};

template<typename TAcc, typename T, typename Sfinae = void>
class AtomicCompareOperationsTestKernel
{
public:
ALPAKA_NO_HOST_ACC_WARNING
ALPAKA_FN_ACC auto operator()(TAcc const& acc, bool* success, T operandOrig) const -> void
{
testAtomicHierarchies<Min>(acc, success, operandOrig);
testAtomicHierarchies<Max>(acc, success, operandOrig);
}
};

template<typename TAcc, typename T, typename Sfinae = void>
class AtomicBitOperationsTestKernel
{
public:
ALPAKA_NO_HOST_ACC_WARNING
ALPAKA_FN_ACC auto operator()(TAcc const& acc, bool* success, T operandOrig) const -> void
{
testAtomicHierarchies<And>(acc, success, operandOrig);
testAtomicHierarchies<Or>(acc, success, operandOrig);
testAtomicHierarchies<Xor>(acc, success, operandOrig);
}
};

template<typename TAcc, typename T>
class AtomicBitOperationsTestKernel<TAcc, T, std::enable_if_t<std::is_floating_point_v<T>>>
{
public:
ALPAKA_NO_HOST_ACC_WARNING
ALPAKA_FN_ACC auto operator()(TAcc const& /* acc */, bool* success, T /* operandOrig */) const -> void
{
// Do not perform bitwise atomic operations for floating point types
ALPAKA_CHECK(*success, true);
}
};

#if(defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && BOOST_LANG_CUDA) || (defined(ALPAKA_ACC_GPU_HIP_ENABLED) && BOOST_LANG_HIP)

template<typename TApi, typename TDim, typename TIdx, typename T>
Expand All @@ -232,7 +263,43 @@ class AtomicTestKernel<
bool* success,
T /* operandOrig */) const -> void
{
// All other types are not supported by CUDA/HIP atomic operations.
// Only 32/64bit atomics are supported
ALPAKA_CHECK(*success, true);
}
};

template<typename TApi, typename TDim, typename TIdx, typename T>
class AtomicBitOperationsTestKernel<
alpaka::AccGpuUniformCudaHipRt<TApi, TDim, TIdx>,
T,
std::enable_if_t<!std::is_floating_point_v<T> && (sizeof(T) != 4u && sizeof(T) != 8u)>>
{
public:
ALPAKA_NO_HOST_ACC_WARNING
ALPAKA_FN_ACC auto operator()(
alpaka::AccGpuUniformCudaHipRt<TApi, TDim, TIdx> const& /* acc */,
bool* success,
T /* operandOrig */) const -> void
{
// Only 32/64bit atomics are supported
ALPAKA_CHECK(*success, true);
}
};

template<typename TApi, typename TDim, typename TIdx, typename T>
class AtomicCompareOperationsTestKernel<
alpaka::AccGpuUniformCudaHipRt<TApi, TDim, TIdx>,
T,
std::enable_if_t<sizeof(T) != 4u && sizeof(T) != 8u>>
{
public:
ALPAKA_NO_HOST_ACC_WARNING
ALPAKA_FN_ACC auto operator()(
alpaka::AccGpuUniformCudaHipRt<TApi, TDim, TIdx> const& /* acc */,
bool* success,
T /* operandOrig */) const -> void
{
// Only 32/64bit atomics are supported
ALPAKA_CHECK(*success, true);
}
};
Expand All @@ -249,7 +316,39 @@ class AtomicTestKernel<alpaka::AccOacc<TDim, TIdx>, T, std::enable_if_t<sizeof(T
ALPAKA_FN_ACC auto operator()(alpaka::AccOacc<TDim, TIdx> const& /* acc */, bool* success, T /* operandOrig */)
const -> void
{
// All other types are not supported by OpenACC atomic operations.
// Only 32/64bit atomics are supported
ALPAKA_CHECK(*success, true);
}
};

template<typename TDim, typename TIdx, typename T>
class AtomicBitOperationsTestKernel<
alpaka::AccOacc<TDim, TIdx>,
T,
std::enable_if_t<!std::is_floating_point_v<T> && (sizeof(T) != 4u && sizeof(T) != 8u)>>
{
public:
ALPAKA_NO_HOST_ACC_WARNING
ALPAKA_FN_ACC auto operator()(alpaka::AccOacc<TDim, TIdx> const& /* acc */, bool* success, T /* operandOrig */)
const -> void
{
// Only 32/64bit atomics are supported
ALPAKA_CHECK(*success, true);
}
};

template<typename TDim, typename TIdx, typename T>
class AtomicCompareOperationsTestKernel<
alpaka::AccOacc<TDim, TIdx>,
T,
std::enable_if_t<sizeof(T) != 4u && sizeof(T) != 8u>>
{
public:
ALPAKA_NO_HOST_ACC_WARNING
ALPAKA_FN_ACC auto operator()(alpaka::AccOacc<TDim, TIdx> const& /* acc */, bool* success, T /* operandOrig */)
const -> void
{
// Only 32/64bit atomics are supported
ALPAKA_CHECK(*success, true);
}
};
Expand All @@ -266,10 +365,19 @@ struct TestAtomicOperations

alpaka::test::KernelExecutionFixture<TAcc> fixture(alpaka::Vec<Dim, Idx>::ones());

AtomicTestKernel<TAcc, T> kernel;

T value = static_cast<T>(32);

// The tests are split into multiple kernel to avoid breaking the maximum kernel size.
// clang (HIP) e.g. shows compile error: 'error: stack size limit exceeded (155632) in
// _ZN6alpaka16uniform_cuda_hip6de'
AtomicTestKernel<TAcc, T> kernel;
REQUIRE(fixture(kernel, value));

AtomicBitOperationsTestKernel<TAcc, T> kernelBitOps;
REQUIRE(fixture(kernelBitOps, value));

AtomicCompareOperationsTestKernel<TAcc, T> kernelCompareOps;
REQUIRE(fixture(kernelCompareOps, value));
}
};

Expand Down

0 comments on commit 5e233b9

Please sign in to comment.