-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
inspired by PMacc CONST_VECTOR and alpaka-group/alpaka#2441 (comment)
- Loading branch information
1 parent
a4e4b9e
commit 337e5d3
Showing
5 changed files
with
285 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
/* Copyright 2024 René Widera | ||
* SPDX-License-Identifier: MPL-2.0 | ||
*/ | ||
|
||
#pragma once | ||
|
||
#include "alpaka/Vec.hpp" | ||
#include "alpaka/core/PP.hpp" | ||
#include "alpaka/core/config.hpp" | ||
#include "alpaka/mem/MdSpan.hpp" | ||
|
||
#include <cassert> | ||
#include <tuple> | ||
#include <type_traits> | ||
|
||
namespace alpaka | ||
{ | ||
|
||
template<typename... Args> | ||
consteval size_t count_arguments(Args&&...) | ||
{ | ||
return sizeof...(Args); | ||
} | ||
|
||
} // namespace alpaka | ||
|
||
#if defined(__CUDA_ARCH__) || (defined(__HIP_DEVICE_COMPILE__) && __HIP_DEVICE_COMPILE__ == 1 && defined(__HIP__)) | ||
# define ALPAKA_DEVICE_COMPILE 1 | ||
#else | ||
# define ALPAKA_DEVICE_COMPILE 0 | ||
#endif | ||
|
||
/* select namespace depending on __CUDA_ARCH__ compiler flag*/ | ||
#if(ALPAKA_DEVICE_COMPILE == 1) | ||
# define ALPAKA_DEVICE_GLOBAL_NAMESPACE(id) using namespace ALPAKA_PP_CAT(alpaka_onAcc, id) | ||
#else | ||
# define ALPAKA_DEVICE_GLOBAL_NAMESPACE(id) using namespace ALPAKA_PP_CAT(alpaka_onHost, id) | ||
#endif | ||
|
||
#if ALPAKA_LANG_CUDA || ALPAKA_LANG_HIP | ||
# define ALPAKA_DEVICE_GLOBAL_DATA(id, dataType, name, ...) \ | ||
namespace ALPAKA_PP_CAT(alpaka_onAcc, id) \ | ||
{ \ | ||
__device__ std::type_identity_t<ALPAKA_PP_REMOVE_BRACKETS dataType> ALPAKA_PP_CAT(name, id) \ | ||
= __VA_OPT__({__VA_ARGS__}); \ | ||
} | ||
#else | ||
# define ALPAKA_DEVICE_GLOBAL_DATA(id, dataType, name, ...) | ||
|
||
#endif | ||
|
||
#if ALPAKA_DEVICE_COMPILE | ||
# define ALPAKA_DEVICE_GLOBAL_ACCESS(name, id) \ | ||
[[maybe_unused]] __device__ constexpr auto name = alpaka::onAcc::GlobalDeviceMemoryWrapper< \ | ||
ALPAKA_PP_CAT(globalVariables, id)::ALPAKA_PP_CAT(GlobalStorage, id)> \ | ||
{ \ | ||
} | ||
#else | ||
# define ALPAKA_DEVICE_GLOBAL_ACCESS(name, id) \ | ||
[[maybe_unused]] constexpr auto name = alpaka::onAcc::GlobalDeviceMemoryWrapper< \ | ||
ALPAKA_PP_CAT(globalVariables, id)::ALPAKA_PP_CAT(GlobalStorage, id)> \ | ||
{ \ | ||
} | ||
#endif | ||
|
||
namespace alpaka::onAcc | ||
{ | ||
template<typename T_Storage> | ||
struct GlobalDeviceMemoryWrapper : private T_Storage | ||
{ | ||
constexpr decltype(auto) get() const | ||
{ | ||
return alpaka::unWrapp(T_Storage::get()); | ||
} | ||
|
||
constexpr operator std::reference_wrapper<typename T_Storage::type>() | ||
{ | ||
return T_Storage::get(); | ||
} | ||
}; | ||
|
||
#define ALPAKA_DEVICE_GLOBAL_CREATE(location, dataType, id, name, ...) \ | ||
ALPAKA_DEVICE_GLOBAL_DATA(id, dataType, name, __VA_ARGS__) \ | ||
namespace ALPAKA_PP_CAT(alpaka_onHost, id) \ | ||
{ \ | ||
[[maybe_unused]] std::type_identity_t<ALPAKA_PP_REMOVE_BRACKETS dataType> ALPAKA_PP_CAT(name, id) \ | ||
= __VA_OPT__({__VA_ARGS__}); \ | ||
} \ | ||
namespace ALPAKA_PP_CAT(globalVariables, id) \ | ||
{ \ | ||
ALPAKA_DEVICE_GLOBAL_NAMESPACE(id); \ | ||
struct ALPAKA_PP_CAT(GlobalStorage, id) \ | ||
{ \ | ||
using type = ALPAKA_PP_REMOVE_BRACKETS dataType; \ | ||
ALPAKA_FN_ACC auto get() const \ | ||
{ \ | ||
return std::conditional_t< \ | ||
std::is_array_v<ALPAKA_PP_REMOVE_BRACKETS dataType>, \ | ||
alpaka::MdSpanArray<ALPAKA_PP_REMOVE_BRACKETS dataType>, \ | ||
std::reference_wrapper<ALPAKA_PP_REMOVE_BRACKETS dataType>>{ALPAKA_PP_CAT(name, id)}; \ | ||
} \ | ||
}; \ | ||
} \ | ||
ALPAKA_DEVICE_GLOBAL_ACCESS(name, id) | ||
|
||
#define ALPAKA_DEVICE_GLOBAL(type, name, ...) \ | ||
ALPAKA_DEVICE_GLOBAL_CREATE(__device__, type, __COUNTER__, name, __VA_ARGS__) | ||
|
||
} // namespace alpaka::onAcc |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,165 @@ | ||
/* Copyright 2024 René Widera | ||
* SPDX-License-Identifier: MPL-2.0 | ||
*/ | ||
#if 1 | ||
# include <alpaka/alpaka.hpp> | ||
# include <alpaka/example/executeForEach.hpp> | ||
# include <alpaka/example/executors.hpp> | ||
|
||
# include <catch2/catch_template_test_macros.hpp> | ||
# include <catch2/catch_test_macros.hpp> | ||
|
||
# include <iostream> | ||
|
||
using namespace alpaka; | ||
using namespace alpaka::onHost; | ||
|
||
using TestApis = std::decay_t<decltype(allExecutorsAndApis(enabledApis))>; | ||
|
||
|
||
ALPAKA_DEVICE_GLOBAL((alpaka::Vec<uint32_t, 2u>), initialised_vector, 42, 43); | ||
ALPAKA_DEVICE_GLOBAL((uint32_t), initialised_scalar, 43); | ||
ALPAKA_DEVICE_GLOBAL((uint32_t[2]), fixed_sized_array, 44, 45); | ||
ALPAKA_DEVICE_GLOBAL((uint32_t[2][3]), fixed_sized_array2D, {9, 5}, {6, 11, 45}); | ||
|
||
struct DeviceGlobalMemKernelVec | ||
{ | ||
template<typename T> | ||
ALPAKA_FN_ACC void operator()(T const& acc, auto out, auto numThreads) const | ||
{ | ||
for(auto globalTheradIdx : onAcc::makeIdxMap(acc, onAcc::worker::threadsInGrid, IdxRange{numThreads})) | ||
{ | ||
out[globalTheradIdx] = initialised_vector.get().y(); | ||
} | ||
} | ||
}; | ||
|
||
struct DeviceGlobalMemKernelScalar | ||
{ | ||
template<typename T> | ||
ALPAKA_FN_ACC void operator()(T const& acc, auto out, auto numThreads) const | ||
{ | ||
for(auto globalTheradIdx : onAcc::makeIdxMap(acc, onAcc::worker::threadsInGrid, IdxRange{numThreads})) | ||
{ | ||
out[globalTheradIdx] = initialised_scalar.get(); | ||
} | ||
} | ||
}; | ||
|
||
struct DeviceGlobalMemKernelCArray | ||
{ | ||
template<typename T> | ||
ALPAKA_FN_ACC void operator()(T const& acc, auto out, auto numThreads) const | ||
{ | ||
for(auto globalTheradIdx : onAcc::makeIdxMap(acc, onAcc::worker::threadsInGrid, IdxRange{numThreads})) | ||
{ | ||
out[globalTheradIdx] = fixed_sized_array.get()[0]; | ||
} | ||
} | ||
}; | ||
|
||
struct DeviceGlobalMemKernelCArray2D | ||
{ | ||
template<typename T> | ||
ALPAKA_FN_ACC void operator()(T const& acc, auto out, auto numThreads) const | ||
{ | ||
for(auto globalTheradIdx : onAcc::makeIdxMap(acc, onAcc::worker::threadsInGrid, IdxRange{numThreads})) | ||
{ | ||
out[globalTheradIdx] = fixed_sized_array2D.get()[Vec{1, 2}]; | ||
} | ||
} | ||
}; | ||
|
||
TEMPLATE_LIST_TEST_CASE("device global mem", "", TestApis) | ||
{ | ||
auto cfg = TestType::makeDict(); | ||
auto api = cfg[object::api]; | ||
auto exec = cfg[object::exec]; | ||
|
||
std::cout << api.getName() << std::endl; | ||
|
||
Platform platform = makePlatform(api); | ||
Device device = platform.makeDevice(0); | ||
|
||
std::cout << getName(platform) << " " << device.getName() << std::endl; | ||
|
||
Queue queue = device.makeQueue(); | ||
constexpr Vec numBlocks = Vec{1u}; | ||
constexpr Vec blockExtent = Vec{4u}; | ||
constexpr Vec dataExtent = numBlocks * blockExtent; | ||
std::cout << "block shared iota exec=" << core::demangledName(exec) << std::endl; | ||
auto dBuff = onHost::alloc<uint32_t>(device, dataExtent); | ||
|
||
Platform cpuPlatform = makePlatform(api::cpu); | ||
Device cpuDevice = cpuPlatform.makeDevice(0); | ||
auto hBuff = onHost::allocMirror(cpuDevice, dBuff); | ||
wait(queue); | ||
{ | ||
onHost::enqueue( | ||
queue, | ||
exec, | ||
FrameSpec{numBlocks, blockExtent}, | ||
KernelBundle{DeviceGlobalMemKernelVec{}, dBuff.getMdSpan(), dataExtent}); | ||
onHost::memcpy(queue, hBuff, dBuff); | ||
wait(queue); | ||
|
||
auto* ptr = onHost::data(hBuff); | ||
for(uint32_t i = 0u; i < dataExtent; ++i) | ||
{ | ||
CHECK(42 == ptr[i]); | ||
} | ||
} | ||
|
||
// scalar | ||
{ | ||
onHost::enqueue( | ||
queue, | ||
exec, | ||
FrameSpec{numBlocks, blockExtent}, | ||
KernelBundle{DeviceGlobalMemKernelScalar{}, dBuff.getMdSpan(), dataExtent}); | ||
onHost::memcpy(queue, hBuff, dBuff); | ||
wait(queue); | ||
|
||
auto* ptr = onHost::data(hBuff); | ||
for(uint32_t i = 0u; i < dataExtent; ++i) | ||
{ | ||
CHECK(43 == ptr[i]); | ||
} | ||
} | ||
|
||
// C array | ||
{ | ||
onHost::enqueue( | ||
queue, | ||
exec, | ||
FrameSpec{numBlocks, blockExtent}, | ||
KernelBundle{DeviceGlobalMemKernelCArray{}, dBuff.getMdSpan(), dataExtent}); | ||
onHost::memcpy(queue, hBuff, dBuff); | ||
wait(queue); | ||
|
||
auto* ptr = onHost::data(hBuff); | ||
for(uint32_t i = 0u; i < dataExtent; ++i) | ||
{ | ||
CHECK(44 == ptr[i]); | ||
} | ||
} | ||
|
||
// C array 2D | ||
{ | ||
onHost::enqueue( | ||
queue, | ||
exec, | ||
FrameSpec{numBlocks, blockExtent}, | ||
KernelBundle{DeviceGlobalMemKernelCArray2D{}, dBuff.getMdSpan(), dataExtent}); | ||
onHost::memcpy(queue, hBuff, dBuff); | ||
wait(queue); | ||
|
||
auto* ptr = onHost::data(hBuff); | ||
for(uint32_t i = 0u; i < dataExtent; ++i) | ||
{ | ||
CHECK(45 == ptr[i]); | ||
} | ||
} | ||
} | ||
|
||
#endif |