diff --git a/benchmarks/babelstream/src/AlpakaStream.h b/benchmarks/babelstream/src/AlpakaStream.h index ba556b028dba..0524fb37a0ce 100644 --- a/benchmarks/babelstream/src/AlpakaStream.h +++ b/benchmarks/babelstream/src/AlpakaStream.h @@ -42,8 +42,8 @@ struct AlpakaStream : Stream using DevHost = alpaka::Dev; using PlatformAcc = alpaka::Platform; using DevAcc = alpaka::Dev; - using BufHost = alpaka::Buf; - using BufAcc = alpaka::Buf; + using BufHost = alpaka::Buf>; + using BufAcc = alpaka::Buf>; using Queue = alpaka::Queue; using WorkDiv = alpaka::WorkDivMembers; diff --git a/example/bufferCopy/src/bufferCopy.cpp b/example/bufferCopy/src/bufferCopy.cpp index 12bd4a1a2acb..ad9d7cc08143 100644 --- a/example/bufferCopy/src/bufferCopy.cpp +++ b/example/bufferCopy/src/bufferCopy.cpp @@ -148,8 +148,7 @@ auto main() -> int // // The `alloc` method returns a reference counted buffer handle. // When the last such handle is destroyed, the memory is freed automatically. - using BufHost = alpaka::Buf; - BufHost hostBuffer(alpaka::allocBuf(devHost, extents)); + auto hostBuffer(alpaka::allocBuf(devHost, extents)); // You can also use already allocated memory and wrap it within a view (irrespective of the device type). // The view does not own the underlying memory. So you have to make sure that // the view does not outlive its underlying memory. @@ -159,7 +158,7 @@ auto main() -> int // Allocate accelerator memory buffers // // The interface to allocate a buffer is the same on the host and on the device. - using BufAcc = alpaka::Buf; + using BufAcc = alpaka::Buf>; BufAcc deviceBuffer1(alpaka::allocBuf(devAcc, extents)); BufAcc deviceBuffer2(alpaka::allocBuf(devAcc, extents)); diff --git a/example/convolution1D/src/convolution1D.cpp b/example/convolution1D/src/convolution1D.cpp index 047f462ef0a7..ed3885bfff27 100644 --- a/example/convolution1D/src/convolution1D.cpp +++ b/example/convolution1D/src/convolution1D.cpp @@ -83,7 +83,7 @@ auto main() -> int using DevAcc = alpaka::ExampleDefaultAcc; using QueueProperty = alpaka::Blocking; using QueueAcc = alpaka::Queue; - using BufAcc = alpaka::Buf; + using BufAcc = alpaka::Buf>; std::cout << "Using alpaka accelerator: " << alpaka::getAccName() << '\n'; diff --git a/example/counterBasedRng/src/counterBasedRng.cpp b/example/counterBasedRng/src/counterBasedRng.cpp index 86da223f1d0a..0f2489639323 100644 --- a/example/counterBasedRng/src/counterBasedRng.cpp +++ b/example/counterBasedRng/src/counterBasedRng.cpp @@ -162,8 +162,7 @@ auto main() -> int CounterBasedRngKernel::Key key = {rd(), rd()}; // Allocate buffer on the accelerator - using BufAcc = alpaka::Buf; - BufAcc bufAcc(alpaka::allocBuf(devAcc, extent)); + auto bufAcc(alpaka::allocBuf(devAcc, extent)); // Create the kernel execution task. auto const taskKernelAcc = alpaka::createTaskKernel( diff --git a/example/heatEquation/src/heatEquation.cpp b/example/heatEquation/src/heatEquation.cpp index ff7ee6c7dafe..4e97e15f6974 100644 --- a/example/heatEquation/src/heatEquation.cpp +++ b/example/heatEquation/src/heatEquation.cpp @@ -121,9 +121,8 @@ auto main() -> int double* const pNextHost = std::data(uNextBufHost); // Accelerator buffer - using BufAcc = alpaka::Buf; - auto uNextBufAcc = BufAcc{alpaka::allocBuf(devAcc, extent)}; - auto uCurrBufAcc = BufAcc{alpaka::allocBuf(devAcc, extent)}; + auto uNextBufAcc{alpaka::allocBuf(devAcc, extent)}; + auto uCurrBufAcc{alpaka::allocBuf(devAcc, extent)}; double* pCurrAcc = std::data(uCurrBufAcc); double* pNextAcc = std::data(uNextBufAcc); diff --git a/example/monteCarloIntegration/src/monteCarloIntegration.cpp b/example/monteCarloIntegration/src/monteCarloIntegration.cpp index 52e050785c88..1059c75b8a7b 100644 --- a/example/monteCarloIntegration/src/monteCarloIntegration.cpp +++ b/example/monteCarloIntegration/src/monteCarloIntegration.cpp @@ -88,8 +88,6 @@ auto main() -> int using QueueAcc = alpaka::Queue; QueueAcc queue{devAcc}; - using BufHost = alpaka::Buf; - using BufAcc = alpaka::Buf; using WorkDiv = alpaka::WorkDivMembers; // Problem parameter. constexpr size_t numPoints = 1'000'000u; @@ -104,8 +102,8 @@ auto main() -> int alpaka::GridBlockExtentSubDivRestrictions::Unrestricted)}; // Setup buffer. - BufHost bufHost{alpaka::allocBuf(devHost, extent)}; - BufAcc bufAcc{alpaka::allocBuf(devAcc, extent)}; + auto bufHost{alpaka::allocBuf(devHost, extent)}; + auto bufAcc{alpaka::allocBuf(devAcc, extent)}; uint32_t* const ptrBufAcc{std::data(bufAcc)}; // Initialize the global count to 0. diff --git a/example/randomCells2D/src/randomCells2D.cpp b/example/randomCells2D/src/randomCells2D.cpp index a0a21370cecc..1f614f7ce516 100644 --- a/example/randomCells2D/src/randomCells2D.cpp +++ b/example/randomCells2D/src/randomCells2D.cpp @@ -156,12 +156,12 @@ auto main() -> int using QueueAcc = alpaka::Queue; QueueAcc queue{devAcc}; - using BufHost = alpaka::Buf; - using BufAcc = alpaka::Buf; - using BufHostRand = alpaka::Buf; - using BufAccRand = alpaka::Buf; - using BufHostRandVec = alpaka::Buf; - using BufAccRandVec = alpaka::Buf; + using BufHost = alpaka::Buf>; + using BufAcc = alpaka::Buf>; + using BufHostRand = alpaka::Buf>; + using BufAccRand = alpaka::Buf>; + using BufHostRandVec = alpaka::Buf>; + using BufAccRandVec = alpaka::Buf>; using WorkDiv = alpaka::WorkDivMembers; constexpr Idx numX = NUM_X; diff --git a/example/randomStrategies/src/randomStrategies.cpp b/example/randomStrategies/src/randomStrategies.cpp index 3dd0c9efde5e..43f01d0fe00f 100644 --- a/example/randomStrategies/src/randomStrategies.cpp +++ b/example/randomStrategies/src/randomStrategies.cpp @@ -44,8 +44,8 @@ struct Box QueueAcc queue; ///< default accelerator queue // buffers holding the PRNG states - using BufHostRand = alpaka::Buf; - using BufAccRand = alpaka::Buf; + using BufHostRand = alpaka::Buf>; + using BufAccRand = alpaka::Buf>; Vec const extentRand; ///< size of the buffer of PRNG states WorkDiv workdivRand; ///< work division for PRNG buffer initialization @@ -53,8 +53,8 @@ struct Box BufAccRand bufAccRand; ///< device side PRNG states buffer // buffers holding the "simulation" results - using BufHost = alpaka::Buf; - using BufAcc = alpaka::Buf; + using BufHost = alpaka::Buf>; + using BufAcc = alpaka::Buf>; Vec const extentResult; ///< size of the results buffer WorkDiv workdivResult; ///< work division of the result calculation diff --git a/example/reduce/src/reduce.cpp b/example/reduce/src/reduce.cpp index 2d5fe2c1dc21..d132995ef9d7 100644 --- a/example/reduce/src/reduce.cpp +++ b/example/reduce/src/reduce.cpp @@ -49,7 +49,7 @@ auto reduce( DevAcc devAcc, QueueAcc queue, uint64_t n, - alpaka::Buf hostMemory, + alpaka::Buf> hostMemory, TFunc func) -> T { static constexpr uint64_t blockSize = getMaxBlockSize(); @@ -62,10 +62,11 @@ auto reduce( if(blockCount > maxBlockCount) blockCount = maxBlockCount; - alpaka::Buf sourceDeviceMemory = alpaka::allocBuf(devAcc, n); + using DevBuf = alpaka::Buf>; - alpaka::Buf destinationDeviceMemory - = alpaka::allocBuf(devAcc, static_cast(blockCount)); + DevBuf sourceDeviceMemory = alpaka::allocBuf(devAcc, n); + + DevBuf destinationDeviceMemory = alpaka::allocBuf(devAcc, static_cast(blockCount)); // copy the data to the GPU alpaka::memcpy(queue, sourceDeviceMemory, hostMemory, n); diff --git a/example/vectorAdd/src/vectorAdd.cpp b/example/vectorAdd/src/vectorAdd.cpp index 5eca205279c8..1639a7602616 100644 --- a/example/vectorAdd/src/vectorAdd.cpp +++ b/example/vectorAdd/src/vectorAdd.cpp @@ -111,8 +111,8 @@ auto main() -> int auto const devHost = alpaka::getDevByIdx(platformHost, 0); // Allocate 3 host memory buffers - using BufHost = alpaka::Buf; - BufHost bufHostA(alpaka::allocBuf(devHost, extent)); + auto bufHostA(alpaka::allocBuf(devHost, extent)); + using BufHost = decltype(bufHostA); BufHost bufHostB(alpaka::allocBuf(devHost, extent)); BufHost bufHostC(alpaka::allocBuf(devHost, extent)); @@ -129,8 +129,8 @@ auto main() -> int } // Allocate 3 buffers on the accelerator - using BufAcc = alpaka::Buf; - BufAcc bufAccA(alpaka::allocBuf(devAcc, extent)); + auto bufAccA(alpaka::allocBuf(devAcc, extent)); + using BufAcc = decltype(bufAccA); BufAcc bufAccB(alpaka::allocBuf(devAcc, extent)); BufAcc bufAccC(alpaka::allocBuf(devAcc, extent)); diff --git a/include/alpaka/acc/AccGenericSycl.hpp b/include/alpaka/acc/AccGenericSycl.hpp index fc8ba1da760f..9e533f892701 100644 --- a/include/alpaka/acc/AccGenericSycl.hpp +++ b/include/alpaka/acc/AccGenericSycl.hpp @@ -118,6 +118,11 @@ namespace alpaka::trait { }; + struct MemVisibility> + { + using type = alpaka::MemVisibleGenericSycl; + }; + //! The SYCL accelerator device properties get trait specialization. template typename TAcc, typename TDim, typename TIdx> struct GetAccDevProps< diff --git a/include/alpaka/acc/Tag.hpp b/include/alpaka/acc/Tag.hpp index f7880afd6f15..73575daa0a4d 100644 --- a/include/alpaka/acc/Tag.hpp +++ b/include/alpaka/acc/Tag.hpp @@ -31,6 +31,7 @@ namespace alpaka CREATE_ACC_TAG(TagGpuCudaRt); CREATE_ACC_TAG(TagGpuHipRt); CREATE_ACC_TAG(TagGpuSyclIntel); +#undef CREATE_ACC_TAG namespace trait { diff --git a/include/alpaka/alpaka.hpp b/include/alpaka/alpaka.hpp index e06dede53d48..b0458c00249e 100644 --- a/include/alpaka/alpaka.hpp +++ b/include/alpaka/alpaka.hpp @@ -130,6 +130,7 @@ #include "alpaka/math/MathStdLib.hpp" #include "alpaka/math/MathUniformCudaHipBuiltIn.hpp" // mem +#include "alpaka/mem/Visibility.hpp" #include "alpaka/mem/alloc/AllocCpuAligned.hpp" #include "alpaka/mem/alloc/AllocCpuNew.hpp" #include "alpaka/mem/alloc/Traits.hpp" diff --git a/include/alpaka/dev/DevCpu.hpp b/include/alpaka/dev/DevCpu.hpp index e36c263072fa..b1cda711541b 100644 --- a/include/alpaka/dev/DevCpu.hpp +++ b/include/alpaka/dev/DevCpu.hpp @@ -167,16 +167,16 @@ namespace alpaka }; } // namespace trait - template + template class BufCpu; namespace trait { //! The CPU device memory buffer type trait specialization. - template - struct BufType + template + struct BufType { - using type = BufCpu; + using type = BufCpu; }; //! The CPU device platform type trait specialization. diff --git a/include/alpaka/dev/DevGenericSycl.hpp b/include/alpaka/dev/DevGenericSycl.hpp index 729090f8f2d3..55902d840f15 100644 --- a/include/alpaka/dev/DevGenericSycl.hpp +++ b/include/alpaka/dev/DevGenericSycl.hpp @@ -32,7 +32,7 @@ namespace alpaka { - template + template class BufGenericSycl; namespace detail @@ -219,10 +219,10 @@ namespace alpaka::trait }; //! The SYCL device memory buffer type trait specialization. - template - struct BufType, TElem, TDim, TIdx> + template + struct BufType, TElem, TDim, TIdx, TMemVisibility> { - using type = BufGenericSycl; + using type = BufGenericSycl; }; //! The SYCL device platform type trait specialization. diff --git a/include/alpaka/dev/DevUniformCudaHipRt.hpp b/include/alpaka/dev/DevUniformCudaHipRt.hpp index 876d8ca5a434..dfbc1dc62d77 100644 --- a/include/alpaka/dev/DevUniformCudaHipRt.hpp +++ b/include/alpaka/dev/DevUniformCudaHipRt.hpp @@ -48,7 +48,7 @@ namespace alpaka template struct PlatformUniformCudaHipRt; - template + template struct BufUniformCudaHipRt; //! The CUDA/HIP RT device handle. @@ -222,10 +222,10 @@ namespace alpaka }; //! The CUDA/HIP RT device memory buffer type trait specialization. - template - struct BufType, TElem, TDim, TIdx> + template + struct BufType, TElem, TDim, TIdx, TMemVisibility> { - using type = BufUniformCudaHipRt; + using type = BufUniformCudaHipRt; }; //! The CUDA/HIP RT device platform type trait specialization. diff --git a/include/alpaka/mem/Visibility.hpp b/include/alpaka/mem/Visibility.hpp new file mode 100644 index 000000000000..349beb719e20 --- /dev/null +++ b/include/alpaka/mem/Visibility.hpp @@ -0,0 +1,153 @@ +/* Copyright 2023 Simeon Ehrig + * SPDX-License-Identifier: MPL-2.0 + */ + +#pragma once + +#include "alpaka/acc/Traits.hpp" +#include "alpaka/dev/Traits.hpp" +#include "alpaka/mem/view/ViewAccessOps.hpp" +#include "alpaka/meta/ForEachType.hpp" +#include "alpaka/meta/TypeListOps.hpp" +#include "alpaka/platform/Traits.hpp" + +#include +#include +#include +#include + +#define CREATE_MEM_VISIBILITY(mem_name) \ + struct mem_name \ + { \ + static std::string get_name() \ + { \ + return #mem_name; \ + } \ + } + +namespace alpaka +{ + CREATE_MEM_VISIBILITY(MemVisibleCPU); + CREATE_MEM_VISIBILITY(MemVisibleFpgaSyclIntel); + CREATE_MEM_VISIBILITY(MemVisibleGenericSycl); + CREATE_MEM_VISIBILITY(MemVisibleCpuSycl); + CREATE_MEM_VISIBILITY(MemVisibleGpuCudaRt); + CREATE_MEM_VISIBILITY(MemVisibleGpuHipRt); + CREATE_MEM_VISIBILITY(MemVisibleGpuSyclIntel); +#undef CREATE_MEM_VISIBILITY + + namespace trait + { + //! Get memory visibility from a type. + //! Normally it is acc or buffer type. + //! + //! \tparam TType which implements the trait + template + struct MemVisibility; + } // namespace trait + + namespace detail + { + struct AppendMemTypeName + { + template + void operator()(std::vector& vs) + { + vs.push_back(TTYPE::get_name()); + } + }; + } // namespace detail + + template< + typename T, + typename = std::enable_if_t< + alpaka::isPlatform> || alpaka::isDevice> + || alpaka::isAccelerator> || alpaka::internal::isView>>> + inline std::string getMemVisiblityName() + { + using MemVisibilityType = typename alpaka::trait::MemVisibility>::type; + if constexpr(alpaka::meta::isList) + { + std::vector vs; + alpaka::meta::forEachType(detail::AppendMemTypeName{}, vs); + + std::stringstream ss; + ss << "<"; + for(std::size_t i = 0; i < vs.size(); ++i) + { + if(i == (vs.size() - 1)) + { + ss << vs[i] << ">"; + } + else + { + ss << vs[i] << ", "; + } + } + return ss.str(); + } + else + { + return MemVisibilityType::get_name(); + } + } + + template + [[maybe_unused]] static std::string getMemVisiblityName(TType) + { + return getMemVisiblityName(); + } + + template< + typename T, + typename TBuf, + typename = std::enable_if_t< + (alpaka::isPlatform> || alpaka::isDevice> + || alpaka::isAccelerator>) &&alpaka::internal::isView>>> + inline constexpr bool hasSameMemView() + { + if constexpr(alpaka::isDevice> || alpaka::isAccelerator>) + { + using Platform = alpaka::Platform; + return alpaka::meta::Contains< + typename alpaka::trait::MemVisibility::type, + typename alpaka::trait::MemVisibility::type>::value; + } + else + { + return alpaka::meta::Contains< + typename alpaka::trait::MemVisibility::type, + typename alpaka::trait::MemVisibility::type>::value; + } + ALPAKA_UNREACHABLE({}); + } + + template + inline constexpr bool hasSameMemView(TDev&, TBuf&) + { + return hasSameMemView, std::decay_t>(); + } + + namespace detail + { + template + struct MemVisibilityHelper + { + using type = typename alpaka::trait::MemVisibility::type; + }; + + template + struct MemVisibilityHelper< + T, + std::enable_if_t> || alpaka::isAccelerator>>> + { + using type = typename alpaka::trait::MemVisibility>>::type; + }; + } // namespace detail + + template + using MemVisibility = typename alpaka::detail::MemVisibilityHelper>::type; + + template + using MemVisibilityTypeList = alpaka::meta::toTuple>>; +} // namespace alpaka diff --git a/include/alpaka/mem/buf/BufCpu.hpp b/include/alpaka/mem/buf/BufCpu.hpp index 4bfc91c73332..1045933965b4 100644 --- a/include/alpaka/mem/buf/BufCpu.hpp +++ b/include/alpaka/mem/buf/BufCpu.hpp @@ -11,10 +11,12 @@ #include "alpaka/core/Vectorize.hpp" #include "alpaka/dev/DevCpu.hpp" #include "alpaka/dev/Traits.hpp" +#include "alpaka/mem/Visibility.hpp" #include "alpaka/mem/alloc/AllocCpuAligned.hpp" #include "alpaka/mem/buf/Traits.hpp" #include "alpaka/mem/view/ViewAccessOps.hpp" #include "alpaka/meta/DependentFalseType.hpp" +#include "alpaka/meta/Unique.hpp" #include "alpaka/platform/PlatformCpu.hpp" #include "alpaka/vec/Vec.hpp" @@ -85,8 +87,8 @@ namespace alpaka } // namespace detail //! The CPU memory buffer. - template - class BufCpu : public internal::ViewAccessOps> + template + class BufCpu : public internal::ViewAccessOps> { public: template @@ -103,67 +105,69 @@ namespace alpaka namespace trait { //! The BufCpu device type trait specialization. - template - struct DevType> + template + struct DevType> { using type = DevCpu; }; //! The BufCpu device get trait specialization. - template - struct GetDev> + template + struct GetDev> { - ALPAKA_FN_HOST static auto getDev(BufCpu const& buf) -> DevCpu + ALPAKA_FN_HOST static auto getDev(BufCpu const& buf) -> DevCpu { return buf.m_spBufCpuImpl->m_dev; } }; //! The BufCpu dimension getter trait. - template - struct DimType> + template + struct DimType> { using type = TDim; }; //! The BufCpu memory element type get trait specialization. - template - struct ElemType> + template + struct ElemType> { using type = TElem; }; //! The BufCpu width get trait specialization. - template - struct GetExtents> + template + struct GetExtents> { - ALPAKA_FN_HOST auto operator()(BufCpu const& buf) + ALPAKA_FN_HOST auto operator()(BufCpu const& buf) { return buf.m_spBufCpuImpl->m_extentElements; } }; //! The BufCpu native pointer get trait specialization. - template - struct GetPtrNative> + template + struct GetPtrNative> { - ALPAKA_FN_HOST static auto getPtrNative(BufCpu const& buf) -> TElem const* + ALPAKA_FN_HOST static auto getPtrNative(BufCpu const& buf) + -> TElem const* { return buf.m_spBufCpuImpl->m_pMem; } - ALPAKA_FN_HOST static auto getPtrNative(BufCpu& buf) -> TElem* + ALPAKA_FN_HOST static auto getPtrNative(BufCpu& buf) -> TElem* { return buf.m_spBufCpuImpl->m_pMem; } }; //! The BufCpu pointer on device get trait specialization. - template - struct GetPtrDev, DevCpu> + template + struct GetPtrDev, DevCpu, TMemVisibility> { - ALPAKA_FN_HOST static auto getPtrDev(BufCpu const& buf, DevCpu const& dev) - -> TElem const* + ALPAKA_FN_HOST static auto getPtrDev( + BufCpu const& buf, + DevCpu const& dev) -> TElem const* { if(dev == getDev(buf)) { @@ -175,7 +179,8 @@ namespace alpaka } } - ALPAKA_FN_HOST static auto getPtrDev(BufCpu& buf, DevCpu const& dev) -> TElem* + ALPAKA_FN_HOST static auto getPtrDev(BufCpu& buf, DevCpu const& dev) + -> TElem* { if(dev == getDev(buf)) { @@ -193,7 +198,8 @@ namespace alpaka struct BufAlloc { template - ALPAKA_FN_HOST static auto allocBuf(DevCpu const& dev, TExtent const& extent) -> BufCpu + ALPAKA_FN_HOST static auto allocBuf(DevCpu const& dev, TExtent const& extent) + -> BufCpu>> { ALPAKA_DEBUG_MINIMAL_LOG_SCOPE; @@ -215,7 +221,11 @@ namespace alpaka auto* memPtr = alpaka::malloc(Allocator{}, static_cast(getExtentProduct(extent))); auto deleter = [](TElem* ptr) { alpaka::free(Allocator{}, ptr); }; - return BufCpu(dev, memPtr, std::move(deleter), extent); + return BufCpu>>( + dev, + memPtr, + std::move(deleter), + extent); } }; @@ -224,7 +234,8 @@ namespace alpaka struct AsyncBufAlloc { template - ALPAKA_FN_HOST static auto allocAsyncBuf(TQueue queue, TExtent const& extent) -> BufCpu + ALPAKA_FN_HOST static auto allocAsyncBuf(TQueue queue, TExtent const& extent) + -> BufCpu>> { ALPAKA_DEBUG_MINIMAL_LOG_SCOPE; @@ -260,7 +271,11 @@ namespace alpaka }); }; - return BufCpu(dev, memPtr, std::move(deleter), extent); + return BufCpu>>( + dev, + memPtr, + std::move(deleter), + extent); } }; @@ -278,7 +293,13 @@ namespace alpaka ALPAKA_FN_HOST static auto allocMappedBuf( DevCpu const& host, PlatformCpu const& /*platform*/, - TExtent const& extent) -> BufCpu + TExtent const& extent) + -> BufCpu< + TElem, + TDim, + TIdx, + alpaka::meta::Unique< + std::tuple, alpaka::MemVisibility>>> { // Allocate standard host memory. return allocBuf(host, extent); @@ -292,21 +313,28 @@ namespace alpaka }; //! The BufCpu offset get trait specialization. - template - struct GetOffsets> + template + struct GetOffsets> { - ALPAKA_FN_HOST auto operator()(BufCpu const&) const -> Vec + ALPAKA_FN_HOST auto operator()(BufCpu const&) const -> Vec { return Vec::zeros(); } }; //! The BufCpu idx type trait specialization. - template - struct IdxType> + template + struct IdxType> { using type = TIdx; }; + + template + struct MemVisibility> + { + using type = TMemVisibility; + }; + } // namespace trait } // namespace alpaka diff --git a/include/alpaka/mem/buf/BufCpuSycl.hpp b/include/alpaka/mem/buf/BufCpuSycl.hpp index d63eebf540ca..d6d05f487cb4 100644 --- a/include/alpaka/mem/buf/BufCpuSycl.hpp +++ b/include/alpaka/mem/buf/BufCpuSycl.hpp @@ -11,8 +11,8 @@ namespace alpaka { - template - using BufCpuSycl = BufGenericSycl; + template + using BufCpuSycl = BufGenericSycl; } // namespace alpaka #endif diff --git a/include/alpaka/mem/buf/BufCudaRt.hpp b/include/alpaka/mem/buf/BufCudaRt.hpp index a5e0020bdb62..f974b2490f3f 100644 --- a/include/alpaka/mem/buf/BufCudaRt.hpp +++ b/include/alpaka/mem/buf/BufCudaRt.hpp @@ -5,14 +5,16 @@ #pragma once #include "alpaka/core/ApiCudaRt.hpp" +#include "alpaka/mem/Visibility.hpp" #include "alpaka/mem/buf/BufUniformCudaHipRt.hpp" + #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED namespace alpaka { - template - using BufCudaRt = BufUniformCudaHipRt; + template + using BufCudaRt = BufUniformCudaHipRt; } // namespace alpaka #endif // ALPAKA_ACC_GPU_CUDA_ENABLED diff --git a/include/alpaka/mem/buf/BufFpgaSyclIntel.hpp b/include/alpaka/mem/buf/BufFpgaSyclIntel.hpp index 2dca26f1984f..30238b69cc62 100644 --- a/include/alpaka/mem/buf/BufFpgaSyclIntel.hpp +++ b/include/alpaka/mem/buf/BufFpgaSyclIntel.hpp @@ -11,8 +11,8 @@ namespace alpaka { - template - using BufFpgaSyclIntel = BufGenericSycl; + template + using BufFpgaSyclIntel = BufGenericSycl; } // namespace alpaka #endif diff --git a/include/alpaka/mem/buf/BufGenericSycl.hpp b/include/alpaka/mem/buf/BufGenericSycl.hpp index b4a5fd94ed54..7f9d63c05cb7 100644 --- a/include/alpaka/mem/buf/BufGenericSycl.hpp +++ b/include/alpaka/mem/buf/BufGenericSycl.hpp @@ -9,9 +9,11 @@ #include "alpaka/dev/Traits.hpp" #include "alpaka/dim/DimIntegralConst.hpp" #include "alpaka/dim/Traits.hpp" +#include "alpaka/mem/Visibility.hpp" #include "alpaka/mem/buf/BufCpu.hpp" #include "alpaka/mem/buf/Traits.hpp" #include "alpaka/mem/view/ViewAccessOps.hpp" +#include "alpaka/meta/Unique.hpp" #include "alpaka/vec/Vec.hpp" #include @@ -24,8 +26,8 @@ namespace alpaka { //! The SYCL memory buffer. - template - class BufGenericSycl : public internal::ViewAccessOps> + template + class BufGenericSycl : public internal::ViewAccessOps> { public: static_assert( @@ -62,67 +64,68 @@ namespace alpaka namespace alpaka::trait { //! The BufGenericSycl device type trait specialization. - template - struct DevType> + template + struct DevType> { using type = DevGenericSycl; }; //! The BufGenericSycl device get trait specialization. - template - struct GetDev> + template + struct GetDev> { - static auto getDev(BufGenericSycl const& buf) + static auto getDev(BufGenericSycl const& buf) { return buf.m_dev; } }; //! The BufGenericSycl dimension getter trait specialization. - template - struct DimType> + template + struct DimType> { using type = TDim; }; //! The BufGenericSycl memory element type get trait specialization. - template - struct ElemType> + template + struct ElemType> { using type = TElem; }; //! The BufGenericSycl extent get trait specialization. - template - struct GetExtents> + template + struct GetExtents> { - auto operator()(BufGenericSycl const& buf) const + auto operator()(BufGenericSycl const& buf) const { return buf.m_extentElements; } }; //! The BufGenericSycl native pointer get trait specialization. - template - struct GetPtrNative> + template + struct GetPtrNative> { - static auto getPtrNative(BufGenericSycl const& buf) -> TElem const* + static auto getPtrNative(BufGenericSycl const& buf) + -> TElem const* { return buf.m_spMem.get(); } - static auto getPtrNative(BufGenericSycl& buf) -> TElem* + static auto getPtrNative(BufGenericSycl& buf) -> TElem* { return buf.m_spMem.get(); } }; //! The BufGenericSycl pointer on device get trait specialization. - template - struct GetPtrDev, DevGenericSycl> + template + struct GetPtrDev, DevGenericSycl> { static auto getPtrDev( - BufGenericSycl const& buf, + BufGenericSycl const& buf, DevGenericSycl const& dev) -> TElem const* { if(dev == getDev(buf)) @@ -135,8 +138,9 @@ namespace alpaka::trait } } - static auto getPtrDev(BufGenericSycl& buf, DevGenericSycl const& dev) - -> TElem* + static auto getPtrDev( + BufGenericSycl& buf, + DevGenericSycl const& dev) -> TElem* { if(dev == getDev(buf)) { @@ -155,7 +159,7 @@ namespace alpaka::trait { template static auto allocBuf(DevGenericSycl const& dev, TExtent const& extent) - -> BufGenericSycl + -> BufGenericSycl> { ALPAKA_DEBUG_MINIMAL_LOG_SCOPE; @@ -197,7 +201,11 @@ namespace alpaka::trait nativeContext); auto deleter = [ctx = nativeContext](TElem* ptr) { sycl::free(ptr, ctx); }; - return BufGenericSycl(dev, memPtr, std::move(deleter), extent); + return BufGenericSycl>( + dev, + memPtr, + std::move(deleter), + extent); } }; @@ -208,10 +216,10 @@ namespace alpaka::trait }; //! The BufGenericSycl offset get trait specialization. - template - struct GetOffsets> + template + struct GetOffsets> { - auto operator()(BufGenericSycl const&) const -> Vec + auto operator()(BufGenericSycl const&) const -> Vec { return Vec::zeros(); } @@ -222,8 +230,11 @@ namespace alpaka::trait struct BufAllocMapped { template - static auto allocMappedBuf(DevCpu const& host, TPlatform const& platform, TExtent const& extent) - -> BufCpu + static auto allocMappedBuf(DevCpu const& host, TPlatform const& platform, TExtent const& extent) -> BufCpu< + TElem, + TDim, + TIdx, + alpaka::meta::Unique>>> { ALPAKA_DEBUG_MINIMAL_LOG_SCOPE; @@ -233,31 +244,47 @@ namespace alpaka::trait TElem* memPtr = sycl::malloc_host(static_cast(getExtentProduct(extent)), ctx); auto deleter = [ctx](TElem* ptr) { sycl::free(ptr, ctx); }; - return BufCpu(host, memPtr, std::move(deleter), extent); + return BufCpu< + TElem, + TDim, + TIdx, + alpaka::meta::Unique>>>( + host, + memPtr, + std::move(deleter), + extent); } }; //! The BufGenericSycl idx type trait specialization. - template - struct IdxType> + template + struct IdxType> { using type = TIdx; }; //! The BufCpu pointer on SYCL device get trait specialization. - template - struct GetPtrDev, DevGenericSycl> + template + struct GetPtrDev, DevGenericSycl> { - static auto getPtrDev(BufCpu const& buf, DevGenericSycl const&) -> TElem const* + static auto getPtrDev(BufCpu const& buf, DevGenericSycl const&) + -> TElem const* { return getPtrNative(buf); } - static auto getPtrDev(BufCpu& buf, DevGenericSycl const&) -> TElem* + static auto getPtrDev(BufCpu& buf, DevGenericSycl const&) + -> TElem* { return getPtrNative(buf); } }; + + template + struct MemVisibility> + { + using type = TMemVisibility; + }; } // namespace alpaka::trait # include "alpaka/mem/buf/sycl/Copy.hpp" diff --git a/include/alpaka/mem/buf/BufGpuSyclIntel.hpp b/include/alpaka/mem/buf/BufGpuSyclIntel.hpp index dd20f8a39648..f46d90971387 100644 --- a/include/alpaka/mem/buf/BufGpuSyclIntel.hpp +++ b/include/alpaka/mem/buf/BufGpuSyclIntel.hpp @@ -11,8 +11,8 @@ namespace alpaka { - template - using BufGpuSyclIntel = BufGenericSycl; + template + using BufGpuSyclIntel = BufGenericSycl; } // namespace alpaka #endif diff --git a/include/alpaka/mem/buf/BufHipRt.hpp b/include/alpaka/mem/buf/BufHipRt.hpp index 4a59bc46e5d5..cf392d6c94a9 100644 --- a/include/alpaka/mem/buf/BufHipRt.hpp +++ b/include/alpaka/mem/buf/BufHipRt.hpp @@ -11,8 +11,8 @@ namespace alpaka { - template - using BufHipRt = BufUniformCudaHipRt; + template + using BufHipRt = BufUniformCudaHipRt; } // namespace alpaka #endif // ALPAKA_ACC_GPU_HIP_ENABLED diff --git a/include/alpaka/mem/buf/BufUniformCudaHipRt.hpp b/include/alpaka/mem/buf/BufUniformCudaHipRt.hpp index 826edaba7b1e..d15e80763f47 100644 --- a/include/alpaka/mem/buf/BufUniformCudaHipRt.hpp +++ b/include/alpaka/mem/buf/BufUniformCudaHipRt.hpp @@ -11,9 +11,11 @@ #include "alpaka/dev/DevUniformCudaHipRt.hpp" #include "alpaka/dev/Traits.hpp" #include "alpaka/dim/DimIntegralConst.hpp" +#include "alpaka/mem/Visibility.hpp" #include "alpaka/mem/buf/Traits.hpp" #include "alpaka/mem/view/ViewAccessOps.hpp" #include "alpaka/meta/DependentFalseType.hpp" +#include "alpaka/meta/Unique.hpp" #include "alpaka/vec/Vec.hpp" #include @@ -29,7 +31,7 @@ namespace alpaka struct ApiCudaRt; struct ApiHipRt; - template + template class BufCpu; namespace detail @@ -50,10 +52,10 @@ namespace alpaka } // namespace detail //! The CUDA/HIP memory buffer. - template + template struct BufUniformCudaHipRt : detail::PitchHolder - , internal::ViewAccessOps> + , internal::ViewAccessOps> { static_assert(!std::is_const_v, "The elem type of the buffer must not be const"); static_assert(!std::is_const_v, "The idx type of the buffer must not be const!"); @@ -90,17 +92,17 @@ namespace alpaka namespace trait { //! The BufUniformCudaHipRt device type trait specialization. - template - struct DevType> + template + struct DevType> { using type = DevUniformCudaHipRt; }; //! The BufUniformCudaHipRt device get trait specialization. - template - struct GetDev> + template + struct GetDev> { - ALPAKA_FN_HOST static auto getDev(BufUniformCudaHipRt const& buf) + ALPAKA_FN_HOST static auto getDev(BufUniformCudaHipRt const& buf) -> DevUniformCudaHipRt { return buf.m_dev; @@ -108,51 +110,53 @@ namespace alpaka }; //! The BufUniformCudaHipRt dimension getter trait specialization. - template - struct DimType> + template + struct DimType> { using type = TDim; }; //! The BufUniformCudaHipRt memory element type get trait specialization. - template - struct ElemType> + template + struct ElemType> { using type = TElem; }; //! The BufUniformCudaHipRt extent get trait specialization. - template - struct GetExtents> + template + struct GetExtents> { - ALPAKA_FN_HOST auto operator()(BufUniformCudaHipRt const& buffer) const + ALPAKA_FN_HOST auto operator()( + BufUniformCudaHipRt const& buffer) const { return buffer.m_extentElements; } }; //! The BufUniformCudaHipRt native pointer get trait specialization. - template - struct GetPtrNative> + template + struct GetPtrNative> { - ALPAKA_FN_HOST static auto getPtrNative(BufUniformCudaHipRt const& buf) - -> TElem const* + ALPAKA_FN_HOST static auto getPtrNative( + BufUniformCudaHipRt const& buf) -> TElem const* { return buf.m_spMem.get(); } - ALPAKA_FN_HOST static auto getPtrNative(BufUniformCudaHipRt& buf) -> TElem* + ALPAKA_FN_HOST static auto getPtrNative(BufUniformCudaHipRt& buf) + -> TElem* { return buf.m_spMem.get(); } }; //! The BufUniformCudaHipRt pointer on device get trait specialization. - template - struct GetPtrDev, DevUniformCudaHipRt> + template + struct GetPtrDev, DevUniformCudaHipRt> { ALPAKA_FN_HOST static auto getPtrDev( - BufUniformCudaHipRt const& buf, + BufUniformCudaHipRt const& buf, DevUniformCudaHipRt const& dev) -> TElem const* { if(dev == getDev(buf)) @@ -166,7 +170,7 @@ namespace alpaka } ALPAKA_FN_HOST static auto getPtrDev( - BufUniformCudaHipRt& buf, + BufUniformCudaHipRt& buf, DevUniformCudaHipRt const& dev) -> TElem* { if(dev == getDev(buf)) @@ -180,11 +184,11 @@ namespace alpaka } }; - template - struct GetPitchesInBytes> + template + struct GetPitchesInBytes> { - ALPAKA_FN_HOST auto operator()(BufUniformCudaHipRt const& buf) const - -> Vec + ALPAKA_FN_HOST auto operator()( + BufUniformCudaHipRt const& buf) const -> Vec { Vec v{}; if constexpr(TDim::value > 0) @@ -207,7 +211,12 @@ namespace alpaka { template ALPAKA_FN_HOST static auto allocBuf(DevUniformCudaHipRt const& dev, TExtent const& extent) - -> BufUniformCudaHipRt + -> BufUniformCudaHipRt< + TApi, + TElem, + Dim, + TIdx, + alpaka::MemVisibilityTypeList>> { ALPAKA_DEBUG_MINIMAL_LOG_SCOPE; @@ -289,7 +298,12 @@ namespace alpaka template ALPAKA_FN_HOST static auto allocAsyncBuf(TQueue queue, [[maybe_unused]] TExtent const& extent) - -> BufUniformCudaHipRt + -> BufUniformCudaHipRt< + TApi, + TElem, + TDim, + TIdx, + alpaka::MemVisibilityTypeList>> { ALPAKA_DEBUG_MINIMAL_LOG_SCOPE; @@ -342,7 +356,13 @@ namespace alpaka ALPAKA_FN_HOST static auto allocMappedBuf( DevCpu const& host, PlatformUniformCudaHipRt const& /*platform*/, - TExtent const& extent) -> BufCpu + TExtent const& extent) + -> BufCpu< + TElem, + TDim, + TIdx, + alpaka::meta::Unique< + std::tuple>>>> { ALPAKA_DEBUG_MINIMAL_LOG_SCOPE; @@ -355,7 +375,16 @@ namespace alpaka TApi::hostMallocMapped | TApi::hostMallocPortable)); auto deleter = [](TElem* ptr) { ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK_NOEXCEPT(TApi::hostFree(ptr)); }; - return BufCpu(host, memPtr, std::move(deleter), extent); + return BufCpu< + TElem, + TDim, + TIdx, + alpaka::meta::Unique< + std::tuple>>>>( + host, + memPtr, + std::move(deleter), + extent); } }; @@ -366,10 +395,10 @@ namespace alpaka }; //! The BufUniformCudaHipRt offset get trait specialization. - template - struct GetOffsets> + template + struct GetOffsets> { - ALPAKA_FN_HOST auto operator()(BufUniformCudaHipRt const&) const + ALPAKA_FN_HOST auto operator()(BufUniformCudaHipRt const&) const -> Vec { return Vec::zeros(); @@ -377,18 +406,24 @@ namespace alpaka }; //! The BufUniformCudaHipRt idx type trait specialization. - template - struct IdxType> + template + struct IdxType> { using type = TIdx; }; + template + struct MemVisibility> + { + using type = alpaka::meta::toTuple; + }; + //! The BufCpu pointer on CUDA/HIP device get trait specialization. - template - struct GetPtrDev, DevUniformCudaHipRt> + template + struct GetPtrDev, DevUniformCudaHipRt> { ALPAKA_FN_HOST static auto getPtrDev( - BufCpu const& buf, + BufCpu const& buf, DevUniformCudaHipRt const&) -> TElem const* { // TODO: Check if the memory is mapped at all! @@ -402,8 +437,9 @@ namespace alpaka return pDev; } - ALPAKA_FN_HOST static auto getPtrDev(BufCpu& buf, DevUniformCudaHipRt const&) - -> TElem* + ALPAKA_FN_HOST static auto getPtrDev( + BufCpu& buf, + DevUniformCudaHipRt const&) -> TElem* { // TODO: Check if the memory is mapped at all! TElem* pDev(nullptr); diff --git a/include/alpaka/mem/buf/Traits.hpp b/include/alpaka/mem/buf/Traits.hpp index 33e7c9bda7f1..fca1a07417ad 100644 --- a/include/alpaka/mem/buf/Traits.hpp +++ b/include/alpaka/mem/buf/Traits.hpp @@ -19,7 +19,13 @@ namespace alpaka namespace trait { //! The memory buffer type trait. - template + template< + typename TDev, + typename TElem, + typename TDim, + typename TIdx, + typename TMemVisibility, + typename TSfinae = void> struct BufType; //! The memory allocator trait. @@ -48,8 +54,8 @@ namespace alpaka } // namespace trait //! The memory buffer type trait alias template to remove the ::type. - template - using Buf = typename trait::BufType, TElem, TDim, TIdx>::type; + template + using Buf = typename trait::BufType, TElem, TDim, TIdx, TMemVisibility>::type; //! Allocates memory on the given device. //! diff --git a/include/alpaka/mem/view/ViewAccessOps.hpp b/include/alpaka/mem/view/ViewAccessOps.hpp index 27056678dd17..36723d8902d6 100644 --- a/include/alpaka/mem/view/ViewAccessOps.hpp +++ b/include/alpaka/mem/view/ViewAccessOps.hpp @@ -20,6 +20,7 @@ namespace alpaka::internal inline constexpr bool isView = false; // TODO(bgruber): replace this by a concept in C++20 + // TODO(simeonehrig): extend the trait by memory Visiblity type template inline constexpr bool isView< TView, diff --git a/include/alpaka/mem/view/ViewPlainPtr.hpp b/include/alpaka/mem/view/ViewPlainPtr.hpp index ceb4d95aed32..adeff7558bfc 100644 --- a/include/alpaka/mem/view/ViewPlainPtr.hpp +++ b/include/alpaka/mem/view/ViewPlainPtr.hpp @@ -8,9 +8,18 @@ #include "alpaka/dev/DevCpu.hpp" #include "alpaka/dev/DevGenericSycl.hpp" #include "alpaka/dev/DevUniformCudaHipRt.hpp" +#include "alpaka/mem/Visibility.hpp" #include "alpaka/mem/view/Traits.hpp" #include "alpaka/mem/view/ViewAccessOps.hpp" #include "alpaka/meta/DependentFalseType.hpp" +#include "alpaka/meta/TypeListOps.hpp" +#include "alpaka/platform/PlatformCpu.hpp" +#include "alpaka/platform/PlatformCpuSycl.hpp" +#include "alpaka/platform/PlatformCudaRt.hpp" +#include "alpaka/platform/PlatformFpgaSyclIntel.hpp" +#include "alpaka/platform/PlatformGenericSycl.hpp" +#include "alpaka/platform/PlatformGpuSyclIntel.hpp" +#include "alpaka/platform/PlatformHipRt.hpp" #include "alpaka/vec/Vec.hpp" #include @@ -19,8 +28,13 @@ namespace alpaka { //! The memory view to wrap plain pointers. - template - struct ViewPlainPtr final : internal::ViewAccessOps> + template< + typename TDev, + typename TElem, + typename TDim, + typename TIdx, + typename TMemVisibility = alpaka::meta::toTuple>> + struct ViewPlainPtr final : internal::ViewAccessOps> { static_assert(!std::is_const_v, "The idx type of the view can not be const!"); @@ -49,15 +63,21 @@ namespace alpaka namespace trait { //! The ViewPlainPtr device type trait specialization. - template - struct DevType> + template + struct DevType> { using type = alpaka::Dev; }; + template + struct MemVisibility> + { + using type = TMemVisibility; + }; + //! The ViewPlainPtr device get trait specialization. - template - struct GetDev> + template + struct GetDev> { static auto getDev(ViewPlainPtr const& view) -> alpaka::Dev { @@ -66,15 +86,15 @@ namespace alpaka }; //! The ViewPlainPtr dimension getter trait. - template - struct DimType> + template + struct DimType> { using type = TDim; }; //! The ViewPlainPtr memory element type get trait specialization. - template - struct ElemType> + template + struct ElemType> { using type = TElem; }; @@ -82,32 +102,32 @@ namespace alpaka namespace trait { - template - struct GetExtents> + template + struct GetExtents> { - ALPAKA_FN_HOST auto operator()(ViewPlainPtr const& view) const + ALPAKA_FN_HOST auto operator()(ViewPlainPtr const& view) const { return view.m_extentElements; } }; //! The ViewPlainPtr native pointer get trait specialization. - template - struct GetPtrNative> + template + struct GetPtrNative> { - static auto getPtrNative(ViewPlainPtr const& view) -> TElem const* + static auto getPtrNative(ViewPlainPtr const& view) -> TElem const* { return view.m_pMem; } - static auto getPtrNative(ViewPlainPtr& view) -> TElem* + static auto getPtrNative(ViewPlainPtr& view) -> TElem* { return view.m_pMem; } }; - template - struct GetPitchesInBytes> + template + struct GetPitchesInBytes> { ALPAKA_FN_HOST auto operator()(ViewPlainPtr const& view) const { @@ -174,18 +194,19 @@ namespace alpaka }; #endif //! The ViewPlainPtr offset get trait specialization. - template - struct GetOffsets> + template + struct GetOffsets> { - ALPAKA_FN_HOST auto operator()(ViewPlainPtr const&) const -> Vec + ALPAKA_FN_HOST auto operator()(ViewPlainPtr const&) const + -> Vec { return Vec::zeros(); } }; //! The ViewPlainPtr idx type trait specialization. - template - struct IdxType> + template + struct IdxType> { using type = TIdx; }; diff --git a/include/alpaka/meta/TypeListOps.hpp b/include/alpaka/meta/TypeListOps.hpp index 2d6bcfe7f45f..e77ccb5cade8 100644 --- a/include/alpaka/meta/TypeListOps.hpp +++ b/include/alpaka/meta/TypeListOps.hpp @@ -1,9 +1,10 @@ -/* Copyright 2022 Bernhard Manfred Gruber +/* Copyright 2024 Bernhard Manfred Gruber, Simeon Ehrig * SPDX-License-Identifier: MPL-2.0 */ #pragma once +#include #include namespace alpaka::meta @@ -35,4 +36,59 @@ namespace alpaka::meta { static constexpr bool value = std::is_same_v || Contains, Value>::value; }; + + // copied from https://stackoverflow.com/a/51073558/22035743 + template + struct IsList : std::false_type + { + }; + + template class TList, typename... TTypes> + struct IsList> : std::true_type + { + }; + + //! \brief Checks whether the specified type is a list. List is a type with a variadic number of template types. + template + constexpr bool isList = IsList>::value; + + namespace detail + { + template class TListType, typename TType, typename = void> + struct ToListImpl + { + using type = TListType; + }; + + template class TListType, typename TList> + struct ToListImpl>> + { + using type = TList; + }; + } // namespace detail + + //! \brief Takes an arbitrary number of types (T) and creates a type list of type TListType with the types (T). If + //! T is a single template parameter and it satisfies alpaka::meta::isList, the type of the structure is T (no type + //! change). + //! \tparam TListType type of the created list + //! \tparam T possible list types or type list + template class TListType, typename... T> + struct ToList; + + template class TListType, typename T> + struct ToList : detail::ToListImpl + { + }; + + template class TListType, typename T, typename... Ts> + struct ToList + { + using type = TListType; + }; + + //! \brief If T is a single argument and a type list (fullfil alpaka::meta::isList), the return type is T. + //! Otherwise, std::tuple is returned with T types as template parameters. + template + using toTuple = typename ToList::type; + } // namespace alpaka::meta diff --git a/include/alpaka/platform/PlatformCpu.hpp b/include/alpaka/platform/PlatformCpu.hpp index c431fd418785..aab9f3a3790f 100644 --- a/include/alpaka/platform/PlatformCpu.hpp +++ b/include/alpaka/platform/PlatformCpu.hpp @@ -6,6 +6,7 @@ #include "alpaka/core/Concepts.hpp" #include "alpaka/dev/DevCpu.hpp" +#include "alpaka/mem/Visibility.hpp" #include "alpaka/platform/Traits.hpp" #include @@ -65,5 +66,11 @@ namespace alpaka return {}; } }; + + template<> + struct MemVisibility + { + using type = alpaka::MemVisibleCPU; + }; } // namespace trait } // namespace alpaka diff --git a/include/alpaka/platform/PlatformCpuSycl.hpp b/include/alpaka/platform/PlatformCpuSycl.hpp index db055f9689b2..893ce8e39dc1 100644 --- a/include/alpaka/platform/PlatformCpuSycl.hpp +++ b/include/alpaka/platform/PlatformCpuSycl.hpp @@ -6,6 +6,7 @@ #include "alpaka/dev/DevGenericSycl.hpp" #include "alpaka/dev/Traits.hpp" +#include "alpaka/mem/Visibility.hpp" #include "alpaka/platform/PlatformGenericSycl.hpp" #include @@ -39,6 +40,12 @@ namespace alpaka::trait { using type = DevGenericSycl; // = DevCpuSycl }; + + template<> + struct MemVisibility + { + using type = alpaka::MemVisibleCpuSycl; + }; } // namespace alpaka::trait #endif diff --git a/include/alpaka/platform/PlatformCudaRt.hpp b/include/alpaka/platform/PlatformCudaRt.hpp index 9bf76fa66682..011f782c2d2d 100644 --- a/include/alpaka/platform/PlatformCudaRt.hpp +++ b/include/alpaka/platform/PlatformCudaRt.hpp @@ -5,6 +5,7 @@ #pragma once #include "alpaka/core/ApiCudaRt.hpp" +#include "alpaka/mem/Visibility.hpp" #include "alpaka/platform/PlatformUniformCudaHipRt.hpp" #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED @@ -13,6 +14,15 @@ namespace alpaka { //! The CUDA RT platform. using PlatformCudaRt = PlatformUniformCudaHipRt; + + namespace trait + { + template<> + struct MemVisibility + { + using type = alpaka::MemVisibleGpuCudaRt; + }; + } // namespace trait } // namespace alpaka #endif // ALPAKA_ACC_GPU_CUDA_ENABLED diff --git a/include/alpaka/platform/PlatformFpgaSyclIntel.hpp b/include/alpaka/platform/PlatformFpgaSyclIntel.hpp index 21ee2c257c5f..b6190b75312c 100644 --- a/include/alpaka/platform/PlatformFpgaSyclIntel.hpp +++ b/include/alpaka/platform/PlatformFpgaSyclIntel.hpp @@ -6,6 +6,7 @@ #include "alpaka/dev/DevGenericSycl.hpp" #include "alpaka/dev/Traits.hpp" +#include "alpaka/mem/Visibility.hpp" #include "alpaka/platform/PlatformGenericSycl.hpp" #if defined(ALPAKA_ACC_SYCL_ENABLED) && defined(ALPAKA_SYCL_ONEAPI_FPGA) @@ -57,6 +58,12 @@ namespace alpaka::trait { using type = DevGenericSycl; // = DevFpgaSyclIntel }; + + template<> + struct MemVisibility + { + using type = alpaka::MemVisibleFpgaSyclIntel; + }; } // namespace alpaka::trait #endif diff --git a/include/alpaka/platform/PlatformGpuSyclIntel.hpp b/include/alpaka/platform/PlatformGpuSyclIntel.hpp index 216bb5ae26a1..7cfabd09e303 100644 --- a/include/alpaka/platform/PlatformGpuSyclIntel.hpp +++ b/include/alpaka/platform/PlatformGpuSyclIntel.hpp @@ -6,6 +6,7 @@ #include "alpaka/dev/DevGenericSycl.hpp" #include "alpaka/dev/Traits.hpp" +#include "alpaka/mem/Visibility.hpp" #include "alpaka/platform/PlatformGenericSycl.hpp" #include @@ -42,6 +43,12 @@ namespace alpaka::trait { using type = DevGenericSycl; // = DevGpuSyclIntel }; + + template<> + struct MemVisibility + { + using type = alpaka::MemVisibleGpuSyclIntel; + }; } // namespace alpaka::trait #endif diff --git a/include/alpaka/platform/PlatformHipRt.hpp b/include/alpaka/platform/PlatformHipRt.hpp index 25303aeaf10e..4179bd8f0967 100644 --- a/include/alpaka/platform/PlatformHipRt.hpp +++ b/include/alpaka/platform/PlatformHipRt.hpp @@ -5,6 +5,7 @@ #pragma once #include "alpaka/core/ApiHipRt.hpp" +#include "alpaka/mem/Visibility.hpp" #include "alpaka/platform/PlatformUniformCudaHipRt.hpp" #ifdef ALPAKA_ACC_GPU_HIP_ENABLED @@ -13,6 +14,15 @@ namespace alpaka { //! The HIP RT platform. using PlatformHipRt = PlatformUniformCudaHipRt; + + namespace trait + { + template<> + struct MemVisibility + { + using type = alpaka::MemVisibleGpuHipRt; + }; + } // namespace trait } // namespace alpaka #endif // ALPAKA_ACC_GPU_HIP_ENABLED diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index bcd108ff8d4c..209a41d9ccbf 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -25,11 +25,7 @@ add_subdirectory("idx/") add_subdirectory("intrinsic/") add_subdirectory("kernel/") add_subdirectory("math/") -add_subdirectory("mem/buf/") -add_subdirectory("mem/copy/") -add_subdirectory("mem/fence/") -add_subdirectory("mem/p2p/") -add_subdirectory("mem/view/") +add_subdirectory("mem/") add_subdirectory("meta/") add_subdirectory("queue/") add_subdirectory("rand/") diff --git a/test/unit/math/src/Buffer.hpp b/test/unit/math/src/Buffer.hpp index 7687fcfd3ca1..8adcb189fcaf 100644 --- a/test/unit/math/src/Buffer.hpp +++ b/test/unit/math/src/Buffer.hpp @@ -6,12 +6,36 @@ #include "Defines.hpp" +#include +#include #include #include namespace mathtest { + namespace detail + { + template + struct MemVisibilityMappedBufferImpl + { + using type = typename alpaka::MemVisibilityTypeList; + }; + + template + struct MemVisibilityMappedBufferImpl< + THost, + TPlatform, + std::enable_if_t>> + { + using type = typename alpaka::meta::Unique< + std::tuple, alpaka::MemVisibility>>; + }; + } // namespace detail + + template + using MemVisibilityMappedBuffer = typename detail::MemVisibilityMappedBufferImpl::type; + //! Provides alpaka-style buffer with arguments' data. //! TData can be a plain value or a complex data-structure. //! The operator() is overloaded and returns the value from the correct Buffer, @@ -32,11 +56,12 @@ namespace mathtest // Defines using's for alpaka-buffer. using DevHost = alpaka::DevCpu; using PlatformHost = alpaka::Platform; - using BufHost = alpaka::Buf; using DevAcc = alpaka::Dev; using PlatformAcc = alpaka::Platform; - using BufAcc = alpaka::Buf; + + using BufHost = alpaka::Buf>; + using BufAcc = alpaka::Buf>; PlatformHost platformHost; DevHost devHost; diff --git a/test/unit/mem/CMakeLists.txt b/test/unit/mem/CMakeLists.txt new file mode 100644 index 000000000000..f4e76bcb7262 --- /dev/null +++ b/test/unit/mem/CMakeLists.txt @@ -0,0 +1,33 @@ +# +# Copyright 2024 Simeon Ehrig +# SPDX-License-Identifier: MPL-2.0 +# + +################################################################################ +# Required CMake version. +################################################################################ + +cmake_minimum_required(VERSION 3.22) + +set(_TARGET_NAME "memVisibilityTest") + +append_recursive_files_add_to_src_group("src/" "src/" "cpp" _FILES_SOURCE) + +alpaka_add_executable( + ${_TARGET_NAME} + ${_FILES_SOURCE}) +target_link_libraries( + ${_TARGET_NAME} + PRIVATE common) + +set_target_properties(${_TARGET_NAME} PROPERTIES FOLDER "test/unit") +target_compile_definitions(${_TARGET_NAME} PRIVATE "-DTEST_UNIT_MEM") + +add_test(NAME ${_TARGET_NAME} COMMAND ${_TARGET_NAME} ${_alpaka_TEST_OPTIONS}) + + +add_subdirectory("buf/") +add_subdirectory("copy/") +add_subdirectory("fence/") +add_subdirectory("p2p/") +add_subdirectory("view/") diff --git a/test/unit/mem/copy/src/BufSlicing.cpp b/test/unit/mem/copy/src/BufSlicing.cpp index 6169fdaf5ff2..ed2a4dc6f1f9 100644 --- a/test/unit/mem/copy/src/BufSlicing.cpp +++ b/test/unit/mem/copy/src/BufSlicing.cpp @@ -28,8 +28,8 @@ struct TestContainer using DevHost = alpaka::DevCpu; using PlatformHost = alpaka::Platform; - using BufHost = alpaka::Buf; - using BufDevice = alpaka::Buf; + using BufHost = alpaka::Buf>; + using BufDevice = alpaka::Buf>; using SubView = alpaka::ViewSubView; diff --git a/test/unit/mem/src/Visibility.cpp b/test/unit/mem/src/Visibility.cpp new file mode 100644 index 000000000000..097f7ec60b88 --- /dev/null +++ b/test/unit/mem/src/Visibility.cpp @@ -0,0 +1,243 @@ +/* Copyright 2024 Simeon Ehrig + * SPDX-License-Identifier: MPL-2.0 + */ + +#include + +#include + +#include +#include + +using Dim = alpaka::DimInt<1>; +using Idx = std::size_t; + +// TODO(SimeonEhrig): Replace implementation. Instead using a list, specialize `alpaka::Platform` for +// tags to get the Memory Visiblity + +//! \brief check if the accelerator related to the tag is bounded to the cpu platform +//! \tparam TTag alpaka tag type +template +struct isCpuTag : std::false_type +{ +}; + +template +struct isCpuTag< + TTag, + std::enable_if_t< + // TAGCpuSycl is not included because it has it's own platform + std::is_same_v || std::is_same_v + || std::is_same_v || std::is_same_v + || std::is_same_v>> : std::true_type +{ +}; + +template +struct AccIsEnabledMemVisibilities : std::false_type +{ +}; + +template +struct AccIsEnabledMemVisibilities< + TTagMemView, + std::void_t, alpaka::DimInt<1>, int>>> : std::true_type +{ +}; + +using ExpectedTagsMemVisibilities = alpaka::meta::Filter< + std::tuple< + std::tuple, + std::tuple, + std::tuple, + std::tuple, + std::tuple, + std::tuple, + std::tuple, + std::tuple, + std::tuple, + std::tuple>, + AccIsEnabledMemVisibilities>; + +TEMPLATE_LIST_TEST_CASE("memoryVisibilityType", "[mem][visibility]", ExpectedTagsMemVisibilities) +{ + using Tag = std::tuple_element_t<0, TestType>; + using ExpectedMemVisibility = std::tuple_element_t<1, TestType>; + + using PltfType = alpaka::Platform>; + STATIC_REQUIRE(std::is_same_v::type, ExpectedMemVisibility>); +} + +using EnabledTagTagList = alpaka::meta::CartesianProduct; + +TEMPLATE_LIST_TEST_CASE("testHasSameMemView", "[mem][visibility]", EnabledTagTagList) +{ + using Tag1 = std::tuple_element_t<0, TestType>; + using Tag2 = std::tuple_element_t<1, TestType>; + + SUCCEED(Tag1::get_name() << " + " << Tag2::get_name()); + + using Acc1 = alpaka::TagToAcc; + using Acc2 = alpaka::TagToAcc; + + auto const plt1 = alpaka::Platform{}; + auto const plt2 = alpaka::Platform{}; + + using Plt1 = decltype(plt1); + using Plt2 = decltype(plt2); + + auto const dev1 = alpaka::getDevByIdx(plt1, 0); + auto const dev2 = alpaka::getDevByIdx(plt2, 0); + + using BufAcc1 = alpaka::Buf>; + using BufAcc2 = alpaka::Buf>; + + BufAcc1 bufDev1(alpaka::allocBuf(dev1, Idx(1))); + BufAcc2 bufDev2(alpaka::allocBuf(dev2, Idx(1))); + + STATIC_REQUIRE(alpaka::hasSameMemView(plt1, bufDev1)); + STATIC_REQUIRE(alpaka::hasSameMemView(dev1, bufDev1)); + STATIC_REQUIRE(alpaka::hasSameMemView()); + STATIC_REQUIRE(alpaka::hasSameMemView(plt2, bufDev2)); + STATIC_REQUIRE(alpaka::hasSameMemView(dev2, bufDev2)); + STATIC_REQUIRE(alpaka::hasSameMemView()); + + // at the moment, only the cpu platform has different accelerator types + // therefore all cpu accelerators can access the memory of other cpu accelerators + // if the accelerator is not a cpu accelerator, both accelerators needs to be the + // same to support access to the memory of each other + if constexpr((isCpuTag::value && isCpuTag::value) || std::is_same_v) + { + STATIC_REQUIRE(alpaka::hasSameMemView(plt1, bufDev2)); + STATIC_REQUIRE(alpaka::hasSameMemView(plt2, bufDev1)); + STATIC_REQUIRE(alpaka::hasSameMemView()); + } + else + { + STATIC_REQUIRE_FALSE(alpaka::hasSameMemView(plt1, bufDev2)); + STATIC_REQUIRE_FALSE(alpaka::hasSameMemView(plt2, bufDev1)); + STATIC_REQUIRE_FALSE(alpaka::hasSameMemView()); + } +} + +using EnabledTagTagMemVisibilityList + = alpaka::meta::CartesianProduct; + +TEMPLATE_LIST_TEST_CASE("testMemView", "[mem][visibility]", EnabledTagTagMemVisibilityList) +{ + using Tag1 = std::tuple_element_t<0, std::tuple_element_t<0, TestType>>; + using ExpectedMemVisibilityForTag1 = std::tuple_element_t<1, std::tuple_element_t<0, TestType>>; + using Tag2 = std::tuple_element_t<0, std::tuple_element_t<1, TestType>>; + using ExpectedMemVisibilityForTag2 = std::tuple_element_t<1, std::tuple_element_t<1, TestType>>; + + SUCCEED( + "Tag1: " << Tag1::get_name() << " + " << ExpectedMemVisibilityForTag1::get_name() + << "\nTag2: " << Tag2::get_name() << " + " << ExpectedMemVisibilityForTag1::get_name()); + + + constexpr Idx data_size = 10; + + using Acc1 = alpaka::TagToAcc; + using Acc2 = alpaka::TagToAcc; + + auto const plt1 = alpaka::Platform{}; + auto const plt2 = alpaka::Platform{}; + + auto const dev1 = alpaka::getDevByIdx(plt1, 0); + auto const dev2 = alpaka::getDevByIdx(plt2, 0); + + using Vec1D = alpaka::Vec, Idx>; + Vec1D const extents(Vec1D::all(data_size)); + + std::array data; + + auto data_view1 = alpaka::createView(dev1, data.data(), extents); + STATIC_REQUIRE(std::is_same_v< + typename alpaka::trait::MemVisibility::type, + std::tuple>); + STATIC_REQUIRE(alpaka::hasSameMemView(plt1, data_view1)); + + auto data_view2 = alpaka::createView(dev2, data.data(), extents); + STATIC_REQUIRE(std::is_same_v< + typename alpaka::trait::MemVisibility::type, + std::tuple>); + STATIC_REQUIRE(alpaka::hasSameMemView(plt2, data_view2)); + + if constexpr(std::is_same_v) + { + STATIC_REQUIRE(alpaka::hasSameMemView(plt1, data_view2)); + STATIC_REQUIRE(alpaka::hasSameMemView(plt2, data_view1)); + } + else + { + STATIC_REQUIRE_FALSE(alpaka::hasSameMemView(plt1, data_view2)); + STATIC_REQUIRE_FALSE(alpaka::hasSameMemView(plt2, data_view1)); + } +} + +TEMPLATE_LIST_TEST_CASE("testMemBuf", "[mem][visibility]", EnabledTagTagMemVisibilityList) +{ + using Tag1 = std::tuple_element_t<0, std::tuple_element_t<0, TestType>>; + using ExpectedMemVisibilityForTag1 = std::tuple_element_t<1, std::tuple_element_t<0, TestType>>; + using Tag2 = std::tuple_element_t<0, std::tuple_element_t<1, TestType>>; + using ExpectedMemVisibilityForTag2 = std::tuple_element_t<1, std::tuple_element_t<1, TestType>>; + + SUCCEED( + "Tag1: " << Tag1::get_name() << " + " << ExpectedMemVisibilityForTag1::get_name() + << "\nTag2: " << Tag2::get_name() << " + " << ExpectedMemVisibilityForTag1::get_name()); + + constexpr Idx data_size = 10; + + using Acc1 = alpaka::TagToAcc; + using Acc2 = alpaka::TagToAcc; + + auto const plt1 = alpaka::Platform{}; + auto const plt2 = alpaka::Platform{}; + + auto const dev1 = alpaka::getDevByIdx(plt1, 0); + auto const dev2 = alpaka::getDevByIdx(plt2, 0); + + using Vec1D = alpaka::Vec, Idx>; + Vec1D const extents(Vec1D::all(data_size)); + + // we need only to test the first tag, because the second tag contains the same acc's + auto buf1 = alpaka::allocBuf(dev1, extents); + + STATIC_REQUIRE(std::is_same_v< + typename alpaka::trait::MemVisibility::type, + std::tuple>); + + STATIC_REQUIRE(alpaka::hasSameMemView(plt1, buf1)); + + if constexpr(!std::is_same_v) + { + alpaka::Queue queue1(dev1); + + auto buf1Async = alpaka::allocAsyncBuf(queue1, extents); + + alpaka::wait(queue1); + + STATIC_REQUIRE(std::is_same_v< + typename alpaka::trait::MemVisibility::type, + std::tuple>); + + STATIC_REQUIRE(alpaka::hasSameMemView(plt1, buf1Async)); + } + + if constexpr(isCpuTag::value && alpaka::hasMappedBufSupport>) + { + auto mappedBuffer = alpaka::allocMappedBuf, float, Idx>(dev1, plt2, extents); + + using expectedMappedBufferMemView + = alpaka::meta::Unique>; + + STATIC_REQUIRE(std::is_same_v< + typename alpaka::trait::MemVisibility::type, + expectedMappedBufferMemView>); + + STATIC_REQUIRE(alpaka::hasSameMemView(plt1, mappedBuffer)); + STATIC_REQUIRE(alpaka::hasSameMemView(plt2, mappedBuffer)); + STATIC_REQUIRE(alpaka::hasSameMemView(dev1, mappedBuffer)); + STATIC_REQUIRE(alpaka::hasSameMemView(dev2, mappedBuffer)); + } +}