Skip to content

Commit

Permalink
Include global memory in AccDevProps
Browse files Browse the repository at this point in the history
Fixes: #2194
  • Loading branch information
bernhardmgruber authored and psychocoderHPC committed Jan 22, 2024
1 parent a8793cd commit d6196ba
Show file tree
Hide file tree
Showing 9 changed files with 77 additions and 42 deletions.
6 changes: 4 additions & 2 deletions include/alpaka/acc/AccCpuOmp2Blocks.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ namespace alpaka
template<typename TDim, typename TIdx>
struct GetAccDevProps<AccCpuOmp2Blocks<TDim, TIdx>>
{
ALPAKA_FN_HOST static auto getAccDevProps(DevCpu const& /* dev */) -> alpaka::AccDevProps<TDim, TIdx>
ALPAKA_FN_HOST static auto getAccDevProps(DevCpu const& dev) -> alpaka::AccDevProps<TDim, TIdx>
{
return {// m_multiProcessorCount
static_cast<TIdx>(1),
Expand All @@ -137,7 +137,9 @@ namespace alpaka
// m_threadElemCountMax
std::numeric_limits<TIdx>::max(),
// m_sharedMemSizeBytes
static_cast<size_t>(AccCpuOmp2Blocks<TDim, TIdx>::staticAllocBytes())};
static_cast<size_t>(AccCpuOmp2Blocks<TDim, TIdx>::staticAllocBytes()),
// m_globalMemSizeBytes
getMemBytes(dev)};
}
};

Expand Down
5 changes: 4 additions & 1 deletion include/alpaka/acc/AccCpuOmp2Threads.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ namespace alpaka
# else
auto const blockThreadCountMax = alpaka::core::clipCast<TIdx>(::omp_get_max_threads());
# endif
auto const memBytes = getMemBytes(dev);
return {// m_multiProcessorCount
static_cast<TIdx>(1),
// m_gridBlockExtentMax
Expand All @@ -147,7 +148,9 @@ namespace alpaka
// m_threadElemCountMax
std::numeric_limits<TIdx>::max(),
// m_sharedMemSizeBytes
getMemBytes(dev)};
memBytes,
// m_globalMemSizeBytes
memBytes};
}
};

Expand Down
6 changes: 4 additions & 2 deletions include/alpaka/acc/AccCpuSerial.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ namespace alpaka
template<typename TDim, typename TIdx>
struct GetAccDevProps<AccCpuSerial<TDim, TIdx>>
{
ALPAKA_FN_HOST static auto getAccDevProps(DevCpu const& /* dev */) -> AccDevProps<TDim, TIdx>
ALPAKA_FN_HOST static auto getAccDevProps(DevCpu const& dev) -> AccDevProps<TDim, TIdx>
{
return {// m_multiProcessorCount
static_cast<TIdx>(1),
Expand All @@ -131,7 +131,9 @@ namespace alpaka
// m_threadElemCountMax
std::numeric_limits<TIdx>::max(),
// m_sharedMemSizeBytes
static_cast<size_t>(AccCpuSerial<TDim, TIdx>::staticAllocBytes())};
static_cast<size_t>(AccCpuSerial<TDim, TIdx>::staticAllocBytes()),
// m_globalMemSizeBytes
getMemBytes(dev)};
}
};

Expand Down
6 changes: 4 additions & 2 deletions include/alpaka/acc/AccCpuTbbBlocks.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ namespace alpaka
template<typename TDim, typename TIdx>
struct GetAccDevProps<AccCpuTbbBlocks<TDim, TIdx>>
{
ALPAKA_FN_HOST static auto getAccDevProps(DevCpu const& /* dev */) -> AccDevProps<TDim, TIdx>
ALPAKA_FN_HOST static auto getAccDevProps(DevCpu const& dev) -> AccDevProps<TDim, TIdx>
{
return {// m_multiProcessorCount
static_cast<TIdx>(1),
Expand All @@ -128,7 +128,9 @@ namespace alpaka
// m_threadElemCountMax
std::numeric_limits<TIdx>::max(),
// m_sharedMemSizeBytes
static_cast<size_t>(AccCpuTbbBlocks<TDim, TIdx>::staticAllocBytes())};
static_cast<size_t>(AccCpuTbbBlocks<TDim, TIdx>::staticAllocBytes()),
// m_globalMemSizeBytes
getMemBytes(dev)};
}
};

Expand Down
5 changes: 4 additions & 1 deletion include/alpaka/acc/AccCpuThreads.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ namespace alpaka
static_cast<TIdx>(1),
alpaka::core::clipCast<TIdx>(std::thread::hardware_concurrency() * 8));
# endif
auto const memBytes = getMemBytes(dev);
return {// m_multiProcessorCount
static_cast<TIdx>(1),
// m_gridBlockExtentMax
Expand All @@ -155,7 +156,9 @@ namespace alpaka
// m_threadElemCountMax
std::numeric_limits<TIdx>::max(),
// m_sharedMemSizeBytes
getMemBytes(dev)};
memBytes,
// m_globalMemSizeBytes
memBytes};
}
};

Expand Down
37 changes: 6 additions & 31 deletions include/alpaka/acc/AccDevProps.hpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* Copyright 2020 Benjamin Worpitz, Bernhard Manfred Gruber
/* Copyright 2024 Benjamin Worpitz, Bernhard Manfred Gruber
* SPDX-License-Identifier: MPL-2.0
*/

Expand All @@ -7,9 +7,6 @@
#include "alpaka/core/Common.hpp"
#include "alpaka/vec/Vec.hpp"

#include <string>
#include <vector>

namespace alpaka
{
//! The acceleration properties on a device.
Expand All @@ -23,37 +20,15 @@ namespace alpaka
sizeof(TIdx) >= sizeof(int),
"Index type is not supported, consider using int or a larger type.");

ALPAKA_FN_HOST AccDevProps(
TIdx const& multiProcessorCount,
Vec<TDim, TIdx> const& gridBlockExtentMax,
TIdx const& gridBlockCountMax,
Vec<TDim, TIdx> const& blockThreadExtentMax,
TIdx const& blockThreadCountMax,
Vec<TDim, TIdx> const& threadElemExtentMax,
TIdx const& threadElemCountMax,
size_t const& sharedMemSizeBytes)
: m_gridBlockExtentMax(gridBlockExtentMax)
, m_blockThreadExtentMax(blockThreadExtentMax)
, m_threadElemExtentMax(threadElemExtentMax)
, m_gridBlockCountMax(gridBlockCountMax)
, m_blockThreadCountMax(blockThreadCountMax)
, m_threadElemCountMax(threadElemCountMax)
, m_multiProcessorCount(multiProcessorCount)
, m_sharedMemSizeBytes(sharedMemSizeBytes)
{
}

// NOTE: The members have been reordered from the order in the constructor because gcc is buggy for some TDim
// and TIdx and generates invalid assembly.
// Please keep the order of data members so aggregate initialization does not break!
TIdx m_multiProcessorCount; //!< The number of multiprocessors.
Vec<TDim, TIdx> m_gridBlockExtentMax; //!< The maximum number of blocks in each dimension of the grid.
Vec<TDim, TIdx> m_blockThreadExtentMax; //!< The maximum number of threads in each dimension of a block.
Vec<TDim, TIdx> m_threadElemExtentMax; //!< The maximum number of elements in each dimension of a thread.

TIdx m_gridBlockCountMax; //!< The maximum number of blocks in a grid.
Vec<TDim, TIdx> m_blockThreadExtentMax; //!< The maximum number of threads in each dimension of a block.
TIdx m_blockThreadCountMax; //!< The maximum number of threads in a block.
Vec<TDim, TIdx> m_threadElemExtentMax; //!< The maximum number of elements in each dimension of a thread.
TIdx m_threadElemCountMax; //!< The maximum number of elements in a thread.

TIdx m_multiProcessorCount; //!< The number of multiprocessors.
size_t m_sharedMemSizeBytes; //!< The size of shared memory per block
size_t m_globalMemSizeBytes; //!< The size of global memory
};
} // namespace alpaka
4 changes: 3 additions & 1 deletion include/alpaka/acc/AccGenericSycl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,9 @@ namespace alpaka::trait
// m_threadElemCountMax
std::numeric_limits<TIdx>::max(),
// m_sharedMemSizeBytes
device.template get_info<sycl::info::device::local_mem_size>()};
device.template get_info<sycl::info::device::local_mem_size>(),
// m_globalMemSizeBytes
getMemBytes(dev)};
}
};

Expand Down
8 changes: 6 additions & 2 deletions include/alpaka/acc/AccGpuUniformCudaHipRt.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,9 @@ namespace alpaka
// m_threadElemCountMax
std::numeric_limits<TIdx>::max(),
// m_sharedMemSizeBytes
static_cast<size_t>(sharedMemSizeBytes)};
static_cast<size_t>(sharedMemSizeBytes),
// m_globalMemSizeBytes
getMemBytes(dev)};

# else
typename TApi::DeviceProp_t properties;
Expand All @@ -197,7 +199,9 @@ namespace alpaka
// m_threadElemCountMax
std::numeric_limits<TIdx>::max(),
// m_sharedMemSizeBytes
static_cast<size_t>(properties.sharedMemPerBlock)};
static_cast<size_t>(properties.sharedMemPerBlock),
// m_globalMemSizeBytes
getMemBytes(dev)};
# endif
}
};
Expand Down
42 changes: 42 additions & 0 deletions test/unit/acc/src/AccDevPropsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,46 @@ TEMPLATE_LIST_TEST_CASE("getAccDevProps", "[acc]", alpaka::test::TestAccs)
REQUIRE(devProps.m_threadElemCountMax > 0);
REQUIRE(devProps.m_multiProcessorCount > 0);
REQUIRE(devProps.m_sharedMemSizeBytes > 0);
REQUIRE(devProps.m_globalMemSizeBytes > 0);
}

TEST_CASE("AccDevProps.aggregate_init", "[acc]")
{
    // Positional aggregate initialization: each value below has to end up in the data member
    // at the same position, so every member is given a distinct value (1..9) and read back.
    using Props = alpaka::AccDevProps<alpaka::DimInt<1>, int>;
    Props const devProps{1, {2}, 3, {4}, 5, {6}, 7, 8, 9};

    CHECK(devProps.m_multiProcessorCount == 1);

    // Grid level.
    CHECK(devProps.m_gridBlockExtentMax == alpaka::Vec{2});
    CHECK(devProps.m_gridBlockCountMax == 3);

    // Block level.
    CHECK(devProps.m_blockThreadExtentMax == alpaka::Vec{4});
    CHECK(devProps.m_blockThreadCountMax == 5);

    // Thread level.
    CHECK(devProps.m_threadElemExtentMax == alpaka::Vec{6});
    CHECK(devProps.m_threadElemCountMax == 7);

    // Memory sizes.
    CHECK(devProps.m_sharedMemSizeBytes == 8);
    CHECK(devProps.m_globalMemSizeBytes == 9);
}

#ifdef __cpp_designated_initializers
TEST_CASE("AccDevProps.designated_initializers", "[acc]")
{
    // Initialization by member name: every member is set explicitly to a distinct value
    // (10..90) and then read back to confirm it was stored in the named member.
    using Props = alpaka::AccDevProps<alpaka::DimInt<1>, int>;
    Props const devProps{
        .m_multiProcessorCount = 10,
        .m_gridBlockExtentMax = {20},
        .m_gridBlockCountMax = 30,
        .m_blockThreadExtentMax = {40},
        .m_blockThreadCountMax = 50,
        .m_threadElemExtentMax = {60},
        .m_threadElemCountMax = 70,
        .m_sharedMemSizeBytes = 80,
        .m_globalMemSizeBytes = 90};

    CHECK(devProps.m_multiProcessorCount == 10);

    // Grid level.
    CHECK(devProps.m_gridBlockExtentMax == alpaka::Vec{20});
    CHECK(devProps.m_gridBlockCountMax == 30);

    // Block level.
    CHECK(devProps.m_blockThreadExtentMax == alpaka::Vec{40});
    CHECK(devProps.m_blockThreadCountMax == 50);

    // Thread level.
    CHECK(devProps.m_threadElemExtentMax == alpaka::Vec{60});
    CHECK(devProps.m_threadElemCountMax == 70);

    // Memory sizes.
    CHECK(devProps.m_sharedMemSizeBytes == 80);
    CHECK(devProps.m_globalMemSizeBytes == 90);
}
#endif

0 comments on commit d6196ba

Please sign in to comment.