diff --git a/.gitignore b/.gitignore index c05e5d61..6128db31 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,4 @@ Win32/x64/ *.aps *.orig *.code-workspace +slirp.out diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 3cc6d7fe..5f66dd50 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -48,3 +48,7 @@ if (CHAI_ENABLE_MANAGED_PTR) NAME managed_ptr_benchmarks COMMAND managed_ptr_benchmarks) endif () + +if (CHAI_ENABLE_EXPERIMENTAL) + add_subdirectory(expt) +endif () diff --git a/benchmarks/expt/CMakeLists.txt b/benchmarks/expt/CMakeLists.txt new file mode 100644 index 00000000..5513906d --- /dev/null +++ b/benchmarks/expt/CMakeLists.txt @@ -0,0 +1,33 @@ +############################################################################## +# Copyright (c) Lawrence Livermore National Security, LLC and other CHAI +# contributors. See the CHAI LICENSE and COPYRIGHT files for details. +# +# SPDX-License-Identifier: BSD-3-Clause +############################################################################## + +set(chai_expt_benchmark_depends + chai + gbenchmark) + +if (CHAI_ENABLE_CUDA) + set(chai_expt_benchmark_depends + ${chai_expt_benchmark_depends} + cuda) +endif () + +if (CHAI_ENABLE_HIP) + set(chai_expt_benchmark_depends + ${chai_expt_benchmark_depends} + blt::hip) +endif () + +if (CHAI_ENABLE_CUDA OR CHAI_ENABLE_HIP) + blt_add_executable( + NAME UnifiedArrayManagerBenchmarks + SOURCES UnifiedArrayManagerBenchmarks.cpp + DEPENDS_ON ${chai_expt_benchmark_depends}) + + blt_add_benchmark( + NAME UnifiedArrayManagerBenchmarks + COMMAND UnifiedArrayManagerBenchmarks) +endif () diff --git a/benchmarks/expt/UnifiedArrayManagerBenchmarks.cpp b/benchmarks/expt/UnifiedArrayManagerBenchmarks.cpp new file mode 100644 index 00000000..2d11964c --- /dev/null +++ b/benchmarks/expt/UnifiedArrayManagerBenchmarks.cpp @@ -0,0 +1,297 @@ +////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Lawrence Livermore National Security, LLC and other CHAI +// contributors. See the CHAI LICENSE and COPYRIGHT files for details. +// +// SPDX-License-Identifier: BSD-3-Clause +////////////////////////////////////////////////////////////////////////////// + +#include "benchmark/benchmark.h" + +#include + +#include "chai/expt/ContextGuard.hpp" +#include "chai/expt/UnifiedArrayManager.hpp" + +namespace { + using ::chai::expt::Context; + using ::chai::expt::ContextGuard; + using ::chai::expt::ContextManager; + using ::chai::expt::UnifiedArrayManager; + + static void UnifiedArrayManager_DataAfterTouch(benchmark::State& state, + Context initial_context, + bool initial_touch, + Context call_context, + bool call_touch) + { + const auto size = static_cast(state.range(0)); + + for (auto _ : state) + { + state.PauseTiming(); + + auto& contextManager = ContextManager::getInstance(); + contextManager.reset(); + + UnifiedArrayManager manager{size}; + { + ContextGuard guard{initial_context}; + int* initial_data = manager.data(/*touch=*/initial_touch); + benchmark::DoNotOptimize(initial_data); + } + + { + ContextGuard guard{call_context}; + state.ResumeTiming(); // measure only the data() call + int* data = manager.data(/*touch=*/call_touch); + benchmark::DoNotOptimize(data); + state.PauseTiming(); // exclude guard destruction + } + } + + state.SetItemsProcessed(state.iterations()); + } + + static void UnifiedArrayManager_DefaultConstruct(benchmark::State& state) + { + for (auto _ : state) + { + UnifiedArrayManager manager{}; + benchmark::DoNotOptimize(manager); + } + } + + BENCHMARK(UnifiedArrayManager_DefaultConstruct); + + static void UnifiedArrayManager_SizeConstruct(benchmark::State& state) + { + const auto size = static_cast(state.range(0)); + + for (auto _ : state) + { + UnifiedArrayManager manager{size}; + benchmark::DoNotOptimize(manager); + } + + state.SetBytesProcessed(state.iterations() * size * sizeof(int)); + } + + BENCHMARK(UnifiedArrayManager_SizeConstruct) + ->Arg(0) + ->RangeMultiplier(8) + ->Range(1, 1 << 20); + + static void UnifiedArrayManager_FirstData_HostConst(benchmark::State& state) + { + const auto size = static_cast(state.range(0)); + + for (auto _ : state) + { + state.PauseTiming(); + UnifiedArrayManager manager{size}; + state.ResumeTiming(); + + { + ContextGuard guard{Context::HOST}; + int* data = manager.data(/*touch=*/false); + benchmark::DoNotOptimize(data); + } + } + + state.SetItemsProcessed(state.iterations()); + } + + BENCHMARK(UnifiedArrayManager_FirstData_HostConst) + ->Arg(0) + ->RangeMultiplier(8) + ->Range(1, 1 << 20); + + static void UnifiedArrayManager_FirstData_Host(benchmark::State& state) + { + const auto size = static_cast(state.range(0)); + + for (auto _ : state) + { + state.PauseTiming(); + UnifiedArrayManager manager{size}; + state.ResumeTiming(); + + { + ContextGuard guard{Context::HOST}; + int* data = manager.data(/*touch=*/true); + benchmark::DoNotOptimize(data); + } + } + + state.SetItemsProcessed(state.iterations()); + } + + BENCHMARK(UnifiedArrayManager_FirstData_Host) + ->Arg(0) + ->RangeMultiplier(8) + ->Range(1, 1 << 20); + + static void UnifiedArrayManager_FirstData_DeviceConst(benchmark::State& state) + { + const auto size = static_cast(state.range(0)); + + for (auto _ : state) + { + state.PauseTiming(); + UnifiedArrayManager manager{size}; + state.ResumeTiming(); + + { + ContextGuard guard{Context::DEVICE}; + int* data = manager.data(/*touch=*/false); + benchmark::DoNotOptimize(data); + } + } + + state.SetItemsProcessed(state.iterations()); + } + + BENCHMARK(UnifiedArrayManager_FirstData_DeviceConst) + ->Arg(0) + ->RangeMultiplier(8) + ->Range(1, 1 << 20); + + static void UnifiedArrayManager_FirstData_Device(benchmark::State& state) + { + const auto size = static_cast(state.range(0)); + + for (auto _ : state) + { + state.PauseTiming(); + UnifiedArrayManager manager{size}; + state.ResumeTiming(); + + { + ContextGuard guard{Context::DEVICE}; + int* data = manager.data(/*touch=*/true); + benchmark::DoNotOptimize(data); + } + } + + state.SetItemsProcessed(state.iterations()); + } + + BENCHMARK(UnifiedArrayManager_FirstData_Device) + ->Arg(0) + ->RangeMultiplier(8) + ->Range(1, 1 << 20); + + static void UnifiedArrayManager_DataAfterHostTouch_HostConst(benchmark::State& state) + { + UnifiedArrayManager_DataAfterTouch(state, + /*initial_context=*/Context::HOST, + /*initial_touch=*/true, + /*call_context=*/Context::HOST, + /*call_touch=*/false); + } + + BENCHMARK(UnifiedArrayManager_DataAfterHostTouch_HostConst) + ->Arg(0) + ->RangeMultiplier(8) + ->Range(1, 1 << 20); + + static void UnifiedArrayManager_DataAfterHostTouch_Host(benchmark::State& state) + { + UnifiedArrayManager_DataAfterTouch(state, + /*initial_context=*/Context::HOST, + /*initial_touch=*/true, + /*call_context=*/Context::HOST, + /*call_touch=*/true); + } + + BENCHMARK(UnifiedArrayManager_DataAfterHostTouch_Host) + ->Arg(0) + ->RangeMultiplier(8) + ->Range(1, 1 << 20); + + static void UnifiedArrayManager_DataAfterHostTouch_DeviceConst(benchmark::State& state) + { + UnifiedArrayManager_DataAfterTouch(state, + /*initial_context=*/Context::HOST, + /*initial_touch=*/true, + /*call_context=*/Context::DEVICE, + /*call_touch=*/false); + } + + BENCHMARK(UnifiedArrayManager_DataAfterHostTouch_DeviceConst) + ->Arg(0) + ->RangeMultiplier(8) + ->Range(1, 1 << 20); + + static void UnifiedArrayManager_DataAfterHostTouch_Device(benchmark::State& state) + { + UnifiedArrayManager_DataAfterTouch(state, + /*initial_context=*/Context::HOST, + /*initial_touch=*/true, + /*call_context=*/Context::DEVICE, + /*call_touch=*/true); + } + + BENCHMARK(UnifiedArrayManager_DataAfterHostTouch_Device) + ->Arg(0) + ->RangeMultiplier(8) + ->Range(1, 1 << 20); + + static void UnifiedArrayManager_DataAfterDeviceTouch_HostConst(benchmark::State& state) + { + UnifiedArrayManager_DataAfterTouch(state, + /*initial_context=*/Context::DEVICE, + /*initial_touch=*/true, + /*call_context=*/Context::HOST, + /*call_touch=*/false); + } + + BENCHMARK(UnifiedArrayManager_DataAfterDeviceTouch_HostConst) + ->Arg(0) + ->RangeMultiplier(8) + ->Range(1, 1 << 20); + + static void UnifiedArrayManager_DataAfterDeviceTouch_Host(benchmark::State& state) + { + UnifiedArrayManager_DataAfterTouch(state, + /*initial_context=*/Context::DEVICE, + /*initial_touch=*/true, + /*call_context=*/Context::HOST, + /*call_touch=*/true); + } + + BENCHMARK(UnifiedArrayManager_DataAfterDeviceTouch_Host) + ->Arg(0) + ->RangeMultiplier(8) + ->Range(1, 1 << 20); + + static void UnifiedArrayManager_DataAfterDeviceTouch_DeviceConst(benchmark::State& state) + { + UnifiedArrayManager_DataAfterTouch(state, + /*initial_context=*/Context::DEVICE, + /*initial_touch=*/true, + /*call_context=*/Context::DEVICE, + /*call_touch=*/false); + } + + BENCHMARK(UnifiedArrayManager_DataAfterDeviceTouch_DeviceConst) + ->Arg(0) + ->RangeMultiplier(8) + ->Range(1, 1 << 20); + + static void UnifiedArrayManager_DataAfterDeviceTouch_Device(benchmark::State& state) + { + UnifiedArrayManager_DataAfterTouch(state, + /*initial_context=*/Context::DEVICE, + /*initial_touch=*/true, + /*call_context=*/Context::DEVICE, + /*call_touch=*/true); + } + + BENCHMARK(UnifiedArrayManager_DataAfterDeviceTouch_Device) + ->Arg(0) + ->RangeMultiplier(8) + ->Range(1, 1 << 20); +} // namespace + +BENCHMARK_MAIN(); diff --git a/docs/sphinx/expt/design.rst b/docs/sphinx/expt/design.rst index 04040bde..0bf4602b 100644 --- a/docs/sphinx/expt/design.rst +++ b/docs/sphinx/expt/design.rst @@ -216,3 +216,54 @@ be extra careful to avoid using it on the device. { a[i] = i; } + +------------------ +UnifiedArrayManager +------------------ + +``UnifiedArrayManager`` manages a single contiguous array allocated from Umpire's +unified (managed) memory allocator (``UM``). Since unified memory is accessible +from both CPU and GPU, CHAI can provide a single pointer value that is valid in +both the ``HOST`` and ``DEVICE`` contexts. + +``UnifiedArrayManager`` relies on :ref:`ContextManager ` to +avoid unnecessary full device synchronizations and to ensure correctness when +switching between contexts: + +- ``data(touch=false)`` returns a pointer suitable for read access in the current + context and synchronizes with the most recent modifying context (if needed). +- ``data(touch=true)`` indicates the caller will modify the array in the current + context; it performs any required synchronization first, then records the + current context as the most recently modified context. + +Like ``HostArrayManager``, ``UnifiedArrayManager`` performs value initialization +of each element on the host (numeric types are initialized to zero). + +.. code-block:: cpp + + #include "chai/expt/Context.hpp" + #include "chai/expt/ContextGuard.hpp" + #include "chai/expt/UnifiedArrayManager.hpp" + + const std::size_t N = 1000000; + ::chai::expt::UnifiedArrayManager a{N}; + + { + ::chai::expt::ContextGuard guard{::chai::expt::Context::HOST}; + int* p = a.data(true); + for (std::size_t i = 0; i < N; ++i) { p[i] = static_cast(i); } + } + + { + ::chai::expt::ContextGuard guard{::chai::expt::Context::DEVICE}; + int* p = a.data(true); + // Launch a CUDA/HIP kernel that writes through p... + } + + { + ::chai::expt::ContextGuard guard{::chai::expt::Context::HOST}; + // If the most recent modification was on DEVICE, this call synchronizes first. + const int* p = a.data(false); + // Read through p... + } + diff --git a/src/chai/CMakeLists.txt b/src/chai/CMakeLists.txt index 89f20bf6..6b76eff9 100644 --- a/src/chai/CMakeLists.txt +++ b/src/chai/CMakeLists.txt @@ -42,6 +42,7 @@ if(CHAI_ENABLE_EXPERIMENTAL) expt/HostSharedPointer.hpp expt/ManagedArrayPointer.hpp expt/ManagedArraySharedPointer.hpp + expt/UnifiedArrayManager.hpp ManagedSharedPtr.hpp SharedPtrCounter.hpp SharedPtrManager.hpp diff --git a/src/chai/expt/UnifiedArrayManager.hpp b/src/chai/expt/UnifiedArrayManager.hpp new file mode 100644 index 00000000..56845298 --- /dev/null +++ b/src/chai/expt/UnifiedArrayManager.hpp @@ -0,0 +1,160 @@ +////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Lawrence Livermore National Security, LLC and other CHAI +// contributors. See the CHAI LICENSE and COPYRIGHT files for details. +// +// SPDX-License-Identifier: BSD-3-Clause +////////////////////////////////////////////////////////////////////////////// + +#ifndef CHAI_UNIFIED_ARRAY_MANAGER_HPP +#define CHAI_UNIFIED_ARRAY_MANAGER_HPP + +#include "chai/expt/Context.hpp" +#include "chai/expt/ContextManager.hpp" +#include "umpire/ResourceManager.hpp" +#include "umpire/TypedAllocator.hpp" +#include +#include + +namespace chai::expt +{ + /*! + * \brief This class manages a unified memory array. It is designed for use + * with ManagedArrayPointer. + * + * \tparam ElementType The type of elements contained in this array. + * + * \note UnifiedArrayManager performs value initialization of each array element. + * That is to say, numeric types will be initialized to zero and nontrivial + * types will be default constructed. This initialization occurs on the host. + * In the future, this behavior may change to default initialization for + * performance reasons, such that numeric types will be left in an + * indeterminate state and nontrivial types will be default constructed. + */ + template + class UnifiedArrayManager { + private: + /*! + * \brief Allocator used by the managed unified memory storage. + */ + using AllocatorType = ::umpire::TypedAllocator; + + /*! + * \brief Underlying contiguous unified memory storage type for managed elements. + */ + using StorageType = std::vector; + + public: + /*! + * \brief Default-constructs a UnifiedArrayManager with zero elements + * and a default allocator for unified memory allocations. + */ + UnifiedArrayManager() = default; + + /*! + * \brief Constructs a UnifiedArrayManager with zero elements + * and \p allocator for unified memory allocations. + * + * \param allocator Allocator used for unified memory allocations. + */ + explicit UnifiedArrayManager(const umpire::Allocator& allocator) + : m_storage{StorageType(AllocatorType(allocator))} + { + } + + /*! + * \brief Constructs a UnifiedArrayManager with \p size elements + * using the default allocator for unified memory allocations. + * + * \param size Number of elements to allocate. + */ + explicit UnifiedArrayManager(std::size_t size) + : m_storage{StorageType(size, AllocatorType(::umpire::ResourceManager::getInstance().getAllocator("UM")))} + { + } + + /*! + * \brief Constructs a UnifiedArrayManager with \p size elements + * using \p allocator for unified memory allocations. + * + * \param size Number of elements to allocate. + * \param allocator Allocator used for unified memory allocations. + */ + UnifiedArrayManager(std::size_t size, + const umpire::Allocator& allocator) + : m_storage{StorageType(size, AllocatorType(allocator))} + { + } + + /*! + * \brief Resizes the managed storage to \p new_size elements. + * + * \param new_size New number of elements. + */ + void resize(std::size_t new_size) + { + // TODO: Investigate resize in the last modified space. + Context context = Context::HOST; + + if (context != m_modified) + { + ContextManager::getInstance().synchronize(m_modified); + } + + m_storage.resize(new_size); + m_modified = context; + } + + /*! + * \brief Returns the number of elements currently managed. + * + * \return Number of elements in the managed storage. + */ + std::size_t size() const + { + return m_storage.size(); + } + + /*! + * \brief Returns a pointer to the underlying contiguous storage. + * + * \return Pointer to the first element, or nullptr if the storage is empty. + */ + ElementType* data(bool touch) + { + ContextManager& contextManager = ContextManager::getInstance(); + Context context = contextManager.getContext(); + + if (context != m_modified) + { + contextManager.synchronize(m_modified); + } + + if (touch) + { + m_modified = context; + } + else + { + m_modified = Context::NONE; + } + + return m_storage.empty() ? nullptr : m_storage.data(); + } + + private: + /*! + * \brief Underlying unified memory storage for the managed elements. + */ + StorageType m_storage{AllocatorType(::umpire::ResourceManager::getInstance().getAllocator("UM"))}; + + /*! + * \brief Context in which the managed storage was most recently modified. + * + * \note Used to determine when synchronization is required before accessing + * the underlying storage from the current context. + */ + Context m_modified{Context::NONE}; + }; // class UnifiedArrayManager +} // namespace chai::expt + +#endif // CHAI_UNIFIED_ARRAY_MANAGER_HPP diff --git a/tests/expt/CMakeLists.txt b/tests/expt/CMakeLists.txt index a6fd9b14..9e6049cb 100644 --- a/tests/expt/CMakeLists.txt +++ b/tests/expt/CMakeLists.txt @@ -91,3 +91,16 @@ blt_add_executable( blt_add_test( NAME HostArraySharedPointerTests COMMAND HostArraySharedPointerTests) + +if(ENABLE_CUDA OR ENABLE_HIP) + blt_add_executable( + NAME UnifiedArrayManagerTests + SOURCES UnifiedArrayManagerTests.cpp + HEADERS ${chai_expt_test_headers} + INCLUDES ${PROJECT_BINARY_DIR}/include + DEPENDS_ON ${chai_expt_test_depends}) + + blt_add_test( + NAME UnifiedArrayManagerTests + COMMAND UnifiedArrayManagerTests) +endif() diff --git a/tests/expt/UnifiedArrayManagerTests.cpp b/tests/expt/UnifiedArrayManagerTests.cpp new file mode 100644 index 00000000..8ae96399 --- /dev/null +++ b/tests/expt/UnifiedArrayManagerTests.cpp @@ -0,0 +1,574 @@ +////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Lawrence Livermore National Security, LLC and other CHAI +// contributors. See the CHAI LICENSE and COPYRIGHT files for details. +// +// SPDX-License-Identifier: BSD-3-Clause +////////////////////////////////////////////////////////////////////////////// + +#include "chai/config.hpp" +#include "chai/expt/Context.hpp" +#include "chai/expt/ContextGuard.hpp" +#include "chai/expt/ContextManager.hpp" +#include "chai/expt/UnifiedArrayManager.hpp" +#include "camp/helpers.hpp" +#include "gtest/gtest.h" + +#include + +#if defined(CHAI_ENABLE_CUDA) +#include +#endif + +#if defined(CHAI_ENABLE_HIP) +#include +#endif + +namespace { + using ::chai::expt::Context; + using ::chai::expt::ContextGuard; + using ::chai::expt::ContextManager; + using ::chai::expt::UnifiedArrayManager; + + template + T* malloc_managed(std::size_t count) + { + T* ptr = nullptr; + +#if defined(CHAI_ENABLE_CUDA) + CAMP_CUDA_API_INVOKE_AND_CHECK(cudaMallocManaged, (void**)&ptr, sizeof(T) * count); +#elif defined(CHAI_ENABLE_HIP) + CAMP_HIP_API_INVOKE_AND_CHECK(hipMallocManaged, (void**)&ptr, sizeof(T) * count); +#else + static_cast(count); +#endif + + return ptr; + } + + inline void free_managed(void* ptr) + { +#if defined(CHAI_ENABLE_CUDA) + CAMP_CUDA_API_INVOKE_AND_CHECK(cudaFree, ptr); +#elif defined(CHAI_ENABLE_HIP) + CAMP_HIP_API_INVOKE_AND_CHECK(hipFree, ptr); +#else + static_cast(ptr); +#endif + } + +#if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) + __global__ void increment_kernel(int* data, std::size_t size) + { + const std::size_t i = static_cast(blockIdx.x) * blockDim.x + threadIdx.x; + if (i < size) + { + data[i] += 1; + } + } + + __global__ void copy_kernel(const int* in, int* out, std::size_t size) + { + const std::size_t i = static_cast(blockIdx.x) * blockDim.x + threadIdx.x; + if (i < size) + { + out[i] = in[i]; + } + } + + inline void device_synchronize_raw() + { +#if defined(CHAI_ENABLE_CUDA) + CAMP_CUDA_API_INVOKE_AND_CHECK(cudaDeviceSynchronize); +#elif defined(CHAI_ENABLE_HIP) + CAMP_HIP_API_INVOKE_AND_CHECK(hipDeviceSynchronize); +#endif + } + + inline void launch_increment(int* data, std::size_t size) + { + constexpr int BLOCK_SIZE = 256; + const int grid_size = static_cast((size + BLOCK_SIZE - 1) / BLOCK_SIZE); + +#if defined(CHAI_ENABLE_CUDA) + increment_kernel<<>>(data, size); + CAMP_CUDA_API_INVOKE_AND_CHECK(cudaGetLastError); +#elif defined(CHAI_ENABLE_HIP) + hipLaunchKernelGGL(increment_kernel, + dim3(grid_size), + dim3(BLOCK_SIZE), + 0, + 0, + data, + size); + CAMP_HIP_API_INVOKE_AND_CHECK(hipGetLastError); +#endif + } + + inline void launch_copy(const int* in, int* out, std::size_t size) + { + constexpr int BLOCK_SIZE = 256; + const int grid_size = static_cast((size + BLOCK_SIZE - 1) / BLOCK_SIZE); + +#if defined(CHAI_ENABLE_CUDA) + copy_kernel<<>>(in, out, size); + CAMP_CUDA_API_INVOKE_AND_CHECK(cudaGetLastError); +#elif defined(CHAI_ENABLE_HIP) + hipLaunchKernelGGL(copy_kernel, + dim3(grid_size), + dim3(BLOCK_SIZE), + 0, + 0, + in, + out, + size); + CAMP_HIP_API_INVOKE_AND_CHECK(hipGetLastError); +#endif + } +#endif + + class UnifiedArrayManagerTest : public ::testing::Test + { + protected: + void SetUp() override + { + ContextManager::getInstance().reset(); + } + + void TearDown() override + { + ContextManager::getInstance().reset(); + } + }; +} // namespace + +TEST_F(UnifiedArrayManagerTest, DefaultConstructor) +{ + UnifiedArrayManager manager{}; + EXPECT_EQ(manager.size(), 0); + + { + ContextGuard guard{Context::HOST}; + EXPECT_EQ(manager.data(false), nullptr); + EXPECT_EQ(manager.data(true), nullptr); + } +} + +TEST_F(UnifiedArrayManagerTest, AllocatorConstructor) +{ + constexpr std::size_t N = 32; + auto& rm = ::umpire::ResourceManager::getInstance(); + umpire::Allocator allocator = rm.getAllocator("UM"); + + UnifiedArrayManager manager{N, allocator}; + EXPECT_EQ(manager.size(), N); + + { + ContextGuard guard{Context::HOST}; + int* data = manager.data(true); + ASSERT_NE(data, nullptr); + for (std::size_t i = 0; i < N; ++i) + { + EXPECT_EQ(data[i], 0); + data[i] = static_cast(i * 2); + } + } + + { + ContextGuard guard{Context::HOST}; + int* data = manager.data(false); + ASSERT_NE(data, nullptr); + for (std::size_t i = 0; i < N; ++i) + { + EXPECT_EQ(data[i], static_cast(i * 2)); + } + } +} + +TEST_F(UnifiedArrayManagerTest, ResizeToZero) +{ + UnifiedArrayManager manager{10}; + EXPECT_EQ(manager.size(), 10); + + { + ContextGuard guard{Context::HOST}; + EXPECT_NE(manager.data(false), nullptr); + } + + manager.resize(0); + EXPECT_EQ(manager.size(), 0); + + { + ContextGuard guard{Context::HOST}; + EXPECT_EQ(manager.data(false), nullptr); + EXPECT_EQ(manager.data(true), nullptr); + } +} + +TEST_F(UnifiedArrayManagerTest, ResizeSmaller) +{ + constexpr std::size_t N0 = 16; + constexpr std::size_t N1 = 6; + + UnifiedArrayManager manager{N0}; + + { + ContextGuard guard{Context::HOST}; + int* data = manager.data(true); + ASSERT_NE(data, nullptr); + for (std::size_t i = 0; i < N0; ++i) + { + data[i] = static_cast(i); + } + } + + manager.resize(N1); + EXPECT_EQ(manager.size(), N1); + + { + ContextGuard guard{Context::HOST}; + int* data = manager.data(false); + ASSERT_NE(data, nullptr); + for (std::size_t i = 0; i < N1; ++i) + { + EXPECT_EQ(data[i], static_cast(i)); + } + } +} + +TEST_F(UnifiedArrayManagerTest, ResizeLarger) +{ + constexpr std::size_t N0 = 8; + constexpr std::size_t N1 = 16; + + UnifiedArrayManager manager{N0}; + + { + ContextGuard guard{Context::HOST}; + int* data = manager.data(true); + ASSERT_NE(data, nullptr); + for (std::size_t i = 0; i < N0; ++i) + { + data[i] = static_cast(i + 10); + } + } + + manager.resize(N1); + EXPECT_EQ(manager.size(), N1); + + { + ContextGuard guard{Context::HOST}; + int* data = manager.data(false); + ASSERT_NE(data, nullptr); + + for (std::size_t i = 0; i < N0; ++i) + { + EXPECT_EQ(data[i], static_cast(i + 10)); + } + + for (std::size_t i = N0; i < N1; ++i) + { + EXPECT_EQ(data[i], 0); + } + } +} + +TEST_F(UnifiedArrayManagerTest, HostReadThenHostRead) +{ + // Set up + constexpr std::size_t N = 128; + UnifiedArrayManager manager{N}; + + // Host read + { + ContextGuard guard{Context::HOST}; + const int* data = manager.data(false); + ASSERT_NE(data, nullptr); + + for (std::size_t i = 0; i < N; ++i) + { + EXPECT_EQ(data[i], 0); + } + } + + // Host read + { + ContextGuard guard{Context::HOST}; + const int* data = manager.data(false); + ASSERT_NE(data, nullptr); + + for (std::size_t i = 0; i < N; ++i) + { + EXPECT_EQ(data[i], 0); + } + } +} + +TEST_F(UnifiedArrayManagerTest, HostWriteThenHostRead) +{ + // Set up + constexpr std::size_t N = 128; + UnifiedArrayManager manager{N}; + + // Host write + { + ContextGuard guard{Context::HOST}; + int* data = manager.data(true); + ASSERT_NE(data, nullptr); + + for (std::size_t i = 0; i < N; ++i) + { + data[i] = static_cast(i); + } + } + + // Host read + { + ContextGuard guard{Context::HOST}; + const int* data = manager.data(false); + ASSERT_NE(data, nullptr); + + for (std::size_t i = 0; i < N; ++i) + { + EXPECT_EQ(data[i], static_cast(i)); + } + } +} + +TEST_F(UnifiedArrayManagerTest, HostReadThenDeviceRead) +{ + // Set up + constexpr std::size_t N = 256; + UnifiedArrayManager manager{N}; + + // Host read + { + ContextGuard guard{Context::HOST}; + const int* data = manager.data(false); + ASSERT_NE(data, nullptr); + + for (std::size_t i = 0; i < N; ++i) + { + EXPECT_EQ(data[i], 0); + } + } + + // Set up + int* out = malloc_managed(N); + ASSERT_NE(out, nullptr); + + // Device read + { + ContextGuard guard{Context::DEVICE}; + const int* data = manager.data(false); + ASSERT_NE(data, nullptr); + launch_copy(data, out, N); + } + + // Synchronize + device_synchronize_raw(); + + // Check result + for (std::size_t i = 0; i < N; ++i) + { + EXPECT_EQ(out[i], 0); + } + + // Clean up + free_managed(out); +} + +TEST_F(UnifiedArrayManagerTest, HostWriteThenDeviceRead) +{ + // Set up + constexpr std::size_t N = 256; + UnifiedArrayManager manager{N}; + + // Host write + { + ContextGuard guard{Context::HOST}; + int* data = manager.data(true); + ASSERT_NE(data, nullptr); + + for (std::size_t i = 0; i < N; ++i) + { + data[i] = static_cast(i); + } + } + + // Set up + int* out = malloc_managed(N); + ASSERT_NE(out, nullptr); + + // Device read + { + ContextGuard guard{Context::DEVICE}; + const int* data = manager.data(false); + ASSERT_NE(data, nullptr); + launch_copy(data, out, N); + } + + // Synchronize + device_synchronize_raw(); + + // Check result + for (std::size_t i = 0; i < N; ++i) + { + EXPECT_EQ(out[i], static_cast(i)); + } + + // Clean up + free_managed(out); +} + +TEST_F(UnifiedArrayManagerTest, DeviceReadThenHostRead) +{ + // Set up + constexpr std::size_t N = 256; + UnifiedArrayManager manager{N}; + + int* out = malloc_managed(N); + ASSERT_NE(out, nullptr); + + // Device read + { + ContextGuard guard{Context::DEVICE}; + const int* data = manager.data(false); + ASSERT_NE(data, nullptr); + launch_copy(data, out, N); + } + + // Host read + { + ContextGuard guard{Context::HOST}; + const int* data = manager.data(false); + ASSERT_NE(data, nullptr); + + for (std::size_t i = 0; i < N; ++i) + { + EXPECT_EQ(data[i], 0); + } + } + + // Synchronize + // Note: The host read should have caused a synchronize, but the following + // checks should independent of whether than part works. + device_synchronize_raw(); + + // Checks + for (std::size_t i = 0; i < N; ++i) + { + EXPECT_EQ(out[i], 0); + } + + // Clean up + free_managed(out); +} + +TEST_F(UnifiedArrayManagerTest, DeviceWriteThenHostRead) +{ + // Set up + constexpr std::size_t N = 256; + UnifiedArrayManager manager{N}; + + // Device write + { + ContextGuard guard{Context::DEVICE}; + int* data = manager.data(true); + ASSERT_NE(data, nullptr); + launch_increment(data, N); + } + + // Host read + { + ContextGuard guard{Context::HOST}; + const int* data = manager.data(false); + ASSERT_NE(data, nullptr); + + for (std::size_t i = 0; i < N; ++i) + { + EXPECT_EQ(data[i], 1); + } + } +} + +TEST_F(UnifiedArrayManagerTest, DeviceReadThenDeviceRead) +{ + // Set up + constexpr std::size_t N = 256; + UnifiedArrayManager manager{N}; + + int* out0 = malloc_managed(N); + int* out1 = malloc_managed(N); + ASSERT_NE(out0, nullptr); + ASSERT_NE(out1, nullptr); + + // Device read + { + ContextGuard guard{Context::DEVICE}; + const int* data = manager.data(false); + ASSERT_NE(data, nullptr); + launch_copy(data, out0, N); + } + + // Device read + { + ContextGuard guard{Context::DEVICE}; + const int* data = manager.data(false); + ASSERT_NE(data, nullptr); + launch_copy(data, out1, N); + } + + // Synchronize + device_synchronize_raw(); + + // Checks + for (std::size_t i = 0; i < N; ++i) + { + EXPECT_EQ(out0[i], 0); + EXPECT_EQ(out1[i], 0); + } + + // Clean up + free_managed(out0); + free_managed(out1); +} + +TEST_F(UnifiedArrayManagerTest, DeviceWriteThenDeviceRead) +{ + // Set up + constexpr std::size_t N = 256; + UnifiedArrayManager manager{N}; + + // Device write + { + ContextGuard guard{Context::DEVICE}; + int* data = manager.data(true); + ASSERT_NE(data, nullptr); + launch_increment(data, N); + } + + // Set up + int* out = malloc_managed(N); + ASSERT_NE(out, nullptr); + + // Device read + { + ContextGuard guard{Context::DEVICE}; + const int* data = manager.data(false); + ASSERT_NE(data, nullptr); + launch_copy(data, out, N); + } + + // Synchronize + device_synchronize_raw(); + + // Checks + for (std::size_t i = 0; i < N; ++i) + { + EXPECT_EQ(out[i], 1); + } + + // Clean up + free_managed(out); +} +