Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
d2f8707
Add UnifiedArrayManager
adayton1 Feb 10, 2026
7cc3511
Clean up documentation
adayton1 Feb 10, 2026
4736f8f
Use UM allocator for UnifiedArrayManager
adayton1 Feb 10, 2026
52dbaf6
Merge branch 'develop' into expt/um_array_manager
adayton1 Feb 27, 2026
2e17734
Merge branch 'develop' into expt/um_array_manager
adayton1 Mar 3, 2026
4a096ab
Add UnifiedArrayManager tests
adayton1 Mar 4, 2026
c926541
Minor fixes
adayton1 Mar 4, 2026
225cebe
Update gitignore
adayton1 Mar 4, 2026
f62aea3
Add more tests
adayton1 Mar 4, 2026
7b3a0c4
Add UnifiedArrayManager benchmarks
adayton1 Mar 4, 2026
54fc0e7
Clean up style
adayton1 Mar 4, 2026
0b749d7
More benchmarks
adayton1 Mar 4, 2026
f29edb5
Fix touches
adayton1 Mar 4, 2026
058fa6e
Add documentation for UnifiedArrayManager
adayton1 Mar 5, 2026
f874d00
Add more synchronizes
adayton1 Mar 5, 2026
2df4432
Check full array
adayton1 Mar 5, 2026
a8b0401
Add all combinations of data access
adayton1 Mar 5, 2026
0d27ac7
Separate tests
adayton1 Mar 5, 2026
0af955f
Clean up tests
adayton1 Mar 5, 2026
4cf3bef
More tests
adayton1 Mar 5, 2026
249f27e
Test clean up
adayton1 Mar 5, 2026
869c854
Update benchmarks
adayton1 Mar 5, 2026
a001fe1
Add baseline for unified memory benchmarks
adayton1 Mar 5, 2026
e055d6a
Start over with benchmarks
adayton1 Mar 6, 2026
7d2fd7a
Add size constructor benchmark
adayton1 Mar 6, 2026
2e531b8
Rename benchmarks
adayton1 Mar 6, 2026
a169850
Update range multiplier
adayton1 Mar 6, 2026
06b7228
More test cases
adayton1 Mar 6, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@ Win32/x64/
*.aps
*.orig
*.code-workspace
slirp.out
4 changes: 4 additions & 0 deletions benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,7 @@ if (CHAI_ENABLE_MANAGED_PTR)
NAME managed_ptr_benchmarks
COMMAND managed_ptr_benchmarks)
endif ()

# Only descend into the expt/ benchmark directory when CHAI_ENABLE_EXPERIMENTAL is set.
if (CHAI_ENABLE_EXPERIMENTAL)
add_subdirectory(expt)
endif ()
33 changes: 33 additions & 0 deletions benchmarks/expt/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
##############################################################################
# Copyright (c) Lawrence Livermore National Security, LLC and other CHAI
# contributors. See the CHAI LICENSE and COPYRIGHT files for details.
#
# SPDX-License-Identifier: BSD-3-Clause
##############################################################################

# Targets every experimental benchmark depends on.
set(chai_expt_benchmark_depends chai gbenchmark)

# Add the dependency for each GPU backend that is enabled.
if (CHAI_ENABLE_CUDA)
  list(APPEND chai_expt_benchmark_depends cuda)
endif ()

if (CHAI_ENABLE_HIP)
  list(APPEND chai_expt_benchmark_depends blt::hip)
endif ()

# The UnifiedArrayManager benchmarks are only built and registered when
# CUDA or HIP is enabled.
if (CHAI_ENABLE_CUDA OR CHAI_ENABLE_HIP)
  blt_add_executable(
    NAME UnifiedArrayManagerBenchmarks
    SOURCES UnifiedArrayManagerBenchmarks.cpp
    DEPENDS_ON ${chai_expt_benchmark_depends})

  blt_add_benchmark(
    NAME UnifiedArrayManagerBenchmarks
    COMMAND UnifiedArrayManagerBenchmarks)
endif ()
297 changes: 297 additions & 0 deletions benchmarks/expt/UnifiedArrayManagerBenchmarks.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,297 @@
//////////////////////////////////////////////////////////////////////////////
// Copyright (c) Lawrence Livermore National Security, LLC and other CHAI
// contributors. See the CHAI LICENSE and COPYRIGHT files for details.
//
// SPDX-License-Identifier: BSD-3-Clause
//////////////////////////////////////////////////////////////////////////////

#include "benchmark/benchmark.h"

#include <cstddef>

#include "chai/expt/ContextGuard.hpp"
#include "chai/expt/UnifiedArrayManager.hpp"

namespace {
using ::chai::expt::Context;
using ::chai::expt::ContextGuard;
using ::chai::expt::ContextManager;
using ::chai::expt::UnifiedArrayManager;

/// Shared driver for the "data() after a prior data() call" benchmarks.
///
/// Every iteration, with the timer paused, resets the ContextManager
/// singleton, constructs a fresh UnifiedArrayManager<int> holding
/// state.range(0) elements, and makes a priming data(initial_touch) call
/// under a ContextGuard for initial_context. The only timed work is the
/// following data(call_touch) call made under a ContextGuard for call_context.
///
/// @param state            Google Benchmark state; range(0) is the element count.
/// @param initial_context  Context active during the untimed priming call.
/// @param initial_touch    Touch flag passed to the priming data() call.
/// @param call_context     Context active during the timed data() call.
/// @param call_touch       Touch flag passed to the timed data() call.
static void UnifiedArrayManager_DataAfterTouch(benchmark::State& state,
                                               Context initial_context,
                                               bool initial_touch,
                                               Context call_context,
                                               bool call_touch)
{
  const auto size = static_cast<std::size_t>(state.range(0));

  for (auto _ : state)
  {
    state.PauseTiming();

    auto& contextManager = ContextManager::getInstance();
    contextManager.reset();

    {
      // Inner scope: the manager is destroyed before the timer is restarted
      // below, so its teardown stays out of the measurement.
      UnifiedArrayManager<int> manager{size};

      {
        ContextGuard guard{initial_context};
        int* initial_data = manager.data(/*touch=*/initial_touch);
        benchmark::DoNotOptimize(initial_data);
      }

      {
        ContextGuard guard{call_context};
        state.ResumeTiming();  // measure only the data() call
        int* data = manager.data(/*touch=*/call_touch);
        benchmark::DoNotOptimize(data);
        state.PauseTiming();   // exclude guard destruction
      }
    }

    // PauseTiming() requires a running timer. Without this, the PauseTiming()
    // at the top of the next iteration and the library's automatic stop after
    // the last iteration would both run on an already-paused timer.
    state.ResumeTiming();
  }

  state.SetItemsProcessed(state.iterations());
}

/// Times default construction of a UnifiedArrayManager<int>.
///
/// The object lives in the loop body, so it is also destroyed inside the
/// timed loop at the end of every iteration.
static void UnifiedArrayManager_DefaultConstruct(benchmark::State& state)
{
  for (auto _ : state)
  {
    UnifiedArrayManager<int> emptyManager{};
    benchmark::DoNotOptimize(emptyManager);
  }
}

BENCHMARK(UnifiedArrayManager_DefaultConstruct);

/// Times construction of a UnifiedArrayManager<int> with state.range(0)
/// elements.
///
/// The object lives in the loop body, so it is also destroyed inside the
/// timed loop at the end of every iteration. Throughput is reported as
/// iterations * element count * sizeof(int) bytes.
static void UnifiedArrayManager_SizeConstruct(benchmark::State& state)
{
  const std::size_t elementCount = static_cast<std::size_t>(state.range(0));

  for (auto _ : state)
  {
    UnifiedArrayManager<int> sizedManager{elementCount};
    benchmark::DoNotOptimize(sizedManager);
  }

  state.SetBytesProcessed(state.iterations() * elementCount * sizeof(int));
}

// Run with an element count of 0, then Range(1, 1 << 20) with a multiplier of 8.
BENCHMARK(UnifiedArrayManager_SizeConstruct)
  ->Arg(0)
  ->RangeMultiplier(8)
  ->Range(1, 1 << 20);

/// Times the first data(/*touch=*/false) call, made in the HOST context, on a
/// freshly constructed UnifiedArrayManager<int> of state.range(0) elements.
///
/// Construction runs with the timer paused.
/// NOTE(review): the timer is running again when the ContextGuard is created
/// and when the guard and the manager are destroyed, so that work is part of
/// every timed iteration - confirm this is intended.
static void UnifiedArrayManager_FirstData_HostConst(benchmark::State& state)
{
  const std::size_t elementCount = static_cast<std::size_t>(state.range(0));

  for (auto _ : state)
  {
    // Build the array off the clock.
    state.PauseTiming();
    UnifiedArrayManager<int> arrayManager{elementCount};
    state.ResumeTiming();

    // Declared after the manager, so the guard is still destroyed first.
    ContextGuard hostGuard{Context::HOST};
    int* firstPointer = arrayManager.data(/*touch=*/false);
    benchmark::DoNotOptimize(firstPointer);
  }

  state.SetItemsProcessed(state.iterations());
}

// Run with an element count of 0, then Range(1, 1 << 20) with a multiplier of 8.
BENCHMARK(UnifiedArrayManager_FirstData_HostConst)
  ->Arg(0)
  ->RangeMultiplier(8)
  ->Range(1, 1 << 20);

/// Times the first data(/*touch=*/true) call, made in the HOST context, on a
/// freshly constructed UnifiedArrayManager<int> of state.range(0) elements.
///
/// Construction runs with the timer paused.
/// NOTE(review): the timer is running again when the ContextGuard is created
/// and when the guard and the manager are destroyed, so that work is part of
/// every timed iteration - confirm this is intended.
static void UnifiedArrayManager_FirstData_Host(benchmark::State& state)
{
  const std::size_t elementCount = static_cast<std::size_t>(state.range(0));

  for (auto _ : state)
  {
    // Build the array off the clock.
    state.PauseTiming();
    UnifiedArrayManager<int> arrayManager{elementCount};
    state.ResumeTiming();

    // Declared after the manager, so the guard is still destroyed first.
    ContextGuard hostGuard{Context::HOST};
    int* firstPointer = arrayManager.data(/*touch=*/true);
    benchmark::DoNotOptimize(firstPointer);
  }

  state.SetItemsProcessed(state.iterations());
}

// Run with an element count of 0, then Range(1, 1 << 20) with a multiplier of 8.
BENCHMARK(UnifiedArrayManager_FirstData_Host)
  ->Arg(0)
  ->RangeMultiplier(8)
  ->Range(1, 1 << 20);

/// Times the first data(/*touch=*/false) call, made in the DEVICE context, on
/// a freshly constructed UnifiedArrayManager<int> of state.range(0) elements.
///
/// Construction runs with the timer paused.
/// NOTE(review): the timer is running again when the ContextGuard is created
/// and when the guard and the manager are destroyed, so that work is part of
/// every timed iteration - confirm this is intended.
static void UnifiedArrayManager_FirstData_DeviceConst(benchmark::State& state)
{
  const std::size_t elementCount = static_cast<std::size_t>(state.range(0));

  for (auto _ : state)
  {
    // Build the array off the clock.
    state.PauseTiming();
    UnifiedArrayManager<int> arrayManager{elementCount};
    state.ResumeTiming();

    // Declared after the manager, so the guard is still destroyed first.
    ContextGuard deviceGuard{Context::DEVICE};
    int* firstPointer = arrayManager.data(/*touch=*/false);
    benchmark::DoNotOptimize(firstPointer);
  }

  state.SetItemsProcessed(state.iterations());
}

// Run with an element count of 0, then Range(1, 1 << 20) with a multiplier of 8.
BENCHMARK(UnifiedArrayManager_FirstData_DeviceConst)
  ->Arg(0)
  ->RangeMultiplier(8)
  ->Range(1, 1 << 20);

/// Times the first data(/*touch=*/true) call, made in the DEVICE context, on
/// a freshly constructed UnifiedArrayManager<int> of state.range(0) elements.
///
/// Construction runs with the timer paused.
/// NOTE(review): the timer is running again when the ContextGuard is created
/// and when the guard and the manager are destroyed, so that work is part of
/// every timed iteration - confirm this is intended.
static void UnifiedArrayManager_FirstData_Device(benchmark::State& state)
{
  const std::size_t elementCount = static_cast<std::size_t>(state.range(0));

  for (auto _ : state)
  {
    // Build the array off the clock.
    state.PauseTiming();
    UnifiedArrayManager<int> arrayManager{elementCount};
    state.ResumeTiming();

    // Declared after the manager, so the guard is still destroyed first.
    ContextGuard deviceGuard{Context::DEVICE};
    int* firstPointer = arrayManager.data(/*touch=*/true);
    benchmark::DoNotOptimize(firstPointer);
  }

  state.SetItemsProcessed(state.iterations());
}

// Run with an element count of 0, then Range(1, 1 << 20) with a multiplier of 8.
BENCHMARK(UnifiedArrayManager_FirstData_Device)
  ->Arg(0)
  ->RangeMultiplier(8)
  ->Range(1, 1 << 20);

/// Times data(/*touch=*/false) in the HOST context after a priming
/// data(/*touch=*/true) call in the HOST context.
static void UnifiedArrayManager_DataAfterHostTouch_HostConst(benchmark::State& state)
{
  constexpr bool primeWithTouch = true;
  constexpr bool timedCallTouches = false;

  UnifiedArrayManager_DataAfterTouch(state,
                                     Context::HOST, primeWithTouch,
                                     Context::HOST, timedCallTouches);
}

// Run with an element count of 0, then Range(1, 1 << 20) with a multiplier of 8.
BENCHMARK(UnifiedArrayManager_DataAfterHostTouch_HostConst)
  ->Arg(0)
  ->RangeMultiplier(8)
  ->Range(1, 1 << 20);

/// Times data(/*touch=*/true) in the HOST context after a priming
/// data(/*touch=*/true) call in the HOST context.
static void UnifiedArrayManager_DataAfterHostTouch_Host(benchmark::State& state)
{
  constexpr bool primeWithTouch = true;
  constexpr bool timedCallTouches = true;

  UnifiedArrayManager_DataAfterTouch(state,
                                     Context::HOST, primeWithTouch,
                                     Context::HOST, timedCallTouches);
}

// Run with an element count of 0, then Range(1, 1 << 20) with a multiplier of 8.
BENCHMARK(UnifiedArrayManager_DataAfterHostTouch_Host)
  ->Arg(0)
  ->RangeMultiplier(8)
  ->Range(1, 1 << 20);

/// Times data(/*touch=*/false) in the DEVICE context after a priming
/// data(/*touch=*/true) call in the HOST context.
static void UnifiedArrayManager_DataAfterHostTouch_DeviceConst(benchmark::State& state)
{
  constexpr bool primeWithTouch = true;
  constexpr bool timedCallTouches = false;

  UnifiedArrayManager_DataAfterTouch(state,
                                     Context::HOST, primeWithTouch,
                                     Context::DEVICE, timedCallTouches);
}

// Run with an element count of 0, then Range(1, 1 << 20) with a multiplier of 8.
BENCHMARK(UnifiedArrayManager_DataAfterHostTouch_DeviceConst)
  ->Arg(0)
  ->RangeMultiplier(8)
  ->Range(1, 1 << 20);

/// Times data(/*touch=*/true) in the DEVICE context after a priming
/// data(/*touch=*/true) call in the HOST context.
static void UnifiedArrayManager_DataAfterHostTouch_Device(benchmark::State& state)
{
  constexpr bool primeWithTouch = true;
  constexpr bool timedCallTouches = true;

  UnifiedArrayManager_DataAfterTouch(state,
                                     Context::HOST, primeWithTouch,
                                     Context::DEVICE, timedCallTouches);
}

// Run with an element count of 0, then Range(1, 1 << 20) with a multiplier of 8.
BENCHMARK(UnifiedArrayManager_DataAfterHostTouch_Device)
  ->Arg(0)
  ->RangeMultiplier(8)
  ->Range(1, 1 << 20);

/// Times data(/*touch=*/false) in the HOST context after a priming
/// data(/*touch=*/true) call in the DEVICE context.
static void UnifiedArrayManager_DataAfterDeviceTouch_HostConst(benchmark::State& state)
{
  constexpr bool primeWithTouch = true;
  constexpr bool timedCallTouches = false;

  UnifiedArrayManager_DataAfterTouch(state,
                                     Context::DEVICE, primeWithTouch,
                                     Context::HOST, timedCallTouches);
}

// Run with an element count of 0, then Range(1, 1 << 20) with a multiplier of 8.
BENCHMARK(UnifiedArrayManager_DataAfterDeviceTouch_HostConst)
  ->Arg(0)
  ->RangeMultiplier(8)
  ->Range(1, 1 << 20);

/// Times data(/*touch=*/true) in the HOST context after a priming
/// data(/*touch=*/true) call in the DEVICE context.
static void UnifiedArrayManager_DataAfterDeviceTouch_Host(benchmark::State& state)
{
  constexpr bool primeWithTouch = true;
  constexpr bool timedCallTouches = true;

  UnifiedArrayManager_DataAfterTouch(state,
                                     Context::DEVICE, primeWithTouch,
                                     Context::HOST, timedCallTouches);
}

// Run with an element count of 0, then Range(1, 1 << 20) with a multiplier of 8.
BENCHMARK(UnifiedArrayManager_DataAfterDeviceTouch_Host)
  ->Arg(0)
  ->RangeMultiplier(8)
  ->Range(1, 1 << 20);

/// Times data(/*touch=*/false) in the DEVICE context after a priming
/// data(/*touch=*/true) call in the DEVICE context.
static void UnifiedArrayManager_DataAfterDeviceTouch_DeviceConst(benchmark::State& state)
{
  constexpr bool primeWithTouch = true;
  constexpr bool timedCallTouches = false;

  UnifiedArrayManager_DataAfterTouch(state,
                                     Context::DEVICE, primeWithTouch,
                                     Context::DEVICE, timedCallTouches);
}

// Run with an element count of 0, then Range(1, 1 << 20) with a multiplier of 8.
BENCHMARK(UnifiedArrayManager_DataAfterDeviceTouch_DeviceConst)
  ->Arg(0)
  ->RangeMultiplier(8)
  ->Range(1, 1 << 20);

/// Times data(/*touch=*/true) in the DEVICE context after a priming
/// data(/*touch=*/true) call in the DEVICE context.
static void UnifiedArrayManager_DataAfterDeviceTouch_Device(benchmark::State& state)
{
  constexpr bool primeWithTouch = true;
  constexpr bool timedCallTouches = true;

  UnifiedArrayManager_DataAfterTouch(state,
                                     Context::DEVICE, primeWithTouch,
                                     Context::DEVICE, timedCallTouches);
}

// Run with an element count of 0, then Range(1, 1 << 20) with a multiplier of 8.
BENCHMARK(UnifiedArrayManager_DataAfterDeviceTouch_Device)
  ->Arg(0)
  ->RangeMultiplier(8)
  ->Range(1, 1 << 20);
} // namespace

BENCHMARK_MAIN();
51 changes: 51 additions & 0 deletions docs/sphinx/expt/design.rst
Original file line number Diff line number Diff line change
Expand Up @@ -216,3 +216,54 @@ be extra careful to avoid using it on the device.
{
a[i] = i;
}

-------------------
UnifiedArrayManager
-------------------

``UnifiedArrayManager`` manages a single contiguous array allocated from Umpire's
unified (managed) memory allocator (``UM``). Since unified memory is accessible
from both CPU and GPU, CHAI can provide a single pointer value that is valid in
both the ``HOST`` and ``DEVICE`` contexts.

``UnifiedArrayManager`` relies on :ref:`ContextManager <experimental_design>` to
avoid unnecessary full device synchronizations and to ensure correctness when
switching between contexts:

- ``data(touch=false)`` returns a pointer suitable for read access in the current
context and synchronizes with the most recent modifying context (if needed).
- ``data(touch=true)`` indicates the caller will modify the array in the current
context; it performs any required synchronization first, then records the
current context as the most recently modified context.

Like ``HostArrayManager``, ``UnifiedArrayManager`` performs value initialization
of each element on the host (numeric types are initialized to zero).

.. code-block:: cpp

#include "chai/expt/Context.hpp"
#include "chai/expt/ContextGuard.hpp"
#include "chai/expt/UnifiedArrayManager.hpp"

const std::size_t N = 1000000;
::chai::expt::UnifiedArrayManager<int> a{N};

{
::chai::expt::ContextGuard guard{::chai::expt::Context::HOST};
int* p = a.data(true);
for (std::size_t i = 0; i < N; ++i) { p[i] = static_cast<int>(i); }
}

{
::chai::expt::ContextGuard guard{::chai::expt::Context::DEVICE};
int* p = a.data(true);
// Launch a CUDA/HIP kernel that writes through p...
}

{
::chai::expt::ContextGuard guard{::chai::expt::Context::HOST};
// If the most recent modification was on DEVICE, this call synchronizes first.
const int* p = a.data(false);
// Read through p...
}

Loading