From e8b991dd7430fa24f598e7619a4f98944f974ed5 Mon Sep 17 00:00:00 2001 From: Eric Niebler Date: Tue, 2 Jul 2024 01:03:59 +0000 Subject: [PATCH 01/15] Wrapper types for `cudaEvent_t` that provide a modern C++ interface. * `cuda::experimental::event_ref` is a non-owning wrapper around a `cudaEvent_t`. * `cuda::experimental::event` is an owning wrapper around a `cudaEvent_t`. * `cuda::experimental::timed_event` is a `cuda::experimental::event` that also records the time at which it was recorded. --- .../cuda/experimental/__detail/utility.cuh | 24 +++ .../cuda/experimental/__event/event.cuh | 162 ++++++++++++++++++ .../cuda/experimental/__event/event_ref.cuh | 149 ++++++++++++++++ .../cuda/experimental/__event/timed_event.cuh | 121 +++++++++++++ cudax/include/cuda/experimental/event.cuh | 18 ++ cudax/test/CMakeLists.txt | 4 + cudax/test/event/event_smoke.cu | 47 +++++ 7 files changed, 525 insertions(+) create mode 100644 cudax/include/cuda/experimental/__detail/utility.cuh create mode 100644 cudax/include/cuda/experimental/__event/event.cuh create mode 100644 cudax/include/cuda/experimental/__event/event_ref.cuh create mode 100644 cudax/include/cuda/experimental/__event/timed_event.cuh create mode 100644 cudax/include/cuda/experimental/event.cuh create mode 100644 cudax/test/event/event_smoke.cu diff --git a/cudax/include/cuda/experimental/__detail/utility.cuh b/cudax/include/cuda/experimental/__detail/utility.cuh new file mode 100644 index 00000000000..874075b1075 --- /dev/null +++ b/cudax/include/cuda/experimental/__detail/utility.cuh @@ -0,0 +1,24 @@ +//===----------------------------------------------------------------------===// +// +// Part of CUDA Experimental in CUDA C++ Core Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef __CUDAX_DETAIL_UTILITY_H +#define __CUDAX_DETAIL_UTILITY_H + +namespace cuda::experimental +{ +struct uninit_t +{ + explicit uninit_t() = default; +}; + +inline constexpr uninit_t uninit{}; +} // namespace cuda::experimental + +#endif // __CUDAX_DETAIL_UTILITY_H diff --git a/cudax/include/cuda/experimental/__event/event.cuh b/cudax/include/cuda/experimental/__event/event.cuh new file mode 100644 index 00000000000..439d789a36a --- /dev/null +++ b/cudax/include/cuda/experimental/__event/event.cuh @@ -0,0 +1,162 @@ +//===----------------------------------------------------------------------===// +// +// Part of CUDA Experimental in CUDA C++ Core Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef _CUDAX_EVENT_DETAIL_H +#define _CUDAX_EVENT_DETAIL_H + +#include +// cuda_runtime_api needs to come first + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include +#include +#include + +// CUDAX headers here +#include +#include + +#include + +namespace cuda::experimental +{ +class timed_event; + +/** + * @brief An owning wrapper for an untimed `cudaEvent_t`. + */ +class event : public event_ref +{ + friend class timed_event; + +public: + /** + * @brief Flags to use when creating the event. + */ + enum class flags : unsigned int + { + none = cudaEventDefault, + blocking_sync = cudaEventBlockingSync, + interprocess = cudaEventInterprocess + }; + + /** + * @brief Construct a new `event` object with timing disabled. + * + * @throws cuda_error if the event creation fails. + */ + event() + : event(static_cast(cudaEventDisableTiming)) + {} + + /** + * @brief Construct a new `event` object with the specified flags. + * + * @throws cuda_error if the event creation fails. + */ + explicit event(flags __flags) + : event(static_cast(__flags) | static_cast(cudaEventDisableTiming)) + {} + + /** + * @brief Construct a new `event` object into the moved-from state. + * + * @post `get()` returns `cudaEvent_t()`. + */ + explicit constexpr event(uninit_t) noexcept {} + + /** + * @brief Move-construct a new `event` object + * + * @param __other + * + * @post `__other` is in a moved-from state. + */ + constexpr event(event&& __other) noexcept + : event_ref(_CUDA_VSTD::exchange(__other.__event_, {})) + {} + + /** + * @brief Destroy the `event` object + * + * @note If the event fails to be destroyed, the error is silently ignored. + */ + ~event() + { + if (__event_ != nullptr) + { + [[maybe_unused]] auto __status = ::cudaEventDestroy(__event_); + } + } + + /** + * @brief Construct an `event` object from a native `cudaEvent_t` handle. + * + * @param __event The native handle + * + * @return event The constructed `event` object + * + * @note The constructed `event` object takes ownership of the native handle. + */ + _CCCL_NODISCARD static event from_native_handle(::cudaEvent_t __event) noexcept + { + return event(__event); + } + + /// Disallow construction from an `int`, e.g., `0`. + static event from_native_handle(int) = delete; + + /// Disallow construction from `nullptr`. + static event from_native_handle(_CUDA_VSTD::nullptr_t) = delete; + + /** + * @brief Retrieve the native `cudaEvent_t` handle and give up ownership. + * + * @return cudaEvent_t The native handle being held by the `event` object. + * + * @post The event object is in a moved-from state. + */ + _CCCL_NODISCARD constexpr ::cudaEvent_t release() noexcept + { + return _CUDA_VSTD::exchange(__event_, {}); + } + + _CCCL_NODISCARD_FRIEND constexpr flags operator|(flags __lhs, flags __rhs) noexcept + { + return static_cast(static_cast(__lhs) | static_cast(__rhs)); + } + +private: + // Use `event::from_native_handle(e)` to construct an owning `event` + // object from a `cudaEvent_t` handle. + explicit constexpr event(::cudaEvent_t __event) noexcept + : event_ref(__event) + {} + + explicit event(unsigned int __flags) + { + _CCCL_TRY_CUDA_API( + ::cudaEventCreateWithFlags, "Failed to create CUDA event", &__event_, static_cast(__flags)); + } +}; +} // namespace cuda::experimental + +#endif // _CUDAX_EVENT_DETAIL_H diff --git a/cudax/include/cuda/experimental/__event/event_ref.cuh b/cudax/include/cuda/experimental/__event/event_ref.cuh new file mode 100644 index 00000000000..d74a4d64c05 --- /dev/null +++ b/cudax/include/cuda/experimental/__event/event_ref.cuh @@ -0,0 +1,149 @@ +//===----------------------------------------------------------------------===// +// +// Part of CUDA Experimental in CUDA C++ Core Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef _CUDAX_EVENT_REF_DETAIL_H +#define _CUDAX_EVENT_REF_DETAIL_H + +#include +// cuda_runtime_api needs to come first + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include +#include +#include + +#include + +namespace cuda::experimental +{ +class event; +class timed_event; + +/** + * @brief An non-owning wrapper for an untimed `cudaEvent_t`. + */ +class event_ref +{ +private: + friend class event; + friend class timed_event; + + ::cudaEvent_t __event_{0}; + +public: + using value_type = ::cudaEvent_t; + + /** + * @brief Construct a new `event_ref` that does refer to an event. + */ + event_ref() = default; + + /** + * @brief Construct a new `event_ref` object from a `cudaEvent_t` + * + * This constructor provides an implicit conversion from `cudaEvent_t` + * + * @post `get() == __event` + * + * @note: It is the callers responsibilty to ensure the `event_ref` does not + * outlive the event denoted by the `cudaEvent_t` handle. + */ + constexpr event_ref(::cudaEvent_t __event) noexcept + : __event_(__event) + {} + + /// Disallow construction from an `int`, e.g., `0`. + event_ref(int) = delete; + + /// Disallow construction from `nullptr`. + event_ref(_CUDA_VSTD::nullptr_t) = delete; + + /** + * @brief Records an event on the specified stream + * + * @param __stream + * + * @throws cuda_error if the event record fails + */ + void record(stream_ref __stream) + { + assert(__event_ != nullptr); + assert(__stream.get() != nullptr); + _CCCL_TRY_CUDA_API(::cudaEventRecord, "Failed to record CUDA event", __event_, __stream.get()); + } + + /** + * @brief Waits for a CUDA event_ref to complete on the specified stream + * + * @param __stream The stream to wait on + * + * @throws cuda_error if the event_ref wait fails + */ + void wait(stream_ref __stream) const + { + assert(__event_ != nullptr); + assert(__stream.get() != nullptr); + _CCCL_TRY_CUDA_API(::cudaStreamWaitEvent, "Failed to wait for CUDA event", __stream.get(), __event_); + } + + /** + * @brief Retrieve the native `cudaEvent_t` handle. + * + * @return cudaEvent_t The native handle being held by the event_ref object. + */ + _CCCL_NODISCARD constexpr ::cudaEvent_t get() const noexcept + { + return __event_; + } + + /** + * @brief Compares two `event_ref`s for equality + * + * @note Allows comparison with `cudaEvent_t` due to implicit conversion to + * `event_ref`. + * + * @param lhs The first `event_ref` to compare + * @param rhs The second `event_ref` to compare + * @return true if `lhs` and `rhs` refer to the same `cudaEvent_t` object. + */ + _CCCL_NODISCARD_FRIEND constexpr bool operator==(event_ref __lhs, event_ref __rhs) noexcept + { + return __lhs.__event_ == __rhs.__event_; + } + + /** + * @brief Compares two `event_ref`s for inequality + * + * @note Allows comparison with `cudaEvent_t` due to implicit conversion to + * `event_ref`. + * + * @param lhs The first `event_ref` to compare + * @param rhs The second `event_ref` to compare + * @return true if `lhs` and `rhs` refer to different `cudaEvent_t` objects. + */ + _CCCL_NODISCARD_FRIEND constexpr bool operator!=(event_ref __lhs, event_ref __rhs) noexcept + { + return __lhs.__event_ != __rhs.__event_; + } +}; +} // namespace cuda::experimental + +#endif // _CUDAX_EVENT_REF_DETAIL_H diff --git a/cudax/include/cuda/experimental/__event/timed_event.cuh b/cudax/include/cuda/experimental/__event/timed_event.cuh new file mode 100644 index 00000000000..b3e4b421ca3 --- /dev/null +++ b/cudax/include/cuda/experimental/__event/timed_event.cuh @@ -0,0 +1,121 @@ +//===----------------------------------------------------------------------===// +// +// Part of CUDA Experimental in CUDA C++ Core Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef _CUDAX_TIMED_EVENT_DETAIL_H +#define _CUDAX_TIMED_EVENT_DETAIL_H + +#include +// cuda_runtime_api needs to come first + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include +#include +#include + +// CUDAX headers here +#include +#include + +#include + +namespace cuda::experimental +{ +/** + * @brief An owning wrapper for a `cudaEvent_t` with timing enabled. + */ +class timed_event : public event +{ +public: + /** + * @brief Construct a new `timed_event` object with timing enabled. + * + * @throws cuda_error if the event creation fails. + */ + timed_event() + : event(static_cast(cudaEventDefault)) + {} + + /** + * @brief Construct a new `timed_event` object with the specified flags. + * + * @throws cuda_error if the event creation fails. + */ + explicit timed_event(flags __flags) + : event(static_cast(__flags)) + {} + + /** + * @brief Construct a new `timed_event` object into the moved-from state. + * + * @post `get()` returns `cudaEvent_t()`. + */ + explicit constexpr timed_event(uninit_t) noexcept + : event(uninit) + {} + + /** + * @brief Construct a `timed_event` object from a native `cudaEvent_t` handle. + * + * @param __event The native handle + * + * @return timed_event The constructed `timed_event` object + * + * @note The constructed `timed_event` object takes ownership of the native handle. + */ + _CCCL_NODISCARD static timed_event from_native_handle(::cudaEvent_t __event) noexcept + { + return timed_event(__event); + } + + /// Disallow construction from an `int`, e.g., `0`. + static timed_event from_native_handle(int) = delete; + + /// Disallow construction from `nullptr`. + static timed_event from_native_handle(_CUDA_VSTD::nullptr_t) = delete; + + /** + * @brief Compute the time elapsed between two `timed_event` objects. + * + * @throws cuda_error if the query for the elapsed time fails. + * + * @param __end The `timed_event` object representing the end time. + * @param __start The `timed_event` object representing the start time. + * + * @return cuda::std::chrono::microseconds The elapsed time in microseconds. + */ + _CCCL_NODISCARD_FRIEND _CUDA_VSTD::chrono::microseconds operator-(const timed_event& __end, const timed_event& __start) + { + float __ms = 0.0f; + _CCCL_TRY_CUDA_API( + ::cudaEventElapsedTime, "Failed to get CUDA event elapsed time", &__ms, __start.get(), __end.get()); + return _CUDA_VSTD::chrono::microseconds(static_cast<_CUDA_VSTD::chrono::microseconds::rep>(__ms * 1'000.0f)); + } + +private: + // Use `timed_event::from_native_handle(e)` to construct an owning `timed_event` + // object from a `cudaEvent_t` handle. + explicit constexpr timed_event(::cudaEvent_t __event) noexcept + : event(__event) + {} +}; +} // namespace cuda::experimental + +#endif // _CUDAX_TIMED_EVENT_DETAIL_H diff --git a/cudax/include/cuda/experimental/event.cuh b/cudax/include/cuda/experimental/event.cuh new file mode 100644 index 00000000000..ecb31597dc7 --- /dev/null +++ b/cudax/include/cuda/experimental/event.cuh @@ -0,0 +1,18 @@ +//===----------------------------------------------------------------------===// +// +// Part of CUDA Experimental in CUDA C++ Core Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef _CUDAX_EVENT_H +#define _CUDAX_EVENT_H + +#include +#include +#include + +#endif // _CUDAX_EVENT_H diff --git a/cudax/test/CMakeLists.txt b/cudax/test/CMakeLists.txt index 5cf3b67c843..f457b850565 100644 --- a/cudax/test/CMakeLists.txt +++ b/cudax/test/CMakeLists.txt @@ -57,4 +57,8 @@ foreach(cn_target IN LISTS cudax_TARGETS) launch/configuration.cu ) target_compile_options(${test_target} PRIVATE $<$:--extended-lambda>) + + Cudax_add_catch2_test(test_target event_tests ${cn_target} + event/event_smoke.cu + ) endforeach() diff --git a/cudax/test/event/event_smoke.cu b/cudax/test/event/event_smoke.cu new file mode 100644 index 00000000000..cc5c18db2fe --- /dev/null +++ b/cudax/test/event/event_smoke.cu @@ -0,0 +1,47 @@ +//===----------------------------------------------------------------------===// +// +// Part of CUDA Experimental in CUDA C++ Core Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#include + +#include "../hierarchy/testing_common.cuh" +#include + +namespace +{ +cudax::event_ref fn_takes_event_ref(cudax::event_ref ref) +{ + return ref; +} +} // namespace + +TEST_CASE("can construct an event_ref from a cudaEvent_t", "[event]") +{ + ::cudaEvent_t event; + CUDAX_REQUIRE(::cudaEventCreate(&event) == ::cudaSuccess); + cudax::event_ref ref(event); + CUDAX_REQUIRE(ref.get() == event); + // test implicit converstion from cudaEvent_t: + cudax::event_ref ref2 = ::fn_takes_event_ref(event); + CUDAX_REQUIRE(ref2.get() == event); + CUDAX_REQUIRE(::cudaEventDestroy(event) == ::cudaSuccess); +} + +TEST_CASE("can copy construct an event_ref and compare for equality", "[event]") +{ + ::cudaEvent_t event; + CUDAX_REQUIRE(::cudaEventCreate(&event) == ::cudaSuccess); + const cudax::event_ref ref(event); + const cudax::event_ref ref2 = ref; + CUDAX_REQUIRE(ref2 == ref); + CUDAX_REQUIRE(!(ref != ref2)); + CUDAX_REQUIRE(ref != cudax::event_ref{}); + CUDAX_REQUIRE(::cudaEvent_t{} != ref); + CUDAX_REQUIRE(::cudaEventDestroy(event) == ::cudaSuccess); +} From a45c33b2ad7036a3b69274cf03ae7c23a295063c Mon Sep 17 00:00:00 2001 From: Eric Niebler Date: Sat, 20 Jul 2024 16:50:36 +0000 Subject: [PATCH 02/15] apparently `__event` is a word of power for msvc --- cudax/include/cuda/experimental/__event/event.cuh | 10 +++++----- cudax/include/cuda/experimental/__event/event_ref.cuh | 6 +++--- .../include/cuda/experimental/__event/timed_event.cuh | 10 +++++----- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/cudax/include/cuda/experimental/__event/event.cuh b/cudax/include/cuda/experimental/__event/event.cuh index 439d789a36a..8b4a7624080 100644 --- a/cudax/include/cuda/experimental/__event/event.cuh +++ b/cudax/include/cuda/experimental/__event/event.cuh @@ -110,15 +110,15 @@ public: /** * @brief Construct an `event` object from a native `cudaEvent_t` handle. * - * @param __event The native handle + * @param __evnt The native handle * * @return event The constructed `event` object * * @note The constructed `event` object takes ownership of the native handle. */ - _CCCL_NODISCARD static event from_native_handle(::cudaEvent_t __event) noexcept + _CCCL_NODISCARD static event from_native_handle(::cudaEvent_t __evnt) noexcept { - return event(__event); + return event(__evnt); } /// Disallow construction from an `int`, e.g., `0`. @@ -147,8 +147,8 @@ public: private: // Use `event::from_native_handle(e)` to construct an owning `event` // object from a `cudaEvent_t` handle. - explicit constexpr event(::cudaEvent_t __event) noexcept - : event_ref(__event) + explicit constexpr event(::cudaEvent_t __evnt) noexcept + : event_ref(__evnt) {} explicit event(unsigned int __flags) diff --git a/cudax/include/cuda/experimental/__event/event_ref.cuh b/cudax/include/cuda/experimental/__event/event_ref.cuh index d74a4d64c05..298c71cb032 100644 --- a/cudax/include/cuda/experimental/__event/event_ref.cuh +++ b/cudax/include/cuda/experimental/__event/event_ref.cuh @@ -61,13 +61,13 @@ public: * * This constructor provides an implicit conversion from `cudaEvent_t` * - * @post `get() == __event` + * @post `get() == __evnt` * * @note: It is the callers responsibilty to ensure the `event_ref` does not * outlive the event denoted by the `cudaEvent_t` handle. */ - constexpr event_ref(::cudaEvent_t __event) noexcept - : __event_(__event) + constexpr event_ref(::cudaEvent_t __evnt) noexcept + : __event_(__evnt) {} /// Disallow construction from an `int`, e.g., `0`. diff --git a/cudax/include/cuda/experimental/__event/timed_event.cuh b/cudax/include/cuda/experimental/__event/timed_event.cuh index b3e4b421ca3..a0bad202d40 100644 --- a/cudax/include/cuda/experimental/__event/timed_event.cuh +++ b/cudax/include/cuda/experimental/__event/timed_event.cuh @@ -74,15 +74,15 @@ public: /** * @brief Construct a `timed_event` object from a native `cudaEvent_t` handle. * - * @param __event The native handle + * @param __evnt The native handle * * @return timed_event The constructed `timed_event` object * * @note The constructed `timed_event` object takes ownership of the native handle. */ - _CCCL_NODISCARD static timed_event from_native_handle(::cudaEvent_t __event) noexcept + _CCCL_NODISCARD static timed_event from_native_handle(::cudaEvent_t __evnt) noexcept { - return timed_event(__event); + return timed_event(__evnt); } /// Disallow construction from an `int`, e.g., `0`. @@ -112,8 +112,8 @@ public: private: // Use `timed_event::from_native_handle(e)` to construct an owning `timed_event` // object from a `cudaEvent_t` handle. - explicit constexpr timed_event(::cudaEvent_t __event) noexcept - : event(__event) + explicit constexpr timed_event(::cudaEvent_t __evnt) noexcept + : event(__evnt) {} }; } // namespace cuda::experimental From 32e26240d0b3919b90a52121a2c44bf904107b2d Mon Sep 17 00:00:00 2001 From: Eric Niebler Date: Sat, 20 Jul 2024 17:41:41 +0000 Subject: [PATCH 03/15] represent the elapsed time between two events with nanoseconds instead of microsoconds according to the CUDA docs for `cudaEventElapsedTime`, the elapsed time has sub-microsecond resolution, so it is more appropriate to represent it in nanoseconds. --- cudax/include/cuda/experimental/__event/timed_event.cuh | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/cudax/include/cuda/experimental/__event/timed_event.cuh b/cudax/include/cuda/experimental/__event/timed_event.cuh index a0bad202d40..2f4235e24d9 100644 --- a/cudax/include/cuda/experimental/__event/timed_event.cuh +++ b/cudax/include/cuda/experimental/__event/timed_event.cuh @@ -99,14 +99,16 @@ public: * @param __end The `timed_event` object representing the end time. * @param __start The `timed_event` object representing the start time. * - * @return cuda::std::chrono::microseconds The elapsed time in microseconds. + * @return cuda::std::chrono::nanoseconds The elapsed time in nanoseconds. + * + * @note The elapsed time has a resolution of approximately 0.5 microseconds. */ - _CCCL_NODISCARD_FRIEND _CUDA_VSTD::chrono::microseconds operator-(const timed_event& __end, const timed_event& __start) + _CCCL_NODISCARD_FRIEND _CUDA_VSTD::chrono::nanoseconds operator-(const timed_event& __end, const timed_event& __start) { float __ms = 0.0f; _CCCL_TRY_CUDA_API( ::cudaEventElapsedTime, "Failed to get CUDA event elapsed time", &__ms, __start.get(), __end.get()); - return _CUDA_VSTD::chrono::microseconds(static_cast<_CUDA_VSTD::chrono::microseconds::rep>(__ms * 1'000.0f)); + return _CUDA_VSTD::chrono::nanoseconds(static_cast<_CUDA_VSTD::chrono::nanoseconds::rep>(__ms * 1'000'000.0)); } private: From 284328fff00a5e4cd4aa1f265a8a126e25443264 Mon Sep 17 00:00:00 2001 From: Eric Niebler Date: Mon, 22 Jul 2024 16:52:26 +0000 Subject: [PATCH 04/15] prune unused headers, switch to rst-friendly doxygen comment style --- .../cuda/experimental/__event/event.cuh | 87 +++++++---------- .../cuda/experimental/__event/event_ref.cuh | 97 ++++++++----------- .../cuda/experimental/__event/timed_event.cuh | 73 ++++++-------- 3 files changed, 102 insertions(+), 155 deletions(-) diff --git a/cudax/include/cuda/experimental/__event/event.cuh b/cudax/include/cuda/experimental/__event/event.cuh index 8b4a7624080..871801d154e 100644 --- a/cudax/include/cuda/experimental/__event/event.cuh +++ b/cudax/include/cuda/experimental/__event/event.cuh @@ -28,29 +28,22 @@ #include #include #include -#include // CUDAX headers here #include #include -#include - namespace cuda::experimental { class timed_event; -/** - * @brief An owning wrapper for an untimed `cudaEvent_t`. - */ +//! @brief An owning wrapper for an untimed `cudaEvent_t`. class event : public event_ref { friend class timed_event; public: - /** - * @brief Flags to use when creating the event. - */ + //! @brief Flags to use when creating the event. enum class flags : unsigned int { none = cudaEventDefault, @@ -58,47 +51,37 @@ public: interprocess = cudaEventInterprocess }; - /** - * @brief Construct a new `event` object with timing disabled. - * - * @throws cuda_error if the event creation fails. - */ + //! @brief Construct a new `event` object with timing disabled. + //! + //! @throws cuda_error if the event creation fails. event() : event(static_cast(cudaEventDisableTiming)) {} - /** - * @brief Construct a new `event` object with the specified flags. - * - * @throws cuda_error if the event creation fails. - */ + //! @brief Construct a new `event` object with the specified flags. + //! + //! @throws cuda_error if the event creation fails. explicit event(flags __flags) : event(static_cast(__flags) | static_cast(cudaEventDisableTiming)) {} - /** - * @brief Construct a new `event` object into the moved-from state. - * - * @post `get()` returns `cudaEvent_t()`. - */ + //! @brief Construct a new `event` object into the moved-from state. + //! + //! @post `get()` returns `cudaEvent_t()`. explicit constexpr event(uninit_t) noexcept {} - /** - * @brief Move-construct a new `event` object - * - * @param __other - * - * @post `__other` is in a moved-from state. - */ + //! @brief Move-construct a new `event` object + //! + //! @param __other + //! + //! @post `__other` is in a moved-from state. constexpr event(event&& __other) noexcept : event_ref(_CUDA_VSTD::exchange(__other.__event_, {})) {} - /** - * @brief Destroy the `event` object - * - * @note If the event fails to be destroyed, the error is silently ignored. - */ + //! @brief Destroy the `event` object + //! + //! @note If the event fails to be destroyed, the error is silently ignored. ~event() { if (__event_ != nullptr) @@ -107,33 +90,29 @@ public: } } - /** - * @brief Construct an `event` object from a native `cudaEvent_t` handle. - * - * @param __evnt The native handle - * - * @return event The constructed `event` object - * - * @note The constructed `event` object takes ownership of the native handle. - */ + //! @brief Construct an `event` object from a native `cudaEvent_t` handle. + //! + //! @param __evnt The native handle + //! + //! @return event The constructed `event` object + //! + //! @note The constructed `event` object takes ownership of the native handle. _CCCL_NODISCARD static event from_native_handle(::cudaEvent_t __evnt) noexcept { return event(__evnt); } - /// Disallow construction from an `int`, e.g., `0`. + // Disallow construction from an `int`, e.g., `0`. static event from_native_handle(int) = delete; - /// Disallow construction from `nullptr`. + // Disallow construction from `nullptr`. static event from_native_handle(_CUDA_VSTD::nullptr_t) = delete; - /** - * @brief Retrieve the native `cudaEvent_t` handle and give up ownership. - * - * @return cudaEvent_t The native handle being held by the `event` object. - * - * @post The event object is in a moved-from state. - */ + //! @brief Retrieve the native `cudaEvent_t` handle and give up ownership. + //! + //! @return cudaEvent_t The native handle being held by the `event` object. + //! + //! @post The event object is in a moved-from state. _CCCL_NODISCARD constexpr ::cudaEvent_t release() noexcept { return _CUDA_VSTD::exchange(__event_, {}); diff --git a/cudax/include/cuda/experimental/__event/event_ref.cuh b/cudax/include/cuda/experimental/__event/event_ref.cuh index 298c71cb032..cf75db08d2b 100644 --- a/cudax/include/cuda/experimental/__event/event_ref.cuh +++ b/cudax/include/cuda/experimental/__event/event_ref.cuh @@ -26,20 +26,17 @@ #include #include +#include #include #include #include -#include - namespace cuda::experimental { class event; class timed_event; -/** - * @brief An non-owning wrapper for an untimed `cudaEvent_t`. - */ +//! @brief An non-owning wrapper for an untimed `cudaEvent_t`. class event_ref { private: @@ -51,21 +48,17 @@ private: public: using value_type = ::cudaEvent_t; - /** - * @brief Construct a new `event_ref` that does refer to an event. - */ + //! @brief Construct a new `event_ref` that does refer to an event. event_ref() = default; - /** - * @brief Construct a new `event_ref` object from a `cudaEvent_t` - * - * This constructor provides an implicit conversion from `cudaEvent_t` - * - * @post `get() == __evnt` - * - * @note: It is the callers responsibilty to ensure the `event_ref` does not - * outlive the event denoted by the `cudaEvent_t` handle. - */ + //! @brief Construct a new `event_ref` object from a `cudaEvent_t` + //! + //! This constructor provides an implicit conversion from `cudaEvent_t` + //! + //! @post `get() == __evnt` + //! + //! @note: It is the callers responsibilty to ensure the `event_ref` does not + //! outlive the event denoted by the `cudaEvent_t` handle. constexpr event_ref(::cudaEvent_t __evnt) noexcept : __event_(__evnt) {} @@ -75,14 +68,12 @@ public: /// Disallow construction from `nullptr`. event_ref(_CUDA_VSTD::nullptr_t) = delete; + //! @brief Records an event on the specified stream + //! + //! @param __stream + //! + //! @throws cuda_error if the event record fails - /** - * @brief Records an event on the specified stream - * - * @param __stream - * - * @throws cuda_error if the event record fails - */ void record(stream_ref __stream) { assert(__event_ != nullptr); @@ -90,13 +81,11 @@ public: _CCCL_TRY_CUDA_API(::cudaEventRecord, "Failed to record CUDA event", __event_, __stream.get()); } - /** - * @brief Waits for a CUDA event_ref to complete on the specified stream - * - * @param __stream The stream to wait on - * - * @throws cuda_error if the event_ref wait fails - */ + //! @brief Waits for a CUDA event_ref to complete on the specified stream + //! + //! @param __stream The stream to wait on + //! + //! @throws cuda_error if the event_ref wait fails void wait(stream_ref __stream) const { assert(__event_ != nullptr); @@ -104,41 +93,35 @@ public: _CCCL_TRY_CUDA_API(::cudaStreamWaitEvent, "Failed to wait for CUDA event", __stream.get(), __event_); } - /** - * @brief Retrieve the native `cudaEvent_t` handle. - * - * @return cudaEvent_t The native handle being held by the event_ref object. - */ + //! @brief Retrieve the native `cudaEvent_t` handle. + //! + //! @return cudaEvent_t The native handle being held by the event_ref object. _CCCL_NODISCARD constexpr ::cudaEvent_t get() const noexcept { return __event_; } - /** - * @brief Compares two `event_ref`s for equality - * - * @note Allows comparison with `cudaEvent_t` due to implicit conversion to - * `event_ref`. - * - * @param lhs The first `event_ref` to compare - * @param rhs The second `event_ref` to compare - * @return true if `lhs` and `rhs` refer to the same `cudaEvent_t` object. - */ + //! @brief Compares two `event_ref`s for equality + //! + //! @note Allows comparison with `cudaEvent_t` due to implicit conversion to + //! `event_ref`. + //! + //! @param lhs The first `event_ref` to compare + //! @param rhs The second `event_ref` to compare + //! @return true if `lhs` and `rhs` refer to the same `cudaEvent_t` object. _CCCL_NODISCARD_FRIEND constexpr bool operator==(event_ref __lhs, event_ref __rhs) noexcept { return __lhs.__event_ == __rhs.__event_; } - /** - * @brief Compares two `event_ref`s for inequality - * - * @note Allows comparison with `cudaEvent_t` due to implicit conversion to - * `event_ref`. - * - * @param lhs The first `event_ref` to compare - * @param rhs The second `event_ref` to compare - * @return true if `lhs` and `rhs` refer to different `cudaEvent_t` objects. - */ + //! @brief Compares two `event_ref`s for inequality + //! + //! @note Allows comparison with `cudaEvent_t` due to implicit conversion to + //! `event_ref`. + //! + //! @param lhs The first `event_ref` to compare + //! @param rhs The second `event_ref` to compare + //! @return true if `lhs` and `rhs` refer to different `cudaEvent_t` objects. _CCCL_NODISCARD_FRIEND constexpr bool operator!=(event_ref __lhs, event_ref __rhs) noexcept { return __lhs.__event_ != __rhs.__event_; diff --git a/cudax/include/cuda/experimental/__event/timed_event.cuh b/cudax/include/cuda/experimental/__event/timed_event.cuh index 2f4235e24d9..6ea44f8afd2 100644 --- a/cudax/include/cuda/experimental/__event/timed_event.cuh +++ b/cudax/include/cuda/experimental/__event/timed_event.cuh @@ -28,81 +28,66 @@ #include #include #include -#include // CUDAX headers here #include #include -#include - namespace cuda::experimental { -/** - * @brief An owning wrapper for a `cudaEvent_t` with timing enabled. - */ +//! @brief An owning wrapper for a `cudaEvent_t` with timing enabled. class timed_event : public event { public: - /** - * @brief Construct a new `timed_event` object with timing enabled. - * - * @throws cuda_error if the event creation fails. - */ + //! @brief Construct a new `timed_event` object with timing enabled. + //! + //! @throws cuda_error if the event creation fails. timed_event() : event(static_cast(cudaEventDefault)) {} - /** - * @brief Construct a new `timed_event` object with the specified flags. - * - * @throws cuda_error if the event creation fails. - */ + //! @brief Construct a new `timed_event` object with the specified flags. + //! + //! @throws cuda_error if the event creation fails. explicit timed_event(flags __flags) : event(static_cast(__flags)) {} - /** - * @brief Construct a new `timed_event` object into the moved-from state. - * - * @post `get()` returns `cudaEvent_t()`. - */ + //! @brief Construct a new `timed_event` object into the moved-from state. + //! + //! @post `get()` returns `cudaEvent_t()`. explicit constexpr timed_event(uninit_t) noexcept : event(uninit) {} - /** - * @brief Construct a `timed_event` object from a native `cudaEvent_t` handle. - * - * @param __evnt The native handle - * - * @return timed_event The constructed `timed_event` object - * - * @note The constructed `timed_event` object takes ownership of the native handle. - */ + //! @brief Construct a `timed_event` object from a native `cudaEvent_t` handle. + //! + //! @param __evnt The native handle + //! + //! @return timed_event The constructed `timed_event` object + //! + //! @note The constructed `timed_event` object takes ownership of the native handle. _CCCL_NODISCARD static timed_event from_native_handle(::cudaEvent_t __evnt) noexcept { return timed_event(__evnt); } - /// Disallow construction from an `int`, e.g., `0`. + // Disallow construction from an `int`, e.g., `0`. static timed_event from_native_handle(int) = delete; - /// Disallow construction from `nullptr`. + // Disallow construction from `nullptr`. static timed_event from_native_handle(_CUDA_VSTD::nullptr_t) = delete; - /** - * @brief Compute the time elapsed between two `timed_event` objects. - * - * @throws cuda_error if the query for the elapsed time fails. - * - * @param __end The `timed_event` object representing the end time. - * @param __start The `timed_event` object representing the start time. - * - * @return cuda::std::chrono::nanoseconds The elapsed time in nanoseconds. - * - * @note The elapsed time has a resolution of approximately 0.5 microseconds. - */ + //! @brief Compute the time elapsed between two `timed_event` objects. + //! + //! @throws cuda_error if the query for the elapsed time fails. + //! + //! @param __end The `timed_event` object representing the end time. + //! @param __start The `timed_event` object representing the start time. + //! + //! @return cuda::std::chrono::nanoseconds The elapsed time in nanoseconds. + //! + //! @note The elapsed time has a resolution of approximately 0.5 microseconds. _CCCL_NODISCARD_FRIEND _CUDA_VSTD::chrono::nanoseconds operator-(const timed_event& __end, const timed_event& __start) { float __ms = 0.0f; From 9780a9141013871b72d4231cadf22ad8178d4738 Mon Sep 17 00:00:00 2001 From: Eric Niebler Date: Mon, 22 Jul 2024 23:51:32 +0000 Subject: [PATCH 05/15] add class synopsis comments --- .../cuda/experimental/__event/event.cuh | 49 +++++++++++++++++++ .../cuda/experimental/__event/event_ref.cuh | 34 ++++++++++++- .../cuda/experimental/__event/timed_event.cuh | 43 ++++++++++++++++ 3 files changed, 124 insertions(+), 2 deletions(-) diff --git a/cudax/include/cuda/experimental/__event/event.cuh b/cudax/include/cuda/experimental/__event/event.cuh index 871801d154e..de041d640eb 100644 --- a/cudax/include/cuda/experimental/__event/event.cuh +++ b/cudax/include/cuda/experimental/__event/event.cuh @@ -11,6 +11,44 @@ #ifndef _CUDAX_EVENT_DETAIL_H #define _CUDAX_EVENT_DETAIL_H +/* + event synopsis +namespace cuda::experimental { +class event : public event_ref { +public: + enum class flags : unsigned int { none, blocking_sync, interprocess }; + + event(); + event(flags); + event(uninit_t) noexcept; + event(event&&) noexcept; + ~event(); + + [[nodiscard]] static event from_native_handle(cudaEvent_t) noexcept; + static event from_native_handle(int) = delete; + static event from_native_handle(nullptr_t) = delete; + + [[nodiscard]] cudaEvent_t release() noexcept; + + [[nodiscard]] friend flags operator|(flags, flags) noexcept; + + // From event_ref: + using value_type = cudaEvent_t; + + void record(stream_ref) const; + + void wait(stream_ref) const; + + [[nodiscard]] cudaEvent_t get() const noexcept; + + [[nodiscard]] explicit operator bool() const noexcept; + + [[nodiscard]] friend bool operator==(event_ref, event_ref); + [[nodiscard]] friend bool operator!=(event_ref, event_ref); +}; +} // cuda::experimenal +*/ + #include // cuda_runtime_api needs to come first @@ -90,6 +128,17 @@ public: } } + //! @brief Move-assign an `event` object + //! + //! @param __other + //! + //! @post `__other` is in a moved-from state. + constexpr event& operator=(event&& __other) noexcept + { + __event_ = _CUDA_VSTD::exchange(__other.__event_, {}); + return *this; + } + //! @brief Construct an `event` object from a native `cudaEvent_t` handle. //! //! @param __evnt The native handle diff --git a/cudax/include/cuda/experimental/__event/event_ref.cuh b/cudax/include/cuda/experimental/__event/event_ref.cuh index cf75db08d2b..f5901d614b6 100644 --- a/cudax/include/cuda/experimental/__event/event_ref.cuh +++ b/cudax/include/cuda/experimental/__event/event_ref.cuh @@ -11,6 +11,36 @@ #ifndef _CUDAX_EVENT_REF_DETAIL_H #define _CUDAX_EVENT_REF_DETAIL_H +/* + event_ref synopsis +namespace cuda::experimental { +class event_ref { +public: + using value_type = cudaEvent_t; + + event_ref() = default; + event_ref(cudaEvent_t event_) noexcept : event(event_) {} + + event_ref(int) = delete; + event_ref(nullptr_t) = delete; + + void record(stream_ref) const; + + void wait(stream_ref) const; + + [[nodiscard]] cudaEvent_t get() const noexcept; + + [[nodiscard]] explicit operator bool() const noexcept; + + [[nodiscard]] friend bool operator==(event_ref, event_ref); + [[nodiscard]] friend bool operator!=(event_ref, event_ref); + +private: + cudaEvent_t event{}; // exposition only +}; +} // cuda::experimenal +*/ + #include // cuda_runtime_api needs to come first @@ -68,13 +98,13 @@ public: /// Disallow construction from `nullptr`. event_ref(_CUDA_VSTD::nullptr_t) = delete; + //! @brief Records an event on the specified stream //! //! @param __stream //! //! @throws cuda_error if the event record fails - - void record(stream_ref __stream) + void record(stream_ref __stream) const { assert(__event_ != nullptr); assert(__stream.get() != nullptr); diff --git a/cudax/include/cuda/experimental/__event/timed_event.cuh b/cudax/include/cuda/experimental/__event/timed_event.cuh index 6ea44f8afd2..bc77906b84d 100644 --- a/cudax/include/cuda/experimental/__event/timed_event.cuh +++ b/cudax/include/cuda/experimental/__event/timed_event.cuh @@ -11,6 +11,49 @@ #ifndef _CUDAX_TIMED_EVENT_DETAIL_H #define _CUDAX_TIMED_EVENT_DETAIL_H +/* + timed_event synopsis +namespace cuda::experimental { +class timed_event : public event { +public: + timed_event(); + timed_event(flags); + timed_event(uninit_t) noexcept; + timed_event(timed_event&&) noexcept; + ~timed_event(); + timed_event& operator=(timed_event&&) noexcept; + + [[nodiscard]] static timed_event from_native_handle(cudaEvent_t) noexcept; + static timed_event from_native_handle(int) = delete; + static timed_event from_native_handle(nullptr_t) = delete; + + [[nodiscard]] friend auto operator-(const timed_event& end, const timed_event& start) noexcept + -> cuda::std::chrono::nanoseconds; + + // from event: + enum class flags : unsigned int { none, blocking_sync, interprocess }; + + [[nodiscard]] cudaEvent_t release() noexcept; + + [[nodiscard]] friend flags operator|(flags, flags) noexcept; + + // From event_ref: + using value_type = cudaEvent_t; + + void record(stream_ref) const; + + void wait(stream_ref) const; + + [[nodiscard]] cudaEvent_t get() const noexcept; + + [[nodiscard]] explicit operator bool() const noexcept; + + [[nodiscard]] friend bool operator==(event_ref, event_ref); + [[nodiscard]] friend bool operator!=(event_ref, event_ref); +}; +} // cuda::experimenal +*/ + #include // cuda_runtime_api needs to come first From 14219fbdab9e9521f894597a866aafe66bafcee2 Mon Sep 17 00:00:00 2001 From: Eric Niebler Date: Wed, 24 Jul 2024 00:15:53 +0000 Subject: [PATCH 06/15] construct with a stream_ref and record the event on construction --- .../cuda/experimental/__event/event.cuh | 27 +++++++++---------- .../cuda/experimental/__event/event_ref.cuh | 12 ++++++--- .../cuda/experimental/__event/timed_event.cuh | 19 +++++-------- cudax/test/event/event_smoke.cu | 3 ++- 4 files changed, 30 insertions(+), 31 deletions(-) diff --git a/cudax/include/cuda/experimental/__event/event.cuh b/cudax/include/cuda/experimental/__event/event.cuh index de041d640eb..b12c553e662 100644 --- a/cudax/include/cuda/experimental/__event/event.cuh +++ b/cudax/include/cuda/experimental/__event/event.cuh @@ -18,11 +18,11 @@ class event : public event_ref { public: enum class flags : unsigned int { none, blocking_sync, interprocess }; - event(); - event(flags); + event(stream_ref, flags = flags::none); event(uninit_t) noexcept; event(event&&) noexcept; ~event(); + event& operator=(event&&) noexcept; [[nodiscard]] static event from_native_handle(cudaEvent_t) noexcept; static event from_native_handle(int) = delete; @@ -89,24 +89,22 @@ public: interprocess = cudaEventInterprocess }; - //! @brief Construct a new `event` object with timing disabled. + //! @brief Construct a new `event` object with timing disabled, and record + //! the event in the specified stream. //! //! @throws cuda_error if the event creation fails. - event() - : event(static_cast(cudaEventDisableTiming)) - {} - - //! @brief Construct a new `event` object with the specified flags. - //! - //! @throws cuda_error if the event creation fails. - explicit event(flags __flags) - : event(static_cast(__flags) | static_cast(cudaEventDisableTiming)) - {} + explicit event(stream_ref __stream, flags __flags = flags::none) + : event(static_cast(__flags) | cudaEventDisableTiming) + { + record(__stream); + } //! @brief Construct a new `event` object into the moved-from state. //! //! @post `get()` returns `cudaEvent_t()`. - explicit constexpr event(uninit_t) noexcept {} + explicit constexpr event(uninit_t) noexcept + : event_ref(::cudaEvent_t{}) + {} //! @brief Move-construct a new `event` object //! @@ -180,6 +178,7 @@ private: {} explicit event(unsigned int __flags) + : event_ref(::cudaEvent_t{}) { _CCCL_TRY_CUDA_API( ::cudaEventCreateWithFlags, "Failed to create CUDA event", &__event_, static_cast(__flags)); diff --git a/cudax/include/cuda/experimental/__event/event_ref.cuh b/cudax/include/cuda/experimental/__event/event_ref.cuh index f5901d614b6..e642f29b278 100644 --- a/cudax/include/cuda/experimental/__event/event_ref.cuh +++ b/cudax/include/cuda/experimental/__event/event_ref.cuh @@ -18,7 +18,6 @@ class event_ref { public: using value_type = cudaEvent_t; - event_ref() = default; event_ref(cudaEvent_t event_) noexcept : event(event_) {} event_ref(int) = delete; @@ -78,9 +77,6 @@ private: public: using value_type = ::cudaEvent_t; - //! @brief Construct a new `event_ref` that does refer to an event. - event_ref() = default; - //! @brief Construct a new `event_ref` object from a `cudaEvent_t` //! //! This constructor provides an implicit conversion from `cudaEvent_t` @@ -131,6 +127,14 @@ public: return __event_; } + //! @brief Checks if the `event_ref` is valid + //! + //! @return true if the `event_ref` is valid, false otherwise. + _CCCL_NODISCARD explicit constexpr operator bool() const noexcept + { + return __event_ != nullptr; + } + //! @brief Compares two `event_ref`s for equality //! //! @note Allows comparison with `cudaEvent_t` due to implicit conversion to diff --git a/cudax/include/cuda/experimental/__event/timed_event.cuh b/cudax/include/cuda/experimental/__event/timed_event.cuh index bc77906b84d..46e352ab302 100644 --- a/cudax/include/cuda/experimental/__event/timed_event.cuh +++ b/cudax/include/cuda/experimental/__event/timed_event.cuh @@ -16,8 +16,7 @@ namespace cuda::experimental { class timed_event : public event { public: - timed_event(); - timed_event(flags); + timed_event(stream_ref, flags = flags::none); timed_event(uninit_t) noexcept; timed_event(timed_event&&) noexcept; ~timed_event(); @@ -82,19 +81,15 @@ namespace cuda::experimental class timed_event : public event { public: - //! @brief Construct a new `timed_event` object with timing enabled. + //! @brief Construct a new `timed_event` object with the specified flags + //! and record the event on the specified stream. //! //! @throws cuda_error if the event creation fails. - timed_event() - : event(static_cast(cudaEventDefault)) - {} - - //! @brief Construct a new `timed_event` object with the specified flags. - //! - //! @throws cuda_error if the event creation fails. - explicit timed_event(flags __flags) + explicit timed_event(stream_ref __stream, flags __flags = flags::none) : event(static_cast(__flags)) - {} + { + record(__stream); + } //! @brief Construct a new `timed_event` object into the moved-from state. //! diff --git a/cudax/test/event/event_smoke.cu b/cudax/test/event/event_smoke.cu index cc5c18db2fe..3fd4482ae48 100644 --- a/cudax/test/event/event_smoke.cu +++ b/cudax/test/event/event_smoke.cu @@ -41,7 +41,8 @@ TEST_CASE("can copy construct an event_ref and compare for equality", "[event]") const cudax::event_ref ref2 = ref; CUDAX_REQUIRE(ref2 == ref); CUDAX_REQUIRE(!(ref != ref2)); - CUDAX_REQUIRE(ref != cudax::event_ref{}); + CUDAX_REQUIRE((ref ? true : false)); // test contextual convertibility to bool + CUDAX_REQUIRE(!!ref); CUDAX_REQUIRE(::cudaEvent_t{} != ref); CUDAX_REQUIRE(::cudaEventDestroy(event) == ::cudaSuccess); } From 2932ff5b97c4b1a80ad2184dfd17d1081d43a83b Mon Sep 17 00:00:00 2001 From: Eric Niebler Date: Wed, 24 Jul 2024 17:09:36 +0000 Subject: [PATCH 07/15] review feedback --- .../cuda/experimental/__event/event.cuh | 51 ++++--------------- .../cuda/experimental/__event/event_ref.cuh | 31 +---------- .../cuda/experimental/__event/timed_event.cuh | 47 ++--------------- 3 files changed, 17 insertions(+), 112 deletions(-) diff --git a/cudax/include/cuda/experimental/__event/event.cuh b/cudax/include/cuda/experimental/__event/event.cuh index b12c553e662..e17116169a6 100644 --- a/cudax/include/cuda/experimental/__event/event.cuh +++ b/cudax/include/cuda/experimental/__event/event.cuh @@ -11,49 +11,13 @@ #ifndef _CUDAX_EVENT_DETAIL_H #define _CUDAX_EVENT_DETAIL_H -/* - event synopsis -namespace cuda::experimental { -class event : public event_ref { -public: - enum class flags : unsigned int { none, blocking_sync, interprocess }; - - event(stream_ref, flags = flags::none); - event(uninit_t) noexcept; - event(event&&) noexcept; - ~event(); - event& operator=(event&&) noexcept; - - [[nodiscard]] static event from_native_handle(cudaEvent_t) noexcept; - static event from_native_handle(int) = delete; - static event from_native_handle(nullptr_t) = delete; - - [[nodiscard]] cudaEvent_t release() noexcept; - - [[nodiscard]] friend flags operator|(flags, flags) noexcept; - - // From event_ref: - using value_type = cudaEvent_t; - - void record(stream_ref) const; - - void wait(stream_ref) const; - - [[nodiscard]] cudaEvent_t get() const noexcept; - - [[nodiscard]] explicit operator bool() const noexcept; - - [[nodiscard]] friend bool operator==(event_ref, event_ref); - [[nodiscard]] friend bool operator!=(event_ref, event_ref); -}; -} // cuda::experimenal -*/ - #include // cuda_runtime_api needs to come first #include +#include "cuda/std/detail/libcxx/include/__config" + #if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) # pragma GCC system_header #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) @@ -115,6 +79,9 @@ public: : event_ref(_CUDA_VSTD::exchange(__other.__event_, {})) {} + // Disallow copy construction. + event(const event&) = delete; + //! @brief Destroy the `event` object //! //! @note If the event fails to be destroyed, the error is silently ignored. @@ -131,12 +98,16 @@ public: //! @param __other //! //! @post `__other` is in a moved-from state. - constexpr event& operator=(event&& __other) noexcept + event& operator=(event&& __other) noexcept { - __event_ = _CUDA_VSTD::exchange(__other.__event_, {}); + event __tmp(_CUDA_VSTD::move(__other)); + _CUDA_VSTD::swap(__event_, __tmp.__event_); return *this; } + // Disallow copy assignment. + event& operator=(const event&) = delete; + //! @brief Construct an `event` object from a native `cudaEvent_t` handle. //! //! @param __evnt The native handle diff --git a/cudax/include/cuda/experimental/__event/event_ref.cuh b/cudax/include/cuda/experimental/__event/event_ref.cuh index e642f29b278..c29a7244dd6 100644 --- a/cudax/include/cuda/experimental/__event/event_ref.cuh +++ b/cudax/include/cuda/experimental/__event/event_ref.cuh @@ -11,35 +11,6 @@ #ifndef _CUDAX_EVENT_REF_DETAIL_H #define _CUDAX_EVENT_REF_DETAIL_H -/* - event_ref synopsis -namespace cuda::experimental { -class event_ref { -public: - using value_type = cudaEvent_t; - - event_ref(cudaEvent_t event_) noexcept : event(event_) {} - - event_ref(int) = delete; - event_ref(nullptr_t) = delete; - - void record(stream_ref) const; - - void wait(stream_ref) const; - - [[nodiscard]] cudaEvent_t get() const noexcept; - - [[nodiscard]] explicit operator bool() const noexcept; - - [[nodiscard]] friend bool operator==(event_ref, event_ref); - [[nodiscard]] friend bool operator!=(event_ref, event_ref); - -private: - cudaEvent_t event{}; // exposition only -}; -} // cuda::experimenal -*/ - #include // cuda_runtime_api needs to come first @@ -72,7 +43,7 @@ private: friend class event; friend class timed_event; - ::cudaEvent_t __event_{0}; + ::cudaEvent_t __event_{}; public: using value_type = ::cudaEvent_t; diff --git a/cudax/include/cuda/experimental/__event/timed_event.cuh b/cudax/include/cuda/experimental/__event/timed_event.cuh index 46e352ab302..4bd601a91a0 100644 --- a/cudax/include/cuda/experimental/__event/timed_event.cuh +++ b/cudax/include/cuda/experimental/__event/timed_event.cuh @@ -11,48 +11,6 @@ #ifndef _CUDAX_TIMED_EVENT_DETAIL_H #define _CUDAX_TIMED_EVENT_DETAIL_H -/* - timed_event synopsis -namespace cuda::experimental { -class timed_event : public event { -public: - timed_event(stream_ref, flags = flags::none); - timed_event(uninit_t) noexcept; - timed_event(timed_event&&) noexcept; - ~timed_event(); - timed_event& operator=(timed_event&&) noexcept; - - [[nodiscard]] static timed_event from_native_handle(cudaEvent_t) noexcept; - static timed_event from_native_handle(int) = delete; - static timed_event from_native_handle(nullptr_t) = delete; - - [[nodiscard]] friend auto operator-(const timed_event& end, const timed_event& start) noexcept - -> cuda::std::chrono::nanoseconds; - - // from event: - enum class flags : unsigned int { none, blocking_sync, interprocess }; - - [[nodiscard]] cudaEvent_t release() noexcept; - - [[nodiscard]] friend flags operator|(flags, flags) noexcept; - - // From event_ref: - using value_type = cudaEvent_t; - - void record(stream_ref) const; - - void wait(stream_ref) const; - - [[nodiscard]] cudaEvent_t get() const noexcept; - - [[nodiscard]] explicit operator bool() const noexcept; - - [[nodiscard]] friend bool operator==(event_ref, event_ref); - [[nodiscard]] friend bool operator!=(event_ref, event_ref); -}; -} // cuda::experimenal -*/ - #include // cuda_runtime_api needs to come first @@ -98,6 +56,11 @@ public: : event(uninit) {} + timed_event(timed_event&&) noexcept = default; + timed_event(const timed_event&) = delete; + timed_event& operator=(timed_event&&) noexcept = default; + timed_event& operator=(const timed_event&) = delete; + //! @brief Construct a `timed_event` object from a native `cudaEvent_t` handle. //! //! @param __evnt The native handle From 2585468e3f7a952af1e345ba418458268df1025c Mon Sep 17 00:00:00 2001 From: Eric Niebler Date: Wed, 24 Jul 2024 18:19:24 +0000 Subject: [PATCH 08/15] tests for `cudax::event` and `cudax::timed_event` --- .../cuda/experimental/__event/event.cuh | 1 - .../cuda/experimental/__event/event_ref.cuh | 1 - .../cuda/experimental/__event/timed_event.cuh | 1 - cudax/test/CMakeLists.txt | 1 + cudax/test/common/utility.cuh | 118 ++++++++++++++++++ cudax/test/event/event_smoke.cu | 103 +++++++++++++-- 6 files changed, 211 insertions(+), 14 deletions(-) create mode 100644 cudax/test/common/utility.cuh diff --git a/cudax/include/cuda/experimental/__event/event.cuh b/cudax/include/cuda/experimental/__event/event.cuh index e17116169a6..8defbfb2fc7 100644 --- a/cudax/include/cuda/experimental/__event/event.cuh +++ b/cudax/include/cuda/experimental/__event/event.cuh @@ -27,7 +27,6 @@ #endif // no system header #include -#include #include #include diff --git a/cudax/include/cuda/experimental/__event/event_ref.cuh b/cudax/include/cuda/experimental/__event/event_ref.cuh index c29a7244dd6..6ccfc22cb82 100644 --- a/cudax/include/cuda/experimental/__event/event_ref.cuh +++ b/cudax/include/cuda/experimental/__event/event_ref.cuh @@ -25,7 +25,6 @@ #endif // no system header #include -#include #include #include #include diff --git a/cudax/include/cuda/experimental/__event/timed_event.cuh b/cudax/include/cuda/experimental/__event/timed_event.cuh index 4bd601a91a0..84a7313b1ae 100644 --- a/cudax/include/cuda/experimental/__event/timed_event.cuh +++ b/cudax/include/cuda/experimental/__event/timed_event.cuh @@ -25,7 +25,6 @@ #endif // no system header #include -#include #include #include diff --git a/cudax/test/CMakeLists.txt b/cudax/test/CMakeLists.txt index f457b850565..5a12aa66c5b 100644 --- a/cudax/test/CMakeLists.txt +++ b/cudax/test/CMakeLists.txt @@ -61,4 +61,5 @@ foreach(cn_target IN LISTS cudax_TARGETS) Cudax_add_catch2_test(test_target event_tests ${cn_target} event/event_smoke.cu ) + target_compile_options(${test_target} PRIVATE $<$:--extended-lambda>) endforeach() diff --git a/cudax/test/common/utility.cuh b/cudax/test/common/utility.cuh new file mode 100644 index 00000000000..d259b64d582 --- /dev/null +++ b/cudax/test/common/utility.cuh @@ -0,0 +1,118 @@ +//===----------------------------------------------------------------------===// +// +// Part of CUDA Experimental in CUDA C++ Core Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#include +// cuda_runtime_api needs to come first + +#include +#include +#include + +#include // IWYU pragma: keep (needed for placement new) + +namespace +{ +namespace test +{ +struct stream : cuda::stream_ref +{ + stream() + : cuda::stream_ref(::cudaStream_t{}) + { + ::cudaStream_t stream{}; + _CCCL_TRY_CUDA_API(::cudaStreamCreate, "failed to create a CUDA stream", &stream); + static_cast(*this) = cuda::stream_ref(stream); + } + + cuda::stream_ref ref() const noexcept + { + return *this; + } + + void wait() const + { + _CCCL_TRY_CUDA_API(::cudaStreamSynchronize, "failed to synchronize a CUDA stream", get()); + } + + ~stream() + { + [[maybe_unused]] auto status = ::cudaStreamDestroy(get()); + } +}; + +struct _malloc_managed +{ +private: + void* pv = nullptr; + +public: + explicit _malloc_managed(std::size_t size) + { + _CCCL_TRY_CUDA_API(::cudaMallocManaged, "failed to allocate managed memory", &pv, size); + } + + ~_malloc_managed() + { + [[maybe_unused]] auto status = ::cudaFree(pv); + } + + template + T* get_as() const noexcept + { + return static_cast(pv); + } +}; + +template +struct managed +{ +private: + _malloc_managed _mem; + +public: + explicit managed(T t) + : _mem(sizeof(T)) + { + ::new (_mem.get_as()) T(_CUDA_VSTD::move(t)); + } + + ~managed() + { + get()->~T(); + } + + T* get() noexcept + { + return _mem.get_as(); + } + const T* get() const noexcept + { + return _mem.get_as(); + } + + T& operator*() noexcept + { + return *get(); + } + const T& operator*() const noexcept + { + return *get(); + } +}; + +/// A kernel that takes a callable object and invokes it with a set of arguments +template +__global__ void invokernel(Fn fn, Args... args) +{ + fn(args...); +} + +} // namespace test +} // namespace diff --git a/cudax/test/event/event_smoke.cu b/cudax/test/event/event_smoke.cu index 3fd4482ae48..121e7212b85 100644 --- a/cudax/test/event/event_smoke.cu +++ b/cudax/test/event/event_smoke.cu @@ -10,39 +10,120 @@ #include +#include "../common/utility.cuh" #include "../hierarchy/testing_common.cuh" #include namespace { +namespace test +{ cudax::event_ref fn_takes_event_ref(cudax::event_ref ref) { return ref; } +} // namespace test } // namespace +static_assert(!_CUDA_VSTD::is_default_constructible_v); +static_assert(!_CUDA_VSTD::is_default_constructible_v); +static_assert(!_CUDA_VSTD::is_default_constructible_v); + TEST_CASE("can construct an event_ref from a cudaEvent_t", "[event]") { - ::cudaEvent_t event; - CUDAX_REQUIRE(::cudaEventCreate(&event) == ::cudaSuccess); - cudax::event_ref ref(event); - CUDAX_REQUIRE(ref.get() == event); + ::cudaEvent_t ev; + CUDAX_REQUIRE(::cudaEventCreate(&ev) == ::cudaSuccess); + cudax::event_ref ref(ev); + CUDAX_REQUIRE(ref.get() == ev); + CUDAX_REQUIRE(!!ref); // test implicit converstion from cudaEvent_t: - cudax::event_ref ref2 = ::fn_takes_event_ref(event); - CUDAX_REQUIRE(ref2.get() == event); - CUDAX_REQUIRE(::cudaEventDestroy(event) == ::cudaSuccess); + cudax::event_ref ref2 = ::test::fn_takes_event_ref(ev); + CUDAX_REQUIRE(ref2.get() == ev); + CUDAX_REQUIRE(::cudaEventDestroy(ev) == ::cudaSuccess); + // test an empty event_ref: + cudax::event_ref ref3(::cudaEvent_t{}); + CUDAX_REQUIRE(ref3.get() == ::cudaEvent_t{}); + CUDAX_REQUIRE(!ref3); } TEST_CASE("can copy construct an event_ref and compare for equality", "[event]") { - ::cudaEvent_t event; - CUDAX_REQUIRE(::cudaEventCreate(&event) == ::cudaSuccess); - const cudax::event_ref ref(event); + ::cudaEvent_t ev; + CUDAX_REQUIRE(::cudaEventCreate(&ev) == ::cudaSuccess); + const cudax::event_ref ref(ev); const cudax::event_ref ref2 = ref; CUDAX_REQUIRE(ref2 == ref); CUDAX_REQUIRE(!(ref != ref2)); CUDAX_REQUIRE((ref ? true : false)); // test contextual convertibility to bool CUDAX_REQUIRE(!!ref); CUDAX_REQUIRE(::cudaEvent_t{} != ref); - CUDAX_REQUIRE(::cudaEventDestroy(event) == ::cudaSuccess); + CUDAX_REQUIRE(::cudaEventDestroy(ev) == ::cudaSuccess); + // copy from empty event_ref: + const cudax::event_ref ref3(::cudaEvent_t{}); + const cudax::event_ref ref4 = ref3; + CUDAX_REQUIRE(ref4 == ref3); + CUDAX_REQUIRE(!(ref3 != ref4)); + CUDAX_REQUIRE(!ref4); +} + +TEST_CASE("can use event_ref to record and wait on an event", "[event]") +{ + ::cudaEvent_t ev; + CUDAX_REQUIRE(::cudaEventCreate(&ev) == ::cudaSuccess); + const cudax::event_ref ref(ev); + + test::managed i(0); + test::stream stream; + ::test::invokernel<<<1, 1, 0, stream.get()>>>( + [] _CCCL_HOST_DEVICE(int* pi) { + *pi = 42; + }, + i.get()); + ref.record(stream); + ref.wait(stream); + CUDAX_REQUIRE(*i == 42); + + stream.wait(); + CUDAX_REQUIRE(::cudaEventDestroy(ev) == ::cudaSuccess); +} + +TEST_CASE("can construct an event with a stream_ref", "[event]") +{ + test::stream stream; + cudax::event ev(stream.ref()); + CUDAX_REQUIRE(ev.get() != ::cudaEvent_t{}); +} + +TEST_CASE("can wait on an event", "[event]") +{ + test::stream stream; + ::test::managed i(0); + ::test::invokernel<<<1, 1, 0, stream.get()>>>( + [] _CCCL_HOST_DEVICE(int* pi) { + *pi = 42; + }, + i.get()); + cudax::event ev(stream); + ev.wait(stream); + CUDAX_REQUIRE(*i == 42); + stream.wait(); +} + +TEST_CASE("can take the difference of two timed_event objects", "[event]") +{ + test::stream stream; + ::test::managed i(0); + cudax::timed_event start(stream); + ::test::invokernel<<<1, 1, 0, stream.get()>>>( + [] _CCCL_HOST_DEVICE(int* pi) { + *pi = 42; + }, + i.get()); + cudax::timed_event end(stream); + end.wait(stream); + CUDAX_REQUIRE(*i == 42); + auto elapsed = end - start; + CUDAX_REQUIRE(elapsed.count() >= 0); + STATIC_REQUIRE(_CUDA_VSTD::is_same_v); + stream.wait(); } From 03d126aa8c8139921188040b1e92364037207bba Mon Sep 17 00:00:00 2001 From: Eric Niebler Date: Wed, 24 Jul 2024 23:14:09 +0000 Subject: [PATCH 09/15] change `event_ref::wait` to use `cudaEventSynchronize` --- .../include/cuda/experimental/__event/event_ref.cuh | 12 +++++------- cudax/test/event/event_smoke.cu | 10 +++++----- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/cudax/include/cuda/experimental/__event/event_ref.cuh b/cudax/include/cuda/experimental/__event/event_ref.cuh index 6ccfc22cb82..b795d46a77b 100644 --- a/cudax/include/cuda/experimental/__event/event_ref.cuh +++ b/cudax/include/cuda/experimental/__event/event_ref.cuh @@ -77,16 +77,14 @@ public: _CCCL_TRY_CUDA_API(::cudaEventRecord, "Failed to record CUDA event", __event_, __stream.get()); } - //! @brief Waits for a CUDA event_ref to complete on the specified stream + //! @brief Waits until all the work in the stream prior to the record of the + //! event has completed. //! - //! @param __stream The stream to wait on - //! - //! @throws cuda_error if the event_ref wait fails - void wait(stream_ref __stream) const + //! @throws cuda_error if waiting for the event fails + void wait() const { assert(__event_ != nullptr); - assert(__stream.get() != nullptr); - _CCCL_TRY_CUDA_API(::cudaStreamWaitEvent, "Failed to wait for CUDA event", __stream.get(), __event_); + _CCCL_TRY_CUDA_API(::cudaEventSynchronize, "Failed to wait for CUDA event", __event_); } //! @brief Retrieve the native `cudaEvent_t` handle. diff --git a/cudax/test/event/event_smoke.cu b/cudax/test/event/event_smoke.cu index 121e7212b85..25c5d88ec5a 100644 --- a/cudax/test/event/event_smoke.cu +++ b/cudax/test/event/event_smoke.cu @@ -80,7 +80,7 @@ TEST_CASE("can use event_ref to record and wait on an event", "[event]") }, i.get()); ref.record(stream); - ref.wait(stream); + ref.wait(); CUDAX_REQUIRE(*i == 42); stream.wait(); @@ -99,12 +99,12 @@ TEST_CASE("can wait on an event", "[event]") test::stream stream; ::test::managed i(0); ::test::invokernel<<<1, 1, 0, stream.get()>>>( - [] _CCCL_HOST_DEVICE(int* pi) { + [] _CCCL_DEVICE(int* pi) { *pi = 42; }, i.get()); cudax::event ev(stream); - ev.wait(stream); + ev.wait(); CUDAX_REQUIRE(*i == 42); stream.wait(); } @@ -115,12 +115,12 @@ TEST_CASE("can take the difference of two timed_event objects", "[event]") ::test::managed i(0); cudax::timed_event start(stream); ::test::invokernel<<<1, 1, 0, stream.get()>>>( - [] _CCCL_HOST_DEVICE(int* pi) { + [] _CCCL_DEVICE(int* pi) { *pi = 42; }, i.get()); cudax::timed_event end(stream); - end.wait(stream); + end.wait(); CUDAX_REQUIRE(*i == 42); auto elapsed = end - start; CUDAX_REQUIRE(elapsed.count() >= 0); From a0d1bccad815a5ae87aa7300a4637e7ecfe51749 Mon Sep 17 00:00:00 2001 From: Michael Schellenberger Costa Date: Thu, 25 Jul 2024 13:25:32 +0200 Subject: [PATCH 10/15] Use a struct for windows instead --- cudax/test/event/event_smoke.cu | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/cudax/test/event/event_smoke.cu b/cudax/test/event/event_smoke.cu index 25c5d88ec5a..bc61775dab2 100644 --- a/cudax/test/event/event_smoke.cu +++ b/cudax/test/event/event_smoke.cu @@ -66,6 +66,14 @@ TEST_CASE("can copy construct an event_ref and compare for equality", "[event]") CUDAX_REQUIRE(!ref4); } +struct assign_42 +{ + __device__ constexpr void operator()(int* pi) const noexcept + { + *pi = 42; + } +}; + TEST_CASE("can use event_ref to record and wait on an event", "[event]") { ::cudaEvent_t ev; @@ -74,11 +82,7 @@ TEST_CASE("can use event_ref to record and wait on an event", "[event]") test::managed i(0); test::stream stream; - ::test::invokernel<<<1, 1, 0, stream.get()>>>( - [] _CCCL_HOST_DEVICE(int* pi) { - *pi = 42; - }, - i.get()); + ::test::invokernel<<<1, 1, 0, stream.get()>>>(assign_42{}, i.get()); ref.record(stream); ref.wait(); CUDAX_REQUIRE(*i == 42); @@ -98,11 +102,7 @@ TEST_CASE("can wait on an event", "[event]") { test::stream stream; ::test::managed i(0); - ::test::invokernel<<<1, 1, 0, stream.get()>>>( - [] _CCCL_DEVICE(int* pi) { - *pi = 42; - }, - i.get()); + ::test::invokernel<<<1, 1, 0, stream.get()>>>(assign_42{}, i.get()); cudax::event ev(stream); ev.wait(); CUDAX_REQUIRE(*i == 42); @@ -114,11 +114,7 @@ TEST_CASE("can take the difference of two timed_event objects", "[event]") test::stream stream; ::test::managed i(0); cudax::timed_event start(stream); - ::test::invokernel<<<1, 1, 0, stream.get()>>>( - [] _CCCL_DEVICE(int* pi) { - *pi = 42; - }, - i.get()); + ::test::invokernel<<<1, 1, 0, stream.get()>>>(assign_42{}, i.get()); cudax::timed_event end(stream); end.wait(); CUDAX_REQUIRE(*i == 42); From 2af61350eb275240e383e9b1f01714dfbaa5f4c0 Mon Sep 17 00:00:00 2001 From: Michael Schellenberger Costa Date: Thu, 25 Jul 2024 13:26:18 +0200 Subject: [PATCH 11/15] Do not include superfluous config header --- cudax/include/cuda/experimental/__event/event.cuh | 2 -- 1 file changed, 2 deletions(-) diff --git a/cudax/include/cuda/experimental/__event/event.cuh b/cudax/include/cuda/experimental/__event/event.cuh index 8defbfb2fc7..3083bc3d7c6 100644 --- a/cudax/include/cuda/experimental/__event/event.cuh +++ b/cudax/include/cuda/experimental/__event/event.cuh @@ -16,8 +16,6 @@ #include -#include "cuda/std/detail/libcxx/include/__config" - #if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) # pragma GCC system_header #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) From 9ade362a4a1244581492eba2e452777653efe2bb Mon Sep 17 00:00:00 2001 From: Michael Schellenberger Costa Date: Thu, 25 Jul 2024 13:29:19 +0200 Subject: [PATCH 12/15] Add clang-format rule for cudax --- .clang-format | 7 +++++-- cudax/include/cuda/experimental/__event/event.cuh | 1 - cudax/include/cuda/experimental/__event/timed_event.cuh | 1 - 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.clang-format b/.clang-format index 21fd8c447ad..3cd3f6da331 100644 --- a/.clang-format +++ b/.clang-format @@ -117,12 +117,15 @@ IncludeCategories: - Regex: '^$' - Priority: 5 - SortPriority: 4 + Priority: 6 + SortPriority: 5 - Regex: '^ #include -// CUDAX headers here #include #include diff --git a/cudax/include/cuda/experimental/__event/timed_event.cuh b/cudax/include/cuda/experimental/__event/timed_event.cuh index 84a7313b1ae..debcbcd26e5 100644 --- a/cudax/include/cuda/experimental/__event/timed_event.cuh +++ b/cudax/include/cuda/experimental/__event/timed_event.cuh @@ -28,7 +28,6 @@ #include #include -// CUDAX headers here #include #include From 3f65ae34450196fe666152285d8ca03508d4e403 Mon Sep 17 00:00:00 2001 From: Michael Schellenberger Costa Date: Thu, 25 Jul 2024 13:33:20 +0200 Subject: [PATCH 13/15] Spell `cudax_add_catch2_test` correctly --- cudax/test/CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cudax/test/CMakeLists.txt b/cudax/test/CMakeLists.txt index 5a12aa66c5b..2f337668dda 100644 --- a/cudax/test/CMakeLists.txt +++ b/cudax/test/CMakeLists.txt @@ -47,18 +47,18 @@ foreach(cn_target IN LISTS cudax_TARGETS) add_dependencies(${config_prefix}.all ${config_meta_target}) # Add tests: - Cudax_add_catch2_test(test_target hierarchy_tests ${cn_target} + cudax_add_catch2_test(test_target hierarchy_tests ${cn_target} hierarchy/hierarchy_smoke.cu hierarchy/hierarchy_custom_types.cu ) - Cudax_add_catch2_test(test_target launch_tests ${cn_target} + cudax_add_catch2_test(test_target launch_tests ${cn_target} launch/launch_smoke.cu launch/configuration.cu ) target_compile_options(${test_target} PRIVATE $<$:--extended-lambda>) - Cudax_add_catch2_test(test_target event_tests ${cn_target} + cudax_add_catch2_test(test_target event_tests ${cn_target} event/event_smoke.cu ) target_compile_options(${test_target} PRIVATE $<$:--extended-lambda>) From 9b383b664902654e3c3d9bc4618ad4ce084b9704 Mon Sep 17 00:00:00 2001 From: Michael Schellenberger Costa Date: Thu, 25 Jul 2024 20:10:43 +0200 Subject: [PATCH 14/15] Fix formatting issues --- .../cuda/experimental/__hierarchy/hierarchy_dimensions.cuh | 5 +++-- .../cuda/experimental/__hierarchy/level_dimensions.cuh | 3 ++- cudax/include/cuda/experimental/__launch/configuration.cuh | 4 +++- cudax/include/cuda/experimental/__launch/launch.cuh | 3 ++- cudax/test/launch/launch_smoke.cu | 5 +---- 5 files changed, 11 insertions(+), 9 deletions(-) diff --git a/cudax/include/cuda/experimental/__hierarchy/hierarchy_dimensions.cuh b/cudax/include/cuda/experimental/__hierarchy/hierarchy_dimensions.cuh index 21e298ca40d..48d4b38b1dd 100644 --- a/cudax/include/cuda/experimental/__hierarchy/hierarchy_dimensions.cuh +++ b/cudax/include/cuda/experimental/__hierarchy/hierarchy_dimensions.cuh @@ -11,10 +11,11 @@ #ifndef _CUDAX__HIERARCHY_HIERARCHY_DIMENSIONS #define _CUDAX__HIERARCHY_HIERARCHY_DIMENSIONS -#include +#include #include -#include "cuda/std/__utility/declval.h" +#include + #include #if _CCCL_STD_VER >= 2017 diff --git a/cudax/include/cuda/experimental/__hierarchy/level_dimensions.cuh b/cudax/include/cuda/experimental/__hierarchy/level_dimensions.cuh index 13d04ca24a5..c685a2db6a3 100644 --- a/cudax/include/cuda/experimental/__hierarchy/level_dimensions.cuh +++ b/cudax/include/cuda/experimental/__hierarchy/level_dimensions.cuh @@ -11,9 +11,10 @@ #ifndef _CUDAX__HIERARCHY_LEVEL_DIMENSIONS #define _CUDAX__HIERARCHY_LEVEL_DIMENSIONS -#include #include +#include + #if _CCCL_STD_VER >= 2017 namespace cuda::experimental { diff --git a/cudax/include/cuda/experimental/__launch/configuration.cuh b/cudax/include/cuda/experimental/__launch/configuration.cuh index 99ff65f7c30..d85a6ff5b96 100644 --- a/cudax/include/cuda/experimental/__launch/configuration.cuh +++ b/cudax/include/cuda/experimental/__launch/configuration.cuh @@ -10,10 +10,12 @@ #ifndef _CUDAX__LAUNCH_CONFIGURATION #define _CUDAX__LAUNCH_CONFIGURATION -#include + #include #include +#include + #if _CCCL_STD_VER >= 2017 namespace cuda::experimental { diff --git a/cudax/include/cuda/experimental/__launch/launch.cuh b/cudax/include/cuda/experimental/__launch/launch.cuh index 21d9d3377bf..790af2a9d58 100644 --- a/cudax/include/cuda/experimental/__launch/launch.cuh +++ b/cudax/include/cuda/experimental/__launch/launch.cuh @@ -12,10 +12,11 @@ #define _CUDAX__LAUNCH_LAUNCH #include -#include #include #include +#include + #if _CCCL_STD_VER >= 2017 namespace cuda::experimental { diff --git a/cudax/test/launch/launch_smoke.cu b/cudax/test/launch/launch_smoke.cu index d7624f3a4ec..554cabd015c 100644 --- a/cudax/test/launch/launch_smoke.cu +++ b/cudax/test/launch/launch_smoke.cu @@ -9,11 +9,8 @@ //===----------------------------------------------------------------------===// #define LIBCUDACXX_ENABLE_EXCEPTIONS #include -#include -#include -#include -#include +#include #include "../hierarchy/testing_common.cuh" From 4a6a01f5bd2d8bd8c56ae223116c4c87caf6294f Mon Sep 17 00:00:00 2001 From: Eric Niebler Date: Thu, 25 Jul 2024 21:45:34 +0000 Subject: [PATCH 15/15] roll back change to `.clang-format` --- .clang-format | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/.clang-format b/.clang-format index 3cd3f6da331..21fd8c447ad 100644 --- a/.clang-format +++ b/.clang-format @@ -117,15 +117,12 @@ IncludeCategories: - Regex: '^$' - Priority: 6 - SortPriority: 5 + Priority: 5 + SortPriority: 4 - Regex: '^