diff --git a/libcudacxx/include/cuda/std/__cccl/builtin.h b/libcudacxx/include/cuda/std/__cccl/builtin.h index 0ab81891842..1d44689dbf0 100644 --- a/libcudacxx/include/cuda/std/__cccl/builtin.h +++ b/libcudacxx/include/cuda/std/__cccl/builtin.h @@ -233,12 +233,6 @@ # undef _CCCL_BUILTIN_MEMMOVE #endif // _CCCL_CUDA_COMPILER(NVCC) -#if _CCCL_CHECK_BUILTIN(builtin_operator_new) && _CCCL_CHECK_BUILTIN(builtin_operator_delete) \ - && _CCCL_CUDA_COMPILER(CLANG) -# define _CCCL_BUILTIN_OPERATOR_DELETE(...) __builtin_operator_delete(__VA_ARGS__) -# define _CCCL_BUILTIN_OPERATOR_NEW(...) __builtin_operator_new(__VA_ARGS__) -#endif // _CCCL_CHECK_BUILTIN(builtin_operator_new) && _CCCL_CHECK_BUILTIN(builtin_operator_delete) - #if _CCCL_CHECK_BUILTIN(builtin_prefetch) || _CCCL_COMPILER(GCC) # define _CCCL_BUILTIN_PREFETCH(...) NV_IF_TARGET(NV_IS_HOST, __builtin_prefetch(__VA_ARGS__);) #else diff --git a/libcudacxx/include/cuda/std/__fwd/new.h b/libcudacxx/include/cuda/std/__fwd/new.h new file mode 100644 index 00000000000..218d2ededd1 --- /dev/null +++ b/libcudacxx/include/cuda/std/__fwd/new.h @@ -0,0 +1,48 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef _CUDA_STD___FWD_NEW_H +#define _CUDA_STD___FWD_NEW_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include + +#include + +// std:: forward declarations + +#if _CCCL_HAS_HOST_STD_LIB() +_CCCL_BEGIN_NAMESPACE_STD + +enum class align_val_t : ::cuda::std::size_t; + +_CCCL_END_NAMESPACE_STD +#endif // _CCCL_HAS_HOST_STD_LIB() + +// cuda::std:: forward declarations + +_CCCL_BEGIN_NAMESPACE_CUDA_STD + +using ::std::align_val_t; + +_CCCL_END_NAMESPACE_CUDA_STD + +#include + +#endif // _CUDA_STD___FWD_NEW_H diff --git a/libcudacxx/include/cuda/std/__internal/features.h b/libcudacxx/include/cuda/std/__internal/features.h index b221a9fee75..4e18bfa3cdb 100644 --- a/libcudacxx/include/cuda/std/__internal/features.h +++ b/libcudacxx/include/cuda/std/__internal/features.h @@ -24,12 +24,6 @@ #define _LIBCUDACXX_HAS_MONOTONIC_CLOCK() 0 #define _LIBCUDACXX_HAS_SPACESHIP_OPERATOR() 0 -#if _CCCL_CUDA_COMPILATION() || __cpp_aligned_new < 201606 -# define _LIBCUDACXX_HAS_ALIGNED_ALLOCATION() 0 -#else -# define _LIBCUDACXX_HAS_ALIGNED_ALLOCATION() 1 -#endif // !_CCCL_CUDA_COMPILATION() && __cpp_aligned_new >= 201606 - // We need `is_constant_evaluated` for clang and gcc. MSVC also needs extensive rework #if !defined(_CCCL_BUILTIN_IS_CONSTANT_EVALUATED) # define _LIBCUDACXX_HAS_CONSTEXPR_COMPLEX_OPERATIONS() 0 diff --git a/libcudacxx/include/cuda/std/__memory/temporary_buffer.h b/libcudacxx/include/cuda/std/__memory/temporary_buffer.h index 88b3f96e23e..c377c0bf00e 100644 --- a/libcudacxx/include/cuda/std/__memory/temporary_buffer.h +++ b/libcudacxx/include/cuda/std/__memory/temporary_buffer.h @@ -46,26 +46,14 @@ template } while (__n > 0) { -#if _LIBCUDACXX_HAS_ALIGNED_ALLOCATION() - if (__is_overaligned_for_new(alignof(_Tp))) + if constexpr (alignof(_Tp) > __STDCPP_DEFAULT_NEW_ALIGNMENT__) { - ::cuda::std::align_val_t __al = ::cuda::std::align_val_t(::cuda::std::alignment_of<_Tp>::value); - __r.first = static_cast<_Tp*>(::operator new(__n * sizeof(_Tp), __al)); + __r.first = static_cast<_Tp*>(::cuda::std::__cccl_operator_new(__n * sizeof(_Tp), align_val_t{alignof(_Tp)})); } else { - __r.first = static_cast<_Tp*>(::operator new(__n * sizeof(_Tp))); + __r.first = static_cast<_Tp*>(::cuda::std::__cccl_operator_new(__n * sizeof(_Tp))); } -#else // ^^^ _LIBCUDACXX_HAS_ALIGNED_ALLOCATION() ^^^ / vvv !_LIBCUDACXX_HAS_ALIGNED_ALLOCATION() vvv - if (__is_overaligned_for_new(alignof(_Tp))) - { - // Since aligned operator new is unavailable, return an empty - // buffer rather than one with invalid alignment. - return __r; - } - - __r.first = static_cast<_Tp*>(::operator new(__n * sizeof(_Tp))); -#endif // !_LIBCUDACXX_HAS_ALIGNED_ALLOCATION() if (__r.first) { diff --git a/libcudacxx/include/cuda/std/__new/allocate.h b/libcudacxx/include/cuda/std/__new/allocate.h index b3344bde47e..765f5390c5d 100644 --- a/libcudacxx/include/cuda/std/__new/allocate.h +++ b/libcudacxx/include/cuda/std/__new/allocate.h @@ -22,105 +22,108 @@ # pragma system_header #endif // no system header +#include +#include +#include +#include #include -#if _LIBCUDACXX_HAS_ALIGNED_ALLOCATION() && !_CCCL_COMPILER(NVRTC) -# include // for align_val_t -#endif // _LIBCUDACXX_HAS_ALIGNED_ALLOCATION() !_CCCL_COMPILER(NVRTC) - -// clang-cuda only provides device flavors of operator new, so we need to pull in here -#if _CCCL_CUDA_COMPILER(CLANG) -# include -#endif // _CCCL_CUDA_COMPILER(CLANG) - -#if !defined(__cpp_sized_deallocation) || __cpp_sized_deallocation < 201309L -# define _LIBCUDACXX_HAS_SIZED_DEALLOCATION() 0 +#if __cpp_sized_deallocation >= 201309L +# define _CCCL_HAS_SIZED_DEALLOCATION() 1 #else -# define _LIBCUDACXX_HAS_SIZED_DEALLOCATION() 1 +# define _CCCL_HAS_SIZED_DEALLOCATION() 0 #endif #include _CCCL_BEGIN_NAMESPACE_CUDA_STD -_CCCL_API constexpr bool __is_overaligned_for_new(size_t __align) noexcept +template +[[nodiscard]] _CCCL_API void* __cccl_operator_new(_Args... __args) { -#ifdef __STDCPP_DEFAULT_NEW_ALIGNMENT__ - return __align > __STDCPP_DEFAULT_NEW_ALIGNMENT__; -#else // ^^^ __STDCPP_DEFAULT_NEW_ALIGNMENT__ ^^^ / vvv !__STDCPP_DEFAULT_NEW_ALIGNMENT__ vvv - return __align > alignof(max_align_t); -#endif // !__STDCPP_DEFAULT_NEW_ALIGNMENT__ + return ::operator new(__args...); } template -_CCCL_API inline void* __cccl_operator_new(_Args... __args) +[[nodiscard]] _CCCL_API void* __cccl_operator_new(size_t __size, align_val_t __align, [[maybe_unused]] _Args... __args) { - // Those builtins are not usable on device and the tests crash when using them -#if defined(_CCCL_BUILTIN_OPERATOR_NEW) - return _CCCL_BUILTIN_OPERATOR_NEW(__args...); -#else // ^^^ _CCCL_BUILTIN_OPERATOR_NEW ^^^ / vvv !_CCCL_BUILTIN_OPERATOR_NEW vvv - return ::operator new(__args...); -#endif // !_CCCL_BUILTIN_OPERATOR_NEW +#if _CCCL_CUDA_COMPILER(CLANG) && _CCCL_DEVICE_COMPILATION() + void* __ret = ::cuda::std::aligned_alloc(__size, static_cast(__align)); + if (__ret == nullptr) + { + ::cuda::std::__throw_bad_alloc(); // always terminates on device + } + return __ret; +#else // ^^^ clang-cuda in device mode ^^^ / vvv other vvv + return ::operator new(__size, __align, __args...); +#endif // ^^^ other ^^^ } template -_CCCL_API inline void __cccl_operator_delete(_Args... __args) +_CCCL_API void __cccl_operator_delete(_Args... __args) { - // Those builtins are not usable on device and the tests crash when using them -#if defined(_CCCL_BUILTIN_OPERATOR_DELETE) - _CCCL_BUILTIN_OPERATOR_DELETE(__args...); -#else // ^^^ _CCCL_BUILTIN_OPERATOR_DELETE ^^^ / vvv !_CCCL_BUILTIN_OPERATOR_DELETE vvv ::operator delete(__args...); -#endif // !_CCCL_BUILTIN_OPERATOR_DELETE } -#if _LIBCUDACXX_HAS_ALIGNED_ALLOCATION() -using ::std::align_val_t; -#endif // _LIBCUDACXX_HAS_ALIGNED_ALLOCATION() +template +_CCCL_API void __cccl_operator_delete(void* __ptr, align_val_t __align, _Args... __args) +{ +#if _CCCL_CUDA_COMPILER(CLANG) && _CCCL_DEVICE_COMPILATION() + ::cuda::std::free(__ptr); +#else // ^^^ clang-cuda in device mode ^^^ / vvv other vvv + return ::operator delete(__ptr, __align, __args...); +#endif // ^^^ other ^^^ +} + +#if _CCCL_HAS_SIZED_DEALLOCATION() +template +_CCCL_API void __cccl_operator_delete(void* __ptr, size_t __size, align_val_t __align, _Args... __args) +{ +# if _CCCL_CUDA_COMPILER(CLANG) && _CCCL_DEVICE_COMPILATION() + ::cuda::std::free(__ptr); +# else // ^^^ clang-cuda in device mode ^^^ / vvv other vvv + return ::operator delete(__ptr, __size, __align, __args...); +# endif // ^^^ other ^^^ +} +#endif // _CCCL_HAS_SIZED_DEALLOCATION() -_CCCL_API inline void* __cccl_allocate(size_t __size, [[maybe_unused]] size_t __align) +[[nodiscard]] _CCCL_API inline void* __cccl_allocate(size_t __size, size_t __align) { -#if _LIBCUDACXX_HAS_ALIGNED_ALLOCATION() - if (::cuda::std::__is_overaligned_for_new(__align)) + if (__align > __STDCPP_DEFAULT_NEW_ALIGNMENT__) { const align_val_t __align_val = static_cast(__align); return ::cuda::std::__cccl_operator_new(__size, __align_val); } -#endif // _LIBCUDACXX_HAS_ALIGNED_ALLOCATION() return ::cuda::std::__cccl_operator_new(__size); } template _CCCL_API inline void __do_deallocate_handle_size(void* __ptr, [[maybe_unused]] size_t __size, _Args... __args) { -#if _LIBCUDACXX_HAS_SIZED_DEALLOCATION() +#if _CCCL_HAS_SIZED_DEALLOCATION() return ::cuda::std::__cccl_operator_delete(__ptr, __size, __args...); -#else // ^^^ _LIBCUDACXX_HAS_SIZED_DEALLOCATION() ^^^ / vvv !_LIBCUDACXX_HAS_SIZED_DEALLOCATION() vvv +#else // ^^^ _CCCL_HAS_SIZED_DEALLOCATION() ^^^ / vvv !_CCCL_HAS_SIZED_DEALLOCATION() vvv return ::cuda::std::__cccl_operator_delete(__ptr, __args...); -#endif // !_LIBCUDACXX_HAS_SIZED_DEALLOCATION() +#endif // !_CCCL_HAS_SIZED_DEALLOCATION() } -_CCCL_API inline void __cccl_deallocate(void* __ptr, size_t __size, [[maybe_unused]] size_t __align) +_CCCL_API inline void __cccl_deallocate(void* __ptr, size_t __size, size_t __align) { -#if _LIBCUDACXX_HAS_ALIGNED_ALLOCATION() - if (::cuda::std::__is_overaligned_for_new(__align)) + if (__align > __STDCPP_DEFAULT_NEW_ALIGNMENT__) { const align_val_t __align_val = static_cast(__align); return ::cuda::std::__do_deallocate_handle_size(__ptr, __size, __align_val); } -#endif // _LIBCUDACXX_HAS_ALIGNED_ALLOCATION() return ::cuda::std::__do_deallocate_handle_size(__ptr, __size); } -_CCCL_API inline void __cccl_deallocate_unsized(void* __ptr, [[maybe_unused]] size_t __align) +_CCCL_API inline void __cccl_deallocate_unsized(void* __ptr, size_t __align) { -#if _LIBCUDACXX_HAS_ALIGNED_ALLOCATION() - if (::cuda::std::__is_overaligned_for_new(__align)) + if (__align > __STDCPP_DEFAULT_NEW_ALIGNMENT__) { const align_val_t __align_val = static_cast(__align); return ::cuda::std::__cccl_operator_delete(__ptr, __align_val); } -#endif // _LIBCUDACXX_HAS_ALIGNED_ALLOCATION() return ::cuda::std::__cccl_operator_delete(__ptr); } diff --git a/libcudacxx/include/cuda/std/__new/bad_alloc.h b/libcudacxx/include/cuda/std/__new/bad_alloc.h index 9654595f911..7bcdca9b2fd 100644 --- a/libcudacxx/include/cuda/std/__new/bad_alloc.h +++ b/libcudacxx/include/cuda/std/__new/bad_alloc.h @@ -23,6 +23,7 @@ #endif // no system header #include +#include #if _CCCL_HAS_EXCEPTIONS() # include diff --git a/libcudacxx/test/libcudacxx/std/utilities/memory/default.allocator/allocator.members/allocate.pass.cpp b/libcudacxx/test/libcudacxx/std/utilities/memory/default.allocator/allocator.members/allocate.pass.cpp index 88986dbdd5a..84ef51227ab 100644 --- a/libcudacxx/test/libcudacxx/std/utilities/memory/default.allocator/allocator.members/allocate.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/utilities/memory/default.allocator/allocator.members/allocate.pass.cpp @@ -24,20 +24,6 @@ TEST_DIAG_SUPPRESS_MSVC(4324) // structure was padded due to alignment specifier -#if _LIBCUDACXX_HAS_ALIGNED_ALLOCATION() -static const bool UsingAlignedNew = true; -#else -static const bool UsingAlignedNew = false; -#endif - -#ifdef __STDCPP_DEFAULT_NEW_ALIGNMENT__ -TEST_GLOBAL_VARIABLE const cuda::std::size_t MaxAligned = __STDCPP_DEFAULT_NEW_ALIGNMENT__; -#else -TEST_GLOBAL_VARIABLE const cuda::std::size_t MaxAligned = cuda::std::alignment_of::value; -#endif - -TEST_GLOBAL_VARIABLE const cuda::std::size_t OverAligned = MaxAligned * 2; - TEST_GLOBAL_VARIABLE int AlignedType_constructed = 0; template @@ -65,8 +51,8 @@ __host__ __device__ void test_aligned() AlignedType_constructed = 0; globalMemCounter.reset(); cuda::std::allocator a; - const bool IsOverAlignedType = Align > MaxAligned; - const bool ExpectAligned = IsOverAlignedType && UsingAlignedNew; + const bool IsOverAlignedType = Align > __STDCPP_DEFAULT_NEW_ALIGNMENT__; + const bool ExpectAligned = IsOverAlignedType; { assert(globalMemCounter.checkOutstandingNewEq(0)); assert(AlignedType_constructed == 0); @@ -114,9 +100,9 @@ int main(int, char**) test_aligned<4>(); test_aligned<8>(); test_aligned<16>(); - test_aligned(); - test_aligned(); - test_aligned(); + test_aligned<__STDCPP_DEFAULT_NEW_ALIGNMENT__>(); + test_aligned<__STDCPP_DEFAULT_NEW_ALIGNMENT__ * 2>(); + test_aligned<__STDCPP_DEFAULT_NEW_ALIGNMENT__ * 4>(); #if defined(_CCCL_HAS_CONSTEXPR_ALLOCATION) static_assert(test_aligned_constexpr<1>()); @@ -124,9 +110,9 @@ int main(int, char**) static_assert(test_aligned_constexpr<4>()); static_assert(test_aligned_constexpr<8>()); static_assert(test_aligned_constexpr<16>()); - static_assert(test_aligned_constexpr()); - static_assert(test_aligned_constexpr()); - static_assert(test_aligned_constexpr()); + static_assert(test_aligned_constexpr<__STDCPP_DEFAULT_NEW_ALIGNMENT__>()); + static_assert(test_aligned_constexpr<__STDCPP_DEFAULT_NEW_ALIGNMENT__ * 2>()); + static_assert(test_aligned_constexpr<__STDCPP_DEFAULT_NEW_ALIGNMENT__ * 4>()); #endif // _CCCL_HAS_CONSTEXPR_ALLOCATION return 0; diff --git a/libcudacxx/test/libcudacxx/std/utilities/memory/default.allocator/allocator.members/allocate_at_least.pass.cpp b/libcudacxx/test/libcudacxx/std/utilities/memory/default.allocator/allocator.members/allocate_at_least.pass.cpp index 68d6b3889b9..1b7fd18e8fd 100644 --- a/libcudacxx/test/libcudacxx/std/utilities/memory/default.allocator/allocator.members/allocate_at_least.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/utilities/memory/default.allocator/allocator.members/allocate_at_least.pass.cpp @@ -20,20 +20,6 @@ #include "count_new.h" -#if _LIBCUDACXX_HAS_ALIGNED_ALLOCATION() -static const bool UsingAlignedNew = true; -#else -static const bool UsingAlignedNew = false; -#endif - -#ifdef __STDCPP_DEFAULT_NEW_ALIGNMENT__ -static const cuda::std::size_t MaxAligned = __STDCPP_DEFAULT_NEW_ALIGNMENT__; -#else -static const cuda::std::size_t MaxAligned = cuda::std::alignment_of::value; -#endif - -static const cuda::std::size_t OverAligned = MaxAligned * 2; - template struct alignas(Align) AlignedType { @@ -62,8 +48,8 @@ __host__ __device__ void test_aligned() T::constructed = 0; globalMemCounter.reset(); cuda::std::allocator a; - const bool IsOverAlignedType = Align > MaxAligned; - const bool ExpectAligned = IsOverAlignedType && UsingAlignedNew; + const bool IsOverAlignedType = Align > __STDCPP_DEFAULT_NEW_ALIGNMENT__; + const bool ExpectAligned = IsOverAlignedType; { assert(globalMemCounter.checkOutstandingNewEq(0)); assert(T::constructed == 0); @@ -107,17 +93,17 @@ int main(int, char**) test_aligned<4>(); test_aligned<8>(); test_aligned<16>(); - test_aligned(); - test_aligned(); - test_aligned(); + test_aligned<__STDCPP_DEFAULT_NEW_ALIGNMENT__>(); + test_aligned<__STDCPP_DEFAULT_NEW_ALIGNMENT__ * 2>(); + test_aligned<__STDCPP_DEFAULT_NEW_ALIGNMENT__ * 4>(); static_assert(test_aligned_constexpr<1>()); static_assert(test_aligned_constexpr<2>()); static_assert(test_aligned_constexpr<4>()); static_assert(test_aligned_constexpr<8>()); static_assert(test_aligned_constexpr<16>()); - static_assert(test_aligned_constexpr()); - static_assert(test_aligned_constexpr()); - static_assert(test_aligned_constexpr()); + static_assert(test_aligned_constexpr<__STDCPP_DEFAULT_NEW_ALIGNMENT__>()); + static_assert(test_aligned_constexpr<__STDCPP_DEFAULT_NEW_ALIGNMENT__ * 2>()); + static_assert(test_aligned_constexpr<__STDCPP_DEFAULT_NEW_ALIGNMENT__ * 4>()); return 0; } diff --git a/libcudacxx/test/support/count_new.h b/libcudacxx/test/support/count_new.h index 13693728867..2e7354a79d9 100644 --- a/libcudacxx/test/support/count_new.h +++ b/libcudacxx/test/support/count_new.h @@ -405,21 +405,20 @@ void operator delete[](void* p, cuda::std::size_t) noexcept } # endif // TEST_COMPILER(GCC) -# if _LIBCUDACXX_HAS_ALIGNED_ALLOCATION() -# if defined(_WIN32) -# define USE_ALIGNED_ALLOC -# endif +# if defined(_WIN32) +# define USE_ALIGNED_ALLOC +# endif void* operator new(cuda::std::size_t s, cuda::std::align_val_t av) { const cuda::std::size_t a = static_cast(av); getGlobalMemCounter()->alignedNewCalled(s, a); void* ret; -# ifdef USE_ALIGNED_ALLOC +# ifdef USE_ALIGNED_ALLOC ret = _aligned_malloc(s, a); -# else +# else posix_memalign(&ret, a, s); -# endif +# endif if (ret == nullptr) { cuda::std::__throw_bad_alloc(); @@ -433,11 +432,11 @@ void operator delete(void* p, cuda::std::align_val_t av) noexcept getGlobalMemCounter()->alignedDeleteCalled(p, a); if (p) { -# ifdef USE_ALIGNED_ALLOC +# ifdef USE_ALIGNED_ALLOC ::_aligned_free(p); -# else +# else ::free(p); -# endif +# endif } } @@ -455,8 +454,6 @@ void operator delete[](void* p, cuda::std::align_val_t av) noexcept return operator delete(p, av); } -# endif // _LIBCUDACXX_HAS_ALIGNED_ALLOCATION() - #endif // DISABLE_NEW_COUNT struct DisableAllocationGuard