diff --git a/libcudacxx/include/cuda/std/__cccl/builtin.h b/libcudacxx/include/cuda/std/__cccl/builtin.h
index 0ab81891842..1d44689dbf0 100644
--- a/libcudacxx/include/cuda/std/__cccl/builtin.h
+++ b/libcudacxx/include/cuda/std/__cccl/builtin.h
@@ -233,12 +233,6 @@
 #  undef _CCCL_BUILTIN_MEMMOVE
 #endif // _CCCL_CUDA_COMPILER(NVCC)
 
-#if _CCCL_CHECK_BUILTIN(builtin_operator_new) && _CCCL_CHECK_BUILTIN(builtin_operator_delete) \
-  && _CCCL_CUDA_COMPILER(CLANG)
-#  define _CCCL_BUILTIN_OPERATOR_DELETE(...) __builtin_operator_delete(__VA_ARGS__)
-#  define _CCCL_BUILTIN_OPERATOR_NEW(...)    __builtin_operator_new(__VA_ARGS__)
-#endif // _CCCL_CHECK_BUILTIN(builtin_operator_new) && _CCCL_CHECK_BUILTIN(builtin_operator_delete)
-
 #if _CCCL_CHECK_BUILTIN(builtin_prefetch) || _CCCL_COMPILER(GCC)
 #  define _CCCL_BUILTIN_PREFETCH(...) NV_IF_TARGET(NV_IS_HOST, __builtin_prefetch(__VA_ARGS__);)
 #else
diff --git a/libcudacxx/include/cuda/std/__fwd/new.h b/libcudacxx/include/cuda/std/__fwd/new.h
new file mode 100644
index 00000000000..218d2ededd1
--- /dev/null
+++ b/libcudacxx/include/cuda/std/__fwd/new.h
@@ -0,0 +1,48 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _CUDA_STD___FWD_NEW_H
+#define _CUDA_STD___FWD_NEW_H
+
+#include <cuda/std/detail/__config>
+
+#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
+#  pragma GCC system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
+#  pragma clang system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
+#  pragma system_header
+#endif // no system header
+
+#include <cuda/std/__cstddef/types.h>
+
+#include <cuda/std/__cccl/prologue.h>
+
+// std:: forward declarations
+
+#if _CCCL_HAS_HOST_STD_LIB()
+_CCCL_BEGIN_NAMESPACE_STD
+
+enum class align_val_t : ::cuda::std::size_t;
+
+_CCCL_END_NAMESPACE_STD
+#endif // _CCCL_HAS_HOST_STD_LIB()
+
+// cuda::std:: forward declarations
+
+_CCCL_BEGIN_NAMESPACE_CUDA_STD
+
+using ::std::align_val_t;
+
+_CCCL_END_NAMESPACE_CUDA_STD
+
+#include <cuda/std/__cccl/epilogue.h>
+
+#endif // _CUDA_STD___FWD_NEW_H
diff --git a/libcudacxx/include/cuda/std/__internal/features.h b/libcudacxx/include/cuda/std/__internal/features.h
index b221a9fee75..4e18bfa3cdb 100644
--- a/libcudacxx/include/cuda/std/__internal/features.h
+++ b/libcudacxx/include/cuda/std/__internal/features.h
@@ -24,12 +24,6 @@
 #define _LIBCUDACXX_HAS_MONOTONIC_CLOCK()       0
 #define _LIBCUDACXX_HAS_SPACESHIP_OPERATOR()    0
 
-#if _CCCL_CUDA_COMPILATION() || __cpp_aligned_new < 201606
-#  define _LIBCUDACXX_HAS_ALIGNED_ALLOCATION() 0
-#else
-#  define _LIBCUDACXX_HAS_ALIGNED_ALLOCATION() 1
-#endif // !_CCCL_CUDA_COMPILATION() && __cpp_aligned_new >= 201606
-
 // We need `is_constant_evaluated` for clang and gcc. MSVC also needs extensive rework
 #if !defined(_CCCL_BUILTIN_IS_CONSTANT_EVALUATED)
 #  define _LIBCUDACXX_HAS_CONSTEXPR_COMPLEX_OPERATIONS() 0
diff --git a/libcudacxx/include/cuda/std/__memory/temporary_buffer.h b/libcudacxx/include/cuda/std/__memory/temporary_buffer.h
index 88b3f96e23e..c377c0bf00e 100644
--- a/libcudacxx/include/cuda/std/__memory/temporary_buffer.h
+++ b/libcudacxx/include/cuda/std/__memory/temporary_buffer.h
@@ -46,26 +46,14 @@ template <class _Tp>
   }
   while (__n > 0)
   {
-#if _LIBCUDACXX_HAS_ALIGNED_ALLOCATION()
-    if (__is_overaligned_for_new(alignof(_Tp)))
+    if constexpr (alignof(_Tp) > __STDCPP_DEFAULT_NEW_ALIGNMENT__)
     {
-      ::cuda::std::align_val_t __al = ::cuda::std::align_val_t(::cuda::std::alignment_of<_Tp>::value);
-      __r.first                     = static_cast<_Tp*>(::operator new(__n * sizeof(_Tp), __al));
+      __r.first = static_cast<_Tp*>(::cuda::std::__cccl_operator_new(__n * sizeof(_Tp), align_val_t{alignof(_Tp)}));
     }
     else
     {
-      __r.first = static_cast<_Tp*>(::operator new(__n * sizeof(_Tp)));
+      __r.first = static_cast<_Tp*>(::cuda::std::__cccl_operator_new(__n * sizeof(_Tp)));
     }
-#else // ^^^ _LIBCUDACXX_HAS_ALIGNED_ALLOCATION() ^^^ / vvv !_LIBCUDACXX_HAS_ALIGNED_ALLOCATION() vvv
-    if (__is_overaligned_for_new(alignof(_Tp)))
-    {
-      // Since aligned operator new is unavailable, return an empty
-      // buffer rather than one with invalid alignment.
-      return __r;
-    }
-
-    __r.first = static_cast<_Tp*>(::operator new(__n * sizeof(_Tp)));
-#endif // !_LIBCUDACXX_HAS_ALIGNED_ALLOCATION()
 
     if (__r.first)
     {
diff --git a/libcudacxx/include/cuda/std/__new/allocate.h b/libcudacxx/include/cuda/std/__new/allocate.h
index b3344bde47e..765f5390c5d 100644
--- a/libcudacxx/include/cuda/std/__new/allocate.h
+++ b/libcudacxx/include/cuda/std/__new/allocate.h
@@ -22,105 +22,108 @@
 #  pragma system_header
 #endif // no system header
 
+#include <cuda/std/__cstdlib/aligned_alloc.h>
+#include <cuda/std/__fwd/new.h>
+#include <cuda/std/__new/bad_alloc.h>
+#include <cuda/std/__new/device_new.h>
 #include <cuda/std/cstddef>
 
-#if _LIBCUDACXX_HAS_ALIGNED_ALLOCATION() && !_CCCL_COMPILER(NVRTC)
-#  include <new> // for align_val_t
-#endif // _LIBCUDACXX_HAS_ALIGNED_ALLOCATION() !_CCCL_COMPILER(NVRTC)
-
-// clang-cuda only provides device flavors of operator new, so we need to pull in <new> here
-#if _CCCL_CUDA_COMPILER(CLANG)
-#  include <new>
-#endif // _CCCL_CUDA_COMPILER(CLANG)
-
-#if !defined(__cpp_sized_deallocation) || __cpp_sized_deallocation < 201309L
-#  define _LIBCUDACXX_HAS_SIZED_DEALLOCATION() 0
+#if __cpp_sized_deallocation >= 201309L
+#  define _CCCL_HAS_SIZED_DEALLOCATION() 1
 #else
-#  define _LIBCUDACXX_HAS_SIZED_DEALLOCATION() 1
+#  define _CCCL_HAS_SIZED_DEALLOCATION() 0
 #endif
 
 #include <cuda/std/__cccl/prologue.h>
 
 _CCCL_BEGIN_NAMESPACE_CUDA_STD
 
-_CCCL_API constexpr bool __is_overaligned_for_new(size_t __align) noexcept
+template <class... _Args>
+[[nodiscard]] _CCCL_API void* __cccl_operator_new(_Args... __args)
 {
-#ifdef __STDCPP_DEFAULT_NEW_ALIGNMENT__
-  return __align > __STDCPP_DEFAULT_NEW_ALIGNMENT__;
-#else // ^^^ __STDCPP_DEFAULT_NEW_ALIGNMENT__ ^^^ / vvv !__STDCPP_DEFAULT_NEW_ALIGNMENT__ vvv
-  return __align > alignof(max_align_t);
-#endif // !__STDCPP_DEFAULT_NEW_ALIGNMENT__
+  return ::operator new(__args...);
 }
 
 template <class... _Args>
-_CCCL_API inline void* __cccl_operator_new(_Args... __args)
+[[nodiscard]] _CCCL_API void* __cccl_operator_new(size_t __size, align_val_t __align, [[maybe_unused]] _Args... __args)
 {
-  // Those builtins are not usable on device and the tests crash when using them
-#if defined(_CCCL_BUILTIN_OPERATOR_NEW)
-  return _CCCL_BUILTIN_OPERATOR_NEW(__args...);
-#else // ^^^ _CCCL_BUILTIN_OPERATOR_NEW ^^^ / vvv !_CCCL_BUILTIN_OPERATOR_NEW vvv
-  return ::operator new(__args...);
-#endif // !_CCCL_BUILTIN_OPERATOR_NEW
+#if _CCCL_CUDA_COMPILER(CLANG) && _CCCL_DEVICE_COMPILATION()
+  void* __ret = ::cuda::std::aligned_alloc(__size, static_cast<size_t>(__align));
+  if (__ret == nullptr)
+  {
+    ::cuda::std::__throw_bad_alloc(); // always terminates on device
+  }
+  return __ret;
+#else // ^^^ clang-cuda in device mode ^^^ / vvv other vvv
+  return ::operator new(__size, __align, __args...);
+#endif // ^^^ other ^^^
 }
 
 template <class... _Args>
-_CCCL_API inline void __cccl_operator_delete(_Args... __args)
+_CCCL_API void __cccl_operator_delete(_Args... __args)
 {
-  // Those builtins are not usable on device and the tests crash when using them
-#if defined(_CCCL_BUILTIN_OPERATOR_DELETE)
-  _CCCL_BUILTIN_OPERATOR_DELETE(__args...);
-#else // ^^^ _CCCL_BUILTIN_OPERATOR_DELETE ^^^ / vvv !_CCCL_BUILTIN_OPERATOR_DELETE vvv
   ::operator delete(__args...);
-#endif // !_CCCL_BUILTIN_OPERATOR_DELETE
 }
 
-#if _LIBCUDACXX_HAS_ALIGNED_ALLOCATION()
-using ::std::align_val_t;
-#endif // _LIBCUDACXX_HAS_ALIGNED_ALLOCATION()
+template <class... _Args>
+_CCCL_API void __cccl_operator_delete(void* __ptr, align_val_t __align, _Args... __args)
+{
+#if _CCCL_CUDA_COMPILER(CLANG) && _CCCL_DEVICE_COMPILATION()
+  ::cuda::std::free(__ptr);
+#else // ^^^ clang-cuda in device mode ^^^ / vvv other vvv
+  return ::operator delete(__ptr, __align, __args...);
+#endif // ^^^ other ^^^
+}
+
+#if _CCCL_HAS_SIZED_DEALLOCATION()
+template <class... _Args>
+_CCCL_API void __cccl_operator_delete(void* __ptr, size_t __size, align_val_t __align, _Args... __args)
+{
+#  if _CCCL_CUDA_COMPILER(CLANG) && _CCCL_DEVICE_COMPILATION()
+  ::cuda::std::free(__ptr);
+#  else // ^^^ clang-cuda in device mode ^^^ / vvv other vvv
+  return ::operator delete(__ptr, __size, __align, __args...);
+#  endif // ^^^ other ^^^
+}
+#endif // _CCCL_HAS_SIZED_DEALLOCATION()
 
-_CCCL_API inline void* __cccl_allocate(size_t __size, [[maybe_unused]] size_t __align)
+[[nodiscard]] _CCCL_API inline void* __cccl_allocate(size_t __size, size_t __align)
 {
-#if _LIBCUDACXX_HAS_ALIGNED_ALLOCATION()
-  if (::cuda::std::__is_overaligned_for_new(__align))
+  if (__align > __STDCPP_DEFAULT_NEW_ALIGNMENT__)
   {
     const align_val_t __align_val = static_cast<align_val_t>(__align);
     return ::cuda::std::__cccl_operator_new(__size, __align_val);
   }
-#endif // _LIBCUDACXX_HAS_ALIGNED_ALLOCATION()
   return ::cuda::std::__cccl_operator_new(__size);
 }
 
 template <class... _Args>
 _CCCL_API inline void __do_deallocate_handle_size(void* __ptr, [[maybe_unused]] size_t __size, _Args... __args)
 {
-#if _LIBCUDACXX_HAS_SIZED_DEALLOCATION()
+#if _CCCL_HAS_SIZED_DEALLOCATION()
   return ::cuda::std::__cccl_operator_delete(__ptr, __size, __args...);
-#else // ^^^ _LIBCUDACXX_HAS_SIZED_DEALLOCATION() ^^^ / vvv !_LIBCUDACXX_HAS_SIZED_DEALLOCATION() vvv
+#else // ^^^ _CCCL_HAS_SIZED_DEALLOCATION() ^^^ / vvv !_CCCL_HAS_SIZED_DEALLOCATION() vvv
   return ::cuda::std::__cccl_operator_delete(__ptr, __args...);
-#endif // !_LIBCUDACXX_HAS_SIZED_DEALLOCATION()
+#endif // !_CCCL_HAS_SIZED_DEALLOCATION()
 }
 
-_CCCL_API inline void __cccl_deallocate(void* __ptr, size_t __size, [[maybe_unused]] size_t __align)
+_CCCL_API inline void __cccl_deallocate(void* __ptr, size_t __size, size_t __align)
 {
-#if _LIBCUDACXX_HAS_ALIGNED_ALLOCATION()
-  if (::cuda::std::__is_overaligned_for_new(__align))
+  if (__align > __STDCPP_DEFAULT_NEW_ALIGNMENT__)
   {
     const align_val_t __align_val = static_cast<align_val_t>(__align);
     return ::cuda::std::__do_deallocate_handle_size(__ptr, __size, __align_val);
   }
-#endif // _LIBCUDACXX_HAS_ALIGNED_ALLOCATION()
   return ::cuda::std::__do_deallocate_handle_size(__ptr, __size);
 }
 
-_CCCL_API inline void __cccl_deallocate_unsized(void* __ptr, [[maybe_unused]] size_t __align)
+_CCCL_API inline void __cccl_deallocate_unsized(void* __ptr, size_t __align)
 {
-#if _LIBCUDACXX_HAS_ALIGNED_ALLOCATION()
-  if (::cuda::std::__is_overaligned_for_new(__align))
+  if (__align > __STDCPP_DEFAULT_NEW_ALIGNMENT__)
   {
     const align_val_t __align_val = static_cast<align_val_t>(__align);
     return ::cuda::std::__cccl_operator_delete(__ptr, __align_val);
   }
-#endif // _LIBCUDACXX_HAS_ALIGNED_ALLOCATION()
   return ::cuda::std::__cccl_operator_delete(__ptr);
 }
 
diff --git a/libcudacxx/include/cuda/std/__new/bad_alloc.h b/libcudacxx/include/cuda/std/__new/bad_alloc.h
index 9654595f911..7bcdca9b2fd 100644
--- a/libcudacxx/include/cuda/std/__new/bad_alloc.h
+++ b/libcudacxx/include/cuda/std/__new/bad_alloc.h
@@ -23,6 +23,7 @@
 #endif // no system header
 
 #include <cuda/std/__exception/terminate.h>
+#include <cuda/std/__fwd/new.h>
 
 #if _CCCL_HAS_EXCEPTIONS()
 #  include <new>
diff --git a/libcudacxx/test/libcudacxx/std/utilities/memory/default.allocator/allocator.members/allocate.pass.cpp b/libcudacxx/test/libcudacxx/std/utilities/memory/default.allocator/allocator.members/allocate.pass.cpp
index 88986dbdd5a..84ef51227ab 100644
--- a/libcudacxx/test/libcudacxx/std/utilities/memory/default.allocator/allocator.members/allocate.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/utilities/memory/default.allocator/allocator.members/allocate.pass.cpp
@@ -24,20 +24,6 @@
 
 TEST_DIAG_SUPPRESS_MSVC(4324) // structure was padded due to alignment specifier
 
-#if _LIBCUDACXX_HAS_ALIGNED_ALLOCATION()
-static const bool UsingAlignedNew = true;
-#else
-static const bool UsingAlignedNew = false;
-#endif
-
-#ifdef __STDCPP_DEFAULT_NEW_ALIGNMENT__
-TEST_GLOBAL_VARIABLE const cuda::std::size_t MaxAligned = __STDCPP_DEFAULT_NEW_ALIGNMENT__;
-#else
-TEST_GLOBAL_VARIABLE const cuda::std::size_t MaxAligned = cuda::std::alignment_of<cuda::std::max_align_t>::value;
-#endif
-
-TEST_GLOBAL_VARIABLE const cuda::std::size_t OverAligned = MaxAligned * 2;
-
 TEST_GLOBAL_VARIABLE int AlignedType_constructed = 0;
 
 template <cuda::std::size_t Align>
@@ -65,8 +51,8 @@ __host__ __device__ void test_aligned()
   AlignedType_constructed = 0;
   globalMemCounter.reset();
   cuda::std::allocator<T> a;
-  const bool IsOverAlignedType = Align > MaxAligned;
-  const bool ExpectAligned     = IsOverAlignedType && UsingAlignedNew;
+  const bool IsOverAlignedType = Align > __STDCPP_DEFAULT_NEW_ALIGNMENT__;
+  const bool ExpectAligned     = IsOverAlignedType;
   {
     assert(globalMemCounter.checkOutstandingNewEq(0));
     assert(AlignedType_constructed == 0);
@@ -114,9 +100,9 @@ int main(int, char**)
   test_aligned<4>();
   test_aligned<8>();
   test_aligned<16>();
-  test_aligned<MaxAligned>();
-  test_aligned<OverAligned>();
-  test_aligned<OverAligned * 2>();
+  test_aligned<__STDCPP_DEFAULT_NEW_ALIGNMENT__>();
+  test_aligned<__STDCPP_DEFAULT_NEW_ALIGNMENT__ * 2>();
+  test_aligned<__STDCPP_DEFAULT_NEW_ALIGNMENT__ * 4>();
 
 #if defined(_CCCL_HAS_CONSTEXPR_ALLOCATION)
   static_assert(test_aligned_constexpr<1>());
@@ -124,9 +110,9 @@ int main(int, char**)
   static_assert(test_aligned_constexpr<4>());
   static_assert(test_aligned_constexpr<8>());
   static_assert(test_aligned_constexpr<16>());
-  static_assert(test_aligned_constexpr<MaxAligned>());
-  static_assert(test_aligned_constexpr<OverAligned>());
-  static_assert(test_aligned_constexpr<OverAligned * 2>());
+  static_assert(test_aligned_constexpr<__STDCPP_DEFAULT_NEW_ALIGNMENT__>());
+  static_assert(test_aligned_constexpr<__STDCPP_DEFAULT_NEW_ALIGNMENT__ * 2>());
+  static_assert(test_aligned_constexpr<__STDCPP_DEFAULT_NEW_ALIGNMENT__ * 4>());
 #endif // _CCCL_HAS_CONSTEXPR_ALLOCATION
 
   return 0;
diff --git a/libcudacxx/test/libcudacxx/std/utilities/memory/default.allocator/allocator.members/allocate_at_least.pass.cpp b/libcudacxx/test/libcudacxx/std/utilities/memory/default.allocator/allocator.members/allocate_at_least.pass.cpp
index 68d6b3889b9..1b7fd18e8fd 100644
--- a/libcudacxx/test/libcudacxx/std/utilities/memory/default.allocator/allocator.members/allocate_at_least.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/utilities/memory/default.allocator/allocator.members/allocate_at_least.pass.cpp
@@ -20,20 +20,6 @@
 
 #include "count_new.h"
 
-#if _LIBCUDACXX_HAS_ALIGNED_ALLOCATION()
-static const bool UsingAlignedNew = true;
-#else
-static const bool UsingAlignedNew = false;
-#endif
-
-#ifdef __STDCPP_DEFAULT_NEW_ALIGNMENT__
-static const cuda::std::size_t MaxAligned = __STDCPP_DEFAULT_NEW_ALIGNMENT__;
-#else
-static const cuda::std::size_t MaxAligned = cuda::std::alignment_of<cuda::std::max_align_t>::value;
-#endif
-
-static const cuda::std::size_t OverAligned = MaxAligned * 2;
-
 template <cuda::std::size_t Align>
 struct alignas(Align) AlignedType
 {
@@ -62,8 +48,8 @@ __host__ __device__ void test_aligned()
   T::constructed = 0;
   globalMemCounter.reset();
   cuda::std::allocator<T> a;
-  const bool IsOverAlignedType = Align > MaxAligned;
-  const bool ExpectAligned     = IsOverAlignedType && UsingAlignedNew;
+  const bool IsOverAlignedType = Align > __STDCPP_DEFAULT_NEW_ALIGNMENT__;
+  const bool ExpectAligned     = IsOverAlignedType;
   {
     assert(globalMemCounter.checkOutstandingNewEq(0));
     assert(T::constructed == 0);
@@ -107,17 +93,17 @@ int main(int, char**)
   test_aligned<4>();
   test_aligned<8>();
   test_aligned<16>();
-  test_aligned<MaxAligned>();
-  test_aligned<OverAligned>();
-  test_aligned<OverAligned * 2>();
+  test_aligned<__STDCPP_DEFAULT_NEW_ALIGNMENT__>();
+  test_aligned<__STDCPP_DEFAULT_NEW_ALIGNMENT__ * 2>();
+  test_aligned<__STDCPP_DEFAULT_NEW_ALIGNMENT__ * 4>();
 
   static_assert(test_aligned_constexpr<1>());
   static_assert(test_aligned_constexpr<2>());
   static_assert(test_aligned_constexpr<4>());
   static_assert(test_aligned_constexpr<8>());
   static_assert(test_aligned_constexpr<16>());
-  static_assert(test_aligned_constexpr<MaxAligned>());
-  static_assert(test_aligned_constexpr<OverAligned>());
-  static_assert(test_aligned_constexpr<OverAligned * 2>());
+  static_assert(test_aligned_constexpr<__STDCPP_DEFAULT_NEW_ALIGNMENT__>());
+  static_assert(test_aligned_constexpr<__STDCPP_DEFAULT_NEW_ALIGNMENT__ * 2>());
+  static_assert(test_aligned_constexpr<__STDCPP_DEFAULT_NEW_ALIGNMENT__ * 4>());
   return 0;
 }
diff --git a/libcudacxx/test/support/count_new.h b/libcudacxx/test/support/count_new.h
index 13693728867..2e7354a79d9 100644
--- a/libcudacxx/test/support/count_new.h
+++ b/libcudacxx/test/support/count_new.h
@@ -405,21 +405,20 @@ void operator delete[](void* p, cuda::std::size_t) noexcept
 }
 #  endif // TEST_COMPILER(GCC)
 
-#  if _LIBCUDACXX_HAS_ALIGNED_ALLOCATION()
-#    if defined(_WIN32)
-#      define USE_ALIGNED_ALLOC
-#    endif
+#  if defined(_WIN32)
+#    define USE_ALIGNED_ALLOC
+#  endif
 
 void* operator new(cuda::std::size_t s, cuda::std::align_val_t av)
 {
   const cuda::std::size_t a = static_cast<cuda::std::size_t>(av);
   getGlobalMemCounter()->alignedNewCalled(s, a);
   void* ret;
-#    ifdef USE_ALIGNED_ALLOC
+#  ifdef USE_ALIGNED_ALLOC
   ret = _aligned_malloc(s, a);
-#    else
+#  else
   posix_memalign(&ret, a, s);
-#    endif
+#  endif
   if (ret == nullptr)
   {
     cuda::std::__throw_bad_alloc();
@@ -433,11 +432,11 @@ void operator delete(void* p, cuda::std::align_val_t av) noexcept
   getGlobalMemCounter()->alignedDeleteCalled(p, a);
   if (p)
   {
-#    ifdef USE_ALIGNED_ALLOC
+#  ifdef USE_ALIGNED_ALLOC
     ::_aligned_free(p);
-#    else
+#  else
     ::free(p);
-#    endif
+#  endif
   }
 }
 
@@ -455,8 +454,6 @@ void operator delete[](void* p, cuda::std::align_val_t av) noexcept
   return operator delete(p, av);
 }
 
-#  endif // _LIBCUDACXX_HAS_ALIGNED_ALLOCATION()
-
 #endif // DISABLE_NEW_COUNT
 
 struct DisableAllocationGuard