Fix C++11 support of recently added tests (#651)

NVIDIA · Nov 3, 2023 · abe6d1c · abe6d1c
1 parent 83b3365
commit abe6d1c
Show file tree

Hide file tree

Showing 6 changed files with 15 additions and 0 deletions.
diff --git a/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_1d.pass.cpp b/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_1d.pass.cpp
@@ -8,6 +8,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
+// UNSUPPORTED: c++11
 // UNSUPPORTED: libcpp-has-no-threads
 // UNSUPPORTED: pre-sm-90
 // UNSUPPORTED: nvrtc

diff --git a/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_2d.pass.cpp b/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_2d.pass.cpp
@@ -8,6 +8,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
+// UNSUPPORTED: c++11
 // UNSUPPORTED: libcpp-has-no-threads
 // UNSUPPORTED: pre-sm-90
 // UNSUPPORTED: nvrtc

diff --git a/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_3d.pass.cpp b/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_3d.pass.cpp
@@ -8,6 +8,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
+// UNSUPPORTED: c++11
 // UNSUPPORTED: libcpp-has-no-threads
 // UNSUPPORTED: pre-sm-90
 // UNSUPPORTED: nvrtc

diff --git a/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_4d.pass.cpp b/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_4d.pass.cpp
@@ -8,6 +8,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
+// UNSUPPORTED: c++11
 // UNSUPPORTED: libcpp-has-no-threads
 // UNSUPPORTED: pre-sm-90
 // UNSUPPORTED: nvrtc

diff --git a/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_5d.pass.cpp b/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_5d.pass.cpp
@@ -8,6 +8,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
+// UNSUPPORTED: c++11
 // UNSUPPORTED: libcpp-has-no-threads
 // UNSUPPORTED: pre-sm-90
 // UNSUPPORTED: nvrtc

diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/__cuda/barrier.h b/libcudacxx/include/cuda/std/detail/libcxx/include/__cuda/barrier.h
@@ -955,7 +955,13 @@ template <size_t _Copy_size>
 inline __device__
 void __cp_async_shared_global(char * __dest, const char * __src) {
     // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async
+
+    // If `if constexpr` is not available, this function gets instantiated even
+    // if it is not called. Do not static_assert in that case.
+#if _LIBCUDACXX_STD_VER >= 17
     static_assert(_Copy_size == 4 || _Copy_size == 8 || _Copy_size == 16, "cp.async.shared.global requires a copy size of 4, 8, or 16.");
+#endif // _LIBCUDACXX_STD_VER >= 17
+
     asm volatile(
         "cp.async.ca.shared.global [%0], [%1], %2, %2;"
         :
@@ -982,7 +988,11 @@ void __cp_async_shared_global<16>(char * __dest, const char * __src) {
 template <size_t _Alignment, typename _Group>
 inline __device__
 void __cp_async_shared_global_mechanism(_Group __g, char * __dest, const char * __src, _CUDA_VSTD::size_t __size) {
+    // If `if constexpr` is not available, this function gets instantiated even
+    // if it is not called. Do not static_assert in that case.
+#if _LIBCUDACXX_STD_VER >= 17
     static_assert(4 <= _Alignment, "cp.async requires at least 4-byte alignment");
+#endif // _LIBCUDACXX_STD_VER >= 17
 
     // Maximal copy size is 16.
     constexpr int __copy_size = (_Alignment > 16) ? 16 : _Alignment;