Skip to content

Commit

Permalink
Fix C++11 support of recently added tests (#651)
Browse files Browse the repository at this point in the history
  • Loading branch information
ahendriksen authored Nov 3, 2023
1 parent 83b3365 commit abe6d1c
Show file tree
Hide file tree
Showing 6 changed files with 15 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
//
//===----------------------------------------------------------------------===//
//
// UNSUPPORTED: c++11
// UNSUPPORTED: libcpp-has-no-threads
// UNSUPPORTED: pre-sm-90
// UNSUPPORTED: nvrtc
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
//
//===----------------------------------------------------------------------===//
//
// UNSUPPORTED: c++11
// UNSUPPORTED: libcpp-has-no-threads
// UNSUPPORTED: pre-sm-90
// UNSUPPORTED: nvrtc
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
//
//===----------------------------------------------------------------------===//
//
// UNSUPPORTED: c++11
// UNSUPPORTED: libcpp-has-no-threads
// UNSUPPORTED: pre-sm-90
// UNSUPPORTED: nvrtc
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
//
//===----------------------------------------------------------------------===//
//
// UNSUPPORTED: c++11
// UNSUPPORTED: libcpp-has-no-threads
// UNSUPPORTED: pre-sm-90
// UNSUPPORTED: nvrtc
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
//
//===----------------------------------------------------------------------===//
//
// UNSUPPORTED: c++11
// UNSUPPORTED: libcpp-has-no-threads
// UNSUPPORTED: pre-sm-90
// UNSUPPORTED: nvrtc
Expand Down
10 changes: 10 additions & 0 deletions libcudacxx/include/cuda/std/detail/libcxx/include/__cuda/barrier.h
Original file line number Diff line number Diff line change
Expand Up @@ -955,7 +955,13 @@ template <size_t _Copy_size>
inline __device__
void __cp_async_shared_global(char * __dest, const char * __src) {
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async

// If `if constexpr` is not available, this function gets instantiated even
// if it is not called. Do not static_assert in that case.
#if _LIBCUDACXX_STD_VER >= 17
static_assert(_Copy_size == 4 || _Copy_size == 8 || _Copy_size == 16, "cp.async.shared.global requires a copy size of 4, 8, or 16.");
#endif // _LIBCUDACXX_STD_VER >= 17

asm volatile(
"cp.async.ca.shared.global [%0], [%1], %2, %2;"
:
Expand All @@ -982,7 +988,11 @@ void __cp_async_shared_global<16>(char * __dest, const char * __src) {
template <size_t _Alignment, typename _Group>
inline __device__
void __cp_async_shared_global_mechanism(_Group __g, char * __dest, const char * __src, _CUDA_VSTD::size_t __size) {
// If `if constexpr` is not available, this function gets instantiated even
// if it is not called. Do not static_assert in that case.
#if _LIBCUDACXX_STD_VER >= 17
static_assert(4 <= _Alignment, "cp.async requires at least 4-byte alignment");
#endif // _LIBCUDACXX_STD_VER >= 17

// Maximal copy size is 16.
constexpr int __copy_size = (_Alignment > 16) ? 16 : _Alignment;
Expand Down

0 comments on commit abe6d1c

Please sign in to comment.