diff --git a/libcudacxx/include/cuda/__tma/make_tma_descriptor.h b/libcudacxx/include/cuda/__tma/make_tma_descriptor.h index 5ccae27ccb2..ab8cafccc27 100644 --- a/libcudacxx/include/cuda/__tma/make_tma_descriptor.h +++ b/libcudacxx/include/cuda/__tma/make_tma_descriptor.h @@ -414,6 +414,9 @@ __get_tensor_sizes(const ::DLTensor& __tensor, int __rank, ::CUtensorMapDataType int64_t __cumulative_size = 1; if (__input_strides == nullptr) { +# if DLPACK_MAJOR_VERSION > 1 || (DLPACK_MAJOR_VERSION == 1 && DLPACK_MINOR_VERSION >= 2) + _CCCL_THROW(::std::invalid_argument{"__tensor.strides=nullptr is not supported for DLPack v1.2 and later"}); +# else for (int __i = 0; __i < __rank - 1; ++__i) { // TODO(fbusato): check mul overflow @@ -430,6 +433,7 @@ __get_tensor_sizes(const ::DLTensor& __tensor, int __rank, ::CUtensorMapDataType __output_strides[__i] = __stride_bytes; } return __output_strides; +# endif // DLPACK_MAJOR_VERSION > 1 || (DLPACK_MAJOR_VERSION == 1 && DLPACK_MINOR_VERSION >= 2) } // TMA ignores the innermost stride (always 1). for (int __i = __rank - 2; __i >= 0; --__i) diff --git a/libcudacxx/test/libcudacxx/cuda/tma/make_tma_descriptor.pass.cpp b/libcudacxx/test/libcudacxx/cuda/tma/make_tma_descriptor.pass.cpp index c6952d7d863..860a9268758 100644 --- a/libcudacxx/test/libcudacxx/cuda/tma/make_tma_descriptor.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/tma/make_tma_descriptor.pass.cpp @@ -130,9 +130,6 @@ bool test_strides() // stride is 0 strides_storage[0] = 0; unused(cuda::make_tma_descriptor(tensor, box_sizes)); - // stride is nullptr - tensor.strides = nullptr; - unused(cuda::make_tma_descriptor(tensor, box_sizes)); return true; }