From 6be81eb472117e5e7b12c3d2d9d3314c32afb12a Mon Sep 17 00:00:00 2001 From: Allard Hendriksen Date: Mon, 18 Sep 2023 09:16:06 +0200 Subject: [PATCH] Add NVRTC_SKIP_KERNEL_RUN tag to compile, but skip running NVRTC test (#434) Some kernels using TMA need some host code to run as well. In addition, they need the device symbols to be accessible from host code. This turns out to be quite difficult to achieve. Therefore, since the device code is already executed in the non-nvrtc tests, it is reasonable to allow a test to skip running device code when NVRTC is enabled. This still tests whether the code actually compiles. --- .../test/std/skip_nvrtc_launch.pass.cpp | 31 +++++++++++++++++++ .../utils/nvidia/nvrtc/nvrtc.sh | 8 +++-- .../utils/nvidia/nvrtc/post_tail.cu.in | 7 +++-- .../utils/nvidia/nvrtc/tail.cu.in | 5 +-- 4 files changed, 45 insertions(+), 6 deletions(-) create mode 100644 libcudacxx/.upstream-tests/test/std/skip_nvrtc_launch.pass.cpp diff --git a/libcudacxx/.upstream-tests/test/std/skip_nvrtc_launch.pass.cpp b/libcudacxx/.upstream-tests/test/std/skip_nvrtc_launch.pass.cpp new file mode 100644 index 00000000000..beed5046b38 --- /dev/null +++ b/libcudacxx/.upstream-tests/test/std/skip_nvrtc_launch.pass.cpp @@ -0,0 +1,31 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// +// +// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: !nvrtc +// NVRTC_SKIP_KERNEL_RUN // do compile, but do not run under nvrtc + +#include +#include + +// This is a test of the NVRTC_SKIP_KERNEL_RUN tag that indicates that a test +// should compiler under NVRTC, but should not be run. +int main(int, char**) +{ + NV_DISPATCH_TARGET( + NV_IS_DEVICE, ( + // Ensure that code fails at runtime when run under NVRTC. + assert(false); + ) + ); + + return 0; +} diff --git a/libcudacxx/.upstream-tests/utils/nvidia/nvrtc/nvrtc.sh b/libcudacxx/.upstream-tests/utils/nvidia/nvrtc/nvrtc.sh index 66eb9c84d09..f4b63d2cbb0 100755 --- a/libcudacxx/.upstream-tests/utils/nvidia/nvrtc/nvrtc.sh +++ b/libcudacxx/.upstream-tests/utils/nvidia/nvrtc/nvrtc.sh @@ -128,6 +128,9 @@ trap finish EXIT thread_count=$(cat "${input}" | egrep 'cuda_thread_count = [0-9]+' | egrep -o '[0-9]+' || echo 1) shmem_size=$(cat "${input}" | egrep 'cuda_block_shmem_size = [0-9]+' | egrep -o '[0-9]+' || echo 0) +# grep through test to see if running the NVRTC kernel is disabled. +do_run_kernel=$(cat "${input}" | grep -q NVRTC_SKIP_KERNEL_RUN && echo "false" || echo "true") + if [[ "${#gpu_archs[@]}" -eq 0 ]] then arch="" @@ -142,6 +145,7 @@ else arch="compute_$(printf "%s\n" "${gpu_archs[@]}" | awk -F_ '{ print $2 }' | sort -un | head -n1)" fi +echo "static const bool nvrtc_do_run_kernel = ${do_run_kernel};" >> ${tempfile} cat "${nvrtcdir}/head.cu.in" >> "${tempfile}" cat "${input}" >> "${tempfile}" cat "${nvrtcdir}/middle.cu.in" >> "${tempfile}" @@ -158,8 +162,8 @@ then fi echo ' // END SCRIPT GENERATED OPTIONS' >> "${tempfile}" cat "${nvrtcdir}/tail.cu.in" >> "${tempfile}" -echo ' '"${thread_count}, 1, 1," >> "${tempfile}" -echo ' '"${shmem_size}," >> "${tempfile}" +echo ' '"${thread_count}, 1, 1," >> "${tempfile}" +echo ' '"${shmem_size}," >> "${tempfile}" cat "${nvrtcdir}/post_tail.cu.in" >> "${tempfile}" cat "${tempfile}" > ${logdir}/generated_file diff --git a/libcudacxx/.upstream-tests/utils/nvidia/nvrtc/post_tail.cu.in b/libcudacxx/.upstream-tests/utils/nvidia/nvrtc/post_tail.cu.in index 507be254ae1..a0f28ebf2df 100644 --- a/libcudacxx/.upstream-tests/utils/nvidia/nvrtc/post_tail.cu.in +++ b/libcudacxx/.upstream-tests/utils/nvidia/nvrtc/post_tail.cu.in @@ -1,5 +1,8 @@ - NULL, - NULL, 0)); + NULL, + NULL, 0)); + } else { + printf("Skipped running NVRTC-compiled kernel.\n"); + } CUDA_API_CALL(cudaGetLastError()); CUDA_API_CALL(cudaDeviceSynchronize()); diff --git a/libcudacxx/.upstream-tests/utils/nvidia/nvrtc/tail.cu.in b/libcudacxx/.upstream-tests/utils/nvidia/nvrtc/tail.cu.in index 1a400d688d4..d3d678114b0 100644 --- a/libcudacxx/.upstream-tests/utils/nvidia/nvrtc/tail.cu.in +++ b/libcudacxx/.upstream-tests/utils/nvidia/nvrtc/tail.cu.in @@ -39,5 +39,6 @@ CUDA_SAFE_CALL(cuCtxCreate(&context, 0, cuDevice)); CUDA_SAFE_CALL(cuModuleLoadDataEx(&module, code.get(), 0, 0, 0)); CUDA_SAFE_CALL(cuModuleGetFunction(&kernel, module, "main_kernel")); - CUDA_SAFE_CALL(cuLaunchKernel(kernel, - 1, 1, 1, + if (nvrtc_do_run_kernel) { + CUDA_SAFE_CALL(cuLaunchKernel(kernel, + 1, 1, 1,