Skip to content

Commit

Permalink
Add NVRTC_SKIP_KERNEL_RUN tag to compile, but skip running NVRTC test (
Browse files Browse the repository at this point in the history
…#434)

Some kernels using TMA need some host code to run as well. In addition,
they need the device symbols to be accessible from host code. This turns
out to be quite difficult to achieve. Therefore, since the device code
is already executed in the non-nvrtc tests, it is reasonable to allow a
test to skip running device code when NVRTC is enabled. This still tests
whether the code actually compiles.
  • Loading branch information
ahendriksen authored Sep 18, 2023
1 parent a6054a0 commit 6be81eb
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 6 deletions.
31 changes: 31 additions & 0 deletions libcudacxx/.upstream-tests/test/std/skip_nvrtc_launch.pass.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// -*- C++ -*-
//===----------------------------------------------------------------------===//
//
// Part of libcu++, the C++ Standard Library for your entire system,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//
//
// UNSUPPORTED: libcpp-has-no-threads
// UNSUPPORTED: !nvrtc
// NVRTC_SKIP_KERNEL_RUN // do compile, but do not run under nvrtc

#include <cuda/std/cassert>
#include <nv/target>

// This is a test of the NVRTC_SKIP_KERNEL_RUN tag that indicates that a test
// should compile under NVRTC, but should not be run.
int main(int, char**)
{
  // Device-only path: the assertion below always fails, so this test can only
  // pass if the compiled kernel is never actually launched.
  NV_DISPATCH_TARGET(NV_IS_DEVICE, (assert(false);));

  // Host path (and the compile-only case): nothing to check, report success.
  return 0;
}
8 changes: 6 additions & 2 deletions libcudacxx/.upstream-tests/utils/nvidia/nvrtc/nvrtc.sh
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,9 @@ trap finish EXIT
# Per-test launch settings, parsed from optional tags in the test source.
# When a tag is absent the pipeline fails and the fallback value is echoed
# instead (1 for the thread count, 0 for the shared-memory size).
# grep reads the file directly and uses -E rather than the obsolescent egrep.
thread_count=$(grep -E 'cuda_thread_count = [0-9]+' "${input}" | grep -Eo '[0-9]+' || echo 1)
shmem_size=$(grep -E 'cuda_block_shmem_size = [0-9]+' "${input}" | grep -Eo '[0-9]+' || echo 0)

# A test that mentions NVRTC_SKIP_KERNEL_RUN is still compiled, but its kernel
# is not launched. Reading the file with grep directly (no `cat |` pipeline)
# keeps the result independent of SIGPIPE/pipefail when grep -q exits early.
if grep -q 'NVRTC_SKIP_KERNEL_RUN' "${input}"
then
    do_run_kernel="false"
else
    do_run_kernel="true"
fi

if [[ "${#gpu_archs[@]}" -eq 0 ]]
then
arch=""
Expand All @@ -142,6 +145,7 @@ else
arch="compute_$(printf "%s\n" "${gpu_archs[@]}" | awk -F_ '{ print $2 }' | sort -un | head -n1)"
fi

# Emit the run/skip flag first so that the harness code appended later
# (tail.cu.in tests nvrtc_do_run_kernel) sees it declared.
# The redirect target is quoted, like every other write to the temp file, so a
# path containing whitespace does not cause an "ambiguous redirect".
echo "static const bool nvrtc_do_run_kernel = ${do_run_kernel};" >> "${tempfile}"
cat "${nvrtcdir}/head.cu.in" >> "${tempfile}"
cat "${input}" >> "${tempfile}"
cat "${nvrtcdir}/middle.cu.in" >> "${tempfile}"
Expand All @@ -158,8 +162,8 @@ then
fi
echo ' // END SCRIPT GENERATED OPTIONS' >> "${tempfile}"
cat "${nvrtcdir}/tail.cu.in" >> "${tempfile}"
echo ' '"${thread_count}, 1, 1," >> "${tempfile}"
echo ' '"${shmem_size}," >> "${tempfile}"
echo ' '"${thread_count}, 1, 1," >> "${tempfile}"
echo ' '"${shmem_size}," >> "${tempfile}"
cat "${nvrtcdir}/post_tail.cu.in" >> "${tempfile}"

# Keep a copy of the fully assembled source in the log directory.
# The target path is quoted so a log directory containing whitespace works.
cat "${tempfile}" > "${logdir}/generated_file"
Expand Down
7 changes: 5 additions & 2 deletions libcudacxx/.upstream-tests/utils/nvidia/nvrtc/post_tail.cu.in
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
NULL,
NULL, 0));
NULL,
NULL, 0));
} else {
printf("Skipped running NVRTC-compiled kernel.\n");
}

CUDA_API_CALL(cudaGetLastError());
CUDA_API_CALL(cudaDeviceSynchronize());
Expand Down
5 changes: 3 additions & 2 deletions libcudacxx/.upstream-tests/utils/nvidia/nvrtc/tail.cu.in
Original file line number Diff line number Diff line change
Expand Up @@ -39,5 +39,6 @@
CUDA_SAFE_CALL(cuCtxCreate(&context, 0, cuDevice));
CUDA_SAFE_CALL(cuModuleLoadDataEx(&module, code.get(), 0, 0, 0));
CUDA_SAFE_CALL(cuModuleGetFunction(&kernel, module, "main_kernel"));
CUDA_SAFE_CALL(cuLaunchKernel(kernel,
1, 1, 1,
if (nvrtc_do_run_kernel) {
CUDA_SAFE_CALL(cuLaunchKernel(kernel,
1, 1, 1,

0 comments on commit 6be81eb

Please sign in to comment.