diff --git a/libcudacxx/.upstream-tests/test/std/skip_nvrtc_launch.pass.cpp b/libcudacxx/.upstream-tests/test/std/skip_nvrtc_launch.pass.cpp
new file mode 100644
index 00000000000..beed5046b38
--- /dev/null
+++ b/libcudacxx/.upstream-tests/test/std/skip_nvrtc_launch.pass.cpp
@@ -0,0 +1,31 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+//
+// UNSUPPORTED: libcpp-has-no-threads
+// UNSUPPORTED: !nvrtc
+// NVRTC_SKIP_KERNEL_RUN // do compile, but do not run under nvrtc
+
+#include <cuda/std/cassert>
+#include <nv/target>
+
+// This is a test of the NVRTC_SKIP_KERNEL_RUN tag that indicates that a test
+// should compiler under NVRTC, but should not be run.
+int main(int, char**)
+{
+  NV_DISPATCH_TARGET(
+        NV_IS_DEVICE, (
+          // Ensure that code fails at runtime when run under NVRTC.
+          assert(false);
+        )
+    );
+
+  return 0;
+}
diff --git a/libcudacxx/.upstream-tests/utils/nvidia/nvrtc/nvrtc.sh b/libcudacxx/.upstream-tests/utils/nvidia/nvrtc/nvrtc.sh
index 66eb9c84d09..f4b63d2cbb0 100755
--- a/libcudacxx/.upstream-tests/utils/nvidia/nvrtc/nvrtc.sh
+++ b/libcudacxx/.upstream-tests/utils/nvidia/nvrtc/nvrtc.sh
@@ -128,6 +128,9 @@ trap finish EXIT
 thread_count=$(cat "${input}" | egrep 'cuda_thread_count = [0-9]+' | egrep -o '[0-9]+' || echo 1)
 shmem_size=$(cat "${input}" | egrep 'cuda_block_shmem_size = [0-9]+' | egrep -o '[0-9]+' || echo 0)
 
+# grep through test to see if running the NVRTC kernel is disabled.
+do_run_kernel=$(cat "${input}" | grep -q NVRTC_SKIP_KERNEL_RUN && echo "false" || echo "true")
+
 if [[ "${#gpu_archs[@]}" -eq 0 ]]
 then
     arch=""
@@ -142,6 +145,7 @@ else
     arch="compute_$(printf "%s\n" "${gpu_archs[@]}" | awk -F_ '{ print $2 }' | sort -un | head -n1)"
 fi
 
+echo "static const bool nvrtc_do_run_kernel = ${do_run_kernel};" >> ${tempfile}
 cat "${nvrtcdir}/head.cu.in" >> "${tempfile}"
 cat "${input}" >> "${tempfile}"
 cat "${nvrtcdir}/middle.cu.in" >> "${tempfile}"
@@ -158,8 +162,8 @@ then
 fi
 echo '        // END SCRIPT GENERATED OPTIONS' >> "${tempfile}"
 cat "${nvrtcdir}/tail.cu.in" >> "${tempfile}"
-echo '        '"${thread_count}, 1, 1," >> "${tempfile}"
-echo '        '"${shmem_size}," >> "${tempfile}"
+echo '            '"${thread_count}, 1, 1," >> "${tempfile}"
+echo '            '"${shmem_size}," >> "${tempfile}"
 cat "${nvrtcdir}/post_tail.cu.in" >> "${tempfile}"
 
 cat "${tempfile}" > ${logdir}/generated_file
diff --git a/libcudacxx/.upstream-tests/utils/nvidia/nvrtc/post_tail.cu.in b/libcudacxx/.upstream-tests/utils/nvidia/nvrtc/post_tail.cu.in
index 507be254ae1..a0f28ebf2df 100644
--- a/libcudacxx/.upstream-tests/utils/nvidia/nvrtc/post_tail.cu.in
+++ b/libcudacxx/.upstream-tests/utils/nvidia/nvrtc/post_tail.cu.in
@@ -1,5 +1,8 @@
-        NULL,
-        NULL, 0));
+            NULL,
+            NULL, 0));
+    } else {
+        printf("Skipped running NVRTC-compiled kernel.\n");
+    }
 
     CUDA_API_CALL(cudaGetLastError());
     CUDA_API_CALL(cudaDeviceSynchronize());
diff --git a/libcudacxx/.upstream-tests/utils/nvidia/nvrtc/tail.cu.in b/libcudacxx/.upstream-tests/utils/nvidia/nvrtc/tail.cu.in
index 1a400d688d4..d3d678114b0 100644
--- a/libcudacxx/.upstream-tests/utils/nvidia/nvrtc/tail.cu.in
+++ b/libcudacxx/.upstream-tests/utils/nvidia/nvrtc/tail.cu.in
@@ -39,5 +39,6 @@
     CUDA_SAFE_CALL(cuCtxCreate(&context, 0, cuDevice));
     CUDA_SAFE_CALL(cuModuleLoadDataEx(&module, code.get(), 0, 0, 0));
     CUDA_SAFE_CALL(cuModuleGetFunction(&kernel, module, "main_kernel"));
-    CUDA_SAFE_CALL(cuLaunchKernel(kernel,
-        1, 1, 1,
+    if (nvrtc_do_run_kernel) {
+        CUDA_SAFE_CALL(cuLaunchKernel(kernel,
+            1, 1, 1,