diff --git a/nvrtcc/CMakeLists.txt b/nvrtcc/CMakeLists.txt
new file mode 100644
index 00000000000..a720cc4e89a
--- /dev/null
+++ b/nvrtcc/CMakeLists.txt
@@ -0,0 +1,56 @@
+##===----------------------------------------------------------------------===##
+##
+## Part of nvrtcc in CUDA Core Compute Libraries,
+## under the Apache License v2.0 with LLVM Exceptions.
+## See https://llvm.org/LICENSE.txt for license information.
+## SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+## SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+##
+##===----------------------------------------------------------------------===##
+
+cmake_minimum_required(VERSION 3.21)
+
+if (NOT CMAKE_BUILD_TYPE)
+  set(CMAKE_BUILD_TYPE Release)
+endif()
+
+project(nvrtcc LANGUAGES CXX)
+
+# Set output directory.
+set(nvrtcc_exec_output_dir ${nvrtcc_BINARY_DIR}/bin)
+
+find_package(CUDAToolkit REQUIRED)
+
+# Find libcufilt.
+find_library(
+  nvrtcc_cufilt_lib
+  "cufilt"
+  PATHS "${CUDAToolkit_LIBRARY_DIR}"
+  NO_DEFAULT_PATH
+)
+if (NOT nvrtcc_cufilt_lib)
+  message(FATAL_ERROR "nvrtcc: cu++filt library (libcufilt.a) not found.")
+endif()
+
+# Add nvrtcc_device_compiler target.
+add_executable(nvrtcc_device_compiler src/nvrtcc_device_compiler.cpp)
+target_compile_features(nvrtcc_device_compiler PRIVATE cxx_std_17)
+target_include_directories(
+  nvrtcc_device_compiler
+  PRIVATE "${CUDAToolkit_INCLUDE_DIRS}"
+)
+target_link_libraries(nvrtcc_device_compiler PRIVATE ${nvrtcc_cufilt_lib})
+set_target_properties(
+  nvrtcc_device_compiler
+  PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${nvrtcc_exec_output_dir}"
+)
+
+# Add nvrtcc target that copies nvrtcc_cicc to the output directory and depends on nvrtcc_device_compiler.
+add_custom_target(
+  nvrtcc
+  ALL
+  COMMAND
+    ${CMAKE_COMMAND} -E copy_directory ${CMAKE_SOURCE_DIR}/bin
+    ${nvrtcc_exec_output_dir}
+)
+add_dependencies(nvrtcc nvrtcc_device_compiler)
diff --git a/nvrtcc/README.md b/nvrtcc/README.md
new file mode 100644
index 00000000000..38420fc88d3
--- /dev/null
+++ b/nvrtcc/README.md
@@ -0,0 +1,55 @@
+> [!CAUTION]
+> This is an internal tool not intended for public use.
+
+> [!WARNING]
+> This tool is experimental.
+
+# NVRTCC
+
+## Overview
+
+`nvrtcc` is a tool to simplify [NVRTC](https://docs.nvidia.com/cuda/nvrtc/index.html) testing. It follows the `nvcc` compilation trajectory and replaces the `nvcc` generated PTX code with the NVRTC compiled one. The main advantage is that you can keep the source files almost the same as if compiled with `nvcc`. `nvrtcc` makes sure all of the necessary symbols are present in the generated PTX and let's `nvcc` do the host compilation and linking.
+
+The compilation with NVRTC is optional and can be controlled by the `-use-nvrtc` flag. This allows `nvrtcc` to be used as the `CMAKE_CUDA_COMPILER` where we can trick CMake to think it's using `nvcc`. `nvrtcc` supports almost all `nvcc` options except for those that are unsupported by NVRTC.
+
+## Example
+
+When compiling with `nvrtcc`, we need to make sure NVRTC wouldn't see the host includes and symbols, so they must be guarded by `#ifndef __CUDACC_RTC__` preprocessor directive.
+```cpp
+// hello_world.cu
+
+#ifndef __CUDACC_RTC__
+#  include <cstdio>
+#endif
+
+__global__ void kernel()
+{
+#ifdef __CUDACC_RTC__
+    printf("Hello world from NVRTC!\n");
+#else
+    printf("Hello world from NVCC!\n");
+#endif
+}
+
+#ifndef __CUDACC_RTC__
+int main()
+{
+    kernel<<<1, 1>>>();
+    cudaDeviceSynchronize();
+}
+#endif
+```
+
+If we compile with `nvcc` the output should look as:
+```sh
+$ nvcc hello_world.cu -o hello_world
+$ ./hello_world
+Hello world from NVCC!
+```
+
+On the other hand if compiled with `nvrtcc` with `-use-nvrtc` flag present, the output should be:
+```sh
+$ nvrtcc hello_world.cu -o hello_world -use-nvrtc
+$ ./hello_world
+Hello world from NVRTCC!
+```
diff --git a/nvrtcc/bin/nvrtcc b/nvrtcc/bin/nvrtcc
new file mode 100755
index 00000000000..cbae2861fd3
--- /dev/null
+++ b/nvrtcc/bin/nvrtcc
@@ -0,0 +1,609 @@
+#!/usr/bin/env python3
+
+##===----------------------------------------------------------------------===##
+##
+## Part of nvrtcc in CUDA Core Compute Libraries,
+## under the Apache License v2.0 with LLVM Exceptions.
+## See https://llvm.org/LICENSE.txt for license information.
+## SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+## SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+##
+##===----------------------------------------------------------------------===##
+
+import argparse
+import copy
+import os
+import re
+import shlex
+import shutil
+import subprocess
+import sys
+
+
+class Nvrtcc:
+    @staticmethod
+    def _parse_nvrtcc_opts(opts):
+        parser = argparse.ArgumentParser(allow_abbrev=False)
+
+        parser.add_argument("-nvccbin", "--nvccbin", nargs=1)
+        parser.add_argument("-use-nvrtc", "--use-nvrtc", action="store_true")
+        parser.add_argument("-Xnvrtc", "--nvrtc-options", action="append", default=[])
+
+        return parser.parse_known_args(opts)
+
+    @staticmethod
+    def _get_nvcc_bin_abs_path(nvcc_bin_arg, env):
+        if nvcc_bin_arg is None:
+            nvcc_bin = env.get("NVRTCC_NVCC_BIN", "nvcc")
+        else:
+            nvcc_bin = nvcc_bin_arg
+
+        if not os.path.isabs(nvcc_bin):
+            nvcc_bin = shutil.which(nvcc_bin)
+
+        return nvcc_bin
+
+    @staticmethod
+    def _parse_nvcc_opts(nvcc_opts):
+        parser = argparse.ArgumentParser(allow_abbrev=False)
+
+        # File and Path Specifications
+        parser.add_argument("-o", "--output-file", nargs=1, type=str)
+        parser.add_argument("-objtemp", "--objdir-as-tempdir", action="store_true")
+        parser.add_argument("-include", "--pre-include", action="append", default=[])
+        parser.add_argument("-l", "--library", action="append", default=[])
+        parser.add_argument("-D", "--define-macro", action="append", default=[])
+        parser.add_argument("-U", "--undefine-macro", action="append", default=[])
+        parser.add_argument("-I", "--include-path", action="append", default=[])
+        parser.add_argument("-isystem", "--system-include", action="append", default=[])
+        parser.add_argument("-L", "--library-path", action="append", default=[])
+        parser.add_argument("-odir", "--output-directory", nargs=1, type=str)
+        parser.add_argument("-MF", "--dependency-output", nargs=1, type=str)
+        parser.add_argument("-MP", "--generate-dependency-targets", action="store_true")
+        parser.add_argument("-ccbin", "--compiler-bindir", nargs=1, type=str)
+        parser.add_argument(
+            "-allow-unsupported-compiler",
+            "--allow-unsupported-compiler",
+            action="store_true",
+        )
+        parser.add_argument("-arbin", "--archiver-binary", nargs=1, type=str)
+        parser.add_argument("-cudart", "--cudart", choices=["none", "shared", "static"])
+        parser.add_argument("-cudadevrt", "--cudadevrt", choices=["none", "static"])
+        parser.add_argument("-ldir", "--libdevice-directory", nargs=1, type=str)
+        parser.add_argument("-target-dir", "--target-directory", nargs=1, type=str)
+
+        # Options for Specifying the Compilation Phase
+        parser.add_argument("-link", "--link", action="store_true")
+        parser.add_argument("-lib", "--lib", action="store_true")
+        parser.add_argument("-dlink", "--device-link", action="store_true")
+        parser.add_argument("-dc", "--device-c", action="store_true")
+        parser.add_argument("-dw", "--device-w", action="store_true")
+        parser.add_argument("-cuda", "--cuda", action="store_true")
+        parser.add_argument("-c", "--compile", action="store_true")
+        parser.add_argument("-fatbin", "--fatbin", action="store_true")
+        parser.add_argument("-cubin", "--cubin", action="store_true")
+        parser.add_argument("-ptx", "--ptx", action="store_true")
+        parser.add_argument("-E", "--preprocess", action="store_true")
+        parser.add_argument("-M", "--generate-dependencies", action="store_true")
+        parser.add_argument(
+            "-MM", "--generate-nonsystem-dependencies", action="store_true"
+        )
+        parser.add_argument(
+            "-MD", "--generate-dependencies-with-compile", action="store_true"
+        )
+        parser.add_argument(
+            "-MMD",
+            "--generate-nonsystem-dependencies-with-compile",
+            action="store_true",
+        )
+        parser.add_argument("-optix-ir", "--optix-ir", action="store_true")
+        parser.add_argument("-ltoir", "--ltoir", action="store_true")
+        parser.add_argument("-run", "--run", action="store_true")
+
+        # Options for Specifying Behavior of Compiler/Linker
+        parser.add_argument("-pg", "--profile", action="store_true")
+        parser.add_argument("-g", "--debug", action="store_true")
+        parser.add_argument("-G", "--device-debug", action="store_true")
+        parser.add_argument("-lineinfo", "--generate-line-info", action="store_true")
+        parser.add_argument(
+            "-opt-info", "-optimization-info", action="append", default=[]
+        )
+        parser.add_argument("-O", "--optimize", nargs=1, type=str)
+        parser.add_argument("-Ofc", "--Ofast-compile", nargs=1, type=str)
+        parser.add_argument("-dopt", "--dopt", choices=["on"])
+        parser.add_argument("-dlto", "--dlink-time-opt", action="store_true")
+        parser.add_argument("-lto", "--lto", action="store_true")
+        parser.add_argument("-gen-opt-lto", "--gen-opt-lto", action="store_true")
+        parser.add_argument(
+            "-ftemplate-backtrace-limit", "--ftemplate-backtrace-limit", nargs=1
+        )
+        parser.add_argument("-ftemplate-depth", "--ftemplate-depth", nargs=1, type=int)
+        parser.add_argument("-noeh", "--no-exceptions", action="store_true")
+        parser.add_argument("-shared", "--shared", action="store_true")
+        parser.add_argument("-x", "--x", nargs=1, type=str)
+        parser.add_argument("-std", "--std", nargs=1, type=str)
+        parser.add_argument(
+            "-nohdinitlist", "--no-host-device-initializer-list", action="store_true"
+        )
+        parser.add_argument(
+            "-nohdmoveforward", "--no-host-device-move-forward", action="store_true"
+        )
+        parser.add_argument(
+            "-expt-relaxed-constexpr", "--expt-relaxed-constexpr", action="store_true"
+        )
+        parser.add_argument(
+            "-extended-lambda", "--extended-lambda", action="store_true"
+        )
+        parser.add_argument(
+            "-expt-extended-lambda", "--expt-extended-lambda", action="store_true"
+        )
+        parser.add_argument("-m", "--machine", nargs=1, type=int)
+        parser.add_argument("-m64", "--m64", action="store_true")
+
+        # Options for passing specific phase options
+        parser.add_argument(
+            "-Xcompiler", "--compiler-options", action="append", default=[]
+        )
+        parser.add_argument("-Xlinker", "--linker-options", action="append", default=[])
+        parser.add_argument(
+            "-Xarchive", "--archive-options", action="append", default=[]
+        )
+        parser.add_argument("-Xptxas", "--ptxas-options", action="append", default=[])
+        parser.add_argument("-Xnvlink", "--nvlink-options", action="append", default=[])
+
+        # Miscellaneous options for guiding the compiler driver.
+        parser.add_argument(
+            "-static-global-template-stub", "--static-global-template-stub", nargs=1
+        )
+        parser.add_argument(
+            "-device-entity-has-hidden-visibility",
+            "--device-entity-has-hidden-visibility",
+            nargs=1,
+        )
+        parser.add_argument(
+            "-forward-unknown-to-host-compiler",
+            "--forward-unknown-to-host-compiler",
+            action="store_true",
+        )
+        parser.add_argument(
+            "-forward-unknown-to-host-linker",
+            "--forward-unknown-to-host-linker",
+            action="store_true",
+        )
+        parser.add_argument(
+            "-forward-unknown-opts", "--forward-unknown-opts", action="store_true"
+        )
+        parser.add_argument("-noprof", "--dont-use-profile", action="store_true")
+        parser.add_argument("-dryrun", "--dryrun", action="store_true")
+        parser.add_argument("-v", "--verbose", action="store_true")
+        parser.add_argument("-t", "--threads", nargs=1, type=int)
+        parser.add_argument("-split-compile", "--split-compile", nargs=1, type=int)
+        parser.add_argument(
+            "-split-compile-extended", "--split-compile-extended", nargs=1, type=int
+        )
+        parser.add_argument(
+            "-fdevice-syntax-only", "--fdevice-syntax-only", nargs=1, type=str
+        )
+        parser.add_argument(
+            "-fdevice-time-trace", "--fdevice-time-trace", nargs=1, type=str
+        )
+        parser.add_argument("-keep", "--keep", action="store_true")
+        parser.add_argument("-keep-dir", "--keep-dir", nargs=1, type=str)
+        parser.add_argument("-save-temps", "--save-temps", action="store_true")
+        parser.add_argument("-clean", "--clean-targets", action="store_true")
+        parser.add_argument("-time", "--time", nargs=1, type=str)
+        parser.add_argument("-run-args", "--run-args", nargs=1)
+        parser.add_argument("-idp", "--input-drive-prefix", nargs=1, type=str)
+        parser.add_argument("-ddp", "--dependency-drive-prefix", nargs=1, type=str)
+        parser.add_argument("-dp", "--drive-prefix", nargs=1, type=str)
+        parser.add_argument("-MT", "--dependency-target-name", nargs=1, type=str)
+        parser.add_argument(
+            "-no-align-double", "--no-align-double", action="store_true"
+        )
+        parser.add_argument("-nodlink", "--no-device-link", action="store_true")
+
+        # Options for steering GPU code generation.
+        parser.add_argument("-arch", "--gpu-architecture", action="append", default=[])
+        parser.add_argument("-code", "--gpu-code", action="append", default=[])
+        parser.add_argument("-gencode", "--generate-code", action="append", default=[])
+        parser.add_argument("-rdc", "--relocatable-device-code", nargs=1, type=str)
+        parser.add_argument("-e", "--entries", action="append", default=[])
+        parser.add_argument("-maxrregcount", "--maxrregcount", nargs=1)
+        parser.add_argument("-use_fast_math", "--use_fast_math", action="store_true")
+        parser.add_argument("-ftz", "--ftz", nargs=1, type=str)
+        parser.add_argument("-prec-div", "--prec-div", nargs=1)
+        parser.add_argument("-prec-sqrt", "--prec-sqrt", nargs=1)
+        parser.add_argument("-fmad", "--fmad", nargs=1)
+        parser.add_argument(
+            "-extra-device-vectorization",
+            "--extra-device-vectorization",
+            action="store_true",
+        )
+
+        # Options for steering cuda compilation.
+        parser.add_argument("-default-stream", "--default-stream", nargs=1)
+
+        # Generic tool options.
+        parser.add_argument("-w", "--disable-warnings", action="store_true")
+        parser.add_argument(
+            "-keep-device-functions", "--keep-device-functions", action="store_true"
+        )
+        parser.add_argument("-src-in-ptx", "--source-in-ptx", action="store_true")
+        parser.add_argument("-restrict", "--restrict", action="store_true")
+        parser.add_argument("-Wreorder", "--Wreorder", action="store_true")
+        parser.add_argument(
+            "-Wdefault-stream-launch", "--Wdefault-stream-launch", action="store_true"
+        )
+        parser.add_argument(
+            "-Wmissing-launch-bounds", "--Wmissing-launch-bounds", action="store_true"
+        )
+        parser.add_argument(
+            "-Wext-lambda-captures-this",
+            "--Wext-lambda-captures-this",
+            action="store_true",
+        )
+        parser.add_argument(
+            "-Wno-deprecated-declarations",
+            "--Wno-deprecated-declarations",
+            action="store_true",
+        )
+        parser.add_argument(
+            "-Wno-deprecated-gpu-targets",
+            "--Wno-deprecated-gpu-targets",
+            action="store_true",
+        )
+        parser.add_argument("-Werror", "--Werror", action="append", default=[])
+        parser.add_argument("-res-usage", "--resource-usage", action="store_true")
+        parser.add_argument("-ewp", "--extensible-whole-program", action="store_true")
+        parser.add_argument("-no-compress", "--no-compress", action="store_true")
+        parser.add_argument("-qpp-config", "--qpp-config", action="store_true")
+        parser.add_argument(
+            "-astoolpatch", "--compile-as-tools-patch", action="store_true"
+        )
+        parser.add_argument("-code-ls", "--list-gpu-code", action="store_true")
+        parser.add_argument("-arch-ls", "--list-gpu-arch", action="store_true")
+        parser.add_argument("-err-no", "--display-error-number", action="store_true")
+        parser.add_argument(
+            "-no-err-no", "--no-display-error-number", action="store_true"
+        )
+        parser.add_argument("-diag-error", "--diag-error", action="append", default=[])
+        parser.add_argument(
+            "-diag-suppress", "--diag-suppress", action="append", default=[]
+        )
+        parser.add_argument("-diag-warn", "--diag-warn", action="append", default=[])
+        parser.add_argument("-hls", "--host-linker-script", nargs=1, type=str)
+        parser.add_argument(
+            "-aug-hls", "--augment-host-linker-script", action="store_true"
+        )
+        parser.add_argument("-r", "--relocatable-link", action="store_true")
+        parser.add_argument("-brief-diag", "--brief-diagnostics", nargs=1, type=str)
+        parser.add_argument("-jtd", "--jump-table-density", nargs=1, type=int)
+        parser.add_argument("-reloc-ptx", "--relocatable-ptx", action="store_true")
+        parser.add_argument(
+            "-device-stack-protector", "--device-stack-protector", nargs=1, type=str
+        )
+        parser.add_argument("-compress-mode", "--compress-mode", nargs=1, type=str)
+        parser.add_argument("-frandom-seed", "--frandom-seed", nargs=1, type=str)
+        parser.add_argument(
+            "-fdevice-sanitize", "--fdevice-sanitize", nargs=1, type=str
+        )
+        parser.add_argument("-jobserver", "--jobserver", action="store_true")
+        # "-h", "--help"
+        parser.add_argument("-V", "--version", action="store_true")
+        parser.add_argument("-optf", "--options-file", action="append", default=[])
+
+        return parser.parse_known_args(nvcc_opts)[0]
+
+    def __init__(self, opts: list[str], env):
+        args, nvcc_opts = Nvrtcc._parse_nvrtcc_opts(opts)
+
+        self.nvcc_bin = Nvrtcc._get_nvcc_bin_abs_path(args.nvccbin, env)
+        self.use_nvrtc = args.use_nvrtc
+        self.nvrtc_opts = args.nvrtc_options
+        self.nvcc_opts = ["-D__NVRTCC__"] + nvcc_opts
+        self.nvcc_args = Nvrtcc._parse_nvcc_opts(self.nvcc_opts)
+        self.device_compile_bin = os.path.join(
+            os.path.dirname(__file__), "nvrtcc_device_compiler"
+        )
+        self.nvrtc_lib = os.path.join(
+            os.path.dirname(os.path.dirname(self.nvcc_bin)), "lib64", "libnvrtc.so"
+        )
+        self.env = env
+
+        # Clear environment variables that prepend/append nvcc flags.
+        self.env.pop("NVCC_PREPEND_FLAGS", None)
+        self.env.pop("NVCC_APPEND_FLAGS", None)
+
+    @staticmethod
+    def _emit_error(msg, retcode=-1):
+        print(f"nvrtcc error: {msg}", file=sys.stderr)
+        sys.exit(retcode)
+
+    @staticmethod
+    def _emit_warning(msg):
+        print(f"nvrtcc warning: {msg}", file=sys.stderr)
+
+    def _check_nvcc_args(self):
+        if self.nvcc_args.optix_ir:
+            Nvrtcc._emit_error(
+                "compiling to Optix IR with nvrtc is currently unsupported"
+            )
+
+        if (
+            self.nvcc_args.ltoir
+            or self.nvcc_args.dlink_time_opt
+            or self.nvcc_args.gen_opt_lto
+        ):
+            Nvrtcc._emit_error(
+                "compiling to LTO IR with nvrtc is currently unsupported"
+            )
+
+        if self.nvcc_args.expt_relaxed_constexpr:
+            Nvrtcc._emit_warning("relaxed constexpr is unsupported with nvrtc")
+
+        if self.nvcc_args.extended_lambda or self.nvcc_args.expt_extended_lambda:
+            Nvrtcc._emit_warning(
+                "extended lambda declarations are unsupported with nvrtc"
+            )
+
+        if self.nvcc_args.keep_device_functions:
+            Nvrtcc._emit_warning("keeping device functions is unsupported with nvrtc")
+
+        if self.nvcc_args.options_file:
+            Nvrtcc._emit_error("passing command line options via file is not supported")
+
+    def _get_nvcc_cmd_list(self) -> list[str]:
+        run_result = subprocess.run(
+            [self.nvcc_bin] + self.nvcc_opts + ["-dryrun"],
+            stderr=subprocess.PIPE,
+            env=self.env,
+        )
+
+        if run_result.returncode != 0:
+            Nvrtcc._emit_error(
+                "failed to retrieve nvcc command list", run_result.returncode
+            )
+
+        lines = run_result.stderr.decode().splitlines()
+        return [line.removeprefix("#$ ") for line in lines]
+
+    def _print_if_verbose(self, msg, **kwargs):
+        if self.nvcc_args.verbose:
+            print(msg, **kwargs)
+
+    @staticmethod
+    def _is_comment(cmd):
+        return cmd.startswith("--")
+
+    @staticmethod
+    def _is_env_var_assign(cmd):
+        return re.match(r"^[a-zA-Z0-9_]+=", cmd)
+
+    @staticmethod
+    def _split_cmd(cmd):
+        result = shlex.split(cmd)
+        return result[0], result[1:]
+
+    @staticmethod
+    def _parse_ccbin_invocation(opts):
+        arch_list = None
+
+        arch_list_def = [x for x in opts if x.startswith("-D__CUDA_ARCH_LIST__=")]
+        if arch_list_def:
+            assert len(arch_list_def) == 1
+            arch_list = arch_list_def[0].removeprefix("-D__CUDA_ARCH_LIST__=")
+
+        return argparse.Namespace(arch_list=arch_list)
+
+    @staticmethod
+    def _parse_cicc_invocation(opts):
+        parser = argparse.ArgumentParser(allow_abbrev=False)
+
+        # Positional arguments.
+        parser.add_argument("file", nargs="*")
+
+        # Recognized used arguments.
+        parser.add_argument("-o", "--output-file", type=str)
+        parser.add_argument("--orig_src_file_name", type=str)
+        parser.add_argument("--orig_src_path_name", type=str)
+        parser.add_argument("--c++17", dest="cxx17", action="store_true")
+        parser.add_argument("--c++20", dest="cxx20", action="store_true")
+        parser.add_argument("--c++23", dest="cxx23", action="store_true")
+        parser.add_argument("-arch", type=str)
+        parser.add_argument("-maxreg", type=int)
+        parser.add_argument("-ftz", type=int)
+        parser.add_argument("-prec_div", type=int)
+        parser.add_argument("-prec_sqrt", type=int)
+        parser.add_argument("-fmad", type=int)
+        parser.add_argument("--device-c", action="store_true")
+        parser.add_argument("--partial-link", action="store_true")
+        parser.add_argument("--debug_mode", action="store_true")
+        parser.add_argument("-generate-line-info", action="store_true")
+        parser.add_argument("-O0", dest="dopt", action="store_false", default=True)
+        parser.add_argument("--Ofast-compile", type=str)
+        parser.add_argument("--frandom-seed", nargs=1, type=str)
+        parser.add_argument("-extra-device-vectorization", action="store_true")
+        parser.add_argument("-jump-table-density", type=int)
+        parser.add_argument("-w", action="store_true")
+        parser.add_argument("-kernel-params-are-restrict", action="store_true")
+        parser.add_argument("-inline-info", action="store_true")
+        parser.add_argument("--display_error_number", action="store_true")
+        parser.add_argument("-diag_error", "--diag_error", action="append", default=[])
+        parser.add_argument(
+            "-diag_suppress", "--diag_suppress", action="append", default=[]
+        )
+        parser.add_argument("-diag_warn", "--diag_warn", action="append", default=[])
+        parser.add_argument("--brief_diagnostics", action="store_true")
+        parser.add_argument("-split-compile", nargs=1)
+        parser.add_argument("--device-time-trace", nargs=1)
+        parser.add_argument("--promote_warnings", action="store_true")
+
+        # Recognized ignored arguments.
+        parser.add_argument("--static-host-stub", action="store_true")
+        parser.add_argument("--host-stub-linkage-explicit", action="store_true")
+        parser.add_argument("--device-hidden-visibility", action="store_true")
+        parser.add_argument("--gnu_version")
+        parser.add_argument("--allow_managed", action="store_true")
+        parser.add_argument("-m64", action="store_true")
+        parser.add_argument("--no-version-ident", action="store_true")
+        parser.add_argument("--include_file_name")
+        parser.add_argument("-tused", action="store_true")
+        parser.add_argument("--module_id_file_name")
+        parser.add_argument("--gen_c_file_name")
+        parser.add_argument("--stub_file_name")
+        parser.add_argument("--gen_device_file_name")
+        parser.add_argument("--keep-device-functions", action="store_true")
+        parser.add_argument("--gen_module_id_file", action="store_true")
+        parser.add_argument("--relaxed_constexpr", action="store_true")
+        parser.add_argument("--pending_instantiations", nargs=1)
+        parser.add_argument("--extended-lambda", action="store_true")
+
+        return parser.parse_args(opts)
+
+    def _invoke_nvrtc(self, ccbin_args, cicc_args):
+        cc = int(cicc_args.arch.removeprefix("compute_"))
+
+        nvrtc_opts = copy.deepcopy(self.nvrtc_opts)
+
+        # Remove -device-float128 for architectures with cc < 100.
+        if cc < 100:
+            nvrtc_opts = list(
+                filter(
+                    lambda x: x != "-device-float128" and x != "--device-float128",
+                    nvrtc_opts,
+                )
+            )
+
+        cmd = [self.device_compile_bin]
+
+        # Positional arguments
+        cmd += [self.nvrtc_lib]
+        cmd += [cicc_args.orig_src_path_name]
+        cmd += [cicc_args.orig_src_file_name]
+        cmd += [cicc_args.output_file]
+        cmd += [ccbin_args.arch_list]
+
+        # Add nvrtc options passed by -Xnvrtc
+        cmd += nvrtc_opts
+
+        # Compilation targets
+        cmd += [f"-arch={cicc_args.arch}"]
+        cmd += ["-dc"] if cicc_args.device_c else []
+        cmd += ["-ewp"] if self.nvcc_args.extensible_whole_program else []
+
+        # Debugging support
+        cmd += ["-G"] if cicc_args.debug_mode else []
+        cmd += ["-lineinfo"] if cicc_args.generate_line_info else []
+
+        # Code generation
+        cmd += ["-dopt=on"] if cicc_args.dopt else []
+        cmd += [f"-Ofc={cicc_args.Ofast_compile}"] if cicc_args.Ofast_compile else []
+        cmd += [f"-maxrregcount={cicc_args.maxreg}"] if cicc_args.maxreg else []
+        cmd += [f"-ftz={'true' if cicc_args.ftz else 'false'}"]
+        cmd += [f"-prec-sqrt={'true' if cicc_args.prec_sqrt else 'false'}"]
+        cmd += [f"-prec-div={'true' if cicc_args.prec_div else 'false'}"]
+        cmd += [f"-fmad={'true' if cicc_args.fmad else 'false'}"]
+        cmd += (
+            ["-extra-device-vectorization"]
+            if cicc_args.extra_device_vectorization
+            else []
+        )
+        cmd += (
+            [f"-jtd={cicc_args.jump_table_density}"]
+            if cicc_args.jump_table_density
+            else []
+        )
+        cmd += (
+            ["-frandom-seed", cicc_args.frandom_seed] if cicc_args.frandom_seed else []
+        )
+
+        # Preprocessing
+        cmd += [f"-D{x}" for x in self.nvcc_args.define_macro]
+        cmd += [f"-U{x}" for x in self.nvcc_args.undefine_macro]
+        cmd += [f"-I{x}" for x in self.nvcc_args.include_path]
+        cmd += [f"-I{x}" for x in self.nvcc_args.system_include]
+        cmd += [f"-include={x}" for x in self.nvcc_args.pre_include]
+
+        # Language Dialect
+        cmd += ["-std=c++17"] if cicc_args.cxx17 else []
+        cmd += ["-std=c++20"] if cicc_args.cxx20 else []
+        cmd += ["-std=c++23"] if cicc_args.cxx23 else []
+
+        # Misc
+        cmd += ["-w"] if cicc_args.w else []
+        cmd += ["-Werror=all-warnings"] if cicc_args.promote_warnings else []
+        cmd += ["-restrict"] if cicc_args.kernel_params_are_restrict else []
+        cmd += ["-opt-info=inline"] if cicc_args.inline_info else []
+        cmd += ["-no-err-no"] if not cicc_args.display_error_number else []
+        cmd += [f"-diag-error={diag}" for diag in cicc_args.diag_error]
+        cmd += [f"-diag-suppress={diag}" for diag in cicc_args.diag_suppress]
+        # cmd += [f'-diag-warn={diag}' for diag in cicc_args.diag_warn]
+        cmd += ["-brief_diag=true"] if cicc_args.brief_diagnostics else []
+        cmd += [f"-time={self.nvcc_args.time}"] if self.nvcc_args.time else []
+        cmd += (
+            [f"-split-compile={cicc_args.split_compile}"]
+            if cicc_args.split_compile
+            else []
+        )
+        cmd += (
+            [f"-fdevice-time-trace={cicc_args.device_time_trace}"]
+            if cicc_args.device_time_trace
+            else []
+        )
+
+        # Run the nvrtc compilation.
+        self._print_if_verbose(f"#$ {' '.join(cmd)}")
+        if not self.nvcc_args.dryrun:
+            run_result = subprocess.run(cmd, env=self.env)
+            if run_result.returncode != 0:
+                sys.exit(run_result.returncode)
+
+    def _run_nvrtcc(self):
+        Nvrtcc._check_nvcc_args(self)
+
+        for cmd in self._get_nvcc_cmd_list():
+            self._print_if_verbose(f"#$ {cmd}")
+
+            if not self.nvcc_args.dryrun and Nvrtcc._is_comment(cmd):
+                continue
+
+            if not self.nvcc_args.dryrun and Nvrtcc._is_env_var_assign(cmd):
+                os.environ[cmd.split("=")[0]] = "".join(cmd.split("=")[1:])
+                continue
+
+            bin, opts = Nvrtcc._split_cmd(cmd)
+
+            if not self.nvcc_args.dryrun:
+                # For some reason nvcc produces rm command that can fail if the file doesn't exist, so we just ignore the return value and output.
+                if re.match(r"^rm$", bin):
+                    subprocess.run(
+                        [bin] + opts,
+                        stdout=subprocess.DEVNULL,
+                        stderr=subprocess.DEVNULL,
+                        env=self.env,
+                    )
+                else:
+                    run_result = subprocess.run(
+                        [os.path.expandvars(bin)] + opts, env=self.env
+                    )
+                    if run_result.returncode != 0:
+                        sys.exit(run_result.returncode)
+
+            if re.match(r"^(gcc|clang|nvc\+\+|cl\.exe)$", bin):
+                ccbin_args = Nvrtcc._parse_ccbin_invocation(opts)
+            elif re.match(r"^.*cicc$", bin):
+                cicc_args = Nvrtcc._parse_cicc_invocation(opts)
+                self._invoke_nvrtc(ccbin_args, cicc_args)
+
+    def _run_nvcc(self):
+        run_result = subprocess.run([self.nvcc_bin] + self.nvcc_opts, env=self.env)
+        if run_result.returncode != 0:
+            sys.exit(run_result.returncode)
+
+    def run(self):
+        self._run_nvrtcc() if self.use_nvrtc else self._run_nvcc()
+
+
+if __name__ == "__main__":
+    Nvrtcc(sys.argv[1:], os.environ).run()
+    sys.exit(0)
diff --git a/nvrtcc/examples/demo.cu b/nvrtcc/examples/demo.cu
new file mode 100644
index 00000000000..3ede1711a36
--- /dev/null
+++ b/nvrtcc/examples/demo.cu
@@ -0,0 +1,64 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of nvrtcc in CUDA Core Compute Libraries,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+// When compiling with nvrtcc, the __NVRTCC__ macro is defined.
+#ifndef __NVRTCC__
+#  error "this file must be compiled with nvrtcc"
+#endif
+
+// We need to guard includes when compiling device code with nvrtc.
+#ifndef __CUDACC_RTC__
+#  include <cassert>
+#  include <cstdio>
+#endif
+
+#define TO_STRING_HELPER(...) #__VA_ARGS__
+#define TO_STRING(...)        TO_STRING_HELPER(__VA_ARGS__)
+
+// Ordinary kernels work without any problems.
+__global__ void kernel()
+{
+  printf("[%d, %d]: Hello world from kernel!\n", blockIdx.x, threadIdx.x);
+}
+
+// For template kernels the situation is a bit more complicated. We use nvrtcAddNameExpression to make sure these
+// kernels are instantiated.
+template <class T, int = 3>
+__global__ void template_kernel(int)
+{
+  printf("[%d, %d]: Hello world from template kernel!\n", blockIdx.x, threadIdx.x);
+
+  if (blockIdx.x == 0 && threadIdx.x == 0)
+  {
+    // __CUDA_ARCH_LIST__ behaves the same way as if we were compiling with nvcc.
+    printf("\narch list: " TO_STRING(__CUDA_ARCH_LIST__) "\n");
+  }
+}
+
+// Host functions must not be visible to nvrtc.
+#ifndef __CUDACC_RTC__
+int main()
+{
+  // We can now call kernels compiled with nvrtc directly.
+  kernel<<<2, 2>>>();
+  if (cudaDeviceSynchronize() != cudaSuccess)
+  {
+    return 1;
+  }
+
+  printf("\n");
+
+  template_kernel<void><<<3, 1>>>(0);
+  if (cudaDeviceSynchronize() != cudaSuccess)
+  {
+    return 1;
+  }
+}
+#endif
diff --git a/nvrtcc/src/nvrtcc_device_compiler.cpp b/nvrtcc/src/nvrtcc_device_compiler.cpp
new file mode 100644
index 00000000000..62ced7b8881
--- /dev/null
+++ b/nvrtcc/src/nvrtcc_device_compiler.cpp
@@ -0,0 +1,378 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of nvrtcc in CUDA Core Compute Libraries,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#include <algorithm>
+#include <array>
+#include <cstddef>
+#include <cstdio>
+#include <cstdlib>
+#include <fstream>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <vector>
+
+#include <dlfcn.h>
+#include <nv_decode.h>
+#include <nvrtc.h>
+
+//! @brief Gets the dynamic handle to nvrtc library. The first call must supply the path to the library.
+static void* get_nvrtc_lib(const char* nvrtc_lib = nullptr)
+{
+  static const auto handle = dlopen(nvrtc_lib, RTLD_NOW);
+  if (handle == nullptr)
+  {
+    std::fprintf(stderr, "Failed to open nvrtc lib.\n");
+    std::exit(-1);
+  }
+  return handle;
+}
+
+//! @brief Gets the pointer to a nvrtc function.
+template <class Signature>
+[[nodiscard]] static Signature* get_nvrtc_api(const char* api)
+{
+  const auto handle = dlsym(get_nvrtc_lib(), api);
+  if (handle == nullptr)
+  {
+    std::fprintf(stderr, "Failed to get nvrtc function: %s.\n", api);
+    std::exit(-1);
+  }
+  return reinterpret_cast<Signature*>(handle);
+}
+
+#define CALL_NVRTC_UNCHECKED(API, ...) get_nvrtc_api<decltype(API)>(#API)(__VA_ARGS__)
+
+#define CALL_NVRTC(API, ...)                                                                                       \
+  do                                                                                                               \
+  {                                                                                                                \
+    nvrtcResult _ret = CALL_NVRTC_UNCHECKED(API, __VA_ARGS__);                                                     \
+    if (_ret != NVRTC_SUCCESS)                                                                                     \
+    {                                                                                                              \
+      std::fprintf(                                                                                                \
+        stderr, "%s(%d): NVRTC error: %s\n", __FILE__, __LINE__, CALL_NVRTC_UNCHECKED(nvrtcGetErrorString, _ret)); \
+      std::exit(-1);                                                                                               \
+    }                                                                                                              \
+  } while (false)
+
+//! @brief Read the file's contents to a string.
+[[nodiscard]] static std::string read_input(const char* file)
+{
+  std::ifstream ifs{file};
+  ifs.seekg(0, std::ios::end);
+  const std::size_t size = ifs.tellg();
+
+  std::string buffer(size, '\0');
+
+  ifs.seekg(0);
+  ifs.read(buffer.data(), size);
+  buffer[size] = '\0';
+
+  return buffer;
+}
+
+//! @brief Returns a view to a line from a string_view.
+[[nodiscard]] static std::string_view get_line(std::string_view s) noexcept
+{
+  const auto pos = s.find('\n');
+  return (pos == std::string_view::npos) ? s : s.substr(0, pos + 1);
+}
+
+//! @brief Returns \c true if \c s starts with \c with.
+[[nodiscard]] static bool starts_with(std::string_view s, std::string_view with)
+{
+  return s.substr(0, std::min(s.size(), with.size())) == with;
+}
+
+//! @brief Type of PTX symbol.
+enum class SymbolType
+{
+  none, //!< Empty/invalid state.
+  variable, //!< Variable (.global).
+  kernel, //!< Kernel (.entry).
+};
+
+//! @brief Structure with data for PTX symbol.
+struct Symbol
+{
+  SymbolType type; //!< Symbol type.
+  std::string_view name; //!< Symbol name.
+};
+
+//! @brief Extracts PTX symbol from line.
+[[nodiscard]] static Symbol extract_symbol_from_line(std::string_view line) noexcept
+{
+  constexpr std::string_view variable_prefix = ".global ";
+  constexpr std::string_view kernel_prefix   = ".entry ";
+
+  std::string_view symbol{};
+  SymbolType symbol_type{SymbolType::none};
+
+  if (starts_with(line, variable_prefix))
+  {
+    symbol_type = SymbolType::variable;
+
+    // Globals have format of ".global .align 4 .u32 _Z3varIiE[] = { ... };".
+
+    // 1. Remove everything after ';'.
+    symbol = line.substr(0, line.find(';'));
+
+    // 2. If the variable is an array, we find the symbol end by searching for '['.
+    symbol = symbol.substr(0, symbol.find('['));
+
+    // 3. If the variable is not an array is initialized, find the symbol end by searching for ' ='.
+    symbol = symbol.substr(0, symbol.find(" ="));
+
+    // 4. We should have the end of the symbol, remove everything in front of it.
+    symbol.remove_prefix(symbol.rfind(' ') + 1);
+
+    // NVVM emits $str and __unnamed_N symbols for some debug data, let's ignore those.
+    if (starts_with(symbol, "$") || starts_with(symbol, "__unnamed"))
+    {
+      symbol_type = SymbolType::none;
+    }
+  }
+  else if (line.find(kernel_prefix) != std::string_view::npos)
+  {
+    symbol_type = SymbolType::kernel;
+
+    // Entries have format of ".visible .entry _Z6squareIiEvPT_i(" and end with ")" if there are no parameters.
+
+    // 1. Discard everything after last "("
+    symbol = line.substr(0, line.find_first_of('('));
+
+    // 2. Remove everything in front of the symbol.
+    symbol.remove_prefix(symbol.rfind(' ') + 1);
+  }
+
+  return {symbol_type, symbol};
+}
+
+//! @brief Extracts name expression in form `symbol<template_args>` from symbol in `void symbol<template_args>(args)`
+//! form.
+//!
+//! @return Pointer to first character of a zero terminated string or `nullptr` if the symbol is not a template.
+//!
+//! @note This function modifies the symbol buffer.
+[[nodiscard]] static char* extract_name_expr(char* symbol, SymbolType symbol_type) noexcept
+{
+  if (symbol_type == SymbolType::kernel)
+  {
+    // Remove 'void ' prefix.
+    symbol += 5;
+  }
+
+  // Iterate over the name
+  std::size_t curly_parens{0};
+  std::size_t template_parens{0};
+  bool is_template{false};
+  for (char* it = symbol; *it != '\0'; ++it)
+  {
+    switch (*it)
+    {
+      case '(':
+        if (template_parens == 0)
+        {
+          *it = '\0';
+          return (is_template) ? symbol : nullptr;
+        }
+        ++curly_parens;
+        break;
+      case ')':
+        --curly_parens;
+        break;
+      case '<':
+        ++template_parens;
+        is_template = true;
+        break;
+      case '>':
+        --template_parens;
+
+        // When we are outside of template parens, and next 2 characters are ::, skip them and reset is_template,
+        // because we were parsing type namespace
+        if (template_parens == 0)
+        {
+          if (it[1] == ':' && it[2] == ':')
+          {
+            is_template = false;
+            it += 2;
+          }
+        }
+        break;
+      default:
+        break;
+    }
+  }
+  return nullptr;
+}
+
+//! @brief Adds symbol to the program.
+static void add_symbol(nvrtcProgram prog, Symbol symbol)
+{
+  struct CuDemangleBuffer
+  {
+    char* ptr{};
+    std::size_t size{};
+
+    ~CuDemangleBuffer()
+    {
+      std::free(ptr);
+    }
+  };
+
+  static std::string symbol_copy{};
+  static CuDemangleBuffer buffer{};
+
+  // If the symbol is not mangled, it's not a template, thus we even needn't to add it as a name expression.
+  if (!starts_with(symbol.name, "_Z"))
+  {
+    return;
+  }
+
+  symbol_copy = symbol.name;
+
+  int status;
+  buffer.ptr = __cu_demangle(symbol_copy.c_str(), buffer.ptr, &buffer.size, &status);
+  switch (status)
+  {
+    case 0:
+      break;
+    case -1:
+      throw std::bad_alloc{};
+    case -2:
+      throw std::invalid_argument{std::string{"invalid symbol '"} + std::string{symbol.name} + "'"};
+    case -3:
+      throw std::invalid_argument{"invalid __cu_demangle parameter"};
+    default:
+      throw std::runtime_error{"unknown __cu_demangle error"};
+  }
+
+  char* name_expr = extract_name_expr(buffer.ptr, symbol.type);
+  if (name_expr != nullptr)
+  {
+    CALL_NVRTC(nvrtcAddNameExpression, prog, name_expr);
+  }
+}
+
+//! @brief Adds all symbols from a given PTX input to the program.
+static void add_symbols(nvrtcProgram prog, std::string_view ptx_input)
+{
+  if (ptx_input.find(-1))
+  {
+    while (!ptx_input.empty())
+    {
+      const auto line = get_line(ptx_input);
+
+      // All symbols start with "." as the first character on line.
+      if (!line.empty() && line[0] == '.')
+      {
+        const auto symbol = extract_symbol_from_line(line);
+
+        if (symbol.type != SymbolType::none)
+        {
+          add_symbol(prog, symbol);
+        }
+      }
+
+      ptx_input.remove_prefix(line.size());
+    }
+  }
+}
+
+int main(int argc, const char* const* argv)
+{
+  auto arg_it = argv + 1;
+
+  // Extract positional arguments.
+  const auto nvrtc_lib   = *arg_it++;
+  const auto input_file  = *arg_it++;
+  const auto input_name  = *arg_it++;
+  const auto output_file = *arg_it++;
+  const auto arch_list   = *arg_it++;
+
+  // Open nvrtc_lib shared object.
+  get_nvrtc_lib(nvrtc_lib);
+
+  // Read the source file.
+  const auto input = read_input(input_file);
+
+  // Read the input PTX file.
+  const auto ptx_input = read_input(output_file);
+
+  // Remove the input PTX file.
+  std::remove(output_file);
+
+  // Create arch list include.
+  const auto arch_list_include = std::string{"#undef __CUDA_ARCH_LIST__\n#define __CUDA_ARCH_LIST__ "} + arch_list;
+
+  constexpr auto arch_list_include_name = "__nvrtcc_arch_list_include";
+
+  // Create list of options to be passed to nvrtc.
+  std::vector<const char*> opts{};
+  opts.reserve(argv + argc - arg_it + 2);
+  opts.push_back("-include");
+  opts.push_back(arch_list_include_name);
+  opts.insert(opts.end(), arg_it, argv + argc);
+
+  // Create list of headers to be used by NVRTC.
+  std::array headers{arch_list_include.c_str()};
+  std::array headers_names{arch_list_include_name};
+  static_assert(headers.size() == headers_names.size());
+
+  // Create nvrtc program.
+  nvrtcProgram prog{};
+  CALL_NVRTC(nvrtcCreateProgram,
+             &prog,
+             input.data(),
+             input_name,
+             static_cast<int>(headers.size()),
+             headers.data(),
+             headers_names.data());
+
+  // Add symbols to the program.
+  add_symbols(prog, ptx_input);
+
+  // Compile the program.
+  const auto compile_result =
+    CALL_NVRTC_UNCHECKED(nvrtcCompileProgram, prog, static_cast<int>(opts.size()), opts.data());
+
+  // Obtain the log size.
+  std::size_t log_size{};
+  CALL_NVRTC(nvrtcGetProgramLogSize, prog, &log_size);
+
+  // Get the log and output it to the stderr. The log always contains EOF, so we check for > 1.
+  if (log_size > 1)
+  {
+    auto log = std::make_unique<char[]>(log_size);
+    CALL_NVRTC(nvrtcGetProgramLog, prog, log.get());
+    std::fprintf(stderr, "%s\n", log.get());
+  }
+
+  // If the compilation failed, exit.
+  if (compile_result != NVRTC_SUCCESS)
+  {
+    std::exit(1);
+  }
+
+  // Get the ptx size.
+  std::size_t ptx_size{};
+  CALL_NVRTC(nvrtcGetPTXSize, prog, &ptx_size);
+
+  // Get the ptx code.
+  auto ptx = std::make_unique<char[]>(ptx_size);
+  CALL_NVRTC(nvrtcGetPTX, prog, ptx.get());
+
+  // Write the ptx to the output file. The code contains EOF, so we write one character less.
+  std::ofstream ofs{output_file};
+  ofs.write(ptx.get(), ptx_size - 1);
+
+  // Destroy the program.
+  CALL_NVRTC(nvrtcDestroyProgram, &prog);
+}
diff --git a/nvrtcc/test/CMakeLists.txt b/nvrtcc/test/CMakeLists.txt
new file mode 100644
index 00000000000..ac871f32870
--- /dev/null
+++ b/nvrtcc/test/CMakeLists.txt
@@ -0,0 +1,34 @@
+##===----------------------------------------------------------------------===##
+##
+## Part of nvrtcc in CUDA Core Compute Libraries,
+## under the Apache License v2.0 with LLVM Exceptions.
+## See https://llvm.org/LICENSE.txt for license information.
+## SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+## SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+##
+##===----------------------------------------------------------------------===##
+
+set(CMAKE_BUILD_TYPE Debug)
+
+cmake_minimum_required(VERSION 3.21)
+
+project(nvrtc-testing LANGUAGES CXX CUDA)
+
+add_custom_target(nvrtcc.test)
+enable_testing()
+
+function(add_exec_test name)
+  add_executable(${name} ${ARGN})
+  add_dependencies(nvrtcc.test ${name})
+  target_compile_options(${name} PRIVATE -use-nvrtc)
+endfunction()
+
+function(add_compile_only_test name)
+  add_library(${name} OBJECT ${ARGN})
+  add_dependencies(nvrtcc.test ${name})
+  target_compile_options(${name} PRIVATE -use-nvrtc)
+endfunction()
+
+add_subdirectory(nvcc_options)
+add_subdirectory(nvrtcc_options)
+add_subdirectory(predefined_macros)
diff --git a/nvrtcc/test/common/check_predefined_macros.h b/nvrtcc/test/common/check_predefined_macros.h
new file mode 100644
index 00000000000..f8b258647b0
--- /dev/null
+++ b/nvrtcc/test/common/check_predefined_macros.h
@@ -0,0 +1,61 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of nvrtcc in CUDA Core Compute Libraries,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#if !defined(__NVRTCC__)
+#  error "missing __NVRTCC__ definition"
+#endif // !__NVRTCC__
+
+#if defined(__NVCC__) != !defined(__CUDACC_RTC__)
+#  error "__NVCC__ and __CUDACC_RTC__ should never be defined at the same time"
+#endif // __NVCC__ != !__CUDACC_RTC__
+
+#if !defined(__CUDACC__)
+#  error "missing __CUDACC__ definition"
+#endif // !__CUDACC__
+
+#if !defined(__CUDA_ARCH_LIST__)
+#  error "missing __CUDA_ARCH_LIST__ definition."
+#endif // !__CUDA_ARCH_LIST__
+
+#if !defined(__CUDACC_VER_MAJOR__)
+#  error "missing __CUDACC_VER_MAJOR__ definition"
+#endif // !__CUDACC_VER_MAJOR__
+
+#if !defined(__CUDACC_VER_MINOR__)
+#  error "missing __CUDACC_VER_MINOR__ definition"
+#endif // !__CUDACC_VER_MINOR__
+
+#if !defined(__CUDACC_VER_BUILD__)
+#  error "missing __CUDACC_VER_BUILD__ definition"
+#endif // !__CUDACC_VER_BUILD__
+
+#if !defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
+#  error "missing __NVCC_DIAG_PRAGMA_SUPPORT__ definition"
+#endif // !__NVCC_DIAG_PRAGMA_SUPPORT__
+
+#if defined(__CUDACC_DEBUG__) != defined(EXPECT_CUDACC_DEBUG)
+#  error "__CUDACC_DEBUG__ must match EXPECT_CUDACC_DEBUG definition."
+#endif // __CUDACC_DEBUG__ != EXPECT_CUDACC_DEBUG
+
+#if defined(__CUDACC_EWP__) != defined(EXPECT_CUDACC_EWP)
+#  error "__CUDACC_EWP__ must match EXPECT_CUDACC_EWP definition."
+#endif // __CUDACC_EWP__ != EXPECT_CUDACC_EWP
+
+#if defined(__CUDACC_RDC__) != defined(EXPECT_CUDACC_RDC)
+#  error "__CUDACC_RDC__ must match EXPECT_CUDACC_RDC definition."
+#endif // __CUDACC_RDC__ != EXPECT_CUDACC_RDC
+
+#if defined(__CUDACC_RTC_INT128__) != defined(EXPECT_CUDACC_RTC_INT128)
+#  error "__CUDACC_RTC_INT128__ must match EXPECT_CUDACC_RTC_INT128 definition."
+#endif // __CUDACC_RTC_INT128__ != EXPECT_CUDACC_RTC_INT128
+
+#if defined(__CUDACC_RTC_FLOAT128__) != defined(EXPECT_CUDACC_RTC_FLOAT128)
+#  error "__CUDACC_RTC_FLOAT128__ must match EXPECT_CUDACC_RTC_FLOAT128 definition."
+#endif // __CUDACC_RTC_FLOAT128__ != EXPECT_CUDACC_RTC_FLOAT128
diff --git a/nvrtcc/test/nvcc_options/CMakeLists.txt b/nvrtcc/test/nvcc_options/CMakeLists.txt
new file mode 100644
index 00000000000..14575ae5f42
--- /dev/null
+++ b/nvrtcc/test/nvcc_options/CMakeLists.txt
@@ -0,0 +1,22 @@
+##===----------------------------------------------------------------------===##
+##
+## Part of nvrtcc in CUDA Core Compute Libraries,
+## under the Apache License v2.0 with LLVM Exceptions.
+## See https://llvm.org/LICENSE.txt for license information.
+## SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+## SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+##
+##===----------------------------------------------------------------------===##
+
+# 1. Add --diag-suppress test.
+add_compile_only_test(test.nvcc_options.diag_suppress diag_suppress.cu)
+target_compile_options(
+  test.nvcc_options.diag_suppress
+  PRIVATE "-Werror=all-warnings" "--diag-suppress=186"
+)
+
+add_compile_only_test(test.nvcc_options.diag_suppress.short diag_suppress.cu)
+target_compile_options(
+  test.nvcc_options.diag_suppress.short
+  PRIVATE "-Werror=all-warnings" "-diag-suppress=186"
+)
diff --git a/nvrtcc/test/nvcc_options/diag_suppress.cu b/nvrtcc/test/nvcc_options/diag_suppress.cu
new file mode 100644
index 00000000000..e399b80da95
--- /dev/null
+++ b/nvrtcc/test/nvcc_options/diag_suppress.cu
@@ -0,0 +1,17 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of nvrtcc in CUDA Core Compute Libraries,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../common/check_predefined_macros.h"
+
+// Triggers: "warning #186-D: pointless comparison of unsigned integer with zero".
+__global__ void kernel(bool* result, unsigned value)
+{
+  *result = value >= 0;
+}
diff --git a/nvrtcc/test/nvrtcc_options/CMakeLists.txt b/nvrtcc/test/nvrtcc_options/CMakeLists.txt
new file mode 100644
index 00000000000..a9ee58dcf19
--- /dev/null
+++ b/nvrtcc/test/nvrtcc_options/CMakeLists.txt
@@ -0,0 +1,52 @@
+##===----------------------------------------------------------------------===##
+##
+## Part of nvrtcc in CUDA Core Compute Libraries,
+## under the Apache License v2.0 with LLVM Exceptions.
+## See https://llvm.org/LICENSE.txt for license information.
+## SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+## SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+##
+##===----------------------------------------------------------------------===##
+
+# 1. Add -use-nvrtc test.
+add_executable(test.nvrtcc_options.use_nvrtc.false use_nvrtc.false.cu)
+add_dependencies(nvrtcc.test test.nvrtcc_options.use_nvrtc.false)
+add_test(
+  NAME test.nvrtcc_options.use_nvrtc.false.run
+  COMMAND ${CMAKE_CURRENT_BINARY_DIR}/test.nvrtcc_options.use_nvrtc.false
+)
+
+add_executable(test.nvrtcc_options.use_nvrtc use_nvrtc.true.cu)
+add_dependencies(nvrtcc.test test.nvrtcc_options.use_nvrtc)
+target_compile_options(test.nvrtcc_options.use_nvrtc PRIVATE "--use-nvrtc")
+add_test(
+  NAME test.nvrtcc_options.use_nvrtc.run
+  COMMAND ${CMAKE_CURRENT_BINARY_DIR}/test.nvrtcc_options.use_nvrtc
+)
+
+add_executable(test.nvrtcc_options.use_nvrtc.short use_nvrtc.true.cu)
+add_dependencies(nvrtcc.test test.nvrtcc_options.use_nvrtc.short)
+target_compile_options(test.nvrtcc_options.use_nvrtc.short PRIVATE "-use-nvrtc")
+add_test(
+  NAME test.nvrtcc_options.use_nvrtc.short.run
+  COMMAND ${CMAKE_CURRENT_BINARY_DIR}/test.nvrtcc_options.use_nvrtc.short
+)
+
+# 2. Add -Xnvrtc test. TODO: make --nvrtc-options -my_opt work.
+add_compile_only_test(test.nvrtcc_options.nvrtc_options nvrtc_options.cu)
+target_compile_options(
+  test.nvrtcc_options.nvrtc_options
+  PRIVATE
+    "--nvrtc-options=-builtin-move-forward=false"
+    "--nvrtc-options=-builtin-initializer-list=false"
+    "--nvrtc-options=-DNVRTC_ONLY_MACRO=42"
+)
+
+add_compile_only_test(test.nvrtcc_options.nvrtc_options.short nvrtc_options.cu)
+target_compile_options(
+  test.nvrtcc_options.nvrtc_options.short
+  PRIVATE
+    "-Xnvrtc=-builtin-move-forward=false"
+    "-Xnvrtc=-builtin-initializer-list=false"
+    "-Xnvrtc=-DNVRTC_ONLY_MACRO=42"
+)
diff --git a/nvrtcc/test/nvrtcc_options/nvrtc_options.cu b/nvrtcc/test/nvrtcc_options/nvrtc_options.cu
new file mode 100644
index 00000000000..6f05b0bee06
--- /dev/null
+++ b/nvrtcc/test/nvrtcc_options/nvrtc_options.cu
@@ -0,0 +1,25 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of nvrtcc in CUDA Core Compute Libraries,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../common/check_predefined_macros.h"
+
+#if defined(__CUDACC_RTC__) && defined(__NV_BUILTIN_MOVE_FORWARD)
+#  error "-builtin-move-forward=false was not passed properly"
+#endif // __CUDACC_RTC__ && __NV_BUILTIN_MOVE_FORWARD
+#if defined(__CUDACC_RTC__) && defined(__NV_BUILTIN_INITIALIZER_LIST)
+#  error "-builtin-initializer-list=false was not passed properly"
+#endif // __CUDACC_RTC__ && __NV_BUILTIN_INITIALIZER_LIST
+#if defined(NVRTC_ONLY_MACRO) != defined(__CUDACC_RTC__)
+#  error "-DNVRTC_ONLY_MACRO was not passed properly"
+#endif // NVRTC_ONLY_MACRO != __CUDACC_RTC__
+
+#if defined(NVRTC_ONLY_MACRO)
+static_assert(NVRTC_ONLY_MACRO == 42);
+#endif // NVRTC_ONLY_MACRO
diff --git a/nvrtcc/test/nvrtcc_options/use_nvrtc.false.cu b/nvrtcc/test/nvrtcc_options/use_nvrtc.false.cu
new file mode 100644
index 00000000000..0aa9e60c43b
--- /dev/null
+++ b/nvrtcc/test/nvrtcc_options/use_nvrtc.false.cu
@@ -0,0 +1,38 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of nvrtcc in CUDA Core Compute Libraries,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../common/check_predefined_macros.h"
+
+#if !defined(__CUDACC_RTC__)
+#  include <cassert>
+#endif // !__CUDACC_RTC__
+
+__managed__ int proof_var;
+
+__global__ void proof_kernel()
+{
+#if defined(__CUDACC_RTC__)
+  proof_var = 1;
+#else // ^^^ __CUDACC_RTC__ ^^^ / vvv !__CUDACC_RTC__ vvv
+  proof_var = -1;
+#endif // ^^^ !__CUDACC_RTC__ ^^^
+}
+
+#if !defined(__CUDACC_RTC__)
+int main()
+{
+  proof_var = 0;
+
+  proof_kernel<<<1, 1>>>();
+  assert(cudaDeviceSynchronize() == cudaSuccess);
+
+  assert(proof_var == -1);
+}
+#endif // ^^^ !__CUDACC_RTC__ ^^^
diff --git a/nvrtcc/test/nvrtcc_options/use_nvrtc.true.cu b/nvrtcc/test/nvrtcc_options/use_nvrtc.true.cu
new file mode 100644
index 00000000000..3da8943f208
--- /dev/null
+++ b/nvrtcc/test/nvrtcc_options/use_nvrtc.true.cu
@@ -0,0 +1,38 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of nvrtcc in CUDA Core Compute Libraries,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../common/check_predefined_macros.h"
+
+#if !defined(__CUDACC_RTC__)
+#  include <cassert>
+#endif // !__CUDACC_RTC__
+
+__managed__ int proof_var;
+
+__global__ void proof_kernel()
+{
+#if defined(__CUDACC_RTC__)
+  proof_var = 1;
+#else // ^^^ __CUDACC_RTC__ ^^^ / vvv !__CUDACC_RTC__ vvv
+  proof_var = -1;
+#endif // ^^^ !__CUDACC_RTC__ ^^^
+}
+
+#if !defined(__CUDACC_RTC__)
+int main()
+{
+  proof_var = 0;
+
+  proof_kernel<<<1, 1>>>();
+  assert(cudaDeviceSynchronize() == cudaSuccess);
+
+  assert(proof_var == 1);
+}
+#endif // ^^^ !__CUDACC_RTC__ ^^^
diff --git a/nvrtcc/test/predefined_macros/CMakeLists.txt b/nvrtcc/test/predefined_macros/CMakeLists.txt
new file mode 100644
index 00000000000..e60f86cb5a8
--- /dev/null
+++ b/nvrtcc/test/predefined_macros/CMakeLists.txt
@@ -0,0 +1,98 @@
+##===----------------------------------------------------------------------===##
+##
+## Part of nvrtcc in CUDA Core Compute Libraries,
+## under the Apache License v2.0 with LLVM Exceptions.
+## See https://llvm.org/LICENSE.txt for license information.
+## SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+## SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+##
+##===----------------------------------------------------------------------===##
+
+# 1. Add default macros test.
+add_compile_only_test(test.predefined_macros.default default.cu)
+
+# 2. Add __CUDA_ARCH_LIST__ test.
+add_compile_only_test(test.predefined_macros.cuda_arch_list __CUDA_ARCH_LIST__.cu)
+set_target_properties(
+  test.predefined_macros.cuda_arch_list
+  PROPERTIES CUDA_ARCHITECTURES "75;80;89;90"
+)
+
+# 3. Add __CUDACC_DEBUG__ test.
+add_compile_only_test(test.predefined_macros.cudacc_debug __CUDACC_DEBUG__.cu)
+target_compile_options(
+  test.predefined_macros.cudacc_debug
+  PRIVATE --device-debug
+)
+
+add_compile_only_test(test.predefined_macros.cudacc_debug_short __CUDACC_DEBUG__.cu)
+target_compile_options(test.predefined_macros.cudacc_debug_short PRIVATE -G)
+
+# 4. Add __CUDACC_EWP__ test.
+add_compile_only_test(test.predefined_macros.cudacc_ewp __CUDACC_EWP__.cu)
+target_compile_options(
+  test.predefined_macros.cudacc_ewp
+  PRIVATE --extensible-whole-program
+)
+
+add_compile_only_test(test.predefined_macros.cudacc_ewp_short __CUDACC_EWP__.cu)
+target_compile_options(test.predefined_macros.cudacc_ewp_short PRIVATE -ewp)
+
+# 5. Add __CUDACC_RDC__ test.
+add_compile_only_test(test.predefined_macros.cudacc_rdc __CUDACC_RDC__.cu)
+target_compile_options(
+  test.predefined_macros.cudacc_rdc
+  PRIVATE --relocatable-device-code=true
+)
+
+add_compile_only_test(test.predefined_macros.cudacc_rdc_short __CUDACC_RDC__.cu)
+target_compile_options(
+  test.predefined_macros.cudacc_rdc_short
+  PRIVATE -rdc=true
+)
+
+add_compile_only_test(test.predefined_macros.cudacc_rdc_alias __CUDACC_RDC__.cu)
+target_compile_options(
+  test.predefined_macros.cudacc_rdc_alias
+  PRIVATE --device-c
+)
+
+add_compile_only_test(test.predefined_macros.cudacc_rdc_alias_short __CUDACC_RDC__.cu)
+target_compile_options(
+  test.predefined_macros.cudacc_rdc_alias_short
+  PRIVATE -dc
+)
+
+# 6. Add __CUDACC_RTC_FLOAT128__ test.
+add_compile_only_test(test.predefined_macros.cudacc_rtc_float128 __CUDACC_RTC_FLOAT128__.cu)
+target_compile_options(
+  test.predefined_macros.cudacc_rtc_float128
+  PRIVATE -Xnvrtc=--device-float128
+)
+set_target_properties(
+  test.predefined_macros.cudacc_rtc_float128
+  PROPERTIES CUDA_ARCHITECTURES "90;100"
+)
+
+add_compile_only_test(test.predefined_macros.cudacc_rtc_float128_short __CUDACC_RTC_FLOAT128__.cu)
+target_compile_options(
+  test.predefined_macros.cudacc_rtc_float128_short
+  PRIVATE -Xnvrtc=-device-float128
+)
+set_target_properties(
+  test.predefined_macros.cudacc_rtc_float128_short
+  PROPERTIES CUDA_ARCHITECTURES "90;100"
+)
+
+# 7. Add __CUDACC_RTC_INT128__ test.
+add_compile_only_test(test.predefined_macros.cudacc_rtc_int128 __CUDACC_RTC_INT128__.cu)
+target_compile_options(
+  test.predefined_macros.cudacc_rtc_int128
+  PRIVATE -Xnvrtc=--device-int128
+)
+
+add_compile_only_test(test.predefined_macros.cudacc_rtc_int128_short __CUDACC_RTC_INT128__.cu)
+target_compile_options(
+  test.predefined_macros.cudacc_rtc_int128_short
+  PRIVATE -Xnvrtc=-device-int128
+)
diff --git a/nvrtcc/test/predefined_macros/__CUDACC_DEBUG__.cu b/nvrtcc/test/predefined_macros/__CUDACC_DEBUG__.cu
new file mode 100644
index 00000000000..47b0d39fd38
--- /dev/null
+++ b/nvrtcc/test/predefined_macros/__CUDACC_DEBUG__.cu
@@ -0,0 +1,12 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of nvrtcc in CUDA Core Compute Libraries,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#define EXPECT_CUDACC_DEBUG
+#include "../common/check_predefined_macros.h"
diff --git a/nvrtcc/test/predefined_macros/__CUDACC_EWP__.cu b/nvrtcc/test/predefined_macros/__CUDACC_EWP__.cu
new file mode 100644
index 00000000000..e8d8da91e11
--- /dev/null
+++ b/nvrtcc/test/predefined_macros/__CUDACC_EWP__.cu
@@ -0,0 +1,12 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of nvrtcc in CUDA Core Compute Libraries,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#define EXPECT_CUDACC_EWP
+#include "../common/check_predefined_macros.h"
diff --git a/nvrtcc/test/predefined_macros/__CUDACC_RDC__.cu b/nvrtcc/test/predefined_macros/__CUDACC_RDC__.cu
new file mode 100644
index 00000000000..63c3bbb2dd0
--- /dev/null
+++ b/nvrtcc/test/predefined_macros/__CUDACC_RDC__.cu
@@ -0,0 +1,12 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of nvrtcc in CUDA Core Compute Libraries,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#define EXPECT_CUDACC_RDC
+#include "../common/check_predefined_macros.h"
diff --git a/nvrtcc/test/predefined_macros/__CUDACC_RTC_FLOAT128__.cu b/nvrtcc/test/predefined_macros/__CUDACC_RTC_FLOAT128__.cu
new file mode 100644
index 00000000000..f9d916ba508
--- /dev/null
+++ b/nvrtcc/test/predefined_macros/__CUDACC_RTC_FLOAT128__.cu
@@ -0,0 +1,21 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of nvrtcc in CUDA Core Compute Libraries,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#if defined(__CUDACC_RTC__) && __CUDA_ARCH__ >= 1000
+#  define EXPECT_CUDACC_RTC_FLOAT128
+#endif // __CUDACC_RTC__ && __CUDA_ARCH__ >= 1000
+#include "../common/check_predefined_macros.h"
+
+#if defined(EXPECT_CUDACC_RTC_FLOAT128)
+__global__ void kernel(__float128* value)
+{
+  *value = __float128{1.0};
+}
+#endif // EXPECT_CUDACC_RTC_FLOAT128
diff --git a/nvrtcc/test/predefined_macros/__CUDACC_RTC_INT128__.cu b/nvrtcc/test/predefined_macros/__CUDACC_RTC_INT128__.cu
new file mode 100644
index 00000000000..2ccc673f18e
--- /dev/null
+++ b/nvrtcc/test/predefined_macros/__CUDACC_RTC_INT128__.cu
@@ -0,0 +1,21 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of nvrtcc in CUDA Core Compute Libraries,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#if defined(__CUDACC_RTC__)
+#  define EXPECT_CUDACC_RTC_INT128
+#endif // __CUDACC_RTC__
+#include "../common/check_predefined_macros.h"
+
+#if defined(EXPECT_CUDACC_RTC_INT128)
+__global__ void kernel(__int128* value)
+{
+  *value = __int128{1};
+}
+#endif // EXPECT_CUDACC_RTC_INT128
diff --git a/nvrtcc/test/predefined_macros/__CUDA_ARCH_LIST__.cu b/nvrtcc/test/predefined_macros/__CUDA_ARCH_LIST__.cu
new file mode 100644
index 00000000000..c6712fd1022
--- /dev/null
+++ b/nvrtcc/test/predefined_macros/__CUDA_ARCH_LIST__.cu
@@ -0,0 +1,24 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of nvrtcc in CUDA Core Compute Libraries,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../common/check_predefined_macros.h"
+
+template <int... Args>
+struct ArchList
+{};
+
+template <int... Vs, int... Refs>
+__host__ __device__ constexpr bool test_cuda_arch_list(ArchList<Vs...>, ArchList<Refs...>) noexcept
+{
+  static_assert(sizeof...(Vs) == sizeof...(Refs));
+  return ((Vs == Refs) && ...);
+}
+
+static_assert(test_cuda_arch_list(ArchList<__CUDA_ARCH_LIST__>{}, ArchList<750, 800, 890, 900>{}));
diff --git a/nvrtcc/test/predefined_macros/default.cu b/nvrtcc/test/predefined_macros/default.cu
new file mode 100644
index 00000000000..4649bae0c2d
--- /dev/null
+++ b/nvrtcc/test/predefined_macros/default.cu
@@ -0,0 +1,11 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of nvrtcc in CUDA Core Compute Libraries,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../common/check_predefined_macros.h"