diff --git a/nvrtcc/CMakeLists.txt b/nvrtcc/CMakeLists.txt new file mode 100644 index 00000000000..a720cc4e89a --- /dev/null +++ b/nvrtcc/CMakeLists.txt @@ -0,0 +1,56 @@ +##===----------------------------------------------------------------------===## +## +## Part of nvrtcc in CUDA Core Compute Libraries, +## under the Apache License v2.0 with LLVM Exceptions. +## See https://llvm.org/LICENSE.txt for license information. +## SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +## SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +## +##===----------------------------------------------------------------------===## + +cmake_minimum_required(VERSION 3.21) + +if (NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE Release) +endif() + +project(nvrtcc LANGUAGES CXX) + +# Set output directory. +set(nvrtcc_exec_output_dir ${nvrtcc_BINARY_DIR}/bin) + +find_package(CUDAToolkit REQUIRED) + +# Find libcufilt. +find_library( + nvrtcc_cufilt_lib + "cufilt" + PATHS "${CUDAToolkit_LIBRARY_DIR}" + NO_DEFAULT_PATH +) +if (NOT nvrtcc_cufilt_lib) + message(FATAL_ERROR "nvrtcc: cu++filt library (libcufilt.a) not found.") +endif() + +# Add nvrtcc_device_compiler target. +add_executable(nvrtcc_device_compiler src/nvrtcc_device_compiler.cpp) +target_compile_features(nvrtcc_device_compiler PRIVATE cxx_std_17) +target_include_directories( + nvrtcc_device_compiler + PRIVATE "${CUDAToolkit_INCLUDE_DIRS}" +) +target_link_libraries(nvrtcc_device_compiler PRIVATE ${nvrtcc_cufilt_lib}) +set_target_properties( + nvrtcc_device_compiler + PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${nvrtcc_exec_output_dir}" +) + +# Add nvrtcc target that copies nvrtcc_cicc to the output directory and depends on nvrtcc_device_compiler. +add_custom_target( + nvrtcc + ALL + COMMAND + ${CMAKE_COMMAND} -E copy_directory ${CMAKE_SOURCE_DIR}/bin + ${nvrtcc_exec_output_dir} +) +add_dependencies(nvrtcc nvrtcc_device_compiler) diff --git a/nvrtcc/README.md b/nvrtcc/README.md new file mode 100644 index 00000000000..38420fc88d3 --- /dev/null +++ b/nvrtcc/README.md @@ -0,0 +1,55 @@ +> [!CAUTION] +> This is an internal tool not intended for public use. + +> [!WARNING] +> This tool is experimental. + +# NVRTCC + +## Overview + +`nvrtcc` is a tool to simplify [NVRTC](https://docs.nvidia.com/cuda/nvrtc/index.html) testing. It follows the `nvcc` compilation trajectory and replaces the `nvcc` generated PTX code with the NVRTC compiled one. The main advantage is that you can keep the source files almost the same as if compiled with `nvcc`. `nvrtcc` makes sure all of the necessary symbols are present in the generated PTX and let's `nvcc` do the host compilation and linking. + +The compilation with NVRTC is optional and can be controlled by the `-use-nvrtc` flag. This allows `nvrtcc` to be used as the `CMAKE_CUDA_COMPILER` where we can trick CMake to think it's using `nvcc`. `nvrtcc` supports almost all `nvcc` options except for those that are unsupported by NVRTC. + +## Example + +When compiling with `nvrtcc`, we need to make sure NVRTC wouldn't see the host includes and symbols, so they must be guarded by `#ifndef __CUDACC_RTC__` preprocessor directive. +```cpp +// hello_world.cu + +#ifndef __CUDACC_RTC__ +# include +#endif + +__global__ void kernel() +{ +#ifdef __CUDACC_RTC__ + printf("Hello world from NVRTC!\n"); +#else + printf("Hello world from NVCC!\n"); +#endif +} + +#ifndef __CUDACC_RTC__ +int main() +{ + kernel<<<1, 1>>>(); + cudaDeviceSynchronize(); +} +#endif +``` + +If we compile with `nvcc` the output should look as: +```sh +$ nvcc hello_world.cu -o hello_world +$ ./hello_world +Hello world from NVCC! +``` + +On the other hand if compiled with `nvrtcc` with `-use-nvrtc` flag present, the output should be: +```sh +$ nvrtcc hello_world.cu -o hello_world -use-nvrtc +$ ./hello_world +Hello world from NVRTCC! +``` diff --git a/nvrtcc/bin/nvrtcc b/nvrtcc/bin/nvrtcc new file mode 100755 index 00000000000..cbae2861fd3 --- /dev/null +++ b/nvrtcc/bin/nvrtcc @@ -0,0 +1,609 @@ +#!/usr/bin/env python3 + +##===----------------------------------------------------------------------===## +## +## Part of nvrtcc in CUDA Core Compute Libraries, +## under the Apache License v2.0 with LLVM Exceptions. +## See https://llvm.org/LICENSE.txt for license information. +## SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +## SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +## +##===----------------------------------------------------------------------===## + +import argparse +import copy +import os +import re +import shlex +import shutil +import subprocess +import sys + + +class Nvrtcc: + @staticmethod + def _parse_nvrtcc_opts(opts): + parser = argparse.ArgumentParser(allow_abbrev=False) + + parser.add_argument("-nvccbin", "--nvccbin", nargs=1) + parser.add_argument("-use-nvrtc", "--use-nvrtc", action="store_true") + parser.add_argument("-Xnvrtc", "--nvrtc-options", action="append", default=[]) + + return parser.parse_known_args(opts) + + @staticmethod + def _get_nvcc_bin_abs_path(nvcc_bin_arg, env): + if nvcc_bin_arg is None: + nvcc_bin = env.get("NVRTCC_NVCC_BIN", "nvcc") + else: + nvcc_bin = nvcc_bin_arg + + if not os.path.isabs(nvcc_bin): + nvcc_bin = shutil.which(nvcc_bin) + + return nvcc_bin + + @staticmethod + def _parse_nvcc_opts(nvcc_opts): + parser = argparse.ArgumentParser(allow_abbrev=False) + + # File and Path Specifications + parser.add_argument("-o", "--output-file", nargs=1, type=str) + parser.add_argument("-objtemp", "--objdir-as-tempdir", action="store_true") + parser.add_argument("-include", "--pre-include", action="append", default=[]) + parser.add_argument("-l", "--library", action="append", default=[]) + parser.add_argument("-D", "--define-macro", action="append", default=[]) + parser.add_argument("-U", "--undefine-macro", action="append", default=[]) + parser.add_argument("-I", "--include-path", action="append", default=[]) + parser.add_argument("-isystem", "--system-include", action="append", default=[]) + parser.add_argument("-L", "--library-path", action="append", default=[]) + parser.add_argument("-odir", "--output-directory", nargs=1, type=str) + parser.add_argument("-MF", "--dependency-output", nargs=1, type=str) + parser.add_argument("-MP", "--generate-dependency-targets", action="store_true") + parser.add_argument("-ccbin", "--compiler-bindir", nargs=1, type=str) + parser.add_argument( + "-allow-unsupported-compiler", + "--allow-unsupported-compiler", + action="store_true", + ) + parser.add_argument("-arbin", "--archiver-binary", nargs=1, type=str) + parser.add_argument("-cudart", "--cudart", choices=["none", "shared", "static"]) + parser.add_argument("-cudadevrt", "--cudadevrt", choices=["none", "static"]) + parser.add_argument("-ldir", "--libdevice-directory", nargs=1, type=str) + parser.add_argument("-target-dir", "--target-directory", nargs=1, type=str) + + # Options for Specifying the Compilation Phase + parser.add_argument("-link", "--link", action="store_true") + parser.add_argument("-lib", "--lib", action="store_true") + parser.add_argument("-dlink", "--device-link", action="store_true") + parser.add_argument("-dc", "--device-c", action="store_true") + parser.add_argument("-dw", "--device-w", action="store_true") + parser.add_argument("-cuda", "--cuda", action="store_true") + parser.add_argument("-c", "--compile", action="store_true") + parser.add_argument("-fatbin", "--fatbin", action="store_true") + parser.add_argument("-cubin", "--cubin", action="store_true") + parser.add_argument("-ptx", "--ptx", action="store_true") + parser.add_argument("-E", "--preprocess", action="store_true") + parser.add_argument("-M", "--generate-dependencies", action="store_true") + parser.add_argument( + "-MM", "--generate-nonsystem-dependencies", action="store_true" + ) + parser.add_argument( + "-MD", "--generate-dependencies-with-compile", action="store_true" + ) + parser.add_argument( + "-MMD", + "--generate-nonsystem-dependencies-with-compile", + action="store_true", + ) + parser.add_argument("-optix-ir", "--optix-ir", action="store_true") + parser.add_argument("-ltoir", "--ltoir", action="store_true") + parser.add_argument("-run", "--run", action="store_true") + + # Options for Specifying Behavior of Compiler/Linker + parser.add_argument("-pg", "--profile", action="store_true") + parser.add_argument("-g", "--debug", action="store_true") + parser.add_argument("-G", "--device-debug", action="store_true") + parser.add_argument("-lineinfo", "--generate-line-info", action="store_true") + parser.add_argument( + "-opt-info", "-optimization-info", action="append", default=[] + ) + parser.add_argument("-O", "--optimize", nargs=1, type=str) + parser.add_argument("-Ofc", "--Ofast-compile", nargs=1, type=str) + parser.add_argument("-dopt", "--dopt", choices=["on"]) + parser.add_argument("-dlto", "--dlink-time-opt", action="store_true") + parser.add_argument("-lto", "--lto", action="store_true") + parser.add_argument("-gen-opt-lto", "--gen-opt-lto", action="store_true") + parser.add_argument( + "-ftemplate-backtrace-limit", "--ftemplate-backtrace-limit", nargs=1 + ) + parser.add_argument("-ftemplate-depth", "--ftemplate-depth", nargs=1, type=int) + parser.add_argument("-noeh", "--no-exceptions", action="store_true") + parser.add_argument("-shared", "--shared", action="store_true") + parser.add_argument("-x", "--x", nargs=1, type=str) + parser.add_argument("-std", "--std", nargs=1, type=str) + parser.add_argument( + "-nohdinitlist", "--no-host-device-initializer-list", action="store_true" + ) + parser.add_argument( + "-nohdmoveforward", "--no-host-device-move-forward", action="store_true" + ) + parser.add_argument( + "-expt-relaxed-constexpr", "--expt-relaxed-constexpr", action="store_true" + ) + parser.add_argument( + "-extended-lambda", "--extended-lambda", action="store_true" + ) + parser.add_argument( + "-expt-extended-lambda", "--expt-extended-lambda", action="store_true" + ) + parser.add_argument("-m", "--machine", nargs=1, type=int) + parser.add_argument("-m64", "--m64", action="store_true") + + # Options for passing specific phase options + parser.add_argument( + "-Xcompiler", "--compiler-options", action="append", default=[] + ) + parser.add_argument("-Xlinker", "--linker-options", action="append", default=[]) + parser.add_argument( + "-Xarchive", "--archive-options", action="append", default=[] + ) + parser.add_argument("-Xptxas", "--ptxas-options", action="append", default=[]) + parser.add_argument("-Xnvlink", "--nvlink-options", action="append", default=[]) + + # Miscellaneous options for guiding the compiler driver. + parser.add_argument( + "-static-global-template-stub", "--static-global-template-stub", nargs=1 + ) + parser.add_argument( + "-device-entity-has-hidden-visibility", + "--device-entity-has-hidden-visibility", + nargs=1, + ) + parser.add_argument( + "-forward-unknown-to-host-compiler", + "--forward-unknown-to-host-compiler", + action="store_true", + ) + parser.add_argument( + "-forward-unknown-to-host-linker", + "--forward-unknown-to-host-linker", + action="store_true", + ) + parser.add_argument( + "-forward-unknown-opts", "--forward-unknown-opts", action="store_true" + ) + parser.add_argument("-noprof", "--dont-use-profile", action="store_true") + parser.add_argument("-dryrun", "--dryrun", action="store_true") + parser.add_argument("-v", "--verbose", action="store_true") + parser.add_argument("-t", "--threads", nargs=1, type=int) + parser.add_argument("-split-compile", "--split-compile", nargs=1, type=int) + parser.add_argument( + "-split-compile-extended", "--split-compile-extended", nargs=1, type=int + ) + parser.add_argument( + "-fdevice-syntax-only", "--fdevice-syntax-only", nargs=1, type=str + ) + parser.add_argument( + "-fdevice-time-trace", "--fdevice-time-trace", nargs=1, type=str + ) + parser.add_argument("-keep", "--keep", action="store_true") + parser.add_argument("-keep-dir", "--keep-dir", nargs=1, type=str) + parser.add_argument("-save-temps", "--save-temps", action="store_true") + parser.add_argument("-clean", "--clean-targets", action="store_true") + parser.add_argument("-time", "--time", nargs=1, type=str) + parser.add_argument("-run-args", "--run-args", nargs=1) + parser.add_argument("-idp", "--input-drive-prefix", nargs=1, type=str) + parser.add_argument("-ddp", "--dependency-drive-prefix", nargs=1, type=str) + parser.add_argument("-dp", "--drive-prefix", nargs=1, type=str) + parser.add_argument("-MT", "--dependency-target-name", nargs=1, type=str) + parser.add_argument( + "-no-align-double", "--no-align-double", action="store_true" + ) + parser.add_argument("-nodlink", "--no-device-link", action="store_true") + + # Options for steering GPU code generation. + parser.add_argument("-arch", "--gpu-architecture", action="append", default=[]) + parser.add_argument("-code", "--gpu-code", action="append", default=[]) + parser.add_argument("-gencode", "--generate-code", action="append", default=[]) + parser.add_argument("-rdc", "--relocatable-device-code", nargs=1, type=str) + parser.add_argument("-e", "--entries", action="append", default=[]) + parser.add_argument("-maxrregcount", "--maxrregcount", nargs=1) + parser.add_argument("-use_fast_math", "--use_fast_math", action="store_true") + parser.add_argument("-ftz", "--ftz", nargs=1, type=str) + parser.add_argument("-prec-div", "--prec-div", nargs=1) + parser.add_argument("-prec-sqrt", "--prec-sqrt", nargs=1) + parser.add_argument("-fmad", "--fmad", nargs=1) + parser.add_argument( + "-extra-device-vectorization", + "--extra-device-vectorization", + action="store_true", + ) + + # Options for steering cuda compilation. + parser.add_argument("-default-stream", "--default-stream", nargs=1) + + # Generic tool options. + parser.add_argument("-w", "--disable-warnings", action="store_true") + parser.add_argument( + "-keep-device-functions", "--keep-device-functions", action="store_true" + ) + parser.add_argument("-src-in-ptx", "--source-in-ptx", action="store_true") + parser.add_argument("-restrict", "--restrict", action="store_true") + parser.add_argument("-Wreorder", "--Wreorder", action="store_true") + parser.add_argument( + "-Wdefault-stream-launch", "--Wdefault-stream-launch", action="store_true" + ) + parser.add_argument( + "-Wmissing-launch-bounds", "--Wmissing-launch-bounds", action="store_true" + ) + parser.add_argument( + "-Wext-lambda-captures-this", + "--Wext-lambda-captures-this", + action="store_true", + ) + parser.add_argument( + "-Wno-deprecated-declarations", + "--Wno-deprecated-declarations", + action="store_true", + ) + parser.add_argument( + "-Wno-deprecated-gpu-targets", + "--Wno-deprecated-gpu-targets", + action="store_true", + ) + parser.add_argument("-Werror", "--Werror", action="append", default=[]) + parser.add_argument("-res-usage", "--resource-usage", action="store_true") + parser.add_argument("-ewp", "--extensible-whole-program", action="store_true") + parser.add_argument("-no-compress", "--no-compress", action="store_true") + parser.add_argument("-qpp-config", "--qpp-config", action="store_true") + parser.add_argument( + "-astoolpatch", "--compile-as-tools-patch", action="store_true" + ) + parser.add_argument("-code-ls", "--list-gpu-code", action="store_true") + parser.add_argument("-arch-ls", "--list-gpu-arch", action="store_true") + parser.add_argument("-err-no", "--display-error-number", action="store_true") + parser.add_argument( + "-no-err-no", "--no-display-error-number", action="store_true" + ) + parser.add_argument("-diag-error", "--diag-error", action="append", default=[]) + parser.add_argument( + "-diag-suppress", "--diag-suppress", action="append", default=[] + ) + parser.add_argument("-diag-warn", "--diag-warn", action="append", default=[]) + parser.add_argument("-hls", "--host-linker-script", nargs=1, type=str) + parser.add_argument( + "-aug-hls", "--augment-host-linker-script", action="store_true" + ) + parser.add_argument("-r", "--relocatable-link", action="store_true") + parser.add_argument("-brief-diag", "--brief-diagnostics", nargs=1, type=str) + parser.add_argument("-jtd", "--jump-table-density", nargs=1, type=int) + parser.add_argument("-reloc-ptx", "--relocatable-ptx", action="store_true") + parser.add_argument( + "-device-stack-protector", "--device-stack-protector", nargs=1, type=str + ) + parser.add_argument("-compress-mode", "--compress-mode", nargs=1, type=str) + parser.add_argument("-frandom-seed", "--frandom-seed", nargs=1, type=str) + parser.add_argument( + "-fdevice-sanitize", "--fdevice-sanitize", nargs=1, type=str + ) + parser.add_argument("-jobserver", "--jobserver", action="store_true") + # "-h", "--help" + parser.add_argument("-V", "--version", action="store_true") + parser.add_argument("-optf", "--options-file", action="append", default=[]) + + return parser.parse_known_args(nvcc_opts)[0] + + def __init__(self, opts: list[str], env): + args, nvcc_opts = Nvrtcc._parse_nvrtcc_opts(opts) + + self.nvcc_bin = Nvrtcc._get_nvcc_bin_abs_path(args.nvccbin, env) + self.use_nvrtc = args.use_nvrtc + self.nvrtc_opts = args.nvrtc_options + self.nvcc_opts = ["-D__NVRTCC__"] + nvcc_opts + self.nvcc_args = Nvrtcc._parse_nvcc_opts(self.nvcc_opts) + self.device_compile_bin = os.path.join( + os.path.dirname(__file__), "nvrtcc_device_compiler" + ) + self.nvrtc_lib = os.path.join( + os.path.dirname(os.path.dirname(self.nvcc_bin)), "lib64", "libnvrtc.so" + ) + self.env = env + + # Clear environment variables that prepend/append nvcc flags. + self.env.pop("NVCC_PREPEND_FLAGS", None) + self.env.pop("NVCC_APPEND_FLAGS", None) + + @staticmethod + def _emit_error(msg, retcode=-1): + print(f"nvrtcc error: {msg}", file=sys.stderr) + sys.exit(retcode) + + @staticmethod + def _emit_warning(msg): + print(f"nvrtcc warning: {msg}", file=sys.stderr) + + def _check_nvcc_args(self): + if self.nvcc_args.optix_ir: + Nvrtcc._emit_error( + "compiling to Optix IR with nvrtc is currently unsupported" + ) + + if ( + self.nvcc_args.ltoir + or self.nvcc_args.dlink_time_opt + or self.nvcc_args.gen_opt_lto + ): + Nvrtcc._emit_error( + "compiling to LTO IR with nvrtc is currently unsupported" + ) + + if self.nvcc_args.expt_relaxed_constexpr: + Nvrtcc._emit_warning("relaxed constexpr is unsupported with nvrtc") + + if self.nvcc_args.extended_lambda or self.nvcc_args.expt_extended_lambda: + Nvrtcc._emit_warning( + "extended lambda declarations are unsupported with nvrtc" + ) + + if self.nvcc_args.keep_device_functions: + Nvrtcc._emit_warning("keeping device functions is unsupported with nvrtc") + + if self.nvcc_args.options_file: + Nvrtcc._emit_error("passing command line options via file is not supported") + + def _get_nvcc_cmd_list(self) -> list[str]: + run_result = subprocess.run( + [self.nvcc_bin] + self.nvcc_opts + ["-dryrun"], + stderr=subprocess.PIPE, + env=self.env, + ) + + if run_result.returncode != 0: + Nvrtcc._emit_error( + "failed to retrieve nvcc command list", run_result.returncode + ) + + lines = run_result.stderr.decode().splitlines() + return [line.removeprefix("#$ ") for line in lines] + + def _print_if_verbose(self, msg, **kwargs): + if self.nvcc_args.verbose: + print(msg, **kwargs) + + @staticmethod + def _is_comment(cmd): + return cmd.startswith("--") + + @staticmethod + def _is_env_var_assign(cmd): + return re.match(r"^[a-zA-Z0-9_]+=", cmd) + + @staticmethod + def _split_cmd(cmd): + result = shlex.split(cmd) + return result[0], result[1:] + + @staticmethod + def _parse_ccbin_invocation(opts): + arch_list = None + + arch_list_def = [x for x in opts if x.startswith("-D__CUDA_ARCH_LIST__=")] + if arch_list_def: + assert len(arch_list_def) == 1 + arch_list = arch_list_def[0].removeprefix("-D__CUDA_ARCH_LIST__=") + + return argparse.Namespace(arch_list=arch_list) + + @staticmethod + def _parse_cicc_invocation(opts): + parser = argparse.ArgumentParser(allow_abbrev=False) + + # Positional arguments. + parser.add_argument("file", nargs="*") + + # Recognized used arguments. + parser.add_argument("-o", "--output-file", type=str) + parser.add_argument("--orig_src_file_name", type=str) + parser.add_argument("--orig_src_path_name", type=str) + parser.add_argument("--c++17", dest="cxx17", action="store_true") + parser.add_argument("--c++20", dest="cxx20", action="store_true") + parser.add_argument("--c++23", dest="cxx23", action="store_true") + parser.add_argument("-arch", type=str) + parser.add_argument("-maxreg", type=int) + parser.add_argument("-ftz", type=int) + parser.add_argument("-prec_div", type=int) + parser.add_argument("-prec_sqrt", type=int) + parser.add_argument("-fmad", type=int) + parser.add_argument("--device-c", action="store_true") + parser.add_argument("--partial-link", action="store_true") + parser.add_argument("--debug_mode", action="store_true") + parser.add_argument("-generate-line-info", action="store_true") + parser.add_argument("-O0", dest="dopt", action="store_false", default=True) + parser.add_argument("--Ofast-compile", type=str) + parser.add_argument("--frandom-seed", nargs=1, type=str) + parser.add_argument("-extra-device-vectorization", action="store_true") + parser.add_argument("-jump-table-density", type=int) + parser.add_argument("-w", action="store_true") + parser.add_argument("-kernel-params-are-restrict", action="store_true") + parser.add_argument("-inline-info", action="store_true") + parser.add_argument("--display_error_number", action="store_true") + parser.add_argument("-diag_error", "--diag_error", action="append", default=[]) + parser.add_argument( + "-diag_suppress", "--diag_suppress", action="append", default=[] + ) + parser.add_argument("-diag_warn", "--diag_warn", action="append", default=[]) + parser.add_argument("--brief_diagnostics", action="store_true") + parser.add_argument("-split-compile", nargs=1) + parser.add_argument("--device-time-trace", nargs=1) + parser.add_argument("--promote_warnings", action="store_true") + + # Recognized ignored arguments. + parser.add_argument("--static-host-stub", action="store_true") + parser.add_argument("--host-stub-linkage-explicit", action="store_true") + parser.add_argument("--device-hidden-visibility", action="store_true") + parser.add_argument("--gnu_version") + parser.add_argument("--allow_managed", action="store_true") + parser.add_argument("-m64", action="store_true") + parser.add_argument("--no-version-ident", action="store_true") + parser.add_argument("--include_file_name") + parser.add_argument("-tused", action="store_true") + parser.add_argument("--module_id_file_name") + parser.add_argument("--gen_c_file_name") + parser.add_argument("--stub_file_name") + parser.add_argument("--gen_device_file_name") + parser.add_argument("--keep-device-functions", action="store_true") + parser.add_argument("--gen_module_id_file", action="store_true") + parser.add_argument("--relaxed_constexpr", action="store_true") + parser.add_argument("--pending_instantiations", nargs=1) + parser.add_argument("--extended-lambda", action="store_true") + + return parser.parse_args(opts) + + def _invoke_nvrtc(self, ccbin_args, cicc_args): + cc = int(cicc_args.arch.removeprefix("compute_")) + + nvrtc_opts = copy.deepcopy(self.nvrtc_opts) + + # Remove -device-float128 for architectures with cc < 100. + if cc < 100: + nvrtc_opts = list( + filter( + lambda x: x != "-device-float128" and x != "--device-float128", + nvrtc_opts, + ) + ) + + cmd = [self.device_compile_bin] + + # Positional arguments + cmd += [self.nvrtc_lib] + cmd += [cicc_args.orig_src_path_name] + cmd += [cicc_args.orig_src_file_name] + cmd += [cicc_args.output_file] + cmd += [ccbin_args.arch_list] + + # Add nvrtc options passed by -Xnvrtc + cmd += nvrtc_opts + + # Compilation targets + cmd += [f"-arch={cicc_args.arch}"] + cmd += ["-dc"] if cicc_args.device_c else [] + cmd += ["-ewp"] if self.nvcc_args.extensible_whole_program else [] + + # Debugging support + cmd += ["-G"] if cicc_args.debug_mode else [] + cmd += ["-lineinfo"] if cicc_args.generate_line_info else [] + + # Code generation + cmd += ["-dopt=on"] if cicc_args.dopt else [] + cmd += [f"-Ofc={cicc_args.Ofast_compile}"] if cicc_args.Ofast_compile else [] + cmd += [f"-maxrregcount={cicc_args.maxreg}"] if cicc_args.maxreg else [] + cmd += [f"-ftz={'true' if cicc_args.ftz else 'false'}"] + cmd += [f"-prec-sqrt={'true' if cicc_args.prec_sqrt else 'false'}"] + cmd += [f"-prec-div={'true' if cicc_args.prec_div else 'false'}"] + cmd += [f"-fmad={'true' if cicc_args.fmad else 'false'}"] + cmd += ( + ["-extra-device-vectorization"] + if cicc_args.extra_device_vectorization + else [] + ) + cmd += ( + [f"-jtd={cicc_args.jump_table_density}"] + if cicc_args.jump_table_density + else [] + ) + cmd += ( + ["-frandom-seed", cicc_args.frandom_seed] if cicc_args.frandom_seed else [] + ) + + # Preprocessing + cmd += [f"-D{x}" for x in self.nvcc_args.define_macro] + cmd += [f"-U{x}" for x in self.nvcc_args.undefine_macro] + cmd += [f"-I{x}" for x in self.nvcc_args.include_path] + cmd += [f"-I{x}" for x in self.nvcc_args.system_include] + cmd += [f"-include={x}" for x in self.nvcc_args.pre_include] + + # Language Dialect + cmd += ["-std=c++17"] if cicc_args.cxx17 else [] + cmd += ["-std=c++20"] if cicc_args.cxx20 else [] + cmd += ["-std=c++23"] if cicc_args.cxx23 else [] + + # Misc + cmd += ["-w"] if cicc_args.w else [] + cmd += ["-Werror=all-warnings"] if cicc_args.promote_warnings else [] + cmd += ["-restrict"] if cicc_args.kernel_params_are_restrict else [] + cmd += ["-opt-info=inline"] if cicc_args.inline_info else [] + cmd += ["-no-err-no"] if not cicc_args.display_error_number else [] + cmd += [f"-diag-error={diag}" for diag in cicc_args.diag_error] + cmd += [f"-diag-suppress={diag}" for diag in cicc_args.diag_suppress] + # cmd += [f'-diag-warn={diag}' for diag in cicc_args.diag_warn] + cmd += ["-brief_diag=true"] if cicc_args.brief_diagnostics else [] + cmd += [f"-time={self.nvcc_args.time}"] if self.nvcc_args.time else [] + cmd += ( + [f"-split-compile={cicc_args.split_compile}"] + if cicc_args.split_compile + else [] + ) + cmd += ( + [f"-fdevice-time-trace={cicc_args.device_time_trace}"] + if cicc_args.device_time_trace + else [] + ) + + # Run the nvrtc compilation. + self._print_if_verbose(f"#$ {' '.join(cmd)}") + if not self.nvcc_args.dryrun: + run_result = subprocess.run(cmd, env=self.env) + if run_result.returncode != 0: + sys.exit(run_result.returncode) + + def _run_nvrtcc(self): + Nvrtcc._check_nvcc_args(self) + + for cmd in self._get_nvcc_cmd_list(): + self._print_if_verbose(f"#$ {cmd}") + + if not self.nvcc_args.dryrun and Nvrtcc._is_comment(cmd): + continue + + if not self.nvcc_args.dryrun and Nvrtcc._is_env_var_assign(cmd): + os.environ[cmd.split("=")[0]] = "".join(cmd.split("=")[1:]) + continue + + bin, opts = Nvrtcc._split_cmd(cmd) + + if not self.nvcc_args.dryrun: + # For some reason nvcc produces rm command that can fail if the file doesn't exist, so we just ignore the return value and output. + if re.match(r"^rm$", bin): + subprocess.run( + [bin] + opts, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + env=self.env, + ) + else: + run_result = subprocess.run( + [os.path.expandvars(bin)] + opts, env=self.env + ) + if run_result.returncode != 0: + sys.exit(run_result.returncode) + + if re.match(r"^(gcc|clang|nvc\+\+|cl\.exe)$", bin): + ccbin_args = Nvrtcc._parse_ccbin_invocation(opts) + elif re.match(r"^.*cicc$", bin): + cicc_args = Nvrtcc._parse_cicc_invocation(opts) + self._invoke_nvrtc(ccbin_args, cicc_args) + + def _run_nvcc(self): + run_result = subprocess.run([self.nvcc_bin] + self.nvcc_opts, env=self.env) + if run_result.returncode != 0: + sys.exit(run_result.returncode) + + def run(self): + self._run_nvrtcc() if self.use_nvrtc else self._run_nvcc() + + +if __name__ == "__main__": + Nvrtcc(sys.argv[1:], os.environ).run() + sys.exit(0) diff --git a/nvrtcc/examples/demo.cu b/nvrtcc/examples/demo.cu new file mode 100644 index 00000000000..3ede1711a36 --- /dev/null +++ b/nvrtcc/examples/demo.cu @@ -0,0 +1,64 @@ +//===----------------------------------------------------------------------===// +// +// Part of nvrtcc in CUDA Core Compute Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +// When compiling with nvrtcc, the __NVRTCC__ macro is defined. +#ifndef __NVRTCC__ +# error "this file must be compiled with nvrtcc" +#endif + +// We need to guard includes when compiling device code with nvrtc. +#ifndef __CUDACC_RTC__ +# include +# include +#endif + +#define TO_STRING_HELPER(...) #__VA_ARGS__ +#define TO_STRING(...) TO_STRING_HELPER(__VA_ARGS__) + +// Ordinary kernels work without any problems. +__global__ void kernel() +{ + printf("[%d, %d]: Hello world from kernel!\n", blockIdx.x, threadIdx.x); +} + +// For template kernels the situation is a bit more complicated. We use nvrtcAddNameExpression to make sure these +// kernels are instantiated. +template +__global__ void template_kernel(int) +{ + printf("[%d, %d]: Hello world from template kernel!\n", blockIdx.x, threadIdx.x); + + if (blockIdx.x == 0 && threadIdx.x == 0) + { + // __CUDA_ARCH_LIST__ behaves the same way as if we were compiling with nvcc. + printf("\narch list: " TO_STRING(__CUDA_ARCH_LIST__) "\n"); + } +} + +// Host functions must not be visible to nvrtc. +#ifndef __CUDACC_RTC__ +int main() +{ + // We can now call kernels compiled with nvrtc directly. + kernel<<<2, 2>>>(); + if (cudaDeviceSynchronize() != cudaSuccess) + { + return 1; + } + + printf("\n"); + + template_kernel<<<3, 1>>>(0); + if (cudaDeviceSynchronize() != cudaSuccess) + { + return 1; + } +} +#endif diff --git a/nvrtcc/src/nvrtcc_device_compiler.cpp b/nvrtcc/src/nvrtcc_device_compiler.cpp new file mode 100644 index 00000000000..62ced7b8881 --- /dev/null +++ b/nvrtcc/src/nvrtcc_device_compiler.cpp @@ -0,0 +1,378 @@ +//===----------------------------------------------------------------------===// +// +// Part of nvrtcc in CUDA Core Compute Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +//! @brief Gets the dynamic handle to nvrtc library. The first call must supply the path to the library. +static void* get_nvrtc_lib(const char* nvrtc_lib = nullptr) +{ + static const auto handle = dlopen(nvrtc_lib, RTLD_NOW); + if (handle == nullptr) + { + std::fprintf(stderr, "Failed to open nvrtc lib.\n"); + std::exit(-1); + } + return handle; +} + +//! @brief Gets the pointer to a nvrtc function. +template +[[nodiscard]] static Signature* get_nvrtc_api(const char* api) +{ + const auto handle = dlsym(get_nvrtc_lib(), api); + if (handle == nullptr) + { + std::fprintf(stderr, "Failed to get nvrtc function: %s.\n", api); + std::exit(-1); + } + return reinterpret_cast(handle); +} + +#define CALL_NVRTC_UNCHECKED(API, ...) get_nvrtc_api(#API)(__VA_ARGS__) + +#define CALL_NVRTC(API, ...) \ + do \ + { \ + nvrtcResult _ret = CALL_NVRTC_UNCHECKED(API, __VA_ARGS__); \ + if (_ret != NVRTC_SUCCESS) \ + { \ + std::fprintf( \ + stderr, "%s(%d): NVRTC error: %s\n", __FILE__, __LINE__, CALL_NVRTC_UNCHECKED(nvrtcGetErrorString, _ret)); \ + std::exit(-1); \ + } \ + } while (false) + +//! @brief Read the file's contents to a string. +[[nodiscard]] static std::string read_input(const char* file) +{ + std::ifstream ifs{file}; + ifs.seekg(0, std::ios::end); + const std::size_t size = ifs.tellg(); + + std::string buffer(size, '\0'); + + ifs.seekg(0); + ifs.read(buffer.data(), size); + buffer[size] = '\0'; + + return buffer; +} + +//! @brief Returns a view to a line from a string_view. +[[nodiscard]] static std::string_view get_line(std::string_view s) noexcept +{ + const auto pos = s.find('\n'); + return (pos == std::string_view::npos) ? s : s.substr(0, pos + 1); +} + +//! @brief Returns \c true if \c s starts with \c with. +[[nodiscard]] static bool starts_with(std::string_view s, std::string_view with) +{ + return s.substr(0, std::min(s.size(), with.size())) == with; +} + +//! @brief Type of PTX symbol. +enum class SymbolType +{ + none, //!< Empty/invalid state. + variable, //!< Variable (.global). + kernel, //!< Kernel (.entry). +}; + +//! @brief Structure with data for PTX symbol. +struct Symbol +{ + SymbolType type; //!< Symbol type. + std::string_view name; //!< Symbol name. +}; + +//! @brief Extracts PTX symbol from line. +[[nodiscard]] static Symbol extract_symbol_from_line(std::string_view line) noexcept +{ + constexpr std::string_view variable_prefix = ".global "; + constexpr std::string_view kernel_prefix = ".entry "; + + std::string_view symbol{}; + SymbolType symbol_type{SymbolType::none}; + + if (starts_with(line, variable_prefix)) + { + symbol_type = SymbolType::variable; + + // Globals have format of ".global .align 4 .u32 _Z3varIiE[] = { ... };". + + // 1. Remove everything after ';'. + symbol = line.substr(0, line.find(';')); + + // 2. If the variable is an array, we find the symbol end by searching for '['. + symbol = symbol.substr(0, symbol.find('[')); + + // 3. If the variable is not an array is initialized, find the symbol end by searching for ' ='. + symbol = symbol.substr(0, symbol.find(" =")); + + // 4. We should have the end of the symbol, remove everything in front of it. + symbol.remove_prefix(symbol.rfind(' ') + 1); + + // NVVM emits $str and __unnamed_N symbols for some debug data, let's ignore those. + if (starts_with(symbol, "$") || starts_with(symbol, "__unnamed")) + { + symbol_type = SymbolType::none; + } + } + else if (line.find(kernel_prefix) != std::string_view::npos) + { + symbol_type = SymbolType::kernel; + + // Entries have format of ".visible .entry _Z6squareIiEvPT_i(" and end with ")" if there are no parameters. + + // 1. Discard everything after last "(" + symbol = line.substr(0, line.find_first_of('(')); + + // 2. Remove everything in front of the symbol. + symbol.remove_prefix(symbol.rfind(' ') + 1); + } + + return {symbol_type, symbol}; +} + +//! @brief Extracts name expression in form `symbol` from symbol in `void symbol(args)` +//! form. +//! +//! @return Pointer to first character of a zero terminated string or `nullptr` if the symbol is not a template. +//! +//! @note This function modifies the symbol buffer. +[[nodiscard]] static char* extract_name_expr(char* symbol, SymbolType symbol_type) noexcept +{ + if (symbol_type == SymbolType::kernel) + { + // Remove 'void ' prefix. + symbol += 5; + } + + // Iterate over the name + std::size_t curly_parens{0}; + std::size_t template_parens{0}; + bool is_template{false}; + for (char* it = symbol; *it != '\0'; ++it) + { + switch (*it) + { + case '(': + if (template_parens == 0) + { + *it = '\0'; + return (is_template) ? symbol : nullptr; + } + ++curly_parens; + break; + case ')': + --curly_parens; + break; + case '<': + ++template_parens; + is_template = true; + break; + case '>': + --template_parens; + + // When we are outside of template parens, and next 2 characters are ::, skip them and reset is_template, + // because we were parsing type namespace + if (template_parens == 0) + { + if (it[1] == ':' && it[2] == ':') + { + is_template = false; + it += 2; + } + } + break; + default: + break; + } + } + return nullptr; +} + +//! @brief Adds symbol to the program. +static void add_symbol(nvrtcProgram prog, Symbol symbol) +{ + struct CuDemangleBuffer + { + char* ptr{}; + std::size_t size{}; + + ~CuDemangleBuffer() + { + std::free(ptr); + } + }; + + static std::string symbol_copy{}; + static CuDemangleBuffer buffer{}; + + // If the symbol is not mangled, it's not a template, thus we even needn't to add it as a name expression. + if (!starts_with(symbol.name, "_Z")) + { + return; + } + + symbol_copy = symbol.name; + + int status; + buffer.ptr = __cu_demangle(symbol_copy.c_str(), buffer.ptr, &buffer.size, &status); + switch (status) + { + case 0: + break; + case -1: + throw std::bad_alloc{}; + case -2: + throw std::invalid_argument{std::string{"invalid symbol '"} + std::string{symbol.name} + "'"}; + case -3: + throw std::invalid_argument{"invalid __cu_demangle parameter"}; + default: + throw std::runtime_error{"unknown __cu_demangle error"}; + } + + char* name_expr = extract_name_expr(buffer.ptr, symbol.type); + if (name_expr != nullptr) + { + CALL_NVRTC(nvrtcAddNameExpression, prog, name_expr); + } +} + +//! @brief Adds all symbols from a given PTX input to the program. +static void add_symbols(nvrtcProgram prog, std::string_view ptx_input) +{ + if (ptx_input.find(-1)) + { + while (!ptx_input.empty()) + { + const auto line = get_line(ptx_input); + + // All symbols start with "." as the first character on line. + if (!line.empty() && line[0] == '.') + { + const auto symbol = extract_symbol_from_line(line); + + if (symbol.type != SymbolType::none) + { + add_symbol(prog, symbol); + } + } + + ptx_input.remove_prefix(line.size()); + } + } +} + +int main(int argc, const char* const* argv) +{ + auto arg_it = argv + 1; + + // Extract positional arguments. + const auto nvrtc_lib = *arg_it++; + const auto input_file = *arg_it++; + const auto input_name = *arg_it++; + const auto output_file = *arg_it++; + const auto arch_list = *arg_it++; + + // Open nvrtc_lib shared object. + get_nvrtc_lib(nvrtc_lib); + + // Read the source file. + const auto input = read_input(input_file); + + // Read the input PTX file. + const auto ptx_input = read_input(output_file); + + // Remove the input PTX file. + std::remove(output_file); + + // Create arch list include. + const auto arch_list_include = std::string{"#undef __CUDA_ARCH_LIST__\n#define __CUDA_ARCH_LIST__ "} + arch_list; + + constexpr auto arch_list_include_name = "__nvrtcc_arch_list_include"; + + // Create list of options to be passed to nvrtc. + std::vector opts{}; + opts.reserve(argv + argc - arg_it + 2); + opts.push_back("-include"); + opts.push_back(arch_list_include_name); + opts.insert(opts.end(), arg_it, argv + argc); + + // Create list of headers to be used by NVRTC. + std::array headers{arch_list_include.c_str()}; + std::array headers_names{arch_list_include_name}; + static_assert(headers.size() == headers_names.size()); + + // Create nvrtc program. + nvrtcProgram prog{}; + CALL_NVRTC(nvrtcCreateProgram, + &prog, + input.data(), + input_name, + static_cast(headers.size()), + headers.data(), + headers_names.data()); + + // Add symbols to the program. + add_symbols(prog, ptx_input); + + // Compile the program. + const auto compile_result = + CALL_NVRTC_UNCHECKED(nvrtcCompileProgram, prog, static_cast(opts.size()), opts.data()); + + // Obtain the log size. + std::size_t log_size{}; + CALL_NVRTC(nvrtcGetProgramLogSize, prog, &log_size); + + // Get the log and output it to the stderr. The log always contains EOF, so we check for > 1. + if (log_size > 1) + { + auto log = std::make_unique(log_size); + CALL_NVRTC(nvrtcGetProgramLog, prog, log.get()); + std::fprintf(stderr, "%s\n", log.get()); + } + + // If the compilation failed, exit. + if (compile_result != NVRTC_SUCCESS) + { + std::exit(1); + } + + // Get the ptx size. + std::size_t ptx_size{}; + CALL_NVRTC(nvrtcGetPTXSize, prog, &ptx_size); + + // Get the ptx code. + auto ptx = std::make_unique(ptx_size); + CALL_NVRTC(nvrtcGetPTX, prog, ptx.get()); + + // Write the ptx to the output file. The code contains EOF, so we write one character less. + std::ofstream ofs{output_file}; + ofs.write(ptx.get(), ptx_size - 1); + + // Destroy the program. + CALL_NVRTC(nvrtcDestroyProgram, &prog); +} diff --git a/nvrtcc/test/CMakeLists.txt b/nvrtcc/test/CMakeLists.txt new file mode 100644 index 00000000000..ac871f32870 --- /dev/null +++ b/nvrtcc/test/CMakeLists.txt @@ -0,0 +1,34 @@ +##===----------------------------------------------------------------------===## +## +## Part of nvrtcc in CUDA Core Compute Libraries, +## under the Apache License v2.0 with LLVM Exceptions. +## See https://llvm.org/LICENSE.txt for license information. +## SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +## SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +## +##===----------------------------------------------------------------------===## + +set(CMAKE_BUILD_TYPE Debug) + +cmake_minimum_required(VERSION 3.21) + +project(nvrtc-testing LANGUAGES CXX CUDA) + +add_custom_target(nvrtcc.test) +enable_testing() + +function(add_exec_test name) + add_executable(${name} ${ARGN}) + add_dependencies(nvrtcc.test ${name}) + target_compile_options(${name} PRIVATE -use-nvrtc) +endfunction() + +function(add_compile_only_test name) + add_library(${name} OBJECT ${ARGN}) + add_dependencies(nvrtcc.test ${name}) + target_compile_options(${name} PRIVATE -use-nvrtc) +endfunction() + +add_subdirectory(nvcc_options) +add_subdirectory(nvrtcc_options) +add_subdirectory(predefined_macros) diff --git a/nvrtcc/test/common/check_predefined_macros.h b/nvrtcc/test/common/check_predefined_macros.h new file mode 100644 index 00000000000..f8b258647b0 --- /dev/null +++ b/nvrtcc/test/common/check_predefined_macros.h @@ -0,0 +1,61 @@ +//===----------------------------------------------------------------------===// +// +// Part of nvrtcc in CUDA Core Compute Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#if !defined(__NVRTCC__) +# error "missing __NVRTCC__ definition" +#endif // !__NVRTCC__ + +#if defined(__NVCC__) != !defined(__CUDACC_RTC__) +# error "__NVCC__ and __CUDACC_RTC__ should never be defined at the same time" +#endif // __NVCC__ != !__CUDACC_RTC__ + +#if !defined(__CUDACC__) +# error "missing __CUDACC__ definition" +#endif // !__CUDACC__ + +#if !defined(__CUDA_ARCH_LIST__) +# error "missing __CUDA_ARCH_LIST__ definition." +#endif // !__CUDA_ARCH_LIST__ + +#if !defined(__CUDACC_VER_MAJOR__) +# error "missing __CUDACC_VER_MAJOR__ definition" +#endif // !__CUDACC_VER_MAJOR__ + +#if !defined(__CUDACC_VER_MINOR__) +# error "missing __CUDACC_VER_MINOR__ definition" +#endif // !__CUDACC_VER_MINOR__ + +#if !defined(__CUDACC_VER_BUILD__) +# error "missing __CUDACC_VER_BUILD__ definition" +#endif // !__CUDACC_VER_BUILD__ + +#if !defined(__NVCC_DIAG_PRAGMA_SUPPORT__) +# error "missing __NVCC_DIAG_PRAGMA_SUPPORT__ definition" +#endif // !__NVCC_DIAG_PRAGMA_SUPPORT__ + +#if defined(__CUDACC_DEBUG__) != defined(EXPECT_CUDACC_DEBUG) +# error "__CUDACC_DEBUG__ must match EXPECT_CUDACC_DEBUG definition." +#endif // __CUDACC_DEBUG__ != EXPECT_CUDACC_DEBUG + +#if defined(__CUDACC_EWP__) != defined(EXPECT_CUDACC_EWP) +# error "__CUDACC_EWP__ must match EXPECT_CUDACC_EWP definition." +#endif // __CUDACC_EWP__ != EXPECT_CUDACC_EWP + +#if defined(__CUDACC_RDC__) != defined(EXPECT_CUDACC_RDC) +# error "__CUDACC_RDC__ must match EXPECT_CUDACC_RDC definition." +#endif // __CUDACC_RDC__ != EXPECT_CUDACC_RDC + +#if defined(__CUDACC_RTC_INT128__) != defined(EXPECT_CUDACC_RTC_INT128) +# error "__CUDACC_RTC_INT128__ must match EXPECT_CUDACC_RTC_INT128 definition." +#endif // __CUDACC_RTC_INT128__ != EXPECT_CUDACC_RTC_INT128 + +#if defined(__CUDACC_RTC_FLOAT128__) != defined(EXPECT_CUDACC_RTC_FLOAT128) +# error "__CUDACC_RTC_FLOAT128__ must match EXPECT_CUDACC_RTC_FLOAT128 definition." +#endif // __CUDACC_RTC_FLOAT128__ != EXPECT_CUDACC_RTC_FLOAT128 diff --git a/nvrtcc/test/nvcc_options/CMakeLists.txt b/nvrtcc/test/nvcc_options/CMakeLists.txt new file mode 100644 index 00000000000..14575ae5f42 --- /dev/null +++ b/nvrtcc/test/nvcc_options/CMakeLists.txt @@ -0,0 +1,22 @@ +##===----------------------------------------------------------------------===## +## +## Part of nvrtcc in CUDA Core Compute Libraries, +## under the Apache License v2.0 with LLVM Exceptions. +## See https://llvm.org/LICENSE.txt for license information. +## SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +## SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +## +##===----------------------------------------------------------------------===## + +# 1. Add --diag-suppress test. +add_compile_only_test(test.nvcc_options.diag_suppress diag_suppress.cu) +target_compile_options( + test.nvcc_options.diag_suppress + PRIVATE "-Werror=all-warnings" "--diag-suppress=186" +) + +add_compile_only_test(test.nvcc_options.diag_suppress.short diag_suppress.cu) +target_compile_options( + test.nvcc_options.diag_suppress.short + PRIVATE "-Werror=all-warnings" "-diag-suppress=186" +) diff --git a/nvrtcc/test/nvcc_options/diag_suppress.cu b/nvrtcc/test/nvcc_options/diag_suppress.cu new file mode 100644 index 00000000000..e399b80da95 --- /dev/null +++ b/nvrtcc/test/nvcc_options/diag_suppress.cu @@ -0,0 +1,17 @@ +//===----------------------------------------------------------------------===// +// +// Part of nvrtcc in CUDA Core Compute Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#include "../common/check_predefined_macros.h" + +// Triggers: "warning #186-D: pointless comparison of unsigned integer with zero". +__global__ void kernel(bool* result, unsigned value) +{ + *result = value >= 0; +} diff --git a/nvrtcc/test/nvrtcc_options/CMakeLists.txt b/nvrtcc/test/nvrtcc_options/CMakeLists.txt new file mode 100644 index 00000000000..a9ee58dcf19 --- /dev/null +++ b/nvrtcc/test/nvrtcc_options/CMakeLists.txt @@ -0,0 +1,52 @@ +##===----------------------------------------------------------------------===## +## +## Part of nvrtcc in CUDA Core Compute Libraries, +## under the Apache License v2.0 with LLVM Exceptions. +## See https://llvm.org/LICENSE.txt for license information. +## SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +## SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +## +##===----------------------------------------------------------------------===## + +# 1. Add -use-nvrtc test. +add_executable(test.nvrtcc_options.use_nvrtc.false use_nvrtc.false.cu) +add_dependencies(nvrtcc.test test.nvrtcc_options.use_nvrtc.false) +add_test( + NAME test.nvrtcc_options.use_nvrtc.false.run + COMMAND ${CMAKE_CURRENT_BINARY_DIR}/test.nvrtcc_options.use_nvrtc.false +) + +add_executable(test.nvrtcc_options.use_nvrtc use_nvrtc.true.cu) +add_dependencies(nvrtcc.test test.nvrtcc_options.use_nvrtc) +target_compile_options(test.nvrtcc_options.use_nvrtc PRIVATE "--use-nvrtc") +add_test( + NAME test.nvrtcc_options.use_nvrtc.run + COMMAND ${CMAKE_CURRENT_BINARY_DIR}/test.nvrtcc_options.use_nvrtc +) + +add_executable(test.nvrtcc_options.use_nvrtc.short use_nvrtc.true.cu) +add_dependencies(nvrtcc.test test.nvrtcc_options.use_nvrtc.short) +target_compile_options(test.nvrtcc_options.use_nvrtc.short PRIVATE "-use-nvrtc") +add_test( + NAME test.nvrtcc_options.use_nvrtc.short.run + COMMAND ${CMAKE_CURRENT_BINARY_DIR}/test.nvrtcc_options.use_nvrtc.short +) + +# 2. Add -Xnvrtc test. TODO: make --nvrtc-options -my_opt work. +add_compile_only_test(test.nvrtcc_options.nvrtc_options nvrtc_options.cu) +target_compile_options( + test.nvrtcc_options.nvrtc_options + PRIVATE + "--nvrtc-options=-builtin-move-forward=false" + "--nvrtc-options=-builtin-initializer-list=false" + "--nvrtc-options=-DNVRTC_ONLY_MACRO=42" +) + +add_compile_only_test(test.nvrtcc_options.nvrtc_options.short nvrtc_options.cu) +target_compile_options( + test.nvrtcc_options.nvrtc_options.short + PRIVATE + "-Xnvrtc=-builtin-move-forward=false" + "-Xnvrtc=-builtin-initializer-list=false" + "-Xnvrtc=-DNVRTC_ONLY_MACRO=42" +) diff --git a/nvrtcc/test/nvrtcc_options/nvrtc_options.cu b/nvrtcc/test/nvrtcc_options/nvrtc_options.cu new file mode 100644 index 00000000000..6f05b0bee06 --- /dev/null +++ b/nvrtcc/test/nvrtcc_options/nvrtc_options.cu @@ -0,0 +1,25 @@ +//===----------------------------------------------------------------------===// +// +// Part of nvrtcc in CUDA Core Compute Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#include "../common/check_predefined_macros.h" + +#if defined(__CUDACC_RTC__) && defined(__NV_BUILTIN_MOVE_FORWARD) +# error "-builtin-move-forward=false was not passed properly" +#endif // __CUDACC_RTC__ && __NV_BUILTIN_MOVE_FORWARD +#if defined(__CUDACC_RTC__) && defined(__NV_BUILTIN_INITIALIZER_LIST) +# error "-builtin-initializer-list=false was not passed properly" +#endif // __CUDACC_RTC__ && __NV_BUILTIN_INITIALIZER_LIST +#if defined(NVRTC_ONLY_MACRO) != defined(__CUDACC_RTC__) +# error "-DNVRTC_ONLY_MACRO was not passed properly" +#endif // NVRTC_ONLY_MACRO != __CUDACC_RTC__ + +#if defined(NVRTC_ONLY_MACRO) +static_assert(NVRTC_ONLY_MACRO == 42); +#endif // NVRTC_ONLY_MACRO diff --git a/nvrtcc/test/nvrtcc_options/use_nvrtc.false.cu b/nvrtcc/test/nvrtcc_options/use_nvrtc.false.cu new file mode 100644 index 00000000000..0aa9e60c43b --- /dev/null +++ b/nvrtcc/test/nvrtcc_options/use_nvrtc.false.cu @@ -0,0 +1,38 @@ +//===----------------------------------------------------------------------===// +// +// Part of nvrtcc in CUDA Core Compute Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#include "../common/check_predefined_macros.h" + +#if !defined(__CUDACC_RTC__) +# include +#endif // !__CUDACC_RTC__ + +__managed__ int proof_var; + +__global__ void proof_kernel() +{ +#if defined(__CUDACC_RTC__) + proof_var = 1; +#else // ^^^ __CUDACC_RTC__ ^^^ / vvv !__CUDACC_RTC__ vvv + proof_var = -1; +#endif // ^^^ !__CUDACC_RTC__ ^^^ +} + +#if !defined(__CUDACC_RTC__) +int main() +{ + proof_var = 0; + + proof_kernel<<<1, 1>>>(); + assert(cudaDeviceSynchronize() == cudaSuccess); + + assert(proof_var == -1); +} +#endif // ^^^ !__CUDACC_RTC__ ^^^ diff --git a/nvrtcc/test/nvrtcc_options/use_nvrtc.true.cu b/nvrtcc/test/nvrtcc_options/use_nvrtc.true.cu new file mode 100644 index 00000000000..3da8943f208 --- /dev/null +++ b/nvrtcc/test/nvrtcc_options/use_nvrtc.true.cu @@ -0,0 +1,38 @@ +//===----------------------------------------------------------------------===// +// +// Part of nvrtcc in CUDA Core Compute Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#include "../common/check_predefined_macros.h" + +#if !defined(__CUDACC_RTC__) +# include +#endif // !__CUDACC_RTC__ + +__managed__ int proof_var; + +__global__ void proof_kernel() +{ +#if defined(__CUDACC_RTC__) + proof_var = 1; +#else // ^^^ __CUDACC_RTC__ ^^^ / vvv !__CUDACC_RTC__ vvv + proof_var = -1; +#endif // ^^^ !__CUDACC_RTC__ ^^^ +} + +#if !defined(__CUDACC_RTC__) +int main() +{ + proof_var = 0; + + proof_kernel<<<1, 1>>>(); + assert(cudaDeviceSynchronize() == cudaSuccess); + + assert(proof_var == 1); +} +#endif // ^^^ !__CUDACC_RTC__ ^^^ diff --git a/nvrtcc/test/predefined_macros/CMakeLists.txt b/nvrtcc/test/predefined_macros/CMakeLists.txt new file mode 100644 index 00000000000..e60f86cb5a8 --- /dev/null +++ b/nvrtcc/test/predefined_macros/CMakeLists.txt @@ -0,0 +1,98 @@ +##===----------------------------------------------------------------------===## +## +## Part of nvrtcc in CUDA Core Compute Libraries, +## under the Apache License v2.0 with LLVM Exceptions. +## See https://llvm.org/LICENSE.txt for license information. +## SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +## SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +## +##===----------------------------------------------------------------------===## + +# 1. Add default macros test. +add_compile_only_test(test.predefined_macros.default default.cu) + +# 2. Add __CUDA_ARCH_LIST__ test. +add_compile_only_test(test.predefined_macros.cuda_arch_list __CUDA_ARCH_LIST__.cu) +set_target_properties( + test.predefined_macros.cuda_arch_list + PROPERTIES CUDA_ARCHITECTURES "75;80;89;90" +) + +# 3. Add __CUDACC_DEBUG__ test. +add_compile_only_test(test.predefined_macros.cudacc_debug __CUDACC_DEBUG__.cu) +target_compile_options( + test.predefined_macros.cudacc_debug + PRIVATE --device-debug +) + +add_compile_only_test(test.predefined_macros.cudacc_debug_short __CUDACC_DEBUG__.cu) +target_compile_options(test.predefined_macros.cudacc_debug_short PRIVATE -G) + +# 4. Add __CUDACC_EWP__ test. +add_compile_only_test(test.predefined_macros.cudacc_ewp __CUDACC_EWP__.cu) +target_compile_options( + test.predefined_macros.cudacc_ewp + PRIVATE --extensible-whole-program +) + +add_compile_only_test(test.predefined_macros.cudacc_ewp_short __CUDACC_EWP__.cu) +target_compile_options(test.predefined_macros.cudacc_ewp_short PRIVATE -ewp) + +# 5. Add __CUDACC_RDC__ test. +add_compile_only_test(test.predefined_macros.cudacc_rdc __CUDACC_RDC__.cu) +target_compile_options( + test.predefined_macros.cudacc_rdc + PRIVATE --relocatable-device-code=true +) + +add_compile_only_test(test.predefined_macros.cudacc_rdc_short __CUDACC_RDC__.cu) +target_compile_options( + test.predefined_macros.cudacc_rdc_short + PRIVATE -rdc=true +) + +add_compile_only_test(test.predefined_macros.cudacc_rdc_alias __CUDACC_RDC__.cu) +target_compile_options( + test.predefined_macros.cudacc_rdc_alias + PRIVATE --device-c +) + +add_compile_only_test(test.predefined_macros.cudacc_rdc_alias_short __CUDACC_RDC__.cu) +target_compile_options( + test.predefined_macros.cudacc_rdc_alias_short + PRIVATE -dc +) + +# 6. Add __CUDACC_RTC_FLOAT128__ test. +add_compile_only_test(test.predefined_macros.cudacc_rtc_float128 __CUDACC_RTC_FLOAT128__.cu) +target_compile_options( + test.predefined_macros.cudacc_rtc_float128 + PRIVATE -Xnvrtc=--device-float128 +) +set_target_properties( + test.predefined_macros.cudacc_rtc_float128 + PROPERTIES CUDA_ARCHITECTURES "90;100" +) + +add_compile_only_test(test.predefined_macros.cudacc_rtc_float128_short __CUDACC_RTC_FLOAT128__.cu) +target_compile_options( + test.predefined_macros.cudacc_rtc_float128_short + PRIVATE -Xnvrtc=-device-float128 +) +set_target_properties( + test.predefined_macros.cudacc_rtc_float128_short + PROPERTIES CUDA_ARCHITECTURES "90;100" +) + +# 7. Add __CUDACC_RTC_INT128__ test. +add_compile_only_test(test.predefined_macros.cudacc_rtc_int128 __CUDACC_RTC_INT128__.cu) +target_compile_options( + test.predefined_macros.cudacc_rtc_int128 + PRIVATE -Xnvrtc=--device-int128 +) + +add_compile_only_test(test.predefined_macros.cudacc_rtc_int128_short __CUDACC_RTC_INT128__.cu) +target_compile_options( + test.predefined_macros.cudacc_rtc_int128_short + PRIVATE -Xnvrtc=-device-int128 +) diff --git a/nvrtcc/test/predefined_macros/__CUDACC_DEBUG__.cu b/nvrtcc/test/predefined_macros/__CUDACC_DEBUG__.cu new file mode 100644 index 00000000000..47b0d39fd38 --- /dev/null +++ b/nvrtcc/test/predefined_macros/__CUDACC_DEBUG__.cu @@ -0,0 +1,12 @@ +//===----------------------------------------------------------------------===// +// +// Part of nvrtcc in CUDA Core Compute Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#define EXPECT_CUDACC_DEBUG +#include "../common/check_predefined_macros.h" diff --git a/nvrtcc/test/predefined_macros/__CUDACC_EWP__.cu b/nvrtcc/test/predefined_macros/__CUDACC_EWP__.cu new file mode 100644 index 00000000000..e8d8da91e11 --- /dev/null +++ b/nvrtcc/test/predefined_macros/__CUDACC_EWP__.cu @@ -0,0 +1,12 @@ +//===----------------------------------------------------------------------===// +// +// Part of nvrtcc in CUDA Core Compute Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#define EXPECT_CUDACC_EWP +#include "../common/check_predefined_macros.h" diff --git a/nvrtcc/test/predefined_macros/__CUDACC_RDC__.cu b/nvrtcc/test/predefined_macros/__CUDACC_RDC__.cu new file mode 100644 index 00000000000..63c3bbb2dd0 --- /dev/null +++ b/nvrtcc/test/predefined_macros/__CUDACC_RDC__.cu @@ -0,0 +1,12 @@ +//===----------------------------------------------------------------------===// +// +// Part of nvrtcc in CUDA Core Compute Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#define EXPECT_CUDACC_RDC +#include "../common/check_predefined_macros.h" diff --git a/nvrtcc/test/predefined_macros/__CUDACC_RTC_FLOAT128__.cu b/nvrtcc/test/predefined_macros/__CUDACC_RTC_FLOAT128__.cu new file mode 100644 index 00000000000..f9d916ba508 --- /dev/null +++ b/nvrtcc/test/predefined_macros/__CUDACC_RTC_FLOAT128__.cu @@ -0,0 +1,21 @@ +//===----------------------------------------------------------------------===// +// +// Part of nvrtcc in CUDA Core Compute Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#if defined(__CUDACC_RTC__) && __CUDA_ARCH__ >= 1000 +# define EXPECT_CUDACC_RTC_FLOAT128 +#endif // __CUDACC_RTC__ && __CUDA_ARCH__ >= 1000 +#include "../common/check_predefined_macros.h" + +#if defined(EXPECT_CUDACC_RTC_FLOAT128) +__global__ void kernel(__float128* value) +{ + *value = __float128{1.0}; +} +#endif // EXPECT_CUDACC_RTC_FLOAT128 diff --git a/nvrtcc/test/predefined_macros/__CUDACC_RTC_INT128__.cu b/nvrtcc/test/predefined_macros/__CUDACC_RTC_INT128__.cu new file mode 100644 index 00000000000..2ccc673f18e --- /dev/null +++ b/nvrtcc/test/predefined_macros/__CUDACC_RTC_INT128__.cu @@ -0,0 +1,21 @@ +//===----------------------------------------------------------------------===// +// +// Part of nvrtcc in CUDA Core Compute Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#if defined(__CUDACC_RTC__) +# define EXPECT_CUDACC_RTC_INT128 +#endif // __CUDACC_RTC__ +#include "../common/check_predefined_macros.h" + +#if defined(EXPECT_CUDACC_RTC_INT128) +__global__ void kernel(__int128* value) +{ + *value = __int128{1}; +} +#endif // EXPECT_CUDACC_RTC_INT128 diff --git a/nvrtcc/test/predefined_macros/__CUDA_ARCH_LIST__.cu b/nvrtcc/test/predefined_macros/__CUDA_ARCH_LIST__.cu new file mode 100644 index 00000000000..c6712fd1022 --- /dev/null +++ b/nvrtcc/test/predefined_macros/__CUDA_ARCH_LIST__.cu @@ -0,0 +1,24 @@ +//===----------------------------------------------------------------------===// +// +// Part of nvrtcc in CUDA Core Compute Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#include "../common/check_predefined_macros.h" + +template +struct ArchList +{}; + +template +__host__ __device__ constexpr bool test_cuda_arch_list(ArchList, ArchList) noexcept +{ + static_assert(sizeof...(Vs) == sizeof...(Refs)); + return ((Vs == Refs) && ...); +} + +static_assert(test_cuda_arch_list(ArchList<__CUDA_ARCH_LIST__>{}, ArchList<750, 800, 890, 900>{})); diff --git a/nvrtcc/test/predefined_macros/default.cu b/nvrtcc/test/predefined_macros/default.cu new file mode 100644 index 00000000000..4649bae0c2d --- /dev/null +++ b/nvrtcc/test/predefined_macros/default.cu @@ -0,0 +1,11 @@ +//===----------------------------------------------------------------------===// +// +// Part of nvrtcc in CUDA Core Compute Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#include "../common/check_predefined_macros.h"