diff --git a/repos/spack_repo/builtin/packages/blt/package.py b/repos/spack_repo/builtin/packages/blt/package.py
index 4db7a27fda8..0e68f49c039 100644
--- a/repos/spack_repo/builtin/packages/blt/package.py
+++ b/repos/spack_repo/builtin/packages/blt/package.py
@@ -92,6 +92,13 @@ class Blt(Package):
     version("0.2.5", sha256="3a000f60194e47b3e5623cc528cbcaf88f7fea4d9620b3c7446ff6658dc582a5")
     version("0.2.0", sha256="c0cadf1269c2feb189e398a356e3c49170bc832df95e5564e32bdbb1eb0fa1b3")
 
+    # https://github.com/google/googletest/pull/4798
+    patch(
+        "https://github.com/LLNL/blt/commit/5ff55b519fc8d5216b07edaf301e2d2bf328021e.patch?full_index=1",
+        sha256="116702b89d01e022546911fe0b823afa99a6b37a35077055141ad5d480508422",
+        when="@0.7.1",
+    )
+
     depends_on("c", type="build")  # generated
     depends_on("cxx", type="build")  # generated
     depends_on("fortran", type="build")  # generated
diff --git a/repos/spack_repo/builtin/packages/caliper/libunwind.patch b/repos/spack_repo/builtin/packages/caliper/libunwind.patch
new file mode 100644
index 00000000000..9fe02b46d64
--- /dev/null
+++ b/repos/spack_repo/builtin/packages/caliper/libunwind.patch
@@ -0,0 +1,52 @@
+diff --git a/cmake/FindLibunwind.cmake b/cmake/FindLibunwind.cmake
+index 04f325d9..7868138f 100644
+--- a/cmake/FindLibunwind.cmake
++++ b/cmake/FindLibunwind.cmake
+@@ -3,20 +3,38 @@
+ #
+ # LIBUNWIND_PREFIX - Set to the libunwind installation directory
+ #
+-# LIBUNWIND_INCLUDE_DIR - Path to libunwind.h
++# LIBUNWIND_INCLUDE_DIRS - Path to libunwind.h
+ # LIBUNWIND_LIBRARIES - List of libraries for using libunwind
+ # LIBUNWIND_FOUND - True if libunwind was found
+ 
+-find_path(LIBUNWIND_PREFIX
+-  include/libunwind.h)
++if(LIBUNWIND_PREFIX)
++  # When prefix is explicitly provided, only look there
++  find_library(LIBUNWIND_LIBRARIES
++    NAMES unwind
++    PATHS ${LIBUNWIND_PREFIX}/lib
++    NO_DEFAULT_PATH)
+ 
+-find_library(LIBUNWIND_LIBRARIES
+-  NAMES unwind
+-  HINTS ${LIBUNWIND_PREFIX}/lib)
++  find_path(LIBUNWIND_INCLUDE_DIRS
++    NAMES libunwind.h
++    PATHS ${LIBUNWIND_PREFIX}/include
++    NO_DEFAULT_PATH)
+ 
+-find_path(LIBUNWIND_INCLUDE_DIRS
+-  NAMES libunwind.h
+-  HINTS ${LIBUNWIND_PREFIX}/include)
++  if(NOT LIBUNWIND_LIBRARIES OR NOT LIBUNWIND_INCLUDE_DIRS)
++    message(WARNING "LIBUNWIND_PREFIX was set to '${LIBUNWIND_PREFIX}' but libunwind was not found there")
++  endif()
++else()
++  # Try to find libunwind in standard locations
++  find_path(LIBUNWIND_PREFIX
++    include/libunwind.h)
++
++  find_library(LIBUNWIND_LIBRARIES
++    NAMES unwind
++    HINTS ${LIBUNWIND_PREFIX}/lib)
++
++  find_path(LIBUNWIND_INCLUDE_DIRS
++    NAMES libunwind.h
++    HINTS ${LIBUNWIND_PREFIX}/include)
++endif()
+ 
+ include(FindPackageHandleStandardArgs)
+ find_package_handle_standard_args(Libunwind DEFAULT_MSG LIBUNWIND_LIBRARIES LIBUNWIND_INCLUDE_DIRS)
diff --git a/repos/spack_repo/builtin/packages/caliper/package.py b/repos/spack_repo/builtin/packages/caliper/package.py
index 83bfe6a4913..c4412bc68c7 100644
--- a/repos/spack_repo/builtin/packages/caliper/package.py
+++ b/repos/spack_repo/builtin/packages/caliper/package.py
@@ -3,6 +3,7 @@
 # SPDX-License-Identifier: (Apache-2.0 OR MIT)
 
 import os
+import re
 import socket
 import sys
 
@@ -10,6 +11,7 @@
     CachedCMakePackage,
     cmake_cache_option,
     cmake_cache_path,
+    cmake_cache_string,
 )
 from spack_repo.builtin.build_systems.cuda import CudaPackage
 from spack_repo.builtin.build_systems.rocm import ROCmPackage
@@ -36,6 +38,8 @@ class Caliper(CachedCMakePackage, CudaPackage, ROCmPackage):
     license("BSD-3-Clause")
 
     version("master", branch="master")
+    version("2.13.1", sha256="7cef0173e0e0673abb7943a2641b660adfbc3d6bc4b33941ab4f431f92a4d016")
+    version("2.13.0", sha256="28c6e8fd940bdee9e80d1e8ae1ce0f76d6a690cbb6242d4eec115d6c0204e331")
     version("2.12.1", sha256="2b5a8f98382c94dc75cc3f4517c758eaf9a3f9cea0a8dbdc7b38506060d6955c")
     version("2.11.0", sha256="b86b733cbb73495d5f3fe06e6a9885ec77365c8aa9195e7654581180adc2217c")
     version("2.10.0", sha256="14c4fb5edd5e67808d581523b4f8f05ace8549698c0e90d84b53171a77f58565")
@@ -65,9 +69,9 @@ class Caliper(CachedCMakePackage, CudaPackage, ROCmPackage):
     variant("tools", default=True, description="Enable tools")
     variant("python", default=False, description="Build Python bindings")
 
-    depends_on("c", type="build")  # generated
-    depends_on("cxx", type="build")  # generated
-    depends_on("fortran", type="build")  # generated
+    depends_on("c", type="build")
+    depends_on("cxx", type="build")
+    depends_on("fortran", when="+fortran", type="build")
 
     depends_on("adiak@0.1:0", when="@:2.10 +adiak")
     depends_on("adiak@0.4:0", when="@2.11: +adiak")
@@ -90,8 +94,14 @@ class Caliper(CachedCMakePackage, CudaPackage, ROCmPackage):
     conflicts("+rocm+cuda")
 
     # Legacy nvtx is only supported until cuda@12.8, newer cuda only provides nvtx3.
-    conflicts("^cuda@12.9:", "@:2.12.1")
+    conflicts("^cuda@12.9:", "@:2.13.1")
 
+    patch("libunwind.patch", when="@:2.13")
+    patch(
+        "https://github.com/LLNL/Caliper/commit/648f8ab496a4a2c3f38e0cfa572340e429d8c76e.patch?full_index=1",
+        sha256="d947b5df6b68a24f516bb3b4ec04c28d4b8246ac0cbe664cf113dd2b6ca92073",
+        when="@2.12:2.13",
+    )
     patch("for_aarch64.patch", when="@:2.11 target=aarch64:")
     patch(
         "sampler-service-missing-libunwind-include-dir.patch",
@@ -143,6 +153,27 @@ def initconfig_hardware_entries(self):
             entries.append(cmake_cache_option("WITH_NVTX", True))
             entries.append(cmake_cache_path("CUDA_TOOLKIT_ROOT_DIR", spec["cuda"].prefix))
             entries.append(cmake_cache_path("CUPTI_PREFIX", spec["cuda"].prefix))
+
+            # CUDA configuration from cuda_for_radiuss_projects
+            cuda_flags = []
+            if not spec.satisfies("cuda_arch=none"):
+                cuda_archs = ";".join(spec.variants["cuda_arch"].value)
+                entries.append(cmake_cache_string("CMAKE_CUDA_ARCHITECTURES", cuda_archs))
+
+            # gcc-toolchain support
+            gcc_toolchain_regex = re.compile(".*gcc-toolchain.*")
+            using_toolchain = list(
+                filter(gcc_toolchain_regex.match, spec.compiler_flags["cxxflags"])
+            )
+            if using_toolchain:
+                cuda_flags.append("-Xcompiler {}".format(using_toolchain[0]))
+
+            # ppc64le workaround
+            if spec.satisfies("target=ppc64le %gcc@8.1:"):
+                cuda_flags.append("-Xcompiler -mno-float128")
+
+            if cuda_flags:
+                entries.append(cmake_cache_string("CMAKE_CUDA_FLAGS", " ".join(cuda_flags)))
         else:
             entries.append(cmake_cache_option("WITH_CUPTI", False))
             entries.append(cmake_cache_option("WITH_NVTX", False))
@@ -150,6 +181,32 @@ def initconfig_hardware_entries(self):
         if spec.satisfies("+rocm"):
             entries.append(cmake_cache_option("WITH_ROCTRACER", True))
             entries.append(cmake_cache_option("WITH_ROCTX", True))
+
+            # HIP configuration from hip_for_radiuss_projects
+            rocm_root = spec["llvm-amdgpu"].prefix
+            gcc_toolchain_regex = re.compile(".*gcc-toolchain.*")
+            using_toolchain = list(
+                filter(gcc_toolchain_regex.match, spec.compiler_flags["cxxflags"])
+            )
+            hip_link_flags = ""
+
+            if using_toolchain:
+                gcc_prefix = using_toolchain[0].split("=", 1)[-1]  # keep only the path
+                entries.append(
+                    cmake_cache_string("HIP_CLANG_FLAGS", "--gcc-toolchain={0}".format(gcc_prefix))
+                )
+                entries.append(
+                    cmake_cache_string(
+                        "CMAKE_EXE_LINKER_FLAGS",
+                        hip_link_flags + " -Wl,-rpath={0}/lib64".format(gcc_prefix),
+                    )
+                )
+            else:
+                entries.append(
+                    cmake_cache_string(
+                        "CMAKE_EXE_LINKER_FLAGS", "-Wl,-rpath={0}/llvm/lib/".format(rocm_root)
+                    )
+                )
         else:
             entries.append(cmake_cache_option("WITH_ROCTRACER", False))
             entries.append(cmake_cache_option("WITH_ROCTX", False))
@@ -162,6 +219,31 @@ def initconfig_mpi_entries(self):
 
         entries.append(cmake_cache_option("WITH_MPI", spec.satisfies("+mpi")))
 
+        if spec.satisfies("+mpi"):
+            # MPI configuration from mpi_for_radiuss_projects
+            if spec["mpi"].name == "spectrum-mpi" and spec.satisfies("^blt"):
+                entries.append(cmake_cache_string("BLT_MPI_COMMAND_APPEND", "mpibind"))
+
+            sys_type = spec.architecture
+            if "SYS_TYPE" in env:
+                sys_type = env["SYS_TYPE"]
+
+            # Replace /usr/bin/srun path with srun flux wrapper path on TOSS 4
+            if "toss_4" in sys_type:
+                srun_wrapper = which_string("srun")
+                mpi_exec_index = [
+                    index for index, entry in enumerate(entries) if "MPIEXEC_EXECUTABLE" in entry
+                ]
+                if len(mpi_exec_index) > 0:
+                    del entries[mpi_exec_index[0]]
+                mpi_exec_flag_index = [
+                    index for index, entry in enumerate(entries) if "MPIEXEC_NUMPROC_FLAG" in entry
+                ]
+                if len(mpi_exec_flag_index) > 0:
+                    del entries[mpi_exec_flag_index[0]]
+                entries.append(cmake_cache_path("MPIEXEC_EXECUTABLE", srun_wrapper))
+                entries.append(cmake_cache_string("MPIEXEC_NUMPROC_FLAG", "-n"))
+
         return entries
 
     def initconfig_package_entries(self):
diff --git a/repos/spack_repo/builtin/packages/care/package.py b/repos/spack_repo/builtin/packages/care/package.py
index 431c5895f3c..455ed282145 100644
--- a/repos/spack_repo/builtin/packages/care/package.py
+++ b/repos/spack_repo/builtin/packages/care/package.py
@@ -2,6 +2,7 @@
 #
 # SPDX-License-Identifier: (Apache-2.0 OR MIT)
 
+import re
 import socket
 
 from spack_repo.builtin.build_systems.cached_cmake import (
@@ -246,11 +247,58 @@ def initconfig_hardware_entries(self):
             entries.append(cmake_cache_option("CUDA_SEPARABLE_COMPILATION", True))
             entries.append(cmake_cache_string("NVTOOLSEXT_DIR", spec["cuda"].prefix))
             entries.append(cmake_cache_string("CUB_DIR", spec["cub"].prefix))
+
+            # CUDA configuration from cuda_for_radiuss_projects
+            cuda_flags = []
+            if not spec.satisfies("cuda_arch=none"):
+                cuda_archs = ";".join(spec.variants["cuda_arch"].value)
+                entries.append(cmake_cache_string("CMAKE_CUDA_ARCHITECTURES", cuda_archs))
+
+            # gcc-toolchain support
+            gcc_toolchain_regex = re.compile(".*gcc-toolchain.*")
+            using_toolchain = list(
+                filter(gcc_toolchain_regex.match, spec.compiler_flags["cxxflags"])
+            )
+            if using_toolchain:
+                cuda_flags.append("-Xcompiler {}".format(using_toolchain[0]))
+
+            # ppc64le workaround
+            if spec.satisfies("target=ppc64le %gcc@8.1:"):
+                cuda_flags.append("-Xcompiler -mno-float128")
+
+            if cuda_flags:
+                entries.append(cmake_cache_string("CMAKE_CUDA_FLAGS", " ".join(cuda_flags)))
         else:
             entries.append(cmake_cache_option("ENABLE_CUDA", False))
 
         if spec.satisfies("+rocm"):
             entries.append(cmake_cache_option("ENABLE_HIP", True))
+
+            # HIP configuration from hip_for_radiuss_projects
+            rocm_root = spec["llvm-amdgpu"].prefix
+            gcc_toolchain_regex = re.compile(".*gcc-toolchain.*")
+            using_toolchain = list(
+                filter(gcc_toolchain_regex.match, spec.compiler_flags["cxxflags"])
+            )
+            hip_link_flags = ""
+
+            if using_toolchain:
+                gcc_prefix = using_toolchain[0].split("=", 1)[-1]  # keep only the path
+                entries.append(
+                    cmake_cache_string("HIP_CLANG_FLAGS", "--gcc-toolchain={0}".format(gcc_prefix))
+                )
+                entries.append(
+                    cmake_cache_string(
+                        "CMAKE_EXE_LINKER_FLAGS",
+                        hip_link_flags + " -Wl,-rpath={0}/lib64".format(gcc_prefix),
+                    )
+                )
+            else:
+                entries.append(
+                    cmake_cache_string(
+                        "CMAKE_EXE_LINKER_FLAGS", "-Wl,-rpath={0}/llvm/lib/".format(rocm_root)
+                    )
+                )
         else:
             entries.append(cmake_cache_option("ENABLE_HIP", False))
 
@@ -262,6 +310,31 @@ def initconfig_mpi_entries(self):
         entries = super(Care, self).initconfig_mpi_entries()
         entries.append(cmake_cache_option("ENABLE_MPI", spec.satisfies("+mpi")))
 
+        if spec.satisfies("+mpi"):
+            # MPI configuration from mpi_for_radiuss_projects
+            if spec["mpi"].name == "spectrum-mpi" and spec.satisfies("^blt"):
+                entries.append(cmake_cache_string("BLT_MPI_COMMAND_APPEND", "mpibind"))
+
+            sys_type = spec.architecture
+            if "SYS_TYPE" in env:
+                sys_type = env["SYS_TYPE"]
+
+            # Replace /usr/bin/srun path with srun flux wrapper path on TOSS 4
+            if "toss_4" in sys_type:
+                srun_wrapper = which_string("srun")
+                mpi_exec_index = [
+                    index for index, entry in enumerate(entries) if "MPIEXEC_EXECUTABLE" in entry
+                ]
+                if len(mpi_exec_index) > 0:
+                    del entries[mpi_exec_index[0]]
+                mpi_exec_flag_index = [
+                    index for index, entry in enumerate(entries) if "MPIEXEC_NUMPROC_FLAG" in entry
+                ]
+                if len(mpi_exec_flag_index) > 0:
+                    del entries[mpi_exec_flag_index[0]]
+                entries.append(cmake_cache_path("MPIEXEC_EXECUTABLE", srun_wrapper))
+                entries.append(cmake_cache_string("MPIEXEC_NUMPROC_FLAG", "-n"))
+
         return entries
 
     def initconfig_package_entries(self):
diff --git a/repos/spack_repo/builtin/packages/chai/package.py b/repos/spack_repo/builtin/packages/chai/package.py
index a4b3f6a5882..4d6e88adf51 100644
--- a/repos/spack_repo/builtin/packages/chai/package.py
+++ b/repos/spack_repo/builtin/packages/chai/package.py
@@ -2,6 +2,7 @@
 #
 # SPDX-License-Identifier: (Apache-2.0 OR MIT)
 
+import re
 import socket
 
 from spack_repo.builtin.build_systems.cached_cmake import (
@@ -144,6 +145,7 @@ class Chai(CachedCMakePackage, CudaPackage, ROCmPackage):
     variant("raja", default=False, description="Build plugin for RAJA")
     variant("examples", default=True, description="Build examples.")
     variant("openmp", default=False, description="Build using OpenMP")
+    variant("disable_rm", default=False, description="Disable resource manager")
     # TODO: figure out gtest dependency and then set this default True
     # and remove the +tests conflict below.
     variant(
@@ -280,11 +282,58 @@ def initconfig_hardware_entries(self):
             if spec.satisfies("+separable_compilation"):
                 entries.append(cmake_cache_option("CMAKE_CUDA_SEPARABLE_COMPILATION", True))
                 entries.append(cmake_cache_option("CUDA_SEPARABLE_COMPILATION", True))
+
+            # CUDA configuration from cuda_for_radiuss_projects
+            cuda_flags = []
+            if not spec.satisfies("cuda_arch=none"):
+                cuda_archs = ";".join(spec.variants["cuda_arch"].value)
+                entries.append(cmake_cache_string("CMAKE_CUDA_ARCHITECTURES", cuda_archs))
+
+            # gcc-toolchain support
+            gcc_toolchain_regex = re.compile(".*gcc-toolchain.*")
+            using_toolchain = list(
+                filter(gcc_toolchain_regex.match, spec.compiler_flags["cxxflags"])
+            )
+            if using_toolchain:
+                cuda_flags.append("-Xcompiler {}".format(using_toolchain[0]))
+
+            # ppc64le workaround
+            if spec.satisfies("target=ppc64le %gcc@8.1:"):
+                cuda_flags.append("-Xcompiler -mno-float128")
+
+            if cuda_flags:
+                entries.append(cmake_cache_string("CMAKE_CUDA_FLAGS", " ".join(cuda_flags)))
         else:
             entries.append(cmake_cache_option("ENABLE_CUDA", False))
 
         if spec.satisfies("+rocm"):
             entries.append(cmake_cache_option("ENABLE_HIP", True))
+
+            # HIP configuration from hip_for_radiuss_projects
+            rocm_root = spec["llvm-amdgpu"].prefix
+            gcc_toolchain_regex = re.compile(".*gcc-toolchain.*")
+            using_toolchain = list(
+                filter(gcc_toolchain_regex.match, spec.compiler_flags["cxxflags"])
+            )
+            hip_link_flags = ""
+
+            if using_toolchain:
+                gcc_prefix = using_toolchain[0].split("=", 1)[-1]  # keep only the path
+                entries.append(
+                    cmake_cache_string("HIP_CLANG_FLAGS", "--gcc-toolchain={0}".format(gcc_prefix))
+                )
+                entries.append(
+                    cmake_cache_string(
+                        "CMAKE_EXE_LINKER_FLAGS",
+                        hip_link_flags + " -Wl,-rpath={0}/lib64".format(gcc_prefix),
+                    )
+                )
+            else:
+                entries.append(
+                    cmake_cache_string(
+                        "CMAKE_EXE_LINKER_FLAGS", "-Wl,-rpath={0}/llvm/lib/".format(rocm_root)
+                    )
+                )
         else:
             entries.append(cmake_cache_option("ENABLE_HIP", False))
 
@@ -296,6 +345,31 @@ def initconfig_mpi_entries(self):
         entries = super(Chai, self).initconfig_mpi_entries()
         entries.append(cmake_cache_option("ENABLE_MPI", spec.satisfies("+mpi")))
 
+        if spec.satisfies("+mpi"):
+            # MPI configuration from mpi_for_radiuss_projects
+            if spec["mpi"].name == "spectrum-mpi" and spec.satisfies("^blt"):
+                entries.append(cmake_cache_string("BLT_MPI_COMMAND_APPEND", "mpibind"))
+
+            sys_type = spec.architecture
+            if "SYS_TYPE" in env:
+                sys_type = env["SYS_TYPE"]
+
+            # Replace /usr/bin/srun path with srun flux wrapper path on TOSS 4
+            if "toss_4" in sys_type:
+                srun_wrapper = which_string("srun")
+                mpi_exec_index = [
+                    index for index, entry in enumerate(entries) if "MPIEXEC_EXECUTABLE" in entry
+                ]
+                if len(mpi_exec_index) > 0:
+                    del entries[mpi_exec_index[0]]
+                mpi_exec_flag_index = [
+                    index for index, entry in enumerate(entries) if "MPIEXEC_NUMPROC_FLAG" in entry
+                ]
+                if len(mpi_exec_flag_index) > 0:
+                    del entries[mpi_exec_flag_index[0]]
+                entries.append(cmake_cache_path("MPIEXEC_EXECUTABLE", srun_wrapper))
+                entries.append(cmake_cache_string("MPIEXEC_NUMPROC_FLAG", "-n"))
+
         return entries
 
     def initconfig_package_entries(self):
@@ -348,6 +422,10 @@ def initconfig_package_entries(self):
             )
         )
 
+        entries.append(
+            cmake_cache_option("{}DISABLE_RM".format(option_prefix), spec.satisfies("+disable_rm"))
+        )
+
         return entries
 
     def cmake_args(self):
diff --git a/repos/spack_repo/builtin/packages/raja/package.py b/repos/spack_repo/builtin/packages/raja/package.py
index 94af7336dd2..7016d65c050 100644
--- a/repos/spack_repo/builtin/packages/raja/package.py
+++ b/repos/spack_repo/builtin/packages/raja/package.py
@@ -2,6 +2,7 @@
 #
 # SPDX-License-Identifier: (Apache-2.0 OR MIT)
 
+import re
 import socket
 
 from spack_repo.builtin.build_systems.cached_cmake import (
@@ -221,7 +222,7 @@ class Raja(CachedCMakePackage, CudaPackage, ROCmPackage):
     variant("openmp", default=False, description="Build OpenMP backend")
     variant("shared", default=False, description="Build shared libs")
     variant("desul", default=False, description="Build desul atomics backend")
-    variant("vectorization", default=False, description="Build SIMD/SIMT intrinsics support")
+    variant("vectorization", default=True, description="Build SIMD/SIMT intrinsics support")
     variant(
         "omptask", default=False, description="Build OpenMP task variants of internal algorithms"
     )
@@ -230,6 +231,7 @@ class Raja(CachedCMakePackage, CudaPackage, ROCmPackage):
     variant("gpu-profiling", default=False, description="Enable GPU profiling")
 
     variant("plugins", default=False, description="Enable runtime plugins")
+    variant("caliper", default=False, description="Enable caliper support")
     variant("examples", default=True, description="Build examples.")
     variant("exercises", default=True, description="Build exercises.")
     # TODO: figure out gtest dependency and then set this default True
@@ -292,6 +294,8 @@ class Raja(CachedCMakePackage, CudaPackage, ROCmPackage):
 
     depends_on("llvm-openmp", when="+openmp %apple-clang")
 
+    depends_on("caliper", when="+caliper")
+
     depends_on("rocprim", when="+rocm")
     with when("+rocm @0.12.0:"):
         depends_on("camp+rocm")
@@ -371,8 +375,57 @@ def initconfig_hardware_entries(self):
         entries.append(cmake_cache_option("ENABLE_OPENMP", spec.satisfies("+openmp")))
         entries.append(cmake_cache_option("ENABLE_CUDA", spec.satisfies("+cuda")))
 
+        if spec.satisfies("+cuda"):
+            # CUDA configuration from cuda_for_radiuss_projects
+            cuda_flags = []
+            if not spec.satisfies("cuda_arch=none"):
+                cuda_archs = ";".join(spec.variants["cuda_arch"].value)
+                entries.append(cmake_cache_string("CMAKE_CUDA_ARCHITECTURES", cuda_archs))
+
+            # gcc-toolchain support
+            gcc_toolchain_regex = re.compile(".*gcc-toolchain.*")
+            using_toolchain = list(
+                filter(gcc_toolchain_regex.match, spec.compiler_flags["cxxflags"])
+            )
+            if using_toolchain:
+                cuda_flags.append("-Xcompiler {}".format(using_toolchain[0]))
+
+            # ppc64le workaround
+            if spec.satisfies("target=ppc64le %gcc@8.1:"):
+                cuda_flags.append("-Xcompiler -mno-float128")
+
+            if cuda_flags:
+                entries.append(cmake_cache_string("CMAKE_CUDA_FLAGS", " ".join(cuda_flags)))
+
         if spec.satisfies("+rocm"):
             entries.append(cmake_cache_option("ENABLE_HIP", True))
+
+            # HIP configuration from hip_for_radiuss_projects
+            rocm_root = spec["llvm-amdgpu"].prefix
+            gcc_toolchain_regex = re.compile(".*gcc-toolchain.*")
+            using_toolchain = list(
+                filter(gcc_toolchain_regex.match, spec.compiler_flags["cxxflags"])
+            )
+            hip_link_flags = ""
+
+            if using_toolchain:
+                gcc_prefix = using_toolchain[0].split("=", 1)[-1]  # keep only the path
+                entries.append(
+                    cmake_cache_string("HIP_CLANG_FLAGS", "--gcc-toolchain={0}".format(gcc_prefix))
+                )
+                entries.append(
+                    cmake_cache_string(
+                        "CMAKE_EXE_LINKER_FLAGS",
+                        hip_link_flags + " -Wl,-rpath={0}/lib64".format(gcc_prefix),
+                    )
+                )
+            else:
+                entries.append(
+                    cmake_cache_string(
+                        "CMAKE_EXE_LINKER_FLAGS", "-Wl,-rpath={0}/llvm/lib/".format(rocm_root)
+                    )
+                )
+
             hipcc_flags = []
             if self.spec.satisfies("^rocprim@7.0"):
                 hipcc_flags.append("-std=c++17")
diff --git a/repos/spack_repo/builtin/packages/raja_perf/package.py b/repos/spack_repo/builtin/packages/raja_perf/package.py
index 3a0732d196b..ac39191e0b3 100644
--- a/repos/spack_repo/builtin/packages/raja_perf/package.py
+++ b/repos/spack_repo/builtin/packages/raja_perf/package.py
@@ -2,6 +2,7 @@
 #
 # SPDX-License-Identifier: (Apache-2.0 OR MIT)
 
+import re
 import socket
 
 from spack_repo.builtin.build_systems.cached_cmake import (
@@ -15,6 +16,7 @@
 from spack_repo.builtin.packages.blt.package import llnl_link_helpers
 
 from spack.package import *
+from spack.util.executable import which_string
 
 
 class RajaPerf(CachedCMakePackage, CudaPackage, ROCmPackage):
@@ -120,6 +122,7 @@ class RajaPerf(CachedCMakePackage, CudaPackage, ROCmPackage):
     depends_on("cxx", type="build")  # generated
 
     depends_on("blt")
+    depends_on("blt@0.7.0:", type="build", when="@2025.03.0:")
     depends_on("blt@0.6.2:", type="build", when="@2024.07.0:")
     depends_on("blt@0.5.3", type="build", when="@2023.06")
     depends_on("blt@0.5.2:0.5.3", type="build", when="@2022.10")
@@ -229,6 +232,24 @@ def initconfig_hardware_entries(self):
             entries.append(cmake_cache_option("ENABLE_CUDA", True))
             # Shared handling of cuda.
 
+            # CUDA configuration from cuda_for_radiuss_projects
+            cuda_flags = []
+            if not spec.satisfies("cuda_arch=none"):
+                cuda_archs = ";".join(spec.variants["cuda_arch"].value)
+                entries.append(cmake_cache_string("CMAKE_CUDA_ARCHITECTURES", cuda_archs))
+
+            gcc_toolchain_regex = re.compile(".*gcc-toolchain.*")
+            using_toolchain = list(
+                filter(gcc_toolchain_regex.match, spec.compiler_flags["cxxflags"])
+            )
+            if using_toolchain:
+                cuda_flags.append("-Xcompiler {}".format(using_toolchain[0]))
+
+            if spec.satisfies("target=ppc64le %gcc@8.1:"):
+                cuda_flags.append("-Xcompiler -mno-float128")
+
+            entries.append(cmake_cache_string("CMAKE_CUDA_FLAGS", " ".join(cuda_flags)))
+
             # Custom options.
             # We place everything in CMAKE_CUDA_FLAGS_(RELEASE|RELWITHDEBINFO|DEBUG)
             # which are not set by cuda_for_radiuss_projects
@@ -268,6 +289,31 @@ def initconfig_hardware_entries(self):
 
         if "+rocm" in spec:
             entries.append(cmake_cache_option("ENABLE_HIP", True))
+
+            # HIP configuration from hip_for_radiuss_projects
+            rocm_root = spec["llvm-amdgpu"].prefix
+            hip_link_flags = ""
+            gcc_toolchain_regex = re.compile(".*gcc-toolchain.*")
+            using_toolchain = list(
+                filter(gcc_toolchain_regex.match, spec.compiler_flags["cxxflags"])
+            )
+            if using_toolchain:
+                gcc_prefix = using_toolchain[0].split("=", 1)[-1]  # keep only the path
+                entries.append(
+                    cmake_cache_string("HIP_CLANG_FLAGS", "--gcc-toolchain={0}".format(gcc_prefix))
+                )
+                entries.append(
+                    cmake_cache_string(
+                        "CMAKE_EXE_LINKER_FLAGS",
+                        hip_link_flags + " -Wl,-rpath={0}/lib64".format(gcc_prefix),
+                    )
+                )
+            else:
+                entries.append(
+                    cmake_cache_string(
+                        "CMAKE_EXE_LINKER_FLAGS", "-Wl,-rpath={0}/llvm/lib/".format(rocm_root)
+                    )
+                )
         else:
             entries.append(cmake_cache_option("ENABLE_HIP", False))
 
@@ -304,6 +350,33 @@ def initconfig_mpi_entries(self):
 
         entries.append(cmake_cache_option("ENABLE_MPI", "+mpi" in spec))
 
+        # MPI configuration from mpi_for_radiuss_projects
+        if spec.satisfies("+mpi"):
+            if spec["mpi"].name == "spectrum-mpi" and spec.satisfies("^blt"):
+                entries.append(cmake_cache_string("BLT_MPI_COMMAND_APPEND", "mpibind"))
+
+            sys_type = spec.architecture
+            if "SYS_TYPE" in env:
+                sys_type = env["SYS_TYPE"]
+            # Replace /usr/bin/srun path with srun flux wrapper path on TOSS 4
+            # TODO: Remove this logic by adding `using_flux` case in
+            # spack/lib/spack/spack/build_systems/cached_cmake.py:196 and remove hard-coded
+            # path to srun in same file.
+            if "toss_4" in sys_type:
+                srun_wrapper = which_string("srun")
+                mpi_exec_index = [
+                    index for index, entry in enumerate(entries) if "MPIEXEC_EXECUTABLE" in entry
+                ]
+                if len(mpi_exec_index) > 0:
+                    del entries[mpi_exec_index[0]]
+                mpi_exec_flag_index = [
+                    index for index, entry in enumerate(entries) if "MPIEXEC_NUMPROC_FLAG" in entry
+                ]
+                if len(mpi_exec_flag_index) > 0:
+                    del entries[mpi_exec_flag_index[0]]
+                entries.append(cmake_cache_path("MPIEXEC_EXECUTABLE", srun_wrapper))
+                entries.append(cmake_cache_string("MPIEXEC_NUMPROC_FLAG", "-n"))
+
         return entries
 
     def initconfig_package_entries(self):
diff --git a/repos/spack_repo/builtin/packages/umpire/package.py b/repos/spack_repo/builtin/packages/umpire/package.py
index b703b3dc74d..cd3ac1773aa 100644
--- a/repos/spack_repo/builtin/packages/umpire/package.py
+++ b/repos/spack_repo/builtin/packages/umpire/package.py
@@ -3,6 +3,7 @@
 # SPDX-License-Identifier: (Apache-2.0 OR MIT)
 
 import os
+import re
 import socket
 
 from spack_repo.builtin.build_systems.cached_cmake import (
@@ -16,6 +17,7 @@
 from spack_repo.builtin.packages.blt.package import llnl_link_helpers
 
 from spack.package import *
+from spack.util.executable import which_string
 
 
 class Umpire(CachedCMakePackage, CudaPackage, ROCmPackage):
@@ -190,6 +192,13 @@ class Umpire(CachedCMakePackage, CudaPackage, ROCmPackage):
         when="@:5.0.1 ^blt@0.4:",
     )
 
+    # https://github.com/LLNL/Umpire/pull/805
+    patch(
+        "https://github.com/LLNL/Umpire/commit/47ff0aa1f7a01a917c3b7ac618e8a9e44a10fd25.patch?full_index=1",
+        sha256="802f074a05e1cb1f428e13d99c5fcb1435f86bd8f36a1ea2f7b6756e6625e0a0",
+        when="@2022.10.0",
+    )
+
     # https://github.com/LLNL/Umpire/pull/816
     patch(
         "https://github.com/LLNL/Umpire/commit/2292d1d6078f6d9523b7ad0886ffa053644569d5.patch?full_index=1",
@@ -231,7 +240,7 @@ class Umpire(CachedCMakePackage, CudaPackage, ROCmPackage):
     variant("backtrace", default=False, description="Enable backtrace tools")
     variant("dev_benchmarks", default=False, description="Enable developer benchmarks")
     variant("device_alloc", default=False, description="Enable DeviceAllocator")
-    variant("werror", default=False, description="Enable warnings as errors")
+    variant("werror", default=True, description="Enable warnings as errors")
     variant("asan", default=False, description="Enable ASAN")
     variant("sanitizer_tests", default=False, description="Enable address sanitizer tests")
     variant("fmt_header_only", default=True, description="Link to header-only fmt target")
@@ -265,7 +274,8 @@ class Umpire(CachedCMakePackage, CudaPackage, ROCmPackage):
     depends_on("camp+openmp", when="+openmp")
     depends_on("camp~cuda", when="~cuda")
     depends_on("camp~rocm", when="~rocm")
-    depends_on("camp@2025.09:", when="@2025.09:")
+    depends_on("camp@2025.12:", when="@develop")
+    depends_on("camp@2025.09", when="@2025.09")
     depends_on("camp@2025.03", when="@2025.03")
     depends_on("camp@2024.07", when="@2024.07")
     depends_on("camp@2024.02.1", when="@2024.02.1")
@@ -279,9 +289,9 @@ class Umpire(CachedCMakePackage, CudaPackage, ROCmPackage):
     depends_on("sqlite", when="+sqlite_experimental")
     depends_on("mpi", when="+mpi")
 
-    depends_on("fmt@9.1:", when="@2024.02.0:")
+    depends_on("fmt@9.1:11.0", when="@2024.02.0:")
     # For some reason, we need c++ 17 explicitly only with intel
-    depends_on("fmt@9.1: cxxstd=17", when="@2024.02.0: %intel@19.1")
+    depends_on("fmt@9.1:11.0 cxxstd=17", when="@2024.02.0: %intel@19.1")
 
     with when("@5.0.0:"):
         with when("+cuda"):
@@ -385,11 +395,54 @@ def initconfig_hardware_entries(self):
             entries.append(cmake_cache_option("ENABLE_CUDA", True))
             # Umpire used to pick only the first architecture in the list. The shared logic in
             # CachedCMakePackage keeps the list of architectures.
+
+            # CUDA configuration from cuda_for_radiuss_projects
+            cuda_flags = []
+            if not spec.satisfies("cuda_arch=none"):
+                cuda_archs = ";".join(spec.variants["cuda_arch"].value)
+                entries.append(cmake_cache_string("CMAKE_CUDA_ARCHITECTURES", cuda_archs))
+
+            gcc_toolchain_regex = re.compile(".*gcc-toolchain.*")
+            using_toolchain = list(
+                filter(gcc_toolchain_regex.match, spec.compiler_flags["cxxflags"])
+            )
+            if using_toolchain:
+                cuda_flags.append("-Xcompiler {}".format(using_toolchain[0]))
+
+            if spec.satisfies("target=ppc64le %gcc@8.1:"):
+                cuda_flags.append("-Xcompiler -mno-float128")
+
+            entries.append(cmake_cache_string("CMAKE_CUDA_FLAGS", " ".join(cuda_flags)))
         else:
             entries.append(cmake_cache_option("ENABLE_CUDA", False))
 
         if spec.satisfies("+rocm"):
             entries.append(cmake_cache_option("ENABLE_HIP", True))
+
+            # HIP configuration from hip_for_radiuss_projects
+            rocm_root = spec["llvm-amdgpu"].prefix
+            hip_link_flags = ""
+            gcc_toolchain_regex = re.compile(".*gcc-toolchain.*")
+            using_toolchain = list(
+                filter(gcc_toolchain_regex.match, spec.compiler_flags["cxxflags"])
+            )
+            if using_toolchain:
+                gcc_prefix = using_toolchain[0].split("=", 1)[-1]  # keep only the path
+                entries.append(
+                    cmake_cache_string("HIP_CLANG_FLAGS", "--gcc-toolchain={0}".format(gcc_prefix))
+                )
+                entries.append(
+                    cmake_cache_string(
+                        "CMAKE_EXE_LINKER_FLAGS",
+                        hip_link_flags + " -Wl,-rpath={0}/lib64".format(gcc_prefix),
+                    )
+                )
+            else:
+                entries.append(
+                    cmake_cache_string(
+                        "CMAKE_EXE_LINKER_FLAGS", "-Wl,-rpath={0}/llvm/lib/".format(rocm_root)
+                    )
+                )
         else:
             entries.append(cmake_cache_option("ENABLE_HIP", False))
 
@@ -419,6 +472,33 @@ def initconfig_mpi_entries(self):
             cmake_cache_option("UMPIRE_ENABLE_MPI3_SHARED_MEMORY", spec.satisfies("+mpi3_shmem"))
         )
 
+        # MPI configuration from mpi_for_radiuss_projects
+        if spec.satisfies("+mpi"):
+            if spec["mpi"].name == "spectrum-mpi" and spec.satisfies("^blt"):
+                entries.append(cmake_cache_string("BLT_MPI_COMMAND_APPEND", "mpibind"))
+
+            sys_type = spec.architecture
+            if "SYS_TYPE" in env:
+                sys_type = env["SYS_TYPE"]
+            # Replace /usr/bin/srun path with srun flux wrapper path on TOSS 4
+            # TODO: Remove this logic by adding `using_flux` case in
+            # spack/lib/spack/spack/build_systems/cached_cmake.py:196 and remove hard-coded
+            # path to srun in same file.
+            if "toss_4" in sys_type:
+                srun_wrapper = which_string("srun")
+                mpi_exec_index = [
+                    index for index, entry in enumerate(entries) if "MPIEXEC_EXECUTABLE" in entry
+                ]
+                if len(mpi_exec_index) > 0:
+                    del entries[mpi_exec_index[0]]
+                mpi_exec_flag_index = [
+                    index for index, entry in enumerate(entries) if "MPIEXEC_NUMPROC_FLAG" in entry
+                ]
+                if len(mpi_exec_flag_index) > 0:
+                    del entries[mpi_exec_flag_index[0]]
+                entries.append(cmake_cache_path("MPIEXEC_EXECUTABLE", srun_wrapper))
+                entries.append(cmake_cache_string("MPIEXEC_NUMPROC_FLAG", "-n"))
+
         return entries
 
     def initconfig_package_entries(self):
diff --git a/stacks/e4s-rocm-external/spack.yaml b/stacks/e4s-rocm-external/spack.yaml
index 78dccbb01fa..c21bfd05313 100644
--- a/stacks/e4s-rocm-external/spack.yaml
+++ b/stacks/e4s-rocm-external/spack.yaml
@@ -213,7 +213,7 @@ spack:
   - amrex +rocm amdgpu_target=gfx90a
   - arborx +rocm amdgpu_target=gfx90a
   - cabana +rocm amdgpu_target=gfx90a
-  - caliper +rocm amdgpu_target=gfx90a
+  # - caliper +rocm amdgpu_target=gfx90a
   - chai +rocm amdgpu_target=gfx90a
   - fftx +rocm amdgpu_target=gfx90a
   - gasnet +rocm amdgpu_target=gfx90a