diff --git a/repos/spack_repo/builtin/packages/blt/package.py b/repos/spack_repo/builtin/packages/blt/package.py
index 4db7a27fda8..0e68f49c039 100644
--- a/repos/spack_repo/builtin/packages/blt/package.py
+++ b/repos/spack_repo/builtin/packages/blt/package.py
@@ -92,6 +92,13 @@ class Blt(Package):
     version("0.2.5", sha256="3a000f60194e47b3e5623cc528cbcaf88f7fea4d9620b3c7446ff6658dc582a5")
     version("0.2.0", sha256="c0cadf1269c2feb189e398a356e3c49170bc832df95e5564e32bdbb1eb0fa1b3")
 
+    # https://github.com/google/googletest/pull/4798
+    patch(
+        "https://github.com/LLNL/blt/commit/5ff55b519fc8d5216b07edaf301e2d2bf328021e.patch?full_index=1",
+        sha256="116702b89d01e022546911fe0b823afa99a6b37a35077055141ad5d480508422",
+        when="@0.7.1",
+    )
+
     depends_on("c", type="build")  # generated
     depends_on("cxx", type="build")  # generated
     depends_on("fortran", type="build")  # generated
diff --git a/repos/spack_repo/builtin/packages/caliper/libunwind.patch b/repos/spack_repo/builtin/packages/caliper/libunwind.patch
new file mode 100644
index 00000000000..9fe02b46d64
--- /dev/null
+++ b/repos/spack_repo/builtin/packages/caliper/libunwind.patch
@@ -0,0 +1,52 @@
+diff --git a/cmake/FindLibunwind.cmake b/cmake/FindLibunwind.cmake
+index 04f325d9..7868138f 100644
+--- a/cmake/FindLibunwind.cmake
++++ b/cmake/FindLibunwind.cmake
+@@ -3,20 +3,38 @@
+ #
+ # LIBUNWIND_PREFIX      - Set to the libunwind installation directory
+ #
+-# LIBUNWIND_INCLUDE_DIR - Path to libunwind.h
++# LIBUNWIND_INCLUDE_DIRS - Path to libunwind.h
+ # LIBUNWIND_LIBRARIES   - List of libraries for using libunwind
+ # LIBUNWIND_FOUND       - True if libunwind was found
+ 
+-find_path(LIBUNWIND_PREFIX
+-  include/libunwind.h)
++if(LIBUNWIND_PREFIX)
++  # When prefix is explicitly provided, only look there
++  find_library(LIBUNWIND_LIBRARIES
++    NAMES unwind
++    PATHS ${LIBUNWIND_PREFIX}/lib
++    NO_DEFAULT_PATH)
+ 
+-find_library(LIBUNWIND_LIBRARIES
+-  NAMES unwind
+-  HINTS ${LIBUNWIND_PREFIX}/lib)
++  find_path(LIBUNWIND_INCLUDE_DIRS
++    NAMES libunwind.h
++    PATHS ${LIBUNWIND_PREFIX}/include
++    NO_DEFAULT_PATH)
+ 
+-find_path(LIBUNWIND_INCLUDE_DIRS
+-  NAMES libunwind.h
+-  HINTS ${LIBUNWIND_PREFIX}/include)
++  if(NOT LIBUNWIND_LIBRARIES OR NOT LIBUNWIND_INCLUDE_DIRS)
++    message(WARNING "LIBUNWIND_PREFIX was set to '${LIBUNWIND_PREFIX}' but libunwind was not found there")
++  endif()
++else()
++  # Try to find libunwind in standard locations
++  find_path(LIBUNWIND_PREFIX
++    include/libunwind.h)
++
++  find_library(LIBUNWIND_LIBRARIES
++    NAMES unwind
++    HINTS ${LIBUNWIND_PREFIX}/lib)
++
++  find_path(LIBUNWIND_INCLUDE_DIRS
++    NAMES libunwind.h
++    HINTS ${LIBUNWIND_PREFIX}/include)
++endif()
+ 
+ include(FindPackageHandleStandardArgs)
+ find_package_handle_standard_args(Libunwind DEFAULT_MSG LIBUNWIND_LIBRARIES LIBUNWIND_INCLUDE_DIRS)
diff --git a/repos/spack_repo/builtin/packages/caliper/package.py b/repos/spack_repo/builtin/packages/caliper/package.py
index 83bfe6a4913..c4412bc68c7 100644
--- a/repos/spack_repo/builtin/packages/caliper/package.py
+++ b/repos/spack_repo/builtin/packages/caliper/package.py
@@ -3,6 +3,7 @@
 # SPDX-License-Identifier: (Apache-2.0 OR MIT)
 
 import os
+import re
 import socket
 import sys
 
@@ -10,6 +11,7 @@
     CachedCMakePackage,
     cmake_cache_option,
     cmake_cache_path,
+    cmake_cache_string,
 )
 from spack_repo.builtin.build_systems.cuda import CudaPackage
 from spack_repo.builtin.build_systems.rocm import ROCmPackage
@@ -36,6 +38,8 @@ class Caliper(CachedCMakePackage, CudaPackage, ROCmPackage):
     license("BSD-3-Clause")
 
     version("master", branch="master")
+    version("2.13.1", sha256="7cef0173e0e0673abb7943a2641b660adfbc3d6bc4b33941ab4f431f92a4d016")
+    version("2.13.0", sha256="28c6e8fd940bdee9e80d1e8ae1ce0f76d6a690cbb6242d4eec115d6c0204e331")
     version("2.12.1", sha256="2b5a8f98382c94dc75cc3f4517c758eaf9a3f9cea0a8dbdc7b38506060d6955c")
     version("2.11.0", sha256="b86b733cbb73495d5f3fe06e6a9885ec77365c8aa9195e7654581180adc2217c")
     version("2.10.0", sha256="14c4fb5edd5e67808d581523b4f8f05ace8549698c0e90d84b53171a77f58565")
@@ -65,9 +69,9 @@ class Caliper(CachedCMakePackage, CudaPackage, ROCmPackage):
     variant("tools", default=True, description="Enable tools")
     variant("python", default=False, description="Build Python bindings")
 
-    depends_on("c", type="build")  # generated
-    depends_on("cxx", type="build")  # generated
-    depends_on("fortran", type="build")  # generated
+    depends_on("c", type="build")
+    depends_on("cxx", type="build")
+    depends_on("fortran", when="+fortran", type="build")
 
     depends_on("adiak@0.1:0", when="@:2.10 +adiak")
     depends_on("adiak@0.4:0", when="@2.11: +adiak")
@@ -90,8 +94,14 @@ class Caliper(CachedCMakePackage, CudaPackage, ROCmPackage):
 
     conflicts("+rocm+cuda")
     # Legacy nvtx is only supported until cuda@12.8, newer cuda only provides nvtx3.
-    conflicts("^cuda@12.9:", "@:2.12.1")
+    conflicts("^cuda@12.9:", "@:2.13.1")
 
+    patch("libunwind.patch", when="@:2.13")
+    patch(
+        "https://github.com/LLNL/Caliper/commit/648f8ab496a4a2c3f38e0cfa572340e429d8c76e.patch?full_index=1",
+        sha256="d947b5df6b68a24f516bb3b4ec04c28d4b8246ac0cbe664cf113dd2b6ca92073",
+        when="@2.12:2.13",
+    )
     patch("for_aarch64.patch", when="@:2.11 target=aarch64:")
     patch(
         "sampler-service-missing-libunwind-include-dir.patch",
@@ -143,6 +153,27 @@ def initconfig_hardware_entries(self):
             entries.append(cmake_cache_option("WITH_NVTX", True))
             entries.append(cmake_cache_path("CUDA_TOOLKIT_ROOT_DIR", spec["cuda"].prefix))
             entries.append(cmake_cache_path("CUPTI_PREFIX", spec["cuda"].prefix))
+
+            # CUDA configuration from cuda_for_radiuss_projects
+            cuda_flags = []
+            if not spec.satisfies("cuda_arch=none"):
+                cuda_archs = ";".join(spec.variants["cuda_arch"].value)
+                entries.append(cmake_cache_string("CMAKE_CUDA_ARCHITECTURES", cuda_archs))
+
+            # gcc-toolchain support
+            gcc_toolchain_regex = re.compile(".*gcc-toolchain.*")
+            using_toolchain = list(
+                filter(gcc_toolchain_regex.match, spec.compiler_flags["cxxflags"])
+            )
+            if using_toolchain:
+                cuda_flags.append("-Xcompiler {}".format(using_toolchain[0]))
+
+            # ppc64le workaround
+            if spec.satisfies("target=ppc64le %gcc@8.1:"):
+                cuda_flags.append("-Xcompiler -mno-float128")
+
+            if cuda_flags:
+                entries.append(cmake_cache_string("CMAKE_CUDA_FLAGS", " ".join(cuda_flags)))
         else:
             entries.append(cmake_cache_option("WITH_CUPTI", False))
             entries.append(cmake_cache_option("WITH_NVTX", False))
@@ -150,6 +181,32 @@ def initconfig_hardware_entries(self):
         if spec.satisfies("+rocm"):
             entries.append(cmake_cache_option("WITH_ROCTRACER", True))
             entries.append(cmake_cache_option("WITH_ROCTX", True))
+
+            # HIP configuration from hip_for_radiuss_projects
+            rocm_root = spec["llvm-amdgpu"].prefix
+            gcc_toolchain_regex = re.compile(".*gcc-toolchain.*")
+            using_toolchain = list(
+                filter(gcc_toolchain_regex.match, spec.compiler_flags["cxxflags"])
+            )
+            hip_link_flags = ""
+
+            if using_toolchain:
+                # The matched cxxflag is the whole "--gcc-toolchain=<prefix>" option, so
+                # keep only the text after "=" before using it as a directory prefix.
+                gcc_prefix = using_toolchain[0].split("=", 1)[-1]
+                entries.append(
+                    cmake_cache_string("HIP_CLANG_FLAGS", "--gcc-toolchain={0}".format(gcc_prefix))
+                )
+                gcc_rpath = " -Wl,-rpath={0}/lib64".format(gcc_prefix)
+                entries.append(
+                    cmake_cache_string("CMAKE_EXE_LINKER_FLAGS", hip_link_flags + gcc_rpath)
+                )
+            else:
+                entries.append(
+                    cmake_cache_string(
+                        "CMAKE_EXE_LINKER_FLAGS", "-Wl,-rpath={0}/llvm/lib/".format(rocm_root)
+                    )
+                )
         else:
             entries.append(cmake_cache_option("WITH_ROCTRACER", False))
             entries.append(cmake_cache_option("WITH_ROCTX", False))
@@ -162,6 +219,31 @@ def initconfig_mpi_entries(self):
 
         entries.append(cmake_cache_option("WITH_MPI", spec.satisfies("+mpi")))
 
+        if spec.satisfies("+mpi"):
+            # MPI configuration from mpi_for_radiuss_projects
+            if spec["mpi"].name == "spectrum-mpi" and spec.satisfies("^blt"):
+                entries.append(cmake_cache_string("BLT_MPI_COMMAND_APPEND", "mpibind"))
+
+            sys_type = spec.architecture
+            if "SYS_TYPE" in env:
+                sys_type = env["SYS_TYPE"]
+
+            # Replace /usr/bin/srun path with srun flux wrapper path on TOSS 4
+            if "toss_4" in sys_type:
+                srun_wrapper = which_string("srun")
+                mpi_exec_index = [
+                    index for index, entry in enumerate(entries) if "MPIEXEC_EXECUTABLE" in entry
+                ]
+                if len(mpi_exec_index) > 0:
+                    del entries[mpi_exec_index[0]]
+                mpi_exec_flag_index = [
+                    index for index, entry in enumerate(entries) if "MPIEXEC_NUMPROC_FLAG" in entry
+                ]
+                if len(mpi_exec_flag_index) > 0:
+                    del entries[mpi_exec_flag_index[0]]
+                entries.append(cmake_cache_path("MPIEXEC_EXECUTABLE", srun_wrapper))
+                entries.append(cmake_cache_string("MPIEXEC_NUMPROC_FLAG", "-n"))
+
         return entries
 
     def initconfig_package_entries(self):
diff --git a/repos/spack_repo/builtin/packages/care/package.py b/repos/spack_repo/builtin/packages/care/package.py
index 431c5895f3c..455ed282145 100644
--- a/repos/spack_repo/builtin/packages/care/package.py
+++ b/repos/spack_repo/builtin/packages/care/package.py
@@ -2,6 +2,7 @@
 #
 # SPDX-License-Identifier: (Apache-2.0 OR MIT)
 
+import re
 import socket
 
 from spack_repo.builtin.build_systems.cached_cmake import (
@@ -246,11 +247,58 @@ def initconfig_hardware_entries(self):
             entries.append(cmake_cache_option("CUDA_SEPARABLE_COMPILATION", True))
             entries.append(cmake_cache_string("NVTOOLSEXT_DIR", spec["cuda"].prefix))
             entries.append(cmake_cache_string("CUB_DIR", spec["cub"].prefix))
+
+            # CUDA configuration from cuda_for_radiuss_projects
+            cuda_flags = []
+            if not spec.satisfies("cuda_arch=none"):
+                cuda_archs = ";".join(spec.variants["cuda_arch"].value)
+                entries.append(cmake_cache_string("CMAKE_CUDA_ARCHITECTURES", cuda_archs))
+
+            # gcc-toolchain support
+            gcc_toolchain_regex = re.compile(".*gcc-toolchain.*")
+            using_toolchain = list(
+                filter(gcc_toolchain_regex.match, spec.compiler_flags["cxxflags"])
+            )
+            if using_toolchain:
+                cuda_flags.append("-Xcompiler {}".format(using_toolchain[0]))
+
+            # ppc64le workaround
+            if spec.satisfies("target=ppc64le %gcc@8.1:"):
+                cuda_flags.append("-Xcompiler -mno-float128")
+
+            if cuda_flags:
+                entries.append(cmake_cache_string("CMAKE_CUDA_FLAGS", " ".join(cuda_flags)))
         else:
             entries.append(cmake_cache_option("ENABLE_CUDA", False))
 
         if spec.satisfies("+rocm"):
             entries.append(cmake_cache_option("ENABLE_HIP", True))
+
+            # HIP configuration from hip_for_radiuss_projects
+            rocm_root = spec["llvm-amdgpu"].prefix
+            gcc_toolchain_regex = re.compile(".*gcc-toolchain.*")
+            using_toolchain = list(
+                filter(gcc_toolchain_regex.match, spec.compiler_flags["cxxflags"])
+            )
+            hip_link_flags = ""
+
+            if using_toolchain:
+                # The matched cxxflag is the whole "--gcc-toolchain=<prefix>" option, so
+                # keep only the text after "=" before using it as a directory prefix.
+                gcc_prefix = using_toolchain[0].split("=", 1)[-1]
+                entries.append(
+                    cmake_cache_string("HIP_CLANG_FLAGS", "--gcc-toolchain={0}".format(gcc_prefix))
+                )
+                gcc_rpath = " -Wl,-rpath={0}/lib64".format(gcc_prefix)
+                entries.append(
+                    cmake_cache_string("CMAKE_EXE_LINKER_FLAGS", hip_link_flags + gcc_rpath)
+                )
+            else:
+                entries.append(
+                    cmake_cache_string(
+                        "CMAKE_EXE_LINKER_FLAGS", "-Wl,-rpath={0}/llvm/lib/".format(rocm_root)
+                    )
+                )
         else:
             entries.append(cmake_cache_option("ENABLE_HIP", False))
 
@@ -262,6 +310,31 @@ def initconfig_mpi_entries(self):
         entries = super(Care, self).initconfig_mpi_entries()
         entries.append(cmake_cache_option("ENABLE_MPI", spec.satisfies("+mpi")))
 
+        if spec.satisfies("+mpi"):
+            # MPI configuration from mpi_for_radiuss_projects
+            if spec["mpi"].name == "spectrum-mpi" and spec.satisfies("^blt"):
+                entries.append(cmake_cache_string("BLT_MPI_COMMAND_APPEND", "mpibind"))
+
+            sys_type = spec.architecture
+            if "SYS_TYPE" in env:
+                sys_type = env["SYS_TYPE"]
+
+            # Replace /usr/bin/srun path with srun flux wrapper path on TOSS 4
+            if "toss_4" in sys_type:
+                srun_wrapper = which_string("srun")
+                mpi_exec_index = [
+                    index for index, entry in enumerate(entries) if "MPIEXEC_EXECUTABLE" in entry
+                ]
+                if len(mpi_exec_index) > 0:
+                    del entries[mpi_exec_index[0]]
+                mpi_exec_flag_index = [
+                    index for index, entry in enumerate(entries) if "MPIEXEC_NUMPROC_FLAG" in entry
+                ]
+                if len(mpi_exec_flag_index) > 0:
+                    del entries[mpi_exec_flag_index[0]]
+                entries.append(cmake_cache_path("MPIEXEC_EXECUTABLE", srun_wrapper))
+                entries.append(cmake_cache_string("MPIEXEC_NUMPROC_FLAG", "-n"))
+
         return entries
 
     def initconfig_package_entries(self):
diff --git a/repos/spack_repo/builtin/packages/chai/package.py b/repos/spack_repo/builtin/packages/chai/package.py
index a4b3f6a5882..4d6e88adf51 100644
--- a/repos/spack_repo/builtin/packages/chai/package.py
+++ b/repos/spack_repo/builtin/packages/chai/package.py
@@ -2,6 +2,7 @@
 #
 # SPDX-License-Identifier: (Apache-2.0 OR MIT)
 
+import re
 import socket
 
 from spack_repo.builtin.build_systems.cached_cmake import (
@@ -144,6 +145,7 @@ class Chai(CachedCMakePackage, CudaPackage, ROCmPackage):
     variant("raja", default=False, description="Build plugin for RAJA")
     variant("examples", default=True, description="Build examples.")
     variant("openmp", default=False, description="Build using OpenMP")
+    variant("disable_rm", default=False, description="Disable resource manager")
     # TODO: figure out gtest dependency and then set this default True
     # and remove the +tests conflict below.
     variant(
@@ -280,11 +282,58 @@ def initconfig_hardware_entries(self):
             if spec.satisfies("+separable_compilation"):
                 entries.append(cmake_cache_option("CMAKE_CUDA_SEPARABLE_COMPILATION", True))
                 entries.append(cmake_cache_option("CUDA_SEPARABLE_COMPILATION", True))
+
+            # CUDA configuration from cuda_for_radiuss_projects
+            cuda_flags = []
+            if not spec.satisfies("cuda_arch=none"):
+                cuda_archs = ";".join(spec.variants["cuda_arch"].value)
+                entries.append(cmake_cache_string("CMAKE_CUDA_ARCHITECTURES", cuda_archs))
+
+            # gcc-toolchain support
+            gcc_toolchain_regex = re.compile(".*gcc-toolchain.*")
+            using_toolchain = list(
+                filter(gcc_toolchain_regex.match, spec.compiler_flags["cxxflags"])
+            )
+            if using_toolchain:
+                cuda_flags.append("-Xcompiler {}".format(using_toolchain[0]))
+
+            # ppc64le workaround
+            if spec.satisfies("target=ppc64le %gcc@8.1:"):
+                cuda_flags.append("-Xcompiler -mno-float128")
+
+            if cuda_flags:
+                entries.append(cmake_cache_string("CMAKE_CUDA_FLAGS", " ".join(cuda_flags)))
         else:
             entries.append(cmake_cache_option("ENABLE_CUDA", False))
 
         if spec.satisfies("+rocm"):
             entries.append(cmake_cache_option("ENABLE_HIP", True))
+
+            # HIP configuration from hip_for_radiuss_projects
+            rocm_root = spec["llvm-amdgpu"].prefix
+            gcc_toolchain_regex = re.compile(".*gcc-toolchain.*")
+            using_toolchain = list(
+                filter(gcc_toolchain_regex.match, spec.compiler_flags["cxxflags"])
+            )
+            hip_link_flags = ""
+
+            if using_toolchain:
+                # The matched cxxflag is the whole "--gcc-toolchain=<prefix>" option, so
+                # keep only the text after "=" before using it as a directory prefix.
+                gcc_prefix = using_toolchain[0].split("=", 1)[-1]
+                entries.append(
+                    cmake_cache_string("HIP_CLANG_FLAGS", "--gcc-toolchain={0}".format(gcc_prefix))
+                )
+                gcc_rpath = " -Wl,-rpath={0}/lib64".format(gcc_prefix)
+                entries.append(
+                    cmake_cache_string("CMAKE_EXE_LINKER_FLAGS", hip_link_flags + gcc_rpath)
+                )
+            else:
+                entries.append(
+                    cmake_cache_string(
+                        "CMAKE_EXE_LINKER_FLAGS", "-Wl,-rpath={0}/llvm/lib/".format(rocm_root)
+                    )
+                )
         else:
             entries.append(cmake_cache_option("ENABLE_HIP", False))
 
@@ -296,6 +345,31 @@ def initconfig_mpi_entries(self):
         entries = super(Chai, self).initconfig_mpi_entries()
         entries.append(cmake_cache_option("ENABLE_MPI", spec.satisfies("+mpi")))
 
+        if spec.satisfies("+mpi"):
+            # MPI configuration from mpi_for_radiuss_projects
+            if spec["mpi"].name == "spectrum-mpi" and spec.satisfies("^blt"):
+                entries.append(cmake_cache_string("BLT_MPI_COMMAND_APPEND", "mpibind"))
+
+            sys_type = spec.architecture
+            if "SYS_TYPE" in env:
+                sys_type = env["SYS_TYPE"]
+
+            # Replace /usr/bin/srun path with srun flux wrapper path on TOSS 4
+            if "toss_4" in sys_type:
+                srun_wrapper = which_string("srun")
+                mpi_exec_index = [
+                    index for index, entry in enumerate(entries) if "MPIEXEC_EXECUTABLE" in entry
+                ]
+                if len(mpi_exec_index) > 0:
+                    del entries[mpi_exec_index[0]]
+                mpi_exec_flag_index = [
+                    index for index, entry in enumerate(entries) if "MPIEXEC_NUMPROC_FLAG" in entry
+                ]
+                if len(mpi_exec_flag_index) > 0:
+                    del entries[mpi_exec_flag_index[0]]
+                entries.append(cmake_cache_path("MPIEXEC_EXECUTABLE", srun_wrapper))
+                entries.append(cmake_cache_string("MPIEXEC_NUMPROC_FLAG", "-n"))
+
         return entries
 
     def initconfig_package_entries(self):
@@ -348,6 +422,10 @@ def initconfig_package_entries(self):
             )
         )
 
+        entries.append(
+            cmake_cache_option("{}DISABLE_RM".format(option_prefix), spec.satisfies("+disable_rm"))
+        )
+
         return entries
 
     def cmake_args(self):
diff --git a/repos/spack_repo/builtin/packages/raja/package.py b/repos/spack_repo/builtin/packages/raja/package.py
index 94af7336dd2..7016d65c050 100644
--- a/repos/spack_repo/builtin/packages/raja/package.py
+++ b/repos/spack_repo/builtin/packages/raja/package.py
@@ -2,6 +2,7 @@
 #
 # SPDX-License-Identifier: (Apache-2.0 OR MIT)
 
+import re
 import socket
 
 from spack_repo.builtin.build_systems.cached_cmake import (
@@ -221,7 +222,7 @@ class Raja(CachedCMakePackage, CudaPackage, ROCmPackage):
     variant("openmp", default=False, description="Build OpenMP backend")
     variant("shared", default=False, description="Build shared libs")
     variant("desul", default=False, description="Build desul atomics backend")
-    variant("vectorization", default=False, description="Build SIMD/SIMT intrinsics support")
+    variant("vectorization", default=True, description="Build SIMD/SIMT intrinsics support")
     variant(
         "omptask", default=False, description="Build OpenMP task variants of internal algorithms"
     )
@@ -230,6 +231,7 @@ class Raja(CachedCMakePackage, CudaPackage, ROCmPackage):
     variant("gpu-profiling", default=False, description="Enable GPU profiling")
 
     variant("plugins", default=False, description="Enable runtime plugins")
+    variant("caliper", default=False, description="Enable caliper support")
     variant("examples", default=True, description="Build examples.")
     variant("exercises", default=True, description="Build exercises.")
     # TODO: figure out gtest dependency and then set this default True
@@ -292,6 +294,8 @@ class Raja(CachedCMakePackage, CudaPackage, ROCmPackage):
 
     depends_on("llvm-openmp", when="+openmp %apple-clang")
 
+    depends_on("caliper", when="+caliper")
+
     depends_on("rocprim", when="+rocm")
     with when("+rocm @0.12.0:"):
         depends_on("camp+rocm")
@@ -371,8 +375,57 @@ def initconfig_hardware_entries(self):
         entries.append(cmake_cache_option("ENABLE_OPENMP", spec.satisfies("+openmp")))
         entries.append(cmake_cache_option("ENABLE_CUDA", spec.satisfies("+cuda")))
 
+        if spec.satisfies("+cuda"):
+            # CUDA configuration from cuda_for_radiuss_projects
+            cuda_flags = []
+            if not spec.satisfies("cuda_arch=none"):
+                cuda_archs = ";".join(spec.variants["cuda_arch"].value)
+                entries.append(cmake_cache_string("CMAKE_CUDA_ARCHITECTURES", cuda_archs))
+
+            # gcc-toolchain support
+            gcc_toolchain_regex = re.compile(".*gcc-toolchain.*")
+            using_toolchain = list(
+                filter(gcc_toolchain_regex.match, spec.compiler_flags["cxxflags"])
+            )
+            if using_toolchain:
+                cuda_flags.append("-Xcompiler {}".format(using_toolchain[0]))
+
+            # ppc64le workaround
+            if spec.satisfies("target=ppc64le %gcc@8.1:"):
+                cuda_flags.append("-Xcompiler -mno-float128")
+
+            if cuda_flags:
+                entries.append(cmake_cache_string("CMAKE_CUDA_FLAGS", " ".join(cuda_flags)))
+
         if spec.satisfies("+rocm"):
             entries.append(cmake_cache_option("ENABLE_HIP", True))
+
+            # HIP configuration from hip_for_radiuss_projects
+            rocm_root = spec["llvm-amdgpu"].prefix
+            gcc_toolchain_regex = re.compile(".*gcc-toolchain.*")
+            using_toolchain = list(
+                filter(gcc_toolchain_regex.match, spec.compiler_flags["cxxflags"])
+            )
+            hip_link_flags = ""
+
+            if using_toolchain:
+                # The matched cxxflag is the whole "--gcc-toolchain=<prefix>" option, so
+                # keep only the text after "=" before using it as a directory prefix.
+                gcc_prefix = using_toolchain[0].split("=", 1)[-1]
+                entries.append(
+                    cmake_cache_string("HIP_CLANG_FLAGS", "--gcc-toolchain={0}".format(gcc_prefix))
+                )
+                gcc_rpath = " -Wl,-rpath={0}/lib64".format(gcc_prefix)
+                entries.append(
+                    cmake_cache_string("CMAKE_EXE_LINKER_FLAGS", hip_link_flags + gcc_rpath)
+                )
+            else:
+                entries.append(
+                    cmake_cache_string(
+                        "CMAKE_EXE_LINKER_FLAGS", "-Wl,-rpath={0}/llvm/lib/".format(rocm_root)
+                    )
+                )
+
             hipcc_flags = []
             if self.spec.satisfies("^rocprim@7.0"):
                 hipcc_flags.append("-std=c++17")
diff --git a/repos/spack_repo/builtin/packages/raja_perf/package.py b/repos/spack_repo/builtin/packages/raja_perf/package.py
index 3a0732d196b..ac39191e0b3 100644
--- a/repos/spack_repo/builtin/packages/raja_perf/package.py
+++ b/repos/spack_repo/builtin/packages/raja_perf/package.py
@@ -2,6 +2,7 @@
 #
 # SPDX-License-Identifier: (Apache-2.0 OR MIT)
 
+import re
 import socket
 
 from spack_repo.builtin.build_systems.cached_cmake import (
@@ -15,6 +16,7 @@
 from spack_repo.builtin.packages.blt.package import llnl_link_helpers
 
 from spack.package import *
+from spack.util.executable import which_string
 
 
 class RajaPerf(CachedCMakePackage, CudaPackage, ROCmPackage):
@@ -120,6 +122,7 @@ class RajaPerf(CachedCMakePackage, CudaPackage, ROCmPackage):
     depends_on("cxx", type="build")  # generated
 
     depends_on("blt")
+    depends_on("blt@0.7.0:", type="build", when="@2025.03.0:")
     depends_on("blt@0.6.2:", type="build", when="@2024.07.0:")
     depends_on("blt@0.5.3", type="build", when="@2023.06")
     depends_on("blt@0.5.2:0.5.3", type="build", when="@2022.10")
@@ -229,6 +232,24 @@ def initconfig_hardware_entries(self):
             entries.append(cmake_cache_option("ENABLE_CUDA", True))
             # Shared handling of cuda.
 
+            # CUDA configuration from cuda_for_radiuss_projects
+            cuda_flags = []
+            if not spec.satisfies("cuda_arch=none"):
+                cuda_archs = ";".join(spec.variants["cuda_arch"].value)
+                entries.append(cmake_cache_string("CMAKE_CUDA_ARCHITECTURES", cuda_archs))
+
+            gcc_toolchain_regex = re.compile(".*gcc-toolchain.*")
+            using_toolchain = list(
+                filter(gcc_toolchain_regex.match, spec.compiler_flags["cxxflags"])
+            )
+            if using_toolchain:
+                cuda_flags.append("-Xcompiler {}".format(using_toolchain[0]))
+
+            if spec.satisfies("target=ppc64le %gcc@8.1:"):
+                cuda_flags.append("-Xcompiler -mno-float128")
+
+            entries.append(cmake_cache_string("CMAKE_CUDA_FLAGS", " ".join(cuda_flags)))
+
             # Custom options.
             # We place everything in CMAKE_CUDA_FLAGS_(RELEASE|RELWITHDEBINFO|DEBUG)
             # which are not set by cuda_for_radiuss_projects
@@ -268,6 +289,31 @@ def initconfig_hardware_entries(self):
 
         if "+rocm" in spec:
             entries.append(cmake_cache_option("ENABLE_HIP", True))
+
+            # HIP configuration from hip_for_radiuss_projects
+            rocm_root = spec["llvm-amdgpu"].prefix
+            hip_link_flags = ""
+            gcc_toolchain_regex = re.compile(".*gcc-toolchain.*")
+            using_toolchain = list(
+                filter(gcc_toolchain_regex.match, spec.compiler_flags["cxxflags"])
+            )
+            if using_toolchain:
+                # The matched cxxflag is the whole "--gcc-toolchain=<prefix>" option, so
+                # keep only the text after "=" before using it as a directory prefix.
+                gcc_prefix = using_toolchain[0].split("=", 1)[-1]
+                entries.append(
+                    cmake_cache_string("HIP_CLANG_FLAGS", "--gcc-toolchain={0}".format(gcc_prefix))
+                )
+                gcc_rpath = " -Wl,-rpath={0}/lib64".format(gcc_prefix)
+                entries.append(
+                    cmake_cache_string("CMAKE_EXE_LINKER_FLAGS", hip_link_flags + gcc_rpath)
+                )
+            else:
+                entries.append(
+                    cmake_cache_string(
+                        "CMAKE_EXE_LINKER_FLAGS", "-Wl,-rpath={0}/llvm/lib/".format(rocm_root)
+                    )
+                )
         else:
             entries.append(cmake_cache_option("ENABLE_HIP", False))
 
@@ -304,6 +350,33 @@ def initconfig_mpi_entries(self):
 
         entries.append(cmake_cache_option("ENABLE_MPI", "+mpi" in spec))
 
+        # MPI configuration from mpi_for_radiuss_projects
+        if spec.satisfies("+mpi"):
+            if spec["mpi"].name == "spectrum-mpi" and spec.satisfies("^blt"):
+                entries.append(cmake_cache_string("BLT_MPI_COMMAND_APPEND", "mpibind"))
+
+            sys_type = spec.architecture
+            if "SYS_TYPE" in env:
+                sys_type = env["SYS_TYPE"]
+            # Replace /usr/bin/srun path with srun flux wrapper path on TOSS 4
+            # TODO: Remove this logic by adding `using_flux` case in
+            #  spack/lib/spack/spack/build_systems/cached_cmake.py:196 and remove hard-coded
+            #  path to srun in same file.
+            if "toss_4" in sys_type:
+                srun_wrapper = which_string("srun")
+                mpi_exec_index = [
+                    index for index, entry in enumerate(entries) if "MPIEXEC_EXECUTABLE" in entry
+                ]
+                if len(mpi_exec_index) > 0:
+                    del entries[mpi_exec_index[0]]
+                mpi_exec_flag_index = [
+                    index for index, entry in enumerate(entries) if "MPIEXEC_NUMPROC_FLAG" in entry
+                ]
+                if len(mpi_exec_flag_index) > 0:
+                    del entries[mpi_exec_flag_index[0]]
+                entries.append(cmake_cache_path("MPIEXEC_EXECUTABLE", srun_wrapper))
+                entries.append(cmake_cache_string("MPIEXEC_NUMPROC_FLAG", "-n"))
+
         return entries
 
     def initconfig_package_entries(self):
diff --git a/repos/spack_repo/builtin/packages/umpire/package.py b/repos/spack_repo/builtin/packages/umpire/package.py
index b703b3dc74d..cd3ac1773aa 100644
--- a/repos/spack_repo/builtin/packages/umpire/package.py
+++ b/repos/spack_repo/builtin/packages/umpire/package.py
@@ -3,6 +3,7 @@
 # SPDX-License-Identifier: (Apache-2.0 OR MIT)
 
 import os
+import re
 import socket
 
 from spack_repo.builtin.build_systems.cached_cmake import (
@@ -16,6 +17,7 @@
 from spack_repo.builtin.packages.blt.package import llnl_link_helpers
 
 from spack.package import *
+from spack.util.executable import which_string
 
 
 class Umpire(CachedCMakePackage, CudaPackage, ROCmPackage):
@@ -190,6 +192,13 @@ class Umpire(CachedCMakePackage, CudaPackage, ROCmPackage):
         when="@:5.0.1 ^blt@0.4:",
     )
 
+    # https://github.com/LLNL/Umpire/pull/805
+    patch(
+        "https://github.com/LLNL/Umpire/commit/47ff0aa1f7a01a917c3b7ac618e8a9e44a10fd25.patch?full_index=1",
+        sha256="802f074a05e1cb1f428e13d99c5fcb1435f86bd8f36a1ea2f7b6756e6625e0a0",
+        when="@2022.10.0",
+    )
+
     # https://github.com/LLNL/Umpire/pull/816
     patch(
         "https://github.com/LLNL/Umpire/commit/2292d1d6078f6d9523b7ad0886ffa053644569d5.patch?full_index=1",
@@ -231,7 +240,7 @@ class Umpire(CachedCMakePackage, CudaPackage, ROCmPackage):
     variant("backtrace", default=False, description="Enable backtrace tools")
     variant("dev_benchmarks", default=False, description="Enable developer benchmarks")
     variant("device_alloc", default=False, description="Enable DeviceAllocator")
-    variant("werror", default=False, description="Enable warnings as errors")
+    variant("werror", default=True, description="Enable warnings as errors")
     variant("asan", default=False, description="Enable ASAN")
     variant("sanitizer_tests", default=False, description="Enable address sanitizer tests")
     variant("fmt_header_only", default=True, description="Link to header-only fmt target")
@@ -265,7 +274,8 @@ class Umpire(CachedCMakePackage, CudaPackage, ROCmPackage):
     depends_on("camp+openmp", when="+openmp")
     depends_on("camp~cuda", when="~cuda")
     depends_on("camp~rocm", when="~rocm")
-    depends_on("camp@2025.09:", when="@2025.09:")
+    depends_on("camp@2025.12:", when="@develop")
+    depends_on("camp@2025.09", when="@2025.09")
     depends_on("camp@2025.03", when="@2025.03")
     depends_on("camp@2024.07", when="@2024.07")
     depends_on("camp@2024.02.1", when="@2024.02.1")
@@ -279,9 +289,9 @@ class Umpire(CachedCMakePackage, CudaPackage, ROCmPackage):
     depends_on("sqlite", when="+sqlite_experimental")
     depends_on("mpi", when="+mpi")
 
-    depends_on("fmt@9.1:", when="@2024.02.0:")
+    depends_on("fmt@9.1:11.0", when="@2024.02.0:")
     # For some reason, we need c++ 17 explicitly only with intel
-    depends_on("fmt@9.1: cxxstd=17", when="@2024.02.0: %intel@19.1")
+    depends_on("fmt@9.1:11.0 cxxstd=17", when="@2024.02.0: %intel@19.1")
 
     with when("@5.0.0:"):
         with when("+cuda"):
@@ -385,11 +395,54 @@ def initconfig_hardware_entries(self):
             entries.append(cmake_cache_option("ENABLE_CUDA", True))
             # Umpire used to pick only the first architecture in the list. The shared logic in
             # CachedCMakePackage keeps the list of architectures.
+
+            # CUDA configuration from cuda_for_radiuss_projects
+            cuda_flags = []
+            if not spec.satisfies("cuda_arch=none"):
+                cuda_archs = ";".join(spec.variants["cuda_arch"].value)
+                entries.append(cmake_cache_string("CMAKE_CUDA_ARCHITECTURES", cuda_archs))
+
+            gcc_toolchain_regex = re.compile(".*gcc-toolchain.*")
+            using_toolchain = list(
+                filter(gcc_toolchain_regex.match, spec.compiler_flags["cxxflags"])
+            )
+            if using_toolchain:
+                cuda_flags.append("-Xcompiler {}".format(using_toolchain[0]))
+
+            if spec.satisfies("target=ppc64le %gcc@8.1:"):
+                cuda_flags.append("-Xcompiler -mno-float128")
+
+            entries.append(cmake_cache_string("CMAKE_CUDA_FLAGS", " ".join(cuda_flags)))
         else:
             entries.append(cmake_cache_option("ENABLE_CUDA", False))
 
         if spec.satisfies("+rocm"):
             entries.append(cmake_cache_option("ENABLE_HIP", True))
+
+            # HIP configuration from hip_for_radiuss_projects
+            rocm_root = spec["llvm-amdgpu"].prefix
+            hip_link_flags = ""
+            gcc_toolchain_regex = re.compile(".*gcc-toolchain.*")
+            using_toolchain = list(
+                filter(gcc_toolchain_regex.match, spec.compiler_flags["cxxflags"])
+            )
+            if using_toolchain:
+                gcc_prefix = using_toolchain[0]
+                entries.append(
+                    cmake_cache_string("HIP_CLANG_FLAGS", "--gcc-toolchain={0}".format(gcc_prefix))
+                )
+                entries.append(
+                    cmake_cache_string(
+                        "CMAKE_EXE_LINKER_FLAGS",
+                        hip_link_flags + " -Wl,-rpath={0}/lib64".format(gcc_prefix),
+                    )
+                )
+            else:
+                entries.append(
+                    cmake_cache_string(
+                        "CMAKE_EXE_LINKER_FLAGS", "-Wl,-rpath={0}/llvm/lib/".format(rocm_root)
+                    )
+                )
         else:
             entries.append(cmake_cache_option("ENABLE_HIP", False))
 
@@ -419,6 +472,33 @@ def initconfig_mpi_entries(self):
             cmake_cache_option("UMPIRE_ENABLE_MPI3_SHARED_MEMORY", spec.satisfies("+mpi3_shmem"))
         )
 
+        # MPI configuration from mpi_for_radiuss_projects
+        if spec.satisfies("+mpi"):
+            if spec["mpi"].name == "spectrum-mpi" and spec.satisfies("^blt"):
+                entries.append(cmake_cache_string("BLT_MPI_COMMAND_APPEND", "mpibind"))
+
+            sys_type = spec.architecture
+            if "SYS_TYPE" in env:
+                sys_type = env["SYS_TYPE"]
+            # On TOSS 4, replace the /usr/bin/srun path with the srun flux wrapper found on PATH.
+            # TODO: Remove this logic by adding a `using_flux` case in
+            #  spack/lib/spack/spack/build_systems/cached_cmake.py:196 and removing the
+            #  hard-coded path to srun from that same file.
+            if "toss_4" in sys_type:
+                srun_wrapper = which_string("srun")
+                mpi_exec_index = [
+                    index for index, entry in enumerate(entries) if "MPIEXEC_EXECUTABLE" in entry
+                ]
+                if len(mpi_exec_index) > 0:
+                    del entries[mpi_exec_index[0]]
+                mpi_exec_flag_index = [
+                    index for index, entry in enumerate(entries) if "MPIEXEC_NUMPROC_FLAG" in entry
+                ]
+                if len(mpi_exec_flag_index) > 0:
+                    del entries[mpi_exec_flag_index[0]]
+                entries.append(cmake_cache_path("MPIEXEC_EXECUTABLE", srun_wrapper))
+                entries.append(cmake_cache_string("MPIEXEC_NUMPROC_FLAG", "-n"))
+
         return entries
 
     def initconfig_package_entries(self):
diff --git a/stacks/e4s-rocm-external/spack.yaml b/stacks/e4s-rocm-external/spack.yaml
index 78dccbb01fa..c21bfd05313 100644
--- a/stacks/e4s-rocm-external/spack.yaml
+++ b/stacks/e4s-rocm-external/spack.yaml
@@ -213,7 +213,7 @@ spack:
   - amrex +rocm amdgpu_target=gfx90a
   - arborx +rocm amdgpu_target=gfx90a
   - cabana +rocm amdgpu_target=gfx90a
-  - caliper +rocm amdgpu_target=gfx90a
+  # - caliper +rocm amdgpu_target=gfx90a
   - chai +rocm amdgpu_target=gfx90a
   - fftx +rocm amdgpu_target=gfx90a
   - gasnet +rocm amdgpu_target=gfx90a