diff --git a/experiments/laghos/experiment.py b/experiments/laghos/experiment.py
index 83fc1acb6..13ce1aa50 100644
--- a/experiments/laghos/experiment.py
+++ b/experiments/laghos/experiment.py
@@ -3,7 +3,7 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-from benchpark.directives import variant, maintainers
+from benchpark.directives import variant
 from benchpark.experiment import Experiment
 from benchpark.scaling import StrongScaling
 from benchpark.caliper import Caliper
@@ -31,48 +31,43 @@ class Laghos(
         description="app version",
     )
 
-    maintainers("wdhawkins")
-
     def compute_applications_section(self):
-        if self.spec.satisfies("+cuda") or self.spec.satisfies("+rocm"):
-            device = "n_gpus"
-            n_devices_per_node = "{sys_gpus_per_node}"
-        else:
-            device = "n_ranks"
-            n_devices_per_node = "{sys_cores_per_node}"
-            self.add_experiment_variable("n_threads_per_proc", 1)
-
-        # The total number of resources for this experiment is calculated as:
-        # n_devices = n_devices_per_node * scaling_factor
-        # Scaling (strong) is achieved by scaling the scaling_factor variable
-        # For mpi-only builds:
-        # n_devices_per_node = sys_cores_per_node, by default
-        # n_devices = n_ranks
-        # For gpu builds:
-        # n_devices_per_node = sys_gpus_per_node, by default
-        # n_devices = n_gpus
-        scaling_factor = {"scaling_factor": 1}
+        # Number of initial nodes
+        n_resources = {"n_nodes": 1}
+        device = "n_ranks"
 
         if self.spec.satisfies("+cuda"):
             self.add_experiment_variable("device", "cuda", True)
         elif self.spec.satisfies("+rocm"):
             self.add_experiment_variable("device", "hip", True)
 
-        if self.spec.satisfies("+single_node"):
-            for pk, pv in scaling_factor.items():
-                self.add_experiment_variable(pk, pv)
-        elif self.spec.satisfies("+strong"):
+        if self.spec.satisfies("+cuda") or self.spec.satisfies("+rocm"):
+            device = "n_gpus"
+        if self.spec.satisfies("+strong"):
             scaled_variables = self.generate_strong_scaling_params(
-                {tuple(scaling_factor.keys()): list(scaling_factor.values())},
+                {tuple(n_resources.keys()): list(n_resources.values())},
                 int(self.spec.variants["scaling-factor"][0]),
                 int(self.spec.variants["scaling-iterations"][0]),
             )
             for pk, pv in scaled_variables.items():
-                self.add_experiment_variable(pk, pv)
-
-        self.add_experiment_variable(
-            device, f"{n_devices_per_node} * {{scaling_factor}}", True
-        )
+                self.add_experiment_variable(pk, pv, True)
+            num_resources = scaled_variables["n_nodes"]
+            if self.spec.satisfies("+cuda") or self.spec.satisfies("+rocm"):
+                self.add_experiment_variable(device, num_resources, True)
+            else:
+                self.add_experiment_variable(
+                    device, "{sys_cores_per_node} * {n_nodes}", True
+                )
+        else:
+            for pk, pv in n_resources.items():
+                if self.spec.satisfies("+cuda") or self.spec.satisfies("+rocm"):
+                    self.add_experiment_variable(device, pv, True)
+                else:
+                    for pk, pv in n_resources.items():
+                        self.add_experiment_variable(pk, pv, True)
+                    self.add_experiment_variable(
+                        device, "{sys_cores_per_node} * {n_nodes}", True
+                    )
 
     def compute_package_section(self):
         # get package version
diff --git a/modifiers/caliper/modifier.py b/modifiers/caliper/modifier.py
index 13f3854d9..a60ba4957 100644
--- a/modifiers/caliper/modifier.py
+++ b/modifiers/caliper/modifier.py
@@ -50,13 +50,14 @@ class Caliper(BasicModifier):
 
     env_var_modification(
         "CALI_CONFIG",
-        'spot(output={}{}),metadata(file={}),metadata(file=/etc/node_info.json,keys="host.name,host.cluster,host.os")'.format(
+        'spot(output={}{},profile.mpi,comm.stats),metadata(file={}),metadata(file=/etc/node_info.json,keys="host.name,host.cluster,host.os")'.format(
             _cali_datafile, "${CALI_CONFIG_MODE}", _caliper_metadata_file
         ),
         method="set",
         modes=[_default_mode],
     )
 
+
     add_mode(
         mode_name="mpi",
         mode_option="profile.mpi",
diff --git a/repo/laghos/package.py b/repo/laghos/package.py
index c0359c804..b92d7e178 100644
--- a/repo/laghos/package.py
+++ b/repo/laghos/package.py
@@ -15,7 +15,7 @@ class Laghos(MakefilePackage, CudaPackage, ROCmPackage):
 
     tags = ["proxy-app", "ecp-proxy-app"]
 
-    homepage = "https://github.com/CEED/Laghos"
+    homepage = "https://github.com/wdhawkins/laghos"
     git = "https://github.com/wdhawkins/Laghos.git"
 
     maintainers("wdhawkins")
@@ -41,8 +41,7 @@ class Laghos(MakefilePackage, CudaPackage, ROCmPackage):
     depends_on("mfem@3.4.1-laghos-v2.0", when="@2.0")
     # Recommended mfem version for laghos v1.x is: ^mfem@3.3.1-laghos-v1.0
     depends_on("mfem@3.3.1-laghos-v1.0", when="@1.0,1.1")
-    depends_on("mfem@4.4", when="@develop")
-    depends_on("mfem+caliper", when="+caliper")
+    depends_on("mfem@4.4_comm_cali+caliper", when="+caliper")
     depends_on("mfem cxxstd=14")
 
     requires("^[virtuals=zlib-api] zlib")
diff --git a/repo/mfem/package.py b/repo/mfem/package.py
index 5bfe19269..29fac8330 100644
--- a/repo/mfem/package.py
+++ b/repo/mfem/package.py
@@ -14,6 +14,8 @@ class Mfem(BuiltinMfem):
     # depends_on("rocblas", when="+rocm")
     # depends_on("rocsolver", when="+rocm")
 
+    version("4.4_comm_cali", branch="comm_cali", submodules=False, git="https://github.com/gracenansamba/mfem.git")
+
     requires("+rocm", when="^rocblas")
     requires("+rocm", when="^rocsolver")
     requires("+caliper", when="^hypre+caliper")