diff --git a/experiments/kripke/experiment.py b/experiments/kripke/experiment.py index 12f6cfcdb..35f22cf1e 100644 --- a/experiments/kripke/experiment.py +++ b/experiments/kripke/experiment.py @@ -42,6 +42,13 @@ class Kripke( description="Enable single memory space model in rocm", ) + variant( + "other", + default=False, + values=(True, False), + description="Set other input/environment variables", + ) + maintainers("pearce8") def compute_applications_section(self): @@ -61,28 +68,140 @@ def compute_applications_section(self): self.add_experiment_variable("nquad", 128, True) self.add_experiment_variable("ds", 128, True) self.add_experiment_variable("lorder", 4, True) + self.add_experiment_variable("pool", 4, True) + problem_spec = { + "nzx": 32, + "nzy": 32, + "nzz": 16, + "pool": 4, + "npx": 2, + "npy": 2, + "npz": 1, + "ngroups": 64, + "gs": 1, + "nquad": 128, + "ds": 128, + "lorder": 4, + "layout": "GDZ", + "strong_n": lambda var, itr, dim, scaling_factor: var.val(dim), + "strong_p": lambda var, itr, dim, scaling_factor: var.val(dim) + * scaling_factor, + "weak_n": lambda var, itr, dim, scaling_factor: var.val(dim) + * scaling_factor, + "weak_p": lambda var, itr, dim, scaling_factor: var.val(dim) + * scaling_factor, + "throughput_n": lambda var, itr, dim, scaling_factor: var.val(dim) + * scaling_factor, + "throughput_p": lambda var, itr, dim, scaling_factor: var.val(dim), + } # Must be exec_mode=perf else: - # Number of processes in each dimension - self.add_experiment_variable( - "n_resources_dict", - {"npx": [2, 2, 2], "npy": [2, 2, 2], "npz": [1, 1, 1]}, - True, - ) + if self.spec.satisfies("+throughput"): + problem_spec = { + "nzx": [ + 80, + 100, + 120, + 140, + 160, + 180, + 200, + 220, + ], + "nzy": [ + 80, + 100, + 120, + 140, + 160, + 180, + 200, + 220, + ], + "nzz": [ + 40, + 50, + 60, + 70, + 80, + 90, + 100, + 110, + ], + "pool": 120, + "npx": [2, 2, 2, 2, 2, 2, 2, 2], + "npy": [2, 2, 2, 2, 2, 2, 2, 2], + "npz": [1, 1, 1, 1, 1, 1, 1, 1], + "ngroups": 48, + "gs": 1, + "nquad": 80, + "ds": 80, + "lorder": 4, + "layout": "GDZ", + "strong_n": None, + "strong_p": None, + "weak_n": None, + "weak_p": None, + "throughput_n": None, + "throughput_p": None, + } + else: + problem_spec = { + "nzx": 80, + "nzy": 80, + "nzz": 40, + "pool": 120, + "npx": 2, + "npy": 2, + "npz": 1, + "ngroups": 48, + "gs": 1, + "nquad": 80, + "ds": 80, + "lorder": 4, + "layout": "GDZ", + "strong_n": lambda var, itr, dim, scaling_factor: var.val(dim), + "strong_p": lambda var, itr, dim, scaling_factor: var.val(dim) + * scaling_factor, + "weak_n": lambda var, itr, dim, scaling_factor: var.val(dim) + * scaling_factor, + "weak_p": lambda var, itr, dim, scaling_factor: var.val(dim) + * scaling_factor, + "throughput_n": None, + "throughput_p": None, + } + # Number of processes in each dimension + self.add_experiment_variable( + "n_resources_dict", + { + "npx": problem_spec["npx"], + "npy": problem_spec["npy"], + "npz": problem_spec["npz"], + }, + True, + ) - # Per-process size (in zones) in each dimension - self.add_experiment_variable( - "total_problem_size_dict", - {"nzx": [64, 64, 64], "nzy": [64, 64, 64], "nzz": [32, 32, 32]}, - True, - ) + # Per-process size (in zones) in each dimension + self.add_experiment_variable( + "total_problem_size_dict", + { + "nzx": problem_spec["nzx"], + "nzy": problem_spec["nzy"], + "nzz": problem_spec["nzz"], + }, + True, + ) - self.add_experiment_variable("ngroups", [220, 320, 360], True) - self.add_experiment_variable("gs", 1, True) - self.add_experiment_variable("nquad", 36, True) - self.add_experiment_variable("ds", 36, True) - self.add_experiment_variable("lorder", 4, True) - self.add_experiment_variable("layout", "GDZ", True) + self.add_experiment_variable("ngroups", problem_spec["ngroups"], True) + self.add_experiment_variable("gs", problem_spec["gs"], True) + self.add_experiment_variable("nquad", problem_spec["nquad"], True) + self.add_experiment_variable("ds", problem_spec["ds"], True) + self.add_experiment_variable("lorder", problem_spec["lorder"], True) + self.add_experiment_variable("layout", problem_spec["layout"], True) + self.add_experiment_variable("pool", problem_spec["pool"], True) + + if self.spec.satisfies("+other"): + self.set_environment_variable("HSA_XNACK", 1) # Set the variables required by the experiment self.set_required_variables( @@ -102,32 +221,16 @@ def compute_applications_section(self): self.register_scaling_config( { ScalingMode.Strong: { - "n_resources_dict": lambda var, itr, dim, scaling_factor: var.val( - dim - ) - * scaling_factor, - "total_problem_size_dict": lambda var, itr, dim, scaling_factor: var.val( - dim - ), + "n_resources_dict": problem_spec["strong_p"], + "total_problem_size_dict": problem_spec["strong_n"], }, ScalingMode.Weak: { - "n_resources_dict": lambda var, itr, dim, scaling_factor: var.val( - dim - ) - * scaling_factor, - "total_problem_size_dict": lambda var, itr, dim, scaling_factor: var.val( - dim - ) - * scaling_factor, + "n_resources_dict": problem_spec["weak_p"], + "total_problem_size_dict": problem_spec["weak_n"], }, ScalingMode.Throughput: { - "n_resources_dict": lambda var, itr, dim, scaling_factor: var.val( - dim - ), - "total_problem_size_dict": lambda var, itr, dim, scaling_factor: var.val( - dim - ) - * scaling_factor, + "n_resources_dict": problem_spec["throughput_p"], + "total_problem_size_dict": problem_spec["throughput_n"], }, } ) diff --git a/repo/kripke/application.py b/repo/kripke/application.py index 8c6e429be..e31f8cc17 100644 --- a/repo/kripke/application.py +++ b/repo/kripke/application.py @@ -25,6 +25,7 @@ class Kripke(ExecutableApplication): ' --zones {nzx},{nzy},{nzz}' + ' --sigt {sigt0},{sigt1},{sigt2}' + ' --sigs {sigs0},{sigs1},{sigs2}' + + ' --dev_pool_size {pool}' + ' --arch {arch}' + ' --layout {layout}' + # ' --pdist {lout}' + @@ -73,6 +74,9 @@ class Kripke(ExecutableApplication): workload_variable('sigs2', default='0.05', description='Total material cross-sections', workloads=['kripke']) + workload_variable('pool', default='4', + description='Device memory pool size', + workloads=['kripke']) workload_variable('arch', default='Sequential', description='Architecture selection. Selects the back-end used for computation, available are Sequential, OpenMP, CUDA and HIP. The default depends on capabilities selected by the build system and is selected from list of increasing precedence: Sequential, OpenMP, CUDA and HIP.', workloads=['kripke']) diff --git a/repo/kripke/package.py b/repo/kripke/package.py index 9cb42af90..dbf2a1879 100644 --- a/repo/kripke/package.py +++ b/repo/kripke/package.py @@ -78,26 +78,35 @@ class Kripke(CMakePackage, CudaPackage, ROCmPackage): depends_on("blt@0.6.2:", type="build", when=f"@1.2.7:") - depends_on("chai+openmp", when="+openmp") - depends_on("chai~openmp", when="~openmp") - depends_on("chai+cuda", when="+cuda") - depends_on("chai~cuda", when="~cuda") - - for arch in ("none", "50", "60", "70", "80", "90"): - depends_on(f"chai cuda_arch={arch}", when=f"cuda_arch={arch}") - - depends_on("chai+rocm", when="+rocm") - depends_on("chai~rocm", when="~rocm") - for target in ("none", "gfx803", "gfx900", "gfx906", "gfx908", "gfx90a", "gfx942"): - depends_on(f"chai amdgpu_target={target}", when=f"amdgpu_target={target}") - - depends_on("umpire+openmp", when="+openmp") - depends_on("umpire~openmp", when="~openmp") - depends_on("umpire+cuda", when="+cuda") - depends_on("umpire~cuda", when="~cuda") - depends_on("umpire+rocm", when="+rocm") - depends_on("umpire~rocm", when="~rocm") - + with when("+chai"): + depends_on("chai+openmp", when="+openmp") + depends_on("chai~openmp", when="~openmp") + + depends_on("chai+cuda", when="+cuda") + depends_on("chai~cuda", when="~cuda") + for sm_ in CudaPackage.cuda_arch_values: + depends_on("chai cuda_arch={0}".format(sm_), when="cuda_arch={0}".format(sm_)) + + depends_on("chai+rocm", when="+rocm") + depends_on("chai~rocm", when="~rocm") + for arch in ROCmPackage.amdgpu_targets: + depends_on("chai amdgpu_target={0}".format(arch), when="amdgpu_target={0}".format(arch)) + + with when("+umpire"): + depends_on("umpire+openmp", when="+openmp") + depends_on("umpire~openmp", when="~openmp") + + depends_on("umpire+cuda", when="+cuda") + depends_on("umpire~cuda", when="~cuda") + for sm_ in CudaPackage.cuda_arch_values: + depends_on("umpire cuda_arch={0}".format(sm_), when="cuda_arch={0}".format(sm_)) + + depends_on("umpire+rocm", when="+rocm") + depends_on("umpire~rocm", when="~rocm") + for arch in ROCmPackage.amdgpu_targets: + depends_on("umpire amdgpu_target={0}".format(arch), when="amdgpu_target={0}".format(arch)) + + def setup_build_environment(self, env): spec = self.spec if "+cuda" in spec: @@ -117,8 +126,10 @@ def cmake_args(self): if "+rocm" in spec or "+cuda" in spec: enable_chai = "ON" + enable_chai_single_memory = "ON" if "+single_memory" in spec else "OFF" else: enable_chai = "OFF" + enable_chai_single_memory = "OFF" args.extend( [ @@ -128,6 +139,7 @@ def cmake_args(self): "-DRAJA_DIR=%s" % self.spec["raja"].prefix, "-Dchai_DIR=%s" % self.spec["chai"].prefix, "-DENABLE_CHAI=%s" % enable_chai, + "-DENABLE_CHAI_SINGLE_MEMORY=%s" % enable_chai_single_memory, "-DMPI_CXX_LINK_FLAGS='%s'" % self.spec['mpi'].libs.ld_flags, ] )