Attempt at an inner MPI call

aiidateam · Aug 31, 2022 · 63e7a2b · 63e7a2b
1 parent e477665
commit 63e7a2b
Show file tree

Hide file tree

Showing 4 changed files with 101 additions and 4 deletions.
diff --git a/aiida/engine/processes/calcjobs/calcjob.py b/aiida/engine/processes/calcjobs/calcjob.py
@@ -740,6 +740,8 @@ def presubmit(self, folder: Folder) -> CalcInfo:
             else:
                 prepend_cmdline_params = []
 
+            cmdline_params = [str(this_code.get_executable())] + (code_info.cmdline_params or [])
+
             escape_exec_line = False
             if isinstance(this_code, Containerized):
                 prepend_cmdline_params += this_code.get_engine_command()
@@ -749,7 +751,19 @@ def presubmit(self, folder: Folder) -> CalcInfo:
                 # therefore default set to False.
                 escape_exec_line = this_code.escape_exec_line
 
-            cmdline_params = [str(this_code.get_executable())] + (code_info.cmdline_params or [])
+                inner_mpi = this_code.inner_mpi
+                if inner_mpi:
+                    # The outer engine command always runs serially, so prepend only the engine command
+                    prepend_cmdline_params = this_code.get_engine_command()
+
+                    # Use the MPI arguments set on the code; if none are set, fall back to the computer's
+                    if this_code.get_mpirun_command():
+                        code_mpi_args = this_code.get_mpirun_command()
+                    else:
+                        code_mpi_args = mpi_args
+
+                    # import ipdb; ipdb.set_trace()
+                    cmdline_params = [arg.format(**subst_dict) for arg in code_mpi_args] + cmdline_params
 
             tmpl_code_info = JobTemplateCodeInfo()
             tmpl_code_info.prepend_cmdline_params = prepend_cmdline_params

diff --git a/aiida/orm/nodes/data/code/containerized.py b/aiida/orm/nodes/data/code/containerized.py
@@ -34,11 +34,23 @@ class Containerized(AbstractCode):
     _KEY_ATTRIBUTE_ENGINE_COMMAND: str = 'engine_command'
     _KEY_ATTRIBUTE_IMAGE: str = 'image'
     _KEY_ATTRIBUTE_ESCAPE_EXEC_LINE: str = 'escape_exec_line'
-
-    def __init__(self, engine_command: str, image: str, escape_exec_line: bool = False, **kwargs):
+    _KEY_ATTRIBUTE_INNER_MPI: str = 'inner_mpi'
+    _KEY_ATTRIBUTE_MPI_ARGS: str = 'mpi_args'
+
+    def __init__(
+        self,
+        engine_command: str,
+        image: str,
+        inner_mpi: bool = False,
+        mpi_args: str = '',
+        escape_exec_line: bool = False,
+        **kwargs
+    ):
         super().__init__(**kwargs)
         self.engine_command = engine_command
         self.image = image
+        self.inner_mpi = inner_mpi
+        self.mpi_args = mpi_args
         self.escape_exec_line = escape_exec_line
 
     @property
@@ -62,6 +74,16 @@ def engine_command(self, value: str) -> None:
 
         self.base.attributes.set(self._KEY_ATTRIBUTE_ENGINE_COMMAND, value)
 
+    @property
+    def mpi_args(self) -> str:
+        return self.base.attributes.get(self._KEY_ATTRIBUTE_MPI_ARGS)
+
+    @mpi_args.setter
+    def mpi_args(self, value: str) -> None:
+        type_check(value, str)
+
+        self.base.attributes.set(self._KEY_ATTRIBUTE_MPI_ARGS, value)
+
     @property
     def escape_exec_line(self) -> bool:
         """True for escape whole execute line after engine command in double quotes.
@@ -80,6 +102,16 @@ def escape_exec_line(self, value: bool) -> None:
 
         self.base.attributes.set(self._KEY_ATTRIBUTE_ESCAPE_EXEC_LINE, value)
 
+    @property
+    def inner_mpi(self) -> bool:
+        return self.base.attributes.get(self._KEY_ATTRIBUTE_INNER_MPI)
+
+    @inner_mpi.setter
+    def inner_mpi(self, value: bool) -> None:
+        type_check(value, bool)
+
+        self.base.attributes.set(self._KEY_ATTRIBUTE_INNER_MPI, value)
+
     @property
     def image(self) -> str:
         """The image of container
@@ -107,6 +139,12 @@ def get_engine_command(self) -> str:
 
         return cmdline.split()
 
+    def get_mpirun_command(self) -> list:
+        """Return the code's ``mpi_args`` string split on whitespace into a list of arguments."""
+        mpi_args = self.mpi_args
+
+        return mpi_args.split()
+
     @classmethod
     def _get_cli_options(cls) -> dict:
         """Return the CLI options that would allow to create an instance of this class."""

diff --git a/tests/engine/processes/calcjobs/test_calc_job.py b/tests/engine/processes/calcjobs/test_calc_job.py
@@ -264,6 +264,46 @@ def test_containerized_installed_code(file_regression, aiida_localhost):
     file_regression.check(content, extension='.sh')
 
 
+@pytest.mark.requires_rmq
+@pytest.mark.usefixtures('clear_database_before_test', 'chdir_tmp_path')
+def test_containerized_inner_mpi(file_regression, aiida_localhost):
+    """Test the submit script generated for a containerized code with ``inner_mpi=True``."""
+    aiida_localhost.set_use_double_quotes(True)
+    engine_command = """conda run --name {image}"""
+    override_mpirun_command = 'inner_mpirun -np {tot_num_mpiprocs}'
+    containerized_code = orm.InstalledContainerizedCode(
+        default_calc_job_plugin='core.arithmetic.add',
+        filepath_executable='/bin/bash',
+        engine_command=engine_command,
+        image='myenv',
+        inner_mpi=True,
+        mpi_args=override_mpirun_command,
+        computer=aiida_localhost,
+        escape_exec_line=False,
+    ).store()
+
+    inputs = {
+        'code': containerized_code,
+        'metadata': {
+            'dry_run': True,
+            'options': {
+                'resources': {
+                    'num_machines': 1,
+                    'num_mpiprocs_per_machine': 1
+                },
+                'withmpi': True,
+            }
+        }
+    }
+
+    _, node = launch.run_get_node(DummyCalcJob, **inputs)
+    folder_name = node.dry_run_info['folder']
+    submit_script_filename = node.get_option('submit_script_filename')
+    content = (pathlib.Path(folder_name) / submit_script_filename).read_bytes().decode('utf-8')
+
+    file_regression.check(content, extension='.sh')
+
+
 @pytest.mark.requires_rmq
 @pytest.mark.usefixtures('clear_database_before_test', 'chdir_tmp_path')
 def test_containerized_portable_code(file_regression, tmp_path, aiida_localhost):
@@ -339,7 +379,6 @@ def test_multi_codes_run_withmpi(aiida_local_code_factory, file_regression, calc
 @pytest.mark.usefixtures('clear_database_before_test', 'chdir_tmp_path')
 def test_portable_code(tmp_path, aiida_localhost):
     """test run container code"""
-    import pathlib
     (tmp_path / 'bash').write_bytes(b'bash implementation')
     subdir = tmp_path / 'sub'
     subdir.mkdir()

diff --git a/tests/engine/processes/calcjobs/test_calc_job/test_containerized_inner_mpi.sh b/tests/engine/processes/calcjobs/test_calc_job/test_containerized_inner_mpi.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+exec > _scheduler-stdout.txt
+exec 2> _scheduler-stderr.txt
+
+
+"conda" "run" "--name" "myenv" 'inner_mpirun' '-np' '1' '/bin/bash' '--version' '-c' < "aiida.in" > "aiida.out" 2> "aiida.err"