nomad-coe · ladinesa · Jan 28, 2025 · Jan 28, 2025 · Jan 28, 2025 · Jan 29, 2025
diff --git a/src/nomad_simulations/schema_packages/common.py b/src/nomad_simulations/schema_packages/common.py
@@ -0,0 +1,54 @@
+import numpy as np
+from nomad.datamodel.data import ArchiveSection
+from nomad.datamodel.metainfo.annotations import ELNAnnotation
+from nomad.metainfo import Datetime, Quantity
+
+
+class Time(ArchiveSection):
+    """
+    Contains time-related quantities.
+    """
+
+    datetime_end = Quantity(
+        type=Datetime,
+        description="""
+        The date and time when this computation ended.
+        """,
+        a_eln=ELNAnnotation(component='DateTimeEditQuantity'),
+    )
+
+    cpu1_start = Quantity(
+        type=np.float64,
+        unit='second',
+        description="""
+        The starting time of the computation on the (first) CPU 1.
+        """,
+        a_eln=ELNAnnotation(component='NumberEditQuantity'),
+    )
+
+    cpu1_end = Quantity(
+        type=np.float64,
+        unit='second',
+        description="""
+        The end time of the computation on the (first) CPU 1.
+        """,
+        a_eln=ELNAnnotation(component='NumberEditQuantity'),
+    )
+
+    wall_start = Quantity(
+        type=np.float64,
+        unit='second',
+        description="""
+        The internal wall-clock time from the starting of the computation.
+        """,
+        a_eln=ELNAnnotation(component='NumberEditQuantity'),
+    )
+
+    wall_end = Quantity(
+        type=np.float64,
+        unit='second',
+        description="""
+        The internal wall-clock time from the end of the computation.
+        """,
+        a_eln=ELNAnnotation(component='NumberEditQuantity'),
+    )
diff --git a/src/nomad_simulations/schema_packages/general.py b/src/nomad_simulations/schema_packages/general.py
@@ -11,7 +11,7 @@
 from nomad.datamodel.data import Schema
 from nomad.datamodel.metainfo.annotations import ELNAnnotation
 from nomad.datamodel.metainfo.basesections import Activity, Entity
-from nomad.metainfo import Datetime, Quantity, SchemaPackage, Section, SubSection
+from nomad.metainfo import Quantity, SchemaPackage, Section, SubSection
 
 from nomad_simulations.schema_packages.model_method import ModelMethod
 from nomad_simulations.schema_packages.model_system import ModelSystem
@@ -21,6 +21,8 @@
     is_not_representative,
 )
 
+from .common import Time
+
 configuration = config.get_plugin_entry_point(
     'nomad_simulations.schema_packages:nomad_simulations_plugin'
 )
@@ -121,7 +123,7 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None:
         pass
 
 
-class BaseSimulation(Activity):
+class BaseSimulation(Activity, Time):
     """
     A computational simulation that produces output data from a given input model system
     and input methodological parameters.
@@ -135,50 +137,6 @@ class BaseSimulation(Activity):
         links=['https://liusemweb.github.io/mdo/core/1.1/index.html#Calculation']
     )
 
-    datetime_end = Quantity(
-        type=Datetime,
-        description="""
-        The date and time when this computation ended.
-        """,
-        a_eln=ELNAnnotation(component='DateTimeEditQuantity'),
-    )
-
-    cpu1_start = Quantity(
-        type=np.float64,
-        unit='second',
-        description="""
-        The starting time of the computation on the (first) CPU 1.
-        """,
-        a_eln=ELNAnnotation(component='NumberEditQuantity'),
-    )
-
-    cpu1_end = Quantity(
-        type=np.float64,
-        unit='second',
-        description="""
-        The end time of the computation on the (first) CPU 1.
-        """,
-        a_eln=ELNAnnotation(component='NumberEditQuantity'),
-    )
-
-    wall_start = Quantity(
-        type=np.float64,
-        unit='second',
-        description="""
-        The internal wall-clock time from the starting of the computation.
-        """,
-        a_eln=ELNAnnotation(component='NumberEditQuantity'),
-    )
-
-    wall_end = Quantity(
-        type=np.float64,
-        unit='second',
-        description="""
-        The internal wall-clock time from the end of the computation.
-        """,
-        a_eln=ELNAnnotation(component='NumberEditQuantity'),
-    )
-
     program = SubSection(sub_section=Program.m_def, repeats=False)
 
     def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None:

diff --git a/src/nomad_simulations/schema_packages/outputs.py b/src/nomad_simulations/schema_packages/outputs.py
@@ -1,7 +1,6 @@
 from typing import TYPE_CHECKING, Optional
 
 import numpy as np
-from nomad.datamodel.data import ArchiveSection
 from nomad.datamodel.metainfo.annotations import ELNAnnotation
 from nomad.metainfo import Quantity, SubSection
 
@@ -38,8 +37,10 @@
     XASSpectrum,
 )
 
+from .common import Time
 
-class Outputs(ArchiveSection):
+
+class Outputs(Time):
     """
     Output properties of a simulation. This base class can be used for inheritance in any of the output properties
     defined in this schema.

diff --git a/src/nomad_simulations/schema_packages/workflow/__init__.py b/src/nomad_simulations/schema_packages/workflow/__init__.py
@@ -0,0 +1,4 @@
+from .general import SimulationWorkflow
+from .geometry_optimization import GeometryOptimization
+from .gw import DFTGWWorkflow
+from .single_point import SinglePoint
diff --git a/src/nomad_simulations/schema_packages/workflow/general.py b/src/nomad_simulations/schema_packages/workflow/general.py
@@ -0,0 +1,71 @@
+from nomad.datamodel import EntryArchive
+from nomad.datamodel.metainfo.workflow import Link, Task, Workflow
+from nomad.metainfo.util import MSubSectionList
+from structlog.stdlib import BoundLogger
+
+INCORRECT_N_TASKS = 'Incorrect number of tasks found.'
+
+
+class SimulationWorkflow(Workflow):
+    """
+    Base class for simulation workflows.
+    """
+
+    def normalize(self, archive: EntryArchive, logger: BoundLogger):
+        """
+        Generate tasks from the archive data outputs.
+        """
+        if not archive.data or not archive.data.outputs:
+            return
+
+        # generate tasks from outputs
+        if not self.tasks:
+            # default should to serial execution
+            times: list[tuple[float, float]] = list(
+                [
+                    (o.wall_start or n, o.wall_end or n)
+                    for n, o in enumerate(archive.data.outputs)
+                ]
+            )
+            times.sort(key=lambda x: x[0])
+            # current parent task
+            parent_n = 0
+            parent_outputs: list[Link] = []
+            for n, time in enumerate(times):
+                task = Task(
+                    outputs=[
+                        Link(
+                            name='Output',
+                            section=archive.data.outputs[n],
+                        )
+                    ],
+                )
+                self.tasks.append(task)
+                # link tasks based on overlap in execution time
+                if time[0] >= times[parent_n][1]:
+                    # if no overlap, assign outputs of parent as input to next task
+                    task.inputs.extend(
+                        [
+                            Link(name='Input', section=output.section)
+                            for output in parent_outputs or task.outputs
+                        ]
+                    )
+                    # assign first parent outputs as workflow inputs
+                    if not self.inputs:
+                        self.inputs.extend(task.inputs)
+                    # assign as new parent
+                    parent_n = n
+                    # reset outputs
+                    parent_outputs = list(task.outputs)
+                else:
+                    parent_outputs.extend(task.outputs)
+                    # if overlap, assign parent outputs to task inputs
+                    task.inputs.extend(
+                        [
+                            Link(name='Input', section=output.section)
+                            for output in self.tasks[parent_n or n].outputs
+                        ]
+                    )
+            if not self.outputs:
+                # assign parent outputs as workflow outputs
+                self.outputs.extend(parent_outputs)
diff --git a/src/nomad_simulations/schema_packages/workflow/geometry_optimization.py b/src/nomad_simulations/schema_packages/workflow/geometry_optimization.py
@@ -0,0 +1,33 @@
+from nomad.datamodel import EntryArchive
+from nomad.datamodel.metainfo.workflow import Link, Task
+from structlog.stdlib import BoundLogger
+
+from .general import SimulationWorkflow
+
+
+class GeometryOptimization(SimulationWorkflow):
+    """
+    Definitions for geometry optimization workflow.
+    """
+
+    def normalize(self, archive: EntryArchive, logger: BoundLogger) -> None:
+        """
+        Specify the inputs and outputs of the tasks as the model system.
+        """
+        super().normalize(archive, logger)
+
+        def to_system_links(task: Task) -> None:
+            task.inputs = [
+                Link(name='Input system', section=link.section.model_system_ref)
+                for link in task.inputs
+                if link.section and link.section.model_system_ref
+            ]
+            task.outputs = [
+                Link(name='Output system', section=link.section.model_system_ref)
+                for link in task.inputs
+                if link.section and link.section.model_system_ref
+            ]
+
+        to_system_links(self)
+        for task in self.tasks:
+            to_system_links(task)
diff --git a/src/nomad_simulations/schema_packages/workflow/gw.py b/src/nomad_simulations/schema_packages/workflow/gw.py
@@ -0,0 +1,37 @@
+from nomad.datamodel import EntryArchive
+from structlog.stdlib import BoundLogger
+
+from .general import INCORRECT_N_TASKS, SimulationWorkflow
+
+
+class DFTGWWorkflow(SimulationWorkflow):
+    """
+    Definitions for GW calculation based on DFT workflow.
+    """
+
+    def normalize(self, archive: EntryArchive, logger: BoundLogger) -> None:
+        """
+        Link the DFT and GW single point workflows in the DFT-GW workflow.
+        """
+        super().normalize(archive, logger)
+
+        if not self.name:
+            self.name: str = 'DFT+GW'
+
+        if len(self.tasks) != 2:
+            logger.error(INCORRECT_N_TASKS)
+            return
+
+        if not self.inputs:
+            # set inputs to inputs of DFT
+            self.inputs.extend(self.tasks[0].task.inputs)
+
+        if not self.outputs:
+            # set ouputs to outputs of GW
+            self.outputs.extend(self.tasks[1].task.outputs)
+
+        # link dft and gw workflows
+        self.tasks[0].inputs = self.inputs
+        self.tasks[0].outputs = self.tasks[0].task.outputs
+        self.tasks[1].inputs = self.tasks[0].outputs
+        self.tasks[1].outputs = self.outputs
diff --git a/src/nomad_simulations/schema_packages/workflow/single_point.py b/src/nomad_simulations/schema_packages/workflow/single_point.py
@@ -0,0 +1,40 @@
+from nomad.datamodel import EntryArchive
+from nomad.datamodel.metainfo.workflow import Link
+from structlog.stdlib import BoundLogger
+
+from .general import INCORRECT_N_TASKS, SimulationWorkflow
+
+
+class SinglePoint(SimulationWorkflow):
+    """
+    Definitions for single point workflow.
+    """
+
+    def normalize(self, archive: EntryArchive, logger: BoundLogger) -> None:
+        """
+        Specify the method and system as inputs.
+        """
+        super().normalize(archive, logger)
+        if len(self.tasks) != 1:
+            logger.error(INCORRECT_N_TASKS)
+            return
+
+        if not self.inputs:
+            self.inputs.extend(self.tasks[0].inputs)
+
+        inps: list[Link] = []
+        for inp in self.inputs:
+            if inp.section and inp.section.model_system_ref:
+                inps.append(
+                    Link(name='Input system', section=inp.section.model_system_ref)
+                )
+            if inp.section and inp.section.model_method_ref:
+                inps.append(
+                    Link(name='Input method', section=inp.section.model_method_ref)
+                )
+        self.inputs.clear()
+        self.inputs.extend(inps)
+
+        # reconnect inputs to link as these are redefined
+        self.tasks[0].inputs.clear()
+        self.tasks[0].inputs.extend(inps)