Fix module for script runners generated by CLI and from __main__ (#1445)

elliotgunton · web-flow · commit 55b060d7523a · 2025-07-24T17:20:33.000+01:00
**Pull Request Checklist** - [x] Fixes #573, Fixes #1161 - [x] Tests added - [x] Documentation/examples added - [x] [Good commit messages](https://cbea.ms/git-commit/) and/or PR title **Description of PR** Currently, Script Runners cannot be exported to YAML correctly if they are defined in the module being run as `__main__`, i.e. when exporting a workflow from the very file it is written in, because `transform_values` sees `__main__` as the function's module. The CLI also ignores the full module path of the workflow file, using only the file stem as the module name. This PR fixes both cases by using a utility function to construct a valid module path. --------- Signed-off-by: Elliot Gunton <elliotgunton@gmail.com>
diff --git a/src/hera/_cli/generate/__init__.py b/src/hera/_cli/generate/__init__.py
@@ -1,3 +1,5 @@
+"""Code generation CLI functions."""
+
 from hera._cli.generate import yaml
 
 __all__ = [
diff --git a/src/hera/_cli/generate/yaml.py b/src/hera/_cli/generate/yaml.py
@@ -1,5 +1,3 @@
-"""The main entrypoint for hera CLI."""
-
 from __future__ import annotations
 
 import importlib.util
@@ -8,6 +6,7 @@
 
 from hera._cli.base import GenerateYaml
 from hera._cli.generate.util import YAML_EXTENSIONS, convert_code, expand_paths, write_output
+from hera.workflows._runner.util import create_module_string
 from hera.workflows.workflow import Workflow
 
 DEFAULT_EXTENSION = ".yaml"
@@ -47,8 +46,8 @@ def load_workflows_from_module(path: Path) -> list[Workflow]:
     Returns:
         A list containing all `Workflow` objects defined within that module.
     """
-    module_name = path.stem
-    spec = importlib.util.spec_from_file_location(module_name, path, submodule_search_locations=[str(path.parent)])
+    module_name = create_module_string(path)
+    spec = importlib.util.spec_from_file_location(module_name, path)
     assert spec
 
     module = importlib.util.module_from_spec(spec)
diff --git a/src/hera/workflows/_runner/util.py b/src/hera/workflows/_runner/util.py
@@ -297,3 +297,30 @@ def _run() -> None:
         exit(result.exit_code)
 
     print(serialize(result))
+
+
+def create_module_string(path: Path) -> str:
+    """Create a dotted, importable Python module path for the file at `path`.
+
+    The path is resolved, then matched against every resolved `sys.path` entry plus the current working
+    directory; the most specific (deepest) containing directory is used as the import root.
+
+    e.g. if sys.path contains "/project" and the file is "/project/workflows/wf_a.py", then the returned string will be
+    "workflows.wf_a"
+
+    If no entry contains the file we simply return the file stem, e.g. `wf_a` for "/project/workflows/wf_a.py".
+    """
+    path = path.resolve()
+
+    # find every sys.path entry (or the cwd) that contains the given path
+    bases = (Path(entry).resolve() for entry in [*sys.path, os.getcwd()])
+    candidates = [base for base in bases if path.is_relative_to(base)]
+
+    if not candidates:
+        return path.stem
+
+    # candidates are all ancestors of `path`, so the longest one is the most specific import root
+    base_path = max(candidates, key=lambda p: len(str(p)))
+    # join the parent *parts* with the stem: unlike `.replace(".py", "").split("/")` this only strips the
+    # file's own suffix (not every ".py" in the path) and does not depend on "/" being the separator
+    return ".".join([*path.relative_to(base_path).parent.parts, path.stem])
diff --git a/src/hera/workflows/script.py b/src/hera/workflows/script.py
@@ -11,6 +11,7 @@
 import textwrap
 from abc import abstractmethod
 from functools import wraps
+from pathlib import Path
 from typing import (
     Any,
     Callable,
@@ -852,11 +853,19 @@ def transform_values(self, cls: Type[Script], values: Any) -> Any:
 
         if values.get("args") is not None:
             raise ValueError("Cannot specify args when callable is True")
+
+        module = values["source"].__module__
+
+        if module == "__main__":
+            from hera.workflows._runner.util import create_module_string
+
+            module = create_module_string(Path(values["source"].__globals__["__file__"]))
+
         values["args"] = [
             "-m",
             "hera.workflows.runner",
             "-e",
-            f"{values['source'].__module__}:{values['source'].__name__}",
+            f"{module}:{values['source'].__name__}",
         ]
 
         return values
diff --git a/tests/cli/examples/runner_workflow.py b/tests/cli/examples/runner_workflow.py
@@ -0,0 +1,13 @@
+from hera.workflows import Workflow, script
+
+
+@script(constructor="runner")  # CLI fixture: tests expect args `-m hera.workflows.runner -e <module path>:hello`
+def hello():
+    pass
+
+
+with Workflow(
+    generate_name="runner-workflow-",
+    entrypoint="hello",
+) as w:
+    hello()  # the expected YAML contains a single `hello` script template, the workflow entrypoint
diff --git a/tests/cli/test_generate_yaml.py b/tests/cli/test_generate_yaml.py
@@ -1,3 +1,4 @@
+import shutil
 import sys
 from pathlib import Path
 from textwrap import dedent
@@ -23,47 +24,70 @@ def patch_open():
     return patch("io.open", new=mock_open())
 
 
-single_workflow_output = dedent("""\
-    apiVersion: argoproj.io/v1alpha1
-    kind: Workflow
-    metadata:
-      name: single
-    spec: {}
-    """)
-
-workflow_template_output = dedent("""\
-    apiVersion: argoproj.io/v1alpha1
-    kind: WorkflowTemplate
-    metadata:
-      name: workflow-template
-    spec: {}
-    """)
-
-cluster_workflow_template_output = dedent("""\
-    apiVersion: argoproj.io/v1alpha1
-    kind: ClusterWorkflowTemplate
-    metadata:
-      name: cluster-workflow-template
-    spec: {}
-    """)
-
-multiple_workflow_output = dedent("""\
-    apiVersion: argoproj.io/v1alpha1
-    kind: Workflow
-    metadata:
-      name: one
-    spec: {}
-    ---
-    apiVersion: argoproj.io/v1alpha1
-    kind: Workflow
-    metadata:
-      name: two
-    spec: {}
-    """)
+single_workflow_output = """\
+apiVersion: argoproj.io/v1alpha1
+kind: Workflow
+metadata:
+  name: single
+spec: {}
+"""
+
+runner_workflow_output = """\
+apiVersion: argoproj.io/v1alpha1
+kind: Workflow
+metadata:
+  generateName: runner-workflow-
+spec:
+  entrypoint: hello
+  templates:
+  - name: hello
+    script:
+      image: python:3.9
+      source: '{{inputs.parameters}}'
+      args:
+      - -m
+      - hera.workflows.runner
+      - -e
+      - tests.cli.examples.runner_workflow:hello
+      command:
+      - python
+"""
+
+
+workflow_template_output = """\
+apiVersion: argoproj.io/v1alpha1
+kind: WorkflowTemplate
+metadata:
+  name: workflow-template
+spec: {}
+"""
+
+cluster_workflow_template_output = """\
+apiVersion: argoproj.io/v1alpha1
+kind: ClusterWorkflowTemplate
+metadata:
+  name: cluster-workflow-template
+spec: {}
+"""
+
+multiple_workflow_output = """\
+apiVersion: argoproj.io/v1alpha1
+kind: Workflow
+metadata:
+  name: one
+spec: {}
+---
+apiVersion: argoproj.io/v1alpha1
+kind: Workflow
+metadata:
+  name: two
+spec: {}
+"""
 
 whole_folder_output = join_output(
     cluster_workflow_template_output,
     multiple_workflow_output,
+    runner_workflow_output,
     single_workflow_output,
     workflow_template_output,
 )
@@ -89,6 +113,24 @@ def test_single_workflow(capsys):
     assert output == single_workflow_output
 
 
+@pytest.mark.cli
+def test_runner_workflow(capsys):
+    """A runner script rendered via the CLI references its full dotted module path."""
+    runner.invoke("tests/cli/examples/runner_workflow.py")
+    rendered = get_stdout(capsys)
+    assert rendered == runner_workflow_output
+
+
+@pytest.mark.cli
+def test_runner_workflow_not_in_cwd(capsys, tmp_path):
+    """A workflow file that is not in sys.path falls back to its file stem as the module (best guess)."""
+    shutil.copy("tests/cli/examples/runner_workflow.py", tmp_path)
+    copied_workflow = tmp_path / "runner_workflow.py"
+    runner.invoke(str(copied_workflow))
+    rendered = get_stdout(capsys)
+    assert rendered == runner_workflow_output.replace("tests.cli.examples.runner_workflow", "runner_workflow")
+
+
 @pytest.mark.cli
 def test_multiple_workflow(capsys):
     runner.invoke("tests/cli/examples/multiple_workflow.py")
@@ -308,7 +350,11 @@ def test_exclude_one(capsys):
     runner.invoke("tests/cli/examples", "--exclude=*/examples/*template*")
 
     output = get_stdout(capsys)
-    assert output == join_output(multiple_workflow_output, single_workflow_output)
+    assert output == join_output(
+        multiple_workflow_output,
+        runner_workflow_output,
+        single_workflow_output,
+    )
 
 
 @pytest.mark.cli
@@ -320,7 +366,7 @@ def test_exclude_two(capsys):
     )
 
     output = get_stdout(capsys)
-    assert output == multiple_workflow_output
+    assert output == join_output(multiple_workflow_output, runner_workflow_output)
 
 
 @pytest.mark.cli
diff --git a/tests/test_runner.py b/tests/test_runner.py
@@ -22,7 +22,7 @@
 import tests.helper as test_module
 from hera.shared._pydantic import _PYDANTIC_VERSION
 from hera.shared.serialization import serialize
-from hera.workflows._runner.util import _run, _runner
+from hera.workflows._runner.util import _run, _runner, create_module_string
 from hera.workflows.io.v1 import Output as OutputV1
 
 try:
@@ -1079,3 +1079,63 @@ def test_script_partially_annotated_tuple_should_raise_an_error():
         ),
     ):
         _runner(entrypoint, kwargs_list)
+
+
+@pytest.mark.parametrize(
+    "sys_path_relatives,file_rel_path,expected",
+    [
+        pytest.param(["project"], "project/wf_a.py", "wf_a", id="Exact direct match in sys.path"),
+        pytest.param(["project"], "project/workflows/wf_a.py", "workflows.wf_a", id="Submodule match in sys.path"),
+        pytest.param(
+            ["project"],
+            "project/workflows/subpackage/another/wf_a.py",
+            "workflows.subpackage.another.wf_a",
+            id="Deep submodule match in sys.path",
+        ),
+        pytest.param(
+            ["project", "project/src"],
+            "project/src/workflows/wf_b.py",
+            "workflows.wf_b",
+            id="More specific match (src dir) in sys.path",
+        ),
+        pytest.param([], "project/workflows/wf_c.py", "wf_c", id="No match, fallback to stem"),
+        pytest.param(
+            [""],
+            "project/workflows/wf_d.py",
+            "project.workflows.wf_d",
+            id="sys.path contains root, nested module path is full path",
+        ),
+    ],
+)
+def test_create_module_string(
+    tmp_path,
+    monkeypatch,
+    sys_path_relatives: list[str],
+    file_rel_path: str,
+    expected: str,
+):
+    """Check the dotted module path built for a file under various `sys.path` layouts.
+
+    Every entry of `sys_path_relatives`, and `file_rel_path` itself, is taken relative to `tmp_path`.
+    """
+    # GIVEN
+    # Create file structure
+    file_path = tmp_path / file_rel_path
+    file_path.parent.mkdir(parents=True, exist_ok=True)
+
+    # Set up sys.path using tmp_path as root
+    mock_sys_path = [str(tmp_path / rel) for rel in sys_path_relatives]
+    monkeypatch.setattr(sys, "path", mock_sys_path)
+
+    # create_module_string also considers os.getcwd(), so pin the cwd to a directory that cannot
+    # contain the file; otherwise the result depends on where pytest was launched from
+    unrelated_cwd = tmp_path / "unrelated_cwd"
+    unrelated_cwd.mkdir()
+    monkeypatch.chdir(unrelated_cwd)
+
+    # THEN
+    assert create_module_string(file_path) == expected
+
+
+def test_symlinked_sys_path(tmp_path, monkeypatch):
+    """A sys.path entry that is a symlink to the file's directory still yields the bare module name."""
+    target_dir = tmp_path / "real_project"
+    target_dir.mkdir()
+    workflow_file = target_dir / "wf.py"
+
+    # sys.path only contains the symlink, never the real directory it points to
+    link = tmp_path / "link_project"
+    link.symlink_to(target_dir)
+    monkeypatch.setattr(sys, "path", [str(link)])
+
+    assert create_module_string(workflow_file) == "wf"
diff --git a/tests/test_unit/test_script.py b/tests/test_unit/test_script.py

Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,5 @@`
	`1`	`+"""Code generation CLI functions."""`
	`2`	`+`
`1`	`3`	`from hera._cli.generate import yaml`
`2`	`4`
`3`	`5`	`__all__ = [`