xcube-dev · pont-us · Aug 7, 2025 · Aug 7, 2025 · Aug 7, 2025 · Aug 7, 2025
diff --git a/CHANGES.md b/CHANGES.md
@@ -8,6 +8,9 @@
 * Improve STAC output
 * Tweak CWL format (#24)
 * Use micromamba entry point in Docker image (#26)
+* Allow setting of CWL workflow ID (#29)
+* Add in-notebook configuration interface (#30)
+* Support writing of stage-out STAC by notebook (#32)
 
 ## Changes in 0.1.0
 

diff --git a/environment.yml b/environment.yml
@@ -15,6 +15,7 @@ dependencies:
   - xarray
   - xcube  # See note below
   # test dependencies
+  - cwltool
   - pytest
   - pytest-cov
 

diff --git a/test/data/paramtest.ipynb b/test/data/paramtest.ipynb
@@ -151,7 +151,8 @@
    "outputs": [],
    "source": [
     "parameter_1 = my_constant * 2\n",
-    "parameter_2 = \"default value\""
+    "parameter_2 = \"default value\"\n",
+    "xcengine_config = dict(workflow_id=\"my-workflow\")"
    ]
   }
  ],
@@ -171,7 +172,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.12.8"
+   "version": "3.13.3"
   }
  },
  "nbformat": 4,

diff --git a/test/test_core.py b/test/test_core.py
@@ -3,11 +3,14 @@
 import pathlib
 import pytz
 from io import BufferedReader
+import yaml
+import cwltool.load_tool
 
 import pytest
 from unittest.mock import Mock
 
 import docker.models.images
+import schema_salad.exceptions
 
 import xcengine.core
 import xcengine.parameters
@@ -160,3 +163,38 @@ def test_script_creator_convert_notebook_to_script(tmp_path, clear):
     expected = {output_dir / f for f in filenames}
     assert set(output_dir.iterdir()) == expected
     # TODO test execution as well?
+
+
+@pytest.mark.parametrize("nb_name", ["noparamtest", "paramtest"])
+def test_script_creator_cwl(tmp_path, nb_name):
+    """Check that the generated CWL validates and has the expected content.
+
+    The CWL created by ``ScriptCreator.create_cwl`` is dumped to a YAML
+    file, validated with cwltool, and then inspected for the Docker image
+    tag and the workflow ID.
+    """
+    nb_path = pathlib.Path(__file__).parent / "data" / f"{nb_name}.ipynb"
+    script_creator = ScriptCreator(nb_path)
+    image_tag = "foo"
+    cwl_path = tmp_path / "test.cwl"
+    cwl = script_creator.create_cwl(image_tag)
+    with open(cwl_path, "w") as fh:
+        yaml.dump(cwl, fh)
+    loading_context, workflowobj, uri = cwltool.load_tool.fetch_document(
+        str(cwl_path)
+    )
+    try:
+        cwltool.load_tool.resolve_and_validate_document(
+            loading_context, workflowobj, uri
+        )
+    except schema_salad.exceptions.ValidationException as e:
+        # Report the validator's message so that a failure is diagnosable.
+        pytest.fail(f"CWL validation failed: {e}")
+    graph = cwl["$graph"]
+    cli_tools = [n for n in graph if n["class"] == "CommandLineTool"]
+    assert len(cli_tools) == 1
+    cli_tool = cli_tools[0]
+    assert (
+        cli_tool["requirements"]["DockerRequirement"]["dockerPull"]
+        == image_tag
+    )
+    assert cli_tool["hints"]["DockerRequirement"]["dockerPull"] == image_tag
+    workflows = [n for n in graph if n["class"] == "Workflow"]
+    assert len(workflows) == 1
+    workflow = workflows[0]
+    # paramtest.ipynb sets workflow_id in its xcengine_config; for
+    # noparamtest the ID is expected to fall back to the notebook's stem.
+    assert workflow["id"] == (
+        nb_path.stem if nb_name == "noparamtest" else "my-workflow"
+    )
diff --git a/test/test_parameters.py b/test/test_parameters.py
@@ -143,17 +143,30 @@ def test_parameters_get_cwl_step_inputs(notebook_parameters):
 
 
 def test_parameters_from_code(expected_vars):
-    assert (
-        xcengine.parameters.NotebookParameters.from_code(
-            """
+    parameters = xcengine.parameters.NotebookParameters.from_code(
+        """
 some_int = 42
 some_float = 3.14159
 some_string = "foo"
 some_bool = False
     """
-        ).params
-        == expected_vars
     )
+    assert parameters.params == expected_vars
+    assert parameters.config == {}
+
+
+def test_parameters_from_code_with_xce_config(expected_vars):
+    """Check that the config variable is separated from the parameters."""
+    xce_config = dict(foo=1, bar="hi!", baz={})
+    # Use the fully qualified class name, as elsewhere in this module.
+    config_var_name = xcengine.parameters.NotebookParameters.config_var_name
+    code = f"""
+some_int = 42
+some_float = 3.14159
+some_string = "foo"
+some_bool = False
+{config_var_name} = {xce_config!r}
+    """
+    parameters = xcengine.parameters.NotebookParameters.from_code(code)
+    assert parameters.params == expected_vars
+    assert parameters.config == xce_config
 
 
 def test_parameters_from_code_with_setup(expected_vars):

diff --git a/xcengine/cli.py b/xcengine/cli.py
@@ -150,11 +150,18 @@ def build(
             )
             image = image_builder.build()
     if eoap:
+
         class IndentDumper(yaml.Dumper):
             def increase_indent(self, flow=False, indentless=False):
                 return super(IndentDumper, self).increase_indent(flow, False)
 
-        eoap.write_text(yaml.dump(image_builder.create_cwl(), sort_keys=False, Dumper=IndentDumper))
+        eoap.write_text(
+            yaml.dump(
+                image_builder.create_cwl(),
+                sort_keys=False,
+                Dumper=IndentDumper,
+            )
+        )
     print(f"Built image with tags {image.tags}")
 
 

diff --git a/xcengine/core.py b/xcengine/core.py
@@ -36,10 +36,12 @@
 class ScriptCreator:
     """Turn a Jupyter notebook into a set of scripts"""
 
+    nb_path: pathlib.Path
     notebook: nbformat.NotebookNode
     nb_params: NotebookParameters = NotebookParameters({})
 
     def __init__(self, nb_path: pathlib.Path):
+        self.nb_path = nb_path
         with open(nb_path) as fh:
             self.notebook = nbformat.read(fh, as_version=4)
         self.process_params_cell()
@@ -75,13 +77,16 @@ def process_params_cell(self) -> None:
                 params_cell_index = i
                 break
         if params_cell_index is not None:
+            # Collect the code from the cells preceding the parameter cell
+            # (because it might be a necessary preliminary to executing the
+            # parameter cell itself).
             setup_node = nbformat.from_dict(self.notebook)
             setup_node.cells = setup_node.cells[:params_cell_index]
             exporter = nbconvert.PythonExporter()
             (setup_code, _) = exporter.from_notebook_node(setup_node)
-            # Mock out the get_ipython function in case there are any
-            # IPython magic commands in the notebook. This effectively
-            # turns them into no-ops
+            # Mock out the get_ipython function in case there are any IPython
+            # magic commands in the notebook. This effectively turns them into
+            # no-ops.
             setup_code = (
                 "import unittest.mock\n"
                 "get_ipython = unittest.mock.MagicMock\n" + setup_code
@@ -118,7 +123,9 @@ def create_cwl(self, image_tag: str) -> dict[str, Any]:
             "$graph": [
                 {
                     "class": "Workflow",
-                    "id": "xcengine_ap",
+                    "id": self.nb_params.config.get(
+                        "workflow_id", self.nb_path.stem
+                    ),
                     "label": "xcengine notebook",
                     "doc": "xcengine notebook",
                     "requirements": [],
@@ -144,9 +151,7 @@ def create_cwl(self, image_tag: str) -> dict[str, Any]:
                     "requirements": {
                         "DockerRequirement": {"dockerPull": image_tag}
                     },
-                    "hints": {
-                        "DockerRequirement": {"dockerPull": image_tag}
-                    },
+                    "hints": {"DockerRequirement": {"dockerPull": image_tag}},
                     "baseCommand": [
                         "/usr/local/bin/_entrypoint.sh",
                         "python",

diff --git a/xcengine/parameters.py b/xcengine/parameters.py
@@ -7,6 +7,7 @@
 import pystac
 import xarray as xr
 import yaml
+from typing_extensions import ClassVar
 
 LOGGER = logging.getLogger(__name__)
 logging.basicConfig(level=logging.INFO)
@@ -17,12 +18,19 @@ class NotebookParameters:
     params: dict[str, tuple[type, Any]]
     cwl_params: dict[str, tuple[type | str, Any]]
     dataset_inputs: list[str]
-
-    def __init__(self, params: dict[str, tuple[type, Any]]):
+    config_var_name: ClassVar[str] = "xcengine_config"
+    config: dict[str, Any]
+
+    def __init__(
+        self,
+        params: dict[str, tuple[type, Any]],
+        config: dict[str, Any] | None = None,
+    ):
+        """Create a parameter set with an optional configuration.
+
+        :param params: mapping from parameter name to a (type, value) tuple
+        :param config: configuration dictionary; if None, an empty
+            dictionary is used
+        """
         self.params = params
+        self.config = {} if config is None else config
         self.make_cwl_params()
 
-    def make_cwl_params(self):
+    def make_cwl_params(self) -> None:
         self.dataset_inputs = []
         self.cwl_params = {}
         for param_name in self.params:
@@ -38,11 +46,9 @@ def make_cwl_params(self):
     def from_code(
         cls, code: str, setup_code: str | None = None
     ) -> "NotebookParameters":
-        # TODO run whole notebook up to params cell, not just the params cell!
-        # (Because it might use imports etc. from earlier in the notebook.)
-        # This will need some tweaking of the parameter extraction -- see
-        # comment therein.
-        return cls(cls.extract_variables(code, setup_code))
+        variables = cls.extract_variables(code, setup_code)
+        config = variables.pop(cls.config_var_name, (None, None))
+        return cls(variables, config[1])
 
     @classmethod
     def from_yaml(cls, yaml_content: str | typing.IO) -> "NotebookParameters":
@@ -74,13 +80,17 @@ def extract_variables(
             old_locals = locals_.copy()
         exec(code, globals(), locals_)
         new_vars = locals_.keys() - old_locals.keys()
-        return {k: cls.make_param_tuple(locals_[k]) for k in new_vars}
+        return {k: cls.make_param_tuple(k, locals_[k]) for k in new_vars}
 
-    @staticmethod
-    def make_param_tuple(value: Any) -> tuple[type, Any]:
+    @classmethod
+    def make_param_tuple(cls, key: str, value: Any) -> tuple[type, Any]:
         return (
             t := type(value),
-            value if t in {int, float, str, bool} else None,
+            (
+                value
+                if t in {int, float, str, bool} or key == cls.config_var_name
+                else None
+            ),
         )
 
     def get_cwl_workflow_inputs(self) -> dict[str, dict[str, Any]]:

diff --git a/xcengine/util.py b/xcengine/util.py
@@ -21,10 +21,14 @@ def clear_directory(directory: pathlib.Path) -> None:
 def write_stac(
     datasets: dict[str, xr.Dataset], stac_root: pathlib.Path
 ) -> None:
+    catalog_path = stac_root / "catalog.json"
+    if catalog_path.exists():
+        # Assume that the user code generated its own stage-out data
+        return
     catalog = pystac.Catalog(
         id="catalog",
         description="Root catalog",
-        href=f"{stac_root}/catalog.json",
+        href=f"{catalog_path}",
     )
     for ds_name, ds in datasets.items():
         zarr_name = ds_name + ".zarr"
-Original file line number
+Diff line change
@@ Expand Up / @@ -15,6 +15,7 @@ dependencies: @@
       - xarray
       - xcube  # See note below
       # test dependencies
+      - cwltool
       - pytest
       - pytest-cov
@@ Expand Down @@