Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
* Improve STAC output
* Tweak CWL format (#24)
* Use micromamba entry point in Docker image (#26)
* Allow setting of CWL workflow ID (#29)
* Add in-notebook configuration interface (#30)
* Support writing of stage-out STAC by notebook (#32)

## Changes in 0.1.0

Expand Down
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ dependencies:
- xarray
- xcube # See note below
# test dependencies
- cwltool
- pytest
- pytest-cov

Expand Down
5 changes: 3 additions & 2 deletions test/data/paramtest.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,8 @@
"outputs": [],
"source": [
"parameter_1 = my_constant * 2\n",
"parameter_2 = \"default value\""
"parameter_2 = \"default value\"\n",
"xcengine_config = dict(workflow_id=\"my-workflow\")"
]
}
],
Expand All @@ -171,7 +172,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.8"
"version": "3.13.3"
}
},
"nbformat": 4,
Expand Down
38 changes: 38 additions & 0 deletions test/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,14 @@
import pathlib
import pytz
from io import BufferedReader
import yaml
import cwltool.load_tool

import pytest
from unittest.mock import Mock

import docker.models.images
import schema_salad.exceptions

import xcengine.core
import xcengine.parameters
Expand Down Expand Up @@ -160,3 +163,38 @@ def test_script_creator_convert_notebook_to_script(tmp_path, clear):
expected = {output_dir / f for f in filenames}
assert set(output_dir.iterdir()) == expected
# TODO test execution as well?


@pytest.mark.parametrize("nb_name", ["noparamtest", "paramtest"])
def test_script_creator_cwl(tmp_path, nb_name):
    """Check that the CWL generated for a notebook validates and is well-formed.

    The CWL dictionary returned by ``ScriptCreator.create_cwl`` is written
    to a YAML file, validated with cwltool, and then inspected directly for
    the expected Docker image tag and workflow ID.
    """
    image_tag = "foo"
    data_dir = pathlib.Path(__file__).parent / "data"
    nb_path = data_dir / f"{nb_name}.ipynb"
    cwl_path = tmp_path / "test.cwl"

    cwl = ScriptCreator(nb_path).create_cwl(image_tag)
    with open(cwl_path, "w") as fh:
        yaml.dump(cwl, fh)

    # Let cwltool load the serialized document and validate it.
    fetched = cwltool.load_tool.fetch_document(str(cwl_path))
    loading_context, workflowobj, uri = fetched
    try:
        cwltool.load_tool.resolve_and_validate_document(
            loading_context, workflowobj, uri
        )
    except schema_salad.exceptions.ValidationException:
        pytest.fail("CWL validation failed")

    def nodes_of_class(class_name):
        # Select the graph nodes whose "class" entry matches class_name.
        return [
            node for node in cwl["$graph"] if node["class"] == class_name
        ]

    # Exactly one command-line tool, referring to the requested image.
    cli_tools = nodes_of_class("CommandLineTool")
    assert len(cli_tools) == 1
    cli_tool = cli_tools[0]
    for section in ("requirements", "hints"):
        docker_requirement = cli_tool[section]["DockerRequirement"]
        assert docker_requirement["dockerPull"] == image_tag

    # Exactly one workflow; its ID is the notebook's file stem for the
    # "noparamtest" notebook and "my-workflow" otherwise.
    workflows = nodes_of_class("Workflow")
    assert len(workflows) == 1
    if nb_name == "noparamtest":
        expected_id = nb_path.stem
    else:
        expected_id = "my-workflow"
    assert workflows[0]["id"] == expected_id
23 changes: 18 additions & 5 deletions test/test_parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,17 +143,30 @@ def test_parameters_get_cwl_step_inputs(notebook_parameters):


def test_parameters_from_code(expected_vars):
assert (
xcengine.parameters.NotebookParameters.from_code(
"""
parameters = xcengine.parameters.NotebookParameters.from_code(
"""
some_int = 42
some_float = 3.14159
some_string = "foo"
some_bool = False
"""
).params
== expected_vars
)
assert parameters.params == expected_vars
assert parameters.config == {}


def test_parameters_from_code_with_xce_config(expected_vars):
    """Check that the configuration variable is separated from parameters.

    The code passed to ``from_code`` assigns the four ordinary parameters
    plus a dictionary under the reserved configuration variable name. The
    dictionary must end up in ``config`` and must not appear in ``params``.
    """
    xce_config = dict(foo=1, bar="hi!", baz={})
    # Use the fully qualified class, as in the from_code call below, so
    # that this test relies only on the ``xcengine.parameters`` import.
    config_var_name = xcengine.parameters.NotebookParameters.config_var_name
    code = f"""
some_int = 42
some_float = 3.14159
some_string = "foo"
some_bool = False
{config_var_name} = {xce_config!r}
"""
    parameters = xcengine.parameters.NotebookParameters.from_code(code)
    assert parameters.params == expected_vars
    assert parameters.config == xce_config


def test_parameters_from_code_with_setup(expected_vars):
Expand Down
9 changes: 8 additions & 1 deletion xcengine/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,11 +150,18 @@ def build(
)
image = image_builder.build()
if eoap:

class IndentDumper(yaml.Dumper):
    """YAML dumper that never requests indentless output."""

    def increase_indent(self, flow=False, indentless=False):
        # The caller's ``indentless`` value is deliberately ignored and
        # False is always forwarded to the base implementation.
        # NOTE(review): presumably this makes block sequences indented
        # beneath their parent key -- confirm against PyYAML's emitter.
        return super().increase_indent(flow, False)

eoap.write_text(yaml.dump(image_builder.create_cwl(), sort_keys=False, Dumper=IndentDumper))
eoap.write_text(
yaml.dump(
image_builder.create_cwl(),
sort_keys=False,
Dumper=IndentDumper,
)
)
print(f"Built image with tags {image.tags}")


Expand Down
19 changes: 12 additions & 7 deletions xcengine/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,12 @@
class ScriptCreator:
"""Turn a Jupyter notebook into a set of scripts"""

nb_path: pathlib.Path
notebook: nbformat.NotebookNode
nb_params: NotebookParameters = NotebookParameters({})

def __init__(self, nb_path: pathlib.Path):
self.nb_path = nb_path
with open(nb_path) as fh:
self.notebook = nbformat.read(fh, as_version=4)
self.process_params_cell()
Expand Down Expand Up @@ -75,13 +77,16 @@ def process_params_cell(self) -> None:
params_cell_index = i
break
if params_cell_index is not None:
# Collect the code from the cells preceding the parameter cell
# (because it might be a necessary preliminary to executing the
# parameter cell itself).
setup_node = nbformat.from_dict(self.notebook)
setup_node.cells = setup_node.cells[:params_cell_index]
exporter = nbconvert.PythonExporter()
(setup_code, _) = exporter.from_notebook_node(setup_node)
# Mock out the get_ipython function in case there are any
# IPython magic commands in the notebook. This effectively
# turns them into no-ops
# Mock out the get_ipython function in case there are any IPython
# magic commands in the notebook. This effectively turns them into
# no-ops.
setup_code = (
"import unittest.mock\n"
"get_ipython = unittest.mock.MagicMock\n" + setup_code
Expand Down Expand Up @@ -118,7 +123,9 @@ def create_cwl(self, image_tag: str) -> dict[str, Any]:
"$graph": [
{
"class": "Workflow",
"id": "xcengine_ap",
"id": self.nb_params.config.get(
"workflow_id", self.nb_path.stem
),
"label": "xcengine notebook",
"doc": "xcengine notebook",
"requirements": [],
Expand All @@ -144,9 +151,7 @@ def create_cwl(self, image_tag: str) -> dict[str, Any]:
"requirements": {
"DockerRequirement": {"dockerPull": image_tag}
},
"hints": {
"DockerRequirement": {"dockerPull": image_tag}
},
"hints": {"DockerRequirement": {"dockerPull": image_tag}},
"baseCommand": [
"/usr/local/bin/_entrypoint.sh",
"python",
Expand Down
34 changes: 22 additions & 12 deletions xcengine/parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import pystac
import xarray as xr
import yaml
from typing_extensions import ClassVar

LOGGER = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
Expand All @@ -17,12 +18,19 @@ class NotebookParameters:
params: dict[str, tuple[type, Any]]
cwl_params: dict[str, tuple[type | str, Any]]
dataset_inputs: list[str]

def __init__(self, params: dict[str, tuple[type, Any]]):
config_var_name: ClassVar[str] = "xcengine_config"
config: dict[str, Any]

def __init__(
    self,
    params: dict[str, tuple[type, Any]],
    config: dict[str, Any] | None = None,
) -> None:
    """Store parameters and configuration, then derive the CWL parameters.

    :param params: mapping from parameter name to a ``(type, value)``
        tuple
    :param config: configuration dictionary; when ``None`` (the default)
        an empty dictionary is stored instead
    """
    self.params = params
    # A fresh dictionary is created per instance rather than using a
    # mutable default argument.
    self.config = {} if config is None else config
    self.make_cwl_params()

def make_cwl_params(self):
def make_cwl_params(self) -> None:
self.dataset_inputs = []
self.cwl_params = {}
for param_name in self.params:
Expand All @@ -38,11 +46,9 @@ def make_cwl_params(self):
def from_code(
cls, code: str, setup_code: str | None = None
) -> "NotebookParameters":
# TODO run whole notebook up to params cell, not just the params cell!
# (Because it might use imports etc. from earlier in the notebook.)
# This will need some tweaking of the parameter extraction -- see
# comment therein.
return cls(cls.extract_variables(code, setup_code))
variables = cls.extract_variables(code, setup_code)
config = variables.pop(cls.config_var_name, (None, None))
return cls(variables, config[1])

@classmethod
def from_yaml(cls, yaml_content: str | typing.IO) -> "NotebookParameters":
Expand Down Expand Up @@ -74,13 +80,17 @@ def extract_variables(
old_locals = locals_.copy()
exec(code, globals(), locals_)
new_vars = locals_.keys() - old_locals.keys()
return {k: cls.make_param_tuple(locals_[k]) for k in new_vars}
return {k: cls.make_param_tuple(k, locals_[k]) for k in new_vars}

@staticmethod
def make_param_tuple(value: Any) -> tuple[type, Any]:
@classmethod
def make_param_tuple(cls, key: str, value: Any) -> tuple[type, Any]:
    """Return a ``(type, value)`` tuple describing a variable.

    The value is kept only if its type is exactly ``int``, ``float``,
    ``str`` or ``bool``, or if ``key`` is the configuration variable name
    (``cls.config_var_name``). In every other case the value is replaced
    by ``None`` while the type is still reported.

    :param key: name of the variable
    :param value: value of the variable
    :return: tuple of the value's type and the value or ``None``
    """
    value_type = type(value)
    if value_type in {int, float, str, bool} or key == cls.config_var_name:
        return value_type, value
    return value_type, None

def get_cwl_workflow_inputs(self) -> dict[str, dict[str, Any]]:
Expand Down
6 changes: 5 additions & 1 deletion xcengine/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,14 @@ def clear_directory(directory: pathlib.Path) -> None:
def write_stac(
datasets: dict[str, xr.Dataset], stac_root: pathlib.Path
) -> None:
catalog_path = stac_root / "catalog.json"
if catalog_path.exists():
# Assume that the user code generated its own stage-out data
return
catalog = pystac.Catalog(
id="catalog",
description="Root catalog",
href=f"{stac_root}/catalog.json",
href=f"{catalog_path}",
)
for ds_name, ds in datasets.items():
zarr_name = ds_name + ".zarr"
Expand Down