From c4f8a1b5301c24f8785d953e8b0bbfd1cab61eaa Mon Sep 17 00:00:00 2001
From: Pontus Lurcock <pontus.lurcock@brockmann-consult.de>
Date: Fri, 26 Sep 2025 16:49:45 +0200
Subject: [PATCH 1/2] Support NetCDF as output format

xcengine images can now write NetCDF as well as Zarr. The output format
is selected per dataset by setting the `xcengine_output_format` dataset
attribute in the notebook. Tested but not yet documented.
---
 test/test_util.py | 39 +++++++++++++++++++++++----------------
 xcengine/util.py  | 23 +++++++++++++++--------
 2 files changed, 38 insertions(+), 24 deletions(-)

diff --git a/test/test_util.py b/test/test_util.py
index 98090e3..68f9a74 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -38,14 +38,15 @@ def test_clear_directory(tmp_path):
     assert os.listdir(tmp_path) == []
 
 
-@pytest.mark.parametrize("write_zarrs", [False, True])
-def test_write_stac(tmp_path, dataset, write_zarrs):
-    datasets = {"ds1": dataset, "ds2": dataset}
-    if write_zarrs:
+@pytest.mark.parametrize("write_datasets", [False, True])
+def test_write_stac(tmp_path, dataset, write_datasets):
+    datasets = {"ds1": dataset, "ds2": dataset.copy()}
+    datasets["ds2"].attrs["xcengine_output_format"] = "netcdf"
+    if write_datasets:
         output_path = tmp_path / "output"
         output_path.mkdir()
-        for ds_id, ds in datasets.items():
-            ds.to_zarr(output_path / (ds_id + ".zarr"))
+        datasets["ds1"].to_zarr(output_path / ("ds1.zarr"))
+        datasets["ds2"].to_netcdf(output_path / ("ds2.nc"))
 
     write_stac(datasets, tmp_path)
     catalog = pystac.Catalog.from_file(tmp_path / "catalog.json")
@@ -57,21 +58,27 @@ def test_write_stac(tmp_path, dataset, write_zarrs):
         for item in items
     }
     assert data_asset_hrefs == {
-        ds_id: [
-            str(Path(tmp_path / ds_id / f"{ds_id}.zarr").resolve(strict=False))
-        ]
-        for ds_id in datasets.keys()
+        "ds1": [str((tmp_path / "ds1" / "ds1.zarr").resolve(strict=False))],
+        "ds2": [str((tmp_path / "ds2" / "ds2.nc").resolve(strict=False))],
     }
 
 
 @pytest.mark.parametrize("eoap_mode", [False, True])
-def test_save_datasets(tmp_path, dataset, eoap_mode):
-    datasets = {"ds1": dataset, "ds2": dataset}
+@pytest.mark.parametrize("ds2_format", [None, "zarr", "netcdf"])
+def test_save_datasets(tmp_path, dataset, eoap_mode, ds2_format):
+    datasets = {"ds1": dataset, "ds2": dataset.copy()}
+    if ds2_format is not None:
+        datasets["ds2"].attrs["xcengine_output_format"] = ds2_format
     save_datasets(datasets, tmp_path, eoap_mode)
-    for ds_id in datasets.keys():
-        assert (
-            tmp_path / (ds_id if eoap_mode else "output") / (ds_id + ".zarr")
-        ).is_dir()
+    def outdir(ds_id):
+        return tmp_path / (ds_id if eoap_mode else "output")
+    assert (outdir("ds1") / "ds1.zarr").is_dir()
+    ds2_suffix = "nc" if ds2_format == "netcdf" else "zarr"
+    ds2_path =  outdir("ds2") / f"ds2.{ds2_suffix}"
+    if ds2_format == "netcdf":
+        assert ds2_path.is_file()
+    else:
+        assert ds2_path.is_dir()
     catalogue_path = tmp_path / "catalog.json"
     if eoap_mode:
         assert catalogue_path.is_file()
diff --git a/xcengine/util.py b/xcengine/util.py
index ba2f173..18376b8 100644
--- a/xcengine/util.py
+++ b/xcengine/util.py
@@ -31,18 +31,20 @@ def write_stac(
         href=f"{catalog_path}",
     )
     for ds_name, ds in datasets.items():
-        zarr_name = ds_name + ".zarr"
-        zarr_path = stac_root / "output" / zarr_name
+        output_format = ds.attrs.get("xcengine_output_format", "zarr")
+        suffix = "nc" if output_format == "netcdf" else "zarr"
+        output_name = f"{ds_name}.{suffix}"
+        output_path = stac_root / "output" / output_name
         asset_parent = stac_root / ds_name
         asset_parent.mkdir(parents=True, exist_ok=True)
-        asset_path = asset_parent / zarr_name
-        if zarr_path.exists():
+        asset_path = asset_parent / output_name
+        if output_path.exists():
             # If a Zarr for this asset is present in the output directory,
             # move it into the corresponding STAC subdirectory. If not,
             # we write the same STAC items with the same asset links anyway
             # and assume that the caller will take care of actually writing
             # the asset.
-            zarr_path.rename(asset_path)
+            output_path.rename(asset_path)
         asset = pystac.Asset(
             roles=["data", "visual"],
             href=str(asset_path),
@@ -52,7 +54,7 @@ def write_stac(
             # https://planetarycomputer.microsoft.com/api/stac/v1/collections/terraclimate
             # uses the similar "application/vnd+zarr" but RFC 6838 mandates
             # "." rather than "+".
-            media_type="application/vnd.zarr",
+            media_type="application/x-netcdf" if output_format == "netcdf" else "application/vnd.zarr",
             title=ds.attrs.get("title", ds_name),
         )
         bb = namedtuple("Bounds", ["left", "bottom", "right", "top"])(
@@ -92,9 +94,14 @@ def save_datasets(
     for ds_id, ds in datasets.items():
         output_subpath = output_path / (ds_id if eoap_mode else "output")
         output_subpath.mkdir(parents=True, exist_ok=True)
-        dataset_path = output_subpath / (ds_id + ".zarr")
+        output_format = ds.attrs.get("xcengine_output_format", "zarr")
+        suffix = "nc" if output_format == "netcdf" else "zarr"
+        dataset_path = output_subpath / f"{ds_id}.{suffix}"
         saved_datasets[ds_id] = dataset_path
-        ds.to_zarr(dataset_path)
+        if output_format == "netcdf":
+            ds.to_netcdf(dataset_path)
+        else:
+            ds.to_zarr(dataset_path)
     # The "finished" file is a flag to indicate to a runner when
     # processing is complete, though the xcetool runner doesn't yet use it.
     (output_path / "finished").touch()

From 3d082bb8f535e19bd0f33ea685c425f21cb37cc7 Mon Sep 17 00:00:00 2001
From: Pontus Lurcock <pontus.lurcock@brockmann-consult.de>
Date: Fri, 26 Sep 2025 17:12:55 +0200
Subject: [PATCH 2/2] Document NetCDF output; update changelog

---
 CHANGES.md |  1 +
 README.md  | 12 ++++++++++++
 2 files changed, 13 insertions(+)

diff --git a/CHANGES.md b/CHANGES.md
index bcd22cd..96cfe67 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -14,6 +14,7 @@
 * Support writing of stage-out STAC by notebook (#32)
 * Make viewer work on non-default ports (#21)
 * Improve dynamic example notebook
+* Support NetCDF output (#28)
 
 ## Changes in 0.1.0
 
diff --git a/README.md b/README.md
index 3e5acc5..eb21b6c 100644
--- a/README.md
+++ b/README.md
@@ -33,6 +33,18 @@ parameters cell and make them available as command-line parameters for the
 output script and container, and as workflow parameters for the application
 package.
 
+# Customizing output formats
+
+An xcengine-generated script or image can automatically write all
+`xarray.Dataset` objects from the notebook code to disk, for example to be
+staged out as EO Application Package outputs. By default, Zarr format is
+used, but this can be changed to NetCDF on a per-dataset basis by setting
+the `xcengine_output_format` attribute of the dataset to `"netcdf"`:
+
+```python
+my_dataset.attrs["xcengine_output_format"] = "netcdf"
+```
+
 # xcetool usage
 
 xcengine provides a command-line tool called `xcetool`, which has several