Support NetCDF as output format

pont-us · pont-us · commit c4f8a1b5301c · 2025-09-26T16:49:45.000+02:00
xcengine images can now write a dataset as NetCDF instead of Zarr when the
dataset's `xcengine_output_format` attribute is set to "netcdf" in the
notebook; Zarr remains the default. Tested but not yet documented.
diff --git a/test/test_util.py b/test/test_util.py
@@ -38,14 +38,15 @@ def test_clear_directory(tmp_path):
     assert os.listdir(tmp_path) == []
 
 
-@pytest.mark.parametrize("write_zarrs", [False, True])
-def test_write_stac(tmp_path, dataset, write_zarrs):
-    datasets = {"ds1": dataset, "ds2": dataset}
-    if write_zarrs:
+@pytest.mark.parametrize("write_datasets", [False, True])
+def test_write_stac(tmp_path, dataset, write_datasets):
+    datasets = {"ds1": dataset, "ds2": dataset.copy()}
+    datasets["ds2"].attrs["xcengine_output_format"] = "netcdf"
+    if write_datasets:
         output_path = tmp_path / "output"
         output_path.mkdir()
-        for ds_id, ds in datasets.items():
-            ds.to_zarr(output_path / (ds_id + ".zarr"))
+        datasets["ds1"].to_zarr(output_path / ("ds1.zarr"))
+        datasets["ds2"].to_netcdf(output_path / ("ds2.nc"))
 
     write_stac(datasets, tmp_path)
     catalog = pystac.Catalog.from_file(tmp_path / "catalog.json")
@@ -57,21 +58,27 @@ def test_write_stac(tmp_path, dataset, write_zarrs):
         for item in items
     }
     assert data_asset_hrefs == {
-        ds_id: [
-            str(Path(tmp_path / ds_id / f"{ds_id}.zarr").resolve(strict=False))
-        ]
-        for ds_id in datasets.keys()
+        "ds1": [str((tmp_path / "ds1" / "ds1.zarr").resolve(strict=False))],
+        "ds2": [str((tmp_path / "ds2" / "ds2.nc").resolve(strict=False))],
     }
 
 
 @pytest.mark.parametrize("eoap_mode", [False, True])
-def test_save_datasets(tmp_path, dataset, eoap_mode):
-    datasets = {"ds1": dataset, "ds2": dataset}
+@pytest.mark.parametrize("ds2_format", [None, "zarr", "netcdf"])
+def test_save_datasets(tmp_path, dataset, eoap_mode, ds2_format):
+    datasets = {"ds1": dataset, "ds2": dataset.copy()}
+    if ds2_format is not None:
+        datasets["ds2"].attrs["xcengine_output_format"] = ds2_format
     save_datasets(datasets, tmp_path, eoap_mode)
-    for ds_id in datasets.keys():
-        assert (
-            tmp_path / (ds_id if eoap_mode else "output") / (ds_id + ".zarr")
-        ).is_dir()
+    def outdir(ds_id):
+        return tmp_path / (ds_id if eoap_mode else "output")
+    assert (outdir("ds1") / "ds1.zarr").is_dir()
+    ds2_suffix = "nc" if ds2_format == "netcdf" else "zarr"
+    ds2_path =  outdir("ds2") / f"ds2.{ds2_suffix}"
+    if ds2_format == "netcdf":
+        assert ds2_path.is_file()
+    else:
+        assert ds2_path.is_dir()
     catalogue_path = tmp_path / "catalog.json"
     if eoap_mode:
         assert catalogue_path.is_file()
diff --git a/xcengine/util.py b/xcengine/util.py
@@ -31,18 +31,20 @@ def write_stac(
         href=f"{catalog_path}",
     )
     for ds_name, ds in datasets.items():
-        zarr_name = ds_name + ".zarr"
-        zarr_path = stac_root / "output" / zarr_name
+        output_format = ds.attrs.get("xcengine_output_format", "zarr")
+        suffix = "nc" if output_format == "netcdf" else "zarr"
+        output_name = f"{ds_name}.{suffix}"
+        output_path = stac_root / "output" / output_name
         asset_parent = stac_root / ds_name
         asset_parent.mkdir(parents=True, exist_ok=True)
-        asset_path = asset_parent / zarr_name
-        if zarr_path.exists():
+        asset_path = asset_parent / output_name
+        if output_path.exists():
             # If a Zarr for this asset is present in the output directory,
             # move it into the corresponding STAC subdirectory. If not,
             # we write the same STAC items with the same asset links anyway
             # and assume that the caller will take care of actually writing
             # the asset.
-            zarr_path.rename(asset_path)
+            output_path.rename(asset_path)
         asset = pystac.Asset(
             roles=["data", "visual"],
             href=str(asset_path),
@@ -52,7 +54,7 @@ def write_stac(
             # https://planetarycomputer.microsoft.com/api/stac/v1/collections/terraclimate
             # uses the similar "application/vnd+zarr" but RFC 6838 mandates
             # "." rather than "+".
-            media_type="application/vnd.zarr",
+            media_type="application/x-netcdf" if output_format == "netcdf" else "application/vnd.zarr",
             title=ds.attrs.get("title", ds_name),
         )
         bb = namedtuple("Bounds", ["left", "bottom", "right", "top"])(
@@ -92,9 +94,14 @@ def save_datasets(
     for ds_id, ds in datasets.items():
         output_subpath = output_path / (ds_id if eoap_mode else "output")
         output_subpath.mkdir(parents=True, exist_ok=True)
-        dataset_path = output_subpath / (ds_id + ".zarr")
+        output_format = ds.attrs.get("xcengine_output_format", "zarr")
+        suffix = "nc" if output_format == "netcdf" else "zarr"
+        dataset_path = output_subpath / f"{ds_id}.{suffix}"
         saved_datasets[ds_id] = dataset_path
-        ds.to_zarr(dataset_path)
+        if output_format == "netcdf":
+            ds.to_netcdf(dataset_path)
+        else:
+            ds.to_zarr(dataset_path)
     # The "finished" file is a flag to indicate to a runner when
     # processing is complete, though the xcetool runner doesn't yet use it.
     (output_path / "finished").touch()