Skip to content

Commit c4f8a1b

Browse files
committed
Support NetCDF as output format
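xcengine images can now write a dataset as NetCDF instead of Zarr when the notebook sets the dataset attribute xcengine_output_format to "netcdf"; datasets without this attribute are still written as Zarr. Tested but not yet documented.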
1 parent c3c9155 commit c4f8a1b

File tree

2 files changed

+38
-24
lines changed

2 files changed

+38
-24
lines changed

test/test_util.py

Lines changed: 23 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -38,14 +38,15 @@ def test_clear_directory(tmp_path):
3838
assert os.listdir(tmp_path) == []
3939

4040

41-
@pytest.mark.parametrize("write_zarrs", [False, True])
42-
def test_write_stac(tmp_path, dataset, write_zarrs):
43-
datasets = {"ds1": dataset, "ds2": dataset}
44-
if write_zarrs:
41+
@pytest.mark.parametrize("write_datasets", [False, True])
42+
def test_write_stac(tmp_path, dataset, write_datasets):
43+
datasets = {"ds1": dataset, "ds2": dataset.copy()}
44+
datasets["ds2"].attrs["xcengine_output_format"] = "netcdf"
45+
if write_datasets:
4546
output_path = tmp_path / "output"
4647
output_path.mkdir()
47-
for ds_id, ds in datasets.items():
48-
ds.to_zarr(output_path / (ds_id + ".zarr"))
48+
datasets["ds1"].to_zarr(output_path / ("ds1.zarr"))
49+
datasets["ds2"].to_netcdf(output_path / ("ds2.nc"))
4950

5051
write_stac(datasets, tmp_path)
5152
catalog = pystac.Catalog.from_file(tmp_path / "catalog.json")
@@ -57,21 +58,27 @@ def test_write_stac(tmp_path, dataset, write_zarrs):
5758
for item in items
5859
}
5960
assert data_asset_hrefs == {
60-
ds_id: [
61-
str(Path(tmp_path / ds_id / f"{ds_id}.zarr").resolve(strict=False))
62-
]
63-
for ds_id in datasets.keys()
61+
"ds1": [str((tmp_path / "ds1" / "ds1.zarr").resolve(strict=False))],
62+
"ds2": [str((tmp_path / "ds2" / "ds2.nc").resolve(strict=False))],
6463
}
6564

6665

6766
@pytest.mark.parametrize("eoap_mode", [False, True])
68-
def test_save_datasets(tmp_path, dataset, eoap_mode):
69-
datasets = {"ds1": dataset, "ds2": dataset}
67+
@pytest.mark.parametrize("ds2_format", [None, "zarr", "netcdf"])
68+
def test_save_datasets(tmp_path, dataset, eoap_mode, ds2_format):
69+
datasets = {"ds1": dataset, "ds2": dataset.copy()}
70+
if ds2_format is not None:
71+
datasets["ds2"].attrs["xcengine_output_format"] = ds2_format
7072
save_datasets(datasets, tmp_path, eoap_mode)
71-
for ds_id in datasets.keys():
72-
assert (
73-
tmp_path / (ds_id if eoap_mode else "output") / (ds_id + ".zarr")
74-
).is_dir()
73+
def outdir(ds_id):
74+
return tmp_path / (ds_id if eoap_mode else "output")
75+
assert (outdir("ds1") / "ds1.zarr").is_dir()
76+
ds2_suffix = "nc" if ds2_format == "netcdf" else "zarr"
77+
ds2_path = outdir("ds2") / f"ds2.{ds2_suffix}"
78+
if ds2_format == "netcdf":
79+
assert ds2_path.is_file()
80+
else:
81+
assert ds2_path.is_dir()
7582
catalogue_path = tmp_path / "catalog.json"
7683
if eoap_mode:
7784
assert catalogue_path.is_file()

xcengine/util.py

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -31,18 +31,20 @@ def write_stac(
3131
href=f"{catalog_path}",
3232
)
3333
for ds_name, ds in datasets.items():
34-
zarr_name = ds_name + ".zarr"
35-
zarr_path = stac_root / "output" / zarr_name
34+
output_format = ds.attrs.get("xcengine_output_format", "zarr")
35+
suffix = "nc" if output_format == "netcdf" else "zarr"
36+
output_name = f"{ds_name}.{suffix}"
37+
output_path = stac_root / "output" / output_name
3638
asset_parent = stac_root / ds_name
3739
asset_parent.mkdir(parents=True, exist_ok=True)
38-
asset_path = asset_parent / zarr_name
39-
if zarr_path.exists():
40+
asset_path = asset_parent / output_name
41+
if output_path.exists():
4042
# If a Zarr or NetCDF output for this asset is present in the output directory,
4143
# move it into the corresponding STAC subdirectory. If not,
4244
# we write the same STAC items with the same asset links anyway
4345
# and assume that the caller will take care of actually writing
4446
# the asset.
45-
zarr_path.rename(asset_path)
47+
output_path.rename(asset_path)
4648
asset = pystac.Asset(
4749
roles=["data", "visual"],
4850
href=str(asset_path),
@@ -52,7 +54,7 @@ def write_stac(
5254
# https://planetarycomputer.microsoft.com/api/stac/v1/collections/terraclimate
5355
# uses the similar "application/vnd+zarr" but RFC 6838 mandates
5456
# "." rather than "+".
55-
media_type="application/vnd.zarr",
57+
media_type="application/x-netcdf" if output_format == "netcdf" else "application/vnd.zarr",
5658
title=ds.attrs.get("title", ds_name),
5759
)
5860
bb = namedtuple("Bounds", ["left", "bottom", "right", "top"])(
@@ -92,9 +94,14 @@ def save_datasets(
9294
for ds_id, ds in datasets.items():
9395
output_subpath = output_path / (ds_id if eoap_mode else "output")
9496
output_subpath.mkdir(parents=True, exist_ok=True)
95-
dataset_path = output_subpath / (ds_id + ".zarr")
97+
output_format = ds.attrs.get("xcengine_output_format", "zarr")
98+
suffix = "nc" if output_format == "netcdf" else "zarr"
99+
dataset_path = output_subpath / f"{ds_id}.{suffix}"
96100
saved_datasets[ds_id] = dataset_path
97-
ds.to_zarr(dataset_path)
101+
if output_format == "netcdf":
102+
ds.to_netcdf(dataset_path)
103+
else:
104+
ds.to_zarr(dataset_path)
98105
# The "finished" file is a flag to indicate to a runner when
99106
# processing is complete, though the xcetool runner doesn't yet use it.
100107
(output_path / "finished").touch()

0 commit comments

Comments
 (0)