-
-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Add support for NETCDF4_CLASSIC to h5netcdf engine #10686
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 24 commits
68d5c73
120af07
f8f44f0
522d37d
783c407
2f1c781
b142d38
b14c373
909a96c
14d22a1
35b50ce
a187c8d
0c1bc08
10707ee
03ea2de
592b98b
13b60d0
cf8b4be
d0b0948
6351e66
b2a21d2
50dbd36
6d686d4
18e4bd0
d9c4879
99c90a5
28bd354
f2a29ca
f1c1819
88d0841
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,6 +7,7 @@ | |
| from typing import TYPE_CHECKING, Any, Self | ||
|
|
||
| import numpy as np | ||
| from packaging.version import Version | ||
|
|
||
| from xarray.backends.common import ( | ||
| BACKEND_ENTRYPOINTS, | ||
|
|
@@ -27,6 +28,7 @@ | |
| PickleableFileManager, | ||
| ) | ||
| from xarray.backends.locks import HDF5_LOCK, combine_locks, ensure_lock, get_write_lock | ||
| from xarray.backends.netcdf3 import encode_nc3_attr_value, encode_nc3_variable | ||
| from xarray.backends.netCDF4_ import ( | ||
| BaseNetCDF4Array, | ||
| _build_and_get_enum, | ||
|
|
@@ -124,6 +126,7 @@ def __init__( | |
| manager: FileManager | h5netcdf.File | h5netcdf.Group, | ||
| group=None, | ||
| mode=None, | ||
| format="NETCDF4", | ||
| lock=HDF5_LOCK, | ||
| autoclose=False, | ||
| ): | ||
|
|
@@ -143,7 +146,7 @@ def __init__( | |
| self._manager = manager | ||
| self._group = group | ||
| self._mode = mode | ||
| self.format = None | ||
| self.format = format or "NETCDF4" | ||
| # todo: utilizing find_root_and_group seems a bit clunky | ||
| # making filename available on h5netcdf.Group seems better | ||
| self._filename = find_root_and_group(self.ds)[0].filename | ||
|
|
@@ -152,6 +155,9 @@ def __init__( | |
| self.autoclose = autoclose | ||
|
|
||
| def get_child_store(self, group: str) -> Self: | ||
| if self.format == "NETCDF4_CLASSIC": | ||
| raise ValueError("Cannot create sub-groups in `NETCDF4_CLASSIC` format.") | ||
|
|
||
| if self._group is not None: | ||
| group = os.path.join(self._group, group) | ||
| return type(self)( | ||
|
|
@@ -167,7 +173,7 @@ def open( | |
| cls, | ||
| filename, | ||
| mode="r", | ||
| format=None, | ||
| format="NETCDF4", | ||
| group=None, | ||
| lock=None, | ||
| autoclose=False, | ||
|
|
@@ -198,8 +204,8 @@ def open( | |
| f"{magic_number!r} is not the signature of a valid netCDF4 file" | ||
| ) | ||
|
|
||
| if format not in [None, "NETCDF4"]: | ||
| raise ValueError("invalid format for h5netcdf backend") | ||
| if format not in [None, "NETCDF4", "NETCDF4_CLASSIC"]: | ||
| raise ValueError(f"invalid format for h5netcdf backend: {format}") | ||
|
|
||
| kwargs = { | ||
| "invalid_netcdf": invalid_netcdf, | ||
|
|
@@ -210,6 +216,8 @@ def open( | |
| kwargs.update(driver_kwds) | ||
| if phony_dims is not None: | ||
| kwargs["phony_dims"] = phony_dims | ||
| if format is not None and Version(h5netcdf.__version__) > Version("1.6.4"): | ||
| kwargs["format"] = format | ||
|
|
||
| if lock is None: | ||
| if mode == "r": | ||
|
|
@@ -223,7 +231,15 @@ def open( | |
| else PickleableFileManager | ||
| ) | ||
| manager = manager_cls(h5netcdf.File, filename, mode=mode, kwargs=kwargs) | ||
| return cls(manager, group=group, mode=mode, lock=lock, autoclose=autoclose) | ||
|
|
||
| return cls( | ||
| manager, | ||
| group=group, | ||
| format=format, | ||
| mode=mode, | ||
| lock=lock, | ||
| autoclose=autoclose, | ||
| ) | ||
|
|
||
| def _acquire(self, needs_lock=True): | ||
| with self._manager.acquire_context(needs_lock) as root: | ||
|
|
@@ -319,11 +335,27 @@ def set_dimension(self, name, length, is_unlimited=False): | |
| else: | ||
| self.ds.dimensions[name] = length | ||
|
|
||
| def convert_string(self, value): | ||
| """If format is NETCDF4_CLASSIC, convert strings to fixed width char | ||
| arrays to ensure they can be read by legacy software. | ||
|
|
||
| CLASSIC attributes are read by third party software as fixed width char arrays | ||
| """ | ||
| if self.format == "NETCDF4_CLASSIC": | ||
| value = encode_nc3_attr_value(value) | ||
| if isinstance(value, bytes): | ||
| value = np.bytes_(value) | ||
|
||
| return value | ||
|
|
||
| def set_attribute(self, key, value): | ||
| value = self.convert_string(value) | ||
huard marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
huard marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| self.ds.attrs[key] = value | ||
|
|
||
| def encode_variable(self, variable, name=None): | ||
| return _encode_nc4_variable(variable, name=name) | ||
| if self.format == "NETCDF4_CLASSIC": | ||
| return encode_nc3_variable(variable, name=name) | ||
| else: | ||
| return _encode_nc4_variable(variable, name=name) | ||
|
|
||
| def prepare_variable( | ||
| self, name, variable, check_encoding=False, unlimited_dims=None | ||
|
|
@@ -332,7 +364,9 @@ def prepare_variable( | |
|
|
||
| _ensure_no_forward_slash_in_name(name) | ||
| attrs = variable.attrs.copy() | ||
| dtype = _get_datatype(variable, raise_on_invalid_encoding=check_encoding) | ||
| dtype = _get_datatype( | ||
| variable, nc_format=self.format, raise_on_invalid_encoding=check_encoding | ||
| ) | ||
|
|
||
| fillvalue = attrs.pop("_FillValue", None) | ||
|
|
||
|
|
@@ -394,7 +428,7 @@ def prepare_variable( | |
| nc4_var = self.ds[name] | ||
|
|
||
| for k, v in attrs.items(): | ||
| nc4_var.attrs[k] = v | ||
| nc4_var.attrs[k] = self.convert_string(v) | ||
huard marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
huard marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| target = H5NetCDFArrayWrapper(name, self) | ||
|
|
||
|
|
@@ -484,7 +518,7 @@ def open_dataset( | |
| drop_variables: str | Iterable[str] | None = None, | ||
| use_cftime=None, | ||
| decode_timedelta=None, | ||
| format=None, | ||
| format="NETCDF4", | ||
| group=None, | ||
| lock=None, | ||
| invalid_netcdf=None, | ||
|
|
@@ -544,7 +578,7 @@ def open_datatree( | |
| drop_variables: str | Iterable[str] | None = None, | ||
| use_cftime=None, | ||
| decode_timedelta=None, | ||
| format=None, | ||
| format="NETCDF4", | ||
| group: str | None = None, | ||
| lock=None, | ||
| invalid_netcdf=None, | ||
|
|
@@ -587,7 +621,7 @@ def open_groups_as_dict( | |
| drop_variables: str | Iterable[str] | None = None, | ||
| use_cftime=None, | ||
| decode_timedelta=None, | ||
| format=None, | ||
| format="NETCDF4", | ||
| group: str | None = None, | ||
| lock=None, | ||
| invalid_netcdf=None, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -460,6 +460,7 @@ def roundtrip( | |
| save_kwargs = {} | ||
| if open_kwargs is None: | ||
| open_kwargs = {} | ||
|
|
||
| with create_tmp_file(allow_cleanup_failure=allow_cleanup_failure) as path: | ||
| self.save(data, path, **save_kwargs) | ||
| with self.open(path, **open_kwargs) as ds: | ||
|
|
@@ -4735,6 +4736,54 @@ def create_store(self): | |
| ) as store: | ||
| yield store | ||
|
|
||
| @requires_h5netcdf | ||
| def test_string_attributes_stored_as_char(self, tmp_path): | ||
| import h5netcdf | ||
|
|
||
| original = Dataset(attrs={"foo": "bar"}) | ||
| store_path = tmp_path / "tmp.nc" | ||
| original.to_netcdf(store_path, engine=self.engine, format=self.file_format) | ||
| with h5netcdf.File(store_path, "r") as ds: | ||
| # Check that the attribute is stored as a char array | ||
| assert ds._h5file.attrs["foo"].dtype == np.dtype("S3") | ||
|
Comment on lines
+4742
to
+4750
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. NumPy's There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Using fixed width chars replicates the behavior of the netCDF4 backend for the CLASSIC format. Again, this has to do with the Sticking as close as possible to netCDF4 output increases my confidence that the h5netcdf outputs will be compatible with 3rd party software expecting the CLASSIC format. |
||
|
|
||
|
|
||
| @requires_h5netcdf | ||
| class TestNetCDF4ClassicViaH5NetCDFData(TestNetCDF4ClassicViaNetCDF4Data): | ||
| engine: T_NetcdfEngine = "h5netcdf" | ||
| file_format: T_NetcdfTypes = "NETCDF4_CLASSIC" | ||
|
|
||
| @contextlib.contextmanager | ||
| def create_store(self): | ||
| with create_tmp_file() as tmp_file: | ||
| with backends.H5NetCDFStore.open( | ||
| tmp_file, mode="w", format="NETCDF4_CLASSIC" | ||
| ) as store: | ||
| yield store | ||
|
|
||
| @requires_netCDF4 | ||
| def test_cross_engine_read_write_netcdf4(self) -> None: | ||
| # Drop dim3, because its labels include strings. These appear to be | ||
| # not properly read with python-netCDF4, which converts them into | ||
| # unicode instead of leaving them as bytes. | ||
| data = create_test_data().drop_vars("dim3") | ||
| data.attrs["foo"] = "bar" | ||
| valid_engines: list[T_NetcdfEngine] = ["netcdf4", "h5netcdf"] | ||
| for write_engine in valid_engines: | ||
| with create_tmp_file() as tmp_file: | ||
| data.to_netcdf(tmp_file, engine=write_engine, format=self.file_format) | ||
| for read_engine in valid_engines: | ||
| with open_dataset(tmp_file, engine=read_engine) as actual: | ||
| assert_identical(data, actual) | ||
|
|
||
| def test_group_fails(self): | ||
| # Check writing group data fails with CLASSIC format | ||
| original = create_test_data() | ||
| with pytest.raises( | ||
| ValueError, match=r"Cannot create sub-groups in `NETCDF4_CLASSIC` format." | ||
| ): | ||
| original.to_netcdf(group="sub", format=self.file_format, engine=self.engine) | ||
|
|
||
|
|
||
| @requires_scipy_or_netCDF4 | ||
| class TestGenericNetCDFData(NetCDF3Only, CFEncodedBase): | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.