
Commit 29798dc

Merge pull request #1921 from OceanParcels/1844-netcdf-engine
Remove `netcdf_engine` from Field and FieldSet
2 parents: 92b62f2 + 8ba78df
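
This commit removes the `netcdf_engine` keyword from `Field` and `FieldSet`; file reading now relies on xarray's automatic backend selection. A minimal before/after sketch of the caller-facing change (the file paths and variable names are illustrative, not from this commit):

```python
import parcels

# Illustrative inputs; a real run needs actual NetCDF files on disk.
filenames = {"U": "data/*U.nc", "V": "data/*V.nc"}
variables = {"U": "uo", "V": "vo"}
dimensions = {"lon": "lon", "lat": "lat", "time": "time"}

# Before this commit the reader backend could be forced, e.g.
#     parcels.FieldSet.from_netcdf(..., netcdf_engine="scipy")
# After it, the kwarg is gone and xarray picks the engine itself:
fieldset = parcels.FieldSet.from_netcdf(filenames, variables, dimensions)
```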

File tree

6 files changed (+47, -44 lines)


docs/examples/example_globcurrent.py

Lines changed: 1 addition & 19 deletions
@@ -1,4 +1,3 @@
-from collections.abc import Callable
 from datetime import timedelta
 from glob import glob
 
@@ -9,19 +8,6 @@
 import parcels
 
 
-def Unit_to_units(d: dict) -> dict:
-    if "Unit" in d:
-        d["units"] = d.pop("Unit")
-    return d
-
-
-def xarray_patch_metadata(ds: xr.Dataset, f: Callable[[dict], dict]) -> xr.Dataset:
-    """Convert attrs"""
-    for var in ds.variables:
-        ds[var].attrs = f(ds[var].attrs)
-    return ds
-
-
 def set_globcurrent_fieldset(
     filename=None,
 ):
@@ -35,11 +21,7 @@ def set_globcurrent_fieldset(
         "V": "northward_eulerian_current_velocity",
     }
     dimensions = {"lat": "lat", "lon": "lon", "time": "time"}
-    ds = (
-        xr.open_mfdataset(filename, combine="by_coords")
-        .pipe(xarray_patch_metadata, Unit_to_units)
-        .pipe(xr.decode_cf)
-    )
+    ds = xr.open_mfdataset(filename, combine="by_coords")
 
     return parcels.FieldSet.from_xarray_dataset(
         ds,
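
With the metadata patching moved to download time (see `parcels/tools/_v3to4.py` below), the example can open the GlobCurrent files directly. A hedged sketch of the resulting usage; the glob path is illustrative:

```python
import xarray as xr
import parcels

# The downloaded files already carry CF-style "units" attributes, so a plain
# open_mfdataset decodes them without any attrs patching.
ds = xr.open_mfdataset("GlobCurrent_example_data/*.nc", combine="by_coords")
fieldset = parcels.FieldSet.from_xarray_dataset(
    ds,
    variables={"U": "eastward_eulerian_current_velocity",
               "V": "northward_eulerian_current_velocity"},
    dimensions={"lat": "lat", "lon": "lon", "time": "time"},
)
```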

parcels/field.py

Lines changed: 6 additions & 15 deletions
@@ -228,7 +228,6 @@ def __init__(
 
         self._dimensions = kwargs.pop("dimensions", None)
         self._dataFiles = kwargs.pop("dataFiles", None)
-        self._netcdf_engine = kwargs.pop("netcdf_engine", "netcdf4")
         self._creation_log = kwargs.pop("creation_log", "")
 
         # data_full_zdim is the vertical dimension of the complete field data, ignoring the indices.
@@ -278,10 +277,6 @@ def interp_method(self, value):
     def gridindexingtype(self):
         return self._gridindexingtype
 
-    @property
-    def netcdf_engine(self):
-        return self._netcdf_engine
-
     @classmethod
     def _get_dim_filenames(cls, filenames, dim):
         if isinstance(filenames, str) or not isinstance(filenames, collections.abc.Iterable):
@@ -297,11 +292,11 @@ def _get_dim_filenames(cls, filenames, dim):
         return filenames
 
     @staticmethod
-    def _collect_timeslices(data_filenames, dimensions, indices, netcdf_engine):
+    def _collect_timeslices(data_filenames, dimensions, indices):
         timeslices = []
         dataFiles = []
         for fname in data_filenames:
-            with NetcdfFileBuffer(fname, dimensions, indices, netcdf_engine=netcdf_engine) as filebuffer:
+            with NetcdfFileBuffer(fname, dimensions, indices) as filebuffer:
                 ftime = filebuffer.time
                 timeslices.append(ftime)
                 dataFiles.append([fname] * len(ftime))
@@ -387,10 +382,9 @@ def from_netcdf(
             raise NotImplementedError("Vertically adaptive meshes not implemented for from_netcdf()")
         depth_filename = depth_filename[0]
 
-        netcdf_engine = kwargs.pop("netcdf_engine", "netcdf4")
         gridindexingtype = kwargs.get("gridindexingtype", "nemo")
 
-        indices: dict[str, npt.NDArray] = {}  # TODO Nick: Cleanup
+        indices: dict[str, npt.NDArray] = {}
 
         interp_method: InterpMethod = kwargs.pop("interp_method", "linear")
         if type(interp_method) is dict:
@@ -429,7 +423,7 @@ def from_netcdf(
                 depth = filebuffer.depth
                 data_full_zdim = filebuffer.data_full_zdim
         else:
-            indices["depth"] = [0]
+            indices["depth"] = np.array([0])
             depth = np.zeros(1)
             data_full_zdim = 1
 
@@ -442,9 +436,7 @@ def from_netcdf(
         # Concatenate time variable to determine overall dimension
        # across multiple files
         if "time" in dimensions:
-            time, time_origin, timeslices, dataFiles = cls._collect_timeslices(
-                data_filenames, dimensions, indices, netcdf_engine
-            )
+            time, time_origin, timeslices, dataFiles = cls._collect_timeslices(data_filenames, dimensions, indices)
             grid = Grid.create_grid(lon, lat, depth, time, time_origin=time_origin, mesh=mesh)
             kwargs["dataFiles"] = dataFiles
         else:  # e.g. for the CROCO CS_w field, see https://github.com/OceanParcels/Parcels/issues/1831
@@ -453,7 +445,7 @@ def from_netcdf(
         elif grid is not None and ("dataFiles" not in kwargs or kwargs["dataFiles"] is None):
             # ==== means: the field has a shared grid, but may have different data files, so we need to collect the
             # ==== correct file time series again.
-            _, _, _, dataFiles = cls._collect_timeslices(data_filenames, dimensions, indices, netcdf_engine)
+            _, _, _, dataFiles = cls._collect_timeslices(data_filenames, dimensions, indices)
             kwargs["dataFiles"] = dataFiles
 
         if "time" in indices:
@@ -488,7 +480,6 @@ def from_netcdf(
         allow_time_extrapolation = False if "time" in dimensions else True
 
         kwargs["dimensions"] = dimensions.copy()
-        kwargs["netcdf_engine"] = netcdf_engine
 
         return cls(
             variable,
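
Besides dropping the engine plumbing, this diff tightens a type: `indices["depth"]` is now stored as an array, matching the `dict[str, npt.NDArray]` annotation. A minimal illustration:

```python
import numpy as np
import numpy.typing as npt

indices: dict[str, npt.NDArray] = {}
indices["depth"] = np.array([0])  # previously the plain list [0], which did not match the annotation
```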

parcels/fieldfilebuffer.py

Lines changed: 4 additions & 6 deletions
@@ -19,7 +19,6 @@ def __init__(
         interp_method: InterpMethodOption = "linear",
         data_full_zdim=None,
         gridindexingtype="nemo",
-        netcdf_engine="netcdf4",
     ):
         self.filename: PathLike | list[PathLike] = filename
         self.dimensions = dimensions  # Dict with dimension keys for file data
@@ -28,10 +27,9 @@
         self.interp_method = interp_method
         self.gridindexingtype = gridindexingtype
         self.data_full_zdim = data_full_zdim
-        self.netcdf_engine = netcdf_engine
 
     def __enter__(self):
-        self.dataset = open_xarray_dataset(self.filename, self.netcdf_engine)
+        self.dataset = open_xarray_dataset(self.filename)
         return self
 
     def __exit__(self, type, value, traceback):
@@ -159,12 +157,12 @@ def time_access(self):
         return time
 
 
-def open_xarray_dataset(filename: Path | str, netcdf_engine: str) -> xr.Dataset:
+def open_xarray_dataset(filename: Path | str) -> xr.Dataset:
     try:
         # Unfortunately we need to do if-else here, cause the lock-parameter is either False or a Lock-object
         # (which we would rather want to have being auto-managed).
         # If 'lock' is not specified, the Lock-object is auto-created and managed by xarray internally.
-        ds = xr.open_mfdataset(filename, decode_cf=True, engine=netcdf_engine)
+        ds = xr.open_mfdataset(filename, decode_cf=True)
         ds["decoded"] = True
     except:
         warnings.warn(  # TODO: Is this warning necessary? What cases does this except block get triggered - is it to do with the bare except???
@@ -174,7 +172,7 @@ def open_xarray_dataset(filename: Path | str) -> xr.Dataset:
             stacklevel=2,
         )
 
-        ds = xr.open_mfdataset(filename, decode_cf=False, engine=netcdf_engine)
+        ds = xr.open_mfdataset(filename, decode_cf=False)
         ds["decoded"] = False
     return ds
 
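
`open_xarray_dataset` keeps its decode-then-fallback shape; only the engine argument is gone. A hedged usage sketch, assuming the helper can be imported from `parcels.fieldfilebuffer` and using an illustrative path: files that decode cleanly come back with `ds["decoded"] = True`, while files that raise during CF decoding are re-opened raw and flagged `False` so the caller can decode later.

```python
import xarray as xr

from parcels.fieldfilebuffer import open_xarray_dataset

ds = open_xarray_dataset("GlobCurrent_example_data/*.nc")
if not bool(ds["decoded"]):
    # Attributes were not CF-decodable as-is; after fixing them up,
    # decoding can be attempted manually.
    ds = xr.decode_cf(ds)
```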

parcels/fieldset.py

Lines changed: 1 addition & 4 deletions
@@ -356,9 +356,6 @@ def from_netcdf(
         gridindexingtype : str
             The type of gridindexing. Either 'nemo' (default), 'mitgcm', 'mom5', 'pop', or 'croco' are supported.
             See also the Grid indexing documentation on oceanparcels.org
-        netcdf_engine :
-            engine to use for netcdf reading in xarray. Default is 'netcdf',
-            but in cases where this doesn't work, setting netcdf_engine='scipy' could help. Accepted options are the same as the ``engine`` parameter in ``xarray.open_dataset()``.
         **kwargs :
             Keyword arguments passed to the :class:`parcels.Field` constructor.
 
@@ -386,7 +383,7 @@ def from_netcdf(
             for dim, p in paths.items():
                 paths[dim] = cls._parse_wildcards(p, filenames, var)
 
-            # Use dimensions[var] if its a dict of dicts
+            # Use dimensions[var] if it's a dict of dicts
             dims = dimensions[var] if var in dimensions else dimensions
             cls.checkvaliddimensionsdict(dims)
             fieldtype = fieldtype[var] if (fieldtype and var in fieldtype) else fieldtype
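
The comment fixed above refers to the two accepted shapes of `dimensions`: one mapping shared by all variables, or a dict of dicts keyed by variable. An illustrative pair (the per-variable dimension names below are made up, not from this commit):

```python
# One mapping shared by all variables:
dimensions_shared = {"lon": "lon", "lat": "lat", "time": "time"}

# Or per-variable mappings (the dict-of-dicts case the branch above handles):
dimensions_per_var = {
    "U": {"lon": "glamf", "lat": "gphif", "time": "time_counter"},
    "V": {"lon": "glamf", "lat": "gphif", "time": "time_counter"},
}
```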

parcels/tools/_v3to4.py

Lines changed: 27 additions & 0 deletions
@@ -0,0 +1,27 @@
+"""
+Temporary utilities to help with the transition from v3 to v4 of Parcels.
+
+TODO v4: Remove this module. Move functions that are still relevant into other modules
+"""
+
+from collections.abc import Callable
+
+import xarray as xr
+
+
+def Unit_to_units(d: dict) -> dict:
+    if "Unit" in d:
+        d["units"] = d.pop("Unit")
+    return d
+
+
+def xarray_patch_metadata(ds: xr.Dataset, f: Callable[[dict], dict]) -> xr.Dataset:
+    """Convert attrs"""
+    for var in ds.variables:
+        ds[var].attrs = f(ds[var].attrs)
+    return ds
+
+
+def patch_dataset_v4_compat(ds: xr.Dataset) -> xr.Dataset:
+    """Patches an xarray dataset to be compatible with v4"""
+    return ds.pipe(xarray_patch_metadata, Unit_to_units)
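
A small self-contained sketch of what the new module does: an attribute spelled `Unit` is renamed to the CF-style `units` on every variable.

```python
import numpy as np
import xarray as xr

from parcels.tools._v3to4 import patch_dataset_v4_compat

# A variable whose attrs use the non-standard "Unit" key...
ds = xr.Dataset({"u": ("x", np.zeros(3), {"Unit": "m s-1"})})
# ...gets it renamed to "units" by the helper above.
patched = patch_dataset_v4_compat(ds)
assert patched["u"].attrs == {"units": "m s-1"}
```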

parcels/tools/exampledata_utils.py

Lines changed: 8 additions & 0 deletions
@@ -4,6 +4,9 @@
 from urllib.request import urlretrieve
 
 import platformdirs
+import xarray as xr
+
+from parcels.tools._v3to4 import patch_dataset_v4_compat
 
 __all__ = ["download_example_dataset", "get_data_home", "list_example_datasets"]
 
@@ -147,4 +150,9 @@ def download_example_dataset(dataset: str, data_home=None):
         url = f"{example_data_url}/{dataset}/{filename}"
         urlretrieve(url, str(filepath))
 
+        should_patch = dataset == "GlobCurrent_example_data"
+
+        if should_patch:
+            xr.load_dataset(filepath).pipe(patch_dataset_v4_compat).to_netcdf(filepath)
+
     return dataset_folder
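
End-to-end, the GlobCurrent example files are now patched in place as part of the download. A short usage note; the output location depends on `platformdirs`:

```python
from parcels.tools.exampledata_utils import download_example_dataset

# Fetches the files and, for this dataset only, rewrites each NetCDF file so
# its "Unit" attributes become "units".
folder = download_example_dataset("GlobCurrent_example_data")
print(folder)
```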
