diff --git a/docs/examples/example_globcurrent.py b/docs/examples/example_globcurrent.py
index fa768509c8..77df3ffef7 100755
--- a/docs/examples/example_globcurrent.py
+++ b/docs/examples/example_globcurrent.py
@@ -11,7 +11,6 @@
 def set_globcurrent_fieldset(
     filename=None,
     indices=None,
-    deferred_load=True,
     use_xarray=False,
     timestamps=None,
 ):
@@ -41,7 +40,6 @@ def set_globcurrent_fieldset(
         variables,
         dimensions,
         indices,
-        deferred_load=deferred_load,
         timestamps=timestamps,
     )
 
@@ -80,9 +78,7 @@ def test_globcurrent_fieldset_advancetime(dt, lonstart, latstart, use_xarray):
         lat=[latstart],
     )
 
-    fieldsetall = set_globcurrent_fieldset(
-        files[0:10], deferred_load=False, use_xarray=use_xarray
-    )
+    fieldsetall = set_globcurrent_fieldset(files[0:10], use_xarray=use_xarray)
     psetall = parcels.ParticleSet.from_list(
         fieldset=fieldsetall,
         pclass=parcels.Particle,
@@ -118,32 +114,6 @@ def test_globcurrent_particles(use_xarray):
     assert abs(pset[0].lat - -35.3) < 1
 
 
-@pytest.mark.v4remove
-@pytest.mark.xfail(reason="time_periodic removed in v4")
-@pytest.mark.parametrize("rundays", [300, 900])
-def test_globcurrent_time_periodic(rundays):
-    sample_var = []
-    for deferred_load in [True, False]:
-        fieldset = set_globcurrent_fieldset(
-            time_periodic=timedelta(days=365), deferred_load=deferred_load
-        )
-
-        MyParticle = parcels.Particle.add_variable("sample_var", initial=0.0)
-
-        pset = parcels.ParticleSet(
-            fieldset, pclass=MyParticle, lon=25, lat=-35, time=fieldset.U.grid.time[0]
-        )
-
-        def SampleU(particle, fieldset, time):  # pragma: no cover
-            u, v = fieldset.UV[time, particle.depth, particle.lat, particle.lon]
-            particle.sample_var += u
-
-        pset.execute(SampleU, runtime=timedelta(days=rundays), dt=timedelta(days=1))
-        sample_var.append(pset[0].sample_var)
-
-    assert np.allclose(sample_var[0], sample_var[1])
-
-
 @pytest.mark.parametrize("dt", [-300, 300])
 def test_globcurrent_xarray_vs_netcdf(dt):
     fieldsetNetcdf = set_globcurrent_fieldset(use_xarray=False)
@@ -241,9 +211,19 @@ def test_globcurrent_time_extrapolation_error(use_xarray):
     )
 
 
+@pytest.mark.v4alpha
+@pytest.mark.xfail(
+    reason="This was always broken when using eager loading (`deferred_load=False`) for the P field. Needs to be fixed."
+)
 @pytest.mark.parametrize("dt", [-300, 300])
 @pytest.mark.parametrize("with_starttime", [True, False])
 def test_globcurrent_startparticles_between_time_arrays(dt, with_starttime):
+    """Test for correctly initialising particle start times.
+
+    When using Fields with different temporal domains, it's important to initialise particles
+    at the beginning of the time period where all Fields have available data (i.e., the
+    intersection of the temporal domains).
+    """
     fieldset = set_globcurrent_fieldset()
 
     data_folder = parcels.download_example_dataset("GlobCurrent_example_data")
diff --git a/docs/examples/example_ofam.py b/docs/examples/example_ofam.py
index d0f887b8ea..ec089b4a31 100644
--- a/docs/examples/example_ofam.py
+++ b/docs/examples/example_ofam.py
@@ -8,7 +8,7 @@
 import parcels
 
 
-def set_ofam_fieldset(deferred_load=True, use_xarray=False):
+def set_ofam_fieldset(use_xarray=False):
     data_folder = parcels.download_example_dataset("OFAM_example_data")
     filenames = {
         "U": f"{data_folder}/OFAM_simple_U.nc",
@@ -32,13 +32,12 @@ def set_ofam_fieldset(deferred_load=True, use_xarray=False):
         variables,
         dimensions,
         allow_time_extrapolation=True,
-        deferred_load=deferred_load,
     )
 
 
 @pytest.mark.parametrize("use_xarray", [True, False])
 def test_ofam_fieldset_fillvalues(use_xarray):
-    fieldset = set_ofam_fieldset(deferred_load=False, use_xarray=use_xarray)
+    fieldset = set_ofam_fieldset(use_xarray=use_xarray)
     # V.data[0, 0, 150] is a landpoint, that makes NetCDF4 generate a masked array, instead of an ndarray
     assert fieldset.V.data[0, 0, 150] == 0
 
diff --git a/parcels/_typing.py b/parcels/_typing.py
index b2ee43580c..731969dbb0 100644
--- a/parcels/_typing.py
+++ b/parcels/_typing.py
@@ -29,7 +29,6 @@
 Mesh = Literal["spherical", "flat"]  # corresponds with `mesh`
 VectorType = Literal["3D", "3DSigma", "2D"] | None  # corresponds with `vector_type`
 GridIndexingType = Literal["pop", "mom5", "mitgcm", "nemo", "croco"]  # corresponds with `gridindexingtype`
-UpdateStatus = Literal["not_updated", "first_updated", "updated"]  # corresponds with `_update_status`
 NetcdfEngine = Literal["netcdf4", "xarray", "scipy"]
 
 
diff --git a/parcels/field.py b/parcels/field.py
index 5e72745012..764d277275 100644
--- a/parcels/field.py
+++ b/parcels/field.py
@@ -177,10 +177,6 @@ def __init__(
         self.filebuffername = name[1]
         self.data = data
         if grid:
-            if grid.defer_load and isinstance(data, np.ndarray):
-                raise ValueError(
-                    "Cannot combine Grid from defer_loaded Field with np.ndarray data. please specify lon, lat, depth and time dimensions separately"
-                )
             self._grid = grid
         else:
             if (time is not None) and isinstance(time[0], np.datetime64):
@@ -225,14 +221,12 @@ def __init__(
         else:
             self.allow_time_extrapolation = allow_time_extrapolation
 
-        if not self.grid.defer_load:
-            self.data = self._reshape(self.data)
-            self._loaded_time_indices = range(self.grid.tdim)
-
-            # Hack around the fact that NaN and ridiculously large values
-            # propagate in SciPy's interpolators
-            self.data[np.isnan(self.data)] = 0.0
+        self.data = self._reshape(self.data)
+        self._loaded_time_indices = range(self.grid.tdim)
 
+        # Hack around the fact that NaN and ridiculously large values
+        # propagate in SciPy's interpolators
+        self.data[np.isnan(self.data)] = 0.0
         self._scaling_factor = None
 
         self._dimensions = kwargs.pop("dimensions", None)
@@ -355,7 +349,6 @@ def from_netcdf(
         mesh: Mesh = "spherical",
         timestamps=None,
         allow_time_extrapolation: bool | None = None,
-        deferred_load: bool = True,
         **kwargs,
     ) -> "Field":
         """Create field from netCDF file.
@@ -388,11 +381,6 @@
             boolean whether to allow for extrapolation in time (i.e.
            beyond the last available time snapshot)
             Default is False if dimensions includes time, else True
-        deferred_load : bool
-            boolean whether to only pre-load data (in deferred mode) or
-            fully load them (default: True). It is advised to deferred load the data, since in
-            that case Parcels deals with a better memory management during particle set execution.
-            deferred_load=False is however sometimes necessary for plotting the fields.
         gridindexingtype : str
             The type of gridindexing. Either 'nemo' (default), 'mitgcm', 'mom5', 'pop', or 'croco' are supported.
             See also the Grid indexing documentation on oceanparcels.org
@@ -551,56 +539,29 @@ def from_netcdf(
                 "time dimension in indices is not necessary anymore. It is then ignored.", FieldSetWarning, stacklevel=2
             )
 
-        if grid.time.size <= 2:
-            deferred_load = False
-
-        if not deferred_load:
-            # Pre-allocate data before reading files into buffer
-            data_list = []
-            ti = 0
-            for tslice, fname in zip(grid.timeslices, data_filenames, strict=True):
-                with NetcdfFileBuffer(  # type: ignore[operator]
-                    fname,
-                    dimensions,
-                    indices,
-                    netcdf_engine,
-                    interp_method=interp_method,
-                    data_full_zdim=data_full_zdim,
-                ) as filebuffer:
-                    # If Field.from_netcdf is called directly, it may not have a 'data' dimension
-                    # In that case, assume that 'name' is the data dimension
-                    filebuffer.name = variable[1]
-                    buffer_data = filebuffer.data
-                    if len(buffer_data.shape) == 4:
-                        errormessage = (
-                            f"Field {filebuffer.name} expecting a data shape of [tdim={grid.tdim}, zdim={grid.zdim}, "
-                            f"ydim={grid.ydim}, xdim={grid.xdim }] "
-                            f"but got shape {buffer_data.shape}."
-                        )
-                        assert buffer_data.shape[0] == grid.tdim, errormessage
-                        assert buffer_data.shape[2] == grid.ydim, errormessage
-                        assert buffer_data.shape[3] == grid.xdim, errormessage
-
-                    if len(buffer_data.shape) == 2:
-                        data_list.append(buffer_data.reshape(sum(((len(tslice), 1), buffer_data.shape), ())))
-                    elif len(buffer_data.shape) == 3:
-                        if len(filebuffer.indices["depth"]) > 1:
-                            data_list.append(buffer_data.reshape(sum(((1,), buffer_data.shape), ())))
-                        else:
-                            if type(tslice) not in [list, np.ndarray, xr.DataArray]:
-                                tslice = [tslice]
-                            data_list.append(buffer_data.reshape(sum(((len(tslice), 1), buffer_data.shape[1:]), ())))
-                    else:
-                        data_list.append(buffer_data)
-                    if type(tslice) not in [list, np.ndarray, xr.DataArray]:
-                        tslice = [tslice]
-                ti += len(tslice)
-            data = np.concatenate(data_list, axis=0)
-        else:
-            grid._defer_load = True
-            grid._ti = -1
-            data = DeferredArray()
-            data.compute_shape(grid.xdim, grid.ydim, grid.zdim, grid.tdim, len(grid.timeslices))
+        with NetcdfFileBuffer(  # type: ignore[operator]
+            data_filenames,
+            dimensions,
+            indices,
+            netcdf_engine,
+            interp_method=interp_method,
+            data_full_zdim=data_full_zdim,
+        ) as filebuffer:
+            # If Field.from_netcdf is called directly, it may not have a 'data' dimension
+            # In that case, assume that 'name' is the data dimension
+            filebuffer.name = variable[1]
+            buffer_data = filebuffer.data
+            if len(buffer_data.shape) == 4:
+                errormessage = (
+                    f"Field {filebuffer.name} expecting a data shape of [tdim={grid.tdim}, zdim={grid.zdim}, "
+                    f"ydim={grid.ydim}, xdim={grid.xdim}] "
+                    f"but got shape {buffer_data.shape}."
+                )
+                assert buffer_data.shape[0] == grid.tdim, errormessage
+                assert buffer_data.shape[2] == grid.ydim, errormessage
+                assert buffer_data.shape[3] == grid.xdim, errormessage
+
+            data = buffer_data
 
         if allow_time_extrapolation is None:
             allow_time_extrapolation = False if "time" in dimensions else True
@@ -727,8 +688,7 @@ def set_scaling_factor(self, factor):
         if self._scaling_factor:
             raise NotImplementedError(f"Scaling factor for field {self.name} already defined.")
         self._scaling_factor = factor
-        if not self.grid.defer_load:
-            self.data *= factor
+        self.data *= factor
 
     def set_depth_from_field(self, field):
         """Define the depth dimensions from another (time-varying) field.
@@ -913,69 +873,6 @@ def _rescale_and_set_minmax(self, data):
             data *= self._scaling_factor
         return data
 
-    def _data_concatenate(self, data, data_to_concat, tindex):
-        if data[tindex] is not None:
-            if isinstance(data, np.ndarray):
-                data[tindex] = None
-            elif isinstance(data, list):
-                del data[tindex]
-        if tindex == 0:
-            data = np.concatenate([data_to_concat, data[tindex + 1 :, :]], axis=0)
-        elif tindex == 1:
-            data = np.concatenate([data[:tindex, :], data_to_concat], axis=0)
-        else:
-            raise ValueError("data_concatenate is used for computeTimeChunk, with tindex in [0, 1]")
-        return data
-
-    def computeTimeChunk(self, data, tindex):
-        g = self.grid
-        timestamp = self.timestamps
-        if timestamp is not None:
-            summedlen = np.cumsum([len(ls) for ls in self.timestamps])
-            if g._ti + tindex >= summedlen[-1]:
-                ti = g._ti + tindex - summedlen[-1]
-            else:
-                ti = g._ti + tindex
-            timestamp = self.timestamps[np.where(ti < summedlen)[0][0]]
-
-        filebuffer = NetcdfFileBuffer(
-            self._dataFiles[g._ti + tindex],
-            self.dimensions,
-            self.indices,
-            netcdf_engine=self.netcdf_engine,
-            timestamp=timestamp,
-            interp_method=self.interp_method,
-            data_full_zdim=self.data_full_zdim,
-        )
-        filebuffer.__enter__()
-        time_data = filebuffer.time
-        time_data = g.time_origin.reltime(time_data)
-        filebuffer.ti = (time_data <= g.time[tindex]).argmin() - 1
-        if self.netcdf_engine != "xarray":
-            filebuffer.name = self.filebuffername
-        buffer_data = filebuffer.data
-        if len(buffer_data.shape) == 2:
-            buffer_data = np.reshape(buffer_data, sum(((1, 1), buffer_data.shape), ()))
-        elif len(buffer_data.shape) == 3 and g.zdim > 1:
-            buffer_data = np.reshape(buffer_data, sum(((1,), buffer_data.shape), ()))
-        elif len(buffer_data.shape) == 3:
-            buffer_data = np.reshape(
-                buffer_data,
-                sum(
-                    (
-                        (
-                            buffer_data.shape[0],
-                            1,
-                        ),
-                        buffer_data.shape[1:],
-                    ),
-                    (),
-                ),
-            )
-        data = self._data_concatenate(data, buffer_data, tindex)
-        self.filebuffers[tindex] = filebuffer
-        return data
-
     def ravel_index(self, zi, yi, xi):
         """Return the flat index of the given grid points.
@@ -1560,32 +1457,6 @@ def __getitem__(self, key):
         return _deal_with_errors(error, key, vector_type=self.vector_type)
 
 
-class DeferredArray:
-    """Class used for throwing error when Field.data is not read in deferred loading mode."""
-
-    data_shape = ()
-
-    def __init__(self):
-        self.data_shape = (1,)
-
-    def compute_shape(self, xdim, ydim, zdim, tdim, tslices):
-        if zdim == 1 and tdim == 1:
-            self.data_shape = (tslices, 1, ydim, xdim)
-        elif zdim > 1 or tdim > 1:
-            if zdim > 1:
-                self.data_shape = (1, zdim, ydim, xdim)
-            else:
-                self.data_shape = (max(tdim, tslices), 1, ydim, xdim)
-        else:
-            self.data_shape = (tdim, zdim, ydim, xdim)
-        return self.data_shape
-
-    def __getitem__(self, key):
-        raise RuntimeError(
-            "Field is in deferred_load mode, so can't be accessed. "
-            "Use .computeTimeChunk() method to force loading of data"
-        )
-
-
 class NestedField(list):
     """NestedField is a class that allows for interpolation of fields on different grids of potentially varying resolution.
diff --git a/parcels/fieldfilebuffer.py b/parcels/fieldfilebuffer.py
index e0fd6b9b32..837788029f 100644
--- a/parcels/fieldfilebuffer.py
+++ b/parcels/fieldfilebuffer.py
@@ -5,7 +5,7 @@
 import numpy as np
 import xarray as xr
 
-from parcels._typing import InterpMethodOption
+from parcels._typing import InterpMethodOption, PathLike
 from parcels.tools.converters import convert_xarray_time_units
 from parcels.tools.warnings import FileWarning
 
@@ -22,12 +22,11 @@ def __init__(
         gridindexingtype="nemo",
         **kwargs,
     ):
-        self.filename = filename
+        self.filename: PathLike | list[PathLike] = filename
         self.dimensions = dimensions  # Dict with dimension keys for file data
         self.indices = indices
         self.dataset = None
         self.timestamp = timestamp
-        self.ti = None
         self.interp_method = interp_method
         self.gridindexingtype = gridindexingtype
         self.data_full_zdim = data_full_zdim
@@ -189,7 +188,7 @@ def data(self):
 
     def data_access(self):
         data = self.dataset[self.name]
-        ti = range(data.shape[0]) if self.ti is None else self.ti
+        ti = range(data.shape[0])
         return np.array(self._apply_indices(data, ti))
 
     @property
@@ -222,7 +221,7 @@ def open_xarray_dataset(filename: Path | str, netcdf_engine: str) -> xr.Dataset:
         # Unfortunately we need to do if-else here, cause the lock-parameter is either False or a Lock-object
         # (which we would rather want to have being auto-managed).
         # If 'lock' is not specified, the Lock-object is auto-created and managed by xarray internally.
-        ds = xr.open_dataset(filename, decode_cf=True, engine=netcdf_engine)
+        ds = xr.open_mfdataset(filename, decode_cf=True, engine=netcdf_engine)
         ds["decoded"] = True
     except:
         warnings.warn(  # TODO: Is this warning necessary? What cases does this except block get triggered - is it to do with the bare except???
@@ -232,6 +231,6 @@ def open_xarray_dataset(filename: Path | str, netcdf_engine: str) -> xr.Dataset:
             stacklevel=2,
         )
-        ds = xr.open_dataset(filename, decode_cf=False, engine=netcdf_engine)
+        ds = xr.open_mfdataset(filename, decode_cf=False, engine=netcdf_engine)
         ds["decoded"] = False
     return ds
diff --git a/parcels/fieldset.py b/parcels/fieldset.py
index e4a162fdda..d840f93a98 100644
--- a/parcels/fieldset.py
+++ b/parcels/fieldset.py
@@ -9,14 +9,13 @@
 
 from parcels._compat import MPI
 from parcels._typing import GridIndexingType, InterpMethodOption, Mesh
-from parcels.field import DeferredArray, Field, NestedField, VectorField
+from parcels.field import Field, NestedField, VectorField
 from parcels.grid import Grid
 from parcels.gridset import GridSet
 from parcels.particlefile import ParticleFile
 from parcels.tools._helpers import fieldset_repr
 from parcels.tools.converters import TimeConverter, convert_xarray_time_units
 from parcels.tools.loggers import logger
-from parcels.tools.statuscodes import TimeExtrapolationError
 from parcels.tools.warnings import FieldSetWarning
 
 __all__ = ["FieldSet"]
@@ -285,8 +284,6 @@ def check_velocityfields(U, V, W):
                 g.time_origin.time_origin, type(self.time_origin.time_origin)
             ), "time origins of different grids must be have the same type"
             g.time = g.time + self.time_origin.reltime(g.time_origin)
-            if g.defer_load:
-                g.time_full = g.time_full + self.time_origin.reltime(g.time_origin)
             g._time_origin = self.time_origin
         self._add_UVfield()
 
@@ -298,9 +295,8 @@ def check_velocityfields(U, V, W):
                 raise ValueError(
                     "If depth dimension is set at 'not_yet_set', it must be added later using Field.set_depth_from_field(field)"
                 )
-            if not f.grid.defer_load:
-                depth_data = f.grid.depth_field.data
-                f.grid._depth = depth_data if isinstance(depth_data, np.ndarray) else np.array(depth_data)
+            depth_data = f.grid.depth_field.data
+            f.grid._depth = depth_data if isinstance(depth_data, np.ndarray) else np.array(depth_data)
         self._completed = True
 
     @classmethod
@@ -326,7 +322,6 @@ def from_netcdf(
         mesh: Mesh = "spherical",
        timestamps=None,
         allow_time_extrapolation: bool | None = None,
-        deferred_load=True,
         **kwargs,
     ):
         """Initialises FieldSet object from NetCDF files.
@@ -374,11 +369,6 @@
             boolean whether to allow for extrapolation (i.e.
             beyond the last available time snapshot)
             Default is False if dimensions includes time, else True
-        deferred_load : bool
-            boolean whether to only pre-load data (in deferred mode) or
-            fully load them (default: True). It is advised to deferred load the data, since in
-            that case Parcels deals with a better memory management during particle set execution.
-            deferred_load=False is however sometimes necessary for plotting the fields.
         interp_method : str
             Method for interpolation. Options are 'linear' (default), 'nearest', 'linear_invdist_land_tracer',
             'cgrid_velocity', 'cgrid_tracer' and 'bgrid_velocity'
@@ -468,7 +458,6 @@ def from_netcdf(
                 mesh=mesh,
                 timestamps=timestamps,
                 allow_time_extrapolation=allow_time_extrapolation,
-                deferred_load=deferred_load,
                 fieldtype=fieldtype,
                 dataFiles=dFiles,
                 **kwargs,
@@ -1168,7 +1157,6 @@ def from_parcels(
         indices=None,
         extra_fields=None,
         allow_time_extrapolation: bool | None = None,
-        deferred_load=True,
         **kwargs,
     ):
         """Initialises FieldSet data from NetCDF files using the Parcels FieldSet.write() conventions.
@@ -1192,11 +1180,6 @@
             boolean whether to allow for extrapolation (i.e.
            beyond the last available time snapshot)
             Default is False if dimensions includes time, else True
-        deferred_load : bool
-            boolean whether to only pre-load data (in deferred mode) or
-            fully load them (default: True). It is advised to deferred load the data, since in
-            that case Parcels deals with a better memory management during particle set execution.
-            deferred_load=False is however sometimes necessary for plotting the fields.
         uvar :
             (Default value = 'vozocrtx')
         vvar :
@@ -1222,7 +1205,6 @@
             variables=extra_fields,
             dimensions=dimensions,
             allow_time_extrapolation=allow_time_extrapolation,
-            deferred_load=deferred_load,
             **kwargs,
         )
 
@@ -1384,99 +1366,8 @@ def computeTimeChunk(self, time=0.0, dt=1):
             time step of the integration scheme, needed to set the direction of time chunk loading. Default is 1.
         """
-        signdt = np.sign(dt)
         nextTime = np.inf if dt > 0 else -np.inf
 
-        for g in self.gridset.grids:
-            g._update_status = "not_updated"
-        for f in self.get_fields():
-            if isinstance(f, (VectorField, NestedField)) or not f.grid.defer_load:
-                continue
-            if f.grid._update_status == "not_updated":
-                nextTime_loc = f.grid._computeTimeChunk(f, time, signdt)
-                if time == nextTime_loc and signdt != 0:
-                    raise TimeExtrapolationError(time, field=f)
-                nextTime = min(nextTime, nextTime_loc) if signdt >= 0 else max(nextTime, nextTime_loc)
-
-        for f in self.get_fields():
-            if isinstance(f, (VectorField, NestedField)) or not f.grid.defer_load or f._dataFiles is None:
-                continue
-            f._loaded_time_indices = []  # reset loaded time indices
-            g = f.grid
-            if g._update_status == "first_updated":  # First load of data
-                if f.data is not None and not isinstance(f.data, DeferredArray):
-                    if not isinstance(f.data, list):
-                        f.data = None
-                    else:
-                        for i in range(len(f.data)):
-                            del f.data[i, :]
-
-                if f.gridindexingtype == "pop" and g.zdim > 1:
-                    zd = g.zdim - 1
-                else:
-                    zd = g.zdim
-                data = np.empty((g.tdim, zd, g.ydim, g.xdim), dtype=np.float32)
-                f._loaded_time_indices = range(2)
-                for tind in f._loaded_time_indices:
-                    for fb in f.filebuffers:
-                        if fb is not None:
-                            fb.close()
-                        fb = None
-                    data = f.computeTimeChunk(data, tind)
-                data = f._rescale_and_set_minmax(data)
-
-                if isinstance(f.data, DeferredArray):
-                    f.data = DeferredArray()
-                f.data = f._reshape(data)
-
-            elif g._update_status == "updated":
-                if f.gridindexingtype == "pop" and g.zdim > 1:
-                    zd = g.zdim - 1
-                else:
-                    zd = g.zdim
-                data = np.empty((g.tdim, zd, g.ydim, g.xdim), dtype=np.float32)
-                if signdt >= 0:
-                    f._loaded_time_indices = [1]
-                    if f.filebuffers[0] is not None:
-                        f.filebuffers[0].close()
-                        f.filebuffers[0] = None
-                    f.filebuffers[0] = f.filebuffers[1]
-                    data = f.computeTimeChunk(data, 1)
-                else:
-                    f._loaded_time_indices = [0]
-                    if f.filebuffers[1] is not None:
-                        f.filebuffers[1].close()
-                        f.filebuffers[1] = None
-                    f.filebuffers[1] = f.filebuffers[0]
-                    data = f.computeTimeChunk(data, 0)
-                data = f._rescale_and_set_minmax(data)
-                if signdt >= 0:
-                    data = f._reshape(data)[1, :]
-                    if not isinstance(f.data, DeferredArray):
-                        if isinstance(f.data, list):
-                            del f.data[0, :]
-                        else:
-                            f.data[0, :] = None
-                    f.data[0, :] = f.data[1, :]
-                    f.data[1, :] = data
-                else:
-                    data = f._reshape(data)[0, :]
-                    if not isinstance(f.data, DeferredArray):
-                        if isinstance(f.data, list):
-                            del f.data[1, :]
-                        else:
-                            f.data[1, :] = None
-                    f.data[1, :] = f.data[0, :]
-                    f.data[0, :] = data
-
-        # update time varying grid depth
-        for f in self.get_fields():
-            if isinstance(f, (VectorField, NestedField)) or not f.grid.defer_load or f._dataFiles is None:
-                continue
-            if f.grid.depth_field is not None:
-                depth_data = f.grid.depth_field.data
-                f.grid._depth = depth_data if isinstance(depth_data, np.ndarray) else np.array(depth_data)
-
         if abs(nextTime) == np.inf or np.isnan(nextTime):  # Second happens when dt=0
             return nextTime
         else:
diff --git a/parcels/grid.py b/parcels/grid.py
index 90e7652e82..33687581eb 100644
--- a/parcels/grid.py
+++ b/parcels/grid.py
@@ -1,10 +1,9 @@
-import functools
 from enum import IntEnum
 
 import numpy as np
 import numpy.typing as npt
 
-from parcels._typing import Mesh, UpdateStatus, assert_valid_mesh
+from parcels._typing import Mesh, assert_valid_mesh
 from parcels.tools.converters import TimeConverter
 
 __all__ = [
@@ -42,7 +41,6 @@ def __init__(
         mesh: Mesh,
     ):
         self._ti = -1
-        self._update_status: UpdateStatus | None = None
         lon = np.array(lon)
         lat = np.array(lat)
         time = np.zeros(1, dtype=np.float64) if time is None else time
@@ -66,7 +64,6 @@ def __init__(
         assert_valid_mesh(mesh)
         self._mesh = mesh
         self._zonal_periodic = False
-        self._defer_load = False
         self._lonlat_minmax = np.array(
             [np.nanmin(lon), np.nanmax(lon), np.nanmin(lat), np.nanmax(lat)], dtype=np.float32
         )
@@ -115,10 +112,6 @@ def time_origin(self):
     def zonal_periodic(self):
         return self._zonal_periodic
 
-    @property
-    def defer_load(self):
-        return self._defer_load
-
     @staticmethod
     def create_grid(
         lon: npt.ArrayLike,
@@ -182,54 +175,6 @@ def _add_Sdepth_periodic_halo(self, zonal, meridional, halosize):
         )
         assert self.depth.shape[2] == self.ydim, "Third dim must be y."
 
-    def _computeTimeChunk(self, f, time, signdt):
-        nextTime_loc = np.inf if signdt >= 0 else -np.inf
-        prev_time_indices = self.time
-        if self._update_status == "not_updated":
-            if self._ti >= 0:
-                if time < self.time[0] or time > self.time[1]:
-                    self._ti = -1  # reset
-                elif signdt >= 0 and (time < self.time_full[0] or time >= self.time_full[-1]):
-                    self._ti = -1  # reset
-                elif signdt < 0 and (time <= self.time_full[0] or time > self.time_full[-1]):
-                    self._ti = -1  # reset
-                elif signdt >= 0 and time >= self.time[1] and self._ti < len(self.time_full) - 2:
-                    self._ti += 1
-                    self.time = self.time_full[self._ti : self._ti + 2]
-                    self._update_status = "updated"
-                elif signdt < 0 and time <= self.time[0] and self._ti > 0:
-                    self._ti -= 1
-                    self.time = self.time_full[self._ti : self._ti + 2]
-                    self._update_status = "updated"
-            if self._ti == -1:
-                self.time = self.time_full
-                self._ti = f._time_index(time)
-
-                if signdt == -1 and self._ti > 0 and self.time_full[self._ti] == time:
-                    self._ti -= 1
-                if self._ti >= len(self.time_full) - 1:
-                    self._ti = len(self.time_full) - 2
-
-                self.time = self.time_full[self._ti : self._ti + 2]
-                self.tdim = 2
-                if prev_time_indices is None or len(prev_time_indices) != 2 or len(prev_time_indices) != len(self.time):
-                    self._update_status = "first_updated"
-                elif functools.reduce(
-                    lambda i, j: i and j, map(lambda m, k: m == k, self.time, prev_time_indices), True
-                ) and len(prev_time_indices) == len(self.time):
-                    self._update_status = "not_updated"
-                elif functools.reduce(
-                    lambda i, j: i and j, map(lambda m, k: m == k, self.time[:1], prev_time_indices[:1]), True
-                ) and len(prev_time_indices) == len(self.time):
-                    self._update_status = "updated"
-                else:
-                    self._update_status = "first_updated"
-        if signdt >= 0 and (self._ti < len(self.time_full) - 2 or not f.allow_time_extrapolation):
-            nextTime_loc = self.time[1]
-        elif signdt < 0 and (self._ti > 0 or not f.allow_time_extrapolation):
-            nextTime_loc = self.time[0]
-        return nextTime_loc
-
 
 class RectilinearGrid(Grid):
"""Rectilinear Grid class diff --git a/tests/test_field.py b/tests/test_field.py index 106622705c..28f7aaa132 100644 --- a/tests/test_field.py +++ b/tests/test_field.py @@ -56,7 +56,7 @@ def test_field_nonstandardtime(calendar, cftime_datetime, tmpdir): filepath = tmpdir.join("test_nonstandardtime.nc") dates = [getattr(cftime, cftime_datetime)(1, m, 1) for m in range(1, 13)] da = xr.DataArray( - np.random.rand(12, xdim, ydim), coords=[dates, range(xdim), range(ydim)], dims=["time", "lon", "lat"], name="U" + np.random.rand(12, ydim, xdim), coords=[dates, range(ydim), range(xdim)], dims=["time", "lat", "lon"], name="U" ) da.to_netcdf(str(filepath)) diff --git a/tests/test_fieldset.py b/tests/test_fieldset.py index 7ab1cb5db7..8affdc2e00 100644 --- a/tests/test_fieldset.py +++ b/tests/test_fieldset.py @@ -37,6 +37,49 @@ def generate_fieldset_data(xdim, ydim, zdim=1, tdim=1): return (data, dimensions) +def to_xarray_dataset(data: dict[str, np.array], dimensions: dict[str, np.array]) -> xr.Dataset: + assert len(dimensions) in [2, 4], "this function only deals with output from generate_fieldset_data()" + + if len(dimensions) == 4: + return xr.Dataset( + { + "U": (["time", "depth", "lat", "lon"], data["U"]), + "V": (["time", "depth", "lat", "lon"], data["V"]), + }, + coords=dimensions, + ) + + return xr.Dataset( + { + "U": (["lat", "lon"], data["U"]), + "V": (["lat", "lon"], data["V"]), + }, + coords=dimensions, + ) + + +@pytest.fixture +def multifile_fieldset(tmp_path): + stem = "test_subsets" + + timestamps = np.arange(0, 4, 1) * 86400.0 + timestamps = np.expand_dims(timestamps, 1) + + ufiles = [] + vfiles = [] + for index, timestamp in enumerate(timestamps): + data, dimensions = generate_fieldset_data(100, 100) + path = tmp_path / f"{stem}_{index}.nc" + to_xarray_dataset(data, dimensions).pipe(assign_dataset_timestamp_dim, timestamp).to_netcdf(path) + ufiles.append(path) + vfiles.append(path) + + files = {"U": ufiles, "V": vfiles} + variables = {"U": "U", "V": "V"} + dimensions = {"lon": "lon", "lat": "lat"} + return FieldSet.from_netcdf(files, variables, dimensions, timestamps=timestamps) + + @pytest.mark.parametrize("xdim", [100, 200]) @pytest.mark.parametrize("ydim", [100, 200]) def test_fieldset_from_data(xdim, ydim): @@ -280,24 +323,10 @@ def test_add_field_after_pset(fieldtype): fieldset.add_vector_field(vfield) -def test_fieldset_samegrids_from_file(tmpdir): +def test_fieldset_samegrids_from_file(multifile_fieldset): """Test for subsetting fieldset from file using indices dict.""" - data, dimensions = generate_fieldset_data(100, 100) - filepath1 = tmpdir.join("test_subsets_1") - fieldset1 = FieldSet.from_data(data, dimensions) - fieldset1.write(filepath1) - - ufiles = [filepath1 + "U.nc"] * 4 - vfiles = [filepath1 + "V.nc"] * 4 - timestamps = np.arange(0, 4, 1) * 86400.0 - timestamps = np.expand_dims(timestamps, 1) - files = {"U": ufiles, "V": vfiles} - variables = {"U": "vozocrtx", "V": "vomecrty"} - dimensions = {"lon": "nav_lon", "lat": "nav_lat"} - fieldset = FieldSet.from_netcdf(files, variables, dimensions, timestamps=timestamps, allow_time_extrapolation=True) - - assert fieldset.gridset.size == 1 - assert fieldset.U.grid == fieldset.V.grid + assert multifile_fieldset.gridset.size == 1 + assert multifile_fieldset.U.grid == multifile_fieldset.V.grid @pytest.mark.parametrize("gridtype", ["A", "C"]) @@ -314,56 +343,54 @@ def test_fieldset_dimlength1_cgrid(gridtype): assert success -def test_fieldset_diffgrids_from_file(tmpdir): +def assign_dataset_timestamp_dim(ds, timestamp): + 
"""Expand dim to 'time' and assign timestamp.""" + ds.expand_dims("time") + ds["time"] = timestamp + return ds + + +def test_fieldset_diffgrids_from_file(tmp_path): """Test for subsetting fieldset from file using indices dict.""" - filename = "test_subsets" - data, dimensions = generate_fieldset_data(100, 100) - filepath1 = tmpdir.join(filename + "_1") - fieldset1 = FieldSet.from_data(data, dimensions) - fieldset1.write(filepath1) - data, dimensions = generate_fieldset_data(50, 50) - filepath2 = tmpdir.join(filename + "_2") - fieldset2 = FieldSet.from_data(data, dimensions) - fieldset2.write(filepath2) - - ufiles = [filepath1 + "U.nc"] * 4 - vfiles = [filepath2 + "V.nc"] * 4 + stem = "test_subsets" + timestamps = np.arange(0, 4, 1) * 86400.0 timestamps = np.expand_dims(timestamps, 1) - files = {"U": ufiles, "V": vfiles} - variables = {"U": "vozocrtx", "V": "vomecrty"} - dimensions = {"lon": "nav_lon", "lat": "nav_lat"} + + ufiles = [] + vfiles = [] + for index, timestamp in enumerate(timestamps): + # U files + data, dimensions = generate_fieldset_data(100, 100) + path = tmp_path / f"{stem}_U_{index}.nc" + to_xarray_dataset(data, dimensions).pipe(assign_dataset_timestamp_dim, timestamp).to_netcdf(path) + ufiles.append(path) + + data, dimensions = generate_fieldset_data(50, 50) + path = tmp_path / f"{stem}_V_{index}.nc" + to_xarray_dataset(data, dimensions).pipe(assign_dataset_timestamp_dim, timestamp).to_netcdf(path) + vfiles.append(path) + + files = {"U": [str(f) for f in ufiles], "V": [str(f) for f in vfiles]} + variables = {"U": "U", "V": "V"} + dimensions = {"lon": "lon", "lat": "lat"} fieldset = FieldSet.from_netcdf(files, variables, dimensions, timestamps=timestamps, allow_time_extrapolation=True) assert fieldset.gridset.size == 2 assert fieldset.U.grid != fieldset.V.grid -def test_fieldset_diffgrids_from_file_data(tmpdir): +def test_fieldset_diffgrids_from_file_data(multifile_fieldset): """Test for subsetting fieldset from file using indices dict.""" data, dimensions = generate_fieldset_data(100, 100) - filepath = tmpdir.join("test_subsets") - fieldset_data = FieldSet.from_data(data, dimensions) - fieldset_data.write(filepath) - field_data = fieldset_data.U - field_data.name = "B" + field_U = FieldSet.from_data(data, dimensions).U + field_U.name = "B" - ufiles = [filepath + "U.nc"] * 4 - vfiles = [filepath + "V.nc"] * 4 - timestamps = np.arange(0, 4, 1) * 86400.0 - timestamps = np.expand_dims(timestamps, 1) - files = {"U": ufiles, "V": vfiles} - variables = {"U": "vozocrtx", "V": "vomecrty"} - dimensions = {"lon": "nav_lon", "lat": "nav_lat"} - fieldset_file = FieldSet.from_netcdf( - files, variables, dimensions, timestamps=timestamps, allow_time_extrapolation=True - ) - - fieldset_file.add_field(field_data, "B") - fields = [f for f in fieldset_file.get_fields() if isinstance(f, Field)] + multifile_fieldset.add_field(field_U, "B") + fields = [f for f in multifile_fieldset.get_fields() if isinstance(f, Field)] assert len(fields) == 3 - assert fieldset_file.gridset.size == 2 - assert fieldset_file.U.grid != fieldset_file.B.grid + assert multifile_fieldset.gridset.size == 2 + assert multifile_fieldset.U.grid != multifile_fieldset.B.grid def test_fieldset_samegrids_from_data(): @@ -723,37 +750,3 @@ def test_fieldset_from_data_gridtypes(): pset.execute(AdvectionRK4, runtime=1.5, dt=0.5) assert np.allclose(plon, pset.lon) assert np.allclose(plat, pset.lat) - - -@pytest.mark.parametrize("direction", [1, -1]) -@pytest.mark.parametrize("time_extrapolation", [True, False]) -def 
-def test_deferredload_simplefield(direction, time_extrapolation, tmpdir):
-    tdim = 10
-    filename = tmpdir.join("simplefield_deferredload.nc")
-    data = np.zeros((tdim, 2, 2))
-    for ti in range(tdim):
-        data[ti, :, :] = ti if direction == 1 else tdim - ti - 1
-    ds = xr.Dataset(
-        {"U": (("t", "y", "x"), data), "V": (("t", "y", "x"), data)},
-        coords={"x": [0, 1], "y": [0, 1], "t": np.arange(tdim)},
-    )
-    ds.to_netcdf(filename)
-
-    fieldset = FieldSet.from_netcdf(
-        filename,
-        {"U": "U", "V": "V"},
-        {"lon": "x", "lat": "y", "time": "t"},
-        deferred_load=True,
-        mesh="flat",
-        allow_time_extrapolation=time_extrapolation,
-    )
-
-    SamplingParticle = Particle.add_variable("p")
-    pset = ParticleSet(fieldset, SamplingParticle, lon=0.5, lat=0.5)
-
-    def SampleU(particle, fieldset, time):  # pragma: no cover
-        particle.p, tmp = fieldset.UV[particle]
-
-    runtime = tdim * 2 if time_extrapolation else None
-    pset.execute(SampleU, dt=direction, runtime=runtime)
-    assert pset.p == tdim - 1 if time_extrapolation else tdim - 2
diff --git a/tests/test_fieldset_sampling.py b/tests/test_fieldset_sampling.py
index 23e952ab91..d61ed755d0 100644
--- a/tests/test_fieldset_sampling.py
+++ b/tests/test_fieldset_sampling.py
@@ -600,6 +600,10 @@ def SampleU(particle, fieldset, time):  # pragma: no cover
     assert np.isclose(pset.p, 1.0)
 
 
+@pytest.mark.v4alpha
+@pytest.mark.xfail(
+    reason="Now that timestamps has been removed, the filebuffer expects different files. This needs to be rewritten or removed."
+)
 @pytest.mark.parametrize("npart", [1, 10])
 def test_sampling_multigrids_non_vectorfield_from_file(npart, tmpdir):
     xdim, ydim = 100, 200
diff --git a/tests/test_grids.py b/tests/test_grids.py
index b11d186473..3fae1c9c3e 100644
--- a/tests/test_grids.py
+++ b/tests/test_grids.py
@@ -576,8 +576,7 @@ def sampleVel(particle, fieldset, time):  # pragma: no cover
 
 
 @pytest.mark.parametrize("vert_discretisation", ["zlevel", "slevel", "slevel2"])
-@pytest.mark.parametrize("deferred_load", [True, False])
-def test_popgrid(vert_discretisation, deferred_load):
+def test_popgrid(vert_discretisation):
     if vert_discretisation == "zlevel":
         w_dep = "w_dep"
     elif vert_discretisation == "slevel":
@@ -589,7 +588,7 @@ def test_popgrid(vert_discretisation, deferred_load):
 
     variables = {"U": "U", "V": "V", "W": "W", "T": "T"}
     dimensions = {"lon": "lon", "lat": "lat", "depth": w_dep, "time": "time"}
-    fieldset = FieldSet.from_pop(filenames, variables, dimensions, mesh="flat", deferred_load=deferred_load)
+    fieldset = FieldSet.from_pop(filenames, variables, dimensions, mesh="flat")
 
     def sampleVel(particle, fieldset, time):  # pragma: no cover
         (particle.zonal, particle.meridional, particle.vert) = fieldset.UVW[particle]
diff --git a/tests/tools/test_warnings.py b/tests/tools/test_warnings.py
index 1e6266638f..2af65bc5ae 100644
--- a/tests/tools/test_warnings.py
+++ b/tests/tools/test_warnings.py
@@ -48,6 +48,8 @@ def test_file_warnings(tmp_zarrfile):
     pset.execute(AdvectionRK4, runtime=3, dt=1, output_file=pfile)
 
 
+@pytest.mark.v4alpha
+@pytest.mark.xfail(reason="https://github.com/OceanParcels/Parcels/pull/1908#issuecomment-2698014941")
 def test_kernel_warnings():
     # positive scaling factor for W
     filenames = str(TEST_DATA / "POPtestdata_time.nc")
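
Usage note for reviewers: with `deferred_load` gone, `FieldSet.from_netcdf` always loads field data eagerly, and (per the `fieldfilebuffer.py` change above) multi-file inputs now go through a single `xr.open_mfdataset` call. Below is a minimal sketch of the resulting call pattern, assuming the GlobCurrent example data used elsewhere in this diff; the variable and dimension names are copied from `docs/examples/example_globcurrent.py` and should be treated as illustrative, not as the only supported mapping.

```python
import parcels

# Download the example data shipped with Parcels (same helper as in the examples above).
data_folder = parcels.download_example_dataset("GlobCurrent_example_data")

# A glob over the daily files; all matching files are opened together
# (via xr.open_mfdataset in the file buffer) and read into memory up front,
# since eager loading is now the only mode.
filenames = {"U": f"{data_folder}/20*.nc", "V": f"{data_folder}/20*.nc"}
variables = {
    "U": "eastward_eulerian_current_velocity",
    "V": "northward_eulerian_current_velocity",
}
dimensions = {"lat": "lat", "lon": "lon", "time": "time"}

# Note: passing deferred_load=... here now raises a TypeError, since this PR
# removes the keyword from from_netcdf().
fieldset = parcels.FieldSet.from_netcdf(filenames, variables, dimensions)
```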