Skip to content
Open
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 32 additions & 9 deletions pyglider/ncprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@ def make_gridfiles(
depth_bins=None,
dz=1,
starttime='1970-01-01',
exclude_vars=None,
):
"""
Turn a timeseries netCDF file into a vertically gridded netCDF.
Expand All @@ -219,6 +220,14 @@ def make_gridfiles(
dz : float, default = 1
Vertical grid spacing in meters. Ignored if ``depth_bins`` is not None

starttime : str, default = '1970-01-01'
The minimum time of data that will be gridded. All data before this
time will be dropped

exclude_vars : list of strings, default empty list
Variable names from the timeseries that should not be gridded.
These variables will be excluded from the gridded netCDF file

Returns
-------
outname : str
Expand Down Expand Up @@ -274,6 +283,7 @@ def make_gridfiles(
dsout = xr.Dataset(
coords={'depth': ('depth', depths), 'profile': (xdimname, profiles)}
)
# dsout['profile'].attrs = ds.profile_index.attrs
dsout['depth'].attrs = {
'units': 'm',
'long_name': 'Depth',
Expand All @@ -285,14 +295,20 @@ def make_gridfiles(
}

# Bin by profile index, for the mean time, lat, and lon values for each profile
ds['time_1970'] = ds.temperature.copy()
ds['time_1970'] = ds.longitude.copy()
ds['time_1970'].values = ds.time.values.astype(np.float64)
for td in ('time_1970', 'longitude', 'latitude'):
td_lookup = {
'time_1970': 'mean',
'longitude': 'mean',
'latitude': 'mean',
'profile_direction': lambda x: stats.mode(x, keepdims=True)[0][0],
}
for td, bin_stat in td_lookup.items():
good = np.where(~np.isnan(ds[td]) & (ds['profile_index'] % 1 == 0))[0]
dat, xedges, binnumber = stats.binned_statistic(
ds['profile_index'].values[good],
ds[td].values[good],
statistic='mean',
statistic=bin_stat,
bins=[profile_bins],
)
if td == 'time_1970':
Expand All @@ -302,9 +318,12 @@ def make_gridfiles(
dsout[td] = (('time'), dat, ds[td].attrs)

# Bin by profile index, for the profile start (min) and end (max) times
profile_lookup = {'profile_time_start': "min", 'profile_time_end': "max"}
profile_time_lookup = {
'profile_time_start': "min",
'profile_time_end': "max"
}
good = np.where(~np.isnan(ds['time']) & (ds['profile_index'] % 1 == 0))[0]
for td, bin_stat in profile_lookup.items():
for td, bin_stat in profile_time_lookup.items():
_log.debug(f'td, bin_stat {td}, {bin_stat}')
dat, xedges, binnumber = stats.binned_statistic(
ds['profile_index'].values[good],
Expand All @@ -319,8 +338,12 @@ def make_gridfiles(
ds = ds.drop('time_1970')
_log.info(f'Done times!')

for k in ds.keys():
if k in ['time', 'profile', 'longitude', 'latitude', 'depth'] or 'time' in k:
if exclude_vars is None:
exclude_vars = []
exclude_vars += list(dsout.keys()) + ["depth"]
for k in ds.keys():
if (k in exclude_vars) or ('time' in k) or ('profile' in k):
_log.debug('Not gridding %s', k)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks to be gridding longitude and latitude by deafult now? Or are they in the default exclude_vars?

I wonder if for non-coordinate data (eg temperature etc) we should really be setting whether to grid it or not in the yaml instead of specifying a list here. eg in the yaml say grid_exclude: True if we don't want it to be gridded, but we do want it in the time series?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks to be gridding longitude and latitude by deafult now? Or are they in the default exclude_vars?

longitude and latitude (along with time, profile_direction, profile_time_start, and profile_time_end) are all guaranteed to be part of dsout. Thus, they're always added to exclude_vars in exclude_vars += list(dsout.keys()) + ["depth"]

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if for non-coordinate data (eg temperature etc) we should really be setting whether to grid it or not in the yaml instead of specifying a list here. eg in the yaml say grid_exclude: True if we don't want it to be gridded, but we do want it in the time series?

A grid_exclude boolean key feels totally reasonable to me. It's also more consistent with the function checking for the 'average_method' key. I'll update the pr now

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, this seems good. I was just wondering though if we wanted to allow the user to exclude a timeseries variable from being gridded at this stage by adding a property to the yaml. But maybe that can be a follow up?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@smwoodman I was waiting to understand this...

continue
_log.info('Gridding %s', k)
good = np.where(~np.isnan(ds[k]) & (ds['profile_index'] % 1 == 0))[0]
Expand Down Expand Up @@ -372,10 +395,10 @@ def make_gridfiles(
dsout.attrs['time_coverage_end'] = dsout.attrs['time_coverage_end'][:19]
# fix standard_name so they don't overlap!
try:
dsout['waypoint_latitude'].attrs.pop('standard_name')
dsout['waypoint_longitude'].attrs.pop('standard_name')
dsout['profile_time_start'].attrs.pop('standard_name')
dsout['profile_time_end'].attrs.pop('standard_name')
dsout['waypoint_latitude'].attrs.pop('standard_name')
dsout['waypoint_longitude'].attrs.pop('standard_name')
except:
pass
# remove, so they can be encoded later:
Expand Down