From b3ad4e8961fb3b797c021ae735d1e8faa91ec12d Mon Sep 17 00:00:00 2001 From: Sam Woodman Date: Wed, 9 Jul 2025 01:57:51 +0000 Subject: [PATCH 1/4] First pass at #226 Added exclude_vars, and calculated profile_direction as the mode --- pyglider/ncprocess.py | 41 ++++++++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/pyglider/ncprocess.py b/pyglider/ncprocess.py index b366d41..7ac5d48 100644 --- a/pyglider/ncprocess.py +++ b/pyglider/ncprocess.py @@ -195,6 +195,7 @@ def make_gridfiles( depth_bins=None, dz=1, starttime='1970-01-01', + exclude_vars=None, ): """ Turn a timeseries netCDF file into a vertically gridded netCDF. @@ -219,6 +220,14 @@ def make_gridfiles( dz : float, default = 1 Vertical grid spacing in meters. Ignored if ``depth_bins`` is not None + starttime : str, default = '1970-01-01' + The minimum time of data that will be gridded. All data before this + time will be dropped + + exclude_vars : list of strings, default empty list + Variable names from the timeseries that should not be gridded. 
+ These variables will be excluded from the gridded netCDF file + Returns ------- outname : str @@ -274,6 +283,7 @@ def make_gridfiles( dsout = xr.Dataset( coords={'depth': ('depth', depths), 'profile': (xdimname, profiles)} ) + # dsout['profile'].attrs = ds.profile_index.attrs dsout['depth'].attrs = { 'units': 'm', 'long_name': 'Depth', @@ -285,14 +295,20 @@ def make_gridfiles( } # Bin by profile index, for the mean time, lat, and lon values for each profile - ds['time_1970'] = ds.temperature.copy() + ds['time_1970'] = ds.longitude.copy() ds['time_1970'].values = ds.time.values.astype(np.float64) - for td in ('time_1970', 'longitude', 'latitude'): + td_lookup = { + 'time_1970': 'mean', + 'longitude': 'mean', + 'latitude': 'mean', + 'profile_direction': lambda x: stats.mode(x, keepdims=True)[0][0], + } + for td, bin_stat in td_lookup.items(): good = np.where(~np.isnan(ds[td]) & (ds['profile_index'] % 1 == 0))[0] dat, xedges, binnumber = stats.binned_statistic( ds['profile_index'].values[good], ds[td].values[good], - statistic='mean', + statistic=bin_stat, bins=[profile_bins], ) if td == 'time_1970': @@ -302,9 +318,12 @@ def make_gridfiles( dsout[td] = (('time'), dat, ds[td].attrs) # Bin by profile index, for the profile start (min) and end (max) times - profile_lookup = {'profile_time_start': "min", 'profile_time_end': "max"} + profile_time_lookup = { + 'profile_time_start': "min", + 'profile_time_end': "max" + } good = np.where(~np.isnan(ds['time']) & (ds['profile_index'] % 1 == 0))[0] - for td, bin_stat in profile_lookup.items(): + for td, bin_stat in profile_time_lookup.items(): _log.debug(f'td, bin_stat {td}, {bin_stat}') dat, xedges, binnumber = stats.binned_statistic( ds['profile_index'].values[good], @@ -319,8 +338,12 @@ def make_gridfiles( ds = ds.drop('time_1970') _log.info(f'Done times!') - for k in ds.keys(): - if k in ['time', 'profile', 'longitude', 'latitude', 'depth'] or 'time' in k: + if exclude_vars is None: + exclude_vars = [] + exclude_vars = 
list(dsout.keys()) + ["distance_over_ground"] + exclude_vars + for k in ds.keys(): + if (k in exclude_vars) or ('time' in k) or ('profile' in k): + _log.debug('Not gridding %s', k) continue _log.info('Gridding %s', k) good = np.where(~np.isnan(ds[k]) & (ds['profile_index'] % 1 == 0))[0] @@ -372,10 +395,10 @@ def make_gridfiles( dsout.attrs['time_coverage_end'] = dsout.attrs['time_coverage_end'][:19] # fix standard_name so they don't overlap! try: - dsout['waypoint_latitude'].attrs.pop('standard_name') - dsout['waypoint_longitude'].attrs.pop('standard_name') dsout['profile_time_start'].attrs.pop('standard_name') dsout['profile_time_end'].attrs.pop('standard_name') + dsout['waypoint_latitude'].attrs.pop('standard_name') + dsout['waypoint_longitude'].attrs.pop('standard_name') except: pass # remove, so they can be encoded later: From 296282e7c930adfe39120ead840fae77e4244867 Mon Sep 17 00:00:00 2001 From: Sam Woodman Date: Wed, 9 Jul 2025 02:45:48 +0000 Subject: [PATCH 2/4] Add depth back to 'exclude_vars' --- pyglider/ncprocess.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyglider/ncprocess.py b/pyglider/ncprocess.py index 7ac5d48..590b09b 100644 --- a/pyglider/ncprocess.py +++ b/pyglider/ncprocess.py @@ -340,7 +340,7 @@ def make_gridfiles( if exclude_vars is None: exclude_vars = [] - exclude_vars = list(dsout.keys()) + ["distance_over_ground"] + exclude_vars + exclude_vars += list(dsout.keys()) + ["depth"] for k in ds.keys(): if (k in exclude_vars) or ('time' in k) or ('profile' in k): _log.debug('Not gridding %s', k) From dc562db929db2d246069c3df4b043f43cf4649d3 Mon Sep 17 00:00:00 2001 From: Sam Woodman Date: Tue, 5 Aug 2025 01:24:57 +0000 Subject: [PATCH 3/4] look for grid_exclude flag, rather than exclude_vars argument --- pyglider/ncprocess.py | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/pyglider/ncprocess.py b/pyglider/ncprocess.py index 590b09b..044ba73 100644 --- 
a/pyglider/ncprocess.py +++ b/pyglider/ncprocess.py @@ -195,11 +195,16 @@ def make_gridfiles( depth_bins=None, dz=1, starttime='1970-01-01', - exclude_vars=None, ): """ Turn a timeseries netCDF file into a vertically gridded netCDF. + Timeseries variables can be excluded from the gridded netCDF file + by including ``grid_exclude: 'true'`` in the deployment yaml. + 'distance_over_ground' will always be excluded. + 'profile_direction', 'profile_time_start', and 'profile_time_end' + will always be included, but will only have one dimension ('profile'). + Parameters ---------- inname : str or Path @@ -224,17 +229,13 @@ def make_gridfiles( The minimum time of data that will be gridded. All data before this time will be dropped - exclude_vars : list of strings, default empty list - Variable names from the timeseries that should not be gridded. - These variables will be excluded from the gridded netCDF file - Returns ------- outname : str Name of gridded netCDF file. The gridded netCDF file has dimensions of 'depth' and 'profile', so each variable is gridded in depth bins and by profile number. Each profile has a time, latitude, and longitude. - The depth values are the bin centers + The depth values are the bin centers. 
""" try: os.mkdir(outdir) @@ -283,7 +284,7 @@ def make_gridfiles( dsout = xr.Dataset( coords={'depth': ('depth', depths), 'profile': (xdimname, profiles)} ) - # dsout['profile'].attrs = ds.profile_index.attrs + dsout['profile'].attrs = ds.profile_index.attrs dsout['depth'].attrs = { 'units': 'm', 'long_name': 'Depth', @@ -315,7 +316,7 @@ def make_gridfiles( td = 'time' dat = dat.astype('timedelta64[ns]') + np.datetime64('1970-01-01T00:00:00') _log.info(f'{td} {len(dat)}') - dsout[td] = (('time'), dat, ds[td].attrs) + dsout[td] = (xdimname, dat, ds[td].attrs) # Bin by profile index, for the profile start (min) and end (max) times profile_time_lookup = { @@ -338,13 +339,19 @@ def make_gridfiles( ds = ds.drop('time_1970') _log.info(f'Done times!') - if exclude_vars is None: - exclude_vars = [] - exclude_vars += list(dsout.keys()) + ["depth"] + coordinate_vars = ( + list(dsout.keys()) + + ["depth", "profile_index", "distance_over_ground"] + ) for k in ds.keys(): - if (k in exclude_vars) or ('time' in k) or ('profile' in k): + if (k in coordinate_vars) or ('time' in k): _log.debug('Not gridding %s', k) continue + if 'grid_exclude' in ds[k].attrs: + if ds[k].attrs['grid_exclude'] == 'true': + _log.debug('Not gridding %s due to grid_exclude flag', k) + continue + _log.info('Gridding %s', k) good = np.where(~np.isnan(ds[k]) & (ds['profile_index'] % 1 == 0))[0] if len(good) <= 0: @@ -419,7 +426,7 @@ def make_gridfiles( dsout['mission_number'].attrs['cf_role'] = 'trajectory_id' dsout = dsout.set_coords(['latitude', 'longitude', 'time']) for k in dsout: - if k in ['profile', 'depth', 'latitude', 'longitude', 'time', 'mission_number']: + if k in coordinate_vars + ['mission_number']: dsout[k].attrs['coverage_content_type'] = 'coordinate' else: dsout[k].attrs['coverage_content_type'] = 'physicalMeasurement' From 0f543219b97b2c061d35b85f41490ec7021d5282 Mon Sep 17 00:00:00 2001 From: Sam Woodman Date: Tue, 5 Aug 2025 16:42:11 +0000 Subject: [PATCH 4/4] fix cf attribute --- 
pyglider/ncprocess.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pyglider/ncprocess.py b/pyglider/ncprocess.py index 044ba73..3ce6e0c 100644 --- a/pyglider/ncprocess.py +++ b/pyglider/ncprocess.py @@ -339,12 +339,12 @@ def make_gridfiles( ds = ds.drop('time_1970') _log.info(f'Done times!') - coordinate_vars = ( + grid_exclude_vars = ( list(dsout.keys()) + ["depth", "profile_index", "distance_over_ground"] ) for k in ds.keys(): - if (k in coordinate_vars) or ('time' in k): + if (k in grid_exclude_vars) or ('time' in k): _log.debug('Not gridding %s', k) continue if 'grid_exclude' in ds[k].attrs: @@ -426,8 +426,10 @@ def make_gridfiles( dsout['mission_number'].attrs['cf_role'] = 'trajectory_id' dsout = dsout.set_coords(['latitude', 'longitude', 'time']) for k in dsout: - if k in coordinate_vars + ['mission_number']: + if k in ['profile', 'depth', 'latitude', 'longitude', 'time', 'mission_number']: dsout[k].attrs['coverage_content_type'] = 'coordinate' + elif k in grid_exclude_vars: + dsout[k].attrs['coverage_content_type'] = 'auxiliaryInformation' else: dsout[k].attrs['coverage_content_type'] = 'physicalMeasurement'