diff --git a/pyglider/seaexplorer.py b/pyglider/seaexplorer.py index 6c20c2d..b2682fe 100644 --- a/pyglider/seaexplorer.py +++ b/pyglider/seaexplorer.py @@ -7,6 +7,7 @@ import glob import logging import os +import warnings import numpy as np import polars as pl @@ -338,6 +339,29 @@ def _remove_fill_values(df, fill_value=9999): return df +def _forward_fill(gli, todo='Lat'): + """Set values of column `todo` to NaN where they repeat the most recent non-null value from earlier rows (found by forward-filling); other values are kept.""" + gli = gli.with_columns([ + pl.col(todo).fill_null(strategy="forward").alias("temp_fill") + ]) + gli = gli.with_columns([ + pl.when( + (pl.col(todo) == pl.col("temp_fill").shift(1)) & pl.col(todo).is_not_null() + ).then(np.nan).otherwise(pl.col(todo)).alias(todo) + ]) + gli = gli.drop("temp_fill") + return gli + + +def _drop_if(gli, todo='Lat', condit='DeadReckoning', value=1): + """Set column `todo` to NaN on rows where column `condit` equals `value` (by default: blank Lat where DeadReckoning is 1).""" + gli = gli.with_columns([ + pl.when(pl.col(condit) == value).then(np.nan).otherwise(pl.col(todo)).alias(todo) + ]) + return gli + + + def raw_to_timeseries( indir, outdir, @@ -348,12 +372,66 @@ def raw_to_timeseries( maxgap=10, interpolate=False, fnamesuffix='', + deadreckon=False, + replace_attrs=None ): """ - A little different than above, for the 4-file version of the data set. + Convert raw seaexplorer data to a timeseries netcdf file. + + Parameters + ---------- + indir : str + Directory where the raw files are kept. + + outdir : str + Directory to write the matching ``*.nc`` files. + + deploymentyaml : str + YAML text file with deployment information for this glider. + + kind : 'raw' or 'sub' + The type of data to process. 'raw' is the full resolution data, 'sub' + is the sub-sampled data. The default is 'raw'. Note that realtime data is + typically sub-sampled. + + profile_filt_time : float + Time in seconds to use for filtering the profiles. Default is 100. + + profile_min_time : float + Minimum time in seconds for a profile to be considered a valid profile. + Default is 300. 
+ + maxgap : float + Maximum gap in seconds to interpolate over. Default is 10. + + interpolate : bool + If *True*, interpolate the data to fill in gaps. Default is False. + + fnamesuffix : str + Suffix to add to the output file name. Default is ''. + + deadreckon : bool + If *True* use the dead reckoning latitude and longitude data from the glider. Default + is *False*, and latitude and longitude are linearly interpolated between surface fixes. + *False* is the default, and recommended to avoid a-physical underwater jumps. + + replace_attrs : dict or None + replace global attributes in the metadata after reading the metadata + file in. Helpful when processing runs with only a couple things that + change. + + + Returns + ------- + outname : str + Name of the output netcdf file. + """ deployment = utils._get_deployment(deploymentyaml) + if replace_attrs: + for att in replace_attrs: + deployment['metadata'][att] = replace_attrs[att] metadata = deployment['metadata'] ncvar = deployment['netcdf_variables'] @@ -365,6 +443,25 @@ def raw_to_timeseries( sensor = pl.read_parquet(f'{indir}/{id}-{kind}pld.parquet') sensor = _remove_fill_values(sensor) + # don't use lat/lon if deadreckoned: + if not deadreckon: + if not ncvar['latitude']['source'] == 'Lat': + warnings.warn("For deadreckon=False, it is suggested to use 'Lat' as the source for latitude.") + if not ncvar['longitude']['source'] == 'Lon': + warnings.warn("For deadreckon=False, it is suggested to use 'Lon' as the source for longitude.") + if 'DeadReckoning' in gli.columns: + _log.info('Not using deadreckoning; glider has DeadReckoning column') + gli = _drop_if(gli, todo='Lat', condit='DeadReckoning', value=1) + gli = _drop_if(gli, todo='Lon', condit='DeadReckoning', value=1) + else: + _log.info('Not using deadreckoning; glider does not have DeadReckoning column') + gli = _drop_if(gli, todo='Lat', condit='NavState', value=116) + gli = _drop_if(gli, todo='Lon', condit='NavState', value=116) + # drop a lat/lon if it is 
not unique. Happens when there + # are stale fixes. + gli = _forward_fill(gli, todo='Lat') + gli = _forward_fill(gli, todo='Lon') + # build a new data set based on info in `deploymentyaml.` # We will use ctd as the interpolant ds = xr.Dataset() diff --git a/pyglider/slocum.py b/pyglider/slocum.py index 707953b..a1e3562 100644 --- a/pyglider/slocum.py +++ b/pyglider/slocum.py @@ -763,10 +763,13 @@ def raw_to_timeseries( val = ebd[sensorname] val = utils._zero_screen(val) # val[val==0] = np.nan - val = convert(val) + # convert the raw values once, then re-wrap them with the EBD coords/dims: + val = xr.DataArray(convert(val.values), coords=ebd.coords, + dims=ebd.dims) else: _log.debug('DBD sensorname %s', sensorname) - val = convert(dbd[sensorname]) + val = xr.DataArray(convert(dbd[sensorname].values), coords=dbd.coords, + dims=dbd.dims) val = _dbd2ebd(dbd, ds, val) ncvar['method'] = 'linear fill' # make the attributes: diff --git a/tests/_copyresultstoexpected.py b/tests/_copyresultstoexpected.py index 02a1de2..e1889b6 100644 --- a/tests/_copyresultstoexpected.py +++ b/tests/_copyresultstoexpected.py @@ -7,8 +7,7 @@ todo = { 'example-data/example-seaexplorer/L0-timeseries-test/dfo-eva035-20190718.nc': 'expected/example-seaexplorer/L0-timeseries', 'example-data/example-seaexplorer-raw/L0-timeseries-test/dfo-bb046-20200908.nc': 'expected/example-seaexplorer-raw/L0-timeseries', - 'example-data/example-slocum/L0-timeseries/dfo-rosie713-20190615.nc': 'expected/example-slocum/L0-timeseries', - 'example-data/example-slocum-littleendian/L0-timeseries-test/dfo-maria997-20220614.nc': 'expected/example-slocum-littleendian/L0-timeseries', + 'example-data/example-slocum/L0-timeseries/dfo-rosie713-20190615.nc': 'expected/example-slocum/L0-timeseries' } for td in todo: diff --git a/tests/example-data/example-seaexplorer/deploymentRealtime.yml b/tests/example-data/example-seaexplorer/deploymentRealtime.yml index 3893c68..569b690 100644 --- a/tests/example-data/example-seaexplorer/deploymentRealtime.yml +++ 
b/tests/example-data/example-seaexplorer/deploymentRealtime.yml @@ -89,7 +89,7 @@ netcdf_variables: coordinates: time depth latitude longitude latitude: - source: NAV_LATITUDE + source: Lat long_name: latitude standard_name: latitude units: degrees_north @@ -105,7 +105,7 @@ netcdf_variables: coordinate_reference_frame: urn:ogc:crs:EPSG::4326 longitude: - source: NAV_LONGITUDE + source: Lon long_name: longitude standard_name: longitude units: degrees_east diff --git a/tests/example-data/example-seaexplorer/process_deploymentRealTime.py b/tests/example-data/example-seaexplorer/process_deploymentRealTime.py index 8eef439..968b0f4 100644 --- a/tests/example-data/example-seaexplorer/process_deploymentRealTime.py +++ b/tests/example-data/example-seaexplorer/process_deploymentRealTime.py @@ -29,7 +29,8 @@ seaexplorer.merge_parquet(rawncdir, rawncdir, deploymentyaml, kind='sub') # Make level-1 timeseries netcdf file from th raw files... - outname = seaexplorer.raw_to_timeseries(rawncdir, l0tsdir, deploymentyaml, kind='sub') + outname = seaexplorer.raw_to_timeseries(rawncdir, l0tsdir, deploymentyaml, kind='sub', + deadreckon=False) ncprocess.extract_timeseries_profiles(outname, profiledir, deploymentyaml) outname2 = ncprocess.make_gridfiles(outname, griddir, deploymentyaml) diff --git a/tests/expected/example-seaexplorer-raw/L0-timeseries/dfo-bb046-20200908.nc b/tests/expected/example-seaexplorer-raw/L0-timeseries/dfo-bb046-20200908.nc index e9684cf..81b6567 100644 Binary files a/tests/expected/example-seaexplorer-raw/L0-timeseries/dfo-bb046-20200908.nc and b/tests/expected/example-seaexplorer-raw/L0-timeseries/dfo-bb046-20200908.nc differ diff --git a/tests/expected/example-seaexplorer/L0-timeseries/dfo-eva035-20190718.nc b/tests/expected/example-seaexplorer/L0-timeseries/dfo-eva035-20190718.nc index dc5b338..92bbee5 100644 Binary files a/tests/expected/example-seaexplorer/L0-timeseries/dfo-eva035-20190718.nc and 
b/tests/expected/example-seaexplorer/L0-timeseries/dfo-eva035-20190718.nc differ diff --git a/tests/expected/example-slocum/L0-timeseries/dfo-rosie713-20190615.nc b/tests/expected/example-slocum/L0-timeseries/dfo-rosie713-20190615.nc index afbdf38..c844674 100644 Binary files a/tests/expected/example-slocum/L0-timeseries/dfo-rosie713-20190615.nc and b/tests/expected/example-slocum/L0-timeseries/dfo-rosie713-20190615.nc differ diff --git a/tests/test_pyglider.py b/tests/test_pyglider.py index f552d12..e33103a 100644 --- a/tests/test_pyglider.py +++ b/tests/test_pyglider.py @@ -98,7 +98,7 @@ def test_example_seaexplorer_interp_nrt(var): seaexplorer.raw_to_rawnc(rawdir, rawncdir, deploymentyaml_raw) seaexplorer.merge_parquet(rawncdir, rawncdir, deploymentyaml_raw, kind='raw') outname_raw = seaexplorer.raw_to_L0timeseries( - rawncdir, l0tsdir, deploymentyaml_raw, kind='raw' + rawncdir, l0tsdir, deploymentyaml_raw, kind='raw', deadreckon=True ) output_raw = xr.open_dataset(outname_raw) # Open test data file @@ -147,7 +147,7 @@ def test_example_seaexplorer_metadata_raw(): ) outname_interp_raw = seaexplorer.raw_to_L0timeseries( - rawncdir, l0tsdir_interp_raw, interp_yaml, kind='raw' + rawncdir, l0tsdir_interp_raw, interp_yaml, kind='raw', deadreckon=True ) output_interp_raw = xr.open_dataset(outname_interp_raw)