Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 98 additions & 1 deletion pyglider/seaexplorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import glob
import logging
import os
import warnings

import numpy as np
import polars as pl
Expand Down Expand Up @@ -338,6 +339,29 @@ def _remove_fill_values(df, fill_value=9999):
return df


def _forward_fill(gli, todo='Lat'):
    """
    Blank out stale repeats of the previous good fix in column *todo*.

    A value is considered stale when it is non-null and equal to the most
    recent preceding non-null value (found via a forward fill); such repeats
    happen when the glider reports an old surface fix more than once.  Stale
    entries are replaced with NaN so only fresh fixes remain.

    Parameters
    ----------
    gli : polars.DataFrame
        Glider navigation table containing the column *todo*.
    todo : str
        Name of the column to de-duplicate.  Default 'Lat'.

    Returns
    -------
    polars.DataFrame
        Copy of *gli* with stale repeats in *todo* set to NaN.
    """
    # Last known good value at each row; shifting by one gives the value
    # that preceded the current row.
    last_good = pl.col(todo).fill_null(strategy="forward").shift(1)
    is_stale = (pl.col(todo) == last_good) & pl.col(todo).is_not_null()
    return gli.with_columns(
        pl.when(is_stale).then(np.nan).otherwise(pl.col(todo)).alias(todo)
    )


def _drop_if(gli, todo='Lat', condit='DeadReckoning', value=1):
    """
    Set entries of column *todo* to NaN wherever column *condit* equals *value*.

    Used, e.g., to discard dead-reckoned latitude/longitude samples
    (``condit='DeadReckoning', value=1``) so positions can instead be
    interpolated between surface fixes.

    Parameters
    ----------
    gli : polars.DataFrame
        The input DataFrame.
    todo : str
        Name of the column whose values are replaced.  Default 'Lat'.
    condit : str
        Name of the column the condition is checked against.
        Default 'DeadReckoning'.
    value : any
        Value in *condit* that triggers replacement.  Default 1.

    Returns
    -------
    polars.DataFrame
        Copy of *gli* with matching rows of *todo* set to NaN.
    """
    masked = (
        pl.when(pl.col(condit) == value)
        .then(np.nan)
        .otherwise(pl.col(todo))
        .alias(todo)
    )
    return gli.with_columns([masked])



def raw_to_timeseries(
indir,
outdir,
Expand All @@ -348,12 +372,66 @@ def raw_to_timeseries(
maxgap=10,
interpolate=False,
fnamesuffix='',
deadreckon=False,
replace_attrs=None
):
"""
A little different than above, for the 4-file version of the data set.
Convert raw seaexplorer data to a timeseries netcdf file.

Parameters
----------
indir : str
Directory with the raw files are kept.

outdir : str
Directory to write the matching ``*.nc`` files.

deploymentyaml : str
YAML text file with deployment information for this glider.

kind : 'raw' or 'sub'
The type of data to process. 'raw' is the full resolution data, 'sub'
is the sub-sampled data. The default is 'raw'. Note that realtime data is
typically sub-sampled.

profile_filt_time : float
Time in seconds to use for filtering the profiles. Default is 100.

profile_min_time : float
Minimum time in seconds for a profile to be considered a valid profile.
Default is 300.

maxgap : float
Maximum gap in seconds to interpolate over. Default is 10.

interpolate : bool
If *True*, interpolate the data to fill in gaps. Default is False.

fnamesuffix : str
Suffix to add to the output file name. Default is ''.

deadreckon : bool
If *True* use the dead reckoning latitude and longitude data from the glider. Default
is *False*, and latitude and longitude are linearly interpolated between surface fixes.
        *False* is recommended, to avoid unphysical underwater position jumps.

replace_attrs : dict or None
replace global attributes in the metadata after reading the metadata
file in. Helpful when processing runs with only a couple things that
change.


Returns
-------
outname : str
Name of the output netcdf file.

"""

deployment = utils._get_deployment(deploymentyaml)
if replace_attrs:
for att in replace_attrs:
deployment['metadata'][att] = replace_attrs[att]

metadata = deployment['metadata']
ncvar = deployment['netcdf_variables']
Expand All @@ -365,6 +443,25 @@ def raw_to_timeseries(
sensor = pl.read_parquet(f'{indir}/{id}-{kind}pld.parquet')
sensor = _remove_fill_values(sensor)

# don't use lat/lon if deadreckoned:
if not deadreckon:
if not ncvar['latitude']['source'] == 'Lat':
warnings.warn("For deadreckon=False, it is suggested to use 'Lat' as the source for latitude.")
if not ncvar['longitude']['source'] == 'Lon':
warnings.warn("For deadreckon=False, it is suggested to use 'Lon' as the source for longitude.")
if 'DeadReckoning' in gli.columns:
_log.info('Not using deadreckoning; glider has DeadReckoning column')
gli = _drop_if(gli, todo='Lat', condit='DeadReckoning', value=1)
gli = _drop_if(gli, todo='Lon', condit='DeadReckoning', value=1)
else:
_log.info('Not using deadreckoning; glider does not have DeadReckoning column')
gli = _drop_if(gli, todo='Lat', condit='NavState', value=116)
gli = _drop_if(gli, todo='Lon', condit='NavState', value=116)
# drop a lat/lon if it is not unique. Happens when there
# are stale fixes.
gli = _forward_fill(gli, todo='Lat')
gli = _forward_fill(gli, todo='Lon')

# build a new data set based on info in `deploymentyaml.`
# We will use ctd as the interpolant
ds = xr.Dataset()
Expand Down
5 changes: 4 additions & 1 deletion pyglider/slocum.py
Original file line number Diff line number Diff line change
Expand Up @@ -763,10 +763,13 @@ def raw_to_timeseries(
val = ebd[sensorname]
val = utils._zero_screen(val)
# val[val==0] = np.nan
val = xr.DataArray(convert(val.values), coords=ebd.coords,
dims=ebd.dims)
val = convert(val)
else:
_log.debug('DBD sensorname %s', sensorname)
val = convert(dbd[sensorname])
val = xr.DataArray(convert(dbd[sensorname].values), coords=dbd.coords,
dims=dbd.dims)
val = _dbd2ebd(dbd, ds, val)
ncvar['method'] = 'linear fill'
# make the attributes:
Expand Down
3 changes: 1 addition & 2 deletions tests/_copyresultstoexpected.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@
todo = {
'example-data/example-seaexplorer/L0-timeseries-test/dfo-eva035-20190718.nc': 'expected/example-seaexplorer/L0-timeseries',
'example-data/example-seaexplorer-raw/L0-timeseries-test/dfo-bb046-20200908.nc': 'expected/example-seaexplorer-raw/L0-timeseries',
'example-data/example-slocum/L0-timeseries/dfo-rosie713-20190615.nc': 'expected/example-slocum/L0-timeseries',
'example-data/example-slocum-littleendian/L0-timeseries-test/dfo-maria997-20220614.nc': 'expected/example-slocum-littleendian/L0-timeseries',
'example-data/example-slocum/L0-timeseries/dfo-rosie713-20190615.nc': 'expected/example-slocum/L0-timeseries'
}

for td in todo:
Expand Down
4 changes: 2 additions & 2 deletions tests/example-data/example-seaexplorer/deploymentRealtime.yml
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ netcdf_variables:
coordinates: time depth latitude longitude

latitude:
source: NAV_LATITUDE
source: Lat
long_name: latitude
standard_name: latitude
units: degrees_north
Expand All @@ -105,7 +105,7 @@ netcdf_variables:
coordinate_reference_frame: urn:ogc:crs:EPSG::4326

longitude:
source: NAV_LONGITUDE
source: Lon
long_name: longitude
standard_name: longitude
units: degrees_east
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@
seaexplorer.merge_parquet(rawncdir, rawncdir, deploymentyaml, kind='sub')

# Make level-1 timeseries netcdf file from the raw files...
outname = seaexplorer.raw_to_timeseries(rawncdir, l0tsdir, deploymentyaml, kind='sub')
outname = seaexplorer.raw_to_timeseries(rawncdir, l0tsdir, deploymentyaml, kind='sub',
deadreckon=False)
ncprocess.extract_timeseries_profiles(outname, profiledir, deploymentyaml)
outname2 = ncprocess.make_gridfiles(outname, griddir, deploymentyaml)

Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
4 changes: 2 additions & 2 deletions tests/test_pyglider.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def test_example_seaexplorer_interp_nrt(var):
seaexplorer.raw_to_rawnc(rawdir, rawncdir, deploymentyaml_raw)
seaexplorer.merge_parquet(rawncdir, rawncdir, deploymentyaml_raw, kind='raw')
outname_raw = seaexplorer.raw_to_L0timeseries(
rawncdir, l0tsdir, deploymentyaml_raw, kind='raw'
rawncdir, l0tsdir, deploymentyaml_raw, kind='raw', deadreckon=True
)
output_raw = xr.open_dataset(outname_raw)
# Open test data file
Expand Down Expand Up @@ -147,7 +147,7 @@ def test_example_seaexplorer_metadata_raw():
)

outname_interp_raw = seaexplorer.raw_to_L0timeseries(
rawncdir, l0tsdir_interp_raw, interp_yaml, kind='raw'
rawncdir, l0tsdir_interp_raw, interp_yaml, kind='raw', deadreckon=True
)
output_interp_raw = xr.open_dataset(outname_interp_raw)

Expand Down
Loading