Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add regrid_Sv functionality to be able to regrid Sv dataset #1291

Draft
wants to merge 7 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ jobs:
- name: Install dev tools
run: python -m pip install -r requirements-dev.txt
- name: Install echopype
run: python -m pip install -e ".[plot]"
run: python -m pip install -e ".[plot,regrid]"
- name: Print installed packages
run: python -m pip list
- name: Copying test data to services
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ jobs:
if: ${{ matrix.python-version == '3.9' }}
run: python -m pip install pytest-github-actions-annotate-failures
- name: Install echopype
run: python -m pip install -e ".[plot]"
run: python -m pip install -e ".[plot,regrid]"
- name: Print installed packages
run: python -m pip list
- name: Copying test data to services
Expand Down
8 changes: 8 additions & 0 deletions echopype/commongrid/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,15 @@
from ..utils.misc import is_package_installed
from .api import compute_MVBS, compute_MVBS_index_binning, compute_NASC

__all__ = [
"compute_MVBS",
"compute_NASC",
"compute_MVBS_index_binning",
]

# Optional dependency, only import
# if scitools-iris is installed
if is_package_installed("iris"):
from .regrid import regrid_Sv # noqa

__all__.append("regrid_Sv")
4 changes: 0 additions & 4 deletions echopype/commongrid/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,7 +403,3 @@ def compute_NASC(
ds_NASC.attrs["geospatial_lon_max"] = round(float(ds_Sv["longitude"].max().values), 5)

return ds_NASC


def regrid():
return 1
179 changes: 179 additions & 0 deletions echopype/commongrid/regrid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
from typing import Literal

import iris # noqa
import iris.cube # noqa
import numpy as np
import xarray as xr
from iris.coords import DimCoord # noqa

from ..utils.prov import echopype_prov_attrs, insert_input_processing_level

FNAME = "filenames"
CHANNEL = "channel"
PING_TIME = "ping_time"
RANGE_SAMPLE = "range_sample"
ECHO_RANGE = "echo_range"
DEPTH = "depth"
Sv_var = "Sv"

# Iris dims
PROJECTION_X = "projection_x_coordinate"
PROJECTION_Y = "projection_y_coordinate"


def regrid_Sv(
input_ds: xr.Dataset,
target_grid: xr.Dataset,
range_var: Literal["echo_range", "depth"] = ECHO_RANGE,
) -> xr.Dataset:
"""
Regrid Sv data to a desired grid

Parameters
----------
input_ds : xr.Dataset
The input dataset containing Sv data
target_grid : xr.Dataset
The target grid to regrid the data to,
this dataset should only contain coordinates
range_var : {'echo_range', 'depth'}
The name of the range variable, by default "echo_range"

Returns
-------
xr.Dataset
The regridded dataset
"""
if FNAME in input_ds.dims:
input_ds = input_ds.drop_dims(FNAME)

# Get target dims
target_dims = _get_iris_dims(target_grid, range_var)

# Regrid each channel separately
ds_list = []
for chan in input_ds[CHANNEL]:
channel_Sv = input_ds.sel(channel=chan)
# Ensure no nans in range_sample
data_range = channel_Sv[range_var].dropna(RANGE_SAMPLE)[RANGE_SAMPLE]
channel_Sv = channel_Sv.sel({RANGE_SAMPLE: data_range})

original_dims = _get_iris_dims(channel_Sv, range_var)
regrid_ds = _regrid_data(channel_Sv[Sv_var].data, original_dims, target_dims)
ds_list.append(regrid_ds)

# Convert back to match input dataset
result_ds = xr.concat(ds_list, dim=CHANNEL).to_dataset(name=Sv_var)
result_ds[Sv_var].attrs = {
**input_ds[Sv_var].attrs,
"actual_range": [
round(float(input_ds[Sv_var].min().values), 2),
round(float(input_ds[Sv_var].max().values), 2),
],
}

# Assign original coordinates back
result_ds = result_ds.assign_coords(
{
CHANNEL: input_ds[CHANNEL],
PING_TIME: (PROJECTION_X, target_grid[PING_TIME].data, input_ds[PING_TIME].attrs),
RANGE_SAMPLE: (
PROJECTION_Y,
np.arange(0, len(target_grid[range_var])),
input_ds[RANGE_SAMPLE].attrs,
),
}
)

# Swap dims back to original
result_ds = result_ds.swap_dims(
{
PROJECTION_Y: RANGE_SAMPLE,
PROJECTION_X: PING_TIME,
}
).drop([PROJECTION_Y, PROJECTION_X])

# Re-attach some variables
result_ds["frequency_nominal"] = input_ds["frequency_nominal"] # re-attach frequency_nominal
result_ds[range_var] = (
(CHANNEL, PING_TIME, RANGE_SAMPLE),
np.array(
[[target_grid[range_var].data] * len(target_grid[PING_TIME])] * len(result_ds[CHANNEL])
),
input_ds[range_var].attrs,
)
# Add water level if it exists in Sv dataset
water_level = "water_level"
if range_var == ECHO_RANGE and water_level in input_ds.data_vars:
result_ds[water_level] = input_ds[water_level]

# Add provenance related attributes
prov_dict = echopype_prov_attrs(process_type="processing")
prov_dict["processing_function"] = "commongrid.regrid_Sv"
result_ds = result_ds.assign_attrs(prov_dict)
result_ds = insert_input_processing_level(result_ds, input_ds=input_ds)

return result_ds


def _get_iris_dims(ds, range_var: Literal["echo_range", "depth"] = "echo_range"):
range_dim = "range"

# Original grid
original_dims = []
for idx, dim in enumerate(ds.dims):
data_array = ds[dim]
kwargs = {}
if dim == PING_TIME:
if not np.issubdtype(data_array.dtype, np.datetime64):
raise TypeError(f"Expected time dimension to be datetime64, got {data_array.dtype}")
data_array = data_array.astype("float64")
standard_name = PROJECTION_X
elif (dim.startswith(range_dim) or dim.endswith(range_dim)) or dim == "depth":
data_array = ds[range_var]
if PING_TIME in data_array.dims:
data_array = data_array.isel({PING_TIME: 0})
standard_name = PROJECTION_Y
else:
raise ValueError(f"Unknown dimension {dim}")
kwargs.update(
{
"standard_name": standard_name,
"long_name": data_array.attrs.get("long_name", None),
# "units": data_array.attrs.get("units", None),
}
)
iris_dim = (DimCoord(data_array, **kwargs), idx)
original_dims.append(iris_dim)
return original_dims


def _regrid_data(data, old_dims, new_dims, regridder=None):
"""
Regrid data with iris regridder

Original code: https://github.com/CRIMAC-WP4-Machine-learning/CRIMAC-classifiers-unet/blob/master/crimac_unet/data_preprocessing/regridding.py#L35-L57
:param data: data to be regridded, 2D or 3D
:param old_dims: old data dimensions (list of Iris DimCoord)
:param new_dims: new data dimensions (list of Iris DimCoord)
:param regridder: iris regrid algorithm
:return:
""" # noqa
orig_cube = iris.cube.Cube(data, dim_coords_and_dims=old_dims)
grid_cube = iris.cube.Cube(
np.zeros([coord[0].shape[0] for coord in new_dims]), dim_coords_and_dims=new_dims
)

try:
orig_cube.coord("projection_y_coordinate").guess_bounds()
orig_cube.coord("projection_x_coordinate").guess_bounds()
grid_cube.coord("projection_y_coordinate").guess_bounds()
grid_cube.coord("projection_x_coordinate").guess_bounds()
except ValueError:
pass

if regridder is None:
regridder = iris.analysis.AreaWeighted(mdtol=1)
regrid = orig_cube.regrid(grid_cube, regridder)
regrid_ds = xr.DataArray.from_iris(regrid)
return regrid_ds
64 changes: 64 additions & 0 deletions echopype/tests/commongrid/test_commongrid_regrid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import pytest
import echopype as ep
import numpy as np
import xarray as xr


@pytest.mark.integration
def test_regrid_Sv(test_data_samples):
"""
Test running through from open_raw to compute_MVBS.
"""
(
filepath,
sonar_model,
azfp_xml_path,
range_kwargs,
) = test_data_samples
ed = ep.open_raw(filepath, sonar_model, azfp_xml_path)
if ed.sonar_model.lower() == "azfp":
avg_temperature = ed["Environment"]["temperature"].values.mean()
env_params = {
"temperature": avg_temperature,
"salinity": 27.9,
"pressure": 59,
}
range_kwargs["env_params"] = env_params
if "azfp_cal_type" in range_kwargs:
range_kwargs.pop("azfp_cal_type")
Sv = ep.calibrate.compute_Sv(ed, **range_kwargs)

# Setup output grid
depth_data = Sv.echo_range.isel(ping_time=0)
time_data = Sv.ping_time.astype("float64")

# Evenly spaced grid
target_grid = xr.Dataset(
{
"ping_time": (
["ping_time"],
np.linspace(time_data.min().values, time_data.max().values, 300).astype(
"datetime64[ns]"
),
),
"echo_range": (
["echo_range"],
np.linspace(depth_data.min().values, depth_data.max().values, 300),
),
}
)

regridded_Sv = ep.commongrid.regrid_Sv(Sv, target_grid=target_grid)
assert regridded_Sv is not None

# Test to see if values average are close
for channel in regridded_Sv.channel:
original_vals = Sv.sel(channel=channel).Sv.values
regridded_vals = regridded_Sv.sel(channel=channel).Sv.values
assert np.allclose(
np.nanmean(original_vals),
np.nanmean(regridded_vals),
atol=1.0,
rtol=1.0,
equal_nan=True,
)
10 changes: 9 additions & 1 deletion echopype/tests/utils/test_utils_misc.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
import pytest

import numpy as np
from echopype.utils.misc import depth_from_pressure
from echopype.utils.misc import depth_from_pressure, camelcase2snakecase, is_package_installed

def test_is_package_installed():
assert is_package_installed("iris")
assert not is_package_installed("nonexistent_package")

def test_camelcase2snakecase():
assert camelcase2snakecase("HelloWorld") == "hello_world"
assert camelcase2snakecase("MyNameIs") == "my_name_is"
assert camelcase2snakecase("PythonProgramming") == "python_programming"

def test_depth_from_pressure():
# A single pressure value and defaults for the other arguments
Expand Down
5 changes: 5 additions & 0 deletions echopype/utils/misc.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import importlib.util
from typing import List, Optional, Tuple, Union

import numpy as np
Expand All @@ -6,6 +7,10 @@
FloatSequence = Union[List[float], Tuple[float], NDArray[float]]


def is_package_installed(name: str) -> bool:
return importlib.util.find_spec(name) is not None


def camelcase2snakecase(camel_case_str):
"""
Convert string from CamelCase to snake_case
Expand Down
3 changes: 3 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,6 @@ setup_requires =
plot =
matplotlib>=3.7
cmocean

regrid =
scitools-iris>=3.8.1,<4