From 2fca48fdca35d40c202508f821f7c44c9471bccb Mon Sep 17 00:00:00 2001 From: Landung 'Don' Setiawan Date: Thu, 28 Mar 2024 15:33:32 -0700 Subject: [PATCH 1/7] chore(deps): Add optional regrid dependency of scitools-iris>=3.8.1 --- setup.cfg | 3 +++ 1 file changed, 3 insertions(+) diff --git a/setup.cfg b/setup.cfg index 67972f4db..baa4e95e0 100644 --- a/setup.cfg +++ b/setup.cfg @@ -37,3 +37,6 @@ setup_requires = plot = matplotlib>=3.7 cmocean + +regrid = + scitools-iris>=3.8.1,<4 From 059dd5fb44ff7a42a95f814c076941ca1ff5fb85 Mon Sep 17 00:00:00 2001 From: Landung 'Don' Setiawan Date: Thu, 28 Mar 2024 15:35:59 -0700 Subject: [PATCH 2/7] feat: Add initial regrid_Sv implementation --- echopype/commongrid/__init__.py | 8 ++ echopype/commongrid/api.py | 4 - echopype/commongrid/regrid.py | 174 ++++++++++++++++++++++++++++++++ echopype/utils/misc.py | 5 + 4 files changed, 187 insertions(+), 4 deletions(-) create mode 100644 echopype/commongrid/regrid.py diff --git a/echopype/commongrid/__init__.py b/echopype/commongrid/__init__.py index 63172d2bd..8296ce899 100644 --- a/echopype/commongrid/__init__.py +++ b/echopype/commongrid/__init__.py @@ -1,3 +1,4 @@ +from ..utils.misc import is_package_installed from .api import compute_MVBS, compute_MVBS_index_binning, compute_NASC __all__ = [ @@ -5,3 +6,10 @@ "compute_NASC", "compute_MVBS_index_binning", ] + +# Optional dependency, only import +# if scitools-iris is installed +if is_package_installed("iris"): + from .regrid import regrid_Sv # noqa + + __all__.append("regrid_Sv") diff --git a/echopype/commongrid/api.py b/echopype/commongrid/api.py index 396a943be..3fe986a2f 100644 --- a/echopype/commongrid/api.py +++ b/echopype/commongrid/api.py @@ -403,7 +403,3 @@ def compute_NASC( ds_NASC.attrs["geospatial_lon_max"] = round(float(ds_Sv["longitude"].max().values), 5) return ds_NASC - - -def regrid(): - return 1 diff --git a/echopype/commongrid/regrid.py b/echopype/commongrid/regrid.py new file mode 100644 index 000000000..f12328106 --- /dev/null +++ b/echopype/commongrid/regrid.py @@ -0,0 +1,174 @@ +from typing import Literal + +import iris # noqa +import numpy as np +import xarray as xr +from iris.coords import DimCoord # noqa + +from ..utils.prov import echopype_prov_attrs, insert_input_processing_level + +FNAME = "filenames" +CHANNEL = "channel" +PING_TIME = "ping_time" +RANGE_SAMPLE = "range_sample" +ECHO_RANGE = "echo_range" +DEPTH = "depth" +Sv_var = "Sv" + +# Iris dims +PROJECTION_X = "projection_x_coordinate" +PROJECTION_Y = "projection_y_coordinate" + + +def regrid_Sv( + input_ds: xr.Dataset, + target_grid: xr.Dataset, + range_var: Literal["echo_range", "depth"] = ECHO_RANGE, +) -> xr.Dataset: + """ + Regrid Sv data to a desired grid + + Parameters + ---------- + input_ds : xr.Dataset + The input dataset containing Sv data + target_grid : xr.Dataset + The target grid to regrid the data to, + this dataset should only contain coordinates + range_var : {'echo_range', 'depth'} + The name of the range variable, by default "echo_range" + + Returns + ------- + xr.Dataset + The regridded dataset + """ + if FNAME in input_ds.dims: + input_ds = input_ds.drop_dims(FNAME) + + # Get target dims + target_dims = _get_iris_dims(target_grid, range_var) + + # Regrid each channel separately + ds_list = [] + for chan in input_ds[CHANNEL]: + channel_Sv = input_ds.sel(channel=chan) + original_dims = _get_iris_dims(channel_Sv, range_var) + regrid_ds = _regrid_data(channel_Sv[Sv_var].data, original_dims, target_dims) + ds_list.append(regrid_ds) + + # Convert back to match input dataset + result_ds = xr.concat(ds_list, dim=CHANNEL).to_dataset(name=Sv_var) + result_ds[Sv_var].attrs = { + **input_ds[Sv_var].attrs, + "actual_range": [ + round(float(input_ds[Sv_var].min().values), 2), + round(float(input_ds[Sv_var].max().values), 2), + ], + } + + # Assign original coordinates back + result_ds = result_ds.assign_coords( + { + CHANNEL: input_ds[CHANNEL], + PING_TIME: (PROJECTION_X, target_grid[PING_TIME].data, input_ds[PING_TIME].attrs), + RANGE_SAMPLE: ( + PROJECTION_Y, + np.arange(0, len(target_grid[range_var])), + input_ds[RANGE_SAMPLE].attrs, + ), + } + ) + + # Swap dims back to original + result_ds = result_ds.swap_dims( + { + PROJECTION_Y: RANGE_SAMPLE, + PROJECTION_X: PING_TIME, + } + ).drop([PROJECTION_Y, PROJECTION_X]) + + # Re-attach some variables + result_ds["frequency_nominal"] = input_ds["frequency_nominal"] # re-attach frequency_nominal + result_ds[range_var] = ( + (CHANNEL, PING_TIME, RANGE_SAMPLE), + np.array( + [[target_grid[range_var].data] * len(target_grid[PING_TIME])] * len(result_ds[CHANNEL]) + ), + input_ds[range_var].attrs, + ) + # Add water level if it exists in Sv dataset + water_level = "water_level" + if range_var == ECHO_RANGE and water_level in input_ds.data_vars: + result_ds[water_level] = input_ds[water_level] + + # Add provenance related attributes + prov_dict = echopype_prov_attrs(process_type="processing") + prov_dict["processing_function"] = "commongrid.regrid_Sv" + result_ds = result_ds.assign_attrs(prov_dict) + result_ds = insert_input_processing_level(result_ds, input_ds=input_ds) + + return result_ds + + +def _get_iris_dims(ds, range_var: Literal["echo_range", "depth"] = "echo_range"): + range_dim = "range" + + # Original grid + original_dims = [] + for idx, dim in enumerate(ds.dims): + data_array = ds[dim] + kwargs = {} + if dim == PING_TIME: + if not np.issubdtype(data_array.dtype, np.datetime64): + raise TypeError(f"Expected time dimension to be datetime64, got {data_array.dtype}") + data_array = data_array.astype("float64") + standard_name = PROJECTION_X + elif (dim.startswith(range_dim) or dim.endswith(range_dim)) or dim == "depth": + data_array = ds[range_var] + if PING_TIME in data_array.dims: + data_array = data_array.isel({PING_TIME: 0}) + standard_name = PROJECTION_Y + else: + raise ValueError(f"Unknown dimension {dim}") + kwargs.update( + { + "standard_name": standard_name, + "long_name": data_array.attrs.get("long_name", None), + # "units": data_array.attrs.get("units", None), + } + ) + iris_dim = (DimCoord(data_array, **kwargs), idx) + original_dims.append(iris_dim) + return original_dims + + +def _regrid_data(data, old_dims, new_dims, regridder=None): + """ + Regrid data with iris regridder + + Original code: https://github.com/CRIMAC-WP4-Machine-learning/CRIMAC-classifiers-unet/blob/master/crimac_unet/data_preprocessing/regridding.py#L35-L57 + :param data: data to be regridded, 2D or 3D + :param old_dims: old data dimensions (list of Iris DimCoord) + :param new_dims: new data dimensions (list of Iris DimCoord) + :param regridder: iris regrid algorithm + :return: + """ # noqa + orig_cube = iris.cube.Cube(data, dim_coords_and_dims=old_dims) + grid_cube = iris.cube.Cube( + np.zeros([coord[0].shape[0] for coord in new_dims]), dim_coords_and_dims=new_dims + ) + + try: + orig_cube.coord("projection_y_coordinate").guess_bounds() + orig_cube.coord("projection_x_coordinate").guess_bounds() + grid_cube.coord("projection_y_coordinate").guess_bounds() + grid_cube.coord("projection_x_coordinate").guess_bounds() + except ValueError: + pass + + if regridder is None: + regridder = iris.analysis.AreaWeighted(mdtol=1) + regrid = orig_cube.regrid(grid_cube, regridder) + regrid_ds = xr.DataArray.from_iris(regrid) + return regrid_ds diff --git a/echopype/utils/misc.py b/echopype/utils/misc.py index c4aab43c8..ef2dbbfed 100644 --- a/echopype/utils/misc.py +++ b/echopype/utils/misc.py @@ -1,3 +1,4 @@ +import importlib.util from typing import List, Optional, Tuple, Union import numpy as np @@ -6,6 +7,10 @@ FloatSequence = Union[List[float], Tuple[float], NDArray[float]] +def is_package_installed(name: str) -> bool: + return importlib.util.find_spec(name) is not None + + def camelcase2snakecase(camel_case_str): """ Convert string from CamelCase to snake_case From 0ce4b05ad8a34f926cc1cf00c47eebbb51c91206 Mon Sep 17 00:00:00 2001 From: Landung 'Don' Setiawan Date: Fri, 29 Mar 2024 14:13:03 -0700 Subject: [PATCH 3/7] fix: Add iris.cube import and initial integration test --- echopype/commongrid/regrid.py | 1 + .../commongrid/test_commongrid_regrid.py | 47 +++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 echopype/tests/commongrid/test_commongrid_regrid.py diff --git a/echopype/commongrid/regrid.py b/echopype/commongrid/regrid.py index f12328106..7fdbc654d 100644 --- a/echopype/commongrid/regrid.py +++ b/echopype/commongrid/regrid.py @@ -1,6 +1,7 @@ from typing import Literal import iris # noqa +import iris.cube # noqa import numpy as np import xarray as xr from iris.coords import DimCoord # noqa diff --git a/echopype/tests/commongrid/test_commongrid_regrid.py b/echopype/tests/commongrid/test_commongrid_regrid.py new file mode 100644 index 000000000..48eca63dd --- /dev/null +++ b/echopype/tests/commongrid/test_commongrid_regrid.py @@ -0,0 +1,47 @@ +import pytest +import echopype as ep +import numpy as np +import xarray as xr + +@pytest.mark.integration +def test_regrid_Sv(test_data_samples): + """ + Test running through from open_raw to compute_MVBS. + """ + ( + filepath, + sonar_model, + azfp_xml_path, + range_kwargs, + ) = test_data_samples + ed = ep.open_raw(filepath, sonar_model, azfp_xml_path) + if ed.sonar_model.lower() == "azfp": + avg_temperature = ed["Environment"]["temperature"].values.mean() + env_params = { + "temperature": avg_temperature, + "salinity": 27.9, + "pressure": 59, + } + range_kwargs["env_params"] = env_params + if "azfp_cal_type" in range_kwargs: + range_kwargs.pop("azfp_cal_type") + Sv = ep.calibrate.compute_Sv(ed, **range_kwargs) + + # Setup output grid + channel_Sv = Sv.isel(channel=0) + depth_data = channel_Sv.echo_range.isel(ping_time=0).data + time_data = channel_Sv.ping_time.data.astype('float64') + # Evenly spaced grid + target_grid = xr.Dataset({ + "ping_time": (["ping_time"], np.linspace(time_data[0], time_data[-1], 300).astype('datetime64[ns]')), + "echo_range": (["echo_range"], np.linspace(depth_data[0], depth_data[-1], 300)), + }) + + regridded_Sv = ep.commongrid.regrid_Sv(Sv, target_grid=target_grid) + assert regridded_Sv is not None + + # Test to see if values average are close + for channel in regridded_Sv.channel: + original_vals = Sv.sel(channel=channel).Sv.values + regridded_vals = regridded_Sv.sel(channel=channel).Sv.values + assert np.allclose(np.nanmean(original_vals), np.nanmean(regridded_vals), atol=1.0, rtol=1.0) From 8ada53e545658e7e4a64f655f2ae3809b7bdf289 Mon Sep 17 00:00:00 2001 From: Landung 'Don' Setiawan Date: Mon, 1 Apr 2024 09:13:03 -0700 Subject: [PATCH 4/7] ci: Add regrid optional to be installed for testing --- .github/workflows/build.yaml | 2 +- .github/workflows/pr.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 69b0659a6..935efe777 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -52,7 +52,7 @@ jobs: - name: Install dev tools run: python -m pip install -r requirements-dev.txt - name: Install echopype - run: python -m pip install -e ".[plot]" + run: python -m pip install -e ".[plot,regrid]" - name: Print installed packages run: python -m pip list - name: Copying test data to services diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index b44acb8b7..fae47bcf4 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -53,7 +53,7 @@ jobs: if: ${{ matrix.python-version == '3.9' }} run: python -m pip install pytest-github-actions-annotate-failures - name: Install echopype - run: python -m pip install -e ".[plot]" + run: python -m pip install -e ".[plot,regrid]" - name: Print installed packages run: python -m pip list - name: Copying test data to services From 9f31885aa8c667e8e2e286452261dab626ad969b Mon Sep 17 00:00:00 2001 From: Landung 'Don' Setiawan Date: Mon, 1 Apr 2024 12:15:31 -0700 Subject: [PATCH 5/7] feat: Update regrid_Sv to handle complex samples --- echopype/commongrid/regrid.py | 4 ++ .../commongrid/test_commongrid_regrid.py | 37 ++++++++++++++----- 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/echopype/commongrid/regrid.py b/echopype/commongrid/regrid.py index 7fdbc654d..35473df25 100644 --- a/echopype/commongrid/regrid.py +++ b/echopype/commongrid/regrid.py @@ -54,6 +54,10 @@ def regrid_Sv( ds_list = [] for chan in input_ds[CHANNEL]: channel_Sv = input_ds.sel(channel=chan) + # Ensure no nans in range_sample + data_range = channel_Sv[range_var].dropna(RANGE_SAMPLE)[RANGE_SAMPLE] + channel_Sv = channel_Sv.sel({RANGE_SAMPLE: data_range}) + original_dims = _get_iris_dims(channel_Sv, range_var) regrid_ds = _regrid_data(channel_Sv[Sv_var].data, original_dims, target_dims) ds_list.append(regrid_ds) diff --git a/echopype/tests/commongrid/test_commongrid_regrid.py b/echopype/tests/commongrid/test_commongrid_regrid.py index 48eca63dd..3cea2fb91 100644 --- a/echopype/tests/commongrid/test_commongrid_regrid.py +++ b/echopype/tests/commongrid/test_commongrid_regrid.py @@ -3,6 +3,7 @@ import numpy as np import xarray as xr + @pytest.mark.integration def test_regrid_Sv(test_data_samples): """ @@ -26,17 +27,29 @@ def test_regrid_Sv(test_data_samples): if "azfp_cal_type" in range_kwargs: range_kwargs.pop("azfp_cal_type") Sv = ep.calibrate.compute_Sv(ed, **range_kwargs) - + # Setup output grid - channel_Sv = Sv.isel(channel=0) + channel_Sv = Sv.isel(channel=1) depth_data = channel_Sv.echo_range.isel(ping_time=0).data - time_data = channel_Sv.ping_time.data.astype('float64') + time_data = channel_Sv.ping_time.data.astype("float64") + # If there are NaNs in the depth data, remove them + if np.isnan(depth_data).any(): + depth_data = depth_data[~np.isnan(depth_data)] + # Evenly spaced grid - target_grid = xr.Dataset({ - "ping_time": (["ping_time"], np.linspace(time_data[0], time_data[-1], 300).astype('datetime64[ns]')), - "echo_range": (["echo_range"], np.linspace(depth_data[0], depth_data[-1], 300)), - }) - + target_grid = xr.Dataset( + { + "ping_time": ( + ["ping_time"], + np.linspace(np.min(time_data), np.max(time_data), 300).astype("datetime64[ns]"), + ), + "echo_range": ( + ["echo_range"], + np.linspace(np.min(depth_data), np.max(depth_data), 300), + ), + } + ) + regridded_Sv = ep.commongrid.regrid_Sv(Sv, target_grid=target_grid) assert regridded_Sv is not None @@ -44,4 +57,10 @@ def test_regrid_Sv(test_data_samples): for channel in regridded_Sv.channel: original_vals = Sv.sel(channel=channel).Sv.values regridded_vals = regridded_Sv.sel(channel=channel).Sv.values - assert np.allclose(np.nanmean(original_vals), np.nanmean(regridded_vals), atol=1.0, rtol=1.0) + assert np.allclose( + np.nanmean(original_vals), + np.nanmean(regridded_vals), + atol=1.0, + rtol=1.0, + equal_nan=True, + ) From bb257cc24b2d17769ddb9e614cfe1b2d7b678d9a Mon Sep 17 00:00:00 2001 From: Landung 'Don' Setiawan Date: Mon, 1 Apr 2024 13:17:08 -0700 Subject: [PATCH 6/7] test: Add test for misc utils --- echopype/tests/utils/test_utils_misc.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/echopype/tests/utils/test_utils_misc.py b/echopype/tests/utils/test_utils_misc.py index 5765acac9..3994d1b86 100644 --- a/echopype/tests/utils/test_utils_misc.py +++ b/echopype/tests/utils/test_utils_misc.py @@ -1,8 +1,16 @@ import pytest import numpy as np -from echopype.utils.misc import depth_from_pressure +from echopype.utils.misc import depth_from_pressure, camelcase2snakecase, is_package_installed +def test_is_package_installed(): + assert is_package_installed("iris") + assert not is_package_installed("nonexistent_package") + +def test_camelcase2snakecase(): + assert camelcase2snakecase("HelloWorld") == "hello_world" + assert camelcase2snakecase("MyNameIs") == "my_name_is" + assert camelcase2snakecase("PythonProgramming") == "python_programming" def test_depth_from_pressure(): # A single pressure value and defaults for the other arguments From 1411c69523539a0c6bf52e639d2b7828ababf7dd Mon Sep 17 00:00:00 2001 From: Landung 'Don' Setiawan Date: Mon, 1 Apr 2024 13:46:56 -0700 Subject: [PATCH 7/7] test: Tweak test out grid to use dimension data extent --- .../tests/commongrid/test_commongrid_regrid.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/echopype/tests/commongrid/test_commongrid_regrid.py b/echopype/tests/commongrid/test_commongrid_regrid.py index 3cea2fb91..cf9fa2941 100644 --- a/echopype/tests/commongrid/test_commongrid_regrid.py +++ b/echopype/tests/commongrid/test_commongrid_regrid.py @@ -29,23 +29,21 @@ def test_regrid_Sv(test_data_samples): Sv = ep.calibrate.compute_Sv(ed, **range_kwargs) # Setup output grid - channel_Sv = Sv.isel(channel=1) - depth_data = channel_Sv.echo_range.isel(ping_time=0).data - time_data = channel_Sv.ping_time.data.astype("float64") - # If there are NaNs in the depth data, remove them - if np.isnan(depth_data).any(): - depth_data = depth_data[~np.isnan(depth_data)] + depth_data = Sv.echo_range.isel(ping_time=0) + time_data = Sv.ping_time.astype("float64") # Evenly spaced grid target_grid = xr.Dataset( { "ping_time": ( ["ping_time"], - np.linspace(np.min(time_data), np.max(time_data), 300).astype("datetime64[ns]"), + np.linspace(time_data.min().values, time_data.max().values, 300).astype( + "datetime64[ns]" + ), ), "echo_range": ( ["echo_range"], - np.linspace(np.min(depth_data), np.max(depth_data), 300), + np.linspace(depth_data.min().values, depth_data.max().values, 300), ), } )