Skip to content

Commit

Permalink
Use cube chunks for weights in aggregations with smart weights (#6288)
Browse files Browse the repository at this point in the history
* Use cube chunks for smart weights

* Fix docstring for area_weights

* Remove unused code

* Added whatsnew
  • Loading branch information
schlunma authored Jan 30, 2025
1 parent 9d23270 commit 9cf8b3a
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 19 deletions.
7 changes: 6 additions & 1 deletion docs/src/whatsnew/latest.rst
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,11 @@ This document explains the changes made to Iris for this release
:doc:`/developers_guide/release_do_nothing` to be more thorough and apply
lessons learned from recent releases. (:pull:`6062`)

#. `@schlunma`_ made lazy `smart weights
<https://github.com/SciTools/iris/pull/5084>`__ used for cube aggregations
have the same chunks as their parent cube if broadcasting is necessary.
(:issue:`6285`, :pull:`6288`)


.. comment
Whatsnew author names (@github name) in alphabetical order. Note that,
Expand All @@ -115,4 +120,4 @@ This document explains the changes made to Iris for this release
.. _@stefsmeets: https://github.com/stefsmeets

.. comment
Whatsnew resources in alphabetical order:
Whatsnew resources in alphabetical order:
5 changes: 5 additions & 0 deletions lib/iris/analysis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1198,10 +1198,15 @@ def __init__(self, weights, cube):
dim_metadata = cube._dimensional_metadata(weights)
derived_array = dim_metadata._core_values()
if dim_metadata.shape != cube.shape:
if isinstance(derived_array, da.Array):
chunks = cube.lazy_data().chunks
else:
chunks = None
derived_array = iris.util.broadcast_to_shape(
derived_array,
cube.shape,
dim_metadata.cube_dims(cube),
chunks=chunks,
)
derived_units = dim_metadata.units

Expand Down
5 changes: 2 additions & 3 deletions lib/iris/analysis/cartography.py
Original file line number Diff line number Diff line change
Expand Up @@ -405,9 +405,8 @@ def area_weights(cube, normalize=False, compute=True, chunks=None):
If False, return a lazy dask array. If True, return a numpy array.
chunks : tuple, optional
If compute is False and a value is provided, then the result will use
these chunks instead of the same chunks as the cube data. The values
provided here will only be used along dimensions that are not latitude
or longitude.
these chunks. The values provided here will only be used along
dimensions that are not latitude or longitude.
Returns
-------
Expand Down
47 changes: 32 additions & 15 deletions lib/iris/tests/test_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -1192,8 +1192,6 @@ def test_rotate_1d(self):

@_shared_utils.skip_data
class TestAreaWeights:
# Note: chunks is simply ignored for non-lazy data
@pytest.mark.parametrize("chunks", [None, (2, 3)])
@pytest.fixture(autouse=True)
def _setup(self, request):
self.request = request
Expand Down Expand Up @@ -1726,15 +1724,24 @@ def test_weights_in_kwargs(self):
assert kwargs == {"test_kwarg": "test", "weights": "ignored"}


@pytest.mark.parametrize("lazy", [True, False])
class TestWeights:
@pytest.fixture(autouse=True)
def _setup_test_data(self):
self.array_lib = np
self.target_type = np.ndarray
def _setup_test_data(self, lazy):
if lazy:
self.array_lib = da
self.target_type = da.Array
self.chunks = ((2,), (1, 1, 1))
else:
self.array_lib = np
self.target_type = np.ndarray
self.chunks = None
self.create_test_data()

def create_test_data(self):
self.data = self.array_lib.arange(6).reshape(2, 3)
if self.chunks is not None:
self.data = self.data.rechunk(self.chunks)
self.lat = iris.coords.DimCoord(
self.array_lib.array([0, 1]),
standard_name="latitude",
Expand Down Expand Up @@ -1770,13 +1777,17 @@ def test_init_with_array(self):
assert isinstance(weights.units, cf_units.Unit)
assert weights.array is self.data
assert weights.units == "1"
if self.chunks is not None:
assert weights.array.chunks == self.chunks

def test_init_with_cube(self):
weights = _Weights(self.cube, self.cube)
assert isinstance(weights.array, self.target_type)
assert isinstance(weights.units, cf_units.Unit)
assert weights.array is self.data
assert weights.units == "K"
if self.chunks is not None:
assert weights.array.chunks == self.chunks

def test_init_with_str_dim_coord(self):
weights = _Weights("latitude", self.cube)
Expand All @@ -1792,20 +1803,28 @@ def test_init_with_str_aux_coord(self):
assert isinstance(weights.units, cf_units.Unit)
_shared_utils.assert_array_equal(weights.array, [[3, 3, 3], [4, 4, 4]])
assert weights.units == "s"
if self.chunks is not None:
assert weights.array.chunks == self.chunks

def test_init_with_str_ancillary_variable(self):
weights = _Weights("ancvar", self.cube)
assert isinstance(weights.array, self.target_type)
assert isinstance(weights.units, cf_units.Unit)
_shared_utils.assert_array_equal(weights.array, [[5, 6, 7], [5, 6, 7]])
assert weights.units == "kg"
# Chunks of existing array dimensions passed to broadcast_to_shape are
# ignored
if self.chunks is not None:
assert weights.array.chunks == ((2,), (3,))

def test_init_with_str_cell_measure(self):
weights = _Weights("cell_area", self.cube)
assert isinstance(weights.array, self.target_type)
assert isinstance(weights.units, cf_units.Unit)
_shared_utils.assert_array_equal(weights.array, self.data)
assert weights.units == "m2"
if self.chunks is not None:
assert weights.array.chunks == self.chunks

def test_init_with_dim_coord(self):
weights = _Weights(self.lat, self.cube)
Expand All @@ -1821,20 +1840,28 @@ def test_init_with_aux_coord(self):
assert isinstance(weights.units, cf_units.Unit)
_shared_utils.assert_array_equal(weights.array, [[3, 3, 3], [4, 4, 4]])
assert weights.units == "s"
if self.chunks is not None:
assert weights.array.chunks == self.chunks

def test_init_with_ancillary_variable(self):
weights = _Weights(self.ancillary_variable, self.cube)
assert isinstance(weights.array, self.target_type)
assert isinstance(weights.units, cf_units.Unit)
_shared_utils.assert_array_equal(weights.array, [[5, 6, 7], [5, 6, 7]])
assert weights.units == "kg"
# Chunks of existing array dimensions passed to broadcast_to_shape are
# ignored
if self.chunks is not None:
assert weights.array.chunks == ((2,), (3,))

def test_init_with_cell_measure(self):
weights = _Weights(self.cell_measure, self.cube)
assert isinstance(weights.array, self.target_type)
assert isinstance(weights.units, cf_units.Unit)
_shared_utils.assert_array_equal(weights.array, self.data)
assert weights.units == "m2"
if self.chunks is not None:
assert weights.array.chunks == self.chunks

def test_init_with_list(self):
list_in = [0, 1, 2]
Expand All @@ -1845,16 +1872,6 @@ def test_init_with_list(self):
assert weights.units == "1"


class TestWeightsLazy(TestWeights):
"""Repeat tests from ``TestWeights`` with lazy arrays."""

@pytest.fixture(autouse=True)
def _setup_test_data(self):
self.array_lib = da
self.target_type = da.core.Array
self.create_test_data()


def test__Groupby_repr():
groupby_coord = iris.coords.AuxCoord([2000, 2000], var_name="year")
shared_coord = iris.coords.DimCoord(
Expand Down

0 comments on commit 9cf8b3a

Please sign in to comment.