Skip to content

Commit e5c7f11

Browse files
committed
fix: Improve error message for non-existent dimension in groupby reduce (Fixes #10875)
1 parent 3c6b050 commit e5c7f11

File tree

3 files changed

+104
-12
lines changed

3 files changed

+104
-12
lines changed

xarray/core/groupby.py

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -79,12 +79,16 @@ def check_reduce_dims(reduce_dims, dimensions):
7979
if reduce_dims is not ...:
8080
if is_scalar(reduce_dims):
8181
reduce_dims = [reduce_dims]
82-
if any(dim not in dimensions for dim in reduce_dims):
83-
raise ValueError(
84-
f"cannot reduce over dimensions {reduce_dims!r}. expected either '...' "
85-
f"to reduce over all dimensions or one or more of {dimensions!r}. "
86-
f"Alternatively, install the `flox` package. "
87-
)
82+
missing_dims = [dim for dim in reduce_dims if dim not in dimensions]
83+
if missing_dims:
84+
if len(missing_dims) == 1:
85+
raise ValueError(
86+
f"{missing_dims[0]!r} not found in array dimensions {dimensions!r}"
87+
)
88+
else:
89+
raise ValueError(
90+
f"dimensions {missing_dims!r} not found in array dimensions {dimensions!r}"
91+
)
8892

8993

9094
def _codes_to_group_indices(codes: np.ndarray, N: int) -> GroupIndices:
@@ -1108,10 +1112,17 @@ def _flox_reduce(
11081112
# Do this so we raise the same error message whether flox is present or not.
11091113
# Better to control it here than in flox.
11101114
for grouper in self.groupers:
1111-
if any(
1112-
d not in grouper.codes.dims and d not in obj.dims for d in parsed_dim
1113-
):
1114-
raise ValueError(f"cannot reduce over dimensions {dim}.")
1115+
all_dims = set(grouper.codes.dims) | set(obj.dims)
1116+
missing = [d for d in parsed_dim if d not in all_dims]
1117+
if missing:
1118+
if len(missing) == 1:
1119+
raise ValueError(
1120+
f"{missing[0]!r} not found in array dimensions {tuple(obj.dims)!r}"
1121+
)
1122+
else:
1123+
raise ValueError(
1124+
f"dimensions {missing!r} not found in array dimensions {tuple(obj.dims)!r}"
1125+
)
11151126

11161127
has_missing_groups = (
11171128
self.encoded.unique_coord.size != self.encoded.full_index.size

xarray/tests/test_groupby.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -771,10 +771,10 @@ def test_groupby_reduce_dimension_error(array) -> None:
771771
grouped = array.groupby("y")
772772
# assert_identical(array, grouped.mean())
773773

774-
with pytest.raises(ValueError, match=r"cannot reduce over dimensions"):
774+
with pytest.raises(ValueError, match=r"not found in array dimensions"):
775775
grouped.mean("huh")
776776

777-
with pytest.raises(ValueError, match=r"cannot reduce over dimensions"):
777+
with pytest.raises(ValueError, match=r"not found in array dimensions"):
778778
grouped.mean(("x", "y", "asd"))
779779

780780
assert_identical(array.mean("x"), grouped.reduce(np.mean, "x"))

xarray/tests/test_issue_10875.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
"""Test for issue #10875: Clear error message when reducing over non-existent dimension."""
2+
import numpy as np
3+
import pytest
4+
5+
import xarray as xr
6+
7+
8+
class TestGroupbyDimensionError:
    """Tests for clearer error messages in groupby reduce operations.

    Regression tests for issue #10875: reducing a groupby object over a
    dimension that does not exist should name the offending dimension(s)
    using the same "not found in array dimensions" wording as a plain
    (non-groupby) reduction.
    """

    @staticmethod
    def _make_array():
        """Return a 3x3x3 DataArray with ``lon``, ``lat`` and ``time`` dims.

        ``dims`` is passed explicitly so the tests do not depend on the
        dimension names being inferred from the ordering of ``coords``.
        """
        return xr.DataArray(
            np.reshape(range(27), (3, 3, 3)),
            dims=["lon", "lat", "time"],
            coords=dict(
                lon=range(3),
                lat=range(3),
                time=xr.date_range("2025-10-01 00:00", "2025-10-01 02:00", freq="h"),
            ),
        )

    def test_groupby_reduce_missing_dim_single(self) -> None:
        """Groupby reduce with single missing dimension should have clear error."""
        da = self._make_array()

        with pytest.raises(
            ValueError, match=r"'longitude' not found in array dimensions"
        ):
            da.groupby("time").std(dim="longitude")

    def test_groupby_reduce_missing_dim_multiple(self) -> None:
        """Groupby reduce with multiple missing dimensions should list them."""
        da = self._make_array()

        # Require both names to appear in the message, not just the generic
        # suffix; otherwise the test would pass even if none were listed.
        with pytest.raises(
            ValueError,
            match=r"longitude.*latitude.*not found in array dimensions",
        ):
            da.groupby("time").std(dim=["longitude", "latitude"])

    def test_standard_reduce_error_matches(self) -> None:
        """Standard reduce and groupby reduce should have similar error format."""
        da = self._make_array()

        # pytest.raises fails the test cleanly if no ValueError is raised,
        # instead of falling through to a confusing TypeError on ``None``.
        with pytest.raises(ValueError) as standard_exc:
            da.std(dim="longitude")

        with pytest.raises(ValueError) as groupby_exc:
            da.groupby("time").std(dim="longitude")

        for excinfo in (standard_exc, groupby_exc):
            message = str(excinfo.value)
            assert "longitude" in message
            assert "not found in array dimensions" in message

    def test_groupby_reduce_valid_dim_still_works(self) -> None:
        """Ensure valid dimensions still work correctly."""
        da = self._make_array()

        result = da.groupby("time").std(dim="lon")

        # The reduced dimension is gone; the untouched one is preserved.
        assert "lon" not in result.dims
        assert "lat" in result.dims

0 commit comments

Comments
 (0)