diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 28c47d61e11..68f0a0b7aee 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -28,6 +28,12 @@ New Features Breaking Changes ~~~~~~~~~~~~~~~~ +- :py:meth:`Dataset.identical`, :py:meth:`DataArray.identical`, and + :py:func:`testing.assert_identical` now compare indexes (xindexes). + Two objects with identical data but different indexes will no longer + be considered identical (:issue:`11033` :pull:`11035`). + By `Ian Hunt-Isaak `_. + Deprecations ~~~~~~~~~~~~ @@ -39,6 +45,10 @@ Bug Fixes - Ensure that ``keep_attrs='drop'`` and ``keep_attrs=False`` remove attrs from result, even when there is only one xarray object given to ``apply_ufunc`` (:issue:`10982` :pull:`10997`). By `Julia Signell `_. +- :py:meth:`~xarray.indexes.RangeIndex.equals` now uses floating point error tolerant + ``np.isclose`` by default to handle accumulated floating point errors from + slicing operations. Use ``exact=True`` for exact comparison (:pull:`11035`). + By `Ian Hunt-Isaak `_. Documentation ~~~~~~~~~~~~~ diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 80f7cb6d011..320b283431b 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -4677,6 +4677,13 @@ def from_iris(cls, cube: iris_Cube) -> Self: def _all_compat(self, other: Self, compat_str: str) -> bool: """Helper function for equals, broadcast_equals, and identical""" + # For identical, also compare indexes + if compat_str == "identical": + from xarray.core.indexes import indexes_identical + + if not indexes_identical(self.xindexes, other.xindexes): + return False + def compat(x, y): return getattr(x.variable, compat_str)(y.variable) @@ -4796,8 +4803,8 @@ def equals(self, other: Self) -> bool: return False def identical(self, other: Self) -> bool: - """Like equals, but also checks the array name and attributes, and - attributes on all coordinates.
+ """Like equals, but also checks the array name, attributes, + attributes on all coordinates, and indexes. Parameters ---------- diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 10b7070736b..01baa9aed3d 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1514,6 +1514,13 @@ def _all_compat( if not callable(compat): compat_str = compat + # For identical, also compare indexes + if compat_str == "identical": + from xarray.core.indexes import indexes_identical + + if not indexes_identical(self.xindexes, other.xindexes): + return False + # some stores (e.g., scipy) do not seem to preserve order, so don't # require matching order for equality def compat(x: Variable, y: Variable) -> bool: @@ -1672,8 +1679,8 @@ def equals(self, other: Self) -> bool: return False def identical(self, other: Self) -> bool: - """Like equals, but also checks all dataset attributes and the - attributes on all variables and coordinates. + """Like equals, but also checks all dataset attributes, the + attributes on all variables and coordinates, and indexes. 
Example ------- diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 155428ea8e0..25437be0990 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -973,6 +973,60 @@ def _compat_to_str(compat): return compat +def diff_indexes_repr(a_indexes, b_indexes, col_width: int = 20) -> str: + """Generate diff representation for indexes.""" + a_keys = set(a_indexes.keys()) + b_keys = set(b_indexes.keys()) + + summary = [] + + if only_a := a_keys - b_keys: + summary.append(f"Indexes only on the left object: {sorted(only_a)}") + + if only_b := b_keys - a_keys: + summary.append(f"Indexes only on the right object: {sorted(only_b)}") + + # Check for indexes on the same coordinates but with different types or values + common_keys = a_keys & b_keys + diff_items = [] + + for key in sorted(common_keys): + a_idx = a_indexes[key] + b_idx = b_indexes[key] + + # Check if indexes differ + indexes_equal = False + if type(a_idx) is type(b_idx): + try: + indexes_equal = a_idx.equals(b_idx) + except NotImplementedError: + # Fall back to variable comparison + a_var = a_indexes.variables[key] + b_var = b_indexes.variables[key] + indexes_equal = a_var.equals(b_var) + + if not indexes_equal: + # Format the index values similar to variable diff + try: + a_repr = inline_index_repr( + a_indexes.to_pandas_indexes()[key], max_width=70 + ) + b_repr = inline_index_repr( + b_indexes.to_pandas_indexes()[key], max_width=70 + ) + except TypeError: + # Custom indexes may not support to_pandas_index() + a_repr = repr(a_idx) + b_repr = repr(b_idx) + diff_items.append(f"L {key!s:<{col_width}} {a_repr}") + diff_items.append(f"R {key!s:<{col_width}} {b_repr}") + + if diff_items: + summary.append("Differing indexes:\n" + "\n".join(diff_items)) + + return "\n".join(summary) + + def diff_array_repr(a, b, compat): # used for DataArray, Variable and IndexVariable summary = [ @@ -1002,10 +1056,14 @@ def diff_array_repr(a, b, compat): ): summary.append(coords_diff) - if compat == 
"identical" and ( - attrs_diff := diff_attrs_repr(a.attrs, b.attrs, compat) - ): - summary.append(attrs_diff) + if compat == "identical": + if hasattr(a, "xindexes") and ( + indexes_diff := diff_indexes_repr(a.xindexes, b.xindexes) + ): + summary.append(indexes_diff) + + if attrs_diff := diff_attrs_repr(a.attrs, b.attrs, compat): + summary.append(attrs_diff) return "\n".join(summary) @@ -1043,10 +1101,12 @@ def diff_dataset_repr(a, b, compat): ): summary.append(data_diff) - if compat == "identical" and ( - attrs_diff := diff_attrs_repr(a.attrs, b.attrs, compat) - ): - summary.append(attrs_diff) + if compat == "identical": + if indexes_diff := diff_indexes_repr(a.xindexes, b.xindexes): + summary.append(indexes_diff) + + if attrs_diff := diff_attrs_repr(a.attrs, b.attrs, compat): + summary.append(attrs_diff) return "\n".join(summary) diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index c4f091446c3..3438f95c794 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -2015,6 +2015,60 @@ def indexes_equal( return cast(bool, equal) +def indexes_identical( + a_indexes: Indexes[Index], + b_indexes: Indexes[Index], +) -> bool: + """Check if two Indexes objects are identical. + + Two Indexes objects are identical if they have the same set of + indexed coordinate names, each corresponding pair of indexes are + the same type, and are equal (using Index.equals()). + + Unlike indexes_equal(), this function does NOT fall back to variable + comparison when index types differ - different index types means + not identical. + + Parameters + ---------- + a_indexes : Indexes + First Indexes object to compare. + b_indexes : Indexes + Second Indexes object to compare. + + Returns + ------- + bool + True if the two Indexes objects are identical. 
+ """ + # Must have same indexed coordinate names + if set(a_indexes.keys()) != set(b_indexes.keys()): + return False + + # Compare each index pair + # Note: could optimize for PandasMultiIndex where multiple coord names + # share the same index object, but this is not performance critical + for coord_name in a_indexes.keys(): + a_idx = a_indexes[coord_name] + b_idx = b_indexes[coord_name] + + # For identical(), index types must match + if type(a_idx) is not type(b_idx): + return False + + try: + if not a_idx.equals(b_idx): + return False + except NotImplementedError: + # Fall back to variable comparison when equals() not implemented + a_var = a_indexes.variables[coord_name] + b_var = b_indexes.variables[coord_name] + if not a_var.equals(b_var): + return False + + return True + + def indexes_all_equal( elements: Sequence[tuple[Index, dict[Hashable, Variable]]], exclude_dims: frozenset[Hashable], diff --git a/xarray/indexes/range_index.py b/xarray/indexes/range_index.py index ab619c6f722..0a402ce663f 100644 --- a/xarray/indexes/range_index.py +++ b/xarray/indexes/range_index.py @@ -75,14 +75,45 @@ def reverse(self, coord_labels: dict[Hashable, Any]) -> dict[str, Any]: return {self.dim: positions} def equals( - self, other: CoordinateTransform, exclude: frozenset[Hashable] | None = None + self, + other: CoordinateTransform, + exclude: frozenset[Hashable] | None = None, + *, + exact: bool = False, ) -> bool: + """Check equality with another RangeCoordinateTransform. + + Parameters + ---------- + other : CoordinateTransform + The other transform to compare with. + exclude : frozenset of hashable, optional + Dimensions excluded from checking (unused for 1D RangeIndex). + exact : bool, default False + If False (default), use np.isclose() for floating point comparisons + to handle accumulated floating point errors from slicing operations. + If True, require exact equality of start and stop values. 
+ + Returns + ------- + bool + True if the transforms are equal, False otherwise. + """ if not isinstance(other, RangeCoordinateTransform): return False - return ( - self.start == other.start - and self.stop == other.stop + if exact: + return ( + self.start == other.start + and self.stop == other.stop + and self.size == other.size + ) + + # Use np.isclose for floating point comparisons to handle accumulated + # floating point errors (e.g., from slicing operations) + return bool( + np.isclose(self.start, other.start) + and np.isclose(self.stop, other.stop) and self.size == other.size ) @@ -130,6 +161,35 @@ class RangeIndex(CoordinateTransformIndex): def __init__(self, transform: RangeCoordinateTransform): super().__init__(transform) + def equals( + self, + other: "Index", + *, + exclude: frozenset[Hashable] | None = None, + exact: bool = False, + ) -> bool: + """Check equality with another RangeIndex. + + Parameters + ---------- + other : Index + The other index to compare with. + exclude : frozenset of hashable, optional + Dimensions excluded from checking (unused for 1D RangeIndex). + exact : bool, default False + If False (default), use np.isclose() for floating point comparisons + to handle accumulated floating point errors from slicing operations. + If True, require exact equality of start and stop values. + + Returns + ------- + bool + True if the indexes are equal, False otherwise. 
+ """ + if not isinstance(other, RangeIndex): + return False + return self.transform.equals(other.transform, exclude=exclude, exact=exact) + @classmethod def arange( cls, diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 4adbb63cff6..90cc18cf8d9 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -332,9 +332,45 @@ def assert_equal(a, b, check_default_indexes=True): xarray.testing._assert_internal_invariants(b, check_default_indexes) -def assert_identical(a, b, check_default_indexes=True): +def assert_identical(a, b, check_default_indexes=True, check_indexes=None): + """Assert that two xarray objects are identical. + + This is a test-internal wrapper around xarray.testing.assert_identical + that also validates internal invariants. + + Parameters + ---------- + a, b : xarray objects + Objects to compare. + check_default_indexes : bool, default True + If True, validates that 1D dimension coordinates have default indexes + (internal invariant check). Set to False for objects that intentionally + lack default indexes. + check_indexes : bool, optional + If not specified (default), defaults to the value of check_default_indexes + for backwards compatibility. + If True (default), compare indexes as part of identity check. + If False, skip index comparison (only check data, attrs, names). 
+ """ __tracebackhide__ = True - xarray.testing.assert_identical(a, b) + # For backwards compatibility, check_default_indexes=False implies check_indexes=False + # unless check_indexes is explicitly specified + if check_indexes is None: + check_indexes = check_default_indexes + if check_indexes: + xarray.testing.assert_identical(a, b) + else: + # Drop all indexes before comparing to skip index comparison + from xarray import DataArray, Dataset + + if isinstance(a, Dataset | DataArray): + a_no_idx = a.drop_indexes(list(a.xindexes)) + b_no_idx = b.drop_indexes(list(b.xindexes)) + else: + a_no_idx, b_no_idx = a, b + + xarray.testing.assert_identical(a_no_idx, b_no_idx) + xarray.testing._assert_internal_invariants(a, check_default_indexes) xarray.testing._assert_internal_invariants(b, check_default_indexes) diff --git a/xarray/tests/test_assertions.py b/xarray/tests/test_assertions.py index eb1a9492a49..0bae42114b4 100644 --- a/xarray/tests/test_assertions.py +++ b/xarray/tests/test_assertions.py @@ -6,6 +6,8 @@ import pytest import xarray as xr +from xarray.core.coordinates import Coordinates +from xarray.core.indexes import Index from xarray.tests import has_dask try: @@ -237,3 +239,248 @@ def __array__( getattr(xr.testing, func)(a, b) assert len(w) == 0 + + +class CustomIndex(Index): + """Custom index without equals() implementation for testing.""" + + pass + + +class CustomIndexWithEquals(Index): + """Custom index with equals() implementation for testing.""" + + def __init__(self, name: str = "default"): + self.name = name + + def equals(self, other: Index, **kwargs) -> bool: + if not isinstance(other, CustomIndexWithEquals): + return False + return self.name == other.name + + +class TestAssertIdenticalXindexes: + """Tests for xindex comparison in assert_identical.""" + + @pytest.fixture + def dataset_with_extra_coord(self) -> xr.Dataset: + """Dataset with a coordinate that can be indexed. 
+ + Returns a Dataset with 'time' indexed and 'time_metadata' not indexed:: + + + Dimensions: (time: 4) + Coordinates: + * time (time) float64 0.1 0.2 0.3 0.4 + time_metadata (time) int64 10 15 20 25 + Data variables: + data (time) int64 0 1 2 3 + + xindexes: ['time'] + """ + return xr.Dataset( + {"data": ("time", [0, 1, 2, 3])}, + coords={ + "time": [0.1, 0.2, 0.3, 0.4], + "time_metadata": ("time", [10, 15, 20, 25]), + }, + ) + + @pytest.fixture + def dataset_2d(self) -> xr.Dataset: + """2D dataset for MultiIndex tests. + + Returns a Dataset with both 'x' and 'y' indexed:: + + + Dimensions: (x: 2, y: 2) + Coordinates: + * x (x) int64 10 20 + * y (y) None: + """Test that Dataset.identical() and assert_identical detect different xindexes.""" + ds = dataset_with_extra_coord + ds_extra_index = ds.set_xindex("time_metadata") + + # equals should pass (indexes not compared) + assert ds.equals(ds_extra_index) + xr.testing.assert_equal(ds, ds_extra_index) + + # identical should fail (indexes ARE compared) + assert not ds.identical(ds_extra_index) + with pytest.raises(AssertionError, match="Indexes only on the right"): + xr.testing.assert_identical(ds, ds_extra_index) + + def test_assert_identical_same_xindexes( + self, dataset_with_extra_coord: xr.Dataset + ) -> None: + """Test that assert_identical passes when xindexes match.""" + ds = dataset_with_extra_coord + + # Same base datasets - should pass + xr.testing.assert_identical(ds, ds.copy()) + + # Both with extra index - should pass + ds_extra1 = ds.set_xindex("time_metadata") + ds_extra2 = ds.set_xindex("time_metadata") + xr.testing.assert_identical(ds_extra1, ds_extra2) + + def test_dataarray_xindex_difference( + self, dataset_with_extra_coord: xr.Dataset + ) -> None: + """Test that DataArray.identical() and assert_identical detect different xindexes.""" + ds = dataset_with_extra_coord + ds_extra_index = ds.set_xindex("time_metadata") + + da = ds["data"] + da_extra_index = ds_extra_index["data"] + + # equals should 
pass (indexes not compared) + assert da.equals(da_extra_index) + xr.testing.assert_equal(da, da_extra_index) + + # identical should fail (indexes ARE compared) + assert not da.identical(da_extra_index) + with pytest.raises(AssertionError, match="Indexes only on the right"): + xr.testing.assert_identical(da, da_extra_index) + + def test_identical_custom_index_without_equals(self) -> None: + """Test identical() with custom index that doesn't implement equals(). + + When equals() is not implemented, falls back to variable comparison. + Two different CustomIndex objects with the same coordinate values + should be considered identical via the fallback mechanism. + """ + coords1 = Coordinates( + coords={"x": ("x", [1, 2, 3])}, indexes={"x": CustomIndex()} + ) + coords2 = Coordinates( + coords={"x": ("x", [1, 2, 3])}, indexes={"x": CustomIndex()} + ) + + ds1 = xr.Dataset({"data": ("x", [10, 20, 30])}, coords=coords1) + ds2 = xr.Dataset({"data": ("x", [10, 20, 30])}, coords=coords2) + + # Different index objects but same variable values + # Should be identical via fallback to variable comparison + assert ds1.xindexes["x"] is not ds2.xindexes["x"] # Different objects + assert ds1.identical(ds2) + xr.testing.assert_identical(ds1, ds2) + + def test_identical_custom_index_with_equals(self) -> None: + """Test identical() with custom index that implements equals(). + + This tests that a custom Index.equals() implementation is actually + called and its result determines identity. 
+ """ + coords1 = Coordinates( + coords={"x": ("x", [1, 2, 3])}, + indexes={"x": CustomIndexWithEquals("index_a")}, + ) + coords2 = Coordinates( + coords={"x": ("x", [1, 2, 3])}, + indexes={"x": CustomIndexWithEquals("index_a")}, + ) + coords3 = Coordinates( + coords={"x": ("x", [1, 2, 3])}, + indexes={"x": CustomIndexWithEquals("index_b")}, + ) + + ds1 = xr.Dataset({"data": ("x", [10, 20, 30])}, coords=coords1) + ds2 = xr.Dataset({"data": ("x", [10, 20, 30])}, coords=coords2) + ds3 = xr.Dataset({"data": ("x", [10, 20, 30])}, coords=coords3) + + # Same index name - should be identical + assert ds1.identical(ds2) + xr.testing.assert_identical(ds1, ds2) + + # Different index name (same coord values) - should not be identical + # This specifically tests the custom equals() is being called + assert not ds1.identical(ds3) + with pytest.raises(AssertionError, match="Differing indexes"): + xr.testing.assert_identical(ds1, ds3) + + def test_identical_mixed_index_types(self) -> None: + """Test identical() when comparing different index types. + + Different index types should NOT be considered identical, even if + the underlying coordinate values are the same. 
+ """ + # Create dataset with PandasIndex (default) + ds_pandas = xr.Dataset( + {"data": ("x", [10, 20, 30])}, + coords={"x": [1, 2, 3]}, + ) + + # Create dataset with CustomIndex + coords_custom = Coordinates( + coords={"x": ("x", [1, 2, 3])}, indexes={"x": CustomIndex()} + ) + ds_custom = xr.Dataset({"data": ("x", [10, 20, 30])}, coords=coords_custom) + + # Different index types - should NOT be identical + assert not ds_pandas.identical(ds_custom) + + # But equals should still pass (compares data values only) + assert ds_pandas.equals(ds_custom) + + def test_identical_pandas_multiindex(self, dataset_2d: xr.Dataset) -> None: + """Test identical() with PandasMultiIndex.""" + # Stack to create MultiIndex + ds_stacked1 = dataset_2d.stack(z=("x", "y")) + ds_stacked2 = dataset_2d.stack(z=("x", "y")) + + # Same MultiIndex - should be identical + assert ds_stacked1.identical(ds_stacked2) + xr.testing.assert_identical(ds_stacked1, ds_stacked2) + + # Different stacking order creates different MultiIndex + ds_stacked_different = dataset_2d.stack(z=("y", "x")) + assert not ds_stacked1.identical(ds_stacked_different) + + def test_identical_no_indexes(self) -> None: + """Test identical() when both objects have no indexes. + + Dimensions without coordinates have no indexes. 
+ """ + ds1 = xr.Dataset({"data": (("x", "y"), [[1, 2], [3, 4]])}) + ds2 = xr.Dataset({"data": (("x", "y"), [[1, 2], [3, 4]])}) + + # Dimensions without coordinates = no indexes + assert list(ds1.xindexes.keys()) == [] + assert list(ds2.xindexes.keys()) == [] + assert ds1.identical(ds2) + xr.testing.assert_identical(ds1, ds2) + + def test_identical_index_on_different_coords(self) -> None: + """Test identical() when indexes are on different coordinates.""" + # Index on 'x' + coords1 = Coordinates( + coords={"x": ("x", [1, 2, 3]), "y": ("x", [4, 5, 6])}, + indexes={"x": CustomIndex()}, + ) + # Index on 'y' instead + coords2 = Coordinates( + coords={"x": ("x", [1, 2, 3]), "y": ("x", [4, 5, 6])}, + indexes={"y": CustomIndex()}, + ) + + ds1 = xr.Dataset({"data": ("x", [10, 20, 30])}, coords=coords1) + ds2 = xr.Dataset({"data": ("x", [10, 20, 30])}, coords=coords2) + + # Different indexed coordinates - should not be identical + assert not ds1.identical(ds2) + with pytest.raises(AssertionError, match="Indexes only on the left"): + xr.testing.assert_identical(ds1, ds2) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 5aab153117d..1593b56355e 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -766,7 +766,7 @@ def test_roundtrip_timedelta_data(self) -> None: expected["td"].encoding = encoding expected["td0"].encoding = encoding with self.roundtrip( - expected, open_kwargs={"decode_timedelta": CFTimedeltaCoder(time_unit="ns")} + expected, open_kwargs={"decode_timedelta": CFTimedeltaCoder(time_unit="s")} ) as actual: assert_identical(expected, actual) diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index 8c61d7bced2..5207ee3316e 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -948,12 +948,7 @@ def test_concat_promote_shape_with_scalar_coordinates(self) -> None: ] actual = concat(objs, "x") expected = Dataset( - { - "x": [ - pd.Interval(-1, 0, closed="right"), - 
pd.Interval(0, 1, closed="right"), - ] - } + {"x": pd.IntervalIndex.from_tuples([(-1, 0), (0, 1)], closed="right")} ) assert_identical(actual, expected) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 46c36875954..d25ef5a2771 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -5614,7 +5614,12 @@ def test_to_and_from_dict_with_nan_nat( } ) roundtripped = Dataset.from_dict(ds.to_dict(data=data)) - assert_identical(ds, roundtripped) + if data == "array": + # TODO: to_dict(data="array") converts datetime64[ns] to datetime64[us] + # (numpy's default), causing index dtype mismatch on roundtrip. + assert_identical(ds, roundtripped, check_indexes=False) + else: + assert_identical(ds, roundtripped) def test_to_dict_with_numpy_attrs(self) -> None: # this doesn't need to roundtrip diff --git a/xarray/tests/test_formatting.py b/xarray/tests/test_formatting.py index 5cb6c28dab3..e9b5e569591 100644 --- a/xarray/tests/test_formatting.py +++ b/xarray/tests/test_formatting.py @@ -315,6 +315,10 @@ def test_diff_array_repr(self) -> None: * y (y) int64 24B 1 2 3 Coordinates only on the right object: label (x) int64 16B 1 2 + Indexes only on the left object: ['y'] + Differing indexes: + L x Index(['a', 'b'], dtype='object', name='x') + R x Index(['a', 'c'], dtype='object', name='x') Differing attributes: L units: m R units: kg @@ -519,6 +523,10 @@ def test_diff_dataset_repr(self) -> None: R var1 (x) int64 16B 1 2 Data variables only on the left object: var2 (x) int64 16B 3 4 + Indexes only on the left object: ['y'] + Differing indexes: + L x Index(['a', 'b'], dtype='object', name='x') + R x Index(['a', 'c'], dtype='object', name='x') Differing attributes: L title: mytitle R title: newtitle diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 47ea2fcd2b0..b9b9fb151c7 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -3,7 +3,6 @@ import datetime import operator import 
warnings -from itertools import pairwise from typing import Literal, cast from unittest import mock @@ -1812,13 +1811,7 @@ def test_groupby_bins_multidim(self) -> None: ) actual = field.groupby_bins(by, bins=bins).count() - bincoord = np.array( - [ - pd.Interval(left, right, closed="right") - for left, right in pairwise(bins) - ], - dtype=object, - ) + bincoord = pd.IntervalIndex.from_breaks(bins, closed="right") expected = DataArray( np.array([6, np.nan, 3, 6]), dims="group_bins", @@ -2995,13 +2988,7 @@ def test_multiple_groupers_mixed(use_flox: bool, shuffle: bool) -> None: coords={ "x_bins": ( "x_bins", - np.array( - [ - pd.Interval(5, 15, closed="right"), - pd.Interval(15, 25, closed="right"), - ], - dtype=object, - ), + pd.IntervalIndex.from_breaks([5, 15, 25], closed="right"), ), "letters": ("letters", np.array(["a", "b"], dtype=object)), }, diff --git a/xarray/tests/test_range_index.py b/xarray/tests/test_range_index.py index d0d71a1b550..732bf1ef5c4 100644 --- a/xarray/tests/test_range_index.py +++ b/xarray/tests/test_range_index.py @@ -103,45 +103,45 @@ def test_range_index_isel() -> None: # slicing actual = ds.isel(x=slice(None)) - assert_identical(actual, ds, check_default_indexes=False) + assert_identical(actual, ds, check_default_indexes=False, check_indexes=True) actual = ds.isel(x=slice(1, None)) expected = create_dataset_arange(0.1, 1.0, 0.1) - assert_identical(actual, expected, check_default_indexes=False) + assert_identical(actual, expected, check_default_indexes=False, check_indexes=True) actual = ds.isel(x=slice(None, 2)) expected = create_dataset_arange(0.0, 0.2, 0.1) - assert_identical(actual, expected, check_default_indexes=False) + assert_identical(actual, expected, check_default_indexes=False, check_indexes=True) actual = ds.isel(x=slice(1, 3)) expected = create_dataset_arange(0.1, 0.3, 0.1) - assert_identical(actual, expected, check_default_indexes=False) + assert_identical(actual, expected, check_default_indexes=False, check_indexes=True) 
actual = ds.isel(x=slice(None, None, 2)) expected = create_dataset_arange(0.0, 1.0, 0.2) - assert_identical(actual, expected, check_default_indexes=False) + assert_identical(actual, expected, check_default_indexes=False, check_indexes=True) actual = ds.isel(x=slice(None, None, -1)) expected = create_dataset_arange(0.9, -0.1, -0.1) - assert_identical(actual, expected, check_default_indexes=False) + assert_identical(actual, expected, check_default_indexes=False, check_indexes=True) actual = ds.isel(x=slice(None, 4, -1)) expected = create_dataset_arange(0.9, 0.4, -0.1) - assert_identical(actual, expected, check_default_indexes=False) + assert_identical(actual, expected, check_default_indexes=False, check_indexes=True) actual = ds.isel(x=slice(8, 4, -1)) expected = create_dataset_arange(0.8, 0.4, -0.1) - assert_identical(actual, expected, check_default_indexes=False) + assert_identical(actual, expected, check_default_indexes=False, check_indexes=True) actual = ds.isel(x=slice(8, None, -1)) expected = create_dataset_arange(0.8, -0.1, -0.1) - assert_identical(actual, expected, check_default_indexes=False) + assert_identical(actual, expected, check_default_indexes=False, check_indexes=True) # https://github.com/pydata/xarray/issues/10441 ds2 = create_dataset_arange(0.0, 3.0, 0.1) actual = ds2.isel(x=slice(4, None, 3)) expected = create_dataset_arange(0.4, 3.0, 0.3) - assert_identical(actual, expected, check_default_indexes=False) + assert_identical(actual, expected, check_default_indexes=False, check_indexes=True) # scalar actual = ds.isel(x=0) @@ -157,7 +157,7 @@ def test_range_index_isel() -> None: # fancy indexing with 1-d Variable actual = ds.isel(x=xr.Variable("y", [0, 2])) expected = xr.Dataset(coords={"x": ("y", [0.0, 0.2])}).set_xindex("x") - assert_identical(actual, expected, check_default_indexes=False) + assert_identical(actual, expected, check_default_indexes=False, check_indexes=True) assert isinstance(actual.xindexes["x"], PandasIndex) # fancy indexing with 
n-d Variable @@ -228,12 +228,12 @@ def test_range_index_sel() -> None: # start-stop slice actual = ds.sel(x=slice(0.12, 0.28), method="nearest") expected = create_dataset_arange(0.1, 0.3, 0.1) - assert_identical(actual, expected, check_default_indexes=False) + assert_identical(actual, expected, check_default_indexes=False, check_indexes=True) # start-stop-step slice actual = ds.sel(x=slice(0.0, 1.0, 0.2), method="nearest") expected = ds.isel(x=range(0, 10, 2)) - assert_identical(actual, expected, check_default_indexes=False) + assert_identical(actual, expected, check_default_indexes=False, check_indexes=True) # basic indexing actual = ds.sel(x=0.52, method="nearest") @@ -279,12 +279,12 @@ def test_range_index_rename() -> None: actual = ds.rename_vars(x="y") idx = RangeIndex.arange(0.0, 1.0, 0.1, coord_name="y", dim="x") expected = xr.Dataset(coords=xr.Coordinates.from_xindex(idx)) - assert_identical(actual, expected, check_default_indexes=False) + assert_identical(actual, expected, check_default_indexes=False, check_indexes=True) actual = ds.rename_dims(x="y") idx = RangeIndex.arange(0.0, 1.0, 0.1, coord_name="x", dim="y") expected = xr.Dataset(coords=xr.Coordinates.from_xindex(idx)) - assert_identical(actual, expected, check_default_indexes=False) + assert_identical(actual, expected, check_default_indexes=False, check_indexes=True) def test_range_index_repr() -> None: @@ -301,3 +301,89 @@ def test_range_index_repr_inline() -> None: actual = index._repr_inline_(max_width=70) expected = "RangeIndex (start=0, stop=1, step=0.1)" assert actual == expected + + +def test_range_index_equals_floating_point_tolerance() -> None: + """Test that equals() handles floating point precision errors correctly. + + When slicing a RangeIndex, floating point errors can accumulate in the + internal state (e.g., stop=0.30000000000000004 vs stop=0.3), but the + indexes should still be considered equal if they represent the same values. 
+ """ + # Create an index directly + index1 = RangeIndex.arange(0.0, 0.3, 0.1, dim="x") + + # Create the same index by slicing a larger one + # This will accumulate floating point error: stop = 0.0 + 3 * 0.1 = 0.30000000000000004 + index_large = RangeIndex.arange(0.0, 1.0, 0.1, dim="x") + ds_large = xr.Dataset(coords=xr.Coordinates.from_xindex(index_large)) + ds_sliced = ds_large.isel(x=slice(3)) + index2 = ds_sliced.xindexes["x"] + + # They should be equal despite tiny floating point differences + assert index1.equals(index2) + assert index2.equals(index1) + + # Verify they represent the same values + ds1 = xr.Dataset(coords=xr.Coordinates.from_xindex(index1)) + ds2 = xr.Dataset(coords=xr.Coordinates.from_xindex(index2)) + assert np.allclose(ds1["x"].values, ds2["x"].values) + + +def test_range_index_equals_different_sizes() -> None: + """Test that equals() returns False for indexes with different sizes.""" + index1 = RangeIndex.arange(0.0, 0.3, 0.1, dim="x") + index2 = RangeIndex.arange(0.0, 0.4, 0.1, dim="x") + + assert not index1.equals(index2) + assert not index2.equals(index1) + + +def test_range_index_equals_different_start() -> None: + """Test that equals() returns False for indexes with significantly different start values.""" + index1 = RangeIndex.arange(0.0, 0.3, 0.1, dim="x") + index2 = RangeIndex.arange(0.1, 0.4, 0.1, dim="x") + + assert not index1.equals(index2) + assert not index2.equals(index1) + + +def test_range_index_equals_different_stop() -> None: + """Test that equals() returns False for indexes with significantly different stop values.""" + index1 = RangeIndex.arange(0.0, 0.3, 0.1, dim="x") + index2 = RangeIndex.arange(0.0, 0.5, 0.1, dim="x") + + assert not index1.equals(index2) + assert not index2.equals(index1) + + +def test_range_index_equals_different_type() -> None: + """Test that equals() returns False when comparing with a different index type.""" + index1 = RangeIndex.arange(0.0, 0.3, 0.1, dim="x") + index2 = 
PandasIndex(pd.Index([0.0, 0.1, 0.2]), dim="x") + + assert not index1.equals(index2) + # Note: we don't test index2.equals(index1) because PandasIndex.equals() + # has its own logic + + +def test_range_index_equals_exact() -> None: + """Test that equals(exact=True) requires exact floating point match.""" + # Create an index directly + index1 = RangeIndex.arange(0.0, 0.3, 0.1, dim="x") + + # Create the same index by slicing - this accumulates floating point error + index_large = RangeIndex.arange(0.0, 1.0, 0.1, dim="x") + ds_large = xr.Dataset(coords=xr.Coordinates.from_xindex(index_large)) + ds_sliced = ds_large.isel(x=slice(3)) + index2 = ds_sliced.xindexes["x"] + + # Default (exact=False) should be equal due to np.isclose tolerance + assert index1.equals(index2) + + # With exact=True, tiny floating point differences cause inequality + assert not index1.equals(index2, exact=True) + + # But identical indexes should still be equal with exact=True + index3 = RangeIndex.arange(0.0, 0.3, 0.1, dim="x") + assert index1.equals(index3, exact=True) diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index 08a6ddc544f..56b3d9ad22b 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -3666,10 +3666,15 @@ def test_stacking_stacked(self, func, dtype): actual = func(stacked) assert_units_equal(expected, actual) + # TODO: strip_units/attach_units reconstruct DataArrays from scratch, + # losing index structure (e.g., MultiIndex from stack becomes regular Index). + # Fix these utilities to preserve indexes, then remove check_indexes=False. 
if func.name == "reset_index": - assert_identical(expected, actual, check_default_indexes=False) + assert_identical( + expected, actual, check_default_indexes=False, check_indexes=False + ) else: - assert_identical(expected, actual) + assert_identical(expected, actual, check_indexes=False) @pytest.mark.skip(reason="indexes don't support units") def test_to_unstacked_dataset(self, dtype): @@ -3735,7 +3740,10 @@ def test_stacking_reordering(self, func, dtype): actual = func(data_array) assert_units_equal(expected, actual) - assert_identical(expected, actual) + # TODO: strip_units/attach_units reconstruct DataArrays from scratch, + # losing index structure (e.g., MultiIndex from stack becomes regular Index). + # Fix these utilities to preserve indexes, then remove check_indexes=False. + assert_identical(expected, actual, check_indexes=False) @pytest.mark.parametrize( "variant",