Skip to content
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@ New Features
Breaking Changes
~~~~~~~~~~~~~~~~

- :py:meth:`Dataset.identical`, :py:meth:`DataArray.identical`, and
:py:func:`testing.assert_identical` now compare indexes (xindexes).
Two objects with identical data but different indexes will no longer
be considered identical (:issue:`11033`, :pull:`11035`).
By `Ian Hunt-Isaak <https://github.com/ianhi>`_.


Deprecations
~~~~~~~~~~~~
Expand All @@ -39,6 +45,10 @@ Bug Fixes
- Ensure that ``keep_attrs='drop'`` and ``keep_attrs=False`` remove attrs from result, even when there is
only one xarray object given to ``apply_ufunc`` (:issue:`10982` :pull:`10997`).
By `Julia Signell <https://github.com/jsignell>`_.
- :py:meth:`~xarray.indexes.RangeIndex.equals` now uses floating point error tolerant
``np.isclose`` by default to handle accumulated floating point errors from
slicing operations. Use ``exact=True`` for exact comparison (:pull:`11035`).
By `Ian Hunt-Isaak <https://github.com/ianhi>`_.

Documentation
~~~~~~~~~~~~~
Expand Down
11 changes: 9 additions & 2 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -4677,6 +4677,13 @@ def from_iris(cls, cube: iris_Cube) -> Self:
def _all_compat(self, other: Self, compat_str: str) -> bool:
"""Helper function for equals, broadcast_equals, and identical"""

# For identical, also compare indexes
if compat_str == "identical":
from xarray.core.indexes import indexes_identical

if not indexes_identical(self.xindexes, other.xindexes):
return False

def compat(x, y):
return getattr(x.variable, compat_str)(y.variable)

Expand Down Expand Up @@ -4796,8 +4803,8 @@ def equals(self, other: Self) -> bool:
return False

def identical(self, other: Self) -> bool:
"""Like equals, but also checks the array name and attributes, and
attributes on all coordinates.
"""Like equals, but also checks the array name, attributes,
attributes on all coordinates, and indexes.

Parameters
----------
Expand Down
11 changes: 9 additions & 2 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1514,6 +1514,13 @@ def _all_compat(
if not callable(compat):
compat_str = compat

# For identical, also compare indexes
if compat_str == "identical":
from xarray.core.indexes import indexes_identical

if not indexes_identical(self.xindexes, other.xindexes):
return False

# some stores (e.g., scipy) do not seem to preserve order, so don't
# require matching order for equality
def compat(x: Variable, y: Variable) -> bool:
Expand Down Expand Up @@ -1672,8 +1679,8 @@ def equals(self, other: Self) -> bool:
return False

def identical(self, other: Self) -> bool:
"""Like equals, but also checks all dataset attributes and the
attributes on all variables and coordinates.
"""Like equals, but also checks all dataset attributes, the
attributes on all variables and coordinates, and indexes.

Example
-------
Expand Down
76 changes: 68 additions & 8 deletions xarray/core/formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -973,6 +973,60 @@ def _compat_to_str(compat):
return compat


def diff_indexes_repr(a_indexes, b_indexes, col_width: int = 20) -> str:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh neat! I'll take a close look tomorrow. Is there anything different we should do here that would have made your xdggs use cases easier?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

there's not much that's different (the diff formatting is slightly different). However, compared to indexes_equal it may be worth grouping indexes with indexes.group_by_index() (which would mean we don't have to worry about caching)

"""Generate diff representation for indexes."""
a_keys = set(a_indexes.keys())
b_keys = set(b_indexes.keys())

summary = []

if only_a := a_keys - b_keys:
summary.append(f"Indexes only on the left object: {sorted(only_a)}")

if only_b := b_keys - a_keys:
summary.append(f"Indexes only on the right object: {sorted(only_b)}")

# Check for indexes on the same coordinates but with different types or values
common_keys = a_keys & b_keys
diff_items = []

for key in sorted(common_keys):
a_idx = a_indexes[key]
b_idx = b_indexes[key]

# Check if indexes differ
indexes_equal = False
if type(a_idx) is type(b_idx):
try:
indexes_equal = a_idx.equals(b_idx)
except NotImplementedError:
# Fall back to variable comparison
a_var = a_indexes.variables[key]
b_var = b_indexes.variables[key]
indexes_equal = a_var.equals(b_var)

if not indexes_equal:
# Format the index values similar to variable diff
try:
a_repr = inline_index_repr(
a_indexes.to_pandas_indexes()[key], max_width=70
)
b_repr = inline_index_repr(
b_indexes.to_pandas_indexes()[key], max_width=70
)
except TypeError:
# Custom indexes may not support to_pandas_index()
a_repr = repr(a_idx)
b_repr = repr(b_idx)
Comment on lines +1010 to +1020
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

might be worth calling index._repr_inline_(max_width=70) with a fallback to repr(index)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this a well-defined API for a custom index to support? Definitely happy to add it, just also wondering if the knowledge of that being helpful is (or should be) written down somewhere

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we already use those in inline_index_repr, so yes, this should be well defined.

This should definitely be part of the custom index development page, and worth adding if it is not already part of that.

diff_items.append(f"L {key!s:<{col_width}} {a_repr}")
diff_items.append(f"R {key!s:<{col_width}} {b_repr}")

if diff_items:
summary.append("Differing indexes:\n" + "\n".join(diff_items))

return "\n".join(summary)


def diff_array_repr(a, b, compat):
# used for DataArray, Variable and IndexVariable
summary = [
Expand Down Expand Up @@ -1002,10 +1056,14 @@ def diff_array_repr(a, b, compat):
):
summary.append(coords_diff)

if compat == "identical" and (
attrs_diff := diff_attrs_repr(a.attrs, b.attrs, compat)
):
summary.append(attrs_diff)
if compat == "identical":
if hasattr(a, "xindexes") and (
indexes_diff := diff_indexes_repr(a.xindexes, b.xindexes)
):
summary.append(indexes_diff)

if attrs_diff := diff_attrs_repr(a.attrs, b.attrs, compat):
summary.append(attrs_diff)

return "\n".join(summary)

Expand Down Expand Up @@ -1043,10 +1101,12 @@ def diff_dataset_repr(a, b, compat):
):
summary.append(data_diff)

if compat == "identical" and (
attrs_diff := diff_attrs_repr(a.attrs, b.attrs, compat)
):
summary.append(attrs_diff)
if compat == "identical":
if indexes_diff := diff_indexes_repr(a.xindexes, b.xindexes):
summary.append(indexes_diff)

if attrs_diff := diff_attrs_repr(a.attrs, b.attrs, compat):
summary.append(attrs_diff)

return "\n".join(summary)

Expand Down
54 changes: 54 additions & 0 deletions xarray/core/indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2015,6 +2015,60 @@ def indexes_equal(
return cast(bool, equal)


def indexes_identical(
    a_indexes: Indexes[Index],
    b_indexes: Indexes[Index],
) -> bool:
    """Check if two Indexes objects are identical.

    Two Indexes objects are identical if they have the same set of
    indexed coordinate names, each corresponding pair of indexes are
    the same type, and are equal (using Index.equals()).

    Unlike indexes_equal(), this function does NOT fall back to variable
    comparison when index types differ - different index types means
    not identical. Variable comparison is only used when an index's
    ``equals()`` raises ``NotImplementedError``.

    Parameters
    ----------
    a_indexes : Indexes
        First Indexes object to compare.
    b_indexes : Indexes
        Second Indexes object to compare.

    Returns
    -------
    bool
        True if the two Indexes objects are identical.
    """
    # Must have same indexed coordinate names
    if set(a_indexes.keys()) != set(b_indexes.keys()):
        return False

    # Pairs of index objects already confirmed equal, keyed by object identity.
    # Several coordinate names can map to the same index object, so this avoids
    # calling equals() once per name. The objects themselves are stored as the
    # values so they stay alive and their ids cannot be reused during the loop.
    verified: dict[tuple[int, int], tuple[Index, Index]] = {}

    for coord_name in a_indexes.keys():
        a_idx = a_indexes[coord_name]
        b_idx = b_indexes[coord_name]

        # For identical(), index types must match exactly (no subclass leeway)
        if type(a_idx) is not type(b_idx):
            return False

        pair_key = (id(a_idx), id(b_idx))
        if pair_key in verified:
            continue

        try:
            equal = a_idx.equals(b_idx)
        except NotImplementedError:
            # Fall back to variable comparison when equals() not implemented.
            # This is per-coordinate, so it is deliberately not cached.
            a_var = a_indexes.variables[coord_name]
            b_var = b_indexes.variables[coord_name]
            if not a_var.equals(b_var):
                return False
        else:
            if not equal:
                return False
            verified[pair_key] = (a_idx, b_idx)

    return True


def indexes_all_equal(
elements: Sequence[tuple[Index, dict[Hashable, Variable]]],
exclude_dims: frozenset[Hashable],
Expand Down
68 changes: 64 additions & 4 deletions xarray/indexes/range_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,14 +75,45 @@ def reverse(self, coord_labels: dict[Hashable, Any]) -> dict[str, Any]:
return {self.dim: positions}

def equals(
    self,
    other: CoordinateTransform,
    exclude: frozenset[Hashable] | None = None,
    *,
    exact: bool = False,
) -> bool:
    """Check equality with another RangeCoordinateTransform.

    Parameters
    ----------
    other : CoordinateTransform
        The other transform to compare with.
    exclude : frozenset of hashable, optional
        Dimensions excluded from checking (unused for 1D RangeIndex).
    exact : bool, default False
        If False (default), use np.isclose() for floating point comparisons
        to handle accumulated floating point errors from slicing operations.
        If True, require exact equality of start and stop values.

    Returns
    -------
    bool
        True if the transforms are equal, False otherwise. ``other`` not
        being a RangeCoordinateTransform always yields False.
    """
    if not isinstance(other, RangeCoordinateTransform):
        return False

    if exact:
        # bool() keeps the return type consistent with the tolerant branch
        # below in case start/stop compare to a non-builtin boolean.
        return bool(
            self.start == other.start
            and self.stop == other.stop
            and self.size == other.size
        )

    # Use np.isclose for floating point comparisons to handle accumulated
    # floating point errors (e.g., from slicing operations). The size is
    # always compared exactly.
    return bool(
        np.isclose(self.start, other.start)
        and np.isclose(self.stop, other.stop)
        and self.size == other.size
    )

Expand Down Expand Up @@ -130,6 +161,35 @@ class RangeIndex(CoordinateTransformIndex):
def __init__(self, transform: RangeCoordinateTransform):
super().__init__(transform)

def equals(
    self,
    other: "Index",
    *,
    exclude: frozenset[Hashable] | None = None,
    exact: bool = False,
) -> bool:
    """Report whether ``other`` is a RangeIndex with an equal transform.

    Parameters
    ----------
    other : Index
        Index to compare against. Anything that is not a RangeIndex
        compares unequal.
    exclude : frozenset of hashable, optional
        Forwarded unchanged to the transform's ``equals``
        (unused for 1D RangeIndex).
    exact : bool, default False
        Forwarded unchanged to the transform's ``equals``. False selects
        the np.isclose()-based comparison that tolerates accumulated
        floating point errors from slicing operations; True requires
        exact equality of start and stop values.

    Returns
    -------
    bool
        True if the indexes are equal, False otherwise.
    """
    # Short-circuit: the transform comparison only runs for RangeIndex peers.
    return isinstance(other, RangeIndex) and self.transform.equals(
        other.transform, exclude=exclude, exact=exact
    )

@classmethod
def arange(
cls,
Expand Down
40 changes: 38 additions & 2 deletions xarray/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,9 +332,45 @@ def assert_equal(a, b, check_default_indexes=True):
xarray.testing._assert_internal_invariants(b, check_default_indexes)


def assert_identical(a, b, check_default_indexes=True):
def assert_identical(a, b, check_default_indexes=True, check_indexes=None):
    """Assert that two xarray objects are identical.

    This is a test-internal wrapper around xarray.testing.assert_identical
    that also validates internal invariants of both objects.

    Parameters
    ----------
    a, b : xarray objects
        Objects to compare.
    check_default_indexes : bool, default True
        Passed to the internal invariant check for both objects. If True,
        validates that 1D dimension coordinates have default indexes. Set to
        False for objects that intentionally lack default indexes.
    check_indexes : bool, optional
        Whether indexes take part in the identity comparison.
        If None (the default), the value of ``check_default_indexes`` is used,
        for backwards compatibility.
        If True, compare the objects as given, indexes included.
        If False, drop all indexes from Dataset/DataArray inputs before
        comparing, so that indexes are not part of the comparison.
    """
    __tracebackhide__ = True
    # For backwards compatibility, check_default_indexes=False implies
    # check_indexes=False unless check_indexes is explicitly specified
    if check_indexes is None:
        check_indexes = check_default_indexes

    if check_indexes:
        xarray.testing.assert_identical(a, b)
    else:
        # Drop all indexes before comparing to skip index comparison
        from xarray import DataArray, Dataset

        # Each operand is handled on its own: objects that are not a
        # Dataset/DataArray are passed through untouched, so a type mismatch
        # between a and b reaches the comparison below instead of failing
        # here with an AttributeError on drop_indexes.
        a_no_idx = (
            a.drop_indexes(list(a.xindexes))
            if isinstance(a, Dataset | DataArray)
            else a
        )
        b_no_idx = (
            b.drop_indexes(list(b.xindexes))
            if isinstance(b, Dataset | DataArray)
            else b
        )

        xarray.testing.assert_identical(a_no_idx, b_no_idx)

    # Invariants are always checked on the original objects, indexes included
    xarray.testing._assert_internal_invariants(a, check_default_indexes)
    xarray.testing._assert_internal_invariants(b, check_default_indexes)

Expand Down
Loading
Loading