-
-
Notifications
You must be signed in to change notification settings - Fork 1.2k
FIX: assert_identical now considers xindexes + improve RangeIndex equals
#11035
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 14 commits
81615ed
287812f
6f27ac7
7fce3c3
0ac4870
2167013
dbb649a
b2ffd28
eccb09c
6e4099a
348306c
dcfc46e
1a1219a
6214d4f
27b4275
789b824
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -973,6 +973,60 @@ def _compat_to_str(compat): | |
| return compat | ||
|
|
||
|
|
||
| def diff_indexes_repr(a_indexes, b_indexes, col_width: int = 20) -> str: | ||
| """Generate diff representation for indexes.""" | ||
| a_keys = set(a_indexes.keys()) | ||
| b_keys = set(b_indexes.keys()) | ||
|
|
||
| summary = [] | ||
|
|
||
| if only_a := a_keys - b_keys: | ||
| summary.append(f"Indexes only on the left object: {sorted(only_a)}") | ||
|
|
||
| if only_b := b_keys - a_keys: | ||
| summary.append(f"Indexes only on the right object: {sorted(only_b)}") | ||
|
|
||
| # Check for indexes on the same coordinates but with different types or values | ||
| common_keys = a_keys & b_keys | ||
| diff_items = [] | ||
|
|
||
| for key in sorted(common_keys): | ||
| a_idx = a_indexes[key] | ||
| b_idx = b_indexes[key] | ||
|
|
||
| # Check if indexes differ | ||
| indexes_equal = False | ||
| if type(a_idx) is type(b_idx): | ||
| try: | ||
| indexes_equal = a_idx.equals(b_idx) | ||
| except NotImplementedError: | ||
| # Fall back to variable comparison | ||
| a_var = a_indexes.variables[key] | ||
| b_var = b_indexes.variables[key] | ||
| indexes_equal = a_var.equals(b_var) | ||
|
|
||
| if not indexes_equal: | ||
| # Format the index values similar to variable diff | ||
| try: | ||
| a_repr = inline_index_repr( | ||
| a_indexes.to_pandas_indexes()[key], max_width=70 | ||
| ) | ||
| b_repr = inline_index_repr( | ||
| b_indexes.to_pandas_indexes()[key], max_width=70 | ||
| ) | ||
| except TypeError: | ||
| # Custom indexes may not support to_pandas_index() | ||
| a_repr = repr(a_idx) | ||
| b_repr = repr(b_idx) | ||
|
Comment on lines
+1010
to
+1020
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. might be worth calling
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this well defined API for a custom index to support? Def happy to add it, just also wondering if the knoweldge of that being helpful is (or should be) written down somewhere
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we already use those in inline_index_repr, so yes, this should be well defined. This should definitely be part of the custom index development page, and worth adding if it is not already part of that. |
||
| diff_items.append(f"L {key!s:<{col_width}} {a_repr}") | ||
| diff_items.append(f"R {key!s:<{col_width}} {b_repr}") | ||
|
|
||
| if diff_items: | ||
| summary.append("Differing indexes:\n" + "\n".join(diff_items)) | ||
|
|
||
| return "\n".join(summary) | ||
|
|
||
|
|
||
| def diff_array_repr(a, b, compat): | ||
| # used for DataArray, Variable and IndexVariable | ||
| summary = [ | ||
|
|
@@ -1002,10 +1056,14 @@ def diff_array_repr(a, b, compat): | |
| ): | ||
| summary.append(coords_diff) | ||
|
|
||
| if compat == "identical" and ( | ||
| attrs_diff := diff_attrs_repr(a.attrs, b.attrs, compat) | ||
| ): | ||
| summary.append(attrs_diff) | ||
| if compat == "identical": | ||
| if hasattr(a, "xindexes") and ( | ||
| indexes_diff := diff_indexes_repr(a.xindexes, b.xindexes) | ||
| ): | ||
| summary.append(indexes_diff) | ||
|
|
||
| if attrs_diff := diff_attrs_repr(a.attrs, b.attrs, compat): | ||
| summary.append(attrs_diff) | ||
|
|
||
| return "\n".join(summary) | ||
|
|
||
|
|
@@ -1043,10 +1101,12 @@ def diff_dataset_repr(a, b, compat): | |
| ): | ||
| summary.append(data_diff) | ||
|
|
||
| if compat == "identical" and ( | ||
| attrs_diff := diff_attrs_repr(a.attrs, b.attrs, compat) | ||
| ): | ||
| summary.append(attrs_diff) | ||
| if compat == "identical": | ||
| if indexes_diff := diff_indexes_repr(a.xindexes, b.xindexes): | ||
| summary.append(indexes_diff) | ||
|
|
||
| if attrs_diff := diff_attrs_repr(a.attrs, b.attrs, compat): | ||
| summary.append(attrs_diff) | ||
|
|
||
| return "\n".join(summary) | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've implemented something very similar: https://github.com/xarray-contrib/xdggs/blob/52d8b1dd23bf809757c7e3f5c04945129f6905af/xdggs/tests/__init__.py#L64-L105
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
oh neat! I'll take a close look tomorrow. Is there anything different we should do here that would have made your xdggs use cases easier?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
there's not much that's different (the diff formatting is slightly different). However, compared to
indexes_equalit may be worth grouping indexes withindexes.group_by_index()(which would mean we don't have to worry about caching)