Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@ New Features
automatically replace any existing index being set instead of erroring
or needing to call :py:meth:`drop_indexes` first (:pull:`11008`).
By `Ian Hunt-Isaak <https://github.com/ianhi>`_.
- Calling :py:meth:`Dataset.sel` or :py:meth:`DataArray.sel` on a coordinate
without an index will now automatically create a temporary
:py:class:`~xarray.indexes.PandasIndex` to perform the selection
(:issue:`9703`, :pull:`11029`).
By `Ian Hunt-Isaak <https://github.com/ianhi>`_.


Breaking Changes
~~~~~~~~~~~~~~~~
Expand Down
12 changes: 10 additions & 2 deletions xarray/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,18 +138,26 @@ def group_indexers_by_index(
options: Mapping[str, Any],
) -> list[tuple[Index, dict[Any, Any]]]:
"""Returns a list of unique indexes and their corresponding indexers."""
# import here instead of at top to guard against circular imports
from xarray.core.indexes import PandasIndex

unique_indexes = {}
grouped_indexers: Mapping[int | None, dict] = defaultdict(dict)

for key, label in indexers.items():
index: Index = obj.xindexes.get(key, None)
if (key in obj.coords) and (index is None):
# TODO: should we raise a more informative error
# here if the index creation fails? I.e. if a 2D coord
# or something else that PandasIndex cannot support.
index = PandasIndex.from_variables(
{key: obj.coords[key].variable}, options={}
)

if index is not None:
index_id = id(index)
unique_indexes[index_id] = index
grouped_indexers[index_id][key] = label
elif key in obj.coords:
raise KeyError(f"no index found for coordinate {key!r}")
elif key not in obj.dims:
raise KeyError(
f"{key!r} is not a valid dimension or coordinate for "
Expand Down
5 changes: 4 additions & 1 deletion xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1190,11 +1190,14 @@ def test_sel_float_multiindex(self) -> None:
assert_equal(actual, expected)

def test_sel_no_index(self) -> None:
    # ``x`` is a bare dimension with no coordinate, and ``x_meta`` is a
    # non-dimension coordinate along ``x``; ``.sel`` must work on both.
    array = DataArray(np.arange(10), dims="x").assign_coords(
        {"x_meta": ("x", np.linspace(0.1, 1, 10))}
    )

    # Without a coordinate on ``x``, label-based selection is positional.
    assert_identical(array[0], array.sel(x=0))
    assert_identical(array[:5], array.sel(x=slice(5)))
    assert_identical(array[[0, -1]], array.sel(x=[0, -1]))
    assert_identical(array[array < 5], array.sel(x=(array < 5)))

    # Selecting by the value of ``x_meta`` (second element is 0.2) must
    # resolve to position 1 along ``x``.
    assert_identical(array[1], array.sel(x_meta=0.2))

def test_sel_method(self) -> None:
data = DataArray(np.random.randn(3, 4), [("x", [0, 1, 2]), ("y", list("abcd"))])
Expand Down
30 changes: 30 additions & 0 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -3167,6 +3167,36 @@
with pytest.raises(ValueError, match=r".*would corrupt the following index.*"):
ds.drop_indexes("a")

def test_sel_on_unindexed_coordinate(self) -> None:
    # Test that .sel() works on coordinates without an index by creating
    # a PandasIndex on the fly
    ds = Dataset(
        {"data": (["x", "y"], np.arange(6).reshape(2, 3))},
        coords={
            "x": [0, 1],
            "y": [10, 20, 30],
            # The dimension must be given explicitly: a bare list would
            # be treated as a new dimension coordinate ``y_meta`` with its
            # own PandasIndex, defeating the purpose of this test.
            "y_meta": ("y", ["a", "b", "c"]),
        },
    )
    # Drop the index on y to create an unindexed dim coord
    # also check that coord y_meta works despite not being on a data var
    ds = ds.drop_indexes("y")
    assert "y" not in ds.xindexes
    assert "y_meta" not in ds.xindexes
    assert "y" in ds.coords
    assert "y_meta" in ds.coords
    assert ds["y_meta"].dims == ("y",)

    # .sel() should still work by creating a PandasIndex on the fly
    result = ds.sel(y=20)
    expected = ds.isel(y=1)
    assert_identical(result, expected)

    # Selecting through the non-dimension coordinate picks the same
    # position along y.
    result = ds.sel(y_meta="b")
    expected = ds.isel(y=1)
    assert_identical(result, expected)

    # Also test with slice - compare data values directly since the result
    # has no index on y (which triggers internal invariant checks)
    result_slice = ds.sel(y=slice(10, 20))
    expected_slice = ds.isel(y=slice(0, 2))
    assert_array_equal(result_slice["data"].values, expected_slice["data"].values)
    assert_array_equal(result_slice["y"].values, expected_slice["y"].values)

def test_drop_dims(self) -> None:
data = xr.Dataset(
{
Expand Down
21 changes: 19 additions & 2 deletions xarray/tests/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,15 +109,32 @@ def test_group_indexers_by_index(self) -> None:
assert indexers == {"y": 0}
assert len(grouped_indexers) == 3

with pytest.raises(KeyError, match=r"no index found for coordinate 'y2'"):
indexing.group_indexers_by_index(data, {"y2": 2.0}, {})
with pytest.raises(
KeyError, match=r"'w' is not a valid dimension or coordinate"
):
indexing.group_indexers_by_index(data, {"w": "a"}, {})
with pytest.raises(ValueError, match=r"cannot supply.*"):
indexing.group_indexers_by_index(data, {"z": 1}, {"method": "nearest"})

def test_group_indexers_by_index_creates_index_for_unindexed_coord(self) -> None:
    # Selecting on a coordinate that has no index should make
    # group_indexers_by_index build a PandasIndex for it on the fly.
    data = DataArray(
        np.zeros((2, 3)),
        coords={"x": [0, 1], "y": [10, 20, 30]},
        dims=("x", "y"),
    )
    data.coords["y2"] = ("y", [2.0, 3.0, 4.0])

    # Precondition: y2 exists as a coordinate but carries no index.
    assert "y2" in data.coords
    assert "y2" not in data.xindexes

    query = {"y2": 3.0}
    groups = indexing.group_indexers_by_index(data, query, {})

    # Exactly one (index, indexers) pair is expected, backed by a
    # PandasIndex and carrying the original label untouched.
    assert len(groups) == 1
    created_index, labels = groups[0]
    assert isinstance(created_index, PandasIndex)
    assert labels == {"y2": 3.0}

def test_map_index_queries(self) -> None:
def create_sel_results(
x_indexer,
Expand Down
Loading