diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 1c463e885fc..28c47d61e11 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -18,6 +18,12 @@ New Features automatically replace any existing index being set instead of erroring or needing to call :py:meth:`drop_indexes` first (:pull:`11008`). By `Ian Hunt-Isaak <https://github.com/ianhi>`_. +- Calling :py:meth:`Dataset.sel` or :py:meth:`DataArray.sel` on a 1-dimensional coordinate + without an index will now automatically create a temporary + :py:class:`~xarray.indexes.PandasIndex` to perform the selection + (:issue:`9703`, :pull:`11029`). + By `Ian Hunt-Isaak <https://github.com/ianhi>`_. + Breaking Changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index eead0a1b8af..c34efe325c7 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -138,18 +138,30 @@ def group_indexers_by_index( options: Mapping[str, Any], ) -> list[tuple[Index, dict[Any, Any]]]: """Returns a list of unique indexes and their corresponding indexers.""" + # import here instead of at top to guard against circular imports + from xarray.core.indexes import PandasIndex + unique_indexes = {} grouped_indexers: Mapping[int | None, dict] = defaultdict(dict) for key, label in indexers.items(): index: Index = obj.xindexes.get(key, None) + if index is None and key in obj.coords: + coord = obj.coords[key] + if coord.ndim != 1: + raise ValueError( + "Could not automatically create PandasIndex for " + f"coord {key!r} with {coord.ndim} dimensions. Please explicitly " + "set the index using `set_xindex`."
+ ) + index = PandasIndex.from_variables( + {key: obj.coords[key].variable}, options={} + ) if index is not None: index_id = id(index) unique_indexes[index_id] = index grouped_indexers[index_id][key] = label - elif key in obj.coords: - raise KeyError(f"no index found for coordinate {key!r}") elif key not in obj.dims: raise KeyError( f"{key!r} is not a valid dimension or coordinate for " diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index df9d29843ff..f7d5ca27d7e 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1190,11 +1190,14 @@ def test_sel_float_multiindex(self) -> None: assert_equal(actual, expected) def test_sel_no_index(self) -> None: - array = DataArray(np.arange(10), dims="x") + array = DataArray(np.arange(10), dims="x").assign_coords( + {"x_meta": ("x", np.linspace(0.1, 1, 10))} + ) assert_identical(array[0], array.sel(x=0)) assert_identical(array[:5], array.sel(x=slice(5))) assert_identical(array[[0, -1]], array.sel(x=[0, -1])) assert_identical(array[array < 5], array.sel(x=(array < 5))) + assert_identical(array[1], array.sel(x_meta=0.2)) def test_sel_method(self) -> None: data = DataArray(np.random.randn(3, 4), [("x", [0, 1, 2]), ("y", list("abcd"))]) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 17d3e25b642..46c36875954 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -3167,6 +3167,43 @@ def test_drop_indexes(self) -> None: with pytest.raises(ValueError, match=r".*would corrupt the following index.*"): ds.drop_indexes("a") + def test_sel_on_unindexed_coordinate(self) -> None: + # Test that .sel() works on coordinates without an index by creating + # a PandasIndex on the fly + ds = Dataset( + {"data": (["x", "y"], np.arange(6).reshape(2, 3))}, + coords={"x": [0, 1], "y": [10, 20, 30], "y_meta": ("y", ["a", "b", "c"])}, + ) + # Drop the index on y to create an unindexed dim coord + # also check that coord y_meta works despite 
not being a dim coord + ds = ds.drop_indexes("y") + assert "y" not in ds.xindexes + assert "y_meta" not in ds.xindexes + assert "y" in ds.coords + + # .sel() should still work by creating a PandasIndex on the fly + result = ds.sel(y=20) + expected = ds.isel(y=1) + assert_identical(result, expected, check_default_indexes=False) + + result = ds.sel(y_meta="b") + expected = ds.isel(y=1) + assert_identical(result, expected, check_default_indexes=False) + + # check that our auto-created indexes are ephemeral + assert "y" not in ds.xindexes + assert "y_meta" not in ds.xindexes + assert "y" in ds.coords + + result_slice = ds.sel(y=slice(10, 20)) + expected_slice = ds.isel(y=slice(0, 2)) + assert_identical( + result_slice["data"], expected_slice["data"], check_default_indexes=False + ) + assert_identical( + result_slice["y"], expected_slice["y"], check_default_indexes=False + ) + def test_drop_dims(self) -> None: data = xr.Dataset( { diff --git a/xarray/tests/test_indexing.py b/xarray/tests/test_indexing.py index 6b564c6f032..dfb3283a16e 100644 --- a/xarray/tests/test_indexing.py +++ b/xarray/tests/test_indexing.py @@ -109,8 +109,6 @@ def test_group_indexers_by_index(self) -> None: assert indexers == {"y": 0} assert len(grouped_indexers) == 3 - with pytest.raises(KeyError, match=r"no index found for coordinate 'y2'"): - indexing.group_indexers_by_index(data, {"y2": 2.0}, {}) with pytest.raises( KeyError, match=r"'w' is not a valid dimension or coordinate" ): @@ -118,6 +116,25 @@ def test_group_indexers_by_index(self) -> None: with pytest.raises(ValueError, match=r"cannot supply.*"): indexing.group_indexers_by_index(data, {"z": 1}, {"method": "nearest"}) + def test_group_indexers_by_index_creates_index_for_unindexed_coord(self) -> None: + # Test that selecting on a coordinate without an index creates a PandasIndex on the fly + data = DataArray( + np.zeros((2, 3)), coords={"x": [0, 1], "y": [10, 20, 30]}, dims=("x", "y") + ) + data.coords["y2"] = ("y", [2.0, 3.0, 4.0]) + + # 
y2 is a coordinate but has no index + assert "y2" in data.coords + assert "y2" not in data.xindexes + + # group_indexers_by_index should create a PandasIndex on the fly + grouped_indexers = indexing.group_indexers_by_index(data, {"y2": 3.0}, {}) + + assert len(grouped_indexers) == 1 + idx, indexers = grouped_indexers[0] + assert isinstance(idx, PandasIndex) + assert indexers == {"y2": 3.0} + def test_map_index_queries(self) -> None: def create_sel_results( x_indexer,