Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@ New Features
automatically replace any existing index being set instead of erroring
or needing to call :py:meth:`drop_indexes` first (:pull:`11008`).
By `Ian Hunt-Isaak <https://github.com/ianhi>`_.
- Calling :py:meth:`Dataset.sel` or :py:meth:`DataArray.sel` on a 1-dimensional coordinate
without an index will now automatically create a temporary
:py:class:`~xarray.indexes.PandasIndex` to perform the selection
(:issue:`9703`, :pull:`11029`).
By `Ian Hunt-Isaak <https://github.com/ianhi>`_.


Breaking Changes
~~~~~~~~~~~~~~~~
Expand Down
16 changes: 14 additions & 2 deletions xarray/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,18 +138,30 @@ def group_indexers_by_index(
options: Mapping[str, Any],
) -> list[tuple[Index, dict[Any, Any]]]:
"""Returns a list of unique indexes and their corresponding indexers."""
# import here instead of at top to guard against circular imports
from xarray.core.indexes import PandasIndex

unique_indexes = {}
grouped_indexers: Mapping[int | None, dict] = defaultdict(dict)

for key, label in indexers.items():
index: Index = obj.xindexes.get(key, None)
if index is None and key in obj.coords:
coord = obj.coords[key]
if coord.ndim != 1:
raise ValueError(
"Could not automatically create PandasIndex for "
f"coord {key!r} with {coord.ndim} dimensions. Please explicitly "
"set the index using `set_xindex`."
)
index = PandasIndex.from_variables(
{key: obj.coords[key].variable}, options={}
)

if index is not None:
index_id = id(index)
unique_indexes[index_id] = index
grouped_indexers[index_id][key] = label
elif key in obj.coords:
raise KeyError(f"no index found for coordinate {key!r}")
elif key not in obj.dims:
raise KeyError(
f"{key!r} is not a valid dimension or coordinate for "
Expand Down
5 changes: 4 additions & 1 deletion xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1190,11 +1190,14 @@ def test_sel_float_multiindex(self) -> None:
assert_equal(actual, expected)

def test_sel_no_index(self) -> None:
    """Check that ``sel`` agrees with positional indexing when no index exists.

    ``x`` is a bare dimension (no coordinate is supplied for it), and
    ``x_meta`` is a non-dimension coordinate laid out along ``x``.
    """
    # Build the array once; a separate coordinate-free construction would be
    # overwritten immediately and is therefore not needed.
    array = DataArray(np.arange(10), dims="x").assign_coords(
        {"x_meta": ("x", np.linspace(0.1, 1, 10))}
    )

    # Selecting along the bare dimension: scalar, slice, list and boolean mask.
    assert_identical(array[0], array.sel(x=0))
    assert_identical(array[:5], array.sel(x=slice(5)))
    assert_identical(array[[0, -1]], array.sel(x=[0, -1]))
    assert_identical(array[array < 5], array.sel(x=(array < 5)))

    # Selecting by the value of ``x_meta`` should pick the element at position 1.
    assert_identical(array[1], array.sel(x_meta=0.2))

def test_sel_method(self) -> None:
data = DataArray(np.random.randn(3, 4), [("x", [0, 1, 2]), ("y", list("abcd"))])
Expand Down
37 changes: 37 additions & 0 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -3167,6 +3167,43 @@ def test_drop_indexes(self) -> None:
with pytest.raises(ValueError, match=r".*would corrupt the following index.*"):
ds.drop_indexes("a")

def test_sel_on_unindexed_coordinate(self) -> None:
    """Check that ``.sel()`` works on 1-D coordinates that carry no index.

    The selection is expected to go through a PandasIndex built on the fly,
    which must not be left behind on the dataset afterwards.
    """
    payload = np.arange(6).reshape(2, 3)
    coords = {"x": [0, 1], "y": [10, 20, 30], "y_meta": ("y", ["a", "b", "c"])}
    ds = Dataset({"data": (["x", "y"], payload)}, coords=coords)

    # Strip the index from the dimension coordinate ``y``; ``y_meta`` is a
    # non-dimension coordinate along ``y`` and is exercised as well.
    ds = ds.drop_indexes("y")
    unindexed = ("y", "y_meta")
    for name in unindexed:
        assert name not in ds.xindexes
    assert "y" in ds.coords

    # Scalar label lookups on either coordinate should land on position 1.
    for query in ({"y": 20}, {"y_meta": "b"}):
        result = ds.sel(**query)
        expected = ds.isel(y=1)
        assert_identical(result, expected, check_default_indexes=False)

    # The indexes created for the lookups above must be ephemeral.
    for name in unindexed:
        assert name not in ds.xindexes
    assert "y" in ds.coords

    # Labels 10..20 are expected to match positions 0 and 1.
    result_slice = ds.sel(y=slice(10, 20))
    expected_slice = ds.isel(y=slice(0, 2))
    for name in ("data", "y"):
        assert_identical(
            result_slice[name], expected_slice[name], check_default_indexes=False
        )

def test_drop_dims(self) -> None:
data = xr.Dataset(
{
Expand Down
21 changes: 19 additions & 2 deletions xarray/tests/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,15 +109,32 @@ def test_group_indexers_by_index(self) -> None:
assert indexers == {"y": 0}
assert len(grouped_indexers) == 3

with pytest.raises(KeyError, match=r"no index found for coordinate 'y2'"):
indexing.group_indexers_by_index(data, {"y2": 2.0}, {})
with pytest.raises(
KeyError, match=r"'w' is not a valid dimension or coordinate"
):
indexing.group_indexers_by_index(data, {"w": "a"}, {})
with pytest.raises(ValueError, match=r"cannot supply.*"):
indexing.group_indexers_by_index(data, {"z": 1}, {"method": "nearest"})

def test_group_indexers_by_index_creates_index_for_unindexed_coord(self) -> None:
    """Check that grouping indexers for an index-less coordinate yields a PandasIndex."""
    data = DataArray(
        np.zeros((2, 3)),
        coords={"x": [0, 1], "y": [10, 20, 30]},
        dims=("x", "y"),
    )
    # Attach ``y2`` along ``y`` after construction; it is a coordinate
    # but is not backed by any index.
    data.coords["y2"] = ("y", [2.0, 3.0, 4.0])
    assert "y2" in data.coords
    assert "y2" not in data.xindexes

    # The grouping step is expected to build a PandasIndex on the fly.
    groups = indexing.group_indexers_by_index(data, {"y2": 3.0}, {})
    assert len(groups) == 1

    created_index, labels = groups[0]
    assert isinstance(created_index, PandasIndex)
    assert labels == {"y2": 3.0}

def test_map_index_queries(self) -> None:
def create_sel_results(
x_indexer,
Expand Down
Loading