diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index b4aa6447c0a1b..e9458046f6cde 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -68,6 +68,7 @@ Other enhancements - :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to :meth:`pyarrow.Table.to_pandas` which enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as python dictionaries (:issue:`56842`) - :meth:`.DataFrameGroupBy.transform`, :meth:`.SeriesGroupBy.transform`, :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, :meth:`.SeriesGroupBy.apply`, :meth:`.DataFrameGroupBy.apply` now support ``kurt`` (:issue:`40139`) - :meth:`DataFrame.apply` supports using third-party execution engines like the Bodo.ai JIT compiler (:issue:`60668`) +- :meth:`DataFrame.iloc` and :meth:`Series.iloc` now support boolean masks in ``__getitem__`` for more consistent indexing behavior (:issue:`60994`) - :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`) - :meth:`Rolling.agg`, :meth:`Expanding.agg` and :meth:`ExponentialMovingWindow.agg` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`) - :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index bcb27d0320c91..7bfafb3e17536 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1582,11 +1582,7 @@ def _validate_key(self, key, axis: AxisInt) -> None: if com.is_bool_indexer(key): if hasattr(key, "index") and isinstance(key.index, Index): if key.index.inferred_type == "integer": - raise NotImplementedError( - "iLocation based boolean " - "indexing on an integer type " - "is not available" - ) + return raise ValueError( "iLocation based boolean indexing cannot use an indexable as a mask" ) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 2f6998a85c80b..fc057d3a23a90 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -726,21 +726,44 @@ def test_iloc_setitem_with_scalar_index(self, indexer, value): @pytest.mark.filterwarnings("ignore::UserWarning") def test_iloc_mask(self): - # GH 3631, iloc with a mask (of a series) should raise + # GH 60994, iloc with a mask (of a series) should return accordingly df = DataFrame(list(range(5)), index=list("ABCDE"), columns=["a"]) mask = df.a % 2 == 0 msg = "iLocation based boolean indexing cannot use an indexable as a mask" with pytest.raises(ValueError, match=msg): df.iloc[mask] + mask.index = range(len(mask)) - msg = "iLocation based boolean indexing on an integer type is not available" - with pytest.raises(NotImplementedError, match=msg): + msg = "Unalignable boolean Series provided as indexer" + with pytest.raises(IndexingError, match=msg): df.iloc[mask] # ndarray ok result = df.iloc[np.array([True] * len(mask), dtype=bool)] tm.assert_frame_equal(result, df) + result = df.iloc[np.array([True, False, True, False, True], dtype=bool)] + tm.assert_frame_equal( + result, DataFrame({"a": [0, 2, 4]}, index=["A", "C", "E"]) + ) + + # series (index does not match) + msg = "Unalignable boolean Series provided as indexer" + with pytest.raises(IndexingError, match=msg): + df.iloc[Series([True] * len(mask), dtype=bool)] + + df = DataFrame(list(range(5)), columns=["a"]) + + result = df.iloc[Series([True] * len(mask), dtype=bool)] + tm.assert_frame_equal(result, df) + + result = df.iloc[Series([True, False, True, False, True], dtype=bool)] + tm.assert_frame_equal( + result, DataFrame({"a": [0, 2, 4]}, index=[0, 2, 4]) + ) + + df = DataFrame(list(range(5)), index=list("ABCDE"), columns=["a"]) + # the possibilities locs = np.arange(4) nums = 2**locs @@ -753,18 +776,13 @@ def test_iloc_mask(self): (None, ".iloc"): "0b1100", ("index", ""): "0b11", ("index", ".loc"): "0b11", - ("index", ".iloc"): ( - "iLocation based boolean indexing cannot use an indexable as a mask" - ), - ("locs", ""): "Unalignable boolean Series provided as indexer " - "(index of the boolean Series and of the indexed " - "object do not match).", - ("locs", ".loc"): "Unalignable boolean Series provided as indexer " - "(index of the boolean Series and of the " - "indexed object do not match).", - ("locs", ".iloc"): ( - "iLocation based boolean indexing on an integer type is not available" - ), + ( + "index", + ".iloc", + ): "iLocation based boolean indexing cannot use an indexable as a mask", + ("locs", ""): "Unalignable boolean Series provided as indexer", + ("locs", ".loc"): "Unalignable boolean Series provided as indexer", + ("locs", ".iloc"): "Unalignable boolean Series provided as indexer", } # UserWarnings from reindex of a boolean mask @@ -780,18 +798,26 @@ def test_iloc_mask(self): else: accessor = df answer = str(bin(accessor[mask]["nums"].sum())) - except (ValueError, IndexingError, NotImplementedError) as err: + except (ValueError, IndexingError) as err: answer = str(err) key = ( idx, method, ) - r = expected.get(key) - if r != answer: - raise AssertionError( - f"[{key}] does not match [{answer}], received [{r}]" + expected_result = expected.get(key) + + # Fix the assertion to check for substring match + if ( + idx is None or (idx == "index" and method != ".iloc") + ) and "0b" in expected_result: + # For successful numeric results, exact match is needed + assert expected_result == answer, ( + f"[{key}] does not match [{answer}]" ) + else: + # For error messages, substring match is sufficient + assert expected_result in answer, f"[{key}] not found in [{answer}]" def test_iloc_non_unique_indexing(self): # GH 4017, non-unique indexing (on the axis)