From 69468cd2e591312fe49c0dca0a949e7ce19e8697 Mon Sep 17 00:00:00 2001 From: arthurlw Date: Fri, 21 Mar 2025 11:53:14 -0700 Subject: [PATCH 01/10] updated indexing.py to allow iloc.__getitem__ --- pandas/core/indexing.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index bcb27d0320c91..7bfafb3e17536 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1582,11 +1582,7 @@ def _validate_key(self, key, axis: AxisInt) -> None: if com.is_bool_indexer(key): if hasattr(key, "index") and isinstance(key.index, Index): if key.index.inferred_type == "integer": - raise NotImplementedError( - "iLocation based boolean " - "indexing on an integer type " - "is not available" - ) + return raise ValueError( "iLocation based boolean indexing cannot use an indexable as a mask" ) From ded44cb8e6bdde2a6a12b0317f8cc2f46641c354 Mon Sep 17 00:00:00 2001 From: arthurlw Date: Fri, 21 Mar 2025 12:07:47 -0700 Subject: [PATCH 02/10] Updated test_iloc_mask test --- pandas/tests/indexing/test_iloc.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 2f6998a85c80b..c95d607bc3438 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -726,16 +726,17 @@ def test_iloc_setitem_with_scalar_index(self, indexer, value): @pytest.mark.filterwarnings("ignore::UserWarning") def test_iloc_mask(self): - # GH 3631, iloc with a mask (of a series) should raise + # GH 60994, iloc with a mask (of a series) should return accordingly df = DataFrame(list(range(5)), index=list("ABCDE"), columns=["a"]) mask = df.a % 2 == 0 msg = "iLocation based boolean indexing cannot use an indexable as a mask" with pytest.raises(ValueError, match=msg): df.iloc[mask] + mask.index = range(len(mask)) - msg = "iLocation based boolean indexing on an integer type is not available" - with pytest.raises(NotImplementedError, match=msg): - df.iloc[mask] + result = df.iloc[mask] + expected = df.iloc[[0, 2, 4]] + tm.assert_frame_equal(result, expected) # ndarray ok result = df.iloc[np.array([True] * len(mask), dtype=bool)] @@ -753,18 +754,14 @@ def test_iloc_mask(self): (None, ".iloc"): "0b1100", ("index", ""): "0b11", ("index", ".loc"): "0b11", - ("index", ".iloc"): ( - "iLocation based boolean indexing cannot use an indexable as a mask" - ), + ("index", ".iloc"): "0b11", ("locs", ""): "Unalignable boolean Series provided as indexer " "(index of the boolean Series and of the indexed " "object do not match).", ("locs", ".loc"): "Unalignable boolean Series provided as indexer " "(index of the boolean Series and of the " "indexed object do not match).", - ("locs", ".iloc"): ( - "iLocation based boolean indexing on an integer type is not available" - ), + ("locs", ".iloc"): "0b1", } # UserWarnings from reindex of a boolean mask @@ -780,7 +777,10 @@ def test_iloc_mask(self): else: accessor = df answer = str(bin(accessor[mask]["nums"].sum())) - except (ValueError, IndexingError, NotImplementedError) as err: + except ( + ValueError, + IndexingError, + ) as err: answer = str(err) key = ( From b4d58e1b397b39ced71454fab59a4a7d0dc8ecc5 Mon Sep 17 00:00:00 2001 From: arthurlw Date: Fri, 21 Mar 2025 12:42:21 -0700 Subject: [PATCH 03/10] bugfix test_iloc_mask test --- pandas/tests/indexing/test_iloc.py | 38 ++++++++++++++++++------------ 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index c95d607bc3438..510ec8260786f 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -735,8 +735,9 @@ def test_iloc_mask(self): mask.index = range(len(mask)) result = df.iloc[mask] - expected = df.iloc[[0, 2, 4]] - tm.assert_frame_equal(result, expected) + msg = "Unalignable boolean Series provided as indexer" + with pytest.raises(IndexingError, match=msg): + df.iloc[mask] # ndarray ok result = df.iloc[np.array([True] * len(mask), dtype=bool)] @@ -754,21 +755,20 @@ def test_iloc_mask(self): (None, ".iloc"): "0b1100", ("index", ""): "0b11", ("index", ".loc"): "0b11", - ("index", ".iloc"): "0b11", - ("locs", ""): "Unalignable boolean Series provided as indexer " - "(index of the boolean Series and of the indexed " - "object do not match).", - ("locs", ".loc"): "Unalignable boolean Series provided as indexer " - "(index of the boolean Series and of the " - "indexed object do not match).", - ("locs", ".iloc"): "0b1", + ( + "index", + ".iloc", + ): "iLocation based boolean indexing cannot use an indexable as a mask", + ("locs", ""): "Unalignable boolean Series provided as indexer", + ("locs", ".loc"): "Unalignable boolean Series provided as indexer", + ("locs", ".iloc"): "Unalignable boolean Series provided as indexer", } # UserWarnings from reindex of a boolean mask for idx in [None, "index", "locs"]: mask = (df.nums > 2).values if idx: - mask_index = getattr(df, idx)[::-1] + mask_index = getattr(df, idx if idx == "index" else "locs")[::-1] mask = Series(mask, list(mask_index)) for method in ["", ".loc", ".iloc"]: try: @@ -787,11 +787,19 @@ def test_iloc_mask(self): idx, method, ) - r = expected.get(key) - if r != answer: - raise AssertionError( - f"[{key}] does not match [{answer}], received [{r}]" + expected_result = expected.get(key) + + # Fix the assertion to check for substring match + if ( + idx is None or (idx == "index" and method != ".iloc") + ) and "0b" in expected_result: + # For successful numeric results, exact match is needed + assert expected_result == answer, ( + f"[{key}] does not match [{answer}]" ) + else: + # For error messages, substring match is sufficient + assert expected_result in answer, f"[{key}] not found in [{answer}]" def test_iloc_non_unique_indexing(self): # GH 4017, non-unique indexing (on the axis) From 326b91cbc89d05ad3b82202c1c62e4ef31ae8aac Mon Sep 17 00:00:00 2001 From: arthurlw Date: Fri, 21 Mar 2025 13:46:31 -0700 Subject: [PATCH 04/10] bugfix test_iloc_mask --- pandas/tests/indexing/test_iloc.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 510ec8260786f..def550ff410a0 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -734,7 +734,6 @@ def test_iloc_mask(self): df.iloc[mask] mask.index = range(len(mask)) - result = df.iloc[mask] msg = "Unalignable boolean Series provided as indexer" with pytest.raises(IndexingError, match=msg): df.iloc[mask] From 2c8174c76aded2402535e1c714bcd84a7c3d34f0 Mon Sep 17 00:00:00 2001 From: arthurlw Date: Fri, 21 Mar 2025 14:57:34 -0700 Subject: [PATCH 05/10] whatsnew --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index b4aa6447c0a1b..e9458046f6cde 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -68,6 +68,7 @@ Other enhancements - :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to :meth:`pyarrow.Table.to_pandas` which enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as python dictionaries (:issue:`56842`) - :meth:`.DataFrameGroupBy.transform`, :meth:`.SeriesGroupBy.transform`, :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, :meth:`.SeriesGroupBy.apply`, :meth:`.DataFrameGroupBy.apply` now support ``kurt`` (:issue:`40139`) - :meth:`DataFrame.apply` supports using third-party execution engines like the Bodo.ai JIT compiler (:issue:`60668`) +- :meth:`DataFrame.iloc` and :meth:`Series.iloc` now support boolean masks in ``__getitem__`` for more consistent indexing behavior (:issue:`60994`) - :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`) - :meth:`Rolling.agg`, :meth:`Expanding.agg` and :meth:`ExponentialMovingWindow.agg` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`) - :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`) From 9345465f114bf9bbcfc5f1ff596fcbb7486b02da Mon Sep 17 00:00:00 2001 From: arthurlw Date: Sat, 29 Mar 2025 13:46:29 -0700 Subject: [PATCH 06/10] added test to test_iloc_mask --- pandas/tests/indexing/test_iloc.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index def550ff410a0..e88eac79239b4 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -742,6 +742,9 @@ def test_iloc_mask(self): result = df.iloc[np.array([True] * len(mask), dtype=bool)] tm.assert_frame_equal(result, df) + result2 = df.iloc[np.array([True, False, True, False, True], dtype=bool)] + tm.assert_frame_equal(result2, DataFrame({"a": [0, 2, 4]}, index=["A", "C", "E"])) + # the possibilities locs = np.arange(4) nums = 2**locs From 7533f64dfad439b256714c56203c8031e96cc02f Mon Sep 17 00:00:00 2001 From: arthurlw Date: Sat, 29 Mar 2025 13:47:51 -0700 Subject: [PATCH 07/10] formatting --- pandas/tests/indexing/test_iloc.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index e88eac79239b4..0f0611d13243b 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -770,7 +770,7 @@ def test_iloc_mask(self): for idx in [None, "index", "locs"]: mask = (df.nums > 2).values if idx: - mask_index = getattr(df, idx if idx == "index" else "locs")[::-1] + mask_index = getattr(df, idx)[::-1] mask = Series(mask, list(mask_index)) for method in ["", ".loc", ".iloc"]: try: @@ -779,10 +779,7 @@ def test_iloc_mask(self): else: accessor = df answer = str(bin(accessor[mask]["nums"].sum())) - except ( - ValueError, - IndexingError, - ) as err: + except (ValueError, IndexingError) as err: answer = str(err) key = ( From 35bf005cdef7aba53787899e163bbd3bcce81ff6 Mon Sep 17 00:00:00 2001 From: arthurlw Date: Sat, 29 Mar 2025 13:54:07 -0700 Subject: [PATCH 08/10] precommit --- pandas/tests/indexing/test_iloc.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 0f0611d13243b..7e4bf73de8b6b 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -743,7 +743,9 @@ def test_iloc_mask(self): tm.assert_frame_equal(result, df) result2 = df.iloc[np.array([True, False, True, False, True], dtype=bool)] - tm.assert_frame_equal(result2, DataFrame({"a": [0, 2, 4]}, index=["A", "C", "E"])) + tm.assert_frame_equal( + result2, DataFrame({"a": [0, 2, 4]}, index=["A", "C", "E"]) + ) # the possibilities locs = np.arange(4) From 6780260af737311b2047ac784618fa94a5fea48f Mon Sep 17 00:00:00 2001 From: arthurlw Date: Sat, 5 Apr 2025 02:06:52 -0700 Subject: [PATCH 09/10] added tests for series bool mask --- pandas/tests/indexing/test_iloc.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 7e4bf73de8b6b..fc057d3a23a90 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -742,11 +742,28 @@ def test_iloc_mask(self): result = df.iloc[np.array([True] * len(mask), dtype=bool)] tm.assert_frame_equal(result, df) - result2 = df.iloc[np.array([True, False, True, False, True], dtype=bool)] + result = df.iloc[np.array([True, False, True, False, True], dtype=bool)] tm.assert_frame_equal( - result2, DataFrame({"a": [0, 2, 4]}, index=["A", "C", "E"]) + result, DataFrame({"a": [0, 2, 4]}, index=["A", "C", "E"]) ) + # series (index does not match) + msg = "Unalignable boolean Series provided as indexer" + with pytest.raises(IndexingError, match=msg): + df.iloc[Series([True] * len(mask), dtype=bool)] + + df = DataFrame(list(range(5)), columns=["a"]) + + result = df.iloc[Series([True] * len(mask), dtype=bool)] + tm.assert_frame_equal(result, df) + + result = df.iloc[Series([True, False, True, False, True], dtype=bool)] + tm.assert_frame_equal( + result, DataFrame({"a": [0, 2, 4]}, index=[0, 2, 4]) + ) + + df = DataFrame(list(range(5)), index=list("ABCDE"), columns=["a"]) + # the possibilities locs = np.arange(4) nums = 2**locs From 1c92fc8786d46defd85fb3b781d1145066dd4df5 Mon Sep 17 00:00:00 2001 From: arthurlw Date: Sat, 5 Apr 2025 14:25:40 -0700 Subject: [PATCH 10/10] precommit --- pandas/tests/indexing/test_iloc.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index fc057d3a23a90..b4b5ce3a34def 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -758,9 +758,7 @@ def test_iloc_mask(self): tm.assert_frame_equal(result, df) result = df.iloc[Series([True, False, True, False, True], dtype=bool)] - tm.assert_frame_equal( - result, DataFrame({"a": [0, 2, 4]}, index=[0, 2, 4]) - ) + tm.assert_frame_equal(result, DataFrame({"a": [0, 2, 4]}, index=[0, 2, 4])) df = DataFrame(list(range(5)), index=list("ABCDE"), columns=["a"])