@@ -283,13 +283,60 @@ def test_contains_nan(any_string_dtype):
283
283
284
284
def test_contains_compiled_regex(any_string_dtype):
    # GH#61942
    # Exercise ``str.contains`` with a pre-compiled pattern, alone and combined
    # with the ``case`` / ``flags`` keywords.
    if is_object_or_nan_string_dtype(any_string_dtype):
        expected_dtype = np.bool_
    else:
        expected_dtype = "boolean"
    flags_msg = "cannot process flags argument with a compiled pattern"

    ser = Series(["foo", "bar", "Baz"], dtype=any_string_dtype)

    # Pattern compiled without flags: "Baz" is not expected to match.
    sensitive = re.compile("ba.")
    tm.assert_series_equal(
        ser.str.contains(sensitive),
        Series([False, True, False], dtype=expected_dtype),
    )

    # TODO this currently works for pyarrow-backed dtypes but raises for python
    pyarrow_string = (
        any_string_dtype == "string" and any_string_dtype.storage == "pyarrow"
    )
    if not pyarrow_string:
        with pytest.raises(ValueError, match=flags_msg):
            ser.str.contains(sensitive, case=False)
    else:
        tm.assert_series_equal(
            ser.str.contains(sensitive, case=False),
            Series([False, True, True], dtype=expected_dtype),
        )

    # IGNORECASE carried by the compiled pattern itself: "Baz" now matches.
    insensitive = re.compile("ba.", flags=re.IGNORECASE)
    tm.assert_series_equal(
        ser.str.contains(insensitive),
        Series([False, True, True], dtype=expected_dtype),
    )

    # TODO should this be supported?
    with pytest.raises(ValueError, match=flags_msg):
        ser.str.contains(insensitive, flags=re.IGNORECASE)
318
+
289
319
320
def test_contains_compiled_regex_flags(any_string_dtype):
    # ensure other (than ignorecase) flags are respected
    expected_dtype = (
        np.bool_ if is_object_or_nan_string_dtype(any_string_dtype) else "boolean"
    )

    # The second element must be "foo" + newline + "bar" with nothing between
    # the newline and "bar"; otherwise "^ba" cannot match at the start of the
    # second line under MULTILINE and the expectations below would not hold.
    ser = Series(["foobar", "foo\nbar", "Baz"], dtype=any_string_dtype)

    # Without flags "^" anchors only at the start of the string: no matches.
    pat = re.compile("^ba")
    result = ser.str.contains(pat)
    expected = Series([False, False, False], dtype=expected_dtype)
    tm.assert_series_equal(result, expected)

    # MULTILINE lets "^" match right after the newline in the second element.
    pat = re.compile("^ba", flags=re.MULTILINE)
    result = ser.str.contains(pat)
    expected = Series([False, True, False], dtype=expected_dtype)
    tm.assert_series_equal(result, expected)

    # Adding IGNORECASE additionally lets "Baz" match.
    pat = re.compile("^ba", flags=re.MULTILINE | re.IGNORECASE)
    result = ser.str.contains(pat)
    expected = Series([False, True, True], dtype=expected_dtype)
    tm.assert_series_equal(result, expected)
295
342
@@ -833,14 +880,36 @@ def test_match_case_kwarg(any_string_dtype):
833
880
834
881
def test_match_compiled_regex(any_string_dtype):
    # GH#61952
    # Exercise ``str.match`` with a pre-compiled pattern, alone and combined
    # with the ``case`` / ``flags`` keywords.
    if is_object_or_nan_string_dtype(any_string_dtype):
        expected_dtype = np.bool_
    else:
        expected_dtype = "boolean"
    flags_msg = "cannot process flags argument with a compiled pattern"

    values = Series(["ab", "AB", "abc", "ABC"], dtype=any_string_dtype)

    # Pattern compiled without flags: only the lowercase entries are expected.
    tm.assert_series_equal(
        values.str.match(re.compile("ab")),
        Series([True, False, True, False], dtype=expected_dtype),
    )

    # TODO this currently works for pyarrow-backed dtypes but raises for python
    pyarrow_string = (
        any_string_dtype == "string" and any_string_dtype.storage == "pyarrow"
    )
    if not pyarrow_string:
        with pytest.raises(ValueError, match=flags_msg):
            values.str.match(re.compile("ab"), case=False)
    else:
        tm.assert_series_equal(
            values.str.match(re.compile("ab"), case=False),
            Series([True, True, True, True], dtype=expected_dtype),
        )

    # IGNORECASE carried by the compiled pattern itself: every entry matches.
    insensitive = re.compile("ab", flags=re.IGNORECASE)
    tm.assert_series_equal(
        values.str.match(insensitive),
        Series([True, True, True, True], dtype=expected_dtype),
    )

    # A separate ``flags`` argument next to a compiled pattern is rejected.
    with pytest.raises(ValueError, match=flags_msg):
        values.str.match(re.compile("ab"), flags=re.IGNORECASE)
912
+
844
913
845
914
# --------------------------------------------------------------------------------------
846
915
# str.fullmatch
@@ -913,14 +982,36 @@ def test_fullmatch_case_kwarg(any_string_dtype):
913
982
914
983
def test_fullmatch_compiled_regex(any_string_dtype):
    # GH#61952
    # Exercise ``str.fullmatch`` with a pre-compiled pattern, alone and combined
    # with the ``case`` / ``flags`` keywords.
    if is_object_or_nan_string_dtype(any_string_dtype):
        expected_dtype = np.bool_
    else:
        expected_dtype = "boolean"
    flags_msg = "cannot process flags argument with a compiled pattern"

    values = Series(["ab", "AB", "abc", "ABC"], dtype=any_string_dtype)

    # Pattern compiled without flags: only the exact lowercase "ab" is expected.
    tm.assert_series_equal(
        values.str.fullmatch(re.compile("ab")),
        Series([True, False, False, False], dtype=expected_dtype),
    )

    # TODO this currently works for pyarrow-backed dtypes but raises for python
    pyarrow_string = (
        any_string_dtype == "string" and any_string_dtype.storage == "pyarrow"
    )
    if not pyarrow_string:
        with pytest.raises(ValueError, match=flags_msg):
            values.str.fullmatch(re.compile("ab"), case=False)
    else:
        tm.assert_series_equal(
            values.str.fullmatch(re.compile("ab"), case=False),
            Series([True, True, False, False], dtype=expected_dtype),
        )

    # IGNORECASE carried by the compiled pattern itself: "ab" and "AB" match.
    insensitive = re.compile("ab", flags=re.IGNORECASE)
    tm.assert_series_equal(
        values.str.fullmatch(insensitive),
        Series([True, True, False, False], dtype=expected_dtype),
    )

    # A separate ``flags`` argument next to a compiled pattern is rejected.
    with pytest.raises(ValueError, match=flags_msg):
        values.str.fullmatch(re.compile("ab"), flags=re.IGNORECASE)
1014
+
924
1015
925
1016
# --------------------------------------------------------------------------------------
926
1017
# str.findall
0 commit comments