diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py
index 1540c6850e7..a7da1b37126 100644
--- a/python/cudf/cudf/pandas/fast_slow_proxy.py
+++ b/python/cudf/cudf/pandas/fast_slow_proxy.py
@@ -16,11 +16,69 @@
 
 import numpy as np
 
+from rmm import RMMError
+
 from ..options import _env_get_bool
 from ..testing._utils import assert_eq
 from .annotation import nvtx
 
 
+class CudfPandasDebugWarning(UserWarning):
+    """Base warning for the cuDF-Pandas result-comparison debugging mode."""
+
+    pass
+
+
+class CudfPandasResultsDifferentWarning(CudfPandasDebugWarning):
+    """Warns when the results from cuDF and Pandas were different"""
+
+    pass
+
+
+class CudfPandasPandasErrorWarning(CudfPandasDebugWarning):
+    """Warns when the results from Pandas could not be computed"""
+
+    pass
+
+
+class CudfPandasDebuggingFailedWarning(CudfPandasDebugWarning):
+    """Warns when the cuDF-Pandas debugging fails"""
+
+    pass
+
+
+class CudfPandasDebugFallbackWarning(UserWarning):
+    """Base warning for when fallback occurs"""
+
+    pass
+
+
+class CudfPandasDebugOOMWarning(CudfPandasDebugFallbackWarning):
+    """Warns when cuDF produces a MemoryError or an rmm.RMMError"""
+
+    pass
+
+
+class CudfPandasDebugNotImplementedErrorWarning(
+    CudfPandasDebugFallbackWarning
+):
+    """Warns when cuDF produces a NotImplementedError"""
+
+    pass
+
+
+class CudfPandasDebugAttributeErrorWarning(CudfPandasDebugFallbackWarning):
+    """Warns when cuDF produces an AttributeError"""
+
+    pass
+
+
+class CudfPandasDebugTypeErrorWarning(CudfPandasDebugFallbackWarning):
+    """Warns when cuDF produces a TypeError"""
+
+    pass
+
+
 def call_operator(fn, args, kwargs):
     return fn(*args, **kwargs)
 
@@ -915,22 +973,50 @@ def _fast_slow_function_call(
                 except Exception as e:
                     warnings.warn(
                         "The result from pandas could not be computed. "
-                        f"The exception was {e}."
+                        f"The exception was {e}.",
+                        CudfPandasPandasErrorWarning,
                     )
                 else:
                     try:
                         _assert_fast_slow_eq(result, slow_result)
                     except AssertionError as e:
                         warnings.warn(
                             "The results from cudf and pandas were different. "
-                            f"The exception was {e}."
+                            f"The exception was {e}.",
+                            CudfPandasResultsDifferentWarning,
                         )
                     except Exception as e:
                         warnings.warn(
-                            "Pandas debugging mode failed. "
-                            f"The exception was {e}."
+                            "cuDF-Pandas debugging failed. "
+                            f"The exception was {e}.",
+                            CudfPandasDebuggingFailedWarning,
                         )
-    except Exception:
+    except Exception as e:
+        if _env_get_bool("CUDF_PANDAS_FALLBACK_DEBUGGING", False):
+            if isinstance(e, (RMMError, MemoryError)):
+                warnings.warn(
+                    "Out of Memory Error. Falling back to the slow path. "
+                    f"The exception was {e}.",
+                    CudfPandasDebugOOMWarning,
+                )
+            elif isinstance(e, NotImplementedError):
+                warnings.warn(
+                    "NotImplementedError. Falling back to the slow path. "
+                    f"The exception was {e}.",
+                    CudfPandasDebugNotImplementedErrorWarning,
+                )
+            elif isinstance(e, AttributeError):
+                warnings.warn(
+                    "AttributeError. Falling back to the slow path. "
+                    f"The exception was {e}.",
+                    CudfPandasDebugAttributeErrorWarning,
+                )
+            elif isinstance(e, TypeError):
+                warnings.warn(
+                    "TypeError. Falling back to the slow path. "
+                    f"The exception was {e}.",
+                    CudfPandasDebugTypeErrorWarning,
+                )
         with nvtx.annotate(
             "EXECUTE_SLOW",
             color=_CUDF_PANDAS_NVTX_COLORS["EXECUTE_SLOW"],
diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py
index 5be4d350c0b..7a765307542 100644
--- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py
+++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py
@@ -20,8 +20,20 @@
 from numba import NumbaDeprecationWarning
 from pytz import utc
 
+from rmm import RMMError
+
 from cudf.pandas import LOADED, Profiler
-from cudf.pandas.fast_slow_proxy import _Unusable, is_proxy_object
+from cudf.pandas.fast_slow_proxy import (
+    CudfPandasDebugAttributeErrorWarning,
+    CudfPandasDebuggingFailedWarning,
+    CudfPandasDebugNotImplementedErrorWarning,
+    CudfPandasDebugOOMWarning,
+    CudfPandasDebugTypeErrorWarning,
+    CudfPandasPandasErrorWarning,
+    CudfPandasResultsDifferentWarning,
+    _Unusable,
+    is_proxy_object,
+)
 
 if not LOADED:
     raise ImportError("These tests must be run with cudf.pandas loaded")
@@ -1429,6 +1441,127 @@ def test_holidays_within_dates(holiday, start, expected):
     ) == [utc.localize(dt) for dt in expected]
 
 
+def mock_mean_one(self, *args, **kwargs):
+    return np.float64(1.0)
+
+
+def mock_mean_exception(self, *args, **kwargs):
+    raise Exception()
+
+
+def mock_mean_none(self, *args, **kwargs):
+    return None
+
+
+def mock_mean_memory_error(self, *args, **kwargs):
+    raise MemoryError()
+
+
+def mock_mean_rmm_error(self, *args, **kwargs):
+    raise RMMError(1, "error")
+
+
+def mock_mean_not_impl_error(self, *args, **kwargs):
+    raise NotImplementedError()
+
+
+def mock_mean_attr_error(self, *args, **kwargs):
+    raise AttributeError()
+
+
+def mock_mean_type_error(self, *args, **kwargs):
+    raise TypeError()
+
+
+@pytest.mark.parametrize(
+    "mock_mean, warning, match_str, env_var, original_mean, proxy_attr",
+    [
+        (
+            mock_mean_one,
+            CudfPandasResultsDifferentWarning,
+            "The results from cudf and pandas were different.",
+            "CUDF_PANDAS_DEBUGGING",
+            pd.Series.mean,
+            "_fsproxy_slow",
+        ),
+        (
+            mock_mean_exception,
+            CudfPandasPandasErrorWarning,
+            "The result from pandas could not be computed.",
+            "CUDF_PANDAS_DEBUGGING",
+            pd.Series.mean,
+            "_fsproxy_slow",
+        ),
+        (
+            mock_mean_none,
+            CudfPandasDebuggingFailedWarning,
+            "cuDF-Pandas debugging failed.",
+            "CUDF_PANDAS_DEBUGGING",
+            pd.Series.mean,
+            "_fsproxy_slow",
+        ),
+        (
+            mock_mean_memory_error,
+            CudfPandasDebugOOMWarning,
+            "Out of Memory Error.",
+            "CUDF_PANDAS_FALLBACK_DEBUGGING",
+            cudf.Series.mean,
+            "_fsproxy_fast",
+        ),
+        (
+            mock_mean_rmm_error,
+            CudfPandasDebugOOMWarning,
+            "Out of Memory Error.",
+            "CUDF_PANDAS_FALLBACK_DEBUGGING",
+            cudf.Series.mean,
+            "_fsproxy_fast",
+        ),
+        (
+            mock_mean_not_impl_error,
+            CudfPandasDebugNotImplementedErrorWarning,
+            "NotImplementedError.",
+            "CUDF_PANDAS_FALLBACK_DEBUGGING",
+            cudf.Series.mean,
+            "_fsproxy_fast",
+        ),
+        (
+            mock_mean_attr_error,
+            CudfPandasDebugAttributeErrorWarning,
+            "AttributeError.",
+            "CUDF_PANDAS_FALLBACK_DEBUGGING",
+            cudf.Series.mean,
+            "_fsproxy_fast",
+        ),
+        (
+            mock_mean_type_error,
+            CudfPandasDebugTypeErrorWarning,
+            "TypeError.",
+            "CUDF_PANDAS_FALLBACK_DEBUGGING",
+            cudf.Series.mean,
+            "_fsproxy_fast",
+        ),
+    ],
+)
+def test_cudf_pandas_debugging(
+    monkeypatch,
+    mock_mean,
+    warning,
+    match_str,
+    env_var,
+    original_mean,
+    proxy_attr,
+):
+    with monkeypatch.context() as monkeycontext:
+        monkeypatch.setattr(xpd.Series.mean, proxy_attr, mock_mean)
+        monkeycontext.setenv(env_var, "True")
+        s = xpd.Series([1, 2])
+        with pytest.warns(warning, match=match_str):
+            assert s.mean() == 1.5
+
+    # Must explicitly undo the patch. Proxy dispatch doesn't work with monkeypatch contexts.
+    monkeypatch.setattr(xpd.Series.mean, proxy_attr, original_mean)
+
+
 @pytest.mark.parametrize(
     "env_value",
     ["", "cuda", "pool", "async", "managed", "managed_pool", "abc"],
@@ -1456,67 +1589,6 @@ def test_rmm_option_on_import(env_value):
     assert sp_completed.returncode == 1
 
 
-def test_cudf_pandas_debugging_different_results(monkeypatch):
-    cudf_mean = cudf.Series.mean
-
-    def mock_mean_one(self, *args, **kwargs):
-        return np.float64(1.0)
-
-    with monkeypatch.context() as monkeycontext:
-        monkeypatch.setattr(xpd.Series.mean, "_fsproxy_fast", mock_mean_one)
-        monkeycontext.setenv("CUDF_PANDAS_DEBUGGING", "True")
-        s = xpd.Series([1, 2])
-        with pytest.warns(
-            UserWarning,
-            match="The results from cudf and pandas were different.",
-        ):
-            assert s.mean() == 1.0
-    # Must explicitly undo the patch. Proxy dispatch doesn't work with monkeypatch contexts.
-    monkeypatch.setattr(xpd.Series.mean, "_fsproxy_fast", cudf_mean)
-
-
-def test_cudf_pandas_debugging_pandas_error(monkeypatch):
-    pd_mean = pd.Series.mean
-
-    def mock_mean_exception(self, *args, **kwargs):
-        raise Exception()
-
-    with monkeypatch.context() as monkeycontext:
-        monkeycontext.setattr(
-            xpd.Series.mean, "_fsproxy_slow", mock_mean_exception
-        )
-        monkeycontext.setenv("CUDF_PANDAS_DEBUGGING", "True")
-        s = xpd.Series([1, 2])
-        with pytest.warns(
-            UserWarning,
-            match="The result from pandas could not be computed.",
-        ):
-            s = xpd.Series([1, 2])
-            assert s.mean() == 1.5
-    # Must explicitly undo the patch. Proxy dispatch doesn't work with monkeypatch contexts.
-    monkeypatch.setattr(xpd.Series.mean, "_fsproxy_slow", pd_mean)
-
-
-def test_cudf_pandas_debugging_failed(monkeypatch):
-    pd_mean = pd.Series.mean
-
-    def mock_mean_none(self, *args, **kwargs):
-        return None
-
-    with monkeypatch.context() as monkeycontext:
-        monkeycontext.setattr(xpd.Series.mean, "_fsproxy_slow", mock_mean_none)
-        monkeycontext.setenv("CUDF_PANDAS_DEBUGGING", "True")
-        s = xpd.Series([1, 2])
-        with pytest.warns(
-            UserWarning,
-            match="Pandas debugging mode failed.",
-        ):
-            s = xpd.Series([1, 2])
-            assert s.mean() == 1.5
-    # Must explicitly undo the patch. Proxy dispatch doesn't work with monkeypatch contexts.
-    monkeypatch.setattr(xpd.Series.mean, "_fsproxy_slow", pd_mean)
-
-
 def test_excelwriter_pathlike():
     assert isinstance(pd.ExcelWriter("foo.xlsx"), os.PathLike)
 