Skip to content

Commit

Permalink
Merge branch 'feature/fallback-debugging' of github.com:Matt711/cudf …
Browse files Browse the repository at this point in the history
…into branch-24.08
  • Loading branch information
Matt711 committed Jun 17, 2024
2 parents 64931fc + 1c4c163 commit bb60845
Show file tree
Hide file tree
Showing 2 changed files with 226 additions and 68 deletions.
98 changes: 92 additions & 6 deletions python/cudf/cudf/pandas/fast_slow_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,69 @@

import numpy as np

from rmm import RMMError

from ..options import _env_get_bool
from ..testing._utils import assert_eq
from .annotation import nvtx


class CudfPandasDebugWarning(UserWarning):
    """Base warning for cudf.pandas debugging-mode diagnostics.

    Raised (through its subclasses) when the cuDF and pandas results
    differ, when the pandas result could not be computed, or when the
    comparison itself failed. Filter on this class to catch all of them.
    """


class CudfPandasResultsDifferentWarning(CudfPandasDebugWarning):
    """Warns when the results from cuDF and pandas were different."""


class CudfPandasPandasErrorWarning(CudfPandasDebugWarning):
    """Warns when the result from pandas could not be computed."""


class CudfPandasDebuggingFailedWarning(CudfPandasDebugWarning):
    """Warns when the cuDF-pandas debugging comparison itself fails."""


class CudfPandasDebugFallbackWarning(UserWarning):
    """Base warning for when fallback to the slow path occurs."""


class CudfPandasDebugOOMWarning(CudfPandasDebugFallbackWarning):
    """Warns when cuDF produces a MemoryError or an rmm.RMMError."""


class CudfPandasDebugNotImplementedErrorWarning(
    CudfPandasDebugFallbackWarning
):
    """Warns when cuDF produces a NotImplementedError."""


class CudfPandasDebugAttributeErrorWarning(CudfPandasDebugFallbackWarning):
    """Warns when cuDF produces an AttributeError."""


class CudfPandasDebugTypeErrorWarning(CudfPandasDebugFallbackWarning):
    """Warns when cuDF produces a TypeError."""


def call_operator(fn, args, kwargs):
    """Invoke ``fn`` with positional ``args`` and keyword ``kwargs``.

    ``args`` is unpacked as positional arguments and ``kwargs`` as keyword
    arguments; whatever ``fn`` returns is passed straight back.
    """
    outcome = fn(*args, **kwargs)
    return outcome

Expand Down Expand Up @@ -915,22 +973,50 @@ def _fast_slow_function_call(
except Exception as e:
warnings.warn(
"The result from pandas could not be computed. "
f"The exception was {e}."
f"The exception was {e}.",
CudfPandasPandasErrorWarning,
)
else:
try:
_assert_fast_slow_eq(result, slow_result)
_assert_fast_slow_eq(result, slow_result, **kwargs)
except AssertionError as e:
warnings.warn(
"The results from cudf and pandas were different. "
f"The exception was {e}."
f"The exception was {e}.",
CudfPandasResultsDifferentWarning,
)
except Exception as e:
warnings.warn(
"Pandas debugging mode failed. "
f"The exception was {e}."
"cuDF-Pandas debugging failed. "
f"The exception was {e}.",
CudfPandasDebuggingFailedWarning,
)
except Exception:
except Exception as e:
if _env_get_bool("CUDF_PANDAS_FALLBACK_DEBUGGING", False):
if isinstance(e, (RMMError, MemoryError)):
warnings.warn(
"Out of Memory Error. Falling back to the slow path. "
f"The exception was {e}.",
CudfPandasDebugOOMWarning,
)
elif isinstance(e, NotImplementedError):
warnings.warn(
"NotImplementedError. Falling back to the slow path. "
f"The exception was {e}.",
CudfPandasDebugNotImplementedErrorWarning,
)
elif isinstance(e, AttributeError):
warnings.warn(
"AttributeError. Falling back to the slow path. "
f"The exception was {e}.",
CudfPandasDebugAttributeErrorWarning,
)
elif isinstance(e, TypeError):
warnings.warn(
"TypeError. Falling back to the slow path. "
f"The exception was {e}.",
CudfPandasDebugTypeErrorWarning,
)
with nvtx.annotate(
"EXECUTE_SLOW",
color=_CUDF_PANDAS_NVTX_COLORS["EXECUTE_SLOW"],
Expand Down
196 changes: 134 additions & 62 deletions python/cudf/cudf_pandas_tests/test_cudf_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,20 @@
from numba import NumbaDeprecationWarning
from pytz import utc

from rmm import RMMError

from cudf.pandas import LOADED, Profiler
from cudf.pandas.fast_slow_proxy import _Unusable, is_proxy_object
from cudf.pandas.fast_slow_proxy import (
CudfPandasDebugAttributeErrorWarning,
CudfPandasDebuggingFailedWarning,
CudfPandasDebugNotImplementedErrorWarning,
CudfPandasDebugOOMWarning,
CudfPandasDebugTypeErrorWarning,
CudfPandasPandasErrorWarning,
CudfPandasResultsDifferentWarning,
_Unusable,
is_proxy_object,
)

if not LOADED:
raise ImportError("These tests must be run with cudf.pandas loaded")
Expand Down Expand Up @@ -1429,6 +1441,127 @@ def test_holidays_within_dates(holiday, start, expected):
) == [utc.localize(dt) for dt in expected]


def mock_mean_one(self, *args, **kwargs):
    """Stand-in for a ``mean`` method that ignores its input and yields 1.0."""
    constant = np.float64(1.0)
    return constant


def mock_mean_exception(self, *args, **kwargs):
    """Stand-in for a ``mean`` method that always fails with a bare Exception."""
    raise Exception


def mock_mean_none(self, *args, **kwargs):
    """Stand-in for a ``mean`` method that ignores its input and yields None."""
    result = None
    return result


def mock_mean_memory_error(self, *args, **kwargs):
    """Stand-in for a ``mean`` method that always raises MemoryError."""
    raise MemoryError


def mock_mean_rmm_error(self, *args, **kwargs):
    """Stand-in for a ``mean`` method that always raises ``RMMError(1, "error")``."""
    failure = RMMError(1, "error")
    raise failure


def mock_mean_not_impl_error(self, *args, **kwargs):
    """Stand-in for a ``mean`` method that always raises NotImplementedError."""
    raise NotImplementedError


def mock_mean_attr_error(self, *args, **kwargs):
    """Stand-in for a ``mean`` method that always raises AttributeError."""
    raise AttributeError


def mock_mean_type_error(self, *args, **kwargs):
    """Stand-in for a ``mean`` method that always raises TypeError."""
    raise TypeError


@pytest.mark.parametrize(
    "mock_mean, warning, match_str, env_var, original_mean, proxy_attr",
    [
        # Debugging mode: the slow (pandas) side is replaced.
        (
            mock_mean_one,
            CudfPandasResultsDifferentWarning,
            "The results from cudf and pandas were different.",
            "CUDF_PANDAS_DEBUGGING",
            pd.Series.mean,
            "_fsproxy_slow",
        ),
        (
            mock_mean_exception,
            CudfPandasPandasErrorWarning,
            "The result from pandas could not be computed.",
            "CUDF_PANDAS_DEBUGGING",
            pd.Series.mean,
            "_fsproxy_slow",
        ),
        (
            mock_mean_none,
            CudfPandasDebuggingFailedWarning,
            "cuDF-Pandas debugging failed.",
            "CUDF_PANDAS_DEBUGGING",
            pd.Series.mean,
            "_fsproxy_slow",
        ),
        # Fallback debugging mode: the fast (cuDF) side is replaced.
        (
            mock_mean_memory_error,
            CudfPandasDebugOOMWarning,
            "Out of Memory Error.",
            "CUDF_PANDAS_FALLBACK_DEBUGGING",
            cudf.Series.mean,
            "_fsproxy_fast",
        ),
        (
            mock_mean_rmm_error,
            CudfPandasDebugOOMWarning,
            "Out of Memory Error.",
            "CUDF_PANDAS_FALLBACK_DEBUGGING",
            cudf.Series.mean,
            "_fsproxy_fast",
        ),
        (
            mock_mean_not_impl_error,
            CudfPandasDebugNotImplementedErrorWarning,
            "NotImplementedError.",
            "CUDF_PANDAS_FALLBACK_DEBUGGING",
            cudf.Series.mean,
            "_fsproxy_fast",
        ),
        (
            mock_mean_attr_error,
            CudfPandasDebugAttributeErrorWarning,
            "AttributeError.",
            "CUDF_PANDAS_FALLBACK_DEBUGGING",
            cudf.Series.mean,
            "_fsproxy_fast",
        ),
        (
            mock_mean_type_error,
            CudfPandasDebugTypeErrorWarning,
            "TypeError.",
            "CUDF_PANDAS_FALLBACK_DEBUGGING",
            cudf.Series.mean,
            "_fsproxy_fast",
        ),
    ],
)
def test_cudf_pandas_debugging(
    monkeypatch,
    mock_mean,
    warning,
    match_str,
    env_var,
    original_mean,
    proxy_attr,
):
    """Check the warning category and message emitted in each debug mode.

    One side of the proxied ``Series.mean`` (named by ``proxy_attr``) is
    swapped for ``mock_mean`` and ``env_var`` is set to ``"True"``. The
    call must then emit ``warning`` with a message matching ``match_str``
    while still returning 1.5.
    """
    with monkeypatch.context() as env_patch:
        # The attribute is patched through the fixture itself, not the
        # context object; only the environment variable is context-scoped.
        monkeypatch.setattr(xpd.Series.mean, proxy_attr, mock_mean)
        env_patch.setenv(env_var, "True")
        series = xpd.Series([1, 2])
        with pytest.warns(warning, match=match_str):
            assert series.mean() == 1.5

    # Must explicitly undo the patch. Proxy dispatch doesn't work with monkeypatch contexts.
    monkeypatch.setattr(xpd.Series.mean, proxy_attr, original_mean)


@pytest.mark.parametrize(
"env_value",
["", "cuda", "pool", "async", "managed", "managed_pool", "abc"],
Expand Down Expand Up @@ -1456,67 +1589,6 @@ def test_rmm_option_on_import(env_value):
assert sp_completed.returncode == 1


def test_cudf_pandas_debugging_different_results(monkeypatch):
    """Debugging mode warns when the fast result differs from pandas.

    The fast side of the proxied ``Series.mean`` is replaced with a
    function returning 1.0, so the call returns 1.0 and a ``UserWarning``
    about differing results is expected.
    """
    original_fast_mean = cudf.Series.mean

    def mock_mean_one(self, *args, **kwargs):
        return np.float64(1.0)

    with monkeypatch.context() as env_patch:
        monkeypatch.setattr(xpd.Series.mean, "_fsproxy_fast", mock_mean_one)
        env_patch.setenv("CUDF_PANDAS_DEBUGGING", "True")
        series = xpd.Series([1, 2])
        expected_message = "The results from cudf and pandas were different."
        with pytest.warns(UserWarning, match=expected_message):
            assert series.mean() == 1.0
    # Must explicitly undo the patch. Proxy dispatch doesn't work with monkeypatch contexts.
    monkeypatch.setattr(xpd.Series.mean, "_fsproxy_fast", original_fast_mean)


def test_cudf_pandas_debugging_pandas_error(monkeypatch):
    """Debugging mode warns when the pandas side raises.

    The slow side of the proxied ``Series.mean`` is replaced with a
    function that raises, so a ``UserWarning`` about the uncomputable
    pandas result is expected while the call still returns 1.5.
    """
    original_slow_mean = pd.Series.mean

    def mock_mean_exception(self, *args, **kwargs):
        raise Exception()

    with monkeypatch.context() as ctx:
        ctx.setattr(xpd.Series.mean, "_fsproxy_slow", mock_mean_exception)
        ctx.setenv("CUDF_PANDAS_DEBUGGING", "True")
        s = xpd.Series([1, 2])
        expected_message = "The result from pandas could not be computed."
        with pytest.warns(UserWarning, match=expected_message):
            s = xpd.Series([1, 2])
            assert s.mean() == 1.5
    # Must explicitly undo the patch. Proxy dispatch doesn't work with monkeypatch contexts.
    monkeypatch.setattr(xpd.Series.mean, "_fsproxy_slow", original_slow_mean)


def test_cudf_pandas_debugging_failed(monkeypatch):
    """Debugging mode warns when the result comparison itself fails.

    The slow side of the proxied ``Series.mean`` is replaced with a
    function returning ``None``; a ``UserWarning`` matching
    "Pandas debugging mode failed." is expected while the call still
    returns 1.5.
    """
    original_slow_mean = pd.Series.mean

    def mock_mean_none(self, *args, **kwargs):
        return None

    with monkeypatch.context() as ctx:
        ctx.setattr(xpd.Series.mean, "_fsproxy_slow", mock_mean_none)
        ctx.setenv("CUDF_PANDAS_DEBUGGING", "True")
        s = xpd.Series([1, 2])
        expected_message = "Pandas debugging mode failed."
        with pytest.warns(UserWarning, match=expected_message):
            s = xpd.Series([1, 2])
            assert s.mean() == 1.5
    # Must explicitly undo the patch. Proxy dispatch doesn't work with monkeypatch contexts.
    monkeypatch.setattr(xpd.Series.mean, "_fsproxy_slow", original_slow_mean)


def test_excelwriter_pathlike():
    """An ``ExcelWriter`` built from a path must be an ``os.PathLike``."""
    writer = pd.ExcelWriter("foo.xlsx")
    assert isinstance(writer, os.PathLike)

Expand Down

0 comments on commit bb60845

Please sign in to comment.