diff --git a/bazel/requirements/BUILD.bazel b/bazel/requirements/BUILD.bazel
index 58d8003c..aca69d96 100644
--- a/bazel/requirements/BUILD.bazel
+++ b/bazel/requirements/BUILD.bazel
@@ -21,12 +21,14 @@ _GENERATE_TOOL = ":parse_and_generate_requirements"
 
 _GENERATE_COMMAND = "$(location " + _GENERATE_TOOL + ") $(location " + _SRC_REQUIREMENT_FILE + ") --schema $(location " + _SCHEMA_FILE + ") {options} > $@"
 
-_TEMPLATE_FOLDER_PATH = "//bazel/requirements/templates"
-
 _AUTOGEN_HEADERS = """# DO NOT EDIT!
 # Generate by running 'bazel run //bazel/requirements:sync_requirements'
 """
 
+# "---" is a YAML document start marker, which is valid but optional (https://yaml.org/spec/1.1/#c-document-start).
+# It is needed in the conda meta.yaml to work around a bug on the conda side.
+_YAML_START_DOCUMENT_MARKER = "---"
+
 _GENERATED_REQUIREMENTS_FILES = {
     "requirements_txt": {
         "cmd": "--mode dev_version --format text",
@@ -77,7 +79,7 @@ _GENERATED_REQUIREMENTS_FILES = {
             "{generated}.body".format(generated = value["generated"]),
         ],
         outs = [value["generated"]],
-        cmd = "(echo -e \""+ _AUTOGEN_HEADERS +"\" ; cat $(location :{generated}.body) ) > $@".format(
+        cmd = "(echo -e \"" + _AUTOGEN_HEADERS + "\" ; cat $(location :{generated}.body) ) > $@".format(
            generated = value["generated"],
        ),
        tools = [_GENERATE_TOOL],
@@ -99,15 +101,24 @@ genrule(
 )
 
 yq(
-    name = "gen_conda_meta",
+    name = "gen_conda_meta_body_format",
     srcs = [
         ":meta.body.yaml",
-        "{template_folder}:meta.tpl.yaml".format(template_folder = _TEMPLATE_FOLDER_PATH),
+        "//bazel/requirements/templates:meta.tpl.yaml",
     ],
-    outs = ["meta.yaml"],
+    outs = ["meta.body.formatted.yaml"],
     expression = ". as $item ireduce ({}; . * $item ) | sort_keys(..)",
 )
 
+genrule(
+    name = "gen_conda_meta",
+    srcs = [
+        ":meta.body.formatted.yaml",
+    ],
+    outs = ["meta.yaml"],
+    cmd = "(echo -e \"" + _AUTOGEN_HEADERS + "\" ; echo \"" + _YAML_START_DOCUMENT_MARKER + "\"; cat $(location :meta.body.formatted.yaml) ) > $@",
+)
+
 # Create a test target for each file that Bazel should
 # write to the source tree.
 [
diff --git a/bazel/requirements/templates/meta.tpl.yaml b/bazel/requirements/templates/meta.tpl.yaml
index 260f3955..f6f5be6c 100644
--- a/bazel/requirements/templates/meta.tpl.yaml
+++ b/bazel/requirements/templates/meta.tpl.yaml
@@ -1,9 +1,3 @@
-# DO NOT EDIT!
-# Generated by //bazel/requirements:gen_conda_meta
-# To update, run:
-# bazel run //bazel/requirements:sync_requirements
-#
-
 package:
   name: snowflake-ml-python
diff --git a/ci/conda_recipe/meta.yaml b/ci/conda_recipe/meta.yaml
index 607c0485..c9f3502d 100644
--- a/ci/conda_recipe/meta.yaml
+++ b/ci/conda_recipe/meta.yaml
@@ -1,8 +1,7 @@
 # DO NOT EDIT!
-# Generated by //bazel/requirements:gen_conda_meta
-# To update, run:
-# bazel run //bazel/requirements:sync_requirements
-#
+# Generate by running 'bazel run //bazel/requirements:sync_requirements'
+
+---
 about:
   description: |
     Snowflake ML client Library is used for interacting with Snowflake to build machine learning solutions.
diff --git a/ci/get_excluded_tests.sh b/ci/get_excluded_tests.sh
index b59057c7..199db075 100755
--- a/ci/get_excluded_tests.sh
+++ b/ci/get_excluded_tests.sh
@@ -12,7 +12,7 @@
 # The missing dependency cuold happen when a new operator is being developed, but not yet released.
set -o pipefail -set -eu +set -u echo "Running "$0 diff --git a/snowflake/ml/modeling/impute/simple_imputer.py b/snowflake/ml/modeling/impute/simple_imputer.py index 1c579df1..e01c4231 100644 --- a/snowflake/ml/modeling/impute/simple_imputer.py +++ b/snowflake/ml/modeling/impute/simple_imputer.py @@ -15,6 +15,8 @@ from snowflake.snowpark import functions as F, types as T from snowflake.snowpark._internal import utils as snowpark_utils +_SUBPROJECT = "Impute" + STRATEGY_TO_STATE_DICT = { "constant": None, "mean": _utils.NumericStatistics.MEAN, @@ -194,10 +196,7 @@ def check_type_consistency(col_types: Dict[str, T.DataType]) -> None: return input_col_datatypes - @telemetry.send_api_usage_telemetry( - project=base.PROJECT, - subproject=base.SUBPROJECT, - ) + @telemetry.send_api_usage_telemetry(project=base.PROJECT, subproject=_SUBPROJECT) def fit(self, dataset: snowpark.DataFrame) -> "SimpleImputer": """ Compute values to impute for the dataset according to the strategy. @@ -214,7 +213,7 @@ def fit(self, dataset: snowpark.DataFrame) -> "SimpleImputer": input_col_datatypes = self._get_dataset_input_col_datatypes(dataset) self.statistics_: Dict[str, Any] = {} - statement_params = telemetry.get_statement_params(base.PROJECT, base.SUBPROJECT, self.__class__.__name__) + statement_params = telemetry.get_statement_params(base.PROJECT, _SUBPROJECT, self.__class__.__name__) if self.strategy == "constant": if self.fill_value is None: @@ -274,14 +273,8 @@ def fit(self, dataset: snowpark.DataFrame) -> "SimpleImputer": self._is_fitted = True return self - @telemetry.send_api_usage_telemetry( - project=base.PROJECT, - subproject=base.SUBPROJECT, - ) - @telemetry.add_stmt_params_to_df( - project=base.PROJECT, - subproject=base.SUBPROJECT, - ) + @telemetry.send_api_usage_telemetry(project=base.PROJECT, subproject=_SUBPROJECT) + @telemetry.add_stmt_params_to_df(project=base.PROJECT, subproject=_SUBPROJECT) def transform(self, dataset: Union[snowpark.DataFrame, pd.DataFrame]) -> Union[snowpark.DataFrame, pd.DataFrame]: """ Transform the input dataset by imputing the computed statistics in the input columns. 
diff --git a/snowflake/ml/modeling/metrics/BUILD.bazel b/snowflake/ml/modeling/metrics/BUILD.bazel index d5bd8657..08b85709 100644 --- a/snowflake/ml/modeling/metrics/BUILD.bazel +++ b/snowflake/ml/modeling/metrics/BUILD.bazel @@ -13,6 +13,7 @@ py_library( "precision_recall_fscore_support.py", "precision_score.py", "regression.py", + "roc_curve.py", ], deps = [ ":init", diff --git a/snowflake/ml/modeling/metrics/__init__.py b/snowflake/ml/modeling/metrics/__init__.py index ed8be701..730ed70f 100644 --- a/snowflake/ml/modeling/metrics/__init__.py +++ b/snowflake/ml/modeling/metrics/__init__.py @@ -4,6 +4,7 @@ from .covariance import covariance from .precision_recall_fscore_support import precision_recall_fscore_support from .precision_score import precision_score +from .roc_curve import roc_curve __all__ = [ "accuracy_score", @@ -12,4 +13,5 @@ "covariance", "precision_recall_fscore_support", "precision_score", + "roc_curve", ] diff --git a/snowflake/ml/modeling/metrics/precision_recall_fscore_support.py b/snowflake/ml/modeling/metrics/precision_recall_fscore_support.py index ecd5504f..65d4894e 100644 --- a/snowflake/ml/modeling/metrics/precision_recall_fscore_support.py +++ b/snowflake/ml/modeling/metrics/precision_recall_fscore_support.py @@ -115,10 +115,18 @@ def precision_recall_fscore_support( session = df._session assert session is not None - query = df.queries["queries"][-1] sproc_name = f"precision_recall_fscore_support_{snowpark_utils.generate_random_alphanumeric()}" statement_params = telemetry.get_statement_params(_PROJECT, _SUBPROJECT) + cols = [] + if isinstance(y_true_col_names, str): + cols = [y_true_col_names, y_pred_col_names] + elif isinstance(y_true_col_names, list): + cols = y_true_col_names + y_pred_col_names # type:ignore[assignment, operator] + if sample_weight_col_name: + cols.append(sample_weight_col_name) + query = df[cols].queries["queries"][-1] + @F.sproc( # type: ignore[misc] session=session, name=sproc_name, diff --git a/snowflake/ml/modeling/metrics/roc_curve.py b/snowflake/ml/modeling/metrics/roc_curve.py new file mode 100644 index 00000000..fae77f9a --- /dev/null +++ b/snowflake/ml/modeling/metrics/roc_curve.py @@ -0,0 +1,94 @@ +from typing import Optional, Tuple, Union + +import cloudpickle +import numpy.typing as npt +from sklearn import metrics + +from snowflake import snowpark +from snowflake.ml._internal import telemetry +from snowflake.snowpark import functions as F +from snowflake.snowpark._internal import utils as snowpark_utils + +_PROJECT = "ModelDevelopment" +_SUBPROJECT = "Metrics" + + +@telemetry.send_api_usage_telemetry(project=_PROJECT, subproject=_SUBPROJECT) +def roc_curve( + *, + df: snowpark.DataFrame, + y_true_col_name: str, + y_score_col_name: str, + pos_label: Optional[Union[str, int]] = None, + sample_weight_col_name: Optional[str] = None, + drop_intermediate: bool = True, +) -> Tuple[npt.ArrayLike, npt.ArrayLike, npt.ArrayLike]: + """ + Compute Receiver operating characteristic (ROC). + + Note: this implementation is restricted to the binary classification task. + + Args: + df: Input dataframe. + y_true_col_name: Column name representing true binary labels. + If labels are not either {-1, 1} or {0, 1}, then pos_label should be + explicitly given. + y_score_col_name: Column name representing target scores, can either + be probability estimates of the positive class, confidence values, + or non-thresholded measure of decisions (as returned by + "decision_function" on some classifiers). + pos_label: The label of the positive class. 
+ When ``pos_label=None``, if `y_true` is in {-1, 1} or {0, 1}, + ``pos_label`` is set to 1, otherwise an error will be raised. + sample_weight_col_name: Column name representing sample weights. + drop_intermediate: Whether to drop some suboptimal thresholds which would + not appear on a plotted ROC curve. This is useful in order to create + lighter ROC curves. + + Returns: + fpr: ndarray of shape (>2,) + Increasing false positive rates such that element i is the false + positive rate of predictions with score >= `thresholds[i]`. + tpr : ndarray of shape (>2,) + Increasing true positive rates such that element `i` is the true + positive rate of predictions with score >= `thresholds[i]`. + thresholds : ndarray of shape = (n_thresholds,) + Decreasing thresholds on the decision function used to compute + fpr and tpr. `thresholds[0]` represents no instances being predicted + and is arbitrarily set to `max(y_score) + 1`. + """ + session = df._session + assert session is not None + sproc_name = f"roc_curve_{snowpark_utils.generate_random_alphanumeric()}" + statement_params = telemetry.get_statement_params(_PROJECT, _SUBPROJECT) + + cols = [y_true_col_name, y_score_col_name] + if sample_weight_col_name: + cols.append(sample_weight_col_name) + query = df[cols].queries["queries"][-1] + + @F.sproc( # type: ignore[misc] + session=session, + name=sproc_name, + replace=True, + packages=["cloudpickle", "scikit-learn", "snowflake-snowpark-python"], + statement_params=statement_params, + ) + def roc_curve_sproc(session: snowpark.Session) -> bytes: + df = session.sql(query).to_pandas(statement_params=statement_params) + y_true = df[y_true_col_name] + y_score = df[y_score_col_name] + sample_weight = df[sample_weight_col_name] if sample_weight_col_name else None + fpr, tpr, thresholds = metrics.roc_curve( + y_true, + y_score, + pos_label=pos_label, + sample_weight=sample_weight, + drop_intermediate=drop_intermediate, + ) + + return cloudpickle.dumps((fpr, tpr, thresholds)) # type: ignore[no-any-return] + + loaded_data = cloudpickle.loads(session.call(sproc_name)) + res: Tuple[npt.ArrayLike, npt.ArrayLike, npt.ArrayLike] = loaded_data + return res diff --git a/tests/integ/snowflake/ml/modeling/framework/utils.py b/tests/integ/snowflake/ml/modeling/framework/utils.py index 9c5fc250..60c1efbb 100644 --- a/tests/integ/snowflake/ml/modeling/framework/utils.py +++ b/tests/integ/snowflake/ml/modeling/framework/utils.py @@ -144,7 +144,7 @@ class DataType(Enum): def gen_fuzz_data( - rows: int, types: List[DataType], low: int = MIN_INT, high: int = MAX_INT + rows: int, types: List[DataType], low: Union[int, List[int]] = MIN_INT, high: Union[int, List[int]] = MAX_INT ) -> Tuple[List[Any], List[str]]: """ Generate random data based on input column types and row count. 
@@ -153,8 +153,8 @@ def gen_fuzz_data( Args: rows: num of rows to generate types: type per column - low: lower bound of the output interval (inclusive) - high: upper bound of the output interval (exclusive) + low: lower bound(s) of the output interval (inclusive) + high: upper bound(s) of the output interval (exclusive) Returns: A tuple of generated data and column names @@ -166,10 +166,12 @@ def gen_fuzz_data( names = ["ID"] for idx, t in enumerate(types): + _low = low if isinstance(low, int) else low[idx] + _high = high if isinstance(high, int) else high[idx] if t == DataType.INTEGER: - data.append(np.random.randint(low, high, rows)) + data.append(np.random.randint(_low, _high, rows)) elif t == DataType.FLOAT: - data.append(np.random.uniform(low, high, rows)) + data.append(np.random.uniform(_low, _high, rows)) else: raise ValueError(f"Unsupported data type {t}") names.append(f"COL_{idx}") diff --git a/tests/integ/snowflake/ml/modeling/metrics/BUILD.bazel b/tests/integ/snowflake/ml/modeling/metrics/BUILD.bazel index d4befcbf..58ce132b 100644 --- a/tests/integ/snowflake/ml/modeling/metrics/BUILD.bazel +++ b/tests/integ/snowflake/ml/modeling/metrics/BUILD.bazel @@ -2,6 +2,9 @@ load("//bazel:py_rules.bzl", "py_test") package(default_visibility = ["//visibility:public"]) +SHARD_COUNT = 3 +TIMEOUT = "long" # 900s + py_test( name = "test_r2_score", srcs = ["test_r2_score.py"], @@ -23,7 +26,7 @@ py_test( py_test( name = "test_confusion_matrix", - timeout = "long", + timeout = TIMEOUT, srcs = ["test_confusion_matrix.py"], deps = [ "//snowflake/ml/modeling/metrics", @@ -34,7 +37,7 @@ py_test( py_test( name = "test_correlation", - timeout = "long", + timeout = TIMEOUT, srcs = ["test_correlation.py"], deps = [ "//snowflake/ml/modeling/metrics", @@ -44,7 +47,7 @@ py_test( py_test( name = "test_covariance", - timeout = "long", + timeout = TIMEOUT, srcs = ["test_covariance.py"], deps = [ "//snowflake/ml/modeling/metrics", @@ -54,8 +57,9 @@ py_test( py_test( name = "test_precision_recall_fscore_support", - timeout = "long", + timeout = TIMEOUT, srcs = ["test_precision_recall_fscore_support.py"], + shard_count = SHARD_COUNT, deps = [ "//snowflake/ml/modeling/metrics", "//snowflake/ml/utils:connection_params", @@ -65,8 +69,21 @@ py_test( py_test( name = "test_precision_score", - timeout = "long", + timeout = TIMEOUT, srcs = ["test_precision_score.py"], + shard_count = SHARD_COUNT, + deps = [ + "//snowflake/ml/modeling/metrics", + "//snowflake/ml/utils:connection_params", + "//tests/integ/snowflake/ml/modeling/framework:utils", + ], +) + +py_test( + name = "test_roc_curve", + timeout = TIMEOUT, + srcs = ["test_roc_curve.py"], + shard_count = SHARD_COUNT, deps = [ "//snowflake/ml/modeling/metrics", "//snowflake/ml/utils:connection_params", diff --git a/tests/integ/snowflake/ml/modeling/metrics/test_accuracy_score.py b/tests/integ/snowflake/ml/modeling/metrics/test_accuracy_score.py index 5f114412..573d0e6d 100644 --- a/tests/integ/snowflake/ml/modeling/metrics/test_accuracy_score.py +++ b/tests/integ/snowflake/ml/modeling/metrics/test_accuracy_score.py @@ -4,6 +4,7 @@ from typing import Any, Dict import numpy as np +import pandas as pd from absl.testing import parameterized from absl.testing.absltest import main from sklearn import metrics as sklearn_metrics @@ -13,15 +14,17 @@ from snowflake.ml.utils import connection_params from tests.integ.snowflake.ml.modeling.framework import utils +_ROWS = 100 +_TYPES = [utils.DataType.INTEGER] * 4 + [utils.DataType.FLOAT] _BINARY_DATA, _SCHEMA = utils.gen_fuzz_data( 
- rows=100, - types=[utils.DataType.INTEGER] * 4 + [utils.DataType.FLOAT], + rows=_ROWS, + types=_TYPES, low=0, high=2, ) _MULTICLASS_DATA, _ = utils.gen_fuzz_data( - rows=100, - types=[utils.DataType.INTEGER] * 4 + [utils.DataType.FLOAT], + rows=_ROWS, + types=_TYPES, low=0, high=5, ) @@ -39,99 +42,75 @@ def setUp(self) -> None: """Creates Snowpark and Snowflake environments for testing.""" self._session = snowpark.Session.builder.configs(connection_params.SnowflakeLoginOptions()).create() - self._binary_input_df = self._session.create_dataframe(_BINARY_DATA, schema=_SCHEMA) - self._binary_pandas_df = self._binary_input_df.to_pandas() - self._multiclass_input_df = self._session.create_dataframe(_MULTICLASS_DATA, schema=_SCHEMA) - self._multiclass_pandas_df = self._multiclass_input_df.to_pandas() - def tearDown(self) -> None: self._session.close() @parameterized.parameters( # type: ignore[misc] - {"params": {"y_true_col_names": [_Y_TRUE_COL, _Y_TRUE_COLS], "y_pred_col_names": [_Y_PRED_COL, _Y_PRED_COLS]}}, - ) - def test_accuracy_score(self, params: Dict[str, Any]) -> None: - for i in range(len(params["y_true_col_names"])): - y_true_col_names = params["y_true_col_names"][i] - y_pred_col_names = params["y_pred_col_names"][i] - input_df = self._multiclass_input_df if isinstance(y_true_col_names, str) else self._binary_input_df - pandas_df = self._multiclass_pandas_df if isinstance(y_true_col_names, str) else self._binary_pandas_df - - score = snowml_metrics.accuracy_score( - df=input_df, y_true_col_names=y_true_col_names, y_pred_col_names=y_pred_col_names, normalize=False - ) - score_sklearn = sklearn_metrics.accuracy_score( - pandas_df[y_true_col_names], pandas_df[y_pred_col_names], normalize=False - ) - np.testing.assert_allclose(score, score_sklearn) - - @parameterized.parameters( # type: ignore[misc] - {"params": {"y_true_col_names": [_Y_TRUE_COL, _Y_TRUE_COLS], "y_pred_col_names": [_Y_PRED_COL, _Y_PRED_COLS]}}, + { + "params": { + "sample_weight_col_name": [None, _SAMPLE_WEIGHT_COL], + "values": [ + {"data": _BINARY_DATA, "y_true": _Y_TRUE_COLS, "y_pred": _Y_PRED_COLS}, + {"data": _MULTICLASS_DATA, "y_true": _Y_TRUE_COL, "y_pred": _Y_PRED_COL}, + ], + } + }, ) def test_accuracy_score_sample_weight(self, params: Dict[str, Any]) -> None: - for i in range(len(params["y_true_col_names"])): - y_true_col_names = params["y_true_col_names"][i] - y_pred_col_names = params["y_pred_col_names"][i] - input_df = self._multiclass_input_df if isinstance(y_true_col_names, str) else self._binary_input_df - pandas_df = self._multiclass_pandas_df if isinstance(y_true_col_names, str) else self._binary_pandas_df - - score = snowml_metrics.accuracy_score( - df=input_df, - y_true_col_names=y_true_col_names, - y_pred_col_names=y_pred_col_names, - sample_weight_col_name=_SAMPLE_WEIGHT_COL, - normalize=False, - ) - score_sklearn = sklearn_metrics.accuracy_score( - pandas_df[y_true_col_names], - pandas_df[y_pred_col_names], - sample_weight=pandas_df[_SAMPLE_WEIGHT_COL].to_numpy(), - normalize=False, - ) - np.testing.assert_allclose(score, score_sklearn) + for values in params["values"]: + data = values["data"] + y_true = values["y_true"] + y_pred = values["y_pred"] + pandas_df = pd.DataFrame(data, columns=_SCHEMA) + input_df = self._session.create_dataframe(pandas_df) + + for sample_weight_col_name in params["sample_weight_col_name"]: + actual_score = snowml_metrics.accuracy_score( + df=input_df, + y_true_col_names=y_true, + y_pred_col_names=y_pred, + sample_weight_col_name=sample_weight_col_name, + ) + 
sample_weight = pandas_df[sample_weight_col_name].to_numpy() if sample_weight_col_name else None + sklearn_score = sklearn_metrics.accuracy_score( + pandas_df[y_true], + pandas_df[y_pred], + sample_weight=sample_weight, + ) + np.testing.assert_allclose(actual_score, sklearn_score) @parameterized.parameters( # type: ignore[misc] - {"params": {"y_true_col_names": [_Y_TRUE_COL, _Y_TRUE_COLS], "y_pred_col_names": [_Y_PRED_COL, _Y_PRED_COLS]}}, + { + "params": { + "normalize": [True, False], + "values": [ + {"data": _BINARY_DATA, "y_true": _Y_TRUE_COLS, "y_pred": _Y_PRED_COLS}, + {"data": _MULTICLASS_DATA, "y_true": _Y_TRUE_COL, "y_pred": _Y_PRED_COL}, + ], + } + }, ) def test_accuracy_score_normalized(self, params: Dict[str, Any]) -> None: - for i in range(len(params["y_true_col_names"])): - y_true_col_names = params["y_true_col_names"][i] - y_pred_col_names = params["y_pred_col_names"][i] - input_df = self._multiclass_input_df if isinstance(y_true_col_names, str) else self._binary_input_df - pandas_df = self._multiclass_pandas_df if isinstance(y_true_col_names, str) else self._binary_pandas_df - - score = snowml_metrics.accuracy_score( - df=input_df, y_true_col_names=y_true_col_names, y_pred_col_names=y_pred_col_names, normalize=True - ) - score_sklearn = sklearn_metrics.accuracy_score( - pandas_df[y_true_col_names], pandas_df[y_pred_col_names], normalize=True - ) - np.testing.assert_allclose(score, score_sklearn) - - @parameterized.parameters( # type: ignore[misc] - {"params": {"y_true_col_names": [_Y_TRUE_COL, _Y_TRUE_COLS], "y_pred_col_names": [_Y_PRED_COL, _Y_PRED_COLS]}}, - ) - def test_accuracy_score_sample_weight_normalized(self, params: Dict[str, Any]) -> None: - for i in range(len(params["y_true_col_names"])): - y_true_col_names = params["y_true_col_names"][i] - y_pred_col_names = params["y_pred_col_names"][i] - input_df = self._multiclass_input_df if isinstance(y_true_col_names, str) else self._binary_input_df - pandas_df = self._multiclass_pandas_df if isinstance(y_true_col_names, str) else self._binary_pandas_df - - score = snowml_metrics.accuracy_score( - df=input_df, - y_true_col_names=y_true_col_names, - y_pred_col_names=y_pred_col_names, - sample_weight_col_name=_SAMPLE_WEIGHT_COL, - normalize=True, - ) - score_sklearn = sklearn_metrics.accuracy_score( - pandas_df[y_true_col_names], - pandas_df[y_pred_col_names], - sample_weight=pandas_df[_SAMPLE_WEIGHT_COL].to_numpy(), - normalize=True, - ) - np.testing.assert_allclose(score, score_sklearn) + for values in params["values"]: + data = values["data"] + y_true = values["y_true"] + y_pred = values["y_pred"] + pandas_df = pd.DataFrame(data, columns=_SCHEMA) + input_df = self._session.create_dataframe(pandas_df) + + for normalize in params["normalize"]: + actual_score = snowml_metrics.accuracy_score( + df=input_df, + y_true_col_names=y_true, + y_pred_col_names=y_pred, + normalize=normalize, + ) + sklearn_score = sklearn_metrics.accuracy_score( + pandas_df[y_true], + pandas_df[y_pred], + normalize=normalize, + ) + np.testing.assert_allclose(actual_score, sklearn_score) if __name__ == "__main__": diff --git a/tests/integ/snowflake/ml/modeling/metrics/test_precision_recall_fscore_support.py b/tests/integ/snowflake/ml/modeling/metrics/test_precision_recall_fscore_support.py index 6272ea87..9a0365bd 100644 --- a/tests/integ/snowflake/ml/modeling/metrics/test_precision_recall_fscore_support.py +++ b/tests/integ/snowflake/ml/modeling/metrics/test_precision_recall_fscore_support.py @@ -16,7 +16,13 @@ _ROWS = 100 _TYPES = 
[utils.DataType.INTEGER] * 4 + [utils.DataType.FLOAT] -_DATA, _SCHEMA = utils.gen_fuzz_data( +_BINARY_DATA, _SCHEMA = utils.gen_fuzz_data( + rows=_ROWS, + types=_TYPES, + low=0, + high=2, +) +_MULTICLASS_DATA, _ = utils.gen_fuzz_data( rows=_ROWS, types=_TYPES, low=0, @@ -39,11 +45,47 @@ def setUp(self) -> None: def tearDown(self) -> None: self._session.close() + @parameterized.parameters( # type: ignore[misc] + { + "params": { + "beta": [1.0, 0.5], + "values": [ + {"data": _BINARY_DATA, "y_true": _Y_TRUE_COLS, "y_pred": _Y_PRED_COLS}, + {"data": _MULTICLASS_DATA, "y_true": _Y_TRUE_COL, "y_pred": _Y_PRED_COL}, + ], + } + }, + ) + def test_precision_recall_fscore_support_beta(self, params: Dict[str, Any]) -> None: + for values in params["values"]: + data = values["data"] + y_true = values["y_true"] + y_pred = values["y_pred"] + pandas_df = pd.DataFrame(data, columns=_SCHEMA) + input_df = self._session.create_dataframe(pandas_df) + + for beta in params["beta"]: + actual_p, actual_r, actual_f, actual_s = snowml_metrics.precision_recall_fscore_support( + df=input_df, + y_true_col_names=y_true, + y_pred_col_names=y_pred, + beta=beta, + ) + sklearn_p, sklearn_r, sklearn_f, sklearn_s = sklearn_metrics.precision_recall_fscore_support( + pandas_df[y_true], + pandas_df[y_pred], + beta=beta, + ) + np.testing.assert_allclose( + np.array((actual_p, actual_r, actual_f, actual_s)), + np.array((sklearn_p, sklearn_r, sklearn_f, sklearn_s)), + ) + @parameterized.parameters( # type: ignore[misc] {"params": {"labels": [None, [2, 0, 4]]}}, ) def test_precision_recall_fscore_support_labels(self, params: Dict[str, Any]) -> None: - pandas_df = pd.DataFrame(_DATA, columns=_SCHEMA) + pandas_df = pd.DataFrame(_MULTICLASS_DATA, columns=_SCHEMA) input_df = self._session.create_dataframe(pandas_df) for labels in params["labels"]: @@ -64,24 +106,23 @@ def test_precision_recall_fscore_support_labels(self, params: Dict[str, Any]) -> ) @parameterized.parameters( # type: ignore[misc] - {"params": {"sample_weight_col_name": [None, _SAMPLE_WEIGHT_COL]}}, + {"params": {"pos_label": [0, 2, 4]}}, ) - def test_precision_recall_fscore_support_sample_weight(self, params: Dict[str, Any]) -> None: - pandas_df = pd.DataFrame(_DATA, columns=_SCHEMA) + def test_precision_recall_fscore_support_pos_label(self, params: Dict[str, Any]) -> None: + pandas_df = pd.DataFrame(_MULTICLASS_DATA, columns=_SCHEMA) input_df = self._session.create_dataframe(pandas_df) - for sample_weight_col_name in params["sample_weight_col_name"]: + for pos_label in params["pos_label"]: actual_p, actual_r, actual_f, actual_s = snowml_metrics.precision_recall_fscore_support( df=input_df, y_true_col_names=_Y_TRUE_COL, y_pred_col_names=_Y_PRED_COL, - sample_weight_col_name=sample_weight_col_name, + pos_label=pos_label, ) - sample_weight = pandas_df[sample_weight_col_name].to_numpy() if sample_weight_col_name else None sklearn_p, sklearn_r, sklearn_f, sklearn_s = sklearn_metrics.precision_recall_fscore_support( pandas_df[_Y_TRUE_COL], pandas_df[_Y_PRED_COL], - sample_weight=sample_weight, + pos_label=pos_label, ) np.testing.assert_allclose( np.array((actual_p, actual_r, actual_f, actual_s)), @@ -89,16 +130,50 @@ def test_precision_recall_fscore_support_sample_weight(self, params: Dict[str, A ) @parameterized.parameters( # type: ignore[misc] - {"params": {"average": [None, "binary", "micro", "macro", "samples", "weighted"]}}, + { + "params": { + "sample_weight_col_name": [None, _SAMPLE_WEIGHT_COL], + "values": [ + {"data": _BINARY_DATA, "y_true": _Y_TRUE_COLS, "y_pred": 
_Y_PRED_COLS}, + {"data": _MULTICLASS_DATA, "y_true": _Y_TRUE_COL, "y_pred": _Y_PRED_COL}, + ], + } + }, + ) + def test_precision_recall_fscore_support_sample_weight(self, params: Dict[str, Any]) -> None: + for values in params["values"]: + data = values["data"] + y_true = values["y_true"] + y_pred = values["y_pred"] + pandas_df = pd.DataFrame(data, columns=_SCHEMA) + input_df = self._session.create_dataframe(pandas_df) + + for sample_weight_col_name in params["sample_weight_col_name"]: + actual_p, actual_r, actual_f, actual_s = snowml_metrics.precision_recall_fscore_support( + df=input_df, + y_true_col_names=y_true, + y_pred_col_names=y_pred, + sample_weight_col_name=sample_weight_col_name, + ) + sample_weight = pandas_df[sample_weight_col_name].to_numpy() if sample_weight_col_name else None + sklearn_p, sklearn_r, sklearn_f, sklearn_s = sklearn_metrics.precision_recall_fscore_support( + pandas_df[y_true], + pandas_df[y_pred], + sample_weight=sample_weight, + ) + np.testing.assert_allclose( + np.array((actual_p, actual_r, actual_f, actual_s)), + np.array((sklearn_p, sklearn_r, sklearn_f, sklearn_s)), + ) + + @parameterized.parameters( # type: ignore[misc] + {"params": {"average": [None, "micro", "macro", "weighted"]}}, ) - def test_precision_recall_fscore_support_average(self, params: Dict[str, Any]) -> None: - pandas_df = pd.DataFrame(_DATA, columns=_SCHEMA) + def test_precision_recall_fscore_support_average_multiclass(self, params: Dict[str, Any]) -> None: + pandas_df = pd.DataFrame(_MULTICLASS_DATA, columns=_SCHEMA) input_df = self._session.create_dataframe(pandas_df) for average in params["average"]: - if average == "binary" or average == "samples": - continue - actual_p, actual_r, actual_f, actual_s = snowml_metrics.precision_recall_fscore_support( df=input_df, y_true_col_names=_Y_TRUE_COL, @@ -115,48 +190,37 @@ def test_precision_recall_fscore_support_average(self, params: Dict[str, Any]) - np.array((sklearn_p, sklearn_r, sklearn_f, sklearn_s), dtype=np.float_), ) - data, _ = utils.gen_fuzz_data( - rows=_ROWS, - types=_TYPES, - low=0, - high=2, - ) - binary_pandas_df = pd.DataFrame(data, columns=_SCHEMA) - binary_input_df = self._session.create_dataframe(binary_pandas_df) - - # binary - actual_p, actual_r, actual_f, actual_s = snowml_metrics.precision_recall_fscore_support( - df=binary_input_df, - y_true_col_names=_Y_TRUE_COL, - y_pred_col_names=_Y_PRED_COL, - average="binary", - ) - sklearn_p, sklearn_r, sklearn_f, sklearn_s = sklearn_metrics.precision_recall_fscore_support( - binary_pandas_df[_Y_TRUE_COL], - binary_pandas_df[_Y_PRED_COL], - average="binary", - ) - np.testing.assert_allclose( - np.array((actual_p, actual_r, actual_f, actual_s), dtype=np.float_), - np.array((sklearn_p, sklearn_r, sklearn_f, sklearn_s), dtype=np.float_), - ) + @parameterized.parameters( # type: ignore[misc] + { + "params": { + "average": ["binary", "samples"], + "y_true": [_Y_TRUE_COL, _Y_TRUE_COLS], + "y_pred": [_Y_PRED_COL, _Y_PRED_COLS], + } + }, + ) + def test_precision_recall_fscore_support_average_binary(self, params: Dict[str, Any]) -> None: + pandas_df = pd.DataFrame(_BINARY_DATA, columns=_SCHEMA) + input_df = self._session.create_dataframe(pandas_df) - # samples - actual_p, actual_r, actual_f, actual_s = snowml_metrics.precision_recall_fscore_support( - df=binary_input_df, - y_true_col_names=_Y_TRUE_COLS, - y_pred_col_names=_Y_PRED_COLS, - average="samples", - ) - sklearn_p, sklearn_r, sklearn_f, sklearn_s = sklearn_metrics.precision_recall_fscore_support( - binary_pandas_df[_Y_TRUE_COLS], - 
binary_pandas_df[_Y_PRED_COLS], - average="samples", - ) - np.testing.assert_allclose( - np.array((actual_p, actual_r, actual_f, actual_s), dtype=np.float_), - np.array((sklearn_p, sklearn_r, sklearn_f, sklearn_s), dtype=np.float_), - ) + for idx, average in enumerate(params["average"]): + y_true = params["y_true"][idx] + y_pred = params["y_pred"][idx] + actual_p, actual_r, actual_f, actual_s = snowml_metrics.precision_recall_fscore_support( + df=input_df, + y_true_col_names=y_true, + y_pred_col_names=y_pred, + average=average, + ) + sklearn_p, sklearn_r, sklearn_f, sklearn_s = sklearn_metrics.precision_recall_fscore_support( + pandas_df[y_true], + pandas_df[y_pred], + average=average, + ) + np.testing.assert_allclose( + np.array((actual_p, actual_r, actual_f, actual_s), dtype=np.float_), + np.array((sklearn_p, sklearn_r, sklearn_f, sklearn_s), dtype=np.float_), + ) @parameterized.parameters( # type: ignore[misc] {"params": {"zero_division": ["warn", 0, 1]}}, diff --git a/tests/integ/snowflake/ml/modeling/metrics/test_precision_score.py b/tests/integ/snowflake/ml/modeling/metrics/test_precision_score.py index e45cd16c..c3ae6636 100644 --- a/tests/integ/snowflake/ml/modeling/metrics/test_precision_score.py +++ b/tests/integ/snowflake/ml/modeling/metrics/test_precision_score.py @@ -16,12 +16,18 @@ _ROWS = 100 _TYPES = [utils.DataType.INTEGER] * 4 + [utils.DataType.FLOAT] -_DATA, _SCHEMA = utils.gen_fuzz_data( +_BINARY_DATA, _SCHEMA = utils.gen_fuzz_data( rows=_ROWS, types=_TYPES, low=0, high=2, ) +_MULTICLASS_DATA, _ = utils.gen_fuzz_data( + rows=_ROWS, + types=_TYPES, + low=0, + high=5, +) _Y_TRUE_COL = _SCHEMA[1] _Y_PRED_COL = _SCHEMA[2] _Y_TRUE_COLS = [_SCHEMA[1], _SCHEMA[2]] @@ -43,13 +49,7 @@ def tearDown(self) -> None: {"params": {"labels": [None, [2, 0, 4]]}}, ) def test_precision_score_labels(self, params: Dict[str, Any]) -> None: - data, _ = utils.gen_fuzz_data( - rows=_ROWS, - types=_TYPES, - low=0, - high=5, - ) - pandas_df = pd.DataFrame(data, columns=_SCHEMA) + pandas_df = pd.DataFrame(_MULTICLASS_DATA, columns=_SCHEMA) input_df = self._session.create_dataframe(pandas_df) for labels in params["labels"]: @@ -69,87 +69,113 @@ def test_precision_score_labels(self, params: Dict[str, Any]) -> None: np.testing.assert_allclose(actual_p, sklearn_p) @parameterized.parameters( # type: ignore[misc] - {"params": {"sample_weight_col_name": [None, _SAMPLE_WEIGHT_COL]}}, + {"params": {"pos_label": [0, 2, 4]}}, ) - def test_precision_score_sample_weight(self, params: Dict[str, Any]) -> None: - pandas_df = pd.DataFrame(_DATA, columns=_SCHEMA) + def test_precision_score_pos_label(self, params: Dict[str, Any]) -> None: + pandas_df = pd.DataFrame(_MULTICLASS_DATA, columns=_SCHEMA) input_df = self._session.create_dataframe(pandas_df) - for sample_weight_col_name in params["sample_weight_col_name"]: + for pos_label in params["pos_label"]: actual_p = snowml_metrics.precision_score( df=input_df, y_true_col_names=_Y_TRUE_COL, y_pred_col_names=_Y_PRED_COL, - sample_weight_col_name=sample_weight_col_name, + pos_label=pos_label, + average="micro", ) - sample_weight = pandas_df[sample_weight_col_name].to_numpy() if sample_weight_col_name else None sklearn_p = sklearn_metrics.precision_score( pandas_df[_Y_TRUE_COL], pandas_df[_Y_PRED_COL], - sample_weight=sample_weight, + pos_label=pos_label, + average="micro", ) np.testing.assert_allclose(actual_p, sklearn_p) @parameterized.parameters( # type: ignore[misc] - {"params": {"average": [None, "binary", "micro", "macro", "samples", "weighted"]}}, + { + "params": 
{ + "sample_weight_col_name": [None, _SAMPLE_WEIGHT_COL], + "values": [ + {"data": _BINARY_DATA, "y_true": _Y_TRUE_COLS, "y_pred": _Y_PRED_COLS}, + {"data": _MULTICLASS_DATA, "y_true": _Y_TRUE_COL, "y_pred": _Y_PRED_COL}, + ], + } + }, ) - def test_precision_score_average(self, params: Dict[str, Any]) -> None: - data, _ = utils.gen_fuzz_data( - rows=_ROWS, - types=_TYPES, - low=0, - high=5, - ) - multiclass_pandas_df = pd.DataFrame(data, columns=_SCHEMA) - multiclass_input_df = self._session.create_dataframe(multiclass_pandas_df) + def test_precision_score_sample_weight(self, params: Dict[str, Any]) -> None: + for values in params["values"]: + data = values["data"] + y_true = values["y_true"] + y_pred = values["y_pred"] + pandas_df = pd.DataFrame(data, columns=_SCHEMA) + input_df = self._session.create_dataframe(pandas_df) + + for sample_weight_col_name in params["sample_weight_col_name"]: + actual_p = snowml_metrics.precision_score( + df=input_df, + y_true_col_names=y_true, + y_pred_col_names=y_pred, + sample_weight_col_name=sample_weight_col_name, + average="micro", + ) + sample_weight = pandas_df[sample_weight_col_name].to_numpy() if sample_weight_col_name else None + sklearn_p = sklearn_metrics.precision_score( + pandas_df[y_true], + pandas_df[y_pred], + sample_weight=sample_weight, + average="micro", + ) + np.testing.assert_allclose(actual_p, sklearn_p) - for average in params["average"]: - if average == "binary" or average == "samples": - continue + @parameterized.parameters( # type: ignore[misc] + {"params": {"average": [None, "micro", "macro", "weighted"]}}, + ) + def test_precision_score_average_multiclass(self, params: Dict[str, Any]) -> None: + pandas_df = pd.DataFrame(_MULTICLASS_DATA, columns=_SCHEMA) + input_df = self._session.create_dataframe(pandas_df) + for average in params["average"]: actual_p = snowml_metrics.precision_score( - df=multiclass_input_df, + df=input_df, y_true_col_names=_Y_TRUE_COL, y_pred_col_names=_Y_PRED_COL, average=average, ) sklearn_p = sklearn_metrics.precision_score( - multiclass_pandas_df[_Y_TRUE_COL], - multiclass_pandas_df[_Y_PRED_COL], + pandas_df[_Y_TRUE_COL], + pandas_df[_Y_PRED_COL], average=average, ) np.testing.assert_allclose(actual_p, sklearn_p) - pandas_df = pd.DataFrame(_DATA, columns=_SCHEMA) + @parameterized.parameters( # type: ignore[misc] + { + "params": { + "average": ["binary", "samples"], + "y_true": [_Y_TRUE_COL, _Y_TRUE_COLS], + "y_pred": [_Y_PRED_COL, _Y_PRED_COLS], + } + }, + ) + def test_precision_score_average_binary(self, params: Dict[str, Any]) -> None: + pandas_df = pd.DataFrame(_BINARY_DATA, columns=_SCHEMA) input_df = self._session.create_dataframe(pandas_df) - # binary - actual_p = snowml_metrics.precision_score( - df=input_df, - y_true_col_names=_Y_TRUE_COL, - y_pred_col_names=_Y_PRED_COL, - average="binary", - ) - sklearn_p = sklearn_metrics.precision_score( - pandas_df[_Y_TRUE_COL], - pandas_df[_Y_PRED_COL], - average="binary", - ) - np.testing.assert_allclose(actual_p, sklearn_p) - - # samples - actual_p = snowml_metrics.precision_score( - df=input_df, - y_true_col_names=_Y_TRUE_COLS, - y_pred_col_names=_Y_PRED_COLS, - average="samples", - ) - sklearn_p = sklearn_metrics.precision_score( - pandas_df[_Y_TRUE_COLS], - pandas_df[_Y_PRED_COLS], - average="samples", - ) - np.testing.assert_allclose(actual_p, sklearn_p) + for idx, average in enumerate(params["average"]): + y_true = params["y_true"][idx] + y_pred = params["y_pred"][idx] + actual_p = snowml_metrics.precision_score( + df=input_df, + 
y_true_col_names=y_true, + y_pred_col_names=y_pred, + average=average, + ) + sklearn_p = sklearn_metrics.precision_score( + pandas_df[y_true], + pandas_df[y_pred], + average=average, + ) + np.testing.assert_allclose(actual_p, sklearn_p) @parameterized.parameters( # type: ignore[misc] {"params": {"zero_division": ["warn", 0, 1]}}, diff --git a/tests/integ/snowflake/ml/modeling/metrics/test_roc_curve.py b/tests/integ/snowflake/ml/modeling/metrics/test_roc_curve.py new file mode 100644 index 00000000..7ca91162 --- /dev/null +++ b/tests/integ/snowflake/ml/modeling/metrics/test_roc_curve.py @@ -0,0 +1,121 @@ +# +# Copyright (c) 2012-2022 Snowflake Computing Inc. All rights reserved. +# +from typing import Any, Dict + +import numpy as np +import pandas as pd +from absl.testing import parameterized +from absl.testing.absltest import main +from sklearn import metrics as sklearn_metrics + +from snowflake import snowpark +from snowflake.ml.modeling import metrics as snowml_metrics +from snowflake.ml.utils import connection_params +from tests.integ.snowflake.ml.modeling.framework import utils + +_ROWS = 100 +_TYPES = [utils.DataType.INTEGER] + [utils.DataType.FLOAT] * 2 +_BINARY_DATA, _SCHEMA = utils.gen_fuzz_data( + rows=_ROWS, + types=_TYPES, + low=0, + high=[2, 1, 1], +) +_MULTICLASS_DATA, _ = utils.gen_fuzz_data( + rows=_ROWS, + types=_TYPES, + low=0, + high=[5, 1, 1], +) +_Y_TRUE_COL = _SCHEMA[1] +_Y_SCORE_COL = _SCHEMA[2] +_SAMPLE_WEIGHT_COL = _SCHEMA[3] + + +class RocCurveTest(parameterized.TestCase): + """Test ROC.""" + + def setUp(self) -> None: + """Creates Snowpark and Snowflake environments for testing.""" + self._session = snowpark.Session.builder.configs(connection_params.SnowflakeLoginOptions()).create() + + def tearDown(self) -> None: + self._session.close() + + @parameterized.parameters( # type: ignore[misc] + {"params": {"pos_label": [0, 2, 4]}}, + ) + def test_roc_curve_pos_label(self, params: Dict[str, Any]) -> None: + pandas_df = pd.DataFrame(_MULTICLASS_DATA, columns=_SCHEMA) + input_df = self._session.create_dataframe(pandas_df) + + for pos_label in params["pos_label"]: + actual_fpr, actual_tpr, actual_thresholds = snowml_metrics.roc_curve( + df=input_df, + y_true_col_name=_Y_TRUE_COL, + y_score_col_name=_Y_SCORE_COL, + pos_label=pos_label, + ) + sklearn_fpr, sklearn_tpr, sklearn_thresholds = sklearn_metrics.roc_curve( + pandas_df[_Y_TRUE_COL], + pandas_df[_Y_SCORE_COL], + pos_label=pos_label, + ) + np.testing.assert_allclose( + np.array((actual_fpr, actual_tpr, actual_thresholds)), + np.array((sklearn_fpr, sklearn_tpr, sklearn_thresholds)), + ) + + @parameterized.parameters( # type: ignore[misc] + {"params": {"sample_weight_col_name": [None, _SAMPLE_WEIGHT_COL]}}, + ) + def test_roc_curve_sample_weight(self, params: Dict[str, Any]) -> None: + pandas_df = pd.DataFrame(_BINARY_DATA, columns=_SCHEMA) + input_df = self._session.create_dataframe(pandas_df) + + for sample_weight_col_name in params["sample_weight_col_name"]: + actual_fpr, actual_tpr, actual_thresholds = snowml_metrics.roc_curve( + df=input_df, + y_true_col_name=_Y_TRUE_COL, + y_score_col_name=_Y_SCORE_COL, + sample_weight_col_name=sample_weight_col_name, + ) + sample_weight = pandas_df[sample_weight_col_name].to_numpy() if sample_weight_col_name else None + sklearn_fpr, sklearn_tpr, sklearn_thresholds = sklearn_metrics.roc_curve( + pandas_df[_Y_TRUE_COL], + pandas_df[_Y_SCORE_COL], + sample_weight=sample_weight, + ) + np.testing.assert_allclose( + np.array((actual_fpr, actual_tpr, actual_thresholds)), + 
np.array((sklearn_fpr, sklearn_tpr, sklearn_thresholds)), + ) + + @parameterized.parameters( # type: ignore[misc] + {"params": {"drop_intermediate": [True, False]}}, + ) + def test_roc_curve_drop_intermediate(self, params: Dict[str, Any]) -> None: + pandas_df = pd.DataFrame(_BINARY_DATA, columns=_SCHEMA) + input_df = self._session.create_dataframe(pandas_df) + + for drop_intermediate in params["drop_intermediate"]: + actual_fpr, actual_tpr, actual_thresholds = snowml_metrics.roc_curve( + df=input_df, + y_true_col_name=_Y_TRUE_COL, + y_score_col_name=_Y_SCORE_COL, + drop_intermediate=drop_intermediate, + ) + sklearn_fpr, sklearn_tpr, sklearn_thresholds = sklearn_metrics.roc_curve( + pandas_df[_Y_TRUE_COL], + pandas_df[_Y_SCORE_COL], + drop_intermediate=drop_intermediate, + ) + np.testing.assert_allclose( + np.array((actual_fpr, actual_tpr, actual_thresholds)), + np.array((sklearn_fpr, sklearn_tpr, sklearn_thresholds)), + ) + + +if __name__ == "__main__": + main()
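
For context, a minimal sketch of how the new `roc_curve` metric introduced in this change would be invoked against a Snowpark DataFrame. The session setup mirrors the integration test above; the table name `SCORED_PREDICTIONS` and the column names `LABEL` and `SCORE` are illustrative assumptions, not part of this change.

```python
# Illustrative usage only: table and column names below are assumptions, not part of this change.
from snowflake import snowpark
from snowflake.ml.modeling import metrics as snowml_metrics
from snowflake.ml.utils import connection_params

# Create a Snowpark session, as the integration tests do.
session = snowpark.Session.builder.configs(connection_params.SnowflakeLoginOptions()).create()

# A table holding binary labels in LABEL and probability scores in SCORE (hypothetical names).
df = session.table("SCORED_PREDICTIONS")

# Executes server-side via a stored procedure and returns arrays matching sklearn.metrics.roc_curve.
fpr, tpr, thresholds = snowml_metrics.roc_curve(
    df=df,
    y_true_col_name="LABEL",
    y_score_col_name="SCORE",
)
```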