From 549b7698a132970331326bce8cce167f735827b7 Mon Sep 17 00:00:00 2001 From: Mikhail Sveshnikov Date: Wed, 28 Aug 2024 13:47:44 +0400 Subject: [PATCH] fix recsys stuff (#1265) * fix recsys stuff #1233 #1231 * fix tests --- .../descriptors/semantic_similarity.py | 2 +- src/evidently/metrics/recsys/base_top_k.py | 26 +++++++++++--- src/evidently/metrics/recsys/f_beta_top_k.py | 4 +-- src/evidently/metrics/recsys/hit_rate_k.py | 4 +-- tests/metrics/recsys/test_f_beta_top_k.py | 18 +++++----- tests/metrics/recsys/test_hit_rate_k.py | 12 +++---- tests/metrics/recsys/test_map_k.py | 12 +++---- tests/metrics/recsys/test_mar_k.py | 8 ++--- tests/metrics/recsys/test_precision_top_k.py | 34 ++++++++++++++----- tests/metrics/recsys/test_recall_top_k.py | 12 +++---- 10 files changed, 83 insertions(+), 49 deletions(-) diff --git a/src/evidently/descriptors/semantic_similarity.py b/src/evidently/descriptors/semantic_similarity.py index 07f6b287bc..664dc41c1d 100644 --- a/src/evidently/descriptors/semantic_similarity.py +++ b/src/evidently/descriptors/semantic_similarity.py @@ -12,7 +12,7 @@ def feature(self, columns: List[str]) -> GeneratedFeature: return SemanticSimilarityFeature(columns=columns, display_name=self.display_name) -class SemanticSimilatiryDescriptor(FeatureDescriptor): +class SemanticSimilarityDescriptor(FeatureDescriptor): with_column: str def feature(self, column_name: str) -> GeneratedFeatures: diff --git a/src/evidently/metrics/recsys/base_top_k.py b/src/evidently/metrics/recsys/base_top_k.py index 4e31100ad9..ca66d31719 100644 --- a/src/evidently/metrics/recsys/base_top_k.py +++ b/src/evidently/metrics/recsys/base_top_k.py @@ -30,7 +30,25 @@ class Config: k: int current: pd.Series + current_value: float reference: Optional[pd.Series] = None + reference_value: Optional[float] = None + + def __init__( + self, + k: int, + current: pd.Series, + current_value: Optional[float] = None, + reference: Optional[pd.Series] = None, + reference_value: Optional[float] = None, + ): + super().__init__( + k=k, + current=current, + current_value=current_value if current_value is not None else current[k - 1], + reference=reference, + reference_value=reference_value if reference_value is not None or reference is None else reference[k - 1], + ) class TopKMetric(Metric[TopKMetricResult], abc.ABC): @@ -56,11 +74,11 @@ def calculate(self, data: InputData) -> TopKMetricResult: if self.no_feedback_users: key = f"{self.key()}_include_no_feedback" - current = pd.Series(index=result.current["k"], data=result.current[key]) + current = pd.Series(data=result.current[key]) ref_data = result.reference reference: Optional[pd.Series] = None if ref_data is not None: - reference = pd.Series(index=ref_data["k"], data=ref_data[key]) + reference = pd.Series(data=ref_data[key]) return TopKMetricResult(k=self.k, reference=reference, current=current) @abc.abstractmethod @@ -76,9 +94,9 @@ class TopKMetricRenderer(MetricRenderer): def render_html(self, obj: TopKMetric) -> List[BaseWidgetInfo]: metric_result = obj.get_result() k = metric_result.k - counters = [CounterData.float(label="current", value=metric_result.current[k], precision=3)] + counters = [CounterData.float(label="current", value=metric_result.current[k - 1], precision=3)] if metric_result.reference is not None: - counters.append(CounterData.float(label="reference", value=metric_result.reference[k], precision=3)) + counters.append(CounterData.float(label="reference", value=metric_result.reference[k - 1], precision=3)) fig = plot_metric_k(metric_result.current, metric_result.reference, self.yaxis_name) header_part = " No feedback users included." if not obj.no_feedback_users: diff --git a/src/evidently/metrics/recsys/f_beta_top_k.py b/src/evidently/metrics/recsys/f_beta_top_k.py index 59ff2c1249..28c99f29ea 100644 --- a/src/evidently/metrics/recsys/f_beta_top_k.py +++ b/src/evidently/metrics/recsys/f_beta_top_k.py @@ -47,11 +47,11 @@ def calculate(self, data: InputData) -> TopKMetricResult: pr_key = "precision" rc_key = "recall" result = self._precision_recall_calculation.get_result() - current = pd.Series(index=result.current["k"], data=self.fbeta(result.current[pr_key], result.current[rc_key])) + current = pd.Series(data=self.fbeta(result.current[pr_key], result.current[rc_key])) ref_data = result.reference reference: Optional[pd.Series] = None if ref_data is not None: - reference = pd.Series(index=ref_data["k"], data=self.fbeta(ref_data[pr_key], ref_data[rc_key])) + reference = pd.Series(data=self.fbeta(ref_data[pr_key], ref_data[rc_key])) return TopKMetricResult(k=self.k, reference=reference, current=current) def fbeta(self, precision, recall): diff --git a/src/evidently/metrics/recsys/hit_rate_k.py b/src/evidently/metrics/recsys/hit_rate_k.py index f605eb032c..e5a7292c61 100644 --- a/src/evidently/metrics/recsys/hit_rate_k.py +++ b/src/evidently/metrics/recsys/hit_rate_k.py @@ -44,7 +44,7 @@ def get_values(self, df, max_k): for k in range(1, max_k + 1): df_k = df[(df.target == 1) & (df.preds <= k)] res.append(df_k.users.nunique() / user_num) - return pd.Series(index=[x for x in range(1, max_k + 1)], data=res) + return pd.Series(data=res) def calculate(self, data: InputData) -> HitRateKMetricResult: curr, ref = get_curr_and_ref_df(data, self.min_rel_score, self.no_feedback_users, True) @@ -57,6 +57,6 @@ def calculate(self, data: InputData) -> HitRateKMetricResult: @default_renderer(wrap_type=HitRateKMetric) -class PrecisionTopKMetricRenderer(TopKMetricRenderer): +class HitRateKMetricRenderer(TopKMetricRenderer): yaxis_name = "HitRate@k" header = "Hit Rate" diff --git a/tests/metrics/recsys/test_f_beta_top_k.py b/tests/metrics/recsys/test_f_beta_top_k.py index 3ceb85f6aa..de19545080 100644 --- a/tests/metrics/recsys/test_f_beta_top_k.py +++ b/tests/metrics/recsys/test_f_beta_top_k.py @@ -23,9 +23,9 @@ def test_fbeta_values(): results = metric.get_result() assert len(results.current) == 3 - assert results.current[1] == 0.5 - assert np.isclose(results.current[2], 0.33333333333333) - assert np.isclose(results.current[3], 0.49999962499990) + assert results.current[0] == 0.5 + assert np.isclose(results.current[1], 0.33333333333333) + assert np.isclose(results.current[2], 0.49999962499990) def test_fbeta_scores(): @@ -44,9 +44,9 @@ def test_fbeta_scores(): results = metric.get_result() assert len(results.current) == 3 - assert results.current[1] == 0.5 - assert np.isclose(results.current[2], 0.33333333333333) - assert np.isclose(results.current[3], 0.49999962499990) + assert results.current[0] == 0.5 + assert np.isclose(results.current[1], 0.33333333333333) + assert np.isclose(results.current[2], 0.49999962499990) def test_fbeta_scores_include_no_feedback(): @@ -65,6 +65,6 @@ def test_fbeta_scores_include_no_feedback(): results = metric.get_result() assert len(results.current) == 3 - assert np.isclose(results.current[1], 0.33333333333) - assert np.isclose(results.current[2], 0.22222222222) - assert np.isclose(results.current[3], 0.33333333333) + assert np.isclose(results.current[0], 0.33333333333) + assert np.isclose(results.current[1], 0.22222222222) + assert np.isclose(results.current[2], 0.33333333333) diff --git a/tests/metrics/recsys/test_hit_rate_k.py b/tests/metrics/recsys/test_hit_rate_k.py index 8fe6e64e78..1d80d274cc 100644 --- a/tests/metrics/recsys/test_hit_rate_k.py +++ b/tests/metrics/recsys/test_hit_rate_k.py @@ -23,9 +23,9 @@ def test_hit_rate_value(): results = metric.get_result() assert len(results.current) == 3 + assert np.isclose(results.current[0], 0.5) assert np.isclose(results.current[1], 0.5) - assert np.isclose(results.current[2], 0.5) - assert np.isclose(results.current[3], 1) + assert np.isclose(results.current[2], 1) def test_hit_rate_value_judged_only(): @@ -44,9 +44,9 @@ def test_hit_rate_value_judged_only(): results = metric.get_result() assert len(results.current) == 3 + assert np.isclose(results.current[0], 0.3333333) assert np.isclose(results.current[1], 0.3333333) - assert np.isclose(results.current[2], 0.3333333) - assert np.isclose(results.current[3], 0.6666666) + assert np.isclose(results.current[2], 0.6666666) def test_hit_rate_value_judged_only_scores(): @@ -65,6 +65,6 @@ def test_hit_rate_value_judged_only_scores(): results = metric.get_result() assert len(results.current) == 3 + assert np.isclose(results.current[0], 0.3333333) assert np.isclose(results.current[1], 0.3333333) - assert np.isclose(results.current[2], 0.3333333) - assert np.isclose(results.current[3], 0.6666666) + assert np.isclose(results.current[2], 0.6666666) diff --git a/tests/metrics/recsys/test_map_k.py b/tests/metrics/recsys/test_map_k.py index 1be7769b9f..0a809c204d 100644 --- a/tests/metrics/recsys/test_map_k.py +++ b/tests/metrics/recsys/test_map_k.py @@ -23,9 +23,9 @@ def test_map_value(): results = metric.get_result() assert len(results.current) == 3 + assert np.isclose(results.current[0], 0.5) assert np.isclose(results.current[1], 0.5) - assert np.isclose(results.current[2], 0.5) - assert np.isclose(results.current[3], 0.6666666) + assert np.isclose(results.current[2], 0.6666666) def test_map_value_judged_only(): @@ -44,9 +44,9 @@ def test_map_value_judged_only(): results = metric.get_result() assert len(results.current) == 3 + assert np.isclose(results.current[0], 0.3333333) assert np.isclose(results.current[1], 0.3333333) - assert np.isclose(results.current[2], 0.3333333) - assert np.isclose(results.current[3], 0.4444444) + assert np.isclose(results.current[2], 0.4444444) def test_map_value_judged_only_scores(): @@ -65,6 +65,6 @@ def test_map_value_judged_only_scores(): results = metric.get_result() assert len(results.current) == 3 + assert np.isclose(results.current[0], 0.3333333) assert np.isclose(results.current[1], 0.3333333) - assert np.isclose(results.current[2], 0.3333333) - assert np.isclose(results.current[3], 0.4444444) + assert np.isclose(results.current[2], 0.4444444) diff --git a/tests/metrics/recsys/test_mar_k.py b/tests/metrics/recsys/test_mar_k.py index f0d8bfa852..74fe7a30f6 100644 --- a/tests/metrics/recsys/test_mar_k.py +++ b/tests/metrics/recsys/test_mar_k.py @@ -22,9 +22,9 @@ def test_mar_values(): results = metric.get_result() assert len(results.current) == 3 + assert results.current[0] == 0.5 assert results.current[1] == 0.5 - assert results.current[2] == 0.5 - assert results.current[3] == 1 + assert results.current[2] == 1 def test_mar_scores(): @@ -43,6 +43,6 @@ def test_mar_scores(): results = metric.get_result() assert len(results.current) == 3 + assert results.current[0] == 0.5 assert results.current[1] == 0.5 - assert results.current[2] == 0.5 - assert results.current[3] == 1 + assert results.current[2] == 1 diff --git a/tests/metrics/recsys/test_precision_top_k.py b/tests/metrics/recsys/test_precision_top_k.py index 56a71efc05..074a73888b 100644 --- a/tests/metrics/recsys/test_precision_top_k.py +++ b/tests/metrics/recsys/test_precision_top_k.py @@ -1,10 +1,26 @@ +import json + import numpy as np import pandas as pd +from evidently._pydantic_compat import parse_obj_as +from evidently.base_metric import MetricResult from evidently.metrics import PrecisionTopKMetric +from evidently.metrics.recsys.base_top_k import TopKMetricResult from evidently.pipeline.column_mapping import ColumnMapping from evidently.pipeline.column_mapping import RecomType from evidently.report import Report +from evidently.utils import NumpyEncoder + + +def test_value(): + result = TopKMetricResult( + k=2, current=pd.Series([0, 1]), current_value=1, reference=pd.Series([2, 3]), reference_value=3 + ) + payload = json.loads(json.dumps(result.dict(), cls=NumpyEncoder)) + payload2 = {k: v for k, v in payload.items() if not k.endswith("_value")} + result2 = parse_obj_as(MetricResult, payload2) + assert json.loads(json.dumps(result2.dict(), cls=NumpyEncoder)) == payload def test_precision_value(): @@ -23,9 +39,9 @@ def test_precision_value(): results = metric.get_result() assert len(results.current) == 3 - assert results.current[1] == 0.5 - assert results.current[2] == 0.25 - assert np.isclose(results.current[3], 0.333333) + assert results.current[0] == 0.5 + assert results.current[1] == 0.25 + assert np.isclose(results.current[2], 0.333333) def test_precision_value_judged_only(): @@ -44,9 +60,9 @@ def test_precision_value_judged_only(): results = metric.get_result() assert len(results.current) == 3 - assert np.isclose(results.current[1], 0.333333) - assert np.isclose(results.current[2], 0.166666) - assert np.isclose(results.current[3], 0.222222) + assert np.isclose(results.current[0], 0.333333) + assert np.isclose(results.current[1], 0.166666) + assert np.isclose(results.current[2], 0.222222) def test_precision_value_judged_only_scores(): @@ -65,6 +81,6 @@ def test_precision_value_judged_only_scores(): results = metric.get_result() assert len(results.current) == 3 - assert np.isclose(results.current[1], 0.333333) - assert np.isclose(results.current[2], 0.166666) - assert np.isclose(results.current[3], 0.222222) + assert np.isclose(results.current[0], 0.333333) + assert np.isclose(results.current[1], 0.166666) + assert np.isclose(results.current[2], 0.222222) diff --git a/tests/metrics/recsys/test_recall_top_k.py b/tests/metrics/recsys/test_recall_top_k.py index 573bdffcd8..336a99d2fb 100644 --- a/tests/metrics/recsys/test_recall_top_k.py +++ b/tests/metrics/recsys/test_recall_top_k.py @@ -23,9 +23,9 @@ def test_recall_values(): results = metric.get_result() assert len(results.current) == 3 + assert results.current[0] == 0.5 assert results.current[1] == 0.5 - assert results.current[2] == 0.5 - assert results.current[3] == 1 + assert results.current[2] == 1 def test_recall_scores(): @@ -44,9 +44,9 @@ def test_recall_scores(): results = metric.get_result() assert len(results.current) == 3 + assert results.current[0] == 0.5 assert results.current[1] == 0.5 - assert results.current[2] == 0.5 - assert results.current[3] == 1 + assert results.current[2] == 1 def test_recsll_include_no_feedback(): @@ -65,6 +65,6 @@ def test_recsll_include_no_feedback(): results = metric.get_result() assert len(results.current) == 3 + assert np.isclose(results.current[0], 0.333333333) assert np.isclose(results.current[1], 0.333333333) - assert np.isclose(results.current[2], 0.333333333) - assert np.isclose(results.current[3], 0.666666666) + assert np.isclose(results.current[2], 0.666666666)