Merge pull request #2086 from recommenders-team/daviddavo/r-precision
Added r-precision
miguelgfierro committed Apr 15, 2024
2 parents a1cd82c + 3539525 commit f7e8194
Showing 2 changed files with 73 additions and 0 deletions.
58 changes: 58 additions & 0 deletions recommenders/evaluation/python_evaluation.py
@@ -541,6 +541,63 @@ def recall_at_k(
    return (df_hit_count["hit"] / df_hit_count["actual"]).sum() / n_users


def r_precision_at_k(
    rating_true,
    rating_pred,
    col_user=DEFAULT_USER_COL,
    col_item=DEFAULT_ITEM_COL,
    col_prediction=DEFAULT_PREDICTION_COL,
    relevancy_method="top_k",
    k=DEFAULT_K,
    threshold=DEFAULT_THRESHOLD,
    **_,
):
"""R-precision at K.
R-precision can be defined as the precision@R for each user, where R is the
numer of relevant items for the query. Its also equivalent to the recall at
the R-th position.
    Note:
        Since R can be large, k caps the maximum R that is considered.
        If every user has more than k true items, then r-precision@k is equal
        to recall@k. You might need to raise the k value to get meaningful
        results.
    Args:
        rating_true (pandas.DataFrame): True DataFrame
        rating_pred (pandas.DataFrame): Predicted DataFrame
        col_user (str): column name for user
        col_item (str): column name for item
        col_prediction (str): column name for prediction
        relevancy_method (str): method for determining relevancy ['top_k', 'by_threshold', None]. None means that the
            top k items are directly provided, so there is no need to compute the relevancy operation.
        k (int): number of top k items per user
        threshold (float): threshold of top items per user (optional)

    Returns:
        float: R-precision at k (min=0, max=1). The maximum value is 1 even when
            fewer than k items exist for a user in rating_true.
    """
    df_hit, df_hit_count, n_users = merge_ranking_true_pred(
        rating_true=rating_true,
        rating_pred=rating_pred,
        col_user=col_user,
        col_item=col_item,
        col_prediction=col_prediction,
        relevancy_method=relevancy_method,
        k=k,
        threshold=threshold,
    )

    if df_hit.shape[0] == 0:
        return 0.0

    # Attach each user's number of relevant items (R, the "actual" column) and
    # keep only the hits ranked within that user's top R recommendations.
    df_merged = df_hit.merge(df_hit_count[[col_user, "actual"]])
    df_merged = df_merged[df_merged["rank"] <= df_merged["actual"]]

    # Per-user precision@R; users whose hits all fall beyond rank R produce
    # NaN in the division and are skipped by mean().
    return (
        df_merged.groupby(col_user).size()
        / df_hit_count.set_index(col_user)["actual"]
    ).mean()


def ndcg_at_k(
    rating_true,
    rating_pred,
@@ -824,6 +881,7 @@ def get_top_k_items(
    exp_var.__name__: exp_var,
    precision_at_k.__name__: precision_at_k,
    recall_at_k.__name__: recall_at_k,
    r_precision_at_k.__name__: r_precision_at_k,
    ndcg_at_k.__name__: ndcg_at_k,
    map_at_k.__name__: map_at_k,
    map.__name__: map,
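For intuition, here is a minimal, self-contained sketch (plain pandas, nothing imported from the library) that reproduces the metric by hand on toy data. The column names ("user", "item", "score") and the numbers are illustrative only, not the package defaults.

# Hand-computed R-precision: for each user, take the top-R recommendations
# (R = that user's number of relevant items, capped at k) and divide the
# number of relevant items among them by R.
import pandas as pd

true = pd.DataFrame({"user": [1, 1, 1, 2, 2], "item": [10, 11, 12, 10, 13]})
pred = pd.DataFrame(
    {
        "user": [1, 1, 1, 2, 2],
        "item": [10, 12, 14, 13, 15],
        "score": [0.9, 0.8, 0.7, 0.9, 0.8],
    }
)

k = 10
scores = []
for user, group in pred.groupby("user"):
    relevant = set(true.loc[true["user"] == user, "item"])
    r = min(len(relevant), k)  # k caps R, as in the docstring's Note
    top_r = group.sort_values("score", ascending=False).head(r)["item"]
    scores.append(top_r.isin(relevant).sum() / len(relevant))

# user 1: R=3, top-3 = [10, 12, 14] -> 2 hits -> 2/3
# user 2: R=2, top-2 = [13, 15]     -> 1 hit  -> 1/2
print(sum(scores) / len(scores))  # (2/3 + 1/2) / 2 ~ 0.583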
15 changes: 15 additions & 0 deletions tests/unit/recommenders/evaluation/test_python_evaluation.py
@@ -25,6 +25,7 @@
    exp_var,
    get_top_k_items,
    precision_at_k,
    r_precision_at_k,
    recall_at_k,
    ndcg_at_k,
    map_at_k,
@@ -366,6 +367,20 @@ def test_python_recall_at_k(rating_true, rating_pred, rating_nohit):
    assert recall_at_k(rating_true, rating_pred, k=10) == pytest.approx(0.37777, TOL)


def test_python_r_precision(rating_true, rating_pred, rating_nohit):
    assert r_precision_at_k(
        rating_true=rating_true,
        rating_pred=rating_true,
        col_prediction=DEFAULT_RATING_COL,
        k=10,
    ) == pytest.approx(1, TOL)
    assert r_precision_at_k(rating_true, rating_nohit, k=5) == 0.0
    assert r_precision_at_k(rating_true, rating_pred, k=3) == pytest.approx(0.21111, TOL)
    assert r_precision_at_k(rating_true, rating_pred, k=5) == pytest.approx(0.24444, TOL)
    # At k=10 this matches recall@10 above (0.37777), as noted in the docstring.
    assert r_precision_at_k(rating_true, rating_pred, k=10) == pytest.approx(0.37777, TOL)


def test_python_auc(rating_true_binary, rating_pred_binary):
    assert auc(
        rating_true=rating_true_binary,
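As a quick usage sketch outside the test suite: feeding the ground truth back in as the prediction should score a perfect 1.0, mirroring the first assertion above. This assumes the package's default column names are "userID" and "itemID" and that a "rating" column can be pointed at via col_prediction, as the test does.

import pandas as pd
from recommenders.evaluation.python_evaluation import r_precision_at_k

# Toy ground truth; "userID"/"itemID"/"rating" are assumed default names.
true = pd.DataFrame(
    {
        "userID": [1, 1, 1, 2, 2],
        "itemID": [1, 2, 3, 1, 4],
        "rating": [5.0, 4.0, 3.0, 5.0, 4.0],
    }
)

# Use the true ratings as the "predictions": every recommended item is
# relevant, so per-user precision@R is R/R = 1 for everyone.
score = r_precision_at_k(
    rating_true=true,
    rating_pred=true,
    col_prediction="rating",
    k=10,
)
print(score)  # expected: 1.0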
