Merge pull request #2086 from recommenders-team/daviddavo/r-precision
Added r-precision
miguelgfierro committed Apr 15, 2024
2 parents a1cd82c + 3539525 commit f7e8194
Showing 2 changed files with 73 additions and 0 deletions.
58 changes: 58 additions & 0 deletions recommenders/evaluation/python_evaluation.py
@@ -541,6 +541,63 @@ def recall_at_k(
    return (df_hit_count["hit"] / df_hit_count["actual"]).sum() / n_users


def r_precision_at_k(
    rating_true,
    rating_pred,
    col_user=DEFAULT_USER_COL,
    col_item=DEFAULT_ITEM_COL,
    col_prediction=DEFAULT_PREDICTION_COL,
    relevancy_method="top_k",
    k=DEFAULT_K,
    threshold=DEFAULT_THRESHOLD,
    **_,
):
"""R-precision at K.
R-precision can be defined as the precision@R for each user, where R is the
numer of relevant items for the query. Its also equivalent to the recall at
the R-th position.
    Note:
        Since R can be large, k caps the maximum R that is considered.
        If every user has more than k true items, then r-precision@k is equal
        to recall@k. You might need to raise the k value to get meaningful
        results.
    Args:
        rating_true (pandas.DataFrame): True DataFrame
        rating_pred (pandas.DataFrame): Predicted DataFrame
        col_user (str): column name for user
        col_item (str): column name for item
        col_prediction (str): column name for prediction
        relevancy_method (str): method for determining relevancy ['top_k', 'by_threshold', None]. None means that the
            top k items are directly provided, so there is no need to compute the relevancy operation.
        k (int): number of top k items per user
        threshold (float): threshold of top items per user (optional)

    Returns:
        float: R-precision at k (min=0, max=1). The maximum value is 1 even when
            fewer than k items exist for a user in rating_true.
    """
    df_hit, df_hit_count, n_users = merge_ranking_true_pred(
        rating_true=rating_true,
        rating_pred=rating_pred,
        col_user=col_user,
        col_item=col_item,
        col_prediction=col_prediction,
        relevancy_method=relevancy_method,
        k=k,
        threshold=threshold,
    )

    if df_hit.shape[0] == 0:
        return 0.0

    # Attach each user's number of relevant items (R, the "actual" column) and
    # keep only the hits ranked within that user's top R recommendations.
    df_merged = df_hit.merge(df_hit_count[[col_user, "actual"]])
    df_merged = df_merged[df_merged["rank"] <= df_merged["actual"]]

    # Per-user precision@R; users whose hits all fall beyond rank R produce
    # NaN in the division and are skipped by mean().
    return (
        df_merged.groupby(col_user).size()
        / df_hit_count.set_index(col_user)["actual"]
    ).mean()


def ndcg_at_k(
    rating_true,
    rating_pred,
@@ -824,6 +881,7 @@ def get_top_k_items(
    exp_var.__name__: exp_var,
    precision_at_k.__name__: precision_at_k,
    recall_at_k.__name__: recall_at_k,
    r_precision_at_k.__name__: r_precision_at_k,
    ndcg_at_k.__name__: ndcg_at_k,
    map_at_k.__name__: map_at_k,
    map.__name__: map,
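For intuition, here is a minimal, self-contained sketch (plain pandas, nothing imported from the library) that reproduces the metric by hand on toy data. The column names ("user", "item", "score") and the numbers are illustrative only, not the package defaults.

# Hand-computed R-precision: for each user, take the top-R recommendations
# (R = that user's number of relevant items, capped at k) and divide the
# number of relevant items among them by R.
import pandas as pd

true = pd.DataFrame({"user": [1, 1, 1, 2, 2], "item": [10, 11, 12, 10, 13]})
pred = pd.DataFrame(
    {
        "user": [1, 1, 1, 2, 2],
        "item": [10, 12, 14, 13, 15],
        "score": [0.9, 0.8, 0.7, 0.9, 0.8],
    }
)

k = 10
scores = []
for user, group in pred.groupby("user"):
    relevant = set(true.loc[true["user"] == user, "item"])
    r = min(len(relevant), k)  # k caps R, as in the docstring's Note
    top_r = group.sort_values("score", ascending=False).head(r)["item"]
    scores.append(top_r.isin(relevant).sum() / len(relevant))

# user 1: R=3, top-3 = [10, 12, 14] -> 2 hits -> 2/3
# user 2: R=2, top-2 = [13, 15]     -> 1 hit  -> 1/2
print(sum(scores) / len(scores))  # (2/3 + 1/2) / 2 ~ 0.583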
15 changes: 15 additions & 0 deletions tests/unit/recommenders/evaluation/test_python_evaluation.py
@@ -25,6 +25,7 @@
    exp_var,
    get_top_k_items,
    precision_at_k,
    r_precision_at_k,
    recall_at_k,
    ndcg_at_k,
    map_at_k,
@@ -366,6 +367,20 @@ def test_python_recall_at_k(rating_true, rating_pred, rating_nohit):
    assert recall_at_k(rating_true, rating_pred, k=10) == pytest.approx(0.37777, TOL)


def test_python_r_precision(rating_true, rating_pred, rating_nohit):
    assert r_precision_at_k(
        rating_true=rating_true,
        rating_pred=rating_true,
        col_prediction=DEFAULT_RATING_COL,
        k=10,
    ) == pytest.approx(1, TOL)
    assert r_precision_at_k(rating_true, rating_nohit, k=5) == 0.0
    assert r_precision_at_k(rating_true, rating_pred, k=3) == pytest.approx(0.21111, TOL)
    assert r_precision_at_k(rating_true, rating_pred, k=5) == pytest.approx(0.24444, TOL)
    # At k=10 this matches recall@10 above (0.37777), as noted in the docstring.
    assert r_precision_at_k(rating_true, rating_pred, k=10) == pytest.approx(0.37777, TOL)


def test_python_auc(rating_true_binary, rating_pred_binary):
    assert auc(
        rating_true=rating_true_binary,
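As a quick usage sketch outside the test suite: feeding the ground truth back in as the prediction should score a perfect 1.0, mirroring the first assertion above. This assumes the package's default column names are "userID" and "itemID" and that a "rating" column can be pointed at via col_prediction, as the test does.

import pandas as pd
from recommenders.evaluation.python_evaluation import r_precision_at_k

# Toy ground truth; "userID"/"itemID"/"rating" are assumed default names.
true = pd.DataFrame(
    {
        "userID": [1, 1, 1, 2, 2],
        "itemID": [1, 2, 3, 1, 4],
        "rating": [5.0, 4.0, 3.0, 5.0, 4.0],
    }
)

# Use the true ratings as the "predictions": every recommended item is
# relevant, so per-user precision@R is R/R = 1 for everyone.
score = r_precision_at_k(
    rating_true=true,
    rating_pred=true,
    col_prediction="rating",
    k=10,
)
print(score)  # expected: 1.0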
