Fix eval metrics (#5)
* update ndcg to use log base 2 and relevance scores > 1

* replace binary search with brute force in np/cp kernels

* add ndcg test on nfcorpus
edknv committed Nov 4, 2023
1 parent d6563d2 commit df5e74b
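
For context, "log base 2" here means each relevance score is discounted by log2(rank + 1), so graded labels above 1 contribute proportionally instead of being treated as binary. A minimal NumPy sketch of the metric, with illustrative helper names that are not crossfit's API:

import numpy as np

def dcg(relevance, k=None):
    """DCG with log-base-2 discounts: sum of rel_i / log2(i + 1) over ranks i = 1..k."""
    rel = np.asarray(relevance, dtype=float)[:k]
    discounts = np.log2(np.arange(2, rel.size + 2))  # rank 1 -> log2(2) = 1
    return float(np.sum(rel / discounts))

def ndcg(relevance, k=None):
    """Normalize by the DCG of the ideal (descending) ordering; NaN if undefined."""
    ideal = np.sort(np.asarray(relevance, dtype=float))[::-1]
    idcg = dcg(ideal, k)
    return dcg(relevance, k) / idcg if idcg > 0 else float("nan")

# Graded relevance (scores > 1) near the top of the ranking is rewarded:
print(ndcg([3, 2, 0, 1], k=4))  # ~0.985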
Showing 9 changed files with 120 additions and 100 deletions.
67 changes: 21 additions & 46 deletions crossfit/backend/cupy/kernels.py
@@ -1,47 +1,25 @@
 from numba import cuda
 
 
-@cuda.jit(device=True)
-def cuda_searchsorted(arr, val, side):
-    """
-    Binary search on a sorted array.
-
-    ====== ============================
-    `side` returned index `i` satisfies
-    ====== ============================
-    0      ``arr[i-1] < val <= arr[i]``
-    1      ``arr[i-1] <= val < arr[i]``
-    ====== ============================
-    """
-    left = 0
-    right = len(arr)
-    while left < right:
-        mid = (left + right) // 2
-        if arr[mid] < val or (side == 1 and arr[mid] <= val):
-            left = mid + 1
-        else:
-            right = mid
-    return left
-
-
 @cuda.jit
 def _numba_lookup(A_indptr, A_cols, A_data, B, vals):
     i = cuda.grid(1)
 
-    n_rows_a = len(A_indptr) - 1
-    if n_rows_a == len(B):
-        ind_start, ind_end = A_indptr[i], A_indptr[i + 1]
-        for j in range(B.shape[1]):
-            left_idx = cuda_searchsorted(A_cols[ind_start:ind_end], B[i][j], 0)
-            right_idx = cuda_searchsorted(A_cols[ind_start:ind_end], B[i][j], 1)
-            if left_idx != right_idx:
-                vals[i][j] = A_data[ind_start:ind_end][left_idx]
-    else:
-        for j in range(B.shape[1]):
-            left_idx = cuda_searchsorted(A_cols, B[i][j], 0)
-            right_idx = cuda_searchsorted(A_cols, B[i][j], 1)
-            if left_idx != right_idx:
-                vals[i][j] = A_data[left_idx]
+    if i < B.shape[0]:
+        n_rows_a = len(A_indptr) - 1
+        if n_rows_a == len(B):
+            ind_start, ind_end = A_indptr[i], A_indptr[i + 1]
+            for j in range(B.shape[1]):
+                for k in range(ind_start, ind_end):
+                    if A_cols[k] == B[i][j]:
+                        vals[i][j] = A_data[k]
+                        break
+        else:
+            for j in range(B.shape[1]):
+                for k in range(len(A_cols)):
+                    if A_cols[k] == B[i][j]:
+                        vals[i][j] = A_data[k]
+                        break
 
 
 @cuda.jit
@@ -74,15 +52,12 @@ def _numba_setop(self_idx_ptr, self_col_idx, self_data, other_idx_ptr, other_col
         os, oe = other_idx_ptr[i], other_idx_ptr[i + 1]
 
         for j in range(ss, se):
-            left_idx = cuda_searchsorted(other_col_idx[os:oe], self_col_idx[j], 0)
-            right_idx = cuda_searchsorted(other_col_idx[os:oe], self_col_idx[j], 1)
-
-            if intersect:
-                found = left_idx == right_idx
-            else:
-                found = left_idx != right_idx
-
-            if found:
+            found = False
+            for k in range(os, oe):
+                if self_col_idx[j] == other_col_idx[k]:
+                    found = True
+                    break
+            if (intersect and not found) or (not intersect and found):
                 self_data[j] = 0

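The brute-force rewrite drops the kernel's implicit requirement that column indices within a CSR row be sorted, which binary search needs to return correct hits. A CPU-side sketch of what the new `_numba_lookup` computes, written in plain NumPy/SciPy for intuition (this is not the CUDA launch itself, and it assumes `vals` starts zero-initialized):

import numpy as np
from scipy.sparse import csr_matrix

def lookup_reference(A: csr_matrix, B: np.ndarray) -> np.ndarray:
    """For each row i and query column B[i, j], scan row i's entries for a match."""
    vals = np.zeros(B.shape, dtype=A.data.dtype)
    for i in range(B.shape[0]):
        start, end = A.indptr[i], A.indptr[i + 1]
        for j in range(B.shape[1]):
            for k in range(start, end):  # linear scan: no sorted-column assumption
                if A.indices[k] == B[i, j]:
                    vals[i, j] = A.data[k]
                    break
    return vals

A = csr_matrix(np.array([[0.0, 5.0, 0.0], [7.0, 0.0, 9.0]]))
print(lookup_reference(A, np.array([[1, 2], [2, 0]])))  # [[5. 0.] [9. 7.]]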
2 changes: 1 addition & 1 deletion crossfit/backend/cupy/sparse.py
@@ -122,7 +122,7 @@ def _setop(self, other, mode):
     def sort(self):
         from crossfit.backend.cupy.kernels import _numba_sort, determine_blocks_threads
 
-        blocks, threads = determine_blocks_threads(len(self.idx_ptr))
+        blocks, threads = determine_blocks_threads(len(self.idx_ptr) - 1)
        _numba_sort[blocks, threads](self.idx_ptr, self.col_idx, self.data)
 
     def intersection(self, other):
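The `- 1` matters because a CSR `indptr` array carries one sentinel entry past the last row, so `len(idx_ptr)` over-counts the rows by one and would launch a thread that indexes past the matrix. A quick illustration of the invariant with SciPy:

import numpy as np
from scipy.sparse import csr_matrix

m = csr_matrix(np.eye(3))
assert len(m.indptr) == m.shape[0] + 1  # indptr has n_rows + 1 entries
n_rows = len(m.indptr) - 1              # one thread per row, not per indptr entry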
33 changes: 18 additions & 15 deletions crossfit/backend/numpy/sparse.py
@@ -168,16 +168,18 @@ def _numba_lookup(A_indptr, A_cols, A_data, B):
     if n_rows_a == len(B):
         for i in numba.prange(B.shape[0]):
             ind_start, ind_end = A_indptr[i], A_indptr[i + 1]
-            left_idx = np.searchsorted(A_cols[ind_start:ind_end], B[i])
-            right_idx = np.searchsorted(A_cols[ind_start:ind_end], B[i], side="right")
-            found = left_idx != right_idx
-            vals[i][found] = A_data[ind_start:ind_end][left_idx[found]]
+            for j in range(len(B[i])):
+                for k in range(ind_start, ind_end):
+                    if A_cols[k] == B[i][j]:
+                        vals[i][j] = A_data[k]
+                        break
     else:
         for i in numba.prange(B.shape[0]):
-            left_idx = np.searchsorted(A_cols, B[i])
-            right_idx = np.searchsorted(A_cols, B[i], side="right")
-            found = left_idx != right_idx
-            vals[i][found] = A_data[left_idx[found]]
+            for j in range(len(B[i])):
+                for k in range(len(A_cols)):
+                    if A_cols[k] == B[i][j]:
+                        vals[i][j] = A_data[k]
+                        break
 
     return vals

@@ -197,13 +199,14 @@ def _numba_setop(self_idx_ptr, self_col_idx, self_data, other_idx_ptr, other_col
         ss, se = self_idx_ptr[i], self_idx_ptr[i + 1]
         os, oe = other_idx_ptr[i], other_idx_ptr[i + 1]
 
-        left_idx = np.searchsorted(other_col_idx[os:oe], self_col_idx[ss:se])
-        right_idx = np.searchsorted(other_col_idx[os:oe], self_col_idx[ss:se], side="right")
-        if intersect:
-            found = left_idx == right_idx
-        else:
-            found = left_idx != right_idx
-        self_data[ss:se][found] = 0
+        for j in range(ss, se):
+            found = False
+            for k in range(os, oe):
+                if self_col_idx[j] == other_col_idx[k]:
+                    found = True
+                    break
+            if (intersect and not found) or (not intersect and found):
+                self_data[j] = 0
 
 
 @numba.njit
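For intuition, the row-wise set operation that the rewritten `_numba_setop` performs: intersection zeroes entries whose column is missing from the other row, difference zeroes entries that are present. A sketch in plain Python (the kernels use the linear scan above instead of a hash set, since numba device code cannot build one):

import numpy as np

def setop_row(self_cols, self_data, other_cols, intersect):
    """Zero out self_data entries per the intersection/difference rule."""
    out = self_data.copy()
    other = set(int(c) for c in other_cols)
    for j, c in enumerate(self_cols):
        found = int(c) in other
        if (intersect and not found) or (not intersect and found):
            out[j] = 0
    return out

cols, data = np.array([0, 2, 5]), np.array([1.0, 2.0, 3.0])
print(setop_row(cols, data, np.array([2, 5, 7]), intersect=True))   # [0. 2. 3.]
print(setop_row(cols, data, np.array([2, 5, 7]), intersect=False))  # [1. 0. 0.]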
9 changes: 8 additions & 1 deletion crossfit/metric/ranking/ndcg.py
@@ -9,7 +9,7 @@ class DCG(RankingMetric):
     SCALERS = {"identity": lambda x: x, "power": lambda x: np.power(x, 2) - 1}
     LOGS = {"2": lambda x: np.log2(x), "e": lambda x: np.log(x)}
 
-    def __init__(self, k=None, relevance_scaling="identity", log_base="e"):
+    def __init__(self, k=None, relevance_scaling="identity", log_base="2"):
         self._k = k
         if relevance_scaling not in self.SCALERS:
             raise ValueError("Relevance scaling must be 'identity' or 'power'.")
@@ -38,4 +38,11 @@ def _score(self, y_true: SparseLabels, y_pred_labels: MaskedArray):
         ideal_labels = y_true.get_labels_for(y_true.as_rankings(), self._k)
         idcg = self._dcg(y_true, ideal_labels)
 
+        ndcg = dcg / idcg
+
+        if idcg.shape[0] == 1 and ndcg.shape[0] > 1:
+            idcg = np.ones_like(ndcg) * idcg
+
+        ndcg[idcg == 0] = np.NaN
+
         return dcg / idcg
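The broadcast appears to handle the case where a single ideal ranking is shared across queries, and the NaN guard covers queries with no relevant documents, where IDCG is 0 and a bare division would produce inf or a 0/0 warning rather than an explicitly undefined score. A sketch of the guard's intent:

import numpy as np

dcg = np.array([2.13, 0.0, 1.0])
idcg = np.array([4.26, 0.0, 1.0])  # second query has no positive labels

ndcg = dcg / np.where(idcg == 0, np.nan, idcg)  # undefined, not inf
print(ndcg)  # [0.5 nan 1. ]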
2 changes: 1 addition & 1 deletion crossfit/metric/ranking/precision.py
@@ -14,7 +14,7 @@ def __init__(self, k, truncated=False):
         super().__init__(k)
         self._truncated = truncated
 
-    def _precision(self, y_true: SparseBinaryLabels, y_pred_labels: MaskedArray):
+    def _precision(self, y_true: SparseLabels, y_pred_labels: MaskedArray):
         n_pos = y_true.get_n_positives(y_pred_labels.shape[0])
         n_relevant = np.sum(
             (y_pred_labels.data[:, : self._k] == 1)
6 changes: 3 additions & 3 deletions crossfit/metric/ranking/recall.py
@@ -1,6 +1,6 @@
 import numpy as np
 
-from crossfit.metric.ranking.base import BinaryRankingMetric, SparseBinaryLabels
+from crossfit.metric.ranking.base import BinaryRankingMetric, SparseLabels
 from crossfit.data.array.masked import MaskedArray
 
 
@@ -9,10 +9,10 @@ def __init__(self, k, truncated=False):
         super().__init__(k)
         self._truncated = truncated
 
-    def _recall(self, y_true: SparseBinaryLabels, y_pred_labels: MaskedArray):
+    def _recall(self, y_true: SparseLabels, y_pred_labels: MaskedArray):
         n_pos = y_true.get_n_positives(y_pred_labels.shape[0])
         n_relevant = np.sum(
-            (y_pred_labels.data[:, : self._k] == 1) & (~y_pred_labels.mask[:, : self._k]), axis=-1
+            (y_pred_labels.data[:, : self._k] >= 1) & (~y_pred_labels.mask[:, : self._k]), axis=-1
         )
 
         scores = np.NaN * np.zeros_like(n_relevant, dtype=float)
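The `>= 1` comparison is what lets recall count graded labels: once observations carry relevance scores above 1, a retrieved document labeled 2 or 3 would silently fail an `== 1` check. For instance:

import numpy as np

retrieved = np.array([[3, 0, 1, 2]])    # relevance grades of the top-4 results
print(np.sum(retrieved == 1, axis=-1))  # [1]  misses the grade-2 and grade-3 hits
print(np.sum(retrieved >= 1, axis=-1))  # [3]  every positively graded hit counts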
60 changes: 29 additions & 31 deletions crossfit/report/beir/report.py
@@ -13,7 +13,7 @@
 from crossfit.report.beir.embed import embed
 from crossfit.calculate.aggregate import Aggregator
 from crossfit.metric.continuous.mean import Mean
-from crossfit.metric.ranking import AP, NDCG, Precision, Recall, SparseBinaryLabels, SparseRankings
+from crossfit.metric.ranking import AP, NDCG, Precision, Recall, SparseBinaryLabels, SparseNumericLabels, SparseRankings
 from crossfit.report.base import Report
 from crossfit.op.vector_search import VectorSearchOp
 from crossfit.backend.torch.model import Model
@@ -34,13 +34,13 @@ def __init__(
         self.metrics = metrics
 
     def prepare(self, df):
-        encoder = self.create_label_encoder(df, ["corpus-index-pred", "corpus-index-obs"])
-        obs_csr = self.create_csr_matrix(df["corpus-index-obs"], df["score-obs"], encoder)
-        pred_csr = self.create_csr_matrix(df["corpus-index-pred"], df["score-pred"], encoder)
+        encoder = create_label_encoder(df, ["corpus-index-pred", "corpus-index-obs"])
+        obs_csr = create_csr_matrix(df["corpus-index-obs"], df["score-obs"], encoder)
+        pred_csr = create_csr_matrix(df["corpus-index-pred"], df["score-pred"], encoder)
 
         # TODO: Fix dispatch
-        labels = SparseBinaryLabels(CrossSparse.from_matrix(obs_csr))
-        rankings = SparseRankings(CrossSparse.from_matrix(pred_csr))
+        labels = SparseNumericLabels.from_matrix(obs_csr)
+        rankings = SparseRankings.from_scores(pred_csr)
 
         outputs = {}
         with crossarray:
@@ -49,42 +49,40 @@ def prepare(self, df):
                 metric_at_k = metric(k=k)
                 result = metric_at_k.score(labels, rankings)
 
-                # TODO: Does this make sense?
-                result = np.nan_to_num(result)
-                result = np.where(result > 1, 1, result)
-
                 outputs[metric_at_k.name()] = Mean.from_array(result, axis=0)
 
         return outputs
 
-    def create_label_encoder(self, df, cols) -> LabelEncoder:
-        # Extract leaves (flattened arrays)
-        _leaves = []
-
-        for col in cols:
-            _leaves.append(df[col].list.leaves)
-
-        # Concatenate and get unique values for fit_transform
-        all_ids = cudf.concat(_leaves).unique()
-
-        # Label Encoding
-        le = LabelEncoder()
-        le.fit(all_ids)
-
-        return le
-
-    def create_csr_matrix(self, ids, scores, label_encoder: LabelEncoder):
-        num_rows = scores.size
-        num_columns = label_encoder.classes_.shape[0]
-
-        values = scores.list.leaves.values.astype(cp.float32)
-        indices = label_encoder.transform(ids.list.leaves).values
-        indptr = scores.list._column.offsets.values
-        sparse_matrix = cp.sparse.csr_matrix(
-            (values, indices, indptr), shape=(num_rows, num_columns)
-        )
-
-        return sparse_matrix
+
+def create_label_encoder(df, cols) -> LabelEncoder:
+    # Extract leaves (flattened arrays)
+    _leaves = []
+
+    for col in cols:
+        _leaves.append(df[col].list.leaves)
+
+    # Concatenate and get unique values for fit_transform
+    all_ids = cudf.concat(_leaves).unique()
+
+    # Label Encoding
+    le = LabelEncoder()
+    le.fit(all_ids)
+
+    return le
+
+
+def create_csr_matrix(ids, scores, label_encoder: LabelEncoder):
+    num_rows = scores.size
+    num_columns = label_encoder.classes_.shape[0]
+
+    values = scores.list.leaves.values.astype(cp.float32)
+    indices = label_encoder.transform(ids.list.leaves).values
+    indptr = scores.list._column.offsets.values
+    sparse_matrix = cp.sparse.csr_matrix(
+        (values, indices, indptr), shape=(num_rows, num_columns)
+    )
+
+    return sparse_matrix
 
 
 def join_predictions(data, predictions):
1 change: 0 additions & 1 deletion tests/metrics/ranking/test_ndcg.py
@@ -105,7 +105,6 @@ def test_numeric_score(self, y_gold, y_pred, expect, params):
         ],
     )
     def test_binary_score(self, y_gold, y_pred, expect, params):
-        print(y_gold, y_pred)
         y_gold = SparseBinaryLabels.from_positive_indices(y_gold)
         if len(y_pred) == 0 or [] in y_pred:
             with pytest.warns(UserWarning):
40 changes: 39 additions & 1 deletion tests/report/beir/test_report.py
@@ -1,12 +1,20 @@
 import pytest
 
 pytest.importorskip("cupy")
+beir = pytest.importorskip("beir")
 
+import numpy as np
+
 import crossfit as cf
+from crossfit.data.sparse.ranking import SparseNumericLabels, SparseRankings
+from crossfit.metric.ranking import NDCG
+from crossfit.report.beir.report import (create_csr_matrix,
+                                         create_label_encoder,
+                                         join_predictions)
 
 
 @pytest.mark.singlegpu
-@pytest.mark.parametrize("dataset", ["hotpotqa", "nq"])
+@pytest.mark.parametrize("dataset", ["nq"])
 def test_beir_report(
     dataset, model_name="sentence-transformers/all-MiniLM-L6-v2", k=10
 ):
@@ -34,3 +42,33 @@ def test_beir_report(
     assert ("split", "test") in report.result_df.index.values.tolist()
     for col in expected_columns:
         assert report.result_df.loc[("split", "test"), col].item() > 0.0
+
+
+@pytest.mark.singlegpu
+@pytest.mark.parametrize("dataset", ["hotpotqa"])
+def test_no_invalid_scores(dataset, model_name="sentence-transformers/all-MiniLM-L6-v2", k=10):
+    model = cf.SentenceTransformerModel(model_name)
+    vector_search = cf.TorchExactSearch(k=k)
+    embeds = cf.embed(
+        dataset,
+        model,
+        vector_search=vector_search,
+        overwrite=True,
+        tiny_sample=True,
+    )
+    test = embeds.data.test.ddf()
+    test["split"] = "test"
+    df = join_predictions(test, embeds.predictions).compute()
+
+    encoder = create_label_encoder(df, ["corpus-index-pred", "corpus-index-obs"])
+    obs_csr = create_csr_matrix(df["corpus-index-obs"], df["score-obs"], encoder)
+    pred_csr = create_csr_matrix(df["corpus-index-pred"], df["score-pred"], encoder)
+
+    labels = SparseNumericLabels.from_matrix(obs_csr)
+    rankings = SparseRankings.from_scores(pred_csr)
+
+    ndcg = NDCG(5).score(labels, rankings)
+
+    assert ndcg.min() >= 0
+    assert ndcg.max() <= 1
+    assert not np.isinf(ndcg).any()
