diff --git a/crossfit/backend/cupy/kernels.py b/crossfit/backend/cupy/kernels.py
index 338599e..2068873 100644
--- a/crossfit/backend/cupy/kernels.py
+++ b/crossfit/backend/cupy/kernels.py
@@ -1,47 +1,25 @@
 from numba import cuda
 
 
-@cuda.jit(device=True)
-def cuda_searchsorted(arr, val, side):
-    """
-    Binary search on a sorted array.
-
-    ====== ============================
-    `side` returned index `i` satisfies
-    ====== ============================
-    0      ``arr[i-1] < val <= arr[i]``
-    1      ``arr[i-1] <= val < arr[i]``
-    ====== ============================
-    """
-    left = 0
-    right = len(arr)
-    while left < right:
-        mid = (left + right) // 2
-        if arr[mid] < val or (side == 1 and arr[mid] <= val):
-            left = mid + 1
-        else:
-            right = mid
-    return left
-
-
 @cuda.jit
 def _numba_lookup(A_indptr, A_cols, A_data, B, vals):
     i = cuda.grid(1)
 
-    n_rows_a = len(A_indptr) - 1
-    if n_rows_a == len(B):
-        ind_start, ind_end = A_indptr[i], A_indptr[i + 1]
-        for j in range(B.shape[1]):
-            left_idx = cuda_searchsorted(A_cols[ind_start:ind_end], B[i][j], 0)
-            right_idx = cuda_searchsorted(A_cols[ind_start:ind_end], B[i][j], 1)
-            if left_idx != right_idx:
-                vals[i][j] = A_data[ind_start:ind_end][left_idx]
-    else:
-        for j in range(B.shape[1]):
-            left_idx = cuda_searchsorted(A_cols, B[i][j], 0)
-            right_idx = cuda_searchsorted(A_cols, B[i][j], 1)
-            if left_idx != right_idx:
-                vals[i][j] = A_data[left_idx]
+    if i < B.shape[0]:
+        n_rows_a = len(A_indptr) - 1
+        if n_rows_a == len(B):
+            ind_start, ind_end = A_indptr[i], A_indptr[i + 1]
+            for j in range(B.shape[1]):
+                for k in range(ind_start, ind_end):
+                    if A_cols[k] == B[i][j]:
+                        vals[i][j] = A_data[k]
+                        break
+        else:
+            for j in range(B.shape[1]):
+                for k in range(len(A_cols)):
+                    if A_cols[k] == B[i][j]:
+                        vals[i][j] = A_data[k]
+                        break
 
 
 @cuda.jit
@@ -74,15 +52,12 @@ def _numba_setop(self_idx_ptr, self_col_idx, self_data, other_idx_ptr, other_col
         os, oe = other_idx_ptr[i], other_idx_ptr[i + 1]
 
         for j in range(ss, se):
-            left_idx = cuda_searchsorted(other_col_idx[os:oe], self_col_idx[j], 0)
-            right_idx = cuda_searchsorted(other_col_idx[os:oe], self_col_idx[j], 1)
-
-            if intersect:
-                found = left_idx == right_idx
-            else:
-                found = left_idx != right_idx
-
-            if found:
+            found = False
+            for k in range(os, oe):
+                if self_col_idx[j] == other_col_idx[k]:
+                    found = True
+                    break
+            if (intersect and not found) or (not intersect and found):
                 self_data[j] = 0
 
 
diff --git a/crossfit/backend/cupy/sparse.py b/crossfit/backend/cupy/sparse.py
index 982b7da..9d9e4b5 100644
--- a/crossfit/backend/cupy/sparse.py
+++ b/crossfit/backend/cupy/sparse.py
@@ -122,7 +122,7 @@ def _setop(self, other, mode):
     def sort(self):
         from crossfit.backend.cupy.kernels import _numba_sort, determine_blocks_threads
 
-        blocks, threads = determine_blocks_threads(len(self.idx_ptr))
+        blocks, threads = determine_blocks_threads(len(self.idx_ptr) - 1)
         _numba_sort[blocks, threads](self.idx_ptr, self.col_idx, self.data)
 
     def intersection(self, other):
diff --git a/crossfit/backend/numpy/sparse.py b/crossfit/backend/numpy/sparse.py
index e71f606..28acad7 100644
--- a/crossfit/backend/numpy/sparse.py
+++ b/crossfit/backend/numpy/sparse.py
@@ -168,16 +168,18 @@ def _numba_lookup(A_indptr, A_cols, A_data, B):
     if n_rows_a == len(B):
         for i in numba.prange(B.shape[0]):
             ind_start, ind_end = A_indptr[i], A_indptr[i + 1]
-            left_idx = np.searchsorted(A_cols[ind_start:ind_end], B[i])
-            right_idx = np.searchsorted(A_cols[ind_start:ind_end], B[i], side="right")
-            found = left_idx != right_idx
-            vals[i][found] = A_data[ind_start:ind_end][left_idx[found]]
+            for j in range(len(B[i])):
+                for k in range(ind_start, ind_end):
+                    if A_cols[k] == B[i][j]:
+                        vals[i][j] = A_data[k]
+                        break
     else:
         for i in numba.prange(B.shape[0]):
-            left_idx = np.searchsorted(A_cols, B[i])
-            right_idx = np.searchsorted(A_cols, B[i], side="right")
-            found = left_idx != right_idx
-            vals[i][found] = A_data[left_idx[found]]
+            for j in range(len(B[i])):
+                for k in range(len(A_cols)):
+                    if A_cols[k] == B[i][j]:
+                        vals[i][j] = A_data[k]
+                        break
 
     return vals
 
@@ -197,13 +199,14 @@ def _numba_setop(self_idx_ptr, self_col_idx, self_data, other_idx_ptr, other_col
         ss, se = self_idx_ptr[i], self_idx_ptr[i + 1]
         os, oe = other_idx_ptr[i], other_idx_ptr[i + 1]
 
-        left_idx = np.searchsorted(other_col_idx[os:oe], self_col_idx[ss:se])
-        right_idx = np.searchsorted(other_col_idx[os:oe], self_col_idx[ss:se], side="right")
-        if intersect:
-            found = left_idx == right_idx
-        else:
-            found = left_idx != right_idx
-        self_data[ss:se][found] = 0
+        for j in range(ss, se):
+            found = False
+            for k in range(os, oe):
+                if self_col_idx[j] == other_col_idx[k]:
+                    found = True
+                    break
+            if (intersect and not found) or (not intersect and found):
+                self_data[j] = 0
 
 
 @numba.njit
diff --git a/crossfit/metric/ranking/ndcg.py b/crossfit/metric/ranking/ndcg.py
index b0957d7..66b9c02 100644
--- a/crossfit/metric/ranking/ndcg.py
+++ b/crossfit/metric/ranking/ndcg.py
@@ -9,7 +9,7 @@ class DCG(RankingMetric):
     SCALERS = {"identity": lambda x: x, "power": lambda x: np.power(x, 2) - 1}
     LOGS = {"2": lambda x: np.log2(x), "e": lambda x: np.log(x)}
 
-    def __init__(self, k=None, relevance_scaling="identity", log_base="e"):
+    def __init__(self, k=None, relevance_scaling="identity", log_base="2"):
         self._k = k
         if relevance_scaling not in self.SCALERS:
             raise ValueError("Relevance scaling must be 'identity' or 'power'.")
@@ -38,4 +38,11 @@ def _score(self, y_true: SparseLabels, y_pred_labels: MaskedArray):
         ideal_labels = y_true.get_labels_for(y_true.as_rankings(), self._k)
         idcg = self._dcg(y_true, ideal_labels)
 
-        return dcg / idcg
+        ndcg = dcg / idcg
+
+        if idcg.shape[0] == 1 and ndcg.shape[0] > 1:
+            idcg = np.ones_like(ndcg) * idcg
+
+        ndcg[idcg == 0] = np.NaN
+
+        return ndcg
diff --git a/crossfit/metric/ranking/precision.py b/crossfit/metric/ranking/precision.py
index a6e6936..5436258 100644
--- a/crossfit/metric/ranking/precision.py
+++ b/crossfit/metric/ranking/precision.py
@@ -14,7 +14,7 @@ def __init__(self, k, truncated=False):
         super().__init__(k)
         self._truncated = truncated
 
-    def _precision(self, y_true: SparseBinaryLabels, y_pred_labels: MaskedArray):
+    def _precision(self, y_true: SparseLabels, y_pred_labels: MaskedArray):
         n_pos = y_true.get_n_positives(y_pred_labels.shape[0])
         n_relevant = np.sum(
             (y_pred_labels.data[:, : self._k] == 1)
diff --git a/crossfit/metric/ranking/recall.py b/crossfit/metric/ranking/recall.py
index 57c24a8..c003a56 100644
--- a/crossfit/metric/ranking/recall.py
+++ b/crossfit/metric/ranking/recall.py
@@ -1,6 +1,6 @@
 import numpy as np
 
-from crossfit.metric.ranking.base import BinaryRankingMetric, SparseBinaryLabels
+from crossfit.metric.ranking.base import BinaryRankingMetric, SparseLabels
 from crossfit.data.array.masked import MaskedArray
 
 
@@ -9,10 +9,10 @@ def __init__(self, k, truncated=False):
         super().__init__(k)
         self._truncated = truncated
 
-    def _recall(self, y_true: SparseBinaryLabels, y_pred_labels: MaskedArray):
+    def _recall(self, y_true: SparseLabels, y_pred_labels: MaskedArray):
         n_pos = y_true.get_n_positives(y_pred_labels.shape[0])
         n_relevant = np.sum(
-            (y_pred_labels.data[:, : self._k] == 1) & (~y_pred_labels.mask[:, : self._k]), axis=-1
+            (y_pred_labels.data[:, : self._k] >= 1) & (~y_pred_labels.mask[:, : self._k]), axis=-1
         )
 
         scores = np.NaN * np.zeros_like(n_relevant, dtype=float)
diff --git a/crossfit/report/beir/report.py b/crossfit/report/beir/report.py
index 910e396..0036ff7 100644
--- a/crossfit/report/beir/report.py
+++ b/crossfit/report/beir/report.py
@@ -13,7 +13,7 @@
 from crossfit.report.beir.embed import embed
 from crossfit.calculate.aggregate import Aggregator
 from crossfit.metric.continuous.mean import Mean
-from crossfit.metric.ranking import AP, NDCG, Precision, Recall, SparseBinaryLabels, SparseRankings
+from crossfit.metric.ranking import AP, NDCG, Precision, Recall, SparseBinaryLabels, SparseNumericLabels, SparseRankings
 from crossfit.report.base import Report
 from crossfit.op.vector_search import VectorSearchOp
 from crossfit.backend.torch.model import Model
@@ -34,13 +34,13 @@ def __init__(
         self.metrics = metrics
 
     def prepare(self, df):
-        encoder = self.create_label_encoder(df, ["corpus-index-pred", "corpus-index-obs"])
-        obs_csr = self.create_csr_matrix(df["corpus-index-obs"], df["score-obs"], encoder)
-        pred_csr = self.create_csr_matrix(df["corpus-index-pred"], df["score-pred"], encoder)
+        encoder = create_label_encoder(df, ["corpus-index-pred", "corpus-index-obs"])
+        obs_csr = create_csr_matrix(df["corpus-index-obs"], df["score-obs"], encoder)
+        pred_csr = create_csr_matrix(df["corpus-index-pred"], df["score-pred"], encoder)
 
         # TODO: Fix dispatch
-        labels = SparseBinaryLabels(CrossSparse.from_matrix(obs_csr))
-        rankings = SparseRankings(CrossSparse.from_matrix(pred_csr))
+        labels = SparseNumericLabels.from_matrix(obs_csr)
+        rankings = SparseRankings.from_scores(pred_csr)
 
         outputs = {}
         with crossarray:
@@ -49,42 +49,40 @@ def prepare(self, df):
                     metric_at_k = metric(k=k)
                     result = metric_at_k.score(labels, rankings)
 
-                    # TODO: Does this make sense?
-                    result = np.nan_to_num(result)
-                    result = np.where(result > 1, 1, result)
-
                     outputs[metric_at_k.name()] = Mean.from_array(result, axis=0)
 
         return outputs
 
-    def create_label_encoder(self, df, cols) -> LabelEncoder:
-        # Extract leaves (flattened arrays)
-        _leaves = []
-        for col in cols:
-            _leaves.append(df[col].list.leaves)
+def create_label_encoder(df, cols) -> LabelEncoder:
+    # Extract leaves (flattened arrays)
+    _leaves = []
 
-        # Concatenate and get unique values for fit_transform
-        all_ids = cudf.concat(_leaves).unique()
+    for col in cols:
+        _leaves.append(df[col].list.leaves)
 
-        # Label Encoding
-        le = LabelEncoder()
-        le.fit(all_ids)
+    # Concatenate and get unique values for fit_transform
+    all_ids = cudf.concat(_leaves).unique()
 
-        return le
+    # Label Encoding
+    le = LabelEncoder()
+    le.fit(all_ids)
 
-    def create_csr_matrix(self, ids, scores, label_encoder: LabelEncoder):
-        num_rows = scores.size
-        num_columns = label_encoder.classes_.shape[0]
+    return le
+
 
-        values = scores.list.leaves.values.astype(cp.float32)
-        indices = label_encoder.transform(ids.list.leaves).values
-        indptr = scores.list._column.offsets.values
-        sparse_matrix = cp.sparse.csr_matrix(
-            (values, indices, indptr), shape=(num_rows, num_columns)
-        )
-        return sparse_matrix
+def create_csr_matrix(ids, scores, label_encoder: LabelEncoder):
+    num_rows = scores.size
+    num_columns = label_encoder.classes_.shape[0]
+
+    values = scores.list.leaves.values.astype(cp.float32)
+    indices = label_encoder.transform(ids.list.leaves).values
+    indptr = scores.list._column.offsets.values
+    sparse_matrix = cp.sparse.csr_matrix(
+        (values, indices, indptr), shape=(num_rows, num_columns)
+    )
+
+    return sparse_matrix
 
 
 def join_predictions(data, predictions):
diff --git a/tests/metrics/ranking/test_ndcg.py b/tests/metrics/ranking/test_ndcg.py
index ff78013..d91bea3 100644
--- a/tests/metrics/ranking/test_ndcg.py
+++ b/tests/metrics/ranking/test_ndcg.py
@@ -105,7 +105,6 @@ def test_numeric_score(self, y_gold, y_pred, expect, params):
         ],
     )
     def test_binary_score(self, y_gold, y_pred, expect, params):
-        print(y_gold, y_pred)
        y_gold = SparseBinaryLabels.from_positive_indices(y_gold)
         if len(y_pred) == 0 or [] in y_pred:
             with pytest.warns(UserWarning):
diff --git a/tests/report/beir/test_report.py b/tests/report/beir/test_report.py
index cf62244..009400e 100644
--- a/tests/report/beir/test_report.py
+++ b/tests/report/beir/test_report.py
@@ -1,12 +1,20 @@
 import pytest
 
+pytest.importorskip("cupy")
 beir = pytest.importorskip("beir")
 
+import numpy as np
+
 import crossfit as cf
+from crossfit.data.sparse.ranking import SparseNumericLabels, SparseRankings
+from crossfit.metric.ranking import NDCG
+from crossfit.report.beir.report import (create_csr_matrix,
+                                         create_label_encoder,
+                                         join_predictions)
 
 
 @pytest.mark.singlegpu
-@pytest.mark.parametrize("dataset", ["hotpotqa", "nq"])
+@pytest.mark.parametrize("dataset", ["nq"])
 def test_beir_report(
     dataset, model_name="sentence-transformers/all-MiniLM-L6-v2", k=10
 ):
@@ -34,3 +42,33 @@
     assert ("split", "test") in report.result_df.index.values.tolist()
     for col in expected_columns:
         assert report.result_df.loc[("split", "test"), col].item() > 0.0
+
+
+@pytest.mark.singlegpu
+@pytest.mark.parametrize("dataset", ["hotpotqa"])
+def test_no_invalid_scores(dataset, model_name="sentence-transformers/all-MiniLM-L6-v2", k=10):
+    model = cf.SentenceTransformerModel(model_name)
+    vector_search = cf.TorchExactSearch(k=k)
+    embeds = cf.embed(
+        dataset,
+        model,
+        vector_search=vector_search,
+        overwrite=True,
+        tiny_sample=True,
+    )
+    test = embeds.data.test.ddf()
+    test["split"] = "test"
+    df = join_predictions(test, embeds.predictions).compute()
+
+    encoder = create_label_encoder(df, ["corpus-index-pred", "corpus-index-obs"])
+    obs_csr = create_csr_matrix(df["corpus-index-obs"], df["score-obs"], encoder)
+    pred_csr = create_csr_matrix(df["corpus-index-pred"], df["score-pred"], encoder)
+
+    labels = SparseNumericLabels.from_matrix(obs_csr)
+    rankings = SparseRankings.from_scores(pred_csr)
+
+    ndcg = NDCG(5).score(labels, rankings)
+
+    assert ndcg.min() >= 0
+    assert ndcg.max() <= 1
+    assert not np.isinf(ndcg).any()