diff --git a/wikipedia-sentences/app.py b/wikipedia-sentences/app.py index 511869c35..313867cef 100644 --- a/wikipedia-sentences/app.py +++ b/wikipedia-sentences/app.py @@ -21,7 +21,7 @@ def print_topk(resp, sentence): print(f"\n\n\nTa-Dah🔮, here's what we found for: {sentence}") for idx, match in enumerate(doc.matches): - score = match.score.value + score = match.scores['cosine'].value if score < 0.0: continue print(f'> {idx:>2d}({score:.2f}). {match.text}') diff --git a/wikipedia-sentences/indexer.py b/wikipedia-sentences/indexer.py index 999a7cc0d..0756f9a10 100644 --- a/wikipedia-sentences/indexer.py +++ b/wikipedia-sentences/indexer.py @@ -3,6 +3,7 @@ from jina import Document, requests, Executor, DocumentArray + class NumpyIndexer(Executor): def __init__(self, **kwargs): super().__init__(**kwargs) @@ -12,7 +13,6 @@ def __init__(self, **kwargs): def index(self, docs: 'DocumentArray', **kwargs): self._docs.extend(docs) - @requests(on='/search') def search(self, docs: 'DocumentArray', parameters: Dict = None, **kwargs): if parameters is None: @@ -26,12 +26,12 @@ def search(self, docs: 'DocumentArray', parameters: Dict = None, **kwargs): for _q, _positions, _dists in zip(docs, positions, dist): for position, _dist in zip(_positions, _dists): d = Document(self._docs[int(position)]) - d.score.value = 1 - _dist + d.scores['cosine'] = 1 - _dist _q.matches.append(d) @staticmethod def _get_sorted_top_k( - dist: 'np.array', top_k: int + dist: 'np.array', top_k: int ) -> Tuple['np.ndarray', 'np.ndarray']: if top_k >= dist.shape[1]: idx = dist.argsort(axis=1)[:, :top_k] @@ -44,31 +44,37 @@ def _get_sorted_top_k( dist = np.take_along_axis(dist, idx_fs, axis=1) return idx, dist - + + def _get_ones(x, y): return np.ones((x, y)) + def _ext_A(A): nA, dim = A.shape A_ext = _get_ones(nA, dim * 3) - A_ext[:, dim : 2 * dim] = A - A_ext[:, 2 * dim :] = A ** 2 + A_ext[:, dim: 2 * dim] = A + A_ext[:, 2 * dim:] = A ** 2 return A_ext + def _ext_B(B): nB, dim = B.shape B_ext = _get_ones(dim * 3, nB) B_ext[:dim] = (B ** 2).T - B_ext[dim : 2 * dim] = -2.0 * B.T + B_ext[dim: 2 * dim] = -2.0 * B.T del B return B_ext + def _euclidean(A_ext, B_ext): sqdist = A_ext.dot(B_ext).clip(min=0) return np.sqrt(sqdist) + def _norm(A): return A / np.linalg.norm(A, ord=2, axis=1, keepdims=True) + def _cosine(A_norm_ext, B_norm_ext): return A_norm_ext.dot(B_norm_ext).clip(min=0) / 2 \ No newline at end of file diff --git a/wikipedia-sentences/requirements.txt b/wikipedia-sentences/requirements.txt index bfc581f5d..2e4664b9b 100644 --- a/wikipedia-sentences/requirements.txt +++ b/wikipedia-sentences/requirements.txt @@ -1,4 +1,4 @@ -click==7.1.2 +click==8.0.1 transformers==4.6.1 torch==1.7.1 -jina[scipy, http]==2.0.0rc5.dev15 \ No newline at end of file +jina[scipy, http]==2.0.13 \ No newline at end of file