From 3fb3ec10d038032b5e8ddfef1214b47230ec2a21 Mon Sep 17 00:00:00 2001 From: Ismail Ashraq Date: Tue, 16 Jul 2024 14:14:42 +0800 Subject: [PATCH 1/6] add sparse vector to pinecone query --- semantic_router/index/pinecone.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/semantic_router/index/pinecone.py b/semantic_router/index/pinecone.py index fdd87320..dfc5aaef 100644 --- a/semantic_router/index/pinecone.py +++ b/semantic_router/index/pinecone.py @@ -462,6 +462,7 @@ def describe(self) -> Dict: def query( self, vector: np.ndarray, + sparse_vector: Optional[dict] = None, top_k: int = 5, route_filter: Optional[List[str]] = None, ) -> Tuple[np.ndarray, List[str]]: @@ -474,6 +475,7 @@ def query( filter_query = None results = self.index.query( vector=[query_vector_list], + sparse_vector=sparse_vector, top_k=top_k, filter=filter_query, include_metadata=True, @@ -486,6 +488,7 @@ def query( async def aquery( self, vector: np.ndarray, + sparse_vector: Optional[dict] = None, top_k: int = 5, route_filter: Optional[List[str]] = None, ) -> Tuple[np.ndarray, List[str]]: @@ -498,6 +501,7 @@ async def aquery( filter_query = None results = await self._async_query( vector=query_vector_list, + sparse_vector=sparse_vector, namespace=self.namespace or "", filter=filter_query, top_k=top_k, @@ -514,6 +518,7 @@ def delete_index(self): async def _async_query( self, vector: list[float], + sparse_vector: Optional[dict] = None, namespace: str = "", filter: Optional[dict] = None, top_k: int = 5, @@ -521,6 +526,7 @@ async def _async_query( ): params = { "vector": vector, + "sparse_vector": sparse_vector, "namespace": namespace, "filter": filter, "top_k": top_k, From 51d2bc9a41893b4e95679c6a6e5311844313c9bf Mon Sep 17 00:00:00 2001 From: Ismail Ashraq Date: Tue, 16 Jul 2024 14:15:02 +0800 Subject: [PATCH 2/6] aquery method for local index --- semantic_router/index/local.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/semantic_router/index/local.py 
b/semantic_router/index/local.py index 7e32f3a8..1116ffe4 100644 --- a/semantic_router/index/local.py +++ b/semantic_router/index/local.py @@ -98,6 +98,35 @@ def query( scores, idx = top_scores(sim, top_k) route_names = [self.routes[i] for i in idx] return scores, route_names + + async def aquery( + self, + vector: np.ndarray, + top_k: int = 5, + route_filter: Optional[List[str]] = None, + ) -> Tuple[np.ndarray, List[str]]: + """ + Search the index for the query and return top_k results. + """ + if self.index is None or self.routes is None: + raise ValueError("Index or routes are not populated.") + if route_filter is not None: + filtered_index = [] + filtered_routes = [] + for route, vec in zip(self.routes, self.index): + if route in route_filter: + filtered_index.append(vec) + filtered_routes.append(route) + if not filtered_routes: + raise ValueError("No routes found matching the filter criteria.") + sim = similarity_matrix(vector, np.array(filtered_index)) + scores, idx = top_scores(sim, top_k) + route_names = [filtered_routes[i] for i in idx] + else: + sim = similarity_matrix(vector, self.index) + scores, idx = top_scores(sim, top_k) + route_names = [self.routes[i] for i in idx] + return scores, route_names def delete(self, route_name: str): """ From b6e2b19c3be6fa61b4dbdcae10807235e547d378 Mon Sep 17 00:00:00 2001 From: Ismail Ashraq Date: Tue, 16 Jul 2024 14:52:11 +0800 Subject: [PATCH 3/6] fix lint errors --- semantic_router/index/pinecone.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/semantic_router/index/pinecone.py b/semantic_router/index/pinecone.py index dfc5aaef..732e1f35 100644 --- a/semantic_router/index/pinecone.py +++ b/semantic_router/index/pinecone.py @@ -462,9 +462,9 @@ def describe(self) -> Dict: def query( self, vector: np.ndarray, - sparse_vector: Optional[dict] = None, top_k: int = 5, route_filter: Optional[List[str]] = None, + **kwargs: Any, ) -> Tuple[np.ndarray, List[str]]: if self.index is None: raise 
ValueError("Index is not populated.") @@ -475,7 +475,7 @@ def query( filter_query = None results = self.index.query( vector=[query_vector_list], - sparse_vector=sparse_vector, + sparse_vector=kwargs.get('sparse_vector', None), top_k=top_k, filter=filter_query, include_metadata=True, @@ -488,9 +488,9 @@ def query( async def aquery( self, vector: np.ndarray, - sparse_vector: Optional[dict] = None, top_k: int = 5, route_filter: Optional[List[str]] = None, + **kwargs: Any, ) -> Tuple[np.ndarray, List[str]]: if self.async_client is None or self.host is None: raise ValueError("Async client or host are not initialized.") @@ -501,7 +501,7 @@ async def aquery( filter_query = None results = await self._async_query( vector=query_vector_list, - sparse_vector=sparse_vector, + sparse_vector=kwargs.get('sparse_vector', None), namespace=self.namespace or "", filter=filter_query, top_k=top_k, From 3fa78fc491c818e616c6b7390e5731c1a6ee3c75 Mon Sep 17 00:00:00 2001 From: Ismail Ashraq Date: Tue, 16 Jul 2024 15:10:38 +0800 Subject: [PATCH 4/6] more lint fixes --- semantic_router/index/local.py | 2 +- semantic_router/index/pinecone.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/semantic_router/index/local.py b/semantic_router/index/local.py index 1116ffe4..5426ec76 100644 --- a/semantic_router/index/local.py +++ b/semantic_router/index/local.py @@ -98,7 +98,7 @@ def query( scores, idx = top_scores(sim, top_k) route_names = [self.routes[i] for i in idx] return scores, route_names - + async def aquery( self, vector: np.ndarray, diff --git a/semantic_router/index/pinecone.py b/semantic_router/index/pinecone.py index 732e1f35..c04cdc9a 100644 --- a/semantic_router/index/pinecone.py +++ b/semantic_router/index/pinecone.py @@ -475,7 +475,7 @@ def query( filter_query = None results = self.index.query( vector=[query_vector_list], - sparse_vector=kwargs.get('sparse_vector', None), + sparse_vector=kwargs.get("sparse_vector", None), top_k=top_k, filter=filter_query, 
include_metadata=True, @@ -501,7 +501,7 @@ async def aquery( filter_query = None results = await self._async_query( vector=query_vector_list, - sparse_vector=kwargs.get('sparse_vector', None), + sparse_vector=kwargs.get("sparse_vector", None), namespace=self.namespace or "", filter=filter_query, top_k=top_k, From 7f22e96514ec710e21afab338f5c567a49521be0 Mon Sep 17 00:00:00 2001 From: Ismail Ashraq Date: Tue, 16 Jul 2024 15:21:34 +0800 Subject: [PATCH 5/6] update docstring --- semantic_router/index/pinecone.py | 34 +++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/semantic_router/index/pinecone.py b/semantic_router/index/pinecone.py index c04cdc9a..a578eb01 100644 --- a/semantic_router/index/pinecone.py +++ b/semantic_router/index/pinecone.py @@ -466,6 +466,23 @@ def query( route_filter: Optional[List[str]] = None, **kwargs: Any, ) -> Tuple[np.ndarray, List[str]]: + """ + Search the index for the query vector and return the top_k results. + + :param vector: The query vector to search for. + :type vector: np.ndarray + :param top_k: The number of top results to return, defaults to 5. + :type top_k: int, optional + :param route_filter: A list of route names to filter the search results, defaults to None. + :type route_filter: Optional[List[str]], optional + :param kwargs: Additional keyword arguments for the query, including sparse_vector. + :type kwargs: Any + :keyword sparse_vector: An optional sparse vector to include in the query. + :type sparse_vector: Optional[dict] + :return: A tuple containing an array of scores and a list of route names. + :rtype: Tuple[np.ndarray, List[str]] + :raises ValueError: If the index is not populated. 
+ """ if self.index is None: raise ValueError("Index is not populated.") query_vector_list = vector.tolist() @@ -492,6 +509,23 @@ async def aquery( route_filter: Optional[List[str]] = None, **kwargs: Any, ) -> Tuple[np.ndarray, List[str]]: + """ + Asynchronously search the index for the query vector and return the top_k results. + + :param vector: The query vector to search for. + :type vector: np.ndarray + :param top_k: The number of top results to return, defaults to 5. + :type top_k: int, optional + :param route_filter: A list of route names to filter the search results, defaults to None. + :type route_filter: Optional[List[str]], optional + :param kwargs: Additional keyword arguments for the query, including sparse_vector. + :type kwargs: Any + :keyword sparse_vector: An optional sparse vector to include in the query. + :type sparse_vector: Optional[dict] + :return: A tuple containing an array of scores and a list of route names. + :rtype: Tuple[np.ndarray, List[str]] + :raises ValueError: If the async client or host is not initialized.
+ """ if self.async_client is None or self.host is None: raise ValueError("Async client or host are not initialized.") query_vector_list = vector.tolist() From 75a17993ff0e24784461cda6f5119f147eff4236 Mon Sep 17 00:00:00 2001 From: James Briggs Date: Tue, 16 Jul 2024 15:41:01 +0800 Subject: [PATCH 6/6] chore: update versions --- pyproject.toml | 2 +- semantic_router/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 9e2cfd67..f78ed13f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "semantic-router" -version = "0.0.52" +version = "0.0.53" description = "Super fast semantic router for AI decision making" authors = [ "James Briggs ", diff --git a/semantic_router/__init__.py b/semantic_router/__init__.py index 50989c62..19d5381f 100644 --- a/semantic_router/__init__.py +++ b/semantic_router/__init__.py @@ -4,4 +4,4 @@ __all__ = ["RouteLayer", "HybridRouteLayer", "Route", "LayerConfig"] -__version__ = "0.0.50" +__version__ = "0.0.53"