Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: handling non-existing local routes in sync #358

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 39 additions & 4 deletions semantic_router/index/pinecone.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

from semantic_router.index.base import BaseIndex
from semantic_router.utils.logger import logger
from semantic_router.route import Route


def clean_route_name(route_name: str) -> str:
Expand Down Expand Up @@ -203,6 +204,7 @@

def _sync_index(self, local_routes: dict):
remote_routes = self.get_routes()

remote_dict: dict = {route: set() for route, _ in remote_routes}
for route, utterance in remote_routes:
remote_dict[route].add(utterance)
Expand All @@ -215,19 +217,27 @@

routes_to_add = []
routes_to_delete = []
layer_routes = {}

Check warning on line 220 in semantic_router/index/pinecone.py

View check run for this annotation

Codecov / codecov/patch

semantic_router/index/pinecone.py#L220

Added line #L220 was not covered by tests

for route in all_routes:
local_utterances = local_dict.get(route, set())
remote_utterances = remote_dict.get(route, set())

if not local_utterances and not remote_utterances:
continue

Check warning on line 227 in semantic_router/index/pinecone.py

View check run for this annotation

Codecov / codecov/patch

semantic_router/index/pinecone.py#L226-L227

Added lines #L226 - L227 were not covered by tests

if self.sync == "error":
if local_utterances != remote_utterances:
raise ValueError(
f"Synchronization error: Differences found in route '{route}'"
)
utterances_to_include: set = set()
if local_utterances:
layer_routes[route] = list(local_utterances)

Check warning on line 236 in semantic_router/index/pinecone.py

View check run for this annotation

Codecov / codecov/patch

semantic_router/index/pinecone.py#L235-L236

Added lines #L235 - L236 were not covered by tests
elif self.sync == "remote":
utterances_to_include = set()
if remote_utterances:
layer_routes[route] = list(remote_utterances)

Check warning on line 240 in semantic_router/index/pinecone.py

View check run for this annotation

Codecov / codecov/patch

semantic_router/index/pinecone.py#L239-L240

Added lines #L239 - L240 were not covered by tests
elif self.sync == "local":
utterances_to_include = local_utterances - remote_utterances
routes_to_delete.extend(
Expand All @@ -237,11 +247,17 @@
if utterance not in local_utterances
]
)
if local_utterances:
layer_routes[route] = list(local_utterances)

Check warning on line 251 in semantic_router/index/pinecone.py

View check run for this annotation

Codecov / codecov/patch

semantic_router/index/pinecone.py#L250-L251

Added lines #L250 - L251 were not covered by tests
elif self.sync == "merge-force-remote":
if route in local_dict and route not in remote_dict:
utterances_to_include = local_utterances
if local_utterances:
layer_routes[route] = list(local_utterances)

Check warning on line 256 in semantic_router/index/pinecone.py

View check run for this annotation

Codecov / codecov/patch

semantic_router/index/pinecone.py#L255-L256

Added lines #L255 - L256 were not covered by tests
else:
utterances_to_include = set()
if remote_utterances:
layer_routes[route] = list(remote_utterances)

Check warning on line 260 in semantic_router/index/pinecone.py

View check run for this annotation

Codecov / codecov/patch

semantic_router/index/pinecone.py#L259-L260

Added lines #L259 - L260 were not covered by tests
elif self.sync == "merge-force-local":
if route in local_dict:
utterances_to_include = local_utterances - remote_utterances
Expand All @@ -252,10 +268,18 @@
if utterance not in local_utterances
]
)
if local_utterances:
layer_routes[route] = local_utterances

Check warning on line 272 in semantic_router/index/pinecone.py

View check run for this annotation

Codecov / codecov/patch

semantic_router/index/pinecone.py#L271-L272

Added lines #L271 - L272 were not covered by tests
else:
utterances_to_include = set()
if remote_utterances:
layer_routes[route] = list(remote_utterances)

Check warning on line 276 in semantic_router/index/pinecone.py

View check run for this annotation

Codecov / codecov/patch

semantic_router/index/pinecone.py#L275-L276

Added lines #L275 - L276 were not covered by tests
elif self.sync == "merge":
utterances_to_include = local_utterances - remote_utterances
if local_utterances or remote_utterances:
layer_routes[route] = list(

Check warning on line 280 in semantic_router/index/pinecone.py

View check run for this annotation

Codecov / codecov/patch

semantic_router/index/pinecone.py#L279-L280

Added lines #L279 - L280 were not covered by tests
remote_utterances.union(local_utterances)
)
else:
raise ValueError("Invalid sync mode specified")

Expand All @@ -272,7 +296,7 @@
]
)

return routes_to_add, routes_to_delete
return routes_to_add, routes_to_delete, layer_routes

Check warning on line 299 in semantic_router/index/pinecone.py

View check run for this annotation

Codecov / codecov/patch

semantic_router/index/pinecone.py#L299

Added line #L299 was not covered by tests

def _batch_upsert(self, batch: List[Dict]):
"""Helper method for upserting a single batch of records."""
Expand Down Expand Up @@ -308,8 +332,8 @@
routes: List[str],
utterances: List[str],
batch_size: int = 100,
):
"""Add vectors to Pinecone in batches."""
) -> List[Route]:
"""Add vectors to Pinecone in batches and return the overall updated list of Route objects."""
if self.index is None:
self.dimensions = self.dimensions or len(embeddings[0])
self.index = self._init_index(force_create=True)
Expand All @@ -320,7 +344,15 @@
"embeddings": embeddings,
}
if self.sync is not None:
data_to_upsert, data_to_delete = self._sync_index(local_routes=local_routes)
data_to_upsert, data_to_delete, layer_routes_dict = self._sync_index(

Check warning on line 347 in semantic_router/index/pinecone.py

View check run for this annotation

Codecov / codecov/patch

semantic_router/index/pinecone.py#L347

Added line #L347 was not covered by tests
local_routes=local_routes
)

layer_routes = [

Check warning on line 351 in semantic_router/index/pinecone.py

View check run for this annotation

Codecov / codecov/patch

semantic_router/index/pinecone.py#L351

Added line #L351 was not covered by tests
Route(name=route, utterances=layer_routes_dict[route])
for route in layer_routes_dict.keys()
]

routes_to_delete: dict = {}
for route, utterance in data_to_delete:
routes_to_delete.setdefault(route, []).append(utterance)
Expand All @@ -335,6 +367,7 @@
]
if ids_to_delete and self.index:
self.index.delete(ids=ids_to_delete)

else:
data_to_upsert = [
(vector, route, utterance)
Expand All @@ -350,6 +383,8 @@
batch = vectors_to_upsert[i : i + batch_size]
self._batch_upsert(batch)

return layer_routes

Check warning on line 386 in semantic_router/index/pinecone.py

View check run for this annotation

Codecov / codecov/patch

semantic_router/index/pinecone.py#L386

Added line #L386 was not covered by tests

def _get_route_ids(self, route_name: str):
clean_route = clean_route_name(route_name)
ids, _ = self._get_all(prefix=f"{clean_route}#")
Expand Down
40 changes: 38 additions & 2 deletions semantic_router/layer.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,8 +217,20 @@
if route.score_threshold is None:
route.score_threshold = self.score_threshold
# if routes list has been passed, we initialize index now
if len(self.routes) > 0:
if self.index.sync:
# initialize index now
if len(self.routes) > 0:
self._add_and_sync_routes(routes=self.routes)

Check warning on line 223 in semantic_router/layer.py

View check run for this annotation

Codecov / codecov/patch

semantic_router/layer.py#L222-L223

Added lines #L222 - L223 were not covered by tests
else:
dummy_embedding = self.encoder(["dummy"])

Check warning on line 225 in semantic_router/layer.py

View check run for this annotation

Codecov / codecov/patch

semantic_router/layer.py#L225

Added line #L225 was not covered by tests

layer_routes = self.index._add_and_sync(

Check warning on line 227 in semantic_router/layer.py

View check run for this annotation

Codecov / codecov/patch

semantic_router/layer.py#L227

Added line #L227 was not covered by tests
embeddings=dummy_embedding,
routes=[],
utterances=[],
)
self._set_layer_routes(layer_routes)

Check warning on line 232 in semantic_router/layer.py

View check run for this annotation

Codecov / codecov/patch

semantic_router/layer.py#L232

Added line #L232 was not covered by tests
elif len(self.routes) > 0:
self._add_routes(routes=self.routes)

def check_for_matching_routes(self, top_class: str) -> Optional[Route]:
Expand Down Expand Up @@ -385,6 +397,14 @@
)
return self._pass_threshold(scores, threshold)

def _set_layer_routes(self, new_routes: List[Route]):
"""
Set and override the current routes with a new list of routes.

:param new_routes: List of Route objects to set as the current routes.
"""
self.routes = new_routes

Check warning on line 406 in semantic_router/layer.py

View check run for this annotation

Codecov / codecov/patch

semantic_router/layer.py#L406

Added line #L406 was not covered by tests

def __str__(self):
return (
f"RouteLayer(encoder={self.encoder}, "
Expand Down Expand Up @@ -471,11 +491,27 @@
# create route array
route_names = [route.name for route in routes for _ in route.utterances]
# add everything to the index
self.index._add_and_sync(
self.index.add(
embeddings=embedded_utterances,
routes=route_names,
utterances=all_utterances,
)

def _add_and_sync_routes(self, routes: List[Route]):
# create embeddings for all routes and sync at startup with remote ones based on sync setting
all_utterances = [

Check warning on line 502 in semantic_router/layer.py

View check run for this annotation

Codecov / codecov/patch

semantic_router/layer.py#L502

Added line #L502 was not covered by tests
utterance for route in routes for utterance in route.utterances
]
embedded_utterances = self.encoder(all_utterances)

Check warning on line 505 in semantic_router/layer.py

View check run for this annotation

Codecov / codecov/patch

semantic_router/layer.py#L505

Added line #L505 was not covered by tests
# create route array
route_names = [route.name for route in routes for _ in route.utterances]

Check warning on line 507 in semantic_router/layer.py

View check run for this annotation

Codecov / codecov/patch

semantic_router/layer.py#L507

Added line #L507 was not covered by tests
# add everything to the index
layer_routes = self.index._add_and_sync(

Check warning on line 509 in semantic_router/layer.py

View check run for this annotation

Codecov / codecov/patch

semantic_router/layer.py#L509

Added line #L509 was not covered by tests
embeddings=embedded_utterances,
routes=route_names,
utterances=all_utterances,
)
self._set_layer_routes(layer_routes)

Check warning on line 514 in semantic_router/layer.py

View check run for this annotation

Codecov / codecov/patch

semantic_router/layer.py#L514

Added line #L514 was not covered by tests

def _encode(self, text: str) -> Any:
"""Given some text, encode it."""
Expand Down
Loading