Merge pull request #170 from aurelio-labs/james/semantic-splits

add set threshold
aurelio-labs · Feb 23, 2024 · 4feb431 · 4feb431
2 parents 599be55 + 2fa2b00
commit 4feb431
Show file tree

Hide file tree

Showing 3 changed files with 8 additions and 3 deletions.
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "semantic-router"
-version = "0.0.23"
+version = "0.0.24"
 description = "Super fast semantic router for AI decision making"
 authors = [
     "James Briggs <[email protected]>",

diff --git a/semantic_router/__init__.py b/semantic_router/__init__.py
@@ -4,4 +4,4 @@
 
 __all__ = ["RouteLayer", "HybridRouteLayer", "Route", "LayerConfig"]
 
-__version__ = "0.0.23"
+__version__ = "0.0.24"
diff --git a/semantic_router/splitters/rolling_window.py b/semantic_router/splitters/rolling_window.py
@@ -14,6 +14,7 @@ def __init__(
         self,
         encoder: BaseEncoder,
         threshold_adjustment=0.01,
+        dynamic_threshold: bool = True,
         window_size=5,
         min_split_tokens=100,
         max_split_tokens=300,
@@ -25,6 +26,7 @@ def __init__(
         self.calculated_threshold: float
         self.encoder = encoder
         self.threshold_adjustment = threshold_adjustment
+        self.dynamic_threshold = dynamic_threshold
         self.window_size = window_size
         self.plot_splits = plot_splits
         self.min_split_tokens = min_split_tokens
@@ -321,7 +323,10 @@ def __call__(self, docs: List[str]) -> List[DocumentSplit]:
                 )
             docs = split_to_sentences(docs[0])
         encoded_docs = self.encode_documents(docs)
-        self.find_optimal_threshold(docs, encoded_docs)
+        if self.dynamic_threshold:
+            self.find_optimal_threshold(docs, encoded_docs)
+        else:
+            self.calculated_threshold = self.encoder.score_threshold
         similarities = self.calculate_similarity_scores(encoded_docs)
         split_indices = self.find_split_indices(similarities=similarities)
         splits = self.split_documents(docs, split_indices, similarities)