Merge pull request #356 from aurelio-labs/tolga/async_dynamic_routes
feat: integrate the OpenAILLM async into the RL
jamescalam committed Jul 19, 2024
2 parents a59e7d1 + ce0489b commit cbb685f
Showing 11 changed files with 1,160 additions and 26 deletions.
1,027 changes: 1,027 additions & 0 deletions docs/08-async-dynamic-routes.ipynb

Large diffs are not rendered by default.
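Since the notebook diff is not rendered, here is a minimal sketch of the end-to-end flow it introduces: a dynamic route (one carrying `function_schemas`) queried through `RouteLayer.acall`. The route name, utterances, and the `time_schema` dict are illustrative assumptions, not taken from the notebook.

import asyncio

from semantic_router import Route, RouteLayer
from semantic_router.encoders import OpenAIEncoder

# Hypothetical OpenAI-style tool schema, for illustration only.
time_schema = {
    "type": "function",
    "function": {
        "name": "get_time",
        "description": "Get the current time in a given timezone.",
        "parameters": {
            "type": "object",
            "properties": {
                "timezone": {
                    "type": "string",
                    "description": "An IANA timezone, e.g. 'Europe/Rome'.",
                }
            },
            "required": ["timezone"],
        },
    },
}

get_time = Route(
    name="get_time",
    utterances=["what time is it in London?", "tell me the time in Rome"],
    function_schemas=[time_schema],
)

# Assumes OPENAI_API_KEY is set in the environment.
rl = RouteLayer(encoder=OpenAIEncoder(), routes=[get_time])

async def main():
    # As of this PR, acall resolves dynamic routes too, falling back to a
    # default OpenAILLM when no LLM is attached (see the layer.py diff below).
    choice = await rl.acall("what time is it in Rome?")
    print(choice.name, choice.function_call)

asyncio.run(main())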

2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "semantic-router"
-version = "0.0.53"
+version = "0.0.54"
 description = "Super fast semantic router for AI decision making"
 authors = [
     "James Briggs <[email protected]>",
2 changes: 1 addition & 1 deletion semantic_router/__init__.py
@@ -4,4 +4,4 @@

 __all__ = ["RouteLayer", "HybridRouteLayer", "Route", "LayerConfig"]

-__version__ = "0.0.53"
+__version__ = "0.0.54"
13 changes: 9 additions & 4 deletions semantic_router/layer.py
Expand Up @@ -297,10 +297,15 @@ async def acall(
"Route has a function schema, but no text was provided."
)
if route.function_schemas and not isinstance(route.llm, BaseLLM):
raise NotImplementedError(
"Dynamic routes not yet supported for async calls."
)
return route(text)
if not self.llm:
logger.warning(
"No LLM provided for dynamic route, will use OpenAI LLM default"
)
self.llm = OpenAILLM()
route.llm = self.llm
else:
route.llm = self.llm
return await route.acall(text)
elif passed and route is not None and simulate_static:
return RouteChoice(
name=route.name,
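The async path now mirrors the sync `__call__` behaviour: when a dynamic route is hit and the layer has no LLM, it warns and lazily instantiates a default `OpenAILLM`. A sketch of opting out of that fallback by attaching an LLM explicitly; note that the `llm` constructor argument is an assumption here (the diff only shows the `self.llm` attribute), and `get_time` is the route from the sketch above.

from semantic_router import RouteLayer
from semantic_router.encoders import OpenAIEncoder
from semantic_router.llms import OpenAILLM

# Passing an LLM up front means acall never hits the warning branch above,
# and the same client is shared by every dynamic route in the layer.
rl = RouteLayer(
    encoder=OpenAIEncoder(),
    llm=OpenAILLM(name="gpt-4o", temperature=0.0),
    routes=[get_time],  # the dynamic route from the earlier sketch
)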
82 changes: 82 additions & 0 deletions semantic_router/llms/openai.py
@@ -22,6 +22,7 @@

 class OpenAILLM(BaseLLM):
     client: Optional[openai.OpenAI]
+    async_client: Optional[openai.AsyncOpenAI]
     temperature: Optional[float]
     max_tokens: Optional[int]

@@ -39,6 +40,7 @@ def __init__(
         if api_key is None:
             raise ValueError("OpenAI API key cannot be 'None'.")
         try:
+            self.async_client = openai.AsyncOpenAI(api_key=api_key)
             self.client = openai.OpenAI(api_key=api_key)
         except Exception as e:
             raise ValueError(
@@ -64,6 +66,23 @@ def _extract_tool_calls_info(
             )
         return tool_calls_info

+    async def async_extract_tool_calls_info(
+        self, tool_calls: List[ChatCompletionMessageToolCall]
+    ) -> List[Dict[str, Any]]:
+        tool_calls_info = []
+        for tool_call in tool_calls:
+            if tool_call.function.arguments is None:
+                raise ValueError(
+                    "Invalid output, expected arguments to be specified for each tool call."
+                )
+            tool_calls_info.append(
+                {
+                    "function_name": tool_call.function.name,
+                    "arguments": json.loads(tool_call.function.arguments),
+                }
+            )
+        return tool_calls_info
+
     def __call__(
         self,
         messages: List[Message],
@@ -108,6 +127,50 @@ def __call__(
             logger.error(f"LLM error: {e}")
             raise Exception(f"LLM error: {e}") from e

+    async def acall(
+        self,
+        messages: List[Message],
+        function_schemas: Optional[List[Dict[str, Any]]] = None,
+    ) -> str:
+        if self.async_client is None:
+            raise ValueError("OpenAI async_client is not initialized.")
+        try:
+            tools: Union[List[Dict[str, Any]], NotGiven] = (
+                function_schemas if function_schemas is not None else NOT_GIVEN
+            )
+
+            completion = await self.async_client.chat.completions.create(
+                model=self.name,
+                messages=[m.to_openai() for m in messages],
+                temperature=self.temperature,
+                max_tokens=self.max_tokens,
+                tools=tools,  # type: ignore # We pass a list of dicts which get interpreted as Iterable[ChatCompletionToolParam].
+            )
+
+            if function_schemas:
+                tool_calls = completion.choices[0].message.tool_calls
+                if tool_calls is None:
+                    raise ValueError("Invalid output, expected a tool call.")
+                if len(tool_calls) < 1:
+                    raise ValueError(
+                        "Invalid output, expected at least one tool to be specified."
+                    )
+
+                # Collecting multiple tool calls information
+                output = str(
+                    await self.async_extract_tool_calls_info(tool_calls)
+                )  # str in keeping with base type.
+            else:
+                content = completion.choices[0].message.content
+                if content is None:
+                    raise ValueError("Invalid output, expected content.")
+                output = content
+            return output
+
+        except Exception as e:
+            logger.error(f"LLM error: {e}")
+            raise Exception(f"LLM error: {e}") from e
+
     def extract_function_inputs(
         self, query: str, function_schemas: List[Dict[str, Any]]
     ) -> List[Dict[str, Any]]:
@@ -122,6 +185,25 @@ def extract_function_inputs
         output = output.replace("'", '"')
         function_inputs = json.loads(output)
         logger.info(f"Function inputs: {function_inputs}")
+        logger.info(f"function_schemas: {function_schemas}")
         if not self._is_valid_inputs(function_inputs, function_schemas):
             raise ValueError("Invalid inputs")
         return function_inputs
+
+    async def async_extract_function_inputs(
+        self, query: str, function_schemas: List[Dict[str, Any]]
+    ) -> List[Dict[str, Any]]:
+        system_prompt = "You are an intelligent AI. Given a command or request from the user, call the function to complete the request."
+        messages = [
+            Message(role="system", content=system_prompt),
+            Message(role="user", content=query),
+        ]
+        output = await self.acall(messages=messages, function_schemas=function_schemas)
+        if not output:
+            raise Exception("No output generated for extract function input")
+        output = output.replace("'", '"')
+        function_inputs = json.loads(output)
+        logger.info(f"OpenAI => Function Inputs: {function_inputs}")
+        if not self._is_valid_inputs(function_inputs, function_schemas):
+            raise ValueError("Invalid inputs")
+        return function_inputs
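A short sketch of the two new entry points used in isolation. Without schemas, `acall` returns the plain assistant message content; with schemas, `async_extract_function_inputs` drives `acall` and returns the parsed tool-call arguments. The `time_schema` dict from the first sketch stands in for a real tool schema.

import asyncio

from semantic_router.llms import OpenAILLM
from semantic_router.schema import Message

llm = OpenAILLM()  # reads OPENAI_API_KEY from the environment

async def demo():
    # Without function_schemas, acall returns the assistant message content.
    reply = await llm.acall([Message(role="user", content="Say hello.")])
    print(reply)

    # With schemas, the completion goes through the tool-call branch and the
    # parsed inputs come back as a list of dicts.
    inputs = await llm.async_extract_function_inputs(
        query="what time is it in Rome?",
        function_schemas=[time_schema],  # illustrative schema from above
    )
    print(inputs)  # e.g. [{"function_name": "get_time", "arguments": {...}}]

asyncio.run(demo())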
22 changes: 22 additions & 0 deletions semantic_router/route.py
@@ -76,6 +76,28 @@ def __call__(self, query: Optional[str] = None) -> RouteChoice:
             func_call = None
         return RouteChoice(name=self.name, function_call=func_call)

+    async def acall(self, query: Optional[str] = None) -> RouteChoice:
+        if self.function_schemas:
+            if not self.llm:
+                raise ValueError(
+                    "LLM is required for dynamic routes. Please ensure the `llm` "
+                    "attribute is set."
+                )
+            elif query is None:
+                raise ValueError(
+                    "Query is required for dynamic routes. Please ensure the `query` "
+                    "argument is passed."
+                )
+            # if a function schema is provided we generate the inputs
+            extracted_inputs = await self.llm.async_extract_function_inputs(  # type: ignore # openai-llm
+                query=query, function_schemas=self.function_schemas
+            )
+            func_call = extracted_inputs
+        else:
+            # otherwise we just pass None for the call
+            func_call = None
+        return RouteChoice(name=self.name, function_call=func_call)
+
     # def to_dict(self) -> Dict[str, Any]:
     #     return self.dict()

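The guard at the top matters because `Route.acall` can be reached directly, without a `RouteLayer` to inject an LLM. A sketch of that failure mode (again using the illustrative `time_schema` from the first sketch):

import asyncio

from semantic_router import Route

route = Route(
    name="get_time",
    utterances=["what time is it?"],
    function_schemas=[time_schema],  # illustrative schema from above
)

# No llm attached and no layer to provide one, so acall raises.
try:
    asyncio.run(route.acall("what time is it in Rome?"))
except ValueError as e:
    print(e)  # "LLM is required for dynamic routes. ..."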
10 changes: 5 additions & 5 deletions semantic_router/utils/defaults.py
@@ -8,8 +8,8 @@ class EncoderDefault(Enum):
         "language_model": "BAAI/bge-small-en-v1.5",
     }
     OPENAI = {
-        "embedding_model": os.getenv("OPENAI_MODEL_NAME", "text-embedding-ada-002"),
-        "language_model": os.getenv("OPENAI_CHAT_MODEL_NAME", "gpt-3.5-turbo"),
+        "embedding_model": os.getenv("OPENAI_MODEL_NAME", "text-embedding-3-small"),
+        "language_model": os.getenv("OPENAI_CHAT_MODEL_NAME", "gpt-4o"),
     }
     COHERE = {
         "embedding_model": os.getenv("COHERE_MODEL_NAME", "embed-english-v3.0"),
@@ -20,10 +20,10 @@
         "language_model": os.getenv("MISTRALAI_CHAT_MODEL_NAME", "mistral-tiny"),
     }
     AZURE = {
-        "embedding_model": os.getenv("AZURE_OPENAI_MODEL", "text-embedding-ada-002"),
-        "language_model": os.getenv("OPENAI_CHAT_MODEL_NAME", "gpt-3.5-turbo"),
+        "embedding_model": os.getenv("AZURE_OPENAI_MODEL", "text-embedding-3-small"),
+        "language_model": os.getenv("OPENAI_CHAT_MODEL_NAME", "gpt-4o"),
         "deployment_name": os.getenv(
-            "AZURE_OPENAI_DEPLOYMENT_NAME", "text-embedding-ada-002"
+            "AZURE_OPENAI_DEPLOYMENT_NAME", "text-embedding-3-small"
         ),
     }
     GOOGLE = {
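Two side effects of this defaults change are worth noting. First, the test updates below follow from it: per the assertions in this diff, the OpenAI encoder's default score threshold for text-embedding-3-small is 0.3, versus 0.82 for text-embedding-ada-002, hence every 0.82 becoming 0.3. Second, because the defaults are read with os.getenv when the module is imported, existing deployments can pin the old models via environment variables rather than code changes; a sketch:

import os

# Set these before anything imports EncoderDefault; the getenv fallbacks
# above only apply when the variables are unset.
os.environ["OPENAI_MODEL_NAME"] = "text-embedding-ada-002"
os.environ["OPENAI_CHAT_MODEL_NAME"] = "gpt-3.5-turbo"

from semantic_router.utils.defaults import EncoderDefault

print(EncoderDefault.OPENAI.value["embedding_model"])  # text-embedding-ada-002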
4 changes: 1 addition & 3 deletions tests/unit/llms/test_llm_azure_openai.py
@@ -13,9 +13,7 @@ def azure_openai_llm(mocker):
 class TestOpenAILLM:
     def test_azure_openai_llm_init_with_api_key(self, azure_openai_llm):
         assert azure_openai_llm.client is not None, "Client should be initialized"
-        assert (
-            azure_openai_llm.name == "gpt-3.5-turbo"
-        ), "Default name not set correctly"
+        assert azure_openai_llm.name == "gpt-4o", "Default name not set correctly"

     def test_azure_openai_llm_init_success(self, mocker):
         mocker.patch("os.getenv", return_value="fake-api-key")
2 changes: 1 addition & 1 deletion tests/unit/llms/test_llm_openai.py
@@ -43,7 +43,7 @@ def openai_llm(mocker):
 class TestOpenAILLM:
     def test_openai_llm_init_with_api_key(self, openai_llm):
         assert openai_llm.client is not None, "Client should be initialized"
-        assert openai_llm.name == "gpt-3.5-turbo", "Default name not set correctly"
+        assert openai_llm.name == "gpt-4o", "Default name not set correctly"

     def test_openai_llm_init_success(self, mocker):
         mocker.patch("os.getenv", return_value="fake-api-key")
8 changes: 4 additions & 4 deletions tests/unit/test_hybrid_layer.py
@@ -40,7 +40,7 @@ def cohere_encoder(mocker):
 @pytest.fixture
 def openai_encoder(mocker):
     mocker.patch.object(OpenAIEncoder, "__call__", side_effect=mock_encoder_call)
-    return OpenAIEncoder(name="text-embedding-ada-002", openai_api_key="test_api_key")
+    return OpenAIEncoder(name="text-embedding-3-small", openai_api_key="test_api_key")


 @pytest.fixture
@@ -88,8 +88,8 @@ def test_initialization(self, openai_encoder, routes):
             alpha=0.8,
         )
         assert route_layer.index is not None and route_layer.categories is not None
-        assert openai_encoder.score_threshold == 0.82
-        assert route_layer.score_threshold == 0.82
+        assert openai_encoder.score_threshold == 0.3
+        assert route_layer.score_threshold == 0.3
         assert route_layer.top_k == 10
         assert route_layer.alpha == 0.8
         assert len(route_layer.index) == 5
@@ -104,7 +104,7 @@ def test_initialization_different_encoders(self, cohere_encoder, openai_encoder)
         route_layer_openai = HybridRouteLayer(
             encoder=openai_encoder, sparse_encoder=sparse_encoder
         )
-        assert route_layer_openai.score_threshold == 0.82
+        assert route_layer_openai.score_threshold == 0.3

     def test_add_route(self, openai_encoder):
         route_layer = HybridRouteLayer(
14 changes: 7 additions & 7 deletions tests/unit/test_layer.py
@@ -87,7 +87,7 @@ def cohere_encoder(mocker):
 @pytest.fixture
 def openai_encoder(mocker):
     mocker.patch.object(OpenAIEncoder, "__call__", side_effect=mock_encoder_call)
-    return OpenAIEncoder(name="text-embedding-ada-002", openai_api_key="test_api_key")
+    return OpenAIEncoder(name="text-embedding-3-small", openai_api_key="test_api_key")


 @pytest.fixture
@@ -155,8 +155,8 @@ def test_initialization(self, openai_encoder, routes, index_cls):
         route_layer = RouteLayer(
             encoder=openai_encoder, routes=routes, top_k=10, index=index_cls()
         )
-        assert openai_encoder.score_threshold == 0.82
-        assert route_layer.score_threshold == 0.82
+        assert openai_encoder.score_threshold == 0.3
+        assert route_layer.score_threshold == 0.3
         assert route_layer.top_k == 10
         assert len(route_layer.index) if route_layer.index is not None else 0 == 5
         assert (
@@ -172,7 +172,7 @@ def test_initialization_different_encoders(
         assert cohere_encoder.score_threshold == 0.3
         assert route_layer_cohere.score_threshold == 0.3
         route_layer_openai = RouteLayer(encoder=openai_encoder, index=index_cls())
-        assert route_layer_openai.score_threshold == 0.82
+        assert route_layer_openai.score_threshold == 0.3

     def test_initialization_no_encoder(self, openai_encoder, index_cls):
         os.environ["OPENAI_API_KEY"] = "test_api_key"
@@ -189,8 +189,8 @@ def test_initialization_dynamic_route(
         route_layer_openai = RouteLayer(
             encoder=openai_encoder, routes=dynamic_routes, index=index_cls()
         )
-        assert openai_encoder.score_threshold == 0.82
-        assert route_layer_openai.score_threshold == 0.82
+        assert openai_encoder.score_threshold == 0.3
+        assert route_layer_openai.score_threshold == 0.3

     def test_add_route(self, openai_encoder, index_cls):
         route_layer = RouteLayer(encoder=openai_encoder, index=index_cls())
@@ -542,7 +542,7 @@ def test_get_thresholds(self, openai_encoder, routes, index_cls):
         route_layer = RouteLayer(
             encoder=openai_encoder, routes=routes, index=index_cls()
         )
-        assert route_layer.get_thresholds() == {"Route 1": 0.82, "Route 2": 0.82}
+        assert route_layer.get_thresholds() == {"Route 1": 0.3, "Route 2": 0.3}

     def test_with_multiple_routes_passing_threshold(
         self, openai_encoder, routes, index_cls
