feat: integrate the OpenAILLM async into the RL #356

Merged · 16 commits · Jul 19, 2024

Changes from all commits
1,027 changes: 1,027 additions & 0 deletions docs/08-async-dynamic-routes.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "semantic-router"
-version = "0.0.53"
+version = "0.0.54"
 description = "Super fast semantic router for AI decision making"
 authors = [
     "James Briggs <[email protected]>",
2 changes: 1 addition & 1 deletion semantic_router/__init__.py
@@ -4,4 +4,4 @@

 __all__ = ["RouteLayer", "HybridRouteLayer", "Route", "LayerConfig"]

-__version__ = "0.0.53"
+__version__ = "0.0.54"
13 changes: 9 additions & 4 deletions semantic_router/layer.py
@@ -297,10 +297,15 @@
                     "Route has a function schema, but no text was provided."
                 )
             if route.function_schemas and not isinstance(route.llm, BaseLLM):
-                raise NotImplementedError(
-                    "Dynamic routes not yet supported for async calls."
-                )
-            return route(text)
+                if not self.llm:
+                    logger.warning(
+                        "No LLM provided for dynamic route, will use OpenAI LLM default"
+                    )
+                    self.llm = OpenAILLM()
+                    route.llm = self.llm
+                else:
+                    route.llm = self.llm
+            return await route.acall(text)
         elif passed and route is not None and simulate_static:
             return RouteChoice(
                 name=route.name,
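Taken together, this hunk removes the old `NotImplementedError` guard: `RouteLayer.acall` now resolves dynamic routes, falling back to a default `OpenAILLM` (with a warning) when no LLM was attached. A minimal usage sketch — the route name, utterances, and OpenAI-style function schema below are illustrative assumptions, not taken from this PR, and both the encoder and the fallback LLM expect `OPENAI_API_KEY` to be set:

import asyncio

from semantic_router import Route, RouteLayer
from semantic_router.encoders import OpenAIEncoder

# Hypothetical dynamic route; the function schema shape is an assumption.
time_route = Route(
    name="get_time",
    utterances=["what time is it in new york?", "tell me the time in london"],
    function_schemas=[
        {
            "type": "function",
            "function": {
                "name": "get_time",
                "description": "Get the current time for a timezone.",
                "parameters": {
                    "type": "object",
                    "properties": {"timezone": {"type": "string"}},
                    "required": ["timezone"],
                },
            },
        }
    ],
)

layer = RouteLayer(encoder=OpenAIEncoder(), routes=[time_route])

async def main():
    # No llm was passed above, so acall should warn and fall back to OpenAILLM().
    choice = await layer.acall("what time is it in rome?")
    print(choice.name, choice.function_call)

asyncio.run(main())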
82 changes: 82 additions & 0 deletions semantic_router/llms/openai.py
@@ -22,6 +22,7 @@

 class OpenAILLM(BaseLLM):
     client: Optional[openai.OpenAI]
+    async_client: Optional[openai.AsyncOpenAI]
     temperature: Optional[float]
     max_tokens: Optional[int]
@@ -39,6 +40,7 @@
         if api_key is None:
             raise ValueError("OpenAI API key cannot be 'None'.")
         try:
+            self.async_client = openai.AsyncOpenAI(api_key=api_key)
             self.client = openai.OpenAI(api_key=api_key)
         except Exception as e:
             raise ValueError(
@@ -64,6 +66,23 @@
             )
         return tool_calls_info

+    async def async_extract_tool_calls_info(
+        self, tool_calls: List[ChatCompletionMessageToolCall]
+    ) -> List[Dict[str, Any]]:
+        tool_calls_info = []
+        for tool_call in tool_calls:
+            if tool_call.function.arguments is None:
+                raise ValueError(
+                    "Invalid output, expected arguments to be specified for each tool call."
+                )
+            tool_calls_info.append(
+                {
+                    "function_name": tool_call.function.name,
+                    "arguments": json.loads(tool_call.function.arguments),
+                }
+            )
+        return tool_calls_info
+
     def __call__(
         self,
         messages: List[Message],
@@ -108,6 +127,50 @@
             logger.error(f"LLM error: {e}")
             raise Exception(f"LLM error: {e}") from e

+    async def acall(
+        self,
+        messages: List[Message],
+        function_schemas: Optional[List[Dict[str, Any]]] = None,
+    ) -> str:
+        if self.async_client is None:
+            raise ValueError("OpenAI async_client is not initialized.")
+        try:
+            tools: Union[List[Dict[str, Any]], NotGiven] = (
+                function_schemas if function_schemas is not None else NOT_GIVEN
+            )
+
+            completion = await self.async_client.chat.completions.create(
+                model=self.name,
+                messages=[m.to_openai() for m in messages],
+                temperature=self.temperature,
+                max_tokens=self.max_tokens,
+                tools=tools,  # type: ignore # We pass a list of dicts which get interpreted as Iterable[ChatCompletionToolParam].
+            )
+
+            if function_schemas:
+                tool_calls = completion.choices[0].message.tool_calls
+                if tool_calls is None:
+                    raise ValueError("Invalid output, expected a tool call.")
+                if len(tool_calls) < 1:
+                    raise ValueError(
+                        "Invalid output, expected at least one tool to be specified."
+                    )
+
+                # Collecting multiple tool calls information
+                output = str(
+                    await self.async_extract_tool_calls_info(tool_calls)
+                )  # str in keeping with base type.
+            else:
+                content = completion.choices[0].message.content
+                if content is None:
+                    raise ValueError("Invalid output, expected content.")
+                output = content
+            return output
+
+        except Exception as e:
+            logger.error(f"LLM error: {e}")
+            raise Exception(f"LLM error: {e}") from e
+
     def extract_function_inputs(
         self, query: str, function_schemas: List[Dict[str, Any]]
     ) -> List[Dict[str, Any]]:
@@ -122,6 +185,25 @@
         output = output.replace("'", '"')
         function_inputs = json.loads(output)
         logger.info(f"Function inputs: {function_inputs}")
+        logger.info(f"function_schemas: {function_schemas}")
         if not self._is_valid_inputs(function_inputs, function_schemas):
             raise ValueError("Invalid inputs")
         return function_inputs
+
+    async def async_extract_function_inputs(
+        self, query: str, function_schemas: List[Dict[str, Any]]
+    ) -> List[Dict[str, Any]]:
+        system_prompt = "You are an intelligent AI. Given a command or request from the user, call the function to complete the request."
+        messages = [
+            Message(role="system", content=system_prompt),
+            Message(role="user", content=query),
+        ]
+        output = await self.acall(messages=messages, function_schemas=function_schemas)
+        if not output:
+            raise Exception("No output generated for extract function input")
+        output = output.replace("'", '"')
+        function_inputs = json.loads(output)
+        logger.info(f"OpenAI => Function Inputs: {function_inputs}")
+        if not self._is_valid_inputs(function_inputs, function_schemas):
+            raise ValueError("Invalid inputs")
+        return function_inputs
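Outside of a layer, the new `acall` mirrors the sync `__call__`: plain messages return the completion content, while passing `function_schemas` returns the stringified tool-call list that `async_extract_function_inputs` then parses and validates. A rough sketch of the plain-chat path, assuming `OPENAI_API_KEY` is set in the environment:

import asyncio

from semantic_router.llms import OpenAILLM
from semantic_router.schema import Message

llm = OpenAILLM()  # picks up OPENAI_API_KEY and the gpt-4o default from this PR

async def main():
    # No function_schemas, so tools=NOT_GIVEN and acall returns message content.
    reply = await llm.acall(messages=[Message(role="user", content="Say hello.")])
    print(reply)

asyncio.run(main())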
22 changes: 22 additions & 0 deletions semantic_router/route.py
@@ -76,6 +76,28 @@
             func_call = None
         return RouteChoice(name=self.name, function_call=func_call)

+    async def acall(self, query: Optional[str] = None) -> RouteChoice:
+        if self.function_schemas:
+            if not self.llm:
+                raise ValueError(
+                    "LLM is required for dynamic routes. Please ensure the `llm` "
+                    "attribute is set."
+                )
+            elif query is None:
+                raise ValueError(
+                    "Query is required for dynamic routes. Please ensure the `query` "
+                    "argument is passed."
+                )
+            # if a function schema is provided we generate the inputs
+            extracted_inputs = await self.llm.async_extract_function_inputs(  # type: ignore # openai-llm
+                query=query, function_schemas=self.function_schemas
+            )
+            func_call = extracted_inputs
+        else:
+            # otherwise we just pass None for the call
+            func_call = None
+        return RouteChoice(name=self.name, function_call=func_call)
+
     # def to_dict(self) -> Dict[str, Any]:
     #     return self.dict()
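`Route.acall` keeps the sync semantics: a static route (no `function_schemas`) needs neither an LLM nor a query and returns a `RouteChoice` with `function_call=None`. For instance, inside an async context (a short sketch; the route here is illustrative):

# Static route: acall never touches an LLM on this path.
static_route = Route(name="chitchat", utterances=["hi", "how are you?"])
choice = await static_route.acall()
assert choice.name == "chitchat" and choice.function_call is None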
10 changes: 5 additions & 5 deletions semantic_router/utils/defaults.py
@@ -8,8 +8,8 @@ class EncoderDefault(Enum):
         "language_model": "BAAI/bge-small-en-v1.5",
     }
     OPENAI = {
-        "embedding_model": os.getenv("OPENAI_MODEL_NAME", "text-embedding-ada-002"),
-        "language_model": os.getenv("OPENAI_CHAT_MODEL_NAME", "gpt-3.5-turbo"),
+        "embedding_model": os.getenv("OPENAI_MODEL_NAME", "text-embedding-3-small"),
+        "language_model": os.getenv("OPENAI_CHAT_MODEL_NAME", "gpt-4o"),
     }
     COHERE = {
         "embedding_model": os.getenv("COHERE_MODEL_NAME", "embed-english-v3.0"),
@@ -20,10 +20,10 @@ class EncoderDefault(Enum):
         "language_model": os.getenv("MISTRALAI_CHAT_MODEL_NAME", "mistral-tiny"),
     }
     AZURE = {
-        "embedding_model": os.getenv("AZURE_OPENAI_MODEL", "text-embedding-ada-002"),
-        "language_model": os.getenv("OPENAI_CHAT_MODEL_NAME", "gpt-3.5-turbo"),
+        "embedding_model": os.getenv("AZURE_OPENAI_MODEL", "text-embedding-3-small"),
+        "language_model": os.getenv("OPENAI_CHAT_MODEL_NAME", "gpt-4o"),
         "deployment_name": os.getenv(
-            "AZURE_OPENAI_DEPLOYMENT_NAME", "text-embedding-ada-002"
+            "AZURE_OPENAI_DEPLOYMENT_NAME", "text-embedding-3-small"
         ),
     }
     GOOGLE = {
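Since every default here is wrapped in `os.getenv`, the older models stay one environment variable away; the variables just need to be set before `semantic_router` is imported, because the Enum values are evaluated at import time. For example:

import os

# Pin the pre-0.0.54 defaults instead of text-embedding-3-small / gpt-4o.
os.environ["OPENAI_MODEL_NAME"] = "text-embedding-ada-002"
os.environ["OPENAI_CHAT_MODEL_NAME"] = "gpt-3.5-turbo"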
4 changes: 1 addition & 3 deletions tests/unit/llms/test_llm_azure_openai.py
@@ -13,9 +13,7 @@ def azure_openai_llm(mocker):
 class TestOpenAILLM:
     def test_azure_openai_llm_init_with_api_key(self, azure_openai_llm):
         assert azure_openai_llm.client is not None, "Client should be initialized"
-        assert (
-            azure_openai_llm.name == "gpt-3.5-turbo"
-        ), "Default name not set correctly"
+        assert azure_openai_llm.name == "gpt-4o", "Default name not set correctly"

     def test_azure_openai_llm_init_success(self, mocker):
         mocker.patch("os.getenv", return_value="fake-api-key")
2 changes: 1 addition & 1 deletion tests/unit/llms/test_llm_openai.py
@@ -43,7 +43,7 @@ def openai_llm(mocker):
 class TestOpenAILLM:
     def test_openai_llm_init_with_api_key(self, openai_llm):
         assert openai_llm.client is not None, "Client should be initialized"
-        assert openai_llm.name == "gpt-3.5-turbo", "Default name not set correctly"
+        assert openai_llm.name == "gpt-4o", "Default name not set correctly"

     def test_openai_llm_init_success(self, mocker):
         mocker.patch("os.getenv", return_value="fake-api-key")
8 changes: 4 additions & 4 deletions tests/unit/test_hybrid_layer.py
@@ -40,7 +40,7 @@ def cohere_encoder(mocker):
 @pytest.fixture
 def openai_encoder(mocker):
     mocker.patch.object(OpenAIEncoder, "__call__", side_effect=mock_encoder_call)
-    return OpenAIEncoder(name="text-embedding-ada-002", openai_api_key="test_api_key")
+    return OpenAIEncoder(name="text-embedding-3-small", openai_api_key="test_api_key")


 @pytest.fixture
@@ -88,8 +88,8 @@ def test_initialization(self, openai_encoder, routes):
             alpha=0.8,
         )
         assert route_layer.index is not None and route_layer.categories is not None
-        assert openai_encoder.score_threshold == 0.82
-        assert route_layer.score_threshold == 0.82
+        assert openai_encoder.score_threshold == 0.3
+        assert route_layer.score_threshold == 0.3
         assert route_layer.top_k == 10
         assert route_layer.alpha == 0.8
         assert len(route_layer.index) == 5
@@ -104,7 +104,7 @@ def test_initialization_different_encoders(self, cohere_encoder, openai_encoder):
         route_layer_openai = HybridRouteLayer(
             encoder=openai_encoder, sparse_encoder=sparse_encoder
         )
-        assert route_layer_openai.score_threshold == 0.82
+        assert route_layer_openai.score_threshold == 0.3

     def test_add_route(self, openai_encoder):
         route_layer = HybridRouteLayer(
14 changes: 7 additions & 7 deletions tests/unit/test_layer.py
@@ -87,7 +87,7 @@ def cohere_encoder(mocker):
 @pytest.fixture
 def openai_encoder(mocker):
     mocker.patch.object(OpenAIEncoder, "__call__", side_effect=mock_encoder_call)
-    return OpenAIEncoder(name="text-embedding-ada-002", openai_api_key="test_api_key")
+    return OpenAIEncoder(name="text-embedding-3-small", openai_api_key="test_api_key")


 @pytest.fixture
@@ -155,8 +155,8 @@ def test_initialization(self, openai_encoder, routes, index_cls):
         route_layer = RouteLayer(
             encoder=openai_encoder, routes=routes, top_k=10, index=index_cls()
         )
-        assert openai_encoder.score_threshold == 0.82
-        assert route_layer.score_threshold == 0.82
+        assert openai_encoder.score_threshold == 0.3
+        assert route_layer.score_threshold == 0.3
         assert route_layer.top_k == 10
         assert len(route_layer.index) if route_layer.index is not None else 0 == 5
         assert (
@@ -172,7 +172,7 @@ def test_initialization_different_encoders(
         assert cohere_encoder.score_threshold == 0.3
         assert route_layer_cohere.score_threshold == 0.3
         route_layer_openai = RouteLayer(encoder=openai_encoder, index=index_cls())
-        assert route_layer_openai.score_threshold == 0.82
+        assert route_layer_openai.score_threshold == 0.3

     def test_initialization_no_encoder(self, openai_encoder, index_cls):
         os.environ["OPENAI_API_KEY"] = "test_api_key"
@@ -189,8 +189,8 @@ def test_initialization_dynamic_route(
         route_layer_openai = RouteLayer(
             encoder=openai_encoder, routes=dynamic_routes, index=index_cls()
         )
-        assert openai_encoder.score_threshold == 0.82
-        assert route_layer_openai.score_threshold == 0.82
+        assert openai_encoder.score_threshold == 0.3
+        assert route_layer_openai.score_threshold == 0.3

     def test_add_route(self, openai_encoder, index_cls):
         route_layer = RouteLayer(encoder=openai_encoder, index=index_cls())
@@ -542,7 +542,7 @@ def test_get_thresholds(self, openai_encoder, routes, index_cls):
         route_layer = RouteLayer(
             encoder=openai_encoder, routes=routes, index=index_cls()
         )
-        assert route_layer.get_thresholds() == {"Route 1": 0.82, "Route 2": 0.82}
+        assert route_layer.get_thresholds() == {"Route 1": 0.3, "Route 2": 0.3}

     def test_with_multiple_routes_passing_threshold(
         self, openai_encoder, routes, index_cls