From 0d536dbd9daff77d5e9973761fc873f3128fd26c Mon Sep 17 00:00:00 2001 From: matt-bernstein <60152561+matt-bernstein@users.noreply.github.com> Date: Fri, 2 Aug 2024 17:54:00 -0400 Subject: [PATCH] Litellm refactor (#172) --- adala/runtimes/__init__.py | 1 + adala/runtimes/_litellm.py | 250 ++++++++++----- adala/runtimes/_openai.py | 50 +-- adala/utils/llm.py | 276 ----------------- ... I am 25 years old.-expected_result1].yaml | 103 ------- ...th exclamation mark-expected_result0].yaml | 98 ------ ... I am 25 years old.-expected_result1].yaml | 103 ------- ...th exclamation mark-expected_result0].yaml | 98 ------ ...th exclamation mark-expected_result0].yaml | 98 ------ tests/cassettes/test_llm/test_llm_async.yaml | 198 ++++++++++++ tests/cassettes/test_llm/test_llm_sync.yaml | 291 ++++++++++++++++++ tests/test_llm.py | 90 +++--- 12 files changed, 715 insertions(+), 941 deletions(-) delete mode 100644 adala/utils/llm.py delete mode 100644 tests/cassettes/test_llm/test_async_get_llm_response[ExampleResponseModel-My name is Carla and I am 25 years old.-expected_result1].yaml delete mode 100644 tests/cassettes/test_llm/test_async_get_llm_response[None-return the word banana with exclamation mark-expected_result0].yaml delete mode 100644 tests/cassettes/test_llm/test_get_llm_response[ExampleResponseModel-My name is Carla and I am 25 years old.-expected_result1].yaml delete mode 100644 tests/cassettes/test_llm/test_get_llm_response[None-return the word Banana with exclamation mark-expected_result0].yaml delete mode 100644 tests/cassettes/test_llm/test_get_llm_response[None-return the word banana with exclamation mark-expected_result0].yaml create mode 100644 tests/cassettes/test_llm/test_llm_async.yaml create mode 100644 tests/cassettes/test_llm/test_llm_sync.yaml diff --git a/adala/runtimes/__init__.py b/adala/runtimes/__init__.py index 3718065d..df9aae16 100644 --- a/adala/runtimes/__init__.py +++ b/adala/runtimes/__init__.py @@ -1,2 +1,3 @@ from .base import Runtime, AsyncRuntime from ._openai import OpenAIChatRuntime, OpenAIVisionRuntime, AsyncOpenAIChatRuntime +from ._litellm import LiteLLMChatRuntime, AsyncLiteLLMChatRuntime diff --git a/adala/runtimes/_litellm.py b/adala/runtimes/_litellm.py index ac12ba1e..25b33be0 100644 --- a/adala/runtimes/_litellm.py +++ b/adala/runtimes/_litellm.py @@ -1,7 +1,11 @@ +import asyncio import logging -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, List, Optional import litellm +from litellm.exceptions import AuthenticationError +import instructor +import traceback from adala.utils.internal_data import InternalDataFrame from adala.utils.logs import print_error from adala.utils.parse import ( @@ -9,42 +13,81 @@ partial_str_format, parse_template_to_pydantic_class, ) -from adala.utils.llm import ( - parallel_async_get_llm_response, - get_llm_response, - ConstrainedLLMResponse, - UnconstrainedLLMResponse, - ErrorLLMResponse, - LiteLLMInferenceSettings, -) -from openai import NotFoundError from pydantic import ConfigDict, field_validator from rich import print from .base import AsyncRuntime, Runtime +instructor_client = instructor.from_litellm(litellm.completion) +async_instructor_client = instructor.from_litellm(litellm.acompletion) + logger = logging.getLogger(__name__) -class LiteLLMChatRuntime(LiteLLMInferenceSettings, Runtime): +def get_messages( + user_prompt: str, + system_prompt: Optional[str] = None, + instruction_first: bool = True, +): + messages = [{"role": "user", "content": user_prompt}] + if system_prompt: + 
if instruction_first: + messages.insert(0, {"role": "system", "content": system_prompt}) + else: + messages[0]["content"] += system_prompt + return messages + + +class LiteLLMChatRuntime(Runtime): """ Runtime that uses [LiteLLM API](https://litellm.vercel.app/docs) and chat completion models to perform the skill. + The default model provider is [OpenAI](https://openai.com/), using the OPENAI_API_KEY environment variable. Other providers [can be chosen](https://litellm.vercel.app/docs/set_keys) through environment variables or passed parameters. + Attributes: - inference_settings (LiteLLMInferenceSettings): Common inference settings for LiteLLM. + model: model name. Refer to litellm supported models for how to pass + this: https://litellm.vercel.app/docs/providers + max_tokens: Maximum tokens to generate. + temperature: Temperature for sampling. + seed: Integer seed to reduce nondeterminism in generation. + + Extra parameters passed to this class will be used for inference. See `litellm.types.completion.CompletionRequest` for a full list. Some common ones are: + api_key: API key, optional. If provided, will be used to authenticate + with the provider of your specified model. + base_url (Optional[str]): Base URL, optional. If provided, will be used to talk to an OpenAI-compatible API provider besides OpenAI. + api_version (Optional[str]): API version, optional except for Azure. + timeout: Timeout in seconds. """ - model_config = ConfigDict(arbitrary_types_allowed=True) # for @computed_field + model: str = "gpt-4o-mini" + max_tokens: int = 1000 + temperature: float = 0.0 + seed: Optional[int] = 47 + + model_config = ConfigDict(extra="allow") def init_runtime(self) -> "Runtime": # check model availability + # extension of litellm.check_valid_key for non-openai deployments try: - if self.api_key: - litellm.check_valid_key(model=self.model, api_key=self.api_key) - except NotFoundError: + messages = [{"role": "user", "content": "Hey, how's it going?"}] + litellm.completion( + messages=messages, + model=self.model, + max_tokens=self.max_tokens, + temperature=self.temperature, + seed=self.seed, + # extra inference params passed to this runtime + **self.model_extra, + ) + except AuthenticationError: raise ValueError( - f'Requested model "{self.model}" is not available with your api_key.' + f'Requested model "{self.model}" is not available with your api_key and settings.' 
+ ) + except Exception as e: + raise ValueError( + f'Failed to check availability of requested model "{self.model}": {e}' ) return self @@ -52,17 +95,19 @@ def get_llm_response(self, messages: List[Dict[str, str]]) -> str: # TODO: sunset this method in favor of record_to_record if self.verbose: print(f"**Prompt content**:\n{messages}") - response: Union[ErrorLLMResponse, UnconstrainedLLMResponse] = get_llm_response( + completion = litellm.completion( messages=messages, - inference_settings=LiteLLMInferenceSettings( - **self.dict(include=LiteLLMInferenceSettings.model_fields.keys()) - ), + model=self.model, + max_tokens=self.max_tokens, + temperature=self.temperature, + seed=self.seed, + # extra inference params passed to this runtime + **self.model_extra, ) - if isinstance(response, ErrorLLMResponse): - raise ValueError(f"{response.adala_message}\n{response.adala_details}") + completion_text = completion.choices[0].message.content if self.verbose: - print(f"**Response**:\n{response.text}") - return response.text + print(f"**Response**:\n{completion_text}") + return completion_text def record_to_record( self, @@ -95,35 +140,93 @@ def record_to_record( response_model = parse_template_to_pydantic_class( output_template, provided_field_schema=field_schema ) - - response: Union[ConstrainedLLMResponse, ErrorLLMResponse] = get_llm_response( - user_prompt=input_template.format(**record, **extra_fields), - system_prompt=instructions_template, - instruction_first=instructions_first, - response_model=response_model, - inference_settings=LiteLLMInferenceSettings( - **self.dict(include=LiteLLMInferenceSettings.model_fields.keys()) - ), + messages = get_messages( + input_template.format(**record, **extra_fields), + instructions_template, + instructions_first, ) - if isinstance(response, ErrorLLMResponse): + try: + # returns a pydantic model named Output + response = instructor_client.chat.completions.create( + messages=messages, + response_model=response_model, + model=self.model, + max_tokens=self.max_tokens, + temperature=self.temperature, + seed=self.seed, + # extra inference params passed to this runtime + **self.model_extra, + ) + except Exception as e: + error_message = type(e).__name__ + # error_details = str(e) + error_details = traceback.format_exc() if self.verbose: - print_error(response.adala_message, response.adala_details) - return response.model_dump(by_alias=True) + print_error(error_message, error_details) + # TODO change this format + error_dct = { + "_adala_error": True, + "_adala_message": error_message, + "_adala_details": error_details, + } + return error_dct - return response.data + return response.dict() -class AsyncLiteLLMChatRuntime(LiteLLMInferenceSettings, AsyncRuntime): +class AsyncLiteLLMChatRuntime(AsyncRuntime): """ Runtime that uses [OpenAI API](https://openai.com/) and chat completion models to perform the skill. It uses async calls to OpenAI API. + The default model provider is [OpenAI](https://openai.com/), using the OPENAI_API_KEY environment variable. Other providers [can be chosen](https://litellm.vercel.app/docs/set_keys) through environment variables or passed parameters. + Attributes: - inference_settings (LiteLLMInferenceSettings): Common inference settings for LiteLLM. + model: model name. Refer to litellm supported models for how to pass + this: https://litellm.vercel.app/docs/providers + max_tokens: Maximum tokens to generate. + temperature: Temperature for sampling. + seed: Integer seed to reduce nondeterminism in generation. 
+ + Extra parameters passed to this class will be used for inference. See `litellm.types.completion.CompletionRequest` for a full list. Some common ones are: + api_key: API key, optional. If provided, will be used to authenticate + with the provider of your specified model. + base_url (Optional[str]): Base URL, optional. If provided, will be used to talk to an OpenAI-compatible API provider besides OpenAI. + api_version (Optional[str]): API version, optional except for Azure. + timeout: Timeout in seconds. """ - model_config = ConfigDict(arbitrary_types_allowed=True) # for @computed_field + model: str = "gpt-4o-mini" + max_tokens: int = 1000 + temperature: float = 0.0 + seed: Optional[int] = 47 + + model_config = ConfigDict(extra="allow") + + def init_runtime(self) -> "Runtime": + # check model availability + # extension of litellm.check_valid_key for non-openai deployments + try: + messages = [{"role": "user", "content": "Hey, how's it going?"}] + litellm.completion( + messages=messages, + model=self.model, + max_tokens=self.max_tokens, + temperature=self.temperature, + seed=self.seed, + # extra inference params passed to this runtime + **self.model_extra, + ) + except AuthenticationError: + raise ValueError( + f'Requested model "{self.model}" is not available with your api_key and settings.' + ) + except Exception as e: + raise ValueError( + f'Failed to check availability of requested model "{self.model}": {e}' + ) + return self @field_validator("concurrency", mode="before") def check_concurrency(cls, value) -> int: @@ -135,17 +238,6 @@ def check_concurrency(cls, value) -> int: ) return value - def init_runtime(self) -> "Runtime": - # check model availability - try: - if self.api_key: - litellm.check_valid_key(model=self.model, api_key=self.api_key) - except NotFoundError: - raise ValueError( - f'Requested model "{self.model}" is not available in your OpenAI account.' 
- ) - return self - async def batch_to_batch( self, batch: InternalDataFrame, @@ -167,27 +259,45 @@ async def batch_to_batch( lambda row: input_template.format(**row, **extra_fields), axis=1 ).tolist() - responses: List[Union[ConstrainedLLMResponse, ErrorLLMResponse]] = ( - await parallel_async_get_llm_response( - user_prompts=user_prompts, - system_prompt=instructions_template, - instruction_first=instructions_first, - response_model=response_model, - inference_settings=LiteLLMInferenceSettings( - **self.dict(include=LiteLLMInferenceSettings.model_fields.keys()) - ), + tasks = [ + asyncio.ensure_future( + async_instructor_client.chat.completions.create( + messages=get_messages( + user_prompt, + instructions_template, + instructions_first, + ), + response_model=response_model, + model=self.model, + max_tokens=self.max_tokens, + temperature=self.temperature, + seed=self.seed, + # extra inference params passed to this runtime + **self.model_extra, + ) ) - ) + for user_prompt in user_prompts + ] + responses = await asyncio.gather(*tasks, return_exceptions=True) # convert list of LLMResponse objects to the dataframe records df_data = [] for response in responses: - if isinstance(response, ErrorLLMResponse): + if isinstance(response, Exception): + error_message = type(response).__name__ + # error_details = str(response) + error_details = traceback.format_exc() if self.verbose: - print_error(response.adala_message, response.adala_details) - df_data.append(response.model_dump(by_alias=True)) + print_error(error_message, error_details) + # TODO change this format + error_dct = { + "_adala_error": True, + "_adala_message": error_message, + "_adala_details": error_details, + } + df_data.append(error_dct) else: - df_data.append(response.data) + df_data.append(response.dict()) output_df = InternalDataFrame(df_data) return output_df.set_index(batch.index) @@ -302,9 +412,11 @@ def record_to_record( completion = litellm.completion( messages=[{"role": "user", "content": content}], - inference_settings=LiteLLMInferenceSettings( - **self.dict(include=LiteLLMInferenceSettings.model_fields.keys()) - ), + max_tokens=self.max_tokens, + temperature=self.temperature, + seed=self.seed, + # extra inference params passed to this runtime + **self.model_extra, ) completion_text = completion.choices[0].message.content diff --git a/adala/runtimes/_openai.py b/adala/runtimes/_openai.py index 83424201..009d1980 100644 --- a/adala/runtimes/_openai.py +++ b/adala/runtimes/_openai.py @@ -1,49 +1,7 @@ -import os - -from pydantic import Field - from ._litellm import AsyncLiteLLMChatRuntime, LiteLLMChatRuntime, LiteLLMVisionRuntime -class OpenAIChatRuntime(LiteLLMChatRuntime): - """ - Runtime that uses [OpenAI API](https://openai.com/) and chat completion - models to perform the skill. - - Attributes: - inference_settings (LiteLLMInferenceSettings): Common inference settings for LiteLLM. - """ - - # TODO does it make any sense for this to be optional? - api_key: str = Field(default=os.getenv("OPENAI_API_KEY")) - - -class AsyncOpenAIChatRuntime(AsyncLiteLLMChatRuntime): - """ - Runtime that uses [OpenAI API](https://openai.com/) and chat completion - models to perform the skill. It uses async calls to OpenAI API. - - Attributes: - inference_settings (LiteLLMInferenceSettings): Common inference settings for LiteLLM. 
- - """ - - api_key: str = Field(default=os.getenv("OPENAI_API_KEY")) - - -class OpenAIVisionRuntime(LiteLLMVisionRuntime): - """ - Runtime that uses [OpenAI API](https://openai.com/) and vision models to - perform the skill. - Only compatible with OpenAI API version 1.0.0 or higher. - """ - - api_key: str = Field(default=os.getenv("OPENAI_API_KEY")) - # NOTE this check used to exist in OpenAIVisionRuntime.record_to_record, - # but doesn't seem to have a definition - # def init_runtime(self) -> 'Runtime': - # if not check_if_new_openai_version(): - # raise NotImplementedError( - # f'{self.__class__.__name__} requires OpenAI API version 1.0.0 or higher.' - # ) - # super().init_runtime() +# litellm already reads the OPENAI_API_KEY env var, which was the reason for this class +OpenAIChatRuntime = LiteLLMChatRuntime +AsyncOpenAIChatRuntime = AsyncLiteLLMChatRuntime +OpenAIVisionRuntime = LiteLLMVisionRuntime diff --git a/adala/utils/llm.py b/adala/utils/llm.py deleted file mode 100644 index 8d43b053..00000000 --- a/adala/utils/llm.py +++ /dev/null @@ -1,276 +0,0 @@ -import asyncio -import instructor -import litellm -import traceback -import multiprocessing as mp -from typing import Optional, Dict, List, Type, Union -from pydantic import BaseModel, Field -from pydantic_settings import BaseSettings - -instructor_client = instructor.from_litellm(litellm.completion) -async_instructor_client = instructor.from_litellm(litellm.acompletion) - - -class LLMResponse(BaseModel): - """ - Base class for LLM response. - """ - adala_message: str = Field( - default=None, serialization_alias='_adala_message' - ) - adala_details: str = Field( - default=None, serialization_alias='_adala_details' - ) - - -class ConstrainedLLMResponse(LLMResponse): - """ - LLM response from constrained generation. - `data` object contains fields required by the response model. - """ - - data: Dict = Field(default_factory=dict) - adala_error: bool = Field( - default=False, serialization_alias='_adala_error' - ) - - - -class UnconstrainedLLMResponse(LLMResponse): - """ - LLM response from unconstrained generation. - `text` field contains raw completion text. - """ - - text: str = Field(default=None) - adala_error: bool = Field( - default=False, serialization_alias='_adala_error' - ) - - - -class ErrorLLMResponse(LLMResponse): - """ - LLM response in case of error. - """ - - adala_error: bool = Field(default=True, serialization_alias="_adala_error") - - -class LiteLLMInferenceSettings(BaseSettings): - """ - Common inference settings for LiteLLM. - - Attributes: - model: model name. Refer to litellm supported models for how to pass - this: https://litellm.vercel.app/docs/providers - api_key: API key, optional. If provided, will be used to authenticate - with the provider of your specified model. - base_url (Optional[str]): Base URL, optional. If provided, will be used to talk to an OpenAI-compatible API provider besides OpenAI. - api_version (Optional[str]): API version, optional except for Azure. - instruction_first: Whether to put instructions first. - response_model: Pydantic model to constrain the LLM generated response. If not provided, the raw completion text will be returned. # noqa - max_tokens: Maximum tokens to generate. - temperature: Temperature for sampling. - timeout: Timeout in seconds. - seed: Integer seed to reduce nondeterminism in generation. 
- """ - - model: str = "gpt-4o-mini" - api_key: Optional[str] = None - base_url: Optional[str] = None - api_version: Optional[str] = None - max_tokens: int = 1000 - temperature: float = 0.0 - timeout: Optional[Union[float, int]] = None - seed: Optional[int] = 47 - - -def get_messages( - user_prompt: str, - system_prompt: Optional[str] = None, - instruction_first: bool = True, -): - messages = [{"role": "user", "content": user_prompt}] - if system_prompt: - if instruction_first: - messages.insert(0, {"role": "system", "content": system_prompt}) - else: - messages[0]["content"] += system_prompt - return messages - - -async def async_get_llm_response( - user_prompt: Optional[str] = None, - system_prompt: Optional[str] = None, - messages: Optional[List[Dict[str, str]]] = None, - instruction_first: bool = True, - response_model: Optional[Type[BaseModel]] = None, - inference_settings: LiteLLMInferenceSettings = LiteLLMInferenceSettings(), -) -> LLMResponse: - """ - Async version of create_completion function with error handling and session timeout. - - Args: - inference_settings (LiteLLMInferenceSettings): Common inference settings for LiteLLM. - user_prompt: User prompt. - system_prompt: System prompt. - messages: List of messages to be sent to the model. If provided, `user_prompt`, `system_prompt` and `instruction_first` will be ignored. - instruction_first: Whether to put instructions first. - response_model: Pydantic model to constrain the LLM generated response. If not provided, the raw completion text will be returned. # noqa - - Returns: - LLMResponse: OpenAI response or error message. - """ - - if not user_prompt and not messages: - raise ValueError("You must provide either `user_prompt` or `messages`.") - - if not messages: - # get messages from user_prompt and system_prompt - messages = get_messages(user_prompt, system_prompt, instruction_first) - - if response_model is None: - # unconstrained generation - return raw completion text and store it in `data` field: {"text": completion_text} - try: - completion = await litellm.acompletion( - messages=messages, - **inference_settings.dict(), - ) - completion_text = completion.choices[0].message.content - return UnconstrainedLLMResponse(text=completion_text) - except Exception as e: - return ErrorLLMResponse( - adala_message=type(e).__name__, adala_details=traceback.format_exc() - ) - - # constrained generation branch - use `response_model` to constrain the LLM response - try: - instructor_response, completion = ( - await async_instructor_client.chat.completions.create_with_completion( - messages=messages, - response_model=response_model, - **inference_settings.dict(), - ) - ) - return ConstrainedLLMResponse( - data=instructor_response.model_dump(by_alias=True) - ) - except Exception as e: - return ErrorLLMResponse( - adala_message=type(e).__name__, adala_details=traceback.format_exc() - ) - - -async def parallel_async_get_llm_response( - user_prompts: List[str], - system_prompt: Optional[str] = None, - instruction_first: bool = True, - response_model: Optional[Type[BaseModel]] = None, - inference_settings: LiteLLMInferenceSettings = LiteLLMInferenceSettings(), -): - tasks = [ - asyncio.ensure_future( - async_get_llm_response( - inference_settings=inference_settings, - user_prompt=user_prompt, - system_prompt=system_prompt, - instruction_first=instruction_first, - response_model=response_model, - ) - ) - for user_prompt in user_prompts - ] - responses = await asyncio.gather(*tasks) - return responses - - -def get_llm_response( - user_prompt: 
Optional[str] = None, - system_prompt: Optional[str] = None, - messages: Optional[List[Dict[str, str]]] = None, - instruction_first: bool = True, - response_model: Optional[Type[BaseModel]] = None, - inference_settings: LiteLLMInferenceSettings = LiteLLMInferenceSettings(), -) -> LLMResponse: - """ - Synchronous version of create_completion function with error handling and session timeout. - - Args: - inference_settings (LiteLLMInferenceSettings): Common inference settings for LiteLLM. - user_prompt (Optional[str]): User prompt. - system_prompt (Optional[str]): System prompt. - messages (Optional[List[Dict[str, str]]]): List of messages to be sent to the model. If provided, `user_prompt`, `system_prompt` and `instruction_first` will be ignored. - instruction_first (Optional[bool]): Whether to put instructions first. - response_model (Optional[Type[BaseModel]]): Pydantic model to constrain the LLM generated response. If not provided, the raw completion text will be returned. - - Returns: - Dict[str, Any]: OpenAI response or error message. - """ - - if not user_prompt and not messages: - raise ValueError("You must provide either `user_prompt` or `messages`.") - - if not messages: - # get messages from user_prompt and system_prompt - messages = get_messages(user_prompt, system_prompt, instruction_first) - - if response_model is None: - # unconstrained generation - return raw completion text and store it in `data` field: {"text": completion_text} - # TODO: this branch can be considered as deprecated at some point, as we always want to run LLM constrained by pydantic model # noqa - try: - completion = litellm.completion( - messages=messages, - **inference_settings.dict(), - ) - completion_text = completion.choices[0].message.content - return UnconstrainedLLMResponse(text=completion_text) - except Exception as e: - return ErrorLLMResponse( - adala_message=type(e).__name__, adala_details=traceback.format_exc() - ) - - # constrained generation branch - use `response_model` to constrain the LLM response - try: - instructor_response, completion = ( - instructor_client.chat.completions.create_with_completion( - messages=messages, - response_model=response_model, - **inference_settings.dict(), - ) - ) - return ConstrainedLLMResponse( - data=instructor_response.model_dump(by_alias=True) - ) - except Exception as e: - return ErrorLLMResponse( - adala_message=type(e).__name__, adala_details=traceback.format_exc() - ) - - -def parallel_get_llm_response( - user_prompts: List[str], - system_prompt: Optional[str] = None, - messages: Optional[List[Dict[str, str]]] = None, - instruction_first: bool = True, - response_model: Optional[Type[BaseModel]] = None, - inference_settings: LiteLLMInferenceSettings = LiteLLMInferenceSettings(), -) -> List[LLMResponse]: - pool = mp.Pool(mp.cpu_count()) - responses = pool.starmap( - get_llm_response, - [ - ( - user_prompt, - system_prompt, - messages, - instruction_first, - response_model, - *inference_settings.dict().values(), - ) - for user_prompt in user_prompts - ], - ) - pool.close() - pool.join() - return responses diff --git a/tests/cassettes/test_llm/test_async_get_llm_response[ExampleResponseModel-My name is Carla and I am 25 years old.-expected_result1].yaml b/tests/cassettes/test_llm/test_async_get_llm_response[ExampleResponseModel-My name is Carla and I am 25 years old.-expected_result1].yaml deleted file mode 100644 index dd0036da..00000000 --- a/tests/cassettes/test_llm/test_async_get_llm_response[ExampleResponseModel-My name is Carla and I am 25 years 
old.-expected_result1].yaml +++ /dev/null @@ -1,103 +0,0 @@ -interactions: -- request: - body: '{"messages": [{"role": "user", "content": "My name is Carla and I am 25 - years old."}], "model": "gpt-4o-mini", "max_tokens": 1000, "seed": 47, "temperature": - 0.0, "tool_choice": {"type": "function", "function": {"name": "ExampleResponseModel"}}, - "tools": [{"type": "function", "function": {"name": "ExampleResponseModel", - "description": "Correctly extracted `ExampleResponseModel` with all the required - parameters with correct types", "parameters": {"properties": {"name": {"description": - "Name of the person", "title": "Name", "type": "string"}, "age": {"description": - "Age of the person", "title": "Age", "type": "integer"}}, "required": ["age", - "name"], "type": "object"}}}]}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '678' - content-type: - - application/json - cookie: - - __cf_bm=GCnIKzv_42MjXXHx6e99LITRDI7gjgePt.6A8BRxDzw-1722350554-1.0.1.1-yZmK3AapLGZuJlX9Aj5bDVPX3M_ZUWYaqNz3wdmIs5hSM1axCFcG_7LFPgx.5f_cJqU1nVS3VzxYiiOt2aP7qA; - _cfuvid=HX57FgKgDWUSH9rC9johN5AQlYtb78j0bicydIxUAUY-1722350554428-0.0.1.1-604800000 - host: - - api.openai.com - user-agent: - - AsyncOpenAI/Python 1.34.0 - x-stainless-arch: - - x64 - x-stainless-async: - - async:asyncio - x-stainless-lang: - - python - x-stainless-os: - - Linux - x-stainless-package-version: - - 1.34.0 - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.11.5 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: !!binary | - H4sIAAAAAAAAA2xSXW+bQBB851ec9hkqjI0ceGybKg+uoiZRnKRE6Hxe8IX7Kne0iSz/9+jAMcQq - D6fVzM3s3iz7gBDgW8gJsB11TBoRZX/4atMs0/snzW+ax1/LHxX7d3nPvtbX31cQeoXevCBzH6ov - TEsj0HGtBpq1SB1619kySeZpnKZpT0i9ReFltXHRQkeSKx4lcbKI4mU0uziqd5oztJCT3wEhhOz7 - 08+ptvgKOYnDD0SitbRGyE+XCIFWC48AtZZbR5WDcCSZVg6VH111QkwIp7UoGRVibDx8+0k9hkWF - KNn6ofmbzetGXsdXT3F3d7u6uHpZryf9Bus30w9UdYqdQprwJzw/a0YIKCp77eUr9RnfoDVaWfzZ - Bxme36Zt3UlUzr8B9kWvLiAv4BttBS0gLIDWHknSA3wSH4L/1c/H6nBKXOjatHpjzwKEiitud2WL - 1PYPAeu0GVp4u+d+s92nZYFptTSudLpB5Q2zbLCD8X8aydlx6+C0o2KKZ8FxQLBv1qEsK65qbE3L - +z1DZcq4iufbRRUjQnAI3gEAAP//AwBlKgHP9QIAAA== - headers: - CF-Cache-Status: - - DYNAMIC - CF-RAY: - - 8ab61dbb7b092c9b-ORD - Connection: - - keep-alive - Content-Encoding: - - gzip - Content-Type: - - application/json - Date: - - Tue, 30 Jul 2024 14:42:35 GMT - Server: - - cloudflare - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - alt-svc: - - h3=":443"; ma=86400 - openai-organization: - - heartex - openai-processing-ms: - - '266' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=15552000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '30000' - x-ratelimit-limit-tokens: - - '150000000' - x-ratelimit-remaining-requests: - - '29999' - x-ratelimit-remaining-tokens: - - '149998988' - x-ratelimit-reset-requests: - - 2ms - x-ratelimit-reset-tokens: - - 0s - x-request-id: - - req_cdb4fab762c48ad8cbda24c63c4a997b - status: - code: 200 - message: OK -version: 1 diff --git a/tests/cassettes/test_llm/test_async_get_llm_response[None-return the word banana with exclamation mark-expected_result0].yaml b/tests/cassettes/test_llm/test_async_get_llm_response[None-return the word banana with exclamation mark-expected_result0].yaml deleted file mode 100644 index aea2d83d..00000000 --- a/tests/cassettes/test_llm/test_async_get_llm_response[None-return the word banana with exclamation 
mark-expected_result0].yaml +++ /dev/null @@ -1,98 +0,0 @@ -interactions: -- request: - body: '{"messages": [{"role": "user", "content": "return the word banana with - exclamation mark"}], "model": "gpt-4o-mini", "max_tokens": 1000, "seed": 47, - "temperature": 0.0}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '167' - content-type: - - application/json - host: - - api.openai.com - user-agent: - - AsyncOpenAI/Python 1.34.0 - x-stainless-arch: - - x64 - x-stainless-async: - - async:asyncio - x-stainless-lang: - - python - x-stainless-os: - - Linux - x-stainless-package-version: - - 1.34.0 - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.11.5 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: !!binary | - H4sIAAAAAAAAAwAAAP//VJCxTsMwFEX3fIV5c4PcNFFptjIwFQSICYQiN3lJDY6fsV8lqqr/jpyE - FhYP9/heHfuYCAG6gVJAvVNc986kqy+9UfdYPD2vXx84Xz0eNrfk+WW/oLs1zGKDth9Y82/ruqbe - GWRNdsS1R8UYV+fLLFsUsijyAfTUoIm1znGaU9prq9NMZnkql+n8ZmrvSNcYoBRviRBCHIczetoG - v6EUcvab9BiC6hDK8yUhwJOJCagQdGBlGWYXWJNltIP6Vlll1RVM8HReNdQ5T9toYPfGnPNWWx12 - lUcVyMaFwOTG+ikR4n2w3/8TAuepd1wxfaKNg/NinIPLn11gNjEmVuZPZ5lMehAOgbGvWm079M7r - 8SWtq2QrF03eSkRITskPAAAA//8DANNYverXAQAA - headers: - CF-Cache-Status: - - DYNAMIC - CF-RAY: - - 8ab61db2cc7061df-ORD - Connection: - - keep-alive - Content-Encoding: - - gzip - Content-Type: - - application/json - Date: - - Tue, 30 Jul 2024 14:42:34 GMT - Server: - - cloudflare - Set-Cookie: - - __cf_bm=GCnIKzv_42MjXXHx6e99LITRDI7gjgePt.6A8BRxDzw-1722350554-1.0.1.1-yZmK3AapLGZuJlX9Aj5bDVPX3M_ZUWYaqNz3wdmIs5hSM1axCFcG_7LFPgx.5f_cJqU1nVS3VzxYiiOt2aP7qA; - path=/; expires=Tue, 30-Jul-24 15:12:34 GMT; domain=.api.openai.com; HttpOnly; - Secure; SameSite=None - - _cfuvid=HX57FgKgDWUSH9rC9johN5AQlYtb78j0bicydIxUAUY-1722350554428-0.0.1.1-604800000; - path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - alt-svc: - - h3=":443"; ma=86400 - openai-organization: - - heartex - openai-processing-ms: - - '168' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=15552000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '30000' - x-ratelimit-limit-tokens: - - '150000000' - x-ratelimit-remaining-requests: - - '29999' - x-ratelimit-remaining-tokens: - - '149998988' - x-ratelimit-reset-requests: - - 2ms - x-ratelimit-reset-tokens: - - 0s - x-request-id: - - req_35984e1b8c6646cb9d2aa18d58a2f6cc - status: - code: 200 - message: OK -version: 1 diff --git a/tests/cassettes/test_llm/test_get_llm_response[ExampleResponseModel-My name is Carla and I am 25 years old.-expected_result1].yaml b/tests/cassettes/test_llm/test_get_llm_response[ExampleResponseModel-My name is Carla and I am 25 years old.-expected_result1].yaml deleted file mode 100644 index 1bb0b80b..00000000 --- a/tests/cassettes/test_llm/test_get_llm_response[ExampleResponseModel-My name is Carla and I am 25 years old.-expected_result1].yaml +++ /dev/null @@ -1,103 +0,0 @@ -interactions: -- request: - body: '{"messages": [{"role": "user", "content": "My name is Carla and I am 25 - years old."}], "model": "gpt-4o-mini", "max_tokens": 1000, "seed": 47, "temperature": - 0.0, "tool_choice": {"type": "function", "function": {"name": "ExampleResponseModel"}}, - "tools": [{"type": "function", "function": {"name": "ExampleResponseModel", - "description": "Correctly extracted `ExampleResponseModel` with all 
the required - parameters with correct types", "parameters": {"properties": {"name": {"description": - "Name of the person", "title": "Name", "type": "string"}, "age": {"description": - "Age of the person", "title": "Age", "type": "integer"}}, "required": ["age", - "name"], "type": "object"}}}]}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '678' - content-type: - - application/json - cookie: - - __cf_bm=U.QsEfIGTzsqxtcP6FrcHBMwGMWzLvz1VZmhwW6lGNM-1722350553-1.0.1.1-5LrU0EHrNJBvSZhTUjQTM9K4wWlp_BG5ZNuQMBMKYAnIyLSi.3zmjFa9e_YNBFYkD8avCoDMZ1kyr1cBAUPCMw; - _cfuvid=kUJx58fC8YYZZDNTWJfOJfMCMZ3dJ1pkSwlXBlwS.g4-1722350553169-0.0.1.1-604800000 - host: - - api.openai.com - user-agent: - - OpenAI/Python 1.34.0 - x-stainless-arch: - - x64 - x-stainless-async: - - 'false' - x-stainless-lang: - - python - x-stainless-os: - - Linux - x-stainless-package-version: - - 1.34.0 - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.11.5 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: !!binary | - H4sIAAAAAAAAA2xSXW+jMBB851dY+wwnQkKT8FZVvYf2eqpObU/qUSHHWcA9f9U2Utoo//1kSAON - jgdrNeOZXc+yjwgBvoWCAGupZ9KIZP3GfzzfvdVt+/GY3t7f2N3rY+ouFz8zLpYQB4XevCLzn6pv - TEsj0HOtBppZpB6D62yZZfM8zfN5T0i9RRFkjfHJQieSK55kabZI0mUyWx3VreYMHRTkT0QIIfv+ - DHOqLe6gIGn8iUh0jjYIxekSIWC1CAhQ57jzVHmIR5Jp5VGF0VUnxITwWouKUSHGxsO3n9RjWFSI - amZv883NQ+b879Xq6fr547vMLzqrJv0G63fTD1R3ip1CmvAnvDhrRggoKnvt9Y6GjH+hM1o5vOuD - jM9vU9t0EpUPb4B92atLKEq4olbQEuISaBOQLD/AF/Eh+l/9cqwOp8SFbozVG3cWINRccddWFqnr - HwLOazO0CHYv/Wa7L8sCY7U0vvL6L6pguF4PdjD+TyM5O24dvPZUTPF1dBwQ3LvzKKuaqwatsbzf - M9SmSut0vl3UKSJEh+gfAAAA//8DABIDPab1AgAA - headers: - CF-Cache-Status: - - DYNAMIC - CF-RAY: - - 8ab61dadbb751ce2-ORD - Connection: - - keep-alive - Content-Encoding: - - gzip - Content-Type: - - application/json - Date: - - Tue, 30 Jul 2024 14:42:33 GMT - Server: - - cloudflare - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - alt-svc: - - h3=":443"; ma=86400 - openai-organization: - - heartex - openai-processing-ms: - - '309' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=15552000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '30000' - x-ratelimit-limit-tokens: - - '150000000' - x-ratelimit-remaining-requests: - - '29999' - x-ratelimit-remaining-tokens: - - '149998989' - x-ratelimit-reset-requests: - - 2ms - x-ratelimit-reset-tokens: - - 0s - x-request-id: - - req_6232d03e756801a673aaf5a4d8e85be6 - status: - code: 200 - message: OK -version: 1 diff --git a/tests/cassettes/test_llm/test_get_llm_response[None-return the word Banana with exclamation mark-expected_result0].yaml b/tests/cassettes/test_llm/test_get_llm_response[None-return the word Banana with exclamation mark-expected_result0].yaml deleted file mode 100644 index 6c8fdaa1..00000000 --- a/tests/cassettes/test_llm/test_get_llm_response[None-return the word Banana with exclamation mark-expected_result0].yaml +++ /dev/null @@ -1,98 +0,0 @@ -interactions: -- request: - body: '{"messages": [{"role": "user", "content": "return the word Banana with - exclamation mark"}], "model": "gpt-4o-mini", "max_tokens": 1000, "seed": 47, - "temperature": 0.0}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '167' - content-type: - - application/json - host: - - api.openai.com - user-agent: - - OpenAI/Python 1.34.0 - x-stainless-arch: - 
- x64 - x-stainless-async: - - 'false' - x-stainless-lang: - - python - x-stainless-os: - - Linux - x-stainless-package-version: - - 1.34.0 - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.11.5 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: !!binary | - H4sIAAAAAAAAAwAAAP//VJA9b8IwFEX3/Ar3zaRKAuEjW7t0YaNqhaoqcowJBtvPxA9UhPjvlZNA - 2sXDPb5Xx75GjIHaQMFA7DgJ43S8OKrl+ryqD84b/zl+36/PL8auLvz48baEUWhgtZeC7q1ngcZp - SQpth0UjOcmwms6ybJwneZ61wOBG6lCrHcUTjI2yKs6SbBInszid9+0dKiE9FOwrYoyxa3sGT7uR - P1CwZHRPjPSe1xKKxyXGoEEdEuDeK0/cEowGKNCStK36K7fc8ifo4e2xqrF2DVbBwJ60fuRbZZXf - lY3kHm1Y8ISuq98ixr5b+9M/IXANGkcl4UHaMJjm3RwMfzbAcc8Iies/nXnU64G/eJKm3Cpby8Y1 - qnvJ1pXTNK2m01mVLiC6Rb8AAAD//wMATZZOl9cBAAA= - headers: - CF-Cache-Status: - - DYNAMIC - CF-RAY: - - 8ab61dab0ff51ce2-ORD - Connection: - - keep-alive - Content-Encoding: - - gzip - Content-Type: - - application/json - Date: - - Tue, 30 Jul 2024 14:42:33 GMT - Server: - - cloudflare - Set-Cookie: - - __cf_bm=U.QsEfIGTzsqxtcP6FrcHBMwGMWzLvz1VZmhwW6lGNM-1722350553-1.0.1.1-5LrU0EHrNJBvSZhTUjQTM9K4wWlp_BG5ZNuQMBMKYAnIyLSi.3zmjFa9e_YNBFYkD8avCoDMZ1kyr1cBAUPCMw; - path=/; expires=Tue, 30-Jul-24 15:12:33 GMT; domain=.api.openai.com; HttpOnly; - Secure; SameSite=None - - _cfuvid=kUJx58fC8YYZZDNTWJfOJfMCMZ3dJ1pkSwlXBlwS.g4-1722350553169-0.0.1.1-604800000; - path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - alt-svc: - - h3=":443"; ma=86400 - openai-organization: - - heartex - openai-processing-ms: - - '253' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=15552000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '30000' - x-ratelimit-limit-tokens: - - '150000000' - x-ratelimit-remaining-requests: - - '29999' - x-ratelimit-remaining-tokens: - - '149998988' - x-ratelimit-reset-requests: - - 2ms - x-ratelimit-reset-tokens: - - 0s - x-request-id: - - req_d96ab7ce0e87524aca13b64d3eb63519 - status: - code: 200 - message: OK -version: 1 diff --git a/tests/cassettes/test_llm/test_get_llm_response[None-return the word banana with exclamation mark-expected_result0].yaml b/tests/cassettes/test_llm/test_get_llm_response[None-return the word banana with exclamation mark-expected_result0].yaml deleted file mode 100644 index 6c8fdaa1..00000000 --- a/tests/cassettes/test_llm/test_get_llm_response[None-return the word banana with exclamation mark-expected_result0].yaml +++ /dev/null @@ -1,98 +0,0 @@ -interactions: -- request: - body: '{"messages": [{"role": "user", "content": "return the word Banana with - exclamation mark"}], "model": "gpt-4o-mini", "max_tokens": 1000, "seed": 47, - "temperature": 0.0}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '167' - content-type: - - application/json - host: - - api.openai.com - user-agent: - - OpenAI/Python 1.34.0 - x-stainless-arch: - - x64 - x-stainless-async: - - 'false' - x-stainless-lang: - - python - x-stainless-os: - - Linux - x-stainless-package-version: - - 1.34.0 - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.11.5 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: !!binary | - H4sIAAAAAAAAAwAAAP//VJA9b8IwFEX3/Ar3zaRKAuEjW7t0YaNqhaoqcowJBtvPxA9UhPjvlZNA - 2sXDPb5Xx75GjIHaQMFA7DgJ43S8OKrl+ryqD84b/zl+36/PL8auLvz48baEUWhgtZeC7q1ngcZp - 
SQpth0UjOcmwms6ybJwneZ61wOBG6lCrHcUTjI2yKs6SbBInszid9+0dKiE9FOwrYoyxa3sGT7uR - P1CwZHRPjPSe1xKKxyXGoEEdEuDeK0/cEowGKNCStK36K7fc8ifo4e2xqrF2DVbBwJ60fuRbZZXf - lY3kHm1Y8ISuq98ixr5b+9M/IXANGkcl4UHaMJjm3RwMfzbAcc8Iies/nXnU64G/eJKm3Cpby8Y1 - qnvJ1pXTNK2m01mVLiC6Rb8AAAD//wMATZZOl9cBAAA= - headers: - CF-Cache-Status: - - DYNAMIC - CF-RAY: - - 8ab61dab0ff51ce2-ORD - Connection: - - keep-alive - Content-Encoding: - - gzip - Content-Type: - - application/json - Date: - - Tue, 30 Jul 2024 14:42:33 GMT - Server: - - cloudflare - Set-Cookie: - - __cf_bm=U.QsEfIGTzsqxtcP6FrcHBMwGMWzLvz1VZmhwW6lGNM-1722350553-1.0.1.1-5LrU0EHrNJBvSZhTUjQTM9K4wWlp_BG5ZNuQMBMKYAnIyLSi.3zmjFa9e_YNBFYkD8avCoDMZ1kyr1cBAUPCMw; - path=/; expires=Tue, 30-Jul-24 15:12:33 GMT; domain=.api.openai.com; HttpOnly; - Secure; SameSite=None - - _cfuvid=kUJx58fC8YYZZDNTWJfOJfMCMZ3dJ1pkSwlXBlwS.g4-1722350553169-0.0.1.1-604800000; - path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - alt-svc: - - h3=":443"; ma=86400 - openai-organization: - - heartex - openai-processing-ms: - - '253' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=15552000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '30000' - x-ratelimit-limit-tokens: - - '150000000' - x-ratelimit-remaining-requests: - - '29999' - x-ratelimit-remaining-tokens: - - '149998988' - x-ratelimit-reset-requests: - - 2ms - x-ratelimit-reset-tokens: - - 0s - x-request-id: - - req_d96ab7ce0e87524aca13b64d3eb63519 - status: - code: 200 - message: OK -version: 1 diff --git a/tests/cassettes/test_llm/test_llm_async.yaml b/tests/cassettes/test_llm/test_llm_async.yaml new file mode 100644 index 00000000..ec691f66 --- /dev/null +++ b/tests/cassettes/test_llm/test_llm_async.yaml @@ -0,0 +1,198 @@ +interactions: +- request: + body: '{"messages": [{"role": "user", "content": "Hey, how''s it going?"}], "model": + "gpt-4o-mini", "max_tokens": 1000, "seed": 47, "temperature": 0.0}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '143' + content-type: + - application/json + cookie: + - __cf_bm=UYHDcJ7yLbsOr4X6RFVLwnP5NmstFey4TVvwAqc2VHE-1722634648-1.0.1.1-iVxHVwpm0Zuy3SuHJKaxwtiqkY2tULni7_kZocNwJy3WkMGS0gGe1.EVszk8MaHGfdHdHZ4vxdF5PadPfeEPRw; + _cfuvid=UcUTj5QipGlVdj3XSkjDWUAzBBitq1f_0HNRV1e5ni8-1722634648619-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.34.0 + x-stainless-arch: + - x64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - Linux + x-stainless-package-version: + - 1.34.0 + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.5 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA1RQMU4DMRDs7xWLG5ocuuROgaRBFCAiCgoKJBCKHN/enYnttew9hYAi8Q2+x0uQ + k5BAs8XMzuzsfGQAQtdiCkJ1kpX1Jp+EfnazHlequFJPV6u72X33PryeTPB9/PQgBklBi1dU/Ks6 + U2S9QdbkdrQKKBmT6/B8NBqX1UVRbglLNZokaz3nFeVWO52PilGVF+f58GKv7kgrjGIKzxkAwMd2 + ppyuxjcxhWLwi1iMUbYopoclABHIJETIGHVk6VgMjqQix+i20WenFmrSroUVGjMA7qRbRmgogIxL + 7doTuKUVyAX1DGvqL+Gxk/z9+RWBXAICWO1qYKrl+lLsb2wO4Qy1PtAiPeJ6Yw54o52O3TygjORS + kMjkd/JNBvCyLaH/95fwgaznOdMSXTIcVjs7caz+DznZk0wszREvy2yfT8R1ZLTzRrsWgw9610jj + 50VTlHXVFIgi22Q/AAAA//8DABxR2lkfAgAA + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8ad1395d4fad115f-ORD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + 
Date: + - Fri, 02 Aug 2024 21:40:03 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - heartex + openai-processing-ms: + - '797' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=15552000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149998994' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_47a4dc30b9407937968c483aa09ceff2 + status: + code: 200 + message: OK +- request: + body: '{"messages": [{"role": "user", "content": "My name is Carla and I am 25 + years old."}], "model": "gpt-4o-mini", "max_tokens": 1000, "seed": 47, "temperature": + 0.0, "tool_choice": {"type": "function", "function": {"name": "Output"}}, "tools": + [{"type": "function", "function": {"name": "Output", "description": "Correctly + extracted `Output` with all the required parameters with correct types", "parameters": + {"properties": {"name": {"description": "name:", "title": "Name", "type": "string"}, + "age": {"description": "age:", "title": "Age", "type": "string"}}, "required": + ["age", "name"], "type": "object"}}}]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '609' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.34.0 + x-stainless-arch: + - x64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - Linux + x-stainless-package-version: + - 1.34.0 + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.5 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA2xSXWvjMBB8968Q+xwX25c6tt/KBUofjoYSegfnYhRFdtTqC2l9JIT890N2Gqeh + ehDLjGZ22NUxIgTEFioCbEeRKSvj0vVPj/3z6rDk79uytPfpYv2S//tFDx/7V5gFhdm8c4afqjtm + lJUchdEjzRynyINrusiy/Me8SOYDocyWyyDrLMZzEyuhRZwl2TxOFnFanNU7Ixj3UJG/ESGEHIc7 + 5NRbvoeKJLNPRHHvacehujwiBJyRAQHqvfBINcJsIpnRyHWIrnsprwg0RjaMSjk1Hs/xqp6GRaVs + 8uVjIX6vhdYPT6sC3Usr16vl/s9Vv9H6YIdAba/ZZUhX/AWvbpoRApqqQfvco+3xRkkIUNf1imsM + qeFYD+9rqGr4SZ2kNcxqoN2IZPc1nOCLwSn6rn47V6fLnKXprDMbfzM2aIUWftc4Tv0QHzwaO7YI + dm/DPvsvKwLrjLLYoPngOhgW+WgH0y+ayPS8a0CDVE54mUfnfOAPHrlqWqE77qwTw3KhtU25Seim + bXmRQnSK/gMAAP//AwA5j4eK6gIAAA== + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8ad139653ed6aca8-ORD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Fri, 02 Aug 2024 21:40:04 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=U4BB8rS3qUTVmapjGfZ.4G4BDSdqnWqY7cbBwpNPl6g-1722634804-1.0.1.1-n5XtE16CEMWFE3A6E9jUdY4KyUKP0oRj0SaPZjLM2jPzWLiAf1.Hvqi6QRq8oIujqIvQ0ykEPlS9dLJtDlU4sw; + path=/; expires=Fri, 02-Aug-24 22:10:04 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=D6V7SPQ_QwPJ5VvdeALdmXEpXj0vSTw1snLHBIe2UFk-1722634804819-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - heartex + openai-processing-ms: + - '550' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=15552000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '30000' + 
x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149998989' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_93262bdf66657d0a9484aa65a83f4648 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/cassettes/test_llm/test_llm_sync.yaml b/tests/cassettes/test_llm/test_llm_sync.yaml new file mode 100644 index 00000000..c53339c1 --- /dev/null +++ b/tests/cassettes/test_llm/test_llm_sync.yaml @@ -0,0 +1,291 @@ +interactions: +- request: + body: '{"messages": [{"role": "user", "content": "Hey, how''s it going?"}], "model": + "gpt-4o-mini", "max_tokens": 1000, "seed": 47, "temperature": 0.0}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '143' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.34.0 + x-stainless-arch: + - x64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - Linux + x-stainless-package-version: + - 1.34.0 + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.5 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA1RQy07DMBC85ysWX7g0qE2jEnrhgqoikOACHBCqnGSTmNpeY28EFarEb/B7fAly + n3BZaWd2RjP7mQAIVYspiKqTXBmn0wvfz5byfrac3OW3xeNcdpObcFWW47cHLMQgKqh8xYr3qrOK + jNPIiuyWrjxKxug6Os+yyTif5MWGMFSjjrLWcZpTapRVaTbM8nR4no525lVHqsIgpvCcAAB8bmbM + aWv8EFMYDvaIwRBki2J6OAIQnnREhAxBBZaWxeBIVmQZ7Sb69amBmpRt4R21HgB30i5hRf0JzOkd + ZEk9x/USnjrJP1/fAchGwINRtgamWq4uxc57fQilqXWeyljA9lof8EZZFbqFRxnIxgCByW3l6wTg + ZVO+/9dHOE/G8YJpiTYajvKtnTi+/A9Z7EgmlvqIj7Nkl0+EVWA0i0bZFr3zavuJxi0uyqEsmwaL + kUjWyS8AAAD//wMAUgDU/BcCAAA= + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8ad13593be2713ec-ORD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Fri, 02 Aug 2024 21:37:28 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=UYHDcJ7yLbsOr4X6RFVLwnP5NmstFey4TVvwAqc2VHE-1722634648-1.0.1.1-iVxHVwpm0Zuy3SuHJKaxwtiqkY2tULni7_kZocNwJy3WkMGS0gGe1.EVszk8MaHGfdHdHZ4vxdF5PadPfeEPRw; + path=/; expires=Fri, 02-Aug-24 22:07:28 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=UcUTj5QipGlVdj3XSkjDWUAzBBitq1f_0HNRV1e5ni8-1722634648619-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - heartex + openai-processing-ms: + - '771' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=15552000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149998994' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_96401bab351a48f55043d438c93d3f94 + status: + code: 200 + message: OK +- request: + body: '{"messages": [{"role": "user", "content": "return the word Banana with + exclamation mark"}], "model": "gpt-4o-mini", "max_tokens": 1000, "seed": 47, + "temperature": 0.0}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '167' + content-type: + - application/json + cookie: + - 
__cf_bm=UYHDcJ7yLbsOr4X6RFVLwnP5NmstFey4TVvwAqc2VHE-1722634648-1.0.1.1-iVxHVwpm0Zuy3SuHJKaxwtiqkY2tULni7_kZocNwJy3WkMGS0gGe1.EVszk8MaHGfdHdHZ4vxdF5PadPfeEPRw; + _cfuvid=UcUTj5QipGlVdj3XSkjDWUAzBBitq1f_0HNRV1e5ni8-1722634648619-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.34.0 + x-stainless-arch: + - x64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - Linux + x-stainless-package-version: + - 1.34.0 + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.5 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA1SQO0/DMBSF9/wKc+cG5UXTZKNDEVIHFgZAKHLcm9Tg+Fq2y0NV/ztyElpYPJzP + 5+izjxFjIHdQMxB77sVgVFzZw0Zttt1zfv94W91Ruv4sP0xePuinbguL0KD2DYX/bV0LGoxCL0lP + WFjkHsNqWmbZMi+WRTWCgXaoQq03Pi4oHqSWcZZkRZyUcbqa23uSAh3U7CVijLHjeAZPvcMvqFmy + +E0GdI73CPX5EmNgSYUEuHPSea49LC5QkPaoR/U111zzK5jh6byqqDeW2mCgD0qd805q6faNRe5I + hwXnyUz1U8TY62h/+CcExtJgfOPpHXUYTG+mObj82QXmM/PkufrTWUWzHrhv53FoOql7tMbK6SWd + aao24W3X4SqF6BT9AAAA//8DAKH4pAjXAQAA + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8ad1359b886c13ec-ORD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Fri, 02 Aug 2024 21:37:29 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - heartex + openai-processing-ms: + - '628' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=15552000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149998988' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_d967d29b6fadbf5acebc749507ccb198 + status: + code: 200 + message: OK +- request: + body: '{"messages": [{"role": "user", "content": "My name is Carla and I am 25 + years old."}], "model": "gpt-4o-mini", "max_tokens": 1000, "seed": 47, "temperature": + 0.0, "tool_choice": {"type": "function", "function": {"name": "Output"}}, "tools": + [{"type": "function", "function": {"name": "Output", "description": "Correctly + extracted `Output` with all the required parameters with correct types", "parameters": + {"properties": {"name": {"description": "name:", "title": "Name", "type": "string"}, + "age": {"description": "age:", "title": "Age", "type": "string"}}, "required": + ["age", "name"], "type": "object"}}}]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '609' + content-type: + - application/json + cookie: + - __cf_bm=UYHDcJ7yLbsOr4X6RFVLwnP5NmstFey4TVvwAqc2VHE-1722634648-1.0.1.1-iVxHVwpm0Zuy3SuHJKaxwtiqkY2tULni7_kZocNwJy3WkMGS0gGe1.EVszk8MaHGfdHdHZ4vxdF5PadPfeEPRw; + _cfuvid=UcUTj5QipGlVdj3XSkjDWUAzBBitq1f_0HNRV1e5ni8-1722634648619-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.34.0 + x-stainless-arch: + - x64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - Linux + x-stainless-package-version: + - 1.34.0 + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.5 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA2xSXWvjMBB8968Q+xyX2E2c2G+lUFra0oO7pzsXIyuKo1ZfJ61LfSH//ZCci9Nw + 
ehDLjGZ22NU+IQTEBioCbEeRKSvT0vV36tkO7M9w175/exju561lv186/lTcwCwoTPvGGf5TXTGj + rOQojB5p5jhFHlyzVZ4X14tiOY+EMhsug6yzmC5MqoQWaT7PF+l8lWbro3pnBOMeKvIrIYSQfbxD + Tr3hn1CR6BURxb2nHYfq9IgQcEYGBKj3wiPVCLOJZEYj1yG67qU8I9AY2TAq5dR4PPuzehoWlbJB + l98/5t8/Snvz+KyXZfvZrn/8vHVn/UbrwcZA216z05DO+BNeXTQjBDRVUfvSo+3xQkkIUNf1imsM + qWFfx/c1VDXcUidpDbMaaDci+bKGA3wxOCT/q1+P1eE0Z2k660zrL8YGW6GF3zWOUx/jg0djxxbB + 7jXus/+yIrDOKIsNmneug+G6GO1g+kUTmR13DWiQygkvi+SYD/zgkatmK3THnXUiLhe2timyrC2K + VZuVkBySvwAAAP//AwD4WS/Q6gIAAA== + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8ad135a30a4113ec-ORD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Fri, 02 Aug 2024 21:37:30 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - heartex + openai-processing-ms: + - '559' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=15552000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149998989' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_7e41971d12f598e154357ffb72fd850b + status: + code: 200 + message: OK +version: 1 diff --git a/tests/test_llm.py b/tests/test_llm.py index 84adeab3..49788ddd 100644 --- a/tests/test_llm.py +++ b/tests/test_llm.py @@ -1,65 +1,55 @@ import pytest -from pydantic import BaseModel, Field -from adala.utils.llm import ( - get_llm_response, - async_get_llm_response, - ConstrainedLLMResponse, - UnconstrainedLLMResponse, -) +import asyncio +import pandas as pd +from adala.runtimes import LiteLLMChatRuntime, AsyncLiteLLMChatRuntime -class ExampleResponseModel(BaseModel): - name: str = Field(..., description="Name of the person") - age: int = Field(..., description="Age of the person") +@pytest.mark.vcr +def test_llm_sync(): + runtime = LiteLLMChatRuntime() -@pytest.mark.parametrize( - "response_model, user_prompt, expected_result", - [ - ( - None, - "return the word Banana with exclamation mark", - UnconstrainedLLMResponse(text="Banana!"), - ), - ( - ExampleResponseModel, - "My name is Carla and I am 25 years old.", - ConstrainedLLMResponse(data={"name": "Carla", "age": 25}), - ), - ], -) -@pytest.mark.vcr -def test_get_llm_response(response_model, user_prompt, expected_result): + # test plaintext success - result = get_llm_response( - user_prompt=user_prompt, - response_model=response_model, + result = runtime.get_llm_response( + messages=[ + {"role": "user", "content": "return the word Banana with exclamation mark"} + ], ) + expected_result = "Banana!" 
+ assert result == expected_result + # test structured success + + result = runtime.record_to_record( + record={"input_name": "Carla", "input_age": 25}, + input_template="My name is {input_name} and I am {input_age} years old.", + instructions_template="", + output_template="name: {name}, age: {age}", + ) + + # note age coerced to string + expected_result = {"name": "Carla", "age": "25"} assert result == expected_result -@pytest.mark.parametrize( - "response_model, user_prompt, expected_result", - [ - ( - None, - "return the word banana with exclamation mark", - UnconstrainedLLMResponse(text="banana!"), - ), - ( - ExampleResponseModel, - "My name is Carla and I am 25 years old.", - ConstrainedLLMResponse(data={"name": "Carla", "age": 25}), - ), - ], -) -@pytest.mark.asyncio @pytest.mark.vcr -async def test_async_get_llm_response(response_model, user_prompt, expected_result): +def test_llm_async(): + + runtime = AsyncLiteLLMChatRuntime() - result = await async_get_llm_response( - user_prompt=user_prompt, response_model=response_model + batch = pd.DataFrame.from_records([{"input_name": "Carla", "input_age": 25}]) + + result = asyncio.run( + runtime.batch_to_batch( + batch, + input_template="My name is {input_name} and I am {input_age} years old.", + instructions_template="", + output_template="name: {name}, age: {age}", + ) ) - assert result == expected_result + # note age coerced to string + expected_result = pd.DataFrame.from_records([{"name": "Carla", "age": "25"}]) + # need 2 all() for row and column axis + assert (result == expected_result).all().all()
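
For reviewers, a minimal usage sketch of the two runtimes introduced in this patch, mirroring the new tests above. It assumes the default OpenAI provider with OPENAI_API_KEY set in the environment; any extra keyword arguments passed to the runtimes are forwarded to litellm as inference parameters.

import asyncio
import pandas as pd

from adala.runtimes import LiteLLMChatRuntime, AsyncLiteLLMChatRuntime

# Synchronous runtime: defaults to gpt-4o-mini. init_runtime() can optionally be
# called to verify model availability with a small test completion; the tests skip it.
runtime = LiteLLMChatRuntime()

# Plain-text completion
text = runtime.get_llm_response(
    messages=[{"role": "user", "content": "return the word Banana with exclamation mark"}]
)  # -> "Banana!"

# Structured extraction, constrained by a pydantic model parsed from the output
# template; extracted fields come back as strings, e.g. {"name": "Carla", "age": "25"}.
record = runtime.record_to_record(
    record={"input_name": "Carla", "input_age": 25},
    input_template="My name is {input_name} and I am {input_age} years old.",
    instructions_template="",
    output_template="name: {name}, age: {age}",
)

# Async runtime: one constrained completion per row of the batch, run concurrently.
async_runtime = AsyncLiteLLMChatRuntime()
batch = pd.DataFrame.from_records([{"input_name": "Carla", "input_age": 25}])
result = asyncio.run(
    async_runtime.batch_to_batch(
        batch,
        input_template="My name is {input_name} and I am {input_age} years old.",
        instructions_template="",
        output_template="name: {name}, age: {age}",
    )
)

Per the error handling added in this patch, failed completions are returned as records with _adala_error, _adala_message and _adala_details fields rather than raising, so downstream consumers can filter them out of the resulting dataframe.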