diff --git a/adala/runtimes/__init__.py b/adala/runtimes/__init__.py index 3718065d..df9aae16 100644 --- a/adala/runtimes/__init__.py +++ b/adala/runtimes/__init__.py @@ -1,2 +1,3 @@ from .base import Runtime, AsyncRuntime from ._openai import OpenAIChatRuntime, OpenAIVisionRuntime, AsyncOpenAIChatRuntime +from ._litellm import LiteLLMChatRuntime, AsyncLiteLLMChatRuntime diff --git a/adala/runtimes/_litellm.py b/adala/runtimes/_litellm.py index ac12ba1e..25b33be0 100644 --- a/adala/runtimes/_litellm.py +++ b/adala/runtimes/_litellm.py @@ -1,7 +1,11 @@ +import asyncio import logging -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, List, Optional import litellm +from litellm.exceptions import AuthenticationError +import instructor +import traceback from adala.utils.internal_data import InternalDataFrame from adala.utils.logs import print_error from adala.utils.parse import ( @@ -9,42 +13,81 @@ partial_str_format, parse_template_to_pydantic_class, ) -from adala.utils.llm import ( - parallel_async_get_llm_response, - get_llm_response, - ConstrainedLLMResponse, - UnconstrainedLLMResponse, - ErrorLLMResponse, - LiteLLMInferenceSettings, -) -from openai import NotFoundError from pydantic import ConfigDict, field_validator from rich import print from .base import AsyncRuntime, Runtime +instructor_client = instructor.from_litellm(litellm.completion) +async_instructor_client = instructor.from_litellm(litellm.acompletion) + logger = logging.getLogger(__name__) -class LiteLLMChatRuntime(LiteLLMInferenceSettings, Runtime): +def get_messages( + user_prompt: str, + system_prompt: Optional[str] = None, + instruction_first: bool = True, +): + messages = [{"role": "user", "content": user_prompt}] + if system_prompt: + if instruction_first: + messages.insert(0, {"role": "system", "content": system_prompt}) + else: + messages[0]["content"] += system_prompt + return messages + + +class LiteLLMChatRuntime(Runtime): """ Runtime that uses [LiteLLM API](https://litellm.vercel.app/docs) and chat completion models to perform the skill. + The default model provider is [OpenAI](https://openai.com/), using the OPENAI_API_KEY environment variable. Other providers [can be chosen](https://litellm.vercel.app/docs/set_keys) through environment variables or passed parameters. + Attributes: - inference_settings (LiteLLMInferenceSettings): Common inference settings for LiteLLM. + model: model name. Refer to litellm supported models for how to pass + this: https://litellm.vercel.app/docs/providers + max_tokens: Maximum tokens to generate. + temperature: Temperature for sampling. + seed: Integer seed to reduce nondeterminism in generation. + + Extra parameters passed to this class will be used for inference. See `litellm.types.completion.CompletionRequest` for a full list. Some common ones are: + api_key: API key, optional. If provided, will be used to authenticate + with the provider of your specified model. + base_url (Optional[str]): Base URL, optional. If provided, will be used to talk to an OpenAI-compatible API provider besides OpenAI. + api_version (Optional[str]): API version, optional except for Azure. + timeout: Timeout in seconds. 
""" - model_config = ConfigDict(arbitrary_types_allowed=True) # for @computed_field + model: str = "gpt-4o-mini" + max_tokens: int = 1000 + temperature: float = 0.0 + seed: Optional[int] = 47 + + model_config = ConfigDict(extra="allow") def init_runtime(self) -> "Runtime": # check model availability + # extension of litellm.check_valid_key for non-openai deployments try: - if self.api_key: - litellm.check_valid_key(model=self.model, api_key=self.api_key) - except NotFoundError: + messages = [{"role": "user", "content": "Hey, how's it going?"}] + litellm.completion( + messages=messages, + model=self.model, + max_tokens=self.max_tokens, + temperature=self.temperature, + seed=self.seed, + # extra inference params passed to this runtime + **self.model_extra, + ) + except AuthenticationError: raise ValueError( - f'Requested model "{self.model}" is not available with your api_key.' + f'Requested model "{self.model}" is not available with your api_key and settings.' + ) + except Exception as e: + raise ValueError( + f'Failed to check availability of requested model "{self.model}": {e}' ) return self @@ -52,17 +95,19 @@ def get_llm_response(self, messages: List[Dict[str, str]]) -> str: # TODO: sunset this method in favor of record_to_record if self.verbose: print(f"**Prompt content**:\n{messages}") - response: Union[ErrorLLMResponse, UnconstrainedLLMResponse] = get_llm_response( + completion = litellm.completion( messages=messages, - inference_settings=LiteLLMInferenceSettings( - **self.dict(include=LiteLLMInferenceSettings.model_fields.keys()) - ), + model=self.model, + max_tokens=self.max_tokens, + temperature=self.temperature, + seed=self.seed, + # extra inference params passed to this runtime + **self.model_extra, ) - if isinstance(response, ErrorLLMResponse): - raise ValueError(f"{response.adala_message}\n{response.adala_details}") + completion_text = completion.choices[0].message.content if self.verbose: - print(f"**Response**:\n{response.text}") - return response.text + print(f"**Response**:\n{completion_text}") + return completion_text def record_to_record( self, @@ -95,35 +140,93 @@ def record_to_record( response_model = parse_template_to_pydantic_class( output_template, provided_field_schema=field_schema ) - - response: Union[ConstrainedLLMResponse, ErrorLLMResponse] = get_llm_response( - user_prompt=input_template.format(**record, **extra_fields), - system_prompt=instructions_template, - instruction_first=instructions_first, - response_model=response_model, - inference_settings=LiteLLMInferenceSettings( - **self.dict(include=LiteLLMInferenceSettings.model_fields.keys()) - ), + messages = get_messages( + input_template.format(**record, **extra_fields), + instructions_template, + instructions_first, ) - if isinstance(response, ErrorLLMResponse): + try: + # returns a pydantic model named Output + response = instructor_client.chat.completions.create( + messages=messages, + response_model=response_model, + model=self.model, + max_tokens=self.max_tokens, + temperature=self.temperature, + seed=self.seed, + # extra inference params passed to this runtime + **self.model_extra, + ) + except Exception as e: + error_message = type(e).__name__ + # error_details = str(e) + error_details = traceback.format_exc() if self.verbose: - print_error(response.adala_message, response.adala_details) - return response.model_dump(by_alias=True) + print_error(error_message, error_details) + # TODO change this format + error_dct = { + "_adala_error": True, + "_adala_message": error_message, + "_adala_details": 
error_details, + } + return error_dct - return response.data + return response.dict() -class AsyncLiteLLMChatRuntime(LiteLLMInferenceSettings, AsyncRuntime): +class AsyncLiteLLMChatRuntime(AsyncRuntime): """ Runtime that uses [OpenAI API](https://openai.com/) and chat completion models to perform the skill. It uses async calls to OpenAI API. + The default model provider is [OpenAI](https://openai.com/), using the OPENAI_API_KEY environment variable. Other providers [can be chosen](https://litellm.vercel.app/docs/set_keys) through environment variables or passed parameters. + Attributes: - inference_settings (LiteLLMInferenceSettings): Common inference settings for LiteLLM. + model: model name. Refer to litellm supported models for how to pass + this: https://litellm.vercel.app/docs/providers + max_tokens: Maximum tokens to generate. + temperature: Temperature for sampling. + seed: Integer seed to reduce nondeterminism in generation. + + Extra parameters passed to this class will be used for inference. See `litellm.types.completion.CompletionRequest` for a full list. Some common ones are: + api_key: API key, optional. If provided, will be used to authenticate + with the provider of your specified model. + base_url (Optional[str]): Base URL, optional. If provided, will be used to talk to an OpenAI-compatible API provider besides OpenAI. + api_version (Optional[str]): API version, optional except for Azure. + timeout: Timeout in seconds. """ - model_config = ConfigDict(arbitrary_types_allowed=True) # for @computed_field + model: str = "gpt-4o-mini" + max_tokens: int = 1000 + temperature: float = 0.0 + seed: Optional[int] = 47 + + model_config = ConfigDict(extra="allow") + + def init_runtime(self) -> "Runtime": + # check model availability + # extension of litellm.check_valid_key for non-openai deployments + try: + messages = [{"role": "user", "content": "Hey, how's it going?"}] + litellm.completion( + messages=messages, + model=self.model, + max_tokens=self.max_tokens, + temperature=self.temperature, + seed=self.seed, + # extra inference params passed to this runtime + **self.model_extra, + ) + except AuthenticationError: + raise ValueError( + f'Requested model "{self.model}" is not available with your api_key and settings.' + ) + except Exception as e: + raise ValueError( + f'Failed to check availability of requested model "{self.model}": {e}' + ) + return self @field_validator("concurrency", mode="before") def check_concurrency(cls, value) -> int: @@ -135,17 +238,6 @@ def check_concurrency(cls, value) -> int: ) return value - def init_runtime(self) -> "Runtime": - # check model availability - try: - if self.api_key: - litellm.check_valid_key(model=self.model, api_key=self.api_key) - except NotFoundError: - raise ValueError( - f'Requested model "{self.model}" is not available in your OpenAI account.' 
- ) - return self - async def batch_to_batch( self, batch: InternalDataFrame, @@ -167,27 +259,45 @@ async def batch_to_batch( lambda row: input_template.format(**row, **extra_fields), axis=1 ).tolist() - responses: List[Union[ConstrainedLLMResponse, ErrorLLMResponse]] = ( - await parallel_async_get_llm_response( - user_prompts=user_prompts, - system_prompt=instructions_template, - instruction_first=instructions_first, - response_model=response_model, - inference_settings=LiteLLMInferenceSettings( - **self.dict(include=LiteLLMInferenceSettings.model_fields.keys()) - ), + tasks = [ + asyncio.ensure_future( + async_instructor_client.chat.completions.create( + messages=get_messages( + user_prompt, + instructions_template, + instructions_first, + ), + response_model=response_model, + model=self.model, + max_tokens=self.max_tokens, + temperature=self.temperature, + seed=self.seed, + # extra inference params passed to this runtime + **self.model_extra, + ) ) - ) + for user_prompt in user_prompts + ] + responses = await asyncio.gather(*tasks, return_exceptions=True) # convert list of LLMResponse objects to the dataframe records df_data = [] for response in responses: - if isinstance(response, ErrorLLMResponse): + if isinstance(response, Exception): + error_message = type(response).__name__ + # error_details = str(response) + error_details = traceback.format_exc() if self.verbose: - print_error(response.adala_message, response.adala_details) - df_data.append(response.model_dump(by_alias=True)) + print_error(error_message, error_details) + # TODO change this format + error_dct = { + "_adala_error": True, + "_adala_message": error_message, + "_adala_details": error_details, + } + df_data.append(error_dct) else: - df_data.append(response.data) + df_data.append(response.dict()) output_df = InternalDataFrame(df_data) return output_df.set_index(batch.index) @@ -302,9 +412,11 @@ def record_to_record( completion = litellm.completion( messages=[{"role": "user", "content": content}], - inference_settings=LiteLLMInferenceSettings( - **self.dict(include=LiteLLMInferenceSettings.model_fields.keys()) - ), + max_tokens=self.max_tokens, + temperature=self.temperature, + seed=self.seed, + # extra inference params passed to this runtime + **self.model_extra, ) completion_text = completion.choices[0].message.content diff --git a/adala/runtimes/_openai.py b/adala/runtimes/_openai.py index 83424201..009d1980 100644 --- a/adala/runtimes/_openai.py +++ b/adala/runtimes/_openai.py @@ -1,49 +1,7 @@ -import os - -from pydantic import Field - from ._litellm import AsyncLiteLLMChatRuntime, LiteLLMChatRuntime, LiteLLMVisionRuntime -class OpenAIChatRuntime(LiteLLMChatRuntime): - """ - Runtime that uses [OpenAI API](https://openai.com/) and chat completion - models to perform the skill. - - Attributes: - inference_settings (LiteLLMInferenceSettings): Common inference settings for LiteLLM. - """ - - # TODO does it make any sense for this to be optional? - api_key: str = Field(default=os.getenv("OPENAI_API_KEY")) - - -class AsyncOpenAIChatRuntime(AsyncLiteLLMChatRuntime): - """ - Runtime that uses [OpenAI API](https://openai.com/) and chat completion - models to perform the skill. It uses async calls to OpenAI API. - - Attributes: - inference_settings (LiteLLMInferenceSettings): Common inference settings for LiteLLM. 
- - """ - - api_key: str = Field(default=os.getenv("OPENAI_API_KEY")) - - -class OpenAIVisionRuntime(LiteLLMVisionRuntime): - """ - Runtime that uses [OpenAI API](https://openai.com/) and vision models to - perform the skill. - Only compatible with OpenAI API version 1.0.0 or higher. - """ - - api_key: str = Field(default=os.getenv("OPENAI_API_KEY")) - # NOTE this check used to exist in OpenAIVisionRuntime.record_to_record, - # but doesn't seem to have a definition - # def init_runtime(self) -> 'Runtime': - # if not check_if_new_openai_version(): - # raise NotImplementedError( - # f'{self.__class__.__name__} requires OpenAI API version 1.0.0 or higher.' - # ) - # super().init_runtime() +# litellm already reads the OPENAI_API_KEY env var, which was the reason for this class +OpenAIChatRuntime = LiteLLMChatRuntime +AsyncOpenAIChatRuntime = AsyncLiteLLMChatRuntime +OpenAIVisionRuntime = LiteLLMVisionRuntime diff --git a/adala/utils/llm.py b/adala/utils/llm.py deleted file mode 100644 index 8d43b053..00000000 --- a/adala/utils/llm.py +++ /dev/null @@ -1,276 +0,0 @@ -import asyncio -import instructor -import litellm -import traceback -import multiprocessing as mp -from typing import Optional, Dict, List, Type, Union -from pydantic import BaseModel, Field -from pydantic_settings import BaseSettings - -instructor_client = instructor.from_litellm(litellm.completion) -async_instructor_client = instructor.from_litellm(litellm.acompletion) - - -class LLMResponse(BaseModel): - """ - Base class for LLM response. - """ - adala_message: str = Field( - default=None, serialization_alias='_adala_message' - ) - adala_details: str = Field( - default=None, serialization_alias='_adala_details' - ) - - -class ConstrainedLLMResponse(LLMResponse): - """ - LLM response from constrained generation. - `data` object contains fields required by the response model. - """ - - data: Dict = Field(default_factory=dict) - adala_error: bool = Field( - default=False, serialization_alias='_adala_error' - ) - - - -class UnconstrainedLLMResponse(LLMResponse): - """ - LLM response from unconstrained generation. - `text` field contains raw completion text. - """ - - text: str = Field(default=None) - adala_error: bool = Field( - default=False, serialization_alias='_adala_error' - ) - - - -class ErrorLLMResponse(LLMResponse): - """ - LLM response in case of error. - """ - - adala_error: bool = Field(default=True, serialization_alias="_adala_error") - - -class LiteLLMInferenceSettings(BaseSettings): - """ - Common inference settings for LiteLLM. - - Attributes: - model: model name. Refer to litellm supported models for how to pass - this: https://litellm.vercel.app/docs/providers - api_key: API key, optional. If provided, will be used to authenticate - with the provider of your specified model. - base_url (Optional[str]): Base URL, optional. If provided, will be used to talk to an OpenAI-compatible API provider besides OpenAI. - api_version (Optional[str]): API version, optional except for Azure. - instruction_first: Whether to put instructions first. - response_model: Pydantic model to constrain the LLM generated response. If not provided, the raw completion text will be returned. # noqa - max_tokens: Maximum tokens to generate. - temperature: Temperature for sampling. - timeout: Timeout in seconds. - seed: Integer seed to reduce nondeterminism in generation. 
- """ - - model: str = "gpt-4o-mini" - api_key: Optional[str] = None - base_url: Optional[str] = None - api_version: Optional[str] = None - max_tokens: int = 1000 - temperature: float = 0.0 - timeout: Optional[Union[float, int]] = None - seed: Optional[int] = 47 - - -def get_messages( - user_prompt: str, - system_prompt: Optional[str] = None, - instruction_first: bool = True, -): - messages = [{"role": "user", "content": user_prompt}] - if system_prompt: - if instruction_first: - messages.insert(0, {"role": "system", "content": system_prompt}) - else: - messages[0]["content"] += system_prompt - return messages - - -async def async_get_llm_response( - user_prompt: Optional[str] = None, - system_prompt: Optional[str] = None, - messages: Optional[List[Dict[str, str]]] = None, - instruction_first: bool = True, - response_model: Optional[Type[BaseModel]] = None, - inference_settings: LiteLLMInferenceSettings = LiteLLMInferenceSettings(), -) -> LLMResponse: - """ - Async version of create_completion function with error handling and session timeout. - - Args: - inference_settings (LiteLLMInferenceSettings): Common inference settings for LiteLLM. - user_prompt: User prompt. - system_prompt: System prompt. - messages: List of messages to be sent to the model. If provided, `user_prompt`, `system_prompt` and `instruction_first` will be ignored. - instruction_first: Whether to put instructions first. - response_model: Pydantic model to constrain the LLM generated response. If not provided, the raw completion text will be returned. # noqa - - Returns: - LLMResponse: OpenAI response or error message. - """ - - if not user_prompt and not messages: - raise ValueError("You must provide either `user_prompt` or `messages`.") - - if not messages: - # get messages from user_prompt and system_prompt - messages = get_messages(user_prompt, system_prompt, instruction_first) - - if response_model is None: - # unconstrained generation - return raw completion text and store it in `data` field: {"text": completion_text} - try: - completion = await litellm.acompletion( - messages=messages, - **inference_settings.dict(), - ) - completion_text = completion.choices[0].message.content - return UnconstrainedLLMResponse(text=completion_text) - except Exception as e: - return ErrorLLMResponse( - adala_message=type(e).__name__, adala_details=traceback.format_exc() - ) - - # constrained generation branch - use `response_model` to constrain the LLM response - try: - instructor_response, completion = ( - await async_instructor_client.chat.completions.create_with_completion( - messages=messages, - response_model=response_model, - **inference_settings.dict(), - ) - ) - return ConstrainedLLMResponse( - data=instructor_response.model_dump(by_alias=True) - ) - except Exception as e: - return ErrorLLMResponse( - adala_message=type(e).__name__, adala_details=traceback.format_exc() - ) - - -async def parallel_async_get_llm_response( - user_prompts: List[str], - system_prompt: Optional[str] = None, - instruction_first: bool = True, - response_model: Optional[Type[BaseModel]] = None, - inference_settings: LiteLLMInferenceSettings = LiteLLMInferenceSettings(), -): - tasks = [ - asyncio.ensure_future( - async_get_llm_response( - inference_settings=inference_settings, - user_prompt=user_prompt, - system_prompt=system_prompt, - instruction_first=instruction_first, - response_model=response_model, - ) - ) - for user_prompt in user_prompts - ] - responses = await asyncio.gather(*tasks) - return responses - - -def get_llm_response( - user_prompt: 
Optional[str] = None, - system_prompt: Optional[str] = None, - messages: Optional[List[Dict[str, str]]] = None, - instruction_first: bool = True, - response_model: Optional[Type[BaseModel]] = None, - inference_settings: LiteLLMInferenceSettings = LiteLLMInferenceSettings(), -) -> LLMResponse: - """ - Synchronous version of create_completion function with error handling and session timeout. - - Args: - inference_settings (LiteLLMInferenceSettings): Common inference settings for LiteLLM. - user_prompt (Optional[str]): User prompt. - system_prompt (Optional[str]): System prompt. - messages (Optional[List[Dict[str, str]]]): List of messages to be sent to the model. If provided, `user_prompt`, `system_prompt` and `instruction_first` will be ignored. - instruction_first (Optional[bool]): Whether to put instructions first. - response_model (Optional[Type[BaseModel]]): Pydantic model to constrain the LLM generated response. If not provided, the raw completion text will be returned. - - Returns: - Dict[str, Any]: OpenAI response or error message. - """ - - if not user_prompt and not messages: - raise ValueError("You must provide either `user_prompt` or `messages`.") - - if not messages: - # get messages from user_prompt and system_prompt - messages = get_messages(user_prompt, system_prompt, instruction_first) - - if response_model is None: - # unconstrained generation - return raw completion text and store it in `data` field: {"text": completion_text} - # TODO: this branch can be considered as deprecated at some point, as we always want to run LLM constrained by pydantic model # noqa - try: - completion = litellm.completion( - messages=messages, - **inference_settings.dict(), - ) - completion_text = completion.choices[0].message.content - return UnconstrainedLLMResponse(text=completion_text) - except Exception as e: - return ErrorLLMResponse( - adala_message=type(e).__name__, adala_details=traceback.format_exc() - ) - - # constrained generation branch - use `response_model` to constrain the LLM response - try: - instructor_response, completion = ( - instructor_client.chat.completions.create_with_completion( - messages=messages, - response_model=response_model, - **inference_settings.dict(), - ) - ) - return ConstrainedLLMResponse( - data=instructor_response.model_dump(by_alias=True) - ) - except Exception as e: - return ErrorLLMResponse( - adala_message=type(e).__name__, adala_details=traceback.format_exc() - ) - - -def parallel_get_llm_response( - user_prompts: List[str], - system_prompt: Optional[str] = None, - messages: Optional[List[Dict[str, str]]] = None, - instruction_first: bool = True, - response_model: Optional[Type[BaseModel]] = None, - inference_settings: LiteLLMInferenceSettings = LiteLLMInferenceSettings(), -) -> List[LLMResponse]: - pool = mp.Pool(mp.cpu_count()) - responses = pool.starmap( - get_llm_response, - [ - ( - user_prompt, - system_prompt, - messages, - instruction_first, - response_model, - *inference_settings.dict().values(), - ) - for user_prompt in user_prompts - ], - ) - pool.close() - pool.join() - return responses diff --git a/tests/cassettes/test_llm/test_async_get_llm_response[ExampleResponseModel-My name is Carla and I am 25 years old.-expected_result1].yaml b/tests/cassettes/test_llm/test_async_get_llm_response[ExampleResponseModel-My name is Carla and I am 25 years old.-expected_result1].yaml deleted file mode 100644 index dd0036da..00000000 --- a/tests/cassettes/test_llm/test_async_get_llm_response[ExampleResponseModel-My name is Carla and I am 25 years 
old.-expected_result1].yaml +++ /dev/null @@ -1,103 +0,0 @@ -interactions: -- request: - body: '{"messages": [{"role": "user", "content": "My name is Carla and I am 25 - years old."}], "model": "gpt-4o-mini", "max_tokens": 1000, "seed": 47, "temperature": - 0.0, "tool_choice": {"type": "function", "function": {"name": "ExampleResponseModel"}}, - "tools": [{"type": "function", "function": {"name": "ExampleResponseModel", - "description": "Correctly extracted `ExampleResponseModel` with all the required - parameters with correct types", "parameters": {"properties": {"name": {"description": - "Name of the person", "title": "Name", "type": "string"}, "age": {"description": - "Age of the person", "title": "Age", "type": "integer"}}, "required": ["age", - "name"], "type": "object"}}}]}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '678' - content-type: - - application/json - cookie: - - __cf_bm=GCnIKzv_42MjXXHx6e99LITRDI7gjgePt.6A8BRxDzw-1722350554-1.0.1.1-yZmK3AapLGZuJlX9Aj5bDVPX3M_ZUWYaqNz3wdmIs5hSM1axCFcG_7LFPgx.5f_cJqU1nVS3VzxYiiOt2aP7qA; - _cfuvid=HX57FgKgDWUSH9rC9johN5AQlYtb78j0bicydIxUAUY-1722350554428-0.0.1.1-604800000 - host: - - api.openai.com - user-agent: - - AsyncOpenAI/Python 1.34.0 - x-stainless-arch: - - x64 - x-stainless-async: - - async:asyncio - x-stainless-lang: - - python - x-stainless-os: - - Linux - x-stainless-package-version: - - 1.34.0 - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.11.5 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: !!binary | - H4sIAAAAAAAAA2xSXW+bQBB851ec9hkqjI0ceGybKg+uoiZRnKRE6Hxe8IX7Kne0iSz/9+jAMcQq - D6fVzM3s3iz7gBDgW8gJsB11TBoRZX/4atMs0/snzW+ax1/LHxX7d3nPvtbX31cQeoXevCBzH6ov - TEsj0HGtBpq1SB1619kySeZpnKZpT0i9ReFltXHRQkeSKx4lcbKI4mU0uziqd5oztJCT3wEhhOz7 - 08+ptvgKOYnDD0SitbRGyE+XCIFWC48AtZZbR5WDcCSZVg6VH111QkwIp7UoGRVibDx8+0k9hkWF - KNn6ofmbzetGXsdXT3F3d7u6uHpZryf9Bus30w9UdYqdQprwJzw/a0YIKCp77eUr9RnfoDVaWfzZ - Bxme36Zt3UlUzr8B9kWvLiAv4BttBS0gLIDWHknSA3wSH4L/1c/H6nBKXOjatHpjzwKEiitud2WL - 1PYPAeu0GVp4u+d+s92nZYFptTSudLpB5Q2zbLCD8X8aydlx6+C0o2KKZ8FxQLBv1qEsK65qbE3L - +z1DZcq4iufbRRUjQnAI3gEAAP//AwBlKgHP9QIAAA== - headers: - CF-Cache-Status: - - DYNAMIC - CF-RAY: - - 8ab61dbb7b092c9b-ORD - Connection: - - keep-alive - Content-Encoding: - - gzip - Content-Type: - - application/json - Date: - - Tue, 30 Jul 2024 14:42:35 GMT - Server: - - cloudflare - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - alt-svc: - - h3=":443"; ma=86400 - openai-organization: - - heartex - openai-processing-ms: - - '266' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=15552000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '30000' - x-ratelimit-limit-tokens: - - '150000000' - x-ratelimit-remaining-requests: - - '29999' - x-ratelimit-remaining-tokens: - - '149998988' - x-ratelimit-reset-requests: - - 2ms - x-ratelimit-reset-tokens: - - 0s - x-request-id: - - req_cdb4fab762c48ad8cbda24c63c4a997b - status: - code: 200 - message: OK -version: 1 diff --git a/tests/cassettes/test_llm/test_async_get_llm_response[None-return the word banana with exclamation mark-expected_result0].yaml b/tests/cassettes/test_llm/test_async_get_llm_response[None-return the word banana with exclamation mark-expected_result0].yaml deleted file mode 100644 index aea2d83d..00000000 --- a/tests/cassettes/test_llm/test_async_get_llm_response[None-return the word banana with exclamation 
mark-expected_result0].yaml +++ /dev/null @@ -1,98 +0,0 @@ -interactions: -- request: - body: '{"messages": [{"role": "user", "content": "return the word banana with - exclamation mark"}], "model": "gpt-4o-mini", "max_tokens": 1000, "seed": 47, - "temperature": 0.0}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '167' - content-type: - - application/json - host: - - api.openai.com - user-agent: - - AsyncOpenAI/Python 1.34.0 - x-stainless-arch: - - x64 - x-stainless-async: - - async:asyncio - x-stainless-lang: - - python - x-stainless-os: - - Linux - x-stainless-package-version: - - 1.34.0 - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.11.5 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: !!binary | - H4sIAAAAAAAAAwAAAP//VJCxTsMwFEX3fIV5c4PcNFFptjIwFQSICYQiN3lJDY6fsV8lqqr/jpyE - FhYP9/heHfuYCAG6gVJAvVNc986kqy+9UfdYPD2vXx84Xz0eNrfk+WW/oLs1zGKDth9Y82/ruqbe - GWRNdsS1R8UYV+fLLFsUsijyAfTUoIm1znGaU9prq9NMZnkql+n8ZmrvSNcYoBRviRBCHIczetoG - v6EUcvab9BiC6hDK8yUhwJOJCagQdGBlGWYXWJNltIP6Vlll1RVM8HReNdQ5T9toYPfGnPNWWx12 - lUcVyMaFwOTG+ikR4n2w3/8TAuepd1wxfaKNg/NinIPLn11gNjEmVuZPZ5lMehAOgbGvWm079M7r - 8SWtq2QrF03eSkRITskPAAAA//8DANNYverXAQAA - headers: - CF-Cache-Status: - - DYNAMIC - CF-RAY: - - 8ab61db2cc7061df-ORD - Connection: - - keep-alive - Content-Encoding: - - gzip - Content-Type: - - application/json - Date: - - Tue, 30 Jul 2024 14:42:34 GMT - Server: - - cloudflare - Set-Cookie: - - __cf_bm=GCnIKzv_42MjXXHx6e99LITRDI7gjgePt.6A8BRxDzw-1722350554-1.0.1.1-yZmK3AapLGZuJlX9Aj5bDVPX3M_ZUWYaqNz3wdmIs5hSM1axCFcG_7LFPgx.5f_cJqU1nVS3VzxYiiOt2aP7qA; - path=/; expires=Tue, 30-Jul-24 15:12:34 GMT; domain=.api.openai.com; HttpOnly; - Secure; SameSite=None - - _cfuvid=HX57FgKgDWUSH9rC9johN5AQlYtb78j0bicydIxUAUY-1722350554428-0.0.1.1-604800000; - path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - alt-svc: - - h3=":443"; ma=86400 - openai-organization: - - heartex - openai-processing-ms: - - '168' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=15552000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '30000' - x-ratelimit-limit-tokens: - - '150000000' - x-ratelimit-remaining-requests: - - '29999' - x-ratelimit-remaining-tokens: - - '149998988' - x-ratelimit-reset-requests: - - 2ms - x-ratelimit-reset-tokens: - - 0s - x-request-id: - - req_35984e1b8c6646cb9d2aa18d58a2f6cc - status: - code: 200 - message: OK -version: 1 diff --git a/tests/cassettes/test_llm/test_get_llm_response[ExampleResponseModel-My name is Carla and I am 25 years old.-expected_result1].yaml b/tests/cassettes/test_llm/test_get_llm_response[ExampleResponseModel-My name is Carla and I am 25 years old.-expected_result1].yaml deleted file mode 100644 index 1bb0b80b..00000000 --- a/tests/cassettes/test_llm/test_get_llm_response[ExampleResponseModel-My name is Carla and I am 25 years old.-expected_result1].yaml +++ /dev/null @@ -1,103 +0,0 @@ -interactions: -- request: - body: '{"messages": [{"role": "user", "content": "My name is Carla and I am 25 - years old."}], "model": "gpt-4o-mini", "max_tokens": 1000, "seed": 47, "temperature": - 0.0, "tool_choice": {"type": "function", "function": {"name": "ExampleResponseModel"}}, - "tools": [{"type": "function", "function": {"name": "ExampleResponseModel", - "description": "Correctly extracted `ExampleResponseModel` with all 
the required - parameters with correct types", "parameters": {"properties": {"name": {"description": - "Name of the person", "title": "Name", "type": "string"}, "age": {"description": - "Age of the person", "title": "Age", "type": "integer"}}, "required": ["age", - "name"], "type": "object"}}}]}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '678' - content-type: - - application/json - cookie: - - __cf_bm=U.QsEfIGTzsqxtcP6FrcHBMwGMWzLvz1VZmhwW6lGNM-1722350553-1.0.1.1-5LrU0EHrNJBvSZhTUjQTM9K4wWlp_BG5ZNuQMBMKYAnIyLSi.3zmjFa9e_YNBFYkD8avCoDMZ1kyr1cBAUPCMw; - _cfuvid=kUJx58fC8YYZZDNTWJfOJfMCMZ3dJ1pkSwlXBlwS.g4-1722350553169-0.0.1.1-604800000 - host: - - api.openai.com - user-agent: - - OpenAI/Python 1.34.0 - x-stainless-arch: - - x64 - x-stainless-async: - - 'false' - x-stainless-lang: - - python - x-stainless-os: - - Linux - x-stainless-package-version: - - 1.34.0 - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.11.5 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: !!binary | - H4sIAAAAAAAAA2xSXW+jMBB851dY+wwnQkKT8FZVvYf2eqpObU/qUSHHWcA9f9U2Utoo//1kSAON - jgdrNeOZXc+yjwgBvoWCAGupZ9KIZP3GfzzfvdVt+/GY3t7f2N3rY+ouFz8zLpYQB4XevCLzn6pv - TEsj0HOtBppZpB6D62yZZfM8zfN5T0i9RRFkjfHJQieSK55kabZI0mUyWx3VreYMHRTkT0QIIfv+ - DHOqLe6gIGn8iUh0jjYIxekSIWC1CAhQ57jzVHmIR5Jp5VGF0VUnxITwWouKUSHGxsO3n9RjWFSI - amZv883NQ+b879Xq6fr547vMLzqrJv0G63fTD1R3ip1CmvAnvDhrRggoKnvt9Y6GjH+hM1o5vOuD - jM9vU9t0EpUPb4B92atLKEq4olbQEuISaBOQLD/AF/Eh+l/9cqwOp8SFbozVG3cWINRccddWFqnr - HwLOazO0CHYv/Wa7L8sCY7U0vvL6L6pguF4PdjD+TyM5O24dvPZUTPF1dBwQ3LvzKKuaqwatsbzf - M9SmSut0vl3UKSJEh+gfAAAA//8DABIDPab1AgAA - headers: - CF-Cache-Status: - - DYNAMIC - CF-RAY: - - 8ab61dadbb751ce2-ORD - Connection: - - keep-alive - Content-Encoding: - - gzip - Content-Type: - - application/json - Date: - - Tue, 30 Jul 2024 14:42:33 GMT - Server: - - cloudflare - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - alt-svc: - - h3=":443"; ma=86400 - openai-organization: - - heartex - openai-processing-ms: - - '309' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=15552000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '30000' - x-ratelimit-limit-tokens: - - '150000000' - x-ratelimit-remaining-requests: - - '29999' - x-ratelimit-remaining-tokens: - - '149998989' - x-ratelimit-reset-requests: - - 2ms - x-ratelimit-reset-tokens: - - 0s - x-request-id: - - req_6232d03e756801a673aaf5a4d8e85be6 - status: - code: 200 - message: OK -version: 1 diff --git a/tests/cassettes/test_llm/test_get_llm_response[None-return the word Banana with exclamation mark-expected_result0].yaml b/tests/cassettes/test_llm/test_get_llm_response[None-return the word Banana with exclamation mark-expected_result0].yaml deleted file mode 100644 index 6c8fdaa1..00000000 --- a/tests/cassettes/test_llm/test_get_llm_response[None-return the word Banana with exclamation mark-expected_result0].yaml +++ /dev/null @@ -1,98 +0,0 @@ -interactions: -- request: - body: '{"messages": [{"role": "user", "content": "return the word Banana with - exclamation mark"}], "model": "gpt-4o-mini", "max_tokens": 1000, "seed": 47, - "temperature": 0.0}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '167' - content-type: - - application/json - host: - - api.openai.com - user-agent: - - OpenAI/Python 1.34.0 - x-stainless-arch: - 
- x64 - x-stainless-async: - - 'false' - x-stainless-lang: - - python - x-stainless-os: - - Linux - x-stainless-package-version: - - 1.34.0 - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.11.5 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: !!binary | - H4sIAAAAAAAAAwAAAP//VJA9b8IwFEX3/Ar3zaRKAuEjW7t0YaNqhaoqcowJBtvPxA9UhPjvlZNA - 2sXDPb5Xx75GjIHaQMFA7DgJ43S8OKrl+ryqD84b/zl+36/PL8auLvz48baEUWhgtZeC7q1ngcZp - SQpth0UjOcmwms6ybJwneZ61wOBG6lCrHcUTjI2yKs6SbBInszid9+0dKiE9FOwrYoyxa3sGT7uR - P1CwZHRPjPSe1xKKxyXGoEEdEuDeK0/cEowGKNCStK36K7fc8ifo4e2xqrF2DVbBwJ60fuRbZZXf - lY3kHm1Y8ISuq98ixr5b+9M/IXANGkcl4UHaMJjm3RwMfzbAcc8Iies/nXnU64G/eJKm3Cpby8Y1 - qnvJ1pXTNK2m01mVLiC6Rb8AAAD//wMATZZOl9cBAAA= - headers: - CF-Cache-Status: - - DYNAMIC - CF-RAY: - - 8ab61dab0ff51ce2-ORD - Connection: - - keep-alive - Content-Encoding: - - gzip - Content-Type: - - application/json - Date: - - Tue, 30 Jul 2024 14:42:33 GMT - Server: - - cloudflare - Set-Cookie: - - __cf_bm=U.QsEfIGTzsqxtcP6FrcHBMwGMWzLvz1VZmhwW6lGNM-1722350553-1.0.1.1-5LrU0EHrNJBvSZhTUjQTM9K4wWlp_BG5ZNuQMBMKYAnIyLSi.3zmjFa9e_YNBFYkD8avCoDMZ1kyr1cBAUPCMw; - path=/; expires=Tue, 30-Jul-24 15:12:33 GMT; domain=.api.openai.com; HttpOnly; - Secure; SameSite=None - - _cfuvid=kUJx58fC8YYZZDNTWJfOJfMCMZ3dJ1pkSwlXBlwS.g4-1722350553169-0.0.1.1-604800000; - path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - alt-svc: - - h3=":443"; ma=86400 - openai-organization: - - heartex - openai-processing-ms: - - '253' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=15552000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '30000' - x-ratelimit-limit-tokens: - - '150000000' - x-ratelimit-remaining-requests: - - '29999' - x-ratelimit-remaining-tokens: - - '149998988' - x-ratelimit-reset-requests: - - 2ms - x-ratelimit-reset-tokens: - - 0s - x-request-id: - - req_d96ab7ce0e87524aca13b64d3eb63519 - status: - code: 200 - message: OK -version: 1 diff --git a/tests/cassettes/test_llm/test_get_llm_response[None-return the word banana with exclamation mark-expected_result0].yaml b/tests/cassettes/test_llm/test_get_llm_response[None-return the word banana with exclamation mark-expected_result0].yaml deleted file mode 100644 index 6c8fdaa1..00000000 --- a/tests/cassettes/test_llm/test_get_llm_response[None-return the word banana with exclamation mark-expected_result0].yaml +++ /dev/null @@ -1,98 +0,0 @@ -interactions: -- request: - body: '{"messages": [{"role": "user", "content": "return the word Banana with - exclamation mark"}], "model": "gpt-4o-mini", "max_tokens": 1000, "seed": 47, - "temperature": 0.0}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '167' - content-type: - - application/json - host: - - api.openai.com - user-agent: - - OpenAI/Python 1.34.0 - x-stainless-arch: - - x64 - x-stainless-async: - - 'false' - x-stainless-lang: - - python - x-stainless-os: - - Linux - x-stainless-package-version: - - 1.34.0 - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.11.5 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: !!binary | - H4sIAAAAAAAAAwAAAP//VJA9b8IwFEX3/Ar3zaRKAuEjW7t0YaNqhaoqcowJBtvPxA9UhPjvlZNA - 2sXDPb5Xx75GjIHaQMFA7DgJ43S8OKrl+ryqD84b/zl+36/PL8auLvz48baEUWhgtZeC7q1ngcZp - 
SQpth0UjOcmwms6ybJwneZ61wOBG6lCrHcUTjI2yKs6SbBInszid9+0dKiE9FOwrYoyxa3sGT7uR - P1CwZHRPjPSe1xKKxyXGoEEdEuDeK0/cEowGKNCStK36K7fc8ifo4e2xqrF2DVbBwJ60fuRbZZXf - lY3kHm1Y8ISuq98ixr5b+9M/IXANGkcl4UHaMJjm3RwMfzbAcc8Iies/nXnU64G/eJKm3Cpby8Y1 - qnvJ1pXTNK2m01mVLiC6Rb8AAAD//wMATZZOl9cBAAA= - headers: - CF-Cache-Status: - - DYNAMIC - CF-RAY: - - 8ab61dab0ff51ce2-ORD - Connection: - - keep-alive - Content-Encoding: - - gzip - Content-Type: - - application/json - Date: - - Tue, 30 Jul 2024 14:42:33 GMT - Server: - - cloudflare - Set-Cookie: - - __cf_bm=U.QsEfIGTzsqxtcP6FrcHBMwGMWzLvz1VZmhwW6lGNM-1722350553-1.0.1.1-5LrU0EHrNJBvSZhTUjQTM9K4wWlp_BG5ZNuQMBMKYAnIyLSi.3zmjFa9e_YNBFYkD8avCoDMZ1kyr1cBAUPCMw; - path=/; expires=Tue, 30-Jul-24 15:12:33 GMT; domain=.api.openai.com; HttpOnly; - Secure; SameSite=None - - _cfuvid=kUJx58fC8YYZZDNTWJfOJfMCMZ3dJ1pkSwlXBlwS.g4-1722350553169-0.0.1.1-604800000; - path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - alt-svc: - - h3=":443"; ma=86400 - openai-organization: - - heartex - openai-processing-ms: - - '253' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=15552000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '30000' - x-ratelimit-limit-tokens: - - '150000000' - x-ratelimit-remaining-requests: - - '29999' - x-ratelimit-remaining-tokens: - - '149998988' - x-ratelimit-reset-requests: - - 2ms - x-ratelimit-reset-tokens: - - 0s - x-request-id: - - req_d96ab7ce0e87524aca13b64d3eb63519 - status: - code: 200 - message: OK -version: 1 diff --git a/tests/cassettes/test_llm/test_llm_async.yaml b/tests/cassettes/test_llm/test_llm_async.yaml new file mode 100644 index 00000000..ec691f66 --- /dev/null +++ b/tests/cassettes/test_llm/test_llm_async.yaml @@ -0,0 +1,198 @@ +interactions: +- request: + body: '{"messages": [{"role": "user", "content": "Hey, how''s it going?"}], "model": + "gpt-4o-mini", "max_tokens": 1000, "seed": 47, "temperature": 0.0}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '143' + content-type: + - application/json + cookie: + - __cf_bm=UYHDcJ7yLbsOr4X6RFVLwnP5NmstFey4TVvwAqc2VHE-1722634648-1.0.1.1-iVxHVwpm0Zuy3SuHJKaxwtiqkY2tULni7_kZocNwJy3WkMGS0gGe1.EVszk8MaHGfdHdHZ4vxdF5PadPfeEPRw; + _cfuvid=UcUTj5QipGlVdj3XSkjDWUAzBBitq1f_0HNRV1e5ni8-1722634648619-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.34.0 + x-stainless-arch: + - x64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - Linux + x-stainless-package-version: + - 1.34.0 + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.5 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA1RQMU4DMRDs7xWLG5ocuuROgaRBFCAiCgoKJBCKHN/enYnttew9hYAi8Q2+x0uQ + k5BAs8XMzuzsfGQAQtdiCkJ1kpX1Jp+EfnazHlequFJPV6u72X33PryeTPB9/PQgBklBi1dU/Ks6 + U2S9QdbkdrQKKBmT6/B8NBqX1UVRbglLNZokaz3nFeVWO52PilGVF+f58GKv7kgrjGIKzxkAwMd2 + ppyuxjcxhWLwi1iMUbYopoclABHIJETIGHVk6VgMjqQix+i20WenFmrSroUVGjMA7qRbRmgogIxL + 7doTuKUVyAX1DGvqL+Gxk/z9+RWBXAICWO1qYKrl+lLsb2wO4Qy1PtAiPeJ6Yw54o52O3TygjORS + kMjkd/JNBvCyLaH/95fwgaznOdMSXTIcVjs7caz+DznZk0wszREvy2yfT8R1ZLTzRrsWgw9610jj + 50VTlHXVFIgi22Q/AAAA//8DABxR2lkfAgAA + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8ad1395d4fad115f-ORD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + 
Date: + - Fri, 02 Aug 2024 21:40:03 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - heartex + openai-processing-ms: + - '797' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=15552000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149998994' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_47a4dc30b9407937968c483aa09ceff2 + status: + code: 200 + message: OK +- request: + body: '{"messages": [{"role": "user", "content": "My name is Carla and I am 25 + years old."}], "model": "gpt-4o-mini", "max_tokens": 1000, "seed": 47, "temperature": + 0.0, "tool_choice": {"type": "function", "function": {"name": "Output"}}, "tools": + [{"type": "function", "function": {"name": "Output", "description": "Correctly + extracted `Output` with all the required parameters with correct types", "parameters": + {"properties": {"name": {"description": "name:", "title": "Name", "type": "string"}, + "age": {"description": "age:", "title": "Age", "type": "string"}}, "required": + ["age", "name"], "type": "object"}}}]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '609' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.34.0 + x-stainless-arch: + - x64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - Linux + x-stainless-package-version: + - 1.34.0 + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.5 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA2xSXWvjMBB8968Q+xwX25c6tt/KBUofjoYSegfnYhRFdtTqC2l9JIT890N2Gqeh + ehDLjGZ22NUxIgTEFioCbEeRKSvj0vVPj/3z6rDk79uytPfpYv2S//tFDx/7V5gFhdm8c4afqjtm + lJUchdEjzRynyINrusiy/Me8SOYDocyWyyDrLMZzEyuhRZwl2TxOFnFanNU7Ixj3UJG/ESGEHIc7 + 5NRbvoeKJLNPRHHvacehujwiBJyRAQHqvfBINcJsIpnRyHWIrnsprwg0RjaMSjk1Hs/xqp6GRaVs + 8uVjIX6vhdYPT6sC3Usr16vl/s9Vv9H6YIdAba/ZZUhX/AWvbpoRApqqQfvco+3xRkkIUNf1imsM + qeFYD+9rqGr4SZ2kNcxqoN2IZPc1nOCLwSn6rn47V6fLnKXprDMbfzM2aIUWftc4Tv0QHzwaO7YI + dm/DPvsvKwLrjLLYoPngOhgW+WgH0y+ayPS8a0CDVE54mUfnfOAPHrlqWqE77qwTw3KhtU25Seim + bXmRQnSK/gMAAP//AwA5j4eK6gIAAA== + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8ad139653ed6aca8-ORD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Fri, 02 Aug 2024 21:40:04 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=U4BB8rS3qUTVmapjGfZ.4G4BDSdqnWqY7cbBwpNPl6g-1722634804-1.0.1.1-n5XtE16CEMWFE3A6E9jUdY4KyUKP0oRj0SaPZjLM2jPzWLiAf1.Hvqi6QRq8oIujqIvQ0ykEPlS9dLJtDlU4sw; + path=/; expires=Fri, 02-Aug-24 22:10:04 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=D6V7SPQ_QwPJ5VvdeALdmXEpXj0vSTw1snLHBIe2UFk-1722634804819-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - heartex + openai-processing-ms: + - '550' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=15552000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '30000' + 
x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149998989' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_93262bdf66657d0a9484aa65a83f4648 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/cassettes/test_llm/test_llm_sync.yaml b/tests/cassettes/test_llm/test_llm_sync.yaml new file mode 100644 index 00000000..c53339c1 --- /dev/null +++ b/tests/cassettes/test_llm/test_llm_sync.yaml @@ -0,0 +1,291 @@ +interactions: +- request: + body: '{"messages": [{"role": "user", "content": "Hey, how''s it going?"}], "model": + "gpt-4o-mini", "max_tokens": 1000, "seed": 47, "temperature": 0.0}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '143' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.34.0 + x-stainless-arch: + - x64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - Linux + x-stainless-package-version: + - 1.34.0 + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.5 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA1RQy07DMBC85ysWX7g0qE2jEnrhgqoikOACHBCqnGSTmNpeY28EFarEb/B7fAly + n3BZaWd2RjP7mQAIVYspiKqTXBmn0wvfz5byfrac3OW3xeNcdpObcFWW47cHLMQgKqh8xYr3qrOK + jNPIiuyWrjxKxug6Os+yyTif5MWGMFSjjrLWcZpTapRVaTbM8nR4no525lVHqsIgpvCcAAB8bmbM + aWv8EFMYDvaIwRBki2J6OAIQnnREhAxBBZaWxeBIVmQZ7Sb69amBmpRt4R21HgB30i5hRf0JzOkd + ZEk9x/USnjrJP1/fAchGwINRtgamWq4uxc57fQilqXWeyljA9lof8EZZFbqFRxnIxgCByW3l6wTg + ZVO+/9dHOE/G8YJpiTYajvKtnTi+/A9Z7EgmlvqIj7Nkl0+EVWA0i0bZFr3zavuJxi0uyqEsmwaL + kUjWyS8AAAD//wMAUgDU/BcCAAA= + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8ad13593be2713ec-ORD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Fri, 02 Aug 2024 21:37:28 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=UYHDcJ7yLbsOr4X6RFVLwnP5NmstFey4TVvwAqc2VHE-1722634648-1.0.1.1-iVxHVwpm0Zuy3SuHJKaxwtiqkY2tULni7_kZocNwJy3WkMGS0gGe1.EVszk8MaHGfdHdHZ4vxdF5PadPfeEPRw; + path=/; expires=Fri, 02-Aug-24 22:07:28 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=UcUTj5QipGlVdj3XSkjDWUAzBBitq1f_0HNRV1e5ni8-1722634648619-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - heartex + openai-processing-ms: + - '771' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=15552000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149998994' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_96401bab351a48f55043d438c93d3f94 + status: + code: 200 + message: OK +- request: + body: '{"messages": [{"role": "user", "content": "return the word Banana with + exclamation mark"}], "model": "gpt-4o-mini", "max_tokens": 1000, "seed": 47, + "temperature": 0.0}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '167' + content-type: + - application/json + cookie: + - 
__cf_bm=UYHDcJ7yLbsOr4X6RFVLwnP5NmstFey4TVvwAqc2VHE-1722634648-1.0.1.1-iVxHVwpm0Zuy3SuHJKaxwtiqkY2tULni7_kZocNwJy3WkMGS0gGe1.EVszk8MaHGfdHdHZ4vxdF5PadPfeEPRw; + _cfuvid=UcUTj5QipGlVdj3XSkjDWUAzBBitq1f_0HNRV1e5ni8-1722634648619-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.34.0 + x-stainless-arch: + - x64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - Linux + x-stainless-package-version: + - 1.34.0 + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.5 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA1SQO0/DMBSF9/wKc+cG5UXTZKNDEVIHFgZAKHLcm9Tg+Fq2y0NV/ztyElpYPJzP + 5+izjxFjIHdQMxB77sVgVFzZw0Zttt1zfv94W91Ruv4sP0xePuinbguL0KD2DYX/bV0LGoxCL0lP + WFjkHsNqWmbZMi+WRTWCgXaoQq03Pi4oHqSWcZZkRZyUcbqa23uSAh3U7CVijLHjeAZPvcMvqFmy + +E0GdI73CPX5EmNgSYUEuHPSea49LC5QkPaoR/U111zzK5jh6byqqDeW2mCgD0qd805q6faNRe5I + hwXnyUz1U8TY62h/+CcExtJgfOPpHXUYTG+mObj82QXmM/PkufrTWUWzHrhv53FoOql7tMbK6SWd + aao24W3X4SqF6BT9AAAA//8DAKH4pAjXAQAA + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8ad1359b886c13ec-ORD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Fri, 02 Aug 2024 21:37:29 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - heartex + openai-processing-ms: + - '628' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=15552000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149998988' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_d967d29b6fadbf5acebc749507ccb198 + status: + code: 200 + message: OK +- request: + body: '{"messages": [{"role": "user", "content": "My name is Carla and I am 25 + years old."}], "model": "gpt-4o-mini", "max_tokens": 1000, "seed": 47, "temperature": + 0.0, "tool_choice": {"type": "function", "function": {"name": "Output"}}, "tools": + [{"type": "function", "function": {"name": "Output", "description": "Correctly + extracted `Output` with all the required parameters with correct types", "parameters": + {"properties": {"name": {"description": "name:", "title": "Name", "type": "string"}, + "age": {"description": "age:", "title": "Age", "type": "string"}}, "required": + ["age", "name"], "type": "object"}}}]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '609' + content-type: + - application/json + cookie: + - __cf_bm=UYHDcJ7yLbsOr4X6RFVLwnP5NmstFey4TVvwAqc2VHE-1722634648-1.0.1.1-iVxHVwpm0Zuy3SuHJKaxwtiqkY2tULni7_kZocNwJy3WkMGS0gGe1.EVszk8MaHGfdHdHZ4vxdF5PadPfeEPRw; + _cfuvid=UcUTj5QipGlVdj3XSkjDWUAzBBitq1f_0HNRV1e5ni8-1722634648619-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.34.0 + x-stainless-arch: + - x64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - Linux + x-stainless-package-version: + - 1.34.0 + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.5 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA2xSXWvjMBB8968Q+xyX2E2c2G+lUFra0oO7pzsXIyuKo1ZfJ61LfSH//ZCci9Nw + 
ehDLjGZ22NU+IQTEBioCbEeRKSvT0vV36tkO7M9w175/exju561lv186/lTcwCwoTPvGGf5TXTGj + rOQojB5p5jhFHlyzVZ4X14tiOY+EMhsug6yzmC5MqoQWaT7PF+l8lWbro3pnBOMeKvIrIYSQfbxD + Tr3hn1CR6BURxb2nHYfq9IgQcEYGBKj3wiPVCLOJZEYj1yG67qU8I9AY2TAq5dR4PPuzehoWlbJB + l98/5t8/Snvz+KyXZfvZrn/8vHVn/UbrwcZA216z05DO+BNeXTQjBDRVUfvSo+3xQkkIUNf1imsM + qWFfx/c1VDXcUidpDbMaaDci+bKGA3wxOCT/q1+P1eE0Z2k660zrL8YGW6GF3zWOUx/jg0djxxbB + 7jXus/+yIrDOKIsNmneug+G6GO1g+kUTmR13DWiQygkvi+SYD/zgkatmK3THnXUiLhe2timyrC2K + VZuVkBySvwAAAP//AwD4WS/Q6gIAAA== + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8ad135a30a4113ec-ORD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Fri, 02 Aug 2024 21:37:30 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - heartex + openai-processing-ms: + - '559' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=15552000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149998989' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_7e41971d12f598e154357ffb72fd850b + status: + code: 200 + message: OK +version: 1 diff --git a/tests/test_llm.py b/tests/test_llm.py index 84adeab3..49788ddd 100644 --- a/tests/test_llm.py +++ b/tests/test_llm.py @@ -1,65 +1,55 @@ import pytest -from pydantic import BaseModel, Field -from adala.utils.llm import ( - get_llm_response, - async_get_llm_response, - ConstrainedLLMResponse, - UnconstrainedLLMResponse, -) +import asyncio +import pandas as pd +from adala.runtimes import LiteLLMChatRuntime, AsyncLiteLLMChatRuntime -class ExampleResponseModel(BaseModel): - name: str = Field(..., description="Name of the person") - age: int = Field(..., description="Age of the person") +@pytest.mark.vcr +def test_llm_sync(): + runtime = LiteLLMChatRuntime() -@pytest.mark.parametrize( - "response_model, user_prompt, expected_result", - [ - ( - None, - "return the word Banana with exclamation mark", - UnconstrainedLLMResponse(text="Banana!"), - ), - ( - ExampleResponseModel, - "My name is Carla and I am 25 years old.", - ConstrainedLLMResponse(data={"name": "Carla", "age": 25}), - ), - ], -) -@pytest.mark.vcr -def test_get_llm_response(response_model, user_prompt, expected_result): + # test plaintext success - result = get_llm_response( - user_prompt=user_prompt, - response_model=response_model, + result = runtime.get_llm_response( + messages=[ + {"role": "user", "content": "return the word Banana with exclamation mark"} + ], ) + expected_result = "Banana!" 
+ assert result == expected_result + # test structured success + + result = runtime.record_to_record( + record={"input_name": "Carla", "input_age": 25}, + input_template="My name is {input_name} and I am {input_age} years old.", + instructions_template="", + output_template="name: {name}, age: {age}", + ) + + # note age coerced to string + expected_result = {"name": "Carla", "age": "25"} assert result == expected_result -@pytest.mark.parametrize( - "response_model, user_prompt, expected_result", - [ - ( - None, - "return the word banana with exclamation mark", - UnconstrainedLLMResponse(text="banana!"), - ), - ( - ExampleResponseModel, - "My name is Carla and I am 25 years old.", - ConstrainedLLMResponse(data={"name": "Carla", "age": 25}), - ), - ], -) -@pytest.mark.asyncio @pytest.mark.vcr -async def test_async_get_llm_response(response_model, user_prompt, expected_result): +def test_llm_async(): + + runtime = AsyncLiteLLMChatRuntime() - result = await async_get_llm_response( - user_prompt=user_prompt, response_model=response_model + batch = pd.DataFrame.from_records([{"input_name": "Carla", "input_age": 25}]) + + result = asyncio.run( + runtime.batch_to_batch( + batch, + input_template="My name is {input_name} and I am {input_age} years old.", + instructions_template="", + output_template="name: {name}, age: {age}", + ) ) - assert result == expected_result + # note age coerced to string + expected_result = pd.DataFrame.from_records([{"name": "Carla", "age": "25"}]) + # need 2 all() for row and column axis + assert (result == expected_result).all().all()
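
A minimal usage sketch of the new synchronous runtime, mirroring tests/test_llm.py above. It assumes OPENAI_API_KEY is set for the default gpt-4o-mini model; any other litellm-supported provider can be selected through the extra keyword arguments forwarded to litellm (for example model, api_key, base_url), as described in the LiteLLMChatRuntime docstring.

from adala.runtimes import LiteLLMChatRuntime

# defaults from the class: model="gpt-4o-mini", max_tokens=1000, temperature=0.0, seed=47
runtime = LiteLLMChatRuntime()

# Plain completion: returns the raw completion text as a string.
text = runtime.get_llm_response(
    messages=[
        {"role": "user", "content": "return the word Banana with exclamation mark"}
    ],
)

# Constrained generation: the output_template is parsed into a pydantic response
# model and the result is returned as a dict; on failure the dict instead carries
# the _adala_error / _adala_message / _adala_details keys shown in the diff.
record = runtime.record_to_record(
    record={"input_name": "Carla", "input_age": 25},
    input_template="My name is {input_name} and I am {input_age} years old.",
    instructions_template="",
    output_template="name: {name}, age: {age}",
)
# record == {"name": "Carla", "age": "25"}  (age is coerced to string, as noted in the test)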
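
And a sketch of the async path, again following the test above: AsyncLiteLLMChatRuntime.batch_to_batch sends one instructor-constrained request per row and gathers them concurrently; rows whose request raises come back as _adala_error records instead of failing the whole batch. Same credential assumptions as the synchronous example.

import asyncio
import pandas as pd

from adala.runtimes import AsyncLiteLLMChatRuntime

async_runtime = AsyncLiteLLMChatRuntime()

batch = pd.DataFrame.from_records([{"input_name": "Carla", "input_age": 25}])

result = asyncio.run(
    async_runtime.batch_to_batch(
        batch,
        input_template="My name is {input_name} and I am {input_age} years old.",
        instructions_template="",
        output_template="name: {name}, age: {age}",
    )
)
# result is a dataframe aligned to batch.index, e.g. one row {"name": "Carla", "age": "25"}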