From 70b38bd7830b1d61e4b9e1311b057d757d687dce Mon Sep 17 00:00:00 2001
From: matt-bernstein <60152561+matt-bernstein@users.noreply.github.com>
Date: Tue, 26 Nov 2024 16:11:26 -0500
Subject: [PATCH] feat: DIA-1450: Image Classification support in adala (#264)

---
 adala/runtimes/__init__.py                    |   8 +-
 adala/runtimes/_litellm.py                    | 415 ++++++++------
 adala/runtimes/_openai.py                     |   8 +-
 adala/skills/collection/label_studio.py       |  74 ++-
 .../test_label_studio_skill_image_input.yaml  | 215 ++++++++
 .../test_llm/test_vision_runtime.yaml         | 515 ++++++++++++++++++
 tests/test_label_studio_skill.py              |  49 ++
 tests/test_llm.py                             | 177 +++++-
 8 files changed, 1277 insertions(+), 184 deletions(-)
 create mode 100644 tests/cassettes/test_label_studio_skill/test_label_studio_skill_image_input.yaml
 create mode 100644 tests/cassettes/test_llm/test_vision_runtime.yaml

diff --git a/adala/runtimes/__init__.py b/adala/runtimes/__init__.py
index df9aae16..dbcb33af 100644
--- a/adala/runtimes/__init__.py
+++ b/adala/runtimes/__init__.py
@@ -1,3 +1,7 @@
 from .base import Runtime, AsyncRuntime
-from ._openai import OpenAIChatRuntime, OpenAIVisionRuntime, AsyncOpenAIChatRuntime
-from ._litellm import LiteLLMChatRuntime, AsyncLiteLLMChatRuntime
+from ._openai import OpenAIChatRuntime, AsyncOpenAIChatRuntime, AsyncOpenAIVisionRuntime
+from ._litellm import (
+    LiteLLMChatRuntime,
+    AsyncLiteLLMChatRuntime,
+    AsyncLiteLLMVisionRuntime,
+)
diff --git a/adala/runtimes/_litellm.py b/adala/runtimes/_litellm.py
index 0cb49ae8..7c654cfd 100644
--- a/adala/runtimes/_litellm.py
+++ b/adala/runtimes/_litellm.py
@@ -1,7 +1,9 @@
 import asyncio
 import logging
-from typing import Any, Dict, List, Optional, Type
+from collections import defaultdict
+from typing import Any, Dict, List, Optional, Type, Union, Literal, TypedDict, Iterable, Generator
 from functools import cached_property
+from enum import Enum
 import litellm
 from litellm.exceptions import (
     AuthenticationError,
@@ -19,6 +21,7 @@
 from adala.utils.parse import (
     parse_template,
     partial_str_format,
+    TemplateChunks,
 )
 from pydantic import ConfigDict, field_validator, BaseModel
 from pydantic_core import to_jsonable_python
@@ -56,8 +59,25 @@
 )


+# TODO: consolidate these data models and unify our preprocessing for LLM input into one step RawInputModel -> PreparedInputModel
+class TextMessageChunk(TypedDict):
+    type: Literal["text"]
+    text: str
+
+
+class ImageMessageChunk(TypedDict):
+    type: Literal["image_url"]
+    image_url: Dict[str, str]
+
+
+MessageChunk = Union[TextMessageChunk, ImageMessageChunk]
+
+Message = Union[str, List[MessageChunk]]
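+
+# Example: with these aliases, a user prompt may be either a plain string or a
+# list of typed chunks; a multimodal prompt looks like this (placeholder URL):
+#
+#   prompt: Message = [
+#       {"type": "text", "text": "Describe this painting: "},
+#       {"type": "image_url", "image_url": {"url": "https://example.com/painting.jpg"}},
+#   ]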
+
+
 def get_messages(
-    user_prompt: str,
+    # user prompt can be a string or a list of multimodal message chunks
+    user_prompt: Message,
     system_prompt: Optional[str] = None,
     instruction_first: bool = True,
 ):
@@ -143,6 +163,54 @@ def _from_litellm(self, **kwargs):
         return instructor.from_litellm(litellm.acompletion, **kwargs)


+def handle_llm_exception(
+    e: Exception, messages: List[Dict[str, str]], model: str, retries
+) -> tuple[Dict, Usage]:
+    """Handle exceptions from LLM calls and return standardized error dict and usage stats.
+
+    Args:
+        e: The caught exception
+        messages: The messages that were sent to the LLM
+        model: The model name
+        retries: The retry policy object
+
+    Returns:
+        Tuple of (error_dict, usage_stats)
+    """
+
+    if isinstance(e, IncompleteOutputException):
+        usage = e.total_usage
+    elif isinstance(e, InstructorRetryException):
+        usage = e.total_usage
+        # get root cause error from retries
+        e = e.__cause__.last_attempt.exception()
+    else:
+        # Approximate usage for other errors
+        # usage = e.total_usage
+        # not available here, so have to approximate by hand, assuming the same error occurred each time
+        n_attempts = retries.stop.max_attempt_number
+        prompt_tokens = n_attempts * litellm.token_counter(
+            model=model, messages=messages[:-1]
+        )  # response is appended as the last message
+        # TODO a pydantic validation error may be appended as the last message, don't know how to get the raw response in this case
+        usage = Usage(
+            prompt_tokens=prompt_tokens,
+            completion_tokens=0,
+            total_tokens=prompt_tokens,
+        )
+
+    # Catch case where the model does not return a properly formatted output
+    # AttributeError is an instructor bug: https://github.com/instructor-ai/instructor/pull/1103
+    # > AttributeError: 'NoneType' object has no attribute '_raw_response'
+    if type(e).__name__ in {"ValidationError", "AttributeError"}:
+        logger.error(f"Converting error to ConstrainedGenerationError: {str(e)}")
+        logger.debug(f"Traceback:\n{traceback.format_exc()}")
+        e = ConstrainedGenerationError()
+
+    # the only other instructor error that would be thrown is IncompleteOutputException due to max_tokens reached
+
+    return _log_llm_exception(e), usage
+
+
 class LiteLLMChatRuntime(InstructorClientMixin, Runtime):
     """
     Runtime that uses [LiteLLM API](https://litellm.vercel.app/docs) and chat
@@ -275,43 +343,8 @@ def record_to_record(
             )
             usage = completion.usage
             dct = to_jsonable_python(response)
-        except IncompleteOutputException as e:
-            logger.error(f"Incomplete output error: {str(e)}")
-            logger.error(f"Traceback:\n{traceback.format_exc()}")
-            usage = e.total_usage
-            dct = _log_llm_exception(e)
-        except InstructorRetryException as e:
-            logger.error(f"Instructor retry error: {str(e)}")
-            logger.error(f"Traceback:\n{traceback.format_exc()}")
-            usage = e.total_usage
-            # get root cause error from retries
-            n_attempts = e.n_attempts
-            e = e.__cause__.last_attempt.exception()
-            dct = _log_llm_exception(e)
         except Exception as e:
-            logger.error(f"Other error: {str(e)}")
-            logger.error(f"Traceback:\n{traceback.format_exc()}")
-            # usage = e.total_usage
-            # not available here, so have to approximate by hand, assuming the same error occurred each time
-            n_attempts = retries.stop.max_attempt_number
-            prompt_tokens = n_attempts * litellm.token_counter(
-                model=self.model, messages=messages[:-1]
-            )  # response is appended as the last message
-            # TODO a pydantic validation error may be appended as the last message, don't know how to get the raw response in this case
-            completion_tokens = 0
-            usage = Usage(
-                prompt_tokens=prompt_tokens,
-                completion_tokens=completion_tokens,
-                total_tokens=(prompt_tokens + completion_tokens),
-            )
-
-            # Catch case where the model does not return a properly formatted output
-            # AttributeError is an instructor bug: https://github.com/instructor-ai/instructor/pull/1103
-            # > AttributeError: 'NoneType' object has no attribute '_raw_response'
-            if type(e).__name__ in {"ValidationError", "AttributeError"}:
-                e = ConstrainedGenerationError()
-            # there are no other known errors to catch
-            dct = _log_llm_exception(e)
+
dct, usage = handle_llm_exception(e, messages, self.model, retries) # Add usage data to the response (e.g. token counts, cost) dct.update(_get_usage_dict(usage, model=self.model)) @@ -437,45 +470,11 @@ async def batch_to_batch( # convert list of LLMResponse objects to the dataframe records df_data = [] for response in responses: - if isinstance(response, IncompleteOutputException): - e = response - usage = e.total_usage - dct = _log_llm_exception(e) - elif isinstance(response, InstructorRetryException): - e = response - usage = e.total_usage - # get root cause error from retries - n_attempts = e.n_attempts - e = e.__cause__.last_attempt.exception() - dct = _log_llm_exception(e) - elif isinstance(response, Exception): - e = response - # usage = e.total_usage - # not available here, so have to approximate by hand, assuming the same error occurred each time - n_attempts = retries.stop.max_attempt_number + if isinstance(response, Exception): messages = [] # TODO how to get these? - prompt_tokens = n_attempts * litellm.token_counter( - model=self.model, messages=messages[:-1] - ) # response is appended as the last message - # TODO a pydantic validation error may be appended as the last message, don't know how to get the raw response in this case - completion_tokens = 0 - usage = Usage( - prompt_tokens, - completion_tokens, - total_tokens=(prompt_tokens + completion_tokens), + dct, usage = handle_llm_exception( + response, messages, self.model, retries ) - - # Catch case where the model does not return a properly formatted output - # AttributeError is an instructor bug: https://github.com/instructor-ai/instructor/pull/1103 - # > AttributeError: 'NoneType' object has no attribute '_raw_response' - if type(e).__name__ in {"ValidationError", "AttributeError"}: - logger.error( - f"Converting error to ConstrainedGenerationError: {str(e)}" - ) - logger.debug(f"Traceback:\n{traceback.format_exc()}") - e = ConstrainedGenerationError() - # the only other instructor error that would be thrown is IncompleteOutputException due to max_tokens reached - dct = _log_llm_exception(e) else: resp, completion = response usage = completion.usage @@ -603,109 +602,203 @@ def get_cost_estimate( ) -class LiteLLMVisionRuntime(LiteLLMChatRuntime): +class MessageChunkType(Enum): + TEXT = "text" + IMAGE_URL = "image_url" + + +def split_message_into_chunks( + input_template: str, input_field_types: Dict[str, MessageChunkType], **input_fields +) -> List[MessageChunk]: + """Split a template string with field types into a list of message chunks. + + Takes a template string with placeholders and splits it into chunks based on the field types, + preserving the text between placeholders. + + Args: + input_template (str): Template string with placeholders, e.g. '{a} is a {b} is an {a}' + input_field_types (Dict[str, MessageChunkType]): Dict mapping field names to their types + **input_fields: Field values to substitute into template + + Returns: + List[Dict[str, str]]: List of message chunks with appropriate type and content. + Text chunks have format: {'type': 'text', 'text': str} + Image chunks have format: {'type': 'image_url', 'image_url': {'url': str}} + + Example: + >>> split_message_into_chunks( + ... '{a} is a {b} is an {a}', + ... {'a': MessageChunkType.TEXT, 'b': MessageChunkType.IMAGE_URL}, + ... a='the letter a', + ... b='http://example.com/b.jpg' + ... 
)
+        [
+            {'type': 'text', 'text': 'the letter a is a '},
+            {'type': 'image_url', 'image_url': {'url': 'http://example.com/b.jpg'}},
+            {'type': 'text', 'text': ' is an the letter a'}
+        ]
+    """
+    # Parse template to get field positions and surrounding text
+    parsed = parse_template(input_template)
+
+    def add_to_current_chunk(
+        current_chunk: Optional[MessageChunk], chunk: MessageChunk
+    ) -> MessageChunk:
+        if current_chunk:
+            current_chunk["text"] += chunk["text"]
+            return current_chunk
+        else:
+            return chunk
+
+    # Build chunks by iterating through parsed template parts
+    def build_chunks(
+        parsed: Iterable[TemplateChunks],
+    ) -> Generator[MessageChunk, None, None]:
+        current_chunk: Optional[MessageChunk] = None
+
+        for part in parsed:
+            if part["type"] == "text":
+                current_chunk = add_to_current_chunk(
+                    current_chunk, {"type": "text", "text": part["text"]}
+                )
+            elif part["type"] == "var":
+                field_value = part["text"]
+                try:
+                    field_type = input_field_types[field_value]
+                except KeyError:
+                    raise ValueError(
+                        f"Field {field_value} not found in input_field_types"
+                    )
+                if field_type == MessageChunkType.TEXT:
+                    # try to substitute in variable and add to current chunk
+                    substituted_text = partial_str_format(
+                        f"{{{field_value}}}", **input_fields
+                    )
+                    if substituted_text != f"{{{field_value}}}":
+                        current_chunk = add_to_current_chunk(
+                            current_chunk, {"type": "text", "text": substituted_text}
+                        )
+                    else:
+                        # be permissive for unfound variables
+                        current_chunk = add_to_current_chunk(
+                            current_chunk,
+                            {"type": "text", "text": f"{{{field_value}}}"},
+                        )
+                elif field_type == MessageChunkType.IMAGE_URL:
+                    substituted_text = partial_str_format(
+                        f"{{{field_value}}}", **input_fields
+                    )
+                    if substituted_text != f"{{{field_value}}}":
+                        # push current chunk, push image chunk, and start new chunk
+                        if current_chunk:
+                            yield current_chunk
+                            current_chunk = None
+                        yield {
+                            "type": "image_url",
+                            "image_url": {"url": input_fields[field_value]},
+                        }
+                    else:
+                        # be permissive for unfound variables
+                        current_chunk = add_to_current_chunk(
+                            current_chunk,
+                            {"type": "text", "text": f"{{{field_value}}}"},
+                        )

+        if current_chunk:
+            yield current_chunk
+
+    return list(build_chunks(parsed))
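+
+
+# Example (illustrative field names and values), mirroring the doctest above:
+#
+#   split_message_into_chunks(
+#       "Classify {title} given {image}",
+#       {"title": MessageChunkType.TEXT, "image": MessageChunkType.IMAGE_URL},
+#       title="Mona Lisa",
+#       image="http://example.com/painting.jpg",
+#   )
+#   # -> [{'type': 'text', 'text': 'Classify Mona Lisa given '},
+#   #     {'type': 'image_url', 'image_url': {'url': 'http://example.com/painting.jpg'}}]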
+
+
+class AsyncLiteLLMVisionRuntime(AsyncLiteLLMChatRuntime):
     """
     Runtime that uses [LiteLLM API](https://litellm.vercel.app/docs) and vision
     models to perform the skill.
     """

-    def record_to_record(
+    def init_runtime(self) -> "Runtime":
+        super().init_runtime()
+        if not litellm.supports_vision(self.model):
+            raise ValueError(f"Model {self.model} does not support vision")
+        return self
+
+    async def batch_to_batch(
         self,
-        record: Dict[str, str],
+        batch: InternalDataFrame,
         input_template: str,
         instructions_template: str,
-        output_template: str,
+        response_model: Type[BaseModel],
+        output_template: Optional[
+            str
+        ] = None,  # TODO: deprecated in favor of response_model, can be removed
         extra_fields: Optional[Dict[str, str]] = None,
         field_schema: Optional[Dict] = None,
-        instructions_first: bool = False,
-    ) -> Dict[str, str]:
-        """
-        Execute LiteLLM request given record and templates for input,
-        instructions and output.
+        instructions_first: bool = True,
+        input_field_types: Optional[Dict[str, MessageChunkType]] = None,
+    ) -> InternalDataFrame:
+        """Execute a batch of requests with async calls via LiteLLM"""

-        Args:
-            record: Record to be used for input, instructions and output templates.
-            input_template: Template for input message.
-            instructions_template: Template for instructions message.
-            output_template: Template for output message.
-            extra_fields: Extra fields to be used in templates.
-            field_schema: Field jsonschema to be used for parsing templates.
-                          Field schema must contain "format": "uri" for image fields.
-                          For example:
-                            ```json
-                            {
-                                "image": {
-                                    "type": "string",
-                                    "format": "uri"
-                                }
-                            }
-                            ```
-            instructions_first: If True, instructions will be sent before input.
-        """
+        if not response_model:
+            raise ValueError(
+                "You must explicitly specify the `response_model` in runtime."
+            )
+
+        input_field_types = input_field_types or defaultdict(
+            lambda: MessageChunkType.TEXT
+        )
         extra_fields = extra_fields or {}
-        field_schema = field_schema or {}
+        user_prompts = batch.apply(
+            # TODO: remove "extra_fields" to avoid name collisions
+            lambda row: split_message_into_chunks(
+                input_template, input_field_types, **row, **extra_fields
+            ),
+            axis=1,
+        ).tolist()

-        output_fields = parse_template(
-            partial_str_format(output_template, **extra_fields),
-            include_texts=False,
-        )
+        # rest of this function is the same as AsyncLiteLLMChatRuntime.batch_to_batch

-        if len(output_fields) > 1:
-            raise NotImplementedError(
-                f"{self.__class__.__name__} does not support multiple output fields. "
-                f"Found: {output_fields}"
+        retries = AsyncRetrying(**RETRY_POLICY)
+
+        tasks = [
+            asyncio.ensure_future(
+                self.client.chat.completions.create_with_completion(
+                    messages=get_messages(
+                        user_prompt,
+                        instructions_template,
+                        instructions_first,
+                    ),
+                    response_model=response_model,
+                    model=self.model,
+                    max_tokens=self.max_tokens,
+                    temperature=self.temperature,
+                    seed=self.seed,
+                    max_retries=retries,
+                    # extra inference params passed to this runtime
+                    **self.model_extra,
+                )
             )
-        output_field = output_fields[0]
-        output_field_name = output_field["text"]
-
-        input_fields = parse_template(input_template)
-
-        # split input template into text and image parts
-        input_text = ""
-        content = [
-            {
-                "type": "text",
-                "text": instructions_template,
-            }
+            for user_prompt in user_prompts
         ]
-        for field in input_fields:
-            if field["type"] == "text":
-                input_text += field["text"]
-            elif field["type"] == "var":
-                if field["text"] not in field_schema:
-                    input_text += record[field["text"]]
-                elif field_schema[field["text"]]["type"] == "string":
-                    if field_schema[field["text"]].get("format") == "uri":
-                        if input_text:
-                            content.append({"type": "text", "text": input_text})
-                            input_text = ""
-                        content.append(
-                            {
-                                "type": "image_url",
-                                "image_url": record[field["text"]],
-                            }
-                        )
-                    else:
-                        input_text += record[field["text"]]
-                else:
-                    raise ValueError(
-                        f'Unsupported field type: {field_schema[field["text"]]["type"]}'
-                    )
-        if input_text:
-            content.append({"type": "text", "text": input_text})
+        responses = await asyncio.gather(*tasks, return_exceptions=True)

-        if self.verbose:
-            print(f"**Prompt content**:\n{content}")
+        # convert list of LLMResponse objects to the dataframe records
+        df_data = []
+        for response in responses:
+            if isinstance(response, Exception):
+                messages = []  # TODO how to get these?
+                dct, usage = handle_llm_exception(
+                    response, messages, self.model, retries
+                )
+            else:
+                resp, completion = response
+                usage = completion.usage
+                dct = to_jsonable_python(resp)

-        completion = litellm.completion(
-            messages=[{"role": "user", "content": content}],
-            max_tokens=self.max_tokens,
-            temperature=self.temperature,
-            seed=self.seed,
-            # extra inference params passed to this runtime
-            **self.model_extra,
-        )
+            # Add usage data to the response (e.g. token counts, cost)
+            dct.update(_get_usage_dict(usage, model=self.model))

-        completion_text = completion.choices[0].message.content
-        return {output_field_name: completion_text}
+            df_data.append(dct)
+
+        output_df = InternalDataFrame(df_data)
+        return output_df.set_index(batch.index)
+
+    # TODO: cost estimate
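
For reference, the calling convention of the new vision runtime, which the test suite below exercises end to end (a sketch: the model name and image URL are illustrative, and the imports follow this patch's module layout):

import asyncio
import pandas as pd
from pydantic import BaseModel, Field
from adala.runtimes import AsyncLiteLLMVisionRuntime
from adala.runtimes._litellm import MessageChunkType

class VisionOutput(BaseModel):
    description: str = Field(..., description="Description of the image")

# init_runtime (invoked by the Agent) rejects models that litellm.supports_vision flags as text-only
runtime = AsyncLiteLLMVisionRuntime(model="gpt-4o-mini")
batch = pd.DataFrame.from_records(
    [{"text": "What's in this image?", "image": "https://example.com/painting.jpg"}]
)
result = asyncio.run(
    runtime.batch_to_batch(
        batch,
        input_template="{text} {image}",
        instructions_template="Describe what you see in the image.",
        response_model=VisionOutput,
        # fields marked IMAGE_URL are sent as image_url chunks instead of inline text
        input_field_types={
            "text": MessageChunkType.TEXT,
            "image": MessageChunkType.IMAGE_URL,
        },
    )
)
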
diff --git a/adala/runtimes/_openai.py b/adala/runtimes/_openai.py
index 009d1980..c3441699 100644
--- a/adala/runtimes/_openai.py
+++ b/adala/runtimes/_openai.py
@@ -1,7 +1,11 @@
-from ._litellm import AsyncLiteLLMChatRuntime, LiteLLMChatRuntime, LiteLLMVisionRuntime
+from ._litellm import (
+    AsyncLiteLLMChatRuntime,
+    LiteLLMChatRuntime,
+    AsyncLiteLLMVisionRuntime,
+)

 # litellm already reads the OPENAI_API_KEY env var, which was the reason for this class
 OpenAIChatRuntime = LiteLLMChatRuntime
 AsyncOpenAIChatRuntime = AsyncLiteLLMChatRuntime
-OpenAIVisionRuntime = LiteLLMVisionRuntime
+AsyncOpenAIVisionRuntime = AsyncLiteLLMVisionRuntime
diff --git a/adala/skills/collection/label_studio.py b/adala/skills/collection/label_studio.py
index edf3a0d0..8a1582b5 100644
--- a/adala/skills/collection/label_studio.py
+++ b/adala/skills/collection/label_studio.py
@@ -2,14 +2,17 @@
 import pandas as pd
 from typing import Type, Iterator, Optional
 from functools import cached_property
+from collections import defaultdict

 from adala.skills._base import TransformSkill
+from adala.runtimes import AsyncLiteLLMVisionRuntime
+from adala.runtimes._litellm import MessageChunkType
 from pydantic import BaseModel, Field, model_validator
 from adala.runtimes import Runtime, AsyncRuntime
 from adala.utils.internal_data import InternalDataFrame

 from label_studio_sdk.label_interface import LabelInterface
-from label_studio_sdk.label_interface.control_tags import ControlTag
+from label_studio_sdk.label_interface.control_tags import ControlTag, ObjectTag
 from label_studio_sdk._extensions.label_studio_tools.core.utils.json_schema import (
     json_schema_to_pydantic,
 )
@@ -35,39 +38,60 @@ class LabelStudioSkill(TransformSkill):

     # TODO: implement postprocessing to verify Taxonomy

+    @cached_property
+    def label_interface(self) -> LabelInterface:
+        return LabelInterface(self.label_config)
+
+    @cached_property
     def ner_tags(self) -> Iterator[ControlTag]:
         # check if the input config has NER tag (<Labels>), and return its `from_name` and `to_name`
-        interface = LabelInterface(self.label_config)
-        for tag in interface.controls:
-            # NOTE: don't need to check object tag because at this point, unusable control tags should have been stripped out of the label config
-            if tag.tag.lower() == "labels":
+        control_tag_names = self.allowed_control_tags or list(
+            self.label_interface._controls.keys()
+        )
+        for tag_name in control_tag_names:
+            tag = self.label_interface.get_control(tag_name)
+            if tag.tag.lower() in {"labels", "hypertextlabels"}:
+                yield tag
+
+    @cached_property
+    def image_tags(self) -> Iterator[ObjectTag]:
+        # check if any image tags are used as input variables
+        object_tag_names = self.allowed_object_tags or list(
+            self.label_interface._objects.keys()
+        )
+        for tag_name in object_tag_names:
+            tag = self.label_interface.get_object(tag_name)
+            if tag.tag.lower() == "image":
                 yield tag

     @model_validator(mode="after")
     def validate_response_model(self):
-        interface = LabelInterface(self.label_config)
         logger.debug(f"Read labeling config {self.label_config}")

         if self.allowed_control_tags or self.allowed_object_tags:
             if self.allowed_control_tags:
                 control_tags = {
-                    tag: interface._controls[tag] for tag in self.allowed_control_tags
+
tag: self.label_interface._controls[tag] + for tag in self.allowed_control_tags } else: - control_tags = interface._controls + control_tags = self.label_interface._controls if self.allowed_object_tags: object_tags = { - tag: interface._objects[tag] for tag in self.allowed_object_tags + tag: self.label_interface._objects[tag] + for tag in self.allowed_object_tags } else: - object_tags = interface._objects + object_tags = self.label_interface._objects interface = LabelInterface.create_instance( tags={**control_tags, **object_tags} ) logger.debug( f"Filtered labeling config based on allowed tags {self.allowed_control_tags=} and {self.allowed_object_tags=} to {interface.config}" ) + else: + interface = self.label_interface # NOTE: filtered label config is used for the response model, but full label config is used for the prompt, so that the model has as much context as possible. self.field_schema = interface.to_json_schema() @@ -100,14 +124,28 @@ async def aapply( ) -> InternalDataFrame: with json_schema_to_pydantic(self.field_schema) as ResponseModel: - output = await runtime.batch_to_batch( - input, - input_template=self.input_template, - output_template="", - instructions_template=self.instructions, - response_model=ResponseModel, - ) - for ner_tag in self.ner_tags(): + # special handling to flag image inputs if they exist + if isinstance(runtime, AsyncLiteLLMVisionRuntime): + input_field_types = defaultdict(lambda: MessageChunkType.TEXT) + for tag in self.image_tags: + input_field_types[tag.name] = MessageChunkType.IMAGE_URL + output = await runtime.batch_to_batch( + input, + input_template=self.input_template, + output_template="", + instructions_template=self.instructions, + response_model=ResponseModel, + input_field_types=input_field_types, + ) + else: + output = await runtime.batch_to_batch( + input, + input_template=self.input_template, + output_template="", + instructions_template=self.instructions, + response_model=ResponseModel, + ) + for ner_tag in self.ner_tags: input_field_name = ner_tag.objects[0].value.lstrip("$") output_field_name = ner_tag.name quote_string_field_name = "text" diff --git a/tests/cassettes/test_label_studio_skill/test_label_studio_skill_image_input.yaml b/tests/cassettes/test_label_studio_skill/test_label_studio_skill_image_input.yaml new file mode 100644 index 00000000..6e57a70d --- /dev/null +++ b/tests/cassettes/test_label_studio_skill/test_label_studio_skill_image_input.yaml @@ -0,0 +1,215 @@ +interactions: +- request: + body: '{"messages": [{"role": "user", "content": "Hey, how''s it going?"}], "model": + "gpt-4o-mini", "max_tokens": 1000, "seed": 47, "temperature": 0.0}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '143' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.47.1 + x-stainless-arch: + - x64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - Linux + x-stainless-package-version: + - 1.47.1 + x-stainless-raw-response: + - 'true' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.5 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA4xSy27bMBC86yu2vPRiFbbs1o9LUPTS9NBDgz7QIBBociWxobgsuYLjBgb6G/29 + fklB2bEUNAV6IcCZncHMkvcZgDBabECoRrJqvc1ff9FXTf3+zXfeVl/ryw/x46d3P1q7M/X+6k5M + koK231Dxg+qFotZbZEPuSKuAkjG5zpbz4uVyvVque6IljTbJas/5gvLWOJMX02KRT5f5bHVSN2QU + 
RrGB6wwA4L4/U06n8U5sYDp5QFqMUdYoNuchABHIJkTIGE1k6VhMBlKRY3R99MvnLWgyroYdWjsB + bqS7hT11z+At7UBuqeN0vYDPjeTfP39FIJeAAK1xGpi03F+MzQNWXZSpoOusPeGHc1pLtQ+0jSf+ + jFfGmdiUAWUkl5JFJi969pAB3PRb6R4VFT5Q67lkukWXDGeLo50Y3mJErk4kE0s74PNi8oRbqZGl + sXG0VaGkalAPyuEJZKcNjYhs1PnvME95H3sbV/+P/UAohZ5Rlz6gNupx4WEsYPqp/xo777gPLOI+ + MrZlZVyNwQdz/CeVL+caZ8VqNX21Ftkh+wMAAP//AwDs57wINQMAAA== + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8e85a915891d6208-ORD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Tue, 26 Nov 2024 00:11:19 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=f2eAWUmcSjgkraa7rJvzhr53.Kz3y7EZniQwAmrWmHg-1732579879-1.0.1.1-FcTG.L1LC0IYeDrJNsA3S_9CqAeK8RVmE9li1oKj8OrrEOFELgjJ.wfKOQqQi8SWUsocl.oe2kGwriII9BVQ5Q; + path=/; expires=Tue, 26-Nov-24 00:41:19 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=5M11WH7821NNRxCf3t86tF5_JSGA0RXiNMeAxl1Pa4A-1732579879834-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - heartex + openai-processing-ms: + - '488' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149998994' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_c89ae189bd037c2fdf4605f19a3115f5 + status: + code: 200 + message: OK +- request: + body: '{"messages": [{"role": "user", "content": "\n Given + the title of a museum painting:\nIt''s definitely not the Mona Lisa\n and the + image of the painting:\nhttps://upload.wikimedia.org/wikipedia/commons/thumb/e/ec/Mona_Lisa%2C_by_Leonardo_da_Vinci%2C_from_C2RMF_retouched.jpg/687px-Mona_Lisa%2C_by_Leonardo_da_Vinci%2C_from_C2RMF_retouched.jpg\n,\n classify + the painting as either \"Mona Lisa\" or \"Not Mona Lisa\".\n They + may or may not agree with each other. 
If the title and image disagree, believe + the image.\n "}], "model": "gpt-4o-mini", "max_tokens": 1000, + "seed": 47, "temperature": 0.0, "tool_choice": {"type": "function", "function": + {"name": "MyModel"}}, "tools": [{"type": "function", "function": {"name": "MyModel", + "description": "Correctly extracted `MyModel` with all the required parameters + with correct types", "parameters": {"properties": {"classification": {"description": + "Choices for image", "enum": ["Mona Lisa", "Not Mona Lisa"], "title": "Classification", + "type": "string"}}, "required": ["classification"], "type": "object"}}}]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '1124' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.47.1 + x-stainless-arch: + - x64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - Linux + x-stainless-package-version: + - 1.47.1 + x-stainless-raw-response: + - 'true' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.5 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA4xTUW/TMBB+z6+w7rlBSelIyNsmgRhqERpioFEUuc4l9ebYnu1IlKr/HdnpkrQU + iTxY1n33fXf3+bKPCAFeQUGAbaljrRbx9ffqC79/Xr3p3n+6edfJ5cOH34+39c0i+5YgzDxDbR6R + uRfWK6ZaLdBxJXuYGaQOvWqavZ5fZW/zPAlAqyoUntZoFy9U3HLJ43kyX8RJFqf5kb1VnKGFgvyI + CCFkH07fp6zwFxQkaIVIi9bSBqEYkggBo4SPALWWW0elg9kIMiUdSt+67ISYAE4pUTIqxFi4//aT + +2gWFaLcpJpf7+rlXf7ViXZ7e/f8RD9/5GZSr5fe6dBQ3Uk2mDTBh3hxVowQkLQN3NVuFbybnSdQ + 03QtSufbhv0amPBz15xRL7mGYg0rJSlZckvXcIAT/iG6dP85scVg3Vkqjn4d44fhAYRqtFEbe+Yn + 1Fxyuy0NUhvmAuuU7mv7OqECdCdvB9qoVrvSqSeUXnCepr0ejPs1otkRc8pRMSXlswtyZYWO8vC2 + wzoxyrZYjdRxrWhXcTUBosnQfzdzSbsfnMvmf+RHgDHUDqtSG6w4Ox14TDPo/75/pQ0mh4bB7qzD + tqy5bNBow8PuQ63LJEuuNnWesQSiQ/QHAAD//wMAq681QQkEAAA= + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8e85a91a5a7622f3-ORD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Tue, 26 Nov 2024 00:11:20 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=mR.lQGByVqO3YXPOJhOAYfQSCaSh.GGUAiqvmTKYeF4-1732579880-1.0.1.1-kjoNgd4tNmz.8ile246dtkSjbL3C9pTtBxM35zH_sQENgFJuN91lWEVTAYebM_Au.qq8D_Sr1S1_DegpYxCo7A; + path=/; expires=Tue, 26-Nov-24 00:41:20 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=vJrrlSUKQKX62ERSv.300oGbNMFud1yC5ztTRDPBooA-1732579880316-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - heartex + openai-processing-ms: + - '189' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149998865' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_5d437fcbab69225ff907cf1da14e1bb7 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/cassettes/test_llm/test_vision_runtime.yaml b/tests/cassettes/test_llm/test_vision_runtime.yaml new file mode 100644 index 00000000..3c500876 --- /dev/null +++ b/tests/cassettes/test_llm/test_vision_runtime.yaml @@ -0,0 +1,515 @@ 
+interactions: +- request: + body: '{"messages": [{"role": "user", "content": "Hey, how''s it going?"}], "model": + "gpt-4o-mini", "max_tokens": 1000, "seed": 47, "temperature": 0.0}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '143' + content-type: + - application/json + cookie: + - _cfuvid=l7iL4O7hW2C3VXg3EHHmHGMfG6h9GaDpM3R43nhxJAw-1730733247411-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.47.1 + x-stainless-arch: + - x64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - Linux + x-stainless-package-version: + - 1.47.1 + x-stainless-raw-response: + - 'true' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.5 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA4xSwYrbMBS8+ytedeklLk6cYDeXpaWU5tDelpYtxSjSs62NrKdaz03Dkn8vcrJx + lt2FXgSaeTPMPOkhARBGizUI1UpWnbfphx+6yDe36q5c7PKPd5803f5pPpd4//tr+U3MooK296j4 + UfVOUectsiF3olWPkjG6zot8sSrKVV6MREcabZQ1ntMlpZ1xJl1ki2WaFem8PKtbMgqDWMPPBADg + YTxjTqfxr1hDNntEOgxBNijWlyEA0ZONiJAhmMDSsZhNpCLH6Mbom7cdaDKugT1aOwNupdvBgYY3 + 8IX2ILc0cLzewPdWMijpYAMtWh9B2BtugUnLw821f4/1EGTs6AZrz/jxEthS43vahjN/wWvjTGir + HmUgF8MFJi9G9pgA/BoXMzzpKnxPneeKaYcuGs6XJzsxPccV+f5MMrG0E57nsxfcKo0sjQ1XixVK + qhb1pJxeQQ7a0BWRXHV+HuYl71Nv45r/sZ8IpdAz6sr3qI16Wnga6zF+1tfGLjseA4twCIxdVRvX + YO97c/oqta+yIltt67JQmUiOyT8AAAD//wMA/n3XgzgDAAA= + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8e85885378e086f8-ORD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Mon, 25 Nov 2024 23:48:57 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=eLsPnt6Pzes.k01.NQ1J2SgKRKHWAdtCiIuLlunhlDc-1732578537-1.0.1.1-4M9T24vvtlNrVZCB3bdC7q5cky0ytrfuAc6SSSWPX_bTfFo6Y2dRnkyN.fFwqAwbjETBDmiCE4pFQ_HP6XtwiA; + path=/; expires=Tue, 26-Nov-24 00:18:57 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=H3e75EZARspIKAJIbdmcPONXX2bd4dBJ_iVCJBZKCPk-1732578537967-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - heartex + openai-processing-ms: + - '367' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149998994' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_494b776ae0ac7090d4d435fe4bb434ab + status: + code: 200 + message: OK +- request: + body: '{"messages": [{"role": "user", "content": [{"type": "text", "text": "My + name is Carla and I am 25 years old."}]}], "model": "gpt-4o-mini", "max_tokens": + 1000, "seed": 47, "temperature": 0.0, "tool_choice": {"type": "function", "function": + {"name": "Output"}}, "tools": [{"type": "function", "function": {"name": "Output", + "description": "Correctly extracted `Output` with all the required parameters + with correct types", "parameters": {"properties": {"name": {"description": "name:", + "title": "Name", "type": "string"}, "age": {"description": "age:", "title": + "Age", "type": "string"}}, "required": ["age", "name"], "type": "object"}}}]}' + headers: 
+ accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '637' + content-type: + - application/json + cookie: + - _cfuvid=sklnqaX5NEz5UfcuJZB8jNRawvW.wjbD8N5YSJ_rUh0-1723578083597-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.47.1 + x-stainless-arch: + - x64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - Linux + x-stainless-package-version: + - 1.47.1 + x-stainless-raw-response: + - 'true' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.5 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA4xT32+bMBB+56+w7jlMJA0l5S2Nqkn70eZhU6uOCRlzEKfG9myztYvyv09AAiRt + pfFgWffd931352PnEQI8h5gA21DHKi385UMezT9+vnkKbx++F6s/65uvj9f6223269PvGiYNQ2Vb + ZO7I+sBUpQU6rmQHM4PUYaM6jS5mYbQILxYtUKkcRUMrtfPnyq+45P4smM39IPKniwN7ozhDCzH5 + 4RFCyK49mzpljs8Qk2ByjFRoLS0R4j6JEDBKNBGg1nLrqHQwGUCmpEPZlC5rIUaAU0qkjAoxGHff + bnQfhkWFSC/1/ePV3y1dZl+e18vtOsCVvqvur0d+nfSLbgsqasn6IY3wPh6fmRECklYt9652unZn + TEKAmrKuULqmatglbX4CcQIragRNYJIALbvILExgDycCe++t+8/RWAwWtaXiMK9DfN8/gFClNiqz + Z/OEgktuN6lBatu+wDqlO+/Gp3WA+uTtQBtVaZc69YSyEVxcdnIwrNcATg9LAE45Kob41ZF0opbm + 6Chvn7bfJkbZBvOBOWwVrXOuRoA36vl1MW9pd31zWf6P/AAwhtphnmqDOWenDQ9pBpuf7720fsZt + wWBfrMMqLbgs0WjD29WHQqdBFIRZsYhYAN7e+wcAAP//AwAydy+KCAQAAA== + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8e8588573ce4eb05-ORD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Mon, 25 Nov 2024 23:48:58 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=nUr9DAa1xx46rsdQ8jLylug4EQEIjBqOh_DrnLQ4iCs-1732578538-1.0.1.1-K1.z3.kxwK.y2bN.csBbWfD2WXgmPSqNwl3533sF5oKfc02lw0BHcl2bPvy3DNpFzGmut5iaAIX7XUGTSGX0Mw; + path=/; expires=Tue, 26-Nov-24 00:18:58 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=mZO7PQIQWPp7HYEYjCqejvrcPsMCoqvpZ8K9wj9EcwQ-1732578538510-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - heartex + openai-processing-ms: + - '321' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149998989' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_16f953166c00a551ee4fa5e931a38a40 + status: + code: 200 + message: OK +- request: + body: '{"messages": [{"role": "user", "content": [{"type": "text", "text": "My + name is Carla and I am 25 years old."}]}], "model": "gpt-4o-mini", "max_tokens": + 1000, "seed": 47, "temperature": 0.0, "tool_choice": {"type": "function", "function": + {"name": "Output"}}, "tools": [{"type": "function", "function": {"name": "Output", + "description": "Correctly extracted `Output` with all the required parameters + with correct types", "parameters": {"properties": {"name": {"description": "name:", + "title": "Name", "type": "string"}, "age": {"description": "age:", "title": + "Age", "type": "string"}}, "required": ["age", "name"], "type": "object"}}}]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - 
keep-alive + content-length: + - '637' + content-type: + - application/json + cookie: + - _cfuvid=W8DNnoWXBapN39sZpvMK7iASgmsq_OnqmTKqTPq2CBI-1723578083808-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.47.1 + x-stainless-arch: + - x64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - Linux + x-stainless-package-version: + - 1.47.1 + x-stainless-raw-response: + - 'true' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.5 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: "{\n \"error\": {\n \"message\": \"Incorrect API key provided: + fake_api_key. You can find your API key at https://platform.openai.com/account/api-keys.\",\n + \ \"type\": \"invalid_request_error\",\n \"param\": null,\n \"code\": + \"invalid_api_key\"\n }\n}\n" + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8e85885b2f0010cd-ORD + Connection: + - keep-alive + Content-Length: + - '262' + Content-Type: + - application/json; charset=utf-8 + Date: + - Mon, 25 Nov 2024 23:48:58 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=gnCsyRGFtGleLdDkTxhpjTtYilNQ2QnK83QhNMjlUXc-1732578538-1.0.1.1-0nLNbKcFb6VJlx3KAZyc0Qmtnd8O3b4b3ZNZiYdnhRY2zbO93GO4Jc5QaTnmZybTfoQwXuC.lQeLZIChL536ew; + path=/; expires=Tue, 26-Nov-24 00:18:58 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=5yrza0CipJMkqg1mo3_JtMjx7oALVzAVravbx3z3rVw-1732578538840-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + X-Content-Type-Options: + - nosniff + alt-svc: + - h3=":443"; ma=86400 + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + vary: + - Origin + x-request-id: + - req_ea1af632f81971bec43817a1fdccbf82 + status: + code: 401 + message: Unauthorized +- request: + body: '{"messages": [{"role": "user", "content": "Hey, how''s it going?"}], "model": + "gpt-4o-mini", "max_tokens": 1000, "seed": 47, "temperature": 0.0}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '143' + content-type: + - application/json + cookie: + - _cfuvid=H3e75EZARspIKAJIbdmcPONXX2bd4dBJ_iVCJBZKCPk-1732578537967-0.0.1.1-604800000; + __cf_bm=eLsPnt6Pzes.k01.NQ1J2SgKRKHWAdtCiIuLlunhlDc-1732578537-1.0.1.1-4M9T24vvtlNrVZCB3bdC7q5cky0ytrfuAc6SSSWPX_bTfFo6Y2dRnkyN.fFwqAwbjETBDmiCE4pFQ_HP6XtwiA + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.47.1 + x-stainless-arch: + - x64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - Linux + x-stainless-package-version: + - 1.47.1 + x-stainless-raw-response: + - 'true' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.5 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA4xSTY/TMBS851c8fOGSoPRLKb2sOMHCBQkhKhCKXPslMXX8LPuFpVpV4m/w9/gl + yGm36YpF4mLJM29GM8++zwCE0WIDQnWSVe9t8Wqrq9XWfGQOfbX/vLj93r3X796+/qDKbSPypKDd + N1T8oHqhqPcW2ZA70SqgZEyus2oxX1Xr1eLlSPSk0SZZ67lYUtEbZ4p5OV8WZVXM1md1R0ZhFBv4 + kgEA3I9nyuk0/hAbKPMHpMcYZYticxkCEIFsQoSM0USWjkU+kYocoxuj3z7vQZNxLdyhtTlwJ90e + DjQ8gzd0B3JHA6frDXzqJP/++SsCuQQE6I3TwKTl4ebaPGAzRJkKusHaM368pLXU+kC7eOYveGOc + iV0dUEZyKVlk8mJkjxnA13Erw6OiwgfqPddMe3TJcLY82YnpLa7I9ZlkYmknfDHPn3CrNbI0Nl5t + VSipOtSTcnoCOWhDV0R21fnvME95n3ob1/6P/UQohZ5R1z6gNupx4WksYPqp/xq77HgMLOIhMvZ1 + Y1yLwQdz+ieNr8uqXO2adaVKkR2zPwAAAP//AwApqZnyNQMAAA== + headers: + CF-Cache-Status: + - DYNAMIC + 
CF-RAY: + - 8e85885c1af786f8-ORD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Mon, 25 Nov 2024 23:48:59 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - heartex + openai-processing-ms: + - '472' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149998994' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_dfa7c9dcf2072b06ff6bf2eace034012 + status: + code: 200 + message: OK +- request: + body: '{"messages": [{"role": "system", "content": "Describe what you see in the + image."}, {"role": "user", "content": [{"type": "text", "text": "What''s in + this image? "}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/e/ec/Mona_Lisa%2C_by_Leonardo_da_Vinci%2C_from_C2RMF_retouched.jpg/687px-Mona_Lisa%2C_by_Leonardo_da_Vinci%2C_from_C2RMF_retouched.jpg"}}]}], + "model": "gpt-4o-mini", "max_tokens": 1000, "seed": 47, "temperature": 0.0, + "tool_choice": {"type": "function", "function": {"name": "VisionOutput"}}, "tools": + [{"type": "function", "function": {"name": "VisionOutput", "description": "Correctly + extracted `VisionOutput` with all the required parameters with correct types", + "parameters": {"properties": {"description": {"description": "Description of + the image", "title": "Description", "type": "string"}}, "required": ["description"], + "type": "object"}}}]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '911' + content-type: + - application/json + cookie: + - _cfuvid=mZO7PQIQWPp7HYEYjCqejvrcPsMCoqvpZ8K9wj9EcwQ-1732578538510-0.0.1.1-604800000; + __cf_bm=nUr9DAa1xx46rsdQ8jLylug4EQEIjBqOh_DrnLQ4iCs-1732578538-1.0.1.1-K1.z3.kxwK.y2bN.csBbWfD2WXgmPSqNwl3533sF5oKfc02lw0BHcl2bPvy3DNpFzGmut5iaAIX7XUGTSGX0Mw + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.47.1 + x-stainless-arch: + - x64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - Linux + x-stainless-package-version: + - 1.47.1 + x-stainless-raw-response: + - 'true' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.5 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA4xUUW/bNhB+96848Fk2EjuxXb8V67AOyFYMS4NhTWGcyZN0NUUS5KmqG+S/F5Qc + y8kyoHoQiPt43/fdkbyHCYBiozagdI2im2Cnb/8xq9Xd7V/ffbq7qd52tzeH9T5E6X69Mr+pImf4 + 3RfS8pQ1074JloS9G2AdCYUy6+VqMb9era+vLnug8YZsTquCTK/8tGHH0/nF/Gp6sZpero/ZtWdN + SW3g0wQA4KH/Z5/O0De1gYviKdJQSliR2pw2AajobY4oTImToBNVjKD2Tshl66619gwQ7+1Wo7Wj + 8PA9nK3HZqG12/Dv4pePH3Qpy4/0znXm9s+l48X37kxvoD6E3lDZOn1q0hl+im9eiAEoh02fe8eJ + vfvQSmjlRT6Awli1DTnJ3tXDvTKUdOSQSe/V5l7d1gTcYEVgKLCWBFITlNj4NkFAdsKugr3znQMc + wD+8Q7jhhAUcjxN2B7gh7zAaDwbhjp3mGfwuUBJKGykBQucbdNCx1ICQKJIjoG8hUsr+C0gsvRZW + yC4JIJjhkMCiM0ljoBn8XRPUmMB6VxVgMO6hRo6AzgAn6AhjzzFAJpM/SVquaplqb30kA60zFCuM + uTczeE8R6qwCGAl09CmRAXZQRu8EfAk1xaJXSblh2YDfZyXDkbTYA6D0zfnK1FE8ldnuxBKkhi3N + IPd6h3pfRd9mv07b1lCC6K3NXDVbm3oRhI6dybHIX7NypOx38JR8KQU0bW58X06a3atH9ezkHyev + 
rT+f3epIZZvQHq/7Mf54ej/WVyH6XXrxHFTJjlO9jYSpv5YqiQ+DdtbpFVT77OmpEH0TZCt+Ty4T + zq+v16uBUY0DYsTfLI+geEF7nrdcL4pXOLeGBLl/n6eRoFHXZMbkcTRga9ifAZOzyv/r5zXuoXp2 + 1c/Qj4DWFITMNkQyrJ/XPG6LlCfo/207dbo3rNIhCTXbkl1FMUTu55cqw3Zh6HK+Xl8s36jJ4+QH + AAAA//8DADoKaVzNBQAA + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8e8588625b2ce814-ORD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Mon, 25 Nov 2024 23:49:03 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - heartex + openai-processing-ms: + - '3140' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-input-images: + - '50000' + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-input-images: + - '49999' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149998217' + x-ratelimit-reset-input-images: + - 1ms + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_1970fe17cb190fdb98d07b28d85fbe01 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/test_label_studio_skill.py b/tests/test_label_studio_skill.py index ca3279a1..c04b1a9b 100644 --- a/tests/test_label_studio_skill.py +++ b/tests/test_label_studio_skill.py @@ -1,3 +1,4 @@ +import asyncio import pytest import os import pandas as pd @@ -490,3 +491,51 @@ async def test_label_studio_skill_valid_predictions(): assert len(failed_configs) == 0, f"Failed configs: {failed_configs}" assert len(errored_configs) == 0, f"Errored configs: {errored_configs}" + + +@pytest.mark.vcr +def test_label_studio_skill_image_input(): + df = pd.DataFrame( + [ + { + "title": "It's definitely not the Mona Lisa", + "image": "https://upload.wikimedia.org/wikipedia/commons/thumb/e/ec/Mona_Lisa%2C_by_Leonardo_da_Vinci%2C_from_C2RMF_retouched.jpg/687px-Mona_Lisa%2C_by_Leonardo_da_Vinci%2C_from_C2RMF_retouched.jpg", + } + ] + ) + + agent_payload = { + "runtimes": { + "default": { + "type": "AsyncLiteLLMChatRuntime", + "model": "gpt-4o-mini", + } + }, + "skills": [ + { + "type": "LabelStudioSkill", + "name": "SneakyMuseumLabel", + "input_template": """ + Given the title of a museum painting:\n{title}\n and the image of the painting:\n{image}\n, + classify the painting as either "Mona Lisa" or "Not Mona Lisa". + They may or may not agree with each other. If the title and image disagree, believe the image. + """, + "label_config": """ + +
+                <View>
+                    <Text name="title" value="$title"/>
+                    <Image name="image" value="$image"/>
+                    <Choices name="classification" toName="image">
+                        <Choice value="Mona Lisa"/>
+                        <Choice value="Not Mona Lisa"/>
+                    </Choices>
+                </View>
+                """,
+            }
+        ],
+    }
+
+    agent = Agent(**agent_payload)
+    predictions = asyncio.run(agent.arun(df))
+
+    assert predictions.classification.tolist() == ["Mona Lisa"]
diff --git a/tests/test_llm.py b/tests/test_llm.py
index 10ad8c2b..ea3b9809 100644
--- a/tests/test_llm.py
+++ b/tests/test_llm.py
@@ -2,7 +2,12 @@
 import asyncio
 import pandas as pd
 from pydantic import BaseModel, Field
-from adala.runtimes import LiteLLMChatRuntime, AsyncLiteLLMChatRuntime
+from adala.runtimes import (
+    LiteLLMChatRuntime,
+    AsyncLiteLLMChatRuntime,
+    AsyncLiteLLMVisionRuntime,
+)
+from adala.runtimes._litellm import split_message_into_chunks, MessageChunkType


 @pytest.mark.vcr
@@ -141,3 +146,173 @@ class Output(BaseModel):
     pd.testing.assert_frame_equal(result, expected_result)

     # TODO test batch with successes and failures, figure out how to inject a particular error into LiteLLM
+
+
+def test_split_message_into_chunks():
+    # Test basic text-only template
+    result = split_message_into_chunks(
+        "Hello {name}!", {"name": MessageChunkType.TEXT}, name="Alice"
+    )
+    assert result == [{"type": "text", "text": "Hello Alice!"}]
+
+    # Test template with image URL
+    result = split_message_into_chunks(
+        "Look at this {image}",
+        {"image": MessageChunkType.IMAGE_URL},
+        image="http://example.com/img.jpg",
+    )
+    assert result == [
+        {"type": "text", "text": "Look at this "},
+        {"type": "image_url", "image_url": {"url": "http://example.com/img.jpg"}},
+    ]
+
+    # Test mixed text and image template
+    result = split_message_into_chunks(
+        "User {name} shared {image} yesterday",
+        {"name": MessageChunkType.TEXT, "image": MessageChunkType.IMAGE_URL},
+        name="Bob",
+        image="http://example.com/photo.jpg",
+    )
+    assert result == [
+        {"type": "text", "text": "User Bob shared "},
+        {"type": "image_url", "image_url": {"url": "http://example.com/photo.jpg"}},
+        {"type": "text", "text": " yesterday"},
+    ]
+
+    # Test multiple occurrences of same field
+    result = split_message_into_chunks(
+        "{name} is here. Hi {name}!", {"name": MessageChunkType.TEXT}, name="Dave"
+    )
+    assert result == [{"type": "text", "text": "Dave is here.
Hi Dave!"}] + + +@pytest.mark.vcr +def test_vision_runtime(): + + # test success + + runtime = AsyncLiteLLMVisionRuntime() + + batch = pd.DataFrame.from_records([{"input_name": "Carla", "input_age": 25}]) + + class Output(BaseModel): + name: str = Field(..., description="name:") + age: str = Field(..., description="age:") + + result = asyncio.run( + runtime.batch_to_batch( + batch, + input_template="My name is {input_name} and I am {input_age} years old.", + instructions_template="", + response_model=Output, + ) + ) + + # note age coerced to string + expected_result = pd.DataFrame.from_records( + [ + { + "name": "Carla", + "age": "25", + } + ] + ) + pd.testing.assert_frame_equal(result[["name", "age"]], expected_result) + + # assert all other columns (costs) are nonzero + assert ( + ( + result[ + [ + "_prompt_tokens", + "_completion_tokens", + "_prompt_cost_usd", + "_completion_cost_usd", + "_total_cost_usd", + ] + ] + > 0 + ) + .all() + .all() + ) + + # test failure + + runtime.api_key = "fake_api_key" + + result = asyncio.run( + runtime.batch_to_batch( + batch, + input_template="My name is {input_name} and I am {input_age} years old.", + instructions_template="", + response_model=Output, + ) + ) + + expected_result = pd.DataFrame.from_records( + [ + { + "_adala_error": True, + "_adala_message": "AuthenticationError", + "_adala_details": "litellm.AuthenticationError: AuthenticationError: OpenAIException - Error code: 401 - {'error': {'message': 'Incorrect API key provided: fake_api_key. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}", + } + ] + ) + pd.testing.assert_frame_equal( + result[["_adala_error", "_adala_message", "_adala_details"]], expected_result + ) + # assert only prompt costs are nonzero + assert ( + (result[["_prompt_tokens", "_prompt_cost_usd", "_total_cost_usd"]] > 0) + .all() + .all() + ) + assert (result[["_completion_tokens", "_completion_cost_usd"]] == 0).all().all() + + # test with image input + + runtime = AsyncLiteLLMVisionRuntime(model="gpt-4o-mini") + + batch = pd.DataFrame.from_records( + [ + { + "text": "What's in this image?", + "image": "https://upload.wikimedia.org/wikipedia/commons/thumb/e/ec/Mona_Lisa%2C_by_Leonardo_da_Vinci%2C_from_C2RMF_retouched.jpg/687px-Mona_Lisa%2C_by_Leonardo_da_Vinci%2C_from_C2RMF_retouched.jpg", + } + ] + ) + + class VisionOutput(BaseModel): + description: str = Field(..., description="Description of the image") + + result = asyncio.run( + runtime.batch_to_batch( + batch, + input_template="{text} {image}", + instructions_template="Describe what you see in the image.", + response_model=VisionOutput, + input_field_types={ + "text": MessageChunkType.TEXT, + "image": MessageChunkType.IMAGE_URL, + }, + ) + ) + + assert "mona lisa" in result["description"].iloc[0].lower() + assert ( + ( + result[ + [ + "_prompt_tokens", + "_completion_tokens", + "_prompt_cost_usd", + "_completion_cost_usd", + "_total_cost_usd", + ] + ] + > 0 + ) + .all() + .all() + )
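
Taken together, the changes above turn a templated record into an OpenAI-style multimodal chat payload; the recorded cassettes show the exact wire format. Schematically (URL illustrative), the runtime ends up sending:

messages = [
    {"role": "system", "content": "Describe what you see in the image."},
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "What's in this image? "},
            {"type": "image_url", "image_url": {"url": "https://example.com/painting.jpg"}},
        ],
    },
]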