@@ -20,15 +20,17 @@ async def get_provider_impl(
     from .agents import MetaReferenceAgentsImpl
 
     impl = MetaReferenceAgentsImpl(
-        config,
-        deps[Api.inference],
-        deps[Api.vector_io],
-        deps[Api.safety],
-        deps[Api.tool_runtime],
-        deps[Api.tool_groups],
-        deps[Api.conversations],
-        policy,
-        telemetry_enabled,
+        config=config,
+        inference_api=deps[Api.inference],
+        vector_io_api=deps[Api.vector_io],
+        safety_api=deps[Api.safety],
+        tool_runtime_api=deps[Api.tool_runtime],
+        tool_groups_api=deps[Api.tool_groups],
+        conversations_api=deps[Api.conversations],
+        prompts_api=deps[Api.prompts],
+        files_api=deps[Api.files],
+        telemetry_enabled=telemetry_enabled,
+        policy=policy,
     )
     await impl.initialize()
     return impl
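
Beyond the switch to keyword arguments, the visible change for callers is that the provider now pulls two more dependencies out of deps. A minimal sketch of what the registry must now supply; the concrete impl objects are hypothetical placeholders, and the argument order of get_provider_impl is assumed from the names used in its body:

    from llama_stack.providers.datatypes import Api  # assumed import path

    deps = {
        Api.inference: inference_impl,      # placeholder impl objects
        Api.vector_io: vector_io_impl,
        Api.safety: safety_impl,
        Api.tool_runtime: tool_runtime_impl,
        Api.tool_groups: tool_groups_impl,
        Api.conversations: conversations_impl,
        Api.prompts: prompts_impl,          # new: prompt template storage
        Api.files: files_impl,              # new: resolves file/image variables
    }
    impl = await get_provider_impl(config, deps, policy, telemetry_enabled)
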
@@ -32,13 +32,15 @@
 from llama_stack.apis.agents.openai_responses import OpenAIResponsePrompt, OpenAIResponseText
 from llama_stack.apis.common.responses import PaginatedResponse
 from llama_stack.apis.conversations import Conversations
+from llama_stack.apis.files import Files
 from llama_stack.apis.inference import (
     Inference,
     ToolConfig,
     ToolResponse,
     ToolResponseMessage,
     UserMessage,
 )
+from llama_stack.apis.prompts import Prompts
 from llama_stack.apis.safety import Safety
 from llama_stack.apis.tools import ToolGroups, ToolRuntime
 from llama_stack.apis.vector_io import VectorIO
@@ -66,6 +68,8 @@ def __init__(
         tool_runtime_api: ToolRuntime,
         tool_groups_api: ToolGroups,
         conversations_api: Conversations,
+        prompts_api: Prompts,
+        files_api: Files,
         policy: list[AccessRule],
         telemetry_enabled: bool = False,
     ):
@@ -77,7 +81,8 @@
         self.tool_groups_api = tool_groups_api
         self.conversations_api = conversations_api
         self.telemetry_enabled = telemetry_enabled
-
+        self.prompts_api = prompts_api
+        self.files_api = files_api
         self.in_memory_store = InmemoryKVStoreImpl()
         self.openai_responses_impl: OpenAIResponsesImpl | None = None
         self.policy = policy
@@ -94,6 +99,8 @@ async def initialize(self) -> None:
             vector_io_api=self.vector_io_api,
             safety_api=self.safety_api,
             conversations_api=self.conversations_api,
+            prompts_api=self.prompts_api,
+            files_api=self.files_api,
         )
 
     async def create_agent(
@@ -4,6 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
+import re
 import time
 import uuid
 from collections.abc import AsyncIterator
@@ -17,6 +18,8 @@
     ListOpenAIResponseObject,
     OpenAIDeleteResponseObject,
     OpenAIResponseInput,
+    OpenAIResponseInputMessageContentFile,
+    OpenAIResponseInputMessageContentImage,
     OpenAIResponseInputMessageContentText,
     OpenAIResponseInputTool,
     OpenAIResponseMessage,
@@ -31,11 +34,15 @@
 )
 from llama_stack.apis.conversations import Conversations
 from llama_stack.apis.conversations.conversations import ConversationItem
+from llama_stack.apis.files import Files
 from llama_stack.apis.inference import (
     Inference,
+    OpenAIChatCompletionContentPartParam,
     OpenAIMessageParam,
     OpenAISystemMessageParam,
+    OpenAIUserMessageParam,
 )
+from llama_stack.apis.prompts import Prompts
 from llama_stack.apis.safety import Safety
 from llama_stack.apis.tools import ToolGroups, ToolRuntime
 from llama_stack.apis.vector_io import VectorIO
@@ -49,6 +56,7 @@
 from .tool_executor import ToolExecutor
 from .types import ChatCompletionContext, ToolContext
 from .utils import (
+    convert_response_content_to_chat_content,
     convert_response_input_to_chat_messages,
     convert_response_text_to_chat_response_format,
     extract_guardrail_ids,
@@ -72,6 +80,8 @@ def __init__(
         vector_io_api: VectorIO,  # VectorIO
         safety_api: Safety,
         conversations_api: Conversations,
+        prompts_api: Prompts,
+        files_api: Files,
     ):
         self.inference_api = inference_api
         self.tool_groups_api = tool_groups_api
@@ -85,6 +95,8 @@ def __init__(
             tool_runtime_api=tool_runtime_api,
             vector_io_api=vector_io_api,
         )
+        self.prompts_api = prompts_api
+        self.files_api = files_api
 
     async def _prepend_previous_response(
         self,
@@ -125,11 +137,13 @@ async def _process_input_with_previous_response(
                 # Use stored messages directly and convert only new input
                 message_adapter = TypeAdapter(list[OpenAIMessageParam])
                 messages = message_adapter.validate_python(previous_response.messages)
-                new_messages = await convert_response_input_to_chat_messages(input, previous_messages=messages)
+                new_messages = await convert_response_input_to_chat_messages(
+                    input, previous_messages=messages, files_api=self.files_api
+                )
                 messages.extend(new_messages)
             else:
                 # Backward compatibility: reconstruct from inputs
-                messages = await convert_response_input_to_chat_messages(all_input)
+                messages = await convert_response_input_to_chat_messages(all_input, files_api=self.files_api)
 
             tool_context.recover_tools_from_previous_response(previous_response)
         elif conversation is not None:
@@ -141,7 +155,7 @@
             all_input = input
             if not conversation_items.data:
                 # First turn - just convert the new input
-                messages = await convert_response_input_to_chat_messages(input)
+                messages = await convert_response_input_to_chat_messages(input, files_api=self.files_api)
             else:
                 if not stored_messages:
                     all_input = conversation_items.data
@@ -157,14 +171,82 @@
                     all_input = input
 
                 messages = stored_messages or []
-                new_messages = await convert_response_input_to_chat_messages(all_input, previous_messages=messages)
+                new_messages = await convert_response_input_to_chat_messages(
+                    all_input, previous_messages=messages, files_api=self.files_api
+                )
                 messages.extend(new_messages)
         else:
             all_input = input
-            messages = await convert_response_input_to_chat_messages(all_input)
+            messages = await convert_response_input_to_chat_messages(all_input, files_api=self.files_api)
 
         return all_input, messages, tool_context
 
+    async def _prepend_prompt(
+        self,
+        messages: list[OpenAIMessageParam],
+        openai_response_prompt: OpenAIResponsePrompt | None,
+    ) -> None:
+        """Prepend a stored prompt template to messages, resolving text/image/file variables.
+
+        :param messages: List of OpenAIMessageParam objects
+        :param openai_response_prompt: (Optional) OpenAIResponsePrompt object with variables
+        :returns: None; messages is modified in place
+        """
+        if not openai_response_prompt or not openai_response_prompt.id:
+            return
+
+        prompt_version = int(openai_response_prompt.version) if openai_response_prompt.version else None
+        cur_prompt = await self.prompts_api.get_prompt(openai_response_prompt.id, prompt_version)
+
+        if not cur_prompt or not cur_prompt.prompt:
+            return
+
+        cur_prompt_text = cur_prompt.prompt
+        cur_prompt_variables = cur_prompt.variables
+
+        if not openai_response_prompt.variables:
+            messages.insert(0, OpenAISystemMessageParam(content=cur_prompt_text))
+            return
+
+        # Validate that all provided variables exist in the prompt
+        for name in openai_response_prompt.variables.keys():
+            if name not in cur_prompt_variables:
+                raise ValueError(f"Variable {name} not found in prompt {openai_response_prompt.id}")
+
+        # Separate text and media variables
+        text_substitutions = {}
+        media_content_parts: list[OpenAIChatCompletionContentPartParam] = []
+
+        for name, value in openai_response_prompt.variables.items():
+            # Text variable found
+            if isinstance(value, OpenAIResponseInputMessageContentText):
+                text_substitutions[name] = value.text
+
+            # Media variable found
+            elif isinstance(value, OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile):
+                converted_parts = await convert_response_content_to_chat_content([value], files_api=self.files_api)
+                if isinstance(converted_parts, list):
+                    media_content_parts.extend(converted_parts)
+
+                # E.g. {{product_photo}} becomes "[Image: product_photo]"
+                # This gives the model textual context about what media exists in the prompt
+                var_type = value.type.replace("input_", "").replace("_", " ").title()
+                text_substitutions[name] = f"[{var_type}: {name}]"
+
+        def replace_variable(match: re.Match[str]) -> str:
+            var_name = match.group(1).strip()
+            return str(text_substitutions.get(var_name, match.group(0)))
+
+        pattern = r"\{\{\s*(\w+)\s*\}\}"
+        processed_prompt_text = re.sub(pattern, replace_variable, cur_prompt_text)
+
+        # Insert system message with resolved text
+        messages.insert(0, OpenAISystemMessageParam(content=processed_prompt_text))
+
+        # If we have media, append a separate user message, since media parts are ingested via user messages
+        if media_content_parts:
+            messages.append(OpenAIUserMessageParam(content=media_content_parts))
+
     async def get_openai_response(
         self,
         response_id: str,
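
To make the variable resolution concrete, here is a worked example of what _prepend_prompt produces for a mixed text/media prompt. The prompt text and variable names are invented for illustration:

    # Stored prompt (as returned by prompts_api.get_prompt):
    #   prompt:    "Help {{ name }} evaluate {{ product_photo }}."
    #   variables: ["name", "product_photo"]
    #
    # Request-supplied variables:
    #   {"name": OpenAIResponseInputMessageContentText(text="Alice"),
    #    "product_photo": OpenAIResponseInputMessageContentImage(...)}
    #
    # After _prepend_prompt(messages, prompt):
    #   messages[0]  == OpenAISystemMessageParam(
    #       content="Help Alice evaluate [Image: product_photo].")
    #   messages[-1] == OpenAIUserMessageParam(content=[<converted image part>])
    #
    # The "[Image: product_photo]" placeholder comes from value.type
    # ("input_image" -> "Image"): the model gets textual context for the
    # media that is attached separately as a user message.
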
@@ -274,6 +356,7 @@ async def create_openai_response(
             input=input,
             conversation=conversation,
             model=model,
+            prompt=prompt,
             instructions=instructions,
             previous_response_id=previous_response_id,
             store=store,
@@ -325,6 +408,7 @@ async def _create_streaming_response(
         instructions: str | None = None,
         previous_response_id: str | None = None,
         conversation: str | None = None,
+        prompt: OpenAIResponsePrompt | None = None,
         store: bool | None = True,
         temperature: float | None = None,
         text: OpenAIResponseText | None = None,
@@ -345,6 +429,9 @@
         if instructions:
             messages.insert(0, OpenAISystemMessageParam(content=instructions))
 
+        # Prepend reusable prompt (if provided)
+        await self._prepend_prompt(messages, prompt)
+
         # Structured outputs
         response_format = await convert_response_text_to_chat_response_format(text)
 
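One subtlety worth noting: _prepend_prompt also uses insert(0, ...), and it runs after the instructions insert, so the resolved template lands ahead of the instructions message. A sketch of the final layout when both are supplied:

    # messages after both prepends (sketch):
    #   [OpenAISystemMessageParam(content=<resolved prompt template>),
    #    OpenAISystemMessageParam(content=instructions),
    #    ...converted input/conversation messages...,
    #    OpenAIUserMessageParam(content=[...media parts...])]  # only if media variables exist
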
@@ -367,6 +454,7 @@
             ctx=ctx,
             response_id=response_id,
             created_at=created_at,
+            prompt=prompt,
             text=text,
             max_infer_iters=max_infer_iters,
             tool_executor=self.tool_executor,
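
End to end, the new parameter is exercised roughly like this. A hedged sketch, not the canonical client call: the model id, prompt id, and file id are invented, and the exact constructor fields of the image content part are assumed:

    response = await impl.create_openai_response(
        input="What stands out in the photo?",
        model="llama3.2-3b",          # invented model id
        prompt=OpenAIResponsePrompt(
            id="prompt_123",          # invented prompt id
            version="1",              # optional; resolved via prompts_api.get_prompt
            variables={
                "name": OpenAIResponseInputMessageContentText(text="Alice"),
                "product_photo": OpenAIResponseInputMessageContentImage(
                    file_id="file_456",  # assumed field name; invented id
                ),
            },
        ),
    )

The _prepend_prompt step then injects the resolved template as the first system message and attaches the image as a trailing user message before inference runs.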