Commit 2f80636

feat(responses)!: implement support for OpenAI compatible prompts in Responses API

1 parent b90c6a2 commit 2f80636
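For context, here is a minimal sketch of what this commit enables: passing a stored, versioned prompt with variables through the OpenAI-compatible Responses API. The base URL, model name, prompt ID, and variable names below are hypothetical placeholders, not values taken from this commit.

# Sketch only: assumes a Llama Stack server with the Responses API enabled
# and a prompt previously registered via the Prompts API.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

response = client.responses.create(
    model="llama3.2:3b",  # hypothetical model name
    prompt={
        "id": "pmpt_123",  # hypothetical prompt ID
        "version": "1",
        "variables": {
            # Text variable: substituted into {{customer_name}} in the template.
            "customer_name": {"type": "input_text", "text": "Alice"},
            # Media variable: attached to the first user message.
            "product_photo": {"type": "input_image", "image_url": "https://example.com/photo.jpg"},
        },
    },
    input="Write a caption for the attached photo.",
)
print(response.output_text)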

File tree

10 files changed: +851 −26 lines

src/llama_stack/providers/inline/agents/meta_reference/__init__.py

Lines changed: 11 additions & 9 deletions

@@ -20,15 +20,17 @@ async def get_provider_impl(
     from .agents import MetaReferenceAgentsImpl
 
     impl = MetaReferenceAgentsImpl(
-        config,
-        deps[Api.inference],
-        deps[Api.vector_io],
-        deps[Api.safety],
-        deps[Api.tool_runtime],
-        deps[Api.tool_groups],
-        deps[Api.conversations],
-        policy,
-        telemetry_enabled,
+        config=config,
+        inference_api=deps[Api.inference],
+        vector_io_api=deps[Api.vector_io],
+        safety_api=deps[Api.safety],
+        tool_runtime_api=deps[Api.tool_runtime],
+        tool_groups_api=deps[Api.tool_groups],
+        conversations_api=deps[Api.conversations],
+        prompts_api=deps[Api.prompts],
+        files_api=deps[Api.files],
+        telemetry_enabled=telemetry_enabled,
+        policy=policy,
     )
     await impl.initialize()
     return impl

src/llama_stack/providers/inline/agents/meta_reference/agents.py

Lines changed: 8 additions & 1 deletion

@@ -32,13 +32,15 @@
 from llama_stack.apis.agents.openai_responses import OpenAIResponsePrompt, OpenAIResponseText
 from llama_stack.apis.common.responses import PaginatedResponse
 from llama_stack.apis.conversations import Conversations
+from llama_stack.apis.files import Files
 from llama_stack.apis.inference import (
     Inference,
     ToolConfig,
     ToolResponse,
     ToolResponseMessage,
     UserMessage,
 )
+from llama_stack.apis.prompts import Prompts
 from llama_stack.apis.safety import Safety
 from llama_stack.apis.tools import ToolGroups, ToolRuntime
 from llama_stack.apis.vector_io import VectorIO
@@ -66,6 +68,8 @@ def __init__(
         tool_runtime_api: ToolRuntime,
         tool_groups_api: ToolGroups,
         conversations_api: Conversations,
+        prompts_api: Prompts,
+        files_api: Files,
         policy: list[AccessRule],
         telemetry_enabled: bool = False,
     ):
@@ -77,7 +81,8 @@ def __init__(
         self.tool_groups_api = tool_groups_api
         self.conversations_api = conversations_api
         self.telemetry_enabled = telemetry_enabled
-
+        self.prompts_api = prompts_api
+        self.files_api = files_api
         self.in_memory_store = InmemoryKVStoreImpl()
         self.openai_responses_impl: OpenAIResponsesImpl | None = None
         self.policy = policy
@@ -94,6 +99,8 @@ async def initialize(self) -> None:
             vector_io_api=self.vector_io_api,
             safety_api=self.safety_api,
             conversations_api=self.conversations_api,
+            prompts_api=self.prompts_api,
+            files_api=self.files_api,
         )
 
     async def create_agent(

src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py

Lines changed: 129 additions & 5 deletions

@@ -4,6 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
+import re
 import time
 import uuid
 from collections.abc import AsyncIterator
@@ -17,6 +18,8 @@
     ListOpenAIResponseObject,
     OpenAIDeleteResponseObject,
     OpenAIResponseInput,
+    OpenAIResponseInputMessageContentFile,
+    OpenAIResponseInputMessageContentImage,
     OpenAIResponseInputMessageContentText,
     OpenAIResponseInputTool,
     OpenAIResponseMessage,
@@ -31,11 +34,16 @@
 )
 from llama_stack.apis.conversations import Conversations
 from llama_stack.apis.conversations.conversations import ConversationItem
+from llama_stack.apis.files import Files
 from llama_stack.apis.inference import (
     Inference,
+    OpenAIChatCompletionContentPartParam,
+    OpenAIChatCompletionContentPartTextParam,
     OpenAIMessageParam,
     OpenAISystemMessageParam,
+    OpenAIUserMessageParam,
 )
+from llama_stack.apis.prompts import Prompts
 from llama_stack.apis.safety import Safety
 from llama_stack.apis.tools import ToolGroups, ToolRuntime
 from llama_stack.apis.vector_io import VectorIO
@@ -72,6 +80,8 @@ def __init__(
         vector_io_api: VectorIO,  # VectorIO
         safety_api: Safety,
         conversations_api: Conversations,
+        prompts_api: Prompts,
+        files_api: Files,
     ):
         self.inference_api = inference_api
         self.tool_groups_api = tool_groups_api
@@ -85,6 +95,8 @@ def __init__(
             tool_runtime_api=tool_runtime_api,
             vector_io_api=vector_io_api,
         )
+        self.prompts_api = prompts_api
+        self.files_api = files_api
 
     async def _prepend_previous_response(
         self,
@@ -125,11 +137,13 @@ async def _process_input_with_previous_response(
                 # Use stored messages directly and convert only new input
                 message_adapter = TypeAdapter(list[OpenAIMessageParam])
                 messages = message_adapter.validate_python(previous_response.messages)
-                new_messages = await convert_response_input_to_chat_messages(input, previous_messages=messages)
+                new_messages = await convert_response_input_to_chat_messages(
+                    input, previous_messages=messages, files_api=self.files_api
+                )
                 messages.extend(new_messages)
             else:
                 # Backward compatibility: reconstruct from inputs
-                messages = await convert_response_input_to_chat_messages(all_input)
+                messages = await convert_response_input_to_chat_messages(all_input, files_api=self.files_api)
 
             tool_context.recover_tools_from_previous_response(previous_response)
         elif conversation is not None:
@@ -141,7 +155,7 @@ async def _process_input_with_previous_response(
             all_input = input
             if not conversation_items.data:
                 # First turn - just convert the new input
-                messages = await convert_response_input_to_chat_messages(input)
+                messages = await convert_response_input_to_chat_messages(input, files_api=self.files_api)
             else:
                 if not stored_messages:
                     all_input = conversation_items.data
@@ -157,14 +171,118 @@ async def _process_input_with_previous_response(
                     all_input = input
 
                 messages = stored_messages or []
-                new_messages = await convert_response_input_to_chat_messages(all_input, previous_messages=messages)
+                new_messages = await convert_response_input_to_chat_messages(
+                    all_input, previous_messages=messages, files_api=self.files_api
+                )
                 messages.extend(new_messages)
         else:
             all_input = input
-            messages = await convert_response_input_to_chat_messages(all_input)
+            messages = await convert_response_input_to_chat_messages(all_input, files_api=self.files_api)
 
         return all_input, messages, tool_context
 
+    async def _prepend_prompt(
+        self,
+        messages: list[OpenAIMessageParam],
+        openai_response_prompt: OpenAIResponsePrompt | None,
+    ) -> None:
+        """Prepend prompt template to messages, resolving text/image/file variables.
+
+        For text-only prompts: Inserts as system message
+        For prompts with media: Inserts text as system message + media into first user message
+        """
+        if not openai_response_prompt or not openai_response_prompt.id:
+            return
+
+        prompt_version = int(openai_response_prompt.version) if openai_response_prompt.version else None
+        cur_prompt = await self.prompts_api.get_prompt(openai_response_prompt.id, prompt_version)
+
+        if not cur_prompt or not cur_prompt.prompt:
+            return
+
+        cur_prompt_text = cur_prompt.prompt
+        cur_prompt_variables = cur_prompt.variables
+
+        if not openai_response_prompt.variables:
+            messages.insert(0, OpenAISystemMessageParam(content=cur_prompt_text))
+            return
+
+        # Validate that all provided variables exist in the prompt
+        for name in openai_response_prompt.variables.keys():
+            if name not in cur_prompt_variables:
+                raise ValueError(f"Variable {name} not found in prompt {openai_response_prompt.id}")
+
+        # Separate text and media variables
+        text_substitutions = {}
+        media_content_parts: list[OpenAIChatCompletionContentPartParam] = []
+
+        for name, value in openai_response_prompt.variables.items():
+            # Text variable found
+            if isinstance(value, OpenAIResponseInputMessageContentText):
+                text_substitutions[name] = value.text
+
+            # Media variable found
+            elif isinstance(value, OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile):
+                # use existing converter to achieve OpenAI Chat Completion format
+                from .utils import convert_response_content_to_chat_content
+
+                converted_parts = await convert_response_content_to_chat_content([value], files_api=self.files_api)
+                if isinstance(converted_parts, list):
+                    media_content_parts.extend(converted_parts)
+
+                # Eg: {{product_photo}} becomes "[Image: product_photo]"
+                # This gives the model textual context about what media exists in the prompt
+                var_type = value.type.replace("input_", "").replace("_", " ").title()
+                text_substitutions[name] = f"[{var_type}: {name}]"
+
+        def replace_variable(match: re.Match[str]) -> str:
+            var_name = match.group(1).strip()
+            return str(text_substitutions.get(var_name, match.group(0)))
+
+        pattern = r"\{\{\s*(\w+)\s*\}\}"
+        resolved_prompt_text = re.sub(pattern, replace_variable, cur_prompt_text)
+
+        # Insert system message with resolved text
+        messages.insert(0, OpenAISystemMessageParam(content=resolved_prompt_text))
+
+        # If we have media, prepend to first user message
+        if media_content_parts:
+            self._prepend_media_into_first_user_message(messages, media_content_parts)
+
+    def _prepend_media_into_first_user_message(
+        self, messages: list[OpenAIMessageParam], media_parts: list[OpenAIChatCompletionContentPartParam]
+    ) -> None:
+        """Prepend media content parts into the first user message."""
+
+        # Find first user message (skip the system message we just added)
+        first_user_msg_index = None
+        for i, message in enumerate(messages):
+            if isinstance(message, OpenAIUserMessageParam):
+                first_user_msg_index = i
+                break
+
+        if first_user_msg_index is not None:
+            user_msg = messages[first_user_msg_index]
+            assert isinstance(user_msg, OpenAIUserMessageParam)
+
+            # Convert string content to parts if needed, otherwise use existing parts directly
+            existing_parts: list[OpenAIChatCompletionContentPartParam]
+            if isinstance(user_msg.content, str):
+                existing_parts = [OpenAIChatCompletionContentPartTextParam(text=user_msg.content)]
+            else:
+                existing_parts = user_msg.content
+
+            # Prepend media before user's content
+            combined_parts = media_parts + existing_parts
+
+            messages[first_user_msg_index] = OpenAIUserMessageParam(
+                content=combined_parts, name=user_msg.name if hasattr(user_msg, "name") else None
+            )
+
+        else:
+            # No user message exists - append one with just media
+            messages.append(OpenAIUserMessageParam(content=media_parts))
+
     async def get_openai_response(
         self,
         response_id: str,
@@ -274,6 +392,7 @@ async def create_openai_response(
             input=input,
             conversation=conversation,
             model=model,
+            prompt=prompt,
             instructions=instructions,
             previous_response_id=previous_response_id,
             store=store,
@@ -325,6 +444,7 @@ async def _create_streaming_response(
         instructions: str | None = None,
         previous_response_id: str | None = None,
         conversation: str | None = None,
+        prompt: OpenAIResponsePrompt | None = None,
         store: bool | None = True,
         temperature: float | None = None,
         text: OpenAIResponseText | None = None,
@@ -345,6 +465,9 @@ async def _create_streaming_response(
         if instructions:
             messages.insert(0, OpenAISystemMessageParam(content=instructions))
 
+        # Prepend reusable prompt (if provided)
+        await self._prepend_prompt(messages, prompt)
+
         # Structured outputs
         response_format = await convert_response_text_to_chat_response_format(text)
 
@@ -367,6 +490,7 @@ async def _create_streaming_response(
             ctx=ctx,
             response_id=response_id,
             created_at=created_at,
+            prompt=prompt,
             text=text,
             max_infer_iters=max_infer_iters,
             tool_executor=self.tool_executor,
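The heart of _prepend_prompt above is a plain regex substitution over {{name}} placeholders: text variables are substituted verbatim, while media variables become textual placeholders such as "[Image: product_photo]" (the actual media parts are attached to the first user message separately). A self-contained sketch of that step, using a made-up template and variable values:

# Standalone sketch of the substitution step from _prepend_prompt.
import re

template = "Greet {{ customer_name }} and describe {{product_photo}}."
text_substitutions = {
    "customer_name": "Alice",  # from an input_text variable
    "product_photo": "[Image: product_photo]",  # placeholder for an input_image variable
}

def replace_variable(match: re.Match[str]) -> str:
    var_name = match.group(1).strip()
    # Unknown names fall back to match.group(0), i.e. the raw "{{name}}" is kept
    return str(text_substitutions.get(var_name, match.group(0)))

pattern = r"\{\{\s*(\w+)\s*\}\}"
print(re.sub(pattern, replace_variable, template))
# Greet Alice and describe [Image: product_photo].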
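One consequence of the diff above: _create_streaming_response first inserts the instructions system message at index 0, and _prepend_prompt then also inserts at index 0, so the resolved prompt template ends up ahead of the instructions message. A toy sketch, with plain dicts standing in for the OpenAI*MessageParam models:

# Sketch of the resulting message order in _create_streaming_response.
messages = [{"role": "user", "content": "Hi"}]

# Step 1: instructions are inserted at index 0 (if provided)
messages.insert(0, {"role": "system", "content": "<instructions>"})

# Step 2: _prepend_prompt also inserts at index 0, landing before instructions
messages.insert(0, {"role": "system", "content": "<resolved prompt template>"})

assert [m["content"] for m in messages] == [
    "<resolved prompt template>",
    "<instructions>",
    "Hi",
]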
