44# This source code is licensed under the terms described in the LICENSE file in
55# the root directory of this source tree.
66
7+ import re
78import time
89import uuid
910from collections .abc import AsyncIterator
1718 ListOpenAIResponseObject ,
1819 OpenAIDeleteResponseObject ,
1920 OpenAIResponseInput ,
21+ OpenAIResponseInputMessageContentFile ,
22+ OpenAIResponseInputMessageContentImage ,
2023 OpenAIResponseInputMessageContentText ,
2124 OpenAIResponseInputTool ,
2225 OpenAIResponseMessage ,
3134)
3235from llama_stack .apis .conversations import Conversations
3336from llama_stack .apis .conversations .conversations import ConversationItem
37+ from llama_stack .apis .files import Files
3438from llama_stack .apis .inference import (
3539 Inference ,
40+ OpenAIChatCompletionContentPartParam ,
41+ OpenAIChatCompletionContentPartTextParam ,
3642 OpenAIMessageParam ,
3743 OpenAISystemMessageParam ,
44+ OpenAIUserMessageParam ,
3845)
46+ from llama_stack .apis .prompts import Prompts
3947from llama_stack .apis .safety import Safety
4048from llama_stack .apis .tools import ToolGroups , ToolRuntime
4149from llama_stack .apis .vector_io import VectorIO
@@ -72,6 +80,8 @@ def __init__(
7280 vector_io_api : VectorIO , # VectorIO
7381 safety_api : Safety ,
7482 conversations_api : Conversations ,
83+ prompts_api : Prompts ,
84+ files_api : Files ,
7585 ):
7686 self .inference_api = inference_api
7787 self .tool_groups_api = tool_groups_api
@@ -85,6 +95,8 @@ def __init__(
8595 tool_runtime_api = tool_runtime_api ,
8696 vector_io_api = vector_io_api ,
8797 )
98+ self .prompts_api = prompts_api
99+ self .files_api = files_api
88100
89101 async def _prepend_previous_response (
90102 self ,
@@ -125,11 +137,13 @@ async def _process_input_with_previous_response(
125137 # Use stored messages directly and convert only new input
126138 message_adapter = TypeAdapter (list [OpenAIMessageParam ])
127139 messages = message_adapter .validate_python (previous_response .messages )
128- new_messages = await convert_response_input_to_chat_messages (input , previous_messages = messages )
140+ new_messages = await convert_response_input_to_chat_messages (
141+ input , previous_messages = messages , files_api = self .files_api
142+ )
129143 messages .extend (new_messages )
130144 else :
131145 # Backward compatibility: reconstruct from inputs
132- messages = await convert_response_input_to_chat_messages (all_input )
146+ messages = await convert_response_input_to_chat_messages (all_input , files_api = self . files_api )
133147
134148 tool_context .recover_tools_from_previous_response (previous_response )
135149 elif conversation is not None :
@@ -141,7 +155,7 @@ async def _process_input_with_previous_response(
141155 all_input = input
142156 if not conversation_items .data :
143157 # First turn - just convert the new input
144- messages = await convert_response_input_to_chat_messages (input )
158+ messages = await convert_response_input_to_chat_messages (input , files_api = self . files_api )
145159 else :
146160 if not stored_messages :
147161 all_input = conversation_items .data
@@ -157,14 +171,118 @@ async def _process_input_with_previous_response(
157171 all_input = input
158172
159173 messages = stored_messages or []
160- new_messages = await convert_response_input_to_chat_messages (all_input , previous_messages = messages )
174+ new_messages = await convert_response_input_to_chat_messages (
175+ all_input , previous_messages = messages , files_api = self .files_api
176+ )
161177 messages .extend (new_messages )
162178 else :
163179 all_input = input
164- messages = await convert_response_input_to_chat_messages (all_input )
180+ messages = await convert_response_input_to_chat_messages (all_input , files_api = self . files_api )
165181
166182 return all_input , messages , tool_context
167183
async def _prepend_prompt(
    self,
    messages: list[OpenAIMessageParam],
    openai_response_prompt: OpenAIResponsePrompt | None,
) -> None:
    """Prepend a stored prompt template to ``messages``, resolving its variables.

    The template is fetched through ``self.prompts_api`` and inserted as a system
    message at index 0 of ``messages``; the list is mutated in place.

    Variable handling:
      * Text variables replace their ``{{name}}`` placeholder with the text.
      * Image/file variables are converted to chat-completion content parts and
        prepended to the first user message; their placeholder is replaced by a
        textual marker such as ``[Image: product_photo]``.
      * Supplied variables of any other type are ignored.
      * Placeholders with no matching supplied variable are left untouched.

    Args:
        messages: Chat messages to mutate.
        openai_response_prompt: Reference to the stored prompt (id, optional
            version, optional variables). Nothing happens when it is ``None``
            or has no id.

    Raises:
        ValueError: If a supplied variable name is not declared by the prompt,
            or if the prompt version cannot be converted with ``int()``.
    """
    if not openai_response_prompt or not openai_response_prompt.id:
        return

    prompt_version = int(openai_response_prompt.version) if openai_response_prompt.version else None
    cur_prompt = await self.prompts_api.get_prompt(openai_response_prompt.id, prompt_version)

    if not cur_prompt or not cur_prompt.prompt:
        return

    cur_prompt_text = cur_prompt.prompt
    supplied_variables = openai_response_prompt.variables

    if not supplied_variables:
        # Nothing to substitute: the raw template becomes the system message.
        messages.insert(0, OpenAISystemMessageParam(content=cur_prompt_text))
        return

    # Validate every supplied name up front, before any media conversion runs.
    # A prompt without declared variables is treated as declaring none, so the
    # caller gets the ValueError below rather than a TypeError from ``in``.
    declared_variables = cur_prompt.variables or ()
    for name in supplied_variables:
        if name not in declared_variables:
            raise ValueError(f"Variable {name} not found in prompt {openai_response_prompt.id}")

    # Reuse the existing converter to reach the OpenAI Chat Completion format.
    # Imported once here instead of on every media variable inside the loop.
    from .utils import convert_response_content_to_chat_content

    text_substitutions: dict[str, str] = {}
    media_content_parts: list[OpenAIChatCompletionContentPartParam] = []

    for name, value in supplied_variables.items():
        if isinstance(value, OpenAIResponseInputMessageContentText):
            text_substitutions[name] = value.text
        elif isinstance(value, OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile):
            converted_parts = await convert_response_content_to_chat_content([value], files_api=self.files_api)
            # Only list results carry content parts; anything else is dropped.
            if isinstance(converted_parts, list):
                media_content_parts.extend(converted_parts)

            # Eg: {{product_photo}} becomes "[Image: product_photo]"
            # This gives the model textual context about what media exists in the prompt
            var_type = value.type.replace("input_", "").replace("_", " ").title()
            text_substitutions[name] = f"[{var_type}: {name}]"

    def replace_variable(match: re.Match[str]) -> str:
        """Return the substitution for one placeholder, or the placeholder itself."""
        var_name = match.group(1).strip()
        return str(text_substitutions.get(var_name, match.group(0)))

    pattern = r"\{\{\s*(\w+)\s*\}\}"
    resolved_prompt_text = re.sub(pattern, replace_variable, cur_prompt_text)

    # Insert system message with resolved text
    messages.insert(0, OpenAISystemMessageParam(content=resolved_prompt_text))

    # If we have media, prepend to first user message
    if media_content_parts:
        self._prepend_media_into_first_user_message(messages, media_content_parts)
251+
def _prepend_media_into_first_user_message(
    self, messages: list[OpenAIMessageParam], media_parts: list[OpenAIChatCompletionContentPartParam]
) -> None:
    """Prepend media content parts into the first user message.

    ``messages`` is mutated in place. The first ``OpenAIUserMessageParam`` found
    is replaced by a new user message whose content is ``media_parts`` followed
    by the message's existing content (string content is first wrapped in a
    text part). When no user message exists, a new one containing only
    ``media_parts`` is appended.

    Args:
        messages: Chat messages to mutate.
        media_parts: Content parts to place ahead of the user's own content.
    """
    for index, message in enumerate(messages):
        # Non-user messages (e.g. a leading system message) are skipped.
        if not isinstance(message, OpenAIUserMessageParam):
            continue

        # Convert string content to parts if needed, otherwise use existing parts directly
        existing_parts: list[OpenAIChatCompletionContentPartParam]
        if isinstance(message.content, str):
            existing_parts = [OpenAIChatCompletionContentPartTextParam(text=message.content)]
        else:
            existing_parts = message.content

        # Media goes before the user's content; a new message object is built
        # rather than mutating the existing one.
        messages[index] = OpenAIUserMessageParam(
            content=[*media_parts, *existing_parts],
            name=getattr(message, "name", None),
        )
        return

    # No user message exists - append one with just media
    messages.append(OpenAIUserMessageParam(content=media_parts))
285+
168286 async def get_openai_response (
169287 self ,
170288 response_id : str ,
@@ -274,6 +392,7 @@ async def create_openai_response(
274392 input = input ,
275393 conversation = conversation ,
276394 model = model ,
395+ prompt = prompt ,
277396 instructions = instructions ,
278397 previous_response_id = previous_response_id ,
279398 store = store ,
@@ -325,6 +444,7 @@ async def _create_streaming_response(
325444 instructions : str | None = None ,
326445 previous_response_id : str | None = None ,
327446 conversation : str | None = None ,
447+ prompt : OpenAIResponsePrompt | None = None ,
328448 store : bool | None = True ,
329449 temperature : float | None = None ,
330450 text : OpenAIResponseText | None = None ,
@@ -345,6 +465,9 @@ async def _create_streaming_response(
345465 if instructions :
346466 messages .insert (0 , OpenAISystemMessageParam (content = instructions ))
347467
468+ # Prepend reusable prompt (if provided)
469+ await self ._prepend_prompt (messages , prompt )
470+
348471 # Structured outputs
349472 response_format = await convert_response_text_to_chat_response_format (text )
350473
@@ -367,6 +490,7 @@ async def _create_streaming_response(
367490 ctx = ctx ,
368491 response_id = response_id ,
369492 created_at = created_at ,
493+ prompt = prompt ,
370494 text = text ,
371495 max_infer_iters = max_infer_iters ,
372496 tool_executor = self .tool_executor ,
0 commit comments