diff --git a/livekit-agents/livekit/agents/voice/agent_activity.py b/livekit-agents/livekit/agents/voice/agent_activity.py index e4d7fcdac5..50439cd6d6 100644 --- a/livekit-agents/livekit/agents/voice/agent_activity.py +++ b/livekit-agents/livekit/agents/voice/agent_activity.py @@ -777,17 +777,25 @@ def _generate_reply( tool_choice: NotGivenOr[llm.ToolChoice] = NOT_GIVEN, allow_interruptions: NotGivenOr[bool] = NOT_GIVEN, schedule_speech: bool = True, + ignore_agent_instructions: bool = False, ) -> SpeechHandle: if ( isinstance(self.llm, llm.RealtimeModel) - and self.llm.capabilities.turn_detection - and allow_interruptions is False ): - logger.warning( - "the RealtimeModel uses a server-side turn detection, allow_interruptions cannot be False when using VoiceAgent.generate_reply(), " # noqa: E501 - "disable turn_detection in the RealtimeModel and use VAD on the AgentTask/VoiceAgent instead" # noqa: E501 - ) - allow_interruptions = NOT_GIVEN + if ( + self.llm.capabilities.turn_detection + and allow_interruptions is False + ): + logger.warning( + "the RealtimeModel uses a server-side turn detection, allow_interruptions cannot be False when using VoiceAgent.generate_reply(), " # noqa: E501 + "disable turn_detection in the RealtimeModel and use VAD on the AgentTask/VoiceAgent instead" # noqa: E501 + ) + allow_interruptions = NOT_GIVEN + + if ignore_agent_instructions: + logger.warning( + "generate_reply cannot ignore agent instructions in RealtimeModel" # noqa: E501 + ) if self.llm is None: raise RuntimeError("trying to generate reply without an LLM model") @@ -823,10 +831,7 @@ def _generate_reply( ) elif isinstance(self.llm, llm.LLM): - # instructions used inside generate_reply are "extra" instructions. - # this matches the behavior of the Realtime API: - # https://platform.openai.com/docs/api-reference/realtime-client-events/response/create - if instructions: + if instructions and not ignore_agent_instructions: instructions = "\n".join([self._agent.instructions, instructions]) task = self._create_speech_task( diff --git a/livekit-agents/livekit/agents/voice/agent_session.py b/livekit-agents/livekit/agents/voice/agent_session.py index 03ed2bbc1f..bcc35df3e2 100644 --- a/livekit-agents/livekit/agents/voice/agent_session.py +++ b/livekit-agents/livekit/agents/voice/agent_session.py @@ -807,6 +807,7 @@ def generate_reply( instructions: NotGivenOr[str] = NOT_GIVEN, tool_choice: NotGivenOr[llm.ToolChoice] = NOT_GIVEN, allow_interruptions: NotGivenOr[bool] = NOT_GIVEN, + ignore_agent_instructions: bool = False, ) -> SpeechHandle: """Generate a reply for the agent to speak to the user. @@ -840,6 +841,7 @@ def generate_reply( instructions=instructions, tool_choice=tool_choice, allow_interruptions=allow_interruptions, + ignore_agent_instructions=ignore_agent_instructions, ) if run_state: run_state._watch_handle(handle) @@ -851,6 +853,7 @@ def generate_reply( instructions=instructions, tool_choice=tool_choice, allow_interruptions=allow_interruptions, + ignore_agent_instructions=ignore_agent_instructions, ) if run_state: run_state._watch_handle(handle)