diff --git a/livekit-agents/livekit/agents/voice/agent_activity.py b/livekit-agents/livekit/agents/voice/agent_activity.py
index e4d7fcdac5..50439cd6d6 100644
--- a/livekit-agents/livekit/agents/voice/agent_activity.py
+++ b/livekit-agents/livekit/agents/voice/agent_activity.py
@@ -777,17 +777,25 @@ def _generate_reply(
         tool_choice: NotGivenOr[llm.ToolChoice] = NOT_GIVEN,
         allow_interruptions: NotGivenOr[bool] = NOT_GIVEN,
         schedule_speech: bool = True,
+        ignore_agent_instructions: bool = False,
     ) -> SpeechHandle:
         if (
             isinstance(self.llm, llm.RealtimeModel)
-            and self.llm.capabilities.turn_detection
-            and allow_interruptions is False
         ):
-            logger.warning(
-                "the RealtimeModel uses a server-side turn detection, allow_interruptions cannot be False when using VoiceAgent.generate_reply(), "  # noqa: E501
-                "disable turn_detection in the RealtimeModel and use VAD on the AgentTask/VoiceAgent instead"  # noqa: E501
-            )
-            allow_interruptions = NOT_GIVEN
+            if (
+                self.llm.capabilities.turn_detection
+                and allow_interruptions is False
+            ):
+                logger.warning(
+                    "the RealtimeModel uses a server-side turn detection, allow_interruptions cannot be False when using VoiceAgent.generate_reply(), "  # noqa: E501
+                    "disable turn_detection in the RealtimeModel and use VAD on the AgentTask/VoiceAgent instead"  # noqa: E501
+                )
+                allow_interruptions = NOT_GIVEN
+
+            if ignore_agent_instructions:
+                logger.warning(
+                    "generate_reply cannot ignore agent instructions in RealtimeModel"  # noqa: E501
+                )
 
         if self.llm is None:
             raise RuntimeError("trying to generate reply without an LLM model")
@@ -823,10 +831,7 @@ def _generate_reply(
             )
 
         elif isinstance(self.llm, llm.LLM):
-            # instructions used inside generate_reply are "extra" instructions.
-            # this matches the behavior of the Realtime API:
-            # https://platform.openai.com/docs/api-reference/realtime-client-events/response/create
-            if instructions:
+            if instructions and not ignore_agent_instructions:
                 instructions = "\n".join([self._agent.instructions, instructions])
 
             task = self._create_speech_task(
diff --git a/livekit-agents/livekit/agents/voice/agent_session.py b/livekit-agents/livekit/agents/voice/agent_session.py
index 03ed2bbc1f..bcc35df3e2 100644
--- a/livekit-agents/livekit/agents/voice/agent_session.py
+++ b/livekit-agents/livekit/agents/voice/agent_session.py
@@ -807,6 +807,7 @@ def generate_reply(
         instructions: NotGivenOr[str] = NOT_GIVEN,
         tool_choice: NotGivenOr[llm.ToolChoice] = NOT_GIVEN,
         allow_interruptions: NotGivenOr[bool] = NOT_GIVEN,
+        ignore_agent_instructions: bool = False,
     ) -> SpeechHandle:
         """Generate a reply for the agent to speak to the user.
 
@@ -840,6 +841,7 @@ def generate_reply(
                 instructions=instructions,
                 tool_choice=tool_choice,
                 allow_interruptions=allow_interruptions,
+                ignore_agent_instructions=ignore_agent_instructions,
             )
             if run_state:
                 run_state._watch_handle(handle)
@@ -851,6 +853,7 @@ def generate_reply(
             instructions=instructions,
             tool_choice=tool_choice,
             allow_interruptions=allow_interruptions,
+            ignore_agent_instructions=ignore_agent_instructions,
         )
         if run_state:
             run_state._watch_handle(handle)