25 changes: 23 additions & 2 deletions contributing/samples/live_bidi_streaming_multi_agent/agent.py
@@ -42,6 +42,13 @@ def roll_die(sides: int) -> int:
),
]
),
speech_config=types.SpeechConfig(
voice_config=types.VoiceConfig(
prebuilt_voice_config=types.PrebuiltVoiceConfig(
voice_name="Kore",
)
)
),
)


@@ -85,6 +92,13 @@ def check_prime(nums: list[int]) -> str:
),
]
),
speech_config=types.SpeechConfig(
voice_config=types.VoiceConfig(
prebuilt_voice_config=types.PrebuiltVoiceConfig(
voice_name="Puck",
)
)
),
)


@@ -100,8 +114,8 @@ def get_current_weather(location: str):

root_agent = Agent(
# find supported models here: https://google.github.io/adk-docs/get-started/streaming/quickstart-streaming/
model="gemini-2.0-flash-live-preview-04-09", # for Vertex project
# model="gemini-live-2.5-flash-preview", # for AI studio key
# model="gemini-2.0-flash-live-preview-04-09", # for Vertex project
model="gemini-live-2.5-flash-preview", # for AI studio key
name="root_agent",
instruction="""
You are a helpful assistant that can check time, roll dice and check if numbers are prime.
@@ -126,4 +140,11 @@ def get_current_weather(location: str):
),
]
),
speech_config=types.SpeechConfig(
voice_config=types.VoiceConfig(
prebuilt_voice_config=types.PrebuiltVoiceConfig(
voice_name="Zephyr",
)
)
),
)
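
For reference, the pattern each sample agent now follows boils down to this sketch; the agent name, model, and instruction are illustrative, and the imports assume the usual ADK layout:

from google.adk.agents import Agent
from google.genai import types

# Each sub-agent carries its own prebuilt voice. It only takes effect in
# live/streaming runs (run_live) with a speech-capable Live model.
roll_agent = Agent(
    name="roll_agent",
    model="gemini-live-2.5-flash-preview",  # for AI studio key
    instruction="You roll dice for the user.",
    speech_config=types.SpeechConfig(
        voice_config=types.VoiceConfig(
            prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name="Puck")
        )
    ),
)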
16 changes: 16 additions & 0 deletions src/google/adk/agents/llm_agent.py
@@ -263,6 +263,9 @@ class LlmAgent(BaseAgent):
settings, etc.
"""

speech_config: Optional[types.SpeechConfig] = None
"""The agent's speech configurations."""

# LLM-based agent transfer configs - Start
disallow_transfer_to_parent: bool = False
"""Disallows LLM-controlled transferring to the parent agent.
@@ -697,6 +700,7 @@ def __maybe_save_output_to_state(self, event: Event):
@model_validator(mode='after')
def __model_validator_after(self) -> LlmAgent:
self.__check_output_schema()
self.__check_speech_config()
return self

def __check_output_schema(self):
@@ -722,6 +726,16 @@ def __check_output_schema(self):
' sub_agents must be empty to disable agent transfer.'
)

def __check_speech_config(self):
if self.speech_config:
logger.warning(
'Agent %s has a speech_config set. This configuration is only'
' effective when using the agent in a live/streaming mode'
' (e.g., via run_live) and with a model that supports speech'
' input/output.',
self.name,
)

@field_validator('generate_content_config', mode='after')
@classmethod
def validate_generate_content_config(
@@ -851,6 +865,8 @@ def _parse_config(
)
if config.generate_content_config:
kwargs['generate_content_config'] = config.generate_content_config
if config.speech_config:
kwargs['speech_config'] = config.speech_config

return kwargs

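To see the new validator fire, constructing an agent with a speech_config is enough; this is a minimal sketch, assuming logging is configured to surface warnings:

import logging

from google.adk.agents import LlmAgent
from google.genai import types

logging.basicConfig(level=logging.WARNING)

# __model_validator_after runs on construction; __check_speech_config logs a
# reminder that speech_config matters only for run_live with a speech model.
agent = LlmAgent(
    name="voice_agent",
    model="gemini-live-2.5-flash-preview",
    speech_config=types.SpeechConfig(
        voice_config=types.VoiceConfig(
            prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name="Kore")
        )
    ),
)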
4 changes: 4 additions & 0 deletions src/google/adk/agents/llm_agent_config.py
@@ -188,3 +188,7 @@ class LlmAgentConfig(BaseAgentConfig):
generate_content_config: Optional[types.GenerateContentConfig] = Field(
default=None, description='Optional. LlmAgent.generate_content_config.'
)

speech_config: Optional[types.SpeechConfig] = Field(
default=None, description='Optional. LlmAgent.speech_config.'
)
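
A sketch of the declarative path this field enables; speech_config is the field added above, while the name and model arguments are assumptions about the surrounding config schema:

from google.adk.agents.llm_agent_config import LlmAgentConfig
from google.genai import types

# _parse_config in llm_agent.py copies config.speech_config into the
# LlmAgent kwargs, so a config-defined agent gets the same voice handling.
config = LlmAgentConfig(
    name="voice_agent",
    model="gemini-live-2.5-flash-preview",
    speech_config=types.SpeechConfig(
        voice_config=types.VoiceConfig(
            prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name="Kore")
        )
    ),
)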
5 changes: 4 additions & 1 deletion src/google/adk/agents/run_config.py
@@ -35,7 +35,10 @@ class StreamingMode(Enum):


class RunConfig(BaseModel):
"""Configs for runtime behavior of agents."""
"""Configs for runtime behavior of agents.

The configs here will be overriden by agent-specific configurations.
"""

model_config = ConfigDict(
extra='forbid',
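The docstring change above pairs with usage like this sketch: RunConfig supplies a runtime-wide default voice, and any agent that sets its own speech_config overrides it (voice name illustrative):

from google.adk.agents.run_config import RunConfig
from google.genai import types

# Fallback voice for every agent in the run; agents with their own
# speech_config (as in the samples above) take precedence in live mode.
run_config = RunConfig(
    speech_config=types.SpeechConfig(
        voice_config=types.VoiceConfig(
            prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name="Zephyr")
        )
    )
)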
7 changes: 5 additions & 2 deletions src/google/adk/flows/llm_flows/basic.py
@@ -58,9 +58,12 @@ async def run_async(
llm_request.live_connect_config.response_modalities = (
invocation_context.run_config.response_modalities
)
- llm_request.live_connect_config.speech_config = (
- invocation_context.run_config.speech_config
- )
+
+ speech_config_to_use = (
+ agent.speech_config or invocation_context.run_config.speech_config
+ )
+ llm_request.live_connect_config.speech_config = speech_config_to_use
+
llm_request.live_connect_config.output_audio_transcription = (
invocation_context.run_config.output_audio_transcription
)
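The precedence itself is a single or-expression: a non-None agent config shadows the runtime default, and None falls through. As a standalone illustration (resolve_speech_config is a hypothetical helper, not ADK API):

from typing import Optional

from google.genai import types


def resolve_speech_config(
    agent_value: Optional[types.SpeechConfig],
    run_config_value: Optional[types.SpeechConfig],
) -> Optional[types.SpeechConfig]:
    # Mirrors basic.py: the agent-level config wins; otherwise use the
    # RunConfig value, which may itself be None when neither is set.
    return agent_value or run_config_value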
26 changes: 26 additions & 0 deletions tests/unittests/agents/test_llm_agent_fields.py
@@ -167,6 +167,32 @@ async def _global_instruction_provider(ctx: ReadonlyContext) -> str:
assert bypass_state_injection


def test_speech_config_logs_warning(caplog: pytest.LogCaptureFixture):
with caplog.at_level('WARNING'):

agent = LlmAgent(
name='test_agent',
speech_config=types.SpeechConfig(
voice_config=types.VoiceConfig(
prebuilt_voice_config=types.PrebuiltVoiceConfig(
voice_name='Kore',
)
)
),
)

assert agent.speech_config is not None
assert (
agent.speech_config.voice_config.prebuilt_voice_config.voice_name
== 'Kore'
)
assert (
'Agent test_agent has a speech_config set.' in caplog.text
and 'only effective when using the agent in a live/streaming mode'
in caplog.text
)


def test_output_schema_will_disable_transfer(caplog: pytest.LogCaptureFixture):
with caplog.at_level('WARNING'):

126 changes: 124 additions & 2 deletions tests/unittests/flows/llm_flows/test_basic_processor.py
@@ -21,6 +21,7 @@
from google.adk.models.llm_request import LlmRequest
from google.adk.sessions.in_memory_session_service import InMemorySessionService
from google.adk.tools.function_tool import FunctionTool
from google.genai import types
from pydantic import BaseModel
from pydantic import Field
import pytest
@@ -38,7 +39,9 @@ def dummy_tool(query: str) -> str:
return f'Result: {query}'


- async def _create_invocation_context(agent: LlmAgent) -> InvocationContext:
+ async def _create_invocation_context(
+ agent: LlmAgent, run_config: RunConfig = RunConfig()
+ ) -> InvocationContext:
"""Helper to create InvocationContext for testing."""
session_service = InMemorySessionService()
session = await session_service.create_session(
@@ -49,7 +52,7 @@ async def _create_invocation_context(agent: LlmAgent) -> InvocationContext:
agent=agent,
session=session,
session_service=session_service,
- run_config=RunConfig(),
+ run_config=run_config,
)


@@ -143,3 +146,122 @@ async def test_sets_model_name(self):

# Should have set the model name
assert llm_request.model == 'gemini-1.5-flash'

@pytest.mark.asyncio
async def test_speech_config_agent_overrides_run_config(self):
"""Tests that agent's speech_config is prioritized over the RunConfig's."""
agent_speech_config = types.SpeechConfig(
voice_config=types.VoiceConfig(
prebuilt_voice_config=types.PrebuiltVoiceConfig(
voice_name='Kore',
)
)
)
run_speech_config = types.SpeechConfig(
voice_config=types.VoiceConfig(
prebuilt_voice_config=types.PrebuiltVoiceConfig(
voice_name='Puck',
)
)
)

agent = LlmAgent(
name='test_agent',
model='gemini-1.5-flash',
speech_config=agent_speech_config,
)
run_config = RunConfig(speech_config=run_speech_config)
invocation_context = await _create_invocation_context(agent, run_config)
llm_request = LlmRequest()
processor = _BasicLlmRequestProcessor()

# Process the request
async for _ in processor.run_async(invocation_context, llm_request):
pass

# Assert that the agent's override was used
assert llm_request.live_connect_config.speech_config == agent_speech_config
assert (
llm_request.live_connect_config.speech_config.voice_config.prebuilt_voice_config.voice_name
== 'Kore'
)

@pytest.mark.asyncio
async def test_speech_config_uses_agent_as_fallback(self):
"""Tests that the agent's speech_config is used when RunConfig's is None."""
agent_speech_config = types.SpeechConfig(
voice_config=types.VoiceConfig(
prebuilt_voice_config=types.PrebuiltVoiceConfig(
voice_name='Kore',
)
)
)

agent = LlmAgent(
name='test_agent',
model='gemini-1.5-flash',
speech_config=agent_speech_config,
)
run_config = RunConfig(speech_config=None) # No runtime config
invocation_context = await _create_invocation_context(agent, run_config)
llm_request = LlmRequest()
processor = _BasicLlmRequestProcessor()

# Process the request
async for _ in processor.run_async(invocation_context, llm_request):
pass

# Assert that the agent's config was used as a fallback
assert llm_request.live_connect_config.speech_config == agent_speech_config
assert (
llm_request.live_connect_config.speech_config.voice_config.prebuilt_voice_config.voice_name
== 'Kore'
)

@pytest.mark.asyncio
async def test_speech_config_uses_run_config_when_agent_is_none(self):
"""Tests that RunConfig's speech_config is used when the agent's is None."""
run_speech_config = types.SpeechConfig(
voice_config=types.VoiceConfig(
prebuilt_voice_config=types.PrebuiltVoiceConfig(
voice_name='Puck',
)
)
)

agent = LlmAgent(
name='test_agent', model='gemini-1.5-flash', speech_config=None
) # No agent config
run_config = RunConfig(speech_config=run_speech_config)
invocation_context = await _create_invocation_context(agent, run_config)
llm_request = LlmRequest()
processor = _BasicLlmRequestProcessor()

# Process the request
async for _ in processor.run_async(invocation_context, llm_request):
pass

# Assert that the runtime config was used
assert llm_request.live_connect_config.speech_config == run_speech_config
assert (
llm_request.live_connect_config.speech_config.voice_config.prebuilt_voice_config.voice_name
== 'Puck'
)

@pytest.mark.asyncio
async def test_speech_config_is_none_when_both_are_none(self):
"""Tests that speech_config is None when neither agent nor RunConfig has it."""
agent = LlmAgent(
name='test_agent', model='gemini-1.5-flash', speech_config=None
)
run_config = RunConfig(speech_config=None) # No runtime config
invocation_context = await _create_invocation_context(agent, run_config)
llm_request = LlmRequest()
processor = _BasicLlmRequestProcessor()

# Process the request
async for _ in processor.run_async(invocation_context, llm_request):
pass

# Assert that the final config is None
assert llm_request.live_connect_config.speech_config is None