25 changes: 23 additions & 2 deletions contributing/samples/live_bidi_streaming_multi_agent/agent.py
@@ -42,6 +42,13 @@ def roll_die(sides: int) -> int:
),
]
),
speech_config=types.SpeechConfig(
voice_config=types.VoiceConfig(
prebuilt_voice_config=types.PrebuiltVoiceConfig(
voice_name="Kore",
)
)
),
)


@@ -85,6 +92,13 @@ def check_prime(nums: list[int]) -> str:
),
]
),
speech_config=types.SpeechConfig(
voice_config=types.VoiceConfig(
prebuilt_voice_config=types.PrebuiltVoiceConfig(
voice_name="Puck",
)
)
),
)


@@ -100,8 +114,8 @@ def get_current_weather(location: str):

root_agent = Agent(
# find supported models here: https://google.github.io/adk-docs/get-started/streaming/quickstart-streaming/
model="gemini-2.0-flash-live-preview-04-09", # for Vertex project
# model="gemini-live-2.5-flash-preview", # for AI studio key
# model="gemini-2.0-flash-live-preview-04-09", # for Vertex project
model="gemini-live-2.5-flash-preview", # for AI studio key
name="root_agent",
instruction="""
You are a helpful assistant that can check time, roll dice and check if numbers are prime.
@@ -126,4 +140,11 @@ def get_current_weather(location: str):
),
]
),
speech_config=types.SpeechConfig(
voice_config=types.VoiceConfig(
prebuilt_voice_config=types.PrebuiltVoiceConfig(
voice_name="Zephyr",
)
)
),
)
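
For reference, the pattern each sample agent now follows boils down to this sketch; the agent name, model, and instruction are illustrative, and the imports assume the usual ADK layout:

from google.adk.agents import Agent
from google.genai import types

# Each sub-agent carries its own prebuilt voice. It only takes effect in
# live/streaming runs (run_live) with a speech-capable Live model.
roll_agent = Agent(
    name="roll_agent",
    model="gemini-live-2.5-flash-preview",  # for AI studio key
    instruction="You roll dice for the user.",
    speech_config=types.SpeechConfig(
        voice_config=types.VoiceConfig(
            prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name="Puck")
        )
    ),
)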
16 changes: 16 additions & 0 deletions src/google/adk/agents/llm_agent.py
@@ -263,6 +263,9 @@ class LlmAgent(BaseAgent):
settings, etc.
"""

speech_config: Optional[types.SpeechConfig] = None
"""The agent's speech configurations."""

# LLM-based agent transfer configs - Start
disallow_transfer_to_parent: bool = False
"""Disallows LLM-controlled transferring to the parent agent.
@@ -697,6 +700,7 @@ def __maybe_save_output_to_state(self, event: Event):
@model_validator(mode='after')
def __model_validator_after(self) -> LlmAgent:
self.__check_output_schema()
self.__check_speech_config()
return self

def __check_output_schema(self):
@@ -722,6 +726,16 @@ def __check_output_schema(self):
' sub_agents must be empty to disable agent transfer.'
)

def __check_speech_config(self):
if self.speech_config:
logger.warning(
'Agent %s has a speech_config set. This configuration is only'
' effective when using the agent in a live/streaming mode'
' (e.g., via run_live) and with a model that supports speech'
' input/output.',
self.name,
)

@field_validator('generate_content_config', mode='after')
@classmethod
def validate_generate_content_config(
@@ -851,6 +865,8 @@ def _parse_config(
)
if config.generate_content_config:
kwargs['generate_content_config'] = config.generate_content_config
if config.speech_config:
kwargs['speech_config'] = config.speech_config

return kwargs

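To see the new validator fire, constructing an agent with a speech_config is enough; this is a minimal sketch, assuming logging is configured to surface warnings:

import logging

from google.adk.agents import LlmAgent
from google.genai import types

logging.basicConfig(level=logging.WARNING)

# __model_validator_after runs on construction; __check_speech_config logs a
# reminder that speech_config matters only for run_live with a speech model.
agent = LlmAgent(
    name="voice_agent",
    model="gemini-live-2.5-flash-preview",
    speech_config=types.SpeechConfig(
        voice_config=types.VoiceConfig(
            prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name="Kore")
        )
    ),
)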
4 changes: 4 additions & 0 deletions src/google/adk/agents/llm_agent_config.py
@@ -188,3 +188,7 @@ class LlmAgentConfig(BaseAgentConfig):
generate_content_config: Optional[types.GenerateContentConfig] = Field(
default=None, description='Optional. LlmAgent.generate_content_config.'
)

speech_config: Optional[types.SpeechConfig] = Field(
default=None, description='Optional. LlmAgent.speech_config.'
)
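
A sketch of the declarative path this field enables; speech_config is the field added above, while the name and model arguments are assumptions about the surrounding config schema:

from google.adk.agents.llm_agent_config import LlmAgentConfig
from google.genai import types

# _parse_config in llm_agent.py copies config.speech_config into the
# LlmAgent kwargs, so a config-defined agent gets the same voice handling.
config = LlmAgentConfig(
    name="voice_agent",
    model="gemini-live-2.5-flash-preview",
    speech_config=types.SpeechConfig(
        voice_config=types.VoiceConfig(
            prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name="Kore")
        )
    ),
)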
5 changes: 4 additions & 1 deletion src/google/adk/agents/run_config.py
@@ -35,7 +35,10 @@ class StreamingMode(Enum):


class RunConfig(BaseModel):
"""Configs for runtime behavior of agents."""
"""Configs for runtime behavior of agents.

The configs here will be overriden by agent-specific configurations.
"""

model_config = ConfigDict(
extra='forbid',
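The docstring change above pairs with usage like this sketch: RunConfig supplies a runtime-wide default voice, and any agent that sets its own speech_config overrides it (voice name illustrative):

from google.adk.agents.run_config import RunConfig
from google.genai import types

# Fallback voice for every agent in the run; agents with their own
# speech_config (as in the samples above) take precedence in live mode.
run_config = RunConfig(
    speech_config=types.SpeechConfig(
        voice_config=types.VoiceConfig(
            prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name="Zephyr")
        )
    )
)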
7 changes: 5 additions & 2 deletions src/google/adk/flows/llm_flows/basic.py
@@ -58,9 +58,12 @@ async def run_async(
llm_request.live_connect_config.response_modalities = (
invocation_context.run_config.response_modalities
)
- llm_request.live_connect_config.speech_config = (
- invocation_context.run_config.speech_config
- )
+
+ speech_config_to_use = (
+ agent.speech_config or invocation_context.run_config.speech_config
+ )
+ llm_request.live_connect_config.speech_config = speech_config_to_use
+
llm_request.live_connect_config.output_audio_transcription = (
invocation_context.run_config.output_audio_transcription
)
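The precedence itself is a single or-expression: a non-None agent config shadows the runtime default, and None falls through. As a standalone illustration (resolve_speech_config is a hypothetical helper, not ADK API):

from typing import Optional

from google.genai import types


def resolve_speech_config(
    agent_value: Optional[types.SpeechConfig],
    run_config_value: Optional[types.SpeechConfig],
) -> Optional[types.SpeechConfig]:
    # Mirrors basic.py: the agent-level config wins; otherwise use the
    # RunConfig value, which may itself be None when neither is set.
    return agent_value or run_config_value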
26 changes: 26 additions & 0 deletions tests/unittests/agents/test_llm_agent_fields.py
@@ -167,6 +167,32 @@ async def _global_instruction_provider(ctx: ReadonlyContext) -> str:
assert bypass_state_injection


def test_speech_config_logs_warning(caplog: pytest.LogCaptureFixture):
with caplog.at_level('WARNING'):

agent = LlmAgent(
name='test_agent',
speech_config=types.SpeechConfig(
voice_config=types.VoiceConfig(
prebuilt_voice_config=types.PrebuiltVoiceConfig(
voice_name='Kore',
)
)
),
)

assert agent.speech_config is not None
assert (
agent.speech_config.voice_config.prebuilt_voice_config.voice_name
== 'Kore'
)
assert (
'Agent test_agent has a speech_config set.' in caplog.text
and 'only effective when using the agent in a live/streaming mode'
in caplog.text
)


def test_output_schema_will_disable_transfer(caplog: pytest.LogCaptureFixture):
with caplog.at_level('WARNING'):

126 changes: 124 additions & 2 deletions tests/unittests/flows/llm_flows/test_basic_processor.py
@@ -21,6 +21,7 @@
from google.adk.models.llm_request import LlmRequest
from google.adk.sessions.in_memory_session_service import InMemorySessionService
from google.adk.tools.function_tool import FunctionTool
from google.genai import types
from pydantic import BaseModel
from pydantic import Field
import pytest
@@ -38,7 +39,9 @@ def dummy_tool(query: str) -> str:
return f'Result: {query}'


- async def _create_invocation_context(agent: LlmAgent) -> InvocationContext:
+ async def _create_invocation_context(
+ agent: LlmAgent, run_config: RunConfig = RunConfig()
+ ) -> InvocationContext:
"""Helper to create InvocationContext for testing."""
session_service = InMemorySessionService()
session = await session_service.create_session(
@@ -49,7 +52,7 @@ async def _create_invocation_context(agent: LlmAgent) -> InvocationContext:
agent=agent,
session=session,
session_service=session_service,
- run_config=RunConfig(),
+ run_config=run_config,
)


@@ -143,3 +146,122 @@ async def test_sets_model_name(self):

# Should have set the model name
assert llm_request.model == 'gemini-1.5-flash'

@pytest.mark.asyncio
async def test_speech_config_agent_overrides_run_config(self):
"""Tests that agent's speech_config is prioritized over the RunConfig's."""
agent_speech_config = types.SpeechConfig(
voice_config=types.VoiceConfig(
prebuilt_voice_config=types.PrebuiltVoiceConfig(
voice_name='Kore',
)
)
)
run_speech_config = types.SpeechConfig(
voice_config=types.VoiceConfig(
prebuilt_voice_config=types.PrebuiltVoiceConfig(
voice_name='Puck',
)
)
)

agent = LlmAgent(
name='test_agent',
model='gemini-1.5-flash',
speech_config=agent_speech_config,
)
run_config = RunConfig(speech_config=run_speech_config)
invocation_context = await _create_invocation_context(agent, run_config)
llm_request = LlmRequest()
processor = _BasicLlmRequestProcessor()

# Process the request
async for _ in processor.run_async(invocation_context, llm_request):
pass

# Assert that the agent's override was used
assert llm_request.live_connect_config.speech_config == agent_speech_config
assert (
llm_request.live_connect_config.speech_config.voice_config.prebuilt_voice_config.voice_name
== 'Kore'
)

@pytest.mark.asyncio
async def test_speech_config_uses_agent_as_fallback(self):
"""Tests that the agent's speech_config is used when RunConfig's is None."""
agent_speech_config = types.SpeechConfig(
voice_config=types.VoiceConfig(
prebuilt_voice_config=types.PrebuiltVoiceConfig(
voice_name='Kore',
)
)
)

agent = LlmAgent(
name='test_agent',
model='gemini-1.5-flash',
speech_config=agent_speech_config,
)
run_config = RunConfig(speech_config=None) # No runtime config
invocation_context = await _create_invocation_context(agent, run_config)
llm_request = LlmRequest()
processor = _BasicLlmRequestProcessor()

# Process the request
async for _ in processor.run_async(invocation_context, llm_request):
pass

# Assert that the agent's config was used as a fallback
assert llm_request.live_connect_config.speech_config == agent_speech_config
assert (
llm_request.live_connect_config.speech_config.voice_config.prebuilt_voice_config.voice_name
== 'Kore'
)

@pytest.mark.asyncio
async def test_speech_config_uses_run_config_when_agent_is_none(self):
"""Tests that RunConfig's speech_config is used when the agent's is None."""
run_speech_config = types.SpeechConfig(
voice_config=types.VoiceConfig(
prebuilt_voice_config=types.PrebuiltVoiceConfig(
voice_name='Puck',
)
)
)

agent = LlmAgent(
name='test_agent', model='gemini-1.5-flash', speech_config=None
) # No agent config
run_config = RunConfig(speech_config=run_speech_config)
invocation_context = await _create_invocation_context(agent, run_config)
llm_request = LlmRequest()
processor = _BasicLlmRequestProcessor()

# Process the request
async for _ in processor.run_async(invocation_context, llm_request):
pass

# Assert that the runtime config was used
assert llm_request.live_connect_config.speech_config == run_speech_config
assert (
llm_request.live_connect_config.speech_config.voice_config.prebuilt_voice_config.voice_name
== 'Puck'
)

@pytest.mark.asyncio
async def test_speech_config_is_none_when_both_are_none(self):
"""Tests that speech_config is None when neither agent nor RunConfig has it."""
agent = LlmAgent(
name='test_agent', model='gemini-1.5-flash', speech_config=None
)
run_config = RunConfig(speech_config=None) # No runtime config
invocation_context = await _create_invocation_context(agent, run_config)
llm_request = LlmRequest()
processor = _BasicLlmRequestProcessor()

# Process the request
async for _ in processor.run_async(invocation_context, llm_request):
pass

# Assert that the final config is None
assert llm_request.live_connect_config.speech_config is None