diff --git a/examples/voice_agents/basic_agent.py b/examples/voice_agents/basic_agent.py index c0cb800e90..a296a4c077 100644 --- a/examples/voice_agents/basic_agent.py +++ b/examples/voice_agents/basic_agent.py @@ -11,12 +11,11 @@ JobProcess, MetricsCollectedEvent, ModelSettings, - RoomInputOptions, - RoomOutputOptions, RunContext, WorkerOptions, cli, metrics, + room_io, ) from livekit.agents.llm import function_tool from livekit.plugins import deepgram, openai, silero @@ -123,11 +122,12 @@ async def log_usage(): await session.start( agent=MyAgent(), room=ctx.room, - room_input_options=RoomInputOptions( - # uncomment to enable Krisp BVC noise cancellation - # noise_cancellation=noise_cancellation.BVC(), + room_options=room_io.RoomOptions( + audio_input=room_io.AudioInputOptions( + # uncomment to enable the Krisp BVC noise cancellation + # noise_cancellation=noise_cancellation.BVC(), + ), ), - room_output_options=RoomOutputOptions(transcription_enabled=True), ) diff --git a/livekit-agents/livekit/agents/__init__.py b/livekit-agents/livekit/agents/__init__.py index 98cd192354..ed6cd23341 100644 --- a/livekit-agents/livekit/agents/__init__.py +++ b/livekit-agents/livekit/agents/__init__.py @@ -75,6 +75,7 @@ UserStateChangedEvent, avatar, io, + room_io, ) from .voice.background_audio import AudioConfig, BackgroundAudioPlayer, BuiltinAudioClip, PlayHandle from .voice.room_io import RoomInputOptions, RoomIO, RoomOutputOptions @@ -127,6 +128,7 @@ def __getattr__(name: str) -> typing.Any: "function_tool", "ChatContext", "ChatItem", + "room_io", "RoomIO", "RoomInputOptions", "RoomOutputOptions", diff --git a/livekit-agents/livekit/agents/voice/agent_session.py b/livekit-agents/livekit/agents/voice/agent_session.py index 5bf73d096a..3c7b380f49 100644 --- a/livekit-agents/livekit/agents/voice/agent_session.py +++ b/livekit-agents/livekit/agents/voice/agent_session.py @@ -414,6 +414,8 @@ async def start( *, capture_run: Literal[True], room: NotGivenOr[rtc.Room] = NOT_GIVEN, + room_options: NotGivenOr[room_io.RoomOptions] = NOT_GIVEN, + # deprecated room_input_options: NotGivenOr[room_io.RoomInputOptions] = NOT_GIVEN, room_output_options: NotGivenOr[room_io.RoomOutputOptions] = NOT_GIVEN, ) -> RunResult: ... @@ -425,6 +427,8 @@ async def start( *, capture_run: Literal[False] = False, room: NotGivenOr[rtc.Room] = NOT_GIVEN, + room_options: NotGivenOr[room_io.RoomOptions] = NOT_GIVEN, + # deprecated room_input_options: NotGivenOr[room_io.RoomInputOptions] = NOT_GIVEN, room_output_options: NotGivenOr[room_io.RoomOutputOptions] = NOT_GIVEN, ) -> None: ... @@ -436,6 +440,8 @@ async def start( *, capture_run: bool = False, room: NotGivenOr[rtc.Room] = NOT_GIVEN, + room_options: NotGivenOr[room_io.RoomOptions] = NOT_GIVEN, + # deprecated room_input_options: NotGivenOr[room_io.RoomInputOptions] = NOT_GIVEN, room_output_options: NotGivenOr[room_io.RoomOutputOptions] = NOT_GIVEN, ) -> RunResult | None: @@ -485,40 +491,35 @@ async def start( tasks.append(asyncio.create_task(chat_cli.start(), name="_chat_cli_start")) elif is_given(room) and not self._room_io: - room_input_options = copy.copy( - room_input_options or room_io.DEFAULT_ROOM_INPUT_OPTIONS - ) - room_output_options = copy.copy( - room_output_options or room_io.DEFAULT_ROOM_OUTPUT_OPTIONS + room_options = room_io.RoomOptions._ensure_options( + room_options, + room_input_options=room_input_options, + room_output_options=room_output_options, ) + room_options = copy.copy(room_options) # shadow copy is enough if self.input.audio is not None: - if room_input_options.audio_enabled: + if room_options.audio_input: logger.warning( "RoomIO audio input is enabled but input.audio is already set, ignoring.." # noqa: E501 ) - room_input_options.audio_enabled = False + room_options.audio_input = False if self.output.audio is not None: - if room_output_options.audio_enabled: + if room_options.audio_output: logger.warning( "RoomIO audio output is enabled but output.audio is already set, ignoring.." # noqa: E501 ) - room_output_options.audio_enabled = False + room_options.audio_output = False if self.output.transcription is not None: - if room_output_options.transcription_enabled: + if room_options.text_output: logger.warning( "RoomIO transcription output is enabled but output.transcription is already set, ignoring.." # noqa: E501 ) - room_output_options.transcription_enabled = False + room_options.text_output = False - self._room_io = room_io.RoomIO( - room=room, - agent_session=self, - input_options=room_input_options, - output_options=room_output_options, - ) + self._room_io = room_io.RoomIO(room=room, agent_session=self, options=room_options) tasks.append(asyncio.create_task(self._room_io.start(), name="_room_io_start")) # session can be restarted, register the callbacks only once diff --git a/livekit-agents/livekit/agents/voice/room_io/__init__.py b/livekit-agents/livekit/agents/voice/room_io/__init__.py index 036fac742a..8e380aec44 100644 --- a/livekit-agents/livekit/agents/voice/room_io/__init__.py +++ b/livekit-agents/livekit/agents/voice/room_io/__init__.py @@ -4,23 +4,31 @@ _ParticipantStreamTranscriptionOutput, _ParticipantTranscriptionOutput, ) -from .room_io import ( - DEFAULT_ROOM_INPUT_OPTIONS, - DEFAULT_ROOM_OUTPUT_OPTIONS, +from .room_io import RoomIO +from .types import ( + AudioInputOptions, + AudioOutputOptions, RoomInputOptions, - RoomIO, + RoomOptions, RoomOutputOptions, TextInputEvent, + TextInputOptions, + TextOutputOptions, + VideoInputOptions, ) __all__ = [ "RoomIO", - "DEFAULT_ROOM_INPUT_OPTIONS", - "DEFAULT_ROOM_OUTPUT_OPTIONS", + "RoomOptions", "RoomInputOptions", "RoomOutputOptions", "ATTRIBUTE_PUBLISH_ON_BEHALF", "TextInputEvent", + "TextInputOptions", + "AudioInputOptions", + "AudioOutputOptions", + "TextOutputOptions", + "VideoInputOptions", "_ParticipantTranscriptionOutput", "_ParticipantAudioOutput", "_ParticipantStreamTranscriptionOutput", diff --git a/livekit-agents/livekit/agents/voice/room_io/room_io.py b/livekit-agents/livekit/agents/voice/room_io/room_io.py index c60c356948..5a9980dccb 100644 --- a/livekit-agents/livekit/agents/voice/room_io/room_io.py +++ b/livekit-agents/livekit/agents/voice/room_io/room_io.py @@ -1,9 +1,7 @@ from __future__ import annotations import asyncio -from collections.abc import Coroutine -from dataclasses import dataclass, field -from typing import TYPE_CHECKING, Any, Callable, Optional +from typing import TYPE_CHECKING, Any from livekit import rtc @@ -27,86 +25,15 @@ from ._input import _ParticipantAudioInputStream, _ParticipantVideoInputStream from ._output import _ParticipantAudioOutput, _ParticipantTranscriptionOutput - -DEFAULT_PARTICIPANT_KINDS: list[rtc.ParticipantKind.ValueType] = [ - rtc.ParticipantKind.PARTICIPANT_KIND_SIP, - rtc.ParticipantKind.PARTICIPANT_KIND_STANDARD, -] - -DEFAULT_CLOSE_ON_DISCONNECT_REASONS: list[rtc.DisconnectReason.ValueType] = [ - rtc.DisconnectReason.CLIENT_INITIATED, - rtc.DisconnectReason.ROOM_DELETED, - rtc.DisconnectReason.USER_REJECTED, -] - - -@dataclass -class TextInputEvent: - text: str - info: rtc.TextStreamInfo - participant: rtc.RemoteParticipant - - -TextInputCallback = Callable[ - ["AgentSession", TextInputEvent], Optional[Coroutine[None, None, None]] -] - - -def _default_text_input_cb(sess: AgentSession, ev: TextInputEvent) -> None: - sess.interrupt() - sess.generate_reply(user_input=ev.text) - - -@dataclass -class RoomInputOptions: - text_enabled: NotGivenOr[bool] = NOT_GIVEN - """If not given, default to True.""" - audio_enabled: NotGivenOr[bool] = NOT_GIVEN - """If not given, default to True.""" - video_enabled: NotGivenOr[bool] = NOT_GIVEN - """If not given, default to False.""" - audio_sample_rate: int = 24000 - audio_num_channels: int = 1 - noise_cancellation: rtc.NoiseCancellationOptions | None = None - text_input_cb: TextInputCallback = _default_text_input_cb - participant_kinds: NotGivenOr[list[rtc.ParticipantKind.ValueType]] = NOT_GIVEN - """Participant kinds accepted for auto subscription. If not provided, - accept `DEFAULT_PARTICIPANT_KINDS`.""" - participant_identity: NotGivenOr[str] = NOT_GIVEN - """The participant to link to. If not provided, link to the first participant. - Can be overridden by the `participant` argument of RoomIO constructor or `set_participant`.""" - pre_connect_audio: bool = True - """Pre-connect audio enabled or not.""" - pre_connect_audio_timeout: float = 3.0 - """The pre-connect audio will be ignored if it doesn't arrive within this time.""" - close_on_disconnect: bool = True - """Close the AgentSession if the linked participant disconnects with reasons in - CLIENT_INITIATED, ROOM_DELETED, or USER_REJECTED.""" - - -@dataclass -class RoomOutputOptions: - transcription_enabled: NotGivenOr[bool] = NOT_GIVEN - """If not given, default to True.""" - audio_enabled: NotGivenOr[bool] = NOT_GIVEN - """If not given, default to True.""" - audio_sample_rate: int = 24000 - audio_num_channels: int = 1 - audio_publish_options: rtc.TrackPublishOptions = field( - default_factory=lambda: rtc.TrackPublishOptions(source=rtc.TrackSource.SOURCE_MICROPHONE) - ) - audio_track_name: NotGivenOr[str] = NOT_GIVEN - """The name of the audio track to publish. If not provided, default to "roomio_audio".""" - sync_transcription: NotGivenOr[bool] = NOT_GIVEN - """False to disable transcription synchronization with audio output. - Otherwise, transcription is emitted as quickly as available.""" - transcription_speed_factor: float = 1.0 - """Speed factor of transcription synchronization with audio output. - Only effective if `sync_transcription` is True.""" - - -DEFAULT_ROOM_INPUT_OPTIONS = RoomInputOptions() -DEFAULT_ROOM_OUTPUT_OPTIONS = RoomOutputOptions() +from .types import ( + DEFAULT_CLOSE_ON_DISCONNECT_REASONS, + DEFAULT_PARTICIPANT_KINDS, + RoomInputOptions, + RoomOptions, + RoomOutputOptions, + TextInputCallback, + TextInputEvent, +) class RoomIO: @@ -116,19 +43,26 @@ def __init__( room: rtc.Room, *, participant: rtc.RemoteParticipant | str | None = None, - input_options: RoomInputOptions = DEFAULT_ROOM_INPUT_OPTIONS, - output_options: RoomOutputOptions = DEFAULT_ROOM_OUTPUT_OPTIONS, + options: NotGivenOr[RoomOptions] = NOT_GIVEN, + # deprecated + input_options: NotGivenOr[RoomInputOptions] = NOT_GIVEN, + output_options: NotGivenOr[RoomOutputOptions] = NOT_GIVEN, ) -> None: + self._options = RoomOptions._ensure_options( + options, room_input_options=input_options, room_output_options=output_options + ) + self._text_input_cb: TextInputCallback | None = None + self._agent_session, self._room = agent_session, room - self._input_options = input_options - self._output_options = output_options + # self._input_options = input_options + # self._output_options = output_options self._participant_identity = ( participant.identity if isinstance(participant, rtc.RemoteParticipant) else participant ) if self._participant_identity is None and utils.is_given( - input_options.participant_identity + self._options.participant_identity ): - self._participant_identity = input_options.participant_identity + self._participant_identity = self._options.participant_identity self._audio_input: _ParticipantAudioInputStream | None = None self._video_input: _ParticipantVideoInputStream | None = None @@ -152,54 +86,56 @@ def __init__( async def start(self) -> None: # -- create inputs -- - if self._input_options.pre_connect_audio: + input_audio_options = self._options.get_audio_input_options() + if input_audio_options and input_audio_options.pre_connect_audio: self._pre_connect_audio_handler = PreConnectAudioHandler( room=self._room, - timeout=self._input_options.pre_connect_audio_timeout, + timeout=input_audio_options.pre_connect_audio_timeout, ) self._pre_connect_audio_handler.register() - if self._input_options.text_enabled or not utils.is_given(self._input_options.text_enabled): + input_text_options = self._options.get_text_input_options() + if input_text_options: + self._text_input_cb = input_text_options.text_input_cb try: self._room.register_text_stream_handler(TOPIC_CHAT, self._on_user_text_input) self._text_stream_handler_registered = True except ValueError: - if self._input_options.text_enabled: + if utils.is_given(self._options.text_input): logger.warning( f"text stream handler for topic '{TOPIC_CHAT}' already set, ignoring" ) + else: + self._text_input_cb = None - if self._input_options.video_enabled: + input_video_options = self._options.get_video_input_options() + if input_video_options: self._video_input = _ParticipantVideoInputStream(self._room) - if self._input_options.audio_enabled or not utils.is_given( - self._input_options.audio_enabled - ): + if input_audio_options: self._audio_input = _ParticipantAudioInputStream( self._room, - sample_rate=self._input_options.audio_sample_rate, - num_channels=self._input_options.audio_num_channels, - noise_cancellation=self._input_options.noise_cancellation, + sample_rate=input_audio_options.sample_rate, + num_channels=input_audio_options.num_channels, + noise_cancellation=input_audio_options.noise_cancellation, pre_connect_audio_handler=self._pre_connect_audio_handler, ) # -- create outputs -- - if self._output_options.audio_enabled or not utils.is_given( - self._output_options.audio_enabled - ): + output_audio_options = self._options.get_audio_output_options() + if output_audio_options: self._audio_output = _ParticipantAudioOutput( self._room, - sample_rate=self._output_options.audio_sample_rate, - num_channels=self._output_options.audio_num_channels, - track_publish_options=self._output_options.audio_publish_options, - track_name=self._output_options.audio_track_name - if utils.is_given(self._output_options.audio_track_name) + sample_rate=output_audio_options.sample_rate, + num_channels=output_audio_options.num_channels, + track_publish_options=output_audio_options.track_publish_options, + track_name=output_audio_options.track_name + if utils.is_given(output_audio_options.track_name) else "roomio_audio", ) - if self._output_options.transcription_enabled or not utils.is_given( - self._output_options.transcription_enabled - ): + output_text_options = self._options.get_text_output_options() + if output_text_options: self._user_tr_output = _ParticipantTranscriptionOutput( room=self._room, is_delta_stream=False, participant=self._participant_identity ) @@ -212,17 +148,13 @@ async def start(self) -> None: # use the RoomIO's audio output if available, otherwise use the agent's audio output # (e.g the audio output isn't using RoomIO with our avatar datastream impl) - sync_transcription = True - if utils.is_given(self._output_options.sync_transcription): - sync_transcription = self._output_options.sync_transcription - - if sync_transcription and ( + if output_text_options.sync_transcription is not False and ( audio_output := self._audio_output or self._agent_session.output.audio ): self._tr_synchronizer = TranscriptSynchronizer( next_in_chain_audio=audio_output, next_in_chain_text=self._agent_tr_output, - speed=self._output_options.transcription_speed_factor, + speed=output_text_options.transcription_speed_factor, ) # -- set the room event handlers -- @@ -409,7 +341,7 @@ def _on_participant_connected(self, participant: rtc.RemoteParticipant) -> None: ): return - accepted_kinds = self._input_options.participant_kinds or DEFAULT_PARTICIPANT_KINDS + accepted_kinds = self._options.participant_kinds or DEFAULT_PARTICIPANT_KINDS if participant.kind not in accepted_kinds: # not an accepted participant kind, skip return @@ -423,7 +355,7 @@ def _on_participant_disconnected(self, participant: rtc.RemoteParticipant) -> No self._participant_available_fut = asyncio.Future[rtc.RemoteParticipant]() if ( - self._input_options.close_on_disconnect + self._options.close_on_disconnect and participant.disconnect_reason in DEFAULT_CLOSE_ON_DISCONNECT_REASONS and not self._close_session_atask ): @@ -452,17 +384,21 @@ def _on_user_text_input(self, reader: rtc.TextStreamReader, participant_identity logger.warning("participant not found, ignoring text input") return - async def _read_text() -> None: + async def _read_text(text_input_cb: TextInputCallback) -> None: text = await reader.read_all() - text_input_result = self._input_options.text_input_cb( + text_input_result = text_input_cb( self._agent_session, TextInputEvent(text=text, info=reader.info, participant=participant), ) if asyncio.iscoroutine(text_input_result): await text_input_result - task = asyncio.create_task(_read_text()) + if self._text_input_cb is None: + logger.error("text input callback is not set, ignoring text input") + return + + task = asyncio.create_task(_read_text(self._text_input_cb)) self._tasks.add(task) task.add_done_callback(self._tasks.discard) diff --git a/livekit-agents/livekit/agents/voice/room_io/types.py b/livekit-agents/livekit/agents/voice/room_io/types.py new file mode 100644 index 0000000000..1ca68a6a5b --- /dev/null +++ b/livekit-agents/livekit/agents/voice/room_io/types.py @@ -0,0 +1,258 @@ +from __future__ import annotations + +from collections.abc import Coroutine +from dataclasses import dataclass, field +from typing import TYPE_CHECKING, Callable, Optional + +from livekit import rtc + +from ...log import logger +from ...types import NOT_GIVEN, NotGivenOr +from ...utils import is_given + +if TYPE_CHECKING: + from ..agent_session import AgentSession + + +DEFAULT_PARTICIPANT_KINDS: list[rtc.ParticipantKind.ValueType] = [ + rtc.ParticipantKind.PARTICIPANT_KIND_SIP, + rtc.ParticipantKind.PARTICIPANT_KIND_STANDARD, +] + +DEFAULT_CLOSE_ON_DISCONNECT_REASONS: list[rtc.DisconnectReason.ValueType] = [ + rtc.DisconnectReason.CLIENT_INITIATED, + rtc.DisconnectReason.ROOM_DELETED, + rtc.DisconnectReason.USER_REJECTED, +] + + +@dataclass +class TextInputEvent: + text: str + info: rtc.TextStreamInfo + participant: rtc.RemoteParticipant + + +TextInputCallback = Callable[ + ["AgentSession", TextInputEvent], Optional[Coroutine[None, None, None]] +] + + +def _default_text_input_cb(sess: AgentSession, ev: TextInputEvent) -> None: + sess.interrupt() + sess.generate_reply(user_input=ev.text) + + +@dataclass +class TextInputOptions: + text_input_cb: TextInputCallback = _default_text_input_cb + + +@dataclass +class AudioInputOptions: + sample_rate: int = 24000 + num_channels: int = 1 + noise_cancellation: rtc.NoiseCancellationOptions | None = None + pre_connect_audio: bool = True + """Pre-connect audio enabled or not.""" + pre_connect_audio_timeout: float = 3.0 + """The pre-connect audio will be ignored if it doesn't arrive within this time.""" + + +@dataclass +class VideoInputOptions: + pass + + +@dataclass +class AudioOutputOptions: + sample_rate: int = 24000 + num_channels: int = 1 + track_publish_options: rtc.TrackPublishOptions = field( + default_factory=lambda: rtc.TrackPublishOptions(source=rtc.TrackSource.SOURCE_MICROPHONE) + ) + track_name: NotGivenOr[str] = NOT_GIVEN + """The name of the audio track to publish. If not provided, default to "roomio_audio".""" + + +@dataclass +class TextOutputOptions: + sync_transcription: NotGivenOr[bool] = NOT_GIVEN + """False to disable transcription synchronization with audio output. + Otherwise, transcription is emitted as quickly as available.""" + transcription_speed_factor: float = 1.0 + """Speed factor of transcription synchronization with audio output. + Only effective if `sync_transcription` is True.""" + + +@dataclass +class RoomOptions: + text_input: NotGivenOr[TextInputOptions | bool] = NOT_GIVEN + """The text input options. If not provided, default to True.""" + audio_input: NotGivenOr[AudioInputOptions | bool] = NOT_GIVEN + """The audio input options. If not provided, default to True.""" + video_input: NotGivenOr[VideoInputOptions | bool] = NOT_GIVEN + """The video input options. If not provided, default to False.""" + audio_output: NotGivenOr[AudioOutputOptions | bool] = NOT_GIVEN + """The audio output options. If not provided, default to True.""" + text_output: NotGivenOr[TextOutputOptions | bool] = NOT_GIVEN + """The transcription output options. If not provided, default to True.""" + + participant_kinds: NotGivenOr[list[rtc.ParticipantKind.ValueType]] = NOT_GIVEN + """Participant kinds accepted for auto subscription. If not provided, + accept `DEFAULT_PARTICIPANT_KINDS`.""" + participant_identity: NotGivenOr[str] = NOT_GIVEN + """The participant to link to. If not provided, link to the first participant. + Can be overridden by the `participant` argument of RoomIO constructor or `set_participant`.""" + close_on_disconnect: bool = True + """Close the AgentSession if the linked participant disconnects with reasons in + CLIENT_INITIATED, ROOM_DELETED, or USER_REJECTED.""" + + def get_text_input_options(self) -> TextInputOptions | None: + if isinstance(self.text_input, TextInputOptions): + return self.text_input + return TextInputOptions() if self.text_input is not False else None + + def get_audio_input_options(self) -> AudioInputOptions | None: + if isinstance(self.audio_input, AudioInputOptions): + return self.audio_input + return AudioInputOptions() if self.audio_input is not False else None + + def get_video_input_options(self) -> VideoInputOptions | None: + if isinstance(self.video_input, VideoInputOptions): + return self.video_input + return VideoInputOptions() if self.video_input is True else None + + def get_audio_output_options(self) -> AudioOutputOptions | None: + if isinstance(self.audio_output, AudioOutputOptions): + return self.audio_output + return AudioOutputOptions() if self.audio_output is not False else None + + def get_text_output_options(self) -> TextOutputOptions | None: + if isinstance(self.text_output, TextOutputOptions): + return self.text_output + return TextOutputOptions() if self.text_output is not False else None + + @classmethod + def _ensure_options( + cls, + options: NotGivenOr[RoomOptions], + *, + room_input_options: NotGivenOr[RoomInputOptions] = NOT_GIVEN, + room_output_options: NotGivenOr[RoomOutputOptions] = NOT_GIVEN, + ) -> RoomOptions: + if is_given(room_input_options) or is_given(room_output_options): + logger.warning( + "RoomInputOptions and RoomOutputOptions are deprecated, use RoomOptions instead" + ) + if not is_given(options): + return cls._create_from_legacy(room_input_options, room_output_options) + + if isinstance(options, RoomOptions): + return options + else: + return cls() + + @classmethod + def _create_from_legacy( + cls, + input_options: NotGivenOr[RoomInputOptions], + output_options: NotGivenOr[RoomOutputOptions], + ) -> RoomOptions: + opts = cls() + if input_options: + opts.text_input = ( + TextInputOptions(text_input_cb=input_options.text_input_cb) + if input_options.text_enabled is not False + else False + ) + opts.audio_input = ( + AudioInputOptions( + sample_rate=input_options.audio_sample_rate, + num_channels=input_options.audio_num_channels, + noise_cancellation=input_options.noise_cancellation, + pre_connect_audio=input_options.pre_connect_audio, + pre_connect_audio_timeout=input_options.pre_connect_audio_timeout, + ) + if input_options.audio_enabled is not False + else False + ) + + opts.video_input = input_options.video_enabled + opts.close_on_disconnect = input_options.close_on_disconnect + + if output_options: + opts.audio_output = ( + AudioOutputOptions( + sample_rate=output_options.audio_sample_rate, + num_channels=output_options.audio_num_channels, + track_publish_options=output_options.audio_publish_options, + track_name=output_options.audio_track_name, + ) + if output_options.audio_enabled is not False + else False + ) + opts.text_output = ( + TextOutputOptions( + sync_transcription=output_options.sync_transcription, + transcription_speed_factor=output_options.transcription_speed_factor, + ) + if output_options.transcription_enabled is not False + else False + ) + return opts + + +# RoomInputOptions and RoomOutputOptions are deprecated + + +@dataclass +class RoomInputOptions: + text_enabled: NotGivenOr[bool] = NOT_GIVEN + """If not given, default to True.""" + audio_enabled: NotGivenOr[bool] = NOT_GIVEN + """If not given, default to True.""" + video_enabled: NotGivenOr[bool] = NOT_GIVEN + """If not given, default to False.""" + audio_sample_rate: int = 24000 + audio_num_channels: int = 1 + noise_cancellation: rtc.NoiseCancellationOptions | None = None + text_input_cb: TextInputCallback = _default_text_input_cb + participant_kinds: NotGivenOr[list[rtc.ParticipantKind.ValueType]] = NOT_GIVEN + """Participant kinds accepted for auto subscription. If not provided, + accept `DEFAULT_PARTICIPANT_KINDS`.""" + participant_identity: NotGivenOr[str] = NOT_GIVEN + """The participant to link to. If not provided, link to the first participant. + Can be overridden by the `participant` argument of RoomIO constructor or `set_participant`.""" + pre_connect_audio: bool = True + """Pre-connect audio enabled or not.""" + pre_connect_audio_timeout: float = 3.0 + """The pre-connect audio will be ignored if it doesn't arrive within this time.""" + close_on_disconnect: bool = True + """Close the AgentSession if the linked participant disconnects with reasons in + CLIENT_INITIATED, ROOM_DELETED, or USER_REJECTED.""" + + +@dataclass +class RoomOutputOptions: + transcription_enabled: NotGivenOr[bool] = NOT_GIVEN + """If not given, default to True.""" + audio_enabled: NotGivenOr[bool] = NOT_GIVEN + """If not given, default to True.""" + audio_sample_rate: int = 24000 + audio_num_channels: int = 1 + audio_publish_options: rtc.TrackPublishOptions = field( + default_factory=lambda: rtc.TrackPublishOptions(source=rtc.TrackSource.SOURCE_MICROPHONE) + ) + audio_track_name: NotGivenOr[str] = NOT_GIVEN + """The name of the audio track to publish. If not provided, default to "roomio_audio".""" + sync_transcription: NotGivenOr[bool] = NOT_GIVEN + """False to disable transcription synchronization with audio output. + Otherwise, transcription is emitted as quickly as available.""" + transcription_speed_factor: float = 1.0 + """Speed factor of transcription synchronization with audio output. + Only effective if `sync_transcription` is True.""" + + +# DEFAULT_ROOM_INPUT_OPTIONS = RoomInputOptions() +# DEFAULT_ROOM_OUTPUT_OPTIONS = RoomOutputOptions()