diff --git a/examples/voice_agents/basic_agent.py b/examples/voice_agents/basic_agent.py
index c0cb800e90..a296a4c077 100644
--- a/examples/voice_agents/basic_agent.py
+++ b/examples/voice_agents/basic_agent.py
@@ -11,12 +11,11 @@
     JobProcess,
     MetricsCollectedEvent,
     ModelSettings,
-    RoomInputOptions,
-    RoomOutputOptions,
     RunContext,
     WorkerOptions,
     cli,
     metrics,
+    room_io,
 )
 from livekit.agents.llm import function_tool
 from livekit.plugins import deepgram, openai, silero
@@ -123,11 +122,12 @@ async def log_usage():
     await session.start(
         agent=MyAgent(),
         room=ctx.room,
-        room_input_options=RoomInputOptions(
-            # uncomment to enable Krisp BVC noise cancellation
-            # noise_cancellation=noise_cancellation.BVC(),
+        room_options=room_io.RoomOptions(
+            audio_input=room_io.AudioInputOptions(
+                # uncomment to enable the Krisp BVC noise cancellation
+                # noise_cancellation=noise_cancellation.BVC(),
+            ),
         ),
-        room_output_options=RoomOutputOptions(transcription_enabled=True),
     )
 
 
diff --git a/livekit-agents/livekit/agents/__init__.py b/livekit-agents/livekit/agents/__init__.py
index 98cd192354..ed6cd23341 100644
--- a/livekit-agents/livekit/agents/__init__.py
+++ b/livekit-agents/livekit/agents/__init__.py
@@ -75,6 +75,7 @@
     UserStateChangedEvent,
     avatar,
     io,
+    room_io,
 )
 from .voice.background_audio import AudioConfig, BackgroundAudioPlayer, BuiltinAudioClip, PlayHandle
 from .voice.room_io import RoomInputOptions, RoomIO, RoomOutputOptions
@@ -127,6 +128,7 @@ def __getattr__(name: str) -> typing.Any:
     "function_tool",
     "ChatContext",
     "ChatItem",
+    "room_io",
     "RoomIO",
     "RoomInputOptions",
     "RoomOutputOptions",
diff --git a/livekit-agents/livekit/agents/voice/agent_session.py b/livekit-agents/livekit/agents/voice/agent_session.py
index 5bf73d096a..3c7b380f49 100644
--- a/livekit-agents/livekit/agents/voice/agent_session.py
+++ b/livekit-agents/livekit/agents/voice/agent_session.py
@@ -414,6 +414,8 @@ async def start(
         *,
         capture_run: Literal[True],
         room: NotGivenOr[rtc.Room] = NOT_GIVEN,
+        room_options: NotGivenOr[room_io.RoomOptions] = NOT_GIVEN,
+        # deprecated
         room_input_options: NotGivenOr[room_io.RoomInputOptions] = NOT_GIVEN,
         room_output_options: NotGivenOr[room_io.RoomOutputOptions] = NOT_GIVEN,
     ) -> RunResult: ...
@@ -425,6 +427,8 @@ async def start(
         *,
         capture_run: Literal[False] = False,
         room: NotGivenOr[rtc.Room] = NOT_GIVEN,
+        room_options: NotGivenOr[room_io.RoomOptions] = NOT_GIVEN,
+        # deprecated
         room_input_options: NotGivenOr[room_io.RoomInputOptions] = NOT_GIVEN,
         room_output_options: NotGivenOr[room_io.RoomOutputOptions] = NOT_GIVEN,
     ) -> None: ...
@@ -436,6 +440,8 @@ async def start(
         *,
         capture_run: bool = False,
         room: NotGivenOr[rtc.Room] = NOT_GIVEN,
+        room_options: NotGivenOr[room_io.RoomOptions] = NOT_GIVEN,
+        # deprecated
         room_input_options: NotGivenOr[room_io.RoomInputOptions] = NOT_GIVEN,
         room_output_options: NotGivenOr[room_io.RoomOutputOptions] = NOT_GIVEN,
     ) -> RunResult | None:
@@ -485,40 +491,35 @@ async def start(
                 tasks.append(asyncio.create_task(chat_cli.start(), name="_chat_cli_start"))
 
             elif is_given(room) and not self._room_io:
-                room_input_options = copy.copy(
-                    room_input_options or room_io.DEFAULT_ROOM_INPUT_OPTIONS
-                )
-                room_output_options = copy.copy(
-                    room_output_options or room_io.DEFAULT_ROOM_OUTPUT_OPTIONS
+                room_options = room_io.RoomOptions._ensure_options(
+                    room_options,
+                    room_input_options=room_input_options,
+                    room_output_options=room_output_options,
                 )
+                room_options = copy.copy(room_options)  # shallow copy is enough
 
                 if self.input.audio is not None:
-                    if room_input_options.audio_enabled:
+                    if room_options.audio_input:
                         logger.warning(
                             "RoomIO audio input is enabled but input.audio is already set, ignoring.."  # noqa: E501
                         )
-                    room_input_options.audio_enabled = False
+                    room_options.audio_input = False
 
                 if self.output.audio is not None:
-                    if room_output_options.audio_enabled:
+                    if room_options.audio_output:
                         logger.warning(
                             "RoomIO audio output is enabled but output.audio is already set, ignoring.."  # noqa: E501
                         )
-                    room_output_options.audio_enabled = False
+                    room_options.audio_output = False
 
                 if self.output.transcription is not None:
-                    if room_output_options.transcription_enabled:
+                    if room_options.text_output:
                         logger.warning(
                             "RoomIO transcription output is enabled but output.transcription is already set, ignoring.."  # noqa: E501
                         )
-                    room_output_options.transcription_enabled = False
+                    room_options.text_output = False
 
-                self._room_io = room_io.RoomIO(
-                    room=room,
-                    agent_session=self,
-                    input_options=room_input_options,
-                    output_options=room_output_options,
-                )
+                self._room_io = room_io.RoomIO(room=room, agent_session=self, options=room_options)
                 tasks.append(asyncio.create_task(self._room_io.start(), name="_room_io_start"))
 
             # session can be restarted, register the callbacks only once
diff --git a/livekit-agents/livekit/agents/voice/room_io/__init__.py b/livekit-agents/livekit/agents/voice/room_io/__init__.py
index 036fac742a..8e380aec44 100644
--- a/livekit-agents/livekit/agents/voice/room_io/__init__.py
+++ b/livekit-agents/livekit/agents/voice/room_io/__init__.py
@@ -4,23 +4,31 @@
     _ParticipantStreamTranscriptionOutput,
     _ParticipantTranscriptionOutput,
 )
-from .room_io import (
-    DEFAULT_ROOM_INPUT_OPTIONS,
-    DEFAULT_ROOM_OUTPUT_OPTIONS,
+from .room_io import RoomIO
+from .types import (
+    AudioInputOptions,
+    AudioOutputOptions,
     RoomInputOptions,
-    RoomIO,
+    RoomOptions,
     RoomOutputOptions,
     TextInputEvent,
+    TextInputOptions,
+    TextOutputOptions,
+    VideoInputOptions,
 )
 
 __all__ = [
     "RoomIO",
-    "DEFAULT_ROOM_INPUT_OPTIONS",
-    "DEFAULT_ROOM_OUTPUT_OPTIONS",
+    "RoomOptions",
     "RoomInputOptions",
     "RoomOutputOptions",
     "ATTRIBUTE_PUBLISH_ON_BEHALF",
     "TextInputEvent",
+    "TextInputOptions",
+    "AudioInputOptions",
+    "AudioOutputOptions",
+    "TextOutputOptions",
+    "VideoInputOptions",
     "_ParticipantTranscriptionOutput",
     "_ParticipantAudioOutput",
     "_ParticipantStreamTranscriptionOutput",
diff --git a/livekit-agents/livekit/agents/voice/room_io/room_io.py b/livekit-agents/livekit/agents/voice/room_io/room_io.py
index c60c356948..5a9980dccb 100644
--- a/livekit-agents/livekit/agents/voice/room_io/room_io.py
+++ b/livekit-agents/livekit/agents/voice/room_io/room_io.py
@@ -1,9 +1,7 @@
 from __future__ import annotations
 
 import asyncio
-from collections.abc import Coroutine
-from dataclasses import dataclass, field
-from typing import TYPE_CHECKING, Any, Callable, Optional
+from typing import TYPE_CHECKING, Any
 
 from livekit import rtc
 
@@ -27,86 +25,15 @@
 
 from ._input import _ParticipantAudioInputStream, _ParticipantVideoInputStream
 from ._output import _ParticipantAudioOutput, _ParticipantTranscriptionOutput
-
-DEFAULT_PARTICIPANT_KINDS: list[rtc.ParticipantKind.ValueType] = [
-    rtc.ParticipantKind.PARTICIPANT_KIND_SIP,
-    rtc.ParticipantKind.PARTICIPANT_KIND_STANDARD,
-]
-
-DEFAULT_CLOSE_ON_DISCONNECT_REASONS: list[rtc.DisconnectReason.ValueType] = [
-    rtc.DisconnectReason.CLIENT_INITIATED,
-    rtc.DisconnectReason.ROOM_DELETED,
-    rtc.DisconnectReason.USER_REJECTED,
-]
-
-
-@dataclass
-class TextInputEvent:
-    text: str
-    info: rtc.TextStreamInfo
-    participant: rtc.RemoteParticipant
-
-
-TextInputCallback = Callable[
-    ["AgentSession", TextInputEvent], Optional[Coroutine[None, None, None]]
-]
-
-
-def _default_text_input_cb(sess: AgentSession, ev: TextInputEvent) -> None:
-    sess.interrupt()
-    sess.generate_reply(user_input=ev.text)
-
-
-@dataclass
-class RoomInputOptions:
-    text_enabled: NotGivenOr[bool] = NOT_GIVEN
-    """If not given, default to True."""
-    audio_enabled: NotGivenOr[bool] = NOT_GIVEN
-    """If not given, default to True."""
-    video_enabled: NotGivenOr[bool] = NOT_GIVEN
-    """If not given, default to False."""
-    audio_sample_rate: int = 24000
-    audio_num_channels: int = 1
-    noise_cancellation: rtc.NoiseCancellationOptions | None = None
-    text_input_cb: TextInputCallback = _default_text_input_cb
-    participant_kinds: NotGivenOr[list[rtc.ParticipantKind.ValueType]] = NOT_GIVEN
-    """Participant kinds accepted for auto subscription. If not provided,
-    accept `DEFAULT_PARTICIPANT_KINDS`."""
-    participant_identity: NotGivenOr[str] = NOT_GIVEN
-    """The participant to link to. If not provided, link to the first participant.
-    Can be overridden by the `participant` argument of RoomIO constructor or `set_participant`."""
-    pre_connect_audio: bool = True
-    """Pre-connect audio enabled or not."""
-    pre_connect_audio_timeout: float = 3.0
-    """The pre-connect audio will be ignored if it doesn't arrive within this time."""
-    close_on_disconnect: bool = True
-    """Close the AgentSession if the linked participant disconnects with reasons in
-    CLIENT_INITIATED, ROOM_DELETED, or USER_REJECTED."""
-
-
-@dataclass
-class RoomOutputOptions:
-    transcription_enabled: NotGivenOr[bool] = NOT_GIVEN
-    """If not given, default to True."""
-    audio_enabled: NotGivenOr[bool] = NOT_GIVEN
-    """If not given, default to True."""
-    audio_sample_rate: int = 24000
-    audio_num_channels: int = 1
-    audio_publish_options: rtc.TrackPublishOptions = field(
-        default_factory=lambda: rtc.TrackPublishOptions(source=rtc.TrackSource.SOURCE_MICROPHONE)
-    )
-    audio_track_name: NotGivenOr[str] = NOT_GIVEN
-    """The name of the audio track to publish. If not provided, default to "roomio_audio"."""
-    sync_transcription: NotGivenOr[bool] = NOT_GIVEN
-    """False to disable transcription synchronization with audio output.
-    Otherwise, transcription is emitted as quickly as available."""
-    transcription_speed_factor: float = 1.0
-    """Speed factor of transcription synchronization with audio output.
-    Only effective if `sync_transcription` is True."""
-
-
-DEFAULT_ROOM_INPUT_OPTIONS = RoomInputOptions()
-DEFAULT_ROOM_OUTPUT_OPTIONS = RoomOutputOptions()
+from .types import (
+    DEFAULT_CLOSE_ON_DISCONNECT_REASONS,
+    DEFAULT_PARTICIPANT_KINDS,
+    RoomInputOptions,
+    RoomOptions,
+    RoomOutputOptions,
+    TextInputCallback,
+    TextInputEvent,
+)
 
 
 class RoomIO:
@@ -116,19 +43,26 @@ def __init__(
         room: rtc.Room,
         *,
         participant: rtc.RemoteParticipant | str | None = None,
-        input_options: RoomInputOptions = DEFAULT_ROOM_INPUT_OPTIONS,
-        output_options: RoomOutputOptions = DEFAULT_ROOM_OUTPUT_OPTIONS,
+        options: NotGivenOr[RoomOptions] = NOT_GIVEN,
+        # deprecated
+        input_options: NotGivenOr[RoomInputOptions] = NOT_GIVEN,
+        output_options: NotGivenOr[RoomOutputOptions] = NOT_GIVEN,
     ) -> None:
+        self._options = RoomOptions._ensure_options(
+            options, room_input_options=input_options, room_output_options=output_options
+        )
+        self._text_input_cb: TextInputCallback | None = None
+
         self._agent_session, self._room = agent_session, room
-        self._input_options = input_options
-        self._output_options = output_options
+        # self._input_options = input_options
+        # self._output_options = output_options
         self._participant_identity = (
             participant.identity if isinstance(participant, rtc.RemoteParticipant) else participant
         )
         if self._participant_identity is None and utils.is_given(
-            input_options.participant_identity
+            self._options.participant_identity
         ):
-            self._participant_identity = input_options.participant_identity
+            self._participant_identity = self._options.participant_identity
 
         self._audio_input: _ParticipantAudioInputStream | None = None
         self._video_input: _ParticipantVideoInputStream | None = None
@@ -152,54 +86,56 @@ def __init__(
 
     async def start(self) -> None:
         # -- create inputs --
-        if self._input_options.pre_connect_audio:
+        input_audio_options = self._options.get_audio_input_options()
+        if input_audio_options and input_audio_options.pre_connect_audio:
             self._pre_connect_audio_handler = PreConnectAudioHandler(
                 room=self._room,
-                timeout=self._input_options.pre_connect_audio_timeout,
+                timeout=input_audio_options.pre_connect_audio_timeout,
             )
             self._pre_connect_audio_handler.register()
 
-        if self._input_options.text_enabled or not utils.is_given(self._input_options.text_enabled):
+        input_text_options = self._options.get_text_input_options()
+        if input_text_options:
+            self._text_input_cb = input_text_options.text_input_cb
             try:
                 self._room.register_text_stream_handler(TOPIC_CHAT, self._on_user_text_input)
                 self._text_stream_handler_registered = True
             except ValueError:
-                if self._input_options.text_enabled:
+                if utils.is_given(self._options.text_input):
                     logger.warning(
                         f"text stream handler for topic '{TOPIC_CHAT}' already set, ignoring"
                     )
+        else:
+            self._text_input_cb = None
 
-        if self._input_options.video_enabled:
+        input_video_options = self._options.get_video_input_options()
+        if input_video_options:
             self._video_input = _ParticipantVideoInputStream(self._room)
 
-        if self._input_options.audio_enabled or not utils.is_given(
-            self._input_options.audio_enabled
-        ):
+        if input_audio_options:
             self._audio_input = _ParticipantAudioInputStream(
                 self._room,
-                sample_rate=self._input_options.audio_sample_rate,
-                num_channels=self._input_options.audio_num_channels,
-                noise_cancellation=self._input_options.noise_cancellation,
+                sample_rate=input_audio_options.sample_rate,
+                num_channels=input_audio_options.num_channels,
+                noise_cancellation=input_audio_options.noise_cancellation,
                 pre_connect_audio_handler=self._pre_connect_audio_handler,
             )
 
         # -- create outputs --
-        if self._output_options.audio_enabled or not utils.is_given(
-            self._output_options.audio_enabled
-        ):
+        output_audio_options = self._options.get_audio_output_options()
+        if output_audio_options:
             self._audio_output = _ParticipantAudioOutput(
                 self._room,
-                sample_rate=self._output_options.audio_sample_rate,
-                num_channels=self._output_options.audio_num_channels,
-                track_publish_options=self._output_options.audio_publish_options,
-                track_name=self._output_options.audio_track_name
-                if utils.is_given(self._output_options.audio_track_name)
+                sample_rate=output_audio_options.sample_rate,
+                num_channels=output_audio_options.num_channels,
+                track_publish_options=output_audio_options.track_publish_options,
+                track_name=output_audio_options.track_name
+                if utils.is_given(output_audio_options.track_name)
                 else "roomio_audio",
             )
 
-        if self._output_options.transcription_enabled or not utils.is_given(
-            self._output_options.transcription_enabled
-        ):
+        output_text_options = self._options.get_text_output_options()
+        if output_text_options:
             self._user_tr_output = _ParticipantTranscriptionOutput(
                 room=self._room, is_delta_stream=False, participant=self._participant_identity
             )
@@ -212,17 +148,13 @@ async def start(self) -> None:
 
             # use the RoomIO's audio output if available, otherwise use the agent's audio output
             # (e.g the audio output isn't using RoomIO with our avatar datastream impl)
-            sync_transcription = True
-            if utils.is_given(self._output_options.sync_transcription):
-                sync_transcription = self._output_options.sync_transcription
-
-            if sync_transcription and (
+            if output_text_options.sync_transcription is not False and (
                 audio_output := self._audio_output or self._agent_session.output.audio
             ):
                 self._tr_synchronizer = TranscriptSynchronizer(
                     next_in_chain_audio=audio_output,
                     next_in_chain_text=self._agent_tr_output,
-                    speed=self._output_options.transcription_speed_factor,
+                    speed=output_text_options.transcription_speed_factor,
                 )
 
         # -- set the room event handlers --
@@ -409,7 +341,7 @@ def _on_participant_connected(self, participant: rtc.RemoteParticipant) -> None:
         ):
             return
 
-        accepted_kinds = self._input_options.participant_kinds or DEFAULT_PARTICIPANT_KINDS
+        accepted_kinds = self._options.participant_kinds or DEFAULT_PARTICIPANT_KINDS
         if participant.kind not in accepted_kinds:
             # not an accepted participant kind, skip
             return
@@ -423,7 +355,7 @@ def _on_participant_disconnected(self, participant: rtc.RemoteParticipant) -> No
         self._participant_available_fut = asyncio.Future[rtc.RemoteParticipant]()
 
         if (
-            self._input_options.close_on_disconnect
+            self._options.close_on_disconnect
             and participant.disconnect_reason in DEFAULT_CLOSE_ON_DISCONNECT_REASONS
             and not self._close_session_atask
         ):
@@ -452,17 +384,21 @@ def _on_user_text_input(self, reader: rtc.TextStreamReader, participant_identity
             logger.warning("participant not found, ignoring text input")
             return
 
-        async def _read_text() -> None:
+        async def _read_text(text_input_cb: TextInputCallback) -> None:
             text = await reader.read_all()
 
-            text_input_result = self._input_options.text_input_cb(
+            text_input_result = text_input_cb(
                 self._agent_session,
                 TextInputEvent(text=text, info=reader.info, participant=participant),
             )
             if asyncio.iscoroutine(text_input_result):
                 await text_input_result
 
-        task = asyncio.create_task(_read_text())
+        if self._text_input_cb is None:
+            logger.error("text input callback is not set, ignoring text input")
+            return
+
+        task = asyncio.create_task(_read_text(self._text_input_cb))
         self._tasks.add(task)
         task.add_done_callback(self._tasks.discard)
 
diff --git a/livekit-agents/livekit/agents/voice/room_io/types.py b/livekit-agents/livekit/agents/voice/room_io/types.py
new file mode 100644
index 0000000000..1ca68a6a5b
--- /dev/null
+++ b/livekit-agents/livekit/agents/voice/room_io/types.py
@@ -0,0 +1,258 @@
+from __future__ import annotations
+
+from collections.abc import Coroutine
+from dataclasses import dataclass, field
+from typing import TYPE_CHECKING, Callable, Optional
+
+from livekit import rtc
+
+from ...log import logger
+from ...types import NOT_GIVEN, NotGivenOr
+from ...utils import is_given
+
+if TYPE_CHECKING:
+    from ..agent_session import AgentSession
+
+
+DEFAULT_PARTICIPANT_KINDS: list[rtc.ParticipantKind.ValueType] = [
+    rtc.ParticipantKind.PARTICIPANT_KIND_SIP,
+    rtc.ParticipantKind.PARTICIPANT_KIND_STANDARD,
+]
+
+DEFAULT_CLOSE_ON_DISCONNECT_REASONS: list[rtc.DisconnectReason.ValueType] = [
+    rtc.DisconnectReason.CLIENT_INITIATED,
+    rtc.DisconnectReason.ROOM_DELETED,
+    rtc.DisconnectReason.USER_REJECTED,
+]
+
+
+@dataclass
+class TextInputEvent:
+    text: str  # full content read from the incoming text stream
+    info: rtc.TextStreamInfo  # the text stream reader's `info`
+    participant: rtc.RemoteParticipant  # remote participant the text stream came from
+
+
+TextInputCallback = Callable[
+    ["AgentSession", TextInputEvent], Optional[Coroutine[None, None, None]]
+]
+
+
+def _default_text_input_cb(sess: AgentSession, ev: TextInputEvent) -> None:
+    sess.interrupt()  # default handler: interrupt the session first...
+    sess.generate_reply(user_input=ev.text)  # ...then reply to the received text
+
+
+@dataclass
+class TextInputOptions:
+    text_input_cb: TextInputCallback = _default_text_input_cb  # run for each text input
+
+
+@dataclass
+class AudioInputOptions:
+    sample_rate: int = 24000  # forwarded to the participant audio input stream
+    num_channels: int = 1  # forwarded to the participant audio input stream
+    noise_cancellation: rtc.NoiseCancellationOptions | None = None  # forwarded as-is
+    pre_connect_audio: bool = True
+    """Pre-connect audio enabled or not."""
+    pre_connect_audio_timeout: float = 3.0
+    """The pre-connect audio will be ignored if it doesn't arrive within this time."""
+
+
+@dataclass
+class VideoInputOptions:
+    pass  # no fields: the instance's presence alone enables video input
+
+
+@dataclass
+class AudioOutputOptions:
+    sample_rate: int = 24000  # forwarded to the participant audio output
+    num_channels: int = 1  # forwarded to the participant audio output
+    track_publish_options: rtc.TrackPublishOptions = field(
+        default_factory=lambda: rtc.TrackPublishOptions(source=rtc.TrackSource.SOURCE_MICROPHONE)
+    )  # default is built per instance with the microphone track source
+    track_name: NotGivenOr[str] = NOT_GIVEN
+    """The name of the audio track to publish. If not provided, default to "roomio_audio"."""
+
+
+@dataclass
+class TextOutputOptions:
+    sync_transcription: NotGivenOr[bool] = NOT_GIVEN
+    """Whether to synchronize transcription with audio output. If not given, default to True.
+    If False, transcription is emitted as quickly as available."""
+    transcription_speed_factor: float = 1.0
+    """Speed factor of transcription synchronization with audio output.
+    Only effective when transcription is synchronized (`sync_transcription` is not False)."""
+
+
+@dataclass
+class RoomOptions:
+    text_input: NotGivenOr[TextInputOptions | bool] = NOT_GIVEN
+    """The text input options. If not provided, default to True."""
+    audio_input: NotGivenOr[AudioInputOptions | bool] = NOT_GIVEN
+    """The audio input options. If not provided, default to True."""
+    video_input: NotGivenOr[VideoInputOptions | bool] = NOT_GIVEN
+    """The video input options. If not provided, default to False."""
+    audio_output: NotGivenOr[AudioOutputOptions | bool] = NOT_GIVEN
+    """The audio output options. If not provided, default to True."""
+    text_output: NotGivenOr[TextOutputOptions | bool] = NOT_GIVEN
+    """The transcription output options. If not provided, default to True."""
+
+    participant_kinds: NotGivenOr[list[rtc.ParticipantKind.ValueType]] = NOT_GIVEN
+    """Participant kinds accepted for auto subscription. If not provided,
+    accept `DEFAULT_PARTICIPANT_KINDS`."""
+    participant_identity: NotGivenOr[str] = NOT_GIVEN
+    """The participant to link to. If not provided, link to the first participant.
+    Can be overridden by the `participant` argument of RoomIO constructor or `set_participant`."""
+    close_on_disconnect: bool = True
+    """Close the AgentSession if the linked participant disconnects with reasons in
+    CLIENT_INITIATED, ROOM_DELETED, or USER_REJECTED."""
+
+    def get_text_input_options(self) -> TextInputOptions | None:
+        if isinstance(self.text_input, TextInputOptions):
+            return self.text_input
+        return TextInputOptions() if self.text_input is not False else None  # unset => enabled
+
+    def get_audio_input_options(self) -> AudioInputOptions | None:
+        if isinstance(self.audio_input, AudioInputOptions):
+            return self.audio_input
+        return AudioInputOptions() if self.audio_input is not False else None  # unset => enabled
+
+    def get_video_input_options(self) -> VideoInputOptions | None:
+        if isinstance(self.video_input, VideoInputOptions):
+            return self.video_input
+        return VideoInputOptions() if self.video_input is True else None  # unset => disabled
+
+    def get_audio_output_options(self) -> AudioOutputOptions | None:
+        if isinstance(self.audio_output, AudioOutputOptions):
+            return self.audio_output
+        return AudioOutputOptions() if self.audio_output is not False else None  # unset => enabled
+
+    def get_text_output_options(self) -> TextOutputOptions | None:
+        if isinstance(self.text_output, TextOutputOptions):
+            return self.text_output
+        return TextOutputOptions() if self.text_output is not False else None  # unset => enabled
+
+    @classmethod
+    def _ensure_options(
+        cls,
+        options: NotGivenOr[RoomOptions],
+        *,
+        room_input_options: NotGivenOr[RoomInputOptions] = NOT_GIVEN,
+        room_output_options: NotGivenOr[RoomOutputOptions] = NOT_GIVEN,
+    ) -> RoomOptions:
+        """Return `options` if given, else convert the deprecated options, else use defaults."""
+        if is_given(room_input_options) or is_given(room_output_options):
+            logger.warning(
+                "RoomInputOptions and RoomOutputOptions are deprecated, use RoomOptions instead"
+            )
+            if not is_given(options):
+                return cls._create_from_legacy(room_input_options, room_output_options)
+
+        return options if isinstance(options, RoomOptions) else cls()
+
+    @classmethod
+    def _create_from_legacy(
+        cls,
+        input_options: NotGivenOr[RoomInputOptions],
+        output_options: NotGivenOr[RoomOutputOptions],
+    ) -> RoomOptions:
+        """Convert the deprecated input/output options into an equivalent RoomOptions."""
+        opts = cls()
+        if input_options:
+            opts.text_input = (
+                TextInputOptions(text_input_cb=input_options.text_input_cb)
+                if input_options.text_enabled is not False
+                else False
+            )
+            opts.audio_input = (
+                AudioInputOptions(
+                    sample_rate=input_options.audio_sample_rate,
+                    num_channels=input_options.audio_num_channels,
+                    noise_cancellation=input_options.noise_cancellation,
+                    pre_connect_audio=input_options.pre_connect_audio,
+                    pre_connect_audio_timeout=input_options.pre_connect_audio_timeout,
+                )
+                if input_options.audio_enabled is not False
+                else False
+            )
+            opts.video_input = input_options.video_enabled
+            opts.participant_kinds = input_options.participant_kinds
+            opts.participant_identity = input_options.participant_identity
+            opts.close_on_disconnect = input_options.close_on_disconnect
+
+        if output_options:
+            opts.audio_output = (
+                AudioOutputOptions(
+                    sample_rate=output_options.audio_sample_rate,
+                    num_channels=output_options.audio_num_channels,
+                    track_publish_options=output_options.audio_publish_options,
+                    track_name=output_options.audio_track_name,
+                )
+                if output_options.audio_enabled is not False
+                else False
+            )
+            opts.text_output = (
+                TextOutputOptions(
+                    sync_transcription=output_options.sync_transcription,
+                    transcription_speed_factor=output_options.transcription_speed_factor,
+                )
+                if output_options.transcription_enabled is not False
+                else False
+            )
+        return opts
+
+
+# RoomInputOptions and RoomOutputOptions are deprecated, use RoomOptions instead
+
+
+@dataclass
+class RoomInputOptions:
+    text_enabled: NotGivenOr[bool] = NOT_GIVEN
+    """If not given, default to True."""
+    audio_enabled: NotGivenOr[bool] = NOT_GIVEN
+    """If not given, default to True."""
+    video_enabled: NotGivenOr[bool] = NOT_GIVEN
+    """If not given, default to False."""
+    audio_sample_rate: int = 24000  # now AudioInputOptions.sample_rate
+    audio_num_channels: int = 1  # now AudioInputOptions.num_channels
+    noise_cancellation: rtc.NoiseCancellationOptions | None = None  # now on AudioInputOptions
+    text_input_cb: TextInputCallback = _default_text_input_cb  # now on TextInputOptions
+    participant_kinds: NotGivenOr[list[rtc.ParticipantKind.ValueType]] = NOT_GIVEN
+    """Participant kinds accepted for auto subscription. If not provided,
+    accept `DEFAULT_PARTICIPANT_KINDS`."""
+    participant_identity: NotGivenOr[str] = NOT_GIVEN
+    """The participant to link to. If not provided, link to the first participant.
+    Can be overridden by the `participant` argument of RoomIO constructor or `set_participant`."""
+    pre_connect_audio: bool = True
+    """Pre-connect audio enabled or not."""
+    pre_connect_audio_timeout: float = 3.0
+    """The pre-connect audio will be ignored if it doesn't arrive within this time."""
+    close_on_disconnect: bool = True
+    """Close the AgentSession if the linked participant disconnects with reasons in
+    CLIENT_INITIATED, ROOM_DELETED, or USER_REJECTED."""
+
+
+@dataclass
+class RoomOutputOptions:
+    transcription_enabled: NotGivenOr[bool] = NOT_GIVEN
+    """If not given, default to True."""
+    audio_enabled: NotGivenOr[bool] = NOT_GIVEN
+    """If not given, default to True."""
+    audio_sample_rate: int = 24000  # now AudioOutputOptions.sample_rate
+    audio_num_channels: int = 1  # now AudioOutputOptions.num_channels
+    audio_publish_options: rtc.TrackPublishOptions = field(
+        default_factory=lambda: rtc.TrackPublishOptions(source=rtc.TrackSource.SOURCE_MICROPHONE)
+    )  # now AudioOutputOptions.track_publish_options
+    audio_track_name: NotGivenOr[str] = NOT_GIVEN
+    """The name of the audio track to publish. If not provided, default to "roomio_audio"."""
+    sync_transcription: NotGivenOr[bool] = NOT_GIVEN
+    """Whether to synchronize transcription with audio output. If not given, default to True.
+    If False, transcription is emitted as quickly as available."""
+    transcription_speed_factor: float = 1.0
+    """Speed factor of transcription synchronization with audio output.
+    Only effective when transcription is synchronized (`sync_transcription` is not False)."""
+
+
+# DEFAULT_ROOM_INPUT_OPTIONS = RoomInputOptions()
+# DEFAULT_ROOM_OUTPUT_OPTIONS = RoomOutputOptions()