livekit · Shubhrakanti · Sep 9, 2025 · Sep 9, 2025 · Sep 9, 2025 · Sep 9, 2025
diff --git a/examples/voice_agents/nvidia_test.py b/examples/voice_agents/nvidia_test.py
@@ -0,0 +1,65 @@
+import logging
+
+from dotenv import load_dotenv
+
+from livekit.agents import (
+    Agent,
+    AgentSession,
+    JobContext,
+    JobProcess,
+    RoomInputOptions,
+    RoomOutputOptions,
+    WorkerOptions,
+    cli,
+)
+from livekit.plugins import nvidia, openai, silero
+from livekit.plugins.turn_detector.multilingual import MultilingualModel
+
+# uncomment to enable Krisp background voice/noise cancellation
+# from livekit.plugins import noise_cancellation
+
+logger = logging.getLogger("basic-agent")  # logger for this example script
+
+load_dotenv()  # load settings (e.g. API keys) from a local .env file into the environment
+
+
+def prewarm(proc: JobProcess):
+    proc.userdata["vad"] = silero.VAD.load()
+
+
+async def entrypoint(ctx: JobContext):
+    # each log entry will include these fields
+    ctx.log_context_fields = {
+        "room": ctx.room.name,
+    }
+
+    session = AgentSession(
+        vad=ctx.proc.userdata["vad"],
+        # any combination of STT, LLM, TTS, or realtime API can be used
+        llm=openai.LLM(model="gpt-4o-mini"),
+        stt=nvidia.STT(),
+        tts=nvidia.TTS(),
+        # allow the LLM to generate a response while waiting for the end of turn
+        preemptive_generation=True,
+        # sometimes background noise could interrupt the agent session, these are considered false positive interruptions
+        # when it's detected, you may resume the agent's speech
+        resume_false_interruption=True,
+        false_interruption_timeout=1.0,
+        min_interruption_duration=0.2,  # with false interruption resume, interruption can be more sensitive
+        # use LiveKit's turn detection model
+        turn_detection=MultilingualModel(),
+    )
+
+    await session.start(
+        agent=Agent(instructions="You are a helpful voice AI assistant."),
+        room=ctx.room,
+        room_input_options=RoomInputOptions(
+            # uncomment to enable Krisp BVC noise cancellation
+            # noise_cancellation=noise_cancellation.BVC(),
+        ),
+        room_output_options=RoomOutputOptions(transcription_enabled=True),
+    )
+
+
+if __name__ == "__main__":
+    cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint, prewarm_fnc=prewarm))
diff --git a/livekit-plugins/livekit-plugins-nvidia/README.md b/livekit-plugins/livekit-plugins-nvidia/README.md
@@ -0,0 +1,127 @@
+# LiveKit NVIDIA Plugin
+
+This plugin provides Speech-to-Text (STT) and Text-to-Speech (TTS) capabilities using NVIDIA's Riva models through the NVIDIA Cloud Functions API. This README covers the STT component.
+
+## Features
+
+- **Streaming Recognition**: Real-time speech-to-text conversion
+- **Interim Results**: Get partial transcripts as users speak
+- **Multiple Models**: Support for NVIDIA's Parakeet models
+- **Automatic Punctuation**: Built-in punctuation support
+- **Language Support**: Configurable language codes
+
+## Installation
+
+1. Install the nvidia-riva-client dependency:
+
+```bash
+pip install nvidia-riva-client
+```
+
+2. Set up your NVIDIA API key:
+
+```bash
+export NVIDIA_API_KEY="your_api_key_here"
+```
+
+## Usage
+
+### Basic Usage
+
+```python
+import os
+from livekit.plugins.nvidia import STT
+
+# Initialize the STT
+stt = STT(
+    model="parakeet-1.1b-en-US-asr-streaming-silero-vad-sortformer",
+    function_id="1598d209-5e27-4d3c-8079-4751568b1081",
+    api_key=os.getenv("NVIDIA_API_KEY")  # or pass directly
+)
+
+# Create a recognition stream
+stream = stt.stream(language="en-US")
+```
+
+### Configuration Options
+
+- `model`: NVIDIA ASR model to use (default: parakeet-1.1b-en-US-asr-streaming-silero-vad-sortformer)
+- `function_id`: NVIDIA function ID for the API (default: 1598d209-5e27-4d3c-8079-4751568b1081)
+- `punctuate`: Enable automatic punctuation (default: True)
+- `language_code`: Language code for recognition (default: en-US)
+- `sample_rate`: Audio sample rate in Hz (default: 16000)
+- `server`: NVIDIA server endpoint (default: grpc.nvcf.nvidia.com:443)
+- `api_key`: NVIDIA API key (can also use NVIDIA_API_KEY env var)
+
+### Available Models
+
+Based on testing, the following models are confirmed to work:
+
+**Streaming Models:**
+
+- `parakeet-1.1b-en-US-asr-streaming-silero-vad-sortformer` (Function ID: 1598d209-5e27-4d3c-8079-4751568b1081)
+
+**Offline Models:**
+
+- `parakeet-1.1b-en-US-asr-offline-silero-vad-sortformer` (Function ID: 1598d209-5e27-4d3c-8079-4751568b1081)
+
+### Example Agent
+
+```python
+import asyncio
+from livekit import rtc
+from livekit.agents import JobContext, WorkerOptions, cli
+from livekit.plugins.nvidia import STT
+
+async def entrypoint(ctx: JobContext):
+    await ctx.connect()
+
+    # Create STT instance
+    stt = STT()
+
+    # Log participants as they join; wiring `stt` to their audio tracks is left out of this minimal example
+    @ctx.room.on("participant_connected")
+    def on_participant_connected(participant: rtc.RemoteParticipant):
+        print(f"Participant connected: {participant.identity}")
+
+if __name__ == "__main__":
+    cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint))
+```
+
+## Testing
+
+Run the test script to verify your setup:
+
+```bash
+export NVIDIA_API_KEY="your_api_key_here"
+python test_nvidia_stt.py
+```
+
+## Requirements
+
+- Python 3.9+
+- nvidia-riva-client
+- livekit-agents
+- Valid NVIDIA API key
+
+## Troubleshooting
+
+### Common Issues
+
+1. **Import Error for riva.client**: Make sure nvidia-riva-client is installed
+2. **API Key Error**: Ensure NVIDIA_API_KEY is set in your environment
+3. **Model Not Available**: Check that you're using a supported model name and function ID
+4. **Connection Issues**: Verify your internet connection and NVIDIA API access
+
+### Debugging
+
+Enable debug logging to see detailed information:
+
+```python
+import logging
+logging.basicConfig(level=logging.DEBUG)
+```
+
+## License
+
+This plugin is licensed under the Apache License 2.0.
diff --git a/livekit-plugins/livekit-plugins-nvidia/livekit/plugins/nvidia/__init__.py b/livekit-plugins/livekit-plugins-nvidia/livekit/plugins/nvidia/__init__.py
@@ -0,0 +1,42 @@
+# Copyright 2025 LiveKit, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from .stt import STT, SpeechStream
+from .tts import TTS, SynthesizeStream
+from .version import __version__
+
+__all__ = ["STT", "SpeechStream", "TTS", "SynthesizeStream", "__version__"]
+
+
+from livekit.agents import Plugin
+
+from .log import logger
+
+
+class NVIDIAPlugin(Plugin):
+    def __init__(self) -> None:
+        super().__init__(__name__, __version__, __package__, logger)
+
+
+Plugin.register_plugin(NVIDIAPlugin())  # import-time side effect: registers this plugin
+
+# Cleanup docs of unexported modules: every module-level name missing from __all__ is hidden from pdoc
+_module = dir()  # snapshot taken before NOT_IN_ALL / __pdoc__ exist, so they are not listed
+NOT_IN_ALL = [m for m in _module if m not in __all__]
+
+__pdoc__ = {}  # pdoc convention: a name mapped to False is left out of the generated docs
+
+for n in NOT_IN_ALL:
+    __pdoc__[n] = False
diff --git a/livekit-plugins/livekit-plugins-nvidia/livekit/plugins/nvidia/log.py b/livekit-plugins/livekit-plugins-nvidia/livekit/plugins/nvidia/log.py
@@ -0,0 +1,3 @@
+import logging
+
+logger = logging.getLogger("livekit.plugins.nvidia")  # shared plugin logger; __init__.py hands it to the Plugin base class
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		import logging

		logger = logging.getLogger("livekit.plugins.nvidia")