KoljaB · hungdoba · Aug 28, 2025
diff --git a/README.md b/README.md
@@ -507,6 +507,11 @@ When you initialize the `AudioToTextRecorder` class, you have various options to
 - **beam_size_realtime** (int, default=3): The beam size to use for real-time transcription beam search decoding.
 
 #### Voice Activation Parameters
+- **silero_repo_or_dir** (str, default="snakers4/silero-vad"): Specifies the repository or directory from which to load the Silero VAD model. You can provide a GitHub repository name for remote loading or a local directory path for loading a custom model.
+
+- **silero_source** (str, default="github"): Specifies the source to use for loading the Silero VAD model. Set to "github" to load from a remote repository, or "local" to load from local files; with "local", **silero_repo_or_dir** must be a path to a local directory.
+
+- **silero_model** (str, default="silero_vad"): Specifies the model name to use for Silero VAD. Use "silero_vad" for the default model, or specify a custom model name if needed.
 
 - **silero_sensitivity** (float, default=0.6): Sensitivity for Silero's voice activity detection ranging from 0 (least sensitive) to 1 (most sensitive). Default is 0.6.
 

diff --git a/RealtimeSTT/audio_recorder.py b/RealtimeSTT/audio_recorder.py
@@ -69,6 +69,9 @@
 INIT_MODEL_TRANSCRIPTION_REALTIME = "tiny"
 INIT_REALTIME_PROCESSING_PAUSE = 0.2
 INIT_REALTIME_INITIAL_PAUSE = 0.2
+INIT_SILERO_REPO_OR_DIR = "snakers4/silero-vad"
+INIT_SILERO_SOURCE = "github"
+INIT_SILERO_MODEL = "silero_vad"
 INIT_SILERO_SENSITIVITY = 0.4
 INIT_WEBRTC_SENSITIVITY = 3
 INIT_POST_SPEECH_SILENCE_DURATION = 0.6
@@ -279,6 +282,9 @@ def __init__(self,
                  realtime_batch_size: int = 16,
 
                  # Voice activation parameters
+                 silero_repo_or_dir: str = INIT_SILERO_REPO_OR_DIR,
+                 silero_source: str = INIT_SILERO_SOURCE,
+                 silero_model: str = INIT_SILERO_MODEL,
                  silero_sensitivity: float = INIT_SILERO_SENSITIVITY,
                  silero_use_onnx: bool = False,
                  silero_deactivity_detection: bool = False,
@@ -418,6 +424,12 @@ def __init__(self,
             slight delay compared to the regular real-time updates.
         - realtime_batch_size (int, default=16): Batch size for the real-time
             transcription model.
+        - silero_repo_or_dir (str, default="snakers4/silero-vad"): Specifies the repository or directory
+            from which to load the Silero VAD model. It can be a GitHub repo name or a local directory.
+        - silero_source (str, default="github"): Specifies the source to use for loading the Silero VAD model.
+            Either "github" for remote loading, or "local" for local files.
+        - silero_model (str, default="silero_vad"): Specifies the model name to use for Silero VAD.
+            Usually "silero_vad" unless using a custom model.
         - silero_sensitivity (float, default=SILERO_SENSITIVITY): Sensitivity
             for the Silero Voice Activity Detection model ranging from 0
             (least sensitive) to 1 (most sensitive). Default is 0.5.
@@ -920,8 +932,9 @@ def __init__(self,
         # Setup voice activity detection model Silero VAD
         try:
             self.silero_vad_model, _ = torch.hub.load(
-                repo_or_dir="snakers4/silero-vad",
-                model="silero_vad",
+                repo_or_dir=silero_repo_or_dir,
+                source=silero_source,
+                model=silero_model,
                 verbose=False,
                 onnx=silero_use_onnx
             )

diff --git a/tests/offiline_vad.py b/tests/offiline_vad.py
@@ -0,0 +1,15 @@
+from RealtimeSTT import AudioToTextRecorder
+import os
+
+
+def test_offline_vad():
+    local_dir = os.path.abspath("./silero-vad")  # resolved against the current working directory; NOTE(review): presumably a silero-vad checkout must exist there - confirm
+    source = "local"  # passed through to torch.hub.load(source=...), so repo_or_dir is read as a filesystem path
+    recorder = AudioToTextRecorder(
+        silero_repo_or_dir=local_dir, silero_source=source, silero_deactivity_detection=True)
+    assert recorder.silero_vad_model is not None, "Failed to load Silero VAD model offline"  # attribute is assigned from torch.hub.load in __init__
+    print("Offline VAD test passed!")
+
+
+if __name__ == '__main__':  # lets the file be run directly as a script
+    test_offline_vad()