Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -507,6 +507,11 @@ When you initialize the `AudioToTextRecorder` class, you have various options to
- **beam_size_realtime** (int, default=3): The beam size to use for real-time transcription beam search decoding.

#### Voice Activation Parameters
- **silero_repo_or_dir** (str, default="snakers4/silero-vad"): Specifies the repository or directory from which to load the Silero VAD model. You can provide a GitHub repository name for remote loading or a local directory path for loading a custom model.

- **silero_source** (str, default="github"): Specifies the source to use for loading the Silero VAD model. Set to "github" to load from a remote repository, or "local" to load from local files.

- **silero_model** (str, default="silero_vad"): Specifies the model name to use for Silero VAD. Use "silero_vad" for the default model, or specify a custom model name if needed.

- **silero_sensitivity** (float, default=0.4): Sensitivity for Silero's voice activity detection ranging from 0 (least sensitive) to 1 (most sensitive). Default is 0.4.

Expand Down
17 changes: 15 additions & 2 deletions RealtimeSTT/audio_recorder.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,9 @@
INIT_MODEL_TRANSCRIPTION_REALTIME = "tiny"
INIT_REALTIME_PROCESSING_PAUSE = 0.2
INIT_REALTIME_INITIAL_PAUSE = 0.2
# Defaults for the silero_repo_or_dir / silero_source / silero_model
# constructor parameters, which are passed to torch.hub.load as
# repo_or_dir / source / model when the Silero VAD model is set up.
INIT_SILERO_REPO_OR_DIR = "snakers4/silero-vad"
INIT_SILERO_SOURCE = "github"
INIT_SILERO_MODEL = "silero_vad"
# Default for the silero_sensitivity constructor parameter.
INIT_SILERO_SENSITIVITY = 0.4
INIT_WEBRTC_SENSITIVITY = 3
INIT_POST_SPEECH_SILENCE_DURATION = 0.6
Expand Down Expand Up @@ -279,6 +282,9 @@ def __init__(self,
realtime_batch_size: int = 16,

# Voice activation parameters
silero_repo_or_dir: str = INIT_SILERO_REPO_OR_DIR,
silero_source: str = INIT_SILERO_SOURCE,
silero_model: str = INIT_SILERO_MODEL,
silero_sensitivity: float = INIT_SILERO_SENSITIVITY,
silero_use_onnx: bool = False,
silero_deactivity_detection: bool = False,
Expand Down Expand Up @@ -418,6 +424,12 @@ def __init__(self,
slight delay compared to the regular real-time updates.
- realtime_batch_size (int, default=16): Batch size for the real-time
transcription model.
- silero_repo_or_dir (str, default="snakers4/silero-vad"): Specifies the repository or directory
from which to load the Silero VAD model. It can be a GitHub repo name or a local directory.
- silero_source (str, default="github"): Specifies the source to use for loading the Silero VAD model.
Typically "github" for remote loading, or "local" for local files.
- silero_model (str, default="silero_vad"): Specifies the model name to use for Silero VAD.
Usually "silero_vad" unless using a custom model.
- silero_sensitivity (float, default=INIT_SILERO_SENSITIVITY): Sensitivity
for the Silero Voice Activity Detection model ranging from 0
(least sensitive) to 1 (most sensitive). Default is 0.4.
Expand Down Expand Up @@ -920,8 +932,9 @@ def __init__(self,
# Setup voice activity detection model Silero VAD
try:
self.silero_vad_model, _ = torch.hub.load(
repo_or_dir="snakers4/silero-vad",
model="silero_vad",
repo_or_dir=silero_repo_or_dir,
source=silero_source,
model=silero_model,
verbose=False,
onnx=silero_use_onnx
)
Expand Down
15 changes: 15 additions & 0 deletions tests/offiline_vad.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from RealtimeSTT import AudioToTextRecorder
import os


def test_offline_vad():
    """Check that the Silero VAD model can be loaded from a local directory.

    Builds an ``AudioToTextRecorder`` whose Silero settings point at the
    ``./silero-vad`` directory (made absolute relative to the current working
    directory) with the ``"local"`` source, then asserts that the recorder
    exposes a non-``None`` ``silero_vad_model``.
    """
    recorder_options = {
        "silero_repo_or_dir": os.path.abspath("./silero-vad"),
        "silero_source": "local",
        "silero_deactivity_detection": True,
    }
    recorder = AudioToTextRecorder(**recorder_options)

    vad_model = recorder.silero_vad_model
    assert vad_model is not None, "Failed to load Silero VAD model offline"
    print("Offline VAD test passed!")


if __name__ == "__main__":
    # Allow running this file directly as a script, without a test runner.
    test_offline_vad()