Skip to content

Commit a3a7974

Browse files
committed
feat(speechmatics): add max_speakers parameter for speaker diarization
- Added max_speakers parameter to STT __init__ method
- Updated STTOptions dataclass to include max_speakers field
- Modified _process_config to include max_speakers in speaker_diarization_config
- Added handling for extracting max_speakers from deprecated transcription_config
- Updated documentation to explain the new parameter

This parameter allows limiting the number of unique speakers detected during diarization, which is useful for scenarios with a known number of participants (e.g., 2-person interviews, small group meetings with fixed participants).
1 parent 1bc1bf8 commit a3a7974

File tree

1 file changed

+14
-0
lines changed
  • livekit-plugins/livekit-plugins-speechmatics/livekit/plugins/speechmatics

1 file changed

+14
-0
lines changed

livekit-plugins/livekit-plugins-speechmatics/livekit/plugins/speechmatics/stt.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ class STTOptions:
7171
additional_vocab: list[AdditionalVocabEntry] = dataclasses.field(default_factory=list)
7272
punctuation_overrides: dict = dataclasses.field(default_factory=dict)
7373
diarization_sensitivity: float = 0.5
74+
max_speakers: int | None = None
7475
speaker_active_format: str = "{text}"
7576
speaker_passive_format: str = "{text}"
7677
prefer_current_speaker: bool = False
@@ -98,6 +99,7 @@ def __init__(
9899
additional_vocab: NotGivenOr[list[AdditionalVocabEntry]] = NOT_GIVEN,
99100
punctuation_overrides: NotGivenOr[dict] = NOT_GIVEN,
100101
diarization_sensitivity: float = 0.5,
102+
max_speakers: NotGivenOr[int] = NOT_GIVEN,
101103
speaker_active_format: str = "{text}",
102104
speaker_passive_format: str = "{text}",
103105
prefer_current_speaker: bool = False,
@@ -172,6 +174,11 @@ def __init__(
172174
the sensitivity of diarization and helps when two or more speakers have similar voices.
173175
Defaults to 0.5.
174176
177+
max_speakers (int): Maximum number of speakers to detect during diarization. When set,
178+
the STT engine will limit the number of unique speakers identified in the transcription.
179+
This is useful for scenarios where you know the maximum number of participants (e.g.,
180+
2-person interviews, small group meetings). Optional.
181+
175182
speaker_active_format (str): Formatter for active speaker ID. This formatter is used
176183
to format the text output for individual speakers and ensures that the context is
177184
clear for language models further down the pipeline. The attributes `text` and
@@ -259,6 +266,10 @@ def __init__(
259266
if is_given(punctuation_overrides)
260267
else config.punctuation_overrides
261268
)
269+
# Extract max_speakers from speaker_diarization_config if present
270+
if not is_given(max_speakers) and hasattr(config, 'speaker_diarization_config'):
271+
if config.speaker_diarization_config and 'max_speakers' in config.speaker_diarization_config:
272+
max_speakers = config.speaker_diarization_config['max_speakers']
262273

263274
if is_given(audio_settings):
264275
logger.warning(
@@ -282,6 +293,7 @@ def __init__(
282293
additional_vocab=additional_vocab if is_given(additional_vocab) else [],
283294
punctuation_overrides=punctuation_overrides if is_given(punctuation_overrides) else {},
284295
diarization_sensitivity=diarization_sensitivity,
296+
max_speakers=max_speakers if is_given(max_speakers) else None,
285297
speaker_active_format=speaker_active_format,
286298
speaker_passive_format=speaker_passive_format,
287299
prefer_current_speaker=prefer_current_speaker,
@@ -382,6 +394,8 @@ def _process_config(self) -> None:
382394
dz_cfg: dict[str, Any] = {}
383395
if self._stt_options.diarization_sensitivity is not None:
384396
dz_cfg["speaker_sensitivity"] = self._stt_options.diarization_sensitivity
397+
if self._stt_options.max_speakers is not None:
398+
dz_cfg["max_speakers"] = self._stt_options.max_speakers
385399
if self._stt_options.prefer_current_speaker is not None:
386400
dz_cfg["prefer_current_speaker"] = self._stt_options.prefer_current_speaker
387401
if self._stt_options.known_speakers:

0 commit comments

Comments
 (0)