diff --git a/crates/cactus/src/stt/mod.rs b/crates/cactus/src/stt/mod.rs index 12756fe2d4..133fe88bad 100644 --- a/crates/cactus/src/stt/mod.rs +++ b/crates/cactus/src/stt/mod.rs @@ -36,4 +36,8 @@ pub struct TranscribeOptions { pub min_chunk_size: Option, #[serde(skip_serializing_if = "Option::is_none")] pub confirmation_threshold: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub custom_vocabulary: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub vocabulary_boost: Option, } diff --git a/crates/transcribe-cactus/src/service/batch/transcribe.rs b/crates/transcribe-cactus/src/service/batch/transcribe.rs index 087a355ac4..97e609e872 100644 --- a/crates/transcribe-cactus/src/service/batch/transcribe.rs +++ b/crates/transcribe-cactus/src/service/batch/transcribe.rs @@ -25,8 +25,15 @@ pub(super) fn transcribe_batch( let model = hypr_cactus::Model::new(model_path)?; + let custom_vocabulary = if params.keywords.is_empty() { + None + } else { + Some(params.keywords.clone()) + }; + let options = hypr_cactus::TranscribeOptions { language: hypr_cactus::constrain_to(&params.languages), + custom_vocabulary, ..Default::default() }; diff --git a/crates/transcribe-cactus/src/service/streaming/session.rs b/crates/transcribe-cactus/src/service/streaming/session.rs index 1171b378dd..7d512f2500 100644 --- a/crates/transcribe-cactus/src/service/streaming/session.rs +++ b/crates/transcribe-cactus/src/service/streaming/session.rs @@ -53,9 +53,16 @@ pub(super) async fn handle_websocket( let total_channels = (params.channels as i32).max(1) as usize; let chunk_size_ms = 300; + let custom_vocabulary = if params.keywords.is_empty() { + None + } else { + Some(params.keywords.clone()) + }; + let options = hypr_cactus::TranscribeOptions { language: hypr_cactus::constrain_to(&params.languages), min_chunk_size: Some((cactus_config.min_chunk_sec * SAMPLE_RATE as f32) as u32), + custom_vocabulary, ..Default::default() };