Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions crates/config/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,9 @@ pub struct VoiceOpenAiConfig {
deserialize_with = "crate::schema::deserialize_option_secret"
)]
pub api_key: Option<Secret<String>>,
/// API base URL (default: https://api.openai.com/v1).
/// Override for OpenAI-compatible TTS servers (e.g. Chatterbox).
pub base_url: Option<String>,
/// Voice to use for TTS (alloy, echo, fable, onyx, nova, shimmer).
pub voice: Option<String>,
/// Model to use for TTS (tts-1, tts-1-hd).
Expand Down Expand Up @@ -630,6 +633,9 @@ pub struct VoiceWhisperConfig {
deserialize_with = "crate::schema::deserialize_option_secret"
)]
pub api_key: Option<Secret<String>>,
/// API base URL (default: https://api.openai.com/v1).
/// Override for OpenAI-compatible STT servers (e.g. faster-whisper-server).
pub base_url: Option<String>,
/// Model to use (whisper-1).
pub model: Option<String>,
/// Language hint (ISO 639-1 code).
Expand Down
1 change: 1 addition & 0 deletions crates/config/src/template.rs
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,7 @@ providers = ["whisper", "mistral", "elevenlabs"] # UI allowlist (empty = show al

# No api_key needed for OpenAI TTS/Whisper when OpenAI is configured as an LLM provider.
# [voice.tts.openai]
# base_url = "https://api.openai.com/v1" # API endpoint (change for Chatterbox, etc.)
# voice = "alloy" # alloy, echo, fable, onyx, nova, shimmer
# model = "tts-1" # tts-1 or tts-1-hd
Comment on lines 554 to 558
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Whisper STT base_url missing from config template

The template was updated to document base_url under [voice.tts.openai], but there is no corresponding template entry for the Whisper STT section. Users generating a fresh config from the template won't see the new base_url field for Whisper, making it harder to discover this feature for STT.

Consider adding a commented-out entry near the Whisper STT configuration block:

# [voice.stt.whisper]
# base_url = "https://api.openai.com/v1"  # API endpoint (change for faster-whisper-server, etc.)
# model = "whisper-1"


Expand Down
2 changes: 2 additions & 0 deletions crates/config/src/validate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -559,6 +559,7 @@ fn build_schema_map() -> KnownKeys {
"openai",
Struct(HashMap::from([
("api_key", Leaf),
("base_url", Leaf),
("voice", Leaf),
("model", Leaf),
])),
Expand Down Expand Up @@ -594,6 +595,7 @@ fn build_schema_map() -> KnownKeys {
"whisper",
Struct(HashMap::from([
("api_key", Leaf),
("base_url", Leaf),
("model", Leaf),
("language", Leaf),
])),
Expand Down
2 changes: 1 addition & 1 deletion crates/gateway/src/methods/services.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3700,7 +3700,7 @@ pub(super) fn register(reg: &mut MethodRegistry) {
"enabled": config.voice.tts.enabled,
"provider": config.voice.tts.provider,
"elevenlabs_configured": config.voice.tts.elevenlabs.api_key.is_some(),
"openai_configured": config.voice.tts.openai.api_key.is_some(),
"openai_configured": config.voice.tts.openai.api_key.is_some() || config.voice.tts.openai.base_url.is_some(),
},
"stt": {
"enabled": config.voice.stt.enabled,
Expand Down
1 change: 1 addition & 0 deletions crates/gateway/src/methods/voice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,7 @@ pub(super) async fn detect_voice_providers(
"tts",
"cloud",
config.voice.tts.openai.api_key.is_some()
|| config.voice.tts.openai.base_url.is_some()
|| env_openai_key.is_some()
|| llm_openai_key.is_some(),
config.voice.tts.provider == "openai" && config.voice.tts.enabled,
Expand Down
35 changes: 26 additions & 9 deletions crates/gateway/src/voice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ impl LiveTtsService {
},
openai: moltis_voice::OpenAiTtsConfig {
api_key: resolve_openai_key(cfg.voice.tts.openai.api_key.as_ref(), &cfg),
base_url: cfg.voice.tts.openai.base_url.clone(),
voice: cfg.voice.tts.openai.voice.clone(),
model: cfg.voice.tts.openai.model.clone(),
speed: None,
Expand Down Expand Up @@ -150,13 +151,19 @@ impl LiveTtsService {
config.elevenlabs.model.clone(),
)) as Box<dyn TtsProvider + Send + Sync>
}),
TtsProviderId::OpenAi => config.openai.api_key.as_ref().map(|key| {
Box::new(OpenAiTts::with_defaults(
Some(key.clone()),
TtsProviderId::OpenAi => {
let provider = OpenAiTts::with_defaults(
config.openai.api_key.clone(),
config.openai.base_url.clone(),
config.openai.voice.clone(),
config.openai.model.clone(),
)) as Box<dyn TtsProvider + Send + Sync>
}),
);
if provider.is_configured() {
Some(Box::new(provider) as Box<dyn TtsProvider + Send + Sync>)
} else {
None
}
},
TtsProviderId::Google => config.google.api_key.as_ref().map(|_| {
Box::new(GoogleTts::new(&config.google)) as Box<dyn TtsProvider + Send + Sync>
}),
Expand Down Expand Up @@ -187,7 +194,10 @@ impl LiveTtsService {
TtsProviderId::ElevenLabs,
config.elevenlabs.api_key.is_some(),
),
(TtsProviderId::OpenAi, config.openai.api_key.is_some()),
(
TtsProviderId::OpenAi,
config.openai.api_key.is_some() || config.openai.base_url.is_some(),
),
(TtsProviderId::Google, config.google.api_key.is_some()),
(TtsProviderId::Piper, config.piper.model_path.is_some()),
(TtsProviderId::Coqui, true), // Always available if server running
Expand Down Expand Up @@ -518,9 +528,16 @@ impl LiveSttService {
match provider_id {
SttProviderId::Whisper => {
let key = resolve_openai_key(cfg.voice.stt.whisper.api_key.as_ref(), &cfg);
key.map(|k| {
Box::new(WhisperStt::new(Some(k))) as Box<dyn SttProvider + Send + Sync>
})
let provider = WhisperStt::with_options(
key,
cfg.voice.stt.whisper.base_url.clone(),
cfg.voice.stt.whisper.model.clone(),
);
if provider.is_configured() {
Some(Box::new(provider) as Box<dyn SttProvider + Send + Sync>)
} else {
None
}
},
SttProviderId::Groq => cfg.voice.stt.groq.api_key.as_ref().map(|key| {
Box::new(GroqStt::with_options(
Expand Down
8 changes: 8 additions & 0 deletions crates/voice/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,10 @@ pub struct OpenAiTtsConfig {
)]
pub api_key: Option<Secret<String>>,

/// API base URL (default: https://api.openai.com/v1).
/// Override for OpenAI-compatible TTS servers (e.g. Chatterbox, local TTS).
pub base_url: Option<String>,

/// Voice to use (alloy, echo, fable, onyx, nova, shimmer).
pub voice: Option<String>,

Expand Down Expand Up @@ -445,6 +449,10 @@ pub struct WhisperConfig {
)]
pub api_key: Option<Secret<String>>,

/// API base URL (default: https://api.openai.com/v1).
/// Override for OpenAI-compatible STT servers (e.g. faster-whisper-server).
pub base_url: Option<String>,

/// Model to use (whisper-1).
pub model: Option<String>,

Expand Down
57 changes: 37 additions & 20 deletions crates/voice/src/stt/whisper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,15 @@ const DEFAULT_MODEL: &str = "whisper-1";
pub struct WhisperStt {
/// HTTP client; the constructors in this file build it with `Client::new()`.
client: Client,
/// Optional API key. The `Debug` impl prints a fixed "[REDACTED]" placeholder
/// for this field instead of the value.
api_key: Option<Secret<String>>,
/// Base URL of the API; the constructors use `API_BASE` when no override is given.
base_url: String,
/// Model name; the constructors use `DEFAULT_MODEL` when no override is given.
model: String,
}

impl std::fmt::Debug for WhisperStt {
/// Writes a debug representation of the provider.
///
/// The API key is never formatted; a fixed placeholder is emitted in its place
/// so the secret is not printed. `base_url` and `model` are shown as stored.
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
    let mut repr = f.debug_struct("WhisperStt");
    repr.field("api_key", &"[REDACTED]");
    repr.field("base_url", &self.base_url);
    repr.field("model", &self.model);
    repr.finish()
}
Expand All @@ -55,27 +57,26 @@ impl WhisperStt {
Self {
client: Client::new(),
api_key,
base_url: API_BASE.into(),
model: DEFAULT_MODEL.into(),
}
}

/// Create with custom model.
/// Create with custom model and optional base URL.
#[must_use]
pub fn with_model(api_key: Option<Secret<String>>, model: Option<String>) -> Self {
pub fn with_options(
api_key: Option<Secret<String>>,
base_url: Option<String>,
model: Option<String>,
) -> Self {
Self {
client: Client::new(),
api_key,
base_url: base_url.unwrap_or_else(|| API_BASE.into()),
model: model.unwrap_or_else(|| DEFAULT_MODEL.into()),
}
}

/// Get the API key, returning an error if not configured.
fn get_api_key(&self) -> Result<&Secret<String>> {
self.api_key
.as_ref()
.ok_or_else(|| anyhow!("OpenAI API key not configured for Whisper"))
}

/// Get file extension for audio format.
fn file_extension(format: AudioFormat) -> &'static str {
format.extension()
Expand All @@ -98,12 +99,12 @@ impl SttProvider for WhisperStt {
}

fn is_configured(&self) -> bool {
self.api_key.is_some()
// Configured if API key is set, or if using a custom base URL (local servers
// like faster-whisper-server don't require auth).
self.api_key.is_some() || self.base_url != API_BASE
}

async fn transcribe(&self, request: TranscribeRequest) -> Result<Transcript> {
let api_key = self.get_api_key()?;

let filename = format!("audio.{}", Self::file_extension(request.format));
let mime_type = Self::mime_type(request.format);

Expand All @@ -126,14 +127,20 @@ impl SttProvider for WhisperStt {
form = form.text("prompt", prompt);
}

let response = self
let mut req = self
.client
.post(format!("{API_BASE}/audio/transcriptions"))
.header(
.post(format!("{}/audio/transcriptions", self.base_url))
.multipart(form);

// Only add auth header if an API key is configured (local servers skip auth).
if let Some(api_key) = &self.api_key {
req = req.header(
"Authorization",
format!("Bearer {}", api_key.expose_secret()),
)
.multipart(form)
);
}

let response = req
.send()
.await
.context("failed to send Whisper transcription request")?;
Expand Down Expand Up @@ -232,18 +239,28 @@ mod tests {
prompt: None,
};

// Without API key and default base URL, the request will fail
// (either connection refused to api.openai.com or auth error).
let result = provider.transcribe(request).await;
assert!(result.is_err());
assert!(result.unwrap_err().to_string().contains("not configured"));
}

#[test]
fn test_with_model() {
let provider = WhisperStt::with_model(
fn test_with_options() {
let provider = WhisperStt::with_options(
Some(Secret::new("key".into())),
None,
Some("whisper-large-v3".into()),
);
assert_eq!(provider.model, "whisper-large-v3");
assert_eq!(provider.base_url, API_BASE);
}

// A custom base URL without an API key must still count as configured:
// `is_configured` accepts either an API key or a base URL that differs
// from `API_BASE`.
#[test]
fn test_with_custom_base_url() {
// No API key, custom base URL, default model.
let provider = WhisperStt::with_options(None, Some("http://10.1.2.30:8001".into()), None);
assert!(provider.is_configured());
// The stored URL matches the one passed in.
assert_eq!(provider.base_url, "http://10.1.2.30:8001");
}

#[test]
Expand Down
Loading
Loading