|
diff --git a/plugins/listener2/permissions/default.toml b/plugins/listener2/permissions/default.toml
index 13233e78d2..86a487f3e5 100644
--- a/plugins/listener2/permissions/default.toml
+++ b/plugins/listener2/permissions/default.toml
@@ -4,6 +4,7 @@ permissions = [
"allow-run-batch",
"allow-parse-subtitle",
"allow-export-to-vtt",
+ "allow-diarize-session",
"allow-is-supported-languages-batch",
"allow-suggest-providers-for-languages-batch",
"allow-list-documented-language-codes-batch",
diff --git a/plugins/listener2/permissions/schemas/schema.json b/plugins/listener2/permissions/schemas/schema.json
index 391f1e79a1..6c5c94dd29 100644
--- a/plugins/listener2/permissions/schemas/schema.json
+++ b/plugins/listener2/permissions/schemas/schema.json
@@ -294,6 +294,18 @@
"PermissionKind": {
"type": "string",
"oneOf": [
+ {
+ "description": "Enables the diarize_session command without any pre-configured scope.",
+ "type": "string",
+ "const": "allow-diarize-session",
+ "markdownDescription": "Enables the diarize_session command without any pre-configured scope."
+ },
+ {
+ "description": "Denies the diarize_session command without any pre-configured scope.",
+ "type": "string",
+ "const": "deny-diarize-session",
+ "markdownDescription": "Denies the diarize_session command without any pre-configured scope."
+ },
{
"description": "Enables the export_to_vtt command without any pre-configured scope.",
"type": "string",
@@ -367,10 +379,10 @@
"markdownDescription": "Denies the suggest_providers_for_languages_batch command without any pre-configured scope."
},
{
- "description": "Default permissions for the plugin\n#### This default permission set includes:\n\n- `allow-run-batch`\n- `allow-parse-subtitle`\n- `allow-export-to-vtt`\n- `allow-is-supported-languages-batch`\n- `allow-suggest-providers-for-languages-batch`\n- `allow-list-documented-language-codes-batch`",
+ "description": "Default permissions for the plugin\n#### This default permission set includes:\n\n- `allow-run-batch`\n- `allow-parse-subtitle`\n- `allow-export-to-vtt`\n- `allow-diarize-session`\n- `allow-is-supported-languages-batch`\n- `allow-suggest-providers-for-languages-batch`\n- `allow-list-documented-language-codes-batch`",
"type": "string",
"const": "default",
- "markdownDescription": "Default permissions for the plugin\n#### This default permission set includes:\n\n- `allow-run-batch`\n- `allow-parse-subtitle`\n- `allow-export-to-vtt`\n- `allow-is-supported-languages-batch`\n- `allow-suggest-providers-for-languages-batch`\n- `allow-list-documented-language-codes-batch`"
+ "markdownDescription": "Default permissions for the plugin\n#### This default permission set includes:\n\n- `allow-run-batch`\n- `allow-parse-subtitle`\n- `allow-export-to-vtt`\n- `allow-diarize-session`\n- `allow-is-supported-languages-batch`\n- `allow-suggest-providers-for-languages-batch`\n- `allow-list-documented-language-codes-batch`"
}
]
}
diff --git a/plugins/listener2/src/commands.rs b/plugins/listener2/src/commands.rs
index 3100a8c0db..98a969170b 100644
--- a/plugins/listener2/src/commands.rs
+++ b/plugins/listener2/src/commands.rs
@@ -2,6 +2,7 @@ use owhisper_client::AdapterKind;
use std::str::FromStr;
use crate::{BatchParams, Listener2PluginExt, Subtitle, VttWord};
+use hypr_pyannote_local::diarize::DiarizationSegment;
#[tauri::command]
#[specta::specta]
@@ -99,6 +100,19 @@ pub async fn suggest_providers_for_languages_batch(
Ok(supported)
}
+#[tauri::command]
+#[specta::specta]
+pub async fn diarize_session<R: tauri::Runtime>(
+ app: tauri::AppHandle<R>,
+ session_id: String,
+ max_speakers: usize,
+) -> Result<Vec<DiarizationSegment>, String> {
+ app.listener2()
+ .diarize_session(session_id, max_speakers)
+ .await
+ .map_err(|e| e.to_string())
+}
+
#[tauri::command]
#[specta::specta]
pub async fn list_documented_language_codes_batch(
diff --git a/plugins/listener2/src/error.rs b/plugins/listener2/src/error.rs
index 0ee00641a7..a45d85bf0f 100644
--- a/plugins/listener2/src/error.rs
+++ b/plugins/listener2/src/error.rs
@@ -12,6 +12,8 @@ pub enum Error {
SpawnError(#[from] ractor::SpawnErr),
#[error("batch start failed: {0}")]
BatchStartFailed(String),
+ #[error("diarization failed: {0}")]
+ DiarizeFailed(String),
}
impl Serialize for Error {
diff --git a/plugins/listener2/src/ext.rs b/plugins/listener2/src/ext.rs
index 7d5cd05bc0..1e7c30546a 100644
--- a/plugins/listener2/src/ext.rs
+++ b/plugins/listener2/src/ext.rs
@@ -95,6 +95,74 @@ impl<'a, R: tauri::Runtime, M: tauri::Manager<R>> Listener2<'a, R, M> {
}
}
+ pub async fn diarize_session(
+ &self,
+ session_id: String,
+ max_speakers: usize,
+ ) -> Result<Vec<hypr_pyannote_local::diarize::DiarizationSegment>, crate::Error> {
+ use dasp::sample::Sample;
+ use rodio::Source;
+ use tauri_plugin_settings::SettingsPluginExt;
+
+ let base = self
+ .manager
+ .settings()
+ .cached_vault_base()
+ .map_err(|e| crate::Error::DiarizeFailed(e.to_string()))?;
+
+ let session_dir = base.join("sessions").join(&session_id);
+
+ let audio_path = if session_dir.join("audio.wav").exists() {
+ session_dir.join("audio.wav")
+ } else if session_dir.join("audio.ogg").exists() {
+ session_dir.join("audio.ogg")
+ } else {
+ return Err(crate::Error::DiarizeFailed(
+ "no audio file found".to_string(),
+ ));
+ };
+
+ let segments = tokio::task::spawn_blocking(move || {
+ let decoder = hypr_audio_utils::source_from_path(&audio_path)
+ .map_err(|e| crate::Error::DiarizeFailed(e.to_string()))?;
+
+ let channels = decoder.channels() as usize;
+ let sample_rate = decoder.sample_rate();
+ let f32_samples: Vec<f32> = decoder.collect();
+
+ let mono: Vec<f32> = if channels > 1 {
+ f32_samples
+ .chunks_exact(channels)
+ .map(|frame| frame.iter().sum::<f32>() / channels as f32)
+ .collect()
+ } else {
+ f32_samples
+ };
+
+ let resampled = if sample_rate != 16000 {
+ let source = rodio::buffer::SamplesBuffer::new(1, sample_rate, mono);
+ hypr_audio_utils::resample_audio(source, 16000)
+ .map_err(|e| crate::Error::DiarizeFailed(e.to_string()))?
+ } else {
+ mono
+ };
+
+ let i16_samples: Vec<i16> = resampled.iter().map(|s| s.to_sample()).collect();
+
+ let opts = hypr_pyannote_local::diarize::DiarizeOptions {
+ max_speakers,
+ ..Default::default()
+ };
+
+ hypr_pyannote_local::diarize::diarize(&i16_samples, 16000, Some(opts))
+ .map_err(|e| crate::Error::DiarizeFailed(e.to_string()))
+ })
+ .await
+ .map_err(|e| crate::Error::DiarizeFailed(format!("join error: {e}")))?;
+
+ segments
+ }
+
pub fn parse_subtitle(&self, path: String) -> Result {
use aspasia::TimedSubtitleFile;
let sub = TimedSubtitleFile::new(&path).unwrap();
diff --git a/plugins/listener2/src/lib.rs b/plugins/listener2/src/lib.rs
index 6a21df2a0b..676f8763f0 100644
--- a/plugins/listener2/src/lib.rs
+++ b/plugins/listener2/src/lib.rs
@@ -29,6 +29,7 @@ fn make_specta_builder() -> tauri_specta::Builder {
commands::run_batch::<tauri::Wry>,
commands::parse_subtitle::<tauri::Wry>,
commands::export_to_vtt::<tauri::Wry>,
+ commands::diarize_session::<tauri::Wry>,
commands::is_supported_languages_batch::<tauri::Wry>,
commands::suggest_providers_for_languages_batch::<tauri::Wry>,
commands::list_documented_language_codes_batch::<tauri::Wry>,
|