From bade239d958c9d8c5400d2297a6d37c2702d0b9b Mon Sep 17 00:00:00 2001 From: plyght Date: Tue, 22 Apr 2025 15:07:38 -0400 Subject: [PATCH 01/10] first AI improvement commit --- Hex.xcodeproj/project.pbxproj | 4 +- Hex/Clients/AIEnhancementClient.swift | 287 ++++++++++++ Hex/Clients/RecordingClient.swift | 2 +- Hex/Features/App/AppFeature.swift | 11 + .../Settings/AIEnhancementFeature.swift | 107 +++++ Hex/Features/Settings/AIEnhancementView.swift | 413 ++++++++++++++++++ Hex/Features/Settings/SettingsFeature.swift | 19 +- .../Transcription/TranscriptionFeature.swift | 80 +++- .../TranscriptionIndicatorView.swift | 12 +- Hex/Models/HexSettings.swift | 26 +- Localizable.xcstrings | 96 ++++ 11 files changed, 1047 insertions(+), 10 deletions(-) create mode 100644 Hex/Clients/AIEnhancementClient.swift create mode 100644 Hex/Features/Settings/AIEnhancementFeature.swift create mode 100644 Hex/Features/Settings/AIEnhancementView.swift diff --git a/Hex.xcodeproj/project.pbxproj b/Hex.xcodeproj/project.pbxproj index 9b2f1e1..3113846 100644 --- a/Hex.xcodeproj/project.pbxproj +++ b/Hex.xcodeproj/project.pbxproj @@ -423,7 +423,7 @@ ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor; CODE_SIGN_ENTITLEMENTS = Hex/Hex.entitlements; - "CODE_SIGN_IDENTITY[sdk=macosx*]" = "Apple Development"; + "CODE_SIGN_IDENTITY[sdk=macosx*]" = "-"; CODE_SIGN_STYLE = Automatic; COMBINE_HIDPI_IMAGES = YES; CURRENT_PROJECT_VERSION = 34; @@ -456,7 +456,7 @@ ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor; CODE_SIGN_ENTITLEMENTS = Hex/Hex.entitlements; - "CODE_SIGN_IDENTITY[sdk=macosx*]" = "Apple Development"; + "CODE_SIGN_IDENTITY[sdk=macosx*]" = "-"; CODE_SIGN_STYLE = Automatic; COMBINE_HIDPI_IMAGES = YES; CURRENT_PROJECT_VERSION = 34; diff --git a/Hex/Clients/AIEnhancementClient.swift b/Hex/Clients/AIEnhancementClient.swift new file mode 100644 index 0000000..36d5197 --- /dev/null +++ b/Hex/Clients/AIEnhancementClient.swift @@ -0,0 +1,287 @@ +// +// AIEnhancementClient.swift +// Hex +// +// Created by Claude AI on 4/22/25. +// + +import Dependencies +import DependenciesMacros +import Foundation + +// Note: Future enhancement could use OllamaKit directly: +// import OllamaKit + +/// A client that enhances transcribed text using local LLMs. +/// Supports both Ollama and other local options (future expansion). +@DependencyClient +struct AIEnhancementClient { + /// Enhances the given text using the specified model. + var enhance: @Sendable (String, String, EnhancementOptions, @escaping (Progress) -> Void) async throws -> String = { text, _, _, _ in text } + + /// Checks if Ollama is installed and running on the system + var isOllamaAvailable: @Sendable () async -> Bool = { false } + + /// Gets a list of available models from Ollama + var getAvailableModels: @Sendable () async throws -> [String] = { [] } +} + +/// Enhancement options for AI processing +struct EnhancementOptions { + /// The prompt to send to the AI model for text enhancement + var prompt: String + + /// Temperature controls randomness: lower values (0.1-0.3) are more precise, + /// higher values (0.7-1.0) give more creative/varied results + var temperature: Double + + /// Maximum number of tokens to generate in the response + var maxTokens: Int + + /// Default prompt for enhancing transcribed text with clear instructions + static let defaultPrompt = """ + You are a professional editor improving transcribed text from speech-to-text. 
+ + Your task is to: + 1. Fix grammar, punctuation, and capitalization + 2. Correct obvious transcription errors and typos + 3. Format the text to be more readable + 4. Preserve all meaning and information from the original + 5. Make the text flow naturally as written text + 6. DO NOT add any new information that wasn't in the original + 7. DO NOT remove any information from the original text + + Focus only on improving readability while preserving the exact meaning. + """ + + /// Default enhancement options for transcribed text + static let `default` = EnhancementOptions( + prompt: defaultPrompt, + temperature: 0.3, + maxTokens: 1000 + ) + + /// Custom initialization with sensible defaults + init(prompt: String = defaultPrompt, temperature: Double = 0.3, maxTokens: Int = 1000) { + self.prompt = prompt + self.temperature = temperature + self.maxTokens = maxTokens + } +} + +/// Dependency Key for AIEnhancementClient +extension AIEnhancementClient: DependencyKey { + static var liveValue: Self { + let live = AIEnhancementClientLive() + return Self( + enhance: { try await live.enhance(text: $0, model: $1, options: $2, progressCallback: $3) }, + isOllamaAvailable: { await live.isOllamaAvailable() }, + getAvailableModels: { try await live.getAvailableModels() } + ) + } +} + +extension DependencyValues { + var aiEnhancement: AIEnhancementClient { + get { self[AIEnhancementClient.self] } + set { self[AIEnhancementClient.self] = newValue } + } +} + +/// Live implementation of AIEnhancementClient +class AIEnhancementClientLive { + // MARK: - Public Methods + + /// Enhances text using a local AI model + func enhance(text: String, model: String, options: EnhancementOptions, progressCallback: @escaping (Progress) -> Void) async throws -> String { + // Skip if the text is empty or too short + guard !text.isEmpty, text.count > 5 else { + return text + } + + let progress = Progress(totalUnitCount: 100) + progressCallback(progress) + + // For now, we support Ollama only + do { + let enhancedText = try await enhanceWithOllama(text: text, model: model, options: options) { fraction in + progress.completedUnitCount = Int64(fraction * 100) + progressCallback(progress) + } + + progress.completedUnitCount = 100 + progressCallback(progress) + + return enhancedText + } catch { + print("[AIEnhancementClientLive] Error enhancing text: \(error.localizedDescription)") + throw error + } + } + + /// Checks if Ollama is available on the system + func isOllamaAvailable() async -> Bool { + // Simple check - try to connect to Ollama's API endpoint + do { + var request = URLRequest(url: URL(string: "http://localhost:11434/api/version")!) + request.timeoutInterval = 3.0 // Short timeout for quick feedback + + let (_, response) = try await URLSession.shared.data(for: request) + + if let httpResponse = response as? HTTPURLResponse { + return httpResponse.statusCode == 200 + } + return false + } catch { + print("[AIEnhancementClientLive] Ollama not available: \(error.localizedDescription)") + return false + } + } + + /// Gets a list of available models from Ollama + func getAvailableModels() async throws -> [String] { + // Our direct API implementation: + struct ModelResponse: Decodable { + struct Model: Decodable { + let name: String + let modifiedAt: String? + let size: Int64? + + enum CodingKeys: String, CodingKey { + case name + case modifiedAt = "modified_at" + case size + } + } + let models: [Model] + } + + var request = URLRequest(url: URL(string: "http://localhost:11434/api/tags")!) 
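+        // Assumed response shape from /api/tags (abbreviated); this is the
+        // payload the ModelResponse type above is written to decode:
+        //   { "models": [ { "name": "llama3:8b", "modified_at": "2024-04-20T10:00:00Z", "size": 4661224676 } ] }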
+ request.timeoutInterval = 5.0 + + do { + let (data, response) = try await URLSession.shared.data(for: request) + + guard let httpResponse = response as? HTTPURLResponse else { + throw NSError(domain: "AIEnhancementClient", code: -1, + userInfo: [NSLocalizedDescriptionKey: "Invalid response from Ollama"]) + } + + if httpResponse.statusCode != 200 { + throw NSError(domain: "AIEnhancementClient", code: httpResponse.statusCode, + userInfo: [NSLocalizedDescriptionKey: "Ollama returned status code \(httpResponse.statusCode)"]) + } + + do { + let modelResponse = try JSONDecoder().decode(ModelResponse.self, from: data) + // Sort models alphabetically for better display + return modelResponse.models.map { $0.name }.sorted() + } catch let decodingError { + print("[AIEnhancementClientLive] Failed to decode model list: \(decodingError)") + throw NSError(domain: "AIEnhancementClient", code: -2, + userInfo: [NSLocalizedDescriptionKey: "Failed to parse model list from Ollama. \(decodingError.localizedDescription)"]) + } + } catch { + print("[AIEnhancementClientLive] Error getting models: \(error.localizedDescription)") + throw NSError(domain: "AIEnhancementClient", code: -3, + userInfo: [NSLocalizedDescriptionKey: "Failed to connect to Ollama. Ensure it's running."]) + } + } + + // MARK: - Private Helpers + + /// Enhances text using Ollama's API + private func enhanceWithOllama(text: String, model: String, options: EnhancementOptions, progressCallback: @escaping (Double) -> Void) async throws -> String { + // Initial progress update + progressCallback(0.1) + + // Validate inputs + guard !model.isEmpty else { + throw NSError(domain: "AIEnhancementClient", code: -4, + userInfo: [NSLocalizedDescriptionKey: "No model selected for enhancement"]) + } + + let url = URL(string: "http://localhost:11434/api/generate")! + var request = URLRequest(url: url) + request.httpMethod = "POST" + request.setValue("application/json", forHTTPHeaderField: "Content-Type") + request.timeoutInterval = 30.0 // Allow longer timeout for generation + + // Create a well-formatted prompt with clear instructions + let fullPrompt = """ + \(options.prompt) + + TEXT TO IMPROVE: + \(text) + + IMPROVED TEXT: + """ + + // Build request parameters with appropriate defaults + let requestDict: [String: Any] = [ + "model": model, + "prompt": fullPrompt, + "temperature": max(0.1, min(1.0, options.temperature)), // Ensure valid range + "max_tokens": max(100, min(2000, options.maxTokens)), // Reasonable limits + "stream": false, + "system": "You are an AI that improves transcribed text while preserving meaning." + ] + + do { + // Progress update - request prepared + progressCallback(0.2) + + // Convert to JSON and send + let requestData = try JSONSerialization.data(withJSONObject: requestDict) + request.httpBody = requestData + + // Make the request + let (responseData, urlResponse) = try await URLSession.shared.data(for: request) + + // Progress update - response received + progressCallback(0.8) + + // Validate response + guard let httpResponse = urlResponse as? HTTPURLResponse else { + throw NSError(domain: "AIEnhancementClient", code: -1, + userInfo: [NSLocalizedDescriptionKey: "Invalid response from Ollama"]) + } + + if httpResponse.statusCode != 200 { + // Try to extract error message if available + if let errorDict = try? JSONSerialization.jsonObject(with: responseData) as? [String: Any], + let errorMessage = errorDict["error"] as? 
String { + throw NSError(domain: "AIEnhancementClient", code: httpResponse.statusCode, + userInfo: [NSLocalizedDescriptionKey: "Ollama error: \(errorMessage)"]) + } else { + throw NSError(domain: "AIEnhancementClient", code: httpResponse.statusCode, + userInfo: [NSLocalizedDescriptionKey: "Ollama returned status code \(httpResponse.statusCode)"]) + } + } + + // Parse response + if let responseDict = try JSONSerialization.jsonObject(with: responseData) as? [String: Any], + let enhancedText = responseDict["response"] as? String { + + // Progress update - processing complete + progressCallback(1.0) + + // Clean up the response - trim whitespace and ensure it's not empty + let cleanedText = enhancedText.trimmingCharacters(in: .whitespacesAndNewlines) + return cleanedText.isEmpty ? text : cleanedText + } else { + throw NSError(domain: "AIEnhancementClient", code: -2, + userInfo: [NSLocalizedDescriptionKey: "Failed to parse Ollama response"]) + } + } catch let error as NSError { + // Log the error and rethrow + print("[AIEnhancementClientLive] Error enhancing text: \(error.localizedDescription)") + throw error + } catch { + // Handle unexpected errors + print("[AIEnhancementClientLive] Unexpected error: \(error)") + throw NSError(domain: "AIEnhancementClient", code: -3, + userInfo: [NSLocalizedDescriptionKey: "Error communicating with Ollama: \(error.localizedDescription)"]) + } + } +} \ No newline at end of file diff --git a/Hex/Clients/RecordingClient.swift b/Hex/Clients/RecordingClient.swift index 1db8fbb..e4303e5 100644 --- a/Hex/Clients/RecordingClient.swift +++ b/Hex/Clients/RecordingClient.swift @@ -381,7 +381,7 @@ actor RecordingClientLive { var deviceName: CFString? = nil var size = UInt32(MemoryLayout.size) - var deviceNamePtr: UnsafeMutableRawPointer = .allocate(byteCount: Int(size), alignment: MemoryLayout.alignment) + let deviceNamePtr: UnsafeMutableRawPointer = .allocate(byteCount: Int(size), alignment: MemoryLayout.alignment) defer { deviceNamePtr.deallocate() } let status = AudioObjectGetPropertyData( diff --git a/Hex/Features/App/AppFeature.swift b/Hex/Features/App/AppFeature.swift index 9bf15ee..98b01ed 100644 --- a/Hex/Features/App/AppFeature.swift +++ b/Hex/Features/App/AppFeature.swift @@ -15,6 +15,7 @@ struct AppFeature { case settings case history case about + case aiEnhancement } @ObservableState @@ -80,6 +81,13 @@ struct AppView: View { }.buttonStyle(.plain) .tag(AppFeature.ActiveTab.settings) + Button { + store.send(.setActiveTab(.aiEnhancement)) + } label: { + Label("AI Enhancement", systemImage: "brain") + }.buttonStyle(.plain) + .tag(AppFeature.ActiveTab.aiEnhancement) + Button { store.send(.setActiveTab(.history)) } label: { @@ -99,6 +107,9 @@ struct AppView: View { case .settings: SettingsView(store: store.scope(state: \.settings, action: \.settings)) .navigationTitle("Settings") + case .aiEnhancement: + AIEnhancementView(store: store.scope(state: \.settings.aiEnhancement, action: \.settings.aiEnhancement)) + .navigationTitle("AI Enhancement") case .history: HistoryView(store: store.scope(state: \.history, action: \.history)) .navigationTitle("History") diff --git a/Hex/Features/Settings/AIEnhancementFeature.swift b/Hex/Features/Settings/AIEnhancementFeature.swift new file mode 100644 index 0000000..5582efd --- /dev/null +++ b/Hex/Features/Settings/AIEnhancementFeature.swift @@ -0,0 +1,107 @@ +// +// AIEnhancementFeature.swift +// Hex +// +// Created by Claude AI on 4/22/25. 
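+//
+// TCA reducer backing the AI Enhancement settings pane: it checks whether a
+// local Ollama server is reachable, loads the list of installed models, and
+// persists the user's model and prompt choices into the shared HexSettings.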
+// + +import ComposableArchitecture +import Foundation +import SwiftUI + +@Reducer +struct AIEnhancementFeature { + @ObservableState + struct State: Equatable { + @Shared(.hexSettings) var hexSettings: HexSettings + + var isOllamaAvailable: Bool = false + var availableModels: [String] = [] + var isLoadingModels: Bool = false + var errorMessage: String? = nil + + // Computed property for convenient access to the default model + var defaultAIModel: String { + "llama3:8b" + } + } + + enum Action { + case task + case checkOllamaAvailability + case ollamaAvailabilityResult(Bool) + case loadAvailableModels + case modelsLoaded([String]) + case modelLoadingError(String) + case setSelectedModel(String) + case resetToDefaultPrompt + } + + @Dependency(\.aiEnhancement) var aiEnhancement + + var body: some ReducerOf { + Reduce { state, action in + switch action { + case .task: + return .send(.checkOllamaAvailability) + + case .checkOllamaAvailability: + return .run { send in + let isAvailable = await aiEnhancement.isOllamaAvailable() + await send(.ollamaAvailabilityResult(isAvailable)) + } + + case let .ollamaAvailabilityResult(isAvailable): + state.isOllamaAvailable = isAvailable + + // If Ollama is available, load models + if isAvailable { + return .send(.loadAvailableModels) + } + return .none + + case .loadAvailableModels: + state.isLoadingModels = true + state.errorMessage = nil + + return .run { send in + do { + let models = try await aiEnhancement.getAvailableModels() + await send(.modelsLoaded(models)) + } catch { + await send(.modelLoadingError(error.localizedDescription)) + } + } + + case let .modelsLoaded(models): + state.isLoadingModels = false + state.availableModels = models + + // If the selected model is not in the list and we have models, select the first one + if !models.isEmpty && !models.contains(state.hexSettings.selectedAIModel) { + // Check if the default model is available + if models.contains(state.defaultAIModel) { + state.$hexSettings.withLock { $0.selectedAIModel = state.defaultAIModel } + } else { + state.$hexSettings.withLock { $0.selectedAIModel = models[0] } + } + } + + return .none + + case let .modelLoadingError(message): + state.isLoadingModels = false + state.errorMessage = message + return .none + + case let .setSelectedModel(model): + state.$hexSettings.withLock { $0.selectedAIModel = model } + return .none + + case .resetToDefaultPrompt: + state.$hexSettings.withLock { $0.aiEnhancementPrompt = EnhancementOptions.defaultPrompt } + return .none + } + } + } +} \ No newline at end of file diff --git a/Hex/Features/Settings/AIEnhancementView.swift b/Hex/Features/Settings/AIEnhancementView.swift new file mode 100644 index 0000000..df68dcb --- /dev/null +++ b/Hex/Features/Settings/AIEnhancementView.swift @@ -0,0 +1,413 @@ +// +// AIEnhancementView.swift +// Hex +// +// Created by Claude AI on 4/22/25. 
+// + +import ComposableArchitecture +import SwiftUI + +struct AIEnhancementView: View { + @Bindable var store: StoreOf + @State private var showExpandedPrompt = false + @State private var isHoveringModelSelect = false + + var body: some View { + Form { + // Activation Section (Always show this first) + Section { + activationToggle + } header: { + Text("AI Enhancement") + } footer: { + Text("Enable AI to improve transcription quality by fixing grammar, formatting, and errors.") + .foregroundColor(.secondary.opacity(0.7)) + .font(.caption) + } + + // Only show other settings if AI enhancement is enabled + if store.hexSettings.useAIEnhancement { + // Connection Status Section (only if AI enhancement is enabled) + if !store.isOllamaAvailable { + Section { + connectionStatusView + } header: { + Text("Ollama Status") + } + } + + // Model Selection Section + modelSelectionSection + + // Temperature Control Section + temperatureSection + + // Prompt Configuration Section + promptSection + } + } + .formStyle(.grouped) + .task { + await store.send(.task).finish() + } + } + + // MARK: - Component Views + + // Connection Status View + private var connectionStatusView: some View { + VStack(alignment: .leading, spacing: 12) { + HStack(alignment: .top) { + Image(systemName: "exclamationmark.triangle.fill") + .font(.title2) + .foregroundColor(.orange) + + VStack(alignment: .leading, spacing: 8) { + Text("Ollama Not Connected") + .font(.headline) + .foregroundColor(.primary) + + Text("AI enhancement requires Ollama to be installed and running locally.") + .font(.subheadline) + .foregroundColor(.secondary) + } + } + + Divider() + + VStack(alignment: .leading, spacing: 12) { + Text("To set up Ollama:") + .font(.subheadline) + .foregroundColor(.secondary) + + VStack(alignment: .leading, spacing: 8) { + bulletPoint(text: "Download and install Ollama from [ollama.com](https://ollama.com)") + bulletPoint(text: "Launch the Ollama application") + bulletPoint(text: "Pull a language model (llama3 recommended)") + } + .padding(.leading, 8) + } + + HStack { + Spacer() + + Button { + NSWorkspace.shared.open(URL(string: "https://ollama.com")!) 
+ } label: { + Label("Download Ollama", systemImage: "arrow.down.circle") + } + .buttonStyle(DefaultButtonStyle()) + .foregroundColor(Color.blue) + + Button { + store.send(.checkOllamaAvailability) + } label: { + Label("Check Connection", systemImage: "arrow.clockwise") + } + .buttonStyle(DefaultButtonStyle()) + .foregroundColor(Color.blue) + } + .padding(.top, 4) + } + .padding() + .background(RoundedRectangle(cornerRadius: 8) + .fill(Color.orange.opacity(0.1)) + .overlay(RoundedRectangle(cornerRadius: 8) + .stroke(Color.orange.opacity(0.3), lineWidth: 1)) + ) + } + + // Activation Toggle + private var activationToggle: some View { + VStack(spacing: 8) { + // Main toggle row + Toggle(isOn: Binding( + get: { store.hexSettings.useAIEnhancement }, + set: { newValue in + store.$hexSettings.withLock { $0.useAIEnhancement = newValue } + + // When enabling, check Ollama status + if newValue { + Task { + await store.send(.checkOllamaAvailability).finish() + } + } + } + )) { + Text("Use AI Enhancement") + .font(.body) + } + + // Connection status indicator (only show if AI enhancement is enabled and Ollama is available) + if store.hexSettings.useAIEnhancement && store.isOllamaAvailable { + HStack(spacing: 4) { + Circle() + .fill(Color.green) + .frame(width: 6, height: 6) + Text("Ollama Connected") + .font(.caption) + .foregroundColor(.secondary) + Spacer() + } + .padding(.leading, 2) + } + } + } + + // Model Selection Section + private var modelSelectionSection: some View { + Section { + VStack(alignment: .leading, spacing: 12) { + // Model selection header + HStack { + Label { + Text("Language Model") + .font(.body) + } icon: { + Image(systemName: "brain") + } + + Spacer() + + // Refresh button for models + Button { + store.send(.loadAvailableModels) + } label: { + Image(systemName: "arrow.clockwise") + .font(.body) + } + .buttonStyle(DefaultButtonStyle()) + .disabled(store.isLoadingModels) + .opacity(store.isLoadingModels ? 0.5 : 0.7) + } + + if store.isLoadingModels { + // Loading indicator + HStack { + ProgressView() + .scaleEffect(0.7) + Text("Loading available models...") + .font(.subheadline) + .foregroundColor(.secondary) + Spacer() + } + .padding(.vertical, 4) + } else if !store.isOllamaAvailable { + // Ollama not available message + Text("Ollama connection required to view models") + .font(.subheadline) + .foregroundColor(.secondary) + .padding(.vertical, 4) + } else if let error = store.errorMessage { + // Error message + HStack { + Image(systemName: "exclamationmark.triangle") + .foregroundColor(.red) + Text("Error: \(error)") + .font(.caption) + .foregroundColor(.red) + .lineLimit(2) + } + .padding(.vertical, 4) + } else if store.availableModels.isEmpty { + // No models available + HStack(alignment: .center) { + Text("No models found in Ollama") + .font(.subheadline) + .foregroundColor(.secondary) + + Spacer() + + Link("Browse Models", destination: URL(string: "https://ollama.com/library")!) 
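+                        // ollama.com/library is the catalog of models that can
+                        // be pulled locally (e.g. with `ollama pull llama3`).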
+ .font(.subheadline) + .foregroundColor(.blue) + } + .padding(.vertical, 8) + } else { + // Model picker + VStack(alignment: .leading, spacing: 4) { + Text("Select AI model:") + .font(.subheadline) + .foregroundColor(.secondary) + + Picker("", selection: Binding( + get: { store.hexSettings.selectedAIModel }, + set: { store.send(.setSelectedModel($0)) } + )) { + ForEach(store.availableModels, id: \.self) { model in + Text(model).tag(model) + } + } + .pickerStyle(.menu) + .labelsHidden() + .frame(maxWidth: .infinity, alignment: .leading) + .padding(.vertical, 2) + } + } + } + } header: { + Text("Model Selection") + } footer: { + if !store.availableModels.isEmpty { + Text("Smaller models are faster but less capable. Llama3 offers a good balance of speed and quality.") + .foregroundColor(.secondary.opacity(0.7)) + .font(.caption) + } + } + } + + // Temperature Section + private var temperatureSection: some View { + Section { + // Label with temperature value + HStack { + Text("Response Style") + .font(.subheadline) + + Spacer() + + Text("\(store.hexSettings.aiEnhancementTemperature, specifier: "%.2f")") + .monospacedDigit() + .font(.caption) + .foregroundColor(.secondary) + .frame(width: 40) + } + .padding(.bottom, 4) + + // Slider for temperature control - extend even further + ZStack { + Slider( + value: Binding( + get: { store.hexSettings.aiEnhancementTemperature }, + set: { newValue in + store.$hexSettings.withLock { $0.aiEnhancementTemperature = newValue } + } + ), + in: 0...1, + step: 0.05 + ) + .padding(.horizontal, -40) + .frame(maxWidth: .infinity) + } + .padding(.horizontal, 40) + + // Temperature descriptions + HStack { + Text("Precision") + .font(.caption) + .foregroundColor(.secondary) + + Spacer() + + Text("Creativity") + .font(.caption) + .foregroundColor(.secondary) + } + .padding(.horizontal, 0) + .padding(.top, 4) + } header: { + Text("Response Style") + } footer: { + Text("Lower values produce more consistent, conservative improvements. Higher values allow more creative rewording.") + .foregroundColor(.secondary.opacity(0.7)) + .font(.caption) + } + } + + // Prompt Configuration Section + private var promptSection: some View { + Section { + VStack(spacing: 0) { + // Header with edit button + HStack { + Label { + Text("Instructions") + .font(.subheadline) + } icon: { + Image(systemName: "doc.text") + } + + Spacer() + + Button(showExpandedPrompt ? "Done" : "Edit") { + withAnimation(.spring(duration: 0.3)) { + showExpandedPrompt.toggle() + } + } + .buttonStyle(DefaultButtonStyle()) + .foregroundColor(showExpandedPrompt ? 
Color.primary : Color.accentColor) + .font(.caption) + } + .padding(.bottom, 8) + + if showExpandedPrompt { + // Expanded editor view + VStack(spacing: 8) { + // Editor + TextEditor(text: Binding( + get: { store.hexSettings.aiEnhancementPrompt }, + set: { newValue in + store.$hexSettings.withLock { $0.aiEnhancementPrompt = newValue } + } + )) + .font(.system(.body, design: .monospaced)) + .frame(minHeight: 150) + .padding(4) + .overlay( + RoundedRectangle(cornerRadius: 8) + .stroke(Color.secondary.opacity(0.2), lineWidth: 1) + ) + + // Reset button + Button("Reset to Default") { + store.send(.resetToDefaultPrompt) + } + .buttonStyle(DefaultButtonStyle()) + .font(.caption) + .foregroundColor(.secondary) + .padding(4) + .frame(maxWidth: .infinity, alignment: .trailing) + } + } else { + // Collapsed preview + Text(store.hexSettings.aiEnhancementPrompt) + .font(.caption) + .foregroundColor(.secondary) + .lineLimit(4) + .truncationMode(.tail) + .frame(maxWidth: .infinity, alignment: .leading) + .padding(10) + .background( + RoundedRectangle(cornerRadius: 8) + .fill(Color.secondary.opacity(0.05)) + ) + } + } + } header: { + Text("Enhancement Prompt") + } footer: { + if !showExpandedPrompt { + Text("These instructions tell the AI how to improve your transcribed text.") + .foregroundColor(.secondary.opacity(0.7)) + .font(.caption) + } else { + Text("Make changes to customize how the AI enhances your transcriptions. Be specific about what should be preserved or changed.") + .foregroundColor(.secondary.opacity(0.7)) + .font(.caption) + } + } + } + + // Helper for bullet points + private func bulletPoint(text: String) -> some View { + HStack(alignment: .firstTextBaseline, spacing: 8) { + Text("•") + .font(.subheadline) + .foregroundColor(.secondary) + Text(LocalizedStringKey(text)) + .font(.subheadline) + .foregroundColor(.secondary) + } + } +} diff --git a/Hex/Features/Settings/SettingsFeature.swift b/Hex/Features/Settings/SettingsFeature.swift index 6d68620..c73c451 100644 --- a/Hex/Features/Settings/SettingsFeature.swift +++ b/Hex/Features/Settings/SettingsFeature.swift @@ -35,6 +35,9 @@ struct SettingsFeature { // Model Management var modelDownload = ModelDownloadFeature.State() + + // AI Enhancement + var aiEnhancement = AIEnhancementFeature.State() } enum Action: BindableAction { @@ -60,6 +63,9 @@ struct SettingsFeature { // Model Management case modelDownload(ModelDownloadFeature.Action) + + // AI Enhancement + case aiEnhancement(AIEnhancementFeature.Action) } @Dependency(\.keyEventMonitor) var keyEventMonitor @@ -73,6 +79,10 @@ struct SettingsFeature { Scope(state: \.modelDownload, action: \.modelDownload) { ModelDownloadFeature() } + + Scope(state: \.aiEnhancement, action: \.aiEnhancement) { + AIEnhancementFeature() + } Reduce { state, action in switch action { @@ -104,7 +114,10 @@ struct SettingsFeature { let deviceRefreshTask = Task { @MainActor in for await _ in clock.timer(interval: .seconds(120)) { // Only refresh when the app is active to save resources - if await NSApplication.shared.isActive { + let isActive = NSApplication.shared.isActive + + if isActive { + try? 
await Task.sleep(for: .nanoseconds(1)) await send(.loadAvailableInputDevices) } } @@ -279,6 +292,10 @@ struct SettingsFeature { case .modelDownload: return .none + + // AI Enhancement + case .aiEnhancement: + return .none // Microphone device selection case .loadAvailableInputDevices: diff --git a/Hex/Features/Transcription/TranscriptionFeature.swift b/Hex/Features/Transcription/TranscriptionFeature.swift index 7c0b605..8e2a699 100644 --- a/Hex/Features/Transcription/TranscriptionFeature.swift +++ b/Hex/Features/Transcription/TranscriptionFeature.swift @@ -45,12 +45,17 @@ struct TranscriptionFeature { // Transcription result flow case transcriptionResult(String) case transcriptionError(Error) + + // AI Enhancement flow + case aiEnhancementResult(String) + case aiEnhancementError(Error) } enum CancelID { case delayedRecord case metering case transcription + case aiEnhancement } @Dependency(\.transcription) var transcription @@ -58,6 +63,7 @@ struct TranscriptionFeature { @Dependency(\.pasteboard) var pasteboard @Dependency(\.keyEventMonitor) var keyEventMonitor @Dependency(\.soundEffects) var soundEffect + @Dependency(\.aiEnhancement) var aiEnhancement var body: some ReducerOf { Reduce { state, action in @@ -106,6 +112,17 @@ struct TranscriptionFeature { case let .transcriptionError(error): return handleTranscriptionError(&state, error: error) + + // MARK: - AI Enhancement Results + + case let .aiEnhancementResult(result): + return handleAIEnhancement(&state, result: result) + + case let .aiEnhancementError(error): + // On AI enhancement error, we'll just use the original transcription + // so we don't need specific error handling here + print("AI Enhancement error: \(error)") + return .none // MARK: - Cancel Entire Flow @@ -303,6 +320,64 @@ private extension TranscriptionFeature { func handleTranscriptionResult( _ state: inout State, result: String + ) -> Effect { + // First check if we should use AI enhancement + if state.hexSettings.useAIEnhancement { + // Keep state.isTranscribing = true since we're still processing + return enhanceWithAI(result: result, state: state) + } else { + state.isTranscribing = false + state.isPrewarming = false + + // If empty text, nothing else to do + guard !result.isEmpty else { + return .none + } + + // Compute how long we recorded + let duration = state.recordingStartTime.map { Date().timeIntervalSince($0) } ?? 
0 + + // Continue with storing the final result in the background + return finalizeRecordingAndStoreTranscript( + result: result, + duration: duration, + transcriptionHistory: state.$transcriptionHistory + ) + } + } + + // MARK: - AI Enhancement Handlers + + // Use AI to enhance the transcription result + private func enhanceWithAI(result: String, state: State) -> Effect { + // If empty text, nothing else to do + guard !result.isEmpty else { + return .send(.aiEnhancementResult(result)) // Just pass through empty text + } + + let model = state.hexSettings.selectedAIModel + let options = EnhancementOptions( + prompt: state.hexSettings.aiEnhancementPrompt, + temperature: state.hexSettings.aiEnhancementTemperature + ) + + return .run { send in + do { + let enhancedText = try await aiEnhancement.enhance(result, model, options) { _ in } + await send(.aiEnhancementResult(enhancedText)) + } catch { + print("Error enhancing text with AI: \(error)") + // On error, fall back to the original transcription + await send(.aiEnhancementResult(result)) + } + } + .cancellable(id: CancelID.aiEnhancement) + } + + // Handle the AI enhancement result + private func handleAIEnhancement( + _ state: inout State, + result: String ) -> Effect { state.isTranscribing = false state.isPrewarming = false @@ -399,6 +474,7 @@ private extension TranscriptionFeature { return .merge( .cancel(id: CancelID.transcription), .cancel(id: CancelID.delayedRecord), + .cancel(id: CancelID.aiEnhancement), .run { _ in await soundEffect.play(.cancel) } @@ -440,7 +516,9 @@ struct TranscriptionView: View { @Bindable var store: StoreOf var status: TranscriptionIndicatorView.Status { - if store.isTranscribing { + if store.isTranscribing && store.hexSettings.useAIEnhancement { + return .enhancing + } else if store.isTranscribing { return .transcribing } else if store.isRecording { return .recording diff --git a/Hex/Features/Transcription/TranscriptionIndicatorView.swift b/Hex/Features/Transcription/TranscriptionIndicatorView.swift index 0f4dd25..db17968 100644 --- a/Hex/Features/Transcription/TranscriptionIndicatorView.swift +++ b/Hex/Features/Transcription/TranscriptionIndicatorView.swift @@ -14,12 +14,14 @@ struct TranscriptionIndicatorView: View { case recording case transcribing case prewarming + case enhancing } var status: Status var meter: Meter let transcribeBaseColor: Color = .blue + let enhanceBaseColor: Color = .green private var backgroundColor: Color { switch status { @@ -28,6 +30,7 @@ struct TranscriptionIndicatorView: View { case .recording: return .red.mix(with: .black, by: 0.5).mix(with: .red, by: meter.averagePower * 3) case .transcribing: return transcribeBaseColor.mix(with: .black, by: 0.5) case .prewarming: return transcribeBaseColor.mix(with: .black, by: 0.5) + case .enhancing: return enhanceBaseColor.mix(with: .black, by: 0.5) } } @@ -38,6 +41,7 @@ struct TranscriptionIndicatorView: View { case .recording: return Color.red.mix(with: .white, by: 0.1).opacity(0.6) case .transcribing: return transcribeBaseColor.mix(with: .white, by: 0.1).opacity(0.6) case .prewarming: return transcribeBaseColor.mix(with: .white, by: 0.1).opacity(0.6) + case .enhancing: return enhanceBaseColor.mix(with: .white, by: 0.1).opacity(0.6) } } @@ -48,6 +52,7 @@ struct TranscriptionIndicatorView: View { case .recording: return Color.red case .transcribing: return transcribeBaseColor case .prewarming: return transcribeBaseColor + case .enhancing: return enhanceBaseColor } } @@ -125,10 +130,10 @@ struct TranscriptionIndicatorView: View { } } - // Show 
tooltip when prewarming - if status == .prewarming { + // Show tooltip for prewarming/enhancing + if status == .prewarming || status == .enhancing { VStack(spacing: 4) { - Text("Model prewarming...") + Text(status == .prewarming ? "Model prewarming..." : "AI enhancing text...") .font(.system(size: 12, weight: .medium)) .foregroundColor(.white) .padding(.horizontal, 8) @@ -153,6 +158,7 @@ struct TranscriptionIndicatorView: View { TranscriptionIndicatorView(status: .recording, meter: .init(averagePower: 0.5, peakPower: 0.5)) TranscriptionIndicatorView(status: .transcribing, meter: .init(averagePower: 0, peakPower: 0)) TranscriptionIndicatorView(status: .prewarming, meter: .init(averagePower: 0, peakPower: 0)) + TranscriptionIndicatorView(status: .enhancing, meter: .init(averagePower: 0, peakPower: 0)) } .padding(40) } diff --git a/Hex/Models/HexSettings.swift b/Hex/Models/HexSettings.swift index ff3804e..a941889 100644 --- a/Hex/Models/HexSettings.swift +++ b/Hex/Models/HexSettings.swift @@ -17,6 +17,11 @@ struct HexSettings: Codable, Equatable { var useDoubleTapOnly: Bool = false var outputLanguage: String? = nil var selectedMicrophoneID: String? = nil + // AI Enhancement options + var useAIEnhancement: Bool = false + var selectedAIModel: String = "llama3:8b" + var aiEnhancementPrompt: String = EnhancementOptions.defaultPrompt + var aiEnhancementTemperature: Double = 0.3 // Define coding keys to match struct properties enum CodingKeys: String, CodingKey { @@ -33,6 +38,10 @@ struct HexSettings: Codable, Equatable { case useDoubleTapOnly case outputLanguage case selectedMicrophoneID + case useAIEnhancement + case selectedAIModel + case aiEnhancementPrompt + case aiEnhancementTemperature } init( @@ -48,7 +57,11 @@ struct HexSettings: Codable, Equatable { copyToClipboard: Bool = true, useDoubleTapOnly: Bool = false, on outputLanguage: String? = nil, - selectedMicrophoneID: String? = nil + selectedMicrophoneID: String? = nil, + useAIEnhancement: Bool = false, + selectedAIModel: String = "llama3:8b", + aiEnhancementPrompt: String = EnhancementOptions.defaultPrompt, + aiEnhancementTemperature: Double = 0.3 ) { self.soundEffectsEnabled = soundEffectsEnabled self.hotkey = hotkey @@ -63,6 +76,10 @@ struct HexSettings: Codable, Equatable { self.useDoubleTapOnly = useDoubleTapOnly self.outputLanguage = outputLanguage self.selectedMicrophoneID = selectedMicrophoneID + self.useAIEnhancement = useAIEnhancement + self.selectedAIModel = selectedAIModel + self.aiEnhancementPrompt = aiEnhancementPrompt + self.aiEnhancementTemperature = aiEnhancementTemperature } // Custom decoder that handles missing fields @@ -92,7 +109,12 @@ struct HexSettings: Codable, Equatable { useDoubleTapOnly = try container.decodeIfPresent(Bool.self, forKey: .useDoubleTapOnly) ?? false outputLanguage = try container.decodeIfPresent(String.self, forKey: .outputLanguage) - selectedMicrophoneID = try container.decodeIfPresent(String.self, forKey: .selectedMicrophoneID) + selectedMicrophoneID = try container.decodeIfPresent(String.self, forKey: .selectedMicrophoneID) + // AI Enhancement settings + useAIEnhancement = try container.decodeIfPresent(Bool.self, forKey: .useAIEnhancement) ?? false + selectedAIModel = try container.decodeIfPresent(String.self, forKey: .selectedAIModel) ?? "llama3:8b" + aiEnhancementPrompt = try container.decodeIfPresent(String.self, forKey: .aiEnhancementPrompt) ?? EnhancementOptions.defaultPrompt + aiEnhancementTemperature = try container.decodeIfPresent(Double.self, forKey: .aiEnhancementTemperature) ?? 
0.3 } } diff --git a/Localizable.xcstrings b/Localizable.xcstrings index a78507e..395d509 100644 --- a/Localizable.xcstrings +++ b/Localizable.xcstrings @@ -3,6 +3,9 @@ "strings" : { "" : { "shouldTranslate" : false + }, + "%.2f" : { + }, "•" : { "shouldTranslate" : false @@ -31,6 +34,15 @@ }, "Accuracy" : { + }, + "AI Enhancement" : { + + }, + "AI enhancement requires Ollama to be installed and running locally." : { + + }, + "AI enhancing text..." : { + }, "Are you sure you want to delete all transcripts? This action cannot be undone." : { "comment" : "Delete transcript history confirm", @@ -45,6 +57,9 @@ }, "Become a Sponsor" : { + }, + "Browse Models" : { + }, "Cancel" : { "comment" : "Cancel deleting All Transcripts", @@ -76,6 +91,9 @@ } } } + }, + "Check Connection" : { + }, "Check for Updates" : { "comment" : "Check for updates button in About section of Settings", @@ -133,6 +151,9 @@ }, "Copy transcription text to clipboard in addition to pasting it" : { + }, + "Creativity" : { + }, "Delete" : { @@ -181,6 +202,9 @@ } } } + }, + "Done" : { + }, "Download" : { @@ -195,6 +219,9 @@ } } } + }, + "Download Ollama" : { + }, "Download Selected Model" : { "comment" : "In Transcription Model section in settings.", @@ -222,6 +249,15 @@ }, "Downloading model..." : { + }, + "Edit" : { + + }, + "Enable AI to improve transcription quality by fixing grammar, formatting, and errors." : { + + }, + "Enhancement Prompt" : { + }, "Ensure Hex can access your microphone and system accessibility features." : { "comment" : "Footer for permissions section in settings", @@ -244,6 +280,9 @@ } } } + }, + "Error: %@" : { + }, "General" : { "comment" : "General section in Settings Header.", @@ -312,6 +351,21 @@ }, "Input Device" : { + }, + "Instructions" : { + + }, + "Language Model" : { + + }, + "Loading available models..." : { + + }, + "Lower values produce more consistent, conservative improvements. Higher values allow more creative rewording." : { + + }, + "Make changes to customize how the AI enhances your transcriptions. Be specific about what should be preserved or changed." : { + }, "Microphone" : { "comment" : "Microphone permission.", @@ -332,6 +386,12 @@ }, "Model prewarming..." : { + }, + "Model Selection" : { + + }, + "No models found in Ollama" : { + }, "No models found." : { "comment" : "Replacement text in transcription model section when no available models are found", @@ -355,6 +415,18 @@ } } } + }, + "Ollama Connected" : { + + }, + "Ollama connection required to view models" : { + + }, + "Ollama Not Connected" : { + + }, + "Ollama Status" : { + }, "Open on Login" : { "comment" : "Label for general setting to open app on login.", @@ -413,6 +485,9 @@ } } } + }, + "Precision" : { + }, "Prevent System Sleep while Recording" : { "comment" : "Label for general setting whether to prevent system sleep while recording.", @@ -441,6 +516,15 @@ } } } + }, + "Reset to Default" : { + + }, + "Response Style" : { + + }, + "Select AI model:" : { + }, "Selected device not connected. System default will be used." : { @@ -519,6 +603,9 @@ }, "Size" : { + }, + "Smaller models are faster but less capable. Llama3 offers a good balance of speed and quality." : { + }, "Sound" : { "comment" : "sound section in general settings.", @@ -561,6 +648,12 @@ }, "System Default" : { + }, + "These instructions tell the AI how to improve your transcribed text." 
: { + + }, + "To set up Ollama:" : { + }, "Transcription Model" : { "comment" : "Label for Transcription Model Section", @@ -572,6 +665,9 @@ } } } + }, + "Use AI Enhancement" : { + }, "Use clipboard to insert" : { "comment" : "Label for toggle in general section.", From 07475cf7365fd353990ed9bea3f61268266c436b Mon Sep 17 00:00:00 2001 From: plyght Date: Thu, 24 Apr 2025 14:19:10 -0400 Subject: [PATCH 02/10] second commit with major fixes and improvements --- Hex/Clients/AIEnhancementClient.swift | 55 ++++++++++++++-- Hex/Clients/PasteboardClient.swift | 64 +++++++++++++------ Hex/Clients/TranscriptionClient.swift | 20 +++++- .../Settings/AIEnhancementFeature.swift | 2 +- Hex/Features/Settings/SettingsView.swift | 7 ++ .../Transcription/TranscriptionFeature.swift | 45 +++++++++---- .../TranscriptionIndicatorView.swift | 26 ++++++-- Hex/Models/HexSettings.swift | 11 +++- Localizable.xcstrings | 9 ++- 9 files changed, 189 insertions(+), 50 deletions(-) diff --git a/Hex/Clients/AIEnhancementClient.swift b/Hex/Clients/AIEnhancementClient.swift index 36d5197..daa563e 100644 --- a/Hex/Clients/AIEnhancementClient.swift +++ b/Hex/Clients/AIEnhancementClient.swift @@ -96,14 +96,26 @@ class AIEnhancementClientLive { func enhance(text: String, model: String, options: EnhancementOptions, progressCallback: @escaping (Progress) -> Void) async throws -> String { // Skip if the text is empty or too short guard !text.isEmpty, text.count > 5 else { + print("[AIEnhancementClientLive] Text too short for enhancement, returning original") return text } let progress = Progress(totalUnitCount: 100) progressCallback(progress) + print("[AIEnhancementClientLive] Starting text enhancement with model: \(model)") + print("[AIEnhancementClientLive] Text to enhance (\(text.count) chars): \"\(text.prefix(50))...\"") + // For now, we support Ollama only do { + // First verify Ollama is available + let isAvailable = await isOllamaAvailable() + if !isAvailable { + print("[AIEnhancementClientLive] Ollama not available, cannot enhance text") + throw NSError(domain: "AIEnhancementClient", code: -5, + userInfo: [NSLocalizedDescriptionKey: "Ollama is not available. Please ensure it's running."]) + } + let enhancedText = try await enhanceWithOllama(text: text, model: model, options: options) { fraction in progress.completedUnitCount = Int64(fraction * 100) progressCallback(progress) @@ -112,6 +124,7 @@ class AIEnhancementClientLive { progress.completedUnitCount = 100 progressCallback(progress) + print("[AIEnhancementClientLive] Successfully enhanced text: \"\(enhancedText.prefix(50))...\"") return enhancedText } catch { print("[AIEnhancementClientLive] Error enhancing text: \(error.localizedDescription)") @@ -124,13 +137,20 @@ class AIEnhancementClientLive { // Simple check - try to connect to Ollama's API endpoint do { var request = URLRequest(url: URL(string: "http://localhost:11434/api/version")!) - request.timeoutInterval = 3.0 // Short timeout for quick feedback + request.timeoutInterval = 5.0 // Longer timeout for more reliability - let (_, response) = try await URLSession.shared.data(for: request) + print("[AIEnhancementClientLive] Checking Ollama availability...") + let (data, response) = try await URLSession.shared.data(for: request) if let httpResponse = response as? HTTPURLResponse { - return httpResponse.statusCode == 200 + let isAvailable = httpResponse.statusCode == 200 + print("[AIEnhancementClientLive] Ollama availability check: \(isAvailable ? 
"Available" : "Unavailable") (status: \(httpResponse.statusCode))") + if isAvailable, let dataString = String(data: data, encoding: .utf8) { + print("[AIEnhancementClientLive] Ollama version: \(dataString)") + } + return isAvailable } + print("[AIEnhancementClientLive] Ollama unavailable: Invalid response type") return false } catch { print("[AIEnhancementClientLive] Ollama not available: \(error.localizedDescription)") @@ -197,6 +217,7 @@ class AIEnhancementClientLive { // Validate inputs guard !model.isEmpty else { + print("[AIEnhancementClientLive] Error: No model selected for enhancement") throw NSError(domain: "AIEnhancementClient", code: -4, userInfo: [NSLocalizedDescriptionKey: "No model selected for enhancement"]) } @@ -205,7 +226,7 @@ class AIEnhancementClientLive { var request = URLRequest(url: url) request.httpMethod = "POST" request.setValue("application/json", forHTTPHeaderField: "Content-Type") - request.timeoutInterval = 30.0 // Allow longer timeout for generation + request.timeoutInterval = 60.0 // Allow longer timeout for generation // Create a well-formatted prompt with clear instructions let fullPrompt = """ @@ -218,15 +239,20 @@ class AIEnhancementClientLive { """ // Build request parameters with appropriate defaults + let temperature = max(0.1, min(1.0, options.temperature)) // Ensure valid range + let maxTokens = max(100, min(2000, options.maxTokens)) // Reasonable limits + let requestDict: [String: Any] = [ "model": model, "prompt": fullPrompt, - "temperature": max(0.1, min(1.0, options.temperature)), // Ensure valid range - "max_tokens": max(100, min(2000, options.maxTokens)), // Reasonable limits + "temperature": temperature, + "max_tokens": maxTokens, "stream": false, "system": "You are an AI that improves transcribed text while preserving meaning." ] + print("[AIEnhancementClientLive] Preparing request to Ollama with model: \(model), temp: \(temperature), max_tokens: \(maxTokens)") + do { // Progress update - request prepared progressCallback(0.2) @@ -235,30 +261,44 @@ class AIEnhancementClientLive { let requestData = try JSONSerialization.data(withJSONObject: requestDict) request.httpBody = requestData + print("[AIEnhancementClientLive] Sending request to Ollama API...") + // Make the request let (responseData, urlResponse) = try await URLSession.shared.data(for: request) // Progress update - response received progressCallback(0.8) + print("[AIEnhancementClientLive] Received response from Ollama API") + // Validate response guard let httpResponse = urlResponse as? HTTPURLResponse else { + print("[AIEnhancementClientLive] Error: Invalid response type from Ollama") throw NSError(domain: "AIEnhancementClient", code: -1, userInfo: [NSLocalizedDescriptionKey: "Invalid response from Ollama"]) } + print("[AIEnhancementClientLive] Ollama response status: \(httpResponse.statusCode)") + if httpResponse.statusCode != 200 { // Try to extract error message if available if let errorDict = try? JSONSerialization.jsonObject(with: responseData) as? [String: Any], let errorMessage = errorDict["error"] as? 
String { + print("[AIEnhancementClientLive] Ollama API error: \(errorMessage)") throw NSError(domain: "AIEnhancementClient", code: httpResponse.statusCode, userInfo: [NSLocalizedDescriptionKey: "Ollama error: \(errorMessage)"]) } else { + print("[AIEnhancementClientLive] Ollama error with status code: \(httpResponse.statusCode)") throw NSError(domain: "AIEnhancementClient", code: httpResponse.statusCode, userInfo: [NSLocalizedDescriptionKey: "Ollama returned status code \(httpResponse.statusCode)"]) } } + // Try to log raw response for debugging + if let responseString = String(data: responseData, encoding: .utf8) { + print("[AIEnhancementClientLive] Raw response: \(responseString.prefix(100))...") + } + // Parse response if let responseDict = try JSONSerialization.jsonObject(with: responseData) as? [String: Any], let enhancedText = responseDict["response"] as? String { @@ -266,10 +306,13 @@ class AIEnhancementClientLive { // Progress update - processing complete progressCallback(1.0) + print("[AIEnhancementClientLive] Successfully parsed Ollama response") + // Clean up the response - trim whitespace and ensure it's not empty let cleanedText = enhancedText.trimmingCharacters(in: .whitespacesAndNewlines) return cleanedText.isEmpty ? text : cleanedText } else { + print("[AIEnhancementClientLive] Error: Failed to parse Ollama response") throw NSError(domain: "AIEnhancementClient", code: -2, userInfo: [NSLocalizedDescriptionKey: "Failed to parse Ollama response"]) } diff --git a/Hex/Clients/PasteboardClient.swift b/Hex/Clients/PasteboardClient.swift index 14f1f5d..4512b7f 100644 --- a/Hex/Clients/PasteboardClient.swift +++ b/Hex/Clients/PasteboardClient.swift @@ -61,14 +61,31 @@ struct PasteboardClientLive { func savePasteboardState(pasteboard: NSPasteboard) -> [[String: Any]] { var savedItems: [[String: Any]] = [] - for item in pasteboard.pasteboardItems ?? [] { + // Limit how many pasteboard items we save to avoid excessive memory use + let itemsToSave = pasteboard.pasteboardItems?.prefix(5) ?? [] + + for item in itemsToSave { var itemDict: [String: Any] = [:] - for type in item.types { + // Prioritize string content which is typically smaller + if item.types.contains(.string), let string = item.string(forType: .string) { + itemDict[NSPasteboard.PasteboardType.string.rawValue] = string.data(using: .utf8) + savedItems.append(itemDict) + continue + } + + // For non-string content, limit the types we save + let typesToSave = item.types.prefix(2) // Only save up to 2 types per item + for type in typesToSave { if let data = item.data(forType: type) { - itemDict[type.rawValue] = data + // Only save data up to 1MB to prevent large memory usage + if data.count <= 1024 * 1024 { + itemDict[type.rawValue] = data + } } } - savedItems.append(itemDict) + if !itemDict.isEmpty { + savedItems.append(itemDict) + } } return savedItems @@ -135,18 +152,25 @@ struct PasteboardClientLive { func pasteWithClipboard(_ text: String) async { let pasteboard = NSPasteboard.general - let originalItems = savePasteboardState(pasteboard: pasteboard) + + // Only save pasteboard state if we need to restore it later + let originalItems = hexSettings.copyToClipboard ? 
[] : savePasteboardState(pasteboard: pasteboard) + pasteboard.clearContents() pasteboard.setString(text, forType: .string) let source = CGEventSource(stateID: .combinedSessionState) - // Track if paste operation successful + // First try the AppleScript approach - it's more reliable in most apps var pasteSucceeded = PasteboardClientLive.pasteToFrontmostApp() // If menu-based paste failed, try simulated keypresses if !pasteSucceeded { print("Failed to paste to frontmost app, falling back to simulated keypresses") + + // Add a small delay to allow system to process + try? await Task.sleep(for: .milliseconds(100)) + let vKeyCode = Sauce.shared.keyCode(for: .v) let cmdKeyCode: CGKeyCode = 55 // Command key @@ -164,33 +188,37 @@ struct PasteboardClientLive { // Create cmd up event let cmdUp = CGEvent(keyboardEventSource: source, virtualKey: cmdKeyCode, keyDown: false) - // Post the events + // Post the events with small delays between them cmdDown?.post(tap: .cghidEventTap) + try? await Task.sleep(for: .milliseconds(10)) vDown?.post(tap: .cghidEventTap) + try? await Task.sleep(for: .milliseconds(10)) vUp?.post(tap: .cghidEventTap) + try? await Task.sleep(for: .milliseconds(10)) cmdUp?.post(tap: .cghidEventTap) - // Assume keypress-based paste succeeded - but text will remain in clipboard as fallback + // Assume keypress-based paste succeeded - text will remain in clipboard as fallback pasteSucceeded = true } // Only restore original pasteboard contents if: // 1. Copying to clipboard is disabled AND - // 2. The paste operation succeeded - if !hexSettings.copyToClipboard && pasteSucceeded { - try? await Task.sleep(for: .seconds(0.1)) - pasteboard.clearContents() - restorePasteboardState(pasteboard: pasteboard, savedItems: originalItems) + // 2. The paste operation succeeded AND + // 3. We have original items to restore + if !hexSettings.copyToClipboard && pasteSucceeded && !originalItems.isEmpty { + try? await Task.sleep(for: .milliseconds(200)) // Give paste operation time to complete + + // Use autoreleasepool to help manage memory during pasteboard operations + autoreleasepool { + pasteboard.clearContents() + restorePasteboardState(pasteboard: pasteboard, savedItems: originalItems) + } } // If we failed to paste AND user doesn't want clipboard retention, - // show a notification that text is available in clipboard + // log the issue but leave text in clipboard as fallback if !pasteSucceeded && !hexSettings.copyToClipboard { - // Keep the transcribed text in clipboard regardless of setting print("Paste operation failed. Text remains in clipboard as fallback.") - - // TODO: Could add a notification here to inform user - // that text is available in clipboard } } diff --git a/Hex/Clients/TranscriptionClient.swift b/Hex/Clients/TranscriptionClient.swift index 2f22d15..626a4d5 100644 --- a/Hex/Clients/TranscriptionClient.swift +++ b/Hex/Clients/TranscriptionClient.swift @@ -231,7 +231,25 @@ actor TranscriptionClientLive { let results = try await whisperKit.transcribe(audioPath: url.path, decodeOptions: options) // Concatenate results from all segments. 
- let text = results.map(\.text).joined(separator: " ") + var text = results.map(\.text).joined(separator: " ") + + // Get the hex settings to check if auto-capitalization should be disabled + let useAutoCapitalization: Bool + do { + let fileURL = URL.documentsDirectory.appending(component: "hex_settings.json") + let data = try Data(contentsOf: fileURL) + let settings = try JSONDecoder().decode(HexSettings.self, from: data) + useAutoCapitalization = !settings.disableAutoCapitalization + } catch { + // If settings can't be read, default to using auto-capitalization + useAutoCapitalization = true + } + + // Convert to lowercase if auto-capitalization is disabled + if !useAutoCapitalization { + text = text.lowercased() + } + return text } diff --git a/Hex/Features/Settings/AIEnhancementFeature.swift b/Hex/Features/Settings/AIEnhancementFeature.swift index 5582efd..507b7aa 100644 --- a/Hex/Features/Settings/AIEnhancementFeature.swift +++ b/Hex/Features/Settings/AIEnhancementFeature.swift @@ -22,7 +22,7 @@ struct AIEnhancementFeature { // Computed property for convenient access to the default model var defaultAIModel: String { - "llama3:8b" + "gemma3" } } diff --git a/Hex/Features/Settings/SettingsView.swift b/Hex/Features/Settings/SettingsView.swift index bd38e4f..30946f3 100644 --- a/Hex/Features/Settings/SettingsView.swift +++ b/Hex/Features/Settings/SettingsView.swift @@ -217,6 +217,13 @@ struct SettingsView: View { } icon: { Image(systemName: "doc.on.clipboard") } + + Label { + Toggle("Disable auto-capitalization", isOn: $store.hexSettings.disableAutoCapitalization) + Text("Disable automatic capitalization in transcriptions") + } icon: { + Image(systemName: "textformat.abc") + } Label { Toggle( diff --git a/Hex/Features/Transcription/TranscriptionFeature.swift b/Hex/Features/Transcription/TranscriptionFeature.swift index 8e2a699..7166407 100644 --- a/Hex/Features/Transcription/TranscriptionFeature.swift +++ b/Hex/Features/Transcription/TranscriptionFeature.swift @@ -19,6 +19,7 @@ struct TranscriptionFeature { var isRecording: Bool = false var isTranscribing: Bool = false var isPrewarming: Bool = false + var isEnhancing: Bool = false // Add this to track when AI enhancement is active var error: String? var recordingStartTime: Date? 
var meter: Meter = .init(averagePower: 0, peakPower: 0) @@ -47,6 +48,7 @@ struct TranscriptionFeature { case transcriptionError(Error) // AI Enhancement flow + case setEnhancingState(Bool) case aiEnhancementResult(String) case aiEnhancementError(Error) } @@ -115,6 +117,10 @@ struct TranscriptionFeature { // MARK: - AI Enhancement Results + case let .setEnhancingState(isEnhancing): + state.isEnhancing = isEnhancing + return .none + case let .aiEnhancementResult(result): return handleAIEnhancement(&state, result: result) @@ -361,17 +367,29 @@ private extension TranscriptionFeature { temperature: state.hexSettings.aiEnhancementTemperature ) - return .run { send in - do { - let enhancedText = try await aiEnhancement.enhance(result, model, options) { _ in } - await send(.aiEnhancementResult(enhancedText)) - } catch { - print("Error enhancing text with AI: \(error)") - // On error, fall back to the original transcription - await send(.aiEnhancementResult(result)) + print("[TranscriptionFeature] Starting AI enhancement with model: \(model)") + + // We need to use .send to set the enhancing state through the proper action + return .merge( + // First update the state to indicate enhancement is starting + .send(.setEnhancingState(true)), + + // Then run the enhancement + .run { send in + do { + print("[TranscriptionFeature] Calling aiEnhancement.enhance()") + let enhancedText = try await aiEnhancement.enhance(result, model, options) { _ in } + print("[TranscriptionFeature] AI enhancement succeeded") + await send(.aiEnhancementResult(enhancedText)) + } catch { + print("[TranscriptionFeature] Error enhancing text with AI: \(error)") + // On error, fall back to the original transcription + await send(.aiEnhancementResult(result)) + } } - } - .cancellable(id: CancelID.aiEnhancement) + ) + // Don't make this cancellable to avoid premature cancellation + // This may have been causing the issue with the enhancement being cancelled } // Handle the AI enhancement result @@ -381,6 +399,7 @@ private extension TranscriptionFeature { ) -> Effect { state.isTranscribing = false state.isPrewarming = false + state.isEnhancing = false // Reset the enhancing state // If empty text, nothing else to do guard !result.isEmpty else { @@ -470,11 +489,13 @@ private extension TranscriptionFeature { state.isTranscribing = false state.isRecording = false state.isPrewarming = false + state.isEnhancing = false return .merge( .cancel(id: CancelID.transcription), .cancel(id: CancelID.delayedRecord), - .cancel(id: CancelID.aiEnhancement), + // Don't cancel AI enhancement as it might cause issues + // .cancel(id: CancelID.aiEnhancement), .run { _ in await soundEffect.play(.cancel) } @@ -516,7 +537,7 @@ struct TranscriptionView: View { @Bindable var store: StoreOf var status: TranscriptionIndicatorView.Status { - if store.isTranscribing && store.hexSettings.useAIEnhancement { + if store.isEnhancing { return .enhancing } else if store.isTranscribing { return .transcribing diff --git a/Hex/Features/Transcription/TranscriptionIndicatorView.swift b/Hex/Features/Transcription/TranscriptionIndicatorView.swift index db17968..5e69d80 100644 --- a/Hex/Features/Transcription/TranscriptionIndicatorView.swift +++ b/Hex/Features/Transcription/TranscriptionIndicatorView.swift @@ -65,6 +65,7 @@ struct TranscriptionIndicatorView: View { } @State var transcribeEffect = 0 + @State var enhanceEffect = 0 var body: some View { let averagePower = min(1, meter.averagePower * 3) @@ -104,11 +105,15 @@ struct TranscriptionIndicatorView: View { } 
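+            // The shadows below act as a colored glow: red scaled by input level
+            // while recording, blue while transcribing, green while enhancing.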
.cornerRadius(cornerRadius) .shadow( - color: status == .recording ? .red.opacity(averagePower) : .red.opacity(0), + color: status == .recording ? .red.opacity(averagePower) : + status == .enhancing ? enhanceBaseColor.opacity(0.7) : + status == .transcribing ? transcribeBaseColor.opacity(0.7) : .red.opacity(0), radius: 4 ) .shadow( - color: status == .recording ? .red.opacity(averagePower * 0.5) : .red.opacity(0), + color: status == .recording ? .red.opacity(averagePower * 0.5) : + status == .enhancing ? enhanceBaseColor.opacity(0.4) : + status == .transcribing ? transcribeBaseColor.opacity(0.4) : .red.opacity(0), radius: 8 ) .animation(.interactiveSpring(), value: meter) @@ -120,20 +125,29 @@ struct TranscriptionIndicatorView: View { .scaleEffect(status == .hidden ? 0.0 : 1) .blur(radius: status == .hidden ? 4 : 0) .animation(.bouncy(duration: 0.3), value: status) - .changeEffect(.glow(color: .red.opacity(0.5), radius: 8), value: status) + .changeEffect(.glow(color: status == .enhancing ? enhanceBaseColor.opacity(0.5) : .red.opacity(0.5), radius: 8), value: status) .changeEffect(.shine(angle: .degrees(0), duration: 0.6), value: transcribeEffect) + .changeEffect(.shine(angle: .degrees(0), duration: 0.6), value: enhanceEffect) .compositingGroup() + // Task for transcribing animation effect .task(id: status == .transcribing) { while status == .transcribing, !Task.isCancelled { transcribeEffect += 1 try? await Task.sleep(for: .seconds(0.25)) } } + // Task for enhancement animation effect + .task(id: status == .enhancing) { + while status == .enhancing, !Task.isCancelled { + enhanceEffect += 1 + try? await Task.sleep(for: .seconds(0.25)) + } + } - // Show tooltip for prewarming/enhancing - if status == .prewarming || status == .enhancing { + // Show tooltip only for prewarming, not for enhancing + if status == .prewarming { VStack(spacing: 4) { - Text(status == .prewarming ? "Model prewarming..." : "AI enhancing text...") + Text("Model prewarming...") .font(.system(size: 12, weight: .medium)) .foregroundColor(.white) .padding(.horizontal, 8) .padding(.vertical, 4) .background( RoundedRectangle(cornerRadius: 4) .fill(Color.black.opacity(0.8)) ) diff --git a/Hex/Models/HexSettings.swift b/Hex/Models/HexSettings.swift index a941889..df8b494 100644 --- a/Hex/Models/HexSettings.swift +++ b/Hex/Models/HexSettings.swift @@ -17,9 +17,10 @@ struct HexSettings: Codable, Equatable { var useDoubleTapOnly: Bool = false var outputLanguage: String? = nil var selectedMicrophoneID: String? = nil + var disableAutoCapitalization: Bool = false // New setting for disabling auto-capitalization // AI Enhancement options var useAIEnhancement: Bool = false - var selectedAIModel: String = "llama3:8b" + var selectedAIModel: String = "gemma3" var aiEnhancementPrompt: String = EnhancementOptions.defaultPrompt var aiEnhancementTemperature: Double = 0.3 @@ -38,6 +39,7 @@ struct HexSettings: Codable, Equatable { case useDoubleTapOnly case outputLanguage case selectedMicrophoneID + case disableAutoCapitalization case useAIEnhancement case selectedAIModel case aiEnhancementPrompt @@ -58,8 +60,9 @@ struct HexSettings: Codable, Equatable { useDoubleTapOnly: Bool = false, outputLanguage: String? = nil, selectedMicrophoneID: String? 
= nil, + disableAutoCapitalization: Bool = false, useAIEnhancement: Bool = false, - selectedAIModel: String = "llama3:8b", + selectedAIModel: String = "gemma3", aiEnhancementPrompt: String = EnhancementOptions.defaultPrompt, aiEnhancementTemperature: Double = 0.3 ) { @@ -76,6 +79,7 @@ struct HexSettings: Codable, Equatable { self.useDoubleTapOnly = useDoubleTapOnly self.outputLanguage = outputLanguage self.selectedMicrophoneID = selectedMicrophoneID + self.disableAutoCapitalization = disableAutoCapitalization self.useAIEnhancement = useAIEnhancement self.selectedAIModel = selectedAIModel self.aiEnhancementPrompt = aiEnhancementPrompt @@ -110,9 +114,10 @@ struct HexSettings: Codable, Equatable { try container.decodeIfPresent(Bool.self, forKey: .useDoubleTapOnly) ?? false outputLanguage = try container.decodeIfPresent(String.self, forKey: .outputLanguage) selectedMicrophoneID = try container.decodeIfPresent(String.self, forKey: .selectedMicrophoneID) + disableAutoCapitalization = try container.decodeIfPresent(Bool.self, forKey: .disableAutoCapitalization) ?? false // AI Enhancement settings useAIEnhancement = try container.decodeIfPresent(Bool.self, forKey: .useAIEnhancement) ?? false - selectedAIModel = try container.decodeIfPresent(String.self, forKey: .selectedAIModel) ?? "llama3:8b" + selectedAIModel = try container.decodeIfPresent(String.self, forKey: .selectedAIModel) ?? "gemma3" aiEnhancementPrompt = try container.decodeIfPresent(String.self, forKey: .aiEnhancementPrompt) ?? EnhancementOptions.defaultPrompt aiEnhancementTemperature = try container.decodeIfPresent(Double.self, forKey: .aiEnhancementTemperature) ?? 0.3 } diff --git a/Localizable.xcstrings b/Localizable.xcstrings index 395d509..e4e9c10 100644 --- a/Localizable.xcstrings +++ b/Localizable.xcstrings @@ -40,9 +40,6 @@ }, "AI enhancement requires Ollama to be installed and running locally." : { - }, - "AI enhancing text..." : { - }, "Are you sure you want to delete all transcripts? This action cannot be undone." : { "comment" : "Delete transcript history confirm", @@ -202,6 +199,12 @@ } } } + }, + "Disable auto-capitalization" : { + + }, + "Disable automatic capitalization in transcriptions" : { + }, "Done" : { From ed13c8b8f166ce02c3a68681201e5e0113729ac1 Mon Sep 17 00:00:00 2001 From: plyght Date: Thu, 24 Apr 2025 14:33:48 -0400 Subject: [PATCH 03/10] improve performance --- Hex/Clients/PasteboardClient.swift | 147 +++++++++--------- Hex/Clients/RecordingClient.swift | 18 ++- Hex/Features/Settings/SettingsFeature.swift | 14 +- .../TranscriptionIndicatorView.swift | 27 ++-- Hex/Models/HexSettings.swift | 33 +++- 5 files changed, 149 insertions(+), 90 deletions(-) diff --git a/Hex/Clients/PasteboardClient.swift b/Hex/Clients/PasteboardClient.swift index 4512b7f..3d3903e 100644 --- a/Hex/Clients/PasteboardClient.swift +++ b/Hex/Clients/PasteboardClient.swift @@ -57,53 +57,61 @@ struct PasteboardClientLive { pasteboard.setString(text, forType: .string) } - // Function to save the current state of the NSPasteboard - func savePasteboardState(pasteboard: NSPasteboard) -> [[String: Any]] { - var savedItems: [[String: Any]] = [] + // Stores the previous pasteboard owner change count + private static var savedChangeCount: Int = 0 + // Stores the previous pasteboard contents name for tracking + private static var savedPasteboardName: String? 
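+ // Intended call pattern (see pasteWithClipboard below): savePasteboardState(pasteboard:) hands back
+ // a uniquely named backup pasteboard, and restorePasteboardFromBackup(mainPasteboard:backupPasteboard:)
+ // copies its items back once the paste has landed. One assumption worth noting: clearContents() only
+ // empties a named pasteboard; if fully retiring it ever matters, NSPasteboard's releaseGlobally() is
+ // the AppKit call for that.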
+ + // More efficient approach that uses NSPasteboard's built-in functionality + // Instead of copying all the data, we'll track the pasteboard state and create + // a temporary pasteboard to hold the original data + func savePasteboardState(pasteboard: NSPasteboard) -> NSPasteboard? { + // If pasteboard is empty, nothing to save + if pasteboard.pasteboardItems?.isEmpty ?? true { + return nil + } - // Limit how many pasteboard items we save to avoid excessive memory use - let itemsToSave = pasteboard.pasteboardItems?.prefix(5) ?? [] + // Generate a unique name for the backup pasteboard + let tempName = "com.kitlangton.Hex.backup.\(UUID().uuidString)" + let backupPasteboard = NSPasteboard(name: .init(tempName)) - for item in itemsToSave { - var itemDict: [String: Any] = [:] - // Prioritize string content which is typically smaller - if item.types.contains(.string), let string = item.string(forType: .string) { - itemDict[NSPasteboard.PasteboardType.string.rawValue] = string.data(using: .utf8) - savedItems.append(itemDict) - continue - } - - // For non-string content, limit the types we save - let typesToSave = item.types.prefix(2) // Only save up to 2 types per item - for type in typesToSave { - if let data = item.data(forType: type) { - // Only save data up to 1MB to prevent large memory usage - if data.count <= 1024 * 1024 { - itemDict[type.rawValue] = data - } - } - } - if !itemDict.isEmpty { - savedItems.append(itemDict) - } + // Clear the backup pasteboard and write all contents from original + backupPasteboard.clearContents() + + // Copy all items to the backup pasteboard + // This is more efficient than manually copying each data item + if let items = pasteboard.pasteboardItems { + backupPasteboard.writeObjects(items) } - return savedItems + // Save the current change count and name for later reference + PasteboardClientLive.savedChangeCount = pasteboard.changeCount + PasteboardClientLive.savedPasteboardName = tempName + + return backupPasteboard } - // Function to restore the saved state of the NSPasteboard - func restorePasteboardState(pasteboard: NSPasteboard, savedItems: [[String: Any]]) { - pasteboard.clearContents() + // Restore the pasteboard state from a backup pasteboard + func restorePasteboardFromBackup(mainPasteboard: NSPasteboard, backupPasteboard: NSPasteboard?) { + // If no backup pasteboard, nothing to restore + guard let backupPasteboard = backupPasteboard else { return } - for itemDict in savedItems { - let item = NSPasteboardItem() - for (type, data) in itemDict { - if let data = data as? 
Data { - item.setData(data, forType: NSPasteboard.PasteboardType(rawValue: type)) - } - } - pasteboard.writeObjects([item]) + // Clear the main pasteboard + mainPasteboard.clearContents() + + // Copy all items from backup to main pasteboard + if let items = backupPasteboard.pasteboardItems { + mainPasteboard.writeObjects(items) } + + // Release the temporary pasteboard by clearing it + backupPasteboard.clearContents() + } + + // Legacy method to maintain compatibility - will be removed in future + func restorePasteboardState(pasteboard: NSPasteboard, savedItems: [[String: Any]]) { + // This is kept for compatibility but shouldn't be used anymore + print("Warning: Using deprecated pasteboard restoration method") } /// Pastes current clipboard content to the frontmost application @@ -153,9 +161,10 @@ struct PasteboardClientLive { func pasteWithClipboard(_ text: String) async { let pasteboard = NSPasteboard.general - // Only save pasteboard state if we need to restore it later - let originalItems = hexSettings.copyToClipboard ? [] : savePasteboardState(pasteboard: pasteboard) + // Save the original pasteboard only if we need to restore it + let backupPasteboard = hexSettings.copyToClipboard ? nil : savePasteboardState(pasteboard: pasteboard) + // Set our text in the clipboard pasteboard.clearContents() pasteboard.setString(text, forType: .string) @@ -174,44 +183,42 @@ struct PasteboardClientLive { let vKeyCode = Sauce.shared.keyCode(for: .v) let cmdKeyCode: CGKeyCode = 55 // Command key - // Create cmd down event - let cmdDown = CGEvent(keyboardEventSource: source, virtualKey: cmdKeyCode, keyDown: true) - - // Create v down event - let vDown = CGEvent(keyboardEventSource: source, virtualKey: vKeyCode, keyDown: true) - vDown?.flags = .maskCommand - - // Create v up event - let vUp = CGEvent(keyboardEventSource: source, virtualKey: vKeyCode, keyDown: false) - vUp?.flags = .maskCommand - - // Create cmd up event - let cmdUp = CGEvent(keyboardEventSource: source, virtualKey: cmdKeyCode, keyDown: false) - - // Post the events with small delays between them - cmdDown?.post(tap: .cghidEventTap) - try? await Task.sleep(for: .milliseconds(10)) - vDown?.post(tap: .cghidEventTap) - try? await Task.sleep(for: .milliseconds(10)) - vUp?.post(tap: .cghidEventTap) - try? await Task.sleep(for: .milliseconds(10)) - cmdUp?.post(tap: .cghidEventTap) + // Create and post key events with small delays between + autoreleasepool { + // Command down + let cmdDown = CGEvent(keyboardEventSource: source, virtualKey: cmdKeyCode, keyDown: true) + cmdDown?.post(tap: .cghidEventTap) + + // V down with command flag + let vDown = CGEvent(keyboardEventSource: source, virtualKey: vKeyCode, keyDown: true) + vDown?.flags = .maskCommand + vDown?.post(tap: .cghidEventTap) + + // V up with command flag + let vUp = CGEvent(keyboardEventSource: source, virtualKey: vKeyCode, keyDown: false) + vUp?.flags = .maskCommand + vUp?.post(tap: .cghidEventTap) + + // Command up + let cmdUp = CGEvent(keyboardEventSource: source, virtualKey: cmdKeyCode, keyDown: false) + cmdUp?.post(tap: .cghidEventTap) + } // Assume keypress-based paste succeeded - text will remain in clipboard as fallback pasteSucceeded = true } // Only restore original pasteboard contents if: - // 1. Copying to clipboard is disabled AND + // 1. User doesn't want to keep text in clipboard AND // 2. The paste operation succeeded AND - // 3. We have original items to restore - if !hexSettings.copyToClipboard && pasteSucceeded && !originalItems.isEmpty { - try? 
await Task.sleep(for: .milliseconds(200)) // Give paste operation time to complete + // 3. We have a backup pasteboard + if !hexSettings.copyToClipboard && pasteSucceeded && backupPasteboard != nil { + // Give paste operation time to complete + try? await Task.sleep(for: .milliseconds(200)) - // Use autoreleasepool to help manage memory during pasteboard operations + // Restore the original pasteboard state autoreleasepool { - pasteboard.clearContents() - restorePasteboardState(pasteboard: pasteboard, savedItems: originalItems) + restorePasteboardFromBackup(mainPasteboard: pasteboard, backupPasteboard: backupPasteboard) } } diff --git a/Hex/Clients/RecordingClient.swift b/Hex/Clients/RecordingClient.swift index e4303e5..f0f0982 100644 --- a/Hex/Clients/RecordingClient.swift +++ b/Hex/Clients/RecordingClient.swift @@ -559,13 +559,29 @@ actor RecordingClientLive { func startMeterTask() { meterTask = Task { + var lastMeter = Meter(averagePower: 0, peakPower: 0) + var updateCount = 0 + while !Task.isCancelled, let r = self.recorder, r.isRecording { r.updateMeters() let averagePower = r.averagePower(forChannel: 0) let averageNormalized = pow(10, averagePower / 20.0) let peakPower = r.peakPower(forChannel: 0) let peakNormalized = pow(10, peakPower / 20.0) - meterContinuation.yield(Meter(averagePower: Double(averageNormalized), peakPower: Double(peakNormalized))) + let currentMeter = Meter(averagePower: Double(averageNormalized), peakPower: Double(peakNormalized)) + + // Only emit if there's a significant change, or every ~5 updates (500ms) + let significantChange = abs(currentMeter.averagePower - lastMeter.averagePower) > 0.05 || + abs(currentMeter.peakPower - lastMeter.peakPower) > 0.1 + + if significantChange || updateCount >= 5 { + meterContinuation.yield(currentMeter) + lastMeter = currentMeter + updateCount = 0 + } else { + updateCount += 1 + } + try? await Task.sleep(for: .milliseconds(100)) } } diff --git a/Hex/Features/Settings/SettingsFeature.swift b/Hex/Features/Settings/SettingsFeature.swift index c73c451..3e31394 100644 --- a/Hex/Features/Settings/SettingsFeature.swift +++ b/Hex/Features/Settings/SettingsFeature.swift @@ -109,15 +109,17 @@ struct SettingsFeature { await send(.modelDownload(.fetchModels)) await send(.loadAvailableInputDevices) - // Set up periodic refresh of available devices (every 120 seconds) - // Using a longer interval to reduce resource usage + // Set up periodic refresh of available devices (every 180 seconds = 3 minutes) + // Using an even longer interval to further reduce resource usage let deviceRefreshTask = Task { @MainActor in - for await _ in clock.timer(interval: .seconds(120)) { - // Only refresh when the app is active to save resources + for await _ in clock.timer(interval: .seconds(180)) { + // Only refresh when the app is active AND the settings panel is visible let isActive = NSApplication.shared.isActive + let areSettingsVisible = NSApp.windows.contains { + $0.isVisible && ($0.title.contains("Settings") || $0.title.contains("Preferences")) + } - if isActive { - try? 
await Task.sleep(for: .nanoseconds(1)) + if isActive && areSettingsVisible { await send(.loadAvailableInputDevices) } } diff --git a/Hex/Features/Transcription/TranscriptionIndicatorView.swift b/Hex/Features/Transcription/TranscriptionIndicatorView.swift index 5e69d80..67c1931 100644 --- a/Hex/Features/Transcription/TranscriptionIndicatorView.swift +++ b/Hex/Features/Transcription/TranscriptionIndicatorView.swift @@ -129,18 +129,21 @@ struct TranscriptionIndicatorView: View { .changeEffect(.shine(angle: .degrees(0), duration: 0.6), value: transcribeEffect) .changeEffect(.shine(angle: .degrees(0), duration: 0.6), value: enhanceEffect) .compositingGroup() - // Task for transcribing animation effect - .task(id: status == .transcribing) { - while status == .transcribing, !Task.isCancelled { - transcribeEffect += 1 - try? await Task.sleep(for: .seconds(0.25)) - } - } - // Task for enhancement animation effect - .task(id: status == .enhancing) { - while status == .enhancing, !Task.isCancelled { - enhanceEffect += 1 - try? await Task.sleep(for: .seconds(0.25)) + // Shared animation task to reduce the number of active tasks + .task(id: status) { + // Only animate if we're in a state that needs animation + guard status == .transcribing || status == .enhancing else { return } + + // Use a single timer loop for both types of animations + let animationDelay: Duration = .seconds(0.3) + while (status == .transcribing || status == .enhancing), !Task.isCancelled { + // Update the appropriate counter based on current status + if status == .transcribing { + transcribeEffect += 1 + } else if status == .enhancing { + enhanceEffect += 1 + } + try? await Task.sleep(for: animationDelay) } } diff --git a/Hex/Models/HexSettings.swift b/Hex/Models/HexSettings.swift index df8b494..9dae772 100644 --- a/Hex/Models/HexSettings.swift +++ b/Hex/Models/HexSettings.swift @@ -123,12 +123,43 @@ struct HexSettings: Codable, Equatable { } } +// Cache for HexSettings to reduce disk I/O +private var cachedSettings: HexSettings? 
= nil +private var lastSettingsLoadTime: Date = .distantPast + extension SharedReaderKey where Self == FileStorageKey.Default { static var hexSettings: Self { Self[ - .fileStorage(URL.documentsDirectory.appending(component: "hex_settings.json")), + .fileStorage( + URL.documentsDirectory.appending(component: "hex_settings.json"), + read: { url in + // Use cached settings if they exist and are recent (within last 5 seconds) + if let cached = cachedSettings, + Date().timeIntervalSince(lastSettingsLoadTime) < 5.0 { + return cached + } + + // Otherwise read from disk + do { + let data = try Data(contentsOf: url) + let settings = try JSONDecoder().decode(HexSettings.self, from: data) + + // Update cache + cachedSettings = settings + lastSettingsLoadTime = Date() + + return settings + } catch { + // On error, return default settings + let defaultSettings = HexSettings() + cachedSettings = defaultSettings + lastSettingsLoadTime = Date() + return defaultSettings + } + } + ), default: .init() ] } From e24a759480263bb4a16aaea490b41b57f3331325 Mon Sep 17 00:00:00 2001 From: plyght Date: Thu, 24 Apr 2025 14:35:27 -0400 Subject: [PATCH 04/10] fix build issue --- Hex/Models/HexSettings.swift | 63 +++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 29 deletions(-) diff --git a/Hex/Models/HexSettings.swift b/Hex/Models/HexSettings.swift index 9dae772..2636b77 100644 --- a/Hex/Models/HexSettings.swift +++ b/Hex/Models/HexSettings.swift @@ -127,40 +127,45 @@ struct HexSettings: Codable, Equatable { private var cachedSettings: HexSettings? = nil private var lastSettingsLoadTime: Date = .distantPast +// Helper function to get cached settings or load from disk +func getCachedSettings() -> HexSettings { + // Use cached settings if they exist and are recent (within last 5 seconds) + if let cached = cachedSettings, + Date().timeIntervalSince(lastSettingsLoadTime) < 5.0 { + return cached + } + + // Otherwise read from disk + do { + let url = URL.documentsDirectory.appending(component: "hex_settings.json") + if FileManager.default.fileExists(atPath: url.path) { + let data = try Data(contentsOf: url) + let settings = try JSONDecoder().decode(HexSettings.self, from: data) + + // Update cache + cachedSettings = settings + lastSettingsLoadTime = Date() + + return settings + } + } catch { + print("Error loading settings: \(error)") + } + + // On error or if file doesn't exist, return default settings + let defaultSettings = HexSettings() + cachedSettings = defaultSettings + lastSettingsLoadTime = Date() + return defaultSettings +} + extension SharedReaderKey where Self == FileStorageKey.Default { static var hexSettings: Self { Self[ - .fileStorage( - URL.documentsDirectory.appending(component: "hex_settings.json"), - read: { url in - // Use cached settings if they exist and are recent (within last 5 seconds) - if let cached = cachedSettings, - Date().timeIntervalSince(lastSettingsLoadTime) < 5.0 { - return cached - } - - // Otherwise read from disk - do { - let data = try Data(contentsOf: url) - let settings = try JSONDecoder().decode(HexSettings.self, from: data) - - // Update cache - cachedSettings = settings - lastSettingsLoadTime = Date() - - return settings - } catch { - // On error, return default settings - let defaultSettings = HexSettings() - cachedSettings = defaultSettings - lastSettingsLoadTime = Date() - return defaultSettings - } - } - ), - default: .init() + .fileStorage(URL.documentsDirectory.appending(component: "hex_settings.json")), + default: getCachedSettings() ] } } From 
9e51c96f160b573cadfa0019542bf0f59d47f226 Mon Sep 17 00:00:00 2001 From: plyght Date: Thu, 24 Apr 2025 14:40:20 -0400 Subject: [PATCH 05/10] revert signing --- Hex.xcodeproj/project.pbxproj | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Hex.xcodeproj/project.pbxproj b/Hex.xcodeproj/project.pbxproj index 3113846..9b2f1e1 100644 --- a/Hex.xcodeproj/project.pbxproj +++ b/Hex.xcodeproj/project.pbxproj @@ -423,7 +423,7 @@ ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor; CODE_SIGN_ENTITLEMENTS = Hex/Hex.entitlements; - "CODE_SIGN_IDENTITY[sdk=macosx*]" = "-"; + "CODE_SIGN_IDENTITY[sdk=macosx*]" = "Apple Development"; CODE_SIGN_STYLE = Automatic; COMBINE_HIDPI_IMAGES = YES; CURRENT_PROJECT_VERSION = 34; @@ -456,7 +456,7 @@ ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor; CODE_SIGN_ENTITLEMENTS = Hex/Hex.entitlements; - "CODE_SIGN_IDENTITY[sdk=macosx*]" = "-"; + "CODE_SIGN_IDENTITY[sdk=macosx*]" = "Apple Development"; CODE_SIGN_STYLE = Automatic; COMBINE_HIDPI_IMAGES = YES; CURRENT_PROJECT_VERSION = 34; From 7bcad42611ba7acb2824ce1b855a80bbf5d78a97 Mon Sep 17 00:00:00 2001 From: plyght Date: Mon, 28 Apr 2025 09:47:20 -0400 Subject: [PATCH 06/10] fix coderabbit stuff --- Hex/Clients/AIEnhancementClient.swift | 11 ++- Hex/Clients/RecordingClient.swift | 2 +- Hex/Clients/TranscriptionClient.swift | 19 ++---- Hex/Features/Settings/SettingsFeature.swift | 2 +- .../Transcription/TranscriptionFeature.swift | 68 +++++++++++++++---- 5 files changed, 73 insertions(+), 29 deletions(-) diff --git a/Hex/Clients/AIEnhancementClient.swift b/Hex/Clients/AIEnhancementClient.swift index daa563e..e8e5842 100644 --- a/Hex/Clients/AIEnhancementClient.swift +++ b/Hex/Clients/AIEnhancementClient.swift @@ -264,7 +264,16 @@ class AIEnhancementClientLive { print("[AIEnhancementClientLive] Sending request to Ollama API...") // Make the request - let (responseData, urlResponse) = try await URLSession.shared.data(for: request) + let (responseData, urlResponse): (Data, URLResponse) + do { + (responseData, urlResponse) = try await URLSession.shared.data(for: request) + } catch { + // Treat timeouts and connectivity issues as "Ollama unavailable" + print("[AIEnhancementClientLive] Generation failed: \(error.localizedDescription)") + throw NSError(domain: "AIEnhancementClient", + code: -1001, // NSURLErrorTimedOut or similar + userInfo: [NSLocalizedDescriptionKey: "Ollama is unresponsive. Please check if it's running."]) + } // Progress update - response received progressCallback(0.8) diff --git a/Hex/Clients/RecordingClient.swift b/Hex/Clients/RecordingClient.swift index f0f0982..0746184 100644 --- a/Hex/Clients/RecordingClient.swift +++ b/Hex/Clients/RecordingClient.swift @@ -381,7 +381,7 @@ actor RecordingClientLive { var deviceName: CFString? 
= nil var size = UInt32(MemoryLayout<CFString?>.size) - let deviceNamePtr: UnsafeMutableRawPointer = .allocate(byteCount: Int(size), alignment: MemoryLayout<CFString?>.alignment) + let deviceNamePtr = UnsafeMutableRawPointer.allocate(byteCount: Int(size), alignment: MemoryLayout<CFString?>.alignment) defer { deviceNamePtr.deallocate() } let status = AudioObjectGetPropertyData( diff --git a/Hex/Clients/TranscriptionClient.swift b/Hex/Clients/TranscriptionClient.swift index 626a4d5..71b7aa3 100644 --- a/Hex/Clients/TranscriptionClient.swift +++ b/Hex/Clients/TranscriptionClient.swift @@ -17,7 +17,8 @@ import WhisperKit struct TranscriptionClient { /// Transcribes an audio file at the specified `URL` using the named `model`. /// Reports transcription progress via `progressCallback`. - var transcribe: @Sendable (URL, String, DecodingOptions, @escaping (Progress) -> Void) async throws -> String + /// Optionally accepts HexSettings for features like auto-capitalization. + var transcribe: @Sendable (URL, String, DecodingOptions, HexSettings?, @escaping (Progress) -> Void) async throws -> String /// Ensures a model is downloaded (if missing) and loaded into memory, reporting progress via `progressCallback`. var downloadModel: @Sendable (String, @escaping (Progress) -> Void) async throws -> Void @@ -39,7 +40,7 @@ extension TranscriptionClient: DependencyKey { static var liveValue: Self { let live = TranscriptionClientLive() return Self( - transcribe: { try await live.transcribe(url: $0, model: $1, options: $2, progressCallback: $3) }, + transcribe: { try await live.transcribe(url: $0, model: $1, options: $2, settings: $3, progressCallback: $4) }, downloadModel: { try await live.downloadAndLoadModel(variant: $0, progressCallback: $1) }, deleteModel: { try await live.deleteModel(variant: $0) }, isModelDownloaded: { await live.isModelDownloaded($0) }, @@ -206,6 +207,7 @@ actor TranscriptionClientLive { url: URL, model: String, options: DecodingOptions, + settings: HexSettings? = nil, progressCallback: @escaping (Progress) -> Void ) async throws -> String { // Load or switch to the required model if needed. @@ -233,17 +235,8 @@ actor TranscriptionClientLive { // Concatenate results from all segments. var text = results.map(\.text).joined(separator: " ") - // Get the hex settings to check if auto-capitalization should be disabled - let useAutoCapitalization: Bool - do { - let fileURL = URL.documentsDirectory.appending(component: "hex_settings.json") - let data = try Data(contentsOf: fileURL) - let settings = try JSONDecoder().decode(HexSettings.self, from: data) - useAutoCapitalization = !settings.disableAutoCapitalization - } catch { - // If settings can't be read, default to using auto-capitalization - useAutoCapitalization = true - } + // Use provided settings or default to auto-capitalization + let useAutoCapitalization = settings == nil ? 
true : !settings!.disableAutoCapitalization // Convert to lowercase if auto-capitalization is disabled if !useAutoCapitalization { diff --git a/Hex/Features/Settings/SettingsFeature.swift b/Hex/Features/Settings/SettingsFeature.swift index 3e31394..bb8416c 100644 --- a/Hex/Features/Settings/SettingsFeature.swift +++ b/Hex/Features/Settings/SettingsFeature.swift @@ -120,7 +120,7 @@ struct SettingsFeature { } if isActive && areSettingsVisible { - await send(.loadAvailableInputDevices) + send(.loadAvailableInputDevices) } } } diff --git a/Hex/Features/Transcription/TranscriptionFeature.swift b/Hex/Features/Transcription/TranscriptionFeature.swift index 7166407..567a254 100644 --- a/Hex/Features/Transcription/TranscriptionFeature.swift +++ b/Hex/Features/Transcription/TranscriptionFeature.swift @@ -51,6 +51,8 @@ struct TranscriptionFeature { case setEnhancingState(Bool) case aiEnhancementResult(String) case aiEnhancementError(Error) + case ollamaBecameUnavailable + case recheckOllamaAvailability } enum CancelID { @@ -125,10 +127,31 @@ struct TranscriptionFeature { return handleAIEnhancement(&state, result: result) case let .aiEnhancementError(error): - // On AI enhancement error, we'll just use the original transcription - // so we don't need specific error handling here - print("AI Enhancement error: \(error)") - return .none + // Check if this is an Ollama connectivity error + let nsError = error as NSError + if nsError.domain == "AIEnhancementClient" && (nsError.code == -1001 || nsError.localizedDescription.contains("Ollama")) { + print("AI Enhancement error due to Ollama connectivity: \(error)") + return .send(.ollamaBecameUnavailable) + } else { + // For other errors, just use the original transcription + print("AI Enhancement error: \(error)") + return .none + } + + case .ollamaBecameUnavailable: + // When Ollama becomes unavailable, recheck availability and handle UI updates + return .send(.recheckOllamaAvailability) + + case .recheckOllamaAvailability: + // Recheck if Ollama is available and update UI accordingly + return .run { send in + let isAvailable = await aiEnhancement.isOllamaAvailable() + if !isAvailable { + // Could dispatch to a UI state to show an alert or notification + print("[TranscriptionFeature] Ollama is not available. 
AI enhancement is disabled.") + // Here you would typically update UI state to show an alert + } + } // MARK: - Cancel Entire Flow @@ -290,9 +313,13 @@ private extension TranscriptionFeature { // Otherwise, proceed to transcription state.isTranscribing = true state.error = nil + + // Extract all required state values to local variables to avoid capturing inout parameter let model = state.hexSettings.selectedModel let language = state.hexSettings.outputLanguage - + let settings = state.hexSettings + let recordingStartTime = state.recordingStartTime + state.isPrewarming = true return .run { send in @@ -307,7 +334,7 @@ private extension TranscriptionFeature { chunkingStrategy: .vad ) - let result = try await transcription.transcribe(audioURL, model, decodeOptions) { _ in } + let result = try await transcription.transcribe(audioURL, model, decodeOptions, settings) { _ in } print("Transcribed audio from URL: \(audioURL) to text: \(result)") await send(.transcriptionResult(result)) @@ -330,7 +357,18 @@ private extension TranscriptionFeature { // First check if we should use AI enhancement if state.hexSettings.useAIEnhancement { // Keep state.isTranscribing = true since we're still processing - return enhanceWithAI(result: result, state: state) + + // Extract values to avoid capturing inout parameter + let selectedAIModel = state.hexSettings.selectedAIModel + let promptText = state.hexSettings.aiEnhancementPrompt + let temperature = state.hexSettings.aiEnhancementTemperature + + return enhanceWithAI( + result: result, + model: selectedAIModel, + promptText: promptText, + temperature: temperature + ) } else { state.isTranscribing = false state.isPrewarming = false @@ -355,16 +393,20 @@ private extension TranscriptionFeature { // MARK: - AI Enhancement Handlers // Use AI to enhance the transcription result - private func enhanceWithAI(result: String, state: State) -> Effect<Action> { + private func enhanceWithAI( + result: String, + model: String, + promptText: String, + temperature: Double + ) -> Effect<Action> { // If empty text, nothing else to do guard !result.isEmpty else { return .send(.aiEnhancementResult(result)) // Just pass through empty text } - let model = state.hexSettings.selectedAIModel let options = EnhancementOptions( - prompt: state.hexSettings.aiEnhancementPrompt, - temperature: state.hexSettings.aiEnhancementTemperature + prompt: promptText, + temperature: temperature ) print("[TranscriptionFeature] Starting AI enhancement with model: \(model)") @@ -383,8 +425,8 @@ private extension TranscriptionFeature { await send(.aiEnhancementResult(enhancedText)) } catch { print("[TranscriptionFeature] Error enhancing text with AI: \(error)") - // On error, fall back to the original transcription - await send(.aiEnhancementResult(result)) + // Properly handle the error through the action system + await send(.aiEnhancementError(error)) } } ) From 27fef1e50975651c034bf01d06239cfd49ac6619 Mon Sep 17 00:00:00 2001 From: plyght Date: Mon, 28 Apr 2025 09:48:04 -0400 Subject: [PATCH 07/10] update recommended --- Hex.xcodeproj/project.pbxproj | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Hex.xcodeproj/project.pbxproj b/Hex.xcodeproj/project.pbxproj index 9b2f1e1..62f99bb 100644 --- a/Hex.xcodeproj/project.pbxproj +++ b/Hex.xcodeproj/project.pbxproj @@ -271,6 +271,7 @@ BUNDLE_LOADER = "$(TEST_HOST)"; CODE_SIGN_STYLE = Automatic; CURRENT_PROJECT_VERSION = 34; + DEAD_CODE_STRIPPING = YES; DEVELOPMENT_TEAM = QC99C9JE59; GENERATE_INFOPLIST_FILE = YES; MARKETING_VERSION = 0.2.1; @@ -288,6 +289,7 @@ 
BUNDLE_LOADER = "$(TEST_HOST)"; CODE_SIGN_STYLE = Automatic; CURRENT_PROJECT_VERSION = 34; + DEAD_CODE_STRIPPING = YES; DEVELOPMENT_TEAM = QC99C9JE59; GENERATE_INFOPLIST_FILE = YES; MARKETING_VERSION = 0.2.1; @@ -333,6 +335,7 @@ CLANG_WARN_UNREACHABLE_CODE = YES; CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; COPY_PHASE_STRIP = NO; + DEAD_CODE_STRIPPING = YES; DEBUG_INFORMATION_FORMAT = dwarf; ENABLE_STRICT_OBJC_MSGSEND = YES; ENABLE_TESTABILITY = YES; @@ -396,6 +399,7 @@ CLANG_WARN_UNREACHABLE_CODE = YES; CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; COPY_PHASE_STRIP = NO; + DEAD_CODE_STRIPPING = YES; DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; ENABLE_NS_ASSERTIONS = NO; ENABLE_STRICT_OBJC_MSGSEND = YES; @@ -427,6 +431,7 @@ CODE_SIGN_STYLE = Automatic; COMBINE_HIDPI_IMAGES = YES; CURRENT_PROJECT_VERSION = 34; + DEAD_CODE_STRIPPING = YES; DEVELOPMENT_ASSET_PATHS = "\"Hex/Preview Content\""; DEVELOPMENT_TEAM = QC99C9JE59; ENABLE_HARDENED_RUNTIME = YES; @@ -460,6 +465,7 @@ CODE_SIGN_STYLE = Automatic; COMBINE_HIDPI_IMAGES = YES; CURRENT_PROJECT_VERSION = 34; + DEAD_CODE_STRIPPING = YES; DEVELOPMENT_ASSET_PATHS = "\"Hex/Preview Content\""; DEVELOPMENT_TEAM = QC99C9JE59; ENABLE_HARDENED_RUNTIME = YES; From 25679ae1e09c947b41a7e874d72a2a2c4308ad44 Mon Sep 17 00:00:00 2001 From: plyght Date: Mon, 28 Apr 2025 09:56:09 -0400 Subject: [PATCH 08/10] fix --- Hex/Features/Transcription/TranscriptionFeature.swift | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Hex/Features/Transcription/TranscriptionFeature.swift b/Hex/Features/Transcription/TranscriptionFeature.swift index 567a254..a214d4a 100644 --- a/Hex/Features/Transcription/TranscriptionFeature.swift +++ b/Hex/Features/Transcription/TranscriptionFeature.swift @@ -318,7 +318,7 @@ private extension TranscriptionFeature { let model = state.hexSettings.selectedModel let language = state.hexSettings.outputLanguage let settings = state.hexSettings - let recordingStartTime = state.recordingStartTime + // recordingStartTime captured in handleTranscriptionResult state.isPrewarming = true @@ -420,7 +420,9 @@ private extension TranscriptionFeature { .run { send in do { print("[TranscriptionFeature] Calling aiEnhancement.enhance()") - let enhancedText = try await aiEnhancement.enhance(result, model, options) { _ in } + let enhancedText = try await aiEnhancement.enhance(result, model, options) { progress in + // Optional: Could update UI with progress information here if needed + } print("[TranscriptionFeature] AI enhancement succeeded") await send(.aiEnhancementResult(enhancedText)) } catch { From e2ce7ca4d48874a91297cb32b9bab77a1d7d6069 Mon Sep 17 00:00:00 2001 From: plyght Date: Mon, 12 May 2025 23:19:06 -0400 Subject: [PATCH 09/10] fix thread --- Hex/Clients/KeyEventMonitorClient.swift | 32 +- Hex/Clients/PasteboardClient.swift | 10 +- Hex/Clients/RecordingClient.swift | 44 ++- .../Transcription/TranscriptionFeature.swift | 42 ++- .../TranscriptionIndicatorView.swift | 285 +++++++++++++----- 5 files changed, 324 insertions(+), 89 deletions(-) diff --git a/Hex/Clients/KeyEventMonitorClient.swift b/Hex/Clients/KeyEventMonitorClient.swift index ebc8dea..7645460 100644 --- a/Hex/Clients/KeyEventMonitorClient.swift +++ b/Hex/Clients/KeyEventMonitorClient.swift @@ -8,6 +8,33 @@ import Sauce private let logger = Logger(subsystem: "com.kitlangton.Hex", category: "KeyEventMonitor") +/// Thread-safe wrapper for interacting with the Sauce library +/// This ensures all Sauce operations happen on the main thread +/// to prevent 
"_dispatch_assert_queue_fail" errors +enum SafeSauce { + /// Thread-safe way to call Sauce methods from any thread + static func performOnMainThread(_ operation: @escaping () -> T) -> T { + // If we're already on the main thread, just perform the operation + if Thread.isMainThread { + return operation() + } + + // Otherwise dispatch to main thread and wait for result + return DispatchQueue.main.sync { + operation() + } + } + + // Convenience methods that handle thread switching automatically + static func safeKey(for keyCode: Int) -> Key? { + performOnMainThread { Sauce.shared.key(for: keyCode) } + } + + static func safeKeyCode(for key: Key) -> CGKeyCode { + performOnMainThread { Sauce.shared.keyCode(for: key) } + } +} + public struct KeyEvent { let key: Key? let modifiers: Modifiers @@ -16,7 +43,8 @@ public struct KeyEvent { public extension KeyEvent { init(cgEvent: CGEvent, type _: CGEventType) { let keyCode = Int(cgEvent.getIntegerValueField(.keyboardEventKeycode)) - let key = cgEvent.type == .keyDown ? Sauce.shared.key(for: keyCode) : nil + // Use our thread-safe wrapper to prevent _dispatch_assert_queue_fail + let key: Key? = cgEvent.type == .keyDown ? SafeSauce.safeKey(for: keyCode) : nil let modifiers = Modifiers.from(carbonFlags: cgEvent.flags) self.init(key: key, modifiers: modifiers) @@ -188,4 +216,4 @@ class KeyEventMonitorClientLive { return handled } -} +} \ No newline at end of file diff --git a/Hex/Clients/PasteboardClient.swift b/Hex/Clients/PasteboardClient.swift index 3d3903e..eae8532 100644 --- a/Hex/Clients/PasteboardClient.swift +++ b/Hex/Clients/PasteboardClient.swift @@ -179,8 +179,14 @@ struct PasteboardClientLive { // Add a small delay to allow system to process try? await Task.sleep(for: .milliseconds(100)) - - let vKeyCode = Sauce.shared.keyCode(for: .v) + + // Use a thread-safe approach to prevent _dispatch_assert_queue_fail + let vKeyCode: CGKeyCode + if Thread.isMainThread { + vKeyCode = Sauce.shared.keyCode(for: .v) + } else { + vKeyCode = DispatchQueue.main.sync { Sauce.shared.keyCode(for: .v) } + } let cmdKeyCode: CGKeyCode = 55 // Command key // Create and post key events with small delays between diff --git a/Hex/Clients/RecordingClient.swift b/Hex/Clients/RecordingClient.swift index 0746184..025815d 100644 --- a/Hex/Clients/RecordingClient.swift +++ b/Hex/Clients/RecordingClient.swift @@ -561,7 +561,12 @@ actor RecordingClientLive { meterTask = Task { var lastMeter = Meter(averagePower: 0, peakPower: 0) var updateCount = 0 - + var lastUpdateTime = Date() + + // Use lower sampling rates when there's less activity + var inactiveCount = 0 + var samplingInterval: Duration = .milliseconds(100) // Start with default + while !Task.isCancelled, let r = self.recorder, r.isRecording { r.updateMeters() let averagePower = r.averagePower(forChannel: 0) @@ -569,20 +574,41 @@ actor RecordingClientLive { let peakPower = r.peakPower(forChannel: 0) let peakNormalized = pow(10, peakPower / 20.0) let currentMeter = Meter(averagePower: Double(averageNormalized), peakPower: Double(peakNormalized)) - - // Only emit if there's a significant change, or every ~5 updates (500ms) - let significantChange = abs(currentMeter.averagePower - lastMeter.averagePower) > 0.05 || - abs(currentMeter.peakPower - lastMeter.peakPower) > 0.1 - - if significantChange || updateCount >= 5 { + + // Determine threshold for significant change (adaptive based on current levels) + let averageThreshold = max(0.05, lastMeter.averagePower * 0.15) // More sensitive at low levels + let peakThreshold = 
max(0.1, lastMeter.peakPower * 0.15) + + // Check if there's a significant change + let significantChange = abs(currentMeter.averagePower - lastMeter.averagePower) > averageThreshold || + abs(currentMeter.peakPower - lastMeter.peakPower) > peakThreshold + + // Force update if too much time has passed (prevents UI from appearing frozen) + let timeSinceLastUpdate = Date().timeIntervalSince(lastUpdateTime) + let forceUpdate = timeSinceLastUpdate > 0.3 // Max 300ms between updates for smooth UI + + // Adaptive sampling rate based on activity level + if significantChange { + inactiveCount = 0 + samplingInterval = .milliseconds(80) // Faster sampling during active periods + } else { + inactiveCount += 1 + if inactiveCount > 10 { + // Gradually increase sampling interval during periods of low activity + samplingInterval = .milliseconds(min(150, 80 + inactiveCount * 5)) + } + } + + if significantChange || forceUpdate || updateCount >= 3 { meterContinuation.yield(currentMeter) lastMeter = currentMeter + lastUpdateTime = Date() updateCount = 0 } else { updateCount += 1 } - - try? await Task.sleep(for: .milliseconds(100)) + + try? await Task.sleep(for: samplingInterval) } } } diff --git a/Hex/Features/Transcription/TranscriptionFeature.swift b/Hex/Features/Transcription/TranscriptionFeature.swift index a214d4a..f025037 100644 --- a/Hex/Features/Transcription/TranscriptionFeature.swift +++ b/Hex/Features/Transcription/TranscriptionFeature.swift @@ -86,7 +86,15 @@ struct TranscriptionFeature { // MARK: - Metering case let .audioLevelUpdated(meter): - state.meter = meter + // Only update state.meter if it's significantly different from the previous value + // or if we're currently recording (when we need more responsive updates) + let averageDiff = abs(meter.averagePower - state.meter.averagePower) + let peakDiff = abs(meter.peakPower - state.meter.peakPower) + let significantChange = averageDiff > 0.03 || peakDiff > 0.05 + + if state.isRecording || significantChange { + state.meter = meter + } return .none // MARK: - HotKey Flow @@ -172,8 +180,38 @@ private extension TranscriptionFeature { /// Effect to begin observing the audio meter. func startMeteringEffect() -> Effect<Action> { .run { send in + // Use a rate limiter to prevent too many updates + var lastUpdateTime = Date() + var lastMeter: Meter? 
= nil + for await meter in await recording.observeAudioLevel() { - await send(.audioLevelUpdated(meter)) + // Apply main-thread protection + await MainActor.run { + // Rate limit updates based on time and significant changes + let now = Date() + let timeSinceLastUpdate = now.timeIntervalSince(lastUpdateTime) + + // Determine if we should process this update + var shouldUpdate = false + + // Always update if enough time has passed (ensures UI responsiveness) + if timeSinceLastUpdate >= 0.05 { // Max 20 updates per second + shouldUpdate = true + } + // Or if there's a significant change from the last meter we actually sent + else if let last = lastMeter { + let averageDiff = abs(meter.averagePower - last.averagePower) + let peakDiff = abs(meter.peakPower - last.peakPower) + // More responsive threshold for significant changes + shouldUpdate = averageDiff > 0.02 || peakDiff > 0.04 + } + + if shouldUpdate { + send(.audioLevelUpdated(meter)) + lastUpdateTime = now + lastMeter = meter + } + } } } .cancellable(id: CancelID.metering, cancelInFlight: true) diff --git a/Hex/Features/Transcription/TranscriptionIndicatorView.swift b/Hex/Features/Transcription/TranscriptionIndicatorView.swift index 67c1931..17e03ae 100644 --- a/Hex/Features/Transcription/TranscriptionIndicatorView.swift +++ b/Hex/Features/Transcription/TranscriptionIndicatorView.swift @@ -3,6 +3,7 @@ // Hex // // Created by Kit Langton on 1/25/25. +// import Pow import SwiftUI @@ -67,104 +68,240 @@ struct TranscriptionIndicatorView: View { @State var transcribeEffect = 0 @State var enhanceEffect = 0 + // Memoize these calculations to prevent recalculating on every render + private func recordingOpacity(for power: Double, threshold: Double = 0.1) -> Double { + guard status == .recording else { return 0 } + return power < threshold ? power / threshold : 1 + } + + // Cache shadow colors based on status and power + @ViewBuilder + private func shadowEffect(averagePower: Double) -> some View { + switch status { + case .recording: + EmptyView() + .shadow(color: .red.opacity(averagePower), radius: 4) + .shadow(color: .red.opacity(averagePower * 0.5), radius: 8) + case .enhancing: + EmptyView() + .shadow(color: enhanceBaseColor.opacity(0.7), radius: 4) + .shadow(color: enhanceBaseColor.opacity(0.4), radius: 8) + case .transcribing, .prewarming: + EmptyView() + .shadow(color: transcribeBaseColor.opacity(0.7), radius: 4) + .shadow(color: transcribeBaseColor.opacity(0.4), radius: 8) + default: + EmptyView() + .shadow(color: .red.opacity(0), radius: 4) + .shadow(color: .red.opacity(0), radius: 8) + } + } + var body: some View { - let averagePower = min(1, meter.averagePower * 3) - let peakPower = min(1, meter.peakPower * 3) - ZStack { - Capsule() - .fill(backgroundColor.shadow(.inner(color: innerShadowColor, radius: 4))) - .overlay { - Capsule() - .stroke(strokeColor, lineWidth: 1) - .blendMode(.screen) - } - .overlay(alignment: .center) { - RoundedRectangle(cornerRadius: cornerRadius) - .fill(Color.red.opacity(status == .recording ? (averagePower < 0.1 ? averagePower / 0.1 : 1) : 0)) - .blur(radius: 2) - .blendMode(.screen) - .padding(6) - } - .overlay(alignment: .center) { - RoundedRectangle(cornerRadius: cornerRadius) - .fill(Color.white.opacity(status == .recording ? (averagePower < 0.1 ? 
averagePower / 0.1 : 0.5) : 0)) - .blur(radius: 1) - .blendMode(.screen) - .frame(maxWidth: .infinity, alignment: .center) - .padding(7) - } - .overlay(alignment: .center) { - GeometryReader { proxy in - RoundedRectangle(cornerRadius: cornerRadius) - .fill(Color.red.opacity(status == .recording ? (peakPower < 0.1 ? (peakPower / 0.1) * 0.5 : 0.5) : 0)) - .frame(width: max(proxy.size.width * (peakPower + 0.6), 0), height: proxy.size.height, alignment: .center) - .frame(maxWidth: .infinity, alignment: .center) - .blur(radius: 4) - .blendMode(.screen) - }.padding(6) - } - .cornerRadius(cornerRadius) - .shadow( - color: status == .recording ? .red.opacity(averagePower) : - status == .enhancing ? enhanceBaseColor.opacity(0.7) : - status == .transcribing ? transcribeBaseColor.opacity(0.7) : .red.opacity(0), - radius: 4 - ) - .shadow( - color: status == .recording ? .red.opacity(averagePower * 0.5) : - status == .enhancing ? enhanceBaseColor.opacity(0.4) : - status == .transcribing ? transcribeBaseColor.opacity(0.4) : .red.opacity(0), - radius: 8 + // Fast track for hidden state to avoid expensive calculations + if status == .hidden { + EmptyView() + } else { + // Only do these calculations when actually visible + let averagePower = min(1, meter.averagePower * 3) + let peakPower = min(1, meter.peakPower * 3) + + ZStack { + // Base capsule with all effects - avoid recreating for hidden state + CapsuleWithEffects( + status: status, + cornerRadius: cornerRadius, + averagePower: averagePower, + peakPower: peakPower, + innerShadowColor: innerShadowColor, + backgroundColor: backgroundColor, + strokeColor: strokeColor ) - .animation(.interactiveSpring(), value: meter) .frame( width: status == .recording ? expandedWidth : baseWidth, height: baseWidth ) + // Combine these into a single animation for better performance + .scaleEffect(status == .optionKeyPressed ? 0.95 : 1) .opacity(status == .hidden ? 0 : 1) - .scaleEffect(status == .hidden ? 0.0 : 1) - .blur(radius: status == .hidden ? 4 : 0) - .animation(.bouncy(duration: 0.3), value: status) - .changeEffect(.glow(color: status == .enhancing ? enhanceBaseColor.opacity(0.5) : .red.opacity(0.5), radius: 8), value: status) - .changeEffect(.shine(angle: .degrees(0), duration: 0.6), value: transcribeEffect) - .changeEffect(.shine(angle: .degrees(0), duration: 0.6), value: enhanceEffect) + // Apply expensive effects conditionally + .modifier(LightweightEffects(status: status, enhanceBaseColor: enhanceBaseColor)) + // Only apply these effects during active animation states + .apply(needsShine: status == .transcribing || status == .enhancing, + transcribeEffect: transcribeEffect, + enhanceEffect: enhanceEffect) .compositingGroup() - // Shared animation task to reduce the number of active tasks + // Efficient animation task .task(id: status) { // Only animate if we're in a state that needs animation guard status == .transcribing || status == .enhancing else { return } - // Use a single timer loop for both types of animations - let animationDelay: Duration = .seconds(0.3) + // Use longer delay to reduce CPU usage with split sleep pattern for better cancellation while (status == .transcribing || status == .enhancing), !Task.isCancelled { + try? await Task.sleep(for: .milliseconds(250)) + if Task.isCancelled { break } + // Update the appropriate counter based on current status if status == .transcribing { transcribeEffect += 1 } else if status == .enhancing { enhanceEffect += 1 } - try? await Task.sleep(for: animationDelay) + + try? 
await Task.sleep(for: .milliseconds(250)) } } - // Show tooltip only for prewarming, not for enhancing - if status == .prewarming { - VStack(spacing: 4) { - Text("Model prewarming...") - .font(.system(size: 12, weight: .medium)) - .foregroundColor(.white) - .padding(.horizontal, 8) - .padding(.vertical, 4) - .background( - RoundedRectangle(cornerRadius: 4) - .fill(Color.black.opacity(0.8)) - ) + // Show tooltip only for prewarming, not for enhancing + if status == .prewarming { + VStack(spacing: 4) { + Text("Model prewarming...") + .font(.system(size: 12, weight: .medium)) + .foregroundColor(.white) + .padding(.horizontal, 8) + .padding(.vertical, 4) + .background( + RoundedRectangle(cornerRadius: 4) + .fill(Color.black.opacity(0.8)) + ) + } + .offset(y: -24) + .transition(.opacity) + .zIndex(2) + } + } + .animation(.interactiveSpring(response: 0.3, dampingFraction: 0.7), value: status) + } + } +} + +// Optimized view hierarchy to improve performance +struct CapsuleWithEffects: View { + var status: TranscriptionIndicatorView.Status + var cornerRadius: CGFloat + var averagePower: Double + var peakPower: Double + var innerShadowColor: Color + var backgroundColor: Color + var strokeColor: Color + + // Cache calculated values to avoid recalculation + private let recordingOpacity: Double + private let whiteOverlayOpacity: Double + private let peakOverlayOpacity: Double + private let primaryShadowColor: Color + private let secondaryShadowColor: Color + + // This will help us avoid constant rebuilding of the view + @ViewBuilder private var innerOverlays: some View { + if status == .recording { + RoundedRectangle(cornerRadius: cornerRadius) + .fill(Color.red.opacity(recordingOpacity)) + .blur(radius: 2) + .blendMode(.screen) + .padding(6) + + RoundedRectangle(cornerRadius: cornerRadius) + .fill(Color.white.opacity(whiteOverlayOpacity)) + .blur(radius: 1) + .blendMode(.screen) + .frame(maxWidth: .infinity, alignment: .center) + .padding(7) + } + } + + // Constructor to pre-calculate all values + init(status: TranscriptionIndicatorView.Status, cornerRadius: CGFloat, averagePower: Double, peakPower: Double, + innerShadowColor: Color, backgroundColor: Color, strokeColor: Color) { + self.status = status + self.cornerRadius = cornerRadius + self.averagePower = averagePower + self.peakPower = peakPower + self.innerShadowColor = innerShadowColor + self.backgroundColor = backgroundColor + self.strokeColor = strokeColor + + // Precalculate all values once during initialization + self.recordingOpacity = status == .recording ? (averagePower < 0.1 ? averagePower / 0.1 : 1) : 0 + self.whiteOverlayOpacity = status == .recording ? (averagePower < 0.1 ? averagePower / 0.1 : 0.5) : 0 + self.peakOverlayOpacity = status == .recording ? (peakPower < 0.1 ? 
(peakPower / 0.1) * 0.5 : 0.5) : 0 + + // Precalculate shadow colors + switch status { + case .recording: + self.primaryShadowColor = .red.opacity(averagePower) + self.secondaryShadowColor = .red.opacity(averagePower * 0.5) + case .enhancing: + self.primaryShadowColor = Color.green.opacity(0.7) + self.secondaryShadowColor = Color.green.opacity(0.4) + case .transcribing, .prewarming: + self.primaryShadowColor = Color.blue.opacity(0.7) + self.secondaryShadowColor = Color.blue.opacity(0.4) + default: + self.primaryShadowColor = .red.opacity(0) + self.secondaryShadowColor = .red.opacity(0) + } + } + + var body: some View { + ZStack { + // Base capsule + Capsule() + .fill(backgroundColor.shadow(.inner(color: innerShadowColor, radius: 4))) + + // Border capsule + Capsule() + .stroke(strokeColor, lineWidth: 1) + .blendMode(.screen) + + // Conditionally add overlays for performance + innerOverlays + + // Only use GeometryReader when in recording mode + if status == .recording { + GeometryReader { proxy in + RoundedRectangle(cornerRadius: cornerRadius) + .fill(Color.red.opacity(peakOverlayOpacity)) + .frame(width: max(proxy.size.width * (peakPower + 0.6), 0), height: proxy.size.height, alignment: .center) + .frame(maxWidth: .infinity, alignment: .center) + .blur(radius: 4) + .blendMode(.screen) + .padding(6) } - .offset(y: -24) - .transition(.opacity) - .zIndex(2) } } + // Apply common modifiers + .cornerRadius(cornerRadius) + .shadow(color: primaryShadowColor, radius: 4) + .shadow(color: secondaryShadowColor, radius: 8) + .animation(status == .recording ? .interactiveSpring(response: 0.35) : nil, value: averagePower) + } +} + +// Lightweight modifier for effects +struct LightweightEffects: ViewModifier { + var status: TranscriptionIndicatorView.Status + var enhanceBaseColor: Color + + func body(content: Content) -> some View { + content.changeEffect( + .glow(color: status == .enhancing ? 
enhanceBaseColor.opacity(0.4) : .red.opacity(0.4), radius: 6),
+ value: status
+ )
+ }
+}
+
+// Extension to conditionally apply shine effects
+extension View {
+ @ViewBuilder
+ func apply(needsShine: Bool, transcribeEffect: Int, enhanceEffect: Int) -> some View {
+ if needsShine {
+ self
+ .changeEffect(.shine(angle: .degrees(0), duration: 0.8), value: transcribeEffect)
+ .changeEffect(.shine(angle: .degrees(0), duration: 0.8), value: enhanceEffect)
+ } else {
+ self
+ }
 }
}
@@ -178,4 +315,4 @@ struct TranscriptionIndicatorView: View {
 TranscriptionIndicatorView(status: .enhancing, meter: .init(averagePower: 0, peakPower: 0))
 }
 .padding(40)
-}
+}
\ No newline at end of file

From dd4356fa9b14fb5a1fbacd6cbcd6a593dee8b92f Mon Sep 17 00:00:00 2001
From: plyght
Date: Tue, 13 May 2025 00:44:33 -0400
Subject: [PATCH 10/10] fix

---
 .../xcshareddata/swiftpm/Package.resolved     |   6 +-
 .../Transcription/TranscriptionFeature.swift  | 112 ++++++++++++------
 2 files changed, 79 insertions(+), 39 deletions(-)

diff --git a/Hex.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved b/Hex.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved
index 3dc6625..338b381 100644
--- a/Hex.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved
+++ b/Hex.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved
@@ -1,5 +1,5 @@
 {
-  "originHash" : "59bf00219a966d82c75a54eb4322a9b59a51c689972d1f70957352575925a174",
+  "originHash" : "ccdfd2e59edbb435d25bf392fd2edaecafacefd6a9f90a7b71d4efe657fa874f",
   "pins" : [
     {
       "identity" : "combine-schedulers",
@@ -195,8 +195,8 @@
       "kind" : "remoteSourceControl",
       "location" : "https://github.com/argmaxinc/WhisperKit",
       "state" : {
-        "branch" : "main",
-        "revision" : "b84fcd30134c800024b05050e895232c7b1fbba8"
+        "revision" : "6a509d6181da1f71517159425d6ec9d94d424f55",
+        "version" : "0.12.0"
       }
     },
     {
diff --git a/Hex/Features/Transcription/TranscriptionFeature.swift b/Hex/Features/Transcription/TranscriptionFeature.swift
index f025037..e693ba0 100644
--- a/Hex/Features/Transcription/TranscriptionFeature.swift
+++ b/Hex/Features/Transcription/TranscriptionFeature.swift
@@ -24,6 +24,7 @@ struct TranscriptionFeature {
 var recordingStartTime: Date?
 var meter: Meter = .init(averagePower: 0, peakPower: 0)
 var assertionID: IOPMAssertionID?
+ var pendingTranscription: String? // Store original transcription for fallback
 @Shared(.hexSettings) var hexSettings: HexSettings
 @Shared(.transcriptionHistory) var transcriptionHistory: TranscriptionHistory
 }
@@ -141,9 +142,20 @@ struct TranscriptionFeature {
 print("AI Enhancement error due to Ollama connectivity: \(error)")
 return .send(.ollamaBecameUnavailable)
 } else {
- // For other errors, just use the original transcription
+ // For other errors:
+ // 1. Log the error
+ // 2. Surface a concise message to the user
+ // 3. Fall back to the original transcription captured before enhancement began
 print("AI Enhancement error: \(error)")
- return .none
+
+ // state.pendingTranscription was set when enhancement started,
+ // so the original transcription is available here as a fallback
+
+ // Surface a concise error message to the user
+ state.error = "AI enhancement failed: \(error.localizedDescription). Using original transcription instead."
+
+ // Continue processing with the original transcription
+ return .send(.transcriptionResult(state.pendingTranscription ?? ""))
 }

 case .ollamaBecameUnavailable:
@@ -155,9 +167,13 @@
 return .run { send in
 let isAvailable = await aiEnhancement.isOllamaAvailable()
 if !isAvailable {
- // Could dispatch to a UI state to show an alert or notification
 print("[TranscriptionFeature] Ollama is not available. AI enhancement is disabled.")
- // Here you would typically update UI state to show an alert
+ // Update state to show error to the user
+ await send(.transcriptionError(NSError(
+ domain: "TranscriptionFeature",
+ code: -1002,
+ userInfo: [NSLocalizedDescriptionKey: "Ollama is not available. AI enhancement is disabled."]
+ )))
 }
 }

@@ -179,38 +195,51 @@
 private extension TranscriptionFeature {
 /// Effect to begin observing the audio meter.
 func startMeteringEffect() -> Effect<Action> {
- .run { send in
- // Use a rate limiter to prevent too many updates
- var lastUpdateTime = Date()
- var lastMeter: Meter? = nil
-
- for await meter in await recording.observeAudioLevel() {
- // Apply main-thread protection
- await MainActor.run {
- // Rate limit updates based on time and significant changes
- let now = Date()
- let timeSinceLastUpdate = now.timeIntervalSince(lastUpdateTime)
-
- // Determine if we should process this update
- var shouldUpdate = false
-
- // Always update if enough time has passed (ensures UI responsiveness)
- if timeSinceLastUpdate >= 0.05 { // Max 20 updates per second
- shouldUpdate = true
- }
- // Or if there's a significant change from the last meter we actually sent
- else if let last = lastMeter {
- let averageDiff = abs(meter.averagePower - last.averagePower)
- let peakDiff = abs(meter.peakPower - last.peakPower)
- // More responsive threshold for significant changes
- shouldUpdate = averageDiff > 0.02 || peakDiff > 0.04
- }
-
+ // Create a separate actor to handle rate limiting safely in Swift 6
+ actor MeterRateLimiter {
+ private var lastUpdateTime = Date()
+ private var lastMeter: Meter? = nil
+
+ func shouldUpdate(meter: Meter) -> Bool {
+ let now = Date()
+ let timeSinceLastUpdate = now.timeIntervalSince(lastUpdateTime)
+
+ // Always update if enough time has passed (ensures UI responsiveness)
+ if timeSinceLastUpdate >= 0.05 { // Max 20 updates per second
+ self.lastUpdateTime = now
+ self.lastMeter = meter
+ return true
+ }
+ // Or if there's a significant change from the last meter we actually sent
+ else if let last = lastMeter {
+ let averageDiff = abs(meter.averagePower - last.averagePower)
+ let peakDiff = abs(meter.peakPower - last.peakPower)
+ // More responsive threshold for significant changes
+ let shouldUpdate = averageDiff > 0.02 || peakDiff > 0.04
+
 if shouldUpdate {
- send(.audioLevelUpdated(meter))
- lastUpdateTime = now
- lastMeter = meter
+ self.lastUpdateTime = now
+ self.lastMeter = meter
 }
+
+ return shouldUpdate
+ }
+
+ self.lastUpdateTime = now
+ self.lastMeter = meter
+ return true // First update always passes through
+ }
+ }
+
+ return .run { send in
+ let rateLimiter = MeterRateLimiter()
+
+ for await meter in await recording.observeAudioLevel() {
+ // Check if we should send this update
+ if await rateLimiter.shouldUpdate(meter: meter) {
+ // Effect.run's closure is @Sendable, so this is a safe context for sending
+ // actions; ComposableArchitecture dispatches to the main thread as needed.
+ await send(.audioLevelUpdated(meter))
 }
 }
 }
@@ -396,6 +425,9 @@ private extension TranscriptionFeature {

 if state.hexSettings.useAIEnhancement {
 // Keep state.isTranscribing = true since we're still processing
+ // Store the original transcription for error handling/fallback
+ state.pendingTranscription = result
+
 // Extract values to avoid capturing inout parameter
 let selectedAIModel = state.hexSettings.selectedAIModel
 let promptText = state.hexSettings.aiEnhancementPrompt
@@ -458,7 +490,9 @@ private extension TranscriptionFeature {
 .run { send in
 do {
 print("[TranscriptionFeature] Calling aiEnhancement.enhance()")
- let enhancedText = try await aiEnhancement.enhance(result, model, options) { progress in
+ // Bind the client's enhance closure directly to avoid argument-label issues
+ let enhanceMethod = aiEnhancement.enhance
+ let enhancedText = try await enhanceMethod(result, model, options) { progress in
 // Optional: Could update UI with progress information here if needed
 }
 print("[TranscriptionFeature] AI enhancement succeeded")
@@ -482,6 +516,7 @@ private extension TranscriptionFeature {
 state.isTranscribing = false
 state.isPrewarming = false
 state.isEnhancing = false // Reset the enhancing state
+ state.pendingTranscription = nil // Clear the pending transcription since enhancement succeeded

 // If empty text, nothing else to do
 guard !result.isEmpty else {
@@ -576,7 +611,12 @@ private extension TranscriptionFeature {
 return .merge(
 .cancel(id: CancelID.transcription),
 .cancel(id: CancelID.delayedRecord),
- // Don't cancel AI enhancement as it might cause issues
+ // Don't cancel AI enhancement: interrupting Ollama mid-stream can cause issues.
+ // The UI reflects the cancellation immediately while enhancement keeps running
+ // in the background; we intentionally accept this inconsistency to keep
+ // Ollama's streaming API stable.
+ // TODO: Implement a safer cancellation approach (e.g. generation/state tracking)
+ // so late results arriving after cancellation can be ignored.
 // .cancel(id: CancelID.aiEnhancement),
 .run { _ in
 await soundEffect.play(.cancel)
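
One possible shape for the safer cancellation flagged in the TODO above is a generation counter: bump it whenever enhancement starts or is cancelled, and have the reducer drop any result stamped with a stale generation. The following is a minimal sketch, not part of this patch; the names (EnhancementState, enhancementResult, cancelPressed) are hypothetical and do not exist in the Hex codebase.

// Hypothetical sketch: ignore AI-enhancement results that arrive after cancellation.
struct EnhancementState {
    var generation = 0               // bumped on every start and every cancel
    var pendingTranscription: String?
}

enum EnhancementAction {
    case enhancementStarted
    case enhancementResult(String, generation: Int)
    case cancelPressed
}

func reduce(state: inout EnhancementState, action: EnhancementAction) {
    switch action {
    case .enhancementStarted:
        // Results must echo this value back to be accepted.
        state.generation += 1

    case let .enhancementResult(text, generation):
        // A run cancelled mid-flight carries an outdated generation; drop it.
        guard generation == state.generation else { return }
        state.pendingTranscription = nil
        print("Accepted enhanced text: \(text)")

    case .cancelPressed:
        // Invalidate in-flight work without interrupting the Ollama stream.
        state.generation += 1
    }
}

The effect that starts a run would capture the current generation and send it back alongside the enhanced text, so nothing needs to force-cancel the Ollama stream: stale completions are simply discarded by the reducer.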