diff --git a/Hex.xcodeproj/project.pbxproj b/Hex.xcodeproj/project.pbxproj index 9b2f1e1..62f99bb 100644 --- a/Hex.xcodeproj/project.pbxproj +++ b/Hex.xcodeproj/project.pbxproj @@ -271,6 +271,7 @@ BUNDLE_LOADER = "$(TEST_HOST)"; CODE_SIGN_STYLE = Automatic; CURRENT_PROJECT_VERSION = 34; + DEAD_CODE_STRIPPING = YES; DEVELOPMENT_TEAM = QC99C9JE59; GENERATE_INFOPLIST_FILE = YES; MARKETING_VERSION = 0.2.1; @@ -288,6 +289,7 @@ BUNDLE_LOADER = "$(TEST_HOST)"; CODE_SIGN_STYLE = Automatic; CURRENT_PROJECT_VERSION = 34; + DEAD_CODE_STRIPPING = YES; DEVELOPMENT_TEAM = QC99C9JE59; GENERATE_INFOPLIST_FILE = YES; MARKETING_VERSION = 0.2.1; @@ -333,6 +335,7 @@ CLANG_WARN_UNREACHABLE_CODE = YES; CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; COPY_PHASE_STRIP = NO; + DEAD_CODE_STRIPPING = YES; DEBUG_INFORMATION_FORMAT = dwarf; ENABLE_STRICT_OBJC_MSGSEND = YES; ENABLE_TESTABILITY = YES; @@ -396,6 +399,7 @@ CLANG_WARN_UNREACHABLE_CODE = YES; CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; COPY_PHASE_STRIP = NO; + DEAD_CODE_STRIPPING = YES; DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; ENABLE_NS_ASSERTIONS = NO; ENABLE_STRICT_OBJC_MSGSEND = YES; @@ -427,6 +431,7 @@ CODE_SIGN_STYLE = Automatic; COMBINE_HIDPI_IMAGES = YES; CURRENT_PROJECT_VERSION = 34; + DEAD_CODE_STRIPPING = YES; DEVELOPMENT_ASSET_PATHS = "\"Hex/Preview Content\""; DEVELOPMENT_TEAM = QC99C9JE59; ENABLE_HARDENED_RUNTIME = YES; @@ -460,6 +465,7 @@ CODE_SIGN_STYLE = Automatic; COMBINE_HIDPI_IMAGES = YES; CURRENT_PROJECT_VERSION = 34; + DEAD_CODE_STRIPPING = YES; DEVELOPMENT_ASSET_PATHS = "\"Hex/Preview Content\""; DEVELOPMENT_TEAM = QC99C9JE59; ENABLE_HARDENED_RUNTIME = YES; diff --git a/Hex.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved b/Hex.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved index 3dc6625..338b381 100644 --- a/Hex.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved +++ b/Hex.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved @@ -1,5 +1,5 @@ { - "originHash" : "59bf00219a966d82c75a54eb4322a9b59a51c689972d1f70957352575925a174", + "originHash" : "ccdfd2e59edbb435d25bf392fd2edaecafacefd6a9f90a7b71d4efe657fa874f", "pins" : [ { "identity" : "combine-schedulers", @@ -195,8 +195,8 @@ "kind" : "remoteSourceControl", "location" : "https://github.com/argmaxinc/WhisperKit", "state" : { - "branch" : "main", - "revision" : "b84fcd30134c800024b05050e895232c7b1fbba8" + "revision" : "6a509d6181da1f71517159425d6ec9d94d424f55", + "version" : "0.12.0" } }, { diff --git a/Hex/Clients/AIEnhancementClient.swift b/Hex/Clients/AIEnhancementClient.swift new file mode 100644 index 0000000..e8e5842 --- /dev/null +++ b/Hex/Clients/AIEnhancementClient.swift @@ -0,0 +1,339 @@ +// +// AIEnhancementClient.swift +// Hex +// +// Created by Claude AI on 4/22/25. +// + +import Dependencies +import DependenciesMacros +import Foundation + +// Note: Future enhancement could use OllamaKit directly: +// import OllamaKit + +/// A client that enhances transcribed text using local LLMs. +/// Supports both Ollama and other local options (future expansion). +@DependencyClient +struct AIEnhancementClient { + /// Enhances the given text using the specified model. 
+ var enhance: @Sendable (String, String, EnhancementOptions, @escaping (Progress) -> Void) async throws -> String = { text, _, _, _ in text } + + /// Checks if Ollama is installed and running on the system + var isOllamaAvailable: @Sendable () async -> Bool = { false } + + /// Gets a list of available models from Ollama + var getAvailableModels: @Sendable () async throws -> [String] = { [] } +} + +/// Enhancement options for AI processing +struct EnhancementOptions { + /// The prompt to send to the AI model for text enhancement + var prompt: String + + /// Temperature controls randomness: lower values (0.1-0.3) are more precise, + /// higher values (0.7-1.0) give more creative/varied results + var temperature: Double + + /// Maximum number of tokens to generate in the response + var maxTokens: Int + + /// Default prompt for enhancing transcribed text with clear instructions + static let defaultPrompt = """ + You are a professional editor improving transcribed text from speech-to-text. + + Your task is to: + 1. Fix grammar, punctuation, and capitalization + 2. Correct obvious transcription errors and typos + 3. Format the text to be more readable + 4. Preserve all meaning and information from the original + 5. Make the text flow naturally as written text + 6. DO NOT add any new information that wasn't in the original + 7. DO NOT remove any information from the original text + + Focus only on improving readability while preserving the exact meaning. + """ + + /// Default enhancement options for transcribed text + static let `default` = EnhancementOptions( + prompt: defaultPrompt, + temperature: 0.3, + maxTokens: 1000 + ) + + /// Custom initialization with sensible defaults + init(prompt: String = defaultPrompt, temperature: Double = 0.3, maxTokens: Int = 1000) { + self.prompt = prompt + self.temperature = temperature + self.maxTokens = maxTokens + } +} + +/// Dependency Key for AIEnhancementClient +extension AIEnhancementClient: DependencyKey { + static var liveValue: Self { + let live = AIEnhancementClientLive() + return Self( + enhance: { try await live.enhance(text: $0, model: $1, options: $2, progressCallback: $3) }, + isOllamaAvailable: { await live.isOllamaAvailable() }, + getAvailableModels: { try await live.getAvailableModels() } + ) + } +} + +extension DependencyValues { + var aiEnhancement: AIEnhancementClient { + get { self[AIEnhancementClient.self] } + set { self[AIEnhancementClient.self] = newValue } + } +} + +/// Live implementation of AIEnhancementClient +class AIEnhancementClientLive { + // MARK: - Public Methods + + /// Enhances text using a local AI model + func enhance(text: String, model: String, options: EnhancementOptions, progressCallback: @escaping (Progress) -> Void) async throws -> String { + // Skip if the text is empty or too short + guard !text.isEmpty, text.count > 5 else { + print("[AIEnhancementClientLive] Text too short for enhancement, returning original") + return text + } + + let progress = Progress(totalUnitCount: 100) + progressCallback(progress) + + print("[AIEnhancementClientLive] Starting text enhancement with model: \(model)") + print("[AIEnhancementClientLive] Text to enhance (\(text.count) chars): \"\(text.prefix(50))...\"") + + // For now, we support Ollama only + do { + // First verify Ollama is available + let isAvailable = await isOllamaAvailable() + if !isAvailable { + print("[AIEnhancementClientLive] Ollama not available, cannot enhance text") + throw NSError(domain: "AIEnhancementClient", code: -5, + userInfo: [NSLocalizedDescriptionKey: 
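// ---------------------------------------------------------------------------
// Illustrative call-site sketch for this client (not part of the diff; the
// real call site is TranscriptionFeature.enhanceWithAI further down). It
// mirrors the closure-style invocation the feature code itself uses:
//
//   @Dependency(\.aiEnhancement) var aiEnhancement
//   let options = EnhancementOptions(temperature: 0.2, maxTokens: 500)
//   let enhance = aiEnhancement.enhance
//   let polished = try await enhance("raw transcript", "gemma3", options) { progress in
//       print("enhancement: \(Int(progress.fractionCompleted * 100))%")
//   }
//
// The model name "gemma3" matches the feature's default model; everything
// else here is assumption-level illustration.
// ---------------------------------------------------------------------------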
"Ollama is not available. Please ensure it's running."]) + } + + let enhancedText = try await enhanceWithOllama(text: text, model: model, options: options) { fraction in + progress.completedUnitCount = Int64(fraction * 100) + progressCallback(progress) + } + + progress.completedUnitCount = 100 + progressCallback(progress) + + print("[AIEnhancementClientLive] Successfully enhanced text: \"\(enhancedText.prefix(50))...\"") + return enhancedText + } catch { + print("[AIEnhancementClientLive] Error enhancing text: \(error.localizedDescription)") + throw error + } + } + + /// Checks if Ollama is available on the system + func isOllamaAvailable() async -> Bool { + // Simple check - try to connect to Ollama's API endpoint + do { + var request = URLRequest(url: URL(string: "http://localhost:11434/api/version")!) + request.timeoutInterval = 5.0 // Longer timeout for more reliability + + print("[AIEnhancementClientLive] Checking Ollama availability...") + let (data, response) = try await URLSession.shared.data(for: request) + + if let httpResponse = response as? HTTPURLResponse { + let isAvailable = httpResponse.statusCode == 200 + print("[AIEnhancementClientLive] Ollama availability check: \(isAvailable ? "Available" : "Unavailable") (status: \(httpResponse.statusCode))") + if isAvailable, let dataString = String(data: data, encoding: .utf8) { + print("[AIEnhancementClientLive] Ollama version: \(dataString)") + } + return isAvailable + } + print("[AIEnhancementClientLive] Ollama unavailable: Invalid response type") + return false + } catch { + print("[AIEnhancementClientLive] Ollama not available: \(error.localizedDescription)") + return false + } + } + + /// Gets a list of available models from Ollama + func getAvailableModels() async throws -> [String] { + // Our direct API implementation: + struct ModelResponse: Decodable { + struct Model: Decodable { + let name: String + let modifiedAt: String? + let size: Int64? + + enum CodingKeys: String, CodingKey { + case name + case modifiedAt = "modified_at" + case size + } + } + let models: [Model] + } + + var request = URLRequest(url: URL(string: "http://localhost:11434/api/tags")!) + request.timeoutInterval = 5.0 + + do { + let (data, response) = try await URLSession.shared.data(for: request) + + guard let httpResponse = response as? HTTPURLResponse else { + throw NSError(domain: "AIEnhancementClient", code: -1, + userInfo: [NSLocalizedDescriptionKey: "Invalid response from Ollama"]) + } + + if httpResponse.statusCode != 200 { + throw NSError(domain: "AIEnhancementClient", code: httpResponse.statusCode, + userInfo: [NSLocalizedDescriptionKey: "Ollama returned status code \(httpResponse.statusCode)"]) + } + + do { + let modelResponse = try JSONDecoder().decode(ModelResponse.self, from: data) + // Sort models alphabetically for better display + return modelResponse.models.map { $0.name }.sorted() + } catch let decodingError { + print("[AIEnhancementClientLive] Failed to decode model list: \(decodingError)") + throw NSError(domain: "AIEnhancementClient", code: -2, + userInfo: [NSLocalizedDescriptionKey: "Failed to parse model list from Ollama. \(decodingError.localizedDescription)"]) + } + } catch { + print("[AIEnhancementClientLive] Error getting models: \(error.localizedDescription)") + throw NSError(domain: "AIEnhancementClient", code: -3, + userInfo: [NSLocalizedDescriptionKey: "Failed to connect to Ollama. 
Ensure it's running."]) + } + } + + // MARK: - Private Helpers + + /// Enhances text using Ollama's API + private func enhanceWithOllama(text: String, model: String, options: EnhancementOptions, progressCallback: @escaping (Double) -> Void) async throws -> String { + // Initial progress update + progressCallback(0.1) + + // Validate inputs + guard !model.isEmpty else { + print("[AIEnhancementClientLive] Error: No model selected for enhancement") + throw NSError(domain: "AIEnhancementClient", code: -4, + userInfo: [NSLocalizedDescriptionKey: "No model selected for enhancement"]) + } + + let url = URL(string: "http://localhost:11434/api/generate")! + var request = URLRequest(url: url) + request.httpMethod = "POST" + request.setValue("application/json", forHTTPHeaderField: "Content-Type") + request.timeoutInterval = 60.0 // Allow longer timeout for generation + + // Create a well-formatted prompt with clear instructions + let fullPrompt = """ + \(options.prompt) + + TEXT TO IMPROVE: + \(text) + + IMPROVED TEXT: + """ + + // Build request parameters with appropriate defaults + let temperature = max(0.1, min(1.0, options.temperature)) // Ensure valid range + let maxTokens = max(100, min(2000, options.maxTokens)) // Reasonable limits + + let requestDict: [String: Any] = [ + "model": model, + "prompt": fullPrompt, + "temperature": temperature, + "max_tokens": maxTokens, + "stream": false, + "system": "You are an AI that improves transcribed text while preserving meaning." + ] + + print("[AIEnhancementClientLive] Preparing request to Ollama with model: \(model), temp: \(temperature), max_tokens: \(maxTokens)") + + do { + // Progress update - request prepared + progressCallback(0.2) + + // Convert to JSON and send + let requestData = try JSONSerialization.data(withJSONObject: requestDict) + request.httpBody = requestData + + print("[AIEnhancementClientLive] Sending request to Ollama API...") + + // Make the request + let (responseData, urlResponse): (Data, URLResponse) + do { + (responseData, urlResponse) = try await URLSession.shared.data(for: request) + } catch { + // Treat timeouts and connectivity issues as "Ollama unavailable" + print("[AIEnhancementClientLive] Generation failed: \(error.localizedDescription)") + throw NSError(domain: "AIEnhancementClient", + code: -1001, // NSURLErrorTimedOut or similar + userInfo: [NSLocalizedDescriptionKey: "Ollama is unresponsive. Please check if it's running."]) + } + + // Progress update - response received + progressCallback(0.8) + + print("[AIEnhancementClientLive] Received response from Ollama API") + + // Validate response + guard let httpResponse = urlResponse as? HTTPURLResponse else { + print("[AIEnhancementClientLive] Error: Invalid response type from Ollama") + throw NSError(domain: "AIEnhancementClient", code: -1, + userInfo: [NSLocalizedDescriptionKey: "Invalid response from Ollama"]) + } + + print("[AIEnhancementClientLive] Ollama response status: \(httpResponse.statusCode)") + + if httpResponse.statusCode != 200 { + // Try to extract error message if available + if let errorDict = try? JSONSerialization.jsonObject(with: responseData) as? [String: Any], + let errorMessage = errorDict["error"] as? 
String {
+          print("[AIEnhancementClientLive] Ollama API error: \(errorMessage)")
+          throw NSError(domain: "AIEnhancementClient", code: httpResponse.statusCode,
+                        userInfo: [NSLocalizedDescriptionKey: "Ollama error: \(errorMessage)"])
+        } else {
+          print("[AIEnhancementClientLive] Ollama error with status code: \(httpResponse.statusCode)")
+          throw NSError(domain: "AIEnhancementClient", code: httpResponse.statusCode,
+                        userInfo: [NSLocalizedDescriptionKey: "Ollama returned status code \(httpResponse.statusCode)"])
+        }
+      }
+
+      // Try to log raw response for debugging
+      if let responseString = String(data: responseData, encoding: .utf8) {
+        print("[AIEnhancementClientLive] Raw response: \(responseString.prefix(100))...")
+      }
+
+      // Parse response
+      if let responseDict = try JSONSerialization.jsonObject(with: responseData) as? [String: Any],
+         let enhancedText = responseDict["response"] as? String {
+
+        // Progress update - processing complete
+        progressCallback(1.0)
+
+        print("[AIEnhancementClientLive] Successfully parsed Ollama response")
+
+        // Clean up the response - trim whitespace and ensure it's not empty
+        let cleanedText = enhancedText.trimmingCharacters(in: .whitespacesAndNewlines)
+        return cleanedText.isEmpty ? text : cleanedText
+      } else {
+        print("[AIEnhancementClientLive] Error: Failed to parse Ollama response")
+        throw NSError(domain: "AIEnhancementClient", code: -2,
+                      userInfo: [NSLocalizedDescriptionKey: "Failed to parse Ollama response"])
+      }
+    } catch let error as NSError {
+      // Log the error and rethrow
+      print("[AIEnhancementClientLive] Error enhancing text: \(error.localizedDescription)")
+      throw error
+    } catch {
+      // Handle unexpected errors
+      print("[AIEnhancementClientLive] Unexpected error: \(error)")
+      throw NSError(domain: "AIEnhancementClient", code: -3,
+                    userInfo: [NSLocalizedDescriptionKey: "Error communicating with Ollama: \(error.localizedDescription)"])
+    }
+  }
+}
\ No newline at end of file
diff --git a/Hex/Clients/KeyEventMonitorClient.swift b/Hex/Clients/KeyEventMonitorClient.swift
index ebc8dea..7645460 100644
--- a/Hex/Clients/KeyEventMonitorClient.swift
+++ b/Hex/Clients/KeyEventMonitorClient.swift
@@ -8,6 +8,33 @@ import Sauce

 private let logger = Logger(subsystem: "com.kitlangton.Hex", category: "KeyEventMonitor")

+/// Thread-safe wrapper for interacting with the Sauce library.
+/// This ensures all Sauce operations happen on the main thread
+/// to prevent "_dispatch_assert_queue_fail" errors.
+enum SafeSauce {
+  /// Thread-safe way to call Sauce methods from any thread
+  static func performOnMainThread<T>(_ operation: @escaping () -> T) -> T {
+    // If we're already on the main thread, just perform the operation
+    if Thread.isMainThread {
+      return operation()
+    }
+
+    // Otherwise dispatch to the main thread and block until it returns the
+    // result; callers should avoid holding locks across this call.
+    return DispatchQueue.main.sync {
+      operation()
+    }
+  }
+
+  // Convenience methods that handle thread switching automatically
+  static func safeKey(for keyCode: Int) -> Key? {
+    performOnMainThread { Sauce.shared.key(for: keyCode) }
+  }
+
+  static func safeKeyCode(for key: Key) -> CGKeyCode {
+    performOnMainThread { Sauce.shared.keyCode(for: key) }
+  }
+}
+
 public struct KeyEvent {
   let key: Key?
   let modifiers: Modifiers
@@ -16,7 +43,8 @@
 public extension KeyEvent {
   init(cgEvent: CGEvent, type _: CGEventType) {
     let keyCode = Int(cgEvent.getIntegerValueField(.keyboardEventKeycode))
-    let key = cgEvent.type == .keyDown ?
Sauce.shared.key(for: keyCode) : nil + // Use our thread-safe wrapper to prevent _dispatch_assert_queue_fail + let key: Key? = cgEvent.type == .keyDown ? SafeSauce.safeKey(for: keyCode) : nil let modifiers = Modifiers.from(carbonFlags: cgEvent.flags) self.init(key: key, modifiers: modifiers) @@ -188,4 +216,4 @@ class KeyEventMonitorClientLive { return handled } -} +} \ No newline at end of file diff --git a/Hex/Clients/PasteboardClient.swift b/Hex/Clients/PasteboardClient.swift index 14f1f5d..eae8532 100644 --- a/Hex/Clients/PasteboardClient.swift +++ b/Hex/Clients/PasteboardClient.swift @@ -57,36 +57,61 @@ struct PasteboardClientLive { pasteboard.setString(text, forType: .string) } - // Function to save the current state of the NSPasteboard - func savePasteboardState(pasteboard: NSPasteboard) -> [[String: Any]] { - var savedItems: [[String: Any]] = [] - - for item in pasteboard.pasteboardItems ?? [] { - var itemDict: [String: Any] = [:] - for type in item.types { - if let data = item.data(forType: type) { - itemDict[type.rawValue] = data - } - } - savedItems.append(itemDict) + // Stores the previous pasteboard owner change count + private static var savedChangeCount: Int = 0 + // Stores the previous pasteboard contents name for tracking + private static var savedPasteboardName: String? + + // More efficient approach that uses NSPasteboard's built-in functionality + // Instead of copying all the data, we'll track the pasteboard state and create + // a temporary pasteboard to hold the original data + func savePasteboardState(pasteboard: NSPasteboard) -> NSPasteboard? { + // If pasteboard is empty, nothing to save + if pasteboard.pasteboardItems?.isEmpty ?? true { + return nil } - return savedItems + // Generate a unique name for the backup pasteboard + let tempName = "com.kitlangton.Hex.backup.\(UUID().uuidString)" + let backupPasteboard = NSPasteboard(name: .init(tempName)) + + // Clear the backup pasteboard and write all contents from original + backupPasteboard.clearContents() + + // Copy all items to the backup pasteboard + // This is more efficient than manually copying each data item + if let items = pasteboard.pasteboardItems { + backupPasteboard.writeObjects(items) + } + + // Save the current change count and name for later reference + PasteboardClientLive.savedChangeCount = pasteboard.changeCount + PasteboardClientLive.savedPasteboardName = tempName + + return backupPasteboard } - // Function to restore the saved state of the NSPasteboard - func restorePasteboardState(pasteboard: NSPasteboard, savedItems: [[String: Any]]) { - pasteboard.clearContents() + // Restore the pasteboard state from a backup pasteboard + func restorePasteboardFromBackup(mainPasteboard: NSPasteboard, backupPasteboard: NSPasteboard?) { + // If no backup pasteboard, nothing to restore + guard let backupPasteboard = backupPasteboard else { return } - for itemDict in savedItems { - let item = NSPasteboardItem() - for (type, data) in itemDict { - if let data = data as? 
Data { - item.setData(data, forType: NSPasteboard.PasteboardType(rawValue: type)) - } - } - pasteboard.writeObjects([item]) + // Clear the main pasteboard + mainPasteboard.clearContents() + + // Copy all items from backup to main pasteboard + if let items = backupPasteboard.pasteboardItems { + mainPasteboard.writeObjects(items) } + + // Release the temporary pasteboard by clearing it + backupPasteboard.clearContents() + } + + // Legacy method to maintain compatibility - will be removed in future + func restorePasteboardState(pasteboard: NSPasteboard, savedItems: [[String: Any]]) { + // This is kept for compatibility but shouldn't be used anymore + print("Warning: Using deprecated pasteboard restoration method") } /// Pastes current clipboard content to the frontmost application @@ -135,62 +160,78 @@ struct PasteboardClientLive { func pasteWithClipboard(_ text: String) async { let pasteboard = NSPasteboard.general - let originalItems = savePasteboardState(pasteboard: pasteboard) + + // Save the original pasteboard only if we need to restore it + let backupPasteboard = hexSettings.copyToClipboard ? nil : savePasteboardState(pasteboard: pasteboard) + + // Set our text in the clipboard pasteboard.clearContents() pasteboard.setString(text, forType: .string) let source = CGEventSource(stateID: .combinedSessionState) - // Track if paste operation successful + // First try the AppleScript approach - it's more reliable in most apps var pasteSucceeded = PasteboardClientLive.pasteToFrontmostApp() // If menu-based paste failed, try simulated keypresses if !pasteSucceeded { print("Failed to paste to frontmost app, falling back to simulated keypresses") - let vKeyCode = Sauce.shared.keyCode(for: .v) - let cmdKeyCode: CGKeyCode = 55 // Command key - - // Create cmd down event - let cmdDown = CGEvent(keyboardEventSource: source, virtualKey: cmdKeyCode, keyDown: true) - - // Create v down event - let vDown = CGEvent(keyboardEventSource: source, virtualKey: vKeyCode, keyDown: true) - vDown?.flags = .maskCommand - - // Create v up event - let vUp = CGEvent(keyboardEventSource: source, virtualKey: vKeyCode, keyDown: false) - vUp?.flags = .maskCommand + + // Add a small delay to allow system to process + try? 
await Task.sleep(for: .milliseconds(100)) - // Create cmd up event - let cmdUp = CGEvent(keyboardEventSource: source, virtualKey: cmdKeyCode, keyDown: false) + // Use a thread-safe approach to prevent _dispatch_assert_queue_fail + let vKeyCode: CGKeyCode + if Thread.isMainThread { + vKeyCode = Sauce.shared.keyCode(for: .v) + } else { + vKeyCode = DispatchQueue.main.sync { Sauce.shared.keyCode(for: .v) } + } + let cmdKeyCode: CGKeyCode = 55 // Command key - // Post the events - cmdDown?.post(tap: .cghidEventTap) - vDown?.post(tap: .cghidEventTap) - vUp?.post(tap: .cghidEventTap) - cmdUp?.post(tap: .cghidEventTap) + // Create and post key events with small delays between + autoreleasepool { + // Command down + let cmdDown = CGEvent(keyboardEventSource: source, virtualKey: cmdKeyCode, keyDown: true) + cmdDown?.post(tap: .cghidEventTap) + + // V down with command flag + let vDown = CGEvent(keyboardEventSource: source, virtualKey: vKeyCode, keyDown: true) + vDown?.flags = .maskCommand + vDown?.post(tap: .cghidEventTap) + + // V up with command flag + let vUp = CGEvent(keyboardEventSource: source, virtualKey: vKeyCode, keyDown: false) + vUp?.flags = .maskCommand + vUp?.post(tap: .cghidEventTap) + + // Command up + let cmdUp = CGEvent(keyboardEventSource: source, virtualKey: cmdKeyCode, keyDown: false) + cmdUp?.post(tap: .cghidEventTap) + } - // Assume keypress-based paste succeeded - but text will remain in clipboard as fallback + // Assume keypress-based paste succeeded - text will remain in clipboard as fallback pasteSucceeded = true } // Only restore original pasteboard contents if: - // 1. Copying to clipboard is disabled AND - // 2. The paste operation succeeded - if !hexSettings.copyToClipboard && pasteSucceeded { - try? await Task.sleep(for: .seconds(0.1)) - pasteboard.clearContents() - restorePasteboardState(pasteboard: pasteboard, savedItems: originalItems) + // 1. User doesn't want to keep text in clipboard AND + // 2. The paste operation succeeded AND + // 3. We have a backup pasteboard + if !hexSettings.copyToClipboard && pasteSucceeded && backupPasteboard != nil { + // Give paste operation time to complete + try? await Task.sleep(for: .milliseconds(200)) + + // Restore the original pasteboard state + autoreleasepool { + restorePasteboardFromBackup(mainPasteboard: pasteboard, backupPasteboard: backupPasteboard) + } } // If we failed to paste AND user doesn't want clipboard retention, - // show a notification that text is available in clipboard + // log the issue but leave text in clipboard as fallback if !pasteSucceeded && !hexSettings.copyToClipboard { - // Keep the transcribed text in clipboard regardless of setting print("Paste operation failed. Text remains in clipboard as fallback.") - - // TODO: Could add a notification here to inform user - // that text is available in clipboard } } diff --git a/Hex/Clients/RecordingClient.swift b/Hex/Clients/RecordingClient.swift index 1db8fbb..025815d 100644 --- a/Hex/Clients/RecordingClient.swift +++ b/Hex/Clients/RecordingClient.swift @@ -381,7 +381,7 @@ actor RecordingClientLive { var deviceName: CFString? 
= nil
     var size = UInt32(MemoryLayout<CFString?>.size)

-    var deviceNamePtr: UnsafeMutableRawPointer = .allocate(byteCount: Int(size), alignment: MemoryLayout<CFString?>.alignment)
+    let deviceNamePtr = UnsafeMutableRawPointer.allocate(byteCount: Int(size), alignment: MemoryLayout<CFString?>.alignment)
     defer { deviceNamePtr.deallocate() }

     let status = AudioObjectGetPropertyData(
@@ -559,14 +559,56 @@
   func startMeterTask() {
     meterTask = Task {
+      var lastMeter = Meter(averagePower: 0, peakPower: 0)
+      var updateCount = 0
+      var lastUpdateTime = Date()
+
+      // Use lower sampling rates when there's less activity
+      var inactiveCount = 0
+      var samplingInterval: Duration = .milliseconds(100) // Start with default
+
       while !Task.isCancelled, let r = self.recorder, r.isRecording {
         r.updateMeters()
         // The recorder reports decibels full scale (dBFS, 0 dB = maximum,
         // negative = quieter); pow(10, dB / 20) converts that to linear
         // amplitude, e.g. -20 dB -> 0.1, -40 dB -> 0.01.
         let averagePower = r.averagePower(forChannel: 0)
         let averageNormalized = pow(10, averagePower / 20.0)
         let peakPower = r.peakPower(forChannel: 0)
         let peakNormalized = pow(10, peakPower / 20.0)
-        meterContinuation.yield(Meter(averagePower: Double(averageNormalized), peakPower: Double(peakNormalized)))
-        try? await Task.sleep(for: .milliseconds(100))
+        let currentMeter = Meter(averagePower: Double(averageNormalized), peakPower: Double(peakNormalized))
+
+        // Determine threshold for significant change (adaptive based on current levels)
+        let averageThreshold = max(0.05, lastMeter.averagePower * 0.15) // More sensitive at low levels
+        let peakThreshold = max(0.1, lastMeter.peakPower * 0.15)
+
+        // Check if there's a significant change
+        let significantChange = abs(currentMeter.averagePower - lastMeter.averagePower) > averageThreshold ||
+          abs(currentMeter.peakPower - lastMeter.peakPower) > peakThreshold
+
+        // Force update if too much time has passed (prevents UI from appearing frozen)
+        let timeSinceLastUpdate = Date().timeIntervalSince(lastUpdateTime)
+        let forceUpdate = timeSinceLastUpdate > 0.3 // Max 300ms between updates for smooth UI
+
+        // Adaptive sampling rate based on activity level
+        if significantChange {
+          inactiveCount = 0
+          samplingInterval = .milliseconds(80) // Faster sampling during active periods
+        } else {
+          inactiveCount += 1
+          if inactiveCount > 10 {
+            // Gradually increase the sampling interval during periods of low activity (capped at 150 ms)
+            samplingInterval = .milliseconds(min(150, 80 + inactiveCount * 5))
+          }
+        }
+
+        if significantChange || forceUpdate || updateCount >= 3 {
+          meterContinuation.yield(currentMeter)
+          lastMeter = currentMeter
+          lastUpdateTime = Date()
+          updateCount = 0
+        } else {
+          updateCount += 1
+        }
+
+        try? await Task.sleep(for: samplingInterval)
       }
     }
   }
diff --git a/Hex/Clients/TranscriptionClient.swift b/Hex/Clients/TranscriptionClient.swift
index 2f22d15..71b7aa3 100644
--- a/Hex/Clients/TranscriptionClient.swift
+++ b/Hex/Clients/TranscriptionClient.swift
@@ -17,7 +17,8 @@ import WhisperKit
 struct TranscriptionClient {
   /// Transcribes an audio file at the specified `URL` using the named `model`.
   /// Reports transcription progress via `progressCallback`.
-  var transcribe: @Sendable (URL, String, DecodingOptions, @escaping (Progress) -> Void) async throws -> String
+  /// Optionally accepts HexSettings for features like auto-capitalization.
+  var transcribe: @Sendable (URL, String, DecodingOptions, HexSettings?, @escaping (Progress) -> Void) async throws -> String

   /// Ensures a model is downloaded (if missing) and loaded into memory, reporting progress via `progressCallback`.
var downloadModel: @Sendable (String, @escaping (Progress) -> Void) async throws -> Void @@ -39,7 +40,7 @@ extension TranscriptionClient: DependencyKey { static var liveValue: Self { let live = TranscriptionClientLive() return Self( - transcribe: { try await live.transcribe(url: $0, model: $1, options: $2, progressCallback: $3) }, + transcribe: { try await live.transcribe(url: $0, model: $1, options: $2, settings: $3, progressCallback: $4) }, downloadModel: { try await live.downloadAndLoadModel(variant: $0, progressCallback: $1) }, deleteModel: { try await live.deleteModel(variant: $0) }, isModelDownloaded: { await live.isModelDownloaded($0) }, @@ -206,6 +207,7 @@ actor TranscriptionClientLive { url: URL, model: String, options: DecodingOptions, + settings: HexSettings? = nil, progressCallback: @escaping (Progress) -> Void ) async throws -> String { // Load or switch to the required model if needed. @@ -231,7 +233,16 @@ actor TranscriptionClientLive { let results = try await whisperKit.transcribe(audioPath: url.path, decodeOptions: options) // Concatenate results from all segments. - let text = results.map(\.text).joined(separator: " ") + var text = results.map(\.text).joined(separator: " ") + + // Use provided settings or default to auto-capitalization + let useAutoCapitalization = settings == nil ? true : !settings!.disableAutoCapitalization + + // Convert to lowercase if auto-capitalization is disabled + if !useAutoCapitalization { + text = text.lowercased() + } + return text } diff --git a/Hex/Features/App/AppFeature.swift b/Hex/Features/App/AppFeature.swift index 9bf15ee..98b01ed 100644 --- a/Hex/Features/App/AppFeature.swift +++ b/Hex/Features/App/AppFeature.swift @@ -15,6 +15,7 @@ struct AppFeature { case settings case history case about + case aiEnhancement } @ObservableState @@ -80,6 +81,13 @@ struct AppView: View { }.buttonStyle(.plain) .tag(AppFeature.ActiveTab.settings) + Button { + store.send(.setActiveTab(.aiEnhancement)) + } label: { + Label("AI Enhancement", systemImage: "brain") + }.buttonStyle(.plain) + .tag(AppFeature.ActiveTab.aiEnhancement) + Button { store.send(.setActiveTab(.history)) } label: { @@ -99,6 +107,9 @@ struct AppView: View { case .settings: SettingsView(store: store.scope(state: \.settings, action: \.settings)) .navigationTitle("Settings") + case .aiEnhancement: + AIEnhancementView(store: store.scope(state: \.settings.aiEnhancement, action: \.settings.aiEnhancement)) + .navigationTitle("AI Enhancement") case .history: HistoryView(store: store.scope(state: \.history, action: \.history)) .navigationTitle("History") diff --git a/Hex/Features/Settings/AIEnhancementFeature.swift b/Hex/Features/Settings/AIEnhancementFeature.swift new file mode 100644 index 0000000..507b7aa --- /dev/null +++ b/Hex/Features/Settings/AIEnhancementFeature.swift @@ -0,0 +1,107 @@ +// +// AIEnhancementFeature.swift +// Hex +// +// Created by Claude AI on 4/22/25. +// + +import ComposableArchitecture +import Foundation +import SwiftUI + +@Reducer +struct AIEnhancementFeature { + @ObservableState + struct State: Equatable { + @Shared(.hexSettings) var hexSettings: HexSettings + + var isOllamaAvailable: Bool = false + var availableModels: [String] = [] + var isLoadingModels: Bool = false + var errorMessage: String? 
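// ---------------------------------------------------------------------------
// Sketch of how the reducer below could be exercised with TCA's TestStore
// (illustrative only, not part of the diff; the dependency overrides assume
// the @DependencyClient endpoints declared in AIEnhancementClient.swift):
//
//   let store = TestStore(initialState: AIEnhancementFeature.State()) {
//       AIEnhancementFeature()
//   } withDependencies: {
//       $0.aiEnhancement.isOllamaAvailable = { true }
//       $0.aiEnhancement.getAvailableModels = { ["gemma3", "llama3"] }
//   }
//   await store.send(.task)
//   await store.receive(\.checkOllamaAvailability)
//   await store.receive(\.ollamaAvailabilityResult) { $0.isOllamaAvailable = true }
//   await store.receive(\.loadAvailableModels) { $0.isLoadingModels = true }
//   await store.receive(\.modelsLoaded) { ... }
// ---------------------------------------------------------------------------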
= nil

+        // Computed property for convenient access to the default model
+        var defaultAIModel: String {
+            "gemma3"
+        }
+    }
+
+    enum Action {
+        case task
+        case checkOllamaAvailability
+        case ollamaAvailabilityResult(Bool)
+        case loadAvailableModels
+        case modelsLoaded([String])
+        case modelLoadingError(String)
+        case setSelectedModel(String)
+        case resetToDefaultPrompt
+    }
+
+    @Dependency(\.aiEnhancement) var aiEnhancement
+
+    var body: some ReducerOf<Self> {
+        Reduce { state, action in
+            switch action {
+            case .task:
+                return .send(.checkOllamaAvailability)
+
+            case .checkOllamaAvailability:
+                return .run { send in
+                    let isAvailable = await aiEnhancement.isOllamaAvailable()
+                    await send(.ollamaAvailabilityResult(isAvailable))
+                }
+
+            case let .ollamaAvailabilityResult(isAvailable):
+                state.isOllamaAvailable = isAvailable
+
+                // If Ollama is available, load models
+                if isAvailable {
+                    return .send(.loadAvailableModels)
+                }
+                return .none
+
+            case .loadAvailableModels:
+                state.isLoadingModels = true
+                state.errorMessage = nil
+
+                return .run { send in
+                    do {
+                        let models = try await aiEnhancement.getAvailableModels()
+                        await send(.modelsLoaded(models))
+                    } catch {
+                        await send(.modelLoadingError(error.localizedDescription))
+                    }
+                }
+
+            case let .modelsLoaded(models):
+                state.isLoadingModels = false
+                state.availableModels = models
+
+                // If the selected model is not in the list and we have models, select the first one
+                if !models.isEmpty && !models.contains(state.hexSettings.selectedAIModel) {
+                    // Check if the default model is available
+                    if models.contains(state.defaultAIModel) {
+                        state.$hexSettings.withLock { $0.selectedAIModel = state.defaultAIModel }
+                    } else {
+                        state.$hexSettings.withLock { $0.selectedAIModel = models[0] }
+                    }
+                }
+
+                return .none
+
+            case let .modelLoadingError(message):
+                state.isLoadingModels = false
+                state.errorMessage = message
+                return .none
+
+            case let .setSelectedModel(model):
+                state.$hexSettings.withLock { $0.selectedAIModel = model }
+                return .none
+
+            case .resetToDefaultPrompt:
+                state.$hexSettings.withLock { $0.aiEnhancementPrompt = EnhancementOptions.defaultPrompt }
+                return .none
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/Hex/Features/Settings/AIEnhancementView.swift b/Hex/Features/Settings/AIEnhancementView.swift
new file mode 100644
index 0000000..df68dcb
--- /dev/null
+++ b/Hex/Features/Settings/AIEnhancementView.swift
@@ -0,0 +1,413 @@
+//
+//  AIEnhancementView.swift
+//  Hex
+//
+//  Created by Claude AI on 4/22/25.
+//
+
+import ComposableArchitecture
+import SwiftUI
+
+struct AIEnhancementView: View {
+    @Bindable var store: StoreOf<AIEnhancementFeature>
+    @State private var showExpandedPrompt = false
+    @State private var isHoveringModelSelect = false
+
+    var body: some View {
+        Form {
+            // Activation Section (Always show this first)
+            Section {
+                activationToggle
+            } header: {
+                Text("AI Enhancement")
+            } footer: {
+                Text("Enable AI to improve transcription quality by fixing grammar, formatting, and errors.")
+                    .foregroundColor(.secondary.opacity(0.7))
+                    .font(.caption)
+            }
+
+            // Only show other settings if AI enhancement is enabled
+            if store.hexSettings.useAIEnhancement {
+                // Connection Status Section (only if AI enhancement is enabled)
+                if !store.isOllamaAvailable {
+                    Section {
+                        connectionStatusView
+                    } header: {
+                        Text("Ollama Status")
+                    }
+                }
+
+                // Model Selection Section
+                modelSelectionSection
+
+                // Temperature Control Section
+                temperatureSection
+
+                // Prompt Configuration Section
+                promptSection
+            }
+        }
+        .formStyle(.grouped)
+        .task {
+            await store.send(.task).finish()
+        }
+    }
+
+    // MARK: - Component Views
+
+    // Connection Status View
+    private var connectionStatusView: some View {
+        VStack(alignment: .leading, spacing: 12) {
+            HStack(alignment: .top) {
+                Image(systemName: "exclamationmark.triangle.fill")
+                    .font(.title2)
+                    .foregroundColor(.orange)
+
+                VStack(alignment: .leading, spacing: 8) {
+                    Text("Ollama Not Connected")
+                        .font(.headline)
+                        .foregroundColor(.primary)
+
+                    Text("AI enhancement requires Ollama to be installed and running locally.")
+                        .font(.subheadline)
+                        .foregroundColor(.secondary)
+                }
+            }
+
+            Divider()
+
+            VStack(alignment: .leading, spacing: 12) {
+                Text("To set up Ollama:")
+                    .font(.subheadline)
+                    .foregroundColor(.secondary)
+
+                VStack(alignment: .leading, spacing: 8) {
+                    bulletPoint(text: "Download and install Ollama from [ollama.com](https://ollama.com)")
+                    bulletPoint(text: "Launch the Ollama application")
+                    bulletPoint(text: "Pull a language model (llama3 recommended)")
+                }
+                .padding(.leading, 8)
+            }
+
+            HStack {
+                Spacer()
+
+                Button {
+                    NSWorkspace.shared.open(URL(string: "https://ollama.com")!)
+ } label: { + Label("Download Ollama", systemImage: "arrow.down.circle") + } + .buttonStyle(DefaultButtonStyle()) + .foregroundColor(Color.blue) + + Button { + store.send(.checkOllamaAvailability) + } label: { + Label("Check Connection", systemImage: "arrow.clockwise") + } + .buttonStyle(DefaultButtonStyle()) + .foregroundColor(Color.blue) + } + .padding(.top, 4) + } + .padding() + .background(RoundedRectangle(cornerRadius: 8) + .fill(Color.orange.opacity(0.1)) + .overlay(RoundedRectangle(cornerRadius: 8) + .stroke(Color.orange.opacity(0.3), lineWidth: 1)) + ) + } + + // Activation Toggle + private var activationToggle: some View { + VStack(spacing: 8) { + // Main toggle row + Toggle(isOn: Binding( + get: { store.hexSettings.useAIEnhancement }, + set: { newValue in + store.$hexSettings.withLock { $0.useAIEnhancement = newValue } + + // When enabling, check Ollama status + if newValue { + Task { + await store.send(.checkOllamaAvailability).finish() + } + } + } + )) { + Text("Use AI Enhancement") + .font(.body) + } + + // Connection status indicator (only show if AI enhancement is enabled and Ollama is available) + if store.hexSettings.useAIEnhancement && store.isOllamaAvailable { + HStack(spacing: 4) { + Circle() + .fill(Color.green) + .frame(width: 6, height: 6) + Text("Ollama Connected") + .font(.caption) + .foregroundColor(.secondary) + Spacer() + } + .padding(.leading, 2) + } + } + } + + // Model Selection Section + private var modelSelectionSection: some View { + Section { + VStack(alignment: .leading, spacing: 12) { + // Model selection header + HStack { + Label { + Text("Language Model") + .font(.body) + } icon: { + Image(systemName: "brain") + } + + Spacer() + + // Refresh button for models + Button { + store.send(.loadAvailableModels) + } label: { + Image(systemName: "arrow.clockwise") + .font(.body) + } + .buttonStyle(DefaultButtonStyle()) + .disabled(store.isLoadingModels) + .opacity(store.isLoadingModels ? 0.5 : 0.7) + } + + if store.isLoadingModels { + // Loading indicator + HStack { + ProgressView() + .scaleEffect(0.7) + Text("Loading available models...") + .font(.subheadline) + .foregroundColor(.secondary) + Spacer() + } + .padding(.vertical, 4) + } else if !store.isOllamaAvailable { + // Ollama not available message + Text("Ollama connection required to view models") + .font(.subheadline) + .foregroundColor(.secondary) + .padding(.vertical, 4) + } else if let error = store.errorMessage { + // Error message + HStack { + Image(systemName: "exclamationmark.triangle") + .foregroundColor(.red) + Text("Error: \(error)") + .font(.caption) + .foregroundColor(.red) + .lineLimit(2) + } + .padding(.vertical, 4) + } else if store.availableModels.isEmpty { + // No models available + HStack(alignment: .center) { + Text("No models found in Ollama") + .font(.subheadline) + .foregroundColor(.secondary) + + Spacer() + + Link("Browse Models", destination: URL(string: "https://ollama.com/library")!) 
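// Setup note (assumes a standard Ollama CLI install): the models listed in
// this picker are whatever the local daemon has already pulled, e.g.
//
//   ollama pull llama3    # fetch a model
//   ollama list           # same inventory the /api/tags endpoint returns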
+ .font(.subheadline) + .foregroundColor(.blue) + } + .padding(.vertical, 8) + } else { + // Model picker + VStack(alignment: .leading, spacing: 4) { + Text("Select AI model:") + .font(.subheadline) + .foregroundColor(.secondary) + + Picker("", selection: Binding( + get: { store.hexSettings.selectedAIModel }, + set: { store.send(.setSelectedModel($0)) } + )) { + ForEach(store.availableModels, id: \.self) { model in + Text(model).tag(model) + } + } + .pickerStyle(.menu) + .labelsHidden() + .frame(maxWidth: .infinity, alignment: .leading) + .padding(.vertical, 2) + } + } + } + } header: { + Text("Model Selection") + } footer: { + if !store.availableModels.isEmpty { + Text("Smaller models are faster but less capable. Llama3 offers a good balance of speed and quality.") + .foregroundColor(.secondary.opacity(0.7)) + .font(.caption) + } + } + } + + // Temperature Section + private var temperatureSection: some View { + Section { + // Label with temperature value + HStack { + Text("Response Style") + .font(.subheadline) + + Spacer() + + Text("\(store.hexSettings.aiEnhancementTemperature, specifier: "%.2f")") + .monospacedDigit() + .font(.caption) + .foregroundColor(.secondary) + .frame(width: 40) + } + .padding(.bottom, 4) + + // Slider for temperature control - extend even further + ZStack { + Slider( + value: Binding( + get: { store.hexSettings.aiEnhancementTemperature }, + set: { newValue in + store.$hexSettings.withLock { $0.aiEnhancementTemperature = newValue } + } + ), + in: 0...1, + step: 0.05 + ) + .padding(.horizontal, -40) + .frame(maxWidth: .infinity) + } + .padding(.horizontal, 40) + + // Temperature descriptions + HStack { + Text("Precision") + .font(.caption) + .foregroundColor(.secondary) + + Spacer() + + Text("Creativity") + .font(.caption) + .foregroundColor(.secondary) + } + .padding(.horizontal, 0) + .padding(.top, 4) + } header: { + Text("Response Style") + } footer: { + Text("Lower values produce more consistent, conservative improvements. Higher values allow more creative rewording.") + .foregroundColor(.secondary.opacity(0.7)) + .font(.caption) + } + } + + // Prompt Configuration Section + private var promptSection: some View { + Section { + VStack(spacing: 0) { + // Header with edit button + HStack { + Label { + Text("Instructions") + .font(.subheadline) + } icon: { + Image(systemName: "doc.text") + } + + Spacer() + + Button(showExpandedPrompt ? "Done" : "Edit") { + withAnimation(.spring(duration: 0.3)) { + showExpandedPrompt.toggle() + } + } + .buttonStyle(DefaultButtonStyle()) + .foregroundColor(showExpandedPrompt ? 
Color.primary : Color.accentColor) + .font(.caption) + } + .padding(.bottom, 8) + + if showExpandedPrompt { + // Expanded editor view + VStack(spacing: 8) { + // Editor + TextEditor(text: Binding( + get: { store.hexSettings.aiEnhancementPrompt }, + set: { newValue in + store.$hexSettings.withLock { $0.aiEnhancementPrompt = newValue } + } + )) + .font(.system(.body, design: .monospaced)) + .frame(minHeight: 150) + .padding(4) + .overlay( + RoundedRectangle(cornerRadius: 8) + .stroke(Color.secondary.opacity(0.2), lineWidth: 1) + ) + + // Reset button + Button("Reset to Default") { + store.send(.resetToDefaultPrompt) + } + .buttonStyle(DefaultButtonStyle()) + .font(.caption) + .foregroundColor(.secondary) + .padding(4) + .frame(maxWidth: .infinity, alignment: .trailing) + } + } else { + // Collapsed preview + Text(store.hexSettings.aiEnhancementPrompt) + .font(.caption) + .foregroundColor(.secondary) + .lineLimit(4) + .truncationMode(.tail) + .frame(maxWidth: .infinity, alignment: .leading) + .padding(10) + .background( + RoundedRectangle(cornerRadius: 8) + .fill(Color.secondary.opacity(0.05)) + ) + } + } + } header: { + Text("Enhancement Prompt") + } footer: { + if !showExpandedPrompt { + Text("These instructions tell the AI how to improve your transcribed text.") + .foregroundColor(.secondary.opacity(0.7)) + .font(.caption) + } else { + Text("Make changes to customize how the AI enhances your transcriptions. Be specific about what should be preserved or changed.") + .foregroundColor(.secondary.opacity(0.7)) + .font(.caption) + } + } + } + + // Helper for bullet points + private func bulletPoint(text: String) -> some View { + HStack(alignment: .firstTextBaseline, spacing: 8) { + Text("•") + .font(.subheadline) + .foregroundColor(.secondary) + Text(LocalizedStringKey(text)) + .font(.subheadline) + .foregroundColor(.secondary) + } + } +} diff --git a/Hex/Features/Settings/SettingsFeature.swift b/Hex/Features/Settings/SettingsFeature.swift index 6d68620..bb8416c 100644 --- a/Hex/Features/Settings/SettingsFeature.swift +++ b/Hex/Features/Settings/SettingsFeature.swift @@ -35,6 +35,9 @@ struct SettingsFeature { // Model Management var modelDownload = ModelDownloadFeature.State() + + // AI Enhancement + var aiEnhancement = AIEnhancementFeature.State() } enum Action: BindableAction { @@ -60,6 +63,9 @@ struct SettingsFeature { // Model Management case modelDownload(ModelDownloadFeature.Action) + + // AI Enhancement + case aiEnhancement(AIEnhancementFeature.Action) } @Dependency(\.keyEventMonitor) var keyEventMonitor @@ -73,6 +79,10 @@ struct SettingsFeature { Scope(state: \.modelDownload, action: \.modelDownload) { ModelDownloadFeature() } + + Scope(state: \.aiEnhancement, action: \.aiEnhancement) { + AIEnhancementFeature() + } Reduce { state, action in switch action { @@ -99,13 +109,18 @@ struct SettingsFeature { await send(.modelDownload(.fetchModels)) await send(.loadAvailableInputDevices) - // Set up periodic refresh of available devices (every 120 seconds) - // Using a longer interval to reduce resource usage + // Set up periodic refresh of available devices (every 180 seconds = 3 minutes) + // Using an even longer interval to further reduce resource usage let deviceRefreshTask = Task { @MainActor in - for await _ in clock.timer(interval: .seconds(120)) { - // Only refresh when the app is active to save resources - if await NSApplication.shared.isActive { - await send(.loadAvailableInputDevices) + for await _ in clock.timer(interval: .seconds(180)) { + // Only refresh when the app is active AND 
+          // the settings panel is visible
+          let isActive = NSApplication.shared.isActive
+          let areSettingsVisible = NSApp.windows.contains {
+            $0.isVisible && ($0.title.contains("Settings") || $0.title.contains("Preferences"))
+          }
+
+          if isActive && areSettingsVisible {
+            send(.loadAvailableInputDevices)
+          }
         }
       }
@@ -279,6 +294,10 @@
       case .modelDownload:
         return .none
+
+      // AI Enhancement
+      case .aiEnhancement:
+        return .none

       // Microphone device selection
       case .loadAvailableInputDevices:
diff --git a/Hex/Features/Settings/SettingsView.swift b/Hex/Features/Settings/SettingsView.swift
index bd38e4f..30946f3 100644
--- a/Hex/Features/Settings/SettingsView.swift
+++ b/Hex/Features/Settings/SettingsView.swift
@@ -217,6 +217,13 @@
         } icon: {
           Image(systemName: "doc.on.clipboard")
         }
+
+        Label {
+          Toggle("Disable auto-capitalization", isOn: $store.hexSettings.disableAutoCapitalization)
+          Text("Disable automatic capitalization in transcriptions")
+        } icon: {
+          Image(systemName: "textformat.abc")
+        }

         Label {
           Toggle(
diff --git a/Hex/Features/Transcription/TranscriptionFeature.swift b/Hex/Features/Transcription/TranscriptionFeature.swift
index 7c0b605..e693ba0 100644
--- a/Hex/Features/Transcription/TranscriptionFeature.swift
+++ b/Hex/Features/Transcription/TranscriptionFeature.swift
@@ -19,10 +19,12 @@
     var isRecording: Bool = false
     var isTranscribing: Bool = false
     var isPrewarming: Bool = false
+    var isEnhancing: Bool = false // Tracks when AI enhancement is active
     var error: String?
     var recordingStartTime: Date?
     var meter: Meter = .init(averagePower: 0, peakPower: 0)
     var assertionID: IOPMAssertionID?
+    var pendingTranscription: String? // Stores the original transcription as a fallback
     @Shared(.hexSettings) var hexSettings: HexSettings
     @Shared(.transcriptionHistory) var transcriptionHistory: TranscriptionHistory
   }
@@ -45,12 +47,20 @@
     // Transcription result flow
     case transcriptionResult(String)
     case transcriptionError(Error)
+
+    // AI Enhancement flow
+    case setEnhancingState(Bool)
+    case aiEnhancementResult(String)
+    case aiEnhancementError(Error)
+    case ollamaBecameUnavailable
+    case recheckOllamaAvailability
   }

   enum CancelID {
     case delayedRecord
     case metering
     case transcription
+    case aiEnhancement
   }

   @Dependency(\.transcription) var transcription
@@ -58,6 +68,7 @@
   @Dependency(\.pasteboard) var pasteboard
   @Dependency(\.keyEventMonitor) var keyEventMonitor
   @Dependency(\.soundEffects) var soundEffect
+  @Dependency(\.aiEnhancement) var aiEnhancement

   var body: some ReducerOf<Self> {
     Reduce { state, action in
       switch action {
@@ -76,7 +87,15 @@
       // MARK: - Metering

       case let .audioLevelUpdated(meter):
-        state.meter = meter
+        // Only update state.meter if it's significantly different from the previous value
+        // or if we're currently recording (when we need more responsive updates)
+        let averageDiff = abs(meter.averagePower - state.meter.averagePower)
+        let peakDiff = abs(meter.peakPower - state.meter.peakPower)
+        let significantChange = averageDiff > 0.03 || peakDiff > 0.05
+
+        if state.isRecording || significantChange {
+          state.meter = meter
+        }
         return .none

       // MARK: - HotKey Flow
@@ -106,6 +125,57 @@
       case let .transcriptionError(error):
         return handleTranscriptionError(&state, error: error)
+
+      // MARK: - AI Enhancement Results
+
+      case let .setEnhancingState(isEnhancing):
+        state.isEnhancing = isEnhancing
+        return .none
+
+      case let
.aiEnhancementResult(result):
+        return handleAIEnhancement(&state, result: result)
+
+      case let .aiEnhancementError(error):
+        // Check if this is an Ollama connectivity error
+        let nsError = error as NSError
+        if nsError.domain == "AIEnhancementClient" && (nsError.code == -1001 || nsError.localizedDescription.contains("Ollama")) {
+          print("AI Enhancement error due to Ollama connectivity: \(error)")
+          return .send(.ollamaBecameUnavailable)
+        } else {
+          // For any other failure: log it, surface a message to the user, and
+          // fall back to the original transcription kept in `pendingTranscription`.
+          print("AI Enhancement error: \(error)")
+
+          state.error = "AI enhancement failed: \(error.localizedDescription). Using original transcription instead."
+
+          // Re-deliver the original transcription. handleTranscriptionResult
+          // skips the enhancement path while `pendingTranscription` is still
+          // set, so this cannot loop back into another enhancement attempt.
+          return .send(.transcriptionResult(state.pendingTranscription ?? ""))
+        }
+
+      case .ollamaBecameUnavailable:
+        // When Ollama becomes unavailable, recheck availability and handle UI updates
+        return .send(.recheckOllamaAvailability)
+
+      case .recheckOllamaAvailability:
+        // Recheck if Ollama is available and update UI accordingly
+        return .run { send in
+          let isAvailable = await aiEnhancement.isOllamaAvailable()
+          if !isAvailable {
+            print("[TranscriptionFeature] Ollama is not available. AI enhancement is disabled.")
+            // Update state to show the error to the user
+            await send(.transcriptionError(NSError(
+              domain: "TranscriptionFeature",
+              code: -1002,
+              userInfo: [NSLocalizedDescriptionKey: "Ollama is not available. AI enhancement is disabled."]
+            )))
+          }
+        }

       // MARK: - Cancel Entire Flow
@@ -125,9 +195,52 @@ private extension TranscriptionFeature {
   /// Effect to begin observing the audio meter.
   func startMeteringEffect() -> Effect<Action> {
-    .run { send in
+    // Create a separate actor to handle rate limiting safely in Swift 6
+    actor MeterRateLimiter {
+      private var lastUpdateTime = Date()
+      private var lastMeter: Meter? = nil
+
+      func shouldUpdate(meter: Meter) -> Bool {
+        let now = Date()
+        let timeSinceLastUpdate = now.timeIntervalSince(lastUpdateTime)
+
+        // Always update if enough time has passed (ensures UI responsiveness)
+        if timeSinceLastUpdate >= 0.05 { // Max 20 updates per second
+          self.lastUpdateTime = now
+          self.lastMeter = meter
+          return true
+        }
+        // Or if there's a significant change from the last meter we actually sent
+        else if let last = lastMeter {
+          let averageDiff = abs(meter.averagePower - last.averagePower)
+          let peakDiff = abs(meter.peakPower - last.peakPower)
+          // More responsive threshold for significant changes
+          let shouldUpdate = averageDiff > 0.02 || peakDiff > 0.04
+
+          if shouldUpdate {
+            self.lastUpdateTime = now
+            self.lastMeter = meter
+          }
+
+          return shouldUpdate
+        }
+
+        self.lastUpdateTime = now
+        self.lastMeter = meter
+        return true // First update always passes through
+      }
+    }
+
+    return .run { send in
+      let rateLimiter = MeterRateLimiter()
+
+      for await meter in await recording.observeAudioLevel() {
-        await send(.audioLevelUpdated(meter))
+        // Check if we should send this update
+        if await rateLimiter.shouldUpdate(meter: meter) {
+          // Effect.run's closure is @Sendable, so this is an appropriate context
+          // for sending actions; ComposableArchitecture dispatches to the main
+          // thread as needed.
+          await send(.audioLevelUpdated(meter))
+        }
       }
     }
     .cancellable(id: CancelID.metering, cancelInFlight: true)
@@ -267,9 +380,13 @@
     // Otherwise, proceed to transcription
     state.isTranscribing = true
     state.error = nil
+
+    // Extract all required state values to local variables to avoid capturing the inout parameter
     let model = state.hexSettings.selectedModel
     let language = state.hexSettings.outputLanguage
-
+    let settings = state.hexSettings
+    // recordingStartTime is captured in handleTranscriptionResult
+
     state.isPrewarming = true

     return .run { send in
@@ -284,7 +401,7 @@
         chunkingStrategy: .vad
       )

-      let result = try await transcription.transcribe(audioURL, model, decodeOptions) { _ in }
+      let result = try await transcription.transcribe(audioURL, model, decodeOptions, settings) { _ in }
       print("Transcribed audio from URL: \(audioURL) to text: \(result)")

       await send(.transcriptionResult(result))
@@ -303,9 +420,103 @@
   func handleTranscriptionResult(
     _ state: inout State,
     result: String
   ) -> Effect<Action> {
+    // Enhance only fresh transcriptions. When `pendingTranscription` is already
+    // set, this call is the fallback from a failed enhancement and must not
+    // re-enter the enhancement path (which would retry indefinitely).
+    if state.hexSettings.useAIEnhancement && state.pendingTranscription == nil {
+      // Keep state.isTranscribing = true since we're still processing
+
+      // Store the original transcription for error handling/fallback
+      state.pendingTranscription = result
+
+      // Extract values to avoid capturing the inout parameter
+      let selectedAIModel = state.hexSettings.selectedAIModel
+      let promptText = state.hexSettings.aiEnhancementPrompt
+      let temperature = state.hexSettings.aiEnhancementTemperature
+
+      return enhanceWithAI(
+        result: result,
+        model: selectedAIModel,
+        promptText: promptText,
+        temperature: temperature
+      )
+    } else {
+      state.isTranscribing = false
+      state.isPrewarming = false
+      state.pendingTranscription = nil // Clear any fallback text from a failed enhancement run
+
+      // If empty text, nothing else to do
+      guard !result.isEmpty else {
+        return .none
+      }
+
+      // Compute how long we recorded
+      let duration = state.recordingStartTime.map { Date().timeIntervalSince($0) } ?? 0
+
+      // Continue with storing the final result in the background
+      return finalizeRecordingAndStoreTranscript(
+        result: result,
+        duration: duration,
+        transcriptionHistory: state.$transcriptionHistory
+      )
+    }
+  }
+
+  // MARK: - AI Enhancement Handlers
+
+  // Use AI to enhance the transcription result
+  private func enhanceWithAI(
+    result: String,
+    model: String,
+    promptText: String,
+    temperature: Double
+  ) -> Effect<Action> {
+    // If empty text, nothing else to do
+    guard !result.isEmpty else {
+      return .send(.aiEnhancementResult(result)) // Just pass through empty text
+    }
+
+    let options = EnhancementOptions(
+      prompt: promptText,
+      temperature: temperature
+    )
+
+    print("[TranscriptionFeature] Starting AI enhancement with model: \(model)")
+
+    // We need to use .send to set the enhancing state through the proper action
+    return .merge(
+      // First update the state to indicate enhancement is starting
+      .send(.setEnhancingState(true)),
+
+      // Then run the enhancement
+      .run { send in
+        do {
+          print("[TranscriptionFeature] Calling aiEnhancement.enhance()")
+          // Grab the raw closure to avoid argument label issues
+          let enhanceMethod = aiEnhancement.enhance
+          let enhancedText = try await enhanceMethod(result, model, options) { progress in
+            // Optional: could surface progress information in the UI here
+          }
+          print("[TranscriptionFeature] AI enhancement succeeded")
+          await send(.aiEnhancementResult(enhancedText))
+        } catch {
+          print("[TranscriptionFeature] Error enhancing text with AI: \(error)")
+          // Route the error through the action system
+          await send(.aiEnhancementError(error))
+        }
+      }
+    )
+    // Deliberately not cancellable: premature cancellation appeared to abort
+    // in-flight enhancements (see the cancel handler below).
+  }
+
+  // Handle the AI enhancement result
+  private func handleAIEnhancement(
+    _ state: inout State,
+    result: String
+  ) -> Effect<Action> {
     state.isTranscribing = false
     state.isPrewarming = false
+    state.isEnhancing = false // Reset the enhancing state
+    state.pendingTranscription = nil // Clear the pending transcription since enhancement succeeded

     // If empty text, nothing else to do
     guard !result.isEmpty else {
@@ -395,10 +606,18 @@
     state.isTranscribing = false
     state.isRecording = false
     state.isPrewarming = false
+    state.isEnhancing = false

     return .merge(
       .cancel(id: CancelID.transcription),
       .cancel(id: CancelID.delayedRecord),
+      // Don't cancel AI enhancement, as doing so might cause issues with Ollama.
+      // This creates a UI inconsistency where the UI shows cancellation
+      // but enhancement continues in the background. We intentionally allow this
+      // to prevent issues with Ollama's streaming API and ensure stability.
+      // TODO: Consider implementing a safer cancellation approach or state tracking
+      // to properly ignore late results after cancellation.
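// (Illustrative sketch, not part of the diff: the "state tracking" the TODO
// above refers to could tag each enhancement run with a generation counter
// and drop late results whose generation no longer matches, e.g.
//
//   var enhancementGeneration = 0                 // in State
//   case let .aiEnhancementResult(generation, text):
//     guard generation == state.enhancementGeneration else { return .none }
//   // ...and on cancel: state.enhancementGeneration += 1
// )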
+ // .cancel(id: CancelID.aiEnhancement), .run { _ in await soundEffect.play(.cancel) } @@ -440,7 +659,9 @@ struct TranscriptionView: View { @Bindable var store: StoreOf var status: TranscriptionIndicatorView.Status { - if store.isTranscribing { + if store.isEnhancing { + return .enhancing + } else if store.isTranscribing { return .transcribing } else if store.isRecording { return .recording diff --git a/Hex/Features/Transcription/TranscriptionIndicatorView.swift b/Hex/Features/Transcription/TranscriptionIndicatorView.swift index 0f4dd25..17e03ae 100644 --- a/Hex/Features/Transcription/TranscriptionIndicatorView.swift +++ b/Hex/Features/Transcription/TranscriptionIndicatorView.swift @@ -3,6 +3,7 @@ // Hex // // Created by Kit Langton on 1/25/25. +// import Pow import SwiftUI @@ -14,12 +15,14 @@ struct TranscriptionIndicatorView: View { case recording case transcribing case prewarming + case enhancing } var status: Status var meter: Meter let transcribeBaseColor: Color = .blue + let enhanceBaseColor: Color = .green private var backgroundColor: Color { switch status { @@ -28,6 +31,7 @@ struct TranscriptionIndicatorView: View { case .recording: return .red.mix(with: .black, by: 0.5).mix(with: .red, by: meter.averagePower * 3) case .transcribing: return transcribeBaseColor.mix(with: .black, by: 0.5) case .prewarming: return transcribeBaseColor.mix(with: .black, by: 0.5) + case .enhancing: return enhanceBaseColor.mix(with: .black, by: 0.5) } } @@ -38,6 +42,7 @@ struct TranscriptionIndicatorView: View { case .recording: return Color.red.mix(with: .white, by: 0.1).opacity(0.6) case .transcribing: return transcribeBaseColor.mix(with: .white, by: 0.1).opacity(0.6) case .prewarming: return transcribeBaseColor.mix(with: .white, by: 0.1).opacity(0.6) + case .enhancing: return enhanceBaseColor.mix(with: .white, by: 0.1).opacity(0.6) } } @@ -48,6 +53,7 @@ struct TranscriptionIndicatorView: View { case .recording: return Color.red case .transcribing: return transcribeBaseColor case .prewarming: return transcribeBaseColor + case .enhancing: return enhanceBaseColor } } @@ -60,89 +66,242 @@ struct TranscriptionIndicatorView: View { } @State var transcribeEffect = 0 + @State var enhanceEffect = 0 + + // Memoize these calculations to prevent recalculating on every render + private func recordingOpacity(for power: Double, threshold: Double = 0.1) -> Double { + guard status == .recording else { return 0 } + return power < threshold ? 
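For context on what the `aiEnhancement.enhance` endpoint ultimately does: the live client talks to a local Ollama server (see AIEnhancementClient.swift). A rough sketch of the kind of non-streaming request involved, following Ollama's documented REST API (`POST /api/generate`); the app's actual implementation may differ, for example by streaming the response:

import Foundation

struct OllamaRequest: Encodable {
  struct Options: Encodable {
    let temperature: Double
    let num_predict: Int
  }
  let model: String
  let prompt: String
  let stream: Bool
  let options: Options
}

struct OllamaResponse: Decodable {
  let response: String
}

// Illustrative only: a single non-streaming generate call against a local Ollama.
func enhanceViaOllama(text: String, model: String, prompt: String,
                      temperature: Double, maxTokens: Int) async throws -> String {
  var request = URLRequest(url: URL(string: "http://localhost:11434/api/generate")!)
  request.httpMethod = "POST"
  request.setValue("application/json", forHTTPHeaderField: "Content-Type")
  request.httpBody = try JSONEncoder().encode(OllamaRequest(
    model: model,
    prompt: prompt + "\n\n" + text,  // editing instructions followed by the transcript
    stream: false,
    options: .init(temperature: temperature, num_predict: maxTokens)
  ))
  let (data, _) = try await URLSession.shared.data(for: request)
  return try JSONDecoder().decode(OllamaResponse.self, from: data).response
}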
diff --git a/Hex/Features/Transcription/TranscriptionIndicatorView.swift b/Hex/Features/Transcription/TranscriptionIndicatorView.swift
index 0f4dd25..17e03ae 100644
--- a/Hex/Features/Transcription/TranscriptionIndicatorView.swift
+++ b/Hex/Features/Transcription/TranscriptionIndicatorView.swift
@@ -3,6 +3,7 @@
 //  Hex
 //
 //  Created by Kit Langton on 1/25/25.
+//
 
 import Pow
 import SwiftUI
@@ -14,12 +15,14 @@ struct TranscriptionIndicatorView: View {
     case recording
     case transcribing
     case prewarming
+    case enhancing
   }
 
   var status: Status
   var meter: Meter
 
   let transcribeBaseColor: Color = .blue
+  let enhanceBaseColor: Color = .green
 
   private var backgroundColor: Color {
     switch status {
@@ -28,6 +31,7 @@ struct TranscriptionIndicatorView: View {
     case .recording: return .red.mix(with: .black, by: 0.5).mix(with: .red, by: meter.averagePower * 3)
     case .transcribing: return transcribeBaseColor.mix(with: .black, by: 0.5)
     case .prewarming: return transcribeBaseColor.mix(with: .black, by: 0.5)
+    case .enhancing: return enhanceBaseColor.mix(with: .black, by: 0.5)
     }
   }
@@ -38,6 +42,7 @@ struct TranscriptionIndicatorView: View {
     case .recording: return Color.red.mix(with: .white, by: 0.1).opacity(0.6)
     case .transcribing: return transcribeBaseColor.mix(with: .white, by: 0.1).opacity(0.6)
     case .prewarming: return transcribeBaseColor.mix(with: .white, by: 0.1).opacity(0.6)
+    case .enhancing: return enhanceBaseColor.mix(with: .white, by: 0.1).opacity(0.6)
     }
   }
@@ -48,6 +53,7 @@ struct TranscriptionIndicatorView: View {
     case .recording: return Color.red
     case .transcribing: return transcribeBaseColor
     case .prewarming: return transcribeBaseColor
+    case .enhancing: return enhanceBaseColor
     }
   }
@@ -60,89 +66,242 @@ struct TranscriptionIndicatorView: View {
   }
 
   @State var transcribeEffect = 0
+  @State var enhanceEffect = 0
+
+  // Memoize these calculations to prevent recalculating on every render
+  private func recordingOpacity(for power: Double, threshold: Double = 0.1) -> Double {
+    guard status == .recording else { return 0 }
+    return power < threshold ? power / threshold : 1
+  }
+
+  // Cache shadow colors based on status and power
+  @ViewBuilder
+  private func shadowEffect(averagePower: Double) -> some View {
+    switch status {
+    case .recording:
+      EmptyView()
+        .shadow(color: .red.opacity(averagePower), radius: 4)
+        .shadow(color: .red.opacity(averagePower * 0.5), radius: 8)
+    case .enhancing:
+      EmptyView()
+        .shadow(color: enhanceBaseColor.opacity(0.7), radius: 4)
+        .shadow(color: enhanceBaseColor.opacity(0.4), radius: 8)
+    case .transcribing, .prewarming:
+      EmptyView()
+        .shadow(color: transcribeBaseColor.opacity(0.7), radius: 4)
+        .shadow(color: transcribeBaseColor.opacity(0.4), radius: 8)
+    default:
+      EmptyView()
+        .shadow(color: .red.opacity(0), radius: 4)
+        .shadow(color: .red.opacity(0), radius: 8)
+    }
+  }
 
   var body: some View {
-    let averagePower = min(1, meter.averagePower * 3)
-    let peakPower = min(1, meter.peakPower * 3)
-    ZStack {
-      Capsule()
-        .fill(backgroundColor.shadow(.inner(color: innerShadowColor, radius: 4)))
-        .overlay {
-          Capsule()
-            .stroke(strokeColor, lineWidth: 1)
-            .blendMode(.screen)
-        }
-        .overlay(alignment: .center) {
-          RoundedRectangle(cornerRadius: cornerRadius)
-            .fill(Color.red.opacity(status == .recording ? (averagePower < 0.1 ? averagePower / 0.1 : 1) : 0))
-            .blur(radius: 2)
-            .blendMode(.screen)
-            .padding(6)
-        }
-        .overlay(alignment: .center) {
-          RoundedRectangle(cornerRadius: cornerRadius)
-            .fill(Color.white.opacity(status == .recording ? (averagePower < 0.1 ? averagePower / 0.1 : 0.5) : 0))
-            .blur(radius: 1)
-            .blendMode(.screen)
-            .frame(maxWidth: .infinity, alignment: .center)
-            .padding(7)
-        }
-        .overlay(alignment: .center) {
-          GeometryReader { proxy in
-            RoundedRectangle(cornerRadius: cornerRadius)
-              .fill(Color.red.opacity(status == .recording ? (peakPower < 0.1 ? (peakPower / 0.1) * 0.5 : 0.5) : 0))
-              .frame(width: max(proxy.size.width * (peakPower + 0.6), 0), height: proxy.size.height, alignment: .center)
-              .frame(maxWidth: .infinity, alignment: .center)
-              .blur(radius: 4)
-              .blendMode(.screen)
-          }.padding(6)
-        }
-        .cornerRadius(cornerRadius)
-        .shadow(
-          color: status == .recording ? .red.opacity(averagePower) : .red.opacity(0),
-          radius: 4
-        )
-        .shadow(
-          color: status == .recording ? .red.opacity(averagePower * 0.5) : .red.opacity(0),
-          radius: 8
+    // Fast track for hidden state to avoid expensive calculations
+    if status == .hidden {
+      EmptyView()
+    } else {
+      // Only do these calculations when actually visible
+      let averagePower = min(1, meter.averagePower * 3)
+      let peakPower = min(1, meter.peakPower * 3)
+
+      ZStack {
+        // Base capsule with all effects - avoid recreating for hidden state
+        CapsuleWithEffects(
+          status: status,
+          cornerRadius: cornerRadius,
+          averagePower: averagePower,
+          peakPower: peakPower,
+          innerShadowColor: innerShadowColor,
+          backgroundColor: backgroundColor,
+          strokeColor: strokeColor
         )
-        .animation(.interactiveSpring(), value: meter)
         .frame(
           width: status == .recording ? expandedWidth : baseWidth,
           height: baseWidth
         )
+        // Combine these into a single animation for better performance
+        .scaleEffect(status == .optionKeyPressed ? 0.95 : 1)
         .opacity(status == .hidden ? 0 : 1)
-        .scaleEffect(status == .hidden ? 0.0 : 1)
-        .blur(radius: status == .hidden ? 4 : 0)
-        .animation(.bouncy(duration: 0.3), value: status)
-        .changeEffect(.glow(color: .red.opacity(0.5), radius: 8), value: status)
-        .changeEffect(.shine(angle: .degrees(0), duration: 0.6), value: transcribeEffect)
+        // Apply expensive effects conditionally
+        .modifier(LightweightEffects(status: status, enhanceBaseColor: enhanceBaseColor))
+        // Only apply these effects during active animation states
+        .apply(needsShine: status == .transcribing || status == .enhancing,
+               transcribeEffect: transcribeEffect,
+               enhanceEffect: enhanceEffect)
         .compositingGroup()
-        .task(id: status == .transcribing) {
-          while status == .transcribing, !Task.isCancelled {
-            transcribeEffect += 1
-            try? await Task.sleep(for: .seconds(0.25))
+        // Efficient animation task
+        .task(id: status) {
+          // Only animate if we're in a state that needs animation
+          guard status == .transcribing || status == .enhancing else { return }
+
+          // Use a longer period to reduce CPU usage, split into two sleeps so
+          // cancellation is checked before each counter update
+          while (status == .transcribing || status == .enhancing), !Task.isCancelled {
+            try? await Task.sleep(for: .milliseconds(250))
+            if Task.isCancelled { break }
+
+            // Update the appropriate counter based on current status
+            if status == .transcribing {
+              transcribeEffect += 1
+            } else if status == .enhancing {
+              enhanceEffect += 1
+            }
+
+            try? await Task.sleep(for: .milliseconds(250))
           }
         }
 
-      // Show tooltip when prewarming
-      if status == .prewarming {
-        VStack(spacing: 4) {
-          Text("Model prewarming...")
-            .font(.system(size: 12, weight: .medium))
-            .foregroundColor(.white)
-            .padding(.horizontal, 8)
-            .padding(.vertical, 4)
-            .background(
-              RoundedRectangle(cornerRadius: 4)
-                .fill(Color.black.opacity(0.8))
-            )
+        // Show tooltip only for prewarming, not for enhancing
+        if status == .prewarming {
+          VStack(spacing: 4) {
+            Text("Model prewarming...")
+              .font(.system(size: 12, weight: .medium))
+              .foregroundColor(.white)
+              .padding(.horizontal, 8)
+              .padding(.vertical, 4)
+              .background(
+                RoundedRectangle(cornerRadius: 4)
+                  .fill(Color.black.opacity(0.8))
+              )
+          }
+          .offset(y: -24)
+          .transition(.opacity)
+          .zIndex(2)
+        }
+      }
+      .animation(.interactiveSpring(response: 0.3, dampingFraction: 0.7), value: status)
+    }
+  }
+}
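On the split-sleep loop in the `.task(id: status)` above: `Task.sleep` is itself cancellation-aware (it returns early when the task is cancelled, which happens whenever `status` changes and the task restarts), so the real benefit of splitting the 500 ms period is the explicit `Task.isCancelled` check between the two halves, which prevents one stray counter increment after cancellation. The pattern in isolation, as a standalone sketch:

import Foundation

// Standalone sketch of the split-sleep tick loop used above. The isCancelled
// check between the two sleeps ensures we never run onTick after cancellation.
func animationTicks(onTick: @escaping @MainActor () -> Void) async {
  while !Task.isCancelled {
    try? await Task.sleep(for: .milliseconds(250))
    if Task.isCancelled { break }
    await onTick() // e.g. transcribeEffect += 1
    try? await Task.sleep(for: .milliseconds(250))
  }
}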
+
+// Optimized view hierarchy to improve performance
+struct CapsuleWithEffects: View {
+  var status: TranscriptionIndicatorView.Status
+  var cornerRadius: CGFloat
+  var averagePower: Double
+  var peakPower: Double
+  var innerShadowColor: Color
+  var backgroundColor: Color
+  var strokeColor: Color
+
+  // Cache calculated values to avoid recalculation
+  private let recordingOpacity: Double
+  private let whiteOverlayOpacity: Double
+  private let peakOverlayOpacity: Double
+  private let primaryShadowColor: Color
+  private let secondaryShadowColor: Color
+
+  // This will help us avoid constant rebuilding of the view
+  @ViewBuilder private var innerOverlays: some View {
+    if status == .recording {
+      RoundedRectangle(cornerRadius: cornerRadius)
+        .fill(Color.red.opacity(recordingOpacity))
+        .blur(radius: 2)
+        .blendMode(.screen)
+        .padding(6)
+
+      RoundedRectangle(cornerRadius: cornerRadius)
+        .fill(Color.white.opacity(whiteOverlayOpacity))
+        .blur(radius: 1)
+        .blendMode(.screen)
+        .frame(maxWidth: .infinity, alignment: .center)
+        .padding(7)
+    }
+  }
+
+  // Constructor to pre-calculate all values
+  init(status: TranscriptionIndicatorView.Status, cornerRadius: CGFloat, averagePower: Double, peakPower: Double,
+       innerShadowColor: Color, backgroundColor: Color, strokeColor: Color) {
+    self.status = status
+    self.cornerRadius = cornerRadius
+    self.averagePower = averagePower
+    self.peakPower = peakPower
+    self.innerShadowColor = innerShadowColor
+    self.backgroundColor = backgroundColor
+    self.strokeColor = strokeColor
+
+    // Precalculate all values once during initialization
+    self.recordingOpacity = status == .recording ? (averagePower < 0.1 ? averagePower / 0.1 : 1) : 0
+    self.whiteOverlayOpacity = status == .recording ? (averagePower < 0.1 ? averagePower / 0.1 : 0.5) : 0
+    self.peakOverlayOpacity = status == .recording ? (peakPower < 0.1 ? (peakPower / 0.1) * 0.5 : 0.5) : 0
+
+    // Precalculate shadow colors
+    switch status {
+    case .recording:
+      self.primaryShadowColor = .red.opacity(averagePower)
+      self.secondaryShadowColor = .red.opacity(averagePower * 0.5)
+    case .enhancing:
+      self.primaryShadowColor = Color.green.opacity(0.7)
+      self.secondaryShadowColor = Color.green.opacity(0.4)
+    case .transcribing, .prewarming:
+      self.primaryShadowColor = Color.blue.opacity(0.7)
+      self.secondaryShadowColor = Color.blue.opacity(0.4)
+    default:
+      self.primaryShadowColor = .red.opacity(0)
+      self.secondaryShadowColor = .red.opacity(0)
+    }
+  }
+
+  var body: some View {
+    ZStack {
+      // Base capsule
+      Capsule()
+        .fill(backgroundColor.shadow(.inner(color: innerShadowColor, radius: 4)))
+
+      // Border capsule
+      Capsule()
+        .stroke(strokeColor, lineWidth: 1)
+        .blendMode(.screen)
+
+      // Conditionally add overlays for performance
+      innerOverlays
+
+      // Only use GeometryReader when in recording mode
+      if status == .recording {
+        GeometryReader { proxy in
+          RoundedRectangle(cornerRadius: cornerRadius)
+            .fill(Color.red.opacity(peakOverlayOpacity))
+            .frame(width: max(proxy.size.width * (peakPower + 0.6), 0), height: proxy.size.height, alignment: .center)
+            .frame(maxWidth: .infinity, alignment: .center)
+            .blur(radius: 4)
+            .blendMode(.screen)
+            .padding(6)
         }
       }
-        .offset(y: -24)
-        .transition(.opacity)
-        .zIndex(2)
-      }
     }
+    // Apply common modifiers
+    .cornerRadius(cornerRadius)
+    .shadow(color: primaryShadowColor, radius: 4)
+    .shadow(color: secondaryShadowColor, radius: 8)
+    .animation(status == .recording ? .interactiveSpring(response: 0.35) : nil, value: averagePower)
+  }
+}
+
+// Lightweight modifier for effects
+struct LightweightEffects: ViewModifier {
+  var status: TranscriptionIndicatorView.Status
+  var enhanceBaseColor: Color
+
+  func body(content: Content) -> some View {
+    content.changeEffect(
+      .glow(color: status == .enhancing ? enhanceBaseColor.opacity(0.4) : .red.opacity(0.4), radius: 6),
+      value: status
+    )
+  }
+}
+
+// Extension to conditionally apply shine effects
+extension View {
+  @ViewBuilder
+  func apply(needsShine: Bool, transcribeEffect: Int, enhanceEffect: Int) -> some View {
+    if needsShine {
+      self
+        .changeEffect(.shine(angle: .degrees(0), duration: 0.8), value: transcribeEffect)
+        .changeEffect(.shine(angle: .degrees(0), duration: 0.8), value: enhanceEffect)
+    } else {
+      self
+    }
+  }
+}
@@ -153,6 +312,7 @@ struct TranscriptionIndicatorView: View {
   TranscriptionIndicatorView(status: .recording, meter: .init(averagePower: 0.5, peakPower: 0.5))
   TranscriptionIndicatorView(status: .transcribing, meter: .init(averagePower: 0, peakPower: 0))
   TranscriptionIndicatorView(status: .prewarming, meter: .init(averagePower: 0, peakPower: 0))
+  TranscriptionIndicatorView(status: .enhancing, meter: .init(averagePower: 0, peakPower: 0))
 }
 .padding(40)
-}
+}
\ No newline at end of file
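The `apply(needsShine:...)` helper above is a conditional-modifier pattern; one caveat worth knowing is that flipping the `if` inside a `@ViewBuilder` changes the view's structural identity, which resets any in-flight animation on that subtree. That is acceptable here because the shine only runs while transcribing or enhancing. A hypothetical usage sketch (not part of the PR):

import SwiftUI
// The apply(needsShine:...) extension above must be in scope; it relies on Pow's changeEffect.

struct ShineDemo: View {
  @State private var busy = false
  @State private var tick = 0

  var body: some View {
    Capsule()
      .fill(.blue)
      .frame(width: 64, height: 16)
      // Toggling `busy` swaps the @ViewBuilder branch, resetting the subtree.
      .apply(needsShine: busy, transcribeEffect: tick, enhanceEffect: 0)
      .task(id: busy) {
        while busy, !Task.isCancelled {
          tick += 1 // each increment retriggers the shine change effect
          try? await Task.sleep(for: .milliseconds(500))
        }
      }
      .onTapGesture { busy.toggle() }
  }
}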
diff --git a/Hex/Models/HexSettings.swift b/Hex/Models/HexSettings.swift
index ff3804e..2636b77 100644
--- a/Hex/Models/HexSettings.swift
+++ b/Hex/Models/HexSettings.swift
@@ -17,6 +17,12 @@ struct HexSettings: Codable, Equatable {
   var useDoubleTapOnly: Bool = false
   var outputLanguage: String? = nil
   var selectedMicrophoneID: String? = nil
+  var disableAutoCapitalization: Bool = false // New setting for disabling auto-capitalization
+  // AI Enhancement options
+  var useAIEnhancement: Bool = false
+  var selectedAIModel: String = "gemma3"
+  var aiEnhancementPrompt: String = EnhancementOptions.defaultPrompt
+  var aiEnhancementTemperature: Double = 0.3
 
   // Define coding keys to match struct properties
   enum CodingKeys: String, CodingKey {
@@ -33,6 +39,11 @@ struct HexSettings: Codable, Equatable {
     case useDoubleTapOnly
     case outputLanguage
     case selectedMicrophoneID
+    case disableAutoCapitalization
+    case useAIEnhancement
+    case selectedAIModel
+    case aiEnhancementPrompt
+    case aiEnhancementTemperature
   }
 
   init(
@@ -48,7 +59,12 @@ struct HexSettings: Codable, Equatable {
     copyToClipboard: Bool = true,
     useDoubleTapOnly: Bool = false,
     outputLanguage: String? = nil,
-    selectedMicrophoneID: String? = nil
+    selectedMicrophoneID: String? = nil,
+    disableAutoCapitalization: Bool = false,
+    useAIEnhancement: Bool = false,
+    selectedAIModel: String = "gemma3",
+    aiEnhancementPrompt: String = EnhancementOptions.defaultPrompt,
+    aiEnhancementTemperature: Double = 0.3
   ) {
     self.soundEffectsEnabled = soundEffectsEnabled
     self.hotkey = hotkey
@@ -63,6 +79,11 @@ struct HexSettings: Codable, Equatable {
     self.useDoubleTapOnly = useDoubleTapOnly
     self.outputLanguage = outputLanguage
     self.selectedMicrophoneID = selectedMicrophoneID
+    self.disableAutoCapitalization = disableAutoCapitalization
+    self.useAIEnhancement = useAIEnhancement
+    self.selectedAIModel = selectedAIModel
+    self.aiEnhancementPrompt = aiEnhancementPrompt
+    self.aiEnhancementTemperature = aiEnhancementTemperature
   }
 
   // Custom decoder that handles missing fields
@@ -92,17 +113,59 @@ struct HexSettings: Codable, Equatable {
     useDoubleTapOnly =
       try container.decodeIfPresent(Bool.self, forKey: .useDoubleTapOnly) ?? false
     outputLanguage = try container.decodeIfPresent(String.self, forKey: .outputLanguage)
-    selectedMicrophoneID = try container.decodeIfPresent(String.self, forKey: .selectedMicrophoneID)
+    selectedMicrophoneID = try container.decodeIfPresent(String.self, forKey: .selectedMicrophoneID)
+    disableAutoCapitalization = try container.decodeIfPresent(Bool.self, forKey: .disableAutoCapitalization) ?? false
+    // AI Enhancement settings
+    useAIEnhancement = try container.decodeIfPresent(Bool.self, forKey: .useAIEnhancement) ?? false
+    selectedAIModel = try container.decodeIfPresent(String.self, forKey: .selectedAIModel) ?? "gemma3"
+    aiEnhancementPrompt = try container.decodeIfPresent(String.self, forKey: .aiEnhancementPrompt) ?? EnhancementOptions.defaultPrompt
+    aiEnhancementTemperature = try container.decodeIfPresent(Double.self, forKey: .aiEnhancementTemperature) ?? 0.3
   }
 }
 
+// Cache for HexSettings to reduce disk I/O
+private var cachedSettings: HexSettings? = nil
+private var lastSettingsLoadTime: Date = .distantPast
+
+// Helper function to get cached settings or load from disk
+func getCachedSettings() -> HexSettings {
+  // Use cached settings if they exist and are recent (within the last 5 seconds)
+  if let cached = cachedSettings,
+     Date().timeIntervalSince(lastSettingsLoadTime) < 5.0 {
+    return cached
+  }
+
+  // Otherwise read from disk
+  do {
+    let url = URL.documentsDirectory.appending(component: "hex_settings.json")
+    if FileManager.default.fileExists(atPath: url.path) {
+      let data = try Data(contentsOf: url)
+      let settings = try JSONDecoder().decode(HexSettings.self, from: data)
+
+      // Update cache
+      cachedSettings = settings
+      lastSettingsLoadTime = Date()
+
+      return settings
+    }
+  } catch {
+    print("Error loading settings: \(error)")
+  }
+
+  // On error, or if the file doesn't exist, return default settings
+  let defaultSettings = HexSettings()
+  cachedSettings = defaultSettings
+  lastSettingsLoadTime = Date()
+  return defaultSettings
+}
+
 extension SharedReaderKey where Self == FileStorageKey<HexSettings>.Default {
   static var hexSettings: Self {
     Self[
       .fileStorage(URL.documentsDirectory.appending(component: "hex_settings.json")),
-      default: .init()
+      default: getCachedSettings()
     ]
   }
 }
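Because every new key is read with `decodeIfPresent ... ?? default`, settings files written by older builds continue to decode. A playground-style check, illustrative only and assuming the custom decoder defaults the remaining fields the same way its `decodeIfPresent` pattern suggests:

import Foundation

// A settings file from before this change: none of the AI keys are present.
let legacyJSON = Data("""
{
  "soundEffectsEnabled" : true,
  "openOnLogin" : false
}
""".utf8)

let decoded = try JSONDecoder().decode(HexSettings.self, from: legacyJSON)
assert(decoded.useAIEnhancement == false)       // missing key -> default
assert(decoded.selectedAIModel == "gemma3")     // missing key -> default
assert(decoded.aiEnhancementTemperature == 0.3) // missing key -> default

One trade-off to be aware of: `getCachedSettings()` only feeds the shared key's default, and its 5-second staleness window means a read shortly after an external write can return slightly stale values; that is the accepted cost of reducing disk I/O here.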
diff --git a/Localizable.xcstrings b/Localizable.xcstrings
index a78507e..e4e9c10 100644
--- a/Localizable.xcstrings
+++ b/Localizable.xcstrings
@@ -3,6 +3,9 @@
   "strings" : {
     "" : {
       "shouldTranslate" : false
+    },
+    "%.2f" : {
+
     },
     "•" : {
       "shouldTranslate" : false
@@ -31,6 +34,12 @@
     },
     "Accuracy" : {
 
+    },
+    "AI Enhancement" : {
+
+    },
+    "AI enhancement requires Ollama to be installed and running locally." : {
+
     },
     "Are you sure you want to delete all transcripts? This action cannot be undone." : {
       "comment" : "Delete transcript history confirm",
@@ -45,6 +54,9 @@
     },
     "Become a Sponsor" : {
 
+    },
+    "Browse Models" : {
+
     },
     "Cancel" : {
       "comment" : "Cancel deleting All Transcripts",
@@ -76,6 +88,9 @@
           }
         }
       }
+    },
+    "Check Connection" : {
+
     },
     "Check for Updates" : {
       "comment" : "Check for updates button in About section of Settings",
@@ -133,6 +148,9 @@
     },
     "Copy transcription text to clipboard in addition to pasting it" : {
 
+    },
+    "Creativity" : {
+
     },
     "Delete" : {
 
@@ -181,6 +199,15 @@
           }
        }
       }
+    },
+    "Disable auto-capitalization" : {
+
+    },
+    "Disable automatic capitalization in transcriptions" : {
+
+    },
+    "Done" : {
+
     },
     "Download" : {
 
@@ -195,6 +222,9 @@
          }
        }
      }
+    },
+    "Download Ollama" : {
+
     },
     "Download Selected Model" : {
       "comment" : "In Transcription Model section in settings.",
@@ -222,6 +252,15 @@
     },
     "Downloading model..." : {
 
+    },
+    "Edit" : {
+
+    },
+    "Enable AI to improve transcription quality by fixing grammar, formatting, and errors." : {
+
+    },
+    "Enhancement Prompt" : {
+
     },
     "Ensure Hex can access your microphone and system accessibility features." : {
       "comment" : "Footer for permissions section in settings",
@@ -244,6 +283,9 @@
          }
        }
      }
+    },
+    "Error: %@" : {
+
     },
     "General" : {
       "comment" : "General section in Settings Header.",
@@ -312,6 +354,21 @@
     },
     "Input Device" : {
 
+    },
+    "Instructions" : {
+
+    },
+    "Language Model" : {
+
+    },
+    "Loading available models..." : {
+
+    },
+    "Lower values produce more consistent, conservative improvements. Higher values allow more creative rewording." : {
+
+    },
+    "Make changes to customize how the AI enhances your transcriptions. Be specific about what should be preserved or changed." : {
+
     },
     "Microphone" : {
       "comment" : "Microphone permission.",
@@ -332,6 +389,12 @@
     },
     "Model prewarming..." : {
 
+    },
+    "Model Selection" : {
+
+    },
+    "No models found in Ollama" : {
+
     },
     "No models found." : {
       "comment" : "Replacement text in transcription model section when no available models are found",
@@ -355,6 +418,18 @@
          }
        }
      }
+    },
+    "Ollama Connected" : {
+
+    },
+    "Ollama connection required to view models" : {
+
+    },
+    "Ollama Not Connected" : {
+
+    },
+    "Ollama Status" : {
+
     },
     "Open on Login" : {
       "comment" : "Label for general setting to open app on login.",
@@ -413,6 +488,9 @@
          }
        }
      }
+    },
+    "Precision" : {
+
     },
     "Prevent System Sleep while Recording" : {
       "comment" : "Label for general setting whether to prevent system sleep while recording.",
@@ -441,6 +519,15 @@
          }
        }
      }
+    },
+    "Reset to Default" : {
+
+    },
+    "Response Style" : {
+
+    },
+    "Select AI model:" : {
+
     },
     "Selected device not connected. System default will be used." : {
 
@@ -519,6 +606,9 @@
     },
     "Size" : {
 
+    },
+    "Smaller models are faster but less capable. Llama3 offers a good balance of speed and quality." : {
+
     },
     "Sound" : {
       "comment" : "sound section in general settings.",
@@ -561,6 +651,12 @@
     },
     "System Default" : {
 
+    },
+    "These instructions tell the AI how to improve your transcribed text." : {
+
+    },
+    "To set up Ollama:" : {
+
     },
     "Transcription Model" : {
       "comment" : "Label for Transcription Model Section",
@@ -572,6 +668,9 @@
          }
        }
      }
+    },
+    "Use AI Enhancement" : {
+
     },
     "Use clipboard to insert" : {
       "comment" : "Label for toggle in general section.",