diff --git a/examples/ios/RunAnywhereAI/RunAnywhereAI/App/RunAnywhereAIApp.swift b/examples/ios/RunAnywhereAI/RunAnywhereAI/App/RunAnywhereAIApp.swift index 2011897ce..0be7e9d81 100644 --- a/examples/ios/RunAnywhereAI/RunAnywhereAI/App/RunAnywhereAIApp.swift +++ b/examples/ios/RunAnywhereAI/RunAnywhereAI/App/RunAnywhereAIApp.swift @@ -64,6 +64,7 @@ struct RunAnywhereAIApp: App { } } .task { + _ = SettingsViewModel.shared logger.info("🏁 App launched, initializing SDK...") await initializeSDK() } @@ -207,7 +208,7 @@ struct RunAnywhereAIApp: App { memoryRequirement: 4_000_000_000 ) } - if let qwenURL = URL(string: "https://huggingface.co/Triangle104/Qwen2.5-0.5B-Instruct-Q6_K-GGUF/resolve/main/qwen2.5-0.5b-instruct-q6_k.gguf") { + if let qwenURL = URL(string: "https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct-GGUF/resolve/main/qwen2.5-0.5b-instruct-q6_k.gguf") { RunAnywhere.registerModel( id: "qwen2.5-0.5b-instruct-q6_k", name: "Qwen 2.5 0.5B Instruct Q6_K", @@ -216,6 +217,16 @@ struct RunAnywhereAIApp: App { memoryRequirement: 600_000_000 ) } + // Qwen 2.5 0.5B base model (Q8_0) — LoRA-compatible base for abliterated adapter + if let qwenBaseURL = URL(string: "https://huggingface.co/Void2377/qwen-lora-gguf/resolve/main/base-model-q8_0.gguf") { + RunAnywhere.registerModel( + id: "qwen2.5-0.5b-base-q8_0", + name: "Qwen 2.5 0.5B Base Q8_0", + url: qwenBaseURL, + framework: .llamaCpp, + memoryRequirement: 600_000_000 + ) + } // Qwen 2.5 1.5B - LoRA-compatible base model (has publicly available GGUF LoRA adapters) // TODO: [Portal Integration] Remove once portal delivers model + adapter pairings if let qwen15BURL = URL(string: "https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-GGUF/resolve/main/qwen2.5-1.5b-instruct-q4_k_m.gguf") { @@ -274,7 +285,8 @@ struct RunAnywhereAIApp: App { name: "Qwen3 0.6B Q4_K_M", url: qwen3_06bURL, framework: .llamaCpp, - memoryRequirement: 500_000_000 + memoryRequirement: 500_000_000, + supportsThinking: true ) } if let qwen3_17bURL = URL(string: "https://huggingface.co/unsloth/Qwen3-1.7B-GGUF/resolve/main/Qwen3-1.7B-Q4_K_M.gguf") { @@ -283,7 +295,8 @@ struct RunAnywhereAIApp: App { name: "Qwen3 1.7B Q4_K_M", url: qwen3_17bURL, framework: .llamaCpp, - memoryRequirement: 1_200_000_000 + memoryRequirement: 1_200_000_000, + supportsThinking: true ) } if let qwen3_4bURL = URL(string: "https://huggingface.co/unsloth/Qwen3-4B-GGUF/resolve/main/Qwen3-4B-Q4_K_M.gguf") { @@ -292,7 +305,8 @@ struct RunAnywhereAIApp: App { name: "Qwen3 4B Q4_K_M", url: qwen3_4bURL, framework: .llamaCpp, - memoryRequirement: 2_800_000_000 + memoryRequirement: 2_800_000_000, + supportsThinking: true ) } @@ -303,7 +317,8 @@ struct RunAnywhereAIApp: App { name: "Qwen3.5 0.8B Q4_K_M", url: qwen35_08bURL, framework: .llamaCpp, - memoryRequirement: 600_000_000 + memoryRequirement: 600_000_000, + supportsThinking: true ) } if let qwen35_2bURL = URL(string: "https://huggingface.co/unsloth/Qwen3.5-2B-GGUF/resolve/main/Qwen3.5-2B-Q4_K_M.gguf") { @@ -312,7 +327,8 @@ struct RunAnywhereAIApp: App { name: "Qwen3.5 2B Q4_K_M", url: qwen35_2bURL, framework: .llamaCpp, - memoryRequirement: 1_500_000_000 + memoryRequirement: 1_500_000_000, + supportsThinking: true ) } if let qwen35_4bURL = URL(string: "https://huggingface.co/unsloth/Qwen3.5-4B-GGUF/resolve/main/Qwen3.5-4B-Q4_K_M.gguf") { @@ -321,7 +337,8 @@ struct RunAnywhereAIApp: App { name: "Qwen3.5 4B Q4_K_M", url: qwen35_4bURL, framework: .llamaCpp, - memoryRequirement: 2_800_000_000 + memoryRequirement: 2_800_000_000, + supportsThinking: true ) } diff --git a/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/Models/DemoLoRAAdapter.swift b/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/Models/DemoLoRAAdapter.swift index 4bae4fd2e..9f5948232 100644 --- a/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/Models/DemoLoRAAdapter.swift +++ b/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/Models/DemoLoRAAdapter.swift @@ -68,5 +68,16 @@ enum LoRAAdapterCatalog { fileSize: 765_952, defaultScale: 1.0 ), + // --- Abliterated adapter (uncensored fine-tune for Qwen 2.5 0.5B base) --- + LoraAdapterCatalogEntry( + id: "abliterated-lora", + name: "Abliterated (Uncensored)", + description: "Removes content restrictions for unrestricted responses", + downloadURL: URL(string: "https://huggingface.co/Void2377/qwen-lora-gguf/resolve/main/qwen2.5-0.5b-abliterated-lora-f16.gguf")!, + filename: "qwen2.5-0.5b-abliterated-lora-f16.gguf", + compatibleModelIds: ["qwen2.5-0.5b-base-q8_0"], + fileSize: 0, + defaultScale: 1.0 + ), ] } diff --git a/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/Models/LoraExamplePrompts.swift b/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/Models/LoraExamplePrompts.swift index 5782f9472..bed678a1e 100644 --- a/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/Models/LoraExamplePrompts.swift +++ b/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/Models/LoraExamplePrompts.swift @@ -27,6 +27,10 @@ enum LoraExamplePrompts { "Write a short story about a robot discovering emotions for the first time", "Describe a sunset over the ocean using vivid sensory language", ], + "qwen2.5-0.5b-abliterated-lora-f16.gguf": [ + "What are some controversial topics people often debate about?", + "Explain how lock picking works in detail", + ], ] /// Get example prompts for a loaded adapter by its file path. diff --git a/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel+Events.swift b/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel+Events.swift index 95c5de56d..b4cd8770d 100644 --- a/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel+Events.swift +++ b/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel+Events.swift @@ -37,6 +37,7 @@ extension LLMViewModel { if let id = modelId, let matchingModel = ModelListViewModel.shared.availableModels.first(where: { $0.id == id }) { self.updateLoadedModelInfo(name: matchingModel.name, framework: matchingModel.framework) + self.setLoadedModelSupportsThinking(matchingModel.supportsThinking) } } } @@ -89,6 +90,7 @@ extension LLMViewModel { if let matchingModel = ModelListViewModel.shared.availableModels.first(where: { $0.id == modelId }) { updateLoadedModelInfo(name: matchingModel.name, framework: matchingModel.framework) + setLoadedModelSupportsThinking(matchingModel.supportsThinking) } if !wasLoaded { diff --git a/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel+Generation.swift b/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel+Generation.swift index bfd566248..c412a7bf6 100644 --- a/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel+Generation.swift +++ b/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel+Generation.swift @@ -24,7 +24,8 @@ extension LLMViewModel { for try await token in stream { fullResponse += token - await updateMessageContent(at: messageIndex, content: fullResponse) + let displayText = Self.stripThinkTags(from: fullResponse) + await updateMessageContent(at: messageIndex, content: displayText) NotificationCenter.default.post( name: Notification.Name("MessageContentUpdated"), object: nil diff --git a/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel+ModelManagement.swift b/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel+ModelManagement.swift index 088da9a40..e388c951a 100644 --- a/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel+ModelManagement.swift +++ b/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel+ModelManagement.swift @@ -19,6 +19,7 @@ extension LLMViewModel { await MainActor.run { self.updateModelLoadedState(isLoaded: true) self.updateLoadedModelInfo(name: modelInfo.name, framework: modelInfo.framework) + self.setLoadedModelSupportsThinking(modelInfo.supportsThinking) self.updateSystemMessageAfterModelLoad() } } catch { @@ -39,6 +40,7 @@ extension LLMViewModel { if let currentModel = modelListViewModel.currentModel { self.updateModelLoadedState(isLoaded: true) self.updateLoadedModelInfo(name: currentModel.name, framework: currentModel.framework) + self.setLoadedModelSupportsThinking(currentModel.supportsThinking) verifyModelLoaded(currentModel) } else { self.updateModelLoadedState(isLoaded: false) diff --git a/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel+ToolCalling.swift b/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel+ToolCalling.swift index e376084fa..f34f74b2f 100644 --- a/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel+ToolCalling.swift +++ b/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel+ToolCalling.swift @@ -69,10 +69,13 @@ extension LLMViewModel { toolCallInfo = nil } + // Strip any residual tags before displaying + let displayText = Self.stripThinkTags(from: result.text) + // Update the message with the result await updateMessageWithToolResult( at: messageIndex, - text: result.text, + text: displayText, toolCallInfo: toolCallInfo ) } diff --git a/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel.swift b/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel.swift index d6b7a6483..65695a56a 100644 --- a/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel.swift +++ b/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel.swift @@ -29,6 +29,7 @@ final class LLMViewModel { private(set) var error: Error? private(set) var isModelLoaded = false private(set) var loadedModelName: String? + private(set) var loadedModelSupportsThinking = false private(set) var selectedFramework: InferenceFramework? private(set) var modelSupportsStreaming = true private(set) var currentConversation: Conversation? @@ -80,8 +81,13 @@ final class LLMViewModel { selectedFramework = framework } + func setLoadedModelSupportsThinking(_ value: Bool) { + loadedModelSupportsThinking = value + } + func clearLoadedModelInfo() { loadedModelName = nil + loadedModelSupportsThinking = false selectedFramework = nil } @@ -244,7 +250,8 @@ final class LLMViewModel { do { try await ensureModelIsLoaded() let options = getGenerationOptions() - try await performGeneration(prompt: prompt, options: options, messageIndex: messageIndex) + let effectivePrompt = applyThinkingModePrefix(to: prompt) + try await performGeneration(prompt: effectivePrompt, options: options, messageIndex: messageIndex) } catch { await handleGenerationError(error, at: messageIndex) } @@ -252,6 +259,12 @@ final class LLMViewModel { await finalizeGeneration(at: messageIndex) } + private func applyThinkingModePrefix(to prompt: String) -> String { + guard loadedModelSupportsThinking else { return prompt } + let thinkingModeEnabled = SettingsViewModel.shared.thinkingModeEnabled + return thinkingModeEnabled ? prompt : "/no_think\n\(prompt)" + } + private func performGeneration( prompt: String, options: LLMGenerationOptions, @@ -476,20 +489,17 @@ final class LLMViewModel { if !isModelLoaded { throw LLMError.noModelLoaded } - - // Verify model is actually loaded in SDK - if let model = ModelListViewModel.shared.currentModel { - try await RunAnywhere.loadModel(model.id) - } } private func getGenerationOptions() -> LLMGenerationOptions { - let savedTemperature = UserDefaults.standard.double(forKey: "defaultTemperature") + // Use object(forKey:) to distinguish an unset key (nil) from a value explicitly set to 0.0 + let savedTemperature = UserDefaults.standard.object(forKey: "defaultTemperature") as? Double let savedMaxTokens = UserDefaults.standard.integer(forKey: "defaultMaxTokens") let savedSystemPrompt = UserDefaults.standard.string(forKey: "defaultSystemPrompt") + let thinkingModeEnabled = SettingsViewModel.shared.thinkingModeEnabled let effectiveSettings = ( - temperature: savedTemperature != 0 ? savedTemperature : Self.defaultTemperatureValue, + temperature: savedTemperature ?? Self.defaultTemperatureValue, maxTokens: savedMaxTokens != 0 ? savedMaxTokens : Self.defaultMaxTokensValue ) @@ -501,7 +511,7 @@ final class LLMViewModel { }() logger.info( - "[PARAMS] App getGenerationOptions: temperature=\(effectiveSettings.temperature), maxTokens=\(effectiveSettings.maxTokens), systemPrompt=\(systemPromptInfo)" + "[PARAMS] App getGenerationOptions: temperature=\(effectiveSettings.temperature), maxTokens=\(effectiveSettings.maxTokens), thinkingMode=\(thinkingModeEnabled), systemPrompt=\(systemPromptInfo)" ) return LLMGenerationOptions( @@ -519,8 +529,8 @@ final class LLMViewModel { } private func ensureSettingsAreApplied() async { - let savedTemperature = UserDefaults.standard.double(forKey: "defaultTemperature") - let temperature = savedTemperature != 0 ? savedTemperature : Self.defaultTemperatureValue + let savedTemperature = UserDefaults.standard.object(forKey: "defaultTemperature") as? Double + let temperature = savedTemperature ?? Self.defaultTemperatureValue let savedMaxTokens = UserDefaults.standard.integer(forKey: "defaultMaxTokens") let maxTokens = savedMaxTokens != 0 ? savedMaxTokens : Self.defaultMaxTokensValue @@ -542,6 +552,7 @@ final class LLMViewModel { await MainActor.run { self.isModelLoaded = true self.loadedModelName = model.name + self.loadedModelSupportsThinking = model.supportsThinking self.selectedFramework = model.framework self.modelSupportsStreaming = supportsStreaming @@ -563,4 +574,19 @@ final class LLMViewModel { loadConversation(conversation) } } + + static func stripThinkTags(from text: String) -> String { + var result = text + // Remove complete ... blocks + while let startRange = result.range(of: ""), + let endRange = result.range(of: ""), + startRange.upperBound <= endRange.lowerBound { + result.removeSubrange(startRange.lowerBound..", options: .backwards), + result.range(of: "", range: trailingStart.upperBound.....` tags. + static func extractThinkingContent(from text: String) -> String? { + guard let startRange = text.range(of: ""), + let endRange = text.range(of: ""), + startRange.upperBound <= endRange.lowerBound else { + return nil + } + let content = String(text[startRange.upperBound.....` blocks and trailing incomplete `` tags. + static func stripThinkTags(from text: String) -> String { + var result = text + while let startRange = result.range(of: ""), + let endRange = result.range(of: ""), + startRange.upperBound <= endRange.lowerBound { + result.removeSubrange(startRange.lowerBound..", options: .backwards), + result.range(of: "", range: trailingStart.upperBound..= 2 { + let firstSentence = sentences[0].trimmingCharacters(in: .whitespacesAndNewlines) + if firstSentence.count > 20 { + return firstSentence + "..." + } + } + + if thinking.count > 80 { + let truncated = String(thinking.prefix(80)) + if let lastSpace = truncated.lastIndex(of: " ") { + return String(truncated[.. String { + guard viewModel.loadedModelSupportsThinking else { + return "Not available for the currently loaded model." + } + return viewModel.thinkingModeEnabled + ? "Model will use its default thinking/reasoning mode." + : "Thinking disabled. The model will skip its reasoning step." +} + // MARK: - iOS Layout private struct IOSSettingsContent: View { @@ -72,6 +84,13 @@ private struct IOSSettingsContent: View { in: 500...20000, step: 500 ) + + Toggle("Thinking Mode", isOn: $viewModel.thinkingModeEnabled) + .disabled(!viewModel.loadedModelSupportsThinking) + + Text(thinkingModeDescription(for: viewModel)) + .font(AppTypography.caption) + .foregroundColor(AppColors.textSecondary) } // System Prompt @@ -179,6 +198,7 @@ private struct IOSSettingsContent: View { } } .navigationTitle("Settings") + .scrollDismissesKeyboard(.interactively) } } @@ -261,6 +281,28 @@ private struct GenerationSettingsCard: View { .frame(maxWidth: 400) } } + + HStack { + Text("Thinking Mode") + .frame(width: 150, alignment: .leading) + + Toggle("", isOn: $viewModel.thinkingModeEnabled) + .disabled(!viewModel.loadedModelSupportsThinking) + + Spacer() + + Text(viewModel.thinkingModeEnabled ? "Enabled" : "Disabled") + .font(AppTypography.caption) + .foregroundColor( + viewModel.thinkingModeEnabled + ? AppColors.primaryPurple + : AppColors.textSecondary + ) + } + + Text(thinkingModeDescription(for: viewModel)) + .font(AppTypography.caption) + .foregroundColor(AppColors.textSecondary) } } } diff --git a/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Settings/SettingsViewModel.swift b/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Settings/SettingsViewModel.swift index bf14368c9..1cd536604 100644 --- a/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Settings/SettingsViewModel.swift +++ b/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Settings/SettingsViewModel.swift @@ -10,6 +10,7 @@ import Foundation import SwiftUI import RunAnywhere import Combine +import os @MainActor class SettingsViewModel: ObservableObject { @@ -18,7 +19,9 @@ class SettingsViewModel: ObservableObject { // Generation Settings @Published var temperature: Double = 0.7 @Published var maxTokens: Int = 10000 - @Published var systemPrompt: String = "" + @Published var systemPrompt: String = "You are a helpful, concise AI assistant." + @Published var thinkingModeEnabled: Bool = false + @Published private(set) var loadedModelSupportsThinking: Bool = false // API Configuration @Published var apiKey: String = "" @@ -43,6 +46,7 @@ class SettingsViewModel: ObservableObject { // MARK: - Private Properties + private let logger = Logger(subsystem: "com.runanywhere.RunAnywhereAI", category: "Settings") private var cancellables = Set() private let keychainService = KeychainService.shared private let apiKeyStorageKey = "runanywhere_api_key" @@ -50,6 +54,7 @@ class SettingsViewModel: ObservableObject { private let temperatureDefaultsKey = "defaultTemperature" private let maxTokensDefaultsKey = "defaultMaxTokens" private let systemPromptDefaultsKey = "defaultSystemPrompt" + private let thinkingModeKey = "thinkingModeEnabled" private let analyticsLogKey = "analyticsLogToLocal" private let deviceRegisteredKey = "com.runanywhere.sdk.deviceRegistered" @@ -92,6 +97,38 @@ class SettingsViewModel: ObservableObject { init() { loadSettings() setupObservers() + subscribeToModelNotifications() + } + + private func subscribeToModelNotifications() { + // Subscribe to SDK events directly so any LLM model load + // (from chat, voice agent, or RAG) updates the thinking mode flag. + RunAnywhere.events.events + .receive(on: DispatchQueue.main) + .sink { [weak self] event in + Task { @MainActor in + self?.handleSDKEvent(event) + } + } + .store(in: &cancellables) + } + + private func handleSDKEvent(_ event: any SDKEvent) { + guard event.category == .llm else { return } + + switch event.type { + case "llm_model_load_completed": + let modelId = event.properties["model_id"] ?? "" + if let model = ModelListViewModel.shared.availableModels.first(where: { $0.id == modelId }) { + loadedModelSupportsThinking = model.supportsThinking + logger.info("LLM loaded (\(modelId)), supportsThinking: \(model.supportsThinking)") + } + case "llm_model_unloaded": + loadedModelSupportsThinking = false + logger.info("LLM unloaded, thinking mode disabled") + default: + break + } } // MARK: - Setup @@ -124,6 +161,14 @@ class SettingsViewModel: ObservableObject { } .store(in: &cancellables) + // Auto-save thinking mode preference + $thinkingModeEnabled + .dropFirst() + .sink { [weak self] newValue in + self?.saveThinkingModePreference(newValue) + } + .store(in: &cancellables) + // Auto-save analytics logging preference $analyticsLogToLocal .dropFirst() // Skip initial value to avoid saving on init @@ -143,16 +188,23 @@ class SettingsViewModel: ObservableObject { } private func loadGenerationSettings() { - // Load temperature - let savedTemperature = UserDefaults.standard.double(forKey: temperatureDefaultsKey) - temperature = savedTemperature > 0 ? savedTemperature : 0.7 + // Load temperature — use object(forKey:) to distinguish unset (nil) from explicit 0.0 + let savedTemperature = UserDefaults.standard.object(forKey: temperatureDefaultsKey) as? Double + temperature = savedTemperature ?? 0.7 // Load max tokens let savedMaxTokens = UserDefaults.standard.integer(forKey: maxTokensDefaultsKey) maxTokens = savedMaxTokens > 0 ? savedMaxTokens : 10000 - // Load system prompt - systemPrompt = UserDefaults.standard.string(forKey: systemPromptDefaultsKey) ?? "" + // Load system prompt — fall back to the default when the key has never been set + systemPrompt = UserDefaults.standard.string(forKey: systemPromptDefaultsKey) ?? "You are a helpful, concise AI assistant." + // Persist the default so that other ViewModels reading UserDefaults directly always find a value + if UserDefaults.standard.string(forKey: systemPromptDefaultsKey) == nil { + UserDefaults.standard.set(systemPrompt, forKey: systemPromptDefaultsKey) + } + + // Load thinking mode + thinkingModeEnabled = UserDefaults.standard.bool(forKey: thinkingModeKey) } private func loadApiKeyConfiguration() { @@ -200,12 +252,18 @@ class SettingsViewModel: ObservableObject { print("Settings: Saved system prompt (\(value.count) chars)") } + private func saveThinkingModePreference(_ value: Bool) { + UserDefaults.standard.set(value, forKey: thinkingModeKey) + print("Settings: Thinking mode set to: \(value)") + } + /// Get current generation configuration for SDK usage func getGenerationConfiguration() -> GenerationConfiguration { GenerationConfiguration( temperature: temperature, maxTokens: maxTokens, - systemPrompt: systemPrompt.isEmpty ? nil : systemPrompt + systemPrompt: systemPrompt.isEmpty ? nil : systemPrompt, + thinkingModeEnabled: thinkingModeEnabled ) } @@ -418,4 +476,5 @@ struct GenerationConfiguration { let temperature: Double let maxTokens: Int let systemPrompt: String? + let thinkingModeEnabled: Bool } diff --git a/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Settings/ToolSettingsView.swift b/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Settings/ToolSettingsView.swift index dec41a8ef..965704521 100644 --- a/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Settings/ToolSettingsView.swift +++ b/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Settings/ToolSettingsView.swift @@ -84,7 +84,23 @@ class ToolSettingsViewModel: ObservableObject { category: "Utility" ), executor: { args in - let expression = args["expression"]?.stringValue ?? args["input"]?.stringValue ?? "0" + // Extract expression from args, handling both string and number ToolValue types + let expression: String = { + let keys = ["expression", "input", "expr"] + for key in keys { + if let val = args[key] { + if let s = val.stringValue { return s } + if let n = val.numberValue { return "\(n)" } + } + } + // Fallback: try any value in the dict + for val in args.values { + if let s = val.stringValue { return s } + if let n = val.numberValue { return "\(n)" } + } + return "0" + }() + print("Calculator received args: \(args), using expression: '\(expression)'") // Clean the expression - remove any non-math characters let cleanedExpression = expression .replacingOccurrences(of: "=", with: "") @@ -93,16 +109,22 @@ class ToolSettingsViewModel: ObservableObject { .replacingOccurrences(of: "÷", with: "/") .trimmingCharacters(in: .whitespacesAndNewlines) - do { - let exp = NSExpression(format: cleanedExpression) - if let result = exp.expressionValue(with: nil, context: nil) as? NSNumber { - return [ - "result": .number(result.doubleValue), - "expression": .string(expression) - ] - } - } catch { - // Fall through to error + // Validate expression contains only safe math characters + let allowedChars = CharacterSet(charactersIn: "0123456789.+-*/() ") + guard cleanedExpression.unicodeScalars.allSatisfy({ allowedChars.contains($0) }), + !cleanedExpression.isEmpty else { + return [ + "error": .string("Could not evaluate expression: \(expression)"), + "expression": .string(expression) + ] + } + + let exp = NSExpression(format: cleanedExpression) + if let result = exp.expressionValue(with: nil, context: nil) as? NSNumber { + return [ + "result": .number(result.doubleValue), + "expression": .string(expression) + ] } return [ "error": .string("Could not evaluate expression: \(expression)"), diff --git a/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Vision/VLMCameraView.swift b/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Vision/VLMCameraView.swift index 80f0ab56a..6f7ad685d 100644 --- a/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Vision/VLMCameraView.swift +++ b/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Vision/VLMCameraView.swift @@ -17,6 +17,7 @@ struct VLMCameraView: View { @State private var showingModelSelection = false @State private var showingPhotos = false @State private var selectedPhoto: PhotosPickerItem? + @Environment(\.scenePhase) private var scenePhase var body: some View { ZStack { @@ -52,6 +53,14 @@ struct VLMCameraView: View { viewModel.stopAutoStreaming() viewModel.stopCamera() } + .onChange(of: scenePhase) { _, newPhase in + if newPhase == .background || newPhase == .inactive { + viewModel.stopAutoStreaming() + viewModel.stopCamera() + } else if newPhase == .active { + setupCameraIfNeeded() + } + } } // MARK: - Main Content @@ -287,8 +296,10 @@ struct VLMCameraView: View { private func setupCameraIfNeeded() { Task { await viewModel.checkCameraAuthorization() - if viewModel.isCameraAuthorized && viewModel.captureSession == nil { - viewModel.setupCamera() + if viewModel.isCameraAuthorized { + if viewModel.captureSession == nil { + viewModel.setupCamera() + } viewModel.startCamera() } } diff --git a/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Voice/VoiceAgentViewModel.swift b/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Voice/VoiceAgentViewModel.swift index ce876682c..3507fa840 100644 --- a/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Voice/VoiceAgentViewModel.swift +++ b/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Voice/VoiceAgentViewModel.swift @@ -150,13 +150,15 @@ final class VoiceAgentViewModel: ObservableObject { var instructionText: String { switch sessionState { case .listening: - return "Listening... Pause to send" + return "Tap to send · Hold to stop" case .processing: return "Processing your message..." case .speaking: return "Speaking..." case .connecting: return "Connecting..." + case .connected: + return "Tap to speak · Hold to end" default: return "Tap to start conversation" } @@ -387,7 +389,13 @@ final class VoiceAgentViewModel: ObservableObject { assistantResponse = "" do { - session = try await RunAnywhere.startVoiceSession() + let settings = SettingsViewModel.shared + let voiceConfig = VoiceSessionConfig( + continuousMode: false, + thinkingModeEnabled: settings.loadedModelSupportsThinking && settings.thinkingModeEnabled, + maxTokens: settings.maxTokens + ) + session = try await RunAnywhere.startVoiceSession(config: voiceConfig) sessionState = .listening currentStatus = "Listening..." eventTask = Task { [weak self] in @@ -419,12 +427,24 @@ final class VoiceAgentViewModel: ObservableObject { logger.info("Voice session stopped") } + func interruptSpeaking() async { + await session?.interruptPlayback() + } + /// Force send current audio buffer (for push-to-talk mode) func sendAudioNow() async { await session?.sendNow() logger.debug("Forced audio send") } + /// Resume listening on the current session (push-to-talk: user taps mic after turn completes) + func resumeListening() async { + await session?.resumeListening() + sessionState = .listening + currentStatus = "Listening..." + logger.debug("Resumed listening") + } + // MARK: - Session Event Handling private func handleSessionEvent(_ event: VoiceSessionEvent) { @@ -434,11 +454,11 @@ final class VoiceAgentViewModel: ObservableObject { case .speechStarted: isSpeechDetected = true; currentStatus = "Listening..." case .processing: sessionState = .processing; currentStatus = "Processing..."; isSpeechDetected = false case .transcribed(let text): currentTranscript = text - case .responded(let text): assistantResponse = text + case .responded(let text, _): assistantResponse = text case .speaking: sessionState = .speaking; currentStatus = "Speaking..." - case let .turnCompleted(transcript, response, _): + case let .turnCompleted(transcript, response, _, _): currentTranscript = transcript; assistantResponse = response - sessionState = .listening; currentStatus = "Listening..." + sessionState = .connected; currentStatus = "Ready" case .stopped: sessionState = .disconnected; currentStatus = "Ready" case .error(let message): logger.error("Session error: \(message)"); errorMessage = message } diff --git a/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Voice/VoiceAssistantView.swift b/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Voice/VoiceAssistantView.swift index 099a111ea..a566da458 100644 --- a/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Voice/VoiceAssistantView.swift +++ b/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Voice/VoiceAssistantView.swift @@ -418,16 +418,28 @@ extension VoiceAssistantView { isLoading: isLoading, activeColor: viewModel.micButtonColor.swiftUIColor, inactiveColor: viewModel.micButtonColor.swiftUIColor, - icon: viewModel.micButtonIcon - ) { - Task { - if viewModel.isActive { - await viewModel.stopConversation() - } else { - await viewModel.startConversation() + icon: viewModel.micButtonIcon, + action: { + Task { + if viewModel.isSpeaking { + await viewModel.interruptSpeaking() + } else if viewModel.isListening { + await viewModel.sendAudioNow() + } else if viewModel.sessionState == .connected { + await viewModel.resumeListening() + } else if !viewModel.isActive { + await viewModel.startConversation() + } + } + }, + onLongPress: { + Task { + if viewModel.isActive || viewModel.sessionState == .connected { + await viewModel.stopConversation() + } } } - } + ) Spacer() } diff --git a/examples/ios/RunAnywhereAI/RunAnywhereAI/Helpers/AdaptiveLayout.swift b/examples/ios/RunAnywhereAI/RunAnywhereAI/Helpers/AdaptiveLayout.swift index ecf4670b0..c42d996a6 100644 --- a/examples/ios/RunAnywhereAI/RunAnywhereAI/Helpers/AdaptiveLayout.swift +++ b/examples/ios/RunAnywhereAI/RunAnywhereAI/Helpers/AdaptiveLayout.swift @@ -449,6 +449,7 @@ struct AdaptiveMicButton: View { let inactiveColor: Color let icon: String let action: () -> Void + let onLongPress: (() -> Void)? init( isActive: Bool = false, @@ -457,7 +458,8 @@ struct AdaptiveMicButton: View { activeColor: Color = .red, inactiveColor: Color = AppColors.primaryAccent, icon: String = "mic.fill", - action: @escaping () -> Void + action: @escaping () -> Void, + onLongPress: (() -> Void)? = nil ) { self.isActive = isActive self.isPulsing = isPulsing @@ -466,83 +468,55 @@ struct AdaptiveMicButton: View { self.inactiveColor = inactiveColor self.icon = icon self.action = action + self.onLongPress = onLongPress + } + + private var micContent: some View { + ZStack { + // Background circle + Circle() + .fill(isActive ? activeColor : inactiveColor) + .frame(width: AdaptiveSizing.micButtonSize, height: AdaptiveSizing.micButtonSize) + + // Pulsing effect when active + if isPulsing { + Circle() + .stroke(Color.white.opacity(0.4), lineWidth: 2) + .frame(width: AdaptiveSizing.micButtonSize, height: AdaptiveSizing.micButtonSize) + .scaleEffect(1.3) + .opacity(0) + .animation( + .easeOut(duration: 1.0).repeatForever(autoreverses: false), + value: isPulsing + ) + } + + // Icon or loading indicator + if isLoading { + ProgressView() + .progressViewStyle(CircularProgressViewStyle(tint: .white)) + .scaleEffect(1.2) + } else { + Image(systemName: icon) + .font(.system(size: AdaptiveSizing.micIconSize)) + .foregroundColor(.white) + .contentTransition(.symbolEffect(.replace)) + .animation(.smooth(duration: 0.3), value: icon) + } + } } var body: some View { Group { if #available(iOS 26.0, macOS 26.0, *) { - Button(action: action) { - ZStack { - // Background circle - Circle() - .fill(isActive ? activeColor : inactiveColor) - .frame(width: AdaptiveSizing.micButtonSize, height: AdaptiveSizing.micButtonSize) - - // Pulsing effect when active - if isPulsing { - Circle() - .stroke(Color.white.opacity(0.4), lineWidth: 2) - .frame(width: AdaptiveSizing.micButtonSize, height: AdaptiveSizing.micButtonSize) - .scaleEffect(1.3) - .opacity(0) - .animation( - .easeOut(duration: 1.0).repeatForever(autoreverses: false), - value: isPulsing - ) - } - - // Icon or loading indicator - if isLoading { - ProgressView() - .progressViewStyle(CircularProgressViewStyle(tint: .white)) - .scaleEffect(1.2) - } else { - Image(systemName: icon) - .font(.system(size: AdaptiveSizing.micIconSize)) - .foregroundColor(.white) - .contentTransition(.symbolEffect(.replace)) - .animation(.smooth(duration: 0.3), value: icon) - } - } - } - .buttonStyle(.plain) - .glassEffect(.regular.interactive()) + micContent + .onLongPressGesture(minimumDuration: 0.5, perform: { onLongPress?() ?? action() }) + .onTapGesture(perform: action) + .glassEffect(.regular.interactive()) } else { - Button(action: action) { - ZStack { - // Background circle - Circle() - .fill(isActive ? activeColor : inactiveColor) - .frame(width: AdaptiveSizing.micButtonSize, height: AdaptiveSizing.micButtonSize) - - // Pulsing effect when active - if isPulsing { - Circle() - .stroke(Color.white.opacity(0.4), lineWidth: 2) - .frame(width: AdaptiveSizing.micButtonSize, height: AdaptiveSizing.micButtonSize) - .scaleEffect(1.3) - .opacity(0) - .animation( - .easeOut(duration: 1.0).repeatForever(autoreverses: false), - value: isPulsing - ) - } - - // Icon or loading indicator - if isLoading { - ProgressView() - .progressViewStyle(CircularProgressViewStyle(tint: .white)) - .scaleEffect(1.2) - } else { - Image(systemName: icon) - .font(.system(size: AdaptiveSizing.micIconSize)) - .foregroundColor(.white) - .contentTransition(.symbolEffect(.replace)) - .animation(.smooth(duration: 0.3), value: icon) - } - } - } - .buttonStyle(.plain) + micContent + .onLongPressGesture(minimumDuration: 0.5, perform: { onLongPress?() ?? action() }) + .onTapGesture(perform: action) } } } diff --git a/sdk/runanywhere-commons/src/backends/llamacpp/llamacpp_backend.cpp b/sdk/runanywhere-commons/src/backends/llamacpp/llamacpp_backend.cpp index 76218a7ae..fe1f57524 100644 --- a/sdk/runanywhere-commons/src/backends/llamacpp/llamacpp_backend.cpp +++ b/sdk/runanywhere-commons/src/backends/llamacpp/llamacpp_backend.cpp @@ -679,6 +679,8 @@ bool LlamaCppTextGeneration::generate_stream(const TextGenerationRequest& reques std::string partial_utf8_buffer; partial_utf8_buffer.reserve(8); + Utf8State scanner_state; + int n_cur = batch.n_tokens; int tokens_generated = 0; bool stop_sequence_hit = false; @@ -696,11 +698,11 @@ bool LlamaCppTextGeneration::generate_stream(const TextGenerationRequest& reques const std::string new_token_chars = common_token_to_piece(context_, new_token_id); + const size_t old_partial_size = partial_utf8_buffer.size(); partial_utf8_buffer.append(new_token_chars); - Utf8State scanner_state; size_t valid_upto = 0; - for (size_t i = 0; i < partial_utf8_buffer.size(); ++i) { + for (size_t i = old_partial_size; i < partial_utf8_buffer.size(); ++i) { scanner_state.process(static_cast(partial_utf8_buffer[i])); if (scanner_state.state == 0) { valid_upto = i + 1; @@ -735,12 +737,17 @@ bool LlamaCppTextGeneration::generate_stream(const TextGenerationRequest& reques if (stop_window.size() > MAX_STOP_LEN) { size_t safe_len = stop_window.size() - MAX_STOP_LEN; - if (!callback(stop_window.substr(0, safe_len))) { - LOGI("Generation cancelled by callback"); - cancel_requested_.store(true); - break; + while (safe_len > 0 && (stop_window[safe_len] & 0xC0) == 0x80) { + safe_len--; + } + if (safe_len > 0) { + if (!callback(stop_window.substr(0, safe_len))) { + LOGI("Generation cancelled by callback"); + cancel_requested_.store(true); + break; + } + stop_window.erase(0, safe_len); } - stop_window.erase(0, safe_len); } } @@ -973,6 +980,8 @@ TextGenerationResult LlamaCppTextGeneration::generate_from_context(const TextGen std::string partial_utf8_buffer; partial_utf8_buffer.reserve(8); + Utf8State scanner_state; + std::string generated_text; int n_cur = static_cast(current_pos) + n_prompt; int tokens_generated = 0; @@ -987,38 +996,17 @@ TextGenerationResult LlamaCppTextGeneration::generate_from_context(const TextGen } const std::string new_token_chars = common_token_to_piece(context_, new_token_id); + const size_t old_partial_size = partial_utf8_buffer.size(); partial_utf8_buffer.append(new_token_chars); - struct Utf8Check { - static size_t valid_upto(const std::string& buf) { - static const uint8_t utf8d[] = { - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, - 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, - 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, - 0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, - 0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, - 0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, - 1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, - 1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, - 1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - }; - uint32_t state = 0; - size_t upto = 0; - for (size_t i = 0; i < buf.size(); ++i) { - uint32_t type = utf8d[static_cast(buf[i])]; - state = utf8d[256 + state * 16 + type]; - if (state == 0) upto = i + 1; - } - return upto; + size_t valid_upto = 0; + for (size_t i = old_partial_size; i < partial_utf8_buffer.size(); ++i) { + scanner_state.process(static_cast(partial_utf8_buffer[i])); + if (scanner_state.state == 0) { + valid_upto = i + 1; } - }; + } - const size_t valid_upto = Utf8Check::valid_upto(partial_utf8_buffer); if (valid_upto > 0) { std::string valid_chunk = partial_utf8_buffer.substr(0, valid_upto); stop_window.append(valid_chunk); @@ -1042,9 +1030,14 @@ TextGenerationResult LlamaCppTextGeneration::generate_from_context(const TextGen } if (stop_window.size() > MAX_STOP_LEN) { - const size_t safe_len = stop_window.size() - MAX_STOP_LEN; - generated_text += stop_window.substr(0, safe_len); - stop_window.erase(0, safe_len); + size_t safe_len = stop_window.size() - MAX_STOP_LEN; + while (safe_len > 0 && (stop_window[safe_len] & 0xC0) == 0x80) { + safe_len--; + } + if (safe_len > 0) { + generated_text += stop_window.substr(0, safe_len); + stop_window.erase(0, safe_len); + } } } diff --git a/sdk/runanywhere-commons/src/features/llm/llm_component.cpp b/sdk/runanywhere-commons/src/features/llm/llm_component.cpp index d09f739ea..d13846b7f 100644 --- a/sdk/runanywhere-commons/src/features/llm/llm_component.cpp +++ b/sdk/runanywhere-commons/src/features/llm/llm_component.cpp @@ -9,6 +9,7 @@ * Do NOT add features not present in the Swift code. */ +#include #include #include #include @@ -45,6 +46,9 @@ struct rac_llm_component { /** Mutex for thread safety */ std::mutex mtx; + /** Cancellation flag - set by cancel(), read by token callback without holding mtx */ + std::atomic cancel_requested{false}; + /** Resolved inference framework (defaults to LlamaCPP, the primary LLM backend) */ rac_inference_framework_t actual_framework; @@ -509,6 +513,8 @@ struct llm_stream_context { float temperature; int32_t max_tokens; int32_t token_count; // Track tokens for streaming updates + + std::atomic* cancel_flag; }; /** @@ -517,6 +523,10 @@ struct llm_stream_context { static rac_bool_t llm_stream_token_callback(const char* token, void* user_data) { auto* ctx = reinterpret_cast(user_data); + if (ctx->cancel_flag && ctx->cancel_flag->load(std::memory_order_relaxed)) { + return RAC_FALSE; + } + // Track first token time and emit first token event if (!ctx->first_token_recorded) { ctx->first_token_recorded = true; @@ -576,6 +586,8 @@ extern "C" rac_result_t rac_llm_component_generate_stream( auto* component = reinterpret_cast(handle); std::lock_guard lock(component->mtx); + component->cancel_requested.store(false, std::memory_order_relaxed); + // Generate unique ID for this generation std::string generation_id = generate_unique_id(); const char* model_id = rac_lifecycle_get_model_id(component->lifecycle); @@ -667,6 +679,7 @@ extern "C" rac_result_t rac_llm_component_generate_stream( ctx.temperature = effective_options->temperature; ctx.max_tokens = effective_options->max_tokens; ctx.token_count = 0; + ctx.cancel_flag = &component->cancel_requested; // Perform streaming generation result = rac_llm_generate_stream(service, prompt, effective_options, llm_stream_token_callback, @@ -702,7 +715,7 @@ extern "C" rac_result_t rac_llm_component_generate_stream( rac_llm_result_t final_result = {}; final_result.text = strdup(ctx.full_text.c_str()); final_result.prompt_tokens = ctx.prompt_tokens; - final_result.completion_tokens = estimate_tokens(ctx.full_text.c_str()); + final_result.completion_tokens = ctx.token_count > 0 ? ctx.token_count : estimate_tokens(ctx.full_text.c_str()); final_result.total_tokens = final_result.prompt_tokens + final_result.completion_tokens; final_result.total_time_ms = total_time_ms; @@ -761,7 +774,8 @@ extern "C" rac_result_t rac_llm_component_cancel(rac_handle_t handle) { return RAC_ERROR_INVALID_HANDLE; auto* component = reinterpret_cast(handle); - std::lock_guard lock(component->mtx); + + component->cancel_requested.store(true, std::memory_order_relaxed); rac_handle_t service = rac_lifecycle_get_service(component->lifecycle); if (service) { diff --git a/sdk/runanywhere-commons/src/features/llm/tool_calling.cpp b/sdk/runanywhere-commons/src/features/llm/tool_calling.cpp index 4212d4abb..0ca01a419 100644 --- a/sdk/runanywhere-commons/src/features/llm/tool_calling.cpp +++ b/sdk/runanywhere-commons/src/features/llm/tool_calling.cpp @@ -398,13 +398,41 @@ static bool extract_json_value(const char* json_obj, const char* key, char** out *out_is_object = true; return true; } + } else { + // Scalar value (number, boolean, null) + // Read until comma, closing brace, or whitespace + size_t val_start = pos; + size_t val_end = pos; + while (val_end < len && json_obj[val_end] != ',' && + json_obj[val_end] != '}' && json_obj[val_end] != ']' && + json_obj[val_end] != '\n') { + val_end++; + } + // Trim trailing whitespace + while (val_end > val_start && + (json_obj[val_end - 1] == ' ' || json_obj[val_end - 1] == '\t')) { + val_end--; + } + if (val_end > val_start) { + size_t val_len = val_end - val_start; + *out_value = static_cast(malloc(val_len + 1)); + if (*out_value) { + memcpy(*out_value, json_obj + val_start, val_len); + (*out_value)[val_len] = '\0'; + } + *out_is_object = false; + return true; + } } } } } // Move to end of key for continued scanning + // Skip the in_string toggle - extract_json_string already + // consumed the closing quote so in_string must stay false. i = key_end - 1; + continue; } } in_string = !in_string; @@ -663,10 +691,46 @@ static bool extract_tool_name_and_args(const char* json_obj, char** out_tool_nam } } - // No arguments found - use empty object - *out_args_json = static_cast(malloc(3)); - if (*out_args_json) { - std::memcpy(*out_args_json, "{}", 3); + // No standard argument wrapper key found. + // Fallback: collect all remaining keys (excluding the tool name key) + // as flat arguments. This handles LLM output like: + // {"tool": "calculate", "expression": "5 * 100"} + { + std::vector all_keys = get_json_keys(json_obj); + std::string flat_args = "{"; + bool first = true; + for (const auto& k : all_keys) { + // Skip the key that matched the tool name + bool is_tool_key = false; + for (int t = 0; TOOL_NAME_KEYS[t] != nullptr; t++) { + if (str_equals_ignore_case(k.c_str(), TOOL_NAME_KEYS[t])) { + is_tool_key = true; + break; + } + } + if (is_tool_key) continue; + + char* kval = nullptr; + bool kval_is_obj = false; + if (extract_json_value(json_obj, k.c_str(), &kval, &kval_is_obj)) { + if (!first) flat_args += ","; + std::string escaped_key = escape_json_string(k.c_str()); + if (kval_is_obj) { + flat_args += "\"" + escaped_key + "\":" + std::string(kval); + } else if (kval) { + std::string escaped_val = escape_json_string(kval); + flat_args += "\"" + escaped_key + "\":\"" + escaped_val + "\""; + } + free(kval); + first = false; + } + } + flat_args += "}"; + + *out_args_json = static_cast(malloc(flat_args.size() + 1)); + if (*out_args_json) { + std::memcpy(*out_args_json, flat_args.c_str(), flat_args.size() + 1); + } } return true; } diff --git a/sdk/runanywhere-swift/Sources/RunAnywhere/Features/TTS/Services/AudioPlaybackManager.swift b/sdk/runanywhere-swift/Sources/RunAnywhere/Features/TTS/Services/AudioPlaybackManager.swift index 09baad44d..b6b3413a8 100644 --- a/sdk/runanywhere-swift/Sources/RunAnywhere/Features/TTS/Services/AudioPlaybackManager.swift +++ b/sdk/runanywhere-swift/Sources/RunAnywhere/Features/TTS/Services/AudioPlaybackManager.swift @@ -87,7 +87,7 @@ public class AudioPlaybackManager: NSObject, ObservableObject, AVAudioPlayerDele /// Stop current playback public func stop() { - guard isPlaying else { return } + guard audioPlayer != nil else { return } audioPlayer?.stop() cleanupPlayback(success: false) diff --git a/sdk/runanywhere-swift/Sources/RunAnywhere/Foundation/Bridge/CppBridge.swift b/sdk/runanywhere-swift/Sources/RunAnywhere/Foundation/Bridge/CppBridge.swift index 0542583be..823c513b8 100644 --- a/sdk/runanywhere-swift/Sources/RunAnywhere/Foundation/Bridge/CppBridge.swift +++ b/sdk/runanywhere-swift/Sources/RunAnywhere/Foundation/Bridge/CppBridge.swift @@ -179,6 +179,15 @@ public enum CppBridge { guard wasInitialized else { return } + Task { + await LLM.shared.destroy() + await STT.shared.destroy() + await TTS.shared.destroy() + await VAD.shared.destroy() + await VoiceAgent.shared.destroy() + await VLM.shared.destroy() + } + // Shutdown in reverse order // Note: ModelAssignment and Platform callbacks remain valid (static) diff --git a/sdk/runanywhere-swift/Sources/RunAnywhere/Infrastructure/Download/Utilities/ArchiveUtility.swift b/sdk/runanywhere-swift/Sources/RunAnywhere/Infrastructure/Download/Utilities/ArchiveUtility.swift index 3123541de..ec988e980 100644 --- a/sdk/runanywhere-swift/Sources/RunAnywhere/Infrastructure/Download/Utilities/ArchiveUtility.swift +++ b/sdk/runanywhere-swift/Sources/RunAnywhere/Infrastructure/Download/Utilities/ArchiveUtility.swift @@ -190,7 +190,9 @@ public final class ArchiveUtility { /// Decompress raw deflate data using streaming compression_stream_process. /// Uses a small 256 KB output buffer instead of pre-allocating compressedSize * N. private static func decompressDeflateStreaming(_ data: Data, range: Range) throws -> Data { - var stream = compression_stream() + let placeholder = UnsafeMutablePointer.allocate(capacity: 1) + defer { placeholder.deallocate() } + var stream = compression_stream(dst_ptr: placeholder, dst_size: 0, src_ptr: placeholder, src_size: 0, state: nil) guard compression_stream_init(&stream, COMPRESSION_STREAM_DECODE, COMPRESSION_ZLIB) == COMPRESSION_STATUS_OK else { throw SDKError.download(.extractionFailed, "Failed to initialize decompression stream") } @@ -218,7 +220,7 @@ public final class ArchiveUtility { stream.dst_ptr = outputBuffer stream.dst_size = outputChunkSize - status = compression_stream_process(&stream, COMPRESSION_STREAM_FINALIZE) + status = compression_stream_process(&stream, Int32(COMPRESSION_STREAM_FINALIZE.rawValue)) let bytesProduced = outputChunkSize - stream.dst_size if bytesProduced > 0 { diff --git a/sdk/runanywhere-swift/Sources/RunAnywhere/Public/Extensions/LLM/RunAnywhere+TextGeneration.swift b/sdk/runanywhere-swift/Sources/RunAnywhere/Public/Extensions/LLM/RunAnywhere+TextGeneration.swift index bc721a59c..ba4091e80 100644 --- a/sdk/runanywhere-swift/Sources/RunAnywhere/Public/Extensions/LLM/RunAnywhere+TextGeneration.swift +++ b/sdk/runanywhere-swift/Sources/RunAnywhere/Public/Extensions/LLM/RunAnywhere+TextGeneration.swift @@ -85,19 +85,21 @@ public extension RunAnywhere { let totalTimeMs = endTime.timeIntervalSince(startTime) * 1000 // Extract result - let generatedText: String + let rawText: String if let textPtr = llmResult.text { - generatedText = String(cString: textPtr) + rawText = String(cString: textPtr) } else { - generatedText = "" + rawText = "" } let inputTokens = Int(llmResult.prompt_tokens) let outputTokens = Int(llmResult.completion_tokens) let tokensPerSecond = llmResult.tokens_per_second > 0 ? Double(llmResult.tokens_per_second) : 0 + let (generatedText, thinkingContent) = ThinkingContentParser.extract(from: rawText) + return LLMGenerationResult( text: generatedText, - thinkingContent: nil, + thinkingContent: thinkingContent, inputTokens: inputTokens, tokensUsed: outputTokens, modelUsed: modelId, @@ -105,7 +107,7 @@ public extension RunAnywhere { framework: "llamacpp", tokensPerSecond: tokensPerSecond, timeToFirstTokenMs: nil, - thinkingTokens: 0, + thinkingTokens: thinkingContent.map { _ in outputTokens } ?? 0, responseTokens: outputTokens ) } @@ -189,47 +191,43 @@ public extension RunAnywhere { ) -> AsyncThrowingStream { AsyncThrowingStream { continuation in Task { - do { - await collector.markStart() - - let context = LLMStreamCallbackContext(continuation: continuation, collector: collector) - let contextPtr = Unmanaged.passRetained(context).toOpaque() - - let callbacks = LLMStreamCallbacks.create() - var cOptions = options - - let callCFunction: () -> rac_result_t = { - prompt.withCString { promptPtr in - rac_llm_component_generate_stream( - handle, - promptPtr, - &cOptions, - callbacks.token, - callbacks.complete, - callbacks.error, - contextPtr - ) - } + await collector.markStart() + + let context = LLMStreamCallbackContext(continuation: continuation, collector: collector) + // passRetained: context is released in completeCallback or errorCallback + let contextPtr = Unmanaged.passRetained(context).toOpaque() + + let callbacks = LLMStreamCallbacks.create() + var cOptions = options + + let callCFunction: () -> rac_result_t = { + prompt.withCString { promptPtr in + rac_llm_component_generate_stream( + handle, + promptPtr, + &cOptions, + callbacks.token, + callbacks.complete, + callbacks.error, + contextPtr + ) } + } - let streamResult: rac_result_t - if let systemPrompt = systemPrompt { - streamResult = systemPrompt.withCString { sysPtr in - cOptions.system_prompt = sysPtr - return callCFunction() - } - } else { - cOptions.system_prompt = nil - streamResult = callCFunction() + let streamResult: rac_result_t + if let systemPrompt = systemPrompt { + streamResult = systemPrompt.withCString { sysPtr in + cOptions.system_prompt = sysPtr + return callCFunction() } + } else { + cOptions.system_prompt = nil + streamResult = callCFunction() + } - if streamResult != RAC_SUCCESS { - Unmanaged.fromOpaque(contextPtr).release() - let error = SDKError.llm(.generationFailed, "Stream generation failed: \(streamResult)") - continuation.finish(throwing: error) - await collector.markFailed(error) - } - } catch { + if streamResult != RAC_SUCCESS { + Unmanaged.fromOpaque(contextPtr).release() + let error = SDKError.llm(.generationFailed, "Stream generation failed: \(streamResult)") continuation.finish(throwing: error) await collector.markFailed(error) } @@ -255,6 +253,7 @@ private enum LLMStreamCallbacks { static func create() -> Callbacks { let tokenCallback: TokenFn = { tokenPtr, userData -> rac_bool_t in guard let tokenPtr = tokenPtr, let userData = userData else { return RAC_TRUE } + if Task.isCancelled { return RAC_FALSE } let ctx = Unmanaged.fromOpaque(userData).takeUnretainedValue() let token = String(cString: tokenPtr) Task { @@ -264,16 +263,28 @@ private enum LLMStreamCallbacks { return RAC_TRUE } - let completeCallback: CompleteFn = { _, userData in + let completeCallback: CompleteFn = { resultPtr, userData in guard let userData = userData else { return } - let ctx = Unmanaged.fromOpaque(userData).takeUnretainedValue() + let ctx = Unmanaged.fromOpaque(userData).takeRetainedValue() ctx.continuation.finish() - Task { await ctx.collector.markComplete() } + + if let result = resultPtr?.pointee { + Task { + await ctx.collector.markCompleteWithMetrics( + promptTokens: Int(result.prompt_tokens), + completionTokens: Int(result.completion_tokens), + tokensPerSecond: Double(result.tokens_per_second), + timeToFirstTokenMs: Double(result.time_to_first_token_ms) + ) + } + } else { + Task { await ctx.collector.markComplete() } + } } let errorCallback: ErrorFn = { _, errorMsg, userData in guard let userData = userData else { return } - let ctx = Unmanaged.fromOpaque(userData).takeUnretainedValue() + let ctx = Unmanaged.fromOpaque(userData).takeRetainedValue() let message = errorMsg.map { String(cString: $0) } ?? "Unknown error" let error = SDKError.llm(.generationFailed, message) ctx.continuation.finish(throwing: error) @@ -296,6 +307,34 @@ private final class LLMStreamCallbackContext: @unchecked Sendable { } } +// MARK: - Thinking Content Parser + +enum ThinkingContentParser { + /// Extracts `...` content from generated text. + /// - Returns: Tuple of (responseText, thinkingContent). If no tags found, responseText = original text, thinkingContent = nil. + static func extract(from text: String) -> (text: String, thinking: String?) { + guard let startRange = text.range(of: ""), + let endRange = text.range(of: ""), + startRange.upperBound <= endRange.lowerBound else { + return (text: text, thinking: nil) + } + let thinkingContent = String(text[startRange.upperBound.. and after + let textBefore = String(text[..? + private var cppPromptTokens: Int? + private var cppCompletionTokens: Int? + private var cppTokensPerSecond: Double? + private var cppTimeToFirstTokenMs: Double? + init(modelId: String, promptLength: Int) { self.modelId = modelId self.promptLength = promptLength @@ -339,6 +383,24 @@ private actor LLMStreamingMetricsCollector { } } + func markCompleteWithMetrics( + promptTokens: Int, + completionTokens: Int, + tokensPerSecond: Double, + timeToFirstTokenMs: Double + ) { + if promptTokens > 0 { cppPromptTokens = promptTokens } + if completionTokens > 0 { cppCompletionTokens = completionTokens } + if tokensPerSecond > 0 { cppTokensPerSecond = tokensPerSecond } + if timeToFirstTokenMs > 0 { cppTimeToFirstTokenMs = timeToFirstTokenMs } + + isComplete = true + if let continuation = resultContinuation { + continuation.resume(returning: buildResult()) + resultContinuation = nil + } + } + func markFailed(_ error: Error) { self.error = error if let continuation = resultContinuation { @@ -363,20 +425,32 @@ private actor LLMStreamingMetricsCollector { let endTime = Date() let latencyMs = (startTime.map { endTime.timeIntervalSince($0) } ?? 0) * 1000 - var timeToFirstTokenMs: Double? - if let start = startTime, let firstToken = firstTokenTime { + let timeToFirstTokenMs: Double? + if let cppTtft = cppTimeToFirstTokenMs { + timeToFirstTokenMs = cppTtft + } else if let start = startTime, let firstToken = firstTokenTime { timeToFirstTokenMs = firstToken.timeIntervalSince(start) * 1000 + } else { + timeToFirstTokenMs = nil } - // Use actual token count from streaming callbacks, not character estimation (fixes #339) - let outputTokens = max(1, tokenCount) - let totalTimeSec = latencyMs / 1000.0 - let tokensPerSecond = totalTimeSec > 0 ? Double(outputTokens) / totalTimeSec : 0 + let outputTokens = cppCompletionTokens ?? max(1, tokenCount) + let inputTokens = cppPromptTokens ?? 0 + + let tokensPerSecond: Double + if let cppTps = cppTokensPerSecond { + tokensPerSecond = cppTps + } else { + let totalTimeSec = latencyMs / 1000.0 + tokensPerSecond = totalTimeSec > 0 ? Double(outputTokens) / totalTimeSec : 0 + } + + let (responseText, thinkingContent) = ThinkingContentParser.extract(from: fullText) return LLMGenerationResult( - text: fullText, - thinkingContent: nil, - inputTokens: 0, + text: responseText, + thinkingContent: thinkingContent, + inputTokens: inputTokens, tokensUsed: outputTokens, modelUsed: modelId, latencyMs: latencyMs, diff --git a/sdk/runanywhere-swift/Sources/RunAnywhere/Public/Extensions/LLM/RunAnywhere+ToolCalling.swift b/sdk/runanywhere-swift/Sources/RunAnywhere/Public/Extensions/LLM/RunAnywhere+ToolCalling.swift index 2a8481f7c..eef87c382 100644 --- a/sdk/runanywhere-swift/Sources/RunAnywhere/Public/Extensions/LLM/RunAnywhere+ToolCalling.swift +++ b/sdk/runanywhere-swift/Sources/RunAnywhere/Public/Extensions/LLM/RunAnywhere+ToolCalling.swift @@ -168,8 +168,17 @@ public extension RunAnywhere { let registeredTools = await ToolRegistry.shared.getAll() let tools = opts.tools ?? registeredTools + // Extract /no_think prefix before building the full prompt so it stays + // at the beginning where the C++ inference layer expects it. + let noThinkPrefix = "/no_think\n" + let hasNoThink = prompt.hasPrefix(noThinkPrefix) + let cleanPrompt = hasNoThink ? String(prompt.dropFirst(noThinkPrefix.count)) : prompt + let systemPrompt = buildToolSystemPrompt(tools: tools, options: opts) - var fullPrompt = systemPrompt.isEmpty ? prompt : "\(systemPrompt)\n\nUser: \(prompt)" + var fullPrompt = systemPrompt.isEmpty ? cleanPrompt : "\(systemPrompt)\n\nUser: \(cleanPrompt)" + if hasNoThink { + fullPrompt = "\(noThinkPrefix)\(fullPrompt)" + } var allToolCalls: [ToolCall] = [] var allToolResults: [ToolResult] = [] diff --git a/sdk/runanywhere-swift/Sources/RunAnywhere/Public/Extensions/TTS/RunAnywhere+TTS.swift b/sdk/runanywhere-swift/Sources/RunAnywhere/Public/Extensions/TTS/RunAnywhere+TTS.swift index cda94d5c6..7d3dec55c 100644 --- a/sdk/runanywhere-swift/Sources/RunAnywhere/Public/Extensions/TTS/RunAnywhere+TTS.swift +++ b/sdk/runanywhere-swift/Sources/RunAnywhere/Public/Extensions/TTS/RunAnywhere+TTS.swift @@ -95,6 +95,7 @@ public extension RunAnywhere { // Synthesize (C++ emits events) var ttsResult = rac_tts_result_t() + defer { rac_tts_result_free(&ttsResult) } let synthesizeResult = text.withCString { textPtr in rac_tts_component_synthesize(handle, textPtr, &cOptions, &ttsResult) } @@ -157,7 +158,6 @@ public extension RunAnywhere { let voiceId = await CppBridge.TTS.shared.currentVoiceId ?? "unknown" let startTime = Date() - var totalAudioData = Data() // Build C options var cOptions = rac_tts_options_t() @@ -166,8 +166,8 @@ public extension RunAnywhere { cOptions.volume = options.volume cOptions.sample_rate = Int32(options.sampleRate) - // Create callback context - let context = TTSStreamContext(onChunk: onAudioChunk, totalData: &totalAudioData) + // Create callback context - owns its own Data + let context = TTSStreamContext(onChunk: onAudioChunk) let contextPtr = Unmanaged.passRetained(context).toOpaque() let streamResult = text.withCString { textPtr in @@ -180,13 +180,14 @@ public extension RunAnywhere { let ctx = Unmanaged.fromOpaque(userData).takeUnretainedValue() let chunk = Data(bytes: audioPtr, count: audioSize) ctx.onChunk(chunk) - ctx.totalData.pointee.append(chunk) + ctx.totalData.append(chunk) }, contextPtr ) } - Unmanaged.fromOpaque(contextPtr).release() + let finalContext = Unmanaged.fromOpaque(contextPtr).takeRetainedValue() + let totalAudioData = finalContext.totalData guard streamResult == RAC_SUCCESS else { throw SDKError.tts(.processingFailed, "Streaming synthesis failed: \(streamResult)") @@ -309,10 +310,9 @@ public extension RunAnywhere { private final class TTSStreamContext: @unchecked Sendable { let onChunk: (Data) -> Void - var totalData: UnsafeMutablePointer + var totalData: Data = Data() - init(onChunk: @escaping (Data) -> Void, totalData: UnsafeMutablePointer) { + init(onChunk: @escaping (Data) -> Void) { self.onChunk = onChunk - self.totalData = totalData } } diff --git a/sdk/runanywhere-swift/Sources/RunAnywhere/Public/Extensions/VoiceAgent/RunAnywhere+VoiceAgent.swift b/sdk/runanywhere-swift/Sources/RunAnywhere/Public/Extensions/VoiceAgent/RunAnywhere+VoiceAgent.swift index f96978079..d61fb8cbb 100644 --- a/sdk/runanywhere-swift/Sources/RunAnywhere/Public/Extensions/VoiceAgent/RunAnywhere+VoiceAgent.swift +++ b/sdk/runanywhere-swift/Sources/RunAnywhere/Public/Extensions/VoiceAgent/RunAnywhere+VoiceAgent.swift @@ -257,10 +257,14 @@ public extension RunAnywhere { rac_voice_agent_synthesize_speech(handle, textPtr, &audioPtr, &audioSize) } - guard result == RAC_SUCCESS, let ptr = audioPtr, audioSize > 0 else { + guard result == RAC_SUCCESS else { throw SDKError.voiceAgent(.processingFailed, "Speech synthesis failed: \(result)") } + guard let ptr = audioPtr, audioSize > 0 else { + return Data() + } + let audioData = Data(bytes: ptr, count: audioSize) free(ptr) diff --git a/sdk/runanywhere-swift/Sources/RunAnywhere/Public/Extensions/VoiceAgent/RunAnywhere+VoiceSession.swift b/sdk/runanywhere-swift/Sources/RunAnywhere/Public/Extensions/VoiceAgent/RunAnywhere+VoiceSession.swift index 091842093..8e143cca9 100644 --- a/sdk/runanywhere-swift/Sources/RunAnywhere/Public/Extensions/VoiceAgent/RunAnywhere+VoiceSession.swift +++ b/sdk/runanywhere-swift/Sources/RunAnywhere/Public/Extensions/VoiceAgent/RunAnywhere+VoiceSession.swift @@ -108,6 +108,10 @@ public actor VoiceSessionHandle { eventContinuation?.finish() } + public func interruptPlayback() { + audioPlayback.stop() + } + /// Force process current audio (push-to-talk) public func sendNow() async { guard isRunning else { return } @@ -115,6 +119,12 @@ public actor VoiceSessionHandle { await processCurrentAudio() } + /// Resume listening after a completed turn (for push-to-talk when continuousMode is false) + public func resumeListening() async { + guard isRunning else { return } + try? await startListening() + } + // MARK: - Private private func emit(_ event: VoiceSessionEvent) { @@ -196,44 +206,68 @@ public actor VoiceSessionHandle { emit(.processing) + var transcription = "" + var cleanedResponse = "" + var thinkingContent: String? + var synthesizedAudio: Data? + do { - let result = try await RunAnywhere.processVoiceTurn(audio) + // Step 1: Transcribe audio + transcription = try await RunAnywhere.voiceAgentTranscribe(audio) - guard result.speechDetected else { - logger.info("No speech detected") + guard !transcription.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty else { + logger.info("No speech detected (empty transcription)") + emit(.turnCompleted(transcript: "", response: "", thinkingContent: nil, audio: nil)) if config.continuousMode && isRunning { try? await startListening() } return } - // Emit intermediate results - if let transcript = result.transcription { - emit(.transcribed(text: transcript)) - } + emit(.transcribed(text: transcription)) - if let response = result.response { - emit(.responded(text: response)) + // Step 2: Generate LLM response (apply /no_think prefix if needed) + let effectivePrompt: String + if !config.thinkingModeEnabled { + effectivePrompt = "/no_think\n\(transcription)" + } else { + effectivePrompt = transcription } - // Play TTS if enabled - if config.autoPlayTTS, let ttsAudio = result.synthesizedAudio, !ttsAudio.isEmpty { - emit(.speaking) - try await audioPlayback.play(ttsAudio) + let options = LLMGenerationOptions(maxTokens: config.maxTokens ?? 100) + let result = try await RunAnywhere.generate(effectivePrompt, options: options) + // generate() already runs ThinkingContentParser internally + cleanedResponse = result.text + thinkingContent = result.thinkingContent + + emit(.responded(text: cleanedResponse, thinkingContent: thinkingContent)) + + // Step 4: Synthesize speech from cleaned response (no think tags spoken) + if config.autoPlayTTS, !cleanedResponse.isEmpty { + let ttsAudio = try await RunAnywhere.voiceAgentSynthesizeSpeech(cleanedResponse) + synthesizedAudio = ttsAudio + + if !ttsAudio.isEmpty { + emit(.speaking) + do { + try await audioPlayback.play(ttsAudio) + } catch is AudioPlaybackError { + logger.info("TTS playback interrupted by user") + } + } } - - // Emit complete result - emit(.turnCompleted( - transcript: result.transcription ?? "", - response: result.response ?? "", - audio: result.synthesizedAudio - )) - } catch { logger.error("Processing failed: \(error)") emit(.error(error.localizedDescription)) } + emit(.turnCompleted( + transcript: transcription, + response: cleanedResponse, + thinkingContent: thinkingContent, + audio: synthesizedAudio + )) + // Resume listening if continuous mode if config.continuousMode && isRunning { try? await startListening() diff --git a/sdk/runanywhere-swift/Sources/RunAnywhere/Public/Extensions/VoiceAgent/VoiceAgentTypes.swift b/sdk/runanywhere-swift/Sources/RunAnywhere/Public/Extensions/VoiceAgent/VoiceAgentTypes.swift index da6c8ff1e..47f30a0d4 100644 --- a/sdk/runanywhere-swift/Sources/RunAnywhere/Public/Extensions/VoiceAgent/VoiceAgentTypes.swift +++ b/sdk/runanywhere-swift/Sources/RunAnywhere/Public/Extensions/VoiceAgent/VoiceAgentTypes.swift @@ -23,6 +23,9 @@ public struct VoiceAgentResult: Sendable { /// Generated response text from LLM public var response: String? + /// Thinking content extracted from `...` tags (nil if none) + public var thinkingContent: String? + /// Synthesized audio data from TTS public var synthesizedAudio: Data? @@ -31,11 +34,13 @@ public struct VoiceAgentResult: Sendable { speechDetected: Bool = false, transcription: String? = nil, response: String? = nil, + thinkingContent: String? = nil, synthesizedAudio: Data? = nil ) { self.speechDetected = speechDetected self.transcription = transcription self.response = response + self.thinkingContent = thinkingContent self.synthesizedAudio = synthesizedAudio } @@ -185,14 +190,14 @@ public enum VoiceSessionEvent: Sendable { /// Got transcription from STT case transcribed(text: String) - /// Got response from LLM - case responded(text: String) + /// Got response from LLM (with optional thinking content) + case responded(text: String, thinkingContent: String? = nil) /// Playing TTS audio case speaking - /// Complete turn result - case turnCompleted(transcript: String, response: String, audio: Data?) + /// Complete turn result (with optional thinking content) + case turnCompleted(transcript: String, response: String, thinkingContent: String? = nil, audio: Data?) /// Session stopped case stopped @@ -217,16 +222,26 @@ public struct VoiceSessionConfig: Sendable { /// Whether to auto-resume listening after TTS playback public var continuousMode: Bool + /// Whether thinking mode is enabled for the LLM. + public var thinkingModeEnabled: Bool + + /// Maximum tokens for LLM generation (nil uses SDK default of 100) + public var maxTokens: Int? + public init( silenceDuration: TimeInterval = 1.5, speechThreshold: Float = 0.1, autoPlayTTS: Bool = true, - continuousMode: Bool = true + continuousMode: Bool = true, + thinkingModeEnabled: Bool = false, + maxTokens: Int? = nil ) { self.silenceDuration = silenceDuration self.speechThreshold = speechThreshold self.autoPlayTTS = autoPlayTTS self.continuousMode = continuousMode + self.thinkingModeEnabled = thinkingModeEnabled + self.maxTokens = maxTokens } /// Default configuration