Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ struct RunAnywhereAIApp: App {
}
}
.task {
_ = SettingsViewModel.shared
logger.info("🏁 App launched, initializing SDK...")
await initializeSDK()
}
Expand Down Expand Up @@ -274,7 +275,8 @@ struct RunAnywhereAIApp: App {
name: "Qwen3 0.6B Q4_K_M",
url: qwen3_06bURL,
framework: .llamaCpp,
memoryRequirement: 500_000_000
memoryRequirement: 500_000_000,
supportsThinking: true
)
}
if let qwen3_17bURL = URL(string: "https://huggingface.co/unsloth/Qwen3-1.7B-GGUF/resolve/main/Qwen3-1.7B-Q4_K_M.gguf") {
Expand All @@ -283,7 +285,8 @@ struct RunAnywhereAIApp: App {
name: "Qwen3 1.7B Q4_K_M",
url: qwen3_17bURL,
framework: .llamaCpp,
memoryRequirement: 1_200_000_000
memoryRequirement: 1_200_000_000,
supportsThinking: true
)
}
if let qwen3_4bURL = URL(string: "https://huggingface.co/unsloth/Qwen3-4B-GGUF/resolve/main/Qwen3-4B-Q4_K_M.gguf") {
Expand All @@ -292,7 +295,8 @@ struct RunAnywhereAIApp: App {
name: "Qwen3 4B Q4_K_M",
url: qwen3_4bURL,
framework: .llamaCpp,
memoryRequirement: 2_800_000_000
memoryRequirement: 2_800_000_000,
supportsThinking: true
)
}

Expand All @@ -303,7 +307,8 @@ struct RunAnywhereAIApp: App {
name: "Qwen3.5 0.8B Q4_K_M",
url: qwen35_08bURL,
framework: .llamaCpp,
memoryRequirement: 600_000_000
memoryRequirement: 600_000_000,
supportsThinking: true
)
}
if let qwen35_2bURL = URL(string: "https://huggingface.co/unsloth/Qwen3.5-2B-GGUF/resolve/main/Qwen3.5-2B-Q4_K_M.gguf") {
Expand All @@ -312,7 +317,8 @@ struct RunAnywhereAIApp: App {
name: "Qwen3.5 2B Q4_K_M",
url: qwen35_2bURL,
framework: .llamaCpp,
memoryRequirement: 1_500_000_000
memoryRequirement: 1_500_000_000,
supportsThinking: true
)
}
if let qwen35_4bURL = URL(string: "https://huggingface.co/unsloth/Qwen3.5-4B-GGUF/resolve/main/Qwen3.5-4B-Q4_K_M.gguf") {
Expand All @@ -321,7 +327,8 @@ struct RunAnywhereAIApp: App {
name: "Qwen3.5 4B Q4_K_M",
url: qwen35_4bURL,
framework: .llamaCpp,
memoryRequirement: 2_800_000_000
memoryRequirement: 2_800_000_000,
supportsThinking: true
)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ extension LLMViewModel {
if let id = modelId,
let matchingModel = ModelListViewModel.shared.availableModels.first(where: { $0.id == id }) {
self.updateLoadedModelInfo(name: matchingModel.name, framework: matchingModel.framework)
self.setLoadedModelSupportsThinking(matchingModel.supportsThinking)
}
}
}
Expand Down Expand Up @@ -89,6 +90,7 @@ extension LLMViewModel {

if let matchingModel = ModelListViewModel.shared.availableModels.first(where: { $0.id == modelId }) {
updateLoadedModelInfo(name: matchingModel.name, framework: matchingModel.framework)
setLoadedModelSupportsThinking(matchingModel.supportsThinking)
}

if !wasLoaded {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ extension LLMViewModel {

for try await token in stream {
fullResponse += token
await updateMessageContent(at: messageIndex, content: fullResponse)
let displayText = Self.stripThinkTags(from: fullResponse)
await updateMessageContent(at: messageIndex, content: displayText)
NotificationCenter.default.post(
Comment on lines 25 to 29
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Skip no-op UI updates while the model is still inside <think>.

stripThinkTags can keep displayText unchanged for many hidden tokens, but this loop still rebuilds the message and posts a scroll notification on every token. On long thinking traces that becomes avoidable main-thread churn.

Suggested simplification
     func generateStreamingResponse(
         prompt: String,
         options: LLMGenerationOptions,
         messageIndex: Int
     ) async throws {
         var fullResponse = ""
+        var lastDisplayText = ""

         let streamingResult = try await RunAnywhere.generateStream(prompt, options: options)
         let stream = streamingResult.stream
         let metricsTask = streamingResult.result

         for try await token in stream {
             fullResponse += token
             let displayText = Self.stripThinkTags(from: fullResponse)
+            guard displayText != lastDisplayText else { continue }
+            lastDisplayText = displayText
             await updateMessageContent(at: messageIndex, content: displayText)
             NotificationCenter.default.post(
                 name: Notification.Name("MessageContentUpdated"),
                 object: nil
             )
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
for try await token in stream {
fullResponse += token
await updateMessageContent(at: messageIndex, content: fullResponse)
let displayText = Self.stripThinkTags(from: fullResponse)
await updateMessageContent(at: messageIndex, content: displayText)
NotificationCenter.default.post(
var fullResponse = ""
var lastDisplayText = ""
let streamingResult = try await RunAnywhere.generateStream(prompt, options: options)
let stream = streamingResult.stream
let metricsTask = streamingResult.result
for try await token in stream {
fullResponse += token
let displayText = Self.stripThinkTags(from: fullResponse)
guard displayText != lastDisplayText else { continue }
lastDisplayText = displayText
await updateMessageContent(at: messageIndex, content: displayText)
NotificationCenter.default.post(
name: Notification.Name("MessageContentUpdated"),
object: nil
)
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In
`examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel+Generation.swift`
around lines 25 - 29, The loop is issuing UI updates and notifications on every
token even when Self.stripThinkTags(from:) yields the same displayText; change
the streaming loop (where fullResponse, displayText are computed and
updateMessageContent(at:content:) and NotificationCenter.default.post are
called) to track the last-displayed text (e.g., a local prevDisplayText) and
only call updateMessageContent(at:content:) and post the scroll notification
when displayText != prevDisplayText, updating prevDisplayText after a successful
update; this avoids no-op main-thread churn while still showing visible changes.

name: Notification.Name("MessageContentUpdated"),
object: nil
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ extension LLMViewModel {
await MainActor.run {
self.updateModelLoadedState(isLoaded: true)
self.updateLoadedModelInfo(name: modelInfo.name, framework: modelInfo.framework)
self.setLoadedModelSupportsThinking(modelInfo.supportsThinking)
self.updateSystemMessageAfterModelLoad()
}
} catch {
Expand All @@ -39,6 +40,7 @@ extension LLMViewModel {
if let currentModel = modelListViewModel.currentModel {
self.updateModelLoadedState(isLoaded: true)
self.updateLoadedModelInfo(name: currentModel.name, framework: currentModel.framework)
self.setLoadedModelSupportsThinking(currentModel.supportsThinking)
verifyModelLoaded(currentModel)
} else {
self.updateModelLoadedState(isLoaded: false)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,13 @@ extension LLMViewModel {
toolCallInfo = nil
}

// Strip any residual <think> tags before displaying
let displayText = Self.stripThinkTags(from: result.text)

// Update the message with the result
await updateMessageWithToolResult(
at: messageIndex,
text: result.text,
text: displayText,
toolCallInfo: toolCallInfo
)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ final class LLMViewModel {
private(set) var error: Error?
private(set) var isModelLoaded = false
private(set) var loadedModelName: String?
private(set) var loadedModelSupportsThinking = false
private(set) var selectedFramework: InferenceFramework?
private(set) var modelSupportsStreaming = true
private(set) var currentConversation: Conversation?
Expand Down Expand Up @@ -80,8 +81,13 @@ final class LLMViewModel {
selectedFramework = framework
}

/// Records whether the currently loaded model can emit `<think>` reasoning blocks,
/// so the UI and prompt construction can adapt to thinking-capable models.
func setLoadedModelSupportsThinking(_ supportsThinking: Bool) {
    loadedModelSupportsThinking = supportsThinking
}

/// Resets all metadata tracked for the previously loaded model
/// (name, framework, and thinking-support flag).
func clearLoadedModelInfo() {
    selectedFramework = nil
    loadedModelName = nil
    loadedModelSupportsThinking = false
}

Expand Down Expand Up @@ -244,14 +250,21 @@ final class LLMViewModel {
do {
try await ensureModelIsLoaded()
let options = getGenerationOptions()
try await performGeneration(prompt: prompt, options: options, messageIndex: messageIndex)
let effectivePrompt = applyThinkingModePrefix(to: prompt)
try await performGeneration(prompt: effectivePrompt, options: options, messageIndex: messageIndex)
} catch {
await handleGenerationError(error, at: messageIndex)
}

await finalizeGeneration(at: messageIndex)
}

/// Prepends the `/no_think` directive to the prompt when the loaded model
/// supports `<think>` reasoning but the user has thinking mode disabled.
/// Returns the prompt unchanged for non-thinking models or when thinking
/// mode is enabled.
private func applyThinkingModePrefix(to prompt: String) -> String {
    if !loadedModelSupportsThinking {
        return prompt
    }
    if SettingsViewModel.shared.thinkingModeEnabled {
        return prompt
    }
    return "/no_think\n\(prompt)"
}

private func performGeneration(
prompt: String,
options: LLMGenerationOptions,
Expand Down Expand Up @@ -476,20 +489,17 @@ final class LLMViewModel {
if !isModelLoaded {
throw LLMError.noModelLoaded
}

// Verify model is actually loaded in SDK
if let model = ModelListViewModel.shared.currentModel {
try await RunAnywhere.loadModel(model.id)
}
}

private func getGenerationOptions() -> LLMGenerationOptions {
let savedTemperature = UserDefaults.standard.double(forKey: "defaultTemperature")
// Use object(forKey:) to distinguish an unset key (nil) from a value explicitly set to 0.0
let savedTemperature = UserDefaults.standard.object(forKey: "defaultTemperature") as? Double
let savedMaxTokens = UserDefaults.standard.integer(forKey: "defaultMaxTokens")
let savedSystemPrompt = UserDefaults.standard.string(forKey: "defaultSystemPrompt")
let thinkingModeEnabled = SettingsViewModel.shared.thinkingModeEnabled

let effectiveSettings = (
temperature: savedTemperature != 0 ? savedTemperature : Self.defaultTemperatureValue,
temperature: savedTemperature ?? Self.defaultTemperatureValue,
maxTokens: savedMaxTokens != 0 ? savedMaxTokens : Self.defaultMaxTokensValue
)

Expand All @@ -501,7 +511,7 @@ final class LLMViewModel {
}()

logger.info(
"[PARAMS] App getGenerationOptions: temperature=\(effectiveSettings.temperature), maxTokens=\(effectiveSettings.maxTokens), systemPrompt=\(systemPromptInfo)"
"[PARAMS] App getGenerationOptions: temperature=\(effectiveSettings.temperature), maxTokens=\(effectiveSettings.maxTokens), thinkingMode=\(thinkingModeEnabled), systemPrompt=\(systemPromptInfo)"
)

return LLMGenerationOptions(
Expand All @@ -519,8 +529,8 @@ final class LLMViewModel {
}

private func ensureSettingsAreApplied() async {
let savedTemperature = UserDefaults.standard.double(forKey: "defaultTemperature")
let temperature = savedTemperature != 0 ? savedTemperature : Self.defaultTemperatureValue
let savedTemperature = UserDefaults.standard.object(forKey: "defaultTemperature") as? Double
let temperature = savedTemperature ?? Self.defaultTemperatureValue

let savedMaxTokens = UserDefaults.standard.integer(forKey: "defaultMaxTokens")
let maxTokens = savedMaxTokens != 0 ? savedMaxTokens : Self.defaultMaxTokensValue
Expand All @@ -542,6 +552,7 @@ final class LLMViewModel {
await MainActor.run {
self.isModelLoaded = true
self.loadedModelName = model.name
self.loadedModelSupportsThinking = model.supportsThinking
self.selectedFramework = model.framework
self.modelSupportsStreaming = supportsStreaming

Expand All @@ -563,4 +574,19 @@ final class LLMViewModel {
loadConversation(conversation)
}
}

/// Removes `<think>…</think>` reasoning blocks from model output so only the
/// user-visible answer remains.
///
/// Handles three shapes of (possibly partial) streamed text:
/// 1. Complete `<think>…</think>` pairs — each whole block is removed
///    (repeatedly, so multiple blocks are supported).
/// 2. An orphan `</think>` with no opener before it (e.g. the opening tag
///    was emitted before this buffer started) — everything up to and
///    including the closer is treated as hidden reasoning and dropped.
/// 3. A trailing unclosed `<think>` (the model is still mid-reasoning) —
///    everything from that tag onward is hidden.
///
/// - Parameter text: Raw, possibly partial, generation output.
/// - Returns: The visible text, trimmed of surrounding whitespace/newlines.
static func stripThinkTags(from text: String) -> String {
    var result = text
    // 1. Remove complete <think>...</think> blocks. The ordering guard
    //    prevents removing across a malformed close-before-open pair.
    while let startRange = result.range(of: "<think>"),
          let endRange = result.range(of: "</think>"),
          startRange.upperBound <= endRange.lowerBound {
        result.removeSubrange(startRange.lowerBound..<endRange.upperBound)
    }
    // 2. An orphan </think> with no opener before it means the opening tag
    //    was lost upstream: drop the hidden prefix through the closer.
    if let orphanEnd = result.range(of: "</think>"),
       result.range(of: "<think>", range: result.startIndex..<orphanEnd.lowerBound) == nil {
        result.removeSubrange(result.startIndex..<orphanEnd.upperBound)
    }
    // 3. A still-open trailing <think> hides everything after it while the
    //    model is thinking.
    if let trailingStart = result.range(of: "<think>", options: .backwards),
       result.range(of: "</think>", range: trailingStart.upperBound..<result.endIndex) == nil {
        result = String(result[result.startIndex..<trailingStart.lowerBound])
    }
    return result.trimmingCharacters(in: .whitespacesAndNewlines)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ struct ChatInterfaceView: View {
@State private var showingLoRAManagement = false
@State private var pendingLoRAURL: URL?
@State private var loraScale: Float = 1.0
@ObservedObject private var toolSettingsViewModel = ToolSettingsViewModel.shared
@AppStorage("thinkingModeEnabled") private var thinkingModeEnabled = false
Comment on lines +34 to +35
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Avoid a second source of truth for thinking mode.

SettingsViewModel already owns thinkingModeEnabled and persists it. Toggling a separate @AppStorage copy here can update UserDefaults without updating SettingsViewModel.shared.thinkingModeEnabled, so generation code that reads the shared settings can stay stale for the rest of the session. Bind the badge to the shared settings view model instead of duplicating the state.

Suggested direction
-    `@ObservedObject` private var toolSettingsViewModel = ToolSettingsViewModel.shared
-    `@AppStorage`("thinkingModeEnabled") private var thinkingModeEnabled = false
+    `@ObservedObject` private var toolSettingsViewModel = ToolSettingsViewModel.shared
+    `@ObservedObject` private var settingsViewModel = SettingsViewModel.shared
...
-                if thinkingModeEnabled && viewModel.loadedModelSupportsThinking {
+                if settingsViewModel.thinkingModeEnabled && viewModel.loadedModelSupportsThinking {
                     thinkingModeBadge
                 }
...
-            .padding(.top, ((thinkingModeEnabled && viewModel.loadedModelSupportsThinking) || viewModel.useToolCalling || !viewModel.loraAdapters.isEmpty || hasModelSelected) ? 8 : 0)
+            .padding(.top, ((settingsViewModel.thinkingModeEnabled && viewModel.loadedModelSupportsThinking) || viewModel.useToolCalling || !viewModel.loraAdapters.isEmpty || hasModelSelected) ? 8 : 0)
...
     var thinkingModeBadge: some View {
         Button {
-            thinkingModeEnabled.toggle()
+            settingsViewModel.thinkingModeEnabled.toggle()
         } label: {

Also applies to: 502-518

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In
`examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/Views/ChatInterfaceView.swift`
around lines 34 - 35, Remove the duplicate `@AppStorage("thinkingModeEnabled")`
property in ChatInterfaceView and use the shared
Settings/ToolSettingsViewModel's thinkingModeEnabled instead; replace any uses
or bindings of the local thinkingModeEnabled with
toolSettingsViewModel.thinkingModeEnabled (including the badge binding and other
occurrences mentioned around the 502-518 region) so toggles update
SettingsViewModel.shared and avoid the second source of truth.

@FocusState private var isTextFieldFocused: Bool

private let logger = Logger(
Expand Down Expand Up @@ -369,8 +371,8 @@ extension ChatInterfaceView {
.onReceive(
NotificationCenter.default.publisher(for: Notification.Name("MessageContentUpdated"))
) { _ in
if viewModel.isGenerating {
proxy.scrollTo("typing", anchor: .bottom)
if viewModel.isGenerating, let lastMessage = viewModel.messages.last {
proxy.scrollTo(lastMessage.id, anchor: .bottom)
}
}
}
Expand Down Expand Up @@ -412,7 +414,7 @@ extension ChatInterfaceView {
.animation(nil, value: message.content)
}

if viewModel.isGenerating {
if viewModel.isGenerating, viewModel.messages.last?.content.isEmpty == true {
TypingIndicatorView()
.id("typing")
.transition(typingTransition)
Expand Down Expand Up @@ -445,9 +447,13 @@ extension ChatInterfaceView {
VStack(spacing: 0) {
Divider()

// Status badges (tool calling + LoRA)
// Status badges (thinking mode + tool calling + LoRA)
HStack(spacing: 8) {
if viewModel.useToolCalling {
if thinkingModeEnabled && viewModel.loadedModelSupportsThinking {
thinkingModeBadge
}

if viewModel.useToolCalling && !toolSettingsViewModel.registeredTools.isEmpty {
toolCallingBadge
}

Expand All @@ -459,7 +465,7 @@ extension ChatInterfaceView {
loraAddButton
}
}
.padding(.top, (viewModel.useToolCalling || !viewModel.loraAdapters.isEmpty || hasModelSelected) ? 8 : 0)
.padding(.top, ((thinkingModeEnabled && viewModel.loadedModelSupportsThinking) || viewModel.useToolCalling || !viewModel.loraAdapters.isEmpty || hasModelSelected) ? 8 : 0)

HStack(spacing: AppSpacing.mediumLarge) {
TextField("Type a message...", text: $viewModel.currentInput, axis: .vertical)
Expand Down Expand Up @@ -493,6 +499,24 @@ extension ChatInterfaceView {
}
}

// Tappable pill badge shown above the input bar when the loaded model
// supports <think> reasoning; tapping toggles thinking mode.
// NOTE(review): this toggles the view's local @AppStorage("thinkingModeEnabled")
// copy directly, while SettingsViewModel.shared also exposes a
// thinkingModeEnabled value — confirm both observe the same UserDefaults key
// so generation code does not read a stale setting.
var thinkingModeBadge: some View {
    Button {
        thinkingModeEnabled.toggle()
    } label: {
        HStack(spacing: 6) {
            Image(systemName: "lightbulb.min.fill")
                .font(.system(size: 10))
            Text("Thinking")
                .font(AppTypography.caption2)
        }
        // Modifier order is load-bearing: padding is applied before the
        // tinted background so the fill extends around the text, then the
        // corner radius rounds the filled shape.
        .foregroundColor(AppColors.primaryPurple)
        .padding(.horizontal, 10)
        .padding(.vertical, 4)
        .background(AppColors.primaryPurple.opacity(0.1))
        .cornerRadius(6)
    }
}

var toolCallingBadge: some View {
HStack(spacing: 6) {
Image(systemName: "wrench.and.screwdriver")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,14 @@ class ModelListViewModel: ObservableObject {
await loadModelsFromRegistry()
}

private var isLoadingModel = false

/// Select and load a model
func selectModel(_ model: ModelInfo) async {
guard !isLoadingModel else { return }
isLoadingModel = true
defer { isLoadingModel = false }

do {
try await loadModel(model)
setCurrentModel(model)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,16 @@ final class RAGViewModel {
}

do {
let settings = SettingsViewModel.shared
let effectiveQuestion: String
if settings.loadedModelSupportsThinking && !settings.thinkingModeEnabled {
effectiveQuestion = "/no_think\n\(question)"
} else {
effectiveQuestion = question
}

logger.info("Querying RAG pipeline: \(question)")
let result = try await RunAnywhere.ragQuery(question: question)
let result = try await RunAnywhere.ragQuery(question: effectiveQuestion)
messages.append((role: .assistant, text: result.answer))
logger.info("Query complete (\(result.totalTimeMs, format: .fixed(precision: 0))ms)")
} catch {
Expand Down
Loading
Loading