From aec553128a693c75f4280f37682ce7f78bc153e0 Mon Sep 17 00:00:00 2001 From: Michael Meding Date: Sun, 3 May 2026 18:19:08 -0300 Subject: [PATCH] feat(chat): lighten Foundation context Business rationale: Apple's Foundation model is useful for local, private chat, but its small context window makes the default Osaurus harness too heavy if every baseline tool and memory section is injected up front. This keeps the harness trustworthy by making Foundation chats start light while still allowing explicit on-demand tools when the agent or user needs them. Coding rationale: Reuse the existing ContextSizeResolver from main instead of adding a second Foundation detector. Treat tiny-context models as a baseline-tool trim, not a hard tool ban, so additional session-loaded tools can still be resolved. Keep gated guide text and memory estimates out of tiny prompts, and pin the behavior with SessionPreflightCacheTests while preserving the preview/send disable-info parity tests. Co-authored-by: Codex --- .../Services/Chat/ContextSizeClass.swift | 25 ++++---- .../Services/Chat/SystemPromptComposer.swift | 63 ++++++++++++------- .../Chat/SessionPreflightCacheTests.swift | 53 ++++++++++++++++ .../OsaurusCore/Views/Chat/ChatView.swift | 33 ++++++---- 4 files changed, 128 insertions(+), 46 deletions(-) diff --git a/Packages/OsaurusCore/Services/Chat/ContextSizeClass.swift b/Packages/OsaurusCore/Services/Chat/ContextSizeClass.swift index fb5b6ddb5..b32befb37 100644 --- a/Packages/OsaurusCore/Services/Chat/ContextSizeClass.swift +++ b/Packages/OsaurusCore/Services/Chat/ContextSizeClass.swift @@ -6,9 +6,8 @@ // prompt features that don't fit into very small windows. Apple's // Foundation model has a ~4K window; even before any user message // the always-loaded tool schemas push past it. The system-prompt -// composer reads this resolver at compose time and ORs the result -// into the agent's effective tools/memory disable flags so we never -// ship a request that's already over budget. +// composer reads this resolver at compose time so it can drop default +// prompt features before they crowd out the user's actual request. // import Foundation @@ -17,12 +16,12 @@ import Foundation /// Coarse classification of a model's nominal context window. Three /// buckets are enough — the prompt composer only needs to decide -/// whether to disable tools (tiny only) and/or memory (tiny + small). +/// whether to trim baseline tools (tiny only) and/or memory (tiny + small). public enum ContextSizeClass: Sendable, Equatable { /// `<= 4096` tokens. Apple Foundation and any equally tight - /// future model. Tools, memory, and skill suggestions all auto - /// off — at this size even the always-loaded tool JSON schemas - /// cost more than the available budget. + /// future model. Memory, skill suggestions, and the default tool + /// baseline are off — explicit on-demand tools can still be added + /// when the agent or user asks for them. case tiny /// `<= 8192` tokens. Fits a reasonable chat schema but not @@ -33,10 +32,9 @@ public enum ContextSizeClass: Sendable, Equatable { /// Larger than `8192` tokens, or unknown. No auto-overrides. case normal - /// Whether this class auto-disables tools (and the entire - /// gated-section surface that depends on tools, including - /// agent-loop guidance, capability discovery, skill suggestions, - /// and the model-family nudge). + /// Whether this class auto-disables the default tool baseline and + /// its gated prompt text. Session-loaded, manual, or preflighted + /// tools can still appear when they are worth the context cost. public var disablesTools: Bool { self == .tiny } /// Whether this class auto-disables memory injection. Memory is @@ -132,9 +130,8 @@ public enum ContextSizeResolver { // duplicate the three-line check rather than spin one up just // to call it. let trimmed = modelId.trimmingCharacters(in: .whitespacesAndNewlines) - if trimmed.caseInsensitiveCompare("foundation") == .orderedSame - || trimmed.caseInsensitiveCompare("default") == .orderedSame - { + let foundationAliases = ["foundation", "default"] + if foundationAliases.contains(where: { trimmed.caseInsensitiveCompare($0) == .orderedSame }) { return (.tiny, tinyCeiling) } diff --git a/Packages/OsaurusCore/Services/Chat/SystemPromptComposer.swift b/Packages/OsaurusCore/Services/Chat/SystemPromptComposer.swift index 3ee382982..c373c4358 100644 --- a/Packages/OsaurusCore/Services/Chat/SystemPromptComposer.swift +++ b/Packages/OsaurusCore/Services/Chat/SystemPromptComposer.swift @@ -10,6 +10,9 @@ // resolves whether to use compact or full prompt variants via isLocalModel. // +// SwiftFormat owns multiline condition layout here; SwiftLint's brace rule conflicts with it. +// swiftlint:disable opening_brace + import Foundation // MARK: - SystemPromptComposer @@ -156,13 +159,12 @@ public struct SystemPromptComposer: Sendable { let canCreatePlugins = autonomousConfig.map { $0.enabled && $0.pluginCreate } ?? false let toolMode = AgentManager.shared.effectiveToolSelectionMode(for: agentId) - // Auto-disable for small-context models (Foundation et al.). - // OR into the agent's flags so every downstream gate (preflight, - // skills, agent loop, capability nudge, model family, plugin - // creator, memory assembly) cascades correctly without each - // gate having to know about the size class itself. + // Small-context models can still use tools, but they should not pay + // the startup cost of the always-loaded baseline or its guide text. let (sizeClass, contextLength) = ContextSizeResolver.resolve(modelId: model) - let effectiveToolsOff = agentToolsOff || sizeClass.disablesTools + let lightweightContext = sizeClass.disablesTools + let effectiveToolsOff = agentToolsOff + let gatedPromptSectionsOff = effectiveToolsOff || lightweightContext let memoryOff = agentMemoryOff || sizeClass.disablesMemory let contextDisable = ContextDisableInfo( sizeClass: sizeClass, @@ -174,6 +176,7 @@ public struct SystemPromptComposer: Sendable { if contextDisable != nil { trace?.set("contextSizeClass", String(describing: sizeClass)) } + trace?.set("lightweightContext", lightweightContext ? "1" : "0") // Memory is assembled here but returned separately (see ComposedContext.memorySection). // We deliberately do NOT pass `query` so the cached memory snapshot @@ -215,13 +218,15 @@ public struct SystemPromptComposer: Sendable { toolsDisabled: effectiveToolsOff, preflight: preflight, additionalToolNames: additionalToolNames, - frozenAlwaysLoadedNames: frozenAlwaysLoadedNames + frozenAlwaysLoadedNames: frozenAlwaysLoadedNames, + includeBaselineBuiltIns: !lightweightContext ) trace?.mark("resolve_tools_done") let alwaysLoadedNames = resolveAlwaysLoadedNames( tools: tools, executionMode: executionMode, - frozenAlwaysLoadedNames: frozenAlwaysLoadedNames + frozenAlwaysLoadedNames: frozenAlwaysLoadedNames, + includeBaselineBuiltIns: !lightweightContext ) // Skill suggestions: when the user's query semantically matches @@ -233,7 +238,7 @@ public struct SystemPromptComposer: Sendable { // or already loaded mid-session are filtered out so the model // doesn't see the same name twice. let skillSuggestions: [SkillTeaser] = await { - guard toolMode == .auto, !effectiveToolsOff, !query.isEmpty, + guard toolMode == .auto, !gatedPromptSectionsOff, !query.isEmpty, tools.contains(where: { $0.function.name == "capabilities_load" }) else { return [] } let alreadySurfaced = Set(preflight.companions.compactMap(\.skill?.name)) @@ -256,7 +261,7 @@ public struct SystemPromptComposer: Sendable { tools: tools, preflight: preflight, skillSuggestions: skillSuggestions, - effectiveToolsOff: effectiveToolsOff, + effectiveToolsOff: gatedPromptSectionsOff, autonomousEnabled: autonomousEnabled, canCreatePlugins: canCreatePlugins, toolMode: toolMode, @@ -499,12 +504,15 @@ public struct SystemPromptComposer: Sendable { private static func resolveAlwaysLoadedNames( tools: [Tool], executionMode: ExecutionMode, - frozenAlwaysLoadedNames: Set? + frozenAlwaysLoadedNames: Set?, + includeBaselineBuiltIns: Bool = true ) -> Set { if let frozenAlwaysLoadedNames { return frozenAlwaysLoadedNames } - let live = ToolRegistry.shared.alwaysLoadedSpecs(mode: executionMode) + let registry = ToolRegistry.shared + let live = registry.alwaysLoadedSpecs(mode: executionMode) + .filter { includeBaselineBuiltIns || registry.runtimeManagedToolNames.contains($0.function.name) } .map { $0.function.name } let resolved = Set(tools.map { $0.function.name }) return Set(live) @@ -539,7 +547,9 @@ public struct SystemPromptComposer: Sendable { let toolMode = AgentManager.shared.effectiveToolSelectionMode(for: agentId) let (sizeClass, contextLength) = ContextSizeResolver.resolve(modelId: model) - let effectiveToolsOff = agentToolsOff || sizeClass.disablesTools + let lightweightContext = sizeClass.disablesTools + let effectiveToolsOff = agentToolsOff + let gatedPromptSectionsOff = effectiveToolsOff || lightweightContext let contextDisable = ContextDisableInfo( sizeClass: sizeClass, modelId: model, @@ -551,7 +561,8 @@ public struct SystemPromptComposer: Sendable { let tools = resolveTools( agentId: agentId, executionMode: executionMode, - toolsDisabled: effectiveToolsOff + toolsDisabled: effectiveToolsOff, + includeBaselineBuiltIns: !lightweightContext ) appendGatedSections( @@ -561,7 +572,7 @@ public struct SystemPromptComposer: Sendable { model: model, tools: tools, preflight: .empty, - effectiveToolsOff: effectiveToolsOff, + effectiveToolsOff: gatedPromptSectionsOff, autonomousEnabled: autonomousEnabled, canCreatePlugins: canCreatePlugins, toolMode: toolMode @@ -570,7 +581,8 @@ public struct SystemPromptComposer: Sendable { let alwaysLoadedNames = resolveAlwaysLoadedNames( tools: tools, executionMode: executionMode, - frozenAlwaysLoadedNames: nil + frozenAlwaysLoadedNames: nil, + includeBaselineBuiltIns: !lightweightContext ) let manifest = composer.manifest() @@ -788,10 +800,12 @@ public struct SystemPromptComposer: Sendable { toolsDisabled: Bool = false, preflight: PreflightResult = .empty, additionalToolNames: Set = [], - frozenAlwaysLoadedNames: Set? = nil + frozenAlwaysLoadedNames: Set? = nil, + includeBaselineBuiltIns: Bool = true ) -> [Tool] { guard !toolsDisabled else { return [] } + let registry = ToolRegistry.shared let toolMode = AgentManager.shared.effectiveToolSelectionMode(for: agentId) let isManual = toolMode == .manual @@ -816,7 +830,7 @@ public struct SystemPromptComposer: Sendable { // Late-arriving plugin / MCP tools still need explicit // `capabilities_load` to appear — that path is the only sanctioned // way to grow the dynamic surface mid-session. - let liveSandboxNames = ToolRegistry.shared.builtInSandboxToolNamesSnapshot + let liveSandboxNames = registry.builtInSandboxToolNamesSnapshot let filtered: ([Tool]) -> [Tool] = { specs in specs.filter { spec in let name = spec.function.name @@ -833,18 +847,23 @@ public struct SystemPromptComposer: Sendable { // Manual mode opts out of the LLM-driven preflight only — it does // NOT strip the always-loaded surface (the chat layer depends on // the loop tools). - add(filtered(ToolRegistry.shared.alwaysLoadedSpecs(mode: executionMode))) + let baselineSpecs = registry.alwaysLoadedSpecs(mode: executionMode) + let includedBaselineSpecs = + includeBaselineBuiltIns + ? baselineSpecs + : baselineSpecs.filter { registry.runtimeManagedToolNames.contains($0.function.name) } + add(filtered(includedBaselineSpecs)) if isManual { if let manualNames = AgentManager.shared.effectiveManualToolNames(for: agentId) { - add(ToolRegistry.shared.specs(forTools: manualNames)) + add(registry.specs(forTools: manualNames)) } } else { add(preflight.toolSpecs) } if !additionalToolNames.isEmpty { - add(ToolRegistry.shared.specs(forTools: Array(additionalToolNames))) + add(registry.specs(forTools: Array(additionalToolNames))) } return canonicalToolOrder(Array(byName.values)) @@ -1059,3 +1078,5 @@ public struct SystemPromptComposer: Sendable { mergeSystemContent(content, into: &messages, prepend: false) } } + +// swiftlint:enable opening_brace diff --git a/Packages/OsaurusCore/Tests/Chat/SessionPreflightCacheTests.swift b/Packages/OsaurusCore/Tests/Chat/SessionPreflightCacheTests.swift index ba7d0be48..41def72ca 100644 --- a/Packages/OsaurusCore/Tests/Chat/SessionPreflightCacheTests.swift +++ b/Packages/OsaurusCore/Tests/Chat/SessionPreflightCacheTests.swift @@ -96,6 +96,59 @@ struct SessionPreflightCacheTests { } } + @Test("foundation context omits baseline tools by default") + func foundationContext_omitsBaselineToolsByDefault() async { + await withSessionPreflightAgent { agentId in + + let ctx = await SystemPromptComposer.composeChatContext( + agentId: agentId, + executionMode: .none, + model: "foundation" + ) + + #expect(ctx.tools.isEmpty) + #expect(ctx.alwaysLoadedNames.isEmpty) + #expect(ctx.prompt.contains("## Agent loop") == false) + #expect(ctx.prompt.contains("## Discovering more tools") == false) + } + } + + @Test("foundation context keeps session-loaded tools on demand") + func foundationContext_keepsLoadedToolsOnDemand() async { + await withSessionPreflightAgent { agentId in + + let ctx = await SystemPromptComposer.composeChatContext( + agentId: agentId, + executionMode: .none, + model: "foundation", + additionalToolNames: ["search_memory"] + ) + + let names = Set(ctx.tools.map { $0.function.name }) + #expect(names == ["search_memory"]) + #expect(ctx.alwaysLoadedNames.isEmpty) + #expect(ctx.prompt.contains("## Agent loop") == false) + #expect(ctx.prompt.contains("## Discovering more tools") == false) + } + } + + @Test("non-foundation context keeps baseline tools") + func nonFoundationContext_keepsBaselineTools() async { + await withSessionPreflightAgent { agentId in + + let ctx = await SystemPromptComposer.composeChatContext( + agentId: agentId, + executionMode: .none, + model: "anthropic/claude-haiku-4-5" + ) + + let names = Set(ctx.tools.map { $0.function.name }) + #expect(names.contains("todo")) + #expect(names.contains("capabilities_search")) + #expect(ctx.alwaysLoadedNames.contains("todo")) + } + } + private func withSessionPreflightAgent( _ body: @MainActor @Sendable (UUID) async -> Void ) async { diff --git a/Packages/OsaurusCore/Views/Chat/ChatView.swift b/Packages/OsaurusCore/Views/Chat/ChatView.swift index 26a8dc071..dd0afaa69 100644 --- a/Packages/OsaurusCore/Views/Chat/ChatView.swift +++ b/Packages/OsaurusCore/Views/Chat/ChatView.swift @@ -5,6 +5,9 @@ // Created by Terence on 10/26/25. // +// SwiftFormat owns multiline condition layout here; SwiftLint's brace rule conflicts with it. +// swiftlint:disable opening_brace + import AppKit import Combine import LocalAuthentication @@ -50,7 +53,7 @@ final class ChatSession: ObservableObject { let expandedBlocksStore = ExpandedBlocksStore() @Published var input: String = "" @Published var pendingAttachments: [Attachment] = [] - @Published var selectedModel: String? = nil + @Published var selectedModel: String? @Published var pickerItems: [ModelPickerItem] = [] @Published var activeModelOptions: [String: ModelOptionValue] = [:] @Published var hasAnyModel: Bool = false @@ -473,10 +476,14 @@ final class ChatSession: ObservableObject { executionMode: executionMode, model: selectedModel ) + let memoryTokens = + preview.contextDisable?.disabledMemory == true + ? 0 + : cachedMemoryTokens return .from( manifest: preview.manifest, toolTokens: preview.toolTokens, - memoryTokens: cachedMemoryTokens, + memoryTokens: memoryTokens, conversationTokens: conversationTokens, inputTokens: inputTokens, outputTokens: outputTokens @@ -2040,13 +2047,13 @@ struct ChatView: View { @State private var editText: String = "" @State private var userImagePreview: NSImage? // Bonjour agent connection - @State private var pendingDiscoveredAgent: DiscoveredAgent? = nil + @State private var pendingDiscoveredAgent: DiscoveredAgent? // Minimap @State private var activeMinimapTurnId: UUID? @State private var scrollToTurnId: UUID? @State private var scrollToTurnTrigger: Int = 0 // What's New modal - @State private var pendingWhatsNew: WhatsNewRelease? = nil + @State private var pendingWhatsNew: WhatsNewRelease? /// Convenience accessor for the window's theme private var theme: ThemeProtocol { windowState.theme } @@ -2116,6 +2123,7 @@ struct ChatView: View { } var body: some View { + // swiftlint:disable:next redundant_discardable_let let _ = ChatPerfTrace.shared.count("body.ChatView") chatModeContent .themedAlertScope(.chat(windowState.windowId)) @@ -2759,11 +2767,12 @@ private struct IsolatedThreadView: View { let onConfirmEdit: (() -> Void)? let onCancelEdit: (() -> Void)? let onUserImagePreview: ((String) -> Void)? - var onVisibleTopUserTurnChanged: ((UUID?) -> Void)? = nil - var scrollToTurnId: UUID? = nil + var onVisibleTopUserTurnChanged: ((UUID?) -> Void)? + var scrollToTurnId: UUID? var scrollToTurnTrigger: Int = 0 var body: some View { + // swiftlint:disable:next redundant_discardable_let let _ = ChatPerfTrace.shared.count("body.IsolatedThreadView") MessageThreadView( blocks: store.blocks, @@ -3030,7 +3039,7 @@ private struct PairingSheet: View { let onCancel: () -> Void @State private var isPairing = false - @State private var errorMessage: String? = nil + @State private var errorMessage: String? @Environment(\.theme) private var theme var body: some View { @@ -3136,17 +3145,17 @@ private enum PairingClient { let context = LAContext() context.touchIDAuthenticationAllowableReuseDuration = 300 - var masterKey = try MasterKey.getPrivateKey(context: context) + var privateKey = try MasterKey.getPrivateKey(context: context) defer { - masterKey.withUnsafeMutableBytes { ptr in + privateKey.withUnsafeMutableBytes { ptr in if let base = ptr.baseAddress { memset(base, 0, ptr.count) } } } - let connectorAddress = try PairingKey.deriveAddress(masterKey: masterKey) + let connectorAddress = try PairingKey.deriveAddress(masterKey: privateKey) let nonce = UUID().uuidString - let signature = try PairingKey.sign(payload: Data(nonce.utf8), masterKey: masterKey) + let signature = try PairingKey.sign(payload: Data(nonce.utf8), masterKey: privateKey) let hexSig = "0x" + signature.hexEncodedString let rawHost = agent.host ?? "" @@ -3185,3 +3194,5 @@ private enum PairingClient { // MARK: - Shared Header Components // HeaderActionButton, SettingsButton, CloseButton, PinButton are now in SharedHeaderComponents.swift + +// swiftlint:enable opening_brace