Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 11 additions & 14 deletions Packages/OsaurusCore/Services/Chat/ContextSizeClass.swift
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,8 @@
// prompt features that don't fit into very small windows. Apple's
// Foundation model has a ~4K window; even before any user message
// the always-loaded tool schemas push past it. The system-prompt
// composer reads this resolver at compose time and ORs the result
// into the agent's effective tools/memory disable flags so we never
// ship a request that's already over budget.
// composer reads this resolver at compose time so it can drop default
// prompt features before they crowd out the user's actual request.
//

import Foundation
Expand All @@ -17,12 +16,12 @@ import Foundation

/// Coarse classification of a model's nominal context window. Three
/// buckets are enough — the prompt composer only needs to decide
/// whether to disable tools (tiny only) and/or memory (tiny + small).
/// whether to trim baseline tools (tiny only) and/or memory (tiny + small).
public enum ContextSizeClass: Sendable, Equatable {
/// `<= 4096` tokens. Apple Foundation and any equally tight
/// future model. Tools, memory, and skill suggestions all auto
/// off — at this size even the always-loaded tool JSON schemas
/// cost more than the available budget.
/// future model. Memory, skill suggestions, and the default tool
/// baseline are off — explicit on-demand tools can still be added
/// when the agent or user asks for them.
case tiny

/// `<= 8192` tokens. Fits a reasonable chat schema but not
Expand All @@ -33,10 +32,9 @@ public enum ContextSizeClass: Sendable, Equatable {
/// Larger than `8192` tokens, or unknown. No auto-overrides.
case normal

/// Whether this class auto-disables tools (and the entire
/// gated-section surface that depends on tools, including
/// agent-loop guidance, capability discovery, skill suggestions,
/// and the model-family nudge).
/// Whether this class auto-disables the default tool baseline and
/// its gated prompt text. Session-loaded, manual, or preflighted
/// tools can still appear when they are worth the context cost.
public var disablesTools: Bool { self == .tiny }

/// Whether this class auto-disables memory injection. Memory is
Expand Down Expand Up @@ -132,9 +130,8 @@ public enum ContextSizeResolver {
// duplicate the three-line check rather than spin one up just
// to call it.
let trimmed = modelId.trimmingCharacters(in: .whitespacesAndNewlines)
if trimmed.caseInsensitiveCompare("foundation") == .orderedSame
|| trimmed.caseInsensitiveCompare("default") == .orderedSame
{
let foundationAliases = ["foundation", "default"]
if foundationAliases.contains(where: { trimmed.caseInsensitiveCompare($0) == .orderedSame }) {
return (.tiny, tinyCeiling)
}

Expand Down
63 changes: 42 additions & 21 deletions Packages/OsaurusCore/Services/Chat/SystemPromptComposer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
// resolves whether to use compact or full prompt variants via isLocalModel.
//

// SwiftFormat owns multiline condition layout here; SwiftLint's brace rule conflicts with it.
// swiftlint:disable opening_brace

import Foundation

// MARK: - SystemPromptComposer
Expand Down Expand Up @@ -156,13 +159,12 @@ public struct SystemPromptComposer: Sendable {
let canCreatePlugins = autonomousConfig.map { $0.enabled && $0.pluginCreate } ?? false
let toolMode = AgentManager.shared.effectiveToolSelectionMode(for: agentId)

// Auto-disable for small-context models (Foundation et al.).
// OR into the agent's flags so every downstream gate (preflight,
// skills, agent loop, capability nudge, model family, plugin
// creator, memory assembly) cascades correctly without each
// gate having to know about the size class itself.
// Small-context models can still use tools, but they should not pay
// the startup cost of the always-loaded baseline or its guide text.
let (sizeClass, contextLength) = ContextSizeResolver.resolve(modelId: model)
let effectiveToolsOff = agentToolsOff || sizeClass.disablesTools
let lightweightContext = sizeClass.disablesTools
let effectiveToolsOff = agentToolsOff
let gatedPromptSectionsOff = effectiveToolsOff || lightweightContext
let memoryOff = agentMemoryOff || sizeClass.disablesMemory
let contextDisable = ContextDisableInfo(
sizeClass: sizeClass,
Expand All @@ -174,6 +176,7 @@ public struct SystemPromptComposer: Sendable {
if contextDisable != nil {
trace?.set("contextSizeClass", String(describing: sizeClass))
}
trace?.set("lightweightContext", lightweightContext ? "1" : "0")

// Memory is assembled here but returned separately (see ComposedContext.memorySection).
// We deliberately do NOT pass `query` so the cached memory snapshot
Expand Down Expand Up @@ -215,13 +218,15 @@ public struct SystemPromptComposer: Sendable {
toolsDisabled: effectiveToolsOff,
preflight: preflight,
additionalToolNames: additionalToolNames,
frozenAlwaysLoadedNames: frozenAlwaysLoadedNames
frozenAlwaysLoadedNames: frozenAlwaysLoadedNames,
includeBaselineBuiltIns: !lightweightContext
)
trace?.mark("resolve_tools_done")
let alwaysLoadedNames = resolveAlwaysLoadedNames(
tools: tools,
executionMode: executionMode,
frozenAlwaysLoadedNames: frozenAlwaysLoadedNames
frozenAlwaysLoadedNames: frozenAlwaysLoadedNames,
includeBaselineBuiltIns: !lightweightContext
)

// Skill suggestions: when the user's query semantically matches
Expand All @@ -233,7 +238,7 @@ public struct SystemPromptComposer: Sendable {
// or already loaded mid-session are filtered out so the model
// doesn't see the same name twice.
let skillSuggestions: [SkillTeaser] = await {
guard toolMode == .auto, !effectiveToolsOff, !query.isEmpty,
guard toolMode == .auto, !gatedPromptSectionsOff, !query.isEmpty,
tools.contains(where: { $0.function.name == "capabilities_load" })
else { return [] }
let alreadySurfaced = Set(preflight.companions.compactMap(\.skill?.name))
Expand All @@ -256,7 +261,7 @@ public struct SystemPromptComposer: Sendable {
tools: tools,
preflight: preflight,
skillSuggestions: skillSuggestions,
effectiveToolsOff: effectiveToolsOff,
effectiveToolsOff: gatedPromptSectionsOff,
autonomousEnabled: autonomousEnabled,
canCreatePlugins: canCreatePlugins,
toolMode: toolMode,
Expand Down Expand Up @@ -499,12 +504,15 @@ public struct SystemPromptComposer: Sendable {
private static func resolveAlwaysLoadedNames(
tools: [Tool],
executionMode: ExecutionMode,
frozenAlwaysLoadedNames: Set<String>?
frozenAlwaysLoadedNames: Set<String>?,
includeBaselineBuiltIns: Bool = true
) -> Set<String> {
if let frozenAlwaysLoadedNames {
return frozenAlwaysLoadedNames
}
let live = ToolRegistry.shared.alwaysLoadedSpecs(mode: executionMode)
let registry = ToolRegistry.shared
let live = registry.alwaysLoadedSpecs(mode: executionMode)
.filter { includeBaselineBuiltIns || registry.runtimeManagedToolNames.contains($0.function.name) }
.map { $0.function.name }
let resolved = Set(tools.map { $0.function.name })
return Set(live)
Expand Down Expand Up @@ -539,7 +547,9 @@ public struct SystemPromptComposer: Sendable {
let toolMode = AgentManager.shared.effectiveToolSelectionMode(for: agentId)

let (sizeClass, contextLength) = ContextSizeResolver.resolve(modelId: model)
let effectiveToolsOff = agentToolsOff || sizeClass.disablesTools
let lightweightContext = sizeClass.disablesTools
let effectiveToolsOff = agentToolsOff
let gatedPromptSectionsOff = effectiveToolsOff || lightweightContext
let contextDisable = ContextDisableInfo(
sizeClass: sizeClass,
modelId: model,
Expand All @@ -551,7 +561,8 @@ public struct SystemPromptComposer: Sendable {
let tools = resolveTools(
agentId: agentId,
executionMode: executionMode,
toolsDisabled: effectiveToolsOff
toolsDisabled: effectiveToolsOff,
includeBaselineBuiltIns: !lightweightContext
)

appendGatedSections(
Expand All @@ -561,7 +572,7 @@ public struct SystemPromptComposer: Sendable {
model: model,
tools: tools,
preflight: .empty,
effectiveToolsOff: effectiveToolsOff,
effectiveToolsOff: gatedPromptSectionsOff,
autonomousEnabled: autonomousEnabled,
canCreatePlugins: canCreatePlugins,
toolMode: toolMode
Expand All @@ -570,7 +581,8 @@ public struct SystemPromptComposer: Sendable {
let alwaysLoadedNames = resolveAlwaysLoadedNames(
tools: tools,
executionMode: executionMode,
frozenAlwaysLoadedNames: nil
frozenAlwaysLoadedNames: nil,
includeBaselineBuiltIns: !lightweightContext
)

let manifest = composer.manifest()
Expand Down Expand Up @@ -788,10 +800,12 @@ public struct SystemPromptComposer: Sendable {
toolsDisabled: Bool = false,
preflight: PreflightResult = .empty,
additionalToolNames: Set<String> = [],
frozenAlwaysLoadedNames: Set<String>? = nil
frozenAlwaysLoadedNames: Set<String>? = nil,
includeBaselineBuiltIns: Bool = true
) -> [Tool] {
guard !toolsDisabled else { return [] }

let registry = ToolRegistry.shared
let toolMode = AgentManager.shared.effectiveToolSelectionMode(for: agentId)
let isManual = toolMode == .manual

Expand All @@ -816,7 +830,7 @@ public struct SystemPromptComposer: Sendable {
// Late-arriving plugin / MCP tools still need explicit
// `capabilities_load` to appear — that path is the only sanctioned
// way to grow the dynamic surface mid-session.
let liveSandboxNames = ToolRegistry.shared.builtInSandboxToolNamesSnapshot
let liveSandboxNames = registry.builtInSandboxToolNamesSnapshot
let filtered: ([Tool]) -> [Tool] = { specs in
specs.filter { spec in
let name = spec.function.name
Expand All @@ -833,18 +847,23 @@ public struct SystemPromptComposer: Sendable {
// Manual mode opts out of the LLM-driven preflight only — it does
// NOT strip the always-loaded surface (the chat layer depends on
// the loop tools).
add(filtered(ToolRegistry.shared.alwaysLoadedSpecs(mode: executionMode)))
let baselineSpecs = registry.alwaysLoadedSpecs(mode: executionMode)
let includedBaselineSpecs =
includeBaselineBuiltIns
? baselineSpecs
: baselineSpecs.filter { registry.runtimeManagedToolNames.contains($0.function.name) }
add(filtered(includedBaselineSpecs))

if isManual {
if let manualNames = AgentManager.shared.effectiveManualToolNames(for: agentId) {
add(ToolRegistry.shared.specs(forTools: manualNames))
add(registry.specs(forTools: manualNames))
}
} else {
add(preflight.toolSpecs)
}

if !additionalToolNames.isEmpty {
add(ToolRegistry.shared.specs(forTools: Array(additionalToolNames)))
add(registry.specs(forTools: Array(additionalToolNames)))
}

return canonicalToolOrder(Array(byName.values))
Expand Down Expand Up @@ -1059,3 +1078,5 @@ public struct SystemPromptComposer: Sendable {
mergeSystemContent(content, into: &messages, prepend: false)
}
}

// swiftlint:enable opening_brace
53 changes: 53 additions & 0 deletions Packages/OsaurusCore/Tests/Chat/SessionPreflightCacheTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,59 @@ struct SessionPreflightCacheTests {
}
}

/// Composes a chat context for the `foundation` model with no extra tool
/// names and checks that nothing from the default baseline shows up:
/// no tools, no always-loaded names, and neither of the tool-gated
/// prompt headings.
@Test("foundation context omits baseline tools by default")
func foundationContext_omitsBaselineToolsByDefault() async {
    await withSessionPreflightAgent { agentId in
        let context = await SystemPromptComposer.composeChatContext(
            agentId: agentId,
            executionMode: .none,
            model: "foundation"
        )

        // Tool surface must be completely empty.
        #expect(context.tools.isEmpty)
        #expect(context.alwaysLoadedNames.isEmpty)

        // Tool-gated guidance headings must not be rendered.
        #expect(!context.prompt.contains("## Agent loop"))
        #expect(!context.prompt.contains("## Discovering more tools"))
    }
}

/// Composes a chat context for the `foundation` model while explicitly
/// requesting `search_memory` via `additionalToolNames`, and checks that
/// exactly that tool is present while the always-loaded set and the
/// tool-gated prompt headings stay absent.
@Test("foundation context keeps session-loaded tools on demand")
func foundationContext_keepsLoadedToolsOnDemand() async {
    await withSessionPreflightAgent { agentId in
        let context = await SystemPromptComposer.composeChatContext(
            agentId: agentId,
            executionMode: .none,
            model: "foundation",
            additionalToolNames: ["search_memory"]
        )

        // Only the explicitly requested tool may appear.
        let toolNames = Set(context.tools.map(\.function.name))
        #expect(toolNames == ["search_memory"])
        #expect(context.alwaysLoadedNames.isEmpty)

        // Tool-gated guidance headings must not be rendered.
        #expect(!context.prompt.contains("## Agent loop"))
        #expect(!context.prompt.contains("## Discovering more tools"))
    }
}

/// Composes a chat context for a non-foundation model id and checks that
/// the `todo` and `capabilities_search` tools are present and that `todo`
/// is reported among the always-loaded names.
@Test("non-foundation context keeps baseline tools")
func nonFoundationContext_keepsBaselineTools() async {
    await withSessionPreflightAgent { agentId in
        let context = await SystemPromptComposer.composeChatContext(
            agentId: agentId,
            executionMode: .none,
            model: "anthropic/claude-haiku-4-5"
        )

        let toolNames = Set(context.tools.map(\.function.name))
        #expect(toolNames.contains("todo"))
        #expect(toolNames.contains("capabilities_search"))
        #expect(context.alwaysLoadedNames.contains("todo"))
    }
}

private func withSessionPreflightAgent(
_ body: @MainActor @Sendable (UUID) async -> Void
) async {
Expand Down
Loading
Loading