From 09fff9599c73a87d4f0e0aaa2ae5165bb1f1949b Mon Sep 17 00:00:00 2001
From: Michael Meding
Date: Sun, 3 May 2026 18:36:22 -0300
Subject: [PATCH] feat(chat): add LLM capability snapshot

Business rationale: model routing and prompt/tool decisions are part of the
Osaurus harness, and users need those decisions to stay consistent across
local, Foundation, and remote providers without each call site guessing
differently.

Coding rationale: the snapshot centralizes provider, runtime, modality,
reasoning, tool, and context traits in one value type; ChatEngine and
ModelService consume that contract instead of ad hoc checks, while tests pin
both service dispatch and provider mapping, including Azure OpenAI.

Co-authored-by: Michael Meding
---
 .../Configuration/LLMCapabilitySnapshot.swift | 327 ++++++++++++++++++
 .../Services/Chat/ChatEngine.swift            |  80 ++++-
 .../Services/Inference/ModelService.swift     |   9 +-
 .../Tests/Chat/ChatEngineTests.swift          |  72 ++++
 .../Model/LLMCapabilitySnapshotTests.swift    | 162 +++++++++
 5 files changed, 630 insertions(+), 20 deletions(-)
 create mode 100644 Packages/OsaurusCore/Models/Configuration/LLMCapabilitySnapshot.swift
 create mode 100644 Packages/OsaurusCore/Tests/Model/LLMCapabilitySnapshotTests.swift

diff --git a/Packages/OsaurusCore/Models/Configuration/LLMCapabilitySnapshot.swift b/Packages/OsaurusCore/Models/Configuration/LLMCapabilitySnapshot.swift
new file mode 100644
index 000000000..4314043dd
--- /dev/null
+++ b/Packages/OsaurusCore/Models/Configuration/LLMCapabilitySnapshot.swift
@@ -0,0 +1,327 @@
+//
+//  LLMCapabilitySnapshot.swift
+//  osaurus
+//
+//  Central model/provider capability contract used to keep UI options,
+//  prompt guidance, request construction, and tests aligned.
+//
+
+import Foundation
+
+enum LLMProviderKind: String, Sendable, Equatable {
+  case foundation
+  case localMLX
+  case remoteOpenAILegacy
+  case remoteAnthropic
+  case remoteOpenResponses
+  case remoteOpenAICodex
+  case remoteGemini
+  case remoteOsaurus
+  case unknown
+}
+
+enum LLMRuntimeKind: String, Sendable, Equatable {
+  case foundation
+  case localMLX
+  case remote
+  case osaurusAgent
+  case unknown
+}
+
+enum LLMToolCallMode: String, Sendable, Equatable {
+  case nativeStructured
+  case adapterStructured
+  case serverSideAgent
+  case textFallback
+  case none
+}
+
+enum LLMReasoningMode: Sendable, Equatable {
+  case none
+  case effort(optionId: String, levels: [String])
+  case toggle(optionId: String, inverted: Bool)
+  case providerSpecific(optionId: String)
+
+  var optionId: String? {
+    switch self {
+    case .none:
+      return nil
+    case .effort(let optionId, _), .toggle(let optionId, _), .providerSpecific(let optionId):
+      return optionId
+    }
+  }
+}
+
+enum LLMReasoningStreamMode: String, Sendable, Equatable {
+  case none
+  case native
+  case sentinel
+}
+
+struct LLMModalities: OptionSet, Sendable, Equatable {
+  let rawValue: Int
+
+  static let textInput = LLMModalities(rawValue: 1 << 0)
+  static let imageInput = LLMModalities(rawValue: 1 << 1)
+  static let textOutput = LLMModalities(rawValue: 1 << 2)
+  static let imageOutput = LLMModalities(rawValue: 1 << 3)
+}
+
+enum LLMRequestParameter: String, Sendable, Hashable {
+  case temperature
+  case topP = "top_p"
+  case reasoningEffort = "reasoning_effort"
+  case reasoning
+  case tools
+  case toolChoice = "tool_choice"
+  case imageOptions = "image_options"
+}
+
+struct LLMCapabilitySnapshot: Sendable {
+  let modelId: String
+  let providerKind: LLMProviderKind
+  let runtimeKind: LLMRuntimeKind
+  let family: ModelFamily
+  let contextWindowTokens: Int
+  let defaultMaxOutputTokens: Int
+  let supportsStreaming: Bool
+  let toolCallMode: LLMToolCallMode
+  let reasoningMode: LLMReasoningMode
+  let reasoningStreamMode: LLMReasoningStreamMode
+  let inputModalities: LLMModalities
+  let outputModalities: LLMModalities
+  let unsupportedParameters: Set<LLMRequestParameter>
+  let optionDefinitions: [ModelOptionDefinition]
+
+  var diagnosticID: String {
+    [
+      providerKind.rawValue,
+      runtimeKind.rawValue,
+      family.rawValue,
+      toolCallMode.rawValue,
+      reasoningStreamMode.rawValue,
+      String(contextWindowTokens),
+    ].joined(separator: "/")
+  }
+}
+
+enum LLMCapabilityResolver {
+  static let defaultContextWindowTokens = 128_000
+  static let defaultMaxOutputTokens = 16_384
+
+  static func resolve(
+    modelId rawModelId: String?,
+    providerType: RemoteProviderType? = nil,
+    runtimeKind runtimeHint: LLMRuntimeKind? = nil,
+    contextWindowTokens contextOverride: Int? = nil
+  ) -> LLMCapabilitySnapshot {
+    let modelId = normalizedModelId(rawModelId)
+    let providerKind = resolveProviderKind(
+      modelId: modelId,
+      providerType: providerType,
+      runtimeHint: runtimeHint
+    )
+    let runtimeKind = resolveRuntimeKind(providerKind: providerKind, runtimeHint: runtimeHint)
+    let family = ModelFamilyGuidance.family(for: modelId)
+    let modelInfo = ModelInfo.load(modelId: modelId)
+    let contextWindowTokens =
+      contextOverride
+      ?? modelInfo?.model.contextLength
+      ?? defaultContextWindowTokens
+    let optionDefinitions = ModelProfileRegistry.options(for: modelId)
+    let reasoningMode = resolveReasoningMode(modelId: modelId)
+    let toolCallMode = resolveToolCallMode(providerKind: providerKind, runtimeKind: runtimeKind)
+    let inputModalities = resolveInputModalities(
+      modelId: modelId,
+      providerKind: providerKind,
+      modelInfo: modelInfo
+    )
+    let outputModalities = resolveOutputModalities(modelId: modelId, providerKind: providerKind)
+    let unsupportedParameters = resolveUnsupportedParameters(
+      modelId: modelId,
+      toolCallMode: toolCallMode,
+      reasoningMode: reasoningMode,
+      outputModalities: outputModalities
+    )
+
+    return LLMCapabilitySnapshot(
+      modelId: modelId,
+      providerKind: providerKind,
+      runtimeKind: runtimeKind,
+      family: family,
+      contextWindowTokens: contextWindowTokens,
+      defaultMaxOutputTokens: defaultMaxOutputTokens,
+      supportsStreaming: true,
+      toolCallMode: toolCallMode,
+      reasoningMode: reasoningMode,
+      reasoningStreamMode: resolveReasoningStreamMode(runtimeKind: runtimeKind, reasoningMode: reasoningMode),
+      inputModalities: inputModalities,
+      outputModalities: outputModalities,
+      unsupportedParameters: unsupportedParameters,
+      optionDefinitions: optionDefinitions
+    )
+  }
+
+  private static func normalizedModelId(_ raw: String?) -> String {
+    let trimmed = raw?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
+    return trimmed.isEmpty ? "foundation" : trimmed
+  }
+
+  private static func resolveProviderKind(
+    modelId: String,
+    providerType: RemoteProviderType?,
+    runtimeHint: LLMRuntimeKind?
+  ) -> LLMProviderKind {
+    if let providerType {
+      switch providerType {
+      case .openaiLegacy, .azureOpenAI: return .remoteOpenAILegacy
+      case .anthropic: return .remoteAnthropic
+      case .openResponses: return .remoteOpenResponses
+      case .openAICodex: return .remoteOpenAICodex
+      case .gemini: return .remoteGemini
+      case .osaurus: return .remoteOsaurus
+      }
+    }
+    // swiftlint:disable opening_brace
+    if runtimeHint == .foundation
+      || modelId.caseInsensitiveCompare("foundation") == .orderedSame
+      || modelId.caseInsensitiveCompare("default") == .orderedSame
+    {
+      return .foundation
+    }
+    // swiftlint:enable opening_brace
+    if runtimeHint == .remote { return .unknown }
+    if runtimeHint == .localMLX { return .localMLX }
+    if runtimeHint == .unknown { return .unknown }
+    return .localMLX
+  }
+
+  private static func resolveRuntimeKind(
+    providerKind: LLMProviderKind,
+    runtimeHint: LLMRuntimeKind?
+  ) -> LLMRuntimeKind {
+    if let runtimeHint { return runtimeHint }
+    switch providerKind {
+    case .foundation:
+      return .foundation
+    case .localMLX:
+      return .localMLX
+    case .remoteOsaurus:
+      return .osaurusAgent
+    case .remoteOpenAILegacy, .remoteAnthropic, .remoteOpenResponses, .remoteOpenAICodex,
+      .remoteGemini:
+      return .remote
+    case .unknown:
+      return .unknown
+    }
+  }
+
+  private static func resolveToolCallMode(
+    providerKind: LLMProviderKind,
+    runtimeKind: LLMRuntimeKind
+  ) -> LLMToolCallMode {
+    switch providerKind {
+    case .remoteOsaurus:
+      return .serverSideAgent
+    case .remoteOpenAILegacy, .remoteAnthropic, .remoteOpenResponses, .remoteOpenAICodex,
+      .remoteGemini:
+      return .adapterStructured
+    case .foundation, .localMLX:
+      return .nativeStructured
+    case .unknown:
+      return runtimeKind == .remote ? .adapterStructured : .none
+    }
+  }
+
+  private static func resolveReasoningMode(modelId: String) -> LLMReasoningMode {
+    if OpenAIReasoningProfile.matches(modelId: modelId) {
+      return .effort(
+        optionId: "reasoningEffort",
+        levels: ["minimal", "low", "medium", "high"]
+      )
+    }
+    if let thinkingOption = ModelProfileRegistry.profile(for: modelId)?.thinkingOption {
+      return .toggle(optionId: thinkingOption.id, inverted: thinkingOption.inverted)
+    }
+    return .none
+  }
+
+  private static func resolveReasoningStreamMode(
+    runtimeKind: LLMRuntimeKind,
+    reasoningMode: LLMReasoningMode
+  ) -> LLMReasoningStreamMode {
+    guard reasoningMode != .none else { return .none }
+    switch runtimeKind {
+    case .localMLX, .remote:
+      return .sentinel
+    case .foundation, .osaurusAgent, .unknown:
+      return .none
+    }
+  }
+
+  private static func resolveInputModalities(
+    modelId: String,
+    providerKind: LLMProviderKind,
+    modelInfo: ModelInfo?
+  ) -> LLMModalities {
+    var modalities: LLMModalities = [.textInput]
+    let lower = modelId.lowercased()
+    // swiftlint:disable opening_brace
+    if providerKind == .remoteGemini
+      || modelInfo?.capabilities.contains("vision") == true
+      || lower.contains("vision") || lower.contains("pixtral") || lower.contains("gpt-4o")
+      || lower.contains("gemini")
+    {
+      modalities.insert(.imageInput)
+    }
+    // swiftlint:enable opening_brace
+    return modalities
+  }
+
+  private static func resolveOutputModalities(
+    modelId: String,
+    providerKind: LLMProviderKind
+  ) -> LLMModalities {
+    var modalities: LLMModalities = [.textOutput]
+    if providerKind == .remoteGemini && isGeminiImageOutputModel(modelId) {
+      modalities.insert(.imageOutput)
+    }
+    return modalities
+  }
+
+  private static func isGeminiImageOutputModel(_ modelId: String) -> Bool {
+    Gemini31FlashImageProfile.matches(modelId: modelId)
+      || GeminiProImageProfile.matches(modelId: modelId)
+      || GeminiFlashImageProfile.matches(modelId: modelId)
+  }
+
+  private static func resolveUnsupportedParameters(
+    modelId: String,
+    toolCallMode: LLMToolCallMode,
+    reasoningMode: LLMReasoningMode,
+    outputModalities: LLMModalities
+  ) -> Set<LLMRequestParameter> {
+    var unsupported = Set<LLMRequestParameter>()
+
+    if OpenAIReasoningProfile.matches(modelId: modelId) {
+      unsupported.insert(.temperature)
+      unsupported.insert(.topP)
+    }
+    if case .effort = reasoningMode {
+      // OpenAI-style effort is the only standard request-level
+      // reasoning shape Osaurus can currently serialize.
+    } else {
+      unsupported.insert(.reasoning)
+      unsupported.insert(.reasoningEffort)
+    }
+    if toolCallMode == .none || toolCallMode == .textFallback {
+      unsupported.insert(.tools)
+      unsupported.insert(.toolChoice)
+    }
+    if !outputModalities.contains(.imageOutput) {
+      unsupported.insert(.imageOptions)
+    }
+
+    return unsupported
+  }
+}
diff --git a/Packages/OsaurusCore/Services/Chat/ChatEngine.swift b/Packages/OsaurusCore/Services/Chat/ChatEngine.swift
index 796ac0371..0313c1b68 100644
--- a/Packages/OsaurusCore/Services/Chat/ChatEngine.swift
+++ b/Packages/OsaurusCore/Services/Chat/ChatEngine.swift
@@ -7,7 +7,7 @@
 
 import Foundation
 
-actor ChatEngine: Sendable, ChatEngineProtocol {
+actor ChatEngine: ChatEngineProtocol {
   private let services: [ModelService]
   private let installedModelsProvider: @Sendable () -> [String]
 
@@ -101,20 +101,6 @@ actor ChatEngine: Sendable, ChatEngineProtocol {
     }()
     let seedBits: UInt64? = request.seed.map { UInt64(bitPattern: Int64($0)) }
     let isJSONObject = (request.response_format?.type == "json_object")
-    let params = GenerationParameters(
-      temperature: temperature,
-      maxTokens: maxTokens,
-      topPOverride: request.top_p,
-      repetitionPenalty: repPenalty,
-      frequencyPenalty: request.frequency_penalty,
-      presencePenalty: request.presence_penalty,
-      seed: seedBits,
-      jsonMode: isJSONObject,
-      modelOptions: request.modelOptions ?? [:],
-      sessionId: request.session_id,
-      ttftTrace: trace
-    )
-
     let services = self.services
     // Fetch remote services on the MainActor so routing reflects the
     // latest connected Bonjour/remote agents per request.
@@ -128,9 +114,67 @@ actor ChatEngine: Sendable, ChatEngineProtocol {
       services: services,
       remoteServices: remoteServices
     )
+    let capabilitySnapshot = Self.resolveCapabilitySnapshot(requestedModel: request.model, route: route)
+    trace?.set("llmCapability", capabilitySnapshot.diagnosticID)
+    trace?.set("llmProviderKind", capabilitySnapshot.providerKind.rawValue)
+    trace?.set("llmToolCallMode", capabilitySnapshot.toolCallMode.rawValue)
+    let params = GenerationParameters(
+      temperature: temperature,
+      maxTokens: maxTokens,
+      topPOverride: request.top_p,
+      repetitionPenalty: repPenalty,
+      frequencyPenalty: request.frequency_penalty,
+      presencePenalty: request.presence_penalty,
+      seed: seedBits,
+      jsonMode: isJSONObject,
+      modelOptions: request.modelOptions ?? [:],
+      sessionId: request.session_id,
+      ttftTrace: trace,
+      capabilitySnapshot: capabilitySnapshot
+    )
     return Dispatch(route: route, params: params, remoteServices: remoteServices)
   }
 
+  private static func resolveCapabilitySnapshot(
+    requestedModel: String?,
+    route: ModelRoute
+  ) -> LLMCapabilitySnapshot {
+    switch route {
+    case .service(let service, let effectiveModel):
+      if let remote = service as? RemoteProviderService {
+        let runtimeKind: LLMRuntimeKind =
+          remote.provider.providerType == .osaurus ? .osaurusAgent : .remote
+        return LLMCapabilityResolver.resolve(
+          modelId: effectiveModel,
+          providerType: remote.provider.providerType,
+          runtimeKind: runtimeKind
+        )
+      }
+      if service is FoundationModelService {
+        return LLMCapabilityResolver.resolve(
+          modelId: effectiveModel,
+          runtimeKind: .foundation
+        )
+      }
+      if service is MLXService {
+        return LLMCapabilityResolver.resolve(
+          modelId: effectiveModel,
+          runtimeKind: .localMLX
+        )
+      }
+      return LLMCapabilityResolver.resolve(
+        modelId: effectiveModel,
+        runtimeKind: .unknown
+      )
+
+    case .none:
+      return LLMCapabilityResolver.resolve(
+        modelId: requestedModel,
+        runtimeKind: .unknown
+      )
+    }
+  }
+
   private func estimateInputTokens(_ messages: [ChatMessage]) -> Int {
     let totalChars = messages.reduce(0) { sum, msg in
       var chars = msg.content?.count ?? 0
@@ -311,8 +355,8 @@ actor ChatEngine: Sendable, ChatEngineProtocol {
     var outputTokenCount = 0
     var deltaCount = 0
     var finishReason: InferenceLog.FinishReason = .stop
-    var errorMsg: String? = nil
-    var toolInvocation: (name: String, args: String)? = nil
+    var errorMsg: String?
+    var toolInvocation: (name: String, args: String)?
     var lastDeltaTime = startTime
 
     print("[Osaurus][Stream] Starting stream wrapper for model: \(model)")
@@ -385,7 +429,7 @@ actor ChatEngine: Sendable, ChatEngineProtocol {
     // Log the completed inference (only for Chat UI - HTTP requests are logged by HTTPHandler)
     if source == .chatUI {
       let durationMs = Date().timeIntervalSince(startTime) * 1000
-      var toolCalls: [ToolCallLog]? = nil
+      var toolCalls: [ToolCallLog]?
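+      // Promote a captured tool invocation into the log's structured form.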
       if let (name, args) = toolInvocation {
         toolCalls = [ToolCallLog(name: name, arguments: args)]
       }
diff --git a/Packages/OsaurusCore/Services/Inference/ModelService.swift b/Packages/OsaurusCore/Services/Inference/ModelService.swift
index a6591b905..17c2f8d67 100644
--- a/Packages/OsaurusCore/Services/Inference/ModelService.swift
+++ b/Packages/OsaurusCore/Services/Inference/ModelService.swift
@@ -41,6 +41,9 @@ struct GenerationParameters: Sendable {
   let sessionId: String?
   /// Optional TTFT trace for diagnostic timing instrumentation.
   let ttftTrace: TTFTTrace?
+  /// Resolved model/provider capabilities for this dispatch. Adapters use
+  /// this to avoid sending options the active backend does not support.
+  let capabilitySnapshot: LLMCapabilitySnapshot?
 
   init(
     temperature: Float?,
@@ -53,7 +56,8 @@ struct GenerationParameters: Sendable {
     jsonMode: Bool = false,
     modelOptions: [String: ModelOptionValue] = [:],
     sessionId: String? = nil,
-    ttftTrace: TTFTTrace? = nil
+    ttftTrace: TTFTTrace? = nil,
+    capabilitySnapshot: LLMCapabilitySnapshot? = nil
   ) {
     self.temperature = temperature
     self.maxTokens = maxTokens
@@ -66,6 +70,7 @@ struct GenerationParameters: Sendable {
     self.modelOptions = modelOptions
     self.sessionId = sessionId
     self.ttftTrace = ttftTrace
+    self.capabilitySnapshot = capabilitySnapshot
   }
 }
 
@@ -122,7 +127,7 @@ public enum StreamingToolHint: Sendable {
     struct Payload: Encodable { let id, name, arguments, result: String }
     let json =
       (try? JSONEncoder().encode(Payload(id: callId, name: name, arguments: arguments, result: result)))
-      .map { String(decoding: $0, as: UTF8.self) } ?? "{}"
+      .flatMap { String(bytes: $0, encoding: .utf8) } ?? "{}"
     return donePrefix + json
   }
diff --git a/Packages/OsaurusCore/Tests/Chat/ChatEngineTests.swift b/Packages/OsaurusCore/Tests/Chat/ChatEngineTests.swift
index d7af9f67a..7b1debd92 100644
--- a/Packages/OsaurusCore/Tests/Chat/ChatEngineTests.swift
+++ b/Packages/OsaurusCore/Tests/Chat/ChatEngineTests.swift
@@ -34,6 +34,78 @@ struct ChatEngineTests {
     #expect(out == "abc")
   }
 
+  @Test func streamChat_threads_capability_snapshot_to_service() async throws {
+    final class CapturingService: ModelService, @unchecked Sendable {
+      private let lock = NSLock()
+      private var captured: LLMCapabilitySnapshot?
+
+      var id: String { "capturing" }
+      func isAvailable() -> Bool { true }
+      func handles(requestedModel: String?) -> Bool { requestedModel == "gemma-capture" }
+
+      func generateOneShot(
+        messages: [ChatMessage],
+        parameters: GenerationParameters,
+        requestedModel: String?
+      ) async throws -> String {
+        capture(parameters.capabilitySnapshot)
+        return "ok"
+      }
+
+      func streamDeltas(
+        messages: [ChatMessage],
+        parameters: GenerationParameters,
+        requestedModel: String?,
+        stopSequences: [String]
+      ) async throws -> AsyncThrowingStream<String, Error> {
+        capture(parameters.capabilitySnapshot)
+        return AsyncThrowingStream { continuation in
+          continuation.yield("ok")
+          continuation.finish()
+        }
+      }
+
+      func snapshot() -> LLMCapabilitySnapshot? {
+        lock.lock()
+        defer { lock.unlock() }
+        return captured
+      }
+
+      private func capture(_ snapshot: LLMCapabilitySnapshot?) {
+        lock.lock()
+        captured = snapshot
+        lock.unlock()
+      }
+    }
+
+    let svc = CapturingService()
+    let engine = ChatEngine(services: [svc], installedModelsProvider: { [] })
+    let req = ChatCompletionRequest(
+      model: "gemma-capture",
+      messages: [ChatMessage(role: "user", content: "hi")],
+      temperature: 0.5,
+      max_tokens: 16,
+      stream: true,
+      top_p: nil,
+      frequency_penalty: nil,
+      presence_penalty: nil,
+      stop: nil,
+      n: nil,
+      tools: nil,
+      tool_choice: nil,
+      session_id: nil
+    )
+
+    let stream = try await engine.streamChat(request: req)
+    for try await _ in stream {}
+
+    let snapshot = try #require(svc.snapshot())
+    #expect(snapshot.modelId == "gemma-capture")
+    #expect(snapshot.family == .googleGemma)
+    #expect(snapshot.runtimeKind == .unknown)
+    #expect(snapshot.toolCallMode == .none)
+  }
+
   @Test func completeChat_returns_choice_success() async throws {
     let svc = FakeModelService()
     let engine = ChatEngine(services: [svc], installedModelsProvider: { [] })
diff --git a/Packages/OsaurusCore/Tests/Model/LLMCapabilitySnapshotTests.swift b/Packages/OsaurusCore/Tests/Model/LLMCapabilitySnapshotTests.swift
new file mode 100644
index 000000000..57299a550
--- /dev/null
+++ b/Packages/OsaurusCore/Tests/Model/LLMCapabilitySnapshotTests.swift
@@ -0,0 +1,162 @@
+import Foundation
+import Testing
+
+@testable import OsaurusCore
+
+@Suite("LLMCapabilityResolver")
+struct LLMCapabilitySnapshotTests {
+
+  @Test("default model resolves to Foundation text-only capabilities")
+  func defaultFoundationSnapshot() {
+    let snapshot = LLMCapabilityResolver.resolve(modelId: nil)
+
+    #expect(snapshot.modelId == "foundation")
+    #expect(snapshot.providerKind == .foundation)
+    #expect(snapshot.runtimeKind == .foundation)
+    #expect(snapshot.toolCallMode == .nativeStructured)
+    #expect(snapshot.reasoningMode == .none)
+    #expect(snapshot.reasoningStreamMode == .none)
+    #expect(snapshot.inputModalities == [.textInput])
+    #expect(snapshot.outputModalities == [.textOutput])
+    #expect(snapshot.unsupportedParameters.contains(.reasoning))
+    #expect(snapshot.unsupportedParameters.contains(.reasoningEffort))
+  }
+
+  @Test("Qwen thinking model exposes local thinking toggle")
+  func qwenThinkingSnapshot() {
+    let snapshot = LLMCapabilityResolver.resolve(modelId: "qwen3.5-35b-a3b-4bit")
+
+    #expect(snapshot.providerKind == .localMLX)
+    #expect(snapshot.runtimeKind == .localMLX)
+    #expect(snapshot.family == .glmQwen)
+    #expect(snapshot.toolCallMode == .nativeStructured)
+    #expect(snapshot.reasoningStreamMode == .sentinel)
+    guard case .toggle(let optionId, let inverted) = snapshot.reasoningMode else {
+      #expect(Bool(false), "Qwen thinking models should expose a toggle")
+      return
+    }
+    #expect(optionId == "disableThinking")
+    #expect(inverted)
+    #expect(snapshot.optionDefinitions.map(\.id).contains("disableThinking"))
+    #expect(snapshot.unsupportedParameters.contains(.reasoning))
+    #expect(snapshot.unsupportedParameters.contains(.reasoningEffort))
+  }
+
+  @Test("Qwen coder does not expose reasoning controls")
+  func qwenCoderSnapshot() {
+    let snapshot = LLMCapabilityResolver.resolve(modelId: "qwen3-coder-plus")
+
+    #expect(snapshot.family == .glmQwen)
+    #expect(snapshot.reasoningMode == .none)
+    #expect(!snapshot.optionDefinitions.map(\.id).contains("disableThinking"))
+  }
+
+  @Test("Gemma family is identified without adding reasoning controls")
+  func gemmaSnapshot() {
+    let snapshot = LLMCapabilityResolver.resolve(modelId: "gemma-2-non-reasoning-\(UUID().uuidString)")
+
+    #expect(snapshot.family == .googleGemma)
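+    // The random suffix guarantees no registry profile matches, pinning the local-MLX default path.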
+    #expect(snapshot.providerKind == .localMLX)
+    #expect(snapshot.reasoningMode == .none)
+  }
+
+  @Test("Open Responses reasoning model omits unsupported sampling parameters")
+  func openResponsesReasoningSnapshot() {
+    let snapshot = LLMCapabilityResolver.resolve(
+      modelId: "gpt-5-mini",
+      providerType: .openResponses
+    )
+
+    #expect(snapshot.providerKind == .remoteOpenResponses)
+    #expect(snapshot.runtimeKind == .remote)
+    #expect(snapshot.toolCallMode == .adapterStructured)
+    #expect(snapshot.reasoningStreamMode == .sentinel)
+    guard case .effort(let optionId, let levels) = snapshot.reasoningMode else {
+      #expect(Bool(false), "OpenAI-style reasoning models should expose effort")
+      return
+    }
+    #expect(optionId == "reasoningEffort")
+    #expect(levels == ["minimal", "low", "medium", "high"])
+    #expect(snapshot.unsupportedParameters.contains(.temperature))
+    #expect(snapshot.unsupportedParameters.contains(.topP))
+    #expect(!snapshot.unsupportedParameters.contains(.reasoningEffort))
+  }
+
+  @Test("OpenAI Codex provider resolves as a remote adapter")
+  func openAICodexSnapshot() {
+    let snapshot = LLMCapabilityResolver.resolve(
+      modelId: "gpt-5-codex",
+      providerType: .openAICodex
+    )
+
+    #expect(snapshot.providerKind == .remoteOpenAICodex)
+    #expect(snapshot.runtimeKind == .remote)
+    #expect(snapshot.toolCallMode == .adapterStructured)
+    #expect(snapshot.family == .gptCodex)
+  }
+
+  @Test("Azure OpenAI provider resolves as an OpenAI-compatible remote adapter")
+  func azureOpenAISnapshot() {
+    let snapshot = LLMCapabilityResolver.resolve(
+      modelId: "gpt-5-mini",
+      providerType: .azureOpenAI
+    )
+
+    #expect(snapshot.providerKind == .remoteOpenAILegacy)
+    #expect(snapshot.runtimeKind == .remote)
+    #expect(snapshot.toolCallMode == .adapterStructured)
+  }
+
+  @Test("Gemini image model exposes image input and output options")
+  func geminiImageSnapshot() {
+    let snapshot = LLMCapabilityResolver.resolve(
+      modelId: "gemini-3-pro-image-preview",
+      providerType: .gemini
+    )
+
+    #expect(snapshot.providerKind == .remoteGemini)
+    #expect(snapshot.runtimeKind == .remote)
+    #expect(snapshot.inputModalities.contains(.imageInput))
+    #expect(snapshot.outputModalities.contains(.imageOutput))
+    #expect(snapshot.optionDefinitions.map(\.id).contains("aspectRatio"))
+    #expect(snapshot.optionDefinitions.map(\.id).contains("imageSize"))
+    #expect(snapshot.optionDefinitions.map(\.id).contains("outputType"))
+    #expect(!snapshot.unsupportedParameters.contains(.imageOptions))
+  }
+
+  @Test("Venice model options are surfaced without standard reasoning request fields")
+  func veniceSnapshot() {
+    let snapshot = LLMCapabilityResolver.resolve(
+      modelId: "venice-ai/llama-3.1-405b",
+      providerType: .openaiLegacy
+    )
+    let optionIds = snapshot.optionDefinitions.map(\.id)
+
+    #expect(snapshot.providerKind == .remoteOpenAILegacy)
+    #expect(optionIds.contains("enableWebSearch"))
+    #expect(optionIds.contains("disableThinking"))
+    #expect(optionIds.contains("includeVeniceSystemPrompt"))
+    #expect(snapshot.unsupportedParameters.contains(.reasoning))
+    #expect(snapshot.unsupportedParameters.contains(.reasoningEffort))
+  }
+
+  @Test("unknown remote model remains deterministic")
+  func unknownRemoteSnapshotDeterministic() {
+    let first = LLMCapabilityResolver.resolve(
+      modelId: "provider/model-x",
+      providerType: .openaiLegacy,
+      contextWindowTokens: 32_000
+    )
+    let second = LLMCapabilityResolver.resolve(
+      modelId: "provider/model-x",
+      providerType: .openaiLegacy,
+      contextWindowTokens: 32_000
+    )
+
+    #expect(first.diagnosticID == second.diagnosticID)
+    #expect(first.contextWindowTokens == 32_000)
+    #expect(first.providerKind == .remoteOpenAILegacy)
+    #expect(first.toolCallMode == .adapterStructured)
+    #expect(first.reasoningMode == .none)
+  }
+}
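-- 
Reviewer note (not part of the patch): a minimal sketch of the consumption
pattern the snapshot enables on the adapter side. `effectiveTemperature` is a
hypothetical helper, not an API added by this change; it only touches fields
defined above (GenerationParameters.capabilitySnapshot, LLMRequestParameter,
unsupportedParameters).

  // Hypothetical adapter-side gating, assuming the shapes added in this patch.
  func effectiveTemperature(_ params: GenerationParameters) -> Float? {
    // Without a snapshot, preserve the caller's value unchanged.
    guard let caps = params.capabilitySnapshot else { return params.temperature }
    // OpenAI-style reasoning models mark temperature unsupported, so omit it.
    return caps.unsupportedParameters.contains(.temperature) ? nil : params.temperature
  }

The same check generalizes to .topP, .tools, and .imageOptions, which is why
unsupportedParameters is a Set rather than individual booleans.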