From 09fff9599c73a87d4f0e0aaa2ae5165bb1f1949b Mon Sep 17 00:00:00 2001
From: Michael Meding
Date: Sun, 3 May 2026 18:36:22 -0300
Subject: [PATCH] feat(chat): add LLM capability snapshot

Business rationale: model routing and prompt/tool decisions are part of the
Osaurus harness, and users need those decisions to stay consistent across
local, Foundation, and remote providers without each call site guessing
differently.

Coding rationale: the snapshot centralizes provider, runtime, modality,
reasoning, tool, and context traits in one value type; ChatEngine and
ModelService consume that contract instead of ad hoc checks, while tests pin
both service dispatch and provider mapping, including Azure OpenAI.

Co-authored-by: Michael Meding
---
 .../Configuration/LLMCapabilitySnapshot.swift | 327 ++++++++++++++++++
 .../Services/Chat/ChatEngine.swift            |  80 ++++-
 .../Services/Inference/ModelService.swift     |   9 +-
 .../Tests/Chat/ChatEngineTests.swift          |  72 ++++
 .../Model/LLMCapabilitySnapshotTests.swift    | 162 +++++++++
 5 files changed, 630 insertions(+), 20 deletions(-)
 create mode 100644 Packages/OsaurusCore/Models/Configuration/LLMCapabilitySnapshot.swift
 create mode 100644 Packages/OsaurusCore/Tests/Model/LLMCapabilitySnapshotTests.swift

diff --git a/Packages/OsaurusCore/Models/Configuration/LLMCapabilitySnapshot.swift b/Packages/OsaurusCore/Models/Configuration/LLMCapabilitySnapshot.swift
new file mode 100644
index 000000000..4314043dd
--- /dev/null
+++ b/Packages/OsaurusCore/Models/Configuration/LLMCapabilitySnapshot.swift
@@ -0,0 +1,327 @@
+//
+//  LLMCapabilitySnapshot.swift
+//  osaurus
+//
+//  Central model/provider capability contract used to keep UI options,
+//  prompt guidance, request construction, and tests aligned.
+//
+
+import Foundation
+
+enum LLMProviderKind: String, Sendable, Equatable {
+  case foundation
+  case localMLX
+  case remoteOpenAILegacy
+  case remoteAnthropic
+  case remoteOpenResponses
+  case remoteOpenAICodex
+  case remoteGemini
+  case remoteOsaurus
+  case unknown
+}
+
+enum LLMRuntimeKind: String, Sendable, Equatable {
+  case foundation
+  case localMLX
+  case remote
+  case osaurusAgent
+  case unknown
+}
+
+enum LLMToolCallMode: String, Sendable, Equatable {
+  case nativeStructured
+  case adapterStructured
+  case serverSideAgent
+  case textFallback
+  case none
+}
+
+enum LLMReasoningMode: Sendable, Equatable {
+  case none
+  case effort(optionId: String, levels: [String])
+  case toggle(optionId: String, inverted: Bool)
+  case providerSpecific(optionId: String)
+
+  var optionId: String? {
+    switch self {
+    case .none:
+      return nil
+    case .effort(let optionId, _), .toggle(let optionId, _), .providerSpecific(let optionId):
+      return optionId
+    }
+  }
+}
+
+enum LLMReasoningStreamMode: String, Sendable, Equatable {
+  case none
+  case native
+  case sentinel
+}
+
+struct LLMModalities: OptionSet, Sendable, Equatable {
+  let rawValue: Int
+
+  static let textInput = LLMModalities(rawValue: 1 << 0)
+  static let imageInput = LLMModalities(rawValue: 1 << 1)
+  static let textOutput = LLMModalities(rawValue: 1 << 2)
+  static let imageOutput = LLMModalities(rawValue: 1 << 3)
+}
+
+enum LLMRequestParameter: String, Sendable, Hashable {
+  case temperature
+  case topP = "top_p"
+  case reasoningEffort = "reasoning_effort"
+  case reasoning
+  case tools
+  case toolChoice = "tool_choice"
+  case imageOptions = "image_options"
+}
+
+struct LLMCapabilitySnapshot: Sendable {
+  let modelId: String
+  let providerKind: LLMProviderKind
+  let runtimeKind: LLMRuntimeKind
+  let family: ModelFamily
+  let contextWindowTokens: Int
+  let defaultMaxOutputTokens: Int
+  let supportsStreaming: Bool
+  let toolCallMode: LLMToolCallMode
+  let reasoningMode: LLMReasoningMode
+  let reasoningStreamMode: LLMReasoningStreamMode
+  let inputModalities: LLMModalities
+  let outputModalities: LLMModalities
+  let unsupportedParameters: Set<LLMRequestParameter>
+  let optionDefinitions: [ModelOptionDefinition]
+
+  var diagnosticID: String {
+    [
+      providerKind.rawValue,
+      runtimeKind.rawValue,
+      family.rawValue,
+      toolCallMode.rawValue,
+      reasoningStreamMode.rawValue,
+      String(contextWindowTokens),
+    ].joined(separator: "/")
+  }
+}
+
+enum LLMCapabilityResolver {
+  static let defaultContextWindowTokens = 128_000
+  static let defaultMaxOutputTokens = 16_384
+
+  static func resolve(
+    modelId rawModelId: String?,
+    providerType: RemoteProviderType? = nil,
+    runtimeKind runtimeHint: LLMRuntimeKind? = nil,
+    contextWindowTokens contextOverride: Int? = nil
+  ) -> LLMCapabilitySnapshot {
+    let modelId = normalizedModelId(rawModelId)
+    let providerKind = resolveProviderKind(
+      modelId: modelId,
+      providerType: providerType,
+      runtimeHint: runtimeHint
+    )
+    let runtimeKind = resolveRuntimeKind(providerKind: providerKind, runtimeHint: runtimeHint)
+    let family = ModelFamilyGuidance.family(for: modelId)
+    let modelInfo = ModelInfo.load(modelId: modelId)
+    let contextWindowTokens =
+      contextOverride
+      ?? modelInfo?.model.contextLength
+      ?? defaultContextWindowTokens
+    let optionDefinitions = ModelProfileRegistry.options(for: modelId)
+    let reasoningMode = resolveReasoningMode(modelId: modelId)
+    let toolCallMode = resolveToolCallMode(providerKind: providerKind, runtimeKind: runtimeKind)
+    let inputModalities = resolveInputModalities(
+      modelId: modelId,
+      providerKind: providerKind,
+      modelInfo: modelInfo
+    )
+    let outputModalities = resolveOutputModalities(modelId: modelId, providerKind: providerKind)
+    let unsupportedParameters = resolveUnsupportedParameters(
+      modelId: modelId,
+      toolCallMode: toolCallMode,
+      reasoningMode: reasoningMode,
+      outputModalities: outputModalities
+    )
+
+    return LLMCapabilitySnapshot(
+      modelId: modelId,
+      providerKind: providerKind,
+      runtimeKind: runtimeKind,
+      family: family,
+      contextWindowTokens: contextWindowTokens,
+      defaultMaxOutputTokens: defaultMaxOutputTokens,
+      supportsStreaming: true,
+      toolCallMode: toolCallMode,
+      reasoningMode: reasoningMode,
+      reasoningStreamMode: resolveReasoningStreamMode(runtimeKind: runtimeKind, reasoningMode: reasoningMode),
+      inputModalities: inputModalities,
+      outputModalities: outputModalities,
+      unsupportedParameters: unsupportedParameters,
+      optionDefinitions: optionDefinitions
+    )
+  }
+
+  private static func normalizedModelId(_ raw: String?) -> String {
+    let trimmed = raw?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
+    return trimmed.isEmpty ? "foundation" : trimmed
+  }
+
+  private static func resolveProviderKind(
+    modelId: String,
+    providerType: RemoteProviderType?,
+    runtimeHint: LLMRuntimeKind?
+  ) -> LLMProviderKind {
+    if let providerType {
+      switch providerType {
+      case .openaiLegacy, .azureOpenAI: return .remoteOpenAILegacy
+      case .anthropic: return .remoteAnthropic
+      case .openResponses: return .remoteOpenResponses
+      case .openAICodex: return .remoteOpenAICodex
+      case .gemini: return .remoteGemini
+      case .osaurus: return .remoteOsaurus
+      }
+    }
+    // swiftlint:disable opening_brace
+    if runtimeHint == .foundation
+      || modelId.caseInsensitiveCompare("foundation") == .orderedSame
+      || modelId.caseInsensitiveCompare("default") == .orderedSame
+    {
+      return .foundation
+    }
+    // swiftlint:enable opening_brace
+    if runtimeHint == .remote { return .unknown }
+    if runtimeHint == .localMLX { return .localMLX }
+    if runtimeHint == .unknown { return .unknown }
+    return .localMLX
+  }
+
+  private static func resolveRuntimeKind(
+    providerKind: LLMProviderKind,
+    runtimeHint: LLMRuntimeKind?
+  ) -> LLMRuntimeKind {
+    if let runtimeHint { return runtimeHint }
+    switch providerKind {
+    case .foundation:
+      return .foundation
+    case .localMLX:
+      return .localMLX
+    case .remoteOsaurus:
+      return .osaurusAgent
+    case .remoteOpenAILegacy, .remoteAnthropic, .remoteOpenResponses, .remoteOpenAICodex,
+      .remoteGemini:
+      return .remote
+    case .unknown:
+      return .unknown
+    }
+  }
+
+  private static func resolveToolCallMode(
+    providerKind: LLMProviderKind,
+    runtimeKind: LLMRuntimeKind
+  ) -> LLMToolCallMode {
+    switch providerKind {
+    case .remoteOsaurus:
+      return .serverSideAgent
+    case .remoteOpenAILegacy, .remoteAnthropic, .remoteOpenResponses, .remoteOpenAICodex,
+      .remoteGemini:
+      return .adapterStructured
+    case .foundation, .localMLX:
+      return .nativeStructured
+    case .unknown:
+      return runtimeKind == .remote ? .adapterStructured : .none
+    }
+  }
+
+  private static func resolveReasoningMode(modelId: String) -> LLMReasoningMode {
+    if OpenAIReasoningProfile.matches(modelId: modelId) {
+      return .effort(
+        optionId: "reasoningEffort",
+        levels: ["minimal", "low", "medium", "high"]
+      )
+    }
+    if let thinkingOption = ModelProfileRegistry.profile(for: modelId)?.thinkingOption {
+      return .toggle(optionId: thinkingOption.id, inverted: thinkingOption.inverted)
+    }
+    return .none
+  }
+
+  private static func resolveReasoningStreamMode(
+    runtimeKind: LLMRuntimeKind,
+    reasoningMode: LLMReasoningMode
+  ) -> LLMReasoningStreamMode {
+    guard reasoningMode != .none else { return .none }
+    switch runtimeKind {
+    case .localMLX, .remote:
+      return .sentinel
+    case .foundation, .osaurusAgent, .unknown:
+      return .none
+    }
+  }
+
+  private static func resolveInputModalities(
+    modelId: String,
+    providerKind: LLMProviderKind,
+    modelInfo: ModelInfo?
+  ) -> LLMModalities {
+    var modalities: LLMModalities = [.textInput]
+    let lower = modelId.lowercased()
+    // swiftlint:disable opening_brace
+    if providerKind == .remoteGemini
+      || modelInfo?.capabilities.contains("vision") == true
+      || lower.contains("vision") || lower.contains("pixtral") || lower.contains("gpt-4o")
+      || lower.contains("gemini")
+    {
+      modalities.insert(.imageInput)
+    }
+    // swiftlint:enable opening_brace
+    return modalities
+  }
+
+  private static func resolveOutputModalities(
+    modelId: String,
+    providerKind: LLMProviderKind
+  ) -> LLMModalities {
+    var modalities: LLMModalities = [.textOutput]
+    if providerKind == .remoteGemini && isGeminiImageOutputModel(modelId) {
+      modalities.insert(.imageOutput)
+    }
+    return modalities
+  }
+
+  private static func isGeminiImageOutputModel(_ modelId: String) -> Bool {
+    Gemini31FlashImageProfile.matches(modelId: modelId)
+      || GeminiProImageProfile.matches(modelId: modelId)
+      || GeminiFlashImageProfile.matches(modelId: modelId)
+  }
+
+  private static func resolveUnsupportedParameters(
+    modelId: String,
+    toolCallMode: LLMToolCallMode,
+    reasoningMode: LLMReasoningMode,
+    outputModalities: LLMModalities
+  ) -> Set<LLMRequestParameter> {
+    var unsupported = Set<LLMRequestParameter>()
+
+    if OpenAIReasoningProfile.matches(modelId: modelId) {
+      unsupported.insert(.temperature)
+      unsupported.insert(.topP)
+    }
+    if case .effort = reasoningMode {
+      // OpenAI-style effort is the only standard request-level
+      // reasoning shape Osaurus can currently serialize.
+    } else {
+      unsupported.insert(.reasoning)
+      unsupported.insert(.reasoningEffort)
+    }
+    if toolCallMode == .none || toolCallMode == .textFallback {
+      unsupported.insert(.tools)
+      unsupported.insert(.toolChoice)
+    }
+    if !outputModalities.contains(.imageOutput) {
+      unsupported.insert(.imageOptions)
+    }
+
+    return unsupported
+  }
+}
diff --git a/Packages/OsaurusCore/Services/Chat/ChatEngine.swift b/Packages/OsaurusCore/Services/Chat/ChatEngine.swift
index 796ac0371..0313c1b68 100644
--- a/Packages/OsaurusCore/Services/Chat/ChatEngine.swift
+++ b/Packages/OsaurusCore/Services/Chat/ChatEngine.swift
@@ -7,7 +7,7 @@
 
 import Foundation
 
-actor ChatEngine: Sendable, ChatEngineProtocol {
+actor ChatEngine: ChatEngineProtocol {
   private let services: [ModelService]
   private let installedModelsProvider: @Sendable () -> [String]
 
@@ -101,20 +101,6 @@ actor ChatEngine: Sendable, ChatEngineProtocol {
     }()
     let seedBits: UInt64? = request.seed.map { UInt64(bitPattern: Int64($0)) }
     let isJSONObject = (request.response_format?.type == "json_object")
-    let params = GenerationParameters(
-      temperature: temperature,
-      maxTokens: maxTokens,
-      topPOverride: request.top_p,
-      repetitionPenalty: repPenalty,
-      frequencyPenalty: request.frequency_penalty,
-      presencePenalty: request.presence_penalty,
-      seed: seedBits,
-      jsonMode: isJSONObject,
-      modelOptions: request.modelOptions ?? [:],
-      sessionId: request.session_id,
-      ttftTrace: trace
-    )
-
     let services = self.services
     // Fetch remote services on the MainActor so routing reflects the
     // latest connected Bonjour/remote agents per request.
@@ -128,9 +114,67 @@ actor ChatEngine: Sendable, ChatEngineProtocol {
       services: services,
       remoteServices: remoteServices
     )
+    let capabilitySnapshot = Self.resolveCapabilitySnapshot(requestedModel: request.model, route: route)
+    trace?.set("llmCapability", capabilitySnapshot.diagnosticID)
+    trace?.set("llmProviderKind", capabilitySnapshot.providerKind.rawValue)
+    trace?.set("llmToolCallMode", capabilitySnapshot.toolCallMode.rawValue)
+    let params = GenerationParameters(
+      temperature: temperature,
+      maxTokens: maxTokens,
+      topPOverride: request.top_p,
+      repetitionPenalty: repPenalty,
+      frequencyPenalty: request.frequency_penalty,
+      presencePenalty: request.presence_penalty,
+      seed: seedBits,
+      jsonMode: isJSONObject,
+      modelOptions: request.modelOptions ?? [:],
+      sessionId: request.session_id,
+      ttftTrace: trace,
+      capabilitySnapshot: capabilitySnapshot
+    )
     return Dispatch(route: route, params: params, remoteServices: remoteServices)
   }
 
+  private static func resolveCapabilitySnapshot(
+    requestedModel: String?,
+    route: ModelRoute
+  ) -> LLMCapabilitySnapshot {
+    switch route {
+    case .service(let service, let effectiveModel):
+      if let remote = service as? RemoteProviderService {
+        let runtimeKind: LLMRuntimeKind =
+          remote.provider.providerType == .osaurus ? .osaurusAgent : .remote
+        return LLMCapabilityResolver.resolve(
+          modelId: effectiveModel,
+          providerType: remote.provider.providerType,
+          runtimeKind: runtimeKind
+        )
+      }
+      if service is FoundationModelService {
+        return LLMCapabilityResolver.resolve(
+          modelId: effectiveModel,
+          runtimeKind: .foundation
+        )
+      }
+      if service is MLXService {
+        return LLMCapabilityResolver.resolve(
+          modelId: effectiveModel,
+          runtimeKind: .localMLX
+        )
+      }
+      return LLMCapabilityResolver.resolve(
+        modelId: effectiveModel,
+        runtimeKind: .unknown
+      )
+
+    case .none:
+      return LLMCapabilityResolver.resolve(
+        modelId: requestedModel,
+        runtimeKind: .unknown
+      )
+    }
+  }
+
   private func estimateInputTokens(_ messages: [ChatMessage]) -> Int {
     let totalChars = messages.reduce(0) { sum, msg in
       var chars = msg.content?.count ?? 0
@@ -311,8 +355,8 @@ actor ChatEngine: Sendable, ChatEngineProtocol {
     var outputTokenCount = 0
     var deltaCount = 0
     var finishReason: InferenceLog.FinishReason = .stop
-    var errorMsg: String? = nil
-    var toolInvocation: (name: String, args: String)? = nil
+    var errorMsg: String?
+    var toolInvocation: (name: String, args: String)?
     var lastDeltaTime = startTime
 
     print("[Osaurus][Stream] Starting stream wrapper for model: \(model)")
@@ -385,7 +429,7 @@ actor ChatEngine: Sendable, ChatEngineProtocol {
     // Log the completed inference (only for Chat UI - HTTP requests are logged by HTTPHandler)
     if source == .chatUI {
       let durationMs = Date().timeIntervalSince(startTime) * 1000
-      var toolCalls: [ToolCallLog]? = nil
+      var toolCalls: [ToolCallLog]?
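+      // Promote a captured tool invocation into the log's structured form.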
       if let (name, args) = toolInvocation {
         toolCalls = [ToolCallLog(name: name, arguments: args)]
       }
diff --git a/Packages/OsaurusCore/Services/Inference/ModelService.swift b/Packages/OsaurusCore/Services/Inference/ModelService.swift
index a6591b905..17c2f8d67 100644
--- a/Packages/OsaurusCore/Services/Inference/ModelService.swift
+++ b/Packages/OsaurusCore/Services/Inference/ModelService.swift
@@ -41,6 +41,9 @@ struct GenerationParameters: Sendable {
   let sessionId: String?
   /// Optional TTFT trace for diagnostic timing instrumentation.
   let ttftTrace: TTFTTrace?
+  /// Resolved model/provider capabilities for this dispatch. Adapters use
+  /// this to avoid sending options the active backend does not support.
+  let capabilitySnapshot: LLMCapabilitySnapshot?
 
   init(
     temperature: Float?,
@@ -53,7 +56,8 @@ struct GenerationParameters: Sendable {
     jsonMode: Bool = false,
     modelOptions: [String: ModelOptionValue] = [:],
     sessionId: String? = nil,
-    ttftTrace: TTFTTrace? = nil
+    ttftTrace: TTFTTrace? = nil,
+    capabilitySnapshot: LLMCapabilitySnapshot? = nil
   ) {
     self.temperature = temperature
     self.maxTokens = maxTokens
@@ -66,6 +70,7 @@ struct GenerationParameters: Sendable {
     self.modelOptions = modelOptions
     self.sessionId = sessionId
     self.ttftTrace = ttftTrace
+    self.capabilitySnapshot = capabilitySnapshot
   }
 }
 
@@ -122,7 +127,7 @@ public enum StreamingToolHint: Sendable {
     struct Payload: Encodable { let id, name, arguments, result: String }
     let json =
       (try? JSONEncoder().encode(Payload(id: callId, name: name, arguments: arguments, result: result)))
-      .map { String(decoding: $0, as: UTF8.self) } ?? "{}"
+      .flatMap { String(bytes: $0, encoding: .utf8) } ?? "{}"
     return donePrefix + json
   }
diff --git a/Packages/OsaurusCore/Tests/Chat/ChatEngineTests.swift b/Packages/OsaurusCore/Tests/Chat/ChatEngineTests.swift
index d7af9f67a..7b1debd92 100644
--- a/Packages/OsaurusCore/Tests/Chat/ChatEngineTests.swift
+++ b/Packages/OsaurusCore/Tests/Chat/ChatEngineTests.swift
@@ -34,6 +34,78 @@ struct ChatEngineTests {
     #expect(out == "abc")
   }
 
+  @Test func streamChat_threads_capability_snapshot_to_service() async throws {
+    final class CapturingService: ModelService, @unchecked Sendable {
+      private let lock = NSLock()
+      private var captured: LLMCapabilitySnapshot?
+
+      var id: String { "capturing" }
+      func isAvailable() -> Bool { true }
+      func handles(requestedModel: String?) -> Bool { requestedModel == "gemma-capture" }
+
+      func generateOneShot(
+        messages: [ChatMessage],
+        parameters: GenerationParameters,
+        requestedModel: String?
+      ) async throws -> String {
+        capture(parameters.capabilitySnapshot)
+        return "ok"
+      }
+
+      func streamDeltas(
+        messages: [ChatMessage],
+        parameters: GenerationParameters,
+        requestedModel: String?,
+        stopSequences: [String]
+      ) async throws -> AsyncThrowingStream<String, Error> {
+        capture(parameters.capabilitySnapshot)
+        return AsyncThrowingStream { continuation in
+          continuation.yield("ok")
+          continuation.finish()
+        }
+      }
+
+      func snapshot() -> LLMCapabilitySnapshot? {
+        lock.lock()
+        defer { lock.unlock() }
+        return captured
+      }
+
+      private func capture(_ snapshot: LLMCapabilitySnapshot?) {
+        lock.lock()
+        captured = snapshot
+        lock.unlock()
+      }
+    }
+
+    let svc = CapturingService()
+    let engine = ChatEngine(services: [svc], installedModelsProvider: { [] })
+    let req = ChatCompletionRequest(
+      model: "gemma-capture",
+      messages: [ChatMessage(role: "user", content: "hi")],
+      temperature: 0.5,
+      max_tokens: 16,
+      stream: true,
+      top_p: nil,
+      frequency_penalty: nil,
+      presence_penalty: nil,
+      stop: nil,
+      n: nil,
+      tools: nil,
+      tool_choice: nil,
+      session_id: nil
+    )
+
+    let stream = try await engine.streamChat(request: req)
+    for try await _ in stream {}
+
+    let snapshot = try #require(svc.snapshot())
+    #expect(snapshot.modelId == "gemma-capture")
+    #expect(snapshot.family == .googleGemma)
+    #expect(snapshot.runtimeKind == .unknown)
+    #expect(snapshot.toolCallMode == .none)
+  }
+
   @Test func completeChat_returns_choice_success() async throws {
     let svc = FakeModelService()
     let engine = ChatEngine(services: [svc], installedModelsProvider: { [] })
diff --git a/Packages/OsaurusCore/Tests/Model/LLMCapabilitySnapshotTests.swift b/Packages/OsaurusCore/Tests/Model/LLMCapabilitySnapshotTests.swift
new file mode 100644
index 000000000..57299a550
--- /dev/null
+++ b/Packages/OsaurusCore/Tests/Model/LLMCapabilitySnapshotTests.swift
@@ -0,0 +1,162 @@
+import Foundation
+import Testing
+
+@testable import OsaurusCore
+
+@Suite("LLMCapabilityResolver")
+struct LLMCapabilitySnapshotTests {
+
+  @Test("default model resolves to Foundation text-only capabilities")
+  func defaultFoundationSnapshot() {
+    let snapshot = LLMCapabilityResolver.resolve(modelId: nil)
+
+    #expect(snapshot.modelId == "foundation")
+    #expect(snapshot.providerKind == .foundation)
+    #expect(snapshot.runtimeKind == .foundation)
+    #expect(snapshot.toolCallMode == .nativeStructured)
+    #expect(snapshot.reasoningMode == .none)
+    #expect(snapshot.reasoningStreamMode == .none)
+    #expect(snapshot.inputModalities == [.textInput])
+    #expect(snapshot.outputModalities == [.textOutput])
+    #expect(snapshot.unsupportedParameters.contains(.reasoning))
+    #expect(snapshot.unsupportedParameters.contains(.reasoningEffort))
+  }
+
+  @Test("Qwen thinking model exposes local thinking toggle")
+  func qwenThinkingSnapshot() {
+    let snapshot = LLMCapabilityResolver.resolve(modelId: "qwen3.5-35b-a3b-4bit")
+
+    #expect(snapshot.providerKind == .localMLX)
+    #expect(snapshot.runtimeKind == .localMLX)
+    #expect(snapshot.family == .glmQwen)
+    #expect(snapshot.toolCallMode == .nativeStructured)
+    #expect(snapshot.reasoningStreamMode == .sentinel)
+    guard case .toggle(let optionId, let inverted) = snapshot.reasoningMode else {
+      #expect(Bool(false), "Qwen thinking models should expose a toggle")
+      return
+    }
+    #expect(optionId == "disableThinking")
+    #expect(inverted)
+    #expect(snapshot.optionDefinitions.map(\.id).contains("disableThinking"))
+    #expect(snapshot.unsupportedParameters.contains(.reasoning))
+    #expect(snapshot.unsupportedParameters.contains(.reasoningEffort))
+  }
+
+  @Test("Qwen coder does not expose reasoning controls")
+  func qwenCoderSnapshot() {
+    let snapshot = LLMCapabilityResolver.resolve(modelId: "qwen3-coder-plus")
+
+    #expect(snapshot.family == .glmQwen)
+    #expect(snapshot.reasoningMode == .none)
+    #expect(!snapshot.optionDefinitions.map(\.id).contains("disableThinking"))
+  }
+
+  @Test("Gemma family is identified without adding reasoning controls")
+  func gemmaSnapshot() {
+    let snapshot = LLMCapabilityResolver.resolve(modelId: "gemma-2-non-reasoning-\(UUID().uuidString)")
+
+    #expect(snapshot.family == .googleGemma)
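+    // The random suffix guarantees no registry profile matches, pinning the local-MLX default path.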
+    #expect(snapshot.providerKind == .localMLX)
+    #expect(snapshot.reasoningMode == .none)
+  }
+
+  @Test("Open Responses reasoning model omits unsupported sampling parameters")
+  func openResponsesReasoningSnapshot() {
+    let snapshot = LLMCapabilityResolver.resolve(
+      modelId: "gpt-5-mini",
+      providerType: .openResponses
+    )
+
+    #expect(snapshot.providerKind == .remoteOpenResponses)
+    #expect(snapshot.runtimeKind == .remote)
+    #expect(snapshot.toolCallMode == .adapterStructured)
+    #expect(snapshot.reasoningStreamMode == .sentinel)
+    guard case .effort(let optionId, let levels) = snapshot.reasoningMode else {
+      #expect(Bool(false), "OpenAI-style reasoning models should expose effort")
+      return
+    }
+    #expect(optionId == "reasoningEffort")
+    #expect(levels == ["minimal", "low", "medium", "high"])
+    #expect(snapshot.unsupportedParameters.contains(.temperature))
+    #expect(snapshot.unsupportedParameters.contains(.topP))
+    #expect(!snapshot.unsupportedParameters.contains(.reasoningEffort))
+  }
+
+  @Test("OpenAI Codex provider resolves as a remote adapter")
+  func openAICodexSnapshot() {
+    let snapshot = LLMCapabilityResolver.resolve(
+      modelId: "gpt-5-codex",
+      providerType: .openAICodex
+    )
+
+    #expect(snapshot.providerKind == .remoteOpenAICodex)
+    #expect(snapshot.runtimeKind == .remote)
+    #expect(snapshot.toolCallMode == .adapterStructured)
+    #expect(snapshot.family == .gptCodex)
+  }
+
+  @Test("Azure OpenAI provider resolves as an OpenAI-compatible remote adapter")
+  func azureOpenAISnapshot() {
+    let snapshot = LLMCapabilityResolver.resolve(
+      modelId: "gpt-5-mini",
+      providerType: .azureOpenAI
+    )
+
+    #expect(snapshot.providerKind == .remoteOpenAILegacy)
+    #expect(snapshot.runtimeKind == .remote)
+    #expect(snapshot.toolCallMode == .adapterStructured)
+  }
+
+  @Test("Gemini image model exposes image input and output options")
+  func geminiImageSnapshot() {
+    let snapshot = LLMCapabilityResolver.resolve(
+      modelId: "gemini-3-pro-image-preview",
+      providerType: .gemini
+    )
+
+    #expect(snapshot.providerKind == .remoteGemini)
+    #expect(snapshot.runtimeKind == .remote)
+    #expect(snapshot.inputModalities.contains(.imageInput))
+    #expect(snapshot.outputModalities.contains(.imageOutput))
+    #expect(snapshot.optionDefinitions.map(\.id).contains("aspectRatio"))
+    #expect(snapshot.optionDefinitions.map(\.id).contains("imageSize"))
+    #expect(snapshot.optionDefinitions.map(\.id).contains("outputType"))
+    #expect(!snapshot.unsupportedParameters.contains(.imageOptions))
+  }
+
+  @Test("Venice model options are surfaced without standard reasoning request fields")
+  func veniceSnapshot() {
+    let snapshot = LLMCapabilityResolver.resolve(
+      modelId: "venice-ai/llama-3.1-405b",
+      providerType: .openaiLegacy
+    )
+    let optionIds = snapshot.optionDefinitions.map(\.id)
+
+    #expect(snapshot.providerKind == .remoteOpenAILegacy)
+    #expect(optionIds.contains("enableWebSearch"))
+    #expect(optionIds.contains("disableThinking"))
+    #expect(optionIds.contains("includeVeniceSystemPrompt"))
+    #expect(snapshot.unsupportedParameters.contains(.reasoning))
+    #expect(snapshot.unsupportedParameters.contains(.reasoningEffort))
+  }
+
+  @Test("unknown remote model remains deterministic")
+  func unknownRemoteSnapshotDeterministic() {
+    let first = LLMCapabilityResolver.resolve(
+      modelId: "provider/model-x",
+      providerType: .openaiLegacy,
+      contextWindowTokens: 32_000
+    )
+    let second = LLMCapabilityResolver.resolve(
+      modelId: "provider/model-x",
+      providerType: .openaiLegacy,
+      contextWindowTokens: 32_000
+    )
+
+    #expect(first.diagnosticID == second.diagnosticID)
+    #expect(first.contextWindowTokens == 32_000)
+    #expect(first.providerKind == .remoteOpenAILegacy)
+    #expect(first.toolCallMode == .adapterStructured)
+    #expect(first.reasoningMode == .none)
+  }
+}
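-- 
Reviewer note (not part of the patch): a minimal sketch of the consumption
pattern the snapshot enables on the adapter side. `effectiveTemperature` is a
hypothetical helper, not an API added by this change; it only touches fields
defined above (GenerationParameters.capabilitySnapshot, LLMRequestParameter,
unsupportedParameters).

  // Hypothetical adapter-side gating, assuming the shapes added in this patch.
  func effectiveTemperature(_ params: GenerationParameters) -> Float? {
    // Without a snapshot, preserve the caller's value unchanged.
    guard let caps = params.capabilitySnapshot else { return params.temperature }
    // OpenAI-style reasoning models mark temperature unsupported, so omit it.
    return caps.unsupportedParameters.contains(.temperature) ? nil : params.temperature
  }

The same check generalizes to .topP, .tools, and .imageOptions, which is why
unsupportedParameters is a Set rather than individual booleans.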