osaurus-ai · mimeding · Apr 30, 2026 · Apr 29, 2026 · Apr 29, 2026 · Apr 30, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -26,7 +26,7 @@ permissions:
 env:
   # Bump to invalidate every cache entry without source surgery (e.g., after a
   # known-bad cache or an Xcode toolchain upgrade we want to flush manually).
-  CACHE_SALT: v2-vmlx-5b84387
+  CACHE_SALT: v3-pr-cold-deriveddata
   # Pin Xcode so cache keys are stable across runner image bumps. When you
   # need to upgrade, change here AND in setup-xcode below.
   XCODE_VERSION: "26.4.1"

diff --git a/Packages/OsaurusCore/Models/API/OpenAIAPI.swift b/Packages/OsaurusCore/Models/API/OpenAIAPI.swift
@@ -15,15 +15,15 @@ struct OpenAIModel: Codable, Sendable {
     var object: String = "model"
     var created: Int = 0
     var owned_by: String = "osaurus"
-    var permission: [ModelPermission]? = nil
-    var root: String? = nil
-    var parent: String? = nil
-    var name: String? = nil
-    var model: String? = nil
-    var modified_at: String? = nil
-    var size: Int? = nil
-    var digest: String? = nil
-    var details: ModelDetails? = nil
+    var permission: [ModelPermission]?
+    var root: String?
+    var parent: String?
+    var name: String?
+    var model: String?
+    var modified_at: String?
+    var size: Int?
+    var digest: String?
+    var details: ModelDetails?
 
     /// Initialize from a model name (for local models)
     init(modelName: String) {
@@ -89,7 +89,8 @@ struct OpenAIModel: Codable, Sendable {
     }
 }
 
-/// Model permission object (OpenAI format)
+// Model permission object. Permission booleans mirror provider JSON, where an omitted key and `false` are not equivalent.
+// swiftlint:disable discouraged_optional_boolean
 struct ModelPermission: Codable, Sendable {
     var id: String?
     var object: String?
@@ -104,6 +105,7 @@ struct ModelPermission: Codable, Sendable {
     var group: String?
     var is_blocking: Bool?
 }
+// swiftlint:enable discouraged_optional_boolean
 
 struct ModelDetails: Codable, Sendable {
     let parent_model: String?
@@ -348,14 +350,14 @@ extension ChatMessage {
         // otherwise as string. Round-trip preserves audio/video/image parts
         // so a request that came in with `input_audio` or `video_url` is
         // re-serialized in the same shape.
-        if let parts = contentParts,
-            parts.contains(where: {
+        let shouldEncodeContentParts =
+            contentParts?.contains {
                 switch $0 {
                 case .imageUrl, .audioInput, .videoUrl: return true
                 case .text: return false
                 }
-            })
-        {
+            } == true
+        if shouldEncodeContentParts, let parts = contentParts {
             try container.encode(parts, forKey: .content)
         } else if let content = content {
             // Only encode content if it's not nil (OpenAI rejects null content)
@@ -482,7 +484,9 @@ struct ChatCompletionRequest: Codable, Sendable {
     let temperature: Float?
     let max_tokens: Int?
     /// OpenAI newer alias for max_tokens; accepted on inbound requests alongside max_tokens.
-    var max_completion_tokens: Int? = nil
+    var max_completion_tokens: Int?
+    // Omission carries provider-default semantics that differ from explicitly sending false.
+    // swiftlint:disable:next discouraged_optional_boolean
     let stream: Bool?
     let top_p: Float?
     let frequency_penalty: Float?
@@ -496,20 +500,20 @@ struct ChatCompletionRequest: Codable, Sendable {
     /// Optional session identifier for chat/history grouping. Not a KV cache key —
     /// vmlx-swift-lm's `CacheCoordinator` is content-addressed and discovers
     /// reusable prefixes autonomously.
-    var session_id: String? = nil
+    var session_id: String?
     /// Deterministic-sampling seed (OpenAI v1.x). When set, identical
     /// requests should yield identical completions on the same backend.
-    var seed: Int? = nil
+    var seed: Int?
     /// `{"type":"json_object"}` for OpenAI JSON mode. Other shapes
     /// (`text`, `json_schema`) are rejected at request validation.
-    var response_format: ResponseFormat? = nil
+    var response_format: ResponseFormat?
     /// `{"include_usage": true}` instructs the SSE producer to emit a
     /// final chunk carrying `usage` (prompt/completion/total tokens).
-    var stream_options: StreamOptions? = nil
+    var stream_options: StreamOptions?
     /// Model-specific options from the active ModelProfile (not serialized to JSON).
-    var modelOptions: [String: ModelOptionValue]? = nil
+    var modelOptions: [String: ModelOptionValue]?
     /// Optional TTFT trace for diagnostic timing (not serialized to JSON).
-    var ttftTrace: TTFTTrace? = nil
+    var ttftTrace: TTFTTrace?
 
     /// Resolved max tokens, preferring max_tokens then max_completion_tokens.
     var resolvedMaxTokens: Int? { max_tokens ?? max_completion_tokens }
@@ -554,6 +558,8 @@ struct ResponseFormat: Codable, Sendable, Equatable {
 
 /// OpenAI `stream_options` shape. Today we only honor `include_usage`.
 struct StreamOptions: Codable, Sendable, Equatable {
+    // Kept as `Bool?` on purpose: omitting the key carries provider-default semantics, which differ from sending false.
+    // swiftlint:disable:next discouraged_optional_boolean
     let include_usage: Bool?
 }
 
@@ -579,12 +585,12 @@ struct ChatCompletionResponse: Codable, Sendable {
     let model: String
     let choices: [ChatChoice]
     let usage: Usage
-    var system_fingerprint: String? = nil
+    var system_fingerprint: String?
     /// Content hash of the system prompt + tool names used for this request.
     /// Informational only — clients can use it to detect when the system
     /// prefix changed across requests. KV reuse itself is handled
     /// autonomously by vmlx's `CacheCoordinator` (content-addressed).
-    var prefix_hash: String? = nil
+    var prefix_hash: String?
 }
 
 // MARK: - Streaming Response Structures
@@ -631,12 +637,12 @@ struct ChatCompletionChunk: Codable, Sendable {
     let created: Int
     let model: String
     let choices: [StreamChoice]
-    var system_fingerprint: String? = nil
+    var system_fingerprint: String?
     /// Included only in the first chunk; see `ChatCompletionResponse.prefix_hash`.
-    var prefix_hash: String? = nil
+    var prefix_hash: String?
     /// Final usage chunk (OpenAI `stream_options.include_usage`). Populated
     /// only on the dedicated penultimate SSE chunk; nil on every other.
-    var usage: Usage? = nil
+    var usage: Usage?
 }
 
 // MARK: - Error Response
@@ -846,26 +852,26 @@ public enum JSONValue: Codable, Sendable, Equatable {
 
 extension JSONValue {
     /// Convert JSONValue to Sendable-compatible value for Jinja chat templates.
-    /// Null values are dropped from dictionaries because Jinja's `Value(any:)` cannot
-    /// handle `NSNull` and throws a runtime error. JSON Schema treats a missing key
-    /// the same as `null`, so this is semantically lossless for tool specs.
-    var sendableValue: any Sendable {
+    /// Returns `nil` for `.null`; null array elements and null-valued object keys are
+    /// omitted because Jinja's `Value(any:)` cannot handle null/optional placeholders in erased containers.
+    var sendableValue: (any Sendable)? {
         switch self {
         case .null:
-            return NSNull()
+            return nil
         case .bool(let b):
             return b
         case .number(let n):
             return n
         case .string(let s):
             return s
         case .array(let arr):
-            return arr.map { $0.sendableValue }
+            return arr.compactMap { $0.sendableValue }
         case .object(let obj):
             var dict: [String: any Sendable] = [:]
             for (k, v) in obj {
-                if case .null = v { continue }
-                dict[k] = v.sendableValue
+                if let converted = v.sendableValue {
+                    dict[k] = converted
+                }
             }
             return dict
         }
@@ -903,8 +909,8 @@ extension ToolFunction {
         if let description {
             fn["description"] = description
         }
-        if let parameters {
-            fn["parameters"] = parameters.sendableValue
+        if let parameters, let converted = parameters.sendableValue {
+            fn["parameters"] = converted
         }
         return fn
     }

diff --git a/Packages/OsaurusCore/Models/API/OpenResponsesAPI.swift b/Packages/OsaurusCore/Models/API/OpenResponsesAPI.swift
@@ -21,8 +21,11 @@ public struct OpenResponsesRequest: Codable, Sendable {
     public let model: String
     /// Input content - can be a string or array of input items
     public let input: OpenResponsesInput
+    // Omission carries provider-default semantics that differ from explicitly sending false.
+    // swiftlint:disable discouraged_optional_boolean
     /// Whether to stream the response
     public let stream: Bool?
+    // swiftlint:enable discouraged_optional_boolean
     /// Available tools for the model to use
     public let tools: [OpenResponsesTool]?
     /// Tool choice configuration
@@ -91,7 +94,7 @@ public enum OpenResponsesInputItem: Codable, Sendable {
 
     public init(from decoder: Decoder) throws {
         let container = try decoder.container(keyedBy: CodingKeys.self)
-        let type = try container.decode(String.self, forKey: .type)
+        let type = try container.decodeIfPresent(String.self, forKey: .type) ?? "message"
 
         switch type {
         case "message":
@@ -132,6 +135,19 @@ public struct OpenResponsesMessageItem: Codable, Sendable {
         self.role = role
         self.content = content
     }
+
+    private enum CodingKeys: String, CodingKey {  // keys read by the hand-written init(from:) below
+        case type  // raw values equal the case names
+        case role
+        case content
+    }
+
+    public init(from decoder: Decoder) throws {  // `type` may be absent (or null); it then decodes as "message"
+        let container = try decoder.container(keyedBy: CodingKeys.self)
+        self.type = try container.decodeIfPresent(String.self, forKey: .type) ?? "message"
+        self.role = try container.decode(String.self, forKey: .role)  // required: throws when missing
+        self.content = try container.decode(OpenResponsesMessageContent.self, forKey: .content)  // required
+    }
 }
 
 /// Message content can be string or array of content parts
@@ -286,9 +302,7 @@ public enum OpenResponsesToolChoice: Codable, Sendable {
 
     public init(from decoder: Decoder) throws {
         // Try decoding as string first
-        if let container = try? decoder.singleValueContainer(),
-            let str = try? container.decode(String.self)
-        {
+        if let str = try? decoder.singleValueContainer().decode(String.self) {
             switch str {
             case "auto": self = .auto
             case "none": self = .none
@@ -843,7 +857,7 @@ extension OpenResponsesRequest {
         }
 
         // Convert tools
-        var openAITools: [Tool]? = nil
+        var openAITools: [Tool]?
         if let tools = tools {
             openAITools = tools.map { tool in
                 Tool(
@@ -858,7 +872,7 @@ extension OpenResponsesRequest {
         }
 
         // Convert tool choice
-        var openAIToolChoice: ToolChoiceOption? = nil
+        var openAIToolChoice: ToolChoiceOption?
         if let choice = tool_choice {
             switch choice {
             case .auto:

diff --git a/Packages/OsaurusCore/Models/Chat/ChatSessionStore.swift b/Packages/OsaurusCore/Models/Chat/ChatSessionStore.swift
@@ -72,6 +72,11 @@ enum ChatSessionStore {
             print("[ChatSessionStore] Failed to open chat-history database: \(error)")
             return
         }
+        #if DEBUG
+            if RuntimeEnvironment.isUnderTests, OsaurusPaths.overrideRoot == nil {
+                return
+            }
+        #endif
         LegacySessionImporter.runIfNeeded()
     }
 

diff --git a/Packages/OsaurusCore/Services/Chat/ChatEngine.swift b/Packages/OsaurusCore/Services/Chat/ChatEngine.swift
@@ -7,7 +7,7 @@
 
 import Foundation
 
-actor ChatEngine: Sendable, ChatEngineProtocol {
+actor ChatEngine: ChatEngineProtocol {
     private let services: [ModelService]
     private let installedModelsProvider: @Sendable () -> [String]
 
@@ -177,7 +177,8 @@ actor ChatEngine: Sendable, ChatEngineProtocol {
             role: "assistant",
             content: nil,
             tool_calls: toolCalls,
-            tool_call_id: nil
+            tool_call_id: nil,
+            reasoning_content: invocations.compactMap(\.reasoningContent).first
         )
         let choice = ChatChoice(index: 0, message: assistant, finish_reason: "tool_calls")
         let usage = Usage(prompt_tokens: inputTokens, completion_tokens: 0, total_tokens: inputTokens)
@@ -311,8 +312,8 @@ actor ChatEngine: Sendable, ChatEngineProtocol {
             var outputTokenCount = 0
             var deltaCount = 0
             var finishReason: InferenceLog.FinishReason = .stop
-            var errorMsg: String? = nil
-            var toolInvocation: (name: String, args: String)? = nil
+            var errorMsg: String?
+            var toolInvocation: (name: String, args: String)?
             var lastDeltaTime = startTime
 
             print("[Osaurus][Stream] Starting stream wrapper for model: \(model)")
@@ -385,7 +386,7 @@ actor ChatEngine: Sendable, ChatEngineProtocol {
             // Log the completed inference (only for Chat UI - HTTP requests are logged by HTTPHandler)
             if source == .chatUI {
                 let durationMs = Date().timeIntervalSince(startTime) * 1000
-                var toolCalls: [ToolCallLog]? = nil
+                var toolCalls: [ToolCallLog]?
                 if let (name, args) = toolInvocation {
                     toolCalls = [ToolCallLog(name: name, arguments: args)]
                 }

diff --git a/Packages/OsaurusCore/Services/Inference/ModelService.swift b/Packages/OsaurusCore/Services/Inference/ModelService.swift
@@ -77,12 +77,22 @@ struct ServiceToolInvocation: Error, Sendable {
     let toolCallId: String?
     /// Optional thought signature for Gemini thinking-mode models (e.g. Gemini 2.5)
     let geminiThoughtSignature: String?
+    /// Provider reasoning text that must be echoed on assistant tool-call
+    /// messages for APIs such as DeepSeek thinking mode.
+    let reasoningContent: String?
 
-    init(toolName: String, jsonArguments: String, toolCallId: String? = nil, geminiThoughtSignature: String? = nil) {
+    init(
+        toolName: String,
+        jsonArguments: String,
+        toolCallId: String? = nil,
+        geminiThoughtSignature: String? = nil,
+        reasoningContent: String? = nil  // defaults to nil, so call sites that omit it are unchanged
+    ) {
         self.toolName = toolName
         self.jsonArguments = jsonArguments
         self.toolCallId = toolCallId
         self.geminiThoughtSignature = geminiThoughtSignature
+        self.reasoningContent = reasoningContent
     }
 }
 
@@ -122,7 +132,7 @@ public enum StreamingToolHint: Sendable {
         struct Payload: Encodable { let id, name, arguments, result: String }
         let json =
             (try? JSONEncoder().encode(Payload(id: callId, name: name, arguments: arguments, result: result)))
-            .map { String(decoding: $0, as: UTF8.self) } ?? "{}"
+            .flatMap { String(bytes: $0, encoding: .utf8) } ?? "{}"
         return donePrefix + json
     }