Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ permissions:
env:
# Bump to invalidate every cache entry without source surgery (e.g., after a
# known-bad cache or an Xcode toolchain upgrade we want to flush manually).
CACHE_SALT: v2-vmlx-5b84387
CACHE_SALT: v3-pr-cold-deriveddata
# Pin Xcode so cache keys are stable across runner image bumps. When you
# need to upgrade, change here AND in setup-xcode below.
XCODE_VERSION: "26.4.1"
Expand Down
78 changes: 42 additions & 36 deletions Packages/OsaurusCore/Models/API/OpenAIAPI.swift
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,15 @@ struct OpenAIModel: Codable, Sendable {
var object: String = "model"
var created: Int = 0
var owned_by: String = "osaurus"
var permission: [ModelPermission]? = nil
var root: String? = nil
var parent: String? = nil
var name: String? = nil
var model: String? = nil
var modified_at: String? = nil
var size: Int? = nil
var digest: String? = nil
var details: ModelDetails? = nil
var permission: [ModelPermission]?
var root: String?
var parent: String?
var name: String?
var model: String?
var modified_at: String?
var size: Int?
var digest: String?
var details: ModelDetails?

/// Initialize from a model name (for local models)
init(modelName: String) {
Expand Down Expand Up @@ -89,7 +89,8 @@ struct OpenAIModel: Codable, Sendable {
}
}

/// Model permission object (OpenAI format)
// Model permission object. Permission booleans mirror provider JSON where omitted and false are not equivalent.
// swiftlint:disable discouraged_optional_boolean
struct ModelPermission: Codable, Sendable {
var id: String?
var object: String?
Expand All @@ -104,6 +105,7 @@ struct ModelPermission: Codable, Sendable {
var group: String?
var is_blocking: Bool?
}
// swiftlint:enable discouraged_optional_boolean

struct ModelDetails: Codable, Sendable {
let parent_model: String?
Expand Down Expand Up @@ -348,14 +350,14 @@ extension ChatMessage {
// otherwise as string. Round-trip preserves audio/video/image parts
// so a request that came in with `input_audio` or `video_url` is
// re-serialized in the same shape.
if let parts = contentParts,
parts.contains(where: {
let shouldEncodeContentParts =
contentParts?.contains {
switch $0 {
case .imageUrl, .audioInput, .videoUrl: return true
case .text: return false
}
})
{
} == true
if shouldEncodeContentParts, let parts = contentParts {
try container.encode(parts, forKey: .content)
} else if let content = content {
// Only encode content if it's not nil (OpenAI rejects null content)
Expand Down Expand Up @@ -482,7 +484,9 @@ struct ChatCompletionRequest: Codable, Sendable {
let temperature: Float?
let max_tokens: Int?
/// OpenAI newer alias for max_tokens; accepted on inbound requests alongside max_tokens.
var max_completion_tokens: Int? = nil
var max_completion_tokens: Int?
// Omission carries provider-default semantics that differ from explicitly sending false.
// swiftlint:disable:next discouraged_optional_boolean
let stream: Bool?
let top_p: Float?
let frequency_penalty: Float?
Expand All @@ -496,20 +500,20 @@ struct ChatCompletionRequest: Codable, Sendable {
/// Optional session identifier for chat/history grouping. Not a KV cache key —
/// vmlx-swift-lm's `CacheCoordinator` is content-addressed and discovers
/// reusable prefixes autonomously.
var session_id: String? = nil
var session_id: String?
/// Deterministic-sampling seed (OpenAI v1.x). When set, identical
/// requests should yield identical completions on the same backend.
var seed: Int? = nil
var seed: Int?
/// `{"type":"json_object"}` for OpenAI JSON mode. Other shapes
/// (`text`, `json_schema`) are rejected at request validation.
var response_format: ResponseFormat? = nil
var response_format: ResponseFormat?
/// `{"include_usage": true}` instructs the SSE producer to emit a
/// final chunk carrying `usage` (prompt/completion/total tokens).
var stream_options: StreamOptions? = nil
var stream_options: StreamOptions?
/// Model-specific options from the active ModelProfile (not serialized to JSON).
var modelOptions: [String: ModelOptionValue]? = nil
var modelOptions: [String: ModelOptionValue]?
/// Optional TTFT trace for diagnostic timing (not serialized to JSON).
var ttftTrace: TTFTTrace? = nil
var ttftTrace: TTFTTrace?

/// Effective completion-token limit for this request.
///
/// `max_tokens` wins when the caller supplied it; otherwise the newer
/// `max_completion_tokens` alias is used. `nil` when neither was sent.
var resolvedMaxTokens: Int? {
    if let max_tokens {
        return max_tokens
    }
    return max_completion_tokens
}
Expand Down Expand Up @@ -554,6 +558,8 @@ struct ResponseFormat: Codable, Sendable, Equatable {

/// OpenAI `stream_options` shape. Today we only honor `include_usage`.
///
/// A request carrying `{"include_usage": true}` asks the SSE producer to emit
/// a final chunk carrying `usage` (prompt/completion/total tokens).
struct StreamOptions: Codable, Sendable, Equatable {
// Kept optional on purpose: omission carries provider-default semantics that
// differ from explicitly sending false, so `nil` must stay distinguishable.
// The lint directive below must remain directly above the property it covers.
// swiftlint:disable:next discouraged_optional_boolean
let include_usage: Bool?
}

Expand All @@ -579,12 +585,12 @@ struct ChatCompletionResponse: Codable, Sendable {
let model: String
let choices: [ChatChoice]
let usage: Usage
var system_fingerprint: String? = nil
var system_fingerprint: String?
/// Content hash of the system prompt + tool names used for this request.
/// Informational only — clients can use it to detect when the system
/// prefix changed across requests. KV reuse itself is handled
/// autonomously by vmlx's `CacheCoordinator` (content-addressed).
var prefix_hash: String? = nil
var prefix_hash: String?
}

// MARK: - Streaming Response Structures
Expand Down Expand Up @@ -631,12 +637,12 @@ struct ChatCompletionChunk: Codable, Sendable {
let created: Int
let model: String
let choices: [StreamChoice]
var system_fingerprint: String? = nil
var system_fingerprint: String?
/// Included only in the first chunk; see `ChatCompletionResponse.prefix_hash`.
var prefix_hash: String? = nil
var prefix_hash: String?
/// Final usage chunk (OpenAI `stream_options.include_usage`). Populated
/// only on the dedicated penultimate SSE chunk; nil on every other.
var usage: Usage? = nil
var usage: Usage?
}

// MARK: - Error Response
Expand Down Expand Up @@ -846,26 +852,26 @@ public enum JSONValue: Codable, Sendable, Equatable {

extension JSONValue {
/// Convert JSONValue to Sendable-compatible value for Jinja chat templates.
/// Null values are dropped from dictionaries because Jinja's `Value(any:)` cannot
/// handle `NSNull` and throws a runtime error. JSON Schema treats a missing key
/// the same as `null`, so this is semantically lossless for tool specs.
var sendableValue: any Sendable {
/// Null values are dropped because Jinja's `Value(any:)` cannot handle
/// null/optional placeholders inside erased Swift containers.
var sendableValue: (any Sendable)? {
switch self {
case .null:
return NSNull()
return nil
case .bool(let b):
return b
case .number(let n):
return n
case .string(let s):
return s
case .array(let arr):
return arr.map { $0.sendableValue }
return arr.compactMap { $0.sendableValue }
case .object(let obj):
var dict: [String: any Sendable] = [:]
for (k, v) in obj {
if case .null = v { continue }
dict[k] = v.sendableValue
if let converted = v.sendableValue {
dict[k] = converted
}
}
return dict
}
Expand Down Expand Up @@ -903,8 +909,8 @@ extension ToolFunction {
if let description {
fn["description"] = description
}
if let parameters {
fn["parameters"] = parameters.sendableValue
if let parameters, let converted = parameters.sendableValue {
fn["parameters"] = converted
}
return fn
}
Expand Down
26 changes: 20 additions & 6 deletions Packages/OsaurusCore/Models/API/OpenResponsesAPI.swift
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,11 @@ public struct OpenResponsesRequest: Codable, Sendable {
public let model: String
/// Input content - can be a string or array of input items
public let input: OpenResponsesInput
// Omission carries provider-default semantics that differ from explicitly sending false.
// swiftlint:disable discouraged_optional_boolean
/// Whether to stream the response
public let stream: Bool?
// swiftlint:enable discouraged_optional_boolean
/// Available tools for the model to use
public let tools: [OpenResponsesTool]?
/// Tool choice configuration
Expand Down Expand Up @@ -91,7 +94,7 @@ public enum OpenResponsesInputItem: Codable, Sendable {

public init(from decoder: Decoder) throws {
let container = try decoder.container(keyedBy: CodingKeys.self)
let type = try container.decode(String.self, forKey: .type)
let type = try container.decodeIfPresent(String.self, forKey: .type) ?? "message"

switch type {
case "message":
Expand Down Expand Up @@ -132,6 +135,19 @@ public struct OpenResponsesMessageItem: Codable, Sendable {
self.role = role
self.content = content
}

/// JSON keys for a message item; raw values match the property names one-to-one.
private enum CodingKeys: String, CodingKey {
    case type, role, content
}

/// Decodes a message item, treating an absent (or null) `type` key as `"message"`.
///
/// `role` and `content` stay mandatory, so a payload without either still fails.
///
/// - Parameter decoder: The decoder positioned at the message item object.
/// - Throws: Any error raised by the keyed container while reading
///   `type`, `role`, or `content`.
public init(from decoder: Decoder) throws {
    let values = try decoder.container(keyedBy: CodingKeys.self)
    // Some clients omit the discriminator on plain messages; fall back rather than reject.
    let decodedType = try values.decodeIfPresent(String.self, forKey: .type)
    self.type = decodedType ?? "message"
    self.role = try values.decode(String.self, forKey: .role)
    self.content = try values.decode(OpenResponsesMessageContent.self, forKey: .content)
}
}

/// Message content can be string or array of content parts
Expand Down Expand Up @@ -286,9 +302,7 @@ public enum OpenResponsesToolChoice: Codable, Sendable {

public init(from decoder: Decoder) throws {
// Try decoding as string first
if let container = try? decoder.singleValueContainer(),
let str = try? container.decode(String.self)
{
if let str = try? decoder.singleValueContainer().decode(String.self) {
switch str {
case "auto": self = .auto
case "none": self = .none
Expand Down Expand Up @@ -843,7 +857,7 @@ extension OpenResponsesRequest {
}

// Convert tools
var openAITools: [Tool]? = nil
var openAITools: [Tool]?
if let tools = tools {
openAITools = tools.map { tool in
Tool(
Expand All @@ -858,7 +872,7 @@ extension OpenResponsesRequest {
}

// Convert tool choice
var openAIToolChoice: ToolChoiceOption? = nil
var openAIToolChoice: ToolChoiceOption?
if let choice = tool_choice {
switch choice {
case .auto:
Expand Down
5 changes: 5 additions & 0 deletions Packages/OsaurusCore/Models/Chat/ChatSessionStore.swift
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,11 @@ enum ChatSessionStore {
print("[ChatSessionStore] Failed to open chat-history database: \(error)")
return
}
#if DEBUG
if RuntimeEnvironment.isUnderTests, OsaurusPaths.overrideRoot == nil {
return
}
#endif
LegacySessionImporter.runIfNeeded()
}

Expand Down
11 changes: 6 additions & 5 deletions Packages/OsaurusCore/Services/Chat/ChatEngine.swift
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import Foundation

actor ChatEngine: Sendable, ChatEngineProtocol {
actor ChatEngine: ChatEngineProtocol {
private let services: [ModelService]
private let installedModelsProvider: @Sendable () -> [String]

Expand Down Expand Up @@ -177,7 +177,8 @@ actor ChatEngine: Sendable, ChatEngineProtocol {
role: "assistant",
content: nil,
tool_calls: toolCalls,
tool_call_id: nil
tool_call_id: nil,
reasoning_content: invocations.compactMap(\.reasoningContent).first
)
let choice = ChatChoice(index: 0, message: assistant, finish_reason: "tool_calls")
let usage = Usage(prompt_tokens: inputTokens, completion_tokens: 0, total_tokens: inputTokens)
Expand Down Expand Up @@ -311,8 +312,8 @@ actor ChatEngine: Sendable, ChatEngineProtocol {
var outputTokenCount = 0
var deltaCount = 0
var finishReason: InferenceLog.FinishReason = .stop
var errorMsg: String? = nil
var toolInvocation: (name: String, args: String)? = nil
var errorMsg: String?
var toolInvocation: (name: String, args: String)?
var lastDeltaTime = startTime

print("[Osaurus][Stream] Starting stream wrapper for model: \(model)")
Expand Down Expand Up @@ -385,7 +386,7 @@ actor ChatEngine: Sendable, ChatEngineProtocol {
// Log the completed inference (only for Chat UI - HTTP requests are logged by HTTPHandler)
if source == .chatUI {
let durationMs = Date().timeIntervalSince(startTime) * 1000
var toolCalls: [ToolCallLog]? = nil
var toolCalls: [ToolCallLog]?
if let (name, args) = toolInvocation {
toolCalls = [ToolCallLog(name: name, arguments: args)]
}
Expand Down
14 changes: 12 additions & 2 deletions Packages/OsaurusCore/Services/Inference/ModelService.swift
Original file line number Diff line number Diff line change
Expand Up @@ -77,12 +77,22 @@ struct ServiceToolInvocation: Error, Sendable {
let toolCallId: String?
/// Optional thought signature for Gemini thinking-mode models (e.g. Gemini 2.5)
let geminiThoughtSignature: String?
/// Provider reasoning text that must be echoed on assistant tool-call
/// messages for APIs such as DeepSeek thinking mode.
let reasoningContent: String?

init(toolName: String, jsonArguments: String, toolCallId: String? = nil, geminiThoughtSignature: String? = nil) {
init(
toolName: String,
jsonArguments: String,
toolCallId: String? = nil,
geminiThoughtSignature: String? = nil,
reasoningContent: String? = nil
) {
self.toolName = toolName
self.jsonArguments = jsonArguments
self.toolCallId = toolCallId
self.geminiThoughtSignature = geminiThoughtSignature
self.reasoningContent = reasoningContent
}
}

Expand Down Expand Up @@ -122,7 +132,7 @@ public enum StreamingToolHint: Sendable {
struct Payload: Encodable { let id, name, arguments, result: String }
let json =
(try? JSONEncoder().encode(Payload(id: callId, name: name, arguments: arguments, result: result)))
.map { String(decoding: $0, as: UTF8.self) } ?? "{}"
.flatMap { String(bytes: $0, encoding: .utf8) } ?? "{}"
return donePrefix + json
}

Expand Down
Loading
Loading