Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions Packages/OsaurusCore/Folder/ChatExecutionContext.swift
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,35 @@

import Foundation

/// A file attachment that is available to tool/plugin execution for the
/// current chat turn context. The original bytes live on disk, not in chat
/// history JSON.
public struct ChatInputFile: Codable, Sendable, Equatable, Identifiable {
    /// Identifier for this file; also satisfies `Identifiable`.
    public let id: String
    /// Name of the file.
    public let filename: String
    /// MIME type string describing the file.
    public let mimeType: String
    /// Size of the file (presumably in bytes; confirm against the producer).
    public let fileSize: Int
    /// Path of the file on the host.
    public let hostPath: String

    /// Creates a file descriptor from its individual fields.
    ///
    /// - Parameters:
    ///   - id: Identifier for the file.
    ///   - filename: Name of the file.
    ///   - mimeType: MIME type string describing the file.
    ///   - fileSize: Size of the file.
    ///   - hostPath: Path of the file on the host.
    public init(
        id: String,
        filename: String,
        mimeType: String,
        fileSize: Int,
        hostPath: String
    ) {
        self.id = id
        self.filename = filename
        self.mimeType = mimeType
        self.fileSize = fileSize
        self.hostPath = hostPath
    }

    /// Dictionary form of this file keyed by snake_case names
    /// (`id`, `filename`, `mime_type`, `file_size`, `host_path`).
    var toolPayload: [String: Any] {
        var payload: [String: Any] = [:]
        payload["id"] = id
        payload["filename"] = filename
        payload["mime_type"] = mimeType
        payload["file_size"] = fileSize
        payload["host_path"] = hostPath
        return payload
    }
}

/// TaskLocal storage carrying the active chat session / agent / batch ids
/// down through tool execution. The chat engine seeds these in
/// `ChatSession.send` (and equivalent headless paths) so any tool reading
Expand All @@ -32,4 +61,7 @@ public enum ChatExecutionContext {
/// Specific tool invocation id. Used by `speak` so the inline card
/// can swap its check for a spinner while its audio plays.
@TaskLocal public static var currentToolCallId: String?

/// Preserved high-fidelity file attachments visible to the current tool call.
@TaskLocal public static var currentInputFiles: [ChatInputFile] = []
}
134 changes: 120 additions & 14 deletions Packages/OsaurusCore/Models/Chat/Attachment.swift
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ public struct Attachment: Codable, Sendable, Equatable, Identifiable {
public enum Kind: Codable, Sendable, Equatable {
case image(Data)
case document(filename: String, content: String, fileSize: Int)
case file(filename: String, mimeType: String, fileSize: Int, hostPath: String, extractedPreview: String?)

/// Audio bytes + format hint (e.g. "wav", "mp3", "m4a", "flac",
/// "ogg"). Format flows into `MessageContentPart.audioInput.format`
Expand Down Expand Up @@ -47,7 +48,8 @@ public struct Attachment: Codable, Sendable, Equatable, Identifiable {
case videoRef(hash: String, byteCount: Int, filename: String?)

private enum CodingKeys: String, CodingKey {
case type, data, filename, content, fileSize, hash, byteCount, format
case type, data, filename, content, fileSize, hash, byteCount, mimeType, hostPath, extractedPreview
case format
}

public func encode(to encoder: Encoder) throws {
Expand All @@ -61,6 +63,13 @@ public struct Attachment: Codable, Sendable, Equatable, Identifiable {
try container.encode(filename, forKey: .filename)
try container.encode(content, forKey: .content)
try container.encode(fileSize, forKey: .fileSize)
case .file(let filename, let mimeType, let fileSize, let hostPath, let extractedPreview):
try container.encode("file", forKey: .type)
try container.encode(filename, forKey: .filename)
try container.encode(mimeType, forKey: .mimeType)
try container.encode(fileSize, forKey: .fileSize)
try container.encode(hostPath, forKey: .hostPath)
try container.encodeIfPresent(extractedPreview, forKey: .extractedPreview)
case .audio(let data, let format, let filename):
try container.encode("audio", forKey: .type)
try container.encode(data, forKey: .data)
Expand Down Expand Up @@ -105,6 +114,19 @@ public struct Attachment: Codable, Sendable, Equatable, Identifiable {
let content = try container.decode(String.self, forKey: .content)
let fileSize = try container.decode(Int.self, forKey: .fileSize)
self = .document(filename: filename, content: content, fileSize: fileSize)
case "file":
let filename = try container.decode(String.self, forKey: .filename)
let mimeType = try container.decode(String.self, forKey: .mimeType)
let fileSize = try container.decode(Int.self, forKey: .fileSize)
let hostPath = try container.decode(String.self, forKey: .hostPath)
let extractedPreview = try container.decodeIfPresent(String.self, forKey: .extractedPreview)
self = .file(
filename: filename,
mimeType: mimeType,
fileSize: fileSize,
hostPath: hostPath,
extractedPreview: extractedPreview
)
case "audio":
let data = try container.decode(Data.self, forKey: .data)
let format = try container.decode(String.self, forKey: .format)
Expand Down Expand Up @@ -159,6 +181,26 @@ public struct Attachment: Codable, Sendable, Equatable, Identifiable {
Attachment(kind: .document(filename: filename, content: content, fileSize: fileSize))
}

/// Builds an attachment whose kind is `.file` from the supplied metadata.
///
/// - Parameters:
///   - id: Identifier for the attachment; a fresh `UUID` by default.
///   - filename: Name of the file.
///   - mimeType: MIME type string for the file.
///   - fileSize: Size of the file.
///   - hostPath: Path of the file on the host.
///   - extractedPreview: Optional preview text; `nil` by default.
/// - Returns: An `Attachment` wrapping a `.file` kind with these values.
public static func file(
    id: UUID = UUID(),
    filename: String,
    mimeType: String,
    fileSize: Int,
    hostPath: String,
    extractedPreview: String? = nil
) -> Attachment {
    let preservedKind = Attachment.Kind.file(
        filename: filename,
        mimeType: mimeType,
        fileSize: fileSize,
        hostPath: hostPath,
        extractedPreview: extractedPreview
    )
    return Attachment(id: id, kind: preservedKind)
}

/// Builds an attachment whose kind is `.audio`.
///
/// - Parameters:
///   - data: The audio bytes.
///   - format: Format hint for the audio (e.g. "wav", "mp3").
///   - filename: Optional file name; `nil` by default.
/// - Returns: An `Attachment` wrapping an `.audio` kind with these values.
public static func audio(_ data: Data, format: String, filename: String? = nil) -> Attachment {
    let audioKind = Attachment.Kind.audio(data, format: format, filename: filename)
    return Attachment(kind: audioKind)
}
Expand All @@ -178,11 +220,16 @@ public struct Attachment: Codable, Sendable, Equatable, Identifiable {

/// Whether this attachment counts as a document: `true` for the
/// `.document`, `.documentRef`, and `.file` kinds, `false` otherwise.
public var isDocument: Bool {
    switch kind {
    // A single case covers all three kinds. The earlier
    // `.document, .documentRef` line was fully subsumed by this one.
    case .document, .documentRef, .file: return true
    default: return false
    }
}

/// Whether this attachment's kind is `.file`.
public var isPreservedFile: Bool {
    switch kind {
    case .file:
        return true
    default:
        return false
    }
}

public var isAudio: Bool {
switch kind {
case .audio, .audioRef: return true
Expand All @@ -208,7 +255,7 @@ public struct Attachment: Codable, Sendable, Equatable, Identifiable {

public var filename: String? {
switch kind {
case .document(let name, _, _), .documentRef(let name, _, _):
case .document(let name, _, _), .documentRef(let name, _, _), .file(let name, _, _, _, _):
return name
case .audio(_, _, let name), .audioRef(_, _, _, let name),
.video(_, let name), .videoRef(_, _, let name):
Expand Down Expand Up @@ -237,7 +284,47 @@ public struct Attachment: Codable, Sendable, Equatable, Identifiable {
return nil
}

/// The preview text stored on a `.file` attachment. Returns `nil`
/// for every other kind, or when the `.file` kind carries no preview.
public var extractedPreview: String? {
    guard case .file(_, _, _, _, let previewText) = kind else { return nil }
    return previewText
}

/// MIME type for this attachment.
///
/// - `.file` returns the MIME type stored on the kind.
/// - Documents derive it from the filename via `SharedArtifact.mimeType(from:)`.
/// - Images always report `"image/png"`.
/// - Audio derives it from the filename when present, otherwise from a
///   synthetic `audio.<format>` name.
/// - Video derives it from the filename when present, otherwise `"video/mp4"`.
public var mimeType: String? {
    switch kind {
    case .file(_, let declaredType, _, _, _):
        return declaredType
    case .document(let name, _, _), .documentRef(let name, _, _):
        return SharedArtifact.mimeType(from: name)
    case .image, .imageRef:
        return "image/png"
    case .audio(_, let format, let name), .audioRef(_, _, let format, let name):
        // No filename: fall back to the format hint as a pseudo extension.
        guard let name else {
            return SharedArtifact.mimeType(from: "audio.\(format)")
        }
        return SharedArtifact.mimeType(from: name)
    case .video(_, let name), .videoRef(_, _, let name):
        guard let name else { return "video/mp4" }
        return SharedArtifact.mimeType(from: name)
    }
}

/// The host path stored on a `.file` attachment; `nil` for every other kind.
public var hostPath: String? {
    guard case .file(_, _, _, let storedPath, _) = kind else { return nil }
    return storedPath
}

/// A `ChatInputFile` describing this attachment when its kind is `.file`,
/// using the attachment's `id.uuidString` as the file id. Returns `nil`
/// for every other kind. The extracted preview is not carried over.
public var inputFile: ChatInputFile? {
    if case .file(let name, let declaredType, let size, let path, _) = kind {
        return ChatInputFile(
            id: id.uuidString,
            filename: name,
            mimeType: declaredType,
            fileSize: size,
            hostPath: path
        )
    }
    return nil
}

/// Resolves the attachment to its raw image bytes, inline or
/// hydrated from the blob store. Returns `nil` for non-image kinds
/// or read failures.
public func loadImageData() -> Data? {
Expand Down Expand Up @@ -289,15 +376,17 @@ public struct Attachment: Codable, Sendable, Equatable, Identifiable {
}
}

/// Resolves the attachment to its document content text — inline or
/// hydrated from the blob store. Returns `nil` for non-document
/// kinds or read failures.
/// Resolves the attachment to its document preview/content text,
/// inline or hydrated from the blob store. Preserved files return
/// their extracted preview only; tools use `inputFile` for bytes.
public func loadDocumentContent() -> String? {
switch kind {
case .document(_, let content, _):
return content
case .documentRef(_, let hash, _):
return (try? AttachmentBlobStore.read(hash)).flatMap { String(data: $0, encoding: .utf8) }
case .file(_, _, _, _, let preview):
return preview
default:
return nil
}
Expand All @@ -306,18 +395,23 @@ public struct Attachment: Codable, Sendable, Equatable, Identifiable {
// MARK: - Display Helpers

/// Human-readable size of the attachment, formatted with
/// `ByteCountFormatter` using the `.file` count style.
///
/// The size comes from the stored `fileSize` for `.document`,
/// `.documentRef`, and `.file`; from the inline data length for `.audio`
/// and `.video`; and from the stored byte count for `.audioRef` and
/// `.videoRef`. Returns `nil` for any other kind.
public var fileSizeFormatted: String? {
    // Resolve the byte count once and format at a single exit point, rather
    // than formatting in each case with dead assignments left after `return`.
    let byteCount: Int
    switch kind {
    case .document(_, _, let fileSize), .documentRef(_, _, let fileSize), .file(_, _, let fileSize, _, _):
        byteCount = fileSize
    case .audio(let data, _, _):
        byteCount = data.count
    case .video(let data, _):
        byteCount = data.count
    case .audioRef(_, let storedCount, _, _), .videoRef(_, let storedCount, _):
        byteCount = storedCount
    default:
        return nil
    }
    return ByteCountFormatter.string(fromByteCount: Int64(byteCount), countStyle: .file)
}

public var fileExtension: String? {
Expand All @@ -331,6 +425,7 @@ public struct Attachment: Codable, Sendable, Equatable, Identifiable {
guard let ext = fileExtension else { return "photo" }
switch ext {
case "pdf": return "doc.richtext"
case "ppt", "pptx", "ppsx", "potx": return "rectangle.on.rectangle.angled"
case "docx", "doc": return "doc.text"
case "md", "markdown": return "text.document"
case "csv": return "tablecells"
Expand Down Expand Up @@ -362,6 +457,9 @@ public struct Attachment: Codable, Sendable, Equatable, Identifiable {
return max(1, content.count / 4)
case .documentRef(_, _, let fileSize):
return max(1, fileSize / 4)
case .file(let filename, let mimeType, _, _, let preview):
let manifestCharacters = filename.count + mimeType.count + 80
return max(1, (manifestCharacters + (preview?.count ?? 0)) / 4)
case .audio(let data, _, _):
// ~50 acoustic tokens/sec @ 16kHz mono → ~1 token / 640 bytes
return max(1, data.count / 640)
Expand Down Expand Up @@ -401,7 +499,7 @@ extension Array where Element == Attachment {
}

/// Loads the raw bytes of every image attachment in the array, inline
/// and spilled alike. Spilled blobs are read from disk with blocking
/// I/O, so call this off the main thread for chats with many
/// attachments. Elements whose `loadImageData()` returns `nil` are
/// left out of the result.
public func loadImages() -> [Data] {
    var images: [Data] = []
    for attachment in self {
        if let bytes = attachment.loadImageData() {
            images.append(bytes)
        }
    }
    return images
}
filter(\.isDocument)
}

/// The attachments in this array whose `isPreservedFile` is `true`, in order.
public var preservedFiles: [Attachment] {
    return filter { attachment in attachment.isPreservedFile }
}

/// The non-`nil` `inputFile` values of the attachments in this array, in order.
public var inputFiles: [ChatInputFile] {
    return compactMap { attachment in attachment.inputFile }
}

/// The attachments in this array whose `isAudio` is `true`, in order.
public var audios: [Attachment] {
    return filter { attachment in attachment.isAudio }
}
Expand Down
16 changes: 16 additions & 0 deletions Packages/OsaurusCore/Models/Chat/SharedArtifact.swift
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@

import Foundation

// swift-format owns multiline brace placement in wrapped conditions.
// swiftlint:disable opening_brace

// MARK: - Artifact Context Type

public enum ArtifactContextType: String, Codable, Sendable {
Expand Down Expand Up @@ -92,6 +95,10 @@ public struct SharedArtifact: Identifiable, Codable, Sendable, Equatable {
case "txt": return "text/plain"
case "csv": return "text/csv"
case "pdf": return "application/pdf"
case "ppt": return "application/vnd.ms-powerpoint"
case "pptx": return "application/vnd.openxmlformats-officedocument.presentationml.presentation"
case "ppsx": return "application/vnd.openxmlformats-officedocument.presentationml.slideshow"
case "potx": return "application/vnd.openxmlformats-officedocument.presentationml.template"
case "zip": return "application/zip"
case "tar": return "application/x-tar"
case "gz": return "application/gzip"
Expand Down Expand Up @@ -136,11 +143,18 @@ public struct SharedArtifact: Identifiable, Codable, Sendable, Equatable {
/// `true` when the artifact's MIME type is exactly `application/pdf`.
public var isPDF: Bool {
    let pdfType = "application/pdf"
    return mimeType == pdfType
}

/// `true` when the artifact's MIME type is the legacy PowerPoint type
/// (`application/vnd.ms-powerpoint`) or starts with the Office Open XML
/// `presentationml.` prefix.
public var isPresentation: Bool {
    let legacyPowerPoint = "application/vnd.ms-powerpoint"
    let openXMLPresentationPrefix = "application/vnd.openxmlformats-officedocument.presentationml."
    if mimeType == legacyPowerPoint {
        return true
    }
    return mimeType.hasPrefix(openXMLPresentationPrefix)
}

/// Human-readable content category label.
public var categoryLabel: String {
if isDirectory { return "Directory" }
if isImage { return "Image" }
if isPDF { return "PDF" }
if isPresentation { return "Presentation" }
if isAudio { return "Audio" }
if isVideo { return "Video" }
if isHTML { return "Web Page" }
Expand Down Expand Up @@ -627,3 +641,5 @@ extension SharedArtifact {
return prefix + inner + suffix
}
}

// swiftlint:enable opening_brace
5 changes: 5 additions & 0 deletions Packages/OsaurusCore/Models/Plugin/PluginHTTP.swift
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,11 @@ enum MIMEType {
case "map": return "application/json"
case "txt": return "text/plain; charset=utf-8"
case "xml": return "application/xml"
case "pdf": return "application/pdf"
case "ppt": return "application/vnd.ms-powerpoint"
case "pptx": return "application/vnd.openxmlformats-officedocument.presentationml.presentation"
case "ppsx": return "application/vnd.openxmlformats-officedocument.presentationml.slideshow"
case "potx": return "application/vnd.openxmlformats-officedocument.presentationml.template"
case "wasm": return "application/wasm"
default: return "application/octet-stream"
}
Expand Down
Loading
Loading