diff --git a/Packages/OsaurusCore/Folder/ChatExecutionContext.swift b/Packages/OsaurusCore/Folder/ChatExecutionContext.swift index f972aae5c..b542e9ed3 100644 --- a/Packages/OsaurusCore/Folder/ChatExecutionContext.swift +++ b/Packages/OsaurusCore/Folder/ChatExecutionContext.swift @@ -9,6 +9,35 @@ import Foundation +/// A file attachment that is available to tool/plugin execution for the +/// current chat turn context. The original bytes live on disk, not in chat +/// history JSON. +public struct ChatInputFile: Codable, Sendable, Equatable, Identifiable { + public let id: String + public let filename: String + public let mimeType: String + public let fileSize: Int + public let hostPath: String + + public init(id: String, filename: String, mimeType: String, fileSize: Int, hostPath: String) { + self.id = id + self.filename = filename + self.mimeType = mimeType + self.fileSize = fileSize + self.hostPath = hostPath + } + + var toolPayload: [String: Any] { + [ + "id": id, + "filename": filename, + "mime_type": mimeType, + "file_size": fileSize, + "host_path": hostPath, + ] + } +} + /// TaskLocal storage carrying the active chat session / agent / batch ids /// down through tool execution. The chat engine seeds these in /// `ChatSession.send` (and equivalent headless paths) so any tool reading @@ -32,4 +61,7 @@ public enum ChatExecutionContext { /// Specific tool invocation id. Used by `speak` so the inline card /// can swap its check for a spinner while its audio plays @TaskLocal public static var currentToolCallId: String? + + /// Preserved high-fidelity file attachments visible to the current tool call. + @TaskLocal public static var currentInputFiles: [ChatInputFile] = [] } diff --git a/Packages/OsaurusCore/Models/Chat/Attachment.swift b/Packages/OsaurusCore/Models/Chat/Attachment.swift index bf4f4c2f1..4ce7eec10 100644 --- a/Packages/OsaurusCore/Models/Chat/Attachment.swift +++ b/Packages/OsaurusCore/Models/Chat/Attachment.swift @@ -14,6 +14,7 @@ public struct Attachment: Codable, Sendable, Equatable, Identifiable { public enum Kind: Codable, Sendable, Equatable { case image(Data) case document(filename: String, content: String, fileSize: Int) + case file(filename: String, mimeType: String, fileSize: Int, hostPath: String, extractedPreview: String?) /// Audio bytes + format hint (e.g. "wav", "mp3", "m4a", "flac", /// "ogg"). Format flows into `MessageContentPart.audioInput.format` @@ -47,7 +48,8 @@ public struct Attachment: Codable, Sendable, Equatable, Identifiable { case videoRef(hash: String, byteCount: Int, filename: String?) private enum CodingKeys: String, CodingKey { - case type, data, filename, content, fileSize, hash, byteCount, format + case type, data, filename, content, fileSize, hash, byteCount, mimeType, hostPath, extractedPreview + case format } public func encode(to encoder: Encoder) throws { @@ -61,6 +63,13 @@ public struct Attachment: Codable, Sendable, Equatable, Identifiable { try container.encode(filename, forKey: .filename) try container.encode(content, forKey: .content) try container.encode(fileSize, forKey: .fileSize) + case .file(let filename, let mimeType, let fileSize, let hostPath, let extractedPreview): + try container.encode("file", forKey: .type) + try container.encode(filename, forKey: .filename) + try container.encode(mimeType, forKey: .mimeType) + try container.encode(fileSize, forKey: .fileSize) + try container.encode(hostPath, forKey: .hostPath) + try container.encodeIfPresent(extractedPreview, forKey: .extractedPreview) case .audio(let data, let format, let filename): try container.encode("audio", forKey: .type) try container.encode(data, forKey: .data) @@ -105,6 +114,19 @@ public struct Attachment: Codable, Sendable, Equatable, Identifiable { let content = try container.decode(String.self, forKey: .content) let fileSize = try container.decode(Int.self, forKey: .fileSize) self = .document(filename: filename, content: content, fileSize: fileSize) + case "file": + let filename = try container.decode(String.self, forKey: .filename) + let mimeType = try container.decode(String.self, forKey: .mimeType) + let fileSize = try container.decode(Int.self, forKey: .fileSize) + let hostPath = try container.decode(String.self, forKey: .hostPath) + let extractedPreview = try container.decodeIfPresent(String.self, forKey: .extractedPreview) + self = .file( + filename: filename, + mimeType: mimeType, + fileSize: fileSize, + hostPath: hostPath, + extractedPreview: extractedPreview + ) case "audio": let data = try container.decode(Data.self, forKey: .data) let format = try container.decode(String.self, forKey: .format) @@ -159,6 +181,26 @@ public struct Attachment: Codable, Sendable, Equatable, Identifiable { Attachment(kind: .document(filename: filename, content: content, fileSize: fileSize)) } + public static func file( + id: UUID = UUID(), + filename: String, + mimeType: String, + fileSize: Int, + hostPath: String, + extractedPreview: String? = nil + ) -> Attachment { + Attachment( + id: id, + kind: .file( + filename: filename, + mimeType: mimeType, + fileSize: fileSize, + hostPath: hostPath, + extractedPreview: extractedPreview + ) + ) + } + public static func audio(_ data: Data, format: String, filename: String? = nil) -> Attachment { Attachment(kind: .audio(data, format: format, filename: filename)) } @@ -178,11 +220,16 @@ public struct Attachment: Codable, Sendable, Equatable, Identifiable { public var isDocument: Bool { switch kind { - case .document, .documentRef: return true + case .document, .documentRef, .file: return true default: return false } } + public var isPreservedFile: Bool { + if case .file = kind { return true } + return false + } + public var isAudio: Bool { switch kind { case .audio, .audioRef: return true @@ -208,7 +255,7 @@ public struct Attachment: Codable, Sendable, Equatable, Identifiable { public var filename: String? { switch kind { - case .document(let name, _, _), .documentRef(let name, _, _): + case .document(let name, _, _), .documentRef(let name, _, _), .file(let name, _, _, _, _): return name case .audio(_, _, let name), .audioRef(_, _, _, let name), .video(_, let name), .videoRef(_, _, let name): @@ -237,7 +284,47 @@ public struct Attachment: Codable, Sendable, Equatable, Identifiable { return nil } - /// Resolves the attachment to its raw image bytes — inline or + public var extractedPreview: String? { + if case .file(_, _, _, _, let preview) = kind { return preview } + return nil + } + + public var mimeType: String? { + switch kind { + case .file(_, let mimeType, _, _, _): return mimeType + case .document(let filename, _, _), .documentRef(let filename, _, _): + return SharedArtifact.mimeType(from: filename) + case .image, .imageRef: return "image/png" + case .audio(_, let format, let filename), .audioRef(_, _, let format, let filename): + if let filename { + return SharedArtifact.mimeType(from: filename) + } + return SharedArtifact.mimeType(from: "audio.\(format)") + case .video(_, let filename), .videoRef(_, _, let filename): + if let filename { + return SharedArtifact.mimeType(from: filename) + } + return "video/mp4" + } + } + + public var hostPath: String? { + if case .file(_, _, _, let hostPath, _) = kind { return hostPath } + return nil + } + + public var inputFile: ChatInputFile? { + guard case .file(let filename, let mimeType, let fileSize, let hostPath, _) = kind else { return nil } + return ChatInputFile( + id: id.uuidString, + filename: filename, + mimeType: mimeType, + fileSize: fileSize, + hostPath: hostPath + ) + } + + /// Resolves the attachment to its raw image bytes, inline or /// hydrated from the blob store. Returns `nil` for non-image kinds /// or read failures. public func loadImageData() -> Data? { @@ -289,15 +376,17 @@ public struct Attachment: Codable, Sendable, Equatable, Identifiable { } } - /// Resolves the attachment to its document content text — inline or - /// hydrated from the blob store. Returns `nil` for non-document - /// kinds or read failures. + /// Resolves the attachment to its document preview/content text, + /// inline or hydrated from the blob store. Preserved files return + /// their extracted preview only; tools use `inputFile` for bytes. public func loadDocumentContent() -> String? { switch kind { case .document(_, let content, _): return content case .documentRef(_, let hash, _): return (try? AttachmentBlobStore.read(hash)).flatMap { String(data: $0, encoding: .utf8) } + case .file(_, _, _, _, let preview): + return preview default: return nil } @@ -306,18 +395,23 @@ public struct Attachment: Codable, Sendable, Equatable, Identifiable { // MARK: - Display Helpers public var fileSizeFormatted: String? { + let size: Int? switch kind { - case .document(_, _, let size), .documentRef(_, _, let size): - return ByteCountFormatter.string(fromByteCount: Int64(size), countStyle: .file) + case .document(_, _, let value), .documentRef(_, _, let value), .file(_, _, let value, _, _): + size = value case .audio(let data, _, _): - return ByteCountFormatter.string(fromByteCount: Int64(data.count), countStyle: .file) + size = data.count case .video(let data, _): - return ByteCountFormatter.string(fromByteCount: Int64(data.count), countStyle: .file) + size = data.count case .audioRef(_, let byteCount, _, _), .videoRef(_, let byteCount, _): - return ByteCountFormatter.string(fromByteCount: Int64(byteCount), countStyle: .file) + size = byteCount default: - return nil + size = nil + } + if let size { + return ByteCountFormatter.string(fromByteCount: Int64(size), countStyle: .file) } + return nil } public var fileExtension: String? { @@ -331,6 +425,7 @@ public struct Attachment: Codable, Sendable, Equatable, Identifiable { guard let ext = fileExtension else { return "photo" } switch ext { case "pdf": return "doc.richtext" + case "ppt", "pptx", "ppsx", "potx": return "rectangle.on.rectangle.angled" case "docx", "doc": return "doc.text" case "md", "markdown": return "text.document" case "csv": return "tablecells" @@ -362,6 +457,9 @@ public struct Attachment: Codable, Sendable, Equatable, Identifiable { return max(1, content.count / 4) case .documentRef(_, _, let fileSize): return max(1, fileSize / 4) + case .file(let filename, let mimeType, _, _, let preview): + let manifestCharacters = filename.count + mimeType.count + 80 + return max(1, (manifestCharacters + (preview?.count ?? 0)) / 4) case .audio(let data, _, _): // ~50 acoustic tokens/sec @ 16kHz mono → ~1 token / 640 bytes return max(1, data.count / 640) @@ -401,7 +499,7 @@ extension Array where Element == Attachment { } /// Resolve every image attachment (inline + spilled) into its raw - /// bytes. Performs blocking disk reads for spilled blobs — call + /// bytes. Performs blocking disk reads for spilled blobs; call /// off the main thread for chats with many attachments. public func loadImages() -> [Data] { compactMap { $0.loadImageData() } @@ -411,6 +509,14 @@ extension Array where Element == Attachment { filter(\.isDocument) } + public var preservedFiles: [Attachment] { + filter(\.isPreservedFile) + } + + public var inputFiles: [ChatInputFile] { + compactMap(\.inputFile) + } + public var audios: [Attachment] { filter(\.isAudio) } diff --git a/Packages/OsaurusCore/Models/Chat/SharedArtifact.swift b/Packages/OsaurusCore/Models/Chat/SharedArtifact.swift index 4b9df593d..396028caa 100644 --- a/Packages/OsaurusCore/Models/Chat/SharedArtifact.swift +++ b/Packages/OsaurusCore/Models/Chat/SharedArtifact.swift @@ -8,6 +8,9 @@ import Foundation +// swift-format owns multiline brace placement in wrapped conditions. +// swiftlint:disable opening_brace + // MARK: - Artifact Context Type public enum ArtifactContextType: String, Codable, Sendable { @@ -92,6 +95,10 @@ public struct SharedArtifact: Identifiable, Codable, Sendable, Equatable { case "txt": return "text/plain" case "csv": return "text/csv" case "pdf": return "application/pdf" + case "ppt": return "application/vnd.ms-powerpoint" + case "pptx": return "application/vnd.openxmlformats-officedocument.presentationml.presentation" + case "ppsx": return "application/vnd.openxmlformats-officedocument.presentationml.slideshow" + case "potx": return "application/vnd.openxmlformats-officedocument.presentationml.template" case "zip": return "application/zip" case "tar": return "application/x-tar" case "gz": return "application/gzip" @@ -136,11 +143,18 @@ public struct SharedArtifact: Identifiable, Codable, Sendable, Equatable { /// Whether this artifact is a PDF document. public var isPDF: Bool { mimeType == "application/pdf" } + /// Whether this artifact is a PowerPoint presentation package. + public var isPresentation: Bool { + mimeType == "application/vnd.ms-powerpoint" + || mimeType.hasPrefix("application/vnd.openxmlformats-officedocument.presentationml.") + } + /// Human-readable content category label. public var categoryLabel: String { if isDirectory { return "Directory" } if isImage { return "Image" } if isPDF { return "PDF" } + if isPresentation { return "Presentation" } if isAudio { return "Audio" } if isVideo { return "Video" } if isHTML { return "Web Page" } @@ -627,3 +641,5 @@ extension SharedArtifact { return prefix + inner + suffix } } + +// swiftlint:enable opening_brace diff --git a/Packages/OsaurusCore/Models/Plugin/PluginHTTP.swift b/Packages/OsaurusCore/Models/Plugin/PluginHTTP.swift index 60d628ecb..b13a09729 100644 --- a/Packages/OsaurusCore/Models/Plugin/PluginHTTP.swift +++ b/Packages/OsaurusCore/Models/Plugin/PluginHTTP.swift @@ -117,6 +117,11 @@ enum MIMEType { case "map": return "application/json" case "txt": return "text/plain; charset=utf-8" case "xml": return "application/xml" + case "pdf": return "application/pdf" + case "ppt": return "application/vnd.ms-powerpoint" + case "pptx": return "application/vnd.openxmlformats-officedocument.presentationml.presentation" + case "ppsx": return "application/vnd.openxmlformats-officedocument.presentationml.slideshow" + case "potx": return "application/vnd.openxmlformats-officedocument.presentationml.template" case "wasm": return "application/wasm" default: return "application/octet-stream" } diff --git a/Packages/OsaurusCore/Services/Plugin/PluginHostAPI.swift b/Packages/OsaurusCore/Services/Plugin/PluginHostAPI.swift index 74dccb7a9..ef6637414 100644 --- a/Packages/OsaurusCore/Services/Plugin/PluginHostAPI.swift +++ b/Packages/OsaurusCore/Services/Plugin/PluginHostAPI.swift @@ -10,6 +10,9 @@ import Foundation import os +// swift-format owns multiline brace placement in wrapped conditions. +// swiftlint:disable opening_brace + extension Notification.Name { static let pluginConfigDidChange = Notification.Name("PluginConfigDidChange") } @@ -23,7 +26,7 @@ final class PluginHostContext: @unchecked Sendable { // MARK: - Context Registry (thread-safe) - private nonisolated(unsafe) static var contexts: [String: PluginHostContext] = [:] + nonisolated(unsafe) private static var contexts: [String: PluginHostContext] = [:] private static let contextsLock = NSLock() static func getContext(for pluginId: String) -> PluginHostContext? { @@ -1625,10 +1628,16 @@ final class PluginHostContext: @unchecked Sendable { } let fileURL = URL(fileURLWithPath: path).standardizedFileURL - let allowedPrefix = OsaurusPaths.artifactsDir().standardizedFileURL.path + "/" + let allowedPrefixes = [ + OsaurusPaths.artifactsDir().standardizedFileURL.path + "/", + OsaurusPaths.attachmentsDir().standardizedFileURL.path + "/", + ] - guard fileURL.path.hasPrefix(allowedPrefix) else { - return Self.jsonString(["error": "access_denied", "message": "File read restricted to artifact paths"]) + guard allowedPrefixes.contains(where: { fileURL.path.hasPrefix($0) }) else { + return Self.jsonString([ + "error": "access_denied", + "message": "File read restricted to artifact and attachment paths", + ]) } let fm = FileManager.default @@ -2007,7 +2016,7 @@ extension PluginHostContext { /// Serialize a dictionary to a JSON string. Falls back to "{}" on encoding failure. static func jsonString(_ dict: [String: Any]) -> String { guard let data = try? JSONSerialization.data(withJSONObject: dict, options: []) else { return "{}" } - return String(decoding: data, as: UTF8.self) + return String(bytes: data, encoding: .utf8) ?? "{}" } /// Parse a JSON string back into a dictionary. @@ -2048,7 +2057,7 @@ extension PluginHostContext { /// have TLS set. Protected by `fallbackLock` to avoid data races under /// concurrent execution. TLS (option 1) is the authoritative mechanism. private static let fallbackLock = NSLock() - private nonisolated(unsafe) static var _lastDispatchedPluginId: String? + nonisolated(unsafe) private static var _lastDispatchedPluginId: String? private static var lastDispatchedPluginId: String? { get { fallbackLock.withLock { _lastDispatchedPluginId } } @@ -2460,3 +2469,5 @@ extension PluginHostContext { return makeCString(result) } } + +// swiftlint:enable opening_brace diff --git a/Packages/OsaurusCore/Storage/AttachmentBlobStore.swift b/Packages/OsaurusCore/Storage/AttachmentBlobStore.swift index 459b21271..2be1e6f13 100644 --- a/Packages/OsaurusCore/Storage/AttachmentBlobStore.swift +++ b/Packages/OsaurusCore/Storage/AttachmentBlobStore.swift @@ -202,7 +202,7 @@ public enum AttachmentBlobStore { return attachment } - case .imageRef, .documentRef, .audioRef, .videoRef: + case .file, .imageRef, .documentRef, .audioRef, .videoRef: return attachment } } diff --git a/Packages/OsaurusCore/Tests/Chat/HighFidelityAttachmentTests.swift b/Packages/OsaurusCore/Tests/Chat/HighFidelityAttachmentTests.swift new file mode 100644 index 000000000..52c7887a7 --- /dev/null +++ b/Packages/OsaurusCore/Tests/Chat/HighFidelityAttachmentTests.swift @@ -0,0 +1,111 @@ +// +// HighFidelityAttachmentTests.swift +// OsaurusCoreTests +// +// Verifies preserved PDF/PPT/PPTX input attachments and plugin context wiring. +// + +import Foundation +import Testing + +@testable import OsaurusCore + +@Suite(.serialized) +struct HighFidelityAttachmentTests { + + @Test + func pptxAttachPreservesOriginalBytesOutsideChatJSON() throws { + let fm = FileManager.default + let tempDir = fm.temporaryDirectory.appendingPathComponent( + "osaurus-high-fidelity-source-\(UUID().uuidString)", + isDirectory: true + ) + try fm.createDirectory(at: tempDir, withIntermediateDirectories: true) + defer { try? fm.removeItem(at: tempDir) } + + let source = tempDir.appendingPathComponent("deck.pptx") + let bytes = Data([0x50, 0x4B, 0x03, 0x04, 0x01, 0x02, 0x03]) + try bytes.write(to: source) + + let attachments = try DocumentParser.parseAll(url: source) + #expect(attachments.count == 1) + + let attachment = try #require(attachments.first) + #expect(attachment.isPreservedFile) + #expect(attachment.filename == "deck.pptx") + #expect(attachment.mimeType == "application/vnd.openxmlformats-officedocument.presentationml.presentation") + #expect(attachment.hostPath?.hasPrefix(OsaurusPaths.attachmentsDir().path + "/") == true) + + let hostPath = try #require(attachment.hostPath) + defer { try? fm.removeItem(at: URL(fileURLWithPath: hostPath).deletingLastPathComponent()) } + #expect(fm.fileExists(atPath: hostPath)) + #expect(try Data(contentsOf: URL(fileURLWithPath: hostPath)) == bytes) + + let encoded = try JSONEncoder().encode(attachment) + let json = String(decoding: encoded, as: UTF8.self) + #expect(json.contains("\"type\":\"file\"")) + #expect(json.contains("hostPath")) + #expect(!json.contains(bytes.base64EncodedString())) + } + + @Test @MainActor + func preservedFileManifestIsInjectedIntoUserMessage() { + let attachment = Attachment.file( + id: UUID(uuidString: "00000000-0000-0000-0000-000000000123")!, + filename: "sample.pdf", + mimeType: "application/pdf", + fileSize: 42, + hostPath: "/tmp/sample.pdf", + extractedPreview: "Page 1:\nHello" + ) + + let text = ChatSession.buildUserMessageText(content: "Please summarize this.", attachments: [attachment]) + + #expect(text.contains(" String { let agentId = ChatExecutionContext.currentAgentId let payloadWithSecrets = injectSecrets(into: argumentsJSON, agentId: agentId) - let payloadWithContext = injectFolderContext(into: payloadWithSecrets) + let payloadWithContext = injectExecutionContext(into: payloadWithSecrets) return try await plugin.invoke(type: "tool", id: toolId, payload: payloadWithContext, agentId: agentId) } @@ -77,13 +77,21 @@ final class ExternalTool: OsaurusTool, PermissionedTool, @unchecked Sendable { return modifiedPayload } - /// Injects folder context into the tool payload under the `_context` key + /// Injects runtime context into the tool payload under the `_context` key /// - Parameter payload: Original JSON payload - /// - Returns: Payload with folder context injected, or original payload if no folder context active - private func injectFolderContext(into payload: String) -> String { + /// - Returns: Payload with runtime context injected, or original payload if no context is active + private func injectExecutionContext(into payload: String) -> String { // Read from the thread-safe cache to avoid hopping to MainActor, // which can deadlock when the main thread is busy with SwiftUI layout. - guard let rootPath = FolderContextService.cachedRootPath else { return payload } + Self.injectRuntimeContext( + into: payload, + rootPath: FolderContextService.cachedRootPath, + inputFiles: ChatExecutionContext.currentInputFiles + ) + } + + static func injectRuntimeContext(into payload: String, rootPath: URL?, inputFiles: [ChatInputFile]) -> String { + guard rootPath != nil || !inputFiles.isEmpty else { return payload } // Parse the original payload guard let payloadData = payload.data(using: .utf8), @@ -93,10 +101,14 @@ final class ExternalTool: OsaurusTool, PermissionedTool, @unchecked Sendable { return payload } - // Add context under the `_context` key - payloadDict["_context"] = [ - "working_directory": rootPath.path - ] + var context = payloadDict["_context"] as? [String: Any] ?? [:] + if let rootPath { + context["working_directory"] = rootPath.path + } + if !inputFiles.isEmpty { + context["attachments"] = inputFiles.map(\.toolPayload) + } + payloadDict["_context"] = context // Re-serialize to JSON guard let modifiedData = try? JSONSerialization.data(withJSONObject: payloadDict), diff --git a/Packages/OsaurusCore/Tools/PluginABI/osaurus_plugin.h b/Packages/OsaurusCore/Tools/PluginABI/osaurus_plugin.h index 056f6af32..65a93f58a 100644 --- a/Packages/OsaurusCore/Tools/PluginABI/osaurus_plugin.h +++ b/Packages/OsaurusCore/Tools/PluginABI/osaurus_plugin.h @@ -80,11 +80,12 @@ typedef const char* (*osr_list_models_fn)(void); // Returns JSON with "status", "headers", "body", "body_encoding", "elapsed_ms". typedef const char* (*osr_http_request_fn)(const char* request_json); -// File I/O — read files from allowed paths (e.g. shared artifacts). +// File I/O — read files from allowed paths (shared artifacts and preserved +// chat input attachments). // request_json has "path" (absolute file path). // Returns JSON with "data" (base64-encoded), "size", "mime_type", // or "error" + "message" on failure. -// Restricted to artifact paths for security. +// Restricted to artifact/attachment paths for security. typedef const char* (*osr_file_read_fn)(const char* request_json); // List all active tasks dispatched by the calling plugin. diff --git a/Packages/OsaurusCore/Utils/AttachmentFileStore.swift b/Packages/OsaurusCore/Utils/AttachmentFileStore.swift new file mode 100644 index 000000000..34a684113 --- /dev/null +++ b/Packages/OsaurusCore/Utils/AttachmentFileStore.swift @@ -0,0 +1,93 @@ +// +// AttachmentFileStore.swift +// osaurus +// +// Persists high-fidelity input files outside chat history JSON so plugins +// can operate on original bytes. +// + +import Foundation +import PDFKit + +enum AttachmentFileStore { + static let preservedExtensions: Set = ["pdf", "ppt", "pptx", "ppsx", "potx"] + + static func shouldPreserveOriginal(url: URL) -> Bool { + preservedExtensions.contains(url.pathExtension.lowercased()) + } + + static func store(url: URL) throws -> Attachment { + let id = UUID() + let filename = sanitizedFilename(url.lastPathComponent) + let fileSize = (try? url.resourceValues(forKeys: [.fileSizeKey]))?.fileSize ?? 0 + let destinationDir = OsaurusPaths.attachmentDir(attachmentId: id.uuidString) + let destinationURL = destinationDir.appendingPathComponent(filename, isDirectory: false) + + let fm = FileManager.default + try fm.createDirectory(at: destinationDir, withIntermediateDirectories: true) + if fm.fileExists(atPath: destinationURL.path) { + try fm.removeItem(at: destinationURL) + } + try fm.copyItem(at: url, to: destinationURL) + + return .file( + id: id, + filename: filename, + mimeType: mimeType(for: filename), + fileSize: fileSize, + hostPath: destinationURL.path, + extractedPreview: extractedPreview(for: destinationURL) + ) + } + + static func mimeType(for filename: String) -> String { + let ext = (filename as NSString).pathExtension.lowercased() + switch ext { + case "ppt": + return "application/vnd.ms-powerpoint" + case "pptx": + return "application/vnd.openxmlformats-officedocument.presentationml.presentation" + case "ppsx": + return "application/vnd.openxmlformats-officedocument.presentationml.slideshow" + case "potx": + return "application/vnd.openxmlformats-officedocument.presentationml.template" + default: + return SharedArtifact.mimeType(from: filename) + } + } + + private static func sanitizedFilename(_ filename: String) -> String { + let fallback = "attachment" + let trimmed = filename.trimmingCharacters(in: .whitespacesAndNewlines) + let base = trimmed.isEmpty ? fallback : trimmed + let invalid = CharacterSet(charactersIn: "/\\:") + let cleaned = base.components(separatedBy: invalid).joined(separator: "_") + return cleaned.isEmpty ? fallback : cleaned + } + + private static func extractedPreview(for url: URL) -> String? { + guard url.pathExtension.lowercased() == "pdf", + let document = PDFDocument(url: url) + else { + return nil + } + + var pages: [String] = [] + for i in 0 ..< document.pageCount { + guard let page = document.page(at: i), + let text = page.string?.trimmingCharacters(in: .whitespacesAndNewlines), + !text.isEmpty + else { continue } + pages.append("Page \(i + 1):\n\(text)") + } + + let joined = pages.joined(separator: "\n\n") + guard !joined.isEmpty else { return nil } + + if joined.count > DocumentParser.maxParsedTextLength { + return String(joined.prefix(DocumentParser.maxParsedTextLength)) + + "\n\n[Preview truncated - exceeded \(DocumentParser.maxParsedTextLength) character limit]" + } + return joined + } +} diff --git a/Packages/OsaurusCore/Utils/DocumentParser.swift b/Packages/OsaurusCore/Utils/DocumentParser.swift index 2bce81665..5083f4fc4 100644 --- a/Packages/OsaurusCore/Utils/DocumentParser.swift +++ b/Packages/OsaurusCore/Utils/DocumentParser.swift @@ -11,6 +11,9 @@ import Foundation import PDFKit import UniformTypeIdentifiers +// swift-format owns multiline brace placement in wrapped conditions. +// swiftlint:disable opening_brace + enum DocumentParser { static let maxParsedTextLength = 500_000 // ~500KB of text @@ -57,6 +60,10 @@ enum DocumentParser { let ext = url.pathExtension.lowercased() let filename = url.lastPathComponent + if AttachmentFileStore.shouldPreserveOriginal(url: url) { + return [try AttachmentFileStore.store(url: url)] + } + // PDF may fall back to image rendering if text extraction yields nothing if ext == "pdf" { return try parsePDFWithFallback(url: url, filename: filename, fileSize: fileSize) @@ -94,6 +101,7 @@ enum DocumentParser { static func canParse(url: URL) -> Bool { let ext = url.pathExtension.lowercased() return isPlainText(ext: ext) || richDocumentExtensions.contains(ext) + || AttachmentFileStore.shouldPreserveOriginal(url: url) } static func isImageFile(url: URL) -> Bool { @@ -109,6 +117,10 @@ enum DocumentParser { .html, UTType("org.openxmlformats.wordprocessingml.document") ?? .data, // .docx UTType("com.microsoft.word.doc") ?? .data, // .doc + UTType(filenameExtension: "ppt") ?? .data, + UTType(filenameExtension: "pptx") ?? .data, + UTType(filenameExtension: "ppsx") ?? .data, + UTType(filenameExtension: "potx") ?? .data, .commaSeparatedText, .json, .xml, .yaml, UTType("public.python-script") ?? .data, @@ -262,3 +274,5 @@ enum DocumentParser { } } } + +// swiftlint:enable opening_brace diff --git a/Packages/OsaurusCore/Utils/OsaurusPaths.swift b/Packages/OsaurusCore/Utils/OsaurusPaths.swift index e0edf8cac..c8919ba88 100644 --- a/Packages/OsaurusCore/Utils/OsaurusPaths.swift +++ b/Packages/OsaurusCore/Utils/OsaurusPaths.swift @@ -8,12 +8,15 @@ import Foundation +// swift-format owns multiline brace placement in wrapped conditions. +// swiftlint:disable opening_brace + /// Centralized path management for all Osaurus app data. /// All stores and services should use this module for path resolution. public enum OsaurusPaths { /// Optional root directory override for tests /// Note: nonisolated(unsafe) since this is only set during test setup before any concurrent access - public nonisolated(unsafe) static var overrideRoot: URL? + nonisolated(unsafe) public static var overrideRoot: URL? // MARK: - Root Directory @@ -229,6 +232,11 @@ public enum OsaurusPaths { root().appendingPathComponent("artifacts", isDirectory: true) } + /// Preserved input attachments directory + public static func attachments() -> URL { + root().appendingPathComponent("attachments", isDirectory: true) + } + /// Work data directory public static func workData() -> URL { root().appendingPathComponent("work", isDirectory: true) @@ -313,6 +321,17 @@ public enum OsaurusPaths { root().appendingPathComponent("artifacts", isDirectory: true) } + /// Root directory for preserved chat input attachments: + /// `~/.osaurus/attachments/` + public static func attachmentsDir() -> URL { + root().appendingPathComponent("attachments", isDirectory: true) + } + + /// Per-attachment directory: `~/.osaurus/attachments/{attachmentId}/` + public static func attachmentDir(attachmentId: String) -> URL { + attachmentsDir().appendingPathComponent(attachmentId, isDirectory: true) + } + /// Per-context artifacts directory: `~/.osaurus/artifacts/{contextId}/` public static func contextArtifactsDir(contextId: String) -> URL { artifactsDir().appendingPathComponent(contextId, isDirectory: true) @@ -471,3 +490,5 @@ public enum OsaurusPaths { } } + +// swiftlint:enable opening_brace diff --git a/Packages/OsaurusCore/Views/Chat/ChatView.swift b/Packages/OsaurusCore/Views/Chat/ChatView.swift index 26a8dc071..e5db37d43 100644 --- a/Packages/OsaurusCore/Views/Chat/ChatView.swift +++ b/Packages/OsaurusCore/Views/Chat/ChatView.swift @@ -11,6 +11,9 @@ import LocalAuthentication @preconcurrency import MLXLMCommon import SwiftUI +// swift-format owns multiline brace placement in wrapped conditions. +// swiftlint:disable opening_brace + /// Holds the derived, streaming-mutated `[ContentBlock]` list for the chat /// thread. Kept as a separate `ObservableObject` so that per-token visibleBlocks /// updates don't fire `ChatSession.objectWillChange` — that would force @@ -50,7 +53,7 @@ final class ChatSession: ObservableObject { let expandedBlocksStore = ExpandedBlocksStore() @Published var input: String = "" @Published var pendingAttachments: [Attachment] = [] - @Published var selectedModel: String? = nil + @Published var selectedModel: String? @Published var pickerItems: [ModelPickerItem] = [] @Published var activeModelOptions: [String: ModelOptionValue] = [:] @Published var hasAnyModel: Bool = false @@ -483,15 +486,18 @@ final class ChatSession: ObservableObject { ) } - /// Builds the full user message text, prepending any attached document contents wrapped in XML tags. + /// Builds the full user message text, prepending attached document text and a + /// compact manifest for high-fidelity files available to plugins. /// - /// Filenames are reduced to their basename and both the name and the body are - /// XML-entity-escaped so that a hostile document cannot forge a closing - /// `` tag or inject bracketed pseudo-tool markers that - /// would otherwise reach the model as control text. + /// Filenames are reduced to their basename and XML wrappers are escaped so a + /// hostile attachment cannot forge closing tags or pseudo-tool markers. static func buildUserMessageText(content: String, attachments: [Attachment]) -> String { - let docs = attachments.filter(\.isDocument) - guard !docs.isEmpty else { return content } + let docs = attachments.filter { + if case .document = $0.kind { return true } + return false + } + let files = attachments.filter(\.isPreservedFile) + guard !docs.isEmpty || !files.isEmpty else { return content } var parts: [String] = [] for doc in docs { @@ -502,6 +508,39 @@ final class ChatSession: ObservableObject { } } + if !files.isEmpty { + let manifest = files.compactMap { attachment -> String? in + guard let input = attachment.inputFile else { return nil } + let safeName = escapeAttachmentName(input.filename) + return """ + + """ + } + parts.append( + """ + + \(manifest.joined(separator: "\n")) + + """ + ) + + for file in files { + guard let input = file.inputFile, + let preview = file.extractedPreview, + !preview.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty + else { continue } + let safeName = escapeAttachmentName(input.filename) + let safePreview = xmlEscape(preview) + parts.append( + """ + + \(safePreview) + + """ + ) + } + } + if !content.isEmpty { parts.append(content) } @@ -512,7 +551,7 @@ final class ChatSession: ObservableObject { private static func escapeAttachmentName(_ raw: String) -> String { let basename = (raw as NSString).lastPathComponent let trimmed = basename.trimmingCharacters(in: .whitespacesAndNewlines) - return xmlEscape(trimmed.isEmpty ? "attachment" : trimmed) + return xmlAttributeEscaped(trimmed.isEmpty ? "attachment" : trimmed) } private static func xmlEscape(_ s: String) -> String { @@ -523,6 +562,29 @@ final class ChatSession: ObservableObject { .replacingOccurrences(of: "\"", with: """) } + static func availableInputFiles(from turns: [ChatTurn]) -> [ChatInputFile] { + var seen: Set = [] + var files: [ChatInputFile] = [] + + for turn in turns where turn.role == .user { + for input in turn.attachments.inputFiles where !seen.contains(input.id) { + seen.insert(input.id) + files.append(input) + } + } + + return files + } + + private static func xmlAttributeEscaped(_ value: String) -> String { + value + .replacingOccurrences(of: "&", with: "&") + .replacingOccurrences(of: "\"", with: """) + .replacingOccurrences(of: "'", with: "'") + .replacingOccurrences(of: "<", with: "<") + .replacingOccurrences(of: ">", with: ">") + } + /// Format token count for display (e.g., "1.2K", "15K") static func formatTokenCount(_ tokens: Int) -> String { if tokens < 1000 { @@ -1789,14 +1851,27 @@ final class ChatSession: ObservableObject { // brand-new chats still get a todo store entry. let sessionIdForTools = sessionId?.uuidString ?? "chatwindow-\(ObjectIdentifier(self).hashValue)" - resultText = try await ChatExecutionContext.$currentAgentId.withValue(effectiveAgentId) { - try await ChatExecutionContext.$currentSessionId.withValue(sessionIdForTools) { - try await ChatExecutionContext.$currentAssistantTurnId.withValue(assistantTurn.id) { - try await ChatExecutionContext.$currentToolCallId.withValue(callId) { - try await ToolRegistry.shared.execute( - name: inv.toolName, - argumentsJSON: inv.jsonArguments - ) + let inputFilesForTools = Self.availableInputFiles(from: turns) + resultText = try await ChatExecutionContext.$currentAgentId.withValue( + effectiveAgentId + ) { + try await ChatExecutionContext.$currentSessionId.withValue( + sessionIdForTools + ) { + try await ChatExecutionContext.$currentAssistantTurnId.withValue( + assistantTurn.id + ) { + try await ChatExecutionContext.$currentToolCallId.withValue( + callId + ) { + try await ChatExecutionContext.$currentInputFiles.withValue( + inputFilesForTools + ) { + try await ToolRegistry.shared.execute( + name: inv.toolName, + argumentsJSON: inv.jsonArguments + ) + } } } } @@ -2040,13 +2115,13 @@ struct ChatView: View { @State private var editText: String = "" @State private var userImagePreview: NSImage? // Bonjour agent connection - @State private var pendingDiscoveredAgent: DiscoveredAgent? = nil + @State private var pendingDiscoveredAgent: DiscoveredAgent? // Minimap @State private var activeMinimapTurnId: UUID? @State private var scrollToTurnId: UUID? @State private var scrollToTurnTrigger: Int = 0 // What's New modal - @State private var pendingWhatsNew: WhatsNewRelease? = nil + @State private var pendingWhatsNew: WhatsNewRelease? /// Convenience accessor for the window's theme private var theme: ThemeProtocol { windowState.theme } @@ -2116,6 +2191,7 @@ struct ChatView: View { } var body: some View { + // swiftlint:disable:next redundant_discardable_let let _ = ChatPerfTrace.shared.count("body.ChatView") chatModeContent .themedAlertScope(.chat(windowState.windowId)) @@ -2759,11 +2835,12 @@ private struct IsolatedThreadView: View { let onConfirmEdit: (() -> Void)? let onCancelEdit: (() -> Void)? let onUserImagePreview: ((String) -> Void)? - var onVisibleTopUserTurnChanged: ((UUID?) -> Void)? = nil - var scrollToTurnId: UUID? = nil + var onVisibleTopUserTurnChanged: ((UUID?) -> Void)? + var scrollToTurnId: UUID? var scrollToTurnTrigger: Int = 0 var body: some View { + // swiftlint:disable:next redundant_discardable_let let _ = ChatPerfTrace.shared.count("body.IsolatedThreadView") MessageThreadView( blocks: store.blocks, @@ -3030,7 +3107,7 @@ private struct PairingSheet: View { let onCancel: () -> Void @State private var isPairing = false - @State private var errorMessage: String? = nil + @State private var errorMessage: String? @Environment(\.theme) private var theme var body: some View { @@ -3136,17 +3213,17 @@ private enum PairingClient { let context = LAContext() context.touchIDAuthenticationAllowableReuseDuration = 300 - var masterKey = try MasterKey.getPrivateKey(context: context) + var pairingPrivateKey = try MasterKey.getPrivateKey(context: context) defer { - masterKey.withUnsafeMutableBytes { ptr in + pairingPrivateKey.withUnsafeMutableBytes { ptr in if let base = ptr.baseAddress { memset(base, 0, ptr.count) } } } - let connectorAddress = try PairingKey.deriveAddress(masterKey: masterKey) + let connectorAddress = try PairingKey.deriveAddress(masterKey: pairingPrivateKey) let nonce = UUID().uuidString - let signature = try PairingKey.sign(payload: Data(nonce.utf8), masterKey: masterKey) + let signature = try PairingKey.sign(payload: Data(nonce.utf8), masterKey: pairingPrivateKey) let hexSig = "0x" + signature.hexEncodedString let rawHost = agent.host ?? "" @@ -3185,3 +3262,5 @@ private enum PairingClient { // MARK: - Shared Header Components // HeaderActionButton, SettingsButton, CloseButton, PinButton are now in SharedHeaderComponents.swift + +// swiftlint:enable opening_brace diff --git a/Packages/OsaurusCore/Views/Chat/FloatingInputCard.swift b/Packages/OsaurusCore/Views/Chat/FloatingInputCard.swift index 2560a6e7e..154d5bab0 100644 --- a/Packages/OsaurusCore/Views/Chat/FloatingInputCard.swift +++ b/Packages/OsaurusCore/Views/Chat/FloatingInputCard.swift @@ -10,6 +10,9 @@ import Combine import SwiftUI import UniformTypeIdentifiers +// swift-format owns multiline brace placement in wrapped conditions. +// swiftlint:disable opening_brace + struct FloatingInputCard: View { @Binding var text: String @Binding var selectedModel: String? @@ -31,16 +34,16 @@ struct FloatingInputCard: View { /// Trigger to focus the input field (increment to focus) var focusTrigger: Int = 0 /// Current agent ID (used for agent-specific settings) - var agentId: UUID? = nil + var agentId: UUID? /// Window ID for targeted VAD notifications - var windowId: UUID? = nil + var windowId: UUID? /// Compact mode (sidebar open) - hides secondary chip content var isCompact: Bool = false /// Callback to clear the current chat session (triggered by /clear command). - var onClearChat: (() -> Void)? = nil + var onClearChat: (() -> Void)? /// Callback when the user selects a skill slash command. Passes the skill UUID so the /// caller can inject that skill's instructions as one-off context for the next send. - var onSkillSelected: ((UUID) -> Void)? = nil + var onSkillSelected: ((UUID) -> Void)? /// Binding to the session's pending one-off skill. Non-nil shows a dismissable skill chip. @Binding var pendingSkillId: UUID? @@ -144,7 +147,7 @@ struct FloatingInputCard: View { @State private var contextHoverTask: Task? @State private var isSandboxHovered = false @State private var sandboxPulseAmount: CGFloat = 1.0 - @State private var sandboxPulseTask: Task? = nil + @State private var sandboxPulseTask: Task? @State private var isClipboardHovered = false @State private var clipboardPulseAmount: CGFloat = 0.0 @State private var clipboardPulseOpacity: Double = 0.0 @@ -168,7 +171,7 @@ struct FloatingInputCard: View { /// Tracks confirmed transcription length to detect actual changes (for silence timeout) @State private var lastConfirmedLength: Int = 0 - @State private var pauseTimerCancellable: AnyCancellable? = nil + @State private var pauseTimerCancellable: AnyCancellable? // TextEditor should grow up to ~6 lines before scrolling private var inputFontSize: CGFloat { CGFloat(theme.bodySize) } @@ -329,6 +332,7 @@ struct FloatingInputCard: View { } var body: some View { + // swiftlint:disable:next redundant_discardable_let let _ = ChatPerfTrace.shared.count("body.FloatingInputCard") mainContent .onAppear { @@ -563,7 +567,7 @@ struct FloatingInputCard: View { // MARK: - Voice Debug Helpers /// Standalone log helper so VoiceDebugObservers can call it without a card reference. -fileprivate func voiceDebugLog( +private func voiceDebugLog( trigger: String, enabled: Bool, micPermission: Bool, @@ -1058,7 +1062,7 @@ extension FloatingInputCard { } ) } - case .document, .documentRef: + case .document, .documentRef, .file: DocumentChip(attachment: attachment) { withAnimation(theme.springAnimation()) { _ = pendingAttachments.remove(at: index) @@ -2324,7 +2328,7 @@ extension FloatingInputCard { } } else if provider.hasItemConformingToTypeIdentifier(UTType.fileURL.identifier) { handled = true - provider.loadItem(forTypeIdentifier: UTType.fileURL.identifier) { item, error in + provider.loadItem(forTypeIdentifier: UTType.fileURL.identifier) { item, _ in guard let data = item as? Data, let url = URL(dataRepresentation: data, relativeTo: nil) else { return } @@ -3484,7 +3488,6 @@ private struct StopButton: View { // MARK: - Resume Button -/// Polished resume button with accent color // MARK: - Preview #if DEBUG @@ -3537,3 +3540,5 @@ private struct StopButton: View { } } #endif + +// swiftlint:enable opening_brace diff --git a/Packages/OsaurusCore/Views/Chat/NativeArtifactCardView.swift b/Packages/OsaurusCore/Views/Chat/NativeArtifactCardView.swift index a01bc4b17..c035bd8b4 100644 --- a/Packages/OsaurusCore/Views/Chat/NativeArtifactCardView.swift +++ b/Packages/OsaurusCore/Views/Chat/NativeArtifactCardView.swift @@ -11,6 +11,9 @@ import AVFoundation @preconcurrency import PDFKit import QuartzCore +// swift-format owns multiline brace placement in wrapped conditions. +// swiftlint:disable opening_brace + // MARK: - NativeArtifactCardView final class NativeArtifactCardView: NSView { @@ -969,6 +972,7 @@ final class NativeArtifactCardView: NSView { if artifact.isDirectory { return "folder.fill" } if artifact.isImage { return "photo" } if artifact.isPDF { return "doc.richtext.fill" } + if artifact.isPresentation { return "rectangle.on.rectangle.angled" } if artifact.isVideo { return "film" } if artifact.isAudio { return "waveform" } if artifact.isHTML { return "globe" } @@ -988,6 +992,7 @@ final class NativeArtifactCardView: NSView { } if artifact.isImage { return [c("8b5cf6"), c("7c3aed")] } if artifact.isPDF { return [c("ef4444"), c("dc2626")] } + if artifact.isPresentation { return [c("2563eb"), c("0f766e")] } if artifact.isVideo { return [c("ec4899"), c("db2777")] } if artifact.isAudio { return [c("f59e0b"), c("d97706")] } if artifact.isHTML { return [c("3b82f6"), c("2563eb")] } @@ -1005,3 +1010,5 @@ final class NativeArtifactCardView: NSView { "\(theme.bodySize)|\(theme.captionSize)|\(theme.glassEnabled)|\(NSColor(theme.accentColor).description)|\(NSColor(theme.secondaryBackground).description)|\(NSColor(theme.primaryBorder).description)|\(NSColor(theme.primaryText).description)|\(NSColor(theme.tertiaryText).description)|\(NSColor(theme.tertiaryBackground).description)" } } + +// swiftlint:enable opening_brace