diff --git a/Packages/OsaurusCore/AppDelegate.swift b/Packages/OsaurusCore/AppDelegate.swift
index 175a2d095..780438e0d 100644
--- a/Packages/OsaurusCore/AppDelegate.swift
+++ b/Packages/OsaurusCore/AppDelegate.swift
@@ -32,6 +32,10 @@ public final class AppDelegate: NSObject, NSApplicationDelegate, NSPopoverDelega
         // the specific crash class this prevents.
         MLXErrorRecovery.installGlobalHandler()
 
+        // Register in-tree document format adapters before any file-ingress
+        // path can run. Idempotent; safe if a future migration moves this.
+        DocumentAdaptersBootstrap.registerBuiltIns()
+
         // Detect repeated startup crashes and enter safe mode if needed
         LaunchGuard.checkOnLaunch()
 
@@ -175,7 +179,7 @@ public final class AppDelegate: NSObject, NSApplicationDelegate, NSPopoverDelega
         #endif
 
         // Initialize directory access early so security-scoped bookmark is active
-        let _ = DirectoryPickerService.shared
+        _ = DirectoryPickerService.shared
 
         if LaunchGuard.isSafeMode {
             NotificationService.shared.postSafeModeActive()
@@ -876,7 +880,7 @@ extension AppDelegate {
     }
 
     @objc private func handleServeCommand(_ note: Notification) {
-        var desiredPort: Int? = nil
+        var desiredPort: Int?
         var exposeFlag: Bool = false
         if let ui = note.userInfo {
             if let p = ui["port"] as? Int {
diff --git a/Packages/OsaurusCore/Folder/FolderTools.swift b/Packages/OsaurusCore/Folder/FolderTools.swift
index 62102188d..cee0dba48 100644
--- a/Packages/OsaurusCore/Folder/FolderTools.swift
+++ b/Packages/OsaurusCore/Folder/FolderTools.swift
@@ -82,10 +82,11 @@ enum FolderToolHelpers {
     static func detectProjectType(_ url: URL) -> ProjectType {
         let fm = FileManager.default
         for projectType in ProjectType.allCases where projectType != .unknown {
-            for manifestFile in projectType.manifestFiles {
-                if fm.fileExists(atPath: url.appendingPathComponent(manifestFile).path) {
-                    return projectType
-                }
+            let hasManifest = projectType.manifestFiles.contains { manifestFile in
+                fm.fileExists(atPath: url.appendingPathComponent(manifestFile).path)
+            }
+            if hasManifest {
+                return projectType
             }
         }
         return .unknown
@@ -638,9 +639,10 @@ struct FileSearchTool: OsaurusTool {
                 if let pattern = filePattern {
                     let regex = pattern.replacingOccurrences(of: ".", with: "\\.")
                         .replacingOccurrences(of: "*", with: ".*")
-                    if fileURL.lastPathComponent.range(of: "^\(regex)$", options: .regularExpression)
-                        == nil
-                    {
+                    if fileURL.lastPathComponent.range(
+                        of: "^\(regex)$",
+                        options: .regularExpression
+                    ) == nil {
                         continue
                     }
                 }
@@ -1016,6 +1018,9 @@ enum FolderToolFactory {
             FileWriteTool(rootPath: rootPath),
             FileEditTool(rootPath: rootPath),
             FileSearchTool(rootPath: rootPath),
+            ReadWorkbookTool(rootPath: rootPath),
+            ReadWorkbookCellTool(rootPath: rootPath),
+            WriteWorkbookTool(rootPath: rootPath),
         ]
     }
 
diff --git a/Packages/OsaurusCore/Folder/WorkbookTools.swift b/Packages/OsaurusCore/Folder/WorkbookTools.swift
new file mode 100644
index 000000000..c32490316
--- /dev/null
+++ b/Packages/OsaurusCore/Folder/WorkbookTools.swift
@@ -0,0 +1,573 @@
+//
+//  WorkbookTools.swift
+//  osaurus
+//
+//  Folder-scoped agent tools for reading and writing XLSX workbooks
+//  through the typed `Workbook` surface. Installed by
+//  `FolderToolFactory.buildCoreTools` when a working folder is active.
+//
+//  These tools let an agent ingest a spreadsheet, reason about cells and
+//  formulas in their native types, and emit a modified workbook without
+//  ever dropping to markdown-as-text serialisation. They pair with
+//  `XLSXAdapter` (read) and `XLSXEmitter` (write) via
+//  `DocumentFormatRegistry`.
+//
+//  Path resolution matches `FileReadTool` / `FileWriteTool` — paths are
+//  contained under `rootPath` and `..`-traversal is rejected.
+//
+
+import Foundation
+
+// MARK: - read_workbook
+
+struct ReadWorkbookTool: OsaurusTool {
+    let name = "read_workbook"
+    let description =
+        "Read an XLSX spreadsheet into a structured summary. Returns sheet "
+        + "names, row counts, merged ranges, and a truncated cell sample per "
+        + "sheet so the response stays in-context. For a specific cell's value "
+        + "or formula use `read_workbook_cell`. To write a modified workbook, "
+        + "use `write_workbook`."
+
+    let parameters: JSONValue? = .object([
+        "type": .string("object"),
+        "additionalProperties": .bool(false),
+        "properties": .object([
+            "path": .object([
+                "type": .string("string"),
+                "description": .string("Relative path to an .xlsx file under the working folder."),
+            ])
+        ]),
+        "required": .array([.string("path")]),
+    ])
+
+    private let rootPath: URL
+
+    /// Cap on cells returned per sheet. Agents that need more should
+    /// switch to `read_workbook_cell` for the specific reference.
+    private static let maxCellsPerSheet = 200
+
+    init(rootPath: URL) {
+        self.rootPath = rootPath
+    }
+
+    func execute(argumentsJSON: String) async throws -> String {
+        let argsReq = requireArgumentsDictionary(argumentsJSON, tool: name)
+        guard case .value(let args) = argsReq else { return argsReq.failureEnvelope ?? "" }
+        let pathReq = requireString(args, "path", expected: "relative path to an .xlsx file", tool: name)
+        guard case .value(let relativePath) = pathReq else { return pathReq.failureEnvelope ?? "" }
+        // Resolve the path against `rootPath`; a resolution error is reported as invalid args.
+        let fileURL: URL
+        do {
+            fileURL = try FolderToolHelpers.resolvePath(relativePath, rootPath: rootPath)
+        } catch {
+            return ToolEnvelope.failure(kind: .invalidArgs, message: error.localizedDescription, tool: name)
+        }
+        // Parse with the XLSX adapter and unwrap the typed `Workbook`; both failure modes become envelopes.
+        let workbook: Workbook
+        do {
+            let document = try await XLSXAdapter().parse(
+                url: fileURL,
+                sizeLimit: DocumentLimits.limit(forFormatId: "xlsx")
+            )
+            guard let wb = document.representation.underlying as? Workbook else {
+                return ToolEnvelope.failure(
+                    kind: .executionError,
+                    message: "XLSX adapter returned unexpected representation.",
+                    tool: name
+                )
+            }
+            workbook = wb
+        } catch {
+            return ToolEnvelope.failure(
+                kind: .executionError,
+                message: "Failed to read workbook: \(error.localizedDescription)",
+                tool: name
+            )
+        }
+        // Serialise the summary as sorted-key JSON and hand it back as the success text.
+        let payload = renderSummary(path: relativePath, workbook: workbook)
+        guard let data = try? JSONSerialization.data(withJSONObject: payload, options: [.sortedKeys]),
+            let text = String(data: data, encoding: .utf8)
+        else {
+            return ToolEnvelope.failure(
+                kind: .executionError,
+                message: "Could not serialise workbook summary.",
+                tool: name
+            )
+        }
+        return ToolEnvelope.success(tool: name, text: text)
+    }
+
+    // MARK: - Summary rendering
+
+    /// Builds the JSON-ready summary: per sheet its name, row count, total
+    /// cell count, and at most `maxCellsPerSheet` rendered cells.
+    private func renderSummary(path: String, workbook: Workbook) -> [String: Any] {
+        let cap = Self.maxCellsPerSheet
+        let sheets: [[String: Any]] = workbook.sheets.map { sheet in
+            let cellCount = sheet.rows.reduce(0) { $0 + $1.cells.count }
+            // Render only cells that survive the cap instead of every cell on the sheet.
+            var sample: [[String: Any]] = []
+            for row in sheet.rows where sample.count < cap {
+                for cell in row.cells.prefix(cap - sample.count) {
+                    sample.append(renderCell(row: row.index, cell: cell))
+                }
+            }
+            var sheetPayload: [String: Any] = [
+                "name": sheet.name, "rowCount": sheet.rows.count,
+                "cellCount": cellCount, "cells": sample,
+            ]
+            if cellCount > sample.count { sheetPayload["truncated"] = true }
+            if !sheet.mergedRanges.isEmpty {
+                sheetPayload["mergedRanges"] = sheet.mergedRanges.map { $0.reference }
+            }
+            return sheetPayload
+        }
+        return ["path": path, "sheets": sheets]
+    }
+
+    /// Flattens one cell into a JSON-ready dictionary carrying `ref`, `row`,
+    /// a `type` tag, the typed `value` (omitted for empty cells), and the
+    /// `formula` when the cell has one.
+    private func renderCell(row: Int, cell: Cell) -> [String: Any] {
+        // Resolve the tag and the optional typed value first, then assemble
+        // the dictionary in one place.
+        let typeTag: String
+        let typedValue: Any?
+
+        switch cell.value {
+        case .empty: typeTag = "empty"; typedValue = nil
+        case .number(let number): typeTag = "number"; typedValue = number
+        case .string(let text): typeTag = "string"; typedValue = text
+        case .inlineString(let text): typeTag = "inlineString"; typedValue = text
+        case .bool(let flag): typeTag = "bool"; typedValue = flag
+        }
+
+        var payload: [String: Any] = ["ref": cell.reference, "row": row, "type": typeTag]
+        // `value` and `formula` are only present when the cell has them.
+        if let typedValue = typedValue { payload["value"] = typedValue }
+        if let formula = cell.formula { payload["formula"] = formula }
+        return payload
+    }
+}
+
+// MARK: - read_workbook_cell
+
+struct ReadWorkbookCellTool: OsaurusTool {
+    let name = "read_workbook_cell"
+    let description =
+        "Read a single cell from an XLSX spreadsheet. Returns value, formula, "
+        + "and type for the referenced cell. Use after `read_workbook` has "
+        + "shown the structure and you need a specific value that was "
+        + "truncated out of the summary."
+
+    let parameters: JSONValue? = .object([
+        "type": .string("object"),
+        "additionalProperties": .bool(false),
+        "properties": .object([
+            "path": .object([
+                "type": .string("string"),
+                "description": .string("Relative path to an .xlsx file under the working folder."),
+            ]),
+            "sheet": .object([
+                "type": .string("string"),
+                "description": .string("Sheet name, e.g. `Revenue`."),
+            ]),
+            "cell": .object([
+                "type": .string("string"),
+                "description": .string("A1-style cell reference, e.g. `B3` or `AA10`."),
+            ]),
+        ]),
+        "required": .array([.string("path"), .string("sheet"), .string("cell")]),
+    ])
+
+    private let rootPath: URL
+
+    init(rootPath: URL) {
+        self.rootPath = rootPath
+    }
+
+    /// Looks up one cell by sheet name and A1 reference and returns its type,
+    /// value, and formula as a JSON payload. Every failure comes back as a
+    /// `ToolEnvelope.failure` string rather than a thrown error.
+    func execute(argumentsJSON: String) async throws -> String {
+        let argsReq = requireArgumentsDictionary(argumentsJSON, tool: name)
+        guard case .value(let args) = argsReq else { return argsReq.failureEnvelope ?? "" }
+        let pathReq = requireString(args, "path", expected: "relative path to an .xlsx file", tool: name)
+        guard case .value(let relativePath) = pathReq else { return pathReq.failureEnvelope ?? "" }
+        let sheetReq = requireString(args, "sheet", expected: "sheet name", tool: name)
+        guard case .value(let sheetName) = sheetReq else { return sheetReq.failureEnvelope ?? "" }
+        let cellReq = requireString(args, "cell", expected: "A1-style cell reference", tool: name)
+        guard case .value(let cellRef) = cellReq else { return cellReq.failureEnvelope ?? "" }
+
+        let fileURL: URL
+        do {
+            fileURL = try FolderToolHelpers.resolvePath(relativePath, rootPath: rootPath)
+        } catch {
+            return ToolEnvelope.failure(kind: .invalidArgs, message: error.localizedDescription, tool: name)
+        }
+
+        let workbook: Workbook
+        do {
+            let document = try await XLSXAdapter().parse(
+                url: fileURL,
+                sizeLimit: DocumentLimits.limit(forFormatId: "xlsx")
+            )
+            guard let wb = document.representation.underlying as? Workbook else {
+                return ToolEnvelope.failure(
+                    kind: .executionError, message: "XLSX adapter returned unexpected representation.", tool: name
+                )
+            }
+            workbook = wb
+        } catch {
+            return ToolEnvelope.failure(
+                kind: .executionError,
+                message: "Failed to read workbook: \(error.localizedDescription)",
+                tool: name
+            )
+        }
+
+        guard let sheet = workbook.sheets.first(where: { $0.name == sheetName }) else {
+            let available = workbook.sheets.map(\.name).joined(separator: ", ")
+            return ToolEnvelope.failure(
+                kind: .invalidArgs,
+                message: "Sheet '\(sheetName)' not found. Available sheets: \(available).",
+                field: "sheet",
+                expected: "an existing sheet name",
+                tool: name
+            )
+        }
+        // Callers may send `b3` or ` B3 `; compare trimmed and
+        // case-insensitively so a cosmetic difference is not "not found".
+        let wantedRef = cellRef.trimmingCharacters(in: .whitespacesAndNewlines)
+        let isWanted: (Cell) -> Bool = { $0.reference.caseInsensitiveCompare(wantedRef) == .orderedSame }
+        guard let cell = sheet.rows.flatMap(\.cells).first(where: isWanted) else {
+            return ToolEnvelope.failure(
+                kind: .invalidArgs,
+                message: "Cell '\(cellRef)' not found on sheet '\(sheetName)'.",
+                field: "cell",
+                expected: "an occupied cell on the sheet",
+                tool: name
+            )
+        }
+
+        var payload: [String: Any] = ["ref": cell.reference]
+        switch cell.value {
+        case .empty: payload["type"] = "empty"
+        case .number(let v): payload["type"] = "number"; payload["value"] = v
+        case .string(let v): payload["type"] = "string"; payload["value"] = v
+        case .inlineString(let v): payload["type"] = "inlineString"; payload["value"] = v
+        case .bool(let v): payload["type"] = "bool"; payload["value"] = v
+        }
+        if let formula = cell.formula { payload["formula"] = formula }
+
+        guard let data = try? JSONSerialization.data(withJSONObject: payload, options: [.sortedKeys]),
+            let text = String(data: data, encoding: .utf8)
+        else {
+            return ToolEnvelope.failure(kind: .executionError, message: "Could not serialise cell payload.", tool: name)
+        }
+        return ToolEnvelope.success(tool: name, text: text)
+    }
+}
+
+// MARK: - write_workbook
+
+struct WriteWorkbookTool: OsaurusTool {
+    let name = "write_workbook"
+    let description =
+        "Write an XLSX spreadsheet to disk. Accepts a structured `sheets` "
+        + "array so the model never has to format raw XML. Each cell carries "
+        + "its A1 reference, a typed value, and an optional formula. "
+        + "Call `share_artifact` afterwards if you want the file to appear in "
+        + "the chat thread."
+
+    let parameters: JSONValue? = .object([
+        "type": .string("object"),
+        "additionalProperties": .bool(false),
+        "properties": .object([
+            "path": .object([
+                "type": .string("string"),
+                "description": .string("Relative output path, e.g. `report.xlsx`."),
+            ]),
+            "sheets": .object([
+                "type": .string("array"),
+                "description": .string("One or more sheets in display order."),
+                "items": .object([
+                    "type": .string("object"),
+                    "additionalProperties": .bool(false),
+                    "required": .array([.string("name")]),
+                    "properties": .object([
+                        "name": .object([
+                            "type": .string("string"),
+                            "description": .string("Sheet display name."),
+                        ]),
+                        "cells": .object([
+                            "type": .string("array"),
+                            "description": .string(
+                                "Cells to write. Omit to create an empty sheet."
+                            ),
+                            "items": .object([
+                                "type": .string("object"),
+                                "additionalProperties": .bool(false),
+                                "required": .array([.string("ref")]),
+                                "properties": .object([
+                                    "ref": .object([
+                                        "type": .string("string"),
+                                        "description": .string("A1 reference, e.g. `B3`."),
+                                    ]),
+                                    "type": .object([
+                                        "type": .string("string"),
+                                        "description": .string(
+                                            "`string`, `number`, `bool`, or `formula`."
+                                        ),
+                                        "enum": .array([
+                                            .string("string"),
+                                            .string("number"),
+                                            .string("bool"),
+                                            .string("formula"),
+                                        ]),
+                                    ]),
+                                    "value": .object([
+                                        "description": .string(
+                                            "Cell value — string/number/bool. Ignored for `formula` cells; use `formula` instead."
+                                        )
+                                    ]),
+                                    "formula": .object([
+                                        "type": .string("string"),
+                                        "description": .string(
+                                            "Formula source without the leading `=`, e.g. `SUM(A1:A3)`."
+                                        ),
+                                    ]),
+                                ]),
+                            ]),
+                        ]),
+                        "mergedRanges": .object([
+                            "type": .string("array"),
+                            "items": .object(["type": .string("string")]),
+                            "description": .string("Optional A1:A1 merge ranges, e.g. `A1:B1`."),
+                        ]),
+                    ]),
+                ]),
+            ]),
+        ]),
+        "required": .array([.string("path"), .string("sheets")]),
+    ])
+
+    private let rootPath: URL
+
+    init(rootPath: URL) {
+        self.rootPath = rootPath
+    }
+
+    func execute(argumentsJSON: String) async throws -> String {
+        let argsReq = requireArgumentsDictionary(argumentsJSON, tool: name)
+        guard case .value(let args) = argsReq else { return argsReq.failureEnvelope ?? "" }
+        let pathReq = requireString(args, "path", expected: "relative output path ending in .xlsx", tool: name)
+        guard case .value(let relativePath) = pathReq else { return pathReq.failureEnvelope ?? "" }
+        // `sheets` is validated by hand: it must be a non-empty array of objects.
+        guard let rawSheets = args["sheets"] as? [[String: Any]], !rawSheets.isEmpty else {
+            return ToolEnvelope.failure(
+                kind: .invalidArgs,
+                message: "`sheets` must be a non-empty array of sheet objects.",
+                field: "sheets",
+                expected: "non-empty array",
+                tool: name
+            )
+        }
+        // Resolve the destination against `rootPath`; a resolution error is reported as invalid args.
+        let destURL: URL
+        do {
+            destURL = try FolderToolHelpers.resolvePath(relativePath, rootPath: rootPath)
+        } catch {
+            return ToolEnvelope.failure(kind: .invalidArgs, message: error.localizedDescription, tool: name)
+        }
+        // Only `.xlsx` destinations are accepted; the extension is compared case-insensitively.
+        guard destURL.pathExtension.lowercased() == "xlsx" else {
+            return ToolEnvelope.failure(
+                kind: .invalidArgs,
+                message: "`path` must end in `.xlsx`; got '\(relativePath)'.",
+                field: "path",
+                expected: ".xlsx file path",
+                tool: name
+            )
+        }
+        // Parse every sheet in order; the first failure envelope is returned as-is and nothing is written.
+        var sheets: [Sheet] = []
+        for (index, raw) in rawSheets.enumerated() {
+            switch parseSheet(raw, at: index) {
+            case .value(let sheet): sheets.append(sheet)
+            case .failure(let envelope): return envelope
+            }
+        }
+        // Wrap the workbook in a `StructuredDocument` for the emitter; `fileSize` and `textFallback` are left empty.
+        let workbook = Workbook(sheets: sheets, sharedStrings: [])
+        let document = StructuredDocument(
+            formatId: "xlsx",
+            filename: destURL.lastPathComponent,
+            fileSize: 0,
+            representation: AnyStructuredRepresentation(formatId: "xlsx", underlying: workbook),
+            textFallback: ""
+        )
+
+        // Ensure parent exists so relative writes like `reports/q4.xlsx`
+        // work without a separate `dir_create` round-trip.
+        try? FileManager.default.createDirectory(  // best-effort: `try?` discards any error here
+            at: destURL.deletingLastPathComponent(),
+            withIntermediateDirectories: true
+        )
+        // Emit the file; any thrown error becomes an execution-error envelope.
+        do {
+            try await XLSXEmitter().emit(document, to: destURL)
+        } catch {
+            return ToolEnvelope.failure(
+                kind: .executionError,
+                message: "Failed to write workbook: \(error.localizedDescription)",
+                tool: name
+            )
+        }
+        // Report what was written; if JSON encoding fails, fall back to a plain-text success message.
+        let payload: [String: Any] = [
+            "path": relativePath,
+            "sheetCount": sheets.count,
+            "totalCells": sheets.reduce(0) { $0 + $1.rows.flatMap(\.cells).count },
+        ]
+        guard let data = try? JSONSerialization.data(withJSONObject: payload, options: [.sortedKeys]),
+            let text = String(data: data, encoding: .utf8)
+        else {
+            return ToolEnvelope.success(tool: name, text: "Wrote workbook to \(relativePath)")
+        }
+        return ToolEnvelope.success(tool: name, text: text)
+    }
+
+    // MARK: - Parsing
+
+    /// Turns one raw sheet object into a `Sheet`: requires a non-empty
+    /// `name`, buckets parsed cells by row number, and emits rows in
+    /// ascending row order. Stops at the first cell that fails to parse.
+    private func parseSheet(
+        _ raw: [String: Any],
+        at index: Int
+    ) -> ArgumentRequirement<Sheet> {
+        let candidateName = raw["name"] as? String ?? ""
+        if candidateName.isEmpty {
+            return .failure(
+                ToolEnvelope.failure(
+                    kind: .invalidArgs,
+                    message: "Sheet at index \(index) is missing a non-empty `name`.",
+                    field: "sheets[\(index)].name",
+                    expected: "non-empty string",
+                    tool: name
+                )
+            )
+        }
+
+        var buckets: [Int: [Cell]] = [:]
+        let cellEntries = (raw["cells"] as? [[String: Any]]) ?? []
+        for (offset, entry) in zip(cellEntries.indices, cellEntries) {
+            switch parseCell(entry, sheetIndex: index, cellIndex: offset) {
+            case .failure(let envelope): return .failure(envelope)
+            case .value(let (rowNumber, cell)): buckets[rowNumber, default: []].append(cell)
+            }
+        }
+        let rows = buckets.sorted { $0.key < $1.key }.map { Row(index: $0.key, cells: $0.value) }
+        var merges: [CellRange] = []
+        if let references = raw["mergedRanges"] as? [String] {
+            merges = references.map { CellRange(reference: $0) }
+        }
+        return .value(Sheet(name: candidateName, rows: rows, mergedRanges: merges))
+    }
+
+    /// Parses one cell object into its 1-based row number and a `Cell`.
+    /// Returns a failure envelope for a missing or malformed `ref`, a
+    /// `formula` cell without a formula string, or an unknown `type`.
+    private func parseCell(
+        _ raw: [String: Any],
+        sheetIndex: Int,
+        cellIndex: Int
+    ) -> ArgumentRequirement<(Int, Cell)> {
+        guard let ref = raw["ref"] as? String, !ref.isEmpty else {
+            return .failure(
+                ToolEnvelope.failure(
+                    kind: .invalidArgs,
+                    message: "Cell \(cellIndex) on sheet \(sheetIndex) is missing `ref`.",
+                    field: "sheets[\(sheetIndex)].cells[\(cellIndex)].ref",
+                    expected: "A1-style reference",
+                    tool: name
+                )
+            )
+        }
+        guard let rowOneBased = rowComponent(of: ref) else {
+            return .failure(
+                ToolEnvelope.failure(
+                    kind: .invalidArgs,
+                    message: "Cell reference '\(ref)' is not valid A1.",
+                    field: "sheets[\(sheetIndex)].cells[\(cellIndex)].ref",
+                    expected: "A1-style reference",
+                    tool: name
+                )
+            )
+        }
+
+        let typeHint = (raw["type"] as? String)?.lowercased()
+        let value: CellValue
+        var formula: String?
+        switch typeHint {
+        case "formula":
+            guard let f = raw["formula"] as? String, !f.isEmpty else {
+                return .failure(
+                    ToolEnvelope.failure(
+                        kind: .invalidArgs,
+                        message: "Cell '\(ref)' is typed as `formula` but has no `formula` string.",
+                        field: "sheets[\(sheetIndex)].cells[\(cellIndex)].formula",
+                        expected: "non-empty formula string",
+                        tool: name
+                    )
+                )
+            }
+            formula = f
+            value = .empty
+        case "bool":
+            value = .bool((raw["value"] as? Bool) ?? false)
+        case "number":
+            if let n = (raw["value"] as? Double) ?? (raw["value"] as? NSNumber)?.doubleValue {
+                value = .number(n)
+            } else if let s = raw["value"] as? String, let n = Double(s) {
+                value = .number(n)
+            } else {
+                value = .empty
+            }
+        case "string", nil:
+            if let s = raw["value"] as? String {
+                value = .string(s)
+            } else if let n = raw["value"] as? NSNumber, CFGetTypeID(n) == CFBooleanGetTypeID() {
+                // A Bool bridges to NSNumber too, so test the boolean flavour before the numeric one.
+                value = .bool(n.boolValue)
+            } else if let n = raw["value"] as? NSNumber {
+                value = .number(n.doubleValue)
+            } else {
+                formula = raw["formula"] as? String
+                value = .empty
+            }
+        default:
+            return .failure(
+                ToolEnvelope.failure(
+                    kind: .invalidArgs,
+                    message: "Cell '\(ref)' has unknown type '\(typeHint ?? "?")'.",
+                    field: "sheets[\(sheetIndex)].cells[\(cellIndex)].type",
+                    expected: "string / number / bool / formula",
+                    tool: name
+                )
+            )
+        }
+        return .value((rowOneBased, Cell(reference: ref, value: value, formula: formula)))
+    }
+
+    private func rowComponent(of reference: String) -> Int? {
+        // Strict A1 only: ASCII column letters followed by a row >= 1 (e.g. `AA10`).
+        let letters = reference.prefix(while: { $0.isASCII && $0.isLetter })
+        let digits = reference.dropFirst(letters.count)
+        guard !letters.isEmpty, digits.allSatisfy({ $0.isASCII && $0.isNumber }) else { return nil }
+        return Int(digits).flatMap { $0 >= 1 ? $0 : nil }
+    }
+}
diff --git a/Packages/OsaurusCore/Managers/Documents/DocumentAdaptersBootstrap.swift b/Packages/OsaurusCore/Managers/Documents/DocumentAdaptersBootstrap.swift
new file mode 100644
index 000000000..87ab6e64c
--- /dev/null
+++ b/Packages/OsaurusCore/Managers/Documents/DocumentAdaptersBootstrap.swift
@@ -0,0 +1,40 @@
+//
+//  DocumentAdaptersBootstrap.swift
+//  osaurus
+//
+//  Registers the in-tree document adapters with `DocumentFormatRegistry.shared`
+//  exactly once, at app launch. Kept separate from `AppDelegate` so tests can
+//  opt into the same registration (or opt out of it entirely) without dragging
+//  in `NSApplication`.
+//
+
+import Foundation
+
+public enum DocumentAdaptersBootstrap {
+    private static let lock = NSLock()
+    // Only touched while `lock` is held; `nonisolated(unsafe)` follows the
+    // project pattern for lock-protected globals (see `OsaurusPaths.overrideRoot`).
+    nonisolated(unsafe) private static var didRegisterShared = false
+
+    /// Registers the in-tree adapters, emitter, and streamer on `registry`.
+    /// Against the shared registry this is idempotent, so several launch
+    /// paths may call it; any other registry (tests, isolated instances)
+    /// is registered again on every call.
+    public static func registerBuiltIns(registry: DocumentFormatRegistry = .shared) {
+        lock.lock()
+        defer { lock.unlock() }
+        let targetsShared = registry === DocumentFormatRegistry.shared
+        guard !(targetsShared && didRegisterShared) else { return }
+
+        registry.register(adapter: PlainTextAdapter())
+        registry.register(adapter: PDFAdapter())
+        registry.register(adapter: RichDocumentAdapter())
+        registry.register(adapter: XLSXAdapter())
+        registry.register(emitter: XLSXEmitter())
+        // Order matters: CSV goes after PlainText so later-wins routes
+        // .csv / .tsv through the typed adapter.
+        registry.register(adapter: CSVAdapter())
+        registry.register(streamer: CSVStreamer())
+        if targetsShared { didRegisterShared = true }
+    }
+}
diff --git a/Packages/OsaurusCore/Models/Documents/CSVTable.swift b/Packages/OsaurusCore/Models/Documents/CSVTable.swift
new file mode 100644
index 000000000..5f5817fa0
--- /dev/null
+++ b/Packages/OsaurusCore/Models/Documents/CSVTable.swift
@@ -0,0 +1,78 @@
+//
+//  CSVTable.swift
+//  osaurus
+//
+//  Typed representation for CSV / TSV files. Replaces the flat
+//  "CSV-as-text" ingestion the legacy `DocumentParser` did by preserving
+//  encoding, delimiter, line-ending style, and per-row cell boundaries.
+//  Pairs with `CSVAdapter` (in-memory) and `CSVStreamer` (row-at-a-time).
+//
+//  High-fidelity fields — the ones that actually matter to business
+//  users on round-trip — are deliberate:
+//    - `delimiter`: comma / tab / semicolon, honoured on re-emit.
+//    - `encoding`: UTF-8 / UTF-16 / ISO-Latin-1, preserved so an export
+//      back to the same locale doesn't silently widen the file.
+//    - `header`: optional first row, detected by the adapter.
+//    - `records`: raw string cells; numeric / date coercion is the
+//      caller's job (agents sometimes want the text literal).
+//
+//  Out of scope: style-level XLSX features (number formats, fills).
+//  Those live in the Workbook representation, not here.
+//
+
+import Foundation
+
+public struct CSVTable: StructuredRepresentation, Sendable {
+    /// Field separator — typically `,` for `.csv` and `\t` for `.tsv`.
+    public let delimiter: Character
+    /// Byte encoding detected from BOM / heuristic.
+    public let encoding: String.Encoding
+    /// Line-ending style present in the source bytes. Preserved so a
+    /// Windows-authored CSV round-trips as CRLF rather than being
+    /// silently rewritten to LF.
+    public let lineEnding: LineEnding
+    /// First row when the adapter identified it as a header. Heuristic:
+    /// present when the source had at least one data row AND the first
+    /// row's cells all parse as non-numeric text.
+    public let header: [String]?
+    /// Parsed cell strings — one `[String]` per row, not including the
+    /// header. Quoted-field expansion already applied.
+    public let records: [[String]]
+    /// Set to the row index where parsing stopped when `sizeLimit` was
+    /// hit; `nil` when the whole file fit under the cap.
+    public let truncatedAt: Int?
+    /// Memberwise initializer: every argument is stored unchanged.
+    public init(
+        delimiter: Character,
+        encoding: String.Encoding,
+        lineEnding: LineEnding,
+        header: [String]?,
+        records: [[String]],
+        truncatedAt: Int? = nil  // omitted => table is treated as not truncated
+    ) {
+        self.delimiter = delimiter
+        self.encoding = encoding
+        self.lineEnding = lineEnding
+        self.header = header
+        self.records = records
+        self.truncatedAt = truncatedAt
+    }
+    /// Line-ending styles; string-backed, so raw values are the case names.
+    public enum LineEnding: String, Sendable {
+        case lf  // `\n`
+        case crlf  // `\r\n`
+        case cr  // `\r` — rare, classic Mac
+    }
+}
+
+/// One streamed row emitted by `CSVStreamer`. `lineNumber` is 1-based and
+/// matches the on-wire row number so callers can attribute errors.
+public struct CSVRecord: Sendable, Equatable {
+    public let lineNumber: Int  // 1-based row number of this record
+    public let cells: [String]  // cell strings for this row
+    /// Memberwise initializer: both arguments are stored unchanged.
+    public init(lineNumber: Int, cells: [String]) {
+        self.lineNumber = lineNumber
+        self.cells = cells
+    }
+}
diff --git a/Packages/OsaurusCore/Models/Documents/PDFDocumentRepresentation.swift b/Packages/OsaurusCore/Models/Documents/PDFDocumentRepresentation.swift
new file mode 100644
index 000000000..b0de53fd0
--- /dev/null
+++ b/Packages/OsaurusCore/Models/Documents/PDFDocumentRepresentation.swift
@@ -0,0 +1,63 @@
+//
+//  PDFDocumentRepresentation.swift
+//  osaurus
+//
+//  Typed representation for parsed PDFs. Replaces the `PlainTextRepresentation`
+//  that PR 3's PDFAdapter emitted — PDFs that look tabular (invoices, bank
+//  statements, periodic reports) now surface the table structure alongside
+//  the flat text fallback, while narrative PDFs keep working exactly as
+//  before.
+//
+//  Table detection lives in `PDFAdapter` and is intentionally permissive:
+//  a "table" here is a run of consecutive text rows that the layout
+//  heuristic split into at least two cells each. It won't perfectly match
+//  the author's semantic intent in every document — but for the files
+//  osaurus users actually attach (invoices, bank statements, financial
+//  tables), the heuristic is good enough to turn numeric columns from
+//  `1,234.56 1,920.00 ...` concatenation into proper cells.
+//
+
+import Foundation
+
+/// Parsed PDF as an ordered list of page representations.
+public struct PDFDocumentRepresentation: StructuredRepresentation, Sendable {
+    public let pages: [PDFPageRepresentation]
+
+    public init(pages: [PDFPageRepresentation]) { self.pages = pages }
+
+    /// Number of entries in `pages`.
+    public var pageCount: Int { pages.endIndex }
+}
+
+/// One page of a parsed PDF: its page number, its extracted text, and any
+/// tables detected on it.
+public struct PDFPageRepresentation: Sendable {
+    /// Page number, 1-indexed to match the PDF's own display numbering.
+    public let pageNumber: Int
+    /// Plain text pulled from the page via PDFKit. Stored for every page,
+    /// tables or not, so the text fallback path matches legacy behaviour.
+    public let text: String
+    /// Tables found on the page; empty when the page is flowing text. One
+    /// page may hold several (e.g. invoice line items plus a summary block).
+    public let tables: [PDFTable]
+
+    public init(pageNumber: Int, text: String, tables: [PDFTable]) {
+        self.tables = tables
+        self.text = text
+        self.pageNumber = pageNumber
+    }
+}
+
+/// A detected tabular region: rows in order, each an array of cell strings.
+/// Geometry is dropped on purpose — coordinates are author-specific and
+/// would force every downstream consumer to understand PDF layout.
+public struct PDFTable: Sendable, Equatable {
+    public let rows: [[String]]
+
+    public init(rows: [[String]]) { self.rows = rows }
+
+    /// Number of rows in the table.
+    public var rowCount: Int { rows.count }
+    /// Width of the widest row; 0 when the table has no rows.
+    public var columnCount: Int { rows.reduce(0) { max($0, $1.count) } }
+}
diff --git a/Packages/OsaurusCore/Models/Documents/PlainTextRepresentation.swift b/Packages/OsaurusCore/Models/Documents/PlainTextRepresentation.swift
new file mode 100644
index 000000000..f53c2348c
--- /dev/null
+++ b/Packages/OsaurusCore/Models/Documents/PlainTextRepresentation.swift
@@ -0,0 +1,18 @@
+//
+//  PlainTextRepresentation.swift
+//  osaurus
+//
+//  Default representation for adapters that extract a single text string.
+//  Every adapter has to publish *some* `StructuredRepresentation`; the
+//  wrappers around `PDFKit` text extraction and `NSAttributedString` don't
+//  preserve any format-native structure, so they emit this shape. The
+//  real typed representations (`Workbook`, `WordDocument`, …) replace it
+//  per-format as higher-fidelity adapters land.
+//
+
+import Foundation
+
+public struct PlainTextRepresentation: StructuredRepresentation, Sendable {
+    public let text: String  // the single extracted text string this representation wraps
+    public init(text: String) { self.text = text }
+}
diff --git a/Packages/OsaurusCore/Models/Documents/Workbook.swift b/Packages/OsaurusCore/Models/Documents/Workbook.swift
new file mode 100644
index 000000000..a24031dbb
--- /dev/null
+++ b/Packages/OsaurusCore/Models/Documents/Workbook.swift
@@ -0,0 +1,87 @@
+//
+//  Workbook.swift
+//  osaurus
+//
+//  Typed representation for parsed XLSX workbooks. Designed as the
+//  round-trip target for both the read side (`XLSXAdapter`, this PR) and
+//  the write side (`XLSXEmitter`, landing in the next slice). Fields are
+//  chosen to match what CoreXLSX surfaces cleanly today — sheet names,
+//  merged ranges, raw cell values, formula source strings — plus the
+//  shared-string table so repeated strings round-trip without being
+//  re-interned on write. Style-derived fidelity (number formats, column
+//  widths) is deliberately out of scope for this PR; see the comment on
+//  `CellValue` for why.
+//
+
+import Foundation
+
+public struct Workbook: StructuredRepresentation, Sendable {
+    public let sheets: [Sheet]
+    /// Shared-string table, kept so repeated strings round-trip on write.
+    public let sharedStrings: [String]
+    public init(sheets: [Sheet], sharedStrings: [String]) {
+        self.sharedStrings = sharedStrings
+        self.sheets = sheets
+    }
+}
+
+/// One worksheet: its name, its rows, and its merged cell ranges.
+public struct Sheet: Sendable {
+    public let name: String
+    public let rows: [Row]
+    public let mergedRanges: [CellRange]
+    public init(name: String, rows: [Row], mergedRanges: [CellRange]) {
+        self.mergedRanges = mergedRanges
+        self.rows = rows
+        self.name = name
+    }
+}
+
+/// One worksheet row and the cells it carries.
+public struct Row: Sendable {
+    /// Row number, 1-based, as given by the on-wire `r` attribute.
+    public let index: Int
+    public let cells: [Cell]
+    public init(index: Int, cells: [Cell]) {
+        self.cells = cells
+        self.index = index
+    }
+}
+
+/// One worksheet cell: reference, value, and optional formula source.
+public struct Cell: Sendable {
+    /// On-wire A1-style reference, e.g. "B3".
+    public let reference: String
+    public let value: CellValue
+    /// Formula source (`=SUM(A1:A3)`) if the cell has one. Excel stores
+    /// the formula and its cached result side by side; both are kept.
+    public let formula: String?
+    public init(reference: String, value: CellValue, formula: String? = nil) {
+        self.formula = formula
+        self.value = value
+        self.reference = reference
+    }
+}
+
+/// Scalar payload of a cell. Excel keeps dates as numbers plus a style;
+/// with no style table parsed, a date cannot be told apart from a plain
+/// number, so any date that is not explicitly typed (`t="d"`) comes
+/// through as `.number`. Removing that limit needs a style parser that
+/// survives the CoreXLSX `patternType` crash on openpyxl-generated
+/// files; that work lives in a separate slice.
+public enum CellValue: Sendable, Equatable {
+    case empty
+    case bool(Bool)
+    case number(Double)
+    case string(String)
+    case inlineString(String)
+}
+
+/// A cell range in A1 notation, e.g. "A1:C3". The reference string is
+/// stored as given; nothing here parses or validates it.
+public struct CellRange: Sendable, Equatable {
+    /// The range reference text.
+    public let reference: String
+
+    public init(reference: String) { self.reference = reference }
+}
diff --git a/Packages/OsaurusCore/Package.swift b/Packages/OsaurusCore/Package.swift
index 51b3073d9..b98431576 100644
--- a/Packages/OsaurusCore/Package.swift
+++ b/Packages/OsaurusCore/Package.swift
@@ -148,6 +148,8 @@ let package = Package(
         .package(url: "https://github.com/mgriebling/SwiftMath", from: "1.7.3"),
         .package(url: "https://github.com/raspu/Highlightr", from: "2.3.0"),
         .package(url: "https://github.com/AAChartModel/AAChartKit-Swift.git", from: "9.5.0"),
+        .package(url: "https://github.com/CoreOffice/CoreXLSX.git", from: "0.14.2"),
+        .package(url: "https://github.com/jmcnamara/libxlsxwriter.git", from: "1.2.4"),
     ],
     targets: [
         // Vendored SQLCipher 4.6.1 amalgamation (CommonCrypto
@@ -271,6 +273,8 @@ let package = Package(
                 .product(name: "ContainerizationExtras", package: "containerization"),
                 .product(name: "Highlightr", package: "Highlightr"),
                 .product(name: "AAInfographics", package: "AAChartKit-Swift"),
+                .product(name: "CoreXLSX", package: "CoreXLSX"),
+                .product(name: "libxlsxwriter", package: "libxlsxwriter"),
             ],
             path: ".",
             exclude: ["Tests", "SQLCipher"],
@@ -284,7 +288,8 @@ let package = Package(
                 .product(name: "NIOEmbedded", package: "swift-nio"),
                 .product(name: "VecturaKit", package: "VecturaKit"),
             ],
-            path: "Tests"
+            path: "Tests",
+            resources: [.copy("Documents/Fixtures")]
         ),
     ]
 )
diff --git a/Packages/OsaurusCore/Services/Documents/CSVAdapter.swift b/Packages/OsaurusCore/Services/Documents/CSVAdapter.swift
new file mode 100644
index 000000000..557e1e579
--- /dev/null
+++ b/Packages/OsaurusCore/Services/Documents/CSVAdapter.swift
@@ -0,0 +1,136 @@
+//
+//  CSVAdapter.swift
+//  osaurus
+//
+//  RFC-4180-ish CSV / TSV parser that produces a typed `CSVTable`.
+//  Replaces the legacy "CSV as plain text" path — the adapter still
+//  returns a text fallback for chat attachment display, but the typed
+//  representation exposes delimiter, encoding, line-ending, and per-row
+//  cell boundaries so the downstream tooling (agent tools, CSV streamer)
+//  can reason about columns rather than raw bytes.
+//
+//  What's handled:
+//    - Delimiter defaults: `,` for `.csv`, `\t` for `.tsv`.
+//    - Double-quoted fields, including `""` escape sequences.
+//    - Embedded newlines inside quoted fields.
+//    - UTF-8 BOM stripping; UTF-8 first, ISO-Latin-1 fallback.
+//    - Header detection via a conservative "first row is non-numeric"
+//      heuristic that's easy to override once the agent has context.
+//
+//  What's NOT handled yet:
+//    - Encoding detection beyond BOM — a Windows-1252 file with no BOM
+//      decodes as ISO-Latin-1 and may replace some byte sequences.
+//    - Escaping via backslashes (non-standard but common in hand-rolled
+//      CSVs) — quotes only.
+//    - Skipping comment lines (`#foo`) — not in the format.
+//
+
+import Foundation
+
+public struct CSVAdapter: DocumentFormatAdapter {
+    public let formatId = "csv"
+
+    public init() {}
+
+    /// Accepts `.csv` / `.tsv` URLs by extension (case-insensitive); `uti` is ignored.
+    public func canHandle(url: URL, uti: String?) -> Bool {
+        ["csv", "tsv"].contains(url.pathExtension.lowercased())
+    }
+
+    /// Reads `url` whole, decodes it, and parses it into a `CSVTable`.
+    ///
+    /// - Throws: `DocumentAdapterError.sizeLimitExceeded` when `sizeLimit` is
+    ///   positive and the reported file size is above it; `.readFailed` when
+    ///   the bytes cannot be read; `.emptyContent` when the decoded text is
+    ///   empty or whitespace-only.
+    public func parse(url: URL, sizeLimit: Int64) async throws -> StructuredDocument {
+        // A failed size lookup counts as 0 bytes, which never trips the limit.
+        let reportedSize = (try? url.resourceValues(forKeys: [.fileSizeKey]))?.fileSize ?? 0
+        let fileSize = Int64(reportedSize)
+        guard sizeLimit <= 0 || fileSize <= sizeLimit else {
+            throw DocumentAdapterError.sizeLimitExceeded(actual: fileSize, limit: sizeLimit)
+        }
+
+        let bytes: Data
+        do {
+            bytes = try Data(contentsOf: url)
+        } catch {
+            throw DocumentAdapterError.readFailed(underlying: error.localizedDescription)
+        }
+
+        let (text, encoding) = Self.decode(bytes)
+        if text.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty {
+            throw DocumentAdapterError.emptyContent
+        }
+
+        // `.tsv` selects tab; anything else this adapter accepts is comma-separated.
+        let delimiter: Character = url.pathExtension.lowercased() == "tsv" ? "\t" : ","
+        let split = Self.detectHeader(CSVParser.parseAll(text: text, delimiter: delimiter))
+        let table = CSVTable(
+            delimiter: delimiter, encoding: encoding, lineEnding: Self.detectLineEnding(text),
+            header: split.header, records: split.body)
+        let wrapped = AnyStructuredRepresentation(formatId: formatId, underlying: table)
+        return StructuredDocument(
+            formatId: formatId, filename: url.lastPathComponent, fileSize: fileSize,
+            representation: wrapped, textFallback: Self.renderTextFallback(table: table))
+    }
+
+    // MARK: - Decode
+
+    /// Decodes raw file bytes into text and reports the encoding that worked
+    /// so callers can preserve it on re-emit.
+    ///
+    /// A leading UTF-8 BOM (`EF BB BF`) is stripped first. The payload is then
+    /// tried as UTF-8 and, failing that, as ISO-Latin-1 — with or without a
+    /// BOM, so a BOM-prefixed file holding bytes that are not valid UTF-8
+    /// still yields text. The BOM test does not index from zero, so `Data`
+    /// slices are safe to pass.
+    static func decode(_ data: Data) -> (text: String, encoding: String.Encoding) {
+        let bom: [UInt8] = [0xEF, 0xBB, 0xBF]
+        let payload = data.starts(with: bom) ? data.dropFirst(bom.count) : data
+        for encoding in [String.Encoding.utf8, .isoLatin1] {
+            if let text = String(data: payload, encoding: encoding) { return (text, encoding) }
+        }
+        return ("", .utf8)  // ISO-Latin-1 is not expected to fail; keeps the function total
+    }
+
+    // MARK: - Line ending + header
+
+    /// Reports the first terminator in `text`: `\r\n` → `.crlf`, bare `\r` → `.cr`, `\n` or none → `.lf`.
+    static func detectLineEnding(_ text: String) -> CSVTable.LineEnding {
+        let scalars = text.unicodeScalars
+        guard let hit = scalars.firstIndex(where: { $0 == "\r" || $0 == "\n" }), scalars[hit] == "\r" else { return .lf }
+        let next = scalars.index(after: hit)
+        return next != scalars.endIndex && scalars[next] == "\n" ? .crlf : .cr
+    }
+
+    /// Splits parsed rows into an optional header and the data rows. The first
+    /// row is taken as a header only when more rows follow it and at least
+    /// one of its cells is non-empty text that `Double` cannot parse (after
+    /// trimming whitespace). No rows at all yields `(nil, [])`.
+    static func detectHeader(_ rows: [[String]]) -> (header: [String]?, body: [[String]]) {
+        guard let candidate = rows.first else { return (nil, []) }
+        let looksTextual = candidate.contains {
+            !($0.isEmpty || Double($0.trimmingCharacters(in: .whitespaces)) != nil)
+        }
+        guard looksTextual && rows.count > 1 else { return (nil, rows) }
+        return (candidate, Array(rows[1...]))
+    }
+
+    // MARK: - Text fallback
+
+    /// Renders `table` as pipe-separated text for display: the header (when
+    /// present) with a dash rule of at most 80 characters under it, the first
+    /// 200 records, then a "more rows" marker if any records were left out.
+    static func renderTextFallback(table: CSVTable) -> String {
+        let rowLimit = 200
+        var lines: [String] = []
+        if let headerLine = table.header?.joined(separator: " | ") {
+            lines += [headerLine, String(repeating: "-", count: min(headerLine.count, 80))]
+        }
+        lines += table.records.prefix(rowLimit).map { $0.joined(separator: " | ") }
+        let omitted = table.records.count - rowLimit
+        if omitted > 0 { lines.append("… (\(omitted) more rows)") }
+        return lines.joined(separator: "\n")
+    }
+}
diff --git a/Packages/OsaurusCore/Services/Documents/CSVParser.swift b/Packages/OsaurusCore/Services/Documents/CSVParser.swift
new file mode 100644
index 000000000..a49035b47
--- /dev/null
+++ b/Packages/OsaurusCore/Services/Documents/CSVParser.swift
@@ -0,0 +1,174 @@
+//
+//  CSVParser.swift
+//  osaurus
+//
+//  Shared state-machine parser for CSV / TSV content. Consumers:
+//    - `CSVAdapter` drains the whole file into `[[String]]`.
+//    - `CSVStreamer` feeds bytes in chunks and pulls rows out as they
+//      complete — the critical path for large files.
+//
+//  Grammar (RFC 4180 with the two common extensions noted inline):
+//    - Fields are separated by the caller-specified `delimiter`.
+//    - Rows are separated by `\r\n`, `\n`, or a bare `\r`.
+//    - A field wrapped in `"` may contain delimiters and newlines;
+//      a literal `"` inside is escaped as `""`.
+//    - A stray character that follows the closing quote (unexpected per
+//      RFC) is tolerated: we append it and continue as an unquoted field,
+//      matching real-world behaviour of Excel and Numbers exports.
+//
+
+import Foundation
+
+enum CSVParser {
+
+    /// Parses the whole of `text` in one pass and returns its rows, each a
+    /// list of cell strings. A quoted field may contain newlines and still
+    /// lands in a single cell, so this is not equivalent to splitting `text`
+    /// on line breaks.
+    static func parseAll(text: String, delimiter: Character) -> [[String]] {
+        var parser = Machine(delimiter: delimiter)
+        text.unicodeScalars.forEach { parser.consume($0) }
+        // Emit a last row that was not closed by a line terminator.
+        parser.finish()
+        return parser.rows
+    }
+
+    /// Incremental variant used by the streamer. Feed it Unicode scalars via
+    /// `consume(_:)`, collect rows with `drainRows()`, and end with `finish()`.
+    struct Machine {
+        let delimiter: Character
+        private(set) var rows: [[String]] = []
+        private var currentRow: [String] = []
+        private var currentCell: String = ""
+        private var state: State = .fieldStart
+        private var pendingCR: Bool = false  // saw `\r`, waiting to see if `\n` follows
+
+        init(delimiter: Character) {
+            self.delimiter = delimiter
+        }
+
+        mutating func drainRows() -> [[String]] {  // hands back completed rows and clears the buffer
+            let out = rows
+            rows = []
+            return out
+        }
+
+        mutating func consume(_ scalar: Unicode.Scalar) {
+            if pendingCR {
+                pendingCR = false
+                if scalar == "\n" {
+                    // Swallow the `\n` of a CRLF — the `\r` already
+                    // terminated the row.
+                    return
+                }
+                // Bare `\r` line ending; fall through so this scalar is
+                // reprocessed as the start of the next row.
+            }
+
+            let char = Character(scalar)
+
+            switch state {
+            case .fieldStart:
+                if char == "\"" {
+                    state = .inQuotedField
+                    return
+                }
+                if char == delimiter {
+                    currentRow.append(currentCell)  // delimiter right at field start: empty cell
+                    currentCell = ""
+                    return
+                }
+                if scalar == "\n" {
+                    finishRow()
+                    return
+                }
+                if scalar == "\r" {
+                    pendingCR = true
+                    finishRow()
+                    return
+                }
+                currentCell.append(char)
+                state = .inField
+
+            case .inField:
+                if char == delimiter {
+                    currentRow.append(currentCell)
+                    currentCell = ""
+                    state = .fieldStart
+                    return
+                }
+                if scalar == "\n" {
+                    finishRow()
+                    return
+                }
+                if scalar == "\r" {
+                    pendingCR = true
+                    finishRow()
+                    return
+                }
+                currentCell.append(char)
+
+            case .inQuotedField:
+                if char == "\"" {
+                    state = .afterQuote
+                    return
+                }
+                currentCell.append(char)
+
+            case .afterQuote:
+                if char == "\"" {
+                    // `""` → literal quote in the field.
+                    currentCell.append(char)
+                    state = .inQuotedField
+                    return
+                }
+                if char == delimiter {
+                    currentRow.append(currentCell)
+                    currentCell = ""
+                    state = .fieldStart
+                    return
+                }
+                if scalar == "\n" {
+                    finishRow()
+                    return
+                }
+                if scalar == "\r" {
+                    pendingCR = true
+                    finishRow()
+                    return
+                }
+                // Tolerate a stray character after a closing quote rather
+                // than bailing: it is appended to the cell and parsing
+                // continues as an unquoted field. Excel-round-tripped CSVs
+                // occasionally emit this.
+                currentCell.append(char)
+                state = .inField
+            }
+        }
+
+        mutating func finish() {  // end of input: flush a final row that had no terminator
+            if state != .fieldStart || !currentCell.isEmpty || !currentRow.isEmpty {
+                currentRow.append(currentCell)
+                rows.append(currentRow)
+                currentCell = ""
+                currentRow = []
+                state = .fieldStart
+            }
+        }
+
+        private mutating func finishRow() {  // closes the current cell and row, resets to field start
+            currentRow.append(currentCell)
+            rows.append(currentRow)
+            currentCell = ""
+            currentRow = []
+            state = .fieldStart
+        }
+
+        private enum State {
+            case fieldStart  // before the first character of a cell
+            case inField  // inside an unquoted cell
+            case inQuotedField  // between the opening and the closing quote
+            case afterQuote  // just saw `"` inside a quoted cell: closing quote or first half of `""`
+        }
+    }
+}
diff --git a/Packages/OsaurusCore/Services/Documents/CSVStreamer.swift b/Packages/OsaurusCore/Services/Documents/CSVStreamer.swift
new file mode 100644
index 000000000..d74ce5762
--- /dev/null
+++ b/Packages/OsaurusCore/Services/Documents/CSVStreamer.swift
@@ -0,0 +1,165 @@
+//
+//  CSVStreamer.swift
+//  osaurus
+//
+//  Streaming variant of `CSVAdapter` for files that don't fit in the
+//  in-memory cap — multi-GB bank exports, long-running event logs, etc.
+//  Emits one `CSVRecord` per row via an `AsyncThrowingStream` so callers
+//  can back-pressure and cancel rather than paying for the whole file up
+//  front. The agent tool surface is the obvious consumer; the chat
+//  attachment path stays on the eager `CSVAdapter` because it needs the
+//  whole table to render.
+//
+//  The byte -> scalar -> parser pipeline reuses `CSVParser.Machine`, so
+//  quoted-field / embedded-newline / escape semantics match the batch
+//  adapter exactly. The only difference is that rows flush out as soon
+//  as they complete rather than waiting for the file to end.
+//
+
+import Foundation
+
+public struct CSVStreamer: DocumentFormatStreamer {
+    public let formatId = "csv"
+
+    public init() {}
+
+    /// Streams `url` row by row (`.tsv` → tab, otherwise comma). The producer task is
+    /// cancelled once the stream terminates, so an abandoned stream stops reading.
+    public func stream(url: URL) -> AsyncThrowingStream<CSVRecord, Error> {
+        let delimiter: Character = url.pathExtension.lowercased() == "tsv" ? "\t" : ","
+        return AsyncThrowingStream { continuation in
+            let producer = Task {
+                do {
+                    try Self.drain(url: url, delimiter: delimiter, into: continuation)
+                    continuation.finish()
+                } catch { continuation.finish(throwing: error) }
+            }
+            continuation.onTermination = { _ in producer.cancel() }
+        }
+    }
+
+    // MARK: - Internals
+
+    private static let chunkSize = 64 * 1024
+
+    /// Reads `url` in 64 KB chunks, feeds the decoded scalars through the
+    /// shared CSV state machine, and yields completed rows one at a time.
+    /// Each chunk is decoded as UTF-8 first and as ISO-Latin-1 if that
+    /// fails; a leading UTF-8 BOM is dropped from the first chunk.
+    /// - Throws: `DocumentAdapterError.readFailed` when the file cannot be
+    ///   opened, read, or decoded; `CancellationError` when the surrounding
+    ///   task is cancelled.
+    private static func drain(
+        url: URL,
+        delimiter: Character,
+        into continuation: AsyncThrowingStream<CSVRecord, Error>.Continuation
+    ) throws {
+        let handle: FileHandle
+        do {
+            handle = try FileHandle(forReadingFrom: url)
+        } catch {
+            throw DocumentAdapterError.readFailed(underlying: error.localizedDescription)
+        }
+        defer { try? handle.close() }
+
+        var machine = CSVParser.Machine(delimiter: delimiter)
+        var leftoverBytes = Data()
+        var didStripBOM = false
+        var lineNumber = 0
+
+        // Pushes every scalar of `bytes` through the parser; false if undecodable.
+        func feed(_ bytes: Data) -> Bool {
+            guard let text = String(data: bytes, encoding: .utf8) ?? String(data: bytes, encoding: .isoLatin1)
+            else { return false }
+            for scalar in text.unicodeScalars { machine.consume(scalar) }
+            return true
+        }
+        // Yields all rows the parser has completed so far.
+        func flushRows() {
+            for row in machine.drainRows() {
+                lineNumber += 1
+                continuation.yield(CSVRecord(lineNumber: lineNumber, cells: row))
+            }
+        }
+
+        while true {
+            // Checked once per chunk so cancellation is noticed even while
+            // a long quoted field keeps any row from completing.
+            try Task.checkCancellation()
+            let chunk: Data
+            do {
+                // `read(upToCount:)` reports I/O failures as Swift errors.
+                chunk = try handle.read(upToCount: chunkSize) ?? Data()
+            } catch {
+                throw DocumentAdapterError.readFailed(underlying: error.localizedDescription)
+            }
+            if chunk.isEmpty { break }
+
+            var buffer = leftoverBytes
+            buffer.append(chunk)
+            if !didStripBOM {
+                didStripBOM = true
+                if buffer.starts(with: [0xEF, 0xBB, 0xBF]) { buffer = buffer.subdata(in: 3 ..< buffer.count) }
+            }
+
+            // Hold back a trailing partial multi-byte scalar for the next
+            // chunk so the parser never sees half a character.
+            let (decodable, tail) = Self.splitAtUTF8Boundary(buffer)
+            leftoverBytes = tail
+            guard feed(decodable) else {
+                throw DocumentAdapterError.readFailed(underlying: "could not decode CSV chunk")
+            }
+            flushRows()
+        }
+
+        // Flush held-back bytes, then the final row when no newline ends the file.
+        if !leftoverBytes.isEmpty { _ = feed(leftoverBytes) }
+        machine.finish()
+        flushRows()
+    }
+
+    /// Splits `data` so that the first part ends on a complete UTF-8 scalar.
+    ///
+    /// Only the final four bytes are inspected, walking backwards from the end:
+    ///   - an ASCII byte (`0xxxxxxx`) means the buffer already ends cleanly;
+    ///   - continuation bytes (`10xxxxxx`) are skipped;
+    ///   - a lead byte (`11xxxxxx`) announces a 2-, 3- or 4-byte scalar. If
+    ///     fewer bytes than announced are present, the lead byte and what
+    ///     follows it become `tail`, to be prepended to the next read.
+    ///
+    /// Buffers shorter than four bytes, and buffers whose last four bytes are
+    /// all continuation bytes, come back whole with an empty tail; malformed
+    /// input is left for the `String` initializer to reject.
+    ///
+    /// Positions are computed from `startIndex` / `endIndex`, so a `Data`
+    /// slice with a non-zero start index is handled like any other buffer.
+    ///
+    /// - Returns: `decodable`, the prefix that is safe to decode now, and
+    ///   `tail`, the carried-over suffix (empty when nothing is held back).
+    static func splitAtUTF8Boundary(_ data: Data) -> (decodable: Data, tail: Data) {
+        let whole = (decodable: data, tail: Data())
+        guard data.count >= 4 else { return whole }
+
+        for offset in 1 ... 4 {
+            let position = data.index(data.endIndex, offsetBy: -offset)
+            let byte = data[position]
+            if byte & 0b1100_0000 == 0b1000_0000 {
+                continue  // continuation byte; keep walking back
+            }
+            if byte & 0b1000_0000 == 0 {
+                return whole  // ASCII final byte; nothing is split
+            }
+            // Lead byte: the length of its run of leading 1-bits is the
+            // scalar's byte count. Runs longer than 4 are not valid UTF-8
+            // and are clamped to 4.
+            let expected = min((~byte).leadingZeroBitCount, 4)
+            if expected <= offset {
+                return whole  // the full scalar is inside the buffer
+            }
+            // Short; carry the partial scalar over to the next read.
+            let head = data.subdata(in: data.startIndex ..< position)
+            return (head, data.subdata(in: position ..< data.endIndex))
+        }
+        return whole
+    }
+}
diff --git a/Packages/OsaurusCore/Services/Documents/PDFAdapter.swift b/Packages/OsaurusCore/Services/Documents/PDFAdapter.swift
new file mode 100644
index 000000000..cef946a2a
--- /dev/null
+++ b/Packages/OsaurusCore/Services/Documents/PDFAdapter.swift
@@ -0,0 +1,230 @@
+//
+//  PDFAdapter.swift
+//  osaurus
+//
+//  Text-layer extraction plus layout-aware table detection for PDFs.
+//
+//  The adapter produces a `PDFDocumentRepresentation` carrying one
+//  `PDFPageRepresentation` per text-bearing page. Each page retains the
+//  plain extracted text (byte-identical to the legacy PR-3 behaviour)
+//  and — when the layout heuristic finds them — a list of `PDFTable`
+//  regions. The `textFallback` on the returned `StructuredDocument`
+//  stays flat for chat-attachment display.
+//
+//  Image-only PDFs still throw `.emptyContent` so the `DocumentParser`
+//  shim can fall through to the legacy image-render path; moving that
+//  path onto the adapter surface is deliberately out of scope here.
+//
+//  Detection strategy (`PDFTableDetector` below):
+//    1. Enumerate each page's characters, capturing `(char, x, y, width)`
+//       from `PDFPage.characterBounds(at:)`.
+//    2. Cluster glyphs into rows by y-coordinate tolerance.
+//    3. Within each row, split into cells wherever the inter-glyph gap
+//       exceeds the configured threshold.
+//    4. Collect consecutive multi-cell rows as a table; single-cell rows
+//       are treated as prose and end the current table.
+//
+
+import Foundation
+import PDFKit
+
+public struct PDFAdapter: DocumentFormatAdapter {
+    public let formatId = "pdf"
+
+    public init() {}
+
+    /// Claims any URL whose extension is `pdf`, compared case-insensitively.
+    public func canHandle(url: URL, uti: String?) -> Bool {
+        let ext = url.pathExtension.lowercased()
+        return ext == "pdf"
+    }
+
+    /// Extracts the text layer page by page and runs table detection on
+    /// every page that has non-whitespace text.
+    /// - Throws: `.sizeLimitExceeded`, `.readFailed` when PDFKit cannot open
+    ///   the file, and `.emptyContent` when no page yields text.
+    public func parse(url: URL, sizeLimit: Int64) async throws -> StructuredDocument {
+        let resourceValues = try? url.resourceValues(forKeys: [.fileSizeKey])
+        let fileSize = Int64(resourceValues?.fileSize ?? 0)
+        guard sizeLimit <= 0 || fileSize <= sizeLimit else {
+            throw DocumentAdapterError.sizeLimitExceeded(actual: fileSize, limit: sizeLimit)
+        }
+        guard let document = PDFDocument(url: url) else {
+            throw DocumentAdapterError.readFailed(underlying: "PDFKit could not open document")
+        }
+
+        // Pages whose text is missing or whitespace-only are skipped.
+        let pages = (0 ..< document.pageCount).compactMap { index -> PDFPageRepresentation? in
+            guard let page = document.page(at: index), let text = page.string,
+                !text.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty
+            else { return nil }
+            let tables = PDFTableDetector.detect(on: page, text: text)
+            return PDFPageRepresentation(pageNumber: index + 1, text: text, tables: tables)
+        }
+
+        // No text layer on any page — throw so the shim can fall through to
+        // the legacy image-render fallback instead of claiming a result.
+        if pages.isEmpty { throw DocumentAdapterError.emptyContent }
+
+        let flatText = pages.map(\.text).joined(separator: "\n\n")
+        let representation = AnyStructuredRepresentation(
+            formatId: formatId,
+            underlying: PDFDocumentRepresentation(pages: pages)
+        )
+        return StructuredDocument(
+            formatId: formatId,
+            filename: url.lastPathComponent,
+            fileSize: fileSize,
+            representation: representation,
+            textFallback: PlainTextAdapter.applyCharacterCap(flatText)
+        )
+    }
+}
+
+// MARK: - Table detector
+
+/// One positioned character: the input unit of `PDFTableDetector`.
+/// Internal rather than private so tests can feed synthetic grids in
+/// without PDFKit — Core Graphics-generated PDFs report character bounds
+/// that span trailing whitespace, masking the real column gaps, so
+/// end-to-end PDF fixtures alone make the algorithm hard to pin.
+struct PDFGlyph: Sendable, Equatable {
+    let scalar: Character  // a full `Character`, despite the name
+    let rect: CGRect  // glyph bounds; the detector reads minX, maxX and midY
+}
+
+/// Layout-aware table detector. Pure function over a `PDFPage` at the
+/// top, with the inner stages (`clusterRows`, `cellsForRow`, grouping)
+/// internal so the heuristic can be unit-tested without PDFKit.
+enum PDFTableDetector {
+
+    /// Tunables chosen for common business PDFs (10-12pt body text).
+    /// `rowTolerance` is how far two glyphs' vertical midpoints (`midY`)
+    /// can differ and still be on the same row; `columnGap` is the
+    /// inter-glyph distance that must be exceeded before we declare a
+    /// cell boundary. Both are in PDF points. `columnGap` is set above
+    /// typical single-space width (~3-4pt at 12pt body) so "Widget Pro"
+    /// stays one cell but "Widget      7" splits — the heuristic trades
+    /// some recall on very tightly-spaced tables for precision on prose.
+    static let rowTolerance: CGFloat = 3.0
+    static let columnGap: CGFloat = 8.0
+
+    /// Detects tables on `page`. `text` is expected to be the page's own
+    /// `string`: its UTF-16 offsets are used to look up character bounds.
+    static func detect(on page: PDFPage, text: String) -> [PDFTable] {
+        detect(glyphs: collectGlyphs(from: page, text: text))
+    }
+
+    /// Pure-function variant used by tests and reachable without PDFKit.
+    static func detect(glyphs: [PDFGlyph]) -> [PDFTable] {
+        let cellRows = clusterRows(glyphs).map { cellsForRow($0) }
+        return groupConsecutiveTabularRows(cellRows)
+    }
+
+    // MARK: Glyph collection
+
+    private struct RowCluster {
+        var y: CGFloat
+        var glyphs: [PDFGlyph]
+    }
+
+    private static func collectGlyphs(from page: PDFPage, text: String) -> [PDFGlyph] {
+        // `characterBounds(at:)` takes UTF-16 offsets, so a running UTF-16
+        // offset is advanced for every character, including skipped ones.
+        // Space / tab / newline characters carry bounds that span the
+        // whitespace they introduce — including column gaps many points
+        // wide (PDFKit on a 3-column PDF reports width ≈ 95pt for the
+        // space between columns) — which would hide the gap between
+        // "Item" and "Qty". Dropping them makes the gap between
+        // meaningful characters the real signal we cluster on.
+        var glyphs: [PDFGlyph] = []
+        glyphs.reserveCapacity(text.utf16.count)
+        var index = 0
+        for scalar in text {
+            let length = scalar.utf16.count
+            defer { index += length }
+            if scalar.isWhitespace { continue }
+            let bounds = page.characterBounds(at: index)
+            if bounds.width > 0 || bounds.height > 0 {
+                glyphs.append(PDFGlyph(scalar: scalar, rect: bounds))
+            }
+        }
+        return glyphs
+    }
+
+    // MARK: Row clustering
+
+    /// Groups glyphs into rows, top of page first, each row left-to-right.
+    static func clusterRows(_ glyphs: [PDFGlyph]) -> [[PDFGlyph]] {
+        // Sort by y descending — PDF coordinates have origin at bottom-
+        // left, so top-of-page rows carry the highest y values. The
+        // comparator uses exact midY (minX on ties): folding the
+        // tolerance in here is not transitive, so it would not be the
+        // strict weak ordering `sorted(by:)` requires. Tolerance is
+        // applied only when clustering below.
+        let sorted = glyphs.sorted { lhs, rhs in
+            if lhs.rect.midY != rhs.rect.midY { return lhs.rect.midY > rhs.rect.midY }
+            return lhs.rect.minX < rhs.rect.minX
+        }
+
+        // A glyph joins the current row when its midY is within
+        // `rowTolerance` of the row's first glyph; else it starts a row.
+        var clusters: [RowCluster] = []
+        for glyph in sorted {
+            let y = glyph.rect.midY
+            if let anchorY = clusters.last?.y, abs(anchorY - y) < rowTolerance {
+                clusters[clusters.count - 1].glyphs.append(glyph)  // in place, no array copy
+            } else {
+                clusters.append(RowCluster(y: y, glyphs: [glyph]))
+            }
+        }
+
+        return clusters.map { cluster in
+            cluster.glyphs.sorted { $0.rect.minX < $1.rect.minX }
+        }
+    }
+
+    // MARK: Row → cells
+
+    /// Splits a left-to-right row into cells at gaps wider than `columnGap`.
+    /// NOTE(review): glyphs are joined with no separator and the PDFKit
+    /// path drops whitespace, so "Widget Pro" appears to come out as
+    /// "WidgetPro" — confirm whether intra-cell spaces should be kept.
+    static func cellsForRow(_ row: [PDFGlyph]) -> [String] {
+        guard let first = row.first else { return [] }
+        var cells: [String] = []
+        var buffer = String(first.scalar)
+        var cursor = first.rect.maxX
+        for glyph in row.dropFirst() {
+            if glyph.rect.minX - cursor > columnGap {
+                cells.append(buffer.trimmingCharacters(in: .whitespaces))
+                buffer = String(glyph.scalar)
+            } else {
+                buffer.append(glyph.scalar)
+            }
+            cursor = glyph.rect.maxX
+        }
+        cells.append(buffer.trimmingCharacters(in: .whitespaces))
+        return cells.filter { !$0.isEmpty }
+    }
+
+    // MARK: Tabular row grouping
+
+    /// Groups runs of consecutive rows with 2+ cells into tables.
+    static func groupConsecutiveTabularRows(_ rows: [[String]]) -> [PDFTable] {
+        var tables: [PDFTable] = []
+        var current: [[String]] = []
+        for row in rows {
+            if row.count >= 2 {
+                current.append(row)
+            } else if !current.isEmpty {
+                tables.append(PDFTable(rows: current))
+                current = []
+            }
+        }
+        if !current.isEmpty { tables.append(PDFTable(rows: current)) }
+        // Single-row "tables" are almost always form lines ("Invoice: 1234"),
+        // not real tables — drop them so downstream consumers don't have to.
+        return tables.filter { $0.rows.count >= 2 }
+    }
+}
diff --git a/Packages/OsaurusCore/Services/Documents/PlainTextAdapter.swift b/Packages/OsaurusCore/Services/Documents/PlainTextAdapter.swift
new file mode 100644
index 000000000..1df764eb0
--- /dev/null
+++ b/Packages/OsaurusCore/Services/Documents/PlainTextAdapter.swift
@@ -0,0 +1,90 @@
+//
+//  PlainTextAdapter.swift
+//  osaurus
+//
+//  Wraps the existing plain-text ingress path in `DocumentParser`. Claims
+//  the roughly 60 extensions that were previously handled by the inline
+//  `case _ where isPlainText(ext:)` branch — `.txt`, `.md`, source code,
+//  config files, etc. Behaviour is intentionally identical to the legacy
+//  switch: UTF-8 first, ISO-Latin-1 retry, post-read character-count
+//  truncation marker. This adapter is a migration bridge, not a fidelity
+//  improvement.
+//
+
+import Foundation
+
+public struct PlainTextAdapter: DocumentFormatAdapter {
+    public let formatId = "plaintext"
+
+    public init() {}
+
+    public func canHandle(url: URL, uti: String?) -> Bool {
+        Self.plainTextExtensions.contains(url.pathExtension.lowercased())
+    }
+
+    /// Reads the file as text and returns it capped by `applyCharacterCap`.
+    public func parse(url: URL, sizeLimit: Int64) async throws -> StructuredDocument {
+        let resourceValues = try? url.resourceValues(forKeys: [.fileSizeKey])
+        let fileSize = Int64(resourceValues?.fileSize ?? 0)
+        guard sizeLimit <= 0 || fileSize <= sizeLimit else {
+            throw DocumentAdapterError.sizeLimitExceeded(actual: fileSize, limit: sizeLimit)
+        }
+
+        let rawContent = try Self.readText(at: url)
+        if rawContent.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty {
+            throw DocumentAdapterError.emptyContent
+        }
+        let capped = Self.applyCharacterCap(rawContent)
+        let representation = AnyStructuredRepresentation(
+            formatId: formatId,
+            underlying: PlainTextRepresentation(text: capped)
+        )
+        return StructuredDocument(
+            formatId: formatId,
+            filename: url.lastPathComponent,
+            fileSize: fileSize,
+            representation: representation,
+            textFallback: capped
+        )
+    }
+
+    // MARK: - Helpers
+
+    /// UTF-8 first; on failure retry as latin-1 for files that are "mostly
+    /// text" with a few non-UTF-8 bytes. Reports the UTF-8 error if both fail.
+    private static func readText(at url: URL) throws -> String {
+        do {
+            return try String(contentsOf: url, encoding: .utf8)
+        } catch {
+            guard let data = try? Data(contentsOf: url),
+                let latin1 = String(data: data, encoding: .isoLatin1)
+            else { throw DocumentAdapterError.readFailed(underlying: error.localizedDescription) }
+            return latin1
+        }
+    }
+
+    /// Preserves the legacy 500K-character UX — consumers already expect the
+    /// trailing marker when a document is truncated. The cap on bytes-read
+    /// is higher (see `DocumentLimits.plainText`), so oversized files are
+    /// refused outright while merely long files get a truncation note.
+    static func applyCharacterCap(_ text: String) -> String {
+        let limit = 500_000
+        if text.count <= limit { return text }
+        let marker = "\n\n[Document truncated — exceeded \(limit) character limit]"
+        return String(text.prefix(limit)) + marker
+    }
+
+    static let plainTextExtensions: Set<String> = [
+        "txt", "md", "markdown", "csv", "tsv",
+        "json", "xml", "yaml", "yml", "toml",
+        "log", "ini", "cfg", "conf", "env",
+        "swift", "py", "js", "ts", "tsx", "jsx",
+        "rs", "go", "java", "kt", "c", "cpp", "h", "hpp",
+        "rb", "php", "sh", "bash", "zsh", "fish",
+        "css", "scss", "less", "sql",
+        "r", "m", "mm", "lua", "pl", "ex", "exs",
+        "zig", "nim", "dart", "scala", "groovy",
+        "tf", "hcl", "dockerfile",
+        "gitignore", "editorconfig", "prettierrc",
+    ]
+}
diff --git a/Packages/OsaurusCore/Services/Documents/RichDocumentAdapter.swift b/Packages/OsaurusCore/Services/Documents/RichDocumentAdapter.swift
new file mode 100644
index 000000000..33afe2f86
--- /dev/null
+++ b/Packages/OsaurusCore/Services/Documents/RichDocumentAdapter.swift
@@ -0,0 +1,84 @@
+//
+//  RichDocumentAdapter.swift
+//  osaurus
+//
+//  Wraps the `NSAttributedString(url:documentType:)` path in
+//  `DocumentParser.parseRichDocument`. A single adapter covers DOCX, DOC,
+//  RTF, RTFD, and HTML today because they share the same underlying
+//  framework call and produce the same plain-text output. When stage-4
+//  PR 11 lands a high-fidelity DOCX reader (tables, tracked changes,
+//  comments) this adapter splits along format lines and this one becomes
+//  the RTF/HTML-only path.
+//
+
+import AppKit
+import Foundation
+
+public struct RichDocumentAdapter: DocumentFormatAdapter {
+    public let formatId = "richdoc"
+
+    public init() {}
+
+    public func canHandle(url: URL, uti: String?) -> Bool {
+        Self.supportedExtensions.contains(url.pathExtension.lowercased())
+    }
+
+    /// Extracts plain text via `NSAttributedString`; HTML is read on the main actor.
+    public func parse(url: URL, sizeLimit: Int64) async throws -> StructuredDocument {
+        let fileSize = Int64((try? url.resourceValues(forKeys: [.fileSizeKey]))?.fileSize ?? 0)
+        if sizeLimit > 0, fileSize > sizeLimit {
+            throw DocumentAdapterError.sizeLimitExceeded(actual: fileSize, limit: sizeLimit)
+        }
+
+        let ext = url.pathExtension.lowercased()
+        let extracted: String
+        do {
+            // AppKit documents that the HTML importer must not be called from
+            // a background thread (it can time out), so HTML hops to main.
+            if Self.documentType(forExtension: ext) == .html {
+                extracted = try await MainActor.run { try Self.readString(from: url, ext: ext) }
+            } else {
+                extracted = try Self.readString(from: url, ext: ext)
+            }
+        } catch {
+            throw DocumentAdapterError.readFailed(underlying: error.localizedDescription)
+        }
+
+        guard !extracted.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty else {
+            throw DocumentAdapterError.emptyContent
+        }
+        let truncated = PlainTextAdapter.applyCharacterCap(extracted)
+        return StructuredDocument(
+            formatId: formatId,
+            filename: url.lastPathComponent,
+            fileSize: fileSize,
+            representation: AnyStructuredRepresentation(
+                formatId: formatId, underlying: PlainTextRepresentation(text: truncated)),
+            textFallback: truncated
+        )
+    }
+
+    // MARK: - Helpers
+
+    static let supportedExtensions: Set<String> = [
+        "docx", "doc", "rtf", "rtfd", "html", "htm",
+    ]
+
+    /// Reads `url` as the document type mapped from `ext` and returns its text.
+    private static func readString(from url: URL, ext: String) throws -> String {
+        var options: [NSAttributedString.DocumentReadingOptionKey: Any] = [:]
+        if let type = documentType(forExtension: ext) { options[.documentType] = type }
+        return try NSAttributedString(url: url, options: options, documentAttributes: nil).string
+    }
+
+    private static func documentType(forExtension ext: String) -> NSAttributedString.DocumentType? {
+        switch ext {
+        case "docx": return nil  // NSAttributedString auto-detects OOXML
+        case "doc": return .docFormat
+        case "rtf": return .rtf
+        case "rtfd": return .rtfd  // RTFD has its own document type
+        case "html", "htm": return .html
+        default: return nil
+        }
+    }
+}
diff --git a/Packages/OsaurusCore/Services/Documents/XLSXAdapter.swift b/Packages/OsaurusCore/Services/Documents/XLSXAdapter.swift
new file mode 100644
index 000000000..e91624ee5
--- /dev/null
+++ b/Packages/OsaurusCore/Services/Documents/XLSXAdapter.swift
@@ -0,0 +1,211 @@
+//
+//  XLSXAdapter.swift
+//  osaurus
+//
+//  First real-fidelity adapter: reads `.xlsx` into a typed `Workbook`
+//  rather than flattening it to markdown the way the legacy text path
+//  would. Backed by CoreXLSX. The adapter intentionally does NOT call
+//  `parseStyles()` — that entry point crashes on openpyxl-generated
+//  workbooks because CoreXLSX's `PatternFill.patternType` is non-optional
+//  while Excel's default empty pattern omits that attribute. Style-
+//  dependent fidelity (number formats, column widths, dates that aren't
+//  explicitly typed) is deferred to a follow-up slice so this PR can ship
+//  behaviour that works against every current-style XLSX writer.
+//
+
+import CoreXLSX
+import Foundation
+
+public struct XLSXAdapter: DocumentFormatAdapter {
+    public let formatId = "xlsx"
+
+    public init() {}
+
+    public func canHandle(url: URL, uti: String?) -> Bool {
+        url.pathExtension.lowercased() == "xlsx"
+    }
+
+    /// Parses every worksheet of the workbook at `url` into a typed `Workbook`.
+    ///
+    /// - Throws: `.sizeLimitExceeded`, `.readFailed` when CoreXLSX cannot
+    ///   open or decode a part, `.emptyContent` when there are no sheets.
+    public func parse(url: URL, sizeLimit: Int64) async throws -> StructuredDocument {
+        let fileSize = Int64((try? url.resourceValues(forKeys: [.fileSizeKey]))?.fileSize ?? 0)
+        if sizeLimit > 0, fileSize > sizeLimit {
+            throw DocumentAdapterError.sizeLimitExceeded(actual: fileSize, limit: sizeLimit)
+        }
+
+        guard let file = XLSXFile(filepath: url.path) else {
+            throw DocumentAdapterError.readFailed(underlying: "XLSXFile could not open \(url.path)")
+        }
+
+        let sharedStrings: [String]
+        do {
+            // `parseSharedStrings` is nil on workbooks with no text cells,
+            // which is legal for a pure-numeric sheet. Treat that as empty.
+            // Rich-text entries have no plain `text`; their content lives in
+            // formatted runs, so join the run texts rather than emit "".
+            let parsed = try file.parseSharedStrings()
+            sharedStrings = parsed?.items.map { item in
+                item.text ?? item.richText.compactMap(\.text).joined()
+            } ?? []
+        } catch {
+            throw DocumentAdapterError.readFailed(underlying: "shared strings: \(error.localizedDescription)")
+        }
+
+        let coreWorkbooks: [CoreXLSX.Workbook]
+        do {
+            coreWorkbooks = try file.parseWorkbooks()
+        } catch {
+            throw DocumentAdapterError.readFailed(underlying: "workbook index: \(error.localizedDescription)")
+        }
+
+        var sheets: [Sheet] = []
+        for coreWorkbook in coreWorkbooks {
+            let pathsAndNames: [(name: String?, path: String)]
+            do {
+                pathsAndNames = try file.parseWorksheetPathsAndNames(workbook: coreWorkbook)
+            } catch {
+                throw DocumentAdapterError.readFailed(underlying: "worksheet index: \(error.localizedDescription)")
+            }
+
+            for pair in pathsAndNames {
+                let name = pair.name ?? pair.path
+                let coreSheet: Worksheet
+                do {
+                    coreSheet = try file.parseWorksheet(at: pair.path)
+                } catch {
+                    throw DocumentAdapterError.readFailed(
+                        underlying: "worksheet \(name): \(error.localizedDescription)"
+                    )
+                }
+                sheets.append(Self.makeSheet(name: name, coreSheet: coreSheet, sharedStrings: sharedStrings))
+            }
+        }
+
+        guard !sheets.isEmpty else { throw DocumentAdapterError.emptyContent }
+
+        let workbook = Workbook(sheets: sheets, sharedStrings: sharedStrings)
+        let textFallback = Self.renderTextFallback(workbook)
+
+        return StructuredDocument(
+            formatId: formatId,
+            filename: url.lastPathComponent,
+            fileSize: fileSize,
+            representation: AnyStructuredRepresentation(
+                formatId: formatId,
+                underlying: workbook
+            ),
+            textFallback: textFallback
+        )
+    }
+
+    // MARK: - CoreXLSX → Workbook
+
+    /// Maps one CoreXLSX worksheet to a `Sheet` (cells, formulas, merges).
+    private static func makeSheet(
+        name: String,
+        coreSheet: Worksheet,
+        sharedStrings: [String]
+    ) -> Sheet {
+        let rows: [Row] = (coreSheet.data?.rows ?? []).map { coreRow in
+            let cells: [Cell] = coreRow.cells.map { coreCell in
+                Cell(
+                    reference: coreCell.reference.description,
+                    value: mapCellValue(coreCell, sharedStrings: sharedStrings),
+                    formula: coreCell.formula?.value
+                )
+            }
+            return Row(index: Int(coreRow.reference), cells: cells)
+        }
+
+        let mergedRanges: [CellRange] = (coreSheet.mergeCells?.items ?? []).map {
+            CellRange(reference: $0.reference)
+        }
+        return Sheet(name: name, rows: rows, mergedRanges: mergedRanges)
+    }
+
+    private static func mapCellValue(
+        _ coreCell: CoreXLSX.Cell,
+        sharedStrings: [String]
+    ) -> CellValue {
+        // Inline strings live in `inlineString`, typically with no raw `value`;
+        // resolve them first or the guard below turns them all into `.empty`.
+        if coreCell.type == .inlineStr, let text = coreCell.inlineString?.text, !text.isEmpty {
+            return .inlineString(text)
+        }
+        // CoreXLSX's `Cell.type` is an optional enum; `Cell.value` is a
+        // raw string. The interpretation depends on `type`.
+        guard let rawValue = coreCell.value, !rawValue.isEmpty else { return .empty }
+
+        switch coreCell.type {
+        case .bool:
+            return .bool(rawValue == "1")
+        case .sharedString:
+            if let index = Int(rawValue), index >= 0, index < sharedStrings.count {
+                return .string(sharedStrings[index])
+            }
+            return .empty
+        case .inlineStr:
+            if let inline = coreCell.inlineString {
+                // CoreXLSX's `InlineString` concatenates all runs for us.
+                return .inlineString(inline.text ?? "")
+            }
+            return .inlineString(rawValue)
+        case .string, .error:
+            return .string(rawValue)
+        case .number, .none:
+            if let number = Double(rawValue) {
+                return .number(number)
+            }
+            return .empty
+        case .date:
+            // Explicitly-typed dates are rare in the wild — Excel writers
+            // almost always store dates as numbers plus a style. Preserve
+            // the raw string so callers that know the style table can
+            // reconstruct; callers that don't still see a string.
+            return .string(rawValue)
+        }
+    }
+
+    // MARK: - Text fallback
+
+    /// Renders each sheet as tab-separated rows under a `## Sheet:` heading.
+    private static func renderTextFallback(_ workbook: Workbook) -> String {
+        var out: [String] = []
+        for sheet in workbook.sheets {
+            out.append("## Sheet: \(sheet.name)")
+            for row in sheet.rows {
+                let cellText = row.cells.map { describeCell($0) }.joined(separator: "\t")
+                out.append("\(row.index)\t\(cellText)")
+            }
+            if !sheet.mergedRanges.isEmpty {
+                let ranges = sheet.mergedRanges.map { $0.reference }.joined(separator: ", ")
+                out.append("Merged: \(ranges)")
+            }
+            out.append("")
+        }
+        return out.joined(separator: "\n").trimmingCharacters(in: .whitespacesAndNewlines)
+    }
+
+    /// Formats a cell for the text fallback; a formula is shown as `=formula`.
+    private static func describeCell(_ cell: Cell) -> String {
+        let base: String
+        switch cell.value {
+        case .empty: base = ""
+        case .number(let value):
+            // `Int(exactly:)` is nil for fractional, non-finite, or out-of-
+            // range values (a plain `Int(value)` traps on e.g. 1e19), and
+            // those print via `String(value)`.
+            base = Int(exactly: value).map { String($0) } ?? String(value)
+        case .string(let text), .inlineString(let text):
+            base = text
+        case .bool(let flag):
+            base = flag ? "TRUE" : "FALSE"
+        }
+        if let formula = cell.formula {
+            return base.isEmpty ? "=\(formula)" : "\(base) [=\(formula)]"
+        }
+        return base
+    }
+}
diff --git a/Packages/OsaurusCore/Services/Documents/XLSXEmitter.swift b/Packages/OsaurusCore/Services/Documents/XLSXEmitter.swift
new file mode 100644
index 000000000..479c75f73
--- /dev/null
+++ b/Packages/OsaurusCore/Services/Documents/XLSXEmitter.swift
@@ -0,0 +1,222 @@
+//
+//  XLSXEmitter.swift
+//  osaurus
+//
+//  Writes a typed `Workbook` back out to `.xlsx` using libxlsxwriter.
+//  Pairs with `XLSXAdapter` to close the read-emit-read round trip that
+//  makes Excel a first-class format for osaurus agents: an agent can
+//  ingest a workbook, edit a `Workbook` in-process, and emit it back to
+//  the user as an attachable artifact.
+//
+//  Licensing notes, surfaced for whoever owns acknowledgements:
+//    - libxlsxwriter itself is BSD-2-Clause.
+//    - It vendors `third_party/tmpfileplus/tmpfileplus.c` which is
+//      MPL 2.0. Statically linking it is permitted; the MPL only
+//      requires that the source of the covered file remain available.
+//      A follow-up to `AcknowledgementsView` should list both.
+//
+
+import Foundation
+import libxlsxwriter
+
+public struct XLSXEmitter: DocumentFormatEmitter {
+    public let formatId = "xlsx"
+
+    public init() {}
+
+    /// True when the document's underlying representation is a `Workbook`.
+    public func canEmit(_ document: StructuredDocument) -> Bool {
+        document.representation.underlying is Workbook
+    }
+
+    /// Writes `document`'s `Workbook` to `url` as an `.xlsx` file.
+    /// - Throws: `.writeFailed` when the representation is not a `Workbook` or
+    ///   libxlsxwriter fails; after a sheet or close failure the file at `url`
+    ///   is removed on a best-effort basis.
+    public func emit(_ document: StructuredDocument, to url: URL) async throws {
+        guard let workbook = document.representation.underlying as? Workbook else {
+            throw DocumentAdapterError.writeFailed(
+                underlying: "emit called with non-Workbook representation"
+            )
+        }
+
+        // libxlsxwriter operates on a filename — it writes directly to the
+        // destination during `workbook_close` rather than handing back
+        // bytes. The caller has already resolved/contained `url` per the
+        // emitter contract.
+        let workbookHandle: UnsafeMutablePointer<lxw_workbook>? = url.path.withCString {
+            workbook_new($0)
+        }
+        guard let lxwWorkbook = workbookHandle else {
+            throw DocumentAdapterError.writeFailed(
+                underlying: "workbook_new failed for \(url.path)"
+            )
+        }
+
+        var pendingError: DocumentAdapterError?
+
+        for sheet in workbook.sheets {
+            let sheetHandle: UnsafeMutablePointer<lxw_worksheet>? = sheet.name.withCString {
+                workbook_add_worksheet(lxwWorkbook, $0)
+            }
+            guard let lxwSheet = sheetHandle else {
+                pendingError = .writeFailed(
+                    underlying: "workbook_add_worksheet failed for '\(sheet.name)'"
+                )
+                break
+            }
+            if let err = Self.writeSheet(sheet, to: lxwSheet) {
+                pendingError = err
+                break
+            }
+        }
+
+        // `workbook_close` is ALWAYS called, even on earlier errors, so
+        // libxlsxwriter can release its buffers and temp files.
+        let closeError = workbook_close(lxwWorkbook)
+        if pendingError == nil, closeError.rawValue != 0 {
+            pendingError = .writeFailed(underlying: "workbook_close error \(closeError.rawValue)")
+        }
+
+        if let error = pendingError {
+            // Best-effort cleanup — leaving a partial .xlsx behind would
+            // masquerade as a successful emit to any later reader.
+            try? FileManager.default.removeItem(at: url)
+            throw error
+        }
+    }
+
+    // MARK: - Internals
+
+    /// Writes every cell of `sheet`, then its merged ranges; returns the
+    /// first error encountered, or nil on success.
+    private static func writeSheet(
+        _ sheet: Sheet,
+        to lxwSheet: UnsafeMutablePointer<lxw_worksheet>
+    ) -> DocumentAdapterError? {
+        for row in sheet.rows {
+            for cell in row.cells {
+                if let error = writeCell(cell, to: lxwSheet) {
+                    return error
+                }
+            }
+        }
+        for range in sheet.mergedRanges {
+            guard let coords = parseRange(range.reference) else {
+                return .writeFailed(underlying: "Bad merge range '\(range.reference)'")
+            }
+            // Passing a nil string tells libxlsxwriter to preserve whatever
+            // was already written at the top-left cell of the range; our
+            // top-left cell was emitted by the loop above.
+            let err = worksheet_merge_range(
+                lxwSheet,
+                coords.firstRow, coords.firstCol, coords.lastRow, coords.lastCol,
+                nil, nil
+            )
+            if err.rawValue != 0 {
+                return .writeFailed(
+                    underlying: "worksheet_merge_range \(range.reference) → \(err.rawValue)"
+                )
+            }
+        }
+        return nil
+    }
+
+    /// Writes one cell. A cell with a formula is written as a formula
+    /// only; its `value` is not emitted.
+    private static func writeCell(
+        _ cell: Cell,
+        to lxwSheet: UnsafeMutablePointer<lxw_worksheet>
+    ) -> DocumentAdapterError? {
+        guard let coords = parseA1(cell.reference) else {
+            return .writeFailed(underlying: "Bad cell reference '\(cell.reference)'")
+        }
+        let row = coords.row
+        let col = coords.col
+
+        if let formula = cell.formula {
+            let err = formula.withCString {
+                worksheet_write_formula(lxwSheet, row, col, $0, nil)
+            }
+            if err.rawValue != 0 {
+                return .writeFailed(
+                    underlying: "worksheet_write_formula \(cell.reference) → \(err.rawValue)"
+                )
+            }
+            return nil
+        }
+
+        switch cell.value {
+        case .empty:
+            return nil
+        case .number(let value):
+            let err = worksheet_write_number(lxwSheet, row, col, value, nil)
+            if err.rawValue != 0 {
+                return .writeFailed(
+                    underlying: "worksheet_write_number \(cell.reference) → \(err.rawValue)"
+                )
+            }
+        case .string(let text), .inlineString(let text):
+            let err = text.withCString {
+                worksheet_write_string(lxwSheet, row, col, $0, nil)
+            }
+            if err.rawValue != 0 {
+                return .writeFailed(
+                    underlying: "worksheet_write_string \(cell.reference) → \(err.rawValue)"
+                )
+            }
+        case .bool(let flag):
+            let err = worksheet_write_boolean(lxwSheet, row, col, flag ? 1 : 0, nil)
+            if err.rawValue != 0 {
+                return .writeFailed(
+                    underlying: "worksheet_write_boolean \(cell.reference) → \(err.rawValue)"
+                )
+            }
+        }
+        return nil
+    }
+
+    // MARK: - A1 parsing
+
+    /// Parses an A1-style cell reference ("B3", "AA10") into the 0-indexed
+    /// row and column that libxlsxwriter expects. Returns nil for anything
+    /// that doesn't match `[A-Za-z]+[0-9]+`, for row 0, and for columns
+    /// past Excel's 16,384-column cap.
+    private static func parseA1(_ reference: String) -> (row: UInt32, col: UInt16)? {
+        let columnCap = 16_384  // Excel col cap
+        var col = 0
+        var rowDigits = ""
+        for scalar in reference.unicodeScalars {
+            switch scalar.value {
+            case 0x41 ... 0x5A, 0x61 ... 0x7A:  // A-Z, a-z
+                // A letter after the first digit ("A1B") is malformed.
+                guard rowDigits.isEmpty else { return nil }
+                let letter = Int(scalar.value & ~0x20) - 0x40  // fold case; A → 1
+                col = col * 26 + letter
+                // Checked per letter so long inputs cannot overflow `Int`.
+                guard col <= columnCap else { return nil }
+            case 0x30 ... 0x39:  // 0-9
+                // A digit before any letter ("1A") is malformed.
+                guard col > 0 else { return nil }
+                rowDigits.unicodeScalars.append(scalar)
+            default:
+                return nil
+            }
+        }
+        guard col > 0, let rowOneBased = UInt32(rowDigits), rowOneBased > 0 else { return nil }
+        return (row: rowOneBased - 1, col: UInt16(col - 1))
+    }
+
+    /// Parses an A1:A1 range ("A5:B5") into the four 0-indexed coordinates
+    /// libxlsxwriter's merge call wants.
+    private static func parseRange(
+        _ reference: String
+    ) -> (firstRow: UInt32, firstCol: UInt16, lastRow: UInt32, lastCol: UInt16)? {
+        let parts = reference.split(separator: ":", maxSplits: 1)
+        guard parts.count == 2,
+            let first = parseA1(String(parts[0])),
+            let last = parseA1(String(parts[1]))
+        else { return nil }
+        return (first.row, first.col, last.row, last.col)
+    }
+}
diff --git a/Packages/OsaurusCore/Tests/Documents/CSVAdapterTests.swift b/Packages/OsaurusCore/Tests/Documents/CSVAdapterTests.swift
new file mode 100644
index 000000000..31445a35b
--- /dev/null
+++ b/Packages/OsaurusCore/Tests/Documents/CSVAdapterTests.swift
@@ -0,0 +1,175 @@
+//
+//  CSVAdapterTests.swift
+//  osaurusTests
+//
+//  Covers the in-memory CSV / TSV adapter. Pins the fields business users
+//  expect to survive an ingest: delimiter auto-pick per extension, quoted
+//  cells with commas and newlines, `""` quote escapes, UTF-8 BOM handling,
+//  header-row detection, and size-limit refusal. The streaming variant
+//  has its own suite — the parser state machine is shared, so this one
+//  focuses on the eager path + the typed CSVTable output.
+//
+
+import Foundation
+import Testing
+
+@testable import OsaurusCore
+
+@Suite("CSVAdapter")
+struct CSVAdapterTests {
+
+    @Test func canHandle_claimsCSVAndTSV() {
+        let adapter = CSVAdapter()
+        #expect(adapter.canHandle(url: URL(fileURLWithPath: "/tmp/a.csv"), uti: nil))
+        #expect(adapter.canHandle(url: URL(fileURLWithPath: "/tmp/a.TSV"), uti: nil))
+        #expect(adapter.canHandle(url: URL(fileURLWithPath: "/tmp/a.txt"), uti: nil) == false)
+    }
+
+    /// A textual first row becomes `header`; the remaining rows become `records`,
+    /// and a `.csv` extension selects the comma delimiter.
+    @Test func parse_splitsHeaderFromRecords() async throws {
+        let url = try Self.write(
+            """
+            Month,Revenue,Status
+            January,1200,closed
+            February,950,closed
+            March,1400,open
+            """,
+            ext: "csv"
+        )
+        defer { try? FileManager.default.removeItem(at: url) }
+
+        let document = try await CSVAdapter().parse(url: url, sizeLimit: 0)
+        let table = try #require(document.representation.underlying as? CSVTable, "not a CSVTable")
+        #expect(table.header == ["Month", "Revenue", "Status"])
+        #expect(table.records.count == 3)
+        #expect(table.records.first == ["January", "1200", "closed"])
+        #expect(table.delimiter == ",")
+    }
+
+    /// A `.tsv` extension selects the tab delimiter; header and records are
+    /// split on tabs rather than commas.
+    @Test func parse_tsvUsesTabDelimiter() async throws {
+        let url = try Self.write("Col1\tCol2\nA\t1\nB\t2\n", ext: "tsv")
+        defer { try? FileManager.default.removeItem(at: url) }
+
+        let document = try await CSVAdapter().parse(url: url, sizeLimit: 0)
+        let table = try #require(document.representation.underlying as? CSVTable, "not a CSVTable")
+        #expect(table.delimiter == "\t")
+        #expect(table.header == ["Col1", "Col2"])
+        #expect(table.records == [["A", "1"], ["B", "2"]])
+    }
+
+    /// Delimiters and line breaks inside a quoted field belong to that field
+    /// and must not start a new cell or record.
+    @Test func parse_preservesQuotedCommasAndNewlines() async throws {
+        // Row 1 has a comma inside the quoted first field and a newline inside
+        // the quoted second field. Both must end up as single cells.
+        let url = try Self.write(
+            """
+            name,note
+            "Smith, John","note line 1
+            note line 2"
+            Doe,plain
+            """,
+            ext: "csv"
+        )
+        defer { try? FileManager.default.removeItem(at: url) }
+
+        let document = try await CSVAdapter().parse(url: url, sizeLimit: 0)
+        let table = try #require(document.representation.underlying as? CSVTable, "not a CSVTable")
+        #expect(table.records.count == 2)
+        #expect(table.records[0] == ["Smith, John", "note line 1\nnote line 2"])
+        #expect(table.records[1] == ["Doe", "plain"])
+    }
+
+    /// A doubled quote (`""`) inside a quoted field decodes to one literal
+    /// quote character.
+    @Test func parse_expandsDoubleQuoteEscape() async throws {
+        // Raw `#"""` so the embedded `""` escapes don't fight the compiler.
+        let url = try Self.write(
+            #"""
+            code,label
+            A,"He said ""yes"""
+            """#,
+            ext: "csv"
+        )
+        defer { try? FileManager.default.removeItem(at: url) }
+
+        let document = try await CSVAdapter().parse(url: url, sizeLimit: 0)
+        let table = try #require(document.representation.underlying as? CSVTable, "not a CSVTable")
+        #expect(table.records.first == ["A", #"He said "yes""#])
+    }
+
+    /// A leading UTF-8 byte-order mark must not leak into the first header
+    /// cell.
+    @Test func parse_stripsUTF8BOM() async throws {
+        let bom = Data([0xEF, 0xBB, 0xBF])
+        let body = Data("Name,Value\nAlpha,1\n".utf8)
+        var combined = bom
+        combined.append(body)
+        let url = FileManager.default.temporaryDirectory
+            .appendingPathComponent("osaurus-csv-bom-\(UUID().uuidString).csv")
+        try combined.write(to: url)
+        defer { try? FileManager.default.removeItem(at: url) }
+
+        let document = try await CSVAdapter().parse(url: url, sizeLimit: 0)
+        let table = try #require(document.representation.underlying as? CSVTable, "not a CSVTable")
+        #expect(table.header == ["Name", "Value"])
+        #expect(table.records.first == ["Alpha", "1"])
+    }
+
+    /// With an all-numeric first row `header` stays nil and every row,
+    /// including the first, is reported in `records`.
+    @Test func parse_numericOnlyFirstRowIsNotHeader() async throws {
+        // All-numeric first row → no header detection; the whole file
+        // should surface as records.
+        let url = try Self.write("1,2,3\n4,5,6\n", ext: "csv")
+        defer { try? FileManager.default.removeItem(at: url) }
+
+        let document = try await CSVAdapter().parse(url: url, sizeLimit: 0)
+        let table = try #require(document.representation.underlying as? CSVTable, "not a CSVTable")
+        #expect(table.header == nil)
+        #expect(table.records == [["1", "2", "3"], ["4", "5", "6"]])
+    }
+
+    @Test func parse_rejectsOversizedFile() async throws {
+        let url = try Self.write("a,b\n1,2\n", ext: "csv")
+        defer { try? FileManager.default.removeItem(at: url) }
+
+        await #expect(throws: DocumentAdapterError.self) {
+            _ = try await CSVAdapter().parse(url: url, sizeLimit: 1)
+        }
+    }
+
+    @Test func parse_emptyFileThrowsEmptyContent() async throws {
+        let url = try Self.write("", ext: "csv")
+        defer { try? FileManager.default.removeItem(at: url) }
+
+        await #expect(throws: DocumentAdapterError.self) {
+            _ = try await CSVAdapter().parse(url: url, sizeLimit: 0)
+        }
+    }
+
+    /// CRLF line endings terminate records without leaving a stray "\r" in
+    /// the last cell of each row.
+    @Test func parse_crlfLineEndingsAreRecognised() async throws {
+        let url = FileManager.default.temporaryDirectory
+            .appendingPathComponent("osaurus-csv-crlf-\(UUID().uuidString).csv")
+        try Data("a,b\r\n1,2\r\n3,4\r\n".utf8).write(to: url)
+        defer { try? FileManager.default.removeItem(at: url) }
+
+        let document = try await CSVAdapter().parse(url: url, sizeLimit: 0)
+        let table = try #require(document.representation.underlying as? CSVTable, "not a CSVTable")
+        #expect(table.records == [["1", "2"], ["3", "4"]])
+    }
+
+    // MARK: - Fixtures
+
+    private static func write(_ content: String, ext: String) throws -> URL {
+        let url = FileManager.default.temporaryDirectory
+            .appendingPathComponent("osaurus-csv-\(UUID().uuidString).\(ext)")
+        try content.write(to: url, atomically: true, encoding: .utf8)
+        return url
+    }
+}
diff --git a/Packages/OsaurusCore/Tests/Documents/CSVStreamerTests.swift b/Packages/OsaurusCore/Tests/Documents/CSVStreamerTests.swift
new file mode 100644
index 000000000..6f13097a4
--- /dev/null
+++ b/Packages/OsaurusCore/Tests/Documents/CSVStreamerTests.swift
@@ -0,0 +1,147 @@
+//
+//  CSVStreamerTests.swift
+//  osaurusTests
+//
+//  Streaming-side coverage for the CSV pipeline. The parser state machine
+//  is shared with `CSVAdapter`, so these tests focus on the streaming
+//  contract: back-pressure via AsyncThrowingStream, row-by-row emission,
+//  UTF-8 boundary handling across chunk edges, and cancellation.
+//
+
+import Foundation
+import Testing
+
+@testable import OsaurusCore
+
+@Suite("CSVStreamer")
+struct CSVStreamerTests {
+
+    @Test func stream_yieldsRowsInOrder() async throws {
+        let url = try Self.write(
+            """
+            name,score
+            Alice,10
+            Bob,20
+            Carol,30
+            """,
+            ext: "csv"
+        )
+        defer { try? FileManager.default.removeItem(at: url) }
+
+        var collected: [[String]] = []
+        for try await record in CSVStreamer().stream(url: url) {
+            collected.append(record.cells)
+        }
+        #expect(
+            collected == [
+                ["name", "score"],
+                ["Alice", "10"],
+                ["Bob", "20"],
+                ["Carol", "30"],
+            ]
+        )
+    }
+
+    @Test func stream_numbersRowsFromOne() async throws {
+        let url = try Self.write("a,b\n1,2\n3,4\n", ext: "csv")
+        defer { try? FileManager.default.removeItem(at: url) }
+
+        var lineNumbers: [Int] = []
+        for try await record in CSVStreamer().stream(url: url) {
+            lineNumbers.append(record.lineNumber)
+        }
+        #expect(lineNumbers == [1, 2, 3])
+    }
+
+    @Test func stream_tsvSplitsOnTab() async throws {
+        let url = try Self.write("col1\tcol2\nA\t1\nB\t2\n", ext: "tsv")
+        defer { try? FileManager.default.removeItem(at: url) }
+
+        var collected: [[String]] = []
+        for try await record in CSVStreamer().stream(url: url) {
+            collected.append(record.cells)
+        }
+        #expect(collected == [["col1", "col2"], ["A", "1"], ["B", "2"]])
+    }
+
+    @Test func stream_preservesQuotedNewlinesAcrossChunks() async throws {
+        // Build a payload bigger than one chunk so the parser actually
+        // sees the embedded newline arrive in two separate feeds.
+        let filler = String(repeating: "x", count: 70_000)
+        let url = try Self.write(
+            """
+            id,note
+            1,"line one
+            line two \(filler) end"
+            2,plain
+            """,
+            ext: "csv"
+        )
+        defer { try? FileManager.default.removeItem(at: url) }
+
+        var collected: [[String]] = []
+        for try await record in CSVStreamer().stream(url: url) {
+            collected.append(record.cells)
+        }
+        #expect(collected.count == 3)
+        #expect(collected[1].count == 2)
+        #expect(collected[1][0] == "1")
+        #expect(collected[1][1].hasPrefix("line one\nline two "))
+        #expect(collected[1][1].hasSuffix(" end"))
+        #expect(collected[2] == ["2", "plain"])
+    }
+
+    @Test func stream_cancellationStopsMidFile() async throws {
+        // Generate a file meant to span more than one read so stopping early is
+        // meaningful. 10k rows × ~10 bytes ≈ 100 KB (chunk size: TODO confirm).
+        var text = "id,v\n"
+        for i in 0 ..< 10_000 {
+            text.append("\(i),\(i * 2)\n")
+        }
+        let url = try Self.write(text, ext: "csv")
+        defer { try? FileManager.default.removeItem(at: url) }
+
+        let task = Task { () -> Int in
+            var count = 0
+            for try await _ in CSVStreamer().stream(url: url) {
+                count += 1
+                if count >= 3 {
+                    // No explicit cancel() is issued here: returning from inside
+                    // the loop simply stops consuming, like a caller's `break`.
+                    // NOTE(review): producer-side teardown is not asserted.
+                    return count
+                }
+            }
+            return count
+        }
+        let delivered = try await task.value
+        #expect(delivered == 3)
+    }
+
+    // MARK: - UTF-8 boundary split helper
+
+    @Test func splitAtUTF8Boundary_keepsCompleteScalars() {
+        // `é` is C3 A9 in UTF-8. Cut the buffer mid-scalar and confirm
+        // the tail gets carried to the next read.
+        let data = Data([0x61, 0xC3, 0xA9, 0xC3])  // "a", "é", then a lead byte C3 with no continuation
+        let (decodable, tail) = CSVStreamer.splitAtUTF8Boundary(data)
+        #expect(decodable.count == 3)
+        #expect(tail == Data([0xC3]))
+    }
+
+    @Test func splitAtUTF8Boundary_shortBufferReturnedAsIs() {
+        let data = Data([0x61, 0x62])
+        let (decodable, tail) = CSVStreamer.splitAtUTF8Boundary(data)
+        #expect(decodable == data)
+        #expect(tail.isEmpty)
+    }
+
+    // MARK: - Fixtures
+
+    private static func write(_ content: String, ext: String) throws -> URL {
+        let url = FileManager.default.temporaryDirectory
+            .appendingPathComponent("osaurus-csvstream-\(UUID().uuidString).\(ext)")
+        try content.write(to: url, atomically: true, encoding: .utf8)
+        return url
+    }
+}
diff --git a/Packages/OsaurusCore/Tests/Documents/DocumentParserShimTests.swift b/Packages/OsaurusCore/Tests/Documents/DocumentParserShimTests.swift
new file mode 100644
index 000000000..580383bb2
--- /dev/null
+++ b/Packages/OsaurusCore/Tests/Documents/DocumentParserShimTests.swift
@@ -0,0 +1,125 @@
+//
+//  DocumentParserShimTests.swift
+//  osaurusTests
+//
+//  Integration tests for the `DocumentParser.parseAll` shim: verifies that
+//  the registry is consulted first, that `.emptyContent` from a registered
+//  adapter falls through to the legacy switch, and that errors bubble up
+//  translated into the legacy `ParseError` surface. Uses the shared
+//  registry (register + `unregisterAll` in teardown) so the shim's call
+//  site is exactly the one reached from production.
+//
+
+import Foundation
+import Testing
+
+@testable import OsaurusCore
+
+@Suite("DocumentParser.parseAll registry shim", .serialized)
+struct DocumentParserShimTests {
+
+    // Dedicated format id + extension so the fixture never shadows a built-in.
+    private static let fixtureFormatId = "test-fixture-shim"
+    private static let fixtureExtension = "fixtureshim"
+
+    private func registerFixture(content: String) {
+        // Adapter that claims only the fixture extension and echoes `content`.
+        let adapter = FixtureAdapter(
+            formatId: Self.fixtureFormatId,
+            extensions: [Self.fixtureExtension],
+            produce: content
+        )
+        DocumentFormatRegistry.shared.register(adapter: adapter)
+    }
+
+    private func cleanUp() {
+        DocumentFormatRegistry.shared.unregisterAll(formatId: Self.fixtureFormatId)
+    }
+
+    // MARK: - Routing
+
+    @Test func parseAll_routesThroughRegistry_whenAdapterClaims() throws {
+        registerFixture(content: "routed-through-registry")
+        defer { cleanUp() }
+
+        let fixtureURL = try writeFile(content: "ignored", ext: Self.fixtureExtension)
+        defer { try? FileManager.default.removeItem(at: fixtureURL) }
+
+        let parsed = try DocumentParser.parseAll(url: fixtureURL)
+        #expect(parsed.count == 1)
+        #expect(parsed.first?.documentContent == "routed-through-registry")
+    }
+
+    @Test func parseAll_fallsThroughOnEmptyContent() throws {
+        // An empty payload makes the fixture adapter throw `.emptyContent`. The
+        // shim is then expected to retry the legacy switch, which has no case
+        // for the fixture extension and so surfaces a `ParseError`.
+        registerFixture(content: "")
+        defer { cleanUp() }
+
+        let fixtureURL = try writeFile(content: "ignored", ext: Self.fixtureExtension)
+        defer { try? FileManager.default.removeItem(at: fixtureURL) }
+
+        #expect(throws: DocumentParser.ParseError.self) {
+            _ = try DocumentParser.parseAll(url: fixtureURL)
+        }
+    }
+
+    @Test func parseAll_preservesLegacyPath_whenNoAdapterMatches() throws {
+        // Nothing is registered by this test; a plain .txt file is expected to
+        // come back as exactly one attachment carrying the file's text.
+        let textURL = try writeFile(content: "legacy path still works", ext: "txt")
+        defer { try? FileManager.default.removeItem(at: textURL) }
+
+        let parsed = try DocumentParser.parseAll(url: textURL)
+        #expect(parsed.count == 1)
+        #expect(parsed.first?.documentContent == "legacy path still works")
+    }
+
+    // MARK: - Bootstrap
+
+    @Test func bootstrap_registersExpectedBuiltInsOnIsolatedRegistry() {
+        let isolated = DocumentFormatRegistry()
+        DocumentAdaptersBootstrap.registerBuiltIns(registry: isolated)
+        let registered = isolated.registeredFormatIds()
+        // Every in-tree format id the bootstrap is expected to install.
+        for expected in ["plaintext", "pdf", "richdoc", "xlsx"] {
+            #expect(registered.contains(expected))
+        }
+    }
+
+    // MARK: - Fixtures
+
+    private func writeFile(content: String, ext: String) throws -> URL {
+        let name = "osaurus-shim-\(UUID().uuidString).\(ext)"
+        let destination = FileManager.default.temporaryDirectory.appendingPathComponent(name)
+        try content.write(to: destination, atomically: true, encoding: .utf8)
+        return destination
+    }
+
+    private struct FixtureAdapter: DocumentFormatAdapter {
+        let formatId: String
+        let extensions: Set<String>
+        let produce: String
+
+        func canHandle(url: URL, uti: String?) -> Bool {
+            extensions.contains(url.pathExtension.lowercased())
+        }
+
+        func parse(url: URL, sizeLimit: Int64) async throws -> StructuredDocument {
+            // An empty payload stands in for "adapter found nothing to extract".
+            if produce.isEmpty { throw DocumentAdapterError.emptyContent }
+            let wrapped = AnyStructuredRepresentation(
+                formatId: formatId,
+                underlying: PlainTextRepresentation(text: produce)
+            )
+            return StructuredDocument(
+                formatId: formatId,
+                filename: url.lastPathComponent,
+                fileSize: 0,
+                representation: wrapped,
+                textFallback: produce
+            )
+        }
+    }
+}
diff --git a/Packages/OsaurusCore/Tests/Documents/Fixtures/xlsx/sample.xlsx b/Packages/OsaurusCore/Tests/Documents/Fixtures/xlsx/sample.xlsx
new file mode 100644
index 000000000..f2a42dd32
Binary files /dev/null and b/Packages/OsaurusCore/Tests/Documents/Fixtures/xlsx/sample.xlsx differ
diff --git a/Packages/OsaurusCore/Tests/Documents/PDFAdapterTests.swift b/Packages/OsaurusCore/Tests/Documents/PDFAdapterTests.swift
new file mode 100644
index 000000000..ed8033d38
--- /dev/null
+++ b/Packages/OsaurusCore/Tests/Documents/PDFAdapterTests.swift
@@ -0,0 +1,96 @@
+//
+//  PDFAdapterTests.swift
+//  osaurusTests
+//
+//  Exercises the text-layer PDF adapter. Synthesises tiny PDFs via Core
+//  Graphics so the test bundle doesn't carry binary fixtures. The
+//  image-only fallback path stays in the legacy `DocumentParser` switch
+//  for now; the adapter intentionally throws `.emptyContent` when there's
+//  no text layer so the shim can fall through.
+//
+
+import AppKit
+import CoreGraphics
+import Foundation
+import Testing
+
+@testable import OsaurusCore
+
+@Suite("PDFAdapter")
+struct PDFAdapterTests {
+
+    @Test func canHandle_acceptsPDFExtensionOnly() {
+        let pdfAdapter = PDFAdapter()
+        #expect(pdfAdapter.canHandle(url: URL(fileURLWithPath: "/tmp/a.pdf"), uti: nil))
+        #expect(pdfAdapter.canHandle(url: URL(fileURLWithPath: "/tmp/a.PDF"), uti: nil))
+        #expect(!pdfAdapter.canHandle(url: URL(fileURLWithPath: "/tmp/a.txt"), uti: nil))
+    }
+
+    @Test func parse_readsTextLayer() async throws {
+        let pdfURL = try Self.writePDF(text: "Hello PDF body content")
+        defer { try? FileManager.default.removeItem(at: pdfURL) }
+
+        let parsed = try await PDFAdapter().parse(url: pdfURL, sizeLimit: 0)
+        #expect(parsed.formatId == "pdf")
+        #expect(parsed.textFallback.contains("Hello PDF body content"))
+    }
+
+    @Test func parse_throwsEmptyContentForPDFWithNoTextLayer() async throws {
+        let blankURL = try Self.writeBlankPDF()
+        defer { try? FileManager.default.removeItem(at: blankURL) }
+
+        await #expect(throws: DocumentAdapterError.self) {
+            _ = try await PDFAdapter().parse(url: blankURL, sizeLimit: 0)
+        }
+    }
+
+    @Test func parse_throwsSizeLimitExceededAboveCap() async throws {
+        let pdfURL = try Self.writePDF(text: "tiny")
+        defer { try? FileManager.default.removeItem(at: pdfURL) }
+
+        await #expect(throws: DocumentAdapterError.self) {
+            _ = try await PDFAdapter().parse(url: pdfURL, sizeLimit: 1)
+        }
+    }
+
+    // MARK: - Fixtures
+
+    private static func writePDF(text: String) throws -> URL {
+        let fileName = "osaurus-pdf-\(UUID().uuidString).pdf"
+        let destination = FileManager.default.temporaryDirectory.appendingPathComponent(fileName)
+        var pageBounds = CGRect(x: 0, y: 0, width: 300, height: 200)
+        guard let pdfContext = CGContext(destination as CFURL, mediaBox: &pageBounds, nil) else {
+            throw FixtureError.contextCreationFailed
+        }
+        pdfContext.beginPDFPage(nil)
+
+        // Route AppKit string drawing into the PDF context; the assertions in
+        // this suite rely on the drawn string being readable back as text.
+        NSGraphicsContext.saveGraphicsState()
+        NSGraphicsContext.current = NSGraphicsContext(cgContext: pdfContext, flipped: false)
+        let styled = NSAttributedString(
+            string: text, attributes: [.font: NSFont.systemFont(ofSize: 14)]
+        )
+        styled.draw(at: NSPoint(x: 20, y: 100))
+        NSGraphicsContext.restoreGraphicsState()
+
+        pdfContext.endPDFPage()
+        pdfContext.closePDF()
+        return destination
+    }
+
+    private static func writeBlankPDF() throws -> URL {
+        let fileName = "osaurus-pdf-blank-\(UUID().uuidString).pdf"
+        let destination = FileManager.default.temporaryDirectory.appendingPathComponent(fileName)
+        var pageBounds = CGRect(x: 0, y: 0, width: 100, height: 100)
+        guard let pdfContext = CGContext(destination as CFURL, mediaBox: &pageBounds, nil) else {
+            throw FixtureError.contextCreationFailed
+        }
+        pdfContext.beginPDFPage(nil)  // one empty page, nothing drawn
+        pdfContext.endPDFPage()
+        pdfContext.closePDF()
+        return destination
+    }
+
+    private enum FixtureError: Error { case contextCreationFailed }
+}
diff --git a/Packages/OsaurusCore/Tests/Documents/PDFTableDetectorTests.swift b/Packages/OsaurusCore/Tests/Documents/PDFTableDetectorTests.swift
new file mode 100644
index 000000000..a29756e7d
--- /dev/null
+++ b/Packages/OsaurusCore/Tests/Documents/PDFTableDetectorTests.swift
@@ -0,0 +1,231 @@
+//
+//  PDFTableDetectorTests.swift
+//  osaurusTests
+//
+//  Layout-aware table extraction coverage for `PDFAdapter`. The detector
+//  stages (`clusterRows`, `cellsForRow`, `groupConsecutiveTabularRows`,
+//  and the pure-function `detect(glyphs:)`) are exercised with
+//  synthesised `PDFGlyph` grids — Core Graphics-generated test PDFs
+//  report character bounds that span trailing whitespace, which hides
+//  the real column gaps and would make end-to-end fixtures unreliable.
+//  An integration test below still verifies the adapter wraps everything
+//  into a `PDFDocumentRepresentation` and preserves the flat text fallback.
+//
+
+import AppKit
+import CoreGraphics
+import Foundation
+import Testing
+
+@testable import OsaurusCore
+
+@Suite("PDFAdapter table extraction")
+struct PDFTableDetectorTests {
+
+    // MARK: - Algorithm: row clustering
+
+    @Test func clusterRows_groupsByYTolerance() {
+        // Two rows at y=120 (higher on page, emitted first) and y=100.
+        // Within each row the glyphs are within 1pt of each other
+        // vertically (rowTolerance = 3).
+        let glyphs: [PDFGlyph] = [
+            Self.glyph("A", x: 10, y: 100),
+            Self.glyph("B", x: 30, y: 100.5),
+            Self.glyph("C", x: 50, y: 100),
+            Self.glyph("D", x: 10, y: 120),
+            Self.glyph("E", x: 30, y: 120.5),
+        ]
+        let rows = PDFTableDetector.clusterRows(glyphs)
+        #expect(rows.count == 2)
+        // Top of page first (higher PDF y).
+        #expect(rows.first?.map(\.scalar) == ["D", "E"])
+        #expect(rows.last?.map(\.scalar) == ["A", "B", "C"])
+    }
+
+    @Test func clusterRows_sortsWithinRowByX() {
+        // Glyphs arrive in scrambled x order — the clusterer must sort.
+        let glyphs: [PDFGlyph] = [
+            Self.glyph("C", x: 100, y: 50),
+            Self.glyph("A", x: 10, y: 50),
+            Self.glyph("B", x: 60, y: 50),
+        ]
+        let rows = PDFTableDetector.clusterRows(glyphs)
+        #expect(rows.first?.map(\.scalar) == ["A", "B", "C"])
+    }
+
+    // MARK: - Algorithm: row → cells
+
+    @Test func cellsForRow_splitsOnWideGap() {
+        // Up to two 6pt-wide glyphs per column, columns starting 50pt apart
+        // (38pt clear gap) — well above the 8pt threshold.
+        let row: [PDFGlyph] = [
+            Self.glyph("A", x: 10, y: 0, width: 6),
+            Self.glyph("a", x: 16, y: 0, width: 6),
+            Self.glyph("B", x: 60, y: 0, width: 6),
+            Self.glyph("b", x: 66, y: 0, width: 6),
+            Self.glyph("C", x: 110, y: 0, width: 6),
+        ]
+        let cells = PDFTableDetector.cellsForRow(row)
+        #expect(cells == ["Aa", "Bb", "C"])
+    }
+
+    @Test func cellsForRow_wordsInSameCellStayTogether() {
+        // "Net Rev" (start of "Net Revenue") inside a single cell. No space
+        // glyph is present (presumably filtered upstream); the 2pt gap it
+        // leaves between "t" and "R" is below the 8pt column threshold.
+        let row: [PDFGlyph] = [
+            Self.glyph("N", x: 10, y: 0, width: 6),
+            Self.glyph("e", x: 16, y: 0, width: 6),
+            Self.glyph("t", x: 22, y: 0, width: 6),
+            Self.glyph("R", x: 30, y: 0, width: 6),  // 2pt gap where the space was
+            Self.glyph("e", x: 36, y: 0, width: 6),
+            Self.glyph("v", x: 42, y: 0, width: 6),
+        ]
+        let cells = PDFTableDetector.cellsForRow(row)
+        #expect(cells == ["NetRev"])
+    }
+
+    @Test func cellsForRow_singleGlyphReturnsSingleCell() {
+        let row: [PDFGlyph] = [Self.glyph("X", x: 0, y: 0)]
+        #expect(PDFTableDetector.cellsForRow(row) == ["X"])
+    }
+
+    // MARK: - Algorithm: tabular grouping
+
+    @Test func groupConsecutive_collectsMultiCellRunsIntoOneTable() {
+        let rows: [[String]] = [
+            ["Header1", "Header2"],
+            ["a", "1"],
+            ["b", "2"],
+        ]
+        let tables = PDFTableDetector.groupConsecutiveTabularRows(rows)
+        #expect(tables.count == 1)
+        #expect(tables.first?.rowCount == 3)
+    }
+
+    @Test func groupConsecutive_splitsAcrossSingleCellBreaks() {
+        // Prose row in the middle cuts the table in two.
+        let rows: [[String]] = [
+            ["A", "1"],
+            ["B", "2"],
+            ["paragraph"],
+            ["C", "3"],
+            ["D", "4"],
+        ]
+        let tables = PDFTableDetector.groupConsecutiveTabularRows(rows)
+        #expect(tables.count == 2)
+        #expect(tables[0].rowCount == 2)
+        #expect(tables[1].rowCount == 2)
+    }
+
+    @Test func groupConsecutive_dropsIsolatedSingleTabularRows() {
+        // One tabular row on its own (form-field style: "Invoice No. 1234")
+        // shouldn't surface as a "table".
+        let rows: [[String]] = [
+            ["Invoice", "No.", "1234"]
+        ]
+        let tables = PDFTableDetector.groupConsecutiveTabularRows(rows)
+        #expect(tables.isEmpty)
+    }
+
+    @Test func groupConsecutive_emptyInputProducesEmptyOutput() {
+        #expect(PDFTableDetector.groupConsecutiveTabularRows([]).isEmpty)
+    }
+
+    // MARK: - Algorithm: end-to-end on synthetic glyphs
+
+    @Test func detect_endToEndSyntheticGrid() {
+        // 3×3 grid at y = 100 / 80 / 60; columns start 50pt apart (44pt gap).
+        var glyphs: [PDFGlyph] = []
+        let yCoords: [CGFloat] = [100, 80, 60]
+        let xCoords: [CGFloat] = [10, 60, 110]
+        let data = [
+            ["I", "Q", "P"],  // Header
+            ["a", "1", "9"],
+            ["b", "2", "8"],
+        ]
+        for (rowIdx, row) in data.enumerated() {
+            for (colIdx, ch) in row.enumerated() {
+                glyphs.append(
+                    Self.glyph(Character(ch), x: xCoords[colIdx], y: yCoords[rowIdx], width: 6)
+                )
+            }
+        }
+        let tables = PDFTableDetector.detect(glyphs: glyphs)
+        #expect(tables.count == 1)
+        #expect(tables.first?.rowCount == 3)
+        #expect(tables.first?.columnCount == 3)
+        #expect(tables.first?.rows.first == ["I", "Q", "P"])
+    }
+
+    // MARK: - Integration: adapter contract
+
+    @Test func parse_emitsPDFDocumentRepresentationWithTextFallback() async throws {
+        let url = try Self.writeHelloPDF()
+        defer { try? FileManager.default.removeItem(at: url) }
+
+        let document = try await PDFAdapter().parse(url: url, sizeLimit: 0)
+        guard let repr = document.representation.underlying as? PDFDocumentRepresentation else {
+            Issue.record("representation was not a PDFDocumentRepresentation")
+            return
+        }
+        #expect(repr.pageCount == 1)
+        #expect(repr.pages.first?.pageNumber == 1)
+        #expect(document.textFallback.contains("Hello"))
+    }
+
+    @Test func parse_propagatesEmptyContentForBlankPDF() async throws {
+        let url = try Self.writeBlankPDF()
+        defer { try? FileManager.default.removeItem(at: url) }
+
+        await #expect(throws: DocumentAdapterError.self) {
+            _ = try await PDFAdapter().parse(url: url, sizeLimit: 0)
+        }
+    }
+
+    // MARK: - Fixtures
+
+    private static func glyph(
+        _ scalar: Character,
+        x: CGFloat,
+        y: CGFloat,
+        width: CGFloat = 5,
+        height: CGFloat = 10
+    ) -> PDFGlyph {
+        PDFGlyph(scalar: scalar, rect: CGRect(x: x, y: y, width: width, height: height))
+    }
+
+    private static func writeHelloPDF() throws -> URL {
+        let url = FileManager.default.temporaryDirectory
+            .appendingPathComponent("osaurus-pdftable-hello-\(UUID().uuidString).pdf")
+        var mediaBox = CGRect(x: 0, y: 0, width: 300, height: 200)
+        guard let ctx = CGContext(url as CFURL, mediaBox: &mediaBox, nil) else {
+            throw WriterError.contextCreationFailed
+        }
+        ctx.beginPDFPage(nil)
+        let gc = NSGraphicsContext(cgContext: ctx, flipped: false)
+        NSGraphicsContext.saveGraphicsState()
+        NSGraphicsContext.current = gc
+        NSAttributedString(string: "Hello PDF", attributes: [.font: NSFont.systemFont(ofSize: 14)])
+            .draw(at: NSPoint(x: 20, y: 100))
+        NSGraphicsContext.restoreGraphicsState()
+        ctx.endPDFPage()
+        ctx.closePDF()
+        return url
+    }
+
+    private static func writeBlankPDF() throws -> URL {
+        let url = FileManager.default.temporaryDirectory
+            .appendingPathComponent("osaurus-pdftable-blank-\(UUID().uuidString).pdf")
+        var mediaBox = CGRect(x: 0, y: 0, width: 100, height: 100)
+        guard let ctx = CGContext(url as CFURL, mediaBox: &mediaBox, nil) else {
+            throw WriterError.contextCreationFailed
+        }
+        ctx.beginPDFPage(nil)
+        ctx.endPDFPage()
+        ctx.closePDF()
+        return url
+    }
+
+    private enum WriterError: Error { case contextCreationFailed }
+}
diff --git a/Packages/OsaurusCore/Tests/Documents/PlainTextAdapterTests.swift b/Packages/OsaurusCore/Tests/Documents/PlainTextAdapterTests.swift
new file mode 100644
index 000000000..bc18a3448
--- /dev/null
+++ b/Packages/OsaurusCore/Tests/Documents/PlainTextAdapterTests.swift
@@ -0,0 +1,84 @@
+//
+//  PlainTextAdapterTests.swift
+//  osaurusTests
+//
+//  Covers the plain-text migration adapter. Same behavioural contract as
+//  the legacy `DocumentParser.parsePlainText` — UTF-8, ISO-Latin-1 retry,
+//  character-cap truncation — plus the size-limit contract from the new
+//  adapter protocol.
+//
+
+import Foundation
+import Testing
+
+@testable import OsaurusCore
+
+@Suite("PlainTextAdapter")
+struct PlainTextAdapterTests {
+
+    @Test func canHandle_acceptsCommonTextExtensions() {
+        let subject = PlainTextAdapter()
+        for accepted in ["a.txt", "a.MD", "a.swift"] {  // includes an upper-case extension
+            #expect(subject.canHandle(url: URL(fileURLWithPath: "/tmp/\(accepted)"), uti: nil))
+        }
+        #expect(!subject.canHandle(url: URL(fileURLWithPath: "/tmp/a.pdf"), uti: nil))
+    }
+
+    @Test func parse_readsUtf8Content() async throws {
+        let fixture = try Self.write("hello\nutf8\n", filename: "hello.txt")
+        defer { try? FileManager.default.removeItem(at: fixture) }
+        // NOTE(review): sizeLimit 0 presumably means "no cap" — confirm against the adapter protocol.
+        let parsed = try await PlainTextAdapter().parse(url: fixture, sizeLimit: 0)
+        #expect(parsed.formatId == "plaintext")
+        #expect(parsed.filename.hasSuffix("hello.txt"))
+        #expect(parsed.textFallback.contains("hello"))
+        #expect(parsed.textFallback.contains("utf8"))
+    }
+
+    @Test func parse_fallsBackToLatin1ForNonUtf8Bytes() async throws {
+        // 0xE9 on its own is not valid UTF-8, but it decodes to `é` under ISO-Latin-1.
+        let latinBytes = Data([0xE9, 0x0A])
+        let name = "latin-\(UUID().uuidString).txt"
+        let fixture = FileManager.default.temporaryDirectory.appendingPathComponent(name)
+        try latinBytes.write(to: fixture)
+        defer { try? FileManager.default.removeItem(at: fixture) }
+        let parsed = try await PlainTextAdapter().parse(url: fixture, sizeLimit: 0)
+        #expect(parsed.textFallback.contains("é"))
+    }
+
+    @Test func parse_throwsEmptyContentForWhitespaceOnly() async throws {
+        let url = try Self.write("   \n\t\n", filename: "empty.txt")
+        defer { try? FileManager.default.removeItem(at: url) }
+        // Pin the exact case: a read or size failure is also a DocumentAdapterError and must not pass.
+        await #expect { _ = try await PlainTextAdapter().parse(url: url, sizeLimit: 0) } throws: { error in
+            if case .emptyContent? = error as? DocumentAdapterError { return true } else { return false }
+        }
+    }
+
+    @Test func parse_throwsSizeLimitExceededAboveCap() async throws {
+        // "hello world" is 11 bytes, well past a sizeLimit of 1 (unit presumably bytes — confirm).
+        let oversized = try Self.write("hello world", filename: "big.txt")
+        defer { try? FileManager.default.removeItem(at: oversized) }
+        await #expect(throws: DocumentAdapterError.self) {
+            _ = try await PlainTextAdapter().parse(url: oversized, sizeLimit: 1)
+        }
+    }
+
+    @Test func parse_truncatesLongContentWithMarker() async throws {
+        // NOTE(review): 500_002 presumably sits just above the adapter's character cap — confirm.
+        let longText = String(repeating: "a", count: 500_002)
+        let fixture = try Self.write(longText, filename: "long.txt")
+        defer { try? FileManager.default.removeItem(at: fixture) }
+        let parsed = try await PlainTextAdapter().parse(url: fixture, sizeLimit: 0)
+        #expect(parsed.textFallback.hasSuffix("character limit]"))
+    }
+
+    // MARK: - Helpers
+
+    private static func write(_ content: String, filename: String) throws -> URL {
+        // The UUID prefix gives every call a distinct temp path; callers delete the file themselves.
+        let target = FileManager.default.temporaryDirectory.appendingPathComponent("\(UUID().uuidString)-\(filename)")
+        try content.write(to: target, atomically: true, encoding: .utf8)
+        return target
+    }
+}
diff --git a/Packages/OsaurusCore/Tests/Documents/RichDocumentAdapterTests.swift b/Packages/OsaurusCore/Tests/Documents/RichDocumentAdapterTests.swift
new file mode 100644
index 000000000..5fb4f6631
--- /dev/null
+++ b/Packages/OsaurusCore/Tests/Documents/RichDocumentAdapterTests.swift
@@ -0,0 +1,70 @@
+//
+//  RichDocumentAdapterTests.swift
+//  osaurusTests
+//
+//  Covers the NSAttributedString-backed migration adapter across the
+//  extensions it claims today (DOCX, RTF, HTML). Uses HTML and RTF
+//  fixtures authored inline; the DOCX path is exercised indirectly
+//  through `canHandle` — building a real DOCX on the fly requires ZIP
+//  plumbing that will come with the high-fidelity DOCX reader in stage-4
+//  PR 11.
+//
+
+import Foundation
+import Testing
+
+@testable import OsaurusCore
+
+@Suite("RichDocumentAdapter")
+struct RichDocumentAdapterTests {
+
+    @Test func canHandle_acceptsAllRichDocumentExtensions() {
+        let subject = RichDocumentAdapter()
+        let claimed = ["docx", "doc", "rtf", "rtfd", "html", "htm"].map { URL(fileURLWithPath: "/tmp/a.\($0)") }
+        for candidate in claimed { #expect(subject.canHandle(url: candidate, uti: nil)) }
+        // A `.txt` path must be declined by this adapter.
+        #expect(!subject.canHandle(url: URL(fileURLWithPath: "/tmp/a.txt"), uti: nil))
+    }
+
+    @Test func parse_readsHTMLBodyAsPlainText() async throws {
+        // Visible text must survive while the markup itself is stripped.
+        let markup = "<html><body><h1>Title</h1><p>Body text</p></body></html>"
+        let page = try Self.write(markup, filename: "page.html")
+        defer { try? FileManager.default.removeItem(at: page) }
+
+        let parsed = try await RichDocumentAdapter().parse(url: page, sizeLimit: 0)
+        let text = parsed.textFallback
+        #expect(parsed.formatId == "richdoc")
+        #expect(text.contains("Title"))
+        #expect(text.contains("Body text"))
+        #expect(!text.contains("<h1>"))
+    }
+
+    @Test func parse_readsRTFAsPlainText() async throws {
+        // Minimal RTF: a plain run plus a bold group; both words must reach the text view.
+        let source = "{\\rtf1\\ansi Hello {\\b bold} world}"
+        let page = try Self.write(source, filename: "page.rtf")
+        defer { try? FileManager.default.removeItem(at: page) }
+        let text = try await RichDocumentAdapter().parse(url: page, sizeLimit: 0).textFallback
+        #expect(text.contains("Hello"))
+        #expect(text.contains("bold"))
+    }
+
+    @Test func parse_throwsSizeLimitExceededAboveCap() async throws {
+        // The HTML fixture is far longer than a sizeLimit of 1, so parsing must be refused.
+        let page = try Self.write("<html><body>hi</body></html>", filename: "big.html")
+        defer { try? FileManager.default.removeItem(at: page) }
+        await #expect(throws: DocumentAdapterError.self) {
+            _ = try await RichDocumentAdapter().parse(url: page, sizeLimit: 1)
+        }
+    }
+
+    // MARK: - Helpers
+
+    private static func write(_ content: String, filename: String) throws -> URL {
+        // The UUID prefix gives every call a distinct temp path; callers delete the file themselves.
+        let target = FileManager.default.temporaryDirectory.appendingPathComponent("\(UUID().uuidString)-\(filename)")
+        try content.write(to: target, atomically: true, encoding: .utf8)
+        return target
+    }
+}
diff --git a/Packages/OsaurusCore/Tests/Documents/WorkbookToolsTests.swift b/Packages/OsaurusCore/Tests/Documents/WorkbookToolsTests.swift
new file mode 100644
index 000000000..3b754c43f
--- /dev/null
+++ b/Packages/OsaurusCore/Tests/Documents/WorkbookToolsTests.swift
@@ -0,0 +1,181 @@
+//
+//  WorkbookToolsTests.swift
+//  osaurusTests
+//
+//  End-to-end tests for the `read_workbook` / `read_workbook_cell` /
+//  `write_workbook` agent tools. Uses the checked-in sample.xlsx fixture
+//  for the read paths and a temp directory for the write path so the
+//  three tools exercise the same XLSXAdapter / XLSXEmitter pair that
+//  agents see in production.
+//
+
+import Foundation
+import Testing
+
+@testable import OsaurusCore
+
+@Suite("Workbook agent tools")
+struct WorkbookToolsTests {
+
+    private let rootPath: URL  // temp working folder handed to every tool under test
+    private let fixturePath: URL  // copy of sample.xlsx placed inside `rootPath`
+
+    /// Creates a unique temp working folder and copies the bundled `sample.xlsx` into it.
+    /// NOTE(review): `rootPath` is never removed afterwards — a struct suite has no `deinit` to hook.
+    init() throws {
+        // Locate the fixture first so a missing resource throws before anything is created on disk.
+        let bundled = Bundle.module.url(forResource: "sample", withExtension: "xlsx", subdirectory: "Fixtures/xlsx")
+        guard let bundled else { throw FixtureError.missing }
+        let tmp = FileManager.default.temporaryDirectory
+            .appendingPathComponent("osaurus-wb-tools-\(UUID().uuidString)", isDirectory: true)
+        try FileManager.default.createDirectory(at: tmp, withIntermediateDirectories: true)
+        rootPath = tmp
+
+        // The tools resolve "sample.xlsx" relative to the working folder, so the copy lives there.
+        fixturePath = tmp.appendingPathComponent("sample.xlsx")
+        do {
+            try FileManager.default.copyItem(at: bundled, to: fixturePath)
+        } catch {
+            try? FileManager.default.removeItem(at: tmp)  // don't strand a half-built folder
+            throw error
+        }
+    }
+
+    // MARK: - read_workbook
+
+    @Test func readWorkbook_returnsSheetSummaries() async throws {
+        let reader = ReadWorkbookTool(rootPath: rootPath)
+        let response = try await reader.execute(argumentsJSON: #"{"path":"sample.xlsx"}"#)
+        let payload = try Self.successTextAsDict(response)
+        let sheets = payload["sheets"] as? [[String: Any]] ?? []
+        let sheetNames = sheets.map { $0["name"] as? String }
+        #expect(payload["path"] as? String == "sample.xlsx")
+        #expect(sheets.count == 2)
+        #expect(sheetNames.contains("Revenue"))
+        #expect(sheetNames.contains("Notes"))
+        // The Revenue summary must list the merged range A5:B5.
+        let revenueSummary = sheets.first { $0["name"] as? String == "Revenue" } ?? [:]
+        let mergedRanges = revenueSummary["mergedRanges"] as? [String] ?? []
+        #expect(mergedRanges.contains("A5:B5"))
+    }
+
+    @Test func readWorkbook_rejectsMissingFile() async throws {
+        // Either failure shape is accepted: an explicit error kind or an `"ok":false` flag.
+        let response = try await ReadWorkbookTool(rootPath: rootPath).execute(argumentsJSON: #"{"path":"nope.xlsx"}"#)
+        #expect(response.contains("\"kind\":\"execution_error\"") || response.contains("\"ok\":false"))
+    }
+
+    @Test func readWorkbook_rejectsPathOutsideRoot() async throws {
+        // The requested name avoids the words "outside"/"invalid" so an echoed path cannot satisfy the check.
+        let reply = try await ReadWorkbookTool(rootPath: rootPath).execute(argumentsJSON: #"{"path":"../away.xlsx"}"#)
+        #expect(reply.contains("outside") || reply.contains("invalid"))
+    }
+
+    // MARK: - read_workbook_cell
+
+    @Test func readWorkbookCell_returnsFormulaAndValue() async throws {
+        // NOTE(review): despite the name, only `ref` and `formula` are asserted; no value field is checked.
+        let request = #"{"path":"sample.xlsx","sheet":"Revenue","cell":"B4"}"#
+        let response = try await ReadWorkbookCellTool(rootPath: rootPath).execute(argumentsJSON: request)
+        let cell = try Self.successTextAsDict(response)
+
+        #expect(cell["ref"] as? String == "B4")
+        #expect(cell["formula"] as? String == "SUM(B2:B3)")
+    }
+
+    @Test func readWorkbookCell_rejectsMissingSheet() async throws {
+        // "Ghost" is not one of the fixture's two sheets (Revenue, Notes).
+        let request = #"{"path":"sample.xlsx","sheet":"Ghost","cell":"A1"}"#
+        let cellReader = ReadWorkbookCellTool(rootPath: rootPath)
+        let response = try await cellReader.execute(argumentsJSON: request)
+        #expect(response.contains("not found"))
+    }
+
+    // MARK: - write_workbook
+
+    @Test func writeWorkbook_emitsAndRoundTrips() async throws {
+        // One sheet with string, number, bool and formula cells plus a single merged range.
+        let request = #"""
+            {
+              "path": "output.xlsx",
+              "sheets": [
+                {
+                  "name": "Numbers",
+                  "cells": [
+                    {"ref": "A1", "type": "string", "value": "Label"},
+                    {"ref": "B1", "type": "number", "value": 42},
+                    {"ref": "A2", "type": "bool", "value": true},
+                    {"ref": "C1", "type": "formula", "formula": "B1*2"}
+                  ],
+                  "mergedRanges": ["A3:B3"]
+                }
+              ]
+            }
+            """#
+        let response = try await WriteWorkbookTool(rootPath: rootPath).execute(argumentsJSON: request)
+        let summary = try Self.successTextAsDict(response)
+        #expect(summary["sheetCount"] as? Int == 1)
+        let written = rootPath.appendingPathComponent("output.xlsx")
+        #expect(FileManager.default.fileExists(atPath: written.path))
+
+        // Read the file back through XLSXAdapter: the envelope alone does not
+        // prove that the requested cells reached the workbook on disk.
+        let reparsed = try await XLSXAdapter().parse(url: written, sizeLimit: 0)
+        guard let workbook = reparsed.representation.underlying as? Workbook else {
+            Issue.record("re-parsed representation was not a Workbook")
+            return
+        }
+        let firstSheet = workbook.sheets.first
+        #expect(firstSheet?.name == "Numbers")
+        let cells = firstSheet?.rows.flatMap(\.cells) ?? []
+        #expect(cells.contains { $0.reference == "A1" })
+        #expect(cells.contains { $0.reference == "B1" })
+        #expect(cells.contains { $0.reference == "C1" && $0.formula == "B1*2" })
+    }
+
+    @Test func writeWorkbook_rejectsNonXLSXPath() async throws {
+        // The destination ends in ".txt"; the reply is expected to mention "must end in".
+        let request = #"{"path":"report.txt","sheets":[{"name":"Sheet1","cells":[]}]}"#
+        let response = try await WriteWorkbookTool(rootPath: rootPath).execute(argumentsJSON: request)
+
+        #expect(response.contains("must end in"))
+    }
+
+    @Test func writeWorkbook_rejectsEmptySheets() async throws {
+        // An empty `sheets` array is sent; the reply is expected to mention "non-empty".
+        let request = #"{"path":"out.xlsx","sheets":[]}"#
+        let response = try await WriteWorkbookTool(rootPath: rootPath).execute(argumentsJSON: request)
+
+        #expect(response.contains("non-empty"))
+    }
+
+    // MARK: - Helpers
+
+    /// Unwraps a `ToolEnvelope.success` JSON string: reads `result.text`, decodes that text
+    /// as a second JSON object and returns it. Throws `FixtureError.notSuccessEnvelope` when
+    /// either layer has an unexpected shape; JSON syntax errors propagate unchanged.
+    private static func successTextAsDict(_ envelope: String) throws -> [String: Any] {
+        // Outer layer: the envelope itself.
+        let outer = try JSONSerialization.jsonObject(with: Data(envelope.utf8)) as? [String: Any]
+        guard let text = (outer?["result"] as? [String: Any])?["text"] as? String else {
+            throw FixtureError.notSuccessEnvelope(envelope)
+        }
+        let inner = try JSONSerialization.jsonObject(with: Data(text.utf8)) as? [String: Any]
+        guard let inner else {
+            throw FixtureError.notSuccessEnvelope(envelope)
+        }
+        return inner
+    }
+
+    private enum FixtureError: Error, CustomStringConvertible {  // suite-local plumbing failures
+        case missing, notSuccessEnvelope(String)
+        var description: String {
+            switch self {
+            case .missing:
+                return "Bundle.module lost the sample.xlsx fixture"
+            case let .notSuccessEnvelope(raw):
+                return "Not a success envelope: \(raw)"
+            }
+        }
+    }
+}
diff --git a/Packages/OsaurusCore/Tests/Documents/XLSXAdapterTests.swift b/Packages/OsaurusCore/Tests/Documents/XLSXAdapterTests.swift
new file mode 100644
index 000000000..e0ea7d123
--- /dev/null
+++ b/Packages/OsaurusCore/Tests/Documents/XLSXAdapterTests.swift
@@ -0,0 +1,143 @@
+//
+//  XLSXAdapterTests.swift
+//  osaurusTests
+//
+//  Validates the first real-fidelity document adapter end-to-end against
+//  a checked-in XLSX fixture (produced by xlsxwriter, matching what most
+//  business users ship). Ensures we surface sheet names, shared strings,
+//  numeric cells, formulas as source strings, merged ranges, and booleans
+//  — the round-trip checklist from the stage-2 business catalog.
+//
+
+import Foundation
+import Testing
+
+@testable import OsaurusCore
+
+@Suite("XLSXAdapter")
+struct XLSXAdapterTests {
+
+    // MARK: - canHandle
+
+    @Test func canHandle_acceptsXLSXOnly() {
+        // `.xlsx` in lower or upper case is claimed; `.xls` and `.csv` are declined.
+        let subject = XLSXAdapter()
+        for (name, expected) in [("a.xlsx", true), ("a.XLSX", true), ("a.xls", false), ("a.csv", false)] {
+            #expect(subject.canHandle(url: URL(fileURLWithPath: "/tmp/\(name)"), uti: nil) == expected)
+        }
+    }
+
+    // MARK: - parse against fixture
+
+    @Test func parse_surfacesSheetStructureAndValues() async throws {
+        // Exactly two sheets are expected; their order is not asserted, only membership.
+        let fixture = try Self.fixtureURL()
+        let parsed = try await XLSXAdapter().parse(url: fixture, sizeLimit: 0)
+
+        guard let workbook = parsed.representation.underlying as? Workbook else {
+            Issue.record("representation was not a Workbook")
+            return
+        }
+
+        let names = workbook.sheets.map(\.name)
+        #expect(workbook.sheets.count == 2)
+        #expect(names.contains("Revenue"))
+        #expect(names.contains("Notes"))
+    }
+
+    @Test func parse_preservesFormulasAndMergedRanges() async throws {
+        let fixture = try Self.fixtureURL()
+        let parsed = try await XLSXAdapter().parse(url: fixture, sizeLimit: 0)
+        let workbook = parsed.representation.underlying as? Workbook
+        guard let revenue = workbook?.sheets.first(where: { $0.name == "Revenue" }) else {
+            Issue.record("Revenue sheet missing")
+            return
+        }
+
+        // Formulas are compared as source text; the Revenue sheet is expected to hold exactly one.
+        let formulas = revenue.rows
+            .flatMap(\.cells)
+            .compactMap(\.formula)
+        #expect(formulas.count == 1)
+        #expect(formulas.first == "SUM(B2:B3)")
+
+        // The fixture's footer note is merged across A5:B5.
+        #expect(revenue.mergedRanges.contains { $0.reference == "A5:B5" })
+    }
+
+    @Test func parse_preservesSharedStringsAndNumbers() async throws {
+        let fixture = try Self.fixtureURL()
+        let parsed = try await XLSXAdapter().parse(url: fixture, sizeLimit: 0)
+
+        let workbook = parsed.representation.underlying as? Workbook
+        guard let workbook, let revenue = workbook.sheets.first(where: { $0.name == "Revenue" }) else {
+            Issue.record("Revenue sheet missing")
+            return
+        }
+
+        // The header labels and the first month name live in the shared-strings table.
+        for label in ["Month", "Amount", "January"] {
+            #expect(workbook.sharedStrings.contains(label))
+        }
+
+        // B2 holds 1200 as a numeric cell, not as a shared-string reference.
+        let b2 = revenue.rows.flatMap(\.cells).first { $0.reference == "B2" }
+        guard case .number(let amount) = b2?.value else {
+            Issue.record("B2 was not a number: \(String(describing: b2?.value))")
+            return
+        }
+        #expect(amount == 1200)
+    }
+
+    @Test func parse_surfacesBooleansOnNotesSheet() async throws {
+        let fixture = try Self.fixtureURL()
+        let parsed = try await XLSXAdapter().parse(url: fixture, sizeLimit: 0)
+        let workbook = parsed.representation.underlying as? Workbook
+        guard let notes = workbook?.sheets.first(where: { $0.name == "Notes" }) else {
+            Issue.record("Notes sheet missing")
+            return
+        }
+        var boolCount = 0
+        for cell in notes.rows.flatMap(\.cells) {
+            if case .bool = cell.value { boolCount += 1 }  // either flag value counts
+        }
+        #expect(boolCount == 2)
+    }
+
+    @Test func parse_textFallback_containsHumanReadableTable() async throws {
+        // The text view must carry a sheet heading, cell text, an "="-prefixed formula and a merge note.
+        let fixture = try Self.fixtureURL()
+        let text = try await XLSXAdapter().parse(url: fixture, sizeLimit: 0).textFallback
+
+        for fragment in ["## Sheet: Revenue", "Total", "=SUM(B2:B3)", "Merged: A5:B5"] {
+            #expect(text.contains(fragment))
+        }
+    }
+
+    @Test func parse_rejectsOversizedFile() async throws {
+        let fixture = try Self.fixtureURL()  // assumed to exceed the sizeLimit of 64 — TODO confirm
+        await #expect(throws: DocumentAdapterError.self) {
+            _ = try await XLSXAdapter().parse(url: fixture, sizeLimit: 64)
+        }
+    }
+
+    // MARK: - Fixture plumbing
+
+    /// Locates the checked-in `sample.xlsx` inside the test bundle, or throws `FixtureError.missing`.
+    private static func fixtureURL() throws -> URL {
+        // Package.swift copies "Documents/Fixtures" with `.copy`, which drops the
+        // leading `Documents/` segment; inside the bundle the file therefore sits
+        // at `Fixtures/xlsx/sample.xlsx`.
+        let located = Bundle.module.url(
+            forResource: "sample",
+            withExtension: "xlsx",
+            subdirectory: "Fixtures/xlsx"
+        )
+        guard let located else {
+            throw FixtureError.missing
+        }
+        return located
+    }
+
+    private enum FixtureError: Error { case missing }  // thrown when sample.xlsx is not in the test bundle
+}
diff --git a/Packages/OsaurusCore/Tests/Documents/XLSXEmitterTests.swift b/Packages/OsaurusCore/Tests/Documents/XLSXEmitterTests.swift
new file mode 100644
index 000000000..8d29e456a
--- /dev/null
+++ b/Packages/OsaurusCore/Tests/Documents/XLSXEmitterTests.swift
@@ -0,0 +1,273 @@
+//
+//  XLSXEmitterTests.swift
+//  osaurusTests
+//
+//  Proves the XLSX round trip: build a `Workbook` in memory, emit it
+//  through `XLSXEmitter`, re-parse the resulting file through
+//  `XLSXAdapter`, and assert that every fidelity feature we care about
+//  — sheet names, cell values, formula source strings, merged ranges,
+//  booleans — survives. Libxlsxwriter's output is strictly standards-
+//  conforming so the re-parse exercises the same CoreXLSX paths the
+//  read-side tests already pin.
+//
+
+import Foundation
+import Testing
+
+@testable import OsaurusCore
+
+@Suite("XLSXEmitter round trip")
+struct XLSXEmitterTests {
+
+    @Test func canEmit_onlyAcceptsWorkbookRepresentations() {
+        // A document is emittable only when its representation wraps a `Workbook`.
+        let emptyWorkbook = Workbook(sheets: [], sharedStrings: [])
+        let workbookDoc = StructuredDocument(
+            formatId: "xlsx",
+            filename: "a.xlsx",
+            fileSize: 0,
+            representation: AnyStructuredRepresentation(formatId: "xlsx", underlying: emptyWorkbook),
+            textFallback: ""
+        )
+
+        let emptyText = PlainTextRepresentation(text: "")
+        let plainDoc = StructuredDocument(
+            formatId: "plaintext",
+            filename: "a.txt",
+            fileSize: 0,
+            representation: AnyStructuredRepresentation(formatId: "plaintext", underlying: emptyText),
+            textFallback: ""
+        )
+
+        let emitter = XLSXEmitter()
+        let acceptsWorkbook = emitter.canEmit(workbookDoc)
+        let acceptsPlainText = emitter.canEmit(plainDoc)
+        #expect(acceptsWorkbook)
+        #expect(!acceptsPlainText)
+    }
+
+    // MARK: - Round trip
+
+    @Test func emit_thenReparse_preservesSheetsAndCells() async throws {
+        // NOTE(review): despite the name, only the sheet count and sheet names are compared here.
+        let original = Self.makeRoundTripFixture()
+        let target = Self.tempURL()
+        defer { try? FileManager.default.removeItem(at: target) }
+
+        try await XLSXEmitter().emit(Self.wrap(original), to: target)
+        #expect(FileManager.default.fileExists(atPath: target.path))
+
+        let reparsed = try await XLSXAdapter().parse(url: target, sizeLimit: 0)
+        guard let output = reparsed.representation.underlying as? Workbook else {
+            Issue.record("Re-parsed representation was not a Workbook")
+            return
+        }
+
+        #expect(output.sheets.count == original.sheets.count)
+        let namePairs = zip(original.sheets.map(\.name), output.sheets.map(\.name))
+        for (expected, actual) in namePairs {
+            #expect(expected == actual, "sheet name mismatch")
+        }
+    }
+
+    @Test func emit_preservesFormulaSourceStrings() async throws {
+        // The fixture's B4 carries "SUM(B2:B3)"; it must come back as the same source text.
+        let target = Self.tempURL()
+        defer { try? FileManager.default.removeItem(at: target) }
+        try await XLSXEmitter().emit(Self.wrap(Self.makeRoundTripFixture()), to: target)
+
+        let reparsed = try await XLSXAdapter().parse(url: target, sizeLimit: 0)
+        guard let output = reparsed.representation.underlying as? Workbook else {
+            Issue.record("Re-parsed representation was not a Workbook")
+            return
+        }
+
+        // Flatten sheet → row → cell and keep only the cells that carry a formula.
+        let formulas = output.sheets.flatMap { sheet in
+            sheet.rows.flatMap(\.cells).compactMap(\.formula)
+        }
+        #expect(formulas.contains("SUM(B2:B3)"))
+    }
+
+    @Test func emit_preservesMergedRanges() async throws {
+        // The fixture merges A5:B5 on the Revenue sheet; the range must survive the round trip.
+        let target = Self.tempURL()
+        defer { try? FileManager.default.removeItem(at: target) }
+        try await XLSXEmitter().emit(Self.wrap(Self.makeRoundTripFixture()), to: target)
+
+        let reparsed = try await XLSXAdapter().parse(url: target, sizeLimit: 0)
+        guard let output = reparsed.representation.underlying as? Workbook else {
+            Issue.record("Re-parsed representation was not a Workbook")
+            return
+        }
+
+        let mergedReferences = output.sheets.flatMap(\.mergedRanges).map(\.reference)
+        #expect(mergedReferences.contains("A5:B5"))
+    }
+
+    @Test func emit_preservesStringAndNumberCells() async throws {
+        // Both checks run independently: a failing A1 does not stop the B2 check.
+        let target = Self.tempURL()
+        defer { try? FileManager.default.removeItem(at: target) }
+        try await XLSXEmitter().emit(Self.wrap(Self.makeRoundTripFixture()), to: target)
+
+        let reparsed = try await XLSXAdapter().parse(url: target, sizeLimit: 0)
+        let workbook = reparsed.representation.underlying as? Workbook
+        guard let revenue = workbook?.sheets.first(where: { $0.name == "Revenue" }) else {
+            Issue.record("Revenue sheet missing after round trip")
+            return
+        }
+        let cells = revenue.rows.flatMap(\.cells)
+
+        // A1 is the "Month" header and must still be a string cell.
+        let a1 = cells.first { $0.reference == "A1" }
+        if case let .string(text) = a1?.value {
+            #expect(text == "Month")
+        } else {
+            Issue.record("A1 after round trip was \(String(describing: a1?.value))")
+        }
+
+        // B2 is the January amount and must still be numeric.
+        let b2 = cells.first { $0.reference == "B2" }
+        if case let .number(amount) = b2?.value {
+            #expect(amount == 1200)
+        } else {
+            Issue.record("B2 after round trip was \(String(describing: b2?.value))")
+        }
+    }
+
+    @Test func emit_preservesBooleans() async throws {
+        // The fixture's Notes sheet ends with one `true` and one `false` cell.
+        let target = Self.tempURL()
+        defer { try? FileManager.default.removeItem(at: target) }
+        try await XLSXEmitter().emit(Self.wrap(Self.makeRoundTripFixture()), to: target)
+
+        let reparsed = try await XLSXAdapter().parse(url: target, sizeLimit: 0)
+        let workbook = reparsed.representation.underlying as? Workbook
+        guard let notes = workbook?.sheets.first(where: { $0.name == "Notes" }) else {
+            Issue.record("Notes sheet missing")
+            return
+        }
+
+        var flags: [Bool] = []
+        for cell in notes.rows.flatMap(\.cells) {
+            if case .bool(let flag) = cell.value { flags.append(flag) }
+        }
+        #expect(flags.count == 2)
+        #expect(flags.contains(true))
+        #expect(flags.contains(false))
+    }
+
+    @Test func emit_rejectsNonWorkbookRepresentation() async throws {
+        // A plain-text representation is not a `Workbook`, so `emit` is expected to throw.
+        let text = PlainTextRepresentation(text: "")
+        let plain = StructuredDocument(
+            formatId: "plaintext",
+            filename: "a.txt",
+            fileSize: 0,
+            representation: AnyStructuredRepresentation(formatId: "plaintext", underlying: text),
+            textFallback: ""
+        )
+        let target = Self.tempURL()
+        defer { try? FileManager.default.removeItem(at: target) }
+
+        await #expect(throws: DocumentAdapterError.self) {
+            try await XLSXEmitter().emit(plain, to: target)
+        }
+    }
+
+    // MARK: - Fixture builder
+
+    /// Wraps `workbook` in a `StructuredDocument` tagged "xlsx" so it can be handed to the emitter.
+    /// File name, size and text fallback are fixed values ("fixture.xlsx", 0, "").
+    private static func wrap(_ workbook: Workbook) -> StructuredDocument {
+        let representation = AnyStructuredRepresentation(formatId: "xlsx", underlying: workbook)
+        return StructuredDocument(
+            formatId: "xlsx",
+            filename: "fixture.xlsx",
+            fileSize: 0,
+            representation: representation,
+            textFallback: ""
+        )
+    }
+
+    /// Builds an in-memory two-sheet workbook shaped like the checked-in `sample.xlsx`:
+    /// "Revenue" (strings, numbers, one formula, merge A5:B5) and "Notes" (strings, two bools).
+    private static func makeRoundTripFixture() -> Workbook {
+        let revenue = Sheet(
+            name: "Revenue",
+            rows: [
+                Row(
+                    index: 1,
+                    cells: [
+                        Cell(reference: "A1", value: .string("Month")),
+                        Cell(reference: "B1", value: .string("Amount")),
+                    ]
+                ),
+                Row(
+                    index: 2,
+                    cells: [
+                        Cell(reference: "A2", value: .string("January")),
+                        Cell(reference: "B2", value: .number(1200)),
+                    ]
+                ),
+                Row(
+                    index: 3,
+                    cells: [
+                        Cell(reference: "A3", value: .string("February")),
+                        Cell(reference: "B3", value: .number(950)),
+                    ]
+                ),
+                Row(
+                    index: 4,
+                    cells: [
+                        Cell(reference: "A4", value: .string("Total")),
+                        Cell(reference: "B4", value: .empty, formula: "SUM(B2:B3)"),  // formula cell, value left empty
+                    ]
+                ),
+                Row(
+                    index: 5,
+                    cells: [
+                        Cell(reference: "A5", value: .string("Generated for osaurus tests"))
+                    ]
+                ),
+            ],
+            mergedRanges: [CellRange(reference: "A5:B5")]  // footer note in row 5 spans both columns
+        )
+
+        let notes = Sheet(
+            name: "Notes",
+            rows: [
+                Row(
+                    index: 1,
+                    cells: [
+                        Cell(reference: "A1", value: .string("Key")),
+                        Cell(reference: "B1", value: .string("Value")),
+                    ]
+                ),
+                Row(
+                    index: 2,
+                    cells: [
+                        Cell(reference: "A2", value: .string("reviewer")),
+                        Cell(reference: "B2", value: .string("mimeding")),
+                    ]
+                ),
+                Row(
+                    index: 3,
+                    cells: [
+                        Cell(reference: "A3", value: .bool(true)),
+                        Cell(reference: "B3", value: .bool(false)),
+                    ]
+                ),
+            ],
+            mergedRanges: []
+        )
+
+        return Workbook(sheets: [revenue, notes], sharedStrings: [])
+    }
+
+    private static func tempURL() -> URL {
+        let name = "osaurus-xlsx-roundtrip-\(UUID().uuidString).xlsx"  // unique per call
+        return FileManager.default.temporaryDirectory.appendingPathComponent(name)
+    }
+}
diff --git a/Packages/OsaurusCore/Utils/DocumentParser.swift b/Packages/OsaurusCore/Utils/DocumentParser.swift
index 2bce81665..69b46ce55 100644
--- a/Packages/OsaurusCore/Utils/DocumentParser.swift
+++ b/Packages/OsaurusCore/Utils/DocumentParser.swift
@@ -57,6 +57,14 @@ enum DocumentParser {
         let ext = url.pathExtension.lowercased()
         let filename = url.lastPathComponent
 
+        // Registry-routed path. Returns nil when no adapter claims the file
+        // OR when the claiming adapter surfaces `.emptyContent` /
+        // `.unsupportedFormat`, so the legacy switch below still handles
+        // e.g. image-only PDFs and any format an adapter hasn't taken over.
+        if let attachments = try routeThroughRegistry(url: url, fileSize: fileSize) {
+            return attachments
+        }
+
         // PDF may fall back to image rendering if text extraction yields nothing
         if ext == "pdf" {
             return try parsePDFWithFallback(url: url, filename: filename, fileSize: fileSize)
@@ -147,9 +155,8 @@ enum DocumentParser {
             return try String(contentsOf: url, encoding: .utf8)
         } catch {
             // Retry with latin1 for binary-ish text files
-            if let data = try? Data(contentsOf: url),
-                let str = String(data: data, encoding: .isoLatin1)
-            {
+            let fallbackData = try? Data(contentsOf: url)
+            if let fallbackData, let str = String(data: fallbackData, encoding: .isoLatin1) {
                 return str
             }
             throw ParseError.readFailed(error.localizedDescription)
@@ -189,9 +196,8 @@ enum DocumentParser {
     private static func extractPDFText(from document: PDFDocument) -> String {
         var pages: [String] = []
         for i in 0 ..< document.pageCount {
-            if let page = document.page(at: i), let text = page.string,
-                !text.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty
-            {
+            let text = document.page(at: i)?.string
+            if let text, !text.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty {
                 pages.append(text)
             }
         }
@@ -261,4 +267,86 @@ enum DocumentParser {
             throw ParseError.readFailed(error.localizedDescription)
         }
     }
+
+    // MARK: - Registry shim
+
+    /// Tries the document format registry before the legacy switch. Adapter
+    /// parsing is async; `runBlocking` parks the calling thread on a semaphore
+    /// while a detached task runs it, so the synchronous `parseAll` contract
+    /// is preserved during the migration window. Once every caller is async
+    /// (stage-4 PR 10), this shim goes away. `fileSize` is currently unused.
+    ///
+    /// Return value conventions:
+    /// - `nil` — no adapter is registered, or an adapter declined the file
+    ///   via `.emptyContent` / `.unsupportedFormat`; legacy path handles it.
+    /// - non-nil — a single `.document` attachment built from the parsed
+    ///   document's `filename`, `textFallback`, and `fileSize`.
+    /// - throws — `ParseError.fileTooLarge` for `.sizeLimitExceeded`; every
+    ///   other error (read / write / cancelled / unknown) as `.readFailed`.
+    private static func routeThroughRegistry(url: URL, fileSize: Int) throws -> [Attachment]? {
+        let registry = DocumentFormatRegistry.shared
+        guard let adapter = registry.adapter(for: url) else { return nil }
+
+        let sizeLimit = DocumentLimits.limit(forFormatId: adapter.formatId)
+        do {
+            let document = try runBlocking {
+                try await adapter.parse(url: url, sizeLimit: sizeLimit)
+            }
+            return [
+                .document(
+                    filename: document.filename,
+                    content: document.textFallback,
+                    fileSize: Int(document.fileSize)
+                )
+            ]
+        } catch DocumentAdapterError.emptyContent, DocumentAdapterError.unsupportedFormat {
+            // Fall through so the legacy switch (image-only PDFs, formats
+            // without an adapter yet) still gets a shot.
+            return nil
+        } catch DocumentAdapterError.sizeLimitExceeded {
+            throw ParseError.fileTooLarge
+        } catch let DocumentAdapterError.readFailed(reason) {
+            throw ParseError.readFailed(reason)
+        } catch DocumentAdapterError.writeFailed, DocumentAdapterError.cancelled {
+            throw ParseError.readFailed("Adapter emitted non-read error for ingress")
+        } catch {
+            throw ParseError.readFailed(error.localizedDescription)
+        }
+    }
+
+    /// Synchronously awaits `body`: runs it in a detached task and parks the
+    /// calling thread on a semaphore until it finishes, then returns its value
+    /// or rethrows its error. Kept because `parseAll` is still called from
+    /// synchronous UI callbacks (see `FloatingInputCard`); out of scope for PR 3.
+    private static func runBlocking<T: Sendable>(_ body: @escaping @Sendable () async throws -> T) throws -> T {
+        let semaphore = DispatchSemaphore(value: 0)
+        let resultBox = UnfairLockedBox<Result<T, Error>?>(nil)
+
+        Task.detached {
+            let result: Result<T, Error>
+            do {
+                result = .success(try await body())
+            } catch {
+                result = .failure(error)
+            }
+            resultBox.set(result)
+            semaphore.signal()
+        }
+
+        semaphore.wait()  // NOTE(review): would deadlock if `body` needs the thread blocked here (e.g. main actor) — confirm adapters never do
+        switch resultBox.get()! {  // Non-nil by construction: the task calls `set` before `signal`.
+        case .success(let value): return value
+        case .failure(let error): throw error
+        }
+    }
+}
+
+/// Tiny `NSLock`-guarded box (not `os_unfair_lock`, despite the name) so `runBlocking`
+/// can hand a value back across the task/thread boundary; hence `@unchecked Sendable`.
+private final class UnfairLockedBox<Value>: @unchecked Sendable {
+    private var value: Value
+    private let lock = NSLock()
+    init(_ value: Value) { self.value = value }
+    func get() -> Value { lock.lock(); defer { lock.unlock() }; return value }
+    func set(_ newValue: Value) { lock.lock(); defer { lock.unlock() }; value = newValue }
 }
diff --git a/osaurus.xcworkspace/xcshareddata/swiftpm/Package.resolved b/osaurus.xcworkspace/xcshareddata/swiftpm/Package.resolved
index 55fbfe785..6c8c81f72 100644
--- a/osaurus.xcworkspace/xcshareddata/swiftpm/Package.resolved
+++ b/osaurus.xcworkspace/xcshareddata/swiftpm/Package.resolved
@@ -1,5 +1,5 @@
 {
-  "originHash" : "8b2c8aee839bee68c488f430fa30e77681adea30962ae12dd392a5b5cd847ae2",
+  "originHash" : "c1188a7167ae42da4fce56bed21b6f0ad5f487ffcd6ff8a2788d7af08befc3a2",
   "pins" : [
     {
       "identity" : "aachartkit-swift",
@@ -28,6 +28,15 @@
         "version" : "0.31.0"
       }
     },
+    {
+      "identity" : "corexlsx",
+      "kind" : "remoteSourceControl",
+      "location" : "https://github.com/CoreOffice/CoreXLSX.git",
+      "state" : {
+        "revision" : "1391f3832ea2eeee5186ea8abb81ea49ed0609cc",
+        "version" : "0.14.2"
+      }
+    },
     {
       "identity" : "eventsource",
       "kind" : "remoteSourceControl",
@@ -91,6 +100,15 @@
         "version" : "2.4.3"
       }
     },
+    {
+      "identity" : "libxlsxwriter",
+      "kind" : "remoteSourceControl",
+      "location" : "https://github.com/jmcnamara/libxlsxwriter.git",
+      "state" : {
+        "revision" : "2894634d65cee6021901a165bfc2bb0fad6da193",
+        "version" : "1.2.4"
+      }
+    },
     {
       "identity" : "mlx-swift",
       "kind" : "remoteSourceControl",
@@ -420,6 +438,15 @@
         "revision" : "2e61c12a1573d073618ee2f98f39149ea36068e1"
       }
     },
+    {
+      "identity" : "xmlcoder",
+      "kind" : "remoteSourceControl",
+      "location" : "https://github.com/maxdesiatov/XMLCoder.git",
+      "state" : {
+        "revision" : "ca932442d7481700f5434a7b138c47dd42d9902b",
+        "version" : "0.14.0"
+      }
+    },
     {
       "identity" : "yyjson",
       "kind" : "remoteSourceControl",
@@ -429,6 +456,15 @@
         "version" : "0.12.0"
       }
     },
+    {
+      "identity" : "zipfoundation",
+      "kind" : "remoteSourceControl",
+      "location" : "https://github.com/weichsel/ZIPFoundation.git",
+      "state" : {
+        "revision" : "22787ffb59de99e5dc1fbfe80b19c97a904ad48d",
+        "version" : "0.9.20"
+      }
+    },
     {
       "identity" : "zstd",
       "kind" : "remoteSourceControl",