From 74a79373824576c4d8b0a65a16e8af5e42745b7c Mon Sep 17 00:00:00 2001 From: Robert Date: Fri, 3 Jul 2026 10:14:47 -0700 Subject: [PATCH] docs: split docs into user and developer wikis --- .github/workflows/mkdocs.yml | 3 +- Docs/Code_Documentation/Docs_Site_Guide.md | 23 + Docs/Published/ADR/000-template.md | 30 + .../ADR/001-adr-workflow-and-governance.md | 32 + .../ADR/002-backlog-md-task-tracking.md | 32 + .../ADR/003-jobs-vs-scheduler-default.md | 32 + ...004-ai-generated-pr-change-summary-gate.md | 32 + .../005-bandit-touched-scope-security-gate.md | 32 + .../ADR/006-bandit-report-path-portability.md | 33 + ...h-workspace-canonical-first-slice-shell.md | 37 + ...t-key-persistence-and-indexeddb-offload.md | 37 + .../009-quick-chat-docs-assistant-modes.md | 37 + .../ADR/010-sandbox-vz-runtime-ownership.md | 46 + Docs/Published/ADR/011-audio-api-semantics.md | 55 + .../012-evaluations-resource-id-prefixes.md | 39 + .../ADR/013-evaluations-deletion-lifecycle.md | 39 + ...4-evaluations-openai-compatible-schemas.md | 39 + ...luations-existing-evaluator-integration.md | 39 + ...p-session-and-orchestration-persistence.md | 45 + ...017-scoped-org-team-rbac-core-semantics.md | 54 + ...overnance-endpoint-policy-and-route-map.md | 57 + .../019-security-request-edge-middleware.md | 55 + ...ment-per-user-paths-and-content-backend.md | 56 + ...services-lifecycle-startup-and-shutdown.md | 59 + .../022-embeddings-api-and-media-pipeline.md | 61 + ...tables-backend-storage-jobs-and-exports.md | 60 + ...deepseek-ocr-local-transformers-backend.md | 61 + ...-provider-adapter-routing-and-overrides.md | 60 + ...ecurity-outbound-egress-and-ssrf-policy.md | 57 + ...-security-aes-gcm-json-envelope-helpers.md | 59 + ...-restricted-legacy-pickle-compatibility.md | 61 + Docs/Published/ADR/README.md | 62 + .../2026-06-03-acp-rbac-confirmation-audit.md | 61 + .../2026-06-03-decision-inventory.md | 115 ++ ...26-06-03-evaluations-confirmation-audit.md | 54 + 
...-06-04-db-management-confirmation-audit.md | 50 + ...026-06-04-embeddings-confirmation-audit.md | 66 + ...provider-integration-confirmation-audit.md | 40 + ...-resource-governance-confirmation-audit.md | 45 + .../2026-06-04-security-confirmation-audit.md | 50 + ...4-services-lifecycle-confirmation-audit.md | 48 + ...26-06-07-data-tables-confirmation-audit.md | 48 + ...6-06-07-deepseek-ocr-confirmation-audit.md | 44 + ...ty-secrets-serialization-adoption-audit.md | 51 + Docs/Published/Architecture.md | 385 +++++ .../Code_Documentation/Data_Flow_Atlas.md | 1295 +++++++++++++++++ .../Code_Documentation/Docs_Site_Guide.md | 23 + .../WebUI_Extension/Knowledge_QA_Guide.md | 203 +++ Docs/Published/Wiki/Developer_Wiki.md | 68 + Docs/Published/Wiki/User_Wiki.md | 68 + Docs/Published/Wiki/index.md | 33 + Docs/Wiki/Developer_Wiki.md | 68 + Docs/Wiki/User_Wiki.md | 68 + Docs/Wiki/index.md | 33 + Docs/mkdocs.yml | 123 +- ...docs-audience-wikis-implementation-plan.md | 92 ++ .../2026-07-03-docs-audience-wikis-design.md | 41 + Helper_Scripts/refresh_docs_published.sh | 14 + README.md | 4 +- ...to-user-and-developer-wiki-entry-points.md | 64 + tldw_Server_API/tests/Docs/conftest.py | 7 + .../tests/Docs/test_docs_audience_wikis.py | 63 + 62 files changed, 4707 insertions(+), 41 deletions(-) create mode 100644 Docs/Published/ADR/000-template.md create mode 100644 Docs/Published/ADR/001-adr-workflow-and-governance.md create mode 100644 Docs/Published/ADR/002-backlog-md-task-tracking.md create mode 100644 Docs/Published/ADR/003-jobs-vs-scheduler-default.md create mode 100644 Docs/Published/ADR/004-ai-generated-pr-change-summary-gate.md create mode 100644 Docs/Published/ADR/005-bandit-touched-scope-security-gate.md create mode 100644 Docs/Published/ADR/006-bandit-report-path-portability.md create mode 100644 Docs/Published/ADR/007-research-workspace-canonical-first-slice-shell.md create mode 100644 Docs/Published/ADR/008-workspace-split-key-persistence-and-indexeddb-offload.md 
create mode 100644 Docs/Published/ADR/009-quick-chat-docs-assistant-modes.md create mode 100644 Docs/Published/ADR/010-sandbox-vz-runtime-ownership.md create mode 100644 Docs/Published/ADR/011-audio-api-semantics.md create mode 100644 Docs/Published/ADR/012-evaluations-resource-id-prefixes.md create mode 100644 Docs/Published/ADR/013-evaluations-deletion-lifecycle.md create mode 100644 Docs/Published/ADR/014-evaluations-openai-compatible-schemas.md create mode 100644 Docs/Published/ADR/015-evaluations-existing-evaluator-integration.md create mode 100644 Docs/Published/ADR/016-acp-session-and-orchestration-persistence.md create mode 100644 Docs/Published/ADR/017-scoped-org-team-rbac-core-semantics.md create mode 100644 Docs/Published/ADR/018-resource-governance-endpoint-policy-and-route-map.md create mode 100644 Docs/Published/ADR/019-security-request-edge-middleware.md create mode 100644 Docs/Published/ADR/020-db-management-per-user-paths-and-content-backend.md create mode 100644 Docs/Published/ADR/021-services-lifecycle-startup-and-shutdown.md create mode 100644 Docs/Published/ADR/022-embeddings-api-and-media-pipeline.md create mode 100644 Docs/Published/ADR/023-data-tables-backend-storage-jobs-and-exports.md create mode 100644 Docs/Published/ADR/024-deepseek-ocr-local-transformers-backend.md create mode 100644 Docs/Published/ADR/025-llm-provider-adapter-routing-and-overrides.md create mode 100644 Docs/Published/ADR/026-security-outbound-egress-and-ssrf-policy.md create mode 100644 Docs/Published/ADR/027-security-aes-gcm-json-envelope-helpers.md create mode 100644 Docs/Published/ADR/028-security-restricted-legacy-pickle-compatibility.md create mode 100644 Docs/Published/ADR/README.md create mode 100644 Docs/Published/ADR/inventory/2026-06-03-acp-rbac-confirmation-audit.md create mode 100644 Docs/Published/ADR/inventory/2026-06-03-decision-inventory.md create mode 100644 Docs/Published/ADR/inventory/2026-06-03-evaluations-confirmation-audit.md create mode 100644 
Docs/Published/ADR/inventory/2026-06-04-db-management-confirmation-audit.md create mode 100644 Docs/Published/ADR/inventory/2026-06-04-embeddings-confirmation-audit.md create mode 100644 Docs/Published/ADR/inventory/2026-06-04-llm-provider-integration-confirmation-audit.md create mode 100644 Docs/Published/ADR/inventory/2026-06-04-resource-governance-confirmation-audit.md create mode 100644 Docs/Published/ADR/inventory/2026-06-04-security-confirmation-audit.md create mode 100644 Docs/Published/ADR/inventory/2026-06-04-services-lifecycle-confirmation-audit.md create mode 100644 Docs/Published/ADR/inventory/2026-06-07-data-tables-confirmation-audit.md create mode 100644 Docs/Published/ADR/inventory/2026-06-07-deepseek-ocr-confirmation-audit.md create mode 100644 Docs/Published/ADR/inventory/2026-06-07-security-secrets-serialization-adoption-audit.md create mode 100644 Docs/Published/Architecture.md create mode 100644 Docs/Published/Code_Documentation/Data_Flow_Atlas.md create mode 100644 Docs/Published/User_Guides/WebUI_Extension/Knowledge_QA_Guide.md create mode 100644 Docs/Published/Wiki/Developer_Wiki.md create mode 100644 Docs/Published/Wiki/User_Wiki.md create mode 100644 Docs/Published/Wiki/index.md create mode 100644 Docs/Wiki/Developer_Wiki.md create mode 100644 Docs/Wiki/User_Wiki.md create mode 100644 Docs/Wiki/index.md create mode 100644 Docs/superpowers/plans/2026-07-03-docs-audience-wikis-implementation-plan.md create mode 100644 Docs/superpowers/specs/2026-07-03-docs-audience-wikis-design.md create mode 100644 backlog/tasks/task-12119 - Split-published-docs-navigation-into-user-and-developer-wiki-entry-points.md create mode 100644 tldw_Server_API/tests/Docs/test_docs_audience_wikis.py diff --git a/.github/workflows/mkdocs.yml b/.github/workflows/mkdocs.yml index fd646f03bf..d88a9ab829 100644 --- a/.github/workflows/mkdocs.yml +++ b/.github/workflows/mkdocs.yml @@ -51,9 +51,10 @@ jobs: run: | set -e test -f Docs/Published/index.md - for d in API-related 
Code_Documentation Deployment Evaluations Monitoring User_Guides; do + for d in API-related ADR Code_Documentation Deployment Evaluations Monitoring User_Guides Wiki; do test -d "Docs/Published/$d" || { echo "Missing Docs/Published/$d"; exit 1; } done + test -f Docs/Published/Architecture.md || { echo "Missing Docs/Published/Architecture.md"; exit 1; } # Ensure there are Markdown files to build count=$(find Docs/Published -type f -name "*.md" | wc -l) echo "Markdown files found: $count" diff --git a/Docs/Code_Documentation/Docs_Site_Guide.md b/Docs/Code_Documentation/Docs_Site_Guide.md index ca971d63e0..ff8a994d77 100644 --- a/Docs/Code_Documentation/Docs_Site_Guide.md +++ b/Docs/Code_Documentation/Docs_Site_Guide.md @@ -12,17 +12,30 @@ This document explains how the tldw_Server documentation site is organized, buil The public docs site is for OSS, self-host, and developer documentation. Hosted/commercial docs are excluded from the published site and should live in the private repo instead of this public docs pipeline. +The published site is audience-first: + +- `User Wiki`: install, run, configure, and use tldw_server. +- `Developer Wiki`: contribute to, test, package, and understand the codebase. + +These wiki pages are MkDocs landing pages in this repository, not the separate GitHub Wiki feature. + ## What Gets Published Only these folders are included on the public site: +- `Docs/Wiki` - `Docs/API-related` +- `Docs/ADR` - `Docs/Code_Documentation` - `Docs/Deployment` (excluding its nested `Monitoring`) - `Docs/Deployment/Monitoring` (published as top-level `Monitoring`) - `Docs/Evals` - `Docs/User_Guides` +These root-level files are also included when present: + +- `Docs/Architecture.md` + The curated content is synced into `Docs/Published/`. Do not manually edit files in `Docs/Published/` - they can be overwritten by the refresh script or CI. Hosted/commercial docs are excluded from this curated set even when they live under similarly named source areas. 
If a page exists mainly to run, sell, support, or differentiate the hosted SaaS service, keep it in the private repo rather than adding it to the public docs tree. @@ -32,6 +45,9 @@ Hosted/commercial docs are excluded from this curated set even when they live un - Script: `Helper_Scripts/refresh_docs_published.sh` - What it does: - Copies the approved folders from `Docs/` to `Docs/Published/` + - Copies the audience wiki landing pages from `Docs/Wiki` + - Copies architecture decision records from `Docs/ADR` + - Copies `Docs/Architecture.md` when present - Promotes `Docs/Deployment/Monitoring` to top-level `Docs/Published/Monitoring` - Removes the nested `Monitoring` under `Deployment` to avoid duplication - Preserves each section's `index.md` landing page @@ -88,6 +104,9 @@ To change the logo: replace `Docs/Logo.png` and run the refresh script. - The sidebar and ordering are defined explicitly in `mkdocs.yml` under `nav:` - When adding a new page you want visible in the sidebar, add a new entry under the appropriate section in `mkdocs.yml` - The nav uses paths relative to `Docs/Published/` +- Keep the top-level navigation audience-first: `Home`, `User Wiki`, `Developer Wiki`, and shared reference links. +- User-facing workflow docs belong under the `User Wiki` nav tree. +- Contributor, implementation, architecture, and docs-maintenance material belongs under the `Developer Wiki` nav tree. Example nav entry (under Code section): @@ -107,6 +126,10 @@ Tip: keep titles short and parallel (e.g., "Guide", "Reference", "Checklist"). 5. 
Commit and push; CI will refresh, build, and deploy the site Notes: +- Put audience chooser pages in `Docs/Wiki/` +- Put user-facing workflow guides in `Docs/User_Guides/` or `Docs/Getting_Started/` +- Put contributor-facing implementation guides in `Docs/Code_Documentation/` +- Put public architecture decision records in `Docs/ADR/` - Keep file names stable after they’re published to avoid broken links - Use relative links within the allowed folders; avoid linking to WIP docs outside the curated set - Prefer images stored under `Docs/assets/` or section subfolders; the refresh script copies section contents diff --git a/Docs/Published/ADR/000-template.md b/Docs/Published/ADR/000-template.md new file mode 100644 index 0000000000..9bdacb33ec --- /dev/null +++ b/Docs/Published/ADR/000-template.md @@ -0,0 +1,30 @@ +# ADR-{N}: {Short title} + +**Status:** Proposed | Accepted | Superseded by ADR-{N} +**Date:** YYYY-MM-DD +**Backfilled from:** {source path, or "not backfilled"} +**Decision owner:** {human/session/reviewer} +**Related task:** {Backlog task ID/link} +**Related spec/plan:** {paths} + +## Decision + +One sentence stating what was decided. + +## Context + +Why this decision was needed. + +## Alternatives considered + +| Option | Why rejected | +| --- | --- | +| {Alternative A} | {Reason} | + +## Consequences + +What this means going forward, including accepted tradeoffs. + +## Follow-up + +Optional implementation, audit, or documentation follow-up links. 
diff --git a/Docs/Published/ADR/001-adr-workflow-and-governance.md b/Docs/Published/ADR/001-adr-workflow-and-governance.md new file mode 100644 index 0000000000..43bb1a2a58 --- /dev/null +++ b/Docs/Published/ADR/001-adr-workflow-and-governance.md @@ -0,0 +1,32 @@ +# ADR-001: ADR Workflow And Governance + +**Status:** Accepted +**Date:** 2026-06-02 +**Backfilled from:** not backfilled +**Decision owner:** User + Codex collaboration session +**Related task:** TASK-506, TASK-507, TASK-508 +**Related spec/plan:** `Docs/superpowers/specs/2026-06-02-adr-workflow-adoption-design.md`, `Docs/superpowers/plans/2026-06-02-adr-workflow-adoption-stage-1-implementation-plan.md` + +## Decision + +Use `Docs/ADR/` as the canonical home for Architecture Decision Records and require ADR assessment for substantial specs, implementation plans, and PRs. + +## Context + +Architecture decisions existed in scattered design docs, plans, review packets, and embedded ADR-like sections. The project needs a lightweight durable record that explains why architectural rules exist without replacing Backlog.md, Superpowers specs, implementation plans, or module documentation. + +## Alternatives considered + +| Option | Why rejected | +| --- | --- | +| Big-bang migration | Too much churn and too high a risk of converting stale decisions into accepted policy. | +| Decision index before ADRs | Safer for audit, but delays the actual ADR workflow. | +| Module-by-module only | Too passive; it would not establish a repo-wide standard. | + +## Consequences + +Significant durable architecture decisions need ADRs. Substantial specs, plans, and PRs need an explicit ADR assessment. Accepted ADRs are immutable except for supersession metadata. Backfilled decisions use source metadata rather than pretending they were written at decision time. + +## Follow-up + +Create follow-up Backlog tasks for the decision inventory, module-by-module backfill, and possible global Superpowers updates. 
diff --git a/Docs/Published/ADR/002-backlog-md-task-tracking.md b/Docs/Published/ADR/002-backlog-md-task-tracking.md new file mode 100644 index 0000000000..8657f5ed84 --- /dev/null +++ b/Docs/Published/ADR/002-backlog-md-task-tracking.md @@ -0,0 +1,32 @@ +# ADR-002: Backlog.md Task Tracking + +**Status:** Accepted +**Date:** 2026-06-02 +**Backfilled from:** `AGENTS.md`, `Docs/superpowers/specs/2026-05-03-backlog-md-task-tracking-design.md` +**Decision owner:** User + prior Codex collaboration session +**Related task:** TASK-506, TASK-507, TASK-508 +**Related spec/plan:** `Docs/superpowers/specs/2026-05-03-backlog-md-task-tracking-design.md` + +## Decision + +Require an associated Backlog.md task before work changes repository files. + +## Context + +The repository needs a durable task and history layer that records why work exists, how it was planned, what files changed, what verification ran, and what was skipped or blocked. + +## Alternatives considered + +| Option | Why rejected | +| --- | --- | +| Git commits only | Commits do not capture task state, verification history, blockers, or reviewable unit boundaries. | +| GitHub issues only | Not every local agent task maps cleanly to a remote issue, and local work needs MCP/CLI-first task tracking. | +| Manual markdown notes | Too easy to duplicate or bypass; Backlog.md provides a consistent task workflow. | + +## Consequences + +Repo-changing work must search for or create a Backlog task before edits begin. Read-only investigation can proceed without a task. Backlog tasks link to specs, plans, PRs, verification, and final summaries; they do not replace those artifacts. + +## Follow-up + +None for Stage 1. 
diff --git a/Docs/Published/ADR/003-jobs-vs-scheduler-default.md b/Docs/Published/ADR/003-jobs-vs-scheduler-default.md new file mode 100644 index 0000000000..a0743ba6eb --- /dev/null +++ b/Docs/Published/ADR/003-jobs-vs-scheduler-default.md @@ -0,0 +1,32 @@ +# ADR-003: Jobs Vs Scheduler Default + +**Status:** Accepted +**Date:** 2026-06-02 +**Backfilled from:** `AGENTS.md` +**Decision owner:** User + prior project guidance +**Related task:** TASK-506, TASK-507, TASK-508 +**Related spec/plan:** `Docs/superpowers/specs/2026-06-02-adr-workflow-adoption-design.md` + +## Decision + +Use Jobs by default for new user-visible work that needs admin or ops visibility, and use Scheduler for internal orchestration where dependency handling is central. + +## Context + +The project has both Jobs and Scheduler systems. Future contributors need a durable default to avoid ad hoc queue/orchestration choices. + +## Alternatives considered + +| Option | Why rejected | +| --- | --- | +| Jobs for all async work | Internal dependency orchestration fits Scheduler better and does not always need user/admin controls. | +| Scheduler for all async work | User-facing work often needs pause, resume, drain, retries, quotas, RLS, status endpoints, and worker processes. | +| Decide per feature with no default | Repeated debates and inconsistent ownership would slow implementation and increase maintenance cost. | + +## Consequences + +New user-visible features or work needing admin controls should use Jobs. Internal orchestration with task dependencies, idempotency keys, and registered handlers should use Scheduler. Recurring schedules use APScheduler to enqueue into whichever backend the feature chooses. + +## Follow-up + +Later ADR inventory work should identify any module-specific exceptions. 
diff --git a/Docs/Published/ADR/004-ai-generated-pr-change-summary-gate.md b/Docs/Published/ADR/004-ai-generated-pr-change-summary-gate.md new file mode 100644 index 0000000000..8dcfb3c10a --- /dev/null +++ b/Docs/Published/ADR/004-ai-generated-pr-change-summary-gate.md @@ -0,0 +1,32 @@ +# ADR-004: AI-Generated PR Change Summary Gate + +**Status:** Accepted +**Date:** 2026-06-02 +**Backfilled from:** `AGENTS.md`, `Docs/superpowers/AI_GENERATED_PR_CHANGE_SUMMARY_POLICY_2026_04_17.md` +**Decision owner:** User + prior project guidance +**Related task:** TASK-506, TASK-507, TASK-508 +**Related spec/plan:** `Docs/superpowers/AI_GENERATED_PR_CHANGE_SUMMARY_POLICY_2026_04_17.md` + +## Decision + +Materially AI-authored PRs are not merge-ready until the human requester writes a `Change summary` explaining what changed and why those implementation choices were made. + +## Context + +The project allows AI-assisted development but needs human ownership of architectural and implementation rationale before merge. + +## Alternatives considered + +| Option | Why rejected | +| --- | --- | +| Allow AI-generated summaries | A diff recap or AI-authored rationale does not prove human understanding or ownership. | +| Require no summary | Reviewers lose a concise human explanation of why the implementation is the right one. | +| Ban AI-authored PRs | Too restrictive for the project workflow. | + +## Consequences + +AI-generated PRs need a human-written summary. If the requester cannot explain the rationale in their own words, the PR is not merge-ready. Agents may prepare context, but the merge gate requires human ownership. + +## Follow-up + +None for Stage 1. 
diff --git a/Docs/Published/ADR/005-bandit-touched-scope-security-gate.md b/Docs/Published/ADR/005-bandit-touched-scope-security-gate.md new file mode 100644 index 0000000000..35a3eb1f5a --- /dev/null +++ b/Docs/Published/ADR/005-bandit-touched-scope-security-gate.md @@ -0,0 +1,32 @@ +# ADR-005: Bandit Touched-Scope Security Gate + +**Status:** Superseded by ADR-006 +**Date:** 2026-06-02 +**Backfilled from:** `AGENTS.md` +**Decision owner:** User + prior project guidance +**Related task:** TASK-506, TASK-507, TASK-508 +**Related spec/plan:** `Docs/superpowers/specs/2026-06-02-adr-workflow-adoption-design.md` + +## Decision + +Run Bandit on touched Python/code scope before considering work complete; for docs-only changes, document why Bandit is not applicable. + +## Context + +The project handles authentication, media ingestion, sandboxing, providers, and local/self-hosted data. Security-sensitive Python changes need an explicit security scan gate that scales to the touched scope. + +## Alternatives considered + +| Option | Why rejected | +| --- | --- | +| Full-repo Bandit every time | Can be expensive and noisy for narrow changes. | +| No routine Bandit gate | Security regressions in touched code could be missed. | +| Run only in CI | Local completion should catch new findings before review. | + +## Consequences + +Agents should activate the project virtual environment and run `python -m bandit -r <touched_paths> -f json -o /tmp/bandit_<name>.json` for touched Python/code paths. New findings in changed code should be fixed before finishing. Docs-only work records Bandit as not applicable. + +## Follow-up + +None for Stage 1.
diff --git a/Docs/Published/ADR/006-bandit-report-path-portability.md b/Docs/Published/ADR/006-bandit-report-path-portability.md new file mode 100644 index 0000000000..b0fa82339a --- /dev/null +++ b/Docs/Published/ADR/006-bandit-report-path-portability.md @@ -0,0 +1,33 @@ +# ADR-006: Bandit Report Path Portability + +**Status:** Accepted +**Date:** 2026-06-03 +**Backfilled from:** not backfilled +**Decision owner:** User + Codex collaboration session +**Related task:** TASK-512 +**Related spec/plan:** PR #2230 review follow-up +**Supersedes:** ADR-005 + +## Decision + +Bandit remains required for touched Python/code scope, but report output paths must be portable and must not hard-code `/tmp`. + +## Context + +ADR-005 established the touched-scope Bandit gate but used `/tmp/bandit_<name>.json` as the example report path. The project supports Windows, macOS, and Linux, so hard-coding a Unix temporary directory makes the guidance less portable and creates unnecessary friction for agents working outside Unix-like environments. + +## Alternatives considered + +| Option | Why rejected | +| --- | --- | +| Keep `/tmp/bandit_<name>.json` | Not portable to all supported platforms. | +| Use platform-specific temporary environment variables | Adds shell-specific complexity to a simple project guidance command. | +| Omit the report output path | Loses the durable JSON artifact useful for recording verification evidence. | + +## Consequences + +Agents should activate the project virtual environment and run `python -m bandit -r <touched_paths> -f json -o bandit_<name>.json` or another explicitly chosen portable output path for touched Python/code paths. New findings in changed code should be fixed before finishing. Docs-only work records Bandit as not applicable. Generated `bandit_*.json` report artifacts are ignored by `.gitignore` and should not be committed unless explicitly requested. + +## Follow-up + +None for this follow-up.
diff --git a/Docs/Published/ADR/007-research-workspace-canonical-first-slice-shell.md b/Docs/Published/ADR/007-research-workspace-canonical-first-slice-shell.md new file mode 100644 index 0000000000..b2fc241854 --- /dev/null +++ b/Docs/Published/ADR/007-research-workspace-canonical-first-slice-shell.md @@ -0,0 +1,37 @@ +# ADR-007: Research Workspace Canonical First-Slice Shell + +**Status:** Accepted +**Date:** 2026-06-03 +**Backfilled from:** `Docs/Design/Workspace_Canonical_Model_Decision_2026_05.md`, `Docs/superpowers/specs/2026-05-06-tldw-product-roadmap-design.md`, `Docs/superpowers/plans/2026-05-06-tldw-product-roadmap-first-slice-implementation-plan.md` +**Decision owner:** Human requester approval of TASK-509 inventory defaults +**Related task:** TASK-514 +**Related spec/plan:** `Docs/superpowers/plans/2026-06-03-adr-follow-up-sprint-implementation-plan.md` + +## Decision + +Use `ResearchWorkspace` as the canonical shell for the first roadmap slice while keeping `ChatWorkspace` and `DocumentWorkspace` as specialized routes or modes instead of deleting or fully merging them. + +## Context + +The workspace roadmap needs one product model for sources, selected sources, chat, quick notes, generated artifacts, saved workspaces, source transfer, local persistence, and server sync boundaries. `ResearchWorkspace` already contains the broadest version of that model and is the best first-slice shell. `ChatWorkspace` and `DocumentWorkspace` still validate important workflows, but they should not define parallel product models during this slice. + +The first slice should consolidate the model before consolidating routes. Route consolidation remains a later decision. 
+ +## Alternatives considered + +| Option | Why rejected | +| --- | --- | +| Fully merge `ChatWorkspace` and `DocumentWorkspace` into `ResearchWorkspace` immediately | Too much route and workflow churn for the first slice; it risks breaking existing chat-first and document-focused flows before the shared model is stable. | +| Keep all three workspaces as independent product models | Creates duplicated state, divergent persistence semantics, and unclear roadmap ownership. | +| Create a new roadmap workspace from scratch | Duplicates existing `ResearchWorkspace` capabilities and delays first-value work. | + +## Consequences + +`ResearchWorkspace` owns the first-slice canonical workspace direction. `ChatWorkspace` and `DocumentWorkspace` remain available as specialized entry points or modes. Implementation plans should update the canonical model or write a new ADR before changing route ownership semantics. + +Server sync should use the existing `/api/v1/workspaces` family first, while browser-local workspace state remains a responsive cache and offline-friendly UI surface. + +## Follow-up + +- Use this ADR as the covering record for `INV-017` and the route-consolidation context in `INV-019`. +- If later work fully merges or removes `ChatWorkspace` or `DocumentWorkspace`, create a superseding ADR. 
diff --git a/Docs/Published/ADR/008-workspace-split-key-persistence-and-indexeddb-offload.md b/Docs/Published/ADR/008-workspace-split-key-persistence-and-indexeddb-offload.md new file mode 100644 index 0000000000..df3642cec0 --- /dev/null +++ b/Docs/Published/ADR/008-workspace-split-key-persistence-and-indexeddb-offload.md @@ -0,0 +1,37 @@ +# ADR-008: Workspace Split-Key Persistence And IndexedDB Offload + +**Status:** Accepted +**Date:** 2026-06-03 +**Backfilled from:** `Docs/Design/Workspace_Persistence_Architecture.md` +**Decision owner:** Human requester approval of TASK-509 inventory defaults +**Related task:** TASK-514 +**Related spec/plan:** `Docs/superpowers/plans/2026-06-03-adr-follow-up-sprint-implementation-plan.md` + +## Decision + +Persist browser-local workspace state using split `localStorage` keys with optional IndexedDB offload for heavy chat sessions and artifact payloads. + +## Context + +The Research Workspace persistence path outgrew a single monolithic `localStorage` blob. Workspaces need responsive browser-local state, offline-friendly behavior, migration from legacy payloads, and bounded storage growth while server-backed workspace APIs continue to provide the long-term sync surface. + +The split-key model stores an index under `tldw-workspace` and per-workspace snapshot/chat payloads under workspace-specific keys. Heavy chat and artifact payloads can be offloaded to IndexedDB and replaced with pointer metadata. + +## Alternatives considered + +| Option | Why rejected | +| --- | --- | +| Keep one monolithic `localStorage` payload | Increases write churn, payload size risk, and recovery difficulty as workspace count and artifact size grow. | +| Move all browser workspace persistence to IndexedDB | Adds complexity for every read/write path and removes a simple compatibility path for smaller payloads. 
| +| Make server persistence the only source for workspace UI state | Loses responsive local cache behavior and offline-friendly workflows; server sync is not yet the only required client state surface. | + +## Consequences + +Workspace persistence has a split index plus per-workspace payload keys. IndexedDB offload is feature-gated and optional, and failures fall back to inline payloads when possible. Legacy monolith payloads remain readable and are migrated into the split model. + +Persistence code must preserve payload bounds, cleanup stale per-workspace keys/offload records, and keep source-lineage and artifact metadata available even when heavy payload fields are offloaded. + +## Follow-up + +- Use this ADR as the covering record for `INV-018`. +- If server-backed workspace sync becomes the only accepted source of persisted workspace truth, create a superseding ADR. diff --git a/Docs/Published/ADR/009-quick-chat-docs-assistant-modes.md b/Docs/Published/ADR/009-quick-chat-docs-assistant-modes.md new file mode 100644 index 0000000000..eff4a823c9 --- /dev/null +++ b/Docs/Published/ADR/009-quick-chat-docs-assistant-modes.md @@ -0,0 +1,37 @@ +# ADR-009: Quick Chat Docs Assistant Modes + +**Status:** Accepted +**Date:** 2026-06-03 +**Backfilled from:** `Docs/Design/Quick_Chat_Docs_Assistant.md` +**Decision owner:** Human requester approval of TASK-509 inventory defaults +**Related task:** TASK-514 +**Related spec/plan:** `Docs/superpowers/plans/2026-06-03-adr-follow-up-sprint-implementation-plan.md` + +## Decision + +Quick Chat uses three explicit modes: `Chat`, `Docs Q&A`, and `Browse Guides`. + +## Context + +Quick Chat needs to provide lightweight help without replacing the main chat UI or creating a new backend surface. The helper supports normal conversation, retrieval-grounded documentation answers, and deterministic local guidance from tutorials and curated workflow cards. + +The modal owns mode switching and route handoff. 
`Docs Q&A` uses the existing RAG search API with a docs-focused retrieval profile. `Browse Guides` uses local tutorial and workflow-guide registries so guidance remains fast and deterministic even when retrieval quality varies. + +## Alternatives considered + +| Option | Why rejected | +| --- | --- | +| Single chat mode for all helper interactions | Blurs retrieval-grounded documentation answers, deterministic guide browsing, and normal model conversation into one unclear path. | +| Add a new backend endpoint for Quick Chat docs help | Existing RAG search already provides the needed retrieval surface; a new endpoint would add ownership and testing burden without a clear need. | +| Generate workflow cards dynamically | Curated/static workflow cards and validated overrides are more deterministic and easier to test for route-aware help. | + +## Consequences + +Quick Chat mode selection must preserve the intended send path: standard chat for `Chat`, docs-scoped RAG for `Docs Q&A`, and local tutorial/workflow lookup for `Browse Guides`. + +Docs retrieval profile settings, citation formatting, and suggested-page generation are part of the Quick Chat contract. Browse-mode cards remain curated or validated rather than free-form generated. + +## Follow-up + +- Use this ADR as the covering record for `INV-020`. +- If Quick Chat later adds a dedicated backend API or generated workflow-card system, create a superseding ADR or a new ADR for that durable change. 
diff --git a/Docs/Published/ADR/010-sandbox-vz-runtime-ownership.md b/Docs/Published/ADR/010-sandbox-vz-runtime-ownership.md new file mode 100644 index 0000000000..4ff67778cc --- /dev/null +++ b/Docs/Published/ADR/010-sandbox-vz-runtime-ownership.md @@ -0,0 +1,46 @@ +# ADR-010: Sandbox VZ Runtime Ownership + +**Status:** Accepted +**Date:** 2026-06-03 +**Backfilled from:** `Docs/Design/2026-05-02-apple-containerization-evaluation.md` +**Decision owner:** Human requester approval of ADR backfill continuation +**Related task:** TASK-515 +**Related spec/plan:** `Docs/superpowers/plans/2026-06-03-adr-follow-up-sprint-implementation-plan.md` + +## Decision + +Keep `vz_linux` as a repo-owned sandbox runtime path instead of requiring Apple `container`, while borrowing narrow OCI metadata and diagnostics ideas and treating networking as a separate policy milestone. + +## Context + +`vz_linux` already has a first-party execution path: the Python sandbox service owns admission and API behavior, the Swift `macos-vz-helper` owns prepared-host VM lifecycle and guest transport health, the Go `tldw-agent` owns guest command execution, and `SandboxImageStore` owns local bundle inventory and run clone manifests. + +Apple `container` and `containerization` provide useful prior art for Apple silicon Linux VMs, OCI image handling, VM service topology, resource diagnostics, and vsock guest control. They are broader than this project needs, though. `tldw_server` is a sandbox control plane, not a Docker-compatible container engine, and the current `deny_all` meaning depends on not attaching a guest network device. + +The source evaluation concluded that the project should preserve its working helper and guest-agent boundaries, use Apple projects as reference material, and move only narrow image-store metadata toward OCI compatibility before considering deeper package reuse. 
+ +## Alternatives considered + +| Option | Why rejected | +| --- | --- | +| Require Apple `container` for `vz_linux` | Makes the sandbox path depend on an external CLI and newer macOS virtualization/networking assumptions before the project intentionally changes host support. | +| Replace `macos-vz-helper` with Apple `container-apiserver` topology | Imports image, registry, network, and container UX assumptions that would blur sandbox policy ownership and add daemons before the existing helper responsibilities require them. | +| Replace `tldw-agent` with `vminitd` now | Adds guest-protocol churn before the current helper-backed execution path is stable enough; `vminitd` remains useful prior art for supervision and event semantics. | +| Keep image-store metadata entirely bundle-only | Blocks future OCI provenance and registry metadata even though metadata-only scaffolding can be added without changing helper bootability. | +| Enable vmnet networking as part of the same change | Attaching a network device changes the meaning of `deny_all`; networking needs its own policy design, diagnostics, and host-gated tests. | + +## Consequences + +`vz_linux` remains operator-repeatable without installing Apple `container`. The repo-owned bundle with `manifest.json`, `kernel`, `rootfs.img`, optional `initrd`, and guest-agent metadata remains the canonical near-term artifact for helper bootability. + +Apple `container` and `containerization` remain reference material, not default runtime dependencies. Any direct Swift package reuse should be narrow, version-gated, and separately reviewed, starting with OCI manifest/digest parsing before rootfs construction or runtime primitives. + +Image-store records may carry optional OCI/source metadata, but helper bootability still comes from template validation. Current bundles should continue to identify as `artifact_format=tldw_bundle`. + +`deny_all` for `vz_linux` continues to mean no attached guest network device. 
Any vmnet allowlist or networking work must start with a separate policy milestone and likely a new ADR. + +## Follow-up + +- Use this ADR as the covering record for `INV-016`. +- Create a superseding ADR before requiring Apple `container`, replacing `macos-vz-helper` or `tldw-agent`, or changing `vz_linux` network policy semantics. +- Future implementation work can continue the OCI-aware image-store metadata seam without changing VM boot, helper lifecycle, networking, or guest execution. diff --git a/Docs/Published/ADR/011-audio-api-semantics.md b/Docs/Published/ADR/011-audio-api-semantics.md new file mode 100644 index 0000000000..00f5adfb1b --- /dev/null +++ b/Docs/Published/ADR/011-audio-api-semantics.md @@ -0,0 +1,55 @@ +# ADR-011: Audio API Semantics + +**Status:** Accepted +**Date:** 2026-06-03 +**Backfilled from:** `Docs/Design/STT_TTS_Audio_API_Design.md` +**Decision owner:** Human requester approval of ADR backfill continuation +**Related task:** TASK-516 +**Related spec/plan:** `Docs/superpowers/plans/2026-06-03-adr-follow-up-sprint-implementation-plan.md` + +## Decision + +Use centralized Audio API auth, model-first TTS routing with configured fallback behavior, structured streaming errors by default, and non-streaming-only `return_download_link` semantics. + +## Context + +The Audio API exposes OpenAI-compatible speech, transcription, and streaming transcription surfaces: + +- `POST /api/v1/audio/speech` +- `POST /api/v1/audio/transcriptions` +- `WS /api/v1/audio/stream/transcribe` + +The implemented design centralizes HTTP auth through `get_request_user` and WebSocket auth through `_audio_ws_authenticate`, keeping single-user API key mode and multi-user JWT/API-key mode aligned across audio endpoints. + +TTS provider selection is model-first, using explicit model-to-provider routing before provider aliases and capability/fallback search. Adapter initialization failures are tracked in the provider registry and can be retried after a configured cooldown. 
Streaming TTS failures default to structured errors rather than embedding error text as audio bytes, while a compatibility switch can still opt into error-as-audio behavior. + +For generated speech storage, `return_download_link` is intentionally limited to non-streaming responses. The server can persist the generated audio and return storage headers while still returning the audio bytes in the response body. + +## Alternatives considered + +| Option | Why rejected | +| --- | --- | +| Keep separate ad hoc auth paths per audio endpoint | Would diverge single-user and multi-user behavior across HTTP speech, HTTP transcription, and streaming transcription surfaces. | +| Route TTS by provider first instead of model first | Makes OpenAI-compatible model names less predictable and can pick the wrong provider when a model has a known canonical adapter. | +| Treat adapter initialization failures as permanent until process restart | Makes transient provider startup failures harder to recover from when a bounded retry cooldown can safely recheck availability. | +| Emit TTS streaming errors as audio by default | Preserves compatibility for some callers but hides failures from clients that expect structured HTTP or generator errors. | +| Allow `return_download_link` on streaming responses | Streaming and storage registration have different completion semantics; returning a durable download link before the full payload is buffered would be misleading. | +| Return only a storage link for non-streaming generated speech | Breaks OpenAI-compatible callers that expect audio bytes in the response body. | + +## Consequences + +Audio endpoint auth should continue to route through the shared HTTP and WebSocket auth helpers instead of custom endpoint-local credential parsing. + +TTS adapters should preserve model-first selection and only fall back through configured provider priority and capability matching. Adapter failure retry behavior should remain explicit and cooldown-driven. 
+ +Structured streaming failures are the default contract. Compatibility-mode error-as-audio behavior remains a configuration escape hatch, not the normal behavior. + +`return_download_link=true` requires `stream=false`. Non-streaming responses can include `X-Download-Path` and `X-Generated-File-Id` while still returning audio bytes. + +This ADR does not decide TTS/STT preset storage ownership. That remains inventory-only under `INV-022` until a separate owner-reviewed decision is made. + +## Follow-up + +- Use this ADR as the covering record for `INV-021`. +- Create a separate ADR before changing Audio API auth ownership, TTS routing priority semantics, default streaming error behavior, or `return_download_link` streaming restrictions. +- Keep `INV-022` unresolved until preset storage ownership is reviewed. diff --git a/Docs/Published/ADR/012-evaluations-resource-id-prefixes.md b/Docs/Published/ADR/012-evaluations-resource-id-prefixes.md new file mode 100644 index 0000000000..a829fd7612 --- /dev/null +++ b/Docs/Published/ADR/012-evaluations-resource-id-prefixes.md @@ -0,0 +1,39 @@ +# ADR-012: Evaluations Resource ID Prefixes + +**Status:** Accepted +**Date:** 2026-06-03 +**Backfilled from:** `Docs/Evals/Evals-Plan-1.md`, `Docs/ADR/inventory/2026-06-03-evaluations-confirmation-audit.md` +**Decision owner:** Human requester approval of TASK-518 continuation +**Related task:** TASK-518 +**Related spec/plan:** `Docs/ADR/inventory/2026-06-03-evaluations-confirmation-audit.md` + +## Decision + +Use OpenAI-style type-prefixed IDs for primary Evaluations API resources: `eval_` for evaluations, `run_` for evaluation runs, and `dataset_` for datasets. + +## Context + +The Evaluations API exposes OpenAI-compatible resource surfaces for evaluations, runs, and datasets. These IDs are returned through public API responses, stored in the evaluations database, used by tests, and passed across service and runner boundaries. 
+ +The current implementation generates these prefixes in `EvaluationsDatabase.create_evaluation`, `EvaluationsDatabase.create_run`, and `EvaluationsDatabase.create_dataset`. `UnifiedEvaluationService.create_run` also pre-generates a `run_` ID before durable run persistence so audit records, idempotent schedulers, and task registration can refer to the same run consistently. + +## Alternatives considered + +| Option | Why rejected | +| --- | --- | +| Bare UUIDs for all resources | Removes quick resource-type recognition from logs, API clients, and human debugging. | +| Integer IDs | Makes cross-database migration, per-user storage, and API exposure more collision-prone and less OpenAI-compatible. | +| Prefix only at the API layer | Creates a split identity model where API IDs and persisted IDs differ, increasing translation and idempotency risk. | + +## Consequences + +Evaluations resource IDs are part of the public API contract. New primary resource families should choose explicit, non-overlapping prefixes instead of reusing these prefixes. + +The prefix identifies resource type only. It must not be used as an authorization signal, ownership proof, or database-routing subject. + +Tests and documentation should continue to assert the prefix contract for evaluations, runs, and datasets when those IDs are exposed to callers. + +## Follow-up + +- Use this ADR as the covering record for INV-010. +- Create a new ADR before replacing the prefixed ID contract or adding a primary Evaluations resource family with ambiguous ID semantics. 
diff --git a/Docs/Published/ADR/013-evaluations-deletion-lifecycle.md b/Docs/Published/ADR/013-evaluations-deletion-lifecycle.md new file mode 100644 index 0000000000..ef471ebdc9 --- /dev/null +++ b/Docs/Published/ADR/013-evaluations-deletion-lifecycle.md @@ -0,0 +1,39 @@ +# ADR-013: Evaluations Deletion Lifecycle + +**Status:** Accepted +**Date:** 2026-06-03 +**Backfilled from:** `Docs/Evals/Evals-Plan-1.md`, `Docs/ADR/inventory/2026-06-03-evaluations-confirmation-audit.md` +**Decision owner:** Human requester approval of TASK-518 continuation +**Related task:** TASK-518 +**Related spec/plan:** `Docs/ADR/inventory/2026-06-03-evaluations-confirmation-audit.md` + +## Decision + +Use soft deletes for evaluation definitions and hard deletes for datasets in the Evaluations API lifecycle. + +## Context + +Evaluation definitions can have associated runs, audit records, result history, and user-visible references. Deleting an evaluation should hide it from normal get/list/update paths without immediately erasing its historical relationship to runs and audit trails. + +Datasets are user-managed payload containers. The current implementation treats dataset deletion as storage cleanup: `EvaluationsDatabase.delete_dataset` removes the dataset row, while evaluation deletion sets `deleted_at` and normal evaluation queries filter `deleted_at IS NULL`. + +## Alternatives considered + +| Option | Why rejected | +| --- | --- | +| Hard-delete evaluations | Risks losing history and breaking run/audit references tied to a deleted evaluation definition. | +| Soft-delete datasets | Keeps potentially large sample payloads around after users explicitly request cleanup. | +| Use one deletion mode for every resource | Ignores the different retention needs of evaluation definitions versus dataset payload storage. | + +## Consequences + +Evaluation read, list, and update paths must filter out soft-deleted rows unless a future recovery/admin workflow explicitly opts into viewing them. 
+ +Dataset deletion is destructive. Callers and endpoints must preserve appropriate permission checks before invoking it, and future dataset recovery semantics require a new decision. + +The deletion lifecycle is independent of database backend choice. SQLite and PostgreSQL implementations should preserve the same logical behavior even if column types differ. + +## Follow-up + +- Use this ADR as the covering record for INV-011. +- Create a new ADR before adding dataset recovery, evaluation hard-delete, or retention-window behavior that changes these lifecycle semantics. diff --git a/Docs/Published/ADR/014-evaluations-openai-compatible-schemas.md b/Docs/Published/ADR/014-evaluations-openai-compatible-schemas.md new file mode 100644 index 0000000000..cdcdf2976f --- /dev/null +++ b/Docs/Published/ADR/014-evaluations-openai-compatible-schemas.md @@ -0,0 +1,39 @@ +# ADR-014: Evaluations OpenAI-Compatible Schemas + +**Status:** Accepted +**Date:** 2026-06-03 +**Backfilled from:** `Docs/Evals/Evals-Plan-1.md`, `Docs/ADR/inventory/2026-06-03-evaluations-confirmation-audit.md` +**Decision owner:** Human requester approval of TASK-518 continuation +**Related task:** TASK-518 +**Related spec/plan:** `Docs/ADR/inventory/2026-06-03-evaluations-confirmation-audit.md` + +## Decision + +Use separate request and response schemas for Evaluations API resources while preserving OpenAI-compatible response conventions such as `object`, Unix `created` timestamps, and list wrappers. + +## Context + +The Evaluations API needs to expose OpenAI-compatible evaluation, run, and dataset workflows while still supporting tldw-specific evaluation types and metadata. + +The schema modules keep request payloads, update payloads, and response payloads separate. 
`openai_eval_schemas.py` explicitly documents the OpenAI-compatible conventions, and `evaluation_schemas_unified.py` extends the current API surface while preserving resource `object` fields, `created` timestamps, and dedicated response models. + +## Alternatives considered + +| Option | Why rejected | +| --- | --- | +| Reuse database row dictionaries as API responses | Leaks persistence details, backend-specific timestamp formats, and internal fields into the public API. | +| Use one shared model for create, update, and response | Blurs required input fields, partial update semantics, generated fields, and output-only metadata. | +| Drop OpenAI-compatible response conventions | Breaks client expectations for resource objects, Unix timestamps, and list response shape. | + +## Consequences + +Schema changes must preserve the public API shape unless a superseding ADR explicitly changes compatibility goals. + +Internal database fields can evolve independently from API responses, but conversion code must keep generated IDs, resource `object` values, timestamps, and list wrappers consistent. + +tldw-specific extensions should be added through explicit schema fields or metadata rather than by leaking internal persistence rows. + +## Follow-up + +- Use this ADR as the covering record for INV-013. +- Create a new ADR before changing Evaluations API compatibility away from the OpenAI-style resource/list response shape. 
diff --git a/Docs/Published/ADR/015-evaluations-existing-evaluator-integration.md b/Docs/Published/ADR/015-evaluations-existing-evaluator-integration.md new file mode 100644 index 0000000000..70e13323ab --- /dev/null +++ b/Docs/Published/ADR/015-evaluations-existing-evaluator-integration.md @@ -0,0 +1,39 @@ +# ADR-015: Evaluations Existing Evaluator Integration + +**Status:** Accepted +**Date:** 2026-06-03 +**Backfilled from:** `Docs/Evals/Evals-Plan-1.md`, `Docs/ADR/inventory/2026-06-03-evaluations-confirmation-audit.md` +**Decision owner:** Human requester approval of TASK-518 continuation +**Related task:** TASK-518 +**Related spec/plan:** `Docs/ADR/inventory/2026-06-03-evaluations-confirmation-audit.md` + +## Decision + +Integrate Evaluations API orchestration by wrapping and delegating to existing evaluator modules instead of rewriting evaluator logic inside the API runner or endpoints. + +## Context + +The Evaluations module already has dedicated evaluator implementations for GEval, RAG, response quality, OCR, propositions, and related workflows. The API runner and service layer need to orchestrate persistence, run state, progress, webhooks, and result shaping without duplicating the scoring implementations. + +The current runner imports and delegates to existing components such as `ms_g_eval.run_geval`, `RAGEvaluator`, `ResponseQualityEvaluator`, proposition evaluation helpers, and the unified RAG pipeline. `UnifiedEvaluationService` maps high-level evaluation requests to these dedicated evaluator services. + +## Alternatives considered + +| Option | Why rejected | +| --- | --- | +| Rewrite evaluator logic inside `eval_runner.py` | Duplicates tested scoring behavior and increases drift between API and non-API evaluation paths. | +| Put evaluator-specific logic directly in FastAPI endpoints | Couples routing, auth, persistence, and scoring logic, making tests and future evaluator additions harder. 
| +| Replace local evaluator modules with a new external-only evaluation package | Would discard existing project-specific RAG, OCR, response-quality, and proposition evaluation behavior. | + +## Consequences + +The API runner and service layer own orchestration, persistence, state transitions, and response shaping. Dedicated evaluator modules own scoring behavior. + +Adding a new evaluator should register or map to a dedicated evaluator implementation rather than embedding scoring logic in endpoint handlers. + +Some adapters may still need sync-to-async bridging, provider resolution, and result normalization at the runner/service boundary. + +## Follow-up + +- Use this ADR as the covering record for INV-015. +- Create a new ADR before replacing the wrapper/delegation strategy with a centralized evaluator rewrite. diff --git a/Docs/Published/ADR/016-acp-session-and-orchestration-persistence.md b/Docs/Published/ADR/016-acp-session-and-orchestration-persistence.md new file mode 100644 index 0000000000..3b66ad2362 --- /dev/null +++ b/Docs/Published/ADR/016-acp-session-and-orchestration-persistence.md @@ -0,0 +1,45 @@ +# ADR-016: ACP Session And Orchestration Persistence + +**Status:** Accepted +**Date:** 2026-06-04 +**Backfilled from:** `Docs/Plans/2026-03-08-acp-persistence-registry-expansion-design.md` +**Decision owner:** Owner sign-off via 2026-06-04 continuation instruction after TASK-520 scope summary +**Related task:** TASK-520 +**Related spec/plan:** `Docs/Plans/2026-03-08-acp-persistence-registry-expansion-design.md`, `Docs/ADR/inventory/2026-06-03-acp-rbac-confirmation-audit.md` + +## Decision + +Persist ACP session, registry, policy, health, and permission-decision state in shared `Databases/acp_sessions.db`, and persist user-owned orchestration state in per-user `Databases/user_databases/<user_id>/orchestration.db` by default.
+ +## Context + +ACP sessions and orchestration work cannot be governed by process-local memory if users and admins need state to survive server restarts. ACP session state is also a shared administrative surface: session cleanup, agent registry, health history, permission policies, and permission-decision audit records are global operational concerns rather than one user's private workspace data. + +Orchestration projects, tasks, runs, reviews, workspaces, and workspace MCP server records are user-owned work. The implemented persistence path follows the existing per-user database convention and resolves the user database base directory through configuration, defaulting to `Databases/user_databases/<user_id>/`. + +The accepted current behavior is the bounded persistence behavior confirmed by TASK-519. This ADR does not accept the older setup-guide consolidation claim from the source plan, and it does not treat the legacy in-memory orchestration service class as the governing architecture. + +## Alternatives considered + +| Option | Why rejected | +| --- | --- | +| Keep ACP sessions and orchestration state in memory | Loses sessions, runs, reviews, registry state, and health/audit history on restart. | +| Store all ACP and orchestration state in one shared database | Mixes user-owned orchestration work with global ACP operational state and diverges from existing per-user database ownership patterns. | +| Store ACP sessions per user | Makes global admin visibility, cleanup, registry, health monitoring, and permission policy auditing harder. | +| Keep messages embedded in session rows | Allows unbounded row growth and makes fork slicing by message index harder. | +| Normalize token usage into a separate table | Adds joins to quota and listing paths where denormalized counters are sufficient.
| + +## Consequences + +`Databases/acp_sessions.db` is the shared ACP persistence boundary for sessions, session messages, agent registry data, health history, permission policies, and permission-decision records. ACP session messages live in a separate `session_messages` table, and token usage remains denormalized on session records for efficient quota/status reads. + +Per-user orchestration databases remain under the configured user database base directory, defaulting to `Databases/user_databases/<user_id>/orchestration.db`. Operational guidance and migrations must use the configured base directory rather than hard-coding only the shortened `user_databases/` path. + +SQLite settings should follow the implemented project pattern: WAL mode, foreign keys enabled, and short transactions for shared ACP writes. + +Any future move to PostgreSQL, a different orchestration ownership model, or a unified ACP/orchestration persistence backend requires a superseding ADR. + +## Follow-up + +- Use this ADR as the covering record for INV-023. +- Keep setup-guide consolidation, broader registry UX, and any non-implemented ACP registry claims as separate follow-up decisions.
diff --git a/Docs/Published/ADR/017-scoped-org-team-rbac-core-semantics.md b/Docs/Published/ADR/017-scoped-org-team-rbac-core-semantics.md new file mode 100644 index 0000000000..87736b606e --- /dev/null +++ b/Docs/Published/ADR/017-scoped-org-team-rbac-core-semantics.md @@ -0,0 +1,54 @@ +# ADR-017: Scoped Org Team RBAC Core Semantics + +**Status:** Accepted +**Date:** 2026-06-04 +**Backfilled from:** `Docs/Design/Org_Team_RBAC_Propagation_V2.md` +**Decision owner:** Owner sign-off via 2026-06-04 continuation instruction after TASK-520 scope summary +**Related task:** TASK-520 +**Related spec/plan:** `Docs/Design/Org_Team_RBAC_Propagation_V2.md`, `Docs/ADR/inventory/2026-06-03-acp-rbac-confirmation-audit.md` + +## Decision + +Scoped Org/Team RBAC is an opt-in permission overlay that defaults to `require_active`, filters admin-level permissions from scoped grants, derives active scope from JWT, API-key, or membership context rather than request headers, and allows MCP/tool execution permissions in scoped grants. + +## Context + +The project already has global roles and permissions. Organization and team memberships also carry role information, but those membership roles should not automatically become platform-wide permissions. Scoped RBAC adds a second, feature-flagged layer that maps org/team membership roles to scoped permission grants and merges those grants into the authenticated principal only when scoped propagation is enabled. + +The current implementation confirms the core semantics from the source design: + +- `ORG_RBAC_PROPAGATION_ENABLED` defaults off for backward compatibility. +- `ORG_RBAC_SCOPE_MODE` defaults to `require_active`. +- Scoped grants are filtered through an admin-level denylist before they merge into principal permissions. +- JWT users derive active org/team scope from claims or default memberships. +- API-key users derive scope from the key's org/team scope or memberships. +- Request headers are not a scope source. 
+- MCP and `tools.execute:*` permissions are eligible scoped grants. + +The accepted current behavior excludes implementation gaps identified by TASK-519: the admin mapping endpoints listed in the source design, resolver metrics/failure flags, and the older invalid-claim fallback behavior are not accepted here as implemented current architecture. + +## Alternatives considered + +| Option | Why rejected | +| --- | --- | +| Keep only global RBAC | Cannot express org/team membership permissions without granting platform-wide capabilities. | +| Enable scoped propagation by default | Risks changing existing authorization behavior for deployments that have not prepared active scope data. | +| Default to union across all memberships | Can grant permissions from inactive org/team contexts and weakens tenant scoping. | +| Allow admin-level permissions in scoped grants | Creates a privilege-escalation path from org/team membership roles into platform administration. | +| Use request headers for active scope | Lets clients influence authorization scope through a weaker and easier-to-spoof channel than authenticated claims or membership-derived defaults. | +| Exclude MCP/tool permissions from scoped grants | Prevents org/team roles from authorizing bounded tool execution even though those permissions are intentionally not in the admin denylist. | + +## Consequences + +Scoped RBAC remains additive to global RBAC and is controlled by feature flags. Deployments can keep legacy behavior by leaving scoped propagation disabled. + +When enabled, `require_active` is the default posture: scoped permissions apply only when an active org/team context is available. Invalid JWT active-scope claims currently fail closed with `403`; this ADR does not accept the source design's older default fallback behavior. + +Scoped permission mapping must continue to filter denylisted admin capabilities at runtime. 
Admin-level permission changes, mapping endpoints, resolver observability, and fallback semantics require separate implementation and, if they become durable architecture policy, a new or superseding ADR. + +MCP/tool permissions may be granted through scoped org/team mappings, including `tools.execute:*`, because they are outside the admin-level denylist. + +## Follow-up + +- Use this ADR as the covering record for INV-024. +- Track admin mapping endpoints, resolver metrics/failure flags, and any invalid-claim fallback change as follow-up implementation or decision work. diff --git a/Docs/Published/ADR/018-resource-governance-endpoint-policy-and-route-map.md b/Docs/Published/ADR/018-resource-governance-endpoint-policy-and-route-map.md new file mode 100644 index 0000000000..db8a4e71d8 --- /dev/null +++ b/Docs/Published/ADR/018-resource-governance-endpoint-policy-and-route-map.md @@ -0,0 +1,57 @@ +# ADR-018: Resource Governance Endpoint Policy and Route Map + +**Status:** Accepted +**Date:** 2026-06-04 +**Backfilled from:** `tldw_Server_API/app/core/Resource_Governance/README.md` +**Decision owner:** TASK-2233 confirmation and TASK-2234 backfill scope +**Related task:** TASK-2234 +**Related spec/plan:** `Docs/ADR/inventory/2026-06-04-resource-governance-confirmation-audit.md` + +## Decision + +New API endpoints must use claim-first authorization, explicitly decide Resource Governor applicability for latency/cost-sensitive or user-facing ingress, and own policy-store plus `route_map` coverage when governed; DB policy-store mode merges file `route_map` entries into DB policy snapshots with file precedence, and request ingress denies route-map entries that resolve to missing request policies. + +## Context + +The project has multiple endpoint families that can be expensive, latency-sensitive, or operationally sensitive. 
Contributors need a durable rule for how endpoint authorization and Resource Governance coverage are chosen so new routes do not rely on ad hoc mode checks, undocumented rate-limit heuristics, or policy IDs that only work in one policy-store mode. + +The confirmed current behavior is: + +- New endpoints use `get_auth_principal` plus `RequirePermission(...)`, `RequireRole(...)`, or `require_service_principal()` rather than granting/bypassing access through `AUTH_MODE` or single-user/multi-user mode checks. +- Latency/cost-sensitive or user-facing endpoints should decide whether Resource Governor policy applies. +- Governed ingress routes need a policy-store entry and `route_map` coverage keyed by path or tag. +- Middleware route-map resolution is path first, then tag. +- Resource Governor middleware enforces the request category at ingress; token, stream, job, and minute-budget categories require endpoint-level reserve/commit plumbing. +- In DB policy-store mode, DB policies are the policy source of truth, while the file `route_map` is merged into the snapshot and takes precedence over DB route-map entries on conflict. +- When request ingress resolves to a missing request policy, missing request limits default to deny. + +This ADR is deliberately bounded by the TASK-2233 audit. It does not claim every existing endpoint is already Resource Governor-covered or claim global fail-closed behavior for Redis outages, tokens, streams, jobs, or other non-request categories. + +## Alternatives considered + +| Option | Why rejected | +| --- | --- | +| Allow new endpoints to branch on `AUTH_MODE` or mode helpers for authorization | Reintroduces mode-specific authorization shortcuts and conflicts with the claim-first AuthNZ guardrails. | +| Leave Resource Governor applicability as an implicit per-endpoint implementation detail | Makes expensive or user-facing routes easy to add without policy ownership, route-map coverage, or tests. 
| +| Require DB policy-store mode to own both policies and route maps exclusively | Loses the repo-owned file route map that documents primary ingress coverage and makes path/tag mapping harder to review with code changes. | +| Let missing route-map policies fail open for request ingress | Allows a typo or missing DB seed to silently remove request protection from a governed route. | +| Treat middleware as full Resource Governor enforcement for all categories | Current middleware only reserves request units. Other categories need endpoint-level plumbing and should not be accepted as middleware behavior. | +| Claim all existing endpoints satisfy the new-endpoint governance rule | The confirmation audit verified the rule, representative coverage, and guardrails, not universal coverage for all historical routes. | + +## Consequences + +New endpoints should first establish claim-first auth dependencies. Admin/control surfaces should use claim-derived roles or permissions, not new mode-specific bypasses. + +Endpoint owners must make a Resource Governor applicability decision for latency/cost-sensitive or user-facing ingress. If governed, the route needs an explicit policy and a `route_map` entry. Path mappings take precedence over tag mappings, so path entries should be used for primary ingress routes where precise coverage matters. + +DB-backed Resource Governor deployments must seed every request policy referenced by the merged route map. YAML `policies:` are not runtime policies in DB mode, but YAML `route_map` entries still merge into the snapshot and override DB route-map conflicts. + +Missing request policies deny request ingress by default. This is a safety property for request-rate enforcement, not a blanket claim about all Resource Governor categories. 
Token limits with missing config can be unbounded unless another durable cap denies later, concurrency categories are not middleware-enforced, and Redis backend outage behavior remains configurable through fail-mode settings. + +Future broadening of Resource Governor coverage, Redis fail-mode posture, or endpoint-level token/stream/job plumbing should be tracked as implementation follow-up and, if it becomes a durable architecture rule, recorded in a new or superseding ADR. + +## Follow-up + +- Use this ADR as the covering record for INV-028. +- Consider adding a focused regression test for a route-map entry that resolves to a missing DB request policy and returns the expected request-ingress denial. +- Keep `Docs/ADR/inventory/2026-06-04-resource-governance-confirmation-audit.md` as the evidence record and scope boundary for this backfill. diff --git a/Docs/Published/ADR/019-security-request-edge-middleware.md b/Docs/Published/ADR/019-security-request-edge-middleware.md new file mode 100644 index 0000000000..38b1996e0d --- /dev/null +++ b/Docs/Published/ADR/019-security-request-edge-middleware.md @@ -0,0 +1,55 @@ +# ADR-019: Security Request-Edge Middleware + +**Status:** Accepted +**Date:** 2026-06-04 +**Backfilled from:** `tldw_Server_API/app/core/Security/README.md` +**Decision owner:** TASK-2247 confirmation and TASK-2248 backfill scope +**Related task:** TASK-2248 +**Related spec/plan:** `Docs/ADR/inventory/2026-06-04-security-confirmation-audit.md` + +## Decision + +Request-edge Security middleware is owned by the Security module: normal startup installs setup access guard, setup CSP, and security headers; request ID and drain-gate middleware are always installed; CSP behavior is path-scoped; and production keeps security headers enabled by default unless explicitly disabled. 
+ +## Context + +The project has several request-edge controls that need consistent startup wiring and path-sensitive behavior: request correlation, shutdown draining, setup UI access, setup-specific CSP, documentation/API CSP differences, and HTTP hardening headers. Keeping these concerns in one Security module boundary gives endpoint and feature owners a clear place to add tests and prevents each router from inventing its own request-edge policy. + +The TASK-2247 confirmation audit verified the current behavior that bounds this ADR: + +- `app/main.py` imports and wires Security middlewares during startup. +- Test startup still installs setup CSP and setup access guard, while explicitly skipping nonessential security headers in test mode. +- Normal startup installs setup CSP and setup access guard, computes `ENABLE_SECURITY_HEADERS`, defaults production security headers on when the variable is absent, and installs `SecurityHeadersMiddleware` when enabled. +- `DrainGateMiddleware` and `RequestIDMiddleware` are always installed. +- `SecurityHeadersMiddleware` sets response hardening headers, removes `Server`, applies path-scoped CSP behavior for setup/docs/API surfaces, and only enables HSTS when `SECURITY_ENABLE_HSTS` is true and the request is HTTPS or `X-Forwarded-Proto: https`. +- `RequestIDMiddleware` sanitizes or generates request and session identifiers, stores them on request state, propagates response headers, and mirrors request IDs into tracing baggage. +- `SetupAccessGuardMiddleware` gates `/setup`, permits loopback access, and applies denylist, allowlist, and explicit remote setup settings. +- `SetupCSPMiddleware` applies only to `/setup`, permits inline scripts, permits `eval` by default, and removes `eval` when `TLDW_SETUP_NO_EVAL` is set. + +This ADR deliberately covers request-edge middleware only. It does not accept a project-wide outbound egress/SSRF decision, a universal secret-management adoption claim, or a serialization policy decision. 
+ +## Alternatives considered + +| Option | Why rejected | +| --- | --- | +| Scatter request-edge behavior across feature routers | Makes setup access, CSP, request IDs, drain handling, and HTTP hardening dependent on local router choices instead of one startup-owned Security boundary. | +| Treat setup, docs, and API CSP as one global policy | Current behavior is intentionally path-sensitive, and setup has relaxed script needs that should not leak into normal API or docs responses. | +| Force HSTS for every response independent of deployment protocol | HSTS can break deployments behind proxies or local HTTP setups when not coordinated with HTTPS ingress, so the middleware keeps HSTS opt-in and HTTPS-aware. | +| Install request IDs and drain gate only in production | Test and development requests still need correlation IDs and deterministic drain behavior, so these middlewares should stay always installed. | +| Backfill one broad Security ADR for request edge, egress, secrets, and serialization | TASK-2247 found different caveats for those areas. Combining them would overclaim adoption and make future review harder. | + +## Consequences + +New request-edge middleware behavior belongs in `tldw_Server_API/app/core/Security/` and startup wiring in `app/main.py`, with path-specific tests under `tldw_Server_API/tests/Security/`. + +Production deployments keep Security headers enabled by default unless `ENABLE_SECURITY_HEADERS` explicitly disables them. Test mode and explicit disablement remain recognized caveats and must be called out when verifying behavior. + +HSTS remains opt-in and HTTPS-aware. Setup CSP remains intentionally more relaxed than normal API/docs CSP, including the default `eval` allowance unless `TLDW_SETUP_NO_EVAL` disables it. + +Outbound egress/SSRF policy, secret-management adoption, and serialization safety remain separate Security decisions. They should be confirmed and backfilled in separate bounded ADRs if they become priority work. 
+ +## Follow-up + +- Use this ADR as the covering record for the request-edge middleware portion of INV-029. +- Keep `Docs/ADR/inventory/2026-06-04-security-confirmation-audit.md` as the evidence record and scope boundary for this backfill. +- Consider separate ADRs for outbound egress/SSRF policy and for secret/serialization adoption after focused confirmation work. diff --git a/Docs/Published/ADR/020-db-management-per-user-paths-and-content-backend.md b/Docs/Published/ADR/020-db-management-per-user-paths-and-content-backend.md new file mode 100644 index 0000000000..85aff37dbd --- /dev/null +++ b/Docs/Published/ADR/020-db-management-per-user-paths-and-content-backend.md @@ -0,0 +1,56 @@ +# ADR-020: DB Management Per-User Paths and Content Backend + +**Status:** Accepted +**Date:** 2026-06-04 +**Backfilled from:** `tldw_Server_API/app/core/DB_Management/README.md` +**Decision owner:** TASK-2253 confirmation and TASK-2254 backfill scope +**Related task:** TASK-2254 +**Related spec/plan:** `Docs/ADR/inventory/2026-06-04-db-management-confirmation-audit.md` + +## Decision + +DB_Management owns per-user database path resolution through `DatabasePaths` under `USER_DB_BASE_DIR` by default, SQLite remains the default content-storage mode using per-user file paths by default, and PostgreSQL is an explicit shared content backend option that must pass startup validation when enabled. + +## Context + +The project stores content and user-scoped state across Media DB, ChaCha Notes, Prompts, Evaluations, Workflows, and related feature databases. Those stores need predictable local/self-hosted defaults, safe multi-user path separation, and an explicit path for deployments that want shared PostgreSQL-backed content storage. + +TASK-2253 confirmed the current DB Management behavior that bounds this ADR: + +- `DB_Management` is the module boundary for database path utilities, content backend selection, DB factories, migrations, and representative user-scoped stores. 
+- `DatabasePaths` resolves per-user directories from `USER_DB_BASE_DIR`, defaulting outside tests to `Databases/user_databases`, and derives Media, ChaCha Notes, and Prompts database paths from that base. +- Single-user mode maps missing storage IDs to the configured fixed single-user ID, while multi-user mode rejects missing user IDs for per-user storage. +- SQLite is the default content backend. In SQLite content mode, callers resolve per-user database files rather than receiving a shared content backend. +- PostgreSQL content mode is opt-in through content backend configuration. When enabled, DB Management creates and caches a shared PostgreSQL backend and Media/content runtime requires that backend. +- Startup validation calls PostgreSQL content backend validation and reraises runtime validation errors, so schema and required Media/sync RLS policy gaps stop startup when PostgreSQL content mode is configured. + +This ADR is intentionally bounded. It does not include AuthNZ/users DB persistence, does not claim that every DB family is fully PostgreSQL-backed, and does not remove documented path overrides or compatibility aliases. + +## Alternatives considered + +| Option | Why rejected | +| --- | --- | +| Let each DB caller choose its own default file path | Scatters path ownership, makes multi-user isolation inconsistent, and keeps legacy root-level Media DB patterns alive as normal architecture. | +| Make PostgreSQL the default content backend | Local/self-hosted single-user deployments still need a zero-service SQLite default, and PostgreSQL content mode requires explicit operational setup, schema readiness, and RLS validation. | +| Treat SQLite content mode as a shared backend abstraction | Current SQLite behavior intentionally uses per-user file paths instead of returning a shared content backend, which keeps file ownership and user isolation visible to callers. 
| +| Claim all DB families are PostgreSQL-backed | The confirmation evidence supports Media/content backend runtime plus representative backend-aware factories and dependencies, but some user-scoped stores still instantiate SQLite files directly. | +| Include AuthNZ `DATABASE_URL` and users DB persistence in this ADR | AuthNZ has separate persistence configuration and security semantics, so it needs a separate AuthNZ persistence decision if one is required. | +| Remove or ignore explicit SQLite path overrides and legacy aliases in the decision | Current behavior still supports explicit SQLite path overrides, test fallback directories, and selected deprecated aliases, so the ADR must preserve those caveats. | + +## Consequences + +New user-scoped database defaults should use `DatabasePaths` and should not introduce new root-level Media DB-style paths without a separate compatibility decision. + +SQLite remains the default content-storage mode. In SQLite mode, DB factories and API dependencies should resolve per-user file paths by default, while still respecting documented explicit SQLite path overrides such as `TLDW_CONTENT_SQLITE_PATH` or `[Database].sqlite_path`. + +PostgreSQL content mode remains opt-in. When a feature participates in PostgreSQL content mode, it must account for shared backend creation, schema readiness, and required RLS policy validation. Startup should fail on PostgreSQL content runtime validation errors rather than silently falling back to SQLite. + +Test-mode path isolation, deprecated compatibility aliases, and historical explicit path overrides remain recognized caveats. Documentation and tests should distinguish default production/local paths from test fallback paths. + +AuthNZ/users DB persistence and any future decision to make additional DB families fully PostgreSQL-backed remain separate decisions. + +## Follow-up + +- Use this ADR as the covering record for the DB Management portion of INV-030. 
+- Keep `Docs/ADR/inventory/2026-06-04-db-management-confirmation-audit.md` as the evidence record and caveat boundary for this backfill. +- Create separate ADRs if AuthNZ persistence policy or non-Media DB family PostgreSQL support becomes a durable architecture decision. diff --git a/Docs/Published/ADR/021-services-lifecycle-startup-and-shutdown.md b/Docs/Published/ADR/021-services-lifecycle-startup-and-shutdown.md new file mode 100644 index 0000000000..ddb6a6fefb --- /dev/null +++ b/Docs/Published/ADR/021-services-lifecycle-startup-and-shutdown.md @@ -0,0 +1,59 @@ +# ADR-021: Services Lifecycle Startup and Shutdown + +**Status:** Accepted +**Date:** 2026-06-05 +**Backfilled from:** `tldw_Server_API/app/services/README.md` +**Decision owner:** TASK-2259 confirmation and TASK-2260 backfill scope +**Related task:** TASK-2260 +**Related spec/plan:** `Docs/ADR/inventory/2026-06-04-services-lifecycle-confirmation-audit.md` + +## Decision + +FastAPI lifespan startup and shutdown orchestration is owned by focused Services helpers, and lifecycle-managed workers are owned through the worker lifecycle session with cooperative stop-event workers, explicit shutdown phases, bounded timeout/cancel fallback, and compatibility caveats for callback-only workers and legacy shutdown adapters. + +## Context + +The FastAPI application has many startup and shutdown responsibilities: core resource initialization, validation, background worker startup, scheduler setup, request/job drain gates, job-poller quiesce, worker shutdown, legacy component coordination, and final resource cleanup. Keeping all of that inline in `main.py` made ordering hard to test and made long-lived worker handles easy to scatter across module globals. + +TASK-2259 confirmed the current Services lifecycle behavior that bounds this ADR: + +- `main.py` constructs `LifespanWorkerRuntimeState`, passes it into `run_lifespan_startup_sequence(...)`, then passes the same object into `run_lifespan_shutdown_sequence(...)`. 
+- Startup helpers return explicit handles. The worker-bootstrap path returns a `WorkerLifecycleSession`, and `LifespanWorkerRuntimeState` stores that session for shutdown. +- Lifecycle-managed workers are declared as `WorkerSpec` values and started through `LifecycleWorkerEngine`, with `WorkerLifecycleSession` tracking handles, enabled/disabled state, stopped or quiesced workers, and diagnostic inventory. +- Stop-event task workers are the default strategy for new lifecycle-managed workers. Callback-only workers remain supported for components that expose shutdown callbacks instead of task handles. +- Shutdown runs in staged order: transition drain/gate, job-poller handoff and bounded quiesce, background-worker shutdown, coordinated legacy components, pre-worker cleanup, post-worker phase, post-worker services cleanup, and final resource cleanup. +- Within a shutdown phase, the lifecycle engine stops dependent workers before their dependencies and stops independent workers concurrently. +- Shutdown uses bounded waits and cancellation fallback so a stuck worker does not block the full application teardown indefinitely. + +This ADR is intentionally bounded. It covers Services lifecycle-managed workers and the FastAPI lifespan startup/shutdown sequence. It does not replace ADR-003's Jobs-vs-Scheduler ownership rule, does not claim that every background operation in the repository is Services-managed, and does not claim that all legacy shutdown adapters have been removed. + +## Alternatives considered + +| Option | Why rejected | +| --- | --- | +| Keep startup and shutdown logic inline in `main.py` | Inline lifecycle logic makes ordering, ownership, and failure semantics harder to test and turns the FastAPI entry point into a catch-all orchestration module. | +| Store worker tasks and stop events as scattered module globals | Scattered handle ownership makes shutdown incomplete by default and makes it difficult to know which worker owns which stop path. 
| +| Use direct task cancellation as the default worker shutdown strategy | Cancel-only shutdown gives workers no cooperative stop signal and makes graceful lease drain or task-local cleanup less reliable. | +| Stop every worker in one unphased global loop | A single loop obscures the required ordering between request/job drain, job-poller quiesce, background workers, legacy components, post-worker cleanup, and final resource cleanup. | +| Require all lifecycle workers to be stop-event task workers immediately | Some current components expose shutdown callbacks or still participate in legacy shutdown adapters; forcing immediate conversion would overstate the migration and risk breaking existing teardown paths. | +| Treat this as a general background-work ownership ADR | The confirmed evidence only supports Services lifespan-managed workers. Jobs/Scheduler defaults, external worker processes, and module-specific runtime ownership remain separate decisions. | + +## Consequences + +New FastAPI lifespan startup or shutdown work should be added through focused `startup_*.py`, `shutdown_*.py`, or `lifespan_*.py` Services helpers rather than growing large inline blocks in `main.py`. + +New long-running workers started by the Services lifespan path should prefer declarative `WorkerSpec` registration and stop-event task ownership. If a worker cannot expose a stop event yet, it should use an explicit callback-only strategy or a clearly caveated legacy adapter rather than an unowned task. + +Startup helpers should return explicit handle dataclasses or worker lifecycle sessions. Shutdown-needed worker handles should flow through `LifespanWorkerRuntimeState` by storing the `WorkerLifecycleSession`, not by adding ad hoc module globals. 
+ +Shutdown ordering should preserve the staged sequence confirmed by TASK-2259: transition drain/gate first, job-poller quiesce before background worker shutdown, coordinated legacy component shutdown before later cleanup, and final resource cleanup last. + +Worker shutdown should remain bounded. Stop events and callbacks get a configured timeout; unresponsive tasks are cancelled and awaited briefly so one stuck worker does not block teardown indefinitely. Bounded lease drain is not a guarantee that every external job finishes before shutdown proceeds. + +Legacy shutdown adapters and callback-only workers remain compatibility paths. Future cleanup can migrate more components into declarative lifecycle worker specs, but this ADR does not require removing all legacy paths in one change. + +## Follow-up + +- Use this ADR as the covering record for the Services lifecycle portion of INV-031. +- Keep `Docs/ADR/inventory/2026-06-04-services-lifecycle-confirmation-audit.md` as the evidence record and caveat boundary for this backfill. +- Create separate ADRs if a future change materially changes Jobs-vs-Scheduler ownership, external worker-process ownership, or legacy shutdown adapter removal. 
diff --git a/Docs/Published/ADR/022-embeddings-api-and-media-pipeline.md b/Docs/Published/ADR/022-embeddings-api-and-media-pipeline.md new file mode 100644 index 0000000000..0b5c1ddddb --- /dev/null +++ b/Docs/Published/ADR/022-embeddings-api-and-media-pipeline.md @@ -0,0 +1,61 @@ +# ADR-022: Embeddings API And Media Pipeline + +**Status:** Accepted +**Date:** 2026-06-05 +**Backfilled from:** `tldw_Server_API/app/core/Embeddings/README.md` +**Decision owner:** TASK-2261 confirmation and TASK-2262 backfill scope +**Related task:** TASK-2262 +**Related spec/plan:** `Docs/ADR/inventory/2026-06-04-embeddings-confirmation-audit.md` + +## Decision + +Embeddings use OpenAI-compatible request/response semantics with provider resolution safeguards, optional adapter-registry routing with legacy provider-config fallback, endpoint cache/batching/circuit-breaker reliability controls, and media embedding pipeline ownership where core Jobs owns the durable root `embeddings_pipeline` record while Redis Streams owns stage delivery. + +## Context + +Embeddings sit at the boundary between OpenAI-compatible public API behavior, multiple local and remote provider implementations, vector storage, reliability controls, and background media processing. The module needs predictable API semantics for clients, bounded provider selection rules for operators, and a clear ownership split between durable job status and asynchronous stage execution. + +TASK-2261 confirmed the current Embeddings behavior that bounds this ADR: + +- The API schemas use OpenAI-style embeddings request and response models, including required `model`, string/list/token-array input support, `encoding_format`, `dimensions`, `user`, forbidden extra fields, list response data, and usage fields. 
+- The create endpoint enforces request safeguards before provider execution, including empty-input rejection, list/token-array shape limits, per-model token limits, dimensions validation, create rate limits, and API-call billing limits. +- Provider resolution accepts explicit `x-provider`, provider-qualified model IDs, and model-name heuristics, then applies provider/model allowlists and rejects recognized but unsupported providers instead of silently falling through. +- The LLM embeddings adapter registry is an optional routing path gated by `LLM_EMBEDDINGS_ADAPTERS_ENABLED`; legacy provider configuration and direct provider execution remain the current fallback path. +- Reliability controls include endpoint TTL cache lookup/writeback, uncached request batching, provider-scoped circuit breakers, connection reuse, provider fallback behavior, health breaker visibility, and admin breaker reset/status endpoints. +- Explicit `x-provider` requests suppress provider fallback by default unless `EMBEDDINGS_ALLOW_FALLBACK_WITH_HEADER` enables it. +- Media embedding endpoints force backend ownership to core Jobs, create root Jobs records with `job_type="embeddings_pipeline"`, and enqueue Redis Streams stage messages for chunking, embedding, storage, and content work. +- Redis stage workers update the root Jobs result/status on stage progress, completion, and failure. The older Jobs worker path is explicitly labeled legacy. + +This ADR is intentionally bounded. It does not decide billing/accounting semantics beyond the confirmed create-endpoint limits and root Jobs status surface, does not decide local provider URL policy, does not decide ChromaDB versus pgvector storage evolution, and does not turn the legacy Jobs worker into the primary media pipeline path. 
+ +## Alternatives considered + +| Option | Why rejected | +| --- | --- | +| Let each provider expose provider-specific embedding request and response shapes | Breaks the OpenAI-compatible API contract that clients and tests rely on, and would push provider differences into callers. | +| Resolve providers only from server defaults | Current behavior intentionally supports explicit provider selection, provider-qualified model IDs, and model-name heuristics while still applying allowlist and unsupported-provider guards. | +| Force all embeddings through the LLM adapter registry immediately | The adapter registry is optional and feature-gated today; legacy provider-config and direct provider execution remain active compatibility paths. | +| Treat Redis Streams as the durable source of truth for media embedding status | Redis Streams is the stage-delivery mechanism. Current durable status and result exposure come from the root Jobs record. | +| Create durable Jobs records for every media embedding stage | The confirmed pipeline uses one durable root `embeddings_pipeline` job and Redis stage messages, avoiding a durable Jobs fan-out for every internal stage. | +| Include vector-store backend policy in this ADR | ChromaDB and optional pgvector behavior are related but separate storage-backend evolution decisions that need their own confirmation if they become governing policy. | +| Include local provider URL override policy in this ADR | Embeddings local API configuration has separate behavior from LLM provider URL policy and should not inherit INV-027 without a focused review. | + +## Consequences + +Embeddings API changes should preserve OpenAI-compatible request and response semantics unless a later ADR supersedes this one. New validation or policy checks should keep failure modes explicit and client-facing rather than silently falling through to unrelated providers or model defaults. 
+ +Provider integrations should maintain the current resolution order and guardrails: explicit provider requests, provider-qualified model IDs, and model-name heuristics can choose a provider, but allowlists and unsupported-provider checks remain authoritative. Explicit provider requests should continue to suppress fallback by default. + +The optional LLM embeddings adapter registry can expand, but code and docs must continue to account for the legacy provider-config/direct execution fallback path while it remains supported. + +Endpoint reliability controls are part of the accepted production shape. Changes to cache identity, request batching, circuit-breaker behavior, connection reuse, fallback behavior, or health/admin breaker surfaces should be reviewed as API reliability changes, not incidental refactors. + +Media embedding pipeline work should keep durable user/admin-visible status on the root Jobs record and use Redis Streams for internal stage delivery. Stage workers should report progress, completion, and failure back to the root job rather than creating competing durable status surfaces. + +Billing/accounting behavior, local provider URL policy, vector-store backend evolution, broader multi-tier cache architecture, and legacy Jobs worker removal remain separate decisions. + +## Follow-up + +- Use this ADR as the covering record for the Embeddings portion of INV-032. +- Keep `Docs/ADR/inventory/2026-06-04-embeddings-confirmation-audit.md` as the evidence record and caveat boundary for this backfill. +- Create separate ADRs if billing/accounting semantics, local provider URL policy, ChromaDB versus pgvector storage ownership, or legacy Jobs worker removal becomes a durable architecture decision. 
diff --git a/Docs/Published/ADR/023-data-tables-backend-storage-jobs-and-exports.md b/Docs/Published/ADR/023-data-tables-backend-storage-jobs-and-exports.md new file mode 100644 index 0000000000..d59a607ff3 --- /dev/null +++ b/Docs/Published/ADR/023-data-tables-backend-storage-jobs-and-exports.md @@ -0,0 +1,60 @@ +# ADR-023: Data Tables Backend Storage, Jobs, And Exports + +**Status:** Accepted +**Date:** 2026-06-07 +**Backfilled from:** `Docs/Design/Data_Tables_Backend.md` +**Decision owner:** TASK-2272 confirmation and TASK-2273 backfill scope +**Related task:** TASK-2273 +**Related spec/plan:** `Docs/ADR/inventory/2026-06-07-data-tables-confirmation-audit.md` + +## Decision + +Data Tables persist backend table state in per-user Media DB helpers, use Jobs-backed generation and regeneration through the `data_tables` worker, store bounded source snapshots for reproducible regeneration, expose table APIs by UUID while retaining numeric job IDs, and keep exports server-side through direct rendering or File Artifacts delegation. + +## Context + +Data Tables bridge user prompts, media/chat/RAG sources, LLM-generated structured output, durable table storage, job lifecycle controls, and downloadable exports. The backend needs one clear ownership model so table state, generation status, source snapshots, and export behavior do not drift across independent storage tables or competing worker abstractions. + +TASK-2272 confirmed the current Data Tables backend behavior that bounds this ADR: + +- Data Tables endpoints receive the per-user Media DB through `get_media_db_for_user`, while the worker resolves user-specific Media DB paths for sidecar execution. +- Media DB owns table metadata, selected source rows, source snapshots, column definitions, generated rows, user-visible table status, soft-delete/version metadata, and table UUIDs. 
+- Data Tables owner scoping is implemented through the existing owner/client filter used by Media DB helpers; there is not a dedicated `owner_user_id` column on every Data Tables row. +- Generate and regenerate routes create or reuse Media DB table/source state and enqueue core Jobs records in the `data_tables` domain with `job_type="data_table_generate"`. +- The Data Tables worker owns source resolution, bounded prompt construction, LLM adapter invocation, structured JSON parsing, column/row normalization, persistence, cancellation checks, and failure/status mirroring. +- RAG query sources store retrieval params plus bounded chunk snapshots so regeneration can use stored source state instead of re-running retrieval when a snapshot exists. +- Export routes either render content directly through `DataTableAdapter` or delegate generated-file metadata/export handling to File Artifacts using `file_type="data_table"`. +- Table routes and table response models use UUIDs externally and resolve to internal numeric IDs server-side. Job status/cancel routes and response fields still expose numeric `job_id` values, with optional job UUIDs. + +This ADR is intentionally bounded. It does not claim all Data Tables operations are asynchronous, does not prove complete ownership validation for every current or future source adapter, does not treat snapshots as a full provenance ledger, does not decide File Artifacts internals, and does not cover frontend table editing. + +## Alternatives considered + +| Option | Why rejected | +| --- | --- | +| Store Data Tables in a new standalone database | Would bypass existing per-user Media DB path/backend ownership, owner/client filtering, soft-delete/version conventions, and content-backend migration path. | +| Add a dedicated Data Tables export table | Current export behavior either directly renders server-side content or delegates generated-file metadata/export handling to File Artifacts, avoiding another export-status store. 
| +| Generate tables synchronously inside API handlers | Generation uses external source resolution and LLM calls, needs progress/cancellation/failure visibility, and already fits the user/admin-visible Jobs lifecycle model. | +| Let regeneration re-run RAG retrieval by default | Re-running retrieval would make regeneration sensitive to index drift and source changes. Stored snapshots give bounded reproducibility for the source state used by table generation. | +| Expose internal numeric table IDs in public table routes | Current table APIs use UUIDs externally and resolve numeric IDs server-side, matching the broader Media DB UUID pattern and avoiding leaking internal row IDs as table identity. | +| Claim every Data Tables-related ID must be a UUID | Current Jobs status and cancellation APIs still use numeric `job_id` values, so this ADR preserves that caveat instead of rewriting the Jobs API contract. | + +## Consequences + +Data Tables schema and persistence changes should keep Media DB as the owner of table metadata, source rows, source snapshots, column definitions, generated rows, table status, and table UUID identity unless a later ADR supersedes this one. + +Generation and regeneration should continue to enter the durable worker path through Jobs records in the `data_tables` domain. The Data Tables worker should report progress, cancellation, completion, and failures through Jobs while mirroring user-visible table status into Media DB for table reads. + +Source resolution changes must preserve the stored snapshot contract. New source types should decide what source state is stored for regeneration and should update the worker, endpoint/schema validation, and Data Tables tests together. + +Export changes should remain server-side. Direct downloads can render from table content, while generated-file metadata and asynchronous export behavior should continue to use File Artifacts instead of introducing a competing Data Tables export store. 
+ +Public table APIs should remain UUID-first. Numeric table IDs stay internal to Media DB helper calls, while numeric Jobs identifiers remain an explicit compatibility caveat for job status and cancellation routes. + +Wait-for-completion responses, direct download exports, source ownership depth, snapshot size limits, File Artifacts internals, and frontend editing workflows remain separate concerns rather than accepted claims in this ADR. + +## Follow-up + +- Use this ADR as the covering record for INV-025. +- Keep `Docs/ADR/inventory/2026-06-07-data-tables-confirmation-audit.md` as the evidence record and caveat boundary for this backfill. +- Create separate ADRs if File Artifacts export internals, source authorization guarantees, frontend editing ownership, or a future non-Media DB Data Tables storage backend becomes a durable architecture decision. diff --git a/Docs/Published/ADR/024-deepseek-ocr-local-transformers-backend.md b/Docs/Published/ADR/024-deepseek-ocr-local-transformers-backend.md new file mode 100644 index 0000000000..af5ca58653 --- /dev/null +++ b/Docs/Published/ADR/024-deepseek-ocr-local-transformers-backend.md @@ -0,0 +1,61 @@ +# ADR-024: DeepSeek OCR Local Transformers Backend + +**Status:** Accepted +**Date:** 2026-06-07 +**Backfilled from:** `Docs/Design/DeepSeek_OCR_Backend.md` +**Decision owner:** TASK-2275 confirmation and TASK-2276 DeepSeek backfill scope +**Related task:** TASK-2276 (`backlog/tasks/task-2276 - Backfill-DeepSeek-OCR-backend-ADR.md`) +**Related spec/plan:** `Docs/ADR/inventory/2026-06-07-deepseek-ocr-confirmation-audit.md` + +## Decision + +DeepSeek OCR is supported as a local Transformers-only OCR backend named `deepseek`, using HuggingFace `AutoTokenizer`/`AutoModel` with upstream `trust_remote_code=True`, markdown-oriented Gundam-equivalent defaults, safe string output extraction, temporary result handling by default, and explicit local dependency availability gates for `torch`, `transformers`, CUDA, and FlashAttention 
with env-based device/attention overrides. + +## Context + +DeepSeek-OCR is a heavy local model integration at the OCR/provider boundary. It plugs into the generic OCR registry and PDF/evaluation OCR paths, but its upstream contract differs from lightweight CLI backends and remote LLM-style OCR backends: it loads model code through HuggingFace Transformers, requires `trust_remote_code=True`, expects GPU-oriented dependencies for the default path, and uses `model.infer(...)` with temporary image and output paths. + +TASK-2275 confirmed the current implementation behavior that bounds this ADR: + +- `DeepSeekOCRBackend.name` is `deepseek`. +- The backend is local Transformers-only; there is no server or remote mode in this integration. +- Model loading uses `AutoTokenizer.from_pretrained(...)` and `AutoModel.from_pretrained(...)` with `trust_remote_code=True`, optional `DEEPSEEK_OCR_MODEL_REVISION`, `DEEPSEEK_OCR_MODEL_ID`, `use_safetensors=True` first, and a fallback to `use_safetensors=False`. +- The default prompt is markdown-oriented and includes the required `<image>` token. The default sizes are equivalent to the Gundam preset: `base_size=1024`, `image_size=640`, and `crop_mode=True`. +- `available()` requires `transformers` and `torch`, defaults the device to CUDA, requires CUDA when using CUDA, and requires `flash_attn` only when CUDA plus `flash_attention_2` are selected. Env overrides can choose CPU or a different attention implementation, but CPU is not the preferred or performance-proven path. +- `ocr_image()` writes input image bytes to a temporary file, calls upstream `model.infer(...)`, and extracts string output from common return shapes before falling back to safe stringification. +- `DEEPSEEK_OCR_SAVE_RESULTS` defaults false. Output paths are temporary by default, and configured persistent output is used only when saving is explicitly enabled and `DEEPSEEK_OCR_OUTPUT_DIR` is usable. 
+- The OCR registry exposes explicit `ocr_backend=deepseek`, includes DeepSeek in `auto` and `auto_high_quality` ordering, and `/api/v1/ocr/backends` exposes resolved DeepSeek metadata. +- Docs describe manual dependency installation and the `trust_remote_code=True` security warning. + +This ADR is intentionally bounded. It does not claim the model dependencies are packaged by a project optional extra, does not eliminate the `trust_remote_code=True` risk, does not make CPU mode a first-class performance target, does not add a server mode, does not persist OCR outputs by default, and does not claim routine test runs validate live model inference. + +## Alternatives considered + +| Option | Why rejected | +| --- | --- | +| Expose DeepSeek OCR only as a remote/server OCR provider | Current implementation and docs are local Transformers-only, and adding a server mode would introduce separate deployment, auth, and health-check decisions. | +| Treat DeepSeek OCR like a lightweight default OCR backend | Its dependency stack is heavy, GPU-oriented, and security-sensitive because upstream loading requires `trust_remote_code=True`; availability gates must remain explicit. | +| Persist upstream DeepSeek OCR outputs by default | Persistent outputs can write model artifacts and intermediate files. The accepted default is temporary output handling unless the operator opts in with `DEEPSEEK_OCR_SAVE_RESULTS` and an output directory. | +| Make CPU/eager execution the primary supported path | Env overrides can select CPU or alternate attention behavior, but upstream guidance and current testing center on the GPU-oriented path. | +| Add a project `ocr_deepseek` optional extra as part of this decision | Current docs require manual installation of compatible `torch`, `transformers`, and FlashAttention builds. Packaging policy can be decided separately if the dependency stack stabilizes. 
| +| Backfill the historical registry priority phrase exactly | The design note says "after dots/points", but current `auto` and `auto_high_quality` ordering differ. This ADR accepts the registered backend and documents the actual priority behavior as a caveat rather than rewriting it into an unverified policy. | + +## Consequences + +DeepSeek OCR changes should preserve the `deepseek` backend identity, local Transformers ownership, resolved env configuration, and explicit availability checks unless a later ADR supersedes this one. + +Security-sensitive model loading must remain visible in docs and code review. The use of `trust_remote_code=True` is accepted for this backend because upstream requires it, not because the risk is removed. Operators should enable this backend only in controlled environments and review the model code they execute. + +The default deployment path remains GPU-oriented. CUDA and FlashAttention requirements are availability gates for the default configuration, while CPU or alternate-attention env overrides are compatibility escape hatches. Tests and docs should avoid implying equivalent CPU performance. + +Result persistence remains opt-in. Backend output should continue to use temporary image/output paths by default, and persistent output should require explicit `DEEPSEEK_OCR_SAVE_RESULTS` plus an operator-managed output directory. + +The OCR registry and backend discovery endpoint should continue to surface DeepSeek metadata, but registry priority remains current implementation behavior rather than a broad provider-ranking policy. Changes to default OCR ordering should be reviewed as registry behavior changes. + +Routine CI should keep lightweight unit/registry coverage for this backend. Live model endpoint validation remains an explicitly gated integration path because it requires CUDA, local model dependencies, and operator opt-in. + +## Follow-up + +- Use this ADR as the covering record for INV-026. 
+- Keep `Docs/ADR/inventory/2026-06-07-deepseek-ocr-confirmation-audit.md` as the evidence record and caveat boundary for this backfill. +- Create separate ADRs or focused tasks if DeepSeek gains a server mode, a packaged optional dependency extra, different default registry priority, persistent artifact storage, or stronger model provenance controls. diff --git a/Docs/Published/ADR/025-llm-provider-adapter-routing-and-overrides.md b/Docs/Published/ADR/025-llm-provider-adapter-routing-and-overrides.md new file mode 100644 index 0000000000..64a177bf22 --- /dev/null +++ b/Docs/Published/ADR/025-llm-provider-adapter-routing-and-overrides.md @@ -0,0 +1,60 @@ +# ADR-025: LLM Provider Adapter Routing and Overrides + +**Status:** Accepted +**Date:** 2026-06-07 +**Backfilled from:** `tldw_Server_API/app/core/LLM_Calls/README.md` +**Decision owner:** TASK-2232 confirmation, TASK-2309 code/doc alignment, and TASK-2310 LLM provider backfill scope +**Related task:** TASK-2310 (`backlog/tasks/task-2310 - Backfill-LLM-provider-integration-ADR.md`) +**Related spec/plan:** `Docs/ADR/inventory/2026-06-04-llm-provider-integration-confirmation-audit.md` + +## Decision + +LLM calls use the provider adapter registry as the integration boundary, normalize chat responses and streams to OpenAI-compatible shapes, allow request-level `base_url` overrides only for trusted allowlisted providers, and reject request-level local provider endpoint URL overrides at the Chat adapter-request boundary. + +## Context + +The LLM Calls module is the shared provider integration surface for the Chat API and internal services. It supports commercial providers, custom OpenAI-compatible gateways, and local providers such as llama.cpp, Kobold, Oobabooga, TabbyAPI, vLLM, Ollama, Aphrodite, MLX, and local-llm. Without one durable boundary, provider-specific call behavior can leak into endpoints and callers, making request validation, streaming behavior, provider aliases, and local endpoint policy inconsistent. 
+ +TASK-2232 confirmed the current integration behavior that bounds this ADR: + +- Provider routing goes through `ChatProviderRegistry`, which registers commercial, custom OpenAI-compatible, and local adapters and resolves provider aliases before dispatch. +- Non-streaming results are normalized to OpenAI-compatible chat completion dictionaries. +- Streaming results are normalized to OpenAI-style SSE `data: ...` chunks and terminated with one final `[DONE]`. +- Strict OpenAI-compatible mode for local gateways drops selected non-standard fields before forwarding payloads to strict local providers. +- Trusted request-level `base_url` overrides are allowed only when the target provider is allowlisted and the caller is trusted. + +TASK-2309 aligned the local endpoint override policy with the module documentation. The Chat adapter-request builder now canonicalizes provider names through the adapter registry, rejects non-null `api_url` and provider-specific `*_api_url` request keys for local providers before adapter dispatch, and keeps allowlisted `base_url` behavior for supported trusted providers. + +This ADR is intentionally bounded. It covers the Chat adapter-request boundary and provider adapter routing contract. It does not claim every low-level local adapter helper can never receive endpoint URLs, because local adapters may still accept config-derived endpoint URLs internally. It also treats provider-specific response preservation as an OpenAI-compatible envelope extension, not as a broad promise that every provider-specific response field is stable public API. + +## Alternatives considered + +| Option | Why rejected | +| --- | --- | +| Let endpoints call provider helpers directly | Direct endpoint-to-provider calls duplicate routing, alias handling, error mapping, streaming normalization, and request filtering across call sites. The adapter registry is the current shared integration point. 
| +| Let every provider accept arbitrary request-level endpoint URLs | Request-supplied local endpoint URLs blur trusted config with user payload, weaken SSRF/egress review boundaries, and contradict the local provider config-only policy. Trusted `base_url` overrides remain available only for allowlisted providers. | +| Disable all request-level base URL overrides | Some BYOK and proxy deployments need caller-provided base URLs. The accepted policy keeps this feature but requires trusted callers, provider allowlisting, and URL validation. | +| Treat local provider endpoint URLs as normal OpenAI-compatible payload fields | Local endpoint URLs are deployment configuration, not chat payload semantics. Keeping them out of request extras prevents accidental forwarding to local adapters. | +| Make provider-specific response preservation the primary response contract | The API contract stays OpenAI-compatible. Provider-specific data may be preserved as an extension, but downstream code should not depend on arbitrary provider-native fields as the stable primary shape. | + +## Consequences + +New LLM provider integrations should register adapters in the provider registry and use registry alias resolution for dispatch. Callers should use Chat service entrypoints instead of wiring endpoint code directly to provider helper functions. + +Provider adapters must preserve the OpenAI-compatible response and SSE contract expected by the Chat API. Provider-native data can be exposed as extension metadata only when it does not replace the normalized envelope. + +Request override policy is split by trust boundary: + +- `extra_headers` and `extra_body` remain additive request override surfaces where explicit request keys win on conflicts. +- `base_url` and `api_base_url` are accepted only after trusted-caller checks, allowlist checks, alias-aware provider resolution, and URL validation. 
+- Local provider `api_url` and provider-specific `*_api_url` request keys are rejected before adapter dispatch. + +Local provider endpoint URLs remain config-derived operational settings. Local adapter helper functions may still receive endpoint URLs from trusted config paths, so future changes should describe which boundary they enforce. + +Adding a new local provider should update the registry-owned local-provider classification so the Chat adapter-request guard continues to reject request-level local endpoint overrides. A future change that allows request-level local endpoint URLs, changes the trusted `base_url` policy, or makes provider-specific response fields stable public API needs a separate decision. + +## Follow-up + +- Use this ADR as the covering record for INV-027. +- Keep `Docs/ADR/inventory/2026-06-04-llm-provider-integration-confirmation-audit.md` as the evidence and caveat record for this backfill. +- Consider separate ADRs if provider-specific response preservation becomes a stable public API guarantee, if local endpoint URLs become request-configurable, or if sync/async provider call paths are unified under a new provider runtime policy. 
diff --git a/Docs/Published/ADR/026-security-outbound-egress-and-ssrf-policy.md b/Docs/Published/ADR/026-security-outbound-egress-and-ssrf-policy.md new file mode 100644 index 0000000000..20697eb095 --- /dev/null +++ b/Docs/Published/ADR/026-security-outbound-egress-and-ssrf-policy.md @@ -0,0 +1,57 @@ +# ADR-026: Security Outbound Egress and SSRF Policy + +**Status:** Accepted +**Date:** 2026-06-07 +**Backfilled from:** `tldw_Server_API/app/core/Security/README.md` +**Decision owner:** TASK-2247 confirmation and TASK-2311 outbound egress backfill scope +**Related task:** TASK-2311 (`backlog/tasks/task-2311 - Backfill-Security-outbound-egress-policy-ADR.md`) +**Related spec/plan:** `Docs/ADR/inventory/2026-06-04-security-confirmation-audit.md` + +## Decision + +Outbound integrations that handle untrusted or user-configurable URLs must route those URLs through the Security module egress policy helpers, which enforce scheme, host, port, allow/deny, environment profile, tenant, DNS, and private/reserved-address checks before network calls. + +## Context + +The Security module owns outbound network policy helpers in `egress.py` and endpoint-friendly URL assertions in `url_validation.py`. The source README already directs Web scraping, Watchlists, WebSearch, Workflows, Text2SQL, and third-party providers to use those helpers before outbound work. Without one shared egress boundary, individual feature modules can drift into inconsistent SSRF handling, local allowlists that skip private-IP checks, or different production defaults. + +TASK-2247 confirmed the current implementation that bounds this ADR: + +- `evaluate_url_policy()` accepts only HTTP and HTTPS schemes. +- URLs must include a hostname and a valid port. Allowed ports default to 80 and 443 unless `WORKFLOWS_EGRESS_ALLOWED_PORTS` changes that policy. +- Global and workflow allow/deny lists are merged from `EGRESS_ALLOWLIST`, `EGRESS_DENYLIST`, `WORKFLOWS_EGRESS_ALLOWLIST`, and `WORKFLOWS_EGRESS_DENYLIST`. 
+- Denylist entries win over allowlist entries. +- Production-like environments default to a strict profile that requires an allowlist; non-production defaults to permissive public-host behavior unless allowlists are configured. +- Private and reserved resolved addresses are blocked by default through `WORKFLOWS_EGRESS_BLOCK_PRIVATE`. +- Tenant egress and webhook helpers call the same policy with tenant-specific allow/deny environment variables. +- `assert_url_safe()` wraps the central policy for endpoint code and raises a 400 response when a URL is blocked. + +This ADR is intentionally bounded to the shared egress policy and future outbound integration rule. It does not claim every historical network call in the repository already uses these helpers. The protection is effective for paths that route URL decisions through the Security module boundary. + +## Alternatives considered + +| Option | Why rejected | +| --- | --- | +| Let each outbound feature define local URL validation | Local validators drift over time and can omit scheme, port, DNS, private-address, or environment-profile checks that the central policy already handles. | +| Treat allowlists as the only SSRF control | Allowlists do not replace DNS and private/reserved-address checks, especially when hostnames can resolve to local or reserved ranges. | +| Use a strict allowlist profile in every environment | Development and local integrations often need public-host testing without central allowlist setup. The accepted policy keeps production strict by default while preserving permissive non-production behavior. | +| Allow outbound callers to bypass egress helpers when they perform their own checks | Bypasses make it hard to audit SSRF behavior and can create incompatible policy semantics across modules. New outbound integrations should use the shared helpers. 
| +| Combine egress, request-edge middleware, secrets, and serialization into one Security ADR | TASK-2247 found different enforcement boundaries and caveats. ADR-019 already covers request-edge middleware, and secrets/serialization need a separate adoption audit before any accepted ADR. | + +## Consequences + +New outbound integrations that accept user-configurable URLs should validate through `tldw_Server_API.app.core.Security.egress` or `tldw_Server_API.app.core.Security.url_validation` before constructing clients, sessions, downloads, callbacks, webhooks, search requests, or scraper fetches. + +Policy changes for outbound network access belong in `egress.py`, with tests covering allowed and denied URLs, global/workflow/tenant controls, strict versus permissive profiles, port handling, DNS behavior, and private/reserved-address blocking. + +Feature modules should not add independent allowlists that bypass the central private-IP, scheme, port, DNS, or denylist checks. If a module needs a narrower rule, it should compose with the central policy rather than replacing it. + +Existing outbound call sites that do not yet use the Security helpers are not proven compliant by this ADR. They should be audited or migrated under separate tasks when touched. + +Request-edge middleware remains covered by ADR-019. Secret management, crypto helpers, and restricted serialization remain outside this ADR until a focused adoption audit confirms the repository-wide behavior worth recording. + +## Follow-up + +- Use this ADR as the covering record for the outbound egress/SSRF portion of INV-029. +- Keep `Docs/ADR/inventory/2026-06-04-security-confirmation-audit.md` as the evidence and caveat record for Security split backfills. +- Consider a separate secrets/serialization ADR only after an adoption audit confirms which consumers use `SecretManager`, encrypted JSON helpers, and restricted pickle compatibility paths. 
diff --git a/Docs/Published/ADR/027-security-aes-gcm-json-envelope-helpers.md b/Docs/Published/ADR/027-security-aes-gcm-json-envelope-helpers.md new file mode 100644 index 0000000000..96a00d5261 --- /dev/null +++ b/Docs/Published/ADR/027-security-aes-gcm-json-envelope-helpers.md @@ -0,0 +1,59 @@ +# ADR-027: Security AES-GCM JSON Envelope Helpers + +**Status:** Accepted +**Date:** 2026-06-07 +**Backfilled from:** `Docs/ADR/inventory/2026-06-07-security-secrets-serialization-adoption-audit.md` +**Decision owner:** TASK-2312 adoption audit and TASK-2313 crypto-envelope backfill scope +**Related task:** TASK-2313 (`backlog/tasks/task-2313 - Backfill-Security-crypto-envelope-ADR.md`) +**Related spec/plan:** `Docs/ADR/inventory/2026-06-07-security-secrets-serialization-adoption-audit.md` + +## Decision + +Configured encrypted persistence paths use the Security module's AES-GCM JSON envelope helpers as the shared primitive for storing and rotating sensitive structured metadata, while keeping encryption opt-in or required according to each caller's existing boundary. + +## Context + +TASK-2312 found that the remaining secrets/serialization portion of INV-029 was too broad for one accepted ADR. The audit did not support a universal `SecretManager` adoption claim or a universal safe-serialization claim, but it did find a focused shared crypto primitive with multiple active consumers. + +`tldw_Server_API/app/core/Security/crypto.py` provides: + +- `encrypt_json_blob()` and `decrypt_json_blob()` using `WORKFLOWS_ARTIFACT_ENC_KEY`. +- `encrypt_json_blob_with_key()` and `decrypt_json_blob_with_key()` for caller-supplied keys. +- AES-GCM envelopes marked with `_enc: aesgcm:v1`, plus base64-encoded `nonce`, `ct`, and `tag` fields. +- Primary-key decrypt behavior with optional `JOBS_CRYPTO_SECONDARY_KEY` fallback for rotation windows. 
+- Failure-safe return behavior where unsupported crypto, missing keys, invalid envelopes, or decrypt failures return `None` instead of exposing plaintext or partial data. + +Known consumer patterns include Jobs payload/result encryption and key rotation, External Sources OAuth state/token envelope handling, AuthNZ user provider secrets, admin webhook secrets, and Workflow metadata decrypt/encrypt paths. + +This ADR is intentionally bounded. It accepts the shared AES-GCM JSON envelope primitive and the existing encrypted-persistence consumer pattern. It does not claim all sensitive JSON in the repository is encrypted, that all secret lookup flows use `SecretManager`, or that restricted pickle compatibility is part of this decision. + +## Alternatives considered + +| Option | Why rejected | +| --- | --- | +| Let each persistence feature define its own encrypted JSON envelope format | Divergent envelope formats make key rotation, decrypt fallback, tests, and future migrations harder to reason about. | +| Require AES-GCM encryption for every JSON blob immediately | Current consumers have different boundaries: Jobs encryption is domain/config gated, connector token storage can fall back when crypto is unavailable, while BYOK/admin webhook helpers require configured keys. A universal requirement would overclaim current behavior. | +| Fold `SecretManager`, AES-GCM helpers, and restricted pickle into one Security ADR | TASK-2312 found different adoption levels and boundaries. Combining them would imply repository-wide guarantees the code does not currently provide. | +| Store sensitive structured metadata as plaintext plus redaction only | Redaction helps logs and UI surfaces, but it does not protect persisted structured metadata. Existing encrypted-persistence paths already use envelopes where configured or required. 
| +| Replace AES-GCM helpers with caller-specific key-management services | Some callers have their own key source, but the shared explicit-key helpers already let them use a common envelope format without centralizing every key policy. | + +## Consequences + +New encrypted persistence paths for structured JSON should prefer `tldw_Server_API.app.core.Security.crypto` helpers and the `_enc: aesgcm:v1` envelope shape instead of inventing a local envelope format. + +Callers remain responsible for their key boundary: + +- Jobs encryption stays gated by `JOBS_ENCRYPT` / `JOBS_ENCRYPT_<DOMAIN>` plus configured crypto keys. +- Generic Security crypto helpers use `WORKFLOWS_ARTIFACT_ENC_KEY` and optional `JOBS_CRYPTO_SECONDARY_KEY`. +- BYOK and admin webhook secret helpers use explicit-key variants and can require configured keys at their own boundary. +- Existing connector paths may preserve plaintext fallback behavior when crypto is unavailable or not configured. + +Key rotation and migration work should use the explicit-key helpers when old and new keys must be supplied directly. Tests should cover envelope creation, decrypt behavior, invalid-envelope handling, key fallback, and caller-specific fallback or required-key behavior. + +This ADR does not replace `SecretManager`, require every secret read to flow through `SecretManager`, or decide restricted pickle policy. Those remain separate inventory-only or future implementation-backed slices. + +## Follow-up + +- Use this ADR as the covering record for the AES-GCM JSON envelope portion of INV-029. +- Keep `Docs/ADR/inventory/2026-06-07-security-secrets-serialization-adoption-audit.md` as the evidence and caveat record for this split. +- Consider separate follow-up work for `SecretManager` adoption and restricted legacy pickle compatibility if the owner wants those decisions promoted from inventory-only status. 
diff --git a/Docs/Published/ADR/028-security-restricted-legacy-pickle-compatibility.md b/Docs/Published/ADR/028-security-restricted-legacy-pickle-compatibility.md new file mode 100644 index 0000000000..6bae19fb23 --- /dev/null +++ b/Docs/Published/ADR/028-security-restricted-legacy-pickle-compatibility.md @@ -0,0 +1,61 @@ +# ADR-028: Security Restricted Legacy Pickle Compatibility + +**Status:** Accepted +**Date:** 2026-06-07 +**Backfilled from:** `Docs/ADR/inventory/2026-06-07-security-secrets-serialization-adoption-audit.md` +**Decision owner:** TASK-2312 adoption audit and TASK-2314 restricted-pickle backfill scope +**Related task:** TASK-2314 (`backlog/tasks/task-2314 - Backfill-Security-restricted-pickle-compatibility-ADR.md`) +**Related spec/plan:** `Docs/ADR/inventory/2026-06-07-security-secrets-serialization-adoption-audit.md` + +## Decision + +Legacy pickle deserialization is allowed only through the Security module's restricted pickle helper for explicitly gated legacy compatibility paths, not as a general-purpose serialization format. + +## Context + +TASK-2312 found that the remaining secrets/serialization portion of INV-029 was too broad for one accepted ADR. The audit did not support a universal safe-serialization claim, but it did find a focused `safe_pickle` helper with bounded, default-disabled legacy consumers. + +`tldw_Server_API/app/core/Security/safe_pickle.py` provides: + +- `RestrictedUnpickler`, which blocks arbitrary global class and function resolution. +- `safe_pickle_loads()`, which accepts only bytes-like payloads and routes deserialization through `RestrictedUnpickler`. +- An allowlist limited to basic built-in value containers and `collections.OrderedDict`. +- `ValueError` wrapping for unsafe pickle payloads rejected by the restricted unpickler. 
+ +Known consumer patterns are intentionally narrow: + +- Web Scraping `ContentDeduplicator` migrates legacy `content_hashes.pkl` only when `WEBSCRAPER_ALLOW_LEGACY_PICKLE_HASHES=true`; migrated data is normalized and saved back to JSON. +- Scheduler `PayloadService` loads legacy pickle payloads only when `allow_legacy_pickle_payloads` / `SCHEDULER_ALLOW_LEGACY_PICKLE_PAYLOADS` enables compatibility mode. +- Scheduler only writes pickle payloads for non-JSON-serializable payloads when that same compatibility mode is enabled. + +This ADR is intentionally bounded. It accepts the restricted helper and the known gated legacy compatibility pattern. It does not claim all pickle deserialization in the repository routes through `Security.safe_pickle`, does not consolidate the Embeddings cache-local unpickler, and does not cover `SecretManager` adoption. + +## Alternatives considered + +| Option | Why rejected | +| --- | --- | +| Continue using raw `pickle.load()` / `pickle.loads()` in legacy paths | Raw pickle can resolve arbitrary globals. Existing migration and payload paths already have a restricted helper and tests for disallowed globals. | +| Remove all legacy pickle compatibility immediately | Web Scraping and Scheduler still have explicit compatibility paths for older stored data. Removing them would strand legacy users without a migration path. | +| Enable legacy pickle compatibility by default | Pickle is a high-risk format. The current callers require explicit environment or configuration gates before loading legacy pickle payloads. | +| Treat `Security.safe_pickle` as the universal pickle boundary for the repository | The Embeddings cache currently has its own local restrictive unpickler. Calling this universal would overclaim current implementation. | +| Combine restricted pickle, AES-GCM envelopes, and `SecretManager` adoption into one Security ADR | TASK-2312 found different adoption levels and boundaries. 
Combining them would imply repository-wide guarantees the code does not currently provide. | + +## Consequences + +New persistence and payload formats should prefer JSON or another explicit safe format. Pickle compatibility is for bounded legacy migration or compatibility paths only. + +Callers that must support legacy pickle data must: + +- route loads through `tldw_Server_API.app.core.Security.safe_pickle.safe_pickle_loads()`; +- keep compatibility default-disabled or explicitly gated; +- document the compatibility flag or configuration boundary; +- normalize and migrate legacy data to a safer format where practical; +- test default-disabled behavior and rejection of disallowed globals. + +Existing cache-local restrictive unpicklers remain outside this ADR unless a future implementation consolidates them into `Security.safe_pickle`. This ADR also does not replace `SecretManager`, require every secret read to flow through `SecretManager`, or create a broad safe-serialization policy for every module. + +## Follow-up + +- Use this ADR as the covering record for the restricted legacy pickle compatibility portion of INV-029. +- Keep `Docs/ADR/inventory/2026-06-07-security-secrets-serialization-adoption-audit.md` as the evidence and caveat record for this split. +- Consider separate follow-up work for `SecretManager` adoption if the owner wants that decision promoted from inventory-only status. diff --git a/Docs/Published/ADR/README.md b/Docs/Published/ADR/README.md new file mode 100644 index 0000000000..3eb92e3427 --- /dev/null +++ b/Docs/Published/ADR/README.md @@ -0,0 +1,62 @@ +# Architecture Decision Records + +Architecture Decision Records (ADRs) capture durable architecture decisions for `tldw_server`: what was decided, why, what alternatives were considered, and what tradeoffs were accepted. + +Module docs, design specs, and plans describe how things work. ADRs explain why important architecture rules exist. + +## Workflow + +1. 
Search existing ADRs before creating a new one. +2. Create a Backlog.md task or use the task already associated with the work. +3. Use `000-template.md`. +4. Use the next sequential number. +5. Record one decision per ADR. +6. Write ADRs at decision time whenever possible. +7. If backfilling, keep `Status: Accepted` for still-governing decisions and set `Backfilled from:` to the source path. +8. Do not rewrite accepted ADR rationale. To change a decision, create a new ADR and mark the old one `Superseded by ADR-{N}`. + +## Status Rules + +- `Proposed`: drafted for review but not yet accepted. +- `Accepted`: current governing decision. +- `Superseded by ADR-{N}`: no longer governing because a newer ADR replaced it. +- Backfill is metadata, not status. Backfilled still-governing decisions use `Status: Accepted` plus `Backfilled from: <source path>`. + +## ADR Required When + +An ADR is required when a decision creates or changes a durable rule for module boundaries, public API shape, persistence, security, worker ownership, provider integration, WebUI/extension conventions, major dependencies, or repository workflow gates. + +Small bug fixes, local implementation details, product copy, temporary experiments, and test-only changes usually do not need ADRs unless they create durable policy. + +## Index + +| ADR | Status | Decision | +| --- | --- | --- | +| [ADR-001](001-adr-workflow-and-governance.md) | Accepted | Adopt `Docs/ADR/` as the canonical ADR workflow. | +| [ADR-002](002-backlog-md-task-tracking.md) | Accepted | Require Backlog.md tasks for repo-changing work. | +| [ADR-003](003-jobs-vs-scheduler-default.md) | Accepted | Use Jobs by default for new user-visible work and Scheduler for internal dependency orchestration. | +| [ADR-004](004-ai-generated-pr-change-summary-gate.md) | Accepted | Require human-written change summaries for materially AI-authored PRs. 
| +| [ADR-005](005-bandit-touched-scope-security-gate.md) | Superseded by ADR-006 | Run Bandit on touched Python/code scope before completion. | +| [ADR-006](006-bandit-report-path-portability.md) | Accepted | Keep the Bandit touched-scope gate but require portable report output paths. | +| [ADR-007](007-research-workspace-canonical-first-slice-shell.md) | Accepted | Use `ResearchWorkspace` as the canonical first-slice workspace shell while preserving specialized routes. | +| [ADR-008](008-workspace-split-key-persistence-and-indexeddb-offload.md) | Accepted | Use split localStorage workspace persistence with optional IndexedDB offload for heavy payloads. | +| [ADR-009](009-quick-chat-docs-assistant-modes.md) | Accepted | Keep Quick Chat split into `Chat`, `Docs Q&A`, and `Browse Guides` modes. | +| [ADR-010](010-sandbox-vz-runtime-ownership.md) | Accepted | Keep `vz_linux` as a repo-owned sandbox runtime path instead of requiring Apple `container`. | +| [ADR-011](011-audio-api-semantics.md) | Accepted | Use centralized Audio API auth, model-first TTS routing, structured streaming errors, and non-streaming-only download links. | +| [ADR-012](012-evaluations-resource-id-prefixes.md) | Accepted | Use OpenAI-style type-prefixed IDs for primary Evaluations API resources. | +| [ADR-013](013-evaluations-deletion-lifecycle.md) | Accepted | Use soft deletes for evaluation definitions and hard deletes for datasets. | +| [ADR-014](014-evaluations-openai-compatible-schemas.md) | Accepted | Use separate request and response schemas with OpenAI-compatible response conventions. | +| [ADR-015](015-evaluations-existing-evaluator-integration.md) | Accepted | Wrap existing evaluator modules instead of rewriting evaluator logic inside the API runner or endpoints. | +| [ADR-016](016-acp-session-and-orchestration-persistence.md) | Accepted | Persist ACP shared operational state in `Databases/acp_sessions.db` and user orchestration state in per-user `orchestration.db` files. 
| +| [ADR-017](017-scoped-org-team-rbac-core-semantics.md) | Accepted | Use feature-flagged scoped Org/Team RBAC overlays with `require_active` default scope and denylist-filtered grants. | +| [ADR-018](018-resource-governance-endpoint-policy-and-route-map.md) | Accepted | Use claim-first new-endpoint authorization with explicit Resource Governor applicability decisions, route-map ownership, DB/file route-map merge semantics, and request-ingress missing-policy denial. | +| [ADR-019](019-security-request-edge-middleware.md) | Accepted | Own request-edge Security middleware in the Security module with path-scoped setup/docs/API behavior, always-installed request IDs and drain gate, and production-default security headers. | +| [ADR-020](020-db-management-per-user-paths-and-content-backend.md) | Accepted | Use `DatabasePaths` for per-user database paths, SQLite as the default content mode, and explicit PostgreSQL content mode with startup validation. | +| [ADR-021](021-services-lifecycle-startup-and-shutdown.md) | Accepted | Use focused Services lifespan orchestration with lifecycle worker session ownership, cooperative stop-event workers, staged shutdown, and bounded timeout/cancel fallback. | +| [ADR-022](022-embeddings-api-and-media-pipeline.md) | Accepted | Use OpenAI-compatible Embeddings API safeguards, bounded provider routing, endpoint reliability controls, and Jobs-root/Redis-stage media pipeline ownership. | +| [ADR-023](023-data-tables-backend-storage-jobs-and-exports.md) | Accepted | Use Media DB-owned Data Tables state, Jobs-backed generation/regeneration, bounded source snapshots, UUID table APIs, and server-side exports. | +| [ADR-024](024-deepseek-ocr-local-transformers-backend.md) | Accepted | Support DeepSeek OCR as a local Transformers-only backend with explicit dependency gates, temporary output by default, and security caveats for `trust_remote_code=True`. 
| +| [ADR-025](025-llm-provider-adapter-routing-and-overrides.md) | Accepted | Use the LLM provider adapter registry boundary with OpenAI-compatible normalization, trusted allowlisted `base_url` overrides, and local endpoint URL override rejection. | +| [ADR-026](026-security-outbound-egress-and-ssrf-policy.md) | Accepted | Require outbound integrations to use Security egress helpers for untrusted URLs, with central scheme, host, port, allow/deny, profile, tenant, DNS, and private-address checks. | +| [ADR-027](027-security-aes-gcm-json-envelope-helpers.md) | Accepted | Use Security AES-GCM JSON envelope helpers as the shared primitive for configured encrypted structured-metadata persistence and key-rotation paths. | +| [ADR-028](028-security-restricted-legacy-pickle-compatibility.md) | Accepted | Use Security restricted pickle helpers only for explicitly gated legacy compatibility paths, not as a general-purpose serialization format. | diff --git a/Docs/Published/ADR/inventory/2026-06-03-acp-rbac-confirmation-audit.md b/Docs/Published/ADR/inventory/2026-06-03-acp-rbac-confirmation-audit.md new file mode 100644 index 0000000000..a0ca0859a6 --- /dev/null +++ b/Docs/Published/ADR/inventory/2026-06-03-acp-rbac-confirmation-audit.md @@ -0,0 +1,61 @@ +# ACP/AuthNZ/RBAC ADR Candidate Confirmation Audit + +**Date:** 2026-06-03 +**Backlog:** TASK-519 +**Follow-up:** TASK-520 + +## Scope + +This audit confirms whether `INV-023` and `INV-024` are current enough to backfill as accepted ADRs. It does not create accepted ADRs; it records the evidence and limits for the next backfill task. + +## Dispositions + +| Inventory ID | Disposition | Next action | +| --- | --- | --- | +| INV-023 | Current governing for implemented ACP persistence. | Backfilled by `Docs/ADR/016-acp-session-and-orchestration-persistence.md` via TASK-520. The ADR does not claim the older setup-guide consolidation work. | +| INV-024 | Current governing for core scoped Org/Team RBAC semantics. 
| Backfilled by `Docs/ADR/017-scoped-org-team-rbac-core-semantics.md` via TASK-520. The ADR does not claim missing admin mapping endpoints, resolver metrics, or the older invalid-claim fallback behavior. | + +## Owner Sign-off + +TASK-520 recorded owner sign-off from the 2026-06-04 continuation instruction after the ACP/RBAC backfill scope and sign-off requirement were summarized. + +## Evidence Reviewed + +### INV-023 - ACP Persistence + +The source plan approved moving ACP session state to a shared SQLite database and orchestration state to per-user SQLite databases (`Docs/Plans/2026-03-08-acp-persistence-registry-expansion-design.md`). The current implementation matches the core persistence decision: + +- `tldw_Server_API/app/core/DB_Management/ACP_Sessions_DB.py` defines `sessions`, `session_messages`, `agent_registry`, `agent_health_history`, `permission_policies`, and `permission_decisions` tables, with separate message storage and denormalized session token counters. +- `ACPSessionsDB` defaults to `Databases/acp_sessions.db`, uses thread-local SQLite connections, and runs the shared SQLite policy helper. +- `tldw_Server_API/app/services/admin_acp_sessions_service.py` delegates persistent ACP session state to `ACPSessionsDB` while preserving the public session-store API surface. +- `tldw_Server_API/app/core/DB_Management/Orchestration_DB.py` defines a per-user `OrchestrationDB` with projects, tasks, runs, reviews, workspaces, and workspace MCP servers. `OrchestrationDB.for_user()` resolves the user database directory, which defaults to `Databases/user_databases/<user_id>/`, and stores `orchestration.db` under that directory. Deployments may override the user DB base directory through configuration. +- `tldw_Server_API/app/core/Agent_Orchestration/orchestration_service.py` exposes `get_orchestration_db(user_id)` as an LRU-cached per-user factory, and `tldw_Server_API/app/api/v1/endpoints/agent_orchestration.py` uses that factory for workspace and project APIs. 
+- Regression coverage exists in `tldw_Server_API/tests/Agent_Client_Protocol/test_acp_sessions_db.py`, `tldw_Server_API/tests/Agent_Client_Protocol/test_acp_integration_persistence.py`, `tldw_Server_API/tests/Agent_Client_Protocol/test_acp_agent_registry.py`, and `tldw_Server_API/tests/Agent_Orchestration/test_orchestration_db.py`. + +Caveat: `orchestration_service.py` still contains a legacy in-memory service class and header text. Current API routes use the SQLite-backed factory, so the ADR should describe the governing persistence path and avoid treating the legacy class as the architecture. + +### INV-024 - Scoped Org/Team RBAC + +The source design resolved four core decisions: default `require_active` mode, deny admin-level permissions in scoped grants, derive active scope from JWT/default membership rather than request headers, and allow MCP/tool permissions in scoped grants. The current implementation supports those core decisions: + +- `tldw_Server_API/app/core/AuthNZ/settings.py` defaults `ORG_RBAC_PROPAGATION_ENABLED` to `False`, `ORG_RBAC_SCOPE_MODE` to `require_active`, and defines an admin-level scoped permission denylist that does not include MCP or `tools.execute:*`. +- `tldw_Server_API/app/core/AuthNZ/migrations.py` creates and seeds `org_role_permissions` and `team_role_permissions`; `tldw_Server_API/app/core/AuthNZ/pg_migrations_extra.py` carries the PostgreSQL equivalent. +- `tldw_Server_API/app/core/AuthNZ/org_rbac.py` normalizes scope mode to `require_active` by default, resolves org/team membership roles, reads scoped grants from the mapping tables, filters denylisted permissions, and merges scoped permissions with base permissions when propagation is enabled. +- `tldw_Server_API/app/core/AuthNZ/User_DB_Handling.py` reads active scope from JWT claims for JWT users, validates it against current memberships, falls back to membership-derived scope when no active claim is present, and applies scoped permissions. 
The API-key path derives scope from key org/team scope or memberships and also applies scoped permissions. +- `tldw_Server_API/tests/AuthNZ_SQLite/test_org_rbac_scoped_permissions_sqlite.py` covers `require_active` fallback, JWT active-org behavior, admin denylist filtering, and `tools.execute:*` eligibility through `require_permissions`. +- `Docs/Product/Completed/Orgs-virtual-keys-PRD.md` repeats the implemented follow-up semantics: `require_active`, JWT/default-membership scope, no request headers, denylisted admin permissions, MCP/tool eligibility, and active-org team permission behavior. + +Caveats for TASK-520: + +- The exact admin mapping endpoints listed in the design (`/api/v1/admin/rbac/org-roles/...` and `/team-roles/...`) were not found in the current API surface. The ADR should not claim those endpoints exist. +- Resolver success/failure/latency metrics and an `AuthPrincipal.resolver_failure` flag were not found. The ADR should treat them as follow-up implementation gaps, not accepted current behavior. +- The current JWT path rejects invalid active org/team claims with `403`; it does not implement the source design's default invalid-claim fallback with optional strict mode. The ADR should describe the implemented stricter behavior or omit the invalid-claim fallback. + +## Backfilled ADRs + +TASK-520 created two accepted ADRs from this audit: + +1. `Docs/ADR/016-acp-session-and-orchestration-persistence.md`: shared `acp_sessions.db` plus per-user `Databases/user_databases/<user_id>/orchestration.db` by default. +2. `Docs/ADR/017-scoped-org-team-rbac-core-semantics.md`: feature-flagged scoped permission overlays with `require_active` default and denylist-filtered grants. + +Both ADRs link this audit and the source design docs. Missing operational surfaces remain consequences or follow-up notes, not accepted claims. 
diff --git a/Docs/Published/ADR/inventory/2026-06-03-decision-inventory.md b/Docs/Published/ADR/inventory/2026-06-03-decision-inventory.md new file mode 100644 index 0000000000..de7f778acb --- /dev/null +++ b/Docs/Published/ADR/inventory/2026-06-03-decision-inventory.md @@ -0,0 +1,115 @@ +# ADR Decision Inventory - 2026-06-03 + +**Related task:** TASK-509 +**Inventory status:** Draft for owner review +**ADR creation policy:** This inventory does not create accepted ADRs. Ambiguous, contradicted, stale, historical, or owner-sensitive decisions require owner review before backfill. + +## Classification Rules + +| Candidate status | Meaning | Allowed next action | +| --- | --- | --- | +| Current governing | Appears to describe a durable rule still consistent with current docs/code. | Owner review, then backfill slice planning. | +| Superseded | A newer source or ADR appears to replace it. | Keep classified; do not backfill as accepted. | +| Stale | Source appears outdated or inconsistent with current repo state. | Keep classified; request owner review or doc cleanup. | +| Duplicate | Same decision appears in multiple sources. | Pick a canonical source candidate; link duplicates. | +| Needs owner review | Decision is ambiguous, contradicted, historical, or policy-sensitive. | Do not accept/backfill until owner confirms. | + +## Coverage Matrix + +| Scope | Command/source | Candidate count | Reviewed files | Skipped files/rationale | Coverage result | +| --- | --- | --- | --- | --- | --- | +| `Docs/Design/**` | `find` file enumeration plus `rg` decision-language search | 71 candidate files from 122 Markdown/RST files | Reviewed explicit decision-heading docs and high-signal design records: Apple containerization, workspace canonical model, workspace persistence, Quick Chat Docs Assistant, STT/TTS Audio API, Data Tables, DeepSeek OCR, Org/Team RBAC, WebUI dependency audit. | Files without explicit decision headings were not individually converted. 
Many are product notes, reference docs, or historical implementation plans. | Covered with concrete rows for current-looking candidates and owner-review rows for uncertain/historical decisions. | +| `Docs/Plans/**` | `find` file enumeration plus `rg` decision-language search | 863 candidate files from 971 Markdown/RST files | Reviewed explicit decision-heading subset and ACP persistence registry architecture decisions. | Most plan hits are implementation instructions, test snippets, or historical PR recovery notes rather than durable governing rules. | Covered by owner-review rows for explicit architecture decisions and a broad historical-plan classification. | +| `Docs/superpowers/specs/**` | `find` file enumeration plus `rg` decision-language search | 191 candidate files from 197 Markdown/RST files | Reviewed ADR workflow adoption spec, product roadmap design, TTS/STT workflows design, and explicit decision-heading examples. | Most specs are task-local designs. They need per-domain review before any backfill because many are not current architecture policy. | Covered by concrete rows plus a broad owner-review classification. | +| `Docs/superpowers/plans/**` | `find` file enumeration plus `rg` decision-language search | 265 candidate files from 277 Markdown/RST files | Reviewed ADR adoption Stage 1 plan and roadmap first-slice plan. | Most plans are execution artifacts. They should not become ADRs unless their governing decisions are owner-confirmed and still current. | Covered by duplicate/current rows for already-backed ADR decisions and owner-review rows for unresolved roadmap decision records. | +| Embedded ADRs / `Docs/ADR/**` | ADR index, ADR files, and embedded ADR search | 17 ADR files plus `Docs/Evals/Evals-Plan-1.md` embedded ADR set as of TASK-520 | Reviewed `Docs/ADR/001` through `006` and the seven embedded evaluation ADRs during the original inventory; later backfill tasks added `Docs/ADR/007` through `017`. 
| Non-canonical ADR examples in best-practice or published docs were treated as examples, not project decisions. | Existing canonical ADRs are classified; embedded eval ADRs require owner review before backfill. | +| Module docs | Markdown/RST files under `tldw_Server_API/app` plus decision-language search | 66 candidate files from 71 Markdown/RST files | Reviewed Jobs, Scheduler, LLM_Calls, Resource_Governance, Security, DB_Management, Embeddings, Services, Collections, and related high-signal module docs. | Many module docs describe implementation surfaces rather than durable decisions. Convert only owner-confirmed cross-module defaults. | Covered by concrete module rows and owner-review slice recommendations. | + +## Inventory + +| ID | Source path | Decision summary | Candidate status | Recommended action | Owner-review need | Notes | +| --- | --- | --- | --- | --- | --- | --- | +| INV-001 | `Docs/ADR/001-adr-workflow-and-governance.md` | `Docs/ADR/` is the canonical ADR home; substantial specs, plans, and PRs require ADR assessment. | Current governing | Already covered by canonical ADR. | No | Governs this inventory and later backfill work. | +| INV-002 | `Docs/ADR/002-backlog-md-task-tracking.md` | Repo-changing work requires an associated Backlog.md task before file edits begin. | Current governing | Already covered by canonical ADR. | No | Duplicated in `AGENTS.md`; ADR remains canonical rationale. | +| INV-003 | `Docs/ADR/003-jobs-vs-scheduler-default.md` | Use Jobs by default for user-visible/admin-visible work; use Scheduler for internal dependency orchestration. | Current governing | Already covered by canonical ADR; later inventory should identify module-specific exceptions. | No | Jobs and Scheduler module docs duplicate parts of this rule. | +| INV-004 | `Docs/ADR/004-ai-generated-pr-change-summary-gate.md` | Materially AI-authored PRs require a human-written change summary before merge readiness. 
| Current governing | Already covered by canonical ADR. | No | Duplicated in `AGENTS.md` quality gates. | +| INV-005 | `Docs/ADR/005-bandit-touched-scope-security-gate.md` | Run Bandit on touched Python/code scope before completion. | Superseded | Keep as superseded historical ADR. | No | Superseded by ADR-006 for portable report paths. | +| INV-006 | `Docs/ADR/006-bandit-report-path-portability.md` | Keep the Bandit touched-scope gate but require portable report output paths and ignore generated reports. | Current governing | Already covered by canonical ADR. | No | Current security validation policy for agents. | +| INV-007 | `AGENTS.md` | The root agent guide requires ADR checks, immutable accepted ADRs, and backfilled ADR metadata. | Duplicate | Keep `AGENTS.md` as operational guidance linked to ADR-001 and ADR README. | No | Included as workflow context even though primary task scope is documentation sources. | +| INV-008 | `Docs/superpowers/specs/2026-06-02-adr-workflow-adoption-design.md` | Staged ADR adoption: Stage 1 framework, Stage 2 inventory, Stage 3 module/domain conversion. | Duplicate | Already implemented by ADR-001 plus TASK-509/TASK-510/TASK-511. | No | Do not create another ADR for the same workflow. | +| INV-009 | `Docs/Evals/Evals-Plan-1.md` | Evaluation data storage uses SQLite. | Superseded | Do not backfill the old SQLite-only ADR as accepted; use the TASK-517 audit as input to a future backend-aware persistence ADR only if owner approves. | Yes, for replacement ADR | Current `EvaluationsDatabase` supports SQLite or PostgreSQL; the Evaluations README documents optional PostgreSQL/RLS and per-user DB paths. | +| INV-010 | `Docs/Evals/Evals-Plan-1.md` | Evaluation, run, and dataset IDs use OpenAI-style prefixed UUIDs. | Current governing | Backfilled by `Docs/ADR/012-evaluations-resource-id-prefixes.md` via TASK-518. | No | Confirmed by TASK-517: create paths still generate `eval_`, `run_`, and `dataset_` IDs. 
| +| INV-011 | `Docs/Evals/Evals-Plan-1.md` | Evaluations use soft deletes while datasets use hard deletes. | Current governing | Backfilled by `Docs/ADR/013-evaluations-deletion-lifecycle.md` via TASK-518. | No | Confirmed by TASK-517: evaluations use `deleted_at` filters/updates and datasets use hard `DELETE`. | +| INV-012 | `Docs/Evals/Evals-Plan-1.md` | Evaluation complex objects are stored as JSON text in SQLite. | Needs owner review | Do not backfill the old SQLite-only JSON TEXT ADR as accepted; rewrite as a backend-aware storage representation decision if owner approves. | Yes | SQLite still uses JSON text serialization, but PostgreSQL uses JSONB and `_json_maybe` accepts already parsed JSON values. | +| INV-013 | `Docs/Evals/Evals-Plan-1.md` | Evaluation schemas use separate request/response models following OpenAI conventions. | Current governing | Backfilled by `Docs/ADR/014-evaluations-openai-compatible-schemas.md` via TASK-518. | No | Confirmed by TASK-517 in `openai_eval_schemas.py`, `evaluation_schemas_unified.py`, and API response-shape tests. | +| INV-014 | `Docs/Evals/Evals-Plan-1.md` | Evaluation runs use async processing with background tasks, progress tracking, cancellation, and webhooks. | Needs owner review | Do not backfill the broad async/background ADR as accepted; split core eval-run async behavior from recipe-run Jobs ownership if owner wants an ADR. | Yes | Partially current for core eval runs, but current docs and startup code route user-visible recipe runs through Jobs. | +| INV-015 | `Docs/Evals/Evals-Plan-1.md` | Evaluation API wraps existing evaluation modules rather than rewriting them. | Current governing | Backfilled by `Docs/ADR/015-evaluations-existing-evaluator-integration.md` via TASK-518. | No | Confirmed by TASK-517: runner/service code delegates to existing GEval, RAG, response-quality, OCR/proposition, and unified RAG components. 
| +| INV-016 | `Docs/Design/2026-05-02-apple-containerization-evaluation.md` | `vz_linux` should not require Apple `container`; keep repo-owned helper and guest-agent path, move only narrow image-store metadata toward OCI compatibility, and treat networking as a separate policy milestone. | Current governing | Backfilled by `Docs/ADR/010-sandbox-vz-runtime-ownership.md` via TASK-515. | Owner approved continuation | Source doc now links to covering ADR. | +| INV-017 | `Docs/Design/Workspace_Canonical_Model_Decision_2026_05.md` | `ResearchWorkspace` is the canonical shell for the first roadmap slice; `ChatWorkspace` and `DocumentWorkspace` remain specialized routes/modes. | Current governing | Backfilled by `Docs/ADR/007-research-workspace-canonical-first-slice-shell.md` via TASK-514. | Owner approved defaults | Source doc now links to covering ADR. | +| INV-018 | `Docs/Design/Workspace_Persistence_Architecture.md` | Workspace persistence uses split localStorage keys with optional IndexedDB offload for heavy chat and artifact payloads. | Current governing | Backfilled by `Docs/ADR/008-workspace-split-key-persistence-and-indexeddb-offload.md` via TASK-514. | Owner approved defaults | Source doc now links to covering ADR. | +| INV-019 | `Docs/superpowers/specs/2026-05-06-tldw-product-roadmap-design.md` and `Docs/superpowers/plans/2026-05-06-tldw-product-roadmap-first-slice-implementation-plan.md` | Roadmap work should create a canonical workspace decision record before route consolidation. | Duplicate | Covered as context by ADR-007; no duplicate ADR. | Owner approved defaults | Kept as context for the canonical workspace decision. | +| INV-020 | `Docs/Design/Quick_Chat_Docs_Assistant.md` | Quick Chat has three intentional modes: normal chat, retrieval-grounded Docs Q&A, and deterministic Browse Guides, using existing RAG/search and tutorial registries. | Current governing | Backfilled by `Docs/ADR/009-quick-chat-docs-assistant-modes.md` via TASK-514. 
| Owner approved defaults | Source doc now links to covering ADR. | +| INV-021 | `Docs/Design/STT_TTS_Audio_API_Design.md` | Audio API auth is centralized; TTS routing is model-first with fallback priority; streaming errors are structured failures by default; download links are non-streaming only. | Current governing | Backfilled by `Docs/ADR/011-audio-api-semantics.md` via TASK-516. | Owner approved continuation | Source doc now links to covering ADR; document says implemented and verified against code on 2026-02-22. | +| INV-022 | `Docs/superpowers/specs/2026-05-18-tts-stt-webui-extension-workflows-prd-design.md` | Storage ownership for TTS/STT presets must be reviewed before implementation. | Needs owner review | Treat as open decision or follow-up, not an accepted ADR, until owner confirms storage ownership. | Yes | Explicitly says review is required before implementation; do not backfill as accepted. | +| INV-023 | `Docs/Plans/2026-03-08-acp-persistence-registry-expansion-design.md` | ACP stores global session/agent registry data in shared `Databases/acp_sessions.db` and per-user orchestration data in `Databases/user_databases/<user_id>/orchestration.db` by default, with the user DB base directory overridable by configuration; uses separate session messages, denormalized token usage, health monitoring, review feedback, and SQLite WAL/FK settings. | Current governing | Backfilled by `Docs/ADR/016-acp-session-and-orchestration-persistence.md` via TASK-520. | Owner sign-off recorded in TASK-520 | Confirmed for implemented persistence paths. Do not claim unverified setup-guide consolidation or treat the legacy in-memory service class as governing architecture. See `Docs/ADR/inventory/2026-06-03-acp-rbac-confirmation-audit.md`. 
| +| INV-024 | `Docs/Design/Org_Team_RBAC_Propagation_V2.md` | Scoped RBAC defaults to `require_active`, denies admin-level permissions in scoped grants, uses JWT claims plus default membership for active scope, and allows MCP/tool permissions in scoped grants. | Current governing | Backfilled by `Docs/ADR/017-scoped-org-team-rbac-core-semantics.md` via TASK-520. | Owner sign-off recorded in TASK-520 | Confirmed for core scoped permission semantics. Exclude missing admin mapping endpoints, resolver metrics, and the source design's invalid-claim fallback from accepted claims. See `Docs/ADR/inventory/2026-06-03-acp-rbac-confirmation-audit.md`. | +| INV-025 | `Docs/Design/Data_Tables_Backend.md` | Data Tables should live in per-user Media DB, generate asynchronously via JobManager, snapshot RAG query sources, use server-side exports, expose UUIDs externally, and reuse File_Artifacts for export tracking. | Current governing | Backfilled by `Docs/ADR/023-data-tables-backend-storage-jobs-and-exports.md` via TASK-2273. | No | ADR-023 scopes the accepted decision to Media DB table/source/row ownership, Jobs-backed generation/regeneration, stored source snapshots, server-side exports, and table UUID API identity. It keeps numeric job IDs, wait/direct-export paths, source ownership depth, snapshot limits, File Artifacts internals, and frontend editing as caveats. See `Docs/ADR/inventory/2026-06-07-data-tables-confirmation-audit.md`. | +| INV-026 | `Docs/Design/DeepSeek_OCR_Backend.md` | DeepSeek OCR backend uses local Transformers with `deepseek` name, markdown default prompt, Gundam-equivalent sizing defaults, safe string output, non-persistent result handling by default, and CUDA/FlashAttention availability gates by default. | Current governing | Backfilled by `Docs/ADR/024-deepseek-ocr-local-transformers-backend.md` via TASK-2276. 
| No | ADR-024 scopes the accepted decision to the local Transformers-only `deepseek` backend, upstream `trust_remote_code=True` loading, markdown/Gundam-equivalent defaults, safe output extraction, temporary output by default, explicit dependency gates, registry/API exposure, and caveats for manual dependency install, default CUDA/FlashAttention behavior with env overrides, no server mode, actual registry priority behavior, and gated live-model tests. See `Docs/ADR/inventory/2026-06-07-deepseek-ocr-confirmation-audit.md`. | +| INV-027 | `tldw_Server_API/app/core/LLM_Calls/README.md` | LLM calls route through adapter registry, normalize OpenAI-compatible responses/SSE, allow trusted base URL overrides only for allowlisted providers, and reject request-level local provider URL overrides. | Current governing | Backfilled by `Docs/ADR/025-llm-provider-adapter-routing-and-overrides.md` via TASK-2310. | No | ADR-025 scopes the accepted decision to provider registry routing, OpenAI-compatible response/SSE normalization, strict local payload filtering, trusted allowlisted `base_url` overrides, and Chat adapter-request rejection of local provider `api_url`/`*_api_url` request keys. Keep caveats for config-derived local adapter URLs, boundary-specific enforcement, and provider-specific response preservation as an extension. See `Docs/ADR/inventory/2026-06-04-llm-provider-integration-confirmation-audit.md`. | +| INV-028 | `tldw_Server_API/app/core/Resource_Governance/README.md` | New endpoints should use claim-first auth; latency/cost-sensitive endpoints should decide Resource Governor policy and route-map coverage; DB policy store can merge file route maps and fail closed on missing DB policies. | Current governing | Backfilled by `Docs/ADR/018-resource-governance-endpoint-policy-and-route-map.md` via TASK-2234. 
| TASK-2233 confirmation; TASK-2234 backfill | ADR-018 scopes the accepted decision to new-endpoint governance, route-map ownership, DB policy-store/file route-map merge behavior, and request-ingress missing-policy denial. It excludes all-endpoint coverage, non-request middleware enforcement, and global Redis outage fail-closed claims. See `Docs/ADR/inventory/2026-06-04-resource-governance-confirmation-audit.md`. | +| INV-029 | `tldw_Server_API/app/core/Security/README.md` | Security controls are centralized for egress policy, security headers, request IDs, setup CSP/access guard, URL validation, and secret management; production should keep security middleware enabled. | Current governing | Request-edge middleware portion backfilled by `Docs/ADR/019-security-request-edge-middleware.md` via TASK-2248. Outbound egress/SSRF portion backfilled by `Docs/ADR/026-security-outbound-egress-and-ssrf-policy.md` via TASK-2311. AES-GCM JSON envelope portion backfilled by `Docs/ADR/027-security-aes-gcm-json-envelope-helpers.md` via TASK-2313. Restricted legacy pickle compatibility portion backfilled by `Docs/ADR/028-security-restricted-legacy-pickle-compatibility.md` via TASK-2314. Keep `SecretManager` adoption as a separate inventory-only slice unless implementation-backed follow-up is approved. | TASK-2247 confirmation; TASK-2248 request-edge backfill; TASK-2311 outbound egress backfill; TASK-2312 secrets/serialization audit; TASK-2313 crypto-envelope backfill; TASK-2314 restricted-pickle backfill | The broad row is current as module ownership, but one ADR would overclaim. ADR-019 scopes request-edge startup wiring, request IDs, drain gate, setup guard/CSP, path-scoped CSP/security headers, and caveats. ADR-026 scopes central egress helper ownership and the forward rule that outbound integrations must route untrusted URLs through the shared policy. ADR-027 scopes Security AES-GCM JSON envelope helpers and known configured encrypted-persistence consumers. 
ADR-028 scopes Security restricted pickle helpers for explicitly gated legacy compatibility paths. Remaining caveats include no universal `SecretManager` adoption, no universal encryption claim for all sensitive JSON, no universal pickle-deserialization routing claim, and no Embeddings cache-local unpickler consolidation. See `Docs/ADR/inventory/2026-06-04-security-confirmation-audit.md` and `Docs/ADR/inventory/2026-06-07-security-secrets-serialization-adoption-audit.md`. | +| INV-030 | `tldw_Server_API/app/core/DB_Management/README.md` | DB path utilities centralize per-user database locations under `Databases/user_databases`; content backend defaults to SQLite with PostgreSQL option. | Current governing | Backfilled by `Docs/ADR/020-db-management-per-user-paths-and-content-backend.md` via TASK-2254. | No | ADR-020 scopes the accepted decision to DB_Management per-user path ownership, SQLite default content mode, PostgreSQL content backend option, and startup validation. It keeps explicit SQLite path overrides, test fallback paths, deprecated aliases, historical compatibility paths, AuthNZ/users DB separation, and non-universal PostgreSQL support as caveats. See `Docs/ADR/inventory/2026-06-04-db-management-confirmation-audit.md`. | +| INV-031 | `tldw_Server_API/app/services/README.md` | Lifespan worker runtime state owns long-lived handles; background workers should use cooperative stop events; shutdown drains gates and stops workers in owned order. | Current governing | Backfilled by `Docs/ADR/021-services-lifecycle-startup-and-shutdown.md` via TASK-2260. | No | ADR-021 scopes the accepted decision to FastAPI lifespan Services orchestration, `LifespanWorkerRuntimeState` worker-session ownership, declarative lifecycle worker specs/engine/session, stop-event default strategy, bounded timeout/cancel fallback, and staged shutdown order. It keeps callback-only workers, legacy shutdown adapters, bounded lease drain, and non-Services-managed work as caveats. 
See `Docs/ADR/inventory/2026-06-04-services-lifecycle-confirmation-audit.md`. | +| INV-032 | `tldw_Server_API/app/core/Embeddings/README.md` | Embeddings use OpenAI-compatible API safeguards, provider auto-detect/adapters, cache/batching/breakers, and Redis Streams workers while Jobs remains the root status/billing record. | Current governing | Backfilled by `Docs/ADR/022-embeddings-api-and-media-pipeline.md` via TASK-2262. | No | ADR-022 scopes the accepted decision to OpenAI-compatible API semantics, provider resolution/allowlist safeguards, optional adapter routing with legacy provider-config fallback, endpoint cache/batching/circuit-breaker controls, and Jobs-root/Redis-stage media pipeline ownership. It keeps billing/accounting, local provider URL policy, vector-store backend evolution, broader cache architecture, and legacy Jobs worker details as caveats. See `Docs/ADR/inventory/2026-06-04-embeddings-confirmation-audit.md`. | +| INV-033 | `tldw_Server_API/app/core/Jobs/README.md` and `tldw_Server_API/app/core/Scheduler/README.md` | Jobs own durable user/admin-visible work; Scheduler owns internal dependency orchestration. | Duplicate | Covered by ADR-003; use module docs as context for future exceptions only. | No | Do not create duplicate ADR unless a module-specific exception is owner-approved. | +| INV-034 | `tldw_Server_API/app/core/Collections/README.md` | Collections listing includes legacy Media DB fallback for compatibility, marked deprecated. | Superseded | Keep as module-doc compatibility note unless owner requests a deprecation ADR. | Yes | Deprecated fallback should not become accepted architecture without owner review. | +| INV-035 | `Docs/Design/WebUI_Dependency_Audit.md` and `Docs/superpowers/plans/2026-05-07-webui-dependency-audit-implementation-plan.md` | WebUI dependency audit uses a decision legend to classify keep/remove/replace choices. 
| Needs owner review | Treat as audit process output, not ADR, unless a dependency choice is still a durable project rule. | Yes | Likely many local dependency decisions; should be backfilled only one major dependency at a time if needed. | +| INV-036 | `Docs/Plans/**` broad historical set | Historical implementation plans contain many decision words and local choices. | Needs owner review | Do not convert wholesale. Review only explicit architecture decision sections by module/domain. | Yes | 863 candidate files by broad search; most are not durable governing architecture rules. | +| INV-037 | `Docs/superpowers/specs/**` and `Docs/superpowers/plans/**` broad historical set | Historical specs/plans contain many proposed or task-local decisions. | Needs owner review | Do not convert wholesale. Use them as sources only when owner confirms a decision is still governing. | Yes | 456 candidate files across specs/plans by broad search. | + +## Recommended Backfill Slices For TASK-510 + +These are planning suggestions only. `TASK-510` should create child Backlog tasks after owner review confirms which rows are current and worth converting. + +| Slice | Candidate inventory IDs | Rationale | Owner-review prerequisite | Backlog task | +| --- | --- | --- | --- | --- | +| Workflow/governance cleanup | INV-001 through INV-008 | Existing ADRs are already canonical; mostly no backfill needed. | Confirm no additional workflow ADR is needed. | None by default. | +| Evaluations | INV-009 through INV-015 | TASK-518 backfilled confirmed current rows INV-010, INV-011, INV-013, and INV-015; INV-009 is superseded and INV-012/INV-014 need replacement or split review. | Keep unresolved Evaluations persistence/async rows inventory-only until owner approves replacement decisions. | TASK-518. | +| Sandbox/vz runtime | INV-016 | High-value decision record with clear adoption/defer/reject choices. | Owner approved ADR backfill continuation. | TASK-515. 
| +| Workspace/WebUI | INV-017 through INV-020 | Canonical workspace, persistence, and Quick Chat conventions are durable frontend/product architecture choices. | Owner approved defaults. | TASK-514. | +| Audio/TTS/STT | INV-021 and INV-022 | Audio API semantics are implemented; preset storage ownership remains unresolved. | Owner approved ADR backfill continuation for INV-021; keep INV-022 separate. | TASK-516 for INV-021; no task for INV-022 by default. | +| ACP/AuthNZ/RBAC | INV-023 and INV-024 | Persistence and scoped permission choices are security-sensitive. | TASK-519 confirmed bounded current behavior and caveats; TASK-520 backfilled ADR-016 and ADR-017 after owner sign-off. | TASK-520. | +| Data Tables | INV-025 | Current backend implementation covers Media DB table/source/row ownership, Jobs-backed generation/regeneration, stored RAG snapshots, server-side exports, and table UUID API identity. | TASK-2272 confirmed bounded current behavior and caveats. | TASK-2273 created ADR-023. | +| Provider/integration modules | INV-026 through INV-032 | OCR, LLM provider integration, RG, Security, DB, Services, and Embeddings contain durable conventions. | Split into smaller child tasks; do not combine all provider/security decisions in one ADR. INV-029 specifically needs split ADR backfills. | TASK-2275 confirmed INV-026, and TASK-2276 backfilled it as ADR-024. TASK-2232 confirmed INV-027, TASK-2309 aligned its local URL override policy, and TASK-2310 backfilled it as ADR-025. TASK-2234 backfilled INV-028 as ADR-018. TASK-2248 backfilled the request-edge portion of INV-029 as ADR-019, TASK-2311 backfilled its outbound egress/SSRF portion as ADR-026, TASK-2313 backfilled its AES-GCM JSON envelope portion as ADR-027, and TASK-2314 backfilled its restricted legacy pickle compatibility portion as ADR-028. TASK-2253 confirmed INV-030, and TASK-2254 backfilled it as ADR-020. TASK-2259 confirmed INV-031, and TASK-2260 backfilled it as ADR-021. 
TASK-2261 confirmed INV-032, and TASK-2262 backfilled it as ADR-022. | +| Historical plan/spec triage | INV-035 through INV-037 | Large candidate set with many local or stale choices. | Owner selects domains worth deeper review. | None by default. | + +## Proposed Owner Review Defaults + +This section is the actionable owner-review handoff. The default is to proceed with these dispositions unless the owner wants to override a specific row or slice. + +| Disposition | Inventory IDs | Default next action | +| --- | --- | --- | +| Already covered; no backfill task needed | INV-001 through INV-008, INV-033 | Keep the canonical ADRs and module docs as-is. Do not create duplicate ADRs. | +| Completed pilot backfill slice | INV-017, INV-018, INV-020, with INV-019 as context | TASK-514 created ADR-007, ADR-008, and ADR-009 for the Workspace/WebUI pilot slice. | +| Completed secondary backfill slice | INV-016 | TASK-515 created ADR-010 for the Sandbox/vz runtime posture. | +| Completed secondary backfill slice | INV-021 | TASK-516 created ADR-011 for implemented Audio API auth/routing/error/download-link semantics. Keep INV-022 separate because storage ownership is explicitly unresolved. | +| Completed Evaluations backfill slice | INV-010, INV-011, INV-013, INV-015 | TASK-518 created ADR-012, ADR-013, ADR-014, and ADR-015 for confirmed current Evaluations behavior. | +| Completed ACP/AuthNZ/RBAC backfill slice | INV-023, INV-024 | TASK-520 created ADR-016 and ADR-017 using TASK-519 evidence, caveats, and owner sign-off. | +| Completed Resource Governance backfill slice | INV-028 | TASK-2234 created ADR-018 using TASK-2233 evidence and caveats. | +| Completed Data Tables backfill slice | INV-025 | TASK-2273 created ADR-023 using TASK-2272 evidence and caveats. 
| +| Needs replacement/split owner review | INV-009, INV-012, INV-014 | Do not convert the old SQLite-only storage, SQLite-only JSON TEXT, or broad async/background text as accepted ADRs without a replacement or split decision. | +| Completed LLM provider integration backfill slice | INV-027 | TASK-2310 created ADR-025 using TASK-2232 confirmation evidence and TASK-2309 alignment. Keep caveats for Chat adapter-request boundary enforcement, config-derived local adapter URLs, and provider-specific response preservation as an extension. | +| Partially backfilled; remaining SecretManager slice inventory-only | INV-029 | TASK-2247 confirmed current Security module ownership and caveats. TASK-2248 created ADR-019 for request-edge middleware, TASK-2311 created ADR-026 for outbound egress/SSRF policy, TASK-2313 created ADR-027 for AES-GCM JSON envelope helpers, and TASK-2314 created ADR-028 for restricted legacy pickle compatibility. TASK-2312 audited secrets/serialization and found helper-level evidence but not enough caller adoption for a broad accepted ADR. Do not create one broad Security ADR; future secret lookup ADRs need narrower implementation-backed adoption work. | +| Completed DB Management backfill slice | INV-030 | TASK-2254 created ADR-020 for the bounded DB Management path/content-backend decision. Keep caveats for explicit SQLite path overrides, test fallback paths, deprecated aliases, AuthNZ/users DB separation, historical compatibility paths, and non-universal PostgreSQL support. | +| Completed Services lifecycle backfill slice | INV-031 | TASK-2260 created ADR-021 for the bounded Services lifecycle startup/shutdown decision. Keep callback-only workers, legacy shutdown adapters, bounded lease drain, and non-Services-managed work as caveats. | +| Completed Embeddings backfill slice | INV-032 | TASK-2262 created ADR-022 for the bounded Embeddings API and media pipeline decision. 
Keep billing/accounting, local provider URL policy, vector-store backend evolution, broader cache architecture, and legacy Jobs worker details as caveats. | +| Completed OCR/provider backfill slice | INV-026 | TASK-2276 created ADR-024 for the bounded DeepSeek OCR backend decision. Keep manual dependency install, `trust_remote_code=True`, CUDA/FlashAttention defaults with env overrides, local Transformers-only mode, temporary output by default, actual registry priority behavior, and gated live-model tests as caveats. | +| Inventory-only unless owner requests deeper work | INV-022, INV-034 through INV-037 | Keep classified in the inventory. Do not backfill historical plans, unresolved preset storage ownership, deprecated compatibility notes, or dependency-audit choices by default. | + +## TASK-510/TASK-511 Default Gate + +Default: `TASK-510` should complete at least one owner-reviewed backfill child slice before `TASK-511` evaluates global Superpowers changes. + +Gate result: TASK-514 completed the Workspace/WebUI pilot (`INV-017`, `INV-018`, `INV-020`) and produced ADR-007, ADR-008, and ADR-009. This satisfies the default evidence gate for `TASK-511`. + +Owner override: if speed matters more than process evidence, the owner can explicitly approve using this reviewed inventory plus bounded slice plan as sufficient evidence for `TASK-511`. Without that override, do the pilot backfill first. diff --git a/Docs/Published/ADR/inventory/2026-06-03-evaluations-confirmation-audit.md b/Docs/Published/ADR/inventory/2026-06-03-evaluations-confirmation-audit.md new file mode 100644 index 0000000000..c76dd34044 --- /dev/null +++ b/Docs/Published/ADR/inventory/2026-06-03-evaluations-confirmation-audit.md @@ -0,0 +1,54 @@ +# Evaluations ADR Confirmation Audit - 2026-06-03 + +**Related task:** TASK-517 +**Follow-up backfill task:** TASK-518 +**Scope:** `Docs/Evals/Evals-Plan-1.md` embedded ADRs mapped to inventory rows INV-009 through INV-015. 
+ +## Purpose + +Confirm which embedded Evaluations ADRs still describe current governing behavior before promoting any of them into canonical ADRs. + +This audit does not create accepted ADRs. It separates confirmed current decisions from stale, superseded, or partial historical decisions so the follow-up backfill can stay one-decision-per-ADR. + +## Evidence Reviewed + +| Area | Evidence | +| --- | --- | +| Embedded ADR source | `Docs/Evals/Evals-Plan-1.md`, Architecture Decision Records section | +| Evaluations storage and CRUD | `tldw_Server_API/app/core/DB_Management/Evaluations_DB.py` | +| Current module documentation | `tldw_Server_API/app/core/Evaluations/README.md` | +| API schemas | `tldw_Server_API/app/api/v1/schemas/openai_eval_schemas.py`, `tldw_Server_API/app/api/v1/schemas/evaluation_schemas_unified.py` | +| Run orchestration | `tldw_Server_API/app/core/Evaluations/eval_runner.py`, `tldw_Server_API/app/core/Evaluations/unified_evaluation_service.py` | +| Jobs boundary | `tldw_Server_API/app/services/startup_sidecar_owned_jobs_pollers.py`, `tldw_Server_API/app/core/Evaluations/recipe_runs_jobs_worker.py` | +| Tests sampled | `tldw_Server_API/tests/Evaluations/test_evaluations_unified.py`, `tldw_Server_API/tests/Evaluations/test_evaluations_postgres_crud.py`, `tldw_Server_API/tests/Evaluations/test_recipe_runs_jobs_worker.py`, `tldw_Server_API/tests/DB_Management/test_evaluations_unified_and_crud.py` | + +## Dispositions + +| Inventory ID | Embedded decision | Disposition | Evidence summary | Next action | +| --- | --- | --- | --- | --- | +| INV-009 | Use SQLite for evaluation data storage. | Superseded | `EvaluationsDatabase` is now backend-aware and documents SQLite or PostgreSQL support. It resolves the shared content backend, keeps SQLite initialization, and has a PostgreSQL bootstrap path with JSONB columns. The Evaluations README also describes optional PostgreSQL and RLS support plus per-user DB paths. 
| Do not backfill the old SQLite-only decision as accepted. A future persistence ADR should be backend-aware if the owner wants one. | +| INV-010 | Use prefixed UUIDs for evaluations, runs, and datasets. | Current governing | `create_evaluation`, `create_run`, and `create_dataset` still generate `eval_`, `run_`, and `dataset_` IDs. `UnifiedEvaluationService.create_run` pre-generates a `run_` ID before persistence. API tests assert `eval_` IDs. | Include in TASK-518 as a resource ID convention ADR. | +| INV-011 | Use soft deletes for evaluations and hard deletes for datasets. | Current governing | The evaluations table has `deleted_at`; get/list/update paths filter `deleted_at IS NULL`; `delete_evaluation` updates `deleted_at`; `delete_dataset` executes `DELETE FROM datasets`. Unified tests cover delete behavior. | Include in TASK-518 as a deletion lifecycle ADR. | +| INV-012 | Store complex objects as JSON TEXT in SQLite. | Needs owner review | SQLite DDL stores complex fields as `TEXT` and CRUD methods serialize with `json.dumps` and parse with `_json_maybe`. PostgreSQL DDL stores matching fields as `JSONB`, and `_json_maybe` accepts already parsed JSON-like values. The old SQLite-only text is true for SQLite but incomplete for the current backend-aware design. | Do not backfill the old text as accepted. Fold into a backend-aware persistence representation ADR only after owner review. | +| INV-013 | Use separate request/response schemas following OpenAI conventions. | Current governing | `openai_eval_schemas.py` explicitly defines OpenAI-style request and response models, `object` fields, Unix timestamps, and list wrappers. `evaluation_schemas_unified.py` keeps separate create/update/response/run/dataset models with compatible `object` and `created` fields. Tests assert list/object response shape. | Include in TASK-518 as an API schema convention ADR. | +| INV-014 | Use asyncio/background tasks for runs, progress, webhooks, and cancellation. 
| Needs owner review | Core evaluation runs still use `asyncio.create_task`, tracked `running_tasks`, progress updates, webhook dispatch, and cancellation. However, current module docs say user-visible persona dialogue-tree recipe runs must use Jobs, and startup code starts an Evaluation recipe-run Jobs worker. The old broad decision is therefore only partially current. | Do not backfill the broad embedded ADR as accepted. Split core eval-run async behavior from recipe-run Jobs ownership if the owner wants ADR coverage. | +| INV-015 | Wrap existing evaluation modules rather than rewrite. | Current governing | `eval_runner.py` imports and delegates to existing `ms_g_eval`, `RAGEvaluator`, `ResponseQualityEvaluator`, proposition evaluation, and the unified RAG pipeline. `unified_evaluation_service.py` maps GEval, RAG, response quality, OCR, and other types to dedicated evaluator services. | Include in TASK-518 as an evaluator integration strategy ADR. | + +## Follow-Up Scope + +TASK-518 should backfill only these confirmed current decisions: + +- INV-010: Evaluations resource ID prefixes, backfilled by `Docs/ADR/012-evaluations-resource-id-prefixes.md`. +- INV-011: Evaluation and dataset deletion lifecycle, backfilled by `Docs/ADR/013-evaluations-deletion-lifecycle.md`. +- INV-013: OpenAI-compatible request/response schema shape, backfilled by `Docs/ADR/014-evaluations-openai-compatible-schemas.md`. +- INV-015: Reuse/wrap existing evaluator modules, backfilled by `Docs/ADR/015-evaluations-existing-evaluator-integration.md`. + +TASK-518 should exclude these rows from direct accepted backfill: + +- INV-009: superseded by backend-aware SQLite/PostgreSQL storage. +- INV-012: partially current for SQLite but incomplete without the PostgreSQL JSONB representation. +- INV-014: partially current for core runs but incomplete without the recipe-run Jobs boundary. + +## Verification Notes + +This is a documentation-only audit. 
No Python code is changed by TASK-517, so Bandit is not applicable beyond recording the docs-only skip. diff --git a/Docs/Published/ADR/inventory/2026-06-04-db-management-confirmation-audit.md b/Docs/Published/ADR/inventory/2026-06-04-db-management-confirmation-audit.md new file mode 100644 index 0000000000..f396a6a34c --- /dev/null +++ b/Docs/Published/ADR/inventory/2026-06-04-db-management-confirmation-audit.md @@ -0,0 +1,50 @@ +# DB Management Confirmation Audit - 2026-06-04 + +**Related task:** TASK-2253 +**Follow-up:** TASK-2254 +**Inventory row:** INV-030 +**Source candidate:** `tldw_Server_API/app/core/DB_Management/README.md` +**Disposition:** Current governing; ready for a bounded accepted ADR backfill. + +## Decision Candidate Under Review + +INV-030 summarized the DB Management convention as: + +> DB path utilities centralize per-user database locations under `Databases/user_databases`; content backend defaults to SQLite with PostgreSQL option. + +The candidate is current enough for accepted ADR backfill if the ADR is scoped to per-user database path ownership, SQLite default content-storage behavior, the PostgreSQL shared content backend option, and startup validation for PostgreSQL content mode. It should not claim every database family is fully PostgreSQL-backed or that all historical compatibility paths have been removed. + +## Confirmed Evidence + +| Claim | Evidence | Result | +| --- | --- | --- | +| `DB_Management` owns the database path and backend abstraction boundary for content and user-scoped stores. | The source README describes `DB_Management` as the central home for content, prompts, notes, evaluations, workflows, per-user DB paths, SQLite/PostgreSQL backend abstraction, migrations, and factories at `tldw_Server_API/app/core/DB_Management/README.md:3` and `:13` through `:23`. 
It identifies `db_path_utils.py`, `content_backend.py`, `DB_Manager.py`, backend adapters, migrations, and feature DB modules as extension points at `:38` through `:44` and `:72` through `:81`. | Confirmed as module ownership. Do not treat this as proof every historical DB caller has no compatibility path. | +| Per-user database paths are centralized through `DatabasePaths`. | `_resolve_user_id_for_storage()` maps missing single-user IDs to `DatabasePaths.get_single_user_id()` and rejects missing multi-user IDs at `tldw_Server_API/app/core/DB_Management/db_path_utils.py:82` through `:92`; `get_single_user_id()` reads `SINGLE_USER_FIXED_ID` at `:867` through `:874`. `DatabasePaths.resolve_user_db_base_dir()` reads `USER_DB_BASE_DIR`, otherwise falls back outside tests to repo-root `Databases/user_databases` at `:429` through `:495`. `get_user_db_base_dir()` creates the base at `:497` through `:503`; `get_user_base_directory()` creates the user directory at `:505` through `:531`. `get_media_db_path()`, `get_chacha_db_path()`, and `get_prompts_db_path()` derive per-user Media, ChaChaNotes, and Prompts paths at `:533` through `:562`. | Confirmed. The default production/local base is repo-root `Databases/user_databases`; test mode intentionally uses an isolated temp fallback when no base is configured. | +| Per-user path behavior is covered by tests. | `tldw_Server_API/tests/DB_Management/test_db_path_utils.py:20` through `:204` covers user expansion, relative-path resolution, fixed single-user ID, multi-user missing user rejection, settings/env precedence, invalid user IDs, safe Prompts salts, and isolated test fallback. `test_db_path_utils_env.py:8` through `:56` covers absolute and project-root-relative `USER_DB_BASE_DIR`. `test_db_paths_media_prompts_env.py:7` through `:36` confirms Media and Prompts paths follow the configured base. | Confirmed. | +| SQLite is the default content backend, and SQLite content mode uses per-user file paths rather than a shared content backend.
| `load_content_db_settings()` defaults `TLDW_CONTENT_DB_BACKEND`/`Database.type` to `sqlite` at `tldw_Server_API/app/core/DB_Management/content_backend.py:57` through `:83`. `get_content_backend()` returns `None` unless the backend type is PostgreSQL at `:280` through `:297`, with the comment that SQLite callers should resolve per-user file paths instead of a root-level DB. Runtime defaults set `single_user_db_path` to configured SQLite path or `DatabasePaths.get_media_db_path(DatabasePaths.get_single_user_id())` at `tldw_Server_API/app/core/DB_Management/media_db/runtime/defaults.py:41` through `:47` and recompute that default on reset at `:135` through `:141`. `test_create_media_database_sqlite_uses_default_path_and_no_backend()` confirms SQLite media creation uses the default path and `backend=None` at `tldw_Server_API/tests/DB_Management/test_media_db_runtime_factory.py:73` through `:97`. | Confirmed. Caveat: explicit `TLDW_CONTENT_SQLITE_PATH` or `[Database].sqlite_path` can override the default Media path; do not write the ADR as "SQLite always uses the per-user path." | +| PostgreSQL is a shared content backend option, not the default. | `load_content_db_settings()` maps `postgres`/`postgresql` and builds a PostgreSQL `DatabaseConfig` from `TLDW_CONTENT_PG_*`, `TLDW_PG_*`, `POSTGRES_TEST_*`, or config values at `tldw_Server_API/app/core/DB_Management/content_backend.py:46` through `:153`. `get_content_backend()` creates and caches a shared backend only for PostgreSQL at `:280` through `:332`, with cache signatures including connection target and sensitive connection settings. `test_content_backend_cache_includes_password_and_sslmode()` confirms password and SSL mode changes invalidate the cache at `tldw_Server_API/tests/DB_Management/test_content_backend_cache.py:258` through `:297`. | Confirmed. | +| Media DB runtime requires a usable PostgreSQL backend when PostgreSQL content mode is configured. 
| `media_db/runtime/factory.py` raises if `postgres_content_mode` lacks a PostgreSQL backend at `tldw_Server_API/app/core/DB_Management/media_db/runtime/factory.py:42` through `:71`. `validate_postgres_content_backend()` no-ops for SQLite, but for PostgreSQL it checks schema version and required Media/sync RLS policies at `:88` through `:180`. Tests cover backend-required behavior and schema/RLS validation failures in `tldw_Server_API/tests/DB_Management/test_media_db_runtime_factory.py:130` through `:270`. | Confirmed for Media/content backend runtime. Do not broaden to every DB family without separate evidence. | +| Startup runs PostgreSQL content backend validation and fails on runtime validation errors. | `prepare_startup_pre_core()` calls `_validate_startup_content_backend()` at `tldw_Server_API/app/services/startup_pre_core.py:45` through `:64`. `startup_content_backend_validation.py` calls `DB_Manager.validate_postgres_content_backend()`, logs success, reraises `RuntimeError`, and only skips import errors at `tldw_Server_API/app/services/startup_content_backend_validation.py:10` through `:26`. Tests cover success logging, runtime-error reraising, and import-error skip behavior in `tldw_Server_API/tests/Services/test_startup_content_backend_validation.py:33` through `:96`, and startup-pre-core call ordering in `tldw_Server_API/tests/Services/test_startup_pre_core.py:55` through `:95`. | Confirmed. | +| PostgreSQL RLS auto-ensure exists but is separately env-gated. | `_maybe_ensure_pg_rls()` only applies RLS installers when `RAG_ENSURE_PG_RLS` is truthy at `tldw_Server_API/app/services/startup_infra_services.py:124` through `:141`. `_run_pg_rls_auto_ensure()` applies Prompt Studio and ChaCha RLS installers at `tldw_Server_API/app/main.py:124` through `:131`; the extracted helper logs the combined result at `tldw_Server_API/app/services/startup_pg_rls.py:10` through `:25`. 
`test_pg_rls_policies_contract.py:52` through `:74` covers non-Postgres no-op and transaction rollback on partial failure; `:76` through `:95` verifies combined success logging. | Confirmed as an optional startup helper. Keep it as a caveat/consequence, not the primary ADR decision. | +| Several API dependencies resolve user-scoped DB paths through `DatabasePaths`. | Media dependencies resolve `DatabasePaths.get_media_db_path(user_id)` at `tldw_Server_API/app/api/v1/API_Deps/DB_Deps.py:90` through `:120`, then use a shared backend only when PostgreSQL mode is active at `:134` through `:158`. ChaCha dependencies resolve `DatabasePaths.get_chacha_db_path(user_id)` at `tldw_Server_API/app/api/v1/API_Deps/ChaCha_Notes_DB_Deps.py:360` through `:374` and instantiate `CharactersRAGDB` at `:442` through `:458`. Prompts dependencies resolve `DatabasePaths.get_prompts_db_path()` and instantiate per-user `PromptsDatabase` at `tldw_Server_API/app/api/v1/API_Deps/Prompts_DB_Deps.py:255` through `:264` and `:320` through `:377`. | Confirmed for representative user-scoped stores. Caveat: this confirms representative coverage, not every DB dependency in the repo. | + +## Caveats For ADR Backfill + +- Do not claim universal PostgreSQL support for every DB family. Current evidence strongly supports Media/content backend runtime plus representative backend-aware factories and dependencies, while some user-scoped DBs still instantiate SQLite files directly in normal dependency paths. +- Do not claim SQLite always uses `Databases/user_databases`; explicit `TLDW_CONTENT_SQLITE_PATH` or `[Database].sqlite_path` can override the Media DB path. +- Do not treat AuthNZ users DB as part of this decision. AuthNZ uses its own `DATABASE_URL` configuration and belongs in AuthNZ/persistence decisions, not this content/per-user DB path ADR. +- Do not claim all legacy paths have been removed. 
`USER_DB_BASE` remains a deprecated alias for selected compatibility paths, and root-level/path-override compatibility can still exist where explicitly configured. +- Test mode intentionally avoids writing to repo-local `Databases/user_databases` when no `USER_DB_BASE_DIR` is configured. ADR language should distinguish production/local defaults from test isolation. +- `RAG_ENSURE_PG_RLS` auto-ensure is optional and env-gated; startup validation is mandatory when the validation helper imports successfully and PostgreSQL content mode requires it. + +## Recommended Next Action + +Create one accepted ADR via TASK-2254, expected as `Docs/ADR/020-db-management-per-user-paths-and-content-backend.md`, covering: + +1. `DatabasePaths` owns per-user database path resolution under `USER_DB_BASE_DIR`, defaulting to `Databases/user_databases` outside tests. +2. SQLite is the default content-storage mode and uses per-user file paths by default rather than a shared content backend. +3. PostgreSQL is the shared content backend option for content/Media runtime when explicitly configured. +4. PostgreSQL content mode must have a usable shared backend and startup validation for schema/RLS readiness. +5. Caveats for explicit SQLite path overrides, test fallback directories, AuthNZ/users DB separation, deprecated compatibility aliases, and non-universal PostgreSQL support. + +Update INV-030 to record TASK-2253 confirmation and keep ADR creation bounded to these claims.
diff --git a/Docs/Published/ADR/inventory/2026-06-04-embeddings-confirmation-audit.md b/Docs/Published/ADR/inventory/2026-06-04-embeddings-confirmation-audit.md new file mode 100644 index 0000000000..9770f99a78 --- /dev/null +++ b/Docs/Published/ADR/inventory/2026-06-04-embeddings-confirmation-audit.md @@ -0,0 +1,66 @@ +# Embeddings ADR Candidate Confirmation Audit - 2026-06-04 + +**Related task:** TASK-2261 +**Follow-up:** TASK-2262 +**Inventory row:** INV-032 in `Docs/ADR/inventory/2026-06-03-decision-inventory.md` +**Source:** `tldw_Server_API/app/core/Embeddings/README.md` + +## Candidate under review + +INV-032 summarized the Embeddings convention as: + +> Embeddings use OpenAI-compatible API safeguards, provider auto-detect/adapters, cache/batching/breakers, and Redis Streams workers while Jobs remains the root status/billing record. + +## Confirmation result + +Current governing, with bounded scope. Create one accepted ADR via TASK-2262, expected as `Docs/ADR/022-embeddings-api-and-media-pipeline.md`. + +The future ADR should cover: + +1. OpenAI-compatible embeddings request/response semantics and endpoint safeguards. +2. Provider resolution by explicit header, provider-qualified model id, or model-name heuristic, plus allowlist and unsupported-provider guards. +3. Optional LLM adapter-registry routing when enabled, with legacy provider-config/direct provider execution as the fallback path. +4. Endpoint reliability controls: keyed TTL cache, request batching, provider-scoped circuit breakers, connection reuse, provider fallback rules, and health/admin breaker visibility. +5. Media embeddings pipeline ownership: core Jobs creates and exposes the durable root `embeddings_pipeline` record, while Redis Streams carries chunking, embedding, storage, and content stage messages. 
+ +## Evidence + +- `tldw_Server_API/app/api/v1/schemas/embeddings_models.py:18` defines an OpenAI-style `CreateEmbeddingRequest` with forbidden extra fields, string/list/token-array input, required model, `encoding_format`, `dimensions`, and `user`. +- `tldw_Server_API/app/api/v1/schemas/embeddings_models.py:64` defines an OpenAI-style list response with embedding data and usage. +- `tldw_Server_API/app/api/v1/endpoints/embeddings_v5_production_enhanced.py:2241` wires the main create endpoint behind embeddings create rate limits and API-call billing limits. +- `tldw_Server_API/app/api/v1/endpoints/embeddings_v5_production_enhanced.py:2272` resolves provider input, including explicit `x-provider`, provider-qualified model ids, and HuggingFace-style model-name heuristics. +- `tldw_Server_API/app/api/v1/endpoints/embeddings_v5_production_enhanced.py:2323` validates requested dimensions before provider execution. +- `tldw_Server_API/app/api/v1/endpoints/embeddings_v5_production_enhanced.py:2331` rejects empty inputs and enforces list/token-array shape limits before policy checks. +- `tldw_Server_API/app/api/v1/endpoints/embeddings_v5_production_enhanced.py:2372` enforces per-model token limits with fail-fast `input_too_long` responses. +- `tldw_Server_API/app/api/v1/endpoints/embeddings_v5_production_enhanced.py:2398` applies provider/model allowlists after input validation, and `:2419` rejects recognized but unimplemented providers with 501 instead of silently falling through. +- `tldw_Server_API/app/api/v1/endpoints/embeddings_v5_production_enhanced.py:2552` optionally routes through the LLM embeddings adapter registry when `LLM_EMBEDDINGS_ADAPTERS_ENABLED` is truthy. +- `tldw_Server_API/app/api/v1/endpoints/embeddings_v5_production_enhanced.py:2633` uses a fallback chain on provider failure, while explicit `x-provider` disables fallback by default unless `EMBEDDINGS_ALLOW_FALLBACK_WITH_HEADER` is enabled. 
+- `tldw_Server_API/app/api/v1/endpoints/embeddings_v5_production_enhanced.py:1867` wraps provider execution in provider-scoped circuit breakers; `:3548` exposes breaker status in health output; `:3611` and `:3623` expose admin breaker status/reset endpoints. +- `tldw_Server_API/app/api/v1/endpoints/embeddings_v5_production_enhanced.py:2057` performs cache lookup, uncached batching, provider execution, response-count validation, and cache writeback. +- `tldw_Server_API/app/api/v1/endpoints/embeddings_v5_production_enhanced.py:1281` partitions local API cache identity by backend URL while stripping credentials and sensitive query parameters. +- `tldw_Server_API/app/core/Embeddings/request_batching.py:87` defines the request batcher; `:166` queues requests per provider/model/config; `:313` collects batches by size/timeout; `:364` processes and distributes batched results. +- `tldw_Server_API/app/api/v1/endpoints/media_embeddings.py:124` forces media embedding job backend ownership to core Jobs, ignoring other backend override values. +- `tldw_Server_API/app/api/v1/endpoints/media_embeddings.py:663` and `:749` enqueue single and batch media embedding work through `EmbeddingsJobsAdapter`. +- `tldw_Server_API/app/core/Embeddings/jobs_adapter.py:129` creates root Jobs records with `job_type="embeddings_pipeline"` and enqueues Redis stages instead of creating durable stage Jobs. +- `tldw_Server_API/app/core/Embeddings/services/redis_worker.py:86` handles Redis stage messages and `:131`, `:160`, `:187`, and `:202` run chunking, embedding, storage, and content stage handlers. +- `tldw_Server_API/app/core/Embeddings/services/redis_worker.py:172`, `:197`, and `:240` update the root Jobs result/status on stage progress, completion, or failure. +- `tldw_Server_API/app/core/Embeddings/services/jobs_worker.py:1` explicitly labels the Jobs worker as legacy while stating root Jobs remain the status/billing record. 
+- `tldw_Server_API/tests/Embeddings/test_embeddings_policy.py:19` and `:65` cover token-limit and allowlist rejection. +- `tldw_Server_API/tests/Embeddings/test_embeddings_fallback.py:28` and `:86` cover fallback behavior and explicit-header fallback suppression. +- `tldw_Server_API/tests/Embeddings/test_embeddings_endpoint_cache_identity.py:19` and `:129` cover cache backend identity sanitization and distinct local API backend cache keys. +- `tldw_Server_API/tests/Embeddings/test_request_batching.py:24` covers provider credentials through batched requests. +- `tldw_Server_API/tests/Embeddings/test_embeddings_jobs_adapter.py:21` and `:64` cover idempotent root job creation and status derivation from the root Job. +- `tldw_Server_API/tests/Embeddings/test_embeddings_redis_worker.py:8` and `:62` cover Redis stage handoff and root-job completion ordering. + +## Caveats for the ADR + +- Do not claim Redis Streams owns durable status or billing. Redis Streams is the stage-delivery mechanism; root Jobs records remain the durable status surface. The direct create endpoints also have API-call billing limits, Resource Governor token reservation, and best-effort usage logging, but the media pipeline billing behavior was not fully audited as a separate accounting decision. +- Do not claim all providers route through one adapter registry. The adapter registry is optional and currently gated by `LLM_EMBEDDINGS_ADAPTERS_ENABLED`; legacy provider config/direct execution remains the fallback path and is still current. +- Do not import INV-027's local provider URL policy. Embeddings `local_api` accepts configured/API URL inputs in provider config paths and partitions cache keys by sanitized backend identity. +- Do not overstate cache architecture. The main endpoint uses the local keyed `TTLCache`; broader multi-tier cache modules exist, but this confirmation only supports a bounded claim around endpoint cache identity and supporting cache behavior. 
+- Do not turn ChromaDB versus pgvector storage into this ADR. The README mentions Chroma per-user collections and optional pgvector via RAG adapters, but storage-backend evolution should be a separate ADR if needed. +- Do not make the legacy Jobs worker the primary pipeline path. The current module explicitly labels it legacy; the accepted decision should focus on root Jobs ownership plus Redis Streams stage delivery. + +## Recommendation + +Create TASK-2262 for one accepted Embeddings ADR. Scope it to the API/provider safeguards, reliability controls, and Jobs-root/Redis-stage media pipeline ownership confirmed above. Update INV-032 to reference TASK-2261 confirmation and TASK-2262 backfill. diff --git a/Docs/Published/ADR/inventory/2026-06-04-llm-provider-integration-confirmation-audit.md b/Docs/Published/ADR/inventory/2026-06-04-llm-provider-integration-confirmation-audit.md new file mode 100644 index 0000000000..b84e150960 --- /dev/null +++ b/Docs/Published/ADR/inventory/2026-06-04-llm-provider-integration-confirmation-audit.md @@ -0,0 +1,40 @@ +# LLM Provider Integration Confirmation Audit - 2026-06-04 + +**Related task:** TASK-2232 +**Inventory row:** INV-027 +**Source candidate:** `tldw_Server_API/app/core/LLM_Calls/README.md` +**Disposition:** Backfilled by `Docs/ADR/025-llm-provider-adapter-routing-and-overrides.md` via TASK-2310. + +## Decision Candidate Under Review + +INV-027 summarized the LLM provider integration convention as: + +> LLM calls route through adapter registry, normalize OpenAI-compatible responses/SSE, allow trusted base URL overrides only for allowlisted providers, and reject request-level local provider URL overrides. + +TASK-2232 confirmed the first three claims as current ADR source material and found that the final local URL override claim was contradicted by the then-current Chat request-building and local adapter path. 
TASK-2309 aligned that final claim by rejecting request-level local endpoint URL overrides before adapter dispatch. TASK-2310 then backfilled the bounded accepted decision as ADR-025. + +## Confirmed Evidence + +| Claim | Evidence | Result | +| --- | --- | --- | +| LLM calls are adapter-registry routed. | `tldw_Server_API/app/core/LLM_Calls/README.md:28` through `:39` describe registry routing and provider adapters. `tldw_Server_API/app/core/LLM_Calls/adapter_registry.py:34` through `:62` register commercial, custom OpenAI-compatible, and local adapters by default. `tldw_Server_API/tests/LLM_Calls/test_adapter_registry_wrapper_migration.py:41` through `:87` verify registration, caching, aliases, capability isolation, and config disablement. | Confirmed current. | +| Responses and streams are normalized to OpenAI-compatible shapes and SSE. | `tldw_Server_API/app/core/LLM_Calls/README.md:5`, `:10`, `:15`, and `:16` document OpenAI-compatible response/SSE normalization and provider-response preservation. `tldw_Server_API/tests/LLM_Calls/test_local_streaming_contract.py:54` through `:106` verifies local stream normalization and final `[DONE]`. `tldw_Server_API/tests/LLM_Calls/test_provider_response_preservation.py:11` through `:93` verifies provider-specific response preservation for non-OpenAI providers. | Confirmed current. | +| `base_url` request overrides are trusted-caller and allowlist gated. | `tldw_Server_API/app/core/LLM_Calls/README.md:48` through `:51` documents additive request overrides and the allowlisted `base_url` gate. `tldw_Server_API/app/core/Chat/chat_service.py:1698` through `:1731` checks `base_url`/`api_base_url`, provider allowlist, trusted caller status, and URL validation. `tldw_Server_API/tests/Chat/unit/test_chat_service_base_url_override.py:20` through `:43` verifies allowed, untrusted, and not-allowlisted cases. | Confirmed current. | +| Strict OpenAI-compatible mode drops selected unsupported local payload fields. 
| `tldw_Server_API/app/core/LLM_Calls/README.md:47`, `:75`, and `:83` through `:86` document strict local compatibility behavior. `tldw_Server_API/tests/LLM_Calls/test_llamacpp_strict_filter.py:29` through `:94` and `tldw_Server_API/tests/LLM_Calls/test_vllm_strict_filter.py:22` through `:78` verify non-standard fields and cache hints are not forwarded to strict local payloads. | Confirmed current, but this is not the same as rejecting local endpoint URL overrides. | + +## Local URL Override Caveat Resolution + +The source README says local provider base URLs are config-only and request-level `api_url`/`*_api_url` overrides are rejected. TASK-2232 found that code did not fully support that claim: + +- `ChatCompletionRequest` allows extra request fields with `model_config = ConfigDict(extra="allow")` in `tldw_Server_API/app/api/v1/schemas/chat_request_schemas.py:1026` through `:1036`. +- `build_call_params_from_request()` starts from `request_data.model_dump(...)` in `tldw_Server_API/app/core/Chat/chat_service.py:1945` through `:1968`; its explicit exclusions do not include `api_url` or provider-specific `*_api_url` keys. +- `_build_adapter_request_from_chat_args()` skips `base_url` and `api_base_url`, but it does not skip `api_url`; it passes through unknown non-null keys in `tldw_Server_API/app/core/Chat/chat_service.py:1795` through `:1823`. +- Several local adapters then map `request.get("api_url")` into provider helper arguments: llama.cpp at `tldw_Server_API/app/core/LLM_Calls/providers/local_adapters.py:1871` through `:1899`, Ooba at `:1937` through `:1965`, TabbyAPI at `:1973` through `:2005`, vLLM as `vllm_api_url` at `:2013` through `:2046`, Ollama at `:2054` through `:2083`, and Aphrodite at `:2091` through `:2123`. 
+ +TASK-2309 resolves this for the Chat adapter-request path by adding a local-provider guard in `tldw_Server_API/app/core/Chat/chat_service.py` that rejects non-null `api_url` and provider-specific `*_api_url` keys for local providers before adapter dispatch. `tldw_Server_API/tests/Chat/unit/test_chat_service_base_url_override.py` now covers `api_url`, `vllm_api_url`, and `ollama_api_url` rejection while preserving trusted allowlisted `base_url` behavior for supported providers. + +Remaining caveat: local adapters still accept config-derived URL values internally. The accepted ADR should describe request-level rejection at the Chat adapter-request boundary, not claim local helper functions can never receive endpoint URLs from trusted config paths. + +## Backfill Result + +ADR-025 covers registry routing, OpenAI-compatible response/SSE normalization, strict local payload filtering, trusted allowlisted `base_url` overrides, and request-level local endpoint URL rejection. Keep this audit as the evidence and caveat record for boundary-specific enforcement, config-derived local adapter URLs, and provider-specific response preservation as an extension. diff --git a/Docs/Published/ADR/inventory/2026-06-04-resource-governance-confirmation-audit.md b/Docs/Published/ADR/inventory/2026-06-04-resource-governance-confirmation-audit.md new file mode 100644 index 0000000000..b358c0f60d --- /dev/null +++ b/Docs/Published/ADR/inventory/2026-06-04-resource-governance-confirmation-audit.md @@ -0,0 +1,45 @@ +# Resource Governance Confirmation Audit - 2026-06-04 + +**Related task:** TASK-2233 +**Follow-up:** TASK-2234 +**Inventory row:** INV-028 +**Source candidate:** `tldw_Server_API/app/core/Resource_Governance/README.md` +**Disposition:** Current governing; ready for a bounded accepted ADR backfill. 
+ +## Decision Candidate Under Review + +INV-028 summarized the Resource Governance convention as: + +> New endpoints should use claim-first auth; latency/cost-sensitive endpoints should decide Resource Governor policy and route-map coverage; DB policy store can merge file route maps and fail closed on missing DB policies. + +The candidate is current enough for accepted ADR backfill if the ADR is scoped to new-endpoint governance, route-map ownership, DB policy-store route-map merge behavior, and request-ingress missing-policy denial. It should not claim universal coverage for every existing endpoint or blanket fail-closed behavior for every Resource Governor category/outage mode. + +## Confirmed Evidence + +| Claim | Evidence | Result | +| --- | --- | --- | +| New endpoints should use claim-first auth dependencies. | `tldw_Server_API/app/core/Resource_Governance/README.md:11` says new endpoints should use `get_auth_principal`, `RequirePermission(...)`, `RequireRole(...)`, or `require_service_principal()` and should not gate new behavior on `AUTH_MODE` or mode helpers. `Docs/Published/Code_Documentation/Guides/AuthNZ_Code_Guide.md:281` through `:283` repeats the same guardrail. `tldw_Server_API/tests/AuthNZ_Unit/test_claim_first_single_user_mode_guardrail.py:23` through `:42` scans API v1 endpoint code for non-allowlisted `is_single_user_mode()` authorization branches. Resource Governor admin endpoints import `RequireRole` at `tldw_Server_API/app/api/v1/endpoints/resource_governor.py:11` and gate admin/control routes with `Depends(RequireRole("admin"))` at `:71` through `:75`, `:212` through `:214`, `:275` through `:277`, and later diagnostic routes. | Confirmed as a new-endpoint governance rule and route-level guardrail. | +| Latency/cost-sensitive endpoints should decide Resource Governor applicability and route-map coverage. 
| `tldw_Server_API/app/core/Resource_Governance/README.md:12` and `:13` require a Resource Governor decision for latency/cost-sensitive/user-facing endpoints and a matching policy-store plus `route_map` entry when applicable. The default YAML contains Resource Governor policies and route maps under `tldw_Server_API/Config_Files/resource_governor_policies.yaml:267` through `:414`, including chat, embeddings, audio, RAG, chatbooks, watchlists, and AuthNZ paths. `tldw_Server_API/tests/Resource_Governance/test_slowapi_decorated_routes_mapped.py:30` through `:57` verifies representative ingress-limited paths resolve to an existing policy, and `tldw_Server_API/tests/Resource_Governance/test_auth_route_map_coverage.py:35` through `:57` verifies AuthNZ routes resolve to `authnz.*` policies. | Confirmed as route-map ownership and representative coverage, not as an all-endpoints guarantee. | +| DB policy store can merge file route maps, with file route-map precedence. | The README documents `RG_POLICY_STORE=file|db` at `tldw_Server_API/app/core/Resource_Governance/README.md:17` through `:18` and says DB mode merges the file `route_map` into the DB policy snapshot at `:114`. `PolicySnapshot` carries `route_map` at `tldw_Server_API/app/core/Resource_Governance/policy_loader.py:22` through `:26`; DB loading reads DB policies and route map at `:63` through `:78`, reads the file route map at `:81` through `:90`, and merges file route maps over DB route maps at `:92` through `:111`. `tldw_Server_API/tests/Resource_Governance/test_policy_loader_route_map_db_store.py:16` through `:30` verifies DB-store snapshots include route-map entries from the file, and `tldw_Server_API/tests/Resource_Governance/test_policy_loader_reload_db_store.py:28` through `:57` verifies file route-map entries survive DB policy reloads. | Confirmed. | +| Route-map resolution is path first, then tag. | The README states this resolution order at `tldw_Server_API/app/core/Resource_Governance/README.md:125`. 
`RGSimpleMiddleware._derive_policy_id()` initializes route maps at `tldw_Server_API/app/core/Resource_Governance/middleware_simple.py:99` through `:104`, checks `by_path` at `:116` through `:132`, then checks `by_tag` at `:133` through `:145`. `tldw_Server_API/tests/Resource_Governance/test_middleware_simple.py:73` through `:95` verifies denial and headers through tag/path route maps, and `:123` through `:129` verifies newer domain paths resolve through explicit path mappings. | Confirmed. | +| Middleware ingress enforcement is request-category enforcement; other categories need endpoint plumbing. | The README says the middleware enforces only the `requests` category at `tldw_Server_API/app/core/Resource_Governance/README.md:125` through `:126`. `RGSimpleMiddleware.__call__()` derives the policy id at `tldw_Server_API/app/core/Resource_Governance/middleware_simple.py:210`, stores it on request state at `:217`, creates an `RGRequest` with `categories={"requests": {"units": 1}}` at `:226`, and emits `Retry-After`/`X-RateLimit-*` headers on denial at `:279` through `:296`. | Confirmed. | +| Missing DB policy IDs referenced by route maps fail closed for request ingress. | The README scopes this to DB mode and missing request limits at `tldw_Server_API/app/core/Resource_Governance/README.md:114` through `:115`. The in-memory governor returns `{}` for a missing policy at `tldw_Server_API/app/core/Resource_Governance/governor.py:200` through `:208`; missing/zero request config returns denied headroom at `:303` through `:314`; request checks use the resolved policy at `:390` through `:402`. The Redis governor also resolves missing policies to `{}` at `tldw_Server_API/app/core/Resource_Governance/governor_redis.py:347` through `:354` and reads request rpm as `0` when missing at `:731` through `:737`. | Confirmed for request ingress. Do not broaden this into a blanket claim for tokens, concurrency categories, or Redis outage policy. 
| + +## Caveats For ADR Backfill + +- The ADR should describe a new-endpoint governance rule and route-map ownership expectation. It should not claim every existing API endpoint already uses claim-first auth or has Resource Governor route-map coverage. +- Middleware enforcement is request-category ingress only. Token, stream, job, and minute-budget categories still require endpoint-level reserve/commit plumbing. +- The fail-closed claim should be scoped to route-map entries that resolve to missing request policies. Redis backend outage behavior remains configurable through `RG_REDIS_FAIL_MODE` and per-policy/category fail modes. +- I found tests for route-map merge, route-map coverage, path/tag resolution, and claim-first guardrails. I did not find a focused regression named specifically for "route_map references missing DB policy and returns 429"; TASK-2234 can either rely on the existing governor code path or add that narrow test before creating the accepted ADR. + +## Recommended Next Action + +Create one accepted ADR via TASK-2234, expected as `Docs/ADR/018-resource-governance-endpoint-policy-and-route-map.md`, covering: + +1. New endpoints use claim-first auth dependencies for authorization. +2. Latency/cost-sensitive or user-facing endpoints must explicitly decide Resource Governor applicability. +3. Applicable ingress routes need policy-store and route-map ownership. +4. DB policy-store mode merges file `route_map` entries into DB policy snapshots, with file route-map precedence. +5. Request ingress fails closed when the route map resolves to a missing request policy. + +Keep Redis outage fail modes, non-request category plumbing, and all-endpoint coverage as consequences or follow-up notes rather than accepted decision claims. 
diff --git a/Docs/Published/ADR/inventory/2026-06-04-security-confirmation-audit.md b/Docs/Published/ADR/inventory/2026-06-04-security-confirmation-audit.md new file mode 100644 index 0000000000..836b0fccf1 --- /dev/null +++ b/Docs/Published/ADR/inventory/2026-06-04-security-confirmation-audit.md @@ -0,0 +1,50 @@ +# Security Confirmation Audit - 2026-06-04 + +**Related task:** TASK-2247 +**Follow-up:** TASK-2248, TASK-2311, TASK-2312, TASK-2313, TASK-2314 +**Inventory row:** INV-029 +**Source candidate:** `tldw_Server_API/app/core/Security/README.md` +**Disposition:** Current governing, but too broad for one accepted ADR. Request-edge middleware is backfilled by ADR-019; outbound egress/SSRF policy is backfilled by ADR-026; Security AES-GCM JSON envelopes are backfilled by ADR-027; restricted legacy pickle compatibility is backfilled by ADR-028; remaining `SecretManager` adoption stays inventory-only until narrower implementation-backed adoption work exists. + +## Decision Candidate Under Review + +INV-029 summarized the Security module convention as: + +> Security controls are centralized for egress policy, security headers, request IDs, setup CSP/access guard, URL validation, and secret management; production should keep security middleware enabled. + +The candidate describes current governing conventions, but it combines multiple security boundaries. Backfill should not turn the full row into one immutable ADR. The confirmed material is split into bounded ADRs so request-edge middleware, outbound egress/SSRF policy, and secrets/serialization policy do not overclaim each other's behavior. + +## Confirmed Evidence + +| Claim | Evidence | Result | +| --- | --- | --- | +| The Security module is the central home for egress, URL validation, middleware, setup guard/CSP, secret, crypto, and restricted serialization helpers. 
| The source README names those responsibilities at `tldw_Server_API/app/core/Security/README.md:3` through `:7`, maps the module files at `:30` through `:38`, and states extension expectations at `:72` through `:88`. The module contains `egress.py`, `url_validation.py`, `middleware.py`, `request_id_middleware.py`, `setup_access_guard.py`, `setup_csp.py`, `drain_gate_middleware.py`, `secret_manager.py`, `crypto.py`, and `safe_pickle.py`. | Confirmed as module ownership. Do not treat this as proof every security-sensitive caller already uses every helper. | +| Outbound URL validation is centralized through `evaluate_url_policy()` and `assert_url_safe()`. | `evaluate_url_policy()` rejects unsupported schemes, missing hosts, invalid/disallowed ports, denylisted hosts, strict-profile allowlist misses, DNS failures, and private/reserved resolved addresses in `tldw_Server_API/app/core/Security/egress.py:192` through `:298`. Private ranges are listed at `:28` through `:51`; private blocking defaults on at `:185` through `:189`; global/workflow allow and deny lists merge at `:240` through `:250`; production-like environments default to strict at `:252` through `:257`; tenant webhook and tenant egress helpers call the same policy at `:326` through `:365`. `assert_url_safe()` wraps the policy into a 400 response at `tldw_Server_API/app/core/Security/url_validation.py:6` through `:14`. Tests cover allowlists, IPv4-mapped loopback, invalid ports, and resolved private IP denial in `tldw_Server_API/tests/Security/test_egress.py:11` through `:75`, plus global allow/deny handling in `tldw_Server_API/tests/Security/test_egress_global_env.py`. | Confirmed as the shared policy helper and extension expectation. Caveat: outbound protection is only effective where callers route through these helpers; the audit did not prove universal coverage for every network call. 
| +| Normal startup installs path-sensitive setup CSP/access guard and security headers, while request IDs and drain gate are always added. | `app/main.py` imports Security middlewares at `tldw_Server_API/app/main.py:2226` through `:2230`. In test mode it still adds `SetupCSPMiddleware` and `SetupAccessGuardMiddleware` at `:2252` through `:2263`. In normal startup it adds setup CSP/access guard at `:2327` through `:2336`, computes security-header enablement at `:2321` through `:2326`, and adds `SecurityHeadersMiddleware` when enabled at `:2338` through `:2339`. It always adds `DrainGateMiddleware` and `RequestIDMiddleware` at `:2418` through `:2421`. | Confirmed. Caveat: security headers are intentionally skipped in explicit test mode and can be disabled by `ENABLE_SECURITY_HEADERS`; production defaults them on when the env var is absent. | +| Security headers are path scoped and HSTS is opt-in/HTTPS-aware. | `SecurityHeadersMiddleware` applies `X-Content-Type-Options`, `X-Frame-Options`, `Referrer-Policy`, `Content-Security-Policy`, `Permissions-Policy`, and related defaults at `tldw_Server_API/app/core/Security/middleware.py:120` through `:175`. `/setup` and `/docs`/`/redoc` receive relaxed CSP fallback policies at `:144` through `:156`. HSTS defaults from `SECURITY_ENABLE_HSTS` at `:100` through `:102` and is emitted only for HTTPS or `X-Forwarded-Proto: https` at `:113` through `:118` and `:161` through `:165`. Tests verify default headers, HSTS enabled over forwarded HTTPS, HSTS env disablement, and sanitized metric-failure logs in `tldw_Server_API/tests/Security/test_security_headers_middleware.py:18` through `:107`. | Confirmed. Do not claim HSTS is globally forced by default; it is opt-in and deployment/proxy-aware. | +| Request IDs and session IDs are sanitized or generated and propagated to response headers and tracing baggage. 
| `request_id_middleware.py` restricts accepted IDs to the characters `[A-Za-z0-9._:-]` and a maximum length of 128 characters at `tldw_Server_API/app/core/Security/request_id_middleware.py:12` through `:15`; invalid, empty, or oversized values are replaced at `:26` through `:45`. `RequestIDMiddleware` writes sanitized IDs to request state, tracing baggage, and response headers at `:60` through `:78`. Tests cover preserving clean IDs, rejecting newline and oversized IDs, generating missing IDs, and sanitized tracing-failure logs in `tldw_Server_API/tests/Security/test_request_id_middleware.py:27` through `:94`. | Confirmed. | +| Setup UI access and CSP are path-sensitive and local-only by default, with explicit remote and eval controls. | `SetupAccessGuardMiddleware` only gates paths starting with `/setup` at `tldw_Server_API/app/core/Security/setup_access_guard.py:129` through `:132`, allows loopback at `:140` through `:141`, applies denylist and allowlist precedence at `:150` through `:158`, allows explicit remote setup at `:161` through `:162`, and otherwise returns 403 at `:164` through `:186`. Tests verify allowlist deny/allow and default remote blocking in `tldw_Server_API/tests/Security/test_setup_access_guard.py:19` through `:47`. `SetupCSPMiddleware` only handles `/setup` at `tldw_Server_API/app/core/Security/setup_csp.py:73` through `:112`; it allows inline scripts, allows eval by default, and lets `TLDW_SETUP_NO_EVAL` remove `unsafe-eval`. Tests verify eval truthy/falsy/default behavior and sanitized failure logs in `tldw_Server_API/tests/Security/test_setup_csp_eval_policy.py:39` through `:113`. | Confirmed. Caveat: setup CSP is intentionally relaxed for the setup flow and allows eval unless explicitly disabled. | +| Secret lookup is centralized for configured secret types, with source precedence and validation. | `SecretManager` initializes standard secret configs at `tldw_Server_API/app/core/Security/secret_manager.py:132` through `:193`. 
`get_secret()` checks cache, then environment, config file, and default value in order at `:213` through `:260`; it raises for missing required secrets or too-short required secrets at `:262` through `:273`, validates type-specific formats at `:294` through `:314`, caches metadata at `:275` through `:290`, and validates required secrets on startup at `:316` through `:343`. Tests cover override immutability and sanitized secret-manager health/error output in `tldw_Server_API/tests/Security/test_secret_manager.py:26` through `:112`. | Confirmed for configured SecretManager consumers. Caveat: this audit did not prove every secret in the repository is retrieved through `SecretManager`, so avoid a universal secret-management ADR claim without a separate adoption audit. | +| Crypto and restricted pickle helpers exist for sensitive JSON blobs and legacy pickle compatibility. | `crypto.py` provides AES-GCM JSON blob encryption/decryption using `WORKFLOWS_ARTIFACT_ENC_KEY`, optional secondary key rotation, and explicit-key helpers at `tldw_Server_API/app/core/Security/crypto.py:72` through `:166`. `safe_pickle.py` restricts allowed pickle globals and raises for disallowed globals at `tldw_Server_API/app/core/Security/safe_pickle.py:18` through `:47`. Security crypto tests cover invalid-base64 decrypt behavior in `tldw_Server_API/tests/Security/test_crypto.py:8` through `:16`; downstream tests exercise restricted pickle migration behavior in `tldw_Server_API/tests/WebScraping/test_content_deduplicator_storage.py` and scheduler payload security tests under `tldw_Server_API/app/core/Scheduler/tests/test_payload_service_security.py`. | Confirmed as helper availability. Caveat: safe pickle and crypto behavior should be a separate ADR only if owner wants serialization/storage policy recorded. | + +## Caveats For ADR Backfill + +- Do not create one broad "Security module" ADR that claims all security-sensitive behavior is centrally enforced. 
The module centralizes helpers and middleware, but feature modules still need to call egress helpers for outbound work. +- Egress is a strong candidate for its own ADR, but the accepted claim should be "outbound integrations must use the central egress policy helpers" plus the current policy defaults. It should not claim universal historical coverage for every existing network path. +- Request-edge middleware is a separate strong candidate: normal startup installs setup guard/CSP and security headers, and request ID plus drain gate are always installed. It should explicitly carry the test-mode skip, `ENABLE_SECURITY_HEADERS`, HSTS opt-in, and setup CSP relaxed/eval caveats. +- Secret management and safe serialization should not be bundled into a broad ADR. TASK-2312 confirms helper availability and bounded crypto/restricted-pickle adoption, but not repository-wide `SecretManager` adoption or universal serialization coverage. ADR-027 records only the AES-GCM JSON envelope portion, and ADR-028 records only the restricted legacy pickle compatibility portion. +- Setup CSP intentionally allows inline scripts and allows eval by default unless `TLDW_SETUP_NO_EVAL` is truthy. Do not write an ADR that says setup CSP is strict. + +## Backfill Results + +Do not backfill INV-029 as a single accepted ADR. + +Bounded ADR follow-ups: + +1. Request-edge Security middleware ADR via TASK-2248: ADR-019 covers startup-installed request ID, drain gate, setup access/CSP, and security headers with path-specific CSP and production-default header enablement. +2. Outbound egress/SSRF policy ADR via TASK-2311: ADR-026 covers the rule that outbound integrations must use central `egress.py`/`url_validation.py` helpers, which enforce scheme, host, port, allow/deny, environment profile, tenant webhook, DNS, and private/reserved-address checks. +3. 
Secrets/serialization adoption audit via TASK-2312: `Docs/ADR/inventory/2026-06-07-security-secrets-serialization-adoption-audit.md` explains why the slice must remain split. +4. AES-GCM JSON envelope ADR via TASK-2313: ADR-027 covers Security crypto envelope helpers and known configured encrypted-persistence consumers. +5. Restricted legacy pickle compatibility ADR via TASK-2314: ADR-028 covers Security `safe_pickle` helpers and explicitly gated Web Scraping/Scheduler compatibility consumers. Future ADRs should split `SecretManager` adoption instead of creating one broad Security ADR. + +Update INV-029 to record TASK-2247 confirmation and keep ADR creation split by boundary. diff --git a/Docs/Published/ADR/inventory/2026-06-04-services-lifecycle-confirmation-audit.md b/Docs/Published/ADR/inventory/2026-06-04-services-lifecycle-confirmation-audit.md new file mode 100644 index 0000000000..81d592a21a --- /dev/null +++ b/Docs/Published/ADR/inventory/2026-06-04-services-lifecycle-confirmation-audit.md @@ -0,0 +1,48 @@ +# Services Lifecycle Confirmation Audit - 2026-06-04 + +**Related task:** TASK-2259 +**Follow-up:** TASK-2260 +**Inventory row:** INV-031 +**Source candidate:** `tldw_Server_API/app/services/README.md` +**Disposition:** Current governing; ready for a bounded accepted ADR backfill. + +## Decision Candidate Under Review + +INV-031 summarized the Services lifecycle convention as: + +> Lifespan worker runtime state owns long-lived handles; background workers should use cooperative stop events; shutdown drains gates and stops workers in owned order. + +The candidate is current enough for accepted ADR backfill if the ADR is scoped to FastAPI lifespan startup/shutdown orchestration, the Services-owned worker lifecycle session, declarative worker specs/engine/session ownership, cooperative stop-event workers with bounded timeout/cancel fallback, and the staged shutdown order for job pollers, background workers, legacy components, and cleanup. 
It should not claim that every background operation in the repo is lifecycle-managed by Services or that every long-lived handle is stored directly on `LifespanWorkerRuntimeState`. + +## Confirmed Evidence + +| Claim | Evidence | Result | +| --- | --- | --- | +| The Services README describes the intended lifecycle rule. | `tldw_Server_API/app/services/README.md:15` identifies lifespan helpers as a Services module family. The lifecycle walkthrough says `LifespanWorkerRuntimeState` stores long-lived task/stop-event/scheduler/resource handles at `:31`, and shutdown drains admission gates, quiesces owned pollers, stops workers, coordinates legacy shutdown, and cleans up resources at `:35`. Handle ownership guidance says shutdown-needed handles should be copied into `LifespanWorkerRuntimeState` at `:70`. Worker shutdown guidance says shutdown sets stop events and that new workers should prefer cooperative stop events at `:84` through `:92`. | Confirmed as the source convention. The current implementation has narrowed the runtime state to the worker lifecycle session aggregate, so ADR language should avoid claiming it stores every individual long-lived handle directly. | +| The FastAPI lifespan delegates startup and shutdown to Services sequence helpers using a shared runtime state object. | `tldw_Server_API/app/main.py:1221` creates `LifespanWorkerRuntimeState`; `:1228` through `:1241` passes it to `run_lifespan_startup_sequence`; `:1261` through `:1270` passes the same object to `run_lifespan_shutdown_sequence`. Startup applies worker-bootstrap handles into the runtime state at `tldw_Server_API/app/services/lifespan_startup_sequence.py:105`. The runtime state object stores `worker_lifecycle_session` at `tldw_Server_API/app/services/lifespan_worker_runtime_state.py:12` through `:18`. | Confirmed. The ADR can say startup returns explicit handles and stores the worker lifecycle session for shutdown, not that `main.py` owns worker internals. 
| +| Startup-managed workers are declarative specs started through the lifecycle engine. | `StartupWorkerBootstrapHandles` carries the `WorkerLifecycleSession` at `tldw_Server_API/app/services/startup_worker_bootstrap.py:24`; startup collects specs and starts them at `:54`, then returns the session at `:73`. `WorkerSpec`, `WorkerStrategy`, and `WorkerFailurePolicy` define the worker contract at `tldw_Server_API/app/services/lifecycle_worker_specs.py:46` through `:78`, and `stop_event_worker_spec()` builds the default stop-event task form at `:78`. The engine validates specs, evaluates enabled dependencies, and starts workers at `tldw_Server_API/app/services/lifecycle_worker_engine.py:33` through `:36`. Representative providers use `stop_event_worker_spec()` for primary and content job pollers at `tldw_Server_API/app/services/startup_primary_jobs_pollers.py:50` through `:95` and `tldw_Server_API/app/services/startup_content_jobs_pollers.py:57` through `:153`. | Confirmed. The ADR should make stop-event task workers the default new-worker strategy while preserving callback-only specs for components that expose shutdown callbacks instead of task handles. | +| The lifecycle engine enforces dependency-aware startup and ordered shutdown. | The engine computes startup order from `depends_on` at `tldw_Server_API/app/services/lifecycle_worker_engine.py:87`; creates stop-event task handles at `:137`; stops a phase at `:46`; computes reverse dependency batches at `:184` and `:197`; sets stop events and awaits callbacks/tasks with timeouts and cancellation fallback at `:263` through `:350`. `WorkerLifecycleSession` tracks handles, stopped/quiesced names, phase-specific handles, and inventory/stopped-name publication at `tldw_Server_API/app/services/lifecycle_worker_session.py:21` through `:86`. | Confirmed. "Owned order" should be written as phase order plus reverse dependency order within each phase, with independent workers stopped concurrently. 
| +| Shutdown drains/gates first, then quiesces job pollers before background/post-worker phases and final cleanup. | `run_lifespan_shutdown_sequence()` starts with `transition_handoff`, then runs `run_shutdown_job_poller_handoff`, then stops `BACKGROUND_WORKER_SHUTDOWN`, runs coordinated legacy components and pre-worker cleanup, stops `POST_WORKER_SHUTDOWN`, runs post-worker services cleanup, and final cleanup at `tldw_Server_API/app/services/lifespan_shutdown_sequence.py:36` through `:148`. The transition gate wrapper disables new job acquisition via `JobManager.set_acquire_gate()` at `tldw_Server_API/app/main.py:134` through `:145`. Job-poller handoff filters job-poller handles and delegates quiesce to the lifecycle engine at `tldw_Server_API/app/services/shutdown_job_poller_handoff.py:40` through `:58`. `quiesce_owned_job_pollers_for_shutdown()` optionally waits for active leases, records `optional_lease_wait`, then runs the timed `job_poller_quiesce` segment at `tldw_Server_API/app/services/shutdown_owned_job_pollers.py:267` through `:329`. | Confirmed. The ADR should treat lease waiting as bounded/configured drain behavior, not a guarantee that all external work is fully completed before shutdown continues. | +| Compatibility inventory and legacy shutdown adapters still exist and should be caveated, not erased. | `main.py` still provides compatibility wrappers for owned job poller registration and quiesce at `tldw_Server_API/app/main.py:240`, `:353`, and `:372`. `shutdown_transition_handoff()` stores legacy shutdown inventory and falls back to direct drain when the transition component does not stop cleanly at `tldw_Server_API/app/services/shutdown_transition_handoff.py:21` through `:73`. Callback-only workers are represented in inventory without stop events by `tldw_Server_API/app/services/lifecycle_worker_specs.py:46` through `:78` and `tldw_Server_API/app/services/lifecycle_workers.py:30` through `:42`. | Confirmed as caveats. 
A bounded ADR should not claim the migration removed all legacy shutdown paths or that every managed worker has a stop event. | +| Tests cover the lifecycle contract and representative integration behavior. | `tldw_Server_API/tests/Services/test_lifespan_shutdown_sequence.py:13` verifies shutdown phase order and `:220` verifies no-session fallback. `test_lifecycle_worker_engine.py:67`, `:558`, `:728`, `:785`, `:854`, and `:878` cover dependency startup, startup-abort cleanup, reverse dependency shutdown, timeout/failure isolation, and phase publication. `test_lifecycle_workers.py:13`, `:219`, `:362`, `:532`, and `:684` cover custom worker registration, stop-event task registration, concurrent stop-event shutdown, bounded callback shutdown, and timeout cancellation. `test_shutdown_transition_handoff.py:101`, `:176`, and `:223` cover transition handoff and fallback drain paths. `test_shutdown_owned_job_pollers.py:20`, `:78`, `:178`, and `:292` cover owned poller inventory, replacement, lease wait/quiesce, and taskless callback handling. `test_main_lifecycle_contract.py:23`, `:171`, `:217`, `:308`, `:388`, `:519`, `:590`, `:698`, `:1635`, `:1680`, `:1726`, `:1771`, `:1816`, `:1861`, and `:1906` verify reentrant lifespan behavior, lifecycle hook order, startup delegation, and representative worker shutdown. | Confirmed. The test surface is broad enough for a bounded ADR backfill. | + +## Caveats For ADR Backfill + +- Do not claim `LifespanWorkerRuntimeState` directly stores every long-lived handle. Current code stores the `WorkerLifecycleSession`, which owns the declarative worker handles needed by shutdown. +- Do not claim every background operation in the repository is Services lifecycle-managed. The ADR should scope itself to workers started through the Services lifespan worker bootstrap and shutdown sequence. +- Do not claim every managed worker has a stop event. 
The default new-worker strategy is stop-event task ownership, but callback-only workers are supported for components that expose shutdown callbacks. +- Do not claim all legacy shutdown adapters have been removed. `shutdown_transition_handoff` still builds and stores a legacy shutdown plan, uses a transition coordinator, and has a direct-drain fallback. +- Do not promise unlimited graceful drain. Job-poller lease waiting is bounded/configurable, then shutdown proceeds to quiesce and timeout/cancel fallbacks. +- Do not treat this Services ADR as replacing ADR-003. Jobs vs Scheduler ownership remains covered by ADR-003; this candidate is about lifecycle startup/shutdown ownership for workers that Services actually starts. + +## Recommended Next Action + +Create one accepted ADR via TASK-2260, expected as `Docs/ADR/021-services-lifecycle-startup-and-shutdown.md`, covering: + +1. FastAPI lifespan delegates startup and shutdown orchestration to Services sequence helpers. +2. Startup stores the worker lifecycle session in `LifespanWorkerRuntimeState` so shutdown owns the same started worker handles. +3. Declarative `WorkerSpec`/`LifecycleWorkerEngine`/`WorkerLifecycleSession` owns lifecycle-managed workers, with stop-event task workers as the preferred default and callback-only workers as an explicit supported strategy. +4. Shutdown order is transition drain/gate, job-poller handoff and bounded quiesce, background worker shutdown, coordinated legacy components, pre-worker cleanup, post-worker phase, post-worker services cleanup, and final resource cleanup. +5. Caveats for legacy shutdown adapters, callback-only workers, bounded lease drain, and scope limited to Services lifecycle-managed workers. + +Update INV-031 to record TASK-2259 confirmation and queue TASK-2260 for the bounded ADR backfill. 
diff --git a/Docs/Published/ADR/inventory/2026-06-07-data-tables-confirmation-audit.md b/Docs/Published/ADR/inventory/2026-06-07-data-tables-confirmation-audit.md new file mode 100644 index 0000000000..d6ceee2351 --- /dev/null +++ b/Docs/Published/ADR/inventory/2026-06-07-data-tables-confirmation-audit.md @@ -0,0 +1,48 @@ +# Data Tables ADR Candidate Confirmation Audit - 2026-06-07 + +**Related task:** TASK-2272 +**Follow-up:** TASK-2273 +**Inventory row:** INV-025 in `Docs/ADR/inventory/2026-06-03-decision-inventory.md` +**Source candidate:** `Docs/Design/Data_Tables_Backend.md` +**Disposition:** Current governing; ready for a bounded accepted ADR backfill. + +## Decision Candidate Under Review + +INV-025 summarized the Data Tables backend convention as: + +> Data Tables should live in per-user Media DB, generate asynchronously via JobManager, snapshot RAG query sources, use server-side exports, expose UUIDs externally, and reuse File_Artifacts for export tracking. + +The candidate is current enough for accepted ADR backfill if the ADR is scoped to backend table storage, Jobs-backed generation/regeneration, stored source snapshots for reproducibility, server-side export handling, and table UUID identity. It should not claim every Data Tables operation is asynchronous, that every external identifier is a UUID, that File Artifacts is the only storage/accounting layer for every export detail, or that all source ownership checks were fully audited. + +## Confirmed Evidence + +| Claim | Evidence | Result | +| --- | --- | --- | +| The source design calls for Media DB storage, Jobs generation, RAG snapshots, server-side exports, UUID table identity, and File Artifacts export tracking. | `Docs/Design/Data_Tables_Backend.md:4` describes async generation jobs, Media DB storage, reproducible RAG snapshots, and server-side exports. 
The decision summary at `:19` through `:24` says tables live in the per-user Media DB, generation is via JobManager, RAG queries persist snapshots, exports are server-side, table API identity uses `uuid`, and export tracking reuses File_Artifacts instead of a new Media DB table. The same design says table UUIDs are resolved to internal numeric IDs server-side at `:272`. | Confirmed as the source decision, with the caveat that job routes still expose numeric `job_id` values. | +| The current module README treats Data Tables as a Jobs worker plus endpoint/schema surface that persists generated content through Media DB. | `tldw_Server_API/app/core/Data_Tables/README.md:3` says the worker resolves media/chat/RAG source text, calls an LLM adapter, normalizes columns and rows, and persists generated table content through Media DB APIs. `:14` and `:19` list Jobs-domain consumption and Media DB snapshot persistence as responsibilities. `:39` through `:42` describe generate/regenerate enqueueing `job_type="data_table_generate"`, persisted snapshots, and export routes that either render directly or hand payloads to File Artifacts. `:46` through `:47` state Media DB owns table metadata/source rows/columns/generated rows/status snapshots while Jobs owns lifecycle state. | Confirmed as the current module-level convention. | +| Media DB owns table metadata, columns, rows, sources, source snapshots, and public table UUIDs. | The endpoint imports `get_media_db_for_user` at `tldw_Server_API/app/api/v1/endpoints/data_tables.py:22` and uses it on generate/get/export/update/regenerate routes, including generate at `:454` and regenerate at `:1097`. The worker resolves a user-specific Media DB path at `tldw_Server_API/app/core/Data_Tables/jobs_worker.py:649` through `:655`. 
`tldw_Server_API/app/core/DB_Management/media_db/media_database_impl.py:1307` through `:1389` creates `data_tables`, `data_table_columns`, `data_table_rows`, and `data_table_sources`; `data_tables.uuid` is unique at `:1309`, and source snapshots/retrieval params are stored at `:1381` through `:1382`. `tldw_Server_API/app/core/DB_Management/media_db/runtime/data_table_metadata_ops.py:59` creates tables, `:80` generates UUIDs when absent, `:81` resolves owner/client scope, and `:142` through `:154` fetches tables by UUID with owner filtering. `tldw_Server_API/app/core/DB_Management/media_db/runtime/data_table_child_ops.py:315` through `:366` inserts source rows with serialized `snapshot_json` and `retrieval_params_json`. `tldw_Server_API/app/core/DB_Management/media_db/runtime/data_table_generation_ops.py:12` through `:20` defines generation persistence, `:107` through `:110` serializes snapshots/retrieval params, and `:147` through `:214` replaces generated content and source rows in a transaction. | Confirmed. The ADR should say owner scoping is implemented through the Data Tables owner/client filter, not through a dedicated `owner_user_id` column on every Data Tables row. | +| Generate and regenerate are Jobs-backed and use the `data_tables` domain/job type. | `tldw_Server_API/app/api/v1/endpoints/data_tables.py:446` defines the generate endpoint; `:469` creates a Media DB table, `:507` stores source rows, and `:521` through `:524` creates a JobManager job with `domain="data_tables"` and `job_type="data_table_generate"`. Regeneration at `:1088` fetches stored table/source state, `:1118` through `:1119` reuses stored snapshots/retrieval params, and `:1150` through `:1153` creates the same domain/job type. `tldw_Server_API/app/core/Data_Tables/jobs_worker.py:72` through `:73` defines the same domain/job type, and `:1118` through `:1123` runs a WorkerSDK worker configured for that domain. 
`tldw_Server_API/app/services/startup_primary_jobs_pollers.py:76` through `:81`, `:282` through `:316`, and `:436` through `:441` wire the service-managed Data Tables Jobs worker behind `DATA_TABLES_JOBS_WORKER_ENABLED`. | Confirmed. The ADR should describe generation/regeneration as Jobs-backed, while noting `wait_for_completion=true` can return completed table details after waiting and cancellation/status routes still use numeric job IDs. | +| RAG query sources are snapshotted and regeneration can reuse stored snapshots instead of re-running RAG retrieval. | `tldw_Server_API/app/core/Data_Tables/jobs_worker.py:531` builds bounded RAG snapshots, `:755` through `:793` resolves RAG query sources and attaches snapshots, `:858` through `:859` reads stored source snapshots from table source rows, and `:921` through `:949` stores updated snapshots/retrieval params for persistence. `tldw_Server_API/tests/DataTables/test_data_tables_jobs_integration.py:168` covers regenerate-with-snapshot behavior and `:204` asserts RAG should not be resolved during regeneration when the snapshot is present. | Confirmed. The ADR should call this reproducibility for table regeneration and result inspection, not a full provenance/audit ledger; snapshots are bounded by worker limits. | +| Server-side export is implemented through direct DataTableAdapter rendering or File Artifacts, without a Data Tables export table. | `tldw_Server_API/app/api/v1/endpoints/data_tables.py:730` defines the export response shape, `:796` through `:818` directly renders a download through `DataTableAdapter`, `:827` builds a File Artifacts request with `file_type="data_table"`, `:834` creates `FileArtifactsService`, and `:841` through `:856` creates and returns File Artifacts export metadata. `tldw_Server_API/tests/DataTables/test_data_tables_export.py:97` and `:129` cover CSV direct downloads; `:143`, `:166` through `:177`, and `:186` cover async/pending File Artifacts export metadata. | Confirmed. 
The ADR should avoid making unverified claims about File Artifacts internals beyond Data Tables delegating generated-file metadata/export handling through that service. | +| Public table APIs expose table UUIDs while job APIs remain numeric-ID based. | `tldw_Server_API/app/api/v1/schemas/data_tables_schemas.py:157` through `:160` defines `DataTableSummary.uuid`; `:222` through `:226` defines generate responses with numeric `job_id` and optional `job_uuid`; `:237` through `:254` defines job status with numeric `id`, optional job UUID, and `table_uuid`. `tldw_Server_API/tests/DataTables/test_data_tables_api.py:70`, `:89` through `:97`, `:626` through `:632`, and `:734` through `:775` exercise UUID table routes and numeric job status/cancel routes. | Confirmed. The accepted ADR should phrase the identity decision as "table APIs use UUIDs externally and resolve to numeric IDs server-side"; it should not claim every Data Tables-related external identifier is a UUID. | + +## Caveats For ADR Backfill + +- Do not claim all Data Tables operations are asynchronous. Generate/regenerate are Jobs-backed, but endpoints support `wait_for_completion=true`, and exports can return direct server-rendered file content. +- Do not claim every Data Tables-related external identifier is a UUID. Table routes and table response models use UUIDs; job status/cancel routes and response fields still expose numeric job IDs, with optional job UUIDs. +- Do not overstate source authorization. This audit confirms table owner/client scoping and source resolution paths, but it did not prove complete ownership validation for every possible source type and future source adapter. +- Do not describe snapshots as a full audit/provenance system. RAG snapshots are stored for reproducible regeneration and inspection, but they are bounded/truncated worker payloads. +- Do not claim File Artifacts is the only export storage/accounting layer. 
The current endpoint either renders direct downloads or delegates generated-file metadata/export handling to File Artifacts; File Artifacts internals were not audited as part of this slice. +- Do not treat frontend table editing as part of this ADR. The backend contains content replacement routes, but the accepted decision should stay focused on storage, Jobs generation/regeneration, source snapshots, exports, and table identity. + +## Recommended Next Action + +Create one accepted ADR via TASK-2273 covering: + +1. Data Tables backend persists table metadata, selected source rows/snapshots, column definitions, generated rows, and user-visible table status through Media DB helpers. +2. Table API identity is UUID-first, with internal numeric IDs resolved server-side and numeric job IDs retained as an explicit caveat. +3. Generate/regenerate endpoints enqueue Jobs records in the `data_tables` domain with `job_type="data_table_generate"`; the Data Tables worker owns source resolution, LLM generation, normalization, persistence, cancellation checks, and failure/status mirroring. +4. RAG query sources store retrieval params and bounded chunk snapshots so regeneration can reuse stored source state. +5. Exports are server-side, either direct rendered downloads via `DataTableAdapter` or generated-file metadata/export handling through File Artifacts. + +Update INV-025 to record TASK-2272 confirmation and queue TASK-2273 for the bounded Data Tables backend ADR backfill. 
diff --git a/Docs/Published/ADR/inventory/2026-06-07-deepseek-ocr-confirmation-audit.md b/Docs/Published/ADR/inventory/2026-06-07-deepseek-ocr-confirmation-audit.md new file mode 100644 index 0000000000..0588819a20 --- /dev/null +++ b/Docs/Published/ADR/inventory/2026-06-07-deepseek-ocr-confirmation-audit.md @@ -0,0 +1,44 @@ +# DeepSeek OCR ADR Candidate Confirmation Audit - 2026-06-07 + +**Related task:** TASK-2275 +**Inventory row:** INV-026 +**Candidate source:** `Docs/Design/DeepSeek_OCR_Backend.md` +**Follow-up task:** TASK-2276 +**Verdict:** Ready for bounded ADR backfill. + +## Confirmation Summary + +INV-026 is current enough to backfill as an accepted ADR if the ADR is scoped to the implemented DeepSeek OCR integration rather than every aspirational detail in the design note. + +The confirmed decision is: + +> tldw_server supports a local Transformers-only DeepSeek-OCR backend named `deepseek`, using the upstream HuggingFace `AutoTokenizer`/`AutoModel` plus `model.infer(...)` contract, markdown-oriented defaults, temporary output handling by default, and explicit availability gates for the heavy GPU/FlashAttention dependency stack. + +## Evidence + +| Area | Current evidence | Confirmation | +| --- | --- | --- | +| Backend ownership | `tldw_Server_API/app/core/Ingestion_Media_Processing/OCR/backends/deepseek_ocr.py` defines `DeepSeekOCRBackend` with `name = "deepseek"`. | Confirms the provider/backend naming decision. | +| Local Transformers contract | `_load_transformers()` imports `AutoTokenizer` and `AutoModel`, passes `trust_remote_code=True`, resolves `DEEPSEEK_OCR_MODEL_ID`, supports `DEEPSEEK_OCR_MODEL_REVISION`, prefers `use_safetensors=True`, falls back to `use_safetensors=False`, moves the model to the configured device, and calls `eval()`. | Confirms local HuggingFace ownership. The security caveat remains material because remote code execution is part of the upstream contract. 
| +| Default prompt and sizing | `_DEFAULT_PROMPT` is `\n<|grounding|>Convert the document to markdown.`. `_resolve_sizes()` defaults to `base_size=1024`, `image_size=640`, and `crop_mode=True`. | Confirms the markdown default and the Gundam-equivalent sizing defaults. The implementation omits the trailing space shown in the historical design text; that is not architecturally significant. | +| Availability gates | `available()` requires `transformers` and `torch`, defaults `DEEPSEEK_OCR_DEVICE` to `cuda`, checks CUDA availability when using CUDA, and requires `flash_attn` only when CUDA plus `flash_attention_2` are selected. `_resolve_attn_impl()` switches the default attention implementation to `eager` on non-CUDA devices when no env override is set. | Confirms default CUDA/FlashAttention gating with env-based escape hatches. The ADR should not claim CPU mode is the preferred or performance-tested deployment. | +| Inference and output extraction | `ocr_image()` writes image bytes to a temporary `page.png`, calls `model.infer(...)` with prompt, image path, output path, sizes, crop mode, `save_results`, and `test_compress`, then returns `_extract_text_from_any(result)`. `_extract_text_from_any()` returns strings directly, extracts common dict/list text fields, or safely stringifies fallback values. | Confirms the upstream inference contract and safe string output decision. | +| Result persistence | `DEEPSEEK_OCR_SAVE_RESULTS` defaults false. `_resolve_output_dir()` uses a temp output path by default and only uses `DEEPSEEK_OCR_OUTPUT_DIR` when saving is enabled and the directory can be created. If saving is enabled without an output dir, it warns and still uses a temporary directory. | Confirms non-persistent-by-default output handling. The ADR should not claim persistent storage unless the user explicitly opts in. 
| +| Registry/API exposure | `OCR/registry.py` registers `DeepSeekOCRBackend` in `_BACKENDS`, supports explicit `ocr_backend=deepseek`, and includes DeepSeek in default `auto` and `auto_high_quality` priority lists. `/api/v1/ocr/backends` exposes DeepSeek metadata from `describe()`. | Confirms integration into the OCR registry and discovery endpoint. | +| User docs | `Docs/OCR/DeepSeek-OCR.md` and `Docs/OCR/OCR_Providers.md` describe a local Transformers-only backend, manual install, default prompt, env vars, temporary output behavior, GPU-friendly dependencies, and the `trust_remote_code=True` warning. | Confirms the operational docs match the implemented decision. | +| Tests | `test_ocr_backend_deepseek.py` covers availability returning a bool and `DEEPSEEK_OCR_SAVE_RESULTS` using a configured output dir with a stubbed model. Runtime auto-selection tests patch DeepSeek availability in registry ordering. The live OCR PDF integration test is gated by `DEEPSEEK_OCR_RUN_INTEGRATION=1`, CUDA, and local model dependencies. | Confirms local unit coverage exists and live model coverage is intentionally opt-in. | + +## Caveats For ADR-Backfill Scope + +- Do not claim DeepSeek OCR dependencies are provided by a project optional extra. The current docs require manual installation of the compatible `torch`, `transformers`, and FlashAttention stack. +- Do not claim the `trust_remote_code=True` risk is eliminated. The decision accepts that risk for this backend and limits it to controlled environments. +- Do not claim CUDA or FlashAttention are unconditional requirements. CUDA is the default device, and FlashAttention is required only when CUDA plus `flash_attention_2` are selected; env overrides can choose CPU or alternate attention. +- Do not claim CPU mode has equivalent support or performance. It is an escape hatch/default non-CUDA behavior, not the primary supported operating mode. +- Do not claim DeepSeek has a server/remote mode in this integration. 
Current docs and code are local Transformers-only. +- Do not claim persistent OCR outputs are written by default. `save_results` defaults false and uses temporary directories unless explicitly enabled with a configured output directory. +- Do not claim the registry priority exactly matches the historical design phrase "after dots/points". Current normal `auto` order is `tesseract`, `nemotron_parse`, `points`, `deepseek`, `hunyuan`, `dots`, `dolphin`, `llamacpp`, `chatllm`; `auto_high_quality` places `deepseek` after `hunyuan` and before `points`/`dots` unless config overrides priority. +- Do not claim routine test runs validate live model inference. The live endpoint integration test is explicitly gated because it requires CUDA and local model/dependency setup. + +## Inventory Disposition + +Update INV-026 from `Needs owner review` to `Current governing` for a bounded OCR/provider backfill. TASK-2276 should create the accepted ADR and keep the caveats above explicit. diff --git a/Docs/Published/ADR/inventory/2026-06-07-security-secrets-serialization-adoption-audit.md b/Docs/Published/ADR/inventory/2026-06-07-security-secrets-serialization-adoption-audit.md new file mode 100644 index 0000000000..10a4976ada --- /dev/null +++ b/Docs/Published/ADR/inventory/2026-06-07-security-secrets-serialization-adoption-audit.md @@ -0,0 +1,51 @@ +# Security Secrets and Serialization Adoption Audit - 2026-06-07 + +**Related task:** TASK-2312 +**Follow-up:** TASK-2313, TASK-2314 +**Inventory row:** INV-029 +**Source candidate:** `tldw_Server_API/app/core/Security/README.md` +**Disposition:** Split before ADR backfill. TASK-2313 backfilled the AES-GCM JSON envelope portion as ADR-027; TASK-2314 backfilled the restricted legacy pickle compatibility portion as ADR-028; `SecretManager` adoption remains inventory-only. 
+ +## Decision Candidate Under Review + +INV-029 originally grouped Security secret management and safe serialization with egress and request-edge controls: + +> Security controls are centralized for egress policy, security headers, request IDs, setup CSP/access guard, URL validation, and secret management; production should keep security middleware enabled. + +ADR-019 now covers request-edge middleware and ADR-026 covers outbound egress/SSRF. This audit reviews the remaining secrets and serialization portion: `SecretManager`, AES-GCM JSON helpers in `crypto.py`, and restricted pickle compatibility in `safe_pickle.py`. + +## Evidence Summary + +| Area | Evidence | Result | +| --- | --- | --- | +| `SecretManager` helper availability | `tldw_Server_API/app/core/Security/secret_manager.py` defines `SecretManager`, configured secret metadata, source precedence from environment to config to default, optional cache metadata, startup validation, health checks, and convenience functions such as `get_api_key()`, `get_auth_secret()`, `get_webhook_secret()`, and `validate_production_secrets()`. Tests in `tldw_Server_API/tests/Security/test_secret_manager.py` cover override immutability and sanitized health/error output. | Confirmed as a helper surface for configured secrets. Not confirmed as a repository-wide secret retrieval policy. | +| `SecretManager` caller adoption | Source search found app-level references to the Security `SecretManager` only inside `secret_manager.py` itself. Separate `TriggerSecretManager` usage exists for ACP triggers, but it is a distinct ACP-specific encryption helper. Many current modules read secrets or API keys directly from environment/config, including AuthNZ, Chat, LLM/TTS providers, Image Generation, External Sources connectors, workflows/webhooks, Third Party integrations, and configuration loading. | Do not write an ADR claiming secret lookup is centralized or universally adopted. 
A future SecretManager ADR needs an implementation/adoption slice first. | +| AES-GCM JSON helper availability | `tldw_Server_API/app/core/Security/crypto.py` provides `encrypt_json_blob()`, `decrypt_json_blob()`, explicit-key variants, `WORKFLOWS_ARTIFACT_ENC_KEY`, `JOBS_CRYPTO_SECONDARY_KEY`, and AES-GCM envelopes marked `_enc: aesgcm:v1`. Tests in `tldw_Server_API/tests/Security/test_crypto.py` cover invalid-envelope failure behavior. | Confirmed as the shared Security crypto primitive. | +| AES-GCM JSON helper consumers | Known consumers include Jobs payload/result encryption and key rotation in `tldw_Server_API/app/core/Jobs/manager.py`, External Sources OAuth state/token envelope handling in `tldw_Server_API/app/core/External_Sources/connectors_service.py`, AuthNZ user provider secrets in `tldw_Server_API/app/core/AuthNZ/user_provider_secrets.py`, admin webhook secrets in `tldw_Server_API/app/core/AuthNZ/admin_webhook_secrets.py`, and Workflow metadata decrypt/encrypt paths in `tldw_Server_API/app/core/Workflows/engine.py` and `tldw_Server_API/app/core/DB_Management/Workflows_DB.py`. Related tests cover connector token encryption, OAuth state metadata encryption, Jobs encryption, and key rotation. | Stronger candidate for a future bounded ADR, but still should not be combined with a universal SecretManager claim. | +| Restricted pickle helper availability | `tldw_Server_API/app/core/Security/safe_pickle.py` defines `RestrictedUnpickler` and `safe_pickle_loads()`, allowing only basic built-in containers and `collections.OrderedDict`. | Confirmed as the Security-owned restricted legacy pickle helper. | +| Restricted pickle consumers | `tldw_Server_API/app/core/Web_Scraping/enhanced_web_scraping.py` uses `safe_pickle_loads()` only for legacy content-hash migration and only when `WEBSCRAPER_ALLOW_LEGACY_PICKLE_HASHES` is enabled. 
`tldw_Server_API/app/core/Scheduler/services/payload_service.py` uses `safe_pickle_loads()` only for legacy scheduler payloads and only when `allow_legacy_pickle_payloads` / `SCHEDULER_ALLOW_LEGACY_PICKLE_PAYLOADS` enables compatibility mode. Tests confirm default-disabled behavior and rejection of disallowed globals. | Confirmed for bounded legacy compatibility paths. TASK-2314 backfilled this bounded portion as ADR-028. Not universal serialization policy. | +| Serialization divergence | `tldw_Server_API/app/core/Embeddings/multi_tier_cache.py` defines its own local restrictive unpickler rather than using `Security.safe_pickle`. This is not necessarily wrong for cache-local data, but it means the Security helper is not the universal pickle boundary. | Do not backfill a broad safe-serialization ADR without either narrowing it to known compatibility paths or consolidating local implementations first. | + +## Disposition + +Do not create one accepted ADR for the remaining secrets/serialization portion of INV-029 in its broad shape. + +The current evidence supports these narrower statements: + +- Security provides a `SecretManager` helper with source precedence, validation, cache metadata, health checks, and test coverage for sanitized health/error output. +- Security provides AES-GCM JSON envelope helpers that several Jobs, AuthNZ, External Sources, and Workflows paths use for optional or configured encrypted persistence. TASK-2313 backfilled this bounded portion as ADR-027. +- Security provides a restricted pickle loader used by bounded legacy compatibility paths in Web Scraping and Scheduler. TASK-2314 backfilled this bounded portion as ADR-028. + +The current evidence does not support these broader ADR claims: + +- All repository secrets are retrieved through `SecretManager`. +- All sensitive stored JSON is encrypted through `Security.crypto`. +- All pickle deserialization routes through `Security.safe_pickle`. + +## Recommended Next Action + +Keep INV-029 partially backfilled. 
ADR-019 covers request-edge middleware, ADR-026 covers outbound egress/SSRF, ADR-027 covers AES-GCM JSON envelope helpers, ADR-028 covers restricted legacy pickle compatibility, and this audit records why `SecretManager` adoption remains inventory-only. + +If the owner wants more ADR work here, the remaining slice needs implementation-backed adoption work: + +1. SecretManager adoption slice: migrate or explicitly exempt direct secret reads before considering any "centralized secret lookup" ADR. diff --git a/Docs/Published/Architecture.md b/Docs/Published/Architecture.md new file mode 100644 index 0000000000..b86b65526a --- /dev/null +++ b/Docs/Published/Architecture.md @@ -0,0 +1,385 @@ +# Architecture Overview + +This document gives new contributors a fast, opinionated tour of how tldw_server is structured and how the main pieces fit together at runtime. It complements the top-level `README.md` (high-level overview) and `Docs/Code_Documentation/Code_Map.md` (detailed code map). + +If you read **this file**, then **Code_Map.md**, and skim the module-specific developer guides, you will have a solid mental model of the system. + +- High-level intro and mental model +- Repository and directory layout +- Runtime architecture and request flow +- Core modules and data flows (media, RAG, chat, audio, MCP) +- Databases and storage +- Auth modes and multi-tenancy +- Patterns, conventions, and where to start when adding features + +--- + +## 1. Mental Model + +At a high level, tldw_server is: + +- A **FastAPI app** exposing REST and WebSocket APIs under `/api/v1`. +- A set of **core domain modules** under `tldw_Server_API/app/core/` (AuthNZ, Media Ingestion, Chunking, Embeddings, RAG, Chat, Evaluations, MCP, etc.). +- A **storage layer** using SQLite by default (PostgreSQL supported) plus ChromaDB for vectors, with per-user content and metadata. +- A **provider layer** for commercial/local LLMs, STT/TTS backends, OCR, and connectors. 
+- Optional **Next.js WebUI** at `apps/tldw-frontend/` and external clients (CLI tools, MCP-aware IDE integrations). + +Think of the architecture as: + +> Clients → FastAPI endpoints → Core domain services → Databases / Vector stores / External providers + +The goal is to keep endpoints thin, push logic into core modules, and keep storage access centralized via `core/DB_Management/` and the vector store adapters. + +For a visual diagram, see `README.md` (Architecture Diagram) and `Docs/Code_Documentation/Code_Map.md`. For detailed backend data flow and process diagrams, see `Docs/Code_Documentation/Data_Flow_Atlas.md`. + +--- + +## 2. Repository Layout (High Level) + +From the repo root: + +```text +/ +├── tldw_Server_API/ # Main API server implementation +│ ├── app/ +│ │ ├── api/v1/ +│ │ │ ├── endpoints/ # REST endpoints (media, chat, audio, rag, evals, etc.) +│ │ │ ├── schemas/ # Pydantic models +│ │ │ └── API_Deps/ # Shared dependencies (auth, DB, rate limits) +│ │ ├── core/ # Core logic (AuthNZ, RAG, LLM, DB, TTS, MCP, etc.) +│ │ ├── services/ # Background services and workers +│ │ └── main.py # FastAPI entry point +│ ├── Config_Files/ # config.txt, MCP configs, helpers +│ ├── Databases/ # Runtime DBs (some paths deprecated) +│ └── tests/ # Pytest suite (mirrors app structure) +├── apps/tldw-frontend/ # Next.js WebUI (primary web client) +├── Docs/ # Architecture, API, design, and developer docs +├── Dockerfiles/ # Docker images and compose files +├── Databases/ # AuthNZ + per-user content DB roots +├── Helper_Scripts/ # Utilities (installers, doc ingestion, etc.) +├── models/ # Optional model assets +├── pyproject.toml # Project configuration and extras +├── Env_Vars.md # Environment variable reference +├── Project_Guidelines.md # Development philosophy and standards +└── README.md # High-level overview and quickstart +``` + +For a file-by-file code map of the backend, see `Docs/Code_Documentation/Code_Map.md`. + +--- + +## 3. 
Runtime Architecture + +### 3.1 Components + +#### Clients +- Next.js WebUI at `apps/tldw-frontend/` (primary web client). +- Any HTTP client (curl, Postman, other backends) and MCP-aware tools. + +#### FastAPI app +- Entry point: `tldw_Server_API/app/main.py`. +- Routers mounted under `/api/v1` from `app/api/v1/endpoints/`. +- Shared dependencies (auth, DB sessions, rate limiting): `app/api/v1/API_Deps/`. +- Background services and tasks: `app/services/` (jobs, schedulers, maintenance). + +#### Core modules (`app/core/`) +- Domain-specific packages: AuthNZ, media ingestion, chunking, embeddings, RAG, chat, audio STT/TTS, MCP, evaluations, metrics, resource governance, etc. +- Each module is responsible for its own business logic and typically exposes pure-ish Python APIs used by endpoints. + +#### Storage +- Relational databases (SQLite or PostgreSQL) for auth, jobs, evaluations, chats/notes, and media metadata. +- Per-user vector stores via ChromaDB (or pgvector when configured). +- File-based media and temporary assets (e.g., downloads, transcodes, embeddings cache). + +#### External providers +- Commercial LLMs (OpenAI, Anthropic, Google, Groq, etc.). +- Local/self-hosted LLMs (Ollama, vLLM, llama.cpp, TabbyAPI, etc.). +- STT/TTS providers (faster_whisper, NeMo, Qwen2Audio, OpenAI-compatible TTS, local Kokoro ONNX). +- OCR engines, web scrapers, and other external tools. + +### 3.2 High-Level Flow + +Typical flow for an HTTP request: + +1. **Client** calls an endpoint (e.g., `POST /api/v1/chat/completions`). +2. **FastAPI router** in `app/api/v1/endpoints/` parses/validates the request using Pydantic schemas from `app/api/v1/schemas/`. +3. **Dependencies** (`API_Deps`) inject: + - Auth context (single-user API key or multi-user JWT). + - Database connections (AuthNZ DB, content DBs, vector stores). + - Rate limiting and resource governance guards. +4. 
The endpoint calls into one or more **core modules** (e.g., `core/RAG/`, `core/LLM_Calls/`, `core/Chat/`), which: + - Read or write to databases via `core/DB_Management/`. + - Call external providers via pluggable adapters. + - Orchestrate pipelines (chunking → embeddings → search → generation). +5. The endpoint returns a response, optionally streaming via SSE/WebSocket. + +For deeper diagrams and call graphs per subsystem, see: +- `Docs/Code_Documentation/Code_Map.md` +- `Docs/Code_Documentation/Embeddings-Documentation.md` +- `Docs/Code_Documentation/RAG-Developer-Guide.md` +- `Docs/MCP/Unified/Developer_Guide.md` + +--- + +## 4. Core Modules (Backend) + +Most feature work touches one or more of these directories under `tldw_Server_API/app/core/`. This list is intentionally selective; see `Code_Map.md` for a more exhaustive view. + +- `AuthNZ/` + - Auth modes (`single_user` API key vs `multi_user` JWT) and user management. + - Initialization CLI (`python -m tldw_Server_API.app.core.AuthNZ.initialize`) for setting up DBs and keys. + - Integration with FastAPI dependencies and security scopes. + +- `Ingestion_Media_Processing/` + - Pipelines for ingesting video, audio, documents, and web content. + - Uses `ffmpeg`, `yt-dlp`, PDF/e-book libraries, OCR, etc. + - Normalizes content into chunks + metadata and writes to Media DB v2. + +- `Chunking/` + - Generic chunking engine (`chunker.py`) and strategies (`strategies/`). + - Template system (`templates.py`, `template_library/`) for hierarchical and domain-specific chunking. + - Powers both ingestion and evaluations workflows. + +- `Embeddings/` + - Embedding pipeline (synchronous and worker-based). + - Adapters for OpenAI-compatible and local embedding models. + - Integrates with ChromaDB / pgvector and Media DB v2. + +- `RAG/` + - Unified retrieval pipeline combining FTS5/BM25 + vectors + re-ranking. + - Service layer for `/api/v1/rag/*` endpoints and chat retrieval. + - Handles scoring, ranking, and answer assembly. 
+ +- `Chat/` and `Character_Chat/` + - OpenAI-compatible `/chat/completions` orchestration. + - Character cards, chat sessions, and history management. + - Provider routing and streaming orchestration (via `LLM_Calls/`). + +- `LLM_Calls/` + - Provider abstraction for 16+ LLM backends (commercial and local). + - Handles API key usage, rate limits, error handling, and streaming. + - Central place to add new providers or tweak provider behavior. + +- `TTS/` and audio-related modules + - Text-to-speech and speech-to-text pipelines. + - File-based transcription (`/audio/transcriptions`) and streaming transcription (`/audio/stream/transcribe`). + - Voice catalog and multi-provider TTS abstraction. + +- `MCP_unified/` + - Production-ready Model Context Protocol server + HTTP/WebSocket endpoints. + - Modules (`media`, `knowledge`, `notes`, etc.) mapped to tools for agentic clients. + - Metrics, health checks, and RBAC integration. + +- `Evaluations/` + - Unified evaluations engine (G-Eval, RAG metrics, batch scoring). + - Integrates with embeddings, chunking, and LLM providers. + - Backed by its own evaluations DB. + +- `DB_Management/` + - Media DB v2, notes/chats DB, migrations, and helpers. + - Abstractions for SQLite/PostgreSQL; **no raw SQL in endpoints**. + +- `Resource_Governance/` and `RateLimiting/` + - Centralized resource governor (tokens, concurrency, quota) with Redis support. + - Endpoint-level rate limiting and policy enforcement. + +Other important areas: + +- `Monitoring/`, `Metrics/`: Prometheus/OpenTelemetry exporters and metrics collection. +- `Search_and_Research/`, `WebSearch/`, `Web_Scraping/`: web search, scraping, and research helpers. +- `Notes/`, `Chatbooks/`, `Prompt_Management/`: knowledge management and artifacts. + +--- + +## 5. Key Data Flows + +This section highlights common flows a new contributor will likely touch. + +### 5.1 Media Ingestion → Chunking → Embeddings → RAG + +1. 
Client calls one of the `POST /api/v1/media/process-*` endpoints (e.g., `/process-documents`, `/process-videos`, `/process-audios`) or `/api/v1/media/add` when also persisting to the Media DB. +2. Endpoint package `app/api/v1/endpoints/media/`: + - Validates input and resolves user/context. + - Calls into `core/Ingestion_Media_Processing/`. +3. Ingestion module: + - Downloads/transcodes media if necessary (`yt-dlp`, `ffmpeg`, etc.). + - Extracts raw text/transcripts + metadata. + - Writes media and basic metadata into Media DB v2 via `DB_Management/`. +4. Chunking module (`core/Chunking/`): + - Splits content by strategy and/or templates. + - Assigns chunk IDs and hierarchy. +5. Embeddings module (`core/Embeddings/`): + - Computes embeddings for chunks. + - Writes vectors and metadata to ChromaDB / pgvector and updates Media DB. +6. RAG module (`core/RAG/`): + - Exposes search endpoints (`/api/v1/rag/*`). + - Uses both text and vector indexes when serving queries. + +### 5.2 Chat with Retrieval + +1. Client calls `POST /api/v1/chat/completions` with messages and optional retrieval settings. +2. Endpoint in `app/api/v1/endpoints/chat.py`: + - Resolves provider/model (from config, aliases, or request). + - Optionally calls `core/RAG/` to fetch context for retrieval-augmented replies. +3. `core/Chat/` orchestrates: + - System/instruction messages. + - Context windows and truncation/compaction. + - Conversation persistence. +4. `core/LLM_Calls/` sends the final request to the chosen provider and streams the response back to the client. + +### 5.3 Audio STT/TTS and Streaming + +1. STT (file): `POST /api/v1/audio/transcriptions`. +2. STT (streaming): `WS /api/v1/audio/stream/transcribe`. +3. TTS: `POST /api/v1/audio/speech`. +4. Endpoints delegate to: + - `core/Ingestion_Media_Processing/Audio/*` for STT. + - `core/TTS/` for TTS and voice management. +5. Outputs can be: + - Persisted as media items for search and RAG. + - Streamed directly to clients. 
+ +For subsystem-level diagrams and details, see: +- `Docs/Code_Documentation/Ingestion_Media_Processing.md` +- `Docs/User_Guides/WebUI_Extension/TTS_Getting_Started.md` +- `Docs/Development/Audio-Multi-User-Architecture.md` + +--- + +## 6. Databases and Storage + +Database design is covered in depth in: +- `Docs/Code_Documentation/Database.md` +- the media DB code documentation page +- `Docs/Code_Documentation/Databases/ChaChaNotes_DB.md` + +This section gives the quick mental model. + +Note: `<USER_DB_BASE_DIR>` is defined in `tldw_Server_API.app.core.config`, defaults to `Databases/user_databases/` under the project root, and can be overridden via environment variable or `Config_Files/config.txt`. + +### AuthNZ DB +- Centralized in all auth modes. +- Default (single-user): SQLite file configured by `DATABASE_URL` (defaults to `sqlite:///./Databases/users.db`). +- Multi-user: centralized PostgreSQL instance (e.g., `postgresql://user:password@host:5432/tldw_users`). +- Unlike per-user Content/Media DBs under `<USER_DB_BASE_DIR>/<user_id>/`, AuthNZ data remains centralized. +- Stores users, credentials, permissions, and related auth data. + +### Content / Media DB +- Per-user SQLite DB under `<USER_DB_BASE_DIR>/<user_id>/<media_db_name>.db`. +- Stores media items, chunks, metadata, and FTS indexes. +- Root-level single-file content DB paths are deprecated; always go through the DB helpers. +- Replace `<media_db_name>.db` with your configured per-user content DB filename. + +### Notes / Chats / Characters +- Per-user `ChaChaNotes.db` under `<USER_DB_BASE_DIR>/<user_id>/ChaChaNotes.db`. +- Stores notes, chat history, and character data. + +### Prompt Studio and related artifacts +- Per-user prompts DB under `<USER_DB_BASE_DIR>/<user_id>/prompts_user_dbs/user_prompts_v2.sqlite`. + +### Evaluations DB +- Per-user SQLite DB under `<USER_DB_BASE_DIR>/<user_id>/evaluations/evaluations.db`. +- Stores evaluations, metrics, and audit logs for the resolved user context. +- Root-level `Databases/evaluations.db` may exist as a legacy/fallback path; use `DatabasePaths.get_evaluations_db_path(user_id)` for normal access.
+ +### Vector Store +- Default: ChromaDB, usually per-user under `<USER_DB_BASE_DIR>/<user_id>/chroma_storage/`. +- Optionally: PostgreSQL with pgvector, configured via `config.txt` and env vars. + +All DB access should go through the abstractions in `core/DB_Management/` and the vector store wrappers in `core/Embeddings/` and `core/RAG/`. + +--- + +## 7. Auth Modes and Multi-Tenancy + +tldw_server supports two primary auth modes: + +- `AUTH_MODE=single_user` + - Simple API key authentication via `X-API-KEY` header. + - Intended for personal/local deployments and single-user setups. + - Content and notes are still organized per logical user ID, but the AuthNZ layer is simpler. + +- `AUTH_MODE=multi_user` + - JWT-based auth with signup/login flows and permissions. + - Recommended for multi-tenant deployments and hosted environments. + - Typically paired with PostgreSQL for AuthNZ DB and Job DB. + +Per-user data: + +- User identity (from API key or JWT) is mapped to a **user_id**. +- Per-user DB paths are derived from this user_id under `<USER_DB_BASE_DIR>/` (defaults to `Databases/user_databases/` unless configured). +- RAG, notes, prompts, and vector stores all use these per-user roots to keep content logically isolated. + +See: +- `Env_Vars.md` for environment variable reference. +- `Docs/Code_Documentation/AuthNZ-Developer-Guide.md` for implementation details. + +--- + +## 8. Frontend and Clients + +### Next.js WebUI (`apps/tldw-frontend/`) +- Primary web client, talking to the same FastAPI APIs (`/api/v1`). +- Focused on interactive media ingestion, search, chat, and evaluations. + +### Programmatic clients +- Any HTTP client can call the OpenAI-compatible Chat, Embeddings, Audio, and RAG endpoints. +- MCP clients (IDEs, agents) use the MCP Unified APIs at `/api/v1/mcp/*`. + +Key documentation: +- `Docs/API-related/API_README.md` +- `Docs/MCP/Unified/Developer_Guide.md` +- `Docs/MCP/Unified/Documentation_Ingestion_Playbook.md` + +--- + +## 9.
Patterns, Conventions, and How to Add Features + +The project guidelines in `Project_Guidelines.md` and `AGENTS.md` cover philosophy in detail. This section summarizes the most important patterns for contributors. + +#### Coding patterns +- Prefer **thin endpoints** and **fat core modules**: + - Endpoint: parse/validate, call core, shape response. + - Core: domain logic, side effects, DB + provider integration. +- Rely on **Pydantic models** for all API inputs/outputs (`app/api/v1/schemas/`). +- Keep functions focused on single responsibilities and fully type hinted. +- Prefer **async/await** for I/O-bound code (HTTP calls, DB, file I/O). +- Centralize DB access via `core/DB_Management/`; avoid raw SQL in endpoints. + +#### Adding a new feature +1. **Design first**: Sketch the feature and data flow. For larger features, add a design doc under `Docs/Design/`. +2. **Core implementation**: Add business logic under `app/core/<module>/` or extend an existing module. +3. **API layer**: Add or update endpoints under `app/api/v1/endpoints/` and Pydantic models under `app/api/v1/schemas/`. +4. **Dependencies**: If you need shared dependencies (auth, DB, rate limits), wire them in `API_Deps/`. +5. **Tests**: Add tests under `tldw_Server_API/tests/<module>/` mirroring the app structure. +6. **Config and docs**: Wire any knobs into `Config_Files/config.txt` and update docs under `Docs/`. + +#### Testing and local dev +- Run tests via `python -m pytest -v` from the repo root. +- Use markers (`unit`, `integration`, `e2e`, `external_api`, `performance`) to scope suites. +- For DB-intensive features, prefer existing fixtures (e.g., AuthNZ Postgres fixture) over custom setups. + +--- + +## 10. Where to Go Next + +If you are new to the project, a good path is: + +1. Read `README.md` (Overview, Architecture & Repo Layout, Quickstart). +2. Read this file (`Docs/Architecture.md`) to internalize the mental model. +3.
Open `Docs/Code_Documentation/Code_Map.md` and skim: + - High-Level Architecture + - Top-Level Layout + - Key Flows +4. Jump into module guides for the area you care about: + - RAG: `Docs/Code_Documentation/RAG-Developer-Guide.md` + - AuthNZ: `Docs/Code_Documentation/AuthNZ-Developer-Guide.md` + - Embeddings: `Docs/Code_Documentation/Embeddings-Documentation.md` + - Chat & Chatbooks: `Docs/Code_Documentation/Chat_Developer_Guide.md`, `Docs/Code_Documentation/Chatbook_Developer_Guide.md` + - MCP: `Docs/MCP/Unified/Developer_Guide.md` +5. Review `Project_Guidelines.md` and `Env_Vars.md` before making substantial changes. + +With those pieces in place, you should be able to: +- Trace any request from client → endpoint → core module → database/provider. +- Identify where to plug in new functionality. +- Confidently navigate the codebase without being overwhelmed by its size. diff --git a/Docs/Published/Code_Documentation/Data_Flow_Atlas.md b/Docs/Published/Code_Documentation/Data_Flow_Atlas.md new file mode 100644 index 0000000000..bc1167b39e --- /dev/null +++ b/Docs/Published/Code_Documentation/Data_Flow_Atlas.md @@ -0,0 +1,1295 @@ +# tldw_Server_API Data Flow Atlas + +This atlas maps how data moves through `tldw_Server_API`. It is written for new contributors and maintainers who need to trace requests across FastAPI endpoints, dependencies, core modules, storage, providers, and background workers. 
+ +File path: `Docs/Code_Documentation/Data_Flow_Atlas.md` + +## Table Of Contents + +- [How To Read This Atlas](#how-to-read-this-atlas) +- [System Context](#system-context) +- [Request Lifecycle](#request-lifecycle) +- [Router Group Map](#router-group-map) +- [Data Store Map](#data-store-map) +- [Core Flow Diagrams](#core-flow-diagrams) +- [Extended Domain Maps](#extended-domain-maps) +- [Router Coverage Matrix](#router-coverage-matrix) +- [How To Update This Atlas](#how-to-update-this-atlas) + +## How To Read This Atlas + +Use this atlas as a flow map, not as an OpenAPI replacement. Route names, module names, and storage paths should be verified against the code before edits. + +| Shape or Group | Meaning | +| --- | --- | +| Clients | WebUI, admin UI, extension, HTTP clients, MCP clients, or other callers | +| FastAPI app | `app/main.py`, middleware, lifecycle, router registration | +| Endpoint groups | Routers under `app/api/v1/endpoints/`, grouped by `router_groups/*.py` | +| API dependencies | Auth, user context, DB handles, rate limits, resource governance, request validation | +| Core modules | Domain logic under `app/core/` | +| Storage | SQLite/PostgreSQL DBs, ChromaDB/pgvector, file storage, Redis/job backends | +| Providers | LLM, STT, TTS, OCR, web/media, and other external or local providers | +| Workers | Jobs, Scheduler, APScheduler bridges, background services, lifecycle workers | +| Optional routes | Feature-gated, lazy-imported, or optional dependency routes | + +## System Context + +```mermaid +flowchart LR + subgraph Clients + WebUI[Next.js WebUI] + AdminUI[Admin UI] + Extension[Browser extension] + HTTP[HTTP clients] + MCPClients[MCP clients] + end + + subgraph FastAPI["FastAPI app"] + Main["app/main.py"] + Lifespan[Middleware and lifespan] + Registry[Router registry] + end + + subgraph Deps["API dependencies"] + AuthDeps[Auth and user context] + Validation[Pydantic validation] + RateLimit[Rate limiting] + Governance[Resource governance] + 
DBDeps[DB and vector dependencies] + end + + subgraph Endpoints["Endpoint groups"] + Core[Core specs] + Content[Content specs] + Admin[Admin specs] + Optional[Optional and minimal specs] + end + + subgraph CoreModules["Core modules"] + AuthNZ[AuthNZ] + Ingestion[Ingestion] + Chunking[Chunking] + Embeddings[Embeddings] + RAG[RAG] + ChatLLM[Chat and LLM] + AudioTTS[Audio and TTS] + Evaluations[Evaluations] + MCP[MCP Unified] + JobsScheduler[Jobs and Scheduler] + StorageCore[Storage and DB Management] + end + + subgraph Storage["Storage"] + AuthDB[AuthNZ DB] + MediaDB[Per-user Media DB] + NotesDB[Per-user ChaChaNotes DB] + PromptDB[Prompt and Prompt Studio DBs] + EvalDB[Per-user Evaluations DB] + VectorStore[ChromaDB or pgvector] + Files[Files, outputs, voices, cache] + RedisJobs[Redis or Jobs backend] + end + + subgraph Providers + LLMProviders[LLM providers] + STTProviders[STT providers] + TTSProviders[TTS providers] + ExternalSources[Web, media, OCR, connectors] + end + + subgraph Workers + JobWorkers[Jobs workers] + SchedulerWorkers[Scheduler workers] + APScheduler[APScheduler bridges] + BackgroundServices[Lifecycle services] + end + + WebUI --> Main + AdminUI --> Main + Extension --> Main + HTTP --> Main + MCPClients --> Main + Main --> Lifespan + Main --> Registry + Registry --> Core + Registry --> Content + Registry --> Admin + Registry --> Optional + Core --> Deps + Content --> Deps + Admin --> Deps + Optional --> Deps + Deps --> AuthNZ + Deps --> StorageCore + Core --> AuthNZ + Core --> ChatLLM + Core --> MCP + Content --> Ingestion + Content --> Chunking + Content --> Embeddings + Content --> RAG + Content --> AudioTTS + Content --> Evaluations + Content --> JobsScheduler + Admin --> AuthNZ + Admin --> JobsScheduler + Admin --> StorageCore + AuthNZ --> AuthDB + Ingestion --> MediaDB + Chunking --> MediaDB + Embeddings --> VectorStore + RAG --> MediaDB + RAG --> VectorStore + ChatLLM --> NotesDB + AudioTTS --> Files + Evaluations --> EvalDB + MCP --> AuthDB + 
JobsScheduler --> RedisJobs + StorageCore --> AuthDB + StorageCore --> MediaDB + StorageCore --> NotesDB + StorageCore --> PromptDB + ChatLLM --> LLMProviders + AudioTTS --> STTProviders + AudioTTS --> TTSProviders + Ingestion --> ExternalSources + JobsScheduler --> JobWorkers + JobsScheduler --> SchedulerWorkers + APScheduler --> JobsScheduler + BackgroundServices --> JobsScheduler +``` + +## Request Lifecycle + +```mermaid +sequenceDiagram + participant Client + participant Main as app/main.py + participant Registry as router_registry.py + participant Spec as RouterSpec + participant Endpoint as Endpoint router + participant Deps as API dependencies + participant Core as Core module + participant Store as Storage/provider/worker + + Main->>Registry: register_all_routers or minimal register_router_specs + Registry->>Registry: register_router_specs(specs) + loop each RouterSpec + alt spec has route_key + Registry->>Registry: route_enabled(route_key, default_stable) + alt route disabled or gating fails + Registry-->>Main: skip router + else route enabled + Registry->>Spec: RouterSpec.resolve_router + end + else unkeyed spec + Registry->>Spec: RouterSpec.resolve_router + end + Note over Spec: Lazy imported routers resolve through factories from append_imported_router_spec. 
+ Spec-->>Registry: APIRouter or skippable optional import error + Registry->>Main: include_router_idempotent(router, prefix, tags) + end + + Client->>Main: HTTP request, streaming request, or WebSocket connect + Main->>Endpoint: route match after middleware and lifespan readiness + Endpoint->>Deps: schema validation and dependency resolution + Deps->>Deps: auth and user context + Deps->>Deps: rate limit and resource governance + alt auth, rate, governance, or validation failure + Deps-->>Client: error response without core work + else dependencies accepted + Deps->>Core: request model, user context, DB handles + Core->>Store: read/write DBs, call provider, or enqueue work + alt normal response + Store-->>Core: result data + Core-->>Endpoint: response model + Endpoint-->>Client: JSON or file response + else streaming or WebSocket + Store-->>Core: chunks or events + Core-->>Client: StreamingResponse or WebSocket messages + else async job + Store-->>Core: job id and status handle + Core-->>Client: job handle response + end + end +``` + +## Router Group Map + +```mermaid +flowchart TB + Main["app/main.py"] + Mode{App mode} + Ultra[Ultra minimal control-plane health only] + Minimal[MINIMAL_TEST_APP] + Full[Full app] + + Main --> Mode + Mode --> Ultra + Mode --> Minimal + Mode --> Full + + subgraph MinimalPath["Minimal test registration path"] + MinRequired[iter_minimal_test_router_specs] + MinOptional[iter_minimal_optional_router_specs] + MinRegister[register_router_specs] + MinRequired --> MinRegister + MinOptional --> MinRegister + end + + subgraph FullPath["Full app registration path"] + RegisterAll[register_all_routers] + CoreSpecs[iter_core_router_specs] + ContentSpecs[iter_content_router_specs] + AdminSpecs[iter_admin_router_specs] + RegisterAll --> CoreSpecs + RegisterAll --> ContentSpecs + RegisterAll --> AdminSpecs + end + + subgraph CoreGroup["Core specs"] + Infrastructure["health, moderation, monitoring, metrics, audit, consent, setup"] + Identity["auth, 
users, user keys, config, sync"] + ChatProviders["chat, chat loop, tools, ACP, LLM, VLM, MCP Unified"] + end + + subgraph ContentGroup["Content specs"] + Retrieval[RAG, research, paper search] + Processing[embeddings, media embeddings, evaluations, OCR, media, audio] + DataWorkflows[chunking, vector stores, prompts, workflows, scheduler] + Experience[notes, prompt studio, workspaces, characters, outputs, chatbooks] + Integrations[connectors, ingestion sources, web scraping, Slack, Discord, Telegram, meetings] + end + + subgraph AdminGroup["Admin specs"] + AdminOps[admin, config admin, resource governor, jobs admin] + OrgBilling[orgs, scoped keys, privileges, billing, invites] + SafetyOps[guardian, self monitoring, sandbox, benchmarks, MCP catalogs and hub] + end + + subgraph SpecFlow["Registration and gating flow"] + Imported[append_imported_router_spec] + RouterSpecNode[RouterSpec] + Gate{route_enabled for route_key} + Resolve[RouterSpec.resolve_router] + OptionalSkip[Skip optional missing module or attribute] + Include[include_router_idempotent] + Registered[Router included once per router, prefix, tags] + end + + Minimal --> MinRequired + Full --> RegisterAll + CoreSpecs --> CoreGroup + ContentSpecs --> ContentGroup + AdminSpecs --> AdminGroup + CoreGroup --> Imported + ContentGroup --> Imported + AdminGroup --> Imported + MinRegister --> RouterSpecNode + Imported --> RouterSpecNode + RouterSpecNode --> Gate + Gate -->|disabled| OptionalSkip + Gate -->|enabled or unkeyed| Resolve + Resolve -->|optional import failure| OptionalSkip + Resolve --> Include + Include --> Registered + Include -->|duplicate signature| OptionalSkip +``` + +## Data Store Map + +```mermaid +flowchart LR + subgraph Shared["Shared or deployment-level storage"] + AuthDB[AuthNZ DB: Databases/users.db or PostgreSQL] + JobDB[Jobs DB: SQLite or PostgreSQL when configured] + Redis[Redis: queues, locks, rate/backpressure, optional job backend] + end + + subgraph UserRoot["Per-user root: 
USER_DB_BASE_DIR//"] + MediaDB[Media DB: Media_DB_v2.db] + ChaCha[ChaChaNotes: ChaChaNotes.db] + Prompts[Prompts DB and prompt libraries] + PromptStudio[Prompt Studio DB: prompt_studio_dbs/prompt_studio.db] + EvalDB[Per-user evaluations storage: evaluations/evaluations.db] + Vector[ChromaDB: chroma_storage plus vector_store metadata] + Outputs[outputs/ generated artifacts] + Voices[voices/ custom voices and provider runtime cache] + Rewrite[Rewrite_Cache/rewrite_cache.jsonl] + Personalization[rag_personalization.json] + end + + subgraph Owners["Typical owners"] + AuthNZ[core/AuthNZ] + DBMgmt[core/DB_Management and API_Deps] + Ingestion[Ingestion and media endpoints] + NotesChat[Notes, chat, characters, workspaces] + PromptCore[Prompts and Prompt Studio] + EvalCore[Evaluations] + EmbedRAG[Embeddings and RAG] + FileCore[Storage, outputs, TTS] + JobsScheduler[Jobs, Scheduler, APScheduler] + end + + AuthNZ --> AuthDB + DBMgmt --> MediaDB + DBMgmt --> ChaCha + Ingestion --> MediaDB + NotesChat --> ChaCha + PromptCore --> Prompts + PromptCore --> PromptStudio + EvalCore --> EvalDB + EmbedRAG --> Vector + EmbedRAG --> MediaDB + FileCore --> Outputs + FileCore --> Voices + FileCore --> Rewrite + EmbedRAG --> Personalization + JobsScheduler --> JobDB + JobsScheduler --> Redis + Ingestion --> JobsScheduler + FileCore --> JobsScheduler +``` + +## Core Flow Diagrams + +These flows trace the backend paths most likely to matter when a newcomer asks where data goes after an API call. They are intentionally grouped by process rather than by every route handler. + +### Auth And User Context + +**Purpose:** Resolve the caller, enforce auth policy, and turn identity into the user-scoped paths used by content modules. + +**Primary entrypoints:** Most protected endpoints through `get_current_user`, `get_request_user`, `AuthPrincipal`, `TokenScopeGuard`, `RequireRole`, and related dependencies in `app/api/v1/API_Deps/auth_deps.py`. 
+ +```mermaid +flowchart LR + subgraph Caller["Caller credentials"] + APIKey[X-API-KEY single-user or API key] + Bearer[Authorization bearer JWT] + Cookie[Session or browser context] + end + + subgraph Deps["API auth dependencies"] + AuthDep[get_current_user and get_request_user] + Principal[AuthPrincipal and user dict] + Guards[Role, scope, rate, quota guards] + end + + subgraph IdentityStore["Identity and auth storage"] + AuthNZ[core/AuthNZ] + AuthDB[AuthNZ DB: users, sessions, API keys, RBAC, MFA] + JWTService[JWT service and session manager] + APIKeyMgr[API key manager] + end + + subgraph UserContext["Resolved user context"] + SingleUser[Fixed single-user principal] + MultiUser[DB-backed user principal] + UserId[user_id and permissions] + end + + subgraph UserStorage["Per-user content storage selection"] + DBPaths[DatabasePaths and API_Deps DB helpers] + UserRoot["USER_DB_BASE_DIR//"] + MediaDB[Media DB, FTS, chunks] + NotesDB[ChaChaNotes] + VectorRoot[ChromaDB and vector metadata] + EvalDB[Per-user evaluations DB] + end + + APIKey --> AuthDep + Bearer --> AuthDep + Cookie --> AuthDep + AuthDep --> AuthNZ + AuthNZ --> APIKeyMgr + AuthNZ --> JWTService + APIKeyMgr --> AuthDB + JWTService --> AuthDB + AuthNZ --> Principal + Principal --> Guards + Guards -->|single_user mode| SingleUser + Guards -->|multi_user mode| MultiUser + SingleUser --> UserId + MultiUser --> UserId + UserId --> DBPaths + DBPaths --> UserRoot + UserRoot --> MediaDB + UserRoot --> NotesDB + UserRoot --> VectorRoot + UserRoot --> EvalDB +``` + +**Key storage/provider touchpoints:** AuthNZ DB stores identity, sessions, API keys, RBAC, quotas, and MFA state. Per-user content storage is selected only after user context resolves; it lives under `USER_DB_BASE_DIR/<user_id>/` and includes Media DB, ChaChaNotes, ChromaDB/vector metadata, prompts, outputs, and per-user evaluations storage.
+ +**Where to look in code:** `app/api/v1/API_Deps/auth_deps.py`, `app/core/AuthNZ/`, `app/core/DB_Management/db_path_utils.py`, `app/core/DB_Management/Users_DB.py`, and the per-domain DB dependency modules under `app/api/v1/API_Deps/`. + +### Media Ingestion + +**Purpose:** Convert files, documents, URLs, web pages, audio, and video into normalized records, chunks, search indexes, and optional embeddings so content is searchable and RAG-ready. + +**Primary entrypoints:** `POST /api/v1/media/add`, `POST /api/v1/media/process-documents`, `POST /api/v1/media/process-videos`, `POST /api/v1/media/process-audios`, `POST /api/v1/media/process-pdfs`, `POST /api/v1/media/process-ebooks`, web scraping and ingestion-source routes. + +```mermaid +flowchart LR + subgraph Inputs + Files[Uploaded files and documents] + URLs[URL, video, audio, feed inputs] + Web[Web scraping and article extraction] + end + + subgraph EndpointLayer["Media endpoints"] + Add["/media/add persistent ingest"] + Process["process-* no-persistence helpers"] + JobsPath[Optional Jobs or background path] + end + + subgraph Processing["core/Ingestion_Media_Processing"] + Dispatch[Media type dispatch] + Download[Download with yt-dlp or URL fetch] + Transcode[ffmpeg transcode or audio extraction] + OCR[PDF or image OCR branch] + STT[Audio/video STT branch] + Parse[Document, ebook, HTML, XML parsing] + Normalize[Normalize text, metadata, transcript segments] + Chunk[Chunking strategies and templates] + end + + subgraph Searchable["Search and RAG readiness"] + Persist[Persist primary item and metadata] + MediaDB[Per-user Media DB] + FTS[FTS5 media and keyword indexes] + EmbedOpt{generate_embeddings?} + Embed[Embedding provider/model] + Vector[Per-user ChromaDB or vector backend] + end + + Files --> Add + URLs --> Add + Web --> Add + Files --> Process + URLs --> Process + Add --> Dispatch + Process --> Dispatch + Add --> JobsPath + JobsPath --> Dispatch + Dispatch --> Download + Dispatch --> Parse + Download --> 
Transcode + Transcode --> STT + Parse --> OCR + OCR --> Normalize + STT --> Normalize + Parse --> Normalize + Normalize --> Chunk + Chunk --> Persist + Persist --> MediaDB + MediaDB --> FTS + Persist --> EmbedOpt + EmbedOpt -->|yes| Embed + Embed --> Vector + EmbedOpt -->|no| FTS +``` + +**Key storage/provider touchpoints:** Media DB stores content, transcripts, metadata, chunks, keywords, and FTS state. Embedding generation writes per-user vector records and vector metadata. Providers include yt-dlp, ffmpeg, OCR backends, STT backends, web extractors, embedding providers, and optional Jobs workers. + +**Where to look in code:** `app/api/v1/endpoints/media/`, `app/core/Ingestion_Media_Processing/`, `app/core/DB_Management/Media_DB_v2.py`, `app/core/DB_Management/media_db/`, `app/core/Embeddings/`, `Docs/Code_Documentation/Pieces.md`, and `Docs/Code_Documentation/Ingestion_Pipeline_Video.md`. + +### Audio STT/TTS + +**Purpose:** Handle file transcription, real-time streaming transcription, and speech synthesis while keeping the file, WebSocket, and TTS paths distinct. + +**Primary entrypoints:** `POST /api/v1/audio/transcriptions`, `WS /api/v1/audio/stream/transcribe`, `POST /api/v1/audio/speech`, `GET /api/v1/audio/voices/catalog`, audio history and audio job/status endpoints. 
+ +```mermaid +flowchart TB + subgraph FileSTT["File transcription path"] + FileReq["/audio/transcriptions upload"] + ValidateAudio[Validate file and options] + STTBackend["Select STT backend: faster_whisper, NeMo, Qwen, local"] + Transcript[Transcript, segments, SRT/VTT/JSON] + STTResponse[Return transcript response] + UploadRetention[Uploaded audio retained only by STT policy] + end + + subgraph StreamSTT["Streaming transcription path"] + WSReq["WebSocket /audio/stream/transcribe"] + StreamAuth[Token or auth context] + StreamConfig[Streaming model config] + AudioChunks[Incoming audio chunks] + PartialFinal[Partial and final transcript frames] + PersistGate{"persist_transcript and media_id?"} + NoPersist[No Media DB transcript write] + end + + subgraph TTSPath["TTS path"] + SpeechReq["/audio/speech text request"] + VoiceCatalog[Voice catalog and settings] + TTSBackend[Select TTS backend: OpenAI-compatible or Kokoro/local] + AudioOut[Audio bytes or file output] + end + + subgraph OptionalPersistence["Optional persistence and background tracking"] + TTSHistory[TTS history and audio job records] + MediaPersist[upsert_transcript writes media transcript] + ChunkSearch[Optional chunk and index transcript] + MediaDB[Per-user Media DB and FTS] + Vector[Optional embeddings and vector store] + Files[Per-user outputs, voices, retained artifacts] + Jobs[Audio Jobs/background workers] + end + + FileReq --> ValidateAudio --> STTBackend --> Transcript + FileReq --> UploadRetention --> Files + WSReq --> StreamAuth --> StreamConfig --> AudioChunks --> PartialFinal + SpeechReq --> VoiceCatalog --> TTSBackend --> AudioOut + Transcript --> STTResponse + PartialFinal --> PersistGate + PersistGate -->|yes| MediaPersist + PersistGate -->|no| NoPersist + AudioOut --> TTSHistory + MediaPersist --> ChunkSearch + ChunkSearch --> MediaDB + ChunkSearch --> Vector + AudioOut --> Files + TTSHistory --> Jobs +``` + +**Key storage/provider touchpoints:** STT and TTS providers may be local 
runtimes or external OpenAI-compatible services. File STT usually returns transcript responses; uploaded audio may be retained according to STT policy. Streaming transcript persistence is opt-in and requires `persist_transcript` plus `media_id` before `upsert_transcript` writes to the Media DB. TTS has history/audio jobs, and generated or uploaded artifacts may be retained by policy. Media transcript persistence is optional and conditional; only persisted transcripts can later be chunked, indexed with FTS, and embedded for RAG. + +**Where to look in code:** `app/api/v1/endpoints/audio/`, especially `audio.py`, `audio_transcriptions.py`, `audio_streaming.py`, `audio_tts.py`, `audio_history.py`, and `audio_jobs.py`; also `app/core/Ingestion_Media_Processing/Audio/`, `app/core/TTS/`, `Docs/STT-TTS/`, and media persistence helpers when transcription is saved as content. + +### Chunking And Embeddings + +**Purpose:** Produce stable text pieces from raw content and attach embedding vectors so chunks can be retrieved by FTS, BM25, vector search, or hybrid RAG. + +**Primary entrypoints:** `POST /api/v1/chunking/chunk_text`, chunk template routes, ingestion-triggered chunking in media/process endpoints, embedding endpoints, media embedding jobs, and vector-store admin routes. 
+ +```mermaid +flowchart LR + subgraph Triggers + APIChunk[API-triggered chunk_text] + IngestChunk[Ingestion-triggered chunking] + Batch[Batch or worker-triggered embedding] + end + + subgraph Chunking["core/Chunking"] + Options[Resolve strategy/template/options] + Strategies["words, sentences, paragraphs, tokens, semantic, template"] + Pieces["Chunk objects with text, offsets, metadata"] + end + + subgraph Metadata["Media DB relationship"] + MediaItem[Media item or transcript] + Unvectorized[UnvectorizedMediaChunks] + MediaChunks[MediaChunks or claims/propositions] + FTS[FTS5 text and keyword indexes] + end + + subgraph Embeddings["core/Embeddings"] + Provider[Embedding provider/model selection] + Queue[Batch/job metadata] + Vectors[Vector records] + VectorStore[Per-user ChromaDB or pgvector] + end + + APIChunk --> Options + IngestChunk --> Options + Options --> Strategies --> Pieces + Pieces --> MediaItem + Pieces --> Unvectorized + Pieces --> MediaChunks + MediaItem --> FTS + MediaChunks --> FTS + Unvectorized --> Batch + Batch --> Queue + APIChunk --> Provider + IngestChunk --> Provider + Queue --> Provider + Provider --> Vectors --> VectorStore + Vectors --> MediaChunks +``` + +**Key storage/provider touchpoints:** Chunk metadata and FTS state live in the per-user Media DB. Vector payloads and embedding job/batch metadata live under the per-user vector store path. Embedding providers and models are resolved from request/config, and chunking can be invoked directly by API callers or indirectly by ingestion. + +**Where to look in code:** `app/api/v1/endpoints/chunking.py`, embedding endpoints, `app/core/Chunking/`, `app/core/Ingestion_Media_Processing/chunking_options.py`, `app/core/Embeddings/ChromaDB_Library.py`, vector metadata/job DB modules, `Docs/Code_Documentation/Pieces.md`, and `Docs/Code_Documentation/Database.md`. 
+ +### RAG/Search + +**Purpose:** Normalize search/RAG requests, retrieve candidate chunks from lexical and vector paths, rerank and post-process them, then assemble results or generation context. + +**Primary entrypoints:** `POST /api/v1/rag/search`, `POST /api/v1/rag/search/stream`, RAG settings/backends endpoints, media search endpoints, and chat flows that request RAG context before generation. + +```mermaid +flowchart LR + subgraph Request + Standalone[Standalone RAG/Search endpoint] + ChatUse[Chat asks for optional RAG context] + Normalize[resolve_rag_request and settings] + end + + subgraph Retrieval["Hybrid retrieval"] + Plan[Retrieval plan] + FTS[FTS/BM25 retrieval from Media DB] + Vector[Vector retrieval from ChromaDB or pgvector] + Merge[Score normalization and merge] + end + + subgraph RankContext["Rank and context assembly"] + Rerank[rerank: FlashRank, cross-encoder, hybrid, llama.cpp, or none] + Filters[Security filters, citations, highlighting] + Context[Result/context assembly] + Stream[Optional event stream] + end + + subgraph Consumers + SearchResponse[RAG search response] + ChatPrompt[Context passed to chat prompt] + Feedback[Feedback and analytics] + end + + Standalone --> Normalize + ChatUse --> Normalize + Normalize --> Plan + Plan --> FTS + Plan --> Vector + FTS --> Merge + Vector --> Merge + Merge --> Rerank + Rerank --> Filters + Filters --> Context + Context --> SearchResponse + Context --> ChatPrompt + Context --> Stream + SearchResponse --> Feedback +``` + +**Key storage/provider touchpoints:** FTS/BM25 reads from the per-user Media DB and its FTS tables. Vector retrieval reads per-user ChromaDB or pgvector collections populated by embeddings. Rerankers may use local models or provider-backed adapters. Feedback and analytics attach to the RAG service path. 
+ +**Where to look in code:** `app/api/v1/endpoints/rag_unified.py`, `app/core/RAG/rag_service/request_resolution.py`, `retrieval_plan.py`, `database_retrievers.py`, `unified_pipeline.py`, `response_mapping.py`, `streaming_executor.py`, and embedding/vector-store modules. + +### Chat And LLM Provider Calls + +**Purpose:** Accept OpenAI-compatible chat requests, optionally enrich them with retrieval context, resolve a provider/model, call the adapter, and persist conversation state separately from retrieval. + +**Primary entrypoints:** `POST /api/v1/chat/completions`, chat session/conversation routes, chat document/workflow routes, `/api/v1/llm/providers`, and provider metadata/model routing routes. + +```mermaid +flowchart LR + subgraph ChatRequest["Chat generation"] + Endpoint["/chat/completions"] + Validate[OpenAI-compatible request validation] + Session[Optional conversation or session state] + PersistIn[Persist user message when configured] + end + + subgraph RetrievalContext["Optional retrieval"] + NeedRAG{"RAG requested?"} + RAGFlow["RAG/Search flow"] + PromptContext[Prompt context and citations] + end + + subgraph ProviderCall["LLM provider call"] + Resolve[Provider/model resolution and BYOK/config lookup] + Adapter[LLM adapter registry] + External[Commercial or local provider] + end + + subgraph ResponsePaths["Response paths"] + NonStream[Non-streaming JSON response] + Stream[Streaming SSE chunks] + PersistOut[Persist assistant message and metadata] + NotesDB[Per-user ChaChaNotes chat/session DB] + end + + Endpoint --> Validate --> Session --> PersistIn + PersistIn --> NeedRAG + NeedRAG -->|yes| RAGFlow --> PromptContext --> Resolve + NeedRAG -->|no| Resolve + Resolve --> Adapter --> External + External -->|complete response| NonStream + External -->|delta events| Stream + NonStream --> PersistOut + Stream --> PersistOut + PersistOut --> NotesDB +``` + +**Key storage/provider touchpoints:** Chat/session state persists in the per-user ChaChaNotes database 
when configured. RAG context is assembled from Media DB and vector-store reads but remains separable from generation. Provider resolution can use config, BYOK/user provider secrets, model routing, and adapter registry entries for OpenAI-compatible, commercial, and local providers. + +**Where to look in code:** chat endpoints under `app/api/v1/endpoints/`, `app/core/Chat/`, `app/core/LLM_Calls/adapter_registry.py`, `app/core/LLM_Calls/providers/`, `app/core/LLM_Calls/routing/`, `app/core/AuthNZ/byok_helpers.py`, and `app/core/DB_Management/ChaChaNotes_DB.py`. + +### Jobs And Scheduler + +**Purpose:** Distinguish user-visible Jobs from internal Scheduler orchestration and show how recurring APScheduler services bridge into the chosen backend. + +**Primary entrypoints:** Jobs admin/status endpoints, domain workers that enqueue Jobs, Scheduler workflow endpoints, `@task`-registered scheduler handlers, and APScheduler-backed workflow and digest services. + +```mermaid +flowchart LR + subgraph Producers + UserAction[User-visible long work] + InternalFlow[Internal orchestration] + Recurring[Recurring APScheduler trigger] + end + + subgraph JobsPath["Jobs backend"] + JobCreate[Create Job with owner, domain, quota] + JobDB[Jobs DB or Redis-backed state] + Admin[Admin status, pause, resume, drain, retry] + WorkerSDK[Jobs WorkerSDK or domain worker] + JobResult[Result, failure, retry, audit] + end + + subgraph SchedulerPath["Core Scheduler backend"] + TaskReg["@task handler registration"] + TaskCreate[Create task with dependency and idempotency key] + SchedulerDB[Scheduler persistence] + Dependency[Dependency resolution] + SchedulerWorker[Scheduler worker pool] + TaskResult[Task result and workflow state] + end + + subgraph Bridge["APScheduler bridges"] + APS[APScheduler service] + Choose{"Chosen backend"} + end + + UserAction --> JobCreate --> JobDB --> Admin + JobDB --> WorkerSDK --> JobResult --> Admin + InternalFlow --> TaskReg --> TaskCreate --> SchedulerDB --> Dependency -->
SchedulerWorker --> TaskResult + Recurring --> APS --> Choose + Choose -->|user-visible or ops-controlled| JobCreate + Choose -->|dependency orchestration| TaskCreate +``` + +**Key storage/provider touchpoints:** Jobs use a Jobs backend for owner/domain state, retries, admin controls, quotas, worker leases, and status summaries. Scheduler uses its own persistence for task registration, dependencies, idempotency, and workflow execution. APScheduler services should enqueue into Jobs or Scheduler according to the workflow they support. + +**Where to look in code:** `app/api/v1/endpoints/jobs_admin.py`, `app/core/Jobs/`, `app/services/*jobs_worker*.py`, `app/api/v1/endpoints/scheduler_workflows.py`, `app/core/Scheduler/`, workflow/watchlist scheduler services, and APScheduler startup/lifecycle services. + +**Decision note:** Use Jobs for new user-visible features or work needing admin/ops status, pause/resume/drain, retries, quotas, or row-level security (RLS). Use Scheduler for internal orchestration where registered handlers, task dependencies, and idempotency keys are central. Recurring schedules should use APScheduler to enqueue into whichever backend the feature needs. + +## Extended Domain Maps + +These maps cover the remaining router domains at group level. They avoid endpoint inventory detail, but each section names the route families, core services, storage, providers, and handoff points needed to trace a domain end to end. + +### Evaluations + +**Purpose:** Manage evaluation recipes, datasets, runs, model-graded checks, RAG evaluations, metrics, and result persistence without mixing evaluation state into chat or media storage. + +**Primary entrypoints:** `/api/v1/evaluations`, `/api/v1/evaluations/datasets`, `/api/v1/evaluations/{eval_id}/runs`, recipes, synthetic datasets, RAG pipeline evaluation, embeddings A/B tests, benchmarks, webhooks, and evaluation history/status routes.
+ +```mermaid +flowchart LR + subgraph Routes["Evaluation routes"] + CRUD[Recipes and evaluation CRUD] + Datasets[Datasets and samples] + Runs[Runs, cancel, history, status] + RAGHooks[RAG eval and benchmark hooks] + end + + subgraph Services["core/Evaluations"] + Unified[UnifiedEvaluationService] + Runner[Evaluation runner and recipe executors] + Judge[GEval, response quality, LLM judge] + Metrics[Metrics, audit, webhooks] + end + + subgraph External["Inputs and providers"] + RAG[RAG/Search results and traces] + LLM[LLM provider or BYOK judge call] + Embed[Embedding provider for A/B tests] + end + + subgraph Storage["Per-user evaluation storage"] + EvalDB["USER_DB_BASE_DIR/{user_id}/evaluations/evaluations.db"] + Audit[Unified audit events] + Results[Metrics, outputs, run state] + end + + CRUD --> Unified + Datasets --> Unified + Runs --> Runner + RAGHooks --> Runner + Unified --> EvalDB + Runner --> RAG + Runner --> Judge + Judge --> LLM + Runner --> Embed + Runner --> Results --> EvalDB + Metrics --> Audit + Metrics --> EvalDB +``` + +**Key storage/provider touchpoints:** Evaluations use per-user evaluation storage where user context is available, including recipes, datasets, runs, idempotency keys, metrics, and results. LLM judge calls go through configured provider/BYOK resolution; RAG evaluation reads RAG outputs and persists evaluation metrics rather than changing RAG storage directly. + +**Where to look in code:** `app/api/v1/endpoints/evaluations/`, `app/core/Evaluations/`, `app/core/DB_Management/Evaluations_DB.py`, `app/core/DB_Management/db_path_utils.py`, `Docs/Code_Documentation/Evaluations_Developer_Guide.md`, and RAG evaluation helpers under `app/core/RAG/`. + +### MCP Unified + +**Purpose:** Expose MCP over HTTP and WebSocket with AuthNZ/RBAC, module/tool discovery, domain dispatch, health, metrics, and tool execution responses.
+ +**Primary entrypoints:** `/api/v1/mcp`, `/api/v1/mcp/request/batch`, `/api/v1/mcp/ws`, `/api/v1/mcp/status`, `/api/v1/mcp/metrics`, `/api/v1/mcp/tools`, `/api/v1/mcp/tools/execute`, `/api/v1/mcp/modules`, `/api/v1/mcp/resources`, `/api/v1/mcp/prompts`, MCP token routes, hub routes, and scoped tool catalog routes. + +```mermaid +flowchart LR + subgraph Entrypoints["MCP entrypoints"] + HTTP[MCP JSON-RPC HTTP] + Batch[Batch request] + WS[WebSocket session] + Status[Status, metrics, health] + Tools[Tools, modules, resources, prompts] + end + + subgraph Security["Auth and governance"] + Auth[API key, JWT, or MCP JWT] + RBAC[Permissions and RBAC] + Catalogs[Tool catalogs and org/team scope] + end + + subgraph Server["core/MCP_unified"] + ServerCore[Unified MCP server] + Registry[Module and tool registry] + Dispatch[Domain dispatch] + Monitor[Metrics and monitoring] + end + + subgraph Domains["Tool domains"] + Content[Content, RAG, notes, media] + Admin[Admin and configuration tools] + External[External MCP servers and hub] + end + + subgraph Output["Responses and telemetry"] + ToolResult[Tool execution result] + Lists[Filtered discovery lists] + Health[Status and Prometheus metrics] + end + + HTTP --> Auth + Batch --> Auth + WS --> Auth + Tools --> Auth + Status --> RBAC + Auth --> RBAC --> Catalogs --> ServerCore + ServerCore --> Registry --> Dispatch + Dispatch --> Content + Dispatch --> Admin + Dispatch --> External + Dispatch --> ToolResult + Registry --> Lists + Monitor --> Health + ServerCore --> Monitor +``` + +**Key storage/provider touchpoints:** AuthNZ stores identities, permissions, org/team membership, provider secrets, and tool catalog metadata. MCP runtime state, metrics, external server settings, and tool/module health live in MCP unified services. Tool execution then touches the target domain storage or provider through the dispatched module. 
+ +**Where to look in code:** `app/api/v1/endpoints/mcp_unified_endpoint.py`, `app/api/v1/endpoints/mcp_hub_management.py`, `app/api/v1/endpoints/mcp_catalogs_manage.py`, `app/core/MCP_unified/`, `app/services/admin_tool_catalog_service.py`, `Docs/MCP/`, and `Docs/MCP/Unified/`. + +### Prompt Studio + +**Purpose:** Manage prompt projects, prompt versions, test cases, evaluations, optimization jobs, live status, and WebSocket progress around provider-backed prompt execution. + +**Primary entrypoints:** Prompt Studio project, prompt, test case, evaluation, optimization, status, and WebSocket routers under `/api/v1/prompt-studio`. + +```mermaid +flowchart LR + subgraph Routes["Prompt Studio routes"] + Projects[Projects] + Prompts[Prompts and versions] + Cases[Test cases] + Eval[Evaluations] + Opt[Optimization] + Status[Status and WebSocket] + end + + subgraph Services["prompt_studio core"] + DBDep[Prompt Studio DB dependency] + Executor[Prompt executor] + TestRunner[Test runner] + Optimizer[Optimization strategies] + JobsAdapter[Jobs adapter] + end + + subgraph Providers + LLM[LLM provider calls] + Jobs[Core Jobs backend and worker] + end + + subgraph Storage + PromptDB["USER_DB_BASE_DIR/{user_id}/prompt_studio_dbs/prompt_studio.db"] + Results[Test, evaluation, optimization results] + end + + Projects --> DBDep + Prompts --> DBDep + Cases --> DBDep + Eval --> TestRunner + Opt --> Optimizer + Status --> JobsAdapter + DBDep --> PromptDB + TestRunner --> Executor --> LLM + Optimizer --> TestRunner + Eval --> JobsAdapter --> Jobs + Opt --> JobsAdapter --> Jobs + Jobs --> Results --> PromptDB +``` + +**Key storage/provider touchpoints:** Prompt Studio persists projects, signatures, prompts, versions, test cases, evaluation runs, optimization runs, and job metadata in the per-user Prompt Studio DB. Prompt execution and optimization call LLM providers through the existing provider layer.
Longer evaluation, generation, and optimization work can run through the core Jobs backend and prompt-studio worker. + +**Where to look in code:** `app/api/v1/endpoints/prompt_studio/`, `app/api/v1/API_Deps/prompt_studio_deps.py`, `app/core/Prompt_Management/prompt_studio/`, `app/core/Prompt_Management/prompt_studio/services/jobs_worker.py`, and `Docs/Code_Documentation/Database.md`. + +### Notes And Chatbooks + +**Purpose:** Store notes and graph links, support web clipper style captures, and export/import portable Chatbooks that can include notes, conversations, characters, and related artifacts. + +**Primary entrypoints:** `/api/v1/notes`, `/api/v1/notes/graph`, web clipper/capture paths where enabled, `/api/v1/chatbooks/export`, `/api/v1/chatbooks/import`, preview, continuation, download, and export/import job status routes. + +```mermaid +flowchart LR + subgraph NotesRoutes["Notes routes"] + Notes[Notes CRUD and search] + Graph[Graph and links] + Clip[Web clipper or captured content] + end + + subgraph ChatbookRoutes["Chatbook routes"] + Export[Export selection] + Import[Import ZIP] + Preview[Preview and continuation] + Jobs[Export/import jobs] + end + + subgraph Core["Core services"] + NotesCore[Notes service] + GraphCore[Notes graph service] + ChatbookSvc[ChatbookService] + Validator[ChatbookValidator and quotas] + end + + subgraph Storage + ChaCha[Per-user ChaChaNotes DB] + Temp[Per-user chatbooks temp] + Archives[Generated chatbook archives] + Audit[Audit and metrics] + end + + Notes --> NotesCore --> ChaCha + Graph --> GraphCore --> ChaCha + Clip --> NotesCore + Export --> Validator --> ChatbookSvc + Import --> Validator --> ChatbookSvc + Preview --> ChatbookSvc + ChatbookSvc --> ChaCha + ChatbookSvc --> Temp + ChatbookSvc --> Archives + Jobs --> ChatbookSvc + ChatbookSvc --> Audit +``` + +**Key storage/provider touchpoints:** Notes, graph edges, chats, and characters are stored in the per-user ChaChaNotes DB. 
Chatbook import/export uses per-user chatbook temp/export directories, validates archive content, tracks quotas/jobs, and writes audit/metrics events. Generated archives are returned through job-backed download metadata. + +**Where to look in code:** `app/api/v1/endpoints/notes.py`, `app/api/v1/endpoints/notes_graph.py`, `app/api/v1/endpoints/chatbooks.py`, `app/core/Notes/`, `app/core/Notes_Graph/`, `app/core/WebClipper/`, `app/core/Chatbooks/`, and `app/core/DB_Management/ChaChaNotes_DB.py`. + +### Research And Web Scraping + +**Purpose:** Search papers, perform multi-provider web search, scrape web content, run deeper research sessions, and hand useful results to ingestion, Media DB, or RAG-ready storage. + +**Primary entrypoints:** `/api/v1/research/websearch`, preferred `/api/v1/paper-search/*` routes, deprecated research shims, `/api/v1/research/runs`, web scraping service/job/progress routes, media web scraping process routes, and optional ingestion handoff routes. + +```mermaid +flowchart LR + subgraph Routes["Research and scrape routes"] + Paper[Paper search] + WebSearch[Web search and aggregation] + Scrape[Web scraping service] + Deep[Deep research runs] + Process[Media web scrape process] + end + + subgraph Sources["External sources"] + PaperSrc[arXiv, Semantic Scholar, PubMed, OSF, Zenodo] + SearchSrc[Searx, Tavily, Serper, Google-like providers] + Web[Target web pages and feeds] + LLM[LLM aggregation and relevance calls] + end + + subgraph Core["Research core"] + Normalize[Normalize and rank results] + Policy[Egress, robots, rate, dedupe policy] + Extract[Article extraction and scraping] + Bundle[Research bundles and artifacts] + end + + subgraph Handoff["Persistence and handoff"] + Ingest[Ingestion handoff] + MediaDB[Per-user Media DB] + RAG[RAG/Search availability] + Outputs[Research outputs/artifacts] + end + + Paper --> PaperSrc --> Normalize + WebSearch --> SearchSrc --> Normalize + WebSearch --> LLM + Scrape --> Policy --> Web --> Extract + Deep 
--> Bundle + Process --> Extract + Normalize --> Ingest + Extract --> Ingest + Bundle --> Outputs + Ingest --> MediaDB --> RAG +``` + +**Key storage/provider touchpoints:** Paper and web providers are external and may require API keys or configured endpoints. Scraping applies outbound/robots/rate/dedupe policy before extraction. Ingestion handoff writes normalized content to the per-user Media DB and can make content available for FTS, embeddings, and RAG; deep research can also produce allowlisted output artifacts. + +**Where to look in code:** `app/api/v1/endpoints/research.py`, `app/api/v1/endpoints/research_runs.py`, `app/api/v1/endpoints/paper_search.py`, `app/api/v1/endpoints/web_scraping.py`, `app/api/v1/endpoints/media/process_web_scraping.py`, `app/core/Search_and_Research/README.md`, `app/core/Web_Scraping/`, `app/core/WebSearch/`, and `app/core/Research/`. + +### Storage, Files, And Outputs + +**Purpose:** Track generated files, user folders, trash, downloads, quotas, output templates, and generated artifacts consistently across features. + +**Primary entrypoints:** `/api/v1/storage/files`, storage usage, folders, trash, download routes, admin storage quota routes, `/api/v1/outputs`, output template routes, and feature-specific generated file registration helpers. 
+ +```mermaid +flowchart LR + subgraph Routes["Storage and output routes"] + Files[User files] + Folders[Virtual folders] + Trash[Trash and restore] + Download[Download and signed access] + Usage[Usage and quotas] + Outputs[Outputs and templates] + end + + subgraph Services["Storage services"] + Quota[StorageQuotaService] + Repo[Generated files repo] + Helpers[Generated file helpers] + Guard[Storage quota guard] + end + + subgraph Producers["Artifact producers"] + TTS[TTS and voice clones] + Chatbooks[Chatbooks] + Research[Research artifacts] + Media[Media and ingestion outputs] + end + + subgraph Storage + FileStore["USER_DB_BASE_DIR/{user_id}/outputs and voices"] + Metadata[Generated files metadata] + Templates[Output templates] + Quotas[User, team, org quotas] + end + + Producers --> Helpers --> Quota + Files --> Repo + Folders --> Repo + Trash --> Repo + Download --> Repo + Usage --> Quota + Outputs --> Templates + Guard --> Quota + Quota --> Repo --> Metadata + Repo --> FileStore + Quota --> Quotas +``` + +**Key storage/provider touchpoints:** Generated file metadata, access times, soft delete state, folders, and quota accounting are stored through AuthNZ/generated-file repositories and storage services, while bytes live under per-user outputs/voices or feature-specific directories. Download routes verify ownership and path containment; signed download behavior is documented for job-backed/generated artifacts where the feature exposes expiring download URLs. + +**Where to look in code:** `app/api/v1/endpoints/storage.py`, `storage_user_files.py`, `storage_user_folders.py`, `storage_trash.py`, `storage_usage.py`, `storage_download.py`, `storage_admin_quotas.py`, `outputs.py`, `outputs_templates.py`, `app/services/storage_quota_service.py`, `app/api/v1/API_Deps/storage_quota_guard.py`, and `app/core/Storage/`.
+ +### Admin, Ops, And Governance + +**Purpose:** Centralize operator controls for users, RBAC, monitoring, audit, orgs, billing, config, jobs, resource limits, usage, and operational safety surfaces. + +**Primary entrypoints:** Admin route group under `/api/v1/admin/*`, jobs admin routes, config admin routes, monitoring/metrics/audit routes, org/team/billing/privilege routes, resource governor and quota routes, MCP catalog/hub admin routes, and startup/system diagnostics. + +```mermaid +flowchart LR + subgraph AdminRoutes["Admin routes"] + Users["Users, sessions, MFA, API keys"] + RBAC["RBAC, privileges, orgs, billing"] + Ops["Monitoring, metrics, audit, system"] + Config[Config admin and profiles] + JobsAdmin[Jobs admin] + Governor[Resource governor and quotas] + end + + subgraph Deps["Admin dependencies"] + Role[RequireRole admin] + Perm[Permission and scope guards] + Rate[Rate limits] + AuditDep[Audit context] + end + + subgraph Core["Admin core services"] + AuthNZ[AuthNZ services and repos] + Metrics[Metrics manager] + Jobs[JobManager and RLS/domain controls] + ConfigSvc[Config/profile stores] + Governance[Moderation, resource, policy services] + end + + subgraph Storage + AuthDB[AuthNZ users, roles, orgs, billing, BYOK] + Usage[Usage, audit, metrics, quotas] + JobsDB[Jobs DB or archive] + ConfigFiles[Config files and snapshots] + end + + AdminRoutes --> Role --> Perm --> Rate --> AuditDep + AuditDep --> AuthNZ + Users --> AuthNZ --> AuthDB + RBAC --> AuthNZ + Ops --> Metrics --> Usage + Config --> ConfigSvc --> ConfigFiles + JobsAdmin --> Jobs --> JobsDB + Governor --> Governance --> Usage + AuditDep --> Usage +``` + +**Key storage/provider touchpoints:** Admin surfaces primarily touch shared AuthNZ/usage/audit storage, org/team/billing/privilege tables, config snapshots/files, resource governor quota state, and Jobs persistence/archive state. 
Domain-scoped admin controls may apply RBAC and RLS context before listing, mutating, or sweeping operational records. + +**Where to look in code:** `app/api/v1/endpoints/admin/`, `app/api/v1/endpoints/jobs_admin.py`, `app/api/v1/endpoints/config_admin.py`, `app/core/AuthNZ/`, `app/core/Jobs/`, `app/core/Metrics/`, `app/core/Moderation/`, `app/services/*admin*`, and `Docs/API-related/User_Registration_API_Documentation.md`. + +### Characters And Workspaces + +**Purpose:** Manage character cards, character chat sessions/messages/memory, workspace sources/artifacts/notes, workspace migrations, and their handoff into chat and LLM generation. + +**Primary entrypoints:** Character endpoints, character session/message/memory routes, workspace CRUD, workspace sources/artifacts/notes/capabilities/status routes, workspace migration session/chunk/finalize/client-delete-ack routes, and prototype workspace/session routes. + +```mermaid +flowchart LR + subgraph Routes["Character and workspace routes"] + Characters[Character CRUD and cards] + Sessions[Character sessions/messages/memory] + Workspaces[Workspaces] + Sources[Workspace sources, artifacts, notes] + Migrations[Workspace migrations] + Prototype[Prototype workspaces and branch sessions] + end + + subgraph Core["Core services"] + CharCore[Character_Chat modules] + WorkspaceCore[Workspace capability and DB helpers] + MigrationCore[Migration session and chunk protocol] + ProtoCore[Prototype workspace orchestration] + ChatHandoff[Chat orchestration handoff] + end + + subgraph Storage + ChaCha[Per-user ChaChaNotes DB] + MigrationTables[Workspace migration sessions and chunks] + AuthDB[AuthNZ prototype workspace repos] + Jobs[Jobs for branch/source bootstrap] + end + + subgraph Providers + LLM[LLM providers] + RAG[RAG context from workspace sources] + end + + Characters --> CharCore --> ChaCha + Sessions --> CharCore --> ChaCha + Workspaces --> WorkspaceCore --> ChaCha + Sources --> WorkspaceCore --> ChaCha + Migrations --> 
MigrationCore --> MigrationTables --> ChaCha + MigrationCore --> WorkspaceCore + Prototype --> ProtoCore --> AuthDB + Prototype --> Jobs + CharCore --> ChatHandoff + WorkspaceCore --> ChatHandoff + ChatHandoff --> RAG + ChatHandoff --> LLM +``` + +**Key storage/provider touchpoints:** Characters, sessions, messages, memories, workspaces, workspace sources, artifacts, notes, and workspace migration records live primarily in the per-user ChaChaNotes DB. Workspace migrations create or reuse a target workspace, record migration sessions and declared chunks, accept idempotent chunk receipts, finalize only after all chunks are present, and track client legacy-delete acknowledgement state. Prototype workspace collaboration uses AuthNZ repository storage and Jobs for branch/session bootstrap. Character and workspace context can be passed to chat orchestration, which then calls RAG and LLM providers. + +**Where to look in code:** `app/api/v1/endpoints/characters_endpoint.py`, `app/api/v1/endpoints/workspaces.py`, `app/api/v1/endpoints/workspace_migrations.py`, `app/api/v1/endpoints/prototype_workspaces.py`, `app/core/Character_Chat/`, `app/core/Workspaces/`, `app/core/Prototype_Workspaces/`, workspace migration schema/methods in `app/core/DB_Management/ChaChaNotes_DB.py`, and chat orchestration modules. + +### Integrations And Connectors + +**Purpose:** Connect external systems, ingestion sources, and chat/meeting integrations to the same ingestion, research, Jobs, AuthNZ, and provider-secret paths used by internal workflows. + +**Primary entrypoints:** `/api/v1/connectors`, `/api/v1/ingestion-sources`, Slack events/commands/OAuth/admin routes, Discord routes/OAuth/admin helpers, Telegram admin/webhook routes, meetings routes, and optional connector or integration routers gated by configuration/dependencies. 
+ +```mermaid +flowchart LR + subgraph Routes["Integration routes"] + Connectors[Connectors and OAuth] + Sources[Ingestion sources and sync] + ChatOps["Slack, Discord, Telegram"] + Meetings[Meetings] + Optional[Optional gated routes] + end + + subgraph AuthConfig["Auth, secrets, and gating"] + Auth[User, org, team identity] + Secrets[Provider secrets and installs] + Gates[Feature/config gates] + Verify[Webhook signatures and policies] + end + + subgraph Work["Processing path"] + Queue[Connector or ingestion Jobs] + Normalize[Normalize external payloads] + Ingest[Ingestion handoff] + Research[Research/search handoff] + Chat[Chat/LLM handoff] + end + + subgraph StorageProviders["Storage and providers"] + AuthDB[AuthNZ secrets, installs, approvals] + MediaDB[Media DB] + NotesDB[ChaChaNotes] + External[External APIs and webhooks] + end + + Connectors --> Auth + Sources --> Auth + ChatOps --> Verify + Meetings --> Auth + Optional --> Gates + Auth --> Secrets --> AuthDB + Verify --> Secrets + Connectors --> External + ChatOps --> External + Sources --> Queue + Connectors --> Queue + Queue --> Normalize + Normalize --> Ingest --> MediaDB + Normalize --> Research + Normalize --> Chat + Chat --> NotesDB +``` + +**Key storage/provider touchpoints:** AuthNZ stores user/org/team identities, provider secrets, OAuth installs, linked actors, approvals, and connector metadata. Ingestion-source syncs and connector jobs enqueue work, normalize external payloads, and hand content to ingestion/Media DB, notes, research, or chat/LLM providers. Optional routes may be gated by config, dependency availability, or explicit feature flags. 
+ +**Where to look in code:** `app/api/v1/endpoints/connectors.py`, `app/api/v1/endpoints/ingestion_sources.py`, Slack/Discord/Telegram endpoint and support files, `app/api/v1/endpoints/meetings.py`, `app/core/Ingestion_Sources/`, connector services under `app/core/External_Sources/` where present, provider-secret repos under `app/core/AuthNZ/`, and media/research ingestion handoff modules. + +## Router Coverage Matrix + +This table groups routers by the way they are registered and maintained, not by every concrete endpoint path. Use it to audit whether new router domains have a corresponding atlas entry and whether related diagrams were updated together. + +| Router group or domain | Representative routes/modules | Atlas section | Coverage note | +| --- | --- | --- | --- | +| Core/infrastructure | `main.py`, `router_registry.py`, `router_groups/core.py`, `router_groups/minimal.py`, setup, health, metrics, OpenAPI helpers | [System Context](#system-context), [Request Lifecycle](#request-lifecycle), [Router Group Map](#router-group-map) | Covers app startup, router registration, middleware/dependencies, and operational surfaces. Individual health/setup variants are grouped under infrastructure rather than listed one by one. | +| Identity/config/sync | `auth.py`, `users.py`, `config_info.py`, `config_admin.py`, `sync.py`, AuthNZ dependencies, provider-secret helpers | [Auth And User Context](#auth-and-user-context), [Admin, Ops, And Governance](#admin-ops-and-governance) | Groups identity, user context, configuration, sync, and provider-secret flows because they share AuthNZ/user-scope ownership. | +| Chat/LLM | `chat.py`, OpenAI-compatible chat routes, `core/Chat/`, `core/LLM_Calls/`, provider routing | [Chat And LLM Provider Calls](#chat-and-llm-provider-calls) | Covers request shaping, optional RAG context, conversation persistence, provider selection, and streaming responses. Character-specific chat is cross-linked through the characters/workspaces row. 
| +| ACP/MCP | `mcp_unified_endpoint.py`, ACP endpoints where enabled, `core/MCP_unified/` | [MCP Unified](#mcp-unified), [Router Group Map](#router-group-map) | Groups MCP and ACP-style tool/client protocols as external tool-control surfaces with shared auth, RBAC, and execution concerns. | +| Content/RAG/media/audio/embeddings/evaluations/OCR | `media/` endpoint package, `media_embeddings.py`, `rag_unified.py`, `rag_health.py`, `audio/` endpoint package, `embeddings_*`, `evaluations_unified.py`, `ocr.py`, ingestion/chunking/embedding/RAG/evaluation core modules | [Media Ingestion](#media-ingestion), [Audio STT/TTS](#audio-stttts), [Chunking And Embeddings](#chunking-and-embeddings), [RAG/Search](#ragsearch), [Evaluations](#evaluations) | Groups high-volume content processing domains that move data between uploads/providers, Media DB, vector stores, per-user evaluations storage, and response-first audio paths. | +| Workflows/scheduler/jobs | `workflows.py`, jobs endpoints, Scheduler handlers, APScheduler bridges, WorkerSDK/background services | [Jobs And Scheduler](#jobs-and-scheduler), [Admin, Ops, And Governance](#admin-ops-and-governance) | Separates user-visible Jobs from internal Scheduler orchestration while showing where recurring schedules enqueue into each backend. | +| Notes/prompts/prompt studio/workspaces/characters | notes/chatbook endpoints, prompt endpoints, `prompt_studio/` endpoint package, workspace routes including migrations, character endpoints and card/session helpers | [Prompt Studio](#prompt-studio), [Notes And Chatbooks](#notes-and-chatbooks), [Characters And Workspaces](#characters-and-workspaces) | Groups user-authored knowledge, conversation artifacts, prompt assets, workspace migration state, and character/session data because they primarily persist through ChaChaNotes and related per-user stores. 
| +| Collections/reading and learning tools | `collections_feeds.py`, `collections_websub.py`, `reading.py`, `reading_highlights.py`, `translate.py`, `slides.py`, `flashcards.py`, `quizzes.py`, `study_suggestions.py`, writing/manuscript routes | [Media Ingestion](#media-ingestion), [Notes And Chatbooks](#notes-and-chatbooks), [Characters And Workspaces](#characters-and-workspaces) | Explicitly groups registered lightweight content and learning routers that organize, annotate, transform, or study content. The atlas covers their storage/provider handoffs at the domain level, not every route. | +| Application content tools | Kanban modules, `data_tables.py`, `items.py`, `reminders.py`, `notifications.py`, `watchlists.py`, scheduled tasks control plane, VN assets/play routes | [Storage, Files, And Outputs](#storage-files-and-outputs), [Jobs And Scheduler](#jobs-and-scheduler), [Admin, Ops, And Governance](#admin-ops-and-governance) | Covers app-level boards, data tables, items, tasks/reminders, notifications, watchlists, scheduled tasks, and VN routes as registered content/application surfaces with storage, scheduling, and governance touchpoints. | +| Persona/companion personalization | `persona.py`, `personalization.py`, `companion.py`, `archetype_endpoints.py`, voice assistant routes | [Chat And LLM Provider Calls](#chat-and-llm-provider-calls), [Characters And Workspaces](#characters-and-workspaces), [Audio STT/TTS](#audio-stttts) | Groups persona, companion, archetype, personalization, and voice assistant routes because they bridge user profile state, character/workspace context, chat/LLM calls, and audio streams. 
| +| Storage/files/outputs/sharing | file upload/download routes, outputs/artifacts, local storage helpers, sharing/export/import handlers, chatbooks | [Storage, Files, And Outputs](#storage-files-and-outputs), [Notes And Chatbooks](#notes-and-chatbooks), [Data Store Map](#data-store-map) | Covers storage ownership and file/output lifecycles at the domain level; concrete file routes are intentionally summarized by storage responsibility. | +| Research/web scraping/connectors/integrations | `research.py`, `paper_search.py`, `web_scraping.py`, connectors, ingestion sources, Slack/Discord/Telegram/meeting routes where enabled | [Research And Web Scraping](#research-and-web-scraping), [Integrations And Connectors](#integrations-and-connectors) | Groups external-source ingestion and integration callbacks because both normalize provider/web payloads before handing off to media, notes, research, chat, or jobs. | +| Admin/orgs/billing/resource governance/monitoring | admin routers, org/team routes, billing/subscription routes, resource governance, rate limits, metrics, audit/ops endpoints | [Admin, Ops, And Governance](#admin-ops-and-governance), [Request Lifecycle](#request-lifecycle) | Covers governance, quotas, RBAC, observability, and administrative controls as cross-cutting policy layers rather than feature-specific endpoint lists. | + +## How To Update This Atlas + +- Check `router_groups/*.py` and `router_registry.py` for router additions, removals, lazy imports, optional routes, or registration changes. +- Check changed endpoint and core modules for new storage ownership, provider calls, background workers, queue paths, or persistence gates. +- Update the relevant Mermaid diagram and the [Router Coverage Matrix](#router-coverage-matrix) together so coverage remains auditable. +- Re-run Markdown/Mermaid text checks for changed headings, diagram syntax anchors, and required terms. +- Record verification commands and results in the relevant Backlog task. 
diff --git a/Docs/Published/Code_Documentation/Docs_Site_Guide.md b/Docs/Published/Code_Documentation/Docs_Site_Guide.md index ca971d63e0..ff8a994d77 100644 --- a/Docs/Published/Code_Documentation/Docs_Site_Guide.md +++ b/Docs/Published/Code_Documentation/Docs_Site_Guide.md @@ -12,17 +12,30 @@ This document explains how the tldw_Server documentation site is organized, buil The public docs site is for OSS, self-host, and developer documentation. Hosted/commercial docs are excluded from the published site and should live in the private repo instead of this public docs pipeline. +The published site is audience-first: + +- `User Wiki`: install, run, configure, and use tldw_server. +- `Developer Wiki`: contribute to, test, package, and understand the codebase. + +These wiki pages are MkDocs landing pages in this repository, not the separate GitHub Wiki feature. + ## What Gets Published Only these folders are included on the public site: +- `Docs/Wiki` - `Docs/API-related` +- `Docs/ADR` - `Docs/Code_Documentation` - `Docs/Deployment` (excluding its nested `Monitoring`) - `Docs/Deployment/Monitoring` (published as top-level `Monitoring`) - `Docs/Evals` - `Docs/User_Guides` +These root-level files are also included when present: + +- `Docs/Architecture.md` + The curated content is synced into `Docs/Published/`. Do not manually edit files in `Docs/Published/` - they can be overwritten by the refresh script or CI. Hosted/commercial docs are excluded from this curated set even when they live under similarly named source areas. If a page exists mainly to run, sell, support, or differentiate the hosted SaaS service, keep it in the private repo rather than adding it to the public docs tree. 
@@ -32,6 +45,9 @@ Hosted/commercial docs are excluded from this curated set even when they live un - Script: `Helper_Scripts/refresh_docs_published.sh` - What it does: - Copies the approved folders from `Docs/` to `Docs/Published/` + - Copies the audience wiki landing pages from `Docs/Wiki` + - Copies architecture decision records from `Docs/ADR` + - Copies `Docs/Architecture.md` when present - Promotes `Docs/Deployment/Monitoring` to top-level `Docs/Published/Monitoring` - Removes the nested `Monitoring` under `Deployment` to avoid duplication - Preserves each section's `index.md` landing page @@ -88,6 +104,9 @@ To change the logo: replace `Docs/Logo.png` and run the refresh script. - The sidebar and ordering are defined explicitly in `mkdocs.yml` under `nav:` - When adding a new page you want visible in the sidebar, add a new entry under the appropriate section in `mkdocs.yml` - The nav uses paths relative to `Docs/Published/` +- Keep the top-level navigation audience-first: `Home`, `User Wiki`, `Developer Wiki`, and shared reference links. +- User-facing workflow docs belong under the `User Wiki` nav tree. +- Contributor, implementation, architecture, and docs-maintenance material belongs under the `Developer Wiki` nav tree. Example nav entry (under Code section): @@ -107,6 +126,10 @@ Tip: keep titles short and parallel (e.g., "Guide", "Reference", "Checklist"). 5. 
Commit and push; CI will refresh, build, and deploy the site Notes: +- Put audience chooser pages in `Docs/Wiki/` +- Put user-facing workflow guides in `Docs/User_Guides/` or `Docs/Getting_Started/` +- Put contributor-facing implementation guides in `Docs/Code_Documentation/` +- Put public architecture decision records in `Docs/ADR/` - Keep file names stable after they’re published to avoid broken links - Use relative links within the allowed folders; avoid linking to WIP docs outside the curated set - Prefer images stored under `Docs/assets/` or section subfolders; the refresh script copies section contents diff --git a/Docs/Published/User_Guides/WebUI_Extension/Knowledge_QA_Guide.md b/Docs/Published/User_Guides/WebUI_Extension/Knowledge_QA_Guide.md new file mode 100644 index 0000000000..c8375da562 --- /dev/null +++ b/Docs/Published/User_Guides/WebUI_Extension/Knowledge_QA_Guide.md @@ -0,0 +1,203 @@ +# Knowledge QA Guide + +Knowledge QA is the `/knowledge` workflow for asking questions against your personal library and reviewing grounded answers with citations. Use it when you want a cited answer from indexed documents, media, notes, conversations, task boards, or other selected Knowledge QA source categories. + +Knowledge QA is not the flashcards workflow. Flashcards are handled by the separate flashcards route and are not part of `/knowledge`. + +## Where To Open It + +- WebUI: open `/knowledge`. +- Browser extension: open the extension options page and choose Knowledge QA. + +The WebUI and extension use the same shared Knowledge QA interface. The extension can also be blocked by extension-specific setup, such as missing server URL, API key, host permission, or an allowlist/backend reachability problem. + +## Setup Requirements + +Before Knowledge QA can answer: + +1. The tldw server must be reachable. +2. Credentials must be configured for the selected auth mode. +3. The server must expose the Knowledge QA/RAG endpoints. +4. 
At least one selected personal-library source category must be searchable, or web fallback must be available and enabled. +5. Documents, media, notes, or other library items must be indexed before local-only answers can cite them. + +If setup is incomplete, Knowledge QA should show a recovery state instead of a blank page. Follow the visible action, such as finishing setup, reconnecting, adding or indexing sources, selecting source categories, or retrying the backend check. + +## Ask A Question + +1. Open Knowledge QA. +2. Confirm the page says `Ask Your Library`. +3. Review the source scope. +4. Enter a question in the search box. +5. Choose `Ask`. +6. Review the answer and citations. + +Good questions are specific and library-grounded, for example: + +- "What does my library say about the onboarding checklist?" +- "Which notes mention API key setup?" +- "Summarize the cited evidence about the release process." + +## Source Scope + +Source scope controls what Knowledge QA is allowed to search. + +Use the source scope controls to: + +- Select source categories such as documents/media or notes. +- Select specific documents or notes when available. +- Save and restore source/search profiles for repeated workflows. +- Enable or disable web fallback when the server supports web search. + +If no source categories are selected and web fallback is off, Knowledge QA should block submission with recovery copy. If the server has no indexed library sources, add or index content first through source-owner surfaces such as Media, Notes, or Quick Ingest. + +Web fallback is optional. Leave it off when you only want personal-library evidence. When it is on and the server uses external web results, Knowledge QA should label that evidence origin in the answer, evidence review, history, and export surfaces. + +## Presets And Settings + +Use presets when you want a quick tradeoff: + +- `Fast`: quicker retrieval with lighter search. +- `Balanced`: normal default for most questions. 
+- `Deep` or `Thorough`: broader retrieval for harder questions. +- `Custom`: manual settings have diverged from a preset. + +Use settings when you need more control: + +- Basic settings cover common tuning. +- Expert settings expose retrieval details for power users. +- Reset defaults returns settings to the supported baseline. + +Keep settings changes tied to a concrete question. If answers become noisy, reduce scope or return to `Balanced`. + +## Answer Model + +The answer model/provider menu controls which configured model generates the final answer. You can use the server default, select a configured provider/model, or enter a manual model name when the UI supports it. + +Provider/model availability depends on server configuration. Knowledge QA should avoid exposing sensitive provider configuration details in UI errors. + +## Citations And Evidence + +Citations connect answer claims to retrieved evidence. Use them to decide whether the answer is grounded enough to trust. + +Expected review flow: + +1. Read the answer. +2. Open or inspect cited sources. +3. Compare citation snippets with the answer claims. +4. Use the evidence rail when more detail is needed. +5. Switch between `Sources` and `Details` evidence views when available. + +Treat uncited or weakly cited claims as lower confidence. Narrow source scope, try a more specific question, or use a deeper preset if the evidence does not support the answer. + +If an answer is labeled as unsupported, degraded, unknown, or missing citations, do not treat it as a normal grounded answer. Re-run with narrower sources, inspect the evidence rail, or change settings before relying on it. + +## No Results And Recovery + +When Knowledge QA returns no results: + +- Check whether the correct source categories are selected. +- Select exact documents or notes if the search should be scoped. +- Try a broader query or a different phrase. +- Use web fallback only when you want external web evidence and the server supports it. 
+- Wait for indexing to finish if recently added sources are not searchable yet. + +No-results recovery should be explicit and should not imply that the library was searched successfully when the backend was offline or no sources were selected. + +## Export + +Use export after reviewing the answer and evidence. + +Observed export behavior includes: + +- Markdown export for text reuse. +- PDF export through the browser print flow. +- Chatbook export for portable tldw conversation packaging. +- Save to Notes when exportable content is available. +- Share-link controls when the current thread supports read-only sharing. + +Exported content should preserve the question, answer, citations, source scope, and relevant retrieval details when available. + +Exports should also preserve trust and evidence labels. Markdown exports include a `Trust and Evidence` section when trust state or evidence origin is known. Unsupported drafts are labeled in export output; if the export dialog asks for confirmation, confirm only when you intentionally want to keep that lower-confidence answer with the warning attached. + +## Relationship To Other Workflows + +- Research Workspace: use it for broader research projects, source organization, and multi-step investigation. Knowledge QA can support focused cited questions. +- Chat: use it for general conversation or model interaction. Knowledge QA is for library-grounded answers with citations. +- Notes: notes can be searched as Knowledge QA sources when indexed and selected. Export can also save reviewed output back to Notes. +- Media: ingested and indexed media can be searched as Knowledge QA sources. +- Flashcards: use the separate flashcards route. `/knowledge` does not create decks, review cards, or run spaced repetition. + +## WebUI Versus Extension Differences + +Shared behavior: + +- Search state, source scope, presets, model controls, settings, citations, evidence review, and export should follow the same product contract. 
+- Backend unavailable, no-source, no-results, and blocked-search states should show actionable recovery. + +Extension-specific behavior: + +- Extension setup can fail because of missing server configuration, missing API key, host permission, allowlist, or backend reachability. +- Extension options width can force compact layout sooner than the WebUI. +- Extension shared-link routing may differ from WebUI routing. + +WebUI-specific behavior: + +- WebUI readiness can be blocked by route-level backend readiness or server health checks. +- WebUI has more horizontal space for detailed layout and evidence review. + +## Regression Commands + +Run focused Knowledge QA checks from the relevant package directory. + +Shared UI: + +```bash +cd apps/packages/ui +bunx vitest run src/components/Option/KnowledgeQA +``` + +WebUI: + +```bash +cd apps/tldw-frontend +npx playwright test e2e/ux-audit/knowledge-readiness-recovery.spec.ts e2e/ux-audit/knowledge-qa-states.spec.ts e2e/ux-audit/knowledge-empty-recovery.spec.ts --project=chromium +``` + +Seeded live WebUI release gate: + +```bash +source ../../.venv/bin/activate +python ../../Helper_Scripts/seed_knowledge_qa_uat.py \ + --server-url http://127.0.0.1:8000 \ + --api-key "$TLDW_E2E_API_KEY" \ + --manifest /tmp/knowledge-qa-uat.json + +TLDW_WEB_AUTOSTART=false \ +TLDW_WEB_URL=http://127.0.0.1:3000 \ +TLDW_E2E_SERVER_URL=http://127.0.0.1:8000 \ +TLDW_E2E_API_KEY="$TLDW_E2E_API_KEY" \ +TLDW_KNOWLEDGE_QA_FIXTURE_MANIFEST=/tmp/knowledge-qa-uat.json \ +bunx playwright test e2e/ux-audit/knowledge-qa-live-backend.spec.ts --project=chromium --reporter=line +``` + +Extension: + +```bash +cd apps/extension +npx playwright test tests/e2e/knowledge-qa-setup-diagnostics.spec.ts tests/e2e/knowledge-qa-states.spec.ts tests/e2e/knowledge-empty-recovery.spec.ts --project=chromium-extension +``` + +Seeded live extension release gate: + +```bash +cd apps/extension +TLDW_E2E_SERVER_URL=http://127.0.0.1:8000 \ +TLDW_E2E_API_KEY="$TLDW_E2E_API_KEY" \ 
+TLDW_KNOWLEDGE_QA_FIXTURE_MANIFEST=/tmp/knowledge-qa-uat.json \ +bunx playwright test tests/e2e/knowledge-qa-live-backend.spec.ts --project=chromium-extension --reporter=line +``` + +The seeded live gates are strict. Missing backend readiness, missing manifest, null seeded source IDs, API failure, no generated cited answer, or extension launch failure should be recorded as a failing release gate rather than a passing test. + +Python backend checks are not required for documentation-only changes. If backend Knowledge QA/RAG code is touched, run the focused pytest paths and Bandit on the touched Python scope. diff --git a/Docs/Published/Wiki/Developer_Wiki.md b/Docs/Published/Wiki/Developer_Wiki.md new file mode 100644 index 0000000000..0bf88a564e --- /dev/null +++ b/Docs/Published/Wiki/Developer_Wiki.md @@ -0,0 +1,68 @@ +# Developer Wiki + +This is the developer and contributor-facing map for understanding, changing, testing, and maintaining tldw_server. These docs are part of the published MkDocs site, not the separate GitHub Wiki feature. 
+ +## Start Here + +- [Contributor development setup](https://github.com/rmusser01/tldw_server/blob/main/apps/DEVELOPMENT.md) +- [Code documentation index](../Code_Documentation/index.md) +- [Code map](../Code_Documentation/Code_Map.md) +- [Architecture overview](../Architecture.md) +- [Docs site guide](../Code_Documentation/Docs_Site_Guide.md) + +## Backend and Core Modules + +- [Chat developer guide](../Code_Documentation/Chat_Developer_Guide.md) +- [Character chat code guide](../Code_Documentation/Guides/Character_Chat_Code_Guide.md) +- [RAG developer guide](../Code_Documentation/RAG-Developer-Guide.md) +- [RAG functional pipeline guide](../Code_Documentation/RAG-Functional-Pipeline-Guide.md) +- [Embeddings developer guide](../Code_Documentation/Embeddings-Developer-Guide.md) +- [Evaluations developer guide](../Code_Documentation/Evaluations_Developer_Guide.md) +- [Services module](../Code_Documentation/Services_Module.md) + +## API and Integration Contracts + +- [API documentation index](../API-related/API_README.md) +- [API design](../API-related/API_Design.md) +- [API tag index](../API-related/API_Tags_Index.md) +- [Providers API](../API-related/Providers_API_Documentation.md) +- [Character chat sessions API](../API-related/Character_Chat_Sessions_API.md) +- [Tools API](../API-related/Tools_API_Documentation.md) +- [Storage API](../API-related/Storage_API_Documentation.md) + +## Data, Jobs, and Storage + +- [Database overview](../Code_Documentation/Database.md) +- [Database backends](../Code_Documentation/Database-Backends.md) +- [Media DB v2](../Code_Documentation/Databases/Media_DB_v2.md) +- [ChaChaNotes DB](../Code_Documentation/Databases/ChaChaNotes_DB.md) +- [Jobs module](../Code_Documentation/Jobs_Module.md) +- [Jobs manager](../Code_Documentation/Jobs_Manager.md) +- [Generated files storage code guide](../Code_Documentation/Guides/Generated_Files_Storage_Code_Guide.md) + +## Ingestion, Audio, and Media + +- [Ingestion media 
processing](../Code_Documentation/Ingestion_Media_Processing.md) +- [Audio ingestion pipeline](../Code_Documentation/Ingestion_Pipeline_Audio.md) +- [Document ingestion pipeline](../Code_Documentation/Ingestion_Pipeline_Documents.md) +- [PDF ingestion pipeline](../Code_Documentation/Ingestion_Pipeline_PDF.md) +- [Video ingestion pipeline](../Code_Documentation/Ingestion_Pipeline_Video.md) +- [Voice assistant module](../Code_Documentation/VoiceAssistant_Module.md) +- [VLM backends](../Code_Documentation/VLM_Backends.md) + +## Architecture Decisions and Operations + +- [ADR index](../ADR/README.md) +- [Deployment docs](../Deployment/First_Time_Production_Setup.md) +- [Sidecar workers](../Deployment/Sidecar_Workers.md) +- [Resource requirements](../Deployment/resource-requirements.md) +- [Monitoring metrics cheatsheet](../Monitoring/Metrics_Cheatsheet.md) +- [Environment variables](../Env_Vars.md) + +## Quality, Release, and Maintenance + +- [Release notes](../RELEASE_NOTES.md) +- [Docs site guide](../Code_Documentation/Docs_Site_Guide.md) +- [STT testing documentation](../Code_Documentation/STT_TESTING_DOCUMENTATION.md) +- [Moderation guardrails](../Code_Documentation/Moderation-Guardrails.md) +- [Setup UI developer guide](../Code_Documentation/Setup_UI_Developer_Guide.md) diff --git a/Docs/Published/Wiki/User_Wiki.md b/Docs/Published/Wiki/User_Wiki.md new file mode 100644 index 0000000000..7f5cc6ba96 --- /dev/null +++ b/Docs/Published/Wiki/User_Wiki.md @@ -0,0 +1,68 @@ +# User Wiki + +This is the user-facing map for installing, running, configuring, and using tldw_server. These docs are part of the published MkDocs site, not the separate GitHub Wiki feature. 
+ +## Start Here + +- [Getting Started](../Getting_Started/README.md) +- [Docker single-user + WebUI](../Getting_Started/Profile_Docker_Single_User.md) +- [Docker multi-user + Postgres](../Getting_Started/Profile_Docker_Multi_User_Postgres.md) +- [Local single-user](../Getting_Started/Profile_Local_Single_User.md) +- [Troubleshooting](../Getting_Started/TROUBLESHOOTING.md) + +## WebUI and Browser Workflows + +- [WebUI user guide](../User_Guides/WebUI_Extension/User_Guide.md) +- [Chat pages](../User_Guides/WebUI_Extension/Chat_Pages.md) +- [Workflow examples](../User_Guides/WebUI_Extension/Workflows_Examples.md) +- [Chatbooks](../User_Guides/WebUI_Extension/Chatbook_User_Guide.md) +- [Flashcards](../User_Guides/WebUI_Extension/Flashcards_Study_Guide.md) +- [Knowledge QA](../User_Guides/WebUI_Extension/Knowledge_QA_Guide.md) +- [Browser extension docs](https://github.com/rmusser01/tldw_server/tree/main/apps/extension/docs) + +## Chat, Characters, and Personalization + +- [Character roleplay quickstart](../User_Guides/WebUI_Extension/Character_Roleplay_Quickstart.md) +- [Advanced character roleplay](../User_Guides/WebUI_Extension/Advanced_Character_Roleplay_Guide.md) +- [Character cards and character chat](../User_Guides/Server/Character_Cards_User_Guide.md) +- [Personas](../User_Guides/Server/Personas_User_Guide.md) +- [Prompt engineering notes](../User_Guides/WebUI_Extension/Prompt_Engineering_Notes.md) + +## Knowledge, Research, and RAG + +- [Feature map](../User_Guides/Feature_Map.md) +- [Media to RAG evaluations workflow](../User_Guides/Server/Media_to_RAG_Evals_Workflow.md) +- [RAG production configuration](../User_Guides/Server/RAG_Production_Configuration_Guide.md) +- [RAG API consumer guide](../API-related/RAG-API-Guide.md) +- [Web scraping and ingestion](../User_Guides/Server/Web_Scraping_Ingestion_Guide.md) +- [Bulk conference playlist ingest](../User_Guides/Bulk_Conference_Playlist_Ingest.md) + +## Local Models, Audio, and Integrations + +- [Local LLM 
setup](../User_Guides/Integrations_Experiments/Setting_up_a_local_LLM.md) +- [STT and TTS quickstart](../User_Guides/WebUI_Extension/Getting-Started-STT_and_TTS.md) +- [TTS getting started](../User_Guides/WebUI_Extension/TTS_Getting_Started.md) +- [TTS setup guide](../User_Guides/WebUI_Extension/TTS-SETUP-GUIDE.md) +- [CPU audio setup](../Getting_Started/First_Time_Audio_Setup_CPU.md) +- [GPU or accelerated audio setup](../Getting_Started/First_Time_Audio_Setup_GPU_Accelerated.md) +- [Getting started with ACP](../User_Guides/Integrations_Experiments/Getting_Started_with_ACP.md) + +## Admin, Operations, and Shared Servers + +- [Authentication setup](../User_Guides/Server/Authentication_Setup.md) +- [Production hardening](../User_Guides/Server/Production_Hardening_Checklist.md) +- [Organizations and sharing](../User_Guides/Server/Organizations_and_Sharing.md) +- [BYOK user guide](../User_Guides/Server/BYOK_User_Guide.md) +- [Usage module](../User_Guides/Server/Usage_Module.md) +- [Backups using Litestream](../User_Guides/Server/Backups_Using_Litestream.md) +- [Long-term admin guide](../Deployment/Long_Term_Admin_Guide.md) +- [Metrics cheatsheet](../Monitoring/Metrics_Cheatsheet.md) + +## Practical API Usage + +- [API documentation index](../API-related/API_README.md) +- [OpenAPI tag index](../API-related/API_Tags_Index.md) +- [Chat API](../API-related/Chat_API_Documentation.md) +- [Media ingest jobs API](../API-related/Media_Ingest_Jobs_API.md) +- [Prompt Studio API](../API-related/Prompt_Studio_API.md) +- [Evaluations API](../API-related/Evaluations_API_Unified_Reference.md) diff --git a/Docs/Published/Wiki/index.md b/Docs/Published/Wiki/index.md new file mode 100644 index 0000000000..dcc5a659a8 --- /dev/null +++ b/Docs/Published/Wiki/index.md @@ -0,0 +1,33 @@ +# tldw Server Documentation + +Choose the wiki that matches what you are trying to do. + +## User Wiki + +Use the [User Wiki](User_Wiki.md) if you want to install, run, configure, or use tldw_server. 
+ +Start there for: + +- first-time setup and self-hosting profiles; +- WebUI and browser extension workflows; +- local LLM, STT, TTS, RAG, notes, chat, and character-card usage; +- production hardening, operations, and shared-server administration; +- practical API usage for automation and integrations. + +## Developer Wiki + +Use the [Developer Wiki](Developer_Wiki.md) if you want to modify, contribute to, test, package, or understand the codebase. + +Start there for: + +- repository and frontend development setup; +- architecture, code maps, ADRs, and module guides; +- backend, API, database, RAG, chat, audio, and evaluation internals; +- docs-site maintenance and release process guidance. + +## Shared References + +- [Feature status](../Overview/Feature_Status.md) +- [Release notes](../RELEASE_NOTES.md) +- [API documentation index](../API-related/API_README.md) +- [Code documentation index](../Code_Documentation/index.md) diff --git a/Docs/Wiki/Developer_Wiki.md b/Docs/Wiki/Developer_Wiki.md new file mode 100644 index 0000000000..0bf88a564e --- /dev/null +++ b/Docs/Wiki/Developer_Wiki.md @@ -0,0 +1,68 @@ +# Developer Wiki + +This is the developer and contributor-facing map for understanding, changing, testing, and maintaining tldw_server. These docs are part of the published MkDocs site, not the separate GitHub Wiki feature. 
+ +## Start Here + +- [Contributor development setup](https://github.com/rmusser01/tldw_server/blob/main/apps/DEVELOPMENT.md) +- [Code documentation index](../Code_Documentation/index.md) +- [Code map](../Code_Documentation/Code_Map.md) +- [Architecture overview](../Architecture.md) +- [Docs site guide](../Code_Documentation/Docs_Site_Guide.md) + +## Backend and Core Modules + +- [Chat developer guide](../Code_Documentation/Chat_Developer_Guide.md) +- [Character chat code guide](../Code_Documentation/Guides/Character_Chat_Code_Guide.md) +- [RAG developer guide](../Code_Documentation/RAG-Developer-Guide.md) +- [RAG functional pipeline guide](../Code_Documentation/RAG-Functional-Pipeline-Guide.md) +- [Embeddings developer guide](../Code_Documentation/Embeddings-Developer-Guide.md) +- [Evaluations developer guide](../Code_Documentation/Evaluations_Developer_Guide.md) +- [Services module](../Code_Documentation/Services_Module.md) + +## API and Integration Contracts + +- [API documentation index](../API-related/API_README.md) +- [API design](../API-related/API_Design.md) +- [API tag index](../API-related/API_Tags_Index.md) +- [Providers API](../API-related/Providers_API_Documentation.md) +- [Character chat sessions API](../API-related/Character_Chat_Sessions_API.md) +- [Tools API](../API-related/Tools_API_Documentation.md) +- [Storage API](../API-related/Storage_API_Documentation.md) + +## Data, Jobs, and Storage + +- [Database overview](../Code_Documentation/Database.md) +- [Database backends](../Code_Documentation/Database-Backends.md) +- [Media DB v2](../Code_Documentation/Databases/Media_DB_v2.md) +- [ChaChaNotes DB](../Code_Documentation/Databases/ChaChaNotes_DB.md) +- [Jobs module](../Code_Documentation/Jobs_Module.md) +- [Jobs manager](../Code_Documentation/Jobs_Manager.md) +- [Generated files storage code guide](../Code_Documentation/Guides/Generated_Files_Storage_Code_Guide.md) + +## Ingestion, Audio, and Media + +- [Ingestion media 
processing](../Code_Documentation/Ingestion_Media_Processing.md) +- [Audio ingestion pipeline](../Code_Documentation/Ingestion_Pipeline_Audio.md) +- [Document ingestion pipeline](../Code_Documentation/Ingestion_Pipeline_Documents.md) +- [PDF ingestion pipeline](../Code_Documentation/Ingestion_Pipeline_PDF.md) +- [Video ingestion pipeline](../Code_Documentation/Ingestion_Pipeline_Video.md) +- [Voice assistant module](../Code_Documentation/VoiceAssistant_Module.md) +- [VLM backends](../Code_Documentation/VLM_Backends.md) + +## Architecture Decisions and Operations + +- [ADR index](../ADR/README.md) +- [Deployment docs](../Deployment/First_Time_Production_Setup.md) +- [Sidecar workers](../Deployment/Sidecar_Workers.md) +- [Resource requirements](../Deployment/resource-requirements.md) +- [Monitoring metrics cheatsheet](../Monitoring/Metrics_Cheatsheet.md) +- [Environment variables](../Env_Vars.md) + +## Quality, Release, and Maintenance + +- [Release notes](../RELEASE_NOTES.md) +- [Docs site guide](../Code_Documentation/Docs_Site_Guide.md) +- [STT testing documentation](../Code_Documentation/STT_TESTING_DOCUMENTATION.md) +- [Moderation guardrails](../Code_Documentation/Moderation-Guardrails.md) +- [Setup UI developer guide](../Code_Documentation/Setup_UI_Developer_Guide.md) diff --git a/Docs/Wiki/User_Wiki.md b/Docs/Wiki/User_Wiki.md new file mode 100644 index 0000000000..7f5cc6ba96 --- /dev/null +++ b/Docs/Wiki/User_Wiki.md @@ -0,0 +1,68 @@ +# User Wiki + +This is the user-facing map for installing, running, configuring, and using tldw_server. These docs are part of the published MkDocs site, not the separate GitHub Wiki feature. 
+ +## Start Here + +- [Getting Started](../Getting_Started/README.md) +- [Docker single-user + WebUI](../Getting_Started/Profile_Docker_Single_User.md) +- [Docker multi-user + Postgres](../Getting_Started/Profile_Docker_Multi_User_Postgres.md) +- [Local single-user](../Getting_Started/Profile_Local_Single_User.md) +- [Troubleshooting](../Getting_Started/TROUBLESHOOTING.md) + +## WebUI and Browser Workflows + +- [WebUI user guide](../User_Guides/WebUI_Extension/User_Guide.md) +- [Chat pages](../User_Guides/WebUI_Extension/Chat_Pages.md) +- [Workflow examples](../User_Guides/WebUI_Extension/Workflows_Examples.md) +- [Chatbooks](../User_Guides/WebUI_Extension/Chatbook_User_Guide.md) +- [Flashcards](../User_Guides/WebUI_Extension/Flashcards_Study_Guide.md) +- [Knowledge QA](../User_Guides/WebUI_Extension/Knowledge_QA_Guide.md) +- [Browser extension docs](https://github.com/rmusser01/tldw_server/tree/main/apps/extension/docs) + +## Chat, Characters, and Personalization + +- [Character roleplay quickstart](../User_Guides/WebUI_Extension/Character_Roleplay_Quickstart.md) +- [Advanced character roleplay](../User_Guides/WebUI_Extension/Advanced_Character_Roleplay_Guide.md) +- [Character cards and character chat](../User_Guides/Server/Character_Cards_User_Guide.md) +- [Personas](../User_Guides/Server/Personas_User_Guide.md) +- [Prompt engineering notes](../User_Guides/WebUI_Extension/Prompt_Engineering_Notes.md) + +## Knowledge, Research, and RAG + +- [Feature map](../User_Guides/Feature_Map.md) +- [Media to RAG evaluations workflow](../User_Guides/Server/Media_to_RAG_Evals_Workflow.md) +- [RAG production configuration](../User_Guides/Server/RAG_Production_Configuration_Guide.md) +- [RAG API consumer guide](../API-related/RAG-API-Guide.md) +- [Web scraping and ingestion](../User_Guides/Server/Web_Scraping_Ingestion_Guide.md) +- [Bulk conference playlist ingest](../User_Guides/Bulk_Conference_Playlist_Ingest.md) + +## Local Models, Audio, and Integrations + +- [Local LLM 
setup](../User_Guides/Integrations_Experiments/Setting_up_a_local_LLM.md) +- [STT and TTS quickstart](../User_Guides/WebUI_Extension/Getting-Started-STT_and_TTS.md) +- [TTS getting started](../User_Guides/WebUI_Extension/TTS_Getting_Started.md) +- [TTS setup guide](../User_Guides/WebUI_Extension/TTS-SETUP-GUIDE.md) +- [CPU audio setup](../Getting_Started/First_Time_Audio_Setup_CPU.md) +- [GPU or accelerated audio setup](../Getting_Started/First_Time_Audio_Setup_GPU_Accelerated.md) +- [Getting started with ACP](../User_Guides/Integrations_Experiments/Getting_Started_with_ACP.md) + +## Admin, Operations, and Shared Servers + +- [Authentication setup](../User_Guides/Server/Authentication_Setup.md) +- [Production hardening](../User_Guides/Server/Production_Hardening_Checklist.md) +- [Organizations and sharing](../User_Guides/Server/Organizations_and_Sharing.md) +- [BYOK user guide](../User_Guides/Server/BYOK_User_Guide.md) +- [Usage module](../User_Guides/Server/Usage_Module.md) +- [Backups using Litestream](../User_Guides/Server/Backups_Using_Litestream.md) +- [Long-term admin guide](../Deployment/Long_Term_Admin_Guide.md) +- [Metrics cheatsheet](../Monitoring/Metrics_Cheatsheet.md) + +## Practical API Usage + +- [API documentation index](../API-related/API_README.md) +- [OpenAPI tag index](../API-related/API_Tags_Index.md) +- [Chat API](../API-related/Chat_API_Documentation.md) +- [Media ingest jobs API](../API-related/Media_Ingest_Jobs_API.md) +- [Prompt Studio API](../API-related/Prompt_Studio_API.md) +- [Evaluations API](../API-related/Evaluations_API_Unified_Reference.md) diff --git a/Docs/Wiki/index.md b/Docs/Wiki/index.md new file mode 100644 index 0000000000..dcc5a659a8 --- /dev/null +++ b/Docs/Wiki/index.md @@ -0,0 +1,33 @@ +# tldw Server Documentation + +Choose the wiki that matches what you are trying to do. + +## User Wiki + +Use the [User Wiki](User_Wiki.md) if you want to install, run, configure, or use tldw_server. 
+ +Start there for: + +- first-time setup and self-hosting profiles; +- WebUI and browser extension workflows; +- local LLM, STT, TTS, RAG, notes, chat, and character-card usage; +- production hardening, operations, and shared-server administration; +- practical API usage for automation and integrations. + +## Developer Wiki + +Use the [Developer Wiki](Developer_Wiki.md) if you want to modify, contribute to, test, package, or understand the codebase. + +Start there for: + +- repository and frontend development setup; +- architecture, code maps, ADRs, and module guides; +- backend, API, database, RAG, chat, audio, and evaluation internals; +- docs-site maintenance and release process guidance. + +## Shared References + +- [Feature status](../Overview/Feature_Status.md) +- [Release notes](../RELEASE_NOTES.md) +- [API documentation index](../API-related/API_README.md) +- [Code documentation index](../Code_Documentation/index.md) diff --git a/Docs/mkdocs.yml b/Docs/mkdocs.yml index 8d951cbedd..c5851dec06 100644 --- a/Docs/mkdocs.yml +++ b/Docs/mkdocs.yml @@ -68,54 +68,99 @@ copyright: | © 2024-2025 tldw_Server - v0.1.34 - GitHub nav: - - Home: User_Guides/index.md - - Getting Started: Getting_Started/README.md - - Feature Status: Overview/Feature_Status.md - - User Guides: - - Documentation Map: User_Guides/index.md - - Feature Map: User_Guides/Feature_Map.md + - Home: Wiki/index.md + - User Wiki: + - Start Here: Wiki/User_Wiki.md + - Getting Started: + - Setup Overview: Getting_Started/README.md + - Docker Single-User + WebUI: Getting_Started/Profile_Docker_Single_User.md + - Docker Multi-User + Postgres: Getting_Started/Profile_Docker_Multi_User_Postgres.md + - Local Single-User: Getting_Started/Profile_Local_Single_User.md + - Troubleshooting: Getting_Started/TROUBLESHOOTING.md - WebUI and Extension: - WebUI User Guide: User_Guides/WebUI_Extension/User_Guide.md - Chat Pages: User_Guides/WebUI_Extension/Chat_Pages.md - Workflow Examples: 
User_Guides/WebUI_Extension/Workflows_Examples.md - - STT and TTS Quickstart: User_Guides/WebUI_Extension/Getting-Started-STT_and_TTS.md - Chatbooks: User_Guides/WebUI_Extension/Chatbook_User_Guide.md - Flashcards: User_Guides/WebUI_Extension/Flashcards_Study_Guide.md - - Server: + - Knowledge QA: User_Guides/WebUI_Extension/Knowledge_QA_Guide.md + - Chat and Characters: + - Character Roleplay Quickstart: User_Guides/WebUI_Extension/Character_Roleplay_Quickstart.md + - Advanced Character Roleplay: User_Guides/WebUI_Extension/Advanced_Character_Roleplay_Guide.md + - Character Cards: User_Guides/Server/Character_Cards_User_Guide.md + - Personas: User_Guides/Server/Personas_User_Guide.md + - Prompt Engineering Notes: User_Guides/WebUI_Extension/Prompt_Engineering_Notes.md + - Knowledge and Research: + - Feature Map: User_Guides/Feature_Map.md + - RAG API Consumer Guide: API-related/RAG-API-Guide.md + - RAG Production Configuration: User_Guides/Server/RAG_Production_Configuration_Guide.md + - Media to RAG Evals Workflow: User_Guides/Server/Media_to_RAG_Evals_Workflow.md + - Web Scraping and Ingestion: User_Guides/Server/Web_Scraping_Ingestion_Guide.md + - Evaluations User Guide: User_Guides/Server/Evaluations_User_Guide.md + - Local Models and Audio: + - Local LLM Setup: User_Guides/Integrations_Experiments/Setting_up_a_local_LLM.md + - STT and TTS Quickstart: User_Guides/WebUI_Extension/Getting-Started-STT_and_TTS.md + - TTS Getting Started: User_Guides/WebUI_Extension/TTS_Getting_Started.md + - TTS Setup Guide: User_Guides/WebUI_Extension/TTS-SETUP-GUIDE.md + - CPU Audio Setup: Getting_Started/First_Time_Audio_Setup_CPU.md + - GPU or Accelerated Audio Setup: Getting_Started/First_Time_Audio_Setup_GPU_Accelerated.md + - Admin and Operations: - Authentication Setup: User_Guides/Server/Authentication_Setup.md - Production Hardening: User_Guides/Server/Production_Hardening_Checklist.md - Organizations and Sharing: User_Guides/Server/Organizations_and_Sharing.md - BYOK 
User Guide: User_Guides/Server/BYOK_User_Guide.md - Usage Module: User_Guides/Server/Usage_Module.md - - Integrations and Experiments: - - Getting Started with ACP: User_Guides/Integrations_Experiments/Getting_Started_with_ACP.md - - Local LLM Setup: User_Guides/Integrations_Experiments/Setting_up_a_local_LLM.md - - API: - - API Documentation Index: API-related/API_README.md - - API Tag Index: API-related/API_Tags_Index.md - - Chat API: API-related/Chat_API_Documentation.md - - RAG API: API-related/RAG-API-Guide.md - - Embeddings API: API-related/Embeddings_API_Documentation.md - - Audio Transcription API: API-related/Audio_Transcription_API.md - - TTS API: API-related/TTS_API.md - - Evaluations API: API-related/Evaluations_API_Unified_Reference.md - - Media Ingest Jobs API: API-related/Media_Ingest_Jobs_API.md - - Collections Feeds API: API-related/Collections_Feeds_API.md - - Ingestion Sources API: API-related/Ingestion_Sources_API.md - - Prompt Studio API: API-related/Prompt_Studio_API.md - - Deployment: - - First-Time Production Setup: Deployment/First_Time_Production_Setup.md - - Long-Term Admin Guide: Deployment/Long_Term_Admin_Guide.md - - Sidecar Workers: Deployment/Sidecar_Workers.md - - Resource Requirements: Deployment/resource-requirements.md - - Monitoring: - - Metrics Cheatsheet: Monitoring/Metrics_Cheatsheet.md - - Alerts: Monitoring/Alerts/README.md - - Code: - - Code Documentation Index: Code_Documentation/index.md - - Embeddings Documentation: Code_Documentation/Embeddings-Documentation.md - - Tutorial System Developer Guide: Code_Documentation/Tutorial_System_Developer_Guide.md - - Documentation Site Guide: Code_Documentation/Docs_Site_Guide.md - - AuthNZ Code Guide: Code_Documentation/Guides/AuthNZ_Code_Guide.md - - Environment Variables: Env_Vars.md + - Long-Term Admin Guide: Deployment/Long_Term_Admin_Guide.md + - Metrics Cheatsheet: Monitoring/Metrics_Cheatsheet.md + - Practical API Usage: + - API Documentation Index: 
API-related/API_README.md + - API Tag Index: API-related/API_Tags_Index.md + - Chat API: API-related/Chat_API_Documentation.md + - Media Ingest Jobs API: API-related/Media_Ingest_Jobs_API.md + - Prompt Studio API: API-related/Prompt_Studio_API.md + - Evaluations API: API-related/Evaluations_API_Unified_Reference.md + - Developer Wiki: + - Start Here: Wiki/Developer_Wiki.md + - Architecture and Decisions: + - Architecture Overview: Architecture.md + - ADR Index: ADR/README.md + - Code Documentation Index: Code_Documentation/index.md + - Code Map: Code_Documentation/Code_Map.md + - Data Flow Atlas: Code_Documentation/Data_Flow_Atlas.md + - API and Contracts: + - API Documentation Index: API-related/API_README.md + - API Design: API-related/API_Design.md + - API Tag Index: API-related/API_Tags_Index.md + - Providers API: API-related/Providers_API_Documentation.md + - Character Chat Sessions API: API-related/Character_Chat_Sessions_API.md + - Tools API: API-related/Tools_API_Documentation.md + - Storage API: API-related/Storage_API_Documentation.md + - Backend Code Guides: + - Chat Developer Guide: Code_Documentation/Chat_Developer_Guide.md + - Character Chat Code Guide: Code_Documentation/Guides/Character_Chat_Code_Guide.md + - RAG Developer Guide: Code_Documentation/RAG-Developer-Guide.md + - Embeddings Developer Guide: Code_Documentation/Embeddings-Developer-Guide.md + - Evaluations Developer Guide: Code_Documentation/Evaluations_Developer_Guide.md + - Services Module: Code_Documentation/Services_Module.md + - Data and Jobs: + - Database Overview: Code_Documentation/Database.md + - Database Backends: Code_Documentation/Database-Backends.md + - Media DB v2: Code_Documentation/Databases/Media_DB_v2.md + - ChaChaNotes DB: Code_Documentation/Databases/ChaChaNotes_DB.md + - Jobs Module: Code_Documentation/Jobs_Module.md + - Jobs Manager: Code_Documentation/Jobs_Manager.md + - Ingestion and Audio Internals: + - Ingestion Media Processing: 
Code_Documentation/Ingestion_Media_Processing.md + - Audio Pipeline: Code_Documentation/Ingestion_Pipeline_Audio.md + - Document Pipeline: Code_Documentation/Ingestion_Pipeline_Documents.md + - PDF Pipeline: Code_Documentation/Ingestion_Pipeline_PDF.md + - Video Pipeline: Code_Documentation/Ingestion_Pipeline_Video.md + - Voice Assistant Module: Code_Documentation/VoiceAssistant_Module.md + - Contributor Maintenance: + - Docs Site Guide: Code_Documentation/Docs_Site_Guide.md + - Setup UI Developer Guide: Code_Documentation/Setup_UI_Developer_Guide.md + - Tutorial System Developer Guide: Code_Documentation/Tutorial_System_Developer_Guide.md + - STT Testing Documentation: Code_Documentation/STT_TESTING_DOCUMENTATION.md + - Environment Variables: Env_Vars.md + - Feature Status: Overview/Feature_Status.md - Release Notes: RELEASE_NOTES.md diff --git a/Docs/superpowers/plans/2026-07-03-docs-audience-wikis-implementation-plan.md b/Docs/superpowers/plans/2026-07-03-docs-audience-wikis-implementation-plan.md new file mode 100644 index 0000000000..159b684537 --- /dev/null +++ b/Docs/superpowers/plans/2026-07-03-docs-audience-wikis-implementation-plan.md @@ -0,0 +1,92 @@ +# Docs Audience Wikis Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Add audience-focused User Wiki and Developer Wiki entry points to the existing public MkDocs site. + +**Architecture:** Keep one MkDocs site and one published docs pipeline. Add `Docs/Wiki` as source landing pages, sync that folder to `Docs/Published/Wiki`, and reorganize navigation around audience tabs without moving existing guide files. + +**Tech Stack:** Markdown, MkDocs Material, Bash refresh script, pytest docs contract tests. 
+ +--- + +### Task 1: Write Docs Contract Test + +**Files:** +- Create: `tldw_Server_API/tests/Docs/test_docs_audience_wikis.py` + +- [ ] **Step 1: Add a failing test** + +Create a pytest module that asserts the `Docs/Wiki` source pages, `Docs/Published/Wiki` generated pages, MkDocs top-level nav entries, and README links exist. + +- [ ] **Step 2: Run the focused test and verify RED** + +Run: `PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 .venv/bin/python -m pytest -q -p pytest_asyncio.plugin tldw_Server_API/tests/Docs/test_docs_audience_wikis.py` + +Expected: fail because the new wiki pages do not exist yet. + +### Task 2: Add Wiki Source Pages and Publish Sync + +**Files:** +- Create: `Docs/Wiki/index.md` +- Create: `Docs/Wiki/User_Wiki.md` +- Create: `Docs/Wiki/Developer_Wiki.md` +- Modify: `Helper_Scripts/refresh_docs_published.sh` + +- [ ] **Step 1: Add concise source landing pages** + +Create the chooser, user wiki, and developer wiki pages with links to existing stable docs. + +- [ ] **Step 2: Sync Wiki into Published** + +Update `Helper_Scripts/refresh_docs_published.sh` to copy `Docs/Wiki` to `Docs/Published/Wiki` and include the wiki links in the generated `Docs/Published/index.md`. + +- [ ] **Step 3: Refresh generated docs** + +Run: `bash Helper_Scripts/refresh_docs_published.sh` + +Expected: `Docs/Published/Wiki/index.md`, `Docs/Published/Wiki/User_Wiki.md`, and `Docs/Published/Wiki/Developer_Wiki.md` exist. + +### Task 3: Reorganize Navigation and Guidance + +**Files:** +- Modify: `Docs/mkdocs.yml` +- Modify: `README.md` +- Modify: `Docs/Code_Documentation/Docs_Site_Guide.md` + +- [ ] **Step 1: Make the nav audience-first** + +Replace the current broad top-level docs tabs with `Home`, `User Wiki`, `Developer Wiki`, and shared release/status links. + +- [ ] **Step 2: Update authoring guidance** + +Document that source pages remain in existing folders, generated pages under `Docs/Published` are not edited manually, and audience chooser pages live under `Docs/Wiki`. 
+ +- [ ] **Step 3: Update README entry points** + +Point users to `Docs/Wiki/User_Wiki.md` and contributors to `Docs/Wiki/Developer_Wiki.md`. + +### Task 4: Verify and Commit + +**Files:** +- Modify: `backlog/tasks/task-12119 - Split-published-docs-navigation-into-user-and-developer-wiki-entry-points.md` + +- [ ] **Step 1: Run focused tests** + +Run: `PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 .venv/bin/python -m pytest -q -p pytest_asyncio.plugin tldw_Server_API/tests/Docs/test_docs_audience_wikis.py` + +Expected: pass. + +- [ ] **Step 2: Run docs checks** + +Run: `python Helper_Scripts/docs/check_public_private_boundary.py` + +Expected: pass. + +Run: `mkdocs build -f Docs/mkdocs.yml` + +Expected: pass with existing baseline warnings only. + +- [ ] **Step 3: Update Backlog task and commit** + +Record verification results in `TASK-12119`, stage the docs/test/task changes, and commit. diff --git a/Docs/superpowers/specs/2026-07-03-docs-audience-wikis-design.md b/Docs/superpowers/specs/2026-07-03-docs-audience-wikis-design.md new file mode 100644 index 0000000000..5ce18ee154 --- /dev/null +++ b/Docs/superpowers/specs/2026-07-03-docs-audience-wikis-design.md @@ -0,0 +1,41 @@ +# Docs Audience Wikis Design + +## Goal + +Split the existing public MkDocs site into two clear audience entry points: a user-facing wiki and a developer/contributor-facing wiki. + +## Scope + +This keeps one MkDocs site and one GitHub Pages deployment. It does not move existing guide files or introduce a second documentation build. Existing source paths such as `Docs/User_Guides/index.md`, `Docs/API-related/API_README.md`, and `Docs/Code_Documentation/index.md` remain valid. + +## Design + +Add shared source landing pages under `Docs/Wiki/`: + +- `Docs/Wiki/index.md`: audience chooser for the docs site. +- `Docs/Wiki/User_Wiki.md`: user-focused route map for setup, WebUI, extension, local providers, character chat, knowledge workflows, audio, admin, and operations. 
+- `Docs/Wiki/Developer_Wiki.md`: contributor-focused route map for development setup, architecture, code guides, API references, testing, docs process, ADRs, and release work. + +Publish `Docs/Wiki` through `Helper_Scripts/refresh_docs_published.sh` so generated pages land in `Docs/Published/Wiki/`. Do not manually edit generated published pages. + +Rework `Docs/mkdocs.yml` so the first tabs are: + +- `Home` +- `User Wiki` +- `Developer Wiki` +- `Feature Status` and `Release Notes` (shared status and release links) + +User-facing guides and practical API usage are grouped under `User Wiki`. Contributor-oriented API/code/architecture references are grouped under `Developer Wiki`. Shared references may be linked from both audiences. + +Update `README.md` and `Docs/Code_Documentation/Docs_Site_Guide.md` to document the audience split and source-of-truth rules. + +## Verification + +Add a focused docs contract test that checks: + +- source wiki pages exist; +- refreshed published wiki pages exist; +- `Docs/mkdocs.yml` exposes top-level User Wiki and Developer Wiki entries; +- `README.md` links to both audience entry points. + +Run the docs refresh script, the focused docs test, docs hygiene checks, and `mkdocs build -f Docs/mkdocs.yml`. diff --git a/Helper_Scripts/refresh_docs_published.sh b/Helper_Scripts/refresh_docs_published.sh index 184d0429a6..6679f89a29 100644 --- a/Helper_Scripts/refresh_docs_published.sh +++ b/Helper_Scripts/refresh_docs_published.sh @@ -17,9 +17,14 @@ cat > "$DEST_DIR/index.md" <<'EOF' Welcome to the curated public documentation for tldw_server. 
+- [User Wiki](Wiki/User_Wiki.md) +- [Developer Wiki](Wiki/Developer_Wiki.md) - [Getting Started](Getting_Started/README.md) - [User Guides](User_Guides/index.md) - [API Documentation](API-related/API_README.md) +- [Code Documentation](Code_Documentation/index.md) +- [Architecture](Architecture.md) +- [Architecture Decisions](ADR/README.md) - [Deployment](Deployment/First_Time_Production_Setup.md) - [Monitoring](Monitoring/Metrics_Cheatsheet.md) - [Release Notes](RELEASE_NOTES.md) @@ -54,6 +59,15 @@ preserve_and_copy() { # API-related preserve_and_copy "$SRC_DIR/API-related" "$DEST_DIR/API-related" +# Audience wiki landing pages +preserve_and_copy "$SRC_DIR/Wiki" "$DEST_DIR/Wiki" + +# Architecture and decision records for contributor docs +if [ -f "$SRC_DIR/Architecture.md" ]; then + cp "$SRC_DIR/Architecture.md" "$DEST_DIR/Architecture.md" +fi +preserve_and_copy "$SRC_DIR/ADR" "$DEST_DIR/ADR" + # Code_Documentation preserve_and_copy "$SRC_DIR/Code_Documentation" "$DEST_DIR/Code_Documentation" diff --git a/README.md b/README.md index 0071810ef1..427ea44957 100644 --- a/README.md +++ b/README.md @@ -92,7 +92,7 @@ Good fit for: `make quickstart` remains the shortest Docker single-user + WebUI alias. It runs setup, start, and verification for the first profile. -For a user-facing map of key workflows across the server API, WebUI, and browser extension, start with the [User Guides documentation map](Docs/User_Guides/index.md). +For user-facing workflows across setup, the WebUI, browser extension, local models, APIs, and administration, start with the [User Wiki](Docs/Wiki/User_Wiki.md). Contributors should start with the [Developer Wiki](Docs/Wiki/Developer_Wiki.md). After cloning, you can run the optional Makefile helper checks with `make quickstart-prereqs`, or verify Python 3.10+, ffmpeg, and Docker manually for your chosen profile. On a fresh checkout, the setup targets are still the source of truth because they create the lightweight setup environment they need. 
@@ -1457,6 +1457,8 @@ Run locally Documentation and resources **Getting Started Guides:** +- [User Wiki](Docs/Wiki/User_Wiki.md) - install, run, configure, and use tldw_server +- [Developer Wiki](Docs/Wiki/Developer_Wiki.md) - contribute to, test, package, and understand the codebase - [User Guides Documentation Map](Docs/User_Guides/index.md) - task-oriented map for setup, WebUI, extension, API, and admin workflows - [Getting Started Index](Docs/Getting_Started/README.md) - choose the right setup path - [Docker Single-User + WebUI Profile](Docs/Getting_Started/Profile_Docker_Single_User.md) - self-host with Docker and the WebUI diff --git a/backlog/tasks/task-12119 - Split-published-docs-navigation-into-user-and-developer-wiki-entry-points.md b/backlog/tasks/task-12119 - Split-published-docs-navigation-into-user-and-developer-wiki-entry-points.md new file mode 100644 index 0000000000..0c906a54cd --- /dev/null +++ b/backlog/tasks/task-12119 - Split-published-docs-navigation-into-user-and-developer-wiki-entry-points.md @@ -0,0 +1,64 @@ +--- +id: TASK-12119 +title: Split published docs navigation into user and developer wiki entry points +status: Done +labels: +- docs +- mkdocs +priority: High +--- + +## Description + + +Create audience-focused user and developer wiki entry points in the existing MkDocs site without moving existing source docs or breaking published links. + + +## Acceptance Criteria + +- [x] #1 Published docs expose User Wiki and Developer Wiki top-level entry points. +- [x] #2 Docs/Wiki source pages are synced into Docs/Published by the refresh script. +- [x] #3 MkDocs navigation is organized around the audience split while preserving existing guide/reference links. +- [x] #4 README and docs-site guide explain which wiki to use and where new docs belong. +- [x] #5 Focused docs tests and MkDocs build pass. + + +## Implementation Plan + + +Use the existing single MkDocs site. 
Add Docs/Wiki landing pages, publish them through Helper_Scripts/refresh_docs_published.sh, reorganize Docs/mkdocs.yml nav around User Wiki and Developer Wiki, update README and Docs_Site_Guide, and add focused docs contract tests. + + +## Implementation Notes + + +Implemented one MkDocs/GitHub Pages docs site with audience-first entry points instead of a second documentation build. Added `Docs/Wiki` source landing pages, refreshed `Docs/Published/Wiki`, published contributor-oriented `Architecture.md` and `ADR` content, reorganized `Docs/mkdocs.yml` around `User Wiki` and `Developer Wiki`, updated README and docs-site guidance, and added a focused docs contract test. + +Verification: +- `bash Helper_Scripts/refresh_docs_published.sh` +- `PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 .venv/bin/python -m pytest -q -p pytest_asyncio.plugin tldw_Server_API/tests/Docs/test_docs_audience_wikis.py` -> 3 passed +- `PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 .venv/bin/python -m pytest -q -p pytest_asyncio.plugin tldw_Server_API/tests/Docs` -> 120 passed +- `.venv/bin/python Helper_Scripts/docs/check_public_private_boundary.py` +- `.venv/bin/python Helper_Scripts/docs/check_readme_docs_path_hygiene.py` +- `.venv/bin/python Helper_Scripts/docs/check_top_guides_docs_path_hygiene.py` +- `.venv/bin/python Helper_Scripts/docs/check_onboarding_command_boundaries.py` +- `.venv/bin/python Helper_Scripts/docs/check_onboarding_endpoint_drift.py` +- `.venv/bin/python -m mkdocs build -f Docs/mkdocs.yml` -> exit 0 with existing baseline warnings +- `.venv/bin/python -m bandit -r tldw_Server_API/tests/Docs/test_docs_audience_wikis.py tldw_Server_API/tests/Docs/conftest.py -f json -o /tmp/bandit_task_12119_docs.json` -> 0 findings + + +## Final Summary + + +Added audience-focused User Wiki and Developer Wiki entry points to the existing MkDocs docs site without moving existing source docs. 
Updated the publish script, generated published docs, MkDocs navigation, README routing, docs-site guidance, CI curated-doc verification, and docs tests for the new structure. + + +## Definition of Done + +- [x] #1 Acceptance criteria completed +- [x] #2 Tests or verification recorded +- [x] #3 Documentation updated when relevant +- [x] #4 Bandit run for touched code when applicable or document non-code/environment skip +- [x] #5 Final summary added +- [x] #6 Known skips or blockers documented + diff --git a/tldw_Server_API/tests/Docs/conftest.py b/tldw_Server_API/tests/Docs/conftest.py index ab256e8d77..c5df59f25d 100644 --- a/tldw_Server_API/tests/Docs/conftest.py +++ b/tldw_Server_API/tests/Docs/conftest.py @@ -13,6 +13,8 @@ "Docs/Published/User_Guides/WebUI_Extension/Getting-Started-STT_and_TTS.md", "Docs/Published/User_Guides/WebUI_Extension/TTS_Getting_Started.md", "Docs/Published/User_Guides/WebUI_Extension/TTS-SETUP-GUIDE.md", + "Docs/Published/Wiki/User_Wiki.md", + "Docs/Published/Wiki/Developer_Wiki.md", ) @@ -39,6 +41,11 @@ def _refresh_docs_published(repo_root: Path) -> None: _preserve_and_copy(src_dir / "API-related", dest_dir / "API-related") _preserve_and_copy(src_dir / "Code_Documentation", dest_dir / "Code_Documentation") + _preserve_and_copy(src_dir / "Wiki", dest_dir / "Wiki") + _preserve_and_copy(src_dir / "ADR", dest_dir / "ADR") + architecture = src_dir / "Architecture.md" + if architecture.exists(): + shutil.copy2(architecture, dest_dir / "Architecture.md") _preserve_and_copy(src_dir / "Deployment", dest_dir / "Deployment", skip_monitoring=True) _preserve_and_copy(src_dir / "Deployment" / "Monitoring", dest_dir / "Monitoring") _preserve_and_copy(src_dir / "Evaluations", dest_dir / "Evaluations") diff --git a/tldw_Server_API/tests/Docs/test_docs_audience_wikis.py b/tldw_Server_API/tests/Docs/test_docs_audience_wikis.py new file mode 100644 index 0000000000..0628a5fb6c --- /dev/null +++ b/tldw_Server_API/tests/Docs/test_docs_audience_wikis.py 
@@ -0,0 +1,63 @@ +"""Contract tests for audience-focused documentation wiki entry points.""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + + +REPO_ROOT = Path(__file__).resolve().parents[3] + + +def _read(relative_path: str) -> str: + return (REPO_ROOT / relative_path).read_text(encoding="utf-8") + + +def _require(condition: bool, message: str) -> None: + """Fail with a descriptive assertion message when a docs contract is broken.""" + if not condition: + pytest.fail(message) + + +def test_docs_audience_wiki_source_and_published_pages_exist() -> None: + """The docs site should expose source and generated audience wiki pages.""" + for relative_path in ( + "Docs/Wiki/index.md", + "Docs/Wiki/User_Wiki.md", + "Docs/Wiki/Developer_Wiki.md", + "Docs/Published/Wiki/index.md", + "Docs/Published/Wiki/User_Wiki.md", + "Docs/Published/Wiki/Developer_Wiki.md", + ): + _require((REPO_ROOT / relative_path).is_file(), f"Missing {relative_path}") + + +def test_mkdocs_nav_exposes_audience_wikis() -> None: + """MkDocs navigation should make the audience split visible at top level.""" + mkdocs_text = _read("Docs/mkdocs.yml") + + _require("Home: Wiki/index.md" in mkdocs_text, "MkDocs nav should use Wiki home") + _require("User Wiki:" in mkdocs_text, "MkDocs nav should expose User Wiki") + _require("Developer Wiki:" in mkdocs_text, "MkDocs nav should expose Developer Wiki") + _require( + "Start Here: Wiki/User_Wiki.md" in mkdocs_text, + "User Wiki nav should start at the user wiki page", + ) + _require( + "Start Here: Wiki/Developer_Wiki.md" in mkdocs_text, + "Developer Wiki nav should start at the developer wiki page", + ) + + +def test_readme_points_users_and_contributors_to_wikis() -> None: + """README should route readers to the right documentation audience entry.""" + readme_text = _read("README.md") + + _require("Docs/Wiki/User_Wiki.md" in readme_text, "README should link User Wiki") + _require( + "Docs/Wiki/Developer_Wiki.md" in readme_text, 
+ "README should link Developer Wiki", + ) + _require("User Wiki" in readme_text, "README should label the User Wiki") + _require("Developer Wiki" in readme_text, "README should label the Developer Wiki")