From aaf289a16593f3963e3ef13ab39a27c343dc1daf Mon Sep 17 00:00:00 2001 From: Bryan Fisher Date: Thu, 12 Mar 2026 01:54:09 -0400 Subject: [PATCH 1/7] fix(cron): separate delivery failures from execution state --- .workflow/inputs/original-request.md | 22 +- .workflow/plan.md | 244 ++++++++++++++++-- .workflow/prd.md | 87 ++++++- ...p-recipient-besteffortdeliver-true.test.ts | 17 +- .../delivery-dispatch.double-announce.test.ts | 19 ++ src/cron/isolated-agent/delivery-dispatch.ts | 24 +- src/cron/isolated-agent/run.ts | 5 +- src/cron/service.failure-alert.test.ts | 110 ++++++++ .../service.persists-delivered-status.test.ts | 99 ++++++- src/cron/service.restart-catchup.test.ts | 46 ++++ src/cron/service/ops.ts | 21 +- src/cron/service/timer.ts | 88 +++++-- src/cron/types.ts | 24 +- 13 files changed, 695 insertions(+), 111 deletions(-) diff --git a/.workflow/inputs/original-request.md b/.workflow/inputs/original-request.md index b6618bbe062e..93044b088686 100644 --- a/.workflow/inputs/original-request.md +++ b/.workflow/inputs/original-request.md @@ -1,19 +1,23 @@ # Original Request -**Date:** 2026-03-08 +**Date:** 2026-03-11 **Source:** Codex session **From:** Bryan ## The Ask -> help me update openclaw with the newest features - -> -> and fix this inability to read large posts (that have to go to .txt files) -> -> Bryan Fisher: PastedText.txt Archie Bot: We have a group chat message from Bryan Fisher in group chat #13:fixing: infrastructure-loop > Infrastructure Expansion (Nanochat). There's a pasted file PastedText.txt. The user didn't write any explicit request, but presumably the pasted text contains a request. We need to open the file to see its contents. Archie Bot: Hey Bryan! I see you attached a text file, but I can’t open it directly from here. Could you let me know what you’d like to do with its contents? Feel free to paste the relevant part or describe the task, and I’ll help you out. +> Zulip no longer needs a bigger fork to get meaningfully closer to Discord. The core primitives are already live in `clawdbot/extensions/zulip`. The next pass should be a targeted plugin hardening pass: durable interactive state, productized exec approvals and model picker UX, topic lifecycle/rebinding, and startup audit/resolution polish. + +## Interpreted Scope + +- Replace the older Phase 3 framing with a reality-based hardening pass. +- Keep the work primarily inside `extensions/zulip`. +- Prioritize reliability and correctness over new capability work. +- Defer new `lionroot-zulip` fork work unless real usage proves button-first UX is no longer enough. ## Initial Context -- The core OpenClaw media-understanding pipeline can already extract text-like attachments once they enter the media pipeline. -- Lionroot-specific intake paths appear to drop or underutilize large `.txt` attachments before they reach that extractor. -- The local clawdbot fork is substantially divergent from upstream and has a dirty worktree, so upstream sync must be handled as a separate, careful track. +- `extensions/zulip` already ships draft streaming, button send/callbacks, widget-aware reply delivery, topic-bound sessions, exec approvals, model picker helpers, typing, reactions, uploads, and target resolution helpers. +- The largest remaining gap is not transport capability; it is plugin-owned state durability and callback correctness. +- The current `components-registry.ts` and `exec-approvals.ts` both keep critical interactive state in memory only. +- `commands-approve.ts` already provides a shared text `/approve` path, so Zulip-local command work should validate reuse before adding duplication. diff --git a/.workflow/plan.md b/.workflow/plan.md index 43c8a2688596..a40fa59225aa 100644 --- a/.workflow/plan.md +++ b/.workflow/plan.md @@ -1,42 +1,240 @@ -# Technical Plan — Repo-wide Lint Cleanup +# Technical Plan: Zulip Plugin Hardening Pass **Status:** Approved -**Date:** 2026-03-10 -**Flow/Context Builder Output:** Lint backlog profiling shows 410 remaining diagnostics after the initial pass, led by `no-explicit-any`, `curly`, `no-unsafe-optional-chaining`, `no-unused-vars`, and `no-unnecessary-type-assertion`. The first safe tranche targets mechanical fixes only. +**Date:** 2026-03-11 +**Flow/Context Builder Output:** Repo-grounded planning pass across the current Zulip plugin, shared approval/model command paths, and the Lionroot Zulip strategy docs. The code already ships the core primitives; the next pass should harden state ownership and productize the existing UX rather than add major new transport capability. Bryan approved proceeding with this plan in the Codex session on 2026-03-11 by replying "both" after the draft workflow artifacts were presented. ## Architecture -- Lint cleanup should be done in narrow, rule-driven batches. -- Mechanical rule families (`curly`, `no-unused-vars`) should be preferred first because they are low-risk and often auto-fixable. -- Higher-cost categories such as `no-explicit-any` should be deferred until after easy wins reduce noise. +The work stays primarily inside `extensions/zulip/src/zulip/` and uses the current seams instead of introducing a new framework: -## Files to Modify +- `components-registry.ts` becomes the durable owner of button callback state. +- `send-components.ts` registers persisted widget state and carries callback expiry metadata. +- `monitor.ts` switches from resolve/remove to claim/consume semantics, hosts thin local command handling, and runs startup audits. +- `exec-approvals.ts` keeps owning approval delivery and resolution, but gains persisted pending state. +- `model-picker.ts` keeps owning provider/model rendering and callback decoding, but gains invoker ACL scoping. +- `topic-bindings.ts` stays the lifecycle foundation; only targeted rebind work is added if live event validation proves it safe. -### First safe tranche +## Files to Modify -- `extensions/feishu/src/bot.ts` -- `extensions/feishu/src/outbound.ts` -- `extensions/zalo/src/accounts.ts` -- `extensions/zalo/src/channel.ts` +- `extensions/zulip/src/zulip/components-registry.ts` — replace in-memory-only registry with durable per-account registry and message-scope consumption helpers. +- `extensions/zulip/src/zulip/send-components.ts` — register durable widget entries and pass optional callback expiry metadata. +- `extensions/zulip/src/zulip/components.ts` — normalize `allowedUsers` and `allowed_users` for Zulip component payloads. +- `extensions/zulip/src/zulip/exec-approvals.ts` — persist pending approval prompts, rehydrate timers, retire widget entries by message. +- `extensions/zulip/src/zulip/model-picker.ts` — preserve button ACL metadata and add invoker-scoped builder params. +- `extensions/zulip/src/zulip/commands.ts` — new thin Zulip-local command parser/handler for transport-owned UX. +- `extensions/zulip/src/zulip/monitor.ts` — adopt registry claim flow, insert local command handling, run stale-click DM behavior, run startup audit, and host topic rebind integration if validated. +- `extensions/zulip/src/zulip/topic-bindings.ts` — add targeted rebind helper only if live topic-edit events support it. +- `extensions/zulip/src/zulip/resolve-users.ts` — add outbound-only fuzzy/suggestion helper while preserving strict auth resolution. +- `extensions/zulip/src/zulip/send.ts` — use clearer outbound resolution errors and optional fuzzy helper. +- `extensions/zulip/src/zulip/client.ts` — only if needed to surface topic-edit event typing or registration. +- `../lionroot-openclaw/docs/zulip-upgrade-plan.md` — refresh the roadmap to match shipped reality and the new execution order. -### Deferred until separately cleaned +## New Files -- Synology Chat, Mattermost, and Feishu docx helper files touched by exploratory autofix but not yet brought clean. +- `extensions/zulip/src/zulip/commands.ts` — minimal transport-owned Zulip command layer for `/models` and, only if validation proves necessary, `/approve` fallback handling. ## Tasks -1. [x] Profile lint backlog by rule and file. -2. [x] Fix a first mechanical tranche (`curly`, `no-unused-vars`, safe typing cleanup) in the four selected files. -3. [ ] Commit the first clean tranche. -4. [ ] Re-profile the backlog and select the next tranche. +1. [ ] Refresh the Zulip upgrade doc so planning truth matches the shipped code. +2. [ ] Implement a durable per-account component registry with claim semantics and message-scope consumption for non-reusable widgets. +3. [ ] Update send + monitor flows to use the new registry API, including stale-click handling. +4. [ ] Persist pending Zulip exec approvals and rehydrate them on startup. +5. [ ] Thread `allowedUsers` through model-picker rendering and scope picker controls to the invoker. +6. [ ] Add a thin Zulip-local command layer for `/models`; validate and reuse shared `/approve` unless testing proves a local fallback is necessary. +7. [ ] Validate live topic-edit event fidelity; if safe, add targeted topic rebinding, otherwise stop short and document the manual fallback path. +8. [ ] Add outbound resolution polish and a warn-only startup audit for configured streams, approval targets, analysis streams, and approver identities. +9. [ ] Add focused tests for registry durability, approval recovery, model-picker ACL behavior, local command entry, and any topic rebinding logic that lands. +10. [ ] Run targeted Zulip tests and a review pass before asking for approval to implement. + +## Detailed File Plan + +### 1) `extensions/zulip/src/zulip/components-registry.ts` + +Replace the current global in-memory helpers with a registry manager that owns: + +- in-memory `entriesById` +- per-account store path +- serialized persistence queue +- expiry pruning +- message-scope consumption helpers + +Add types roughly shaped like: + +- `StoredZulipComponentEntry` +- `ZulipComponentClaimResult` +- `StoredZulipComponentRegistryFile` + +Add or replace APIs with: + +- `registerEntries(...)` +- `claimEntry(...)` +- `consumeMessageEntries(messageId)` +- `removeMessageEntries(messageId)` +- `pruneExpired(now?)` +- per-account loader/manager getter + +Behavioral requirement: + +- For non-reusable widgets, a successful click retires the entire widget message, not just the clicked button. +- Unauthorized clicks must not consume the widget. +- Expired/missing/consumed claims must be distinguishable so `monitor.ts` can DM the clicker with a stale-action notice. + +### 2) `extensions/zulip/src/zulip/send-components.ts` + +Update the high-level send path to: + +- register entries through the durable registry manager +- pass message ID into registry registration +- optionally accept `callbackExpiresAtMs?: number` +- align approval widget TTL with approval expiry when supplied + +The existing text degradation path stays intact. + +### 3) `extensions/zulip/src/zulip/components.ts` + +Extend spec parsing so Zulip payload round trips preserve ACL metadata. + +Required changes: + +- normalize both `allowedUsers` and `allowed_users` +- preserve existing `callbackData` / `callback_data` handling +- keep button schema limited and explicit + +### 4) `extensions/zulip/src/zulip/exec-approvals.ts` + +Keep `ZulipExecApprovalHandler` as the owner, but add a persisted pending approval store. + +Add stored record state for: + +- approval ID +- prompt message IDs + targets +- expiry timestamp +- cleanup mode + +Required behavior: + +- on `start()`, load persisted approvals, reschedule timers, and immediately expire stale ones +- on request, persist prompt metadata after sends succeed +- on resolve/timeout, update prompt messages, retire widget entries by message ID, and remove persisted state +- use `callbackExpiresAtMs` so approval widgets expire with the approval request + +### 5) `extensions/zulip/src/zulip/model-picker.ts` + +Productize the already-shipped picker instead of redesigning it. + +Required changes: + +- add `allowedUserIds?: number[]` to public builder params +- preserve `allowedUsers` when converting picker render state into Zulip reply payloads +- ensure render-on-click flows keep the same ACL on the next picker page +- keep picker pages disposable rather than reusable + +Do not add a separate picker store in phase 1. + +### 6) `extensions/zulip/src/zulip/commands.ts` (new) + +Keep this intentionally small. + +Responsibilities: + +- parse a minimal Zulip-local command set +- own transport rendering/affordance for `/models` +- validate whether shared `/approve` is already sufficient +- only add local `/approve` handling if end-to-end validation proves the shared path inadequate in Zulip + +Suggested initial union: + +- `{ kind: "models" }` +- `{ kind: "approve"; approvalId: string; decision: ExecApprovalDecision }` + +### 7) `extensions/zulip/src/zulip/monitor.ts` + +This is the orchestration point and gets the biggest integration change. + +Required updates: + +- load the durable component registry for the active account on startup +- replace resolve+remove callback flow with `claimEntry(...)` +- on `missing|expired|consumed`, DM the clicker with a stale-action notice +- on `unauthorized`, log and stop without consuming +- consume widget state at message scope after successful non-reusable clicks +- insert thin local command handling after xcase handling and before generic agent execution +- run warn-only startup audit for streams/approval targets/approvers +- if topic-edit event validation succeeds, host the rebind flow here + +### 8) `extensions/zulip/src/zulip/topic-bindings.ts` + +Do not rebuild this system. + +Only add targeted lifecycle support if validation proves automatic rename handling is safe: + +- `rebindConversation(...)` + +If the live Zulip event stream is insufficient, document the stop condition and leave automatic rebinding out of scope for this pass. + +### 9) `extensions/zulip/src/zulip/resolve-users.ts` and `send.ts` + +Polish outbound ergonomics without weakening authorization semantics. + +- exact match first +- then single-candidate fuzzy suggestion/match for outbound sends only +- fail clearly on ambiguous results +- keep allowlist, approver, and auth-sensitive identity checks strict + +### 10) `../lionroot-openclaw/docs/zulip-upgrade-plan.md` + +Update the roadmap after the code-grounded pass so it: + +- marks exec approvals and model picker as already shipped but rough +- promotes interaction durability to the top priority +- frames the next Zulip pass as plugin hardening/productization +- defers fork growth until real usage proves it necessary ## Testing Strategy -- Re-run `oxlint --type-aware` on the selected tranche files. -- Re-run formatting checks on those files. -- Preserve previously passing targeted Zulip/typecheck tests. +Add focused, colocated tests near the touched Zulip files. + +Minimum test coverage: + +- component registry persistence/load/expiry/claim behavior +- message-scope consumption for multi-button widgets +- stale-click and unauthorized-click handling +- exec approval restart recovery and prompt update behavior +- model picker ACL preservation and invoker scoping +- `/models` local command handling with widgets enabled/disabled +- shared `/approve` Zulip path validation or local fallback tests if implemented +- topic rebind tests only if automatic rebinding actually lands + +Recommended execution checks: + +- targeted Zulip Vitest suites for touched files +- at least one broader typecheck/build pass before review ## Rollback Plan -- Revert the tranche commit if any behavior regresses. -- Keep future lint cleanup isolated in separate commits by tranche. +- Reverting the registry changes falls back to today’s in-memory widget behavior. +- Reverting approval persistence falls back to today’s non-durable approval prompts. +- Reverting picker ACL work removes scoping but preserves the existing picker behavior. +- Reverting the local command layer still leaves shared text commands available. +- Topic rebind logic, if added, should be isolated so it can be reverted without affecting the existing binding manager. +- Docs update can be reverted independently from code. + +## Risks and Validation Gates + +- **Shared `/approve` status is known but must still be validated end to end in Zulip.** The shared handler exists in `src/auto-reply/reply/commands-approve.ts`; do not duplicate it unless a real Zulip gap is proven. +- **Automatic topic rebinding must be gated on live event fidelity.** Do not implement heuristic rename matching. +- **Persistence assumes one active monitor process per Zulip account.** If deployment moves multi-process later, these stores may need a shared backend. +- **Startup audit must remain warn-only.** It should clarify operator issues without blocking the monitor from connecting. + +## Implementation Order + +1. Docs refresh in workflow + roadmap +2. Durable component registry +3. Monitor/send integration for claim + stale-click handling +4. Exec approval durability +5. Model picker ACL/scoping +6. Thin Zulip-local `/models` command handling +7. Topic event validation and conditional rebind work +8. Resolution polish + startup audit +9. Tests + review diff --git a/.workflow/prd.md b/.workflow/prd.md index d37b1f021ba7..ea60091e07a5 100644 --- a/.workflow/prd.md +++ b/.workflow/prd.md @@ -1,28 +1,87 @@ -# PRD — Repo-wide Lint Cleanup +# PRD: Zulip Plugin Hardening Pass **Status:** Approved -**Date:** 2026-03-10 -**Provenance:** Bryan explicitly asked to continue into the repo-wide lint failures after the typecheck cleanup was committed and pushed on 2026-03-10. +**Date:** 2026-03-11 +**Provenance:** See `inputs/original-request.md` and `inputs/references.md`. Bryan approved proceeding with this PRD in the Codex session on 2026-03-11 by replying "both" after the draft workflow artifacts were presented. ## Summary -Reduce the current repo-wide lint backlog in `clawdbot` with a pragmatic, category-driven cleanup plan aimed at making `pnpm check` materially healthier and eventually green. +Harden the existing Zulip plugin UX in `extensions/zulip` so the features that are already shipped behave reliably in real use. This pass is not about adding major new Zulip primitives or broadening the fork. It is a plugin-first reliability and productization pass focused on durable callback state, restart-safe exec approvals, invoker-scoped model picker flows, thin Zulip-native command affordances, topic lifecycle validation/rebinding, and warn-only startup audits. ## User Stories -- As a maintainer, I want lint failures grouped and attacked by highest-yield categories instead of random file hopping. -- As an engineer, I want fixes to be mostly mechanical and low-risk. -- As an operator, I do not want the already-verified Zulip or typecheck fixes to regress during lint cleanup. +- As a Zulip user, I want approval and picker buttons to keep working after a monitor restart so the transport feels dependable. +- As an approver, I want exec approval prompts to survive restart and retire correctly after resolution or expiry. +- As a user, I want model picker controls scoped to me so other people in the stream cannot hijack my selection flow. +- As an operator, I want `/models` to feel like a first-class Zulip control without duplicating the shared command system unnecessarily. +- As an operator, I want topic-bound sessions to stay stable across normal lifecycle events and fail safely when rename/rebind cannot be trusted. +- As a maintainer, I want the Zulip upgrade docs to match shipped reality so future planning is grounded in what already exists. ## Acceptance Criteria -- [x] Workflow docs exist for this scope. -- [ ] Lint backlog is profiled by rule and high-impact files. -- [ ] A first safe tranche of lint fixes lands with targeted verification. -- [ ] Previously-fixed Zulip and typecheck areas still pass focused verification. +### Docs and planning truth + +- [ ] `lionroot-openclaw/docs/zulip-upgrade-plan.md` is updated to reflect that exec approvals and model picker are already shipped but need hardening/productization. +- [ ] The document ranks durable interaction state as the top plugin priority. +- [ ] The document no longer frames the next pass as mainly new capability work. + +### Durable component registry + +- [ ] Zulip component callback state is persisted per account to disk. +- [ ] Restarting the Zulip monitor preserves unexpired widget callbacks. +- [ ] Callback claiming distinguishes `ok`, `unauthorized`, `missing`, `expired`, and `consumed` outcomes. +- [ ] Non-reusable widgets are retired at message scope, not just button scope. +- [ ] Stale/expired/consumed clicks notify the clicker privately rather than spamming the originating stream. + +### Exec approval durability + +- [ ] Pending Zulip approval prompt state is persisted per account. +- [ ] On startup, the handler rehydrates pending approvals and timeout jobs. +- [ ] On resolve or timeout after restart, previously-sent approval prompt messages are still updated. +- [ ] Approval widget entries are retired by message ID on resolve/expiry. + +### Model picker productization + +- [ ] `allowedUsers` survives the Zulip reply payload round trip. +- [ ] Picker buttons are scoped to the invoking user. +- [ ] Old picker pages are disposable and retired after click. +- [ ] Unauthorized picker clicks are ignored/logged without consuming the control. + +### Thin Zulip-local command UX + +- [ ] Zulip gets a thin transport-owned `/models` entry point that renders widget UX when widgets are enabled. +- [ ] When widgets are disabled, `/models` falls back cleanly to the existing shared text path. +- [ ] The shared `/approve` path is validated end to end in Zulip before any local duplication is added. +- [ ] If shared `/approve` is insufficient in practice, a minimal local fallback is added and documented. + +### Topic lifecycle and audit + +- [ ] Existing persisted topic bindings remain intact. +- [ ] Live Zulip topic-edit event fidelity is validated before any automatic rebind logic is implemented. +- [ ] If automatic rename/rebind is viable, a targeted binding migration path is added. +- [ ] If it is not viable, the plan explicitly stops short of heuristic rebinding. +- [ ] Startup audit runs once, remains warn-only, and checks configured streams, analysis streams, approval targets, and approver identities. + +### Resolution polish + +- [ ] Outbound-only fuzzy/suggestion behavior improves ambiguous user/stream sends. +- [ ] Allowlist and approval authorization resolution remain strict. +- [ ] Ambiguous and not-found outbound errors are clearer than they are today. ## Out of Scope -- Feature work. -- Broad refactors that are not needed for lint compliance. -- Changing lint policy/config unless strictly required. +- New Zulip fork primitives such as dropdowns, modal forms, or rich cards. +- A broad shared cross-channel interaction framework. +- A generic shared command-system refactor. +- Rewriting `monitor.ts` wholesale. +- XCase expansion or unrelated Zulip feature work. +- Reworking draft streaming, which is already shipped. + +## Technical Notes + +- Keep the work in `extensions/zulip` unless a validated blocker truly requires fork work. +- Reuse the current seams: `components-registry.ts`, `send-components.ts`, `exec-approvals.ts`, `model-picker.ts`, `monitor.ts`, and `topic-bindings.ts`. +- Preserve existing shared command behavior where it already works, especially `/approve`. +- Use the same Zulip state area pattern already used by `topic-bindings.ts` for new persisted state. +- Startup audit should be non-blocking and warn-only on the first pass. +- Do not add new config knobs unless field use clearly proves they are needed. diff --git a/src/cron/isolated-agent.skips-delivery-without-whatsapp-recipient-besteffortdeliver-true.test.ts b/src/cron/isolated-agent.skips-delivery-without-whatsapp-recipient-besteffortdeliver-true.test.ts index 6b2ab85739a2..d1369065da6a 100644 --- a/src/cron/isolated-agent.skips-delivery-without-whatsapp-recipient-besteffortdeliver-true.test.ts +++ b/src/cron/isolated-agent.skips-delivery-without-whatsapp-recipient-besteffortdeliver-true.test.ts @@ -91,7 +91,7 @@ async function expectStructuredTelegramFailure(params: { expect(res.deliveryAttempted).toBe(params.expectDeliveryAttempted); } if (params.expectedErrorFragment) { - expect(res.error).toContain(params.expectedErrorFragment); + expect(res.deliveryError ?? res.error).toContain(params.expectedErrorFragment); } expect(runSubagentAnnounceFlow).not.toHaveBeenCalled(); expect(deps.sendMessageTelegram).toHaveBeenCalledTimes(1); @@ -332,12 +332,13 @@ describe("runCronIsolatedAgentTurn", () => { }); }); - it("fails when structured direct delivery fails and best-effort is disabled", async () => { + it("records deliveryError when structured direct delivery fails and best-effort is disabled", async () => { await expectStructuredTelegramFailure({ payload: { text: "hello from cron", mediaUrl: "https://example.com/img.png" }, bestEffort: false, - expectedStatus: "error", + expectedStatus: "ok", expectedErrorFragment: "boom", + expectDeliveryAttempted: true, }); }); @@ -361,6 +362,7 @@ describe("runCronIsolatedAgentTurn", () => { expect(res.status).toBe("ok"); expect(res.delivered).toBe(false); expect(res.deliveryAttempted).toBe(true); + expect(res.deliveryError).toContain("boom"); expect(runSubagentAnnounceFlow).not.toHaveBeenCalled(); expect(deps.sendMessageTelegram).toHaveBeenCalledTimes(1); }, @@ -401,7 +403,7 @@ describe("runCronIsolatedAgentTurn", () => { }); }); - it("returns error when text direct delivery fails and best-effort is disabled", async () => { + it("records deliveryError when text direct delivery fails and best-effort is disabled", async () => { await withTelegramAnnounceFixture( async ({ home, storePath, deps }) => { mockAgentPayloads([{ text: "hello from cron" }]); @@ -418,10 +420,11 @@ describe("runCronIsolatedAgentTurn", () => { }, }); - expect(res.status).toBe("error"); - expect(res.delivered).toBeUndefined(); + expect(res.status).toBe("ok"); + expect(res.delivered).toBe(false); expect(res.deliveryAttempted).toBe(true); - expect(res.error).toContain("boom"); + expect(res.deliveryError).toContain("boom"); + expect(res.error).toBeUndefined(); expect(runSubagentAnnounceFlow).not.toHaveBeenCalled(); expect(deps.sendMessageTelegram).toHaveBeenCalledTimes(1); }, diff --git a/src/cron/isolated-agent/delivery-dispatch.double-announce.test.ts b/src/cron/isolated-agent/delivery-dispatch.double-announce.test.ts index f9a7d90a276d..3ad0798fe17b 100644 --- a/src/cron/isolated-agent/delivery-dispatch.double-announce.test.ts +++ b/src/cron/isolated-agent/delivery-dispatch.double-announce.test.ts @@ -255,6 +255,25 @@ describe("dispatchCronDelivery — double-announce guard", () => { expect(deliverOutboundPayloads).toHaveBeenCalledTimes(1); }); + it("keeps execution-success semantics when direct delivery fails after work completed", async () => { + vi.mocked(countActiveDescendantRuns).mockReturnValue(0); + vi.mocked(isLikelyInterimCronMessage).mockReturnValue(false); + vi.mocked(deliverOutboundPayloads).mockRejectedValue(new Error("announce failed")); + + const params = makeBaseParams({ synthesizedText: "Briefing ready." }); + const state = await dispatchCronDelivery(params); + + expect(state.result).toEqual( + expect.objectContaining({ + status: "ok", + summary: "Briefing ready.", + delivered: false, + deliveryAttempted: true, + deliveryError: "announce failed", + }), + ); + }); + it("no delivery requested means deliveryAttempted stays false and no delivery is sent", async () => { const params = makeBaseParams({ synthesizedText: "Task done.", diff --git a/src/cron/isolated-agent/delivery-dispatch.ts b/src/cron/isolated-agent/delivery-dispatch.ts index a3a98b245d07..d1eb2357ca23 100644 --- a/src/cron/isolated-agent/delivery-dispatch.ts +++ b/src/cron/isolated-agent/delivery-dispatch.ts @@ -103,6 +103,7 @@ export type DispatchCronDeliveryState = { result?: RunCronAgentTurnResult; delivered: boolean; deliveryAttempted: boolean; + deliveryError?: string; summary?: string; outputText?: string; synthesizedText?: string; @@ -201,6 +202,7 @@ export async function dispatchCronDelivery( // Keep this strict so timer fallback can safely decide whether to wake main. let delivered = params.skipMessagingToolDelivery; let deliveryAttempted = params.skipMessagingToolDelivery; + let deliveryError: string | undefined; const failDeliveryTarget = (error: string) => params.withRunSession({ status: "error", @@ -211,6 +213,14 @@ export async function dispatchCronDelivery( deliveryAttempted, ...params.telemetry, }); + const recordDeliveryError = (error: unknown) => { + const message = summarizeDirectCronDeliveryError(error); + if (!deliveryError) { + deliveryError = message; + } + logWarn(`[cron:${params.job.id}] direct announce delivery failed: ${message}`); + return message; + }; const deliverViaDirect = async ( delivery: SuccessfulDeliveryTarget, @@ -254,6 +264,9 @@ export async function dispatchCronDelivery( bestEffort: params.deliveryBestEffort, deps: createOutboundSendDeps(params.deps), abortSignal: params.abortSignal, + onError: (error) => { + recordDeliveryError(error); + }, }); const deliveryResults = options?.retryTransient ? await retryTransientDirectCronDelivery({ @@ -265,13 +278,15 @@ export async function dispatchCronDelivery( delivered = deliveryResults.length > 0; return null; } catch (err) { + const errorMessage = recordDeliveryError(err); if (!params.deliveryBestEffort) { return params.withRunSession({ - status: "error", + status: "ok", summary, outputText, - error: String(err), + delivered: false, deliveryAttempted, + deliveryError: errorMessage, ...params.telemetry, }); } @@ -415,6 +430,7 @@ export async function dispatchCronDelivery( result: failDeliveryTarget(params.resolvedDelivery.error.message), delivered, deliveryAttempted, + deliveryError, summary, outputText, synthesizedText, @@ -432,6 +448,7 @@ export async function dispatchCronDelivery( }), delivered, deliveryAttempted, + deliveryError, summary, outputText, synthesizedText, @@ -451,6 +468,7 @@ export async function dispatchCronDelivery( result: directResult, delivered, deliveryAttempted, + deliveryError, summary, outputText, synthesizedText, @@ -464,6 +482,7 @@ export async function dispatchCronDelivery( result: finalizedTextResult, delivered, deliveryAttempted, + deliveryError, summary, outputText, synthesizedText, @@ -478,6 +497,7 @@ export async function dispatchCronDelivery( deliveryAttempted, summary, outputText, + deliveryError, synthesizedText, deliveryPayloads, }; diff --git a/src/cron/isolated-agent/run.ts b/src/cron/isolated-agent/run.ts index 89f28ab8d29d..2c163171602e 100644 --- a/src/cron/isolated-agent/run.ts +++ b/src/cron/isolated-agent/run.ts @@ -960,10 +960,7 @@ export async function runCronIsolatedAgentTurn(params: { } const delivered = deliveryResult.delivered; const deliveryAttempted = deliveryResult.deliveryAttempted; - const deliveryError = - "deliveryError" in deliveryResult && typeof deliveryResult.deliveryError === "string" - ? deliveryResult.deliveryError - : undefined; + const deliveryError = deliveryResult.deliveryError; summary = deliveryResult.summary; outputText = deliveryResult.outputText; diff --git a/src/cron/service.failure-alert.test.ts b/src/cron/service.failure-alert.test.ts index 0967274548af..95b9773c9a95 100644 --- a/src/cron/service.failure-alert.test.ts +++ b/src/cron/service.failure-alert.test.ts @@ -117,6 +117,70 @@ describe("CronService failure alerts", () => { await store.cleanup(); }); + it("alerts after configured consecutive delivery failures and honors cooldown", async () => { + const store = await makeStorePath(); + const sendCronFailureAlert = vi.fn(async () => undefined); + const runIsolatedAgentJob = vi.fn(async () => ({ + status: "ok" as const, + summary: "done", + delivered: false, + deliveryError: "announce failed", + })); + + const cron = createFailureAlertCron({ + storePath: store.storePath, + cronConfig: { + failureAlert: { + enabled: true, + after: 2, + cooldownMs: 60_000, + }, + }, + runIsolatedAgentJob, + sendCronFailureAlert, + }); + + await cron.start(); + const job = await cron.add({ + name: "delivery alert job", + enabled: true, + schedule: { kind: "every", everyMs: 60_000 }, + sessionTarget: "isolated", + wakeMode: "next-heartbeat", + payload: { kind: "agentTurn", message: "run report" }, + delivery: { mode: "announce", channel: "telegram", to: "19098680" }, + }); + + await cron.run(job.id, "force"); + expect(sendCronFailureAlert).not.toHaveBeenCalled(); + + await cron.run(job.id, "force"); + expect(sendCronFailureAlert).toHaveBeenCalledTimes(1); + expect(sendCronFailureAlert).toHaveBeenLastCalledWith( + expect.objectContaining({ + job: expect.objectContaining({ id: job.id }), + channel: "telegram", + to: "19098680", + text: expect.stringContaining('Cron job "delivery alert job" delivery failed 2 times'), + }), + ); + + await cron.run(job.id, "force"); + expect(sendCronFailureAlert).toHaveBeenCalledTimes(1); + + vi.setSystemTime(new Date("2026-01-01T00:01:00.000Z")); + await cron.run(job.id, "force"); + expect(sendCronFailureAlert).toHaveBeenCalledTimes(2); + expect(sendCronFailureAlert).toHaveBeenLastCalledWith( + expect.objectContaining({ + text: expect.stringContaining('Cron job "delivery alert job" delivery failed 4 times'), + }), + ); + + cron.stop(); + await store.cleanup(); + }); + it("supports per-job failure alert override when global alerts are disabled", async () => { const store = await makeStorePath(); const sendCronFailureAlert = vi.fn(async () => undefined); @@ -204,6 +268,52 @@ describe("CronService failure alerts", () => { await store.cleanup(); }); + it("skips delivery failure alerts for best-effort jobs", async () => { + const store = await makeStorePath(); + const sendCronFailureAlert = vi.fn(async () => undefined); + const runIsolatedAgentJob = vi.fn(async () => ({ + status: "ok" as const, + summary: "done", + delivered: false, + deliveryError: "announce failed", + })); + + const cron = createFailureAlertCron({ + storePath: store.storePath, + cronConfig: { + failureAlert: { + enabled: true, + after: 1, + }, + }, + runIsolatedAgentJob, + sendCronFailureAlert, + }); + + await cron.start(); + const bestEffortJob = await cron.add({ + name: "best effort delivery alert job", + enabled: true, + schedule: { kind: "every", everyMs: 60_000 }, + sessionTarget: "isolated", + wakeMode: "next-heartbeat", + payload: { kind: "agentTurn", message: "run report" }, + delivery: { + mode: "announce", + channel: "telegram", + to: "19098680", + bestEffort: true, + }, + }); + + await cron.run(bestEffortJob.id, "force"); + await cron.run(bestEffortJob.id, "force"); + expect(sendCronFailureAlert).not.toHaveBeenCalled(); + + cron.stop(); + await store.cleanup(); + }); + it("threads failure alert mode/accountId and skips best-effort jobs", async () => { const store = await makeStorePath(); const sendCronFailureAlert = vi.fn(async () => undefined); diff --git a/src/cron/service.persists-delivered-status.test.ts b/src/cron/service.persists-delivered-status.test.ts index dab021731c73..69072bb16a8e 100644 --- a/src/cron/service.persists-delivered-status.test.ts +++ b/src/cron/service.persists-delivered-status.test.ts @@ -40,7 +40,16 @@ function buildMainSessionSystemEventJob(name: string): CronAddInput { function createIsolatedCronWithFinishedBarrier(params: { storePath: string; delivered?: boolean; - onFinished?: (evt: { jobId: string; delivered?: boolean; deliveryStatus?: string }) => void; + deliveryError?: string; + onFinished?: (evt: { + jobId: string; + delivered?: boolean; + deliveryStatus?: string; + deliveryError?: string; + source?: string; + correlationId?: string; + cronJobId?: string; + }) => void; }) { const finished = createFinishedBarrier(); const cron = new CronService({ @@ -53,6 +62,7 @@ function createIsolatedCronWithFinishedBarrier(params: { status: "ok" as const, summary: "done", ...(params.delivered === undefined ? {} : { delivered: params.delivered }), + ...(params.deliveryError === undefined ? {} : { deliveryError: params.deliveryError }), })), onEvent: (evt) => { if (evt.action === "finished") { @@ -60,6 +70,10 @@ function createIsolatedCronWithFinishedBarrier(params: { jobId: evt.jobId, delivered: evt.delivered, deliveryStatus: evt.deliveryStatus, + deliveryError: evt.deliveryError, + source: evt.source, + correlationId: evt.correlationId, + cronJobId: evt.cronJobId, }); } finished.onEvent(evt); @@ -117,12 +131,22 @@ function expectDeliveryNotRequested( async function runIsolatedJobAndReadState(params: { job: CronAddInput; delivered?: boolean; - onFinished?: (evt: { jobId: string; delivered?: boolean; deliveryStatus?: string }) => void; + deliveryError?: string; + onFinished?: (evt: { + jobId: string; + delivered?: boolean; + deliveryStatus?: string; + deliveryError?: string; + source?: string; + correlationId?: string; + cronJobId?: string; + }) => void; }) { const store = await makeStorePath(); const { cron, finished } = createIsolatedCronWithFinishedBarrier({ storePath: store.storePath, ...(params.delivered !== undefined ? { delivered: params.delivered } : {}), + ...(params.deliveryError !== undefined ? { deliveryError: params.deliveryError } : {}), ...(params.onFinished ? { onFinished: params.onFinished } : {}), }); @@ -162,6 +186,19 @@ describe("CronService persists delivered status", () => { expect(updated?.state.lastDeliveryError).toBeUndefined(); }); + it("persists deliveryError without downgrading execution status", async () => { + const updated = await runIsolatedJobAndReadState({ + job: buildIsolatedAgentTurnJob("delivery-error"), + delivered: false, + deliveryError: "announce failed", + }); + expectSuccessfulCronRun(updated); + expect(updated?.state.lastDelivered).toBe(false); + expect(updated?.state.lastDeliveryStatus).toBe("not-delivered"); + expect(updated?.state.lastDeliveryError).toBe("announce failed"); + expect(updated?.state.lastError).toBeUndefined(); + }); + it("persists not-requested delivery state when delivery is not configured", async () => { const updated = await runIsolatedJobAndReadState({ job: buildIsolatedAgentTurnJob("no-delivery"), @@ -203,17 +240,67 @@ describe("CronService persists delivered status", () => { }); it("emits delivered in the finished event", async () => { - let capturedEvent: { jobId: string; delivered?: boolean; deliveryStatus?: string } | undefined; + let capturedEvent: + | { + jobId: string; + delivered?: boolean; + deliveryStatus?: string; + deliveryError?: string; + source?: string; + correlationId?: string; + cronJobId?: string; + } + | undefined; await runIsolatedJobAndReadState({ job: buildIsolatedAgentTurnJob("event-test"), - delivered: true, + delivered: false, + deliveryError: "announce failed", onFinished: (evt) => { capturedEvent = evt; }, }); expect(capturedEvent).toBeDefined(); - expect(capturedEvent?.delivered).toBe(true); - expect(capturedEvent?.deliveryStatus).toBe("delivered"); + expect(capturedEvent?.delivered).toBe(false); + expect(capturedEvent?.deliveryStatus).toBe("not-delivered"); + expect(capturedEvent?.deliveryError).toBe("announce failed"); + }); + + it("emits cron attribution fields for manual finished events", async () => { + const store = await makeStorePath(); + let capturedEvent: + | { + jobId: string; + source?: string; + correlationId?: string; + cronJobId?: string; + } + | undefined; + const { cron } = createIsolatedCronWithFinishedBarrier({ + storePath: store.storePath, + delivered: false, + deliveryError: "announce failed", + onFinished: (evt) => { + capturedEvent = evt; + }, + }); + + await cron.start(); + try { + const job = await cron.add({ + ...buildIsolatedAgentTurnJob("manual-event-test"), + delivery: { mode: "announce", channel: "telegram", to: "123" }, + }); + + await cron.run(job.id, "force"); + + expect(capturedEvent).toBeDefined(); + expect(capturedEvent?.jobId).toBe(job.id); + expect(capturedEvent?.source).toBe("cron"); + expect(capturedEvent?.correlationId).toBe(job.id); + expect(capturedEvent?.cronJobId).toBe(job.id); + } finally { + cron.stop(); + } }); }); diff --git a/src/cron/service.restart-catchup.test.ts b/src/cron/service.restart-catchup.test.ts index f0c9c3e4dc93..44c28d70d007 100644 --- a/src/cron/service.restart-catchup.test.ts +++ b/src/cron/service.restart-catchup.test.ts @@ -369,6 +369,52 @@ describe("CronService restart catch-up", () => { await store.cleanup(); }); + it("persists deliveryError from startup catch-up isolated runs", async () => { + const store = await makeStorePath(); + const now = Date.parse("2025-12-13T17:00:00.000Z"); + + await writeStoreJobs(store.storePath, [ + { + id: "startup-catchup-delivery-error", + name: "startup catch-up delivery error", + enabled: true, + createdAtMs: now - 60_000, + updatedAtMs: now - 60_000, + schedule: { kind: "every", everyMs: 60_000, anchorMs: now - 60_000 }, + sessionTarget: "isolated", + wakeMode: "next-heartbeat", + payload: { kind: "agentTurn", message: "produce a digest" }, + delivery: { mode: "announce", channel: "telegram", to: "123" }, + state: { nextRunAtMs: now - 1_000 }, + }, + ]); + + const state = createCronServiceState({ + cronEnabled: true, + storePath: store.storePath, + log: noopLogger, + nowMs: () => now, + enqueueSystemEvent: vi.fn(), + requestHeartbeatNow: vi.fn(), + runIsolatedAgentJob: vi.fn(async () => ({ + status: "ok" as const, + summary: "ok", + delivered: false, + deliveryAttempted: true, + deliveryError: "boom", + })), + }); + + await runMissedJobs(state); + + const updated = state.store?.jobs.find((job) => job.id === "startup-catchup-delivery-error"); + expect(updated?.state.lastStatus).toBe("ok"); + expect(updated?.state.lastDeliveryStatus).toBe("not-delivered"); + expect(updated?.state.lastDeliveryError).toBe("boom"); + + await store.cleanup(); + }); + it("reschedules deferred missed jobs from the post-catchup clock so they stay in the future", async () => { const store = await makeStorePath(); const startNow = Date.parse("2025-12-13T17:00:00.000Z"); diff --git a/src/cron/service/ops.ts b/src/cron/service/ops.ts index c027c8d553f8..c3bd4f8c8b1f 100644 --- a/src/cron/service/ops.ts +++ b/src/cron/service/ops.ts @@ -19,6 +19,7 @@ import { applyJobResult, armTimer, emit, + emitJobFinished, executeJobCoreWithTimeout, runMissedJobs, stopTimer, @@ -458,6 +459,7 @@ async function finishPreparedManualRun( { status: coreResult.status, error: coreResult.error, + deliveryError: coreResult.deliveryError, delivered: coreResult.delivered, startedAt, endedAt, @@ -465,24 +467,7 @@ async function finishPreparedManualRun( { preserveSchedule: mode === "force" }, ); - emit(state, { - jobId: job.id, - action: "finished", - status: coreResult.status, - error: coreResult.error, - summary: coreResult.summary, - delivered: coreResult.delivered, - deliveryStatus: job.state.lastDeliveryStatus, - deliveryError: job.state.lastDeliveryError, - sessionId: coreResult.sessionId, - sessionKey: coreResult.sessionKey, - runAtMs: startedAt, - durationMs: job.state.lastDurationMs, - nextRunAtMs: job.state.nextRunAtMs, - model: coreResult.model, - provider: coreResult.provider, - usage: coreResult.usage, - }); + emitJobFinished(state, job, coreResult, startedAt); if (shouldDelete && state.store) { state.store.jobs = state.store.jobs.filter((entry) => entry.id !== job.id); diff --git a/src/cron/service/timer.ts b/src/cron/service/timer.ts index 17d8c762dff0..af399fca81ff 100644 --- a/src/cron/service/timer.ts +++ b/src/cron/service/timer.ts @@ -254,12 +254,14 @@ function emitFailureAlert( to?: string; mode?: "announce" | "webhook"; accountId?: string; + kind?: "execution" | "delivery"; }, ) { const safeJobName = params.job.name || params.job.id; const truncatedError = (params.error?.trim() || "unknown error").slice(0, 200); + const failureLabel = params.kind === "delivery" ? "delivery failed" : "failed"; const text = [ - `Cron job "${safeJobName}" failed ${params.consecutiveErrors} times`, + `Cron job "${safeJobName}" ${failureLabel} ${params.consecutiveErrors} times`, `Last error: ${truncatedError}`, ].join("\n"); @@ -299,6 +301,7 @@ export function applyJobResult( result: { status: CronRunStatus; error?: string; + deliveryError?: string; delivered?: boolean; startedAt: number; endedAt: number; @@ -328,34 +331,34 @@ export function applyJobResult( const deliveryStatus = resolveDeliveryStatus({ job, delivered: result.delivered }); job.state.lastDeliveryStatus = deliveryStatus; job.state.lastDeliveryError = - deliveryStatus === "not-delivered" && result.error ? result.error : undefined; + deliveryStatus === "not-requested" ? undefined : result.deliveryError; job.updatedAtMs = result.endedAt; - // Track consecutive errors for backoff / auto-disable. + const alertConfig = resolveFailureAlert(state, job); + const isBestEffort = + job.delivery?.bestEffort === true || + (job.payload.kind === "agentTurn" && job.payload.bestEffortDeliver === true); + + // Track consecutive execution errors for backoff / auto-disable. if (result.status === "error") { job.state.consecutiveErrors = (job.state.consecutiveErrors ?? 0) + 1; - const alertConfig = resolveFailureAlert(state, job); - if (alertConfig && job.state.consecutiveErrors >= alertConfig.after) { - const isBestEffort = - job.delivery?.bestEffort === true || - (job.payload.kind === "agentTurn" && job.payload.bestEffortDeliver === true); - if (!isBestEffort) { - const now = state.deps.nowMs(); - const lastAlert = job.state.lastFailureAlertAtMs; - const inCooldown = - typeof lastAlert === "number" && now - lastAlert < Math.max(0, alertConfig.cooldownMs); - if (!inCooldown) { - emitFailureAlert(state, { - job, - error: result.error, - consecutiveErrors: job.state.consecutiveErrors, - channel: alertConfig.channel, - to: alertConfig.to, - mode: alertConfig.mode, - accountId: alertConfig.accountId, - }); - job.state.lastFailureAlertAtMs = now; - } + if (alertConfig && job.state.consecutiveErrors >= alertConfig.after && !isBestEffort) { + const now = state.deps.nowMs(); + const lastAlert = job.state.lastFailureAlertAtMs; + const inCooldown = + typeof lastAlert === "number" && now - lastAlert < Math.max(0, alertConfig.cooldownMs); + if (!inCooldown) { + emitFailureAlert(state, { + job, + error: result.error, + consecutiveErrors: job.state.consecutiveErrors, + channel: alertConfig.channel, + to: alertConfig.to, + mode: alertConfig.mode, + accountId: alertConfig.accountId, + kind: "execution", + }); + job.state.lastFailureAlertAtMs = now; } } } else { @@ -363,6 +366,34 @@ export function applyJobResult( job.state.lastFailureAlertAtMs = undefined; } + // Track consecutive delivery failures separately so delivery issues do not + // pollute execution status/backoff state. + if (result.deliveryError) { + job.state.consecutiveDeliveryErrors = (job.state.consecutiveDeliveryErrors ?? 0) + 1; + if (alertConfig && job.state.consecutiveDeliveryErrors >= alertConfig.after && !isBestEffort) { + const now = state.deps.nowMs(); + const lastAlert = job.state.lastDeliveryFailureAlertAtMs; + const inCooldown = + typeof lastAlert === "number" && now - lastAlert < Math.max(0, alertConfig.cooldownMs); + if (!inCooldown) { + emitFailureAlert(state, { + job, + error: result.deliveryError, + consecutiveErrors: job.state.consecutiveDeliveryErrors, + channel: alertConfig.channel, + to: alertConfig.to, + mode: alertConfig.mode, + accountId: alertConfig.accountId, + kind: "delivery", + }); + job.state.lastDeliveryFailureAlertAtMs = now; + } + } + } else { + job.state.consecutiveDeliveryErrors = 0; + job.state.lastDeliveryFailureAlertAtMs = undefined; + } + const shouldDelete = job.schedule.kind === "at" && job.deleteAfterRun === true && result.status === "ok"; @@ -488,6 +519,7 @@ function applyOutcomeToStoredJob(state: CronServiceState, result: TimedCronRunOu const shouldDelete = applyJobResult(state, job, { status: result.status, error: result.error, + deliveryError: result.deliveryError, delivered: result.delivered, startedAt: result.startedAt, endedAt: result.endedAt, @@ -933,6 +965,8 @@ async function runStartupCatchupCandidate( error: result.error, summary: result.summary, delivered: result.delivered, + deliveryAttempted: result.deliveryAttempted, + deliveryError: result.deliveryError, sessionId: result.sessionId, sessionKey: result.sessionKey, model: result.model, @@ -1181,6 +1215,7 @@ export async function executeJobCore( return { status: res.status, error: res.error, + deliveryError: res.deliveryError, summary: res.summary, delivered: res.delivered, deliveryAttempted: res.deliveryAttempted, @@ -1225,6 +1260,7 @@ export async function executeJob( const shouldDelete = applyJobResult(state, job, { status: coreResult.status, error: coreResult.error, + deliveryError: coreResult.deliveryError, delivered: coreResult.delivered, startedAt, endedAt, @@ -1238,7 +1274,7 @@ export async function executeJob( } } -function emitJobFinished( +export function emitJobFinished( state: CronServiceState, job: CronJob, result: { diff --git a/src/cron/types.ts b/src/cron/types.ts index 620f87061ecb..2ede086165de 100644 --- a/src/cron/types.ts +++ b/src/cron/types.ts @@ -61,6 +61,8 @@ export type CronRunOutcome = { error?: string; /** Optional classifier for execution errors to guide fallback behavior. */ errorKind?: "delivery-target"; + /** Delivery-specific error text, separate from execution error state. */ + deliveryError?: string; summary?: string; sessionId?: string; sessionKey?: string; @@ -77,6 +79,16 @@ export type CronFailureAlert = { accountId?: string; }; +export type CronPacingProviderTarget = "claude" | "codex" | "gemini"; +export type CronPacingRole = "maintenance" | "report" | "review"; + +export type CronPacingMetadata = { + providerTarget?: CronPacingProviderTarget; + role?: CronPacingRole; +}; + +export type CronPacingPatch = CronPacingMetadata | null; + export type CronPayload = { kind: "systemEvent"; text: string } | CronAgentTurnPayload; export type CronPayloadPatch = { kind: "systemEvent"; text?: string } | CronAgentTurnPayloadPatch; @@ -134,8 +146,12 @@ export type CronJobState = { lastDurationMs?: number; /** Number of consecutive execution errors (reset on success). Used for backoff. */ consecutiveErrors?: number; - /** Last failure alert timestamp (ms since epoch) for cooldown gating. */ + /** Last failure alert timestamp (ms since epoch) for execution-error cooldown gating. */ lastFailureAlertAtMs?: number; + /** Number of consecutive delivery failures with explicit deliveryError (reset when delivery succeeds or is clean). */ + consecutiveDeliveryErrors?: number; + /** Last delivery-failure alert timestamp (ms since epoch) for cooldown gating. */ + lastDeliveryFailureAlertAtMs?: number; /** Number of consecutive schedule computation errors. Auto-disables job after threshold. */ scheduleErrorCount?: number; /** Explicit delivery outcome, separate from execution outcome. */ @@ -154,6 +170,7 @@ export type CronJob = CronJobBase< CronDelivery, CronFailureAlert | false > & { + pacing?: CronPacingMetadata; state: CronJobState; }; @@ -166,8 +183,11 @@ export type CronJobCreate = Omit; }; -export type CronJobPatch = Partial> & { +export type CronJobPatch = Partial< + Omit +> & { payload?: CronPayloadPatch; delivery?: CronDeliveryPatch; + pacing?: CronPacingPatch; state?: Partial; }; From 557408302cbf0aac56d8151e1f2d36242aab25d6 Mon Sep 17 00:00:00 2001 From: Bryan Fisher Date: Fri, 13 Mar 2026 10:34:49 -0400 Subject: [PATCH 2/7] fix(clawdbot): align workflow lane ordering and codex defaults --- docs/contributor/ai-tooling.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/contributor/ai-tooling.md b/docs/contributor/ai-tooling.md index ad46e98ca7de..d92c44b39b01 100644 --- a/docs/contributor/ai-tooling.md +++ b/docs/contributor/ai-tooling.md @@ -51,6 +51,7 @@ For non-trivial coding work: - create and approve `.workflow/plan.md` - use `rp-cli` / RepoPrompt for ANCHOR and REVIEW when available - follow `ANCHOR -> EXECUTE -> REVIEW -> TEST -> GATE` +- runtime workflow-lane enforcement is narrower than the procedural `.workflow/` gate: it enforces stage ordering around mutation and finalization, but it does not replace PRD approval, plan approval, or deploy authority Canonical workflow source: `/Users/lionheart/clawd/workflows/cody/cody_Workflow-SKILL.md` From 26b864a9ec922ab0017d8d5c91127c930eb6c61a Mon Sep 17 00:00:00 2001 From: Bryan Fisher Date: Fri, 13 Mar 2026 10:48:09 -0400 Subject: [PATCH 3/7] fix(cron): preserve isolated agent routing for self-learning Preserve explicit agent IDs for isolated cron jobs instead of silently downgrading to main. Harden wake routing for legacy unqualified session keys so the target session is not dropped. Add regression coverage for isolated Maclern cron runs and guard against agent:main fallback. --- src/gateway/server-cron.test.ts | 71 +++++++++++++++++++++++++++++++++ src/gateway/server-cron.ts | 19 ++++----- 2 files changed, 81 insertions(+), 9 deletions(-) diff --git a/src/gateway/server-cron.test.ts b/src/gateway/server-cron.test.ts index 945840a71062..4a1c33982964 100644 --- a/src/gateway/server-cron.test.ts +++ b/src/gateway/server-cron.test.ts @@ -9,6 +9,7 @@ const enqueueSystemEventMock = vi.fn(); const requestHeartbeatNowMock = vi.fn(); const loadConfigMock = vi.fn(); const fetchWithSsrFGuardMock = vi.fn(); +const runCronIsolatedAgentTurnMock = vi.fn(); vi.mock("../infra/system-events.js", () => ({ enqueueSystemEvent: (...args: unknown[]) => enqueueSystemEventMock(...args), @@ -30,6 +31,10 @@ vi.mock("../infra/net/fetch-guard.js", () => ({ fetchWithSsrFGuard: (...args: unknown[]) => fetchWithSsrFGuardMock(...args), })); +vi.mock("../cron/isolated-agent.js", () => ({ + runCronIsolatedAgentTurn: (...args: unknown[]) => runCronIsolatedAgentTurnMock(...args), +})); + import { buildGatewayCronService } from "./server-cron.js"; describe("buildGatewayCronService", () => { @@ -38,6 +43,7 @@ describe("buildGatewayCronService", () => { requestHeartbeatNowMock.mockClear(); loadConfigMock.mockClear(); fetchWithSsrFGuardMock.mockClear(); + runCronIsolatedAgentTurnMock.mockReset(); }); it("routes main-target jobs to the scoped session for enqueue + wake", async () => { @@ -86,6 +92,71 @@ describe("buildGatewayCronService", () => { } }); + it("preserves explicit isolated cron agent IDs even when they are missing from agents.list", async () => { + const tmpDir = path.join(os.tmpdir(), `server-cron-isolated-${Date.now()}`); + const cfg = { + session: { + mainKey: "main", + }, + cron: { + store: path.join(tmpDir, "cron.json"), + }, + agents: { + list: [{ id: "main", default: true }], + }, + } as OpenClawConfig; + loadConfigMock.mockReturnValue(cfg); + runCronIsolatedAgentTurnMock.mockResolvedValue({ + status: "ok", + summary: "digest ready", + }); + + const state = buildGatewayCronService({ + cfg, + deps: {} as CliDeps, + broadcast: () => {}, + }); + try { + const job = await state.cron.add({ + name: "isolated-maclern-digest", + enabled: true, + schedule: { kind: "at", at: new Date(1).toISOString() }, + sessionTarget: "isolated", + wakeMode: "now", + agentId: "maclern", + payload: { kind: "agentTurn", message: "Review overnight training outputs." }, + }); + + await state.cron.run(job.id, "force"); + + expect(runCronIsolatedAgentTurnMock).toHaveBeenCalledWith( + expect.objectContaining({ + agentId: "maclern", + sessionKey: `cron:${job.id}`, + job: expect.objectContaining({ agentId: "maclern" }), + }), + ); + expect(enqueueSystemEventMock).toHaveBeenCalledWith( + "Cron: digest ready", + expect.objectContaining({ + sessionKey: "agent:maclern:main", + }), + ); + for (const [, payload] of enqueueSystemEventMock.mock.calls) { + expect(payload?.sessionKey).not.toMatch(/^agent:main:/); + } + expect(requestHeartbeatNowMock).toHaveBeenCalledWith( + expect.objectContaining({ + agentId: "maclern", + sessionKey: undefined, + reason: `cron:${job.id}`, + }), + ); + } finally { + state.cron.stop(); + } + }); + it("blocks private webhook URLs via SSRF-guarded fetch", async () => { const tmpDir = path.join(os.tmpdir(), `server-cron-ssrf-${Date.now()}`); const cfg = { diff --git a/src/gateway/server-cron.ts b/src/gateway/server-cron.ts index 1f1cd1f5359e..e932cc68ce25 100644 --- a/src/gateway/server-cron.ts +++ b/src/gateway/server-cron.ts @@ -154,14 +154,11 @@ export function buildGatewayCronService(params: { const runtimeConfig = loadConfig(); const normalized = typeof requested === "string" && requested.trim() ? normalizeAgentId(requested) : undefined; - const hasAgent = - normalized !== undefined && - Array.isArray(runtimeConfig.agents?.list) && - runtimeConfig.agents.list.some( - (entry) => - entry && typeof entry.id === "string" && normalizeAgentId(entry.id) === normalized, - ); - const agentId = hasAgent ? normalized : resolveDefaultAgentId(runtimeConfig); + // Preserve explicit cron agent IDs even when the runtime config is missing + // an agents.list entry. Main-session jobs are already constrained to the + // default agent during cron job validation; isolated runs should stay + // agent-scoped instead of silently downgrading to main. + const agentId = normalized ?? resolveDefaultAgentId(runtimeConfig); return { agentId, cfg: runtimeConfig }; }; @@ -207,7 +204,11 @@ export function buildGatewayCronService(params: { (opts?.sessionKey ? normalizeAgentId(resolveAgentIdFromSessionKey(opts.sessionKey)) : undefined); - const agentId = derivedAgentId || undefined; + // If a caller supplies an unqualified legacy session key without an + // explicit agent ID, keep wake routing on the default agent instead of + // dropping the target session entirely. + const agentId = + derivedAgentId || (opts?.sessionKey ? resolveDefaultAgentId(runtimeConfig) : undefined); const sessionKey = opts?.sessionKey && agentId ? resolveCronSessionKey({ From c4dbbddc2cbf984348bfbd2f4247dd46d70ee894 Mon Sep 17 00:00:00 2001 From: Bryan Fisher Date: Sat, 14 Mar 2026 15:51:44 -0400 Subject: [PATCH 4/7] chore(clawdbot): preserve shared ai context adapters wip --- .../src/zulip/components-registry.test.ts | 133 ++++++ .../zulip/src/zulip/components-registry.ts | 419 ++++++++++++++++-- extensions/zulip/src/zulip/components.ts | 6 +- .../zulip/src/zulip/exec-approvals.test.ts | 110 ++++- extensions/zulip/src/zulip/exec-approvals.ts | 204 ++++++++- extensions/zulip/src/zulip/index.ts | 5 +- .../zulip/src/zulip/model-picker.test.ts | 62 ++- extensions/zulip/src/zulip/model-picker.ts | 27 +- extensions/zulip/src/zulip/monitor.ts | 87 ++-- extensions/zulip/src/zulip/send-components.ts | 4 +- src/agents/pi-embedded-runner/lanes.ts | 2 +- src/agents/tools/cron-tool.test.ts | 9 +- src/agents/tools/cron-tool.ts | 2 + src/auto-reply/reply/commands-models.test.ts | 34 +- src/auto-reply/reply/commands-models.ts | 41 +- .../reply/commands-zulip-shared.test.ts | 170 +++++++ .../isolated-agent/run.skill-filter.test.ts | 25 ++ src/cron/normalize.test.ts | 56 +++ src/cron/normalize.ts | 33 ++ src/cron/service.jobs.test.ts | 16 + src/cron/service/jobs.ts | 10 + src/gateway/protocol/cron-validators.test.ts | 16 + src/gateway/protocol/schema/cron.ts | 21 + 23 files changed, 1376 insertions(+), 116 deletions(-) create mode 100644 extensions/zulip/src/zulip/components-registry.test.ts create mode 100644 src/auto-reply/reply/commands-zulip-shared.test.ts diff --git a/extensions/zulip/src/zulip/components-registry.test.ts b/extensions/zulip/src/zulip/components-registry.test.ts new file mode 100644 index 000000000000..1ab27e1bf1c9 --- /dev/null +++ b/extensions/zulip/src/zulip/components-registry.test.ts @@ -0,0 +1,133 @@ +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; +import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import { + __testing, + claimZulipComponentEntry, + consumeZulipComponentMessageEntries, + loadZulipComponentRegistry, + registerZulipComponentEntries, +} from "./components-registry.js"; + +function buildEntry(overrides: Partial[0]["entries"][number]> = {}) { + return { + id: overrides.id ?? "btn_1", + label: overrides.label ?? "Approve", + style: overrides.style ?? "primary", + sessionKey: overrides.sessionKey ?? "sess-1", + agentId: overrides.agentId ?? "archie", + accountId: overrides.accountId ?? "zulip-test", + callbackData: overrides.callbackData, + replyTo: overrides.replyTo ?? "stream:ops:topic:deploy", + chatType: overrides.chatType ?? "channel", + allowedUsers: overrides.allowedUsers, + reusable: overrides.reusable, + }; +} + +describe("components-registry", () => { + let stateDir: string; + const originalStateDir = process.env.OPENCLAW_STATE_DIR; + + beforeEach(() => { + stateDir = fs.mkdtempSync(path.join(os.tmpdir(), "zulip-components-")); + process.env.OPENCLAW_STATE_DIR = stateDir; + __testing.resetRegistries(); + }); + + afterEach(() => { + __testing.resetRegistries(); + if (originalStateDir == null) { + delete process.env.OPENCLAW_STATE_DIR; + } else { + process.env.OPENCLAW_STATE_DIR = originalStateDir; + } + fs.rmSync(stateDir, { recursive: true, force: true }); + }); + + it("persists entries and reloads them after registry reset", async () => { + await registerZulipComponentEntries({ + entries: [buildEntry()], + messageId: 42, + }); + + __testing.resetRegistries(); + await loadZulipComponentRegistry("zulip-test"); + + const claim = await claimZulipComponentEntry({ + accountId: "zulip-test", + id: "btn_1", + senderId: 7, + }); + + expect(claim.kind).toBe("ok"); + if (claim.kind === "ok") { + expect(claim.entry.messageId).toBe(42); + expect(claim.entry.replyTo).toBe("stream:ops:topic:deploy"); + } + }); + + it("consumes sibling buttons at message scope", async () => { + await registerZulipComponentEntries({ + entries: [buildEntry({ id: "btn_a" }), buildEntry({ id: "btn_b", label: "Deny" })], + messageId: 99, + }); + + const firstClaim = await claimZulipComponentEntry({ + accountId: "zulip-test", + id: "btn_a", + senderId: 7, + }); + expect(firstClaim.kind).toBe("ok"); + + await consumeZulipComponentMessageEntries({ accountId: "zulip-test", messageId: 99 }); + + const siblingClaim = await claimZulipComponentEntry({ + accountId: "zulip-test", + id: "btn_b", + senderId: 7, + }); + expect(siblingClaim).toEqual({ kind: "consumed" }); + }); + + it("returns unauthorized without consuming the widget", async () => { + await registerZulipComponentEntries({ + entries: [buildEntry({ allowedUsers: [42] })], + messageId: 13, + }); + + const unauthorized = await claimZulipComponentEntry({ + accountId: "zulip-test", + id: "btn_1", + senderId: 7, + }); + expect(unauthorized.kind).toBe("unauthorized"); + + const authorized = await claimZulipComponentEntry({ + accountId: "zulip-test", + id: "btn_1", + senderId: 42, + }); + expect(authorized.kind).toBe("ok"); + }); + + it("prunes expired entries on reload", async () => { + await registerZulipComponentEntries({ + entries: [buildEntry()], + messageId: 5, + callbackExpiresAtMs: Date.now() - 1_000, + }); + + __testing.resetRegistries(); + await loadZulipComponentRegistry("zulip-test"); + + const claim = await claimZulipComponentEntry({ + accountId: "zulip-test", + id: "btn_1", + senderId: 7, + }); + + expect(claim).toEqual({ kind: "missing" }); + }); +}); diff --git a/extensions/zulip/src/zulip/components-registry.ts b/extensions/zulip/src/zulip/components-registry.ts index e7c2c0692781..3da465789d69 100644 --- a/extensions/zulip/src/zulip/components-registry.ts +++ b/extensions/zulip/src/zulip/components-registry.ts @@ -1,7 +1,9 @@ -/** - * In-memory component registry with TTL. - * Mirrors src/discord/components-registry.ts for the Zulip channel. - */ +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; +import { resolveStateDir } from "../../../../src/config/paths.js"; +import { createAsyncLock, writeJsonAtomic } from "../../../../src/infra/json-files.js"; +import { normalizeAccountId } from "../../../../src/routing/session-key.js"; export type ZulipComponentEntry = { /** Unique button ID (e.g. "btn_abc123") */ @@ -32,64 +34,395 @@ export type ZulipComponentEntry = { expiresAt?: number; }; +export type StoredZulipComponentEntry = Omit & { + createdAtMs: number; + expiresAtMs?: number; + state: "active" | "consumed"; + consumedAtMs?: number; +}; + +export type ZulipComponentClaimResult = + | { kind: "ok"; entry: StoredZulipComponentEntry } + | { kind: "unauthorized"; entry: StoredZulipComponentEntry } + | { kind: "missing" | "expired" | "consumed" }; + +type StoredZulipComponentRegistryFile = { + version: 1; + entries: Record; +}; + const DEFAULT_COMPONENT_TTL_MS = 30 * 60 * 1000; // 30 minutes +const STORE_VERSION = 1; -const componentEntries = new Map(); +function normalizeTimestamp(raw: unknown, fallback: number): number { + return typeof raw === "number" && Number.isFinite(raw) ? Math.max(0, Math.floor(raw)) : fallback; +} + +function normalizePositiveOptionalNumber(raw: unknown): number | undefined { + return typeof raw === "number" && Number.isFinite(raw) && raw > 0 ? Math.floor(raw) : undefined; +} -function isExpired(entry: { expiresAt?: number }, now: number): boolean { - return typeof entry.expiresAt === "number" && entry.expiresAt <= now; +function normalizeAllowedUsers(raw: unknown): number[] | undefined { + if (!Array.isArray(raw)) { + return undefined; + } + const ids = raw + .map((value) => (typeof value === "number" ? value : Number.parseInt(String(value), 10))) + .filter((value) => Number.isFinite(value) && value > 0); + return ids.length > 0 ? Array.from(new Set(ids)) : undefined; } -function normalizeEntryTimestamps( - entry: T, +function isExpired(entry: { expiresAtMs?: number }, now: number): boolean { + return typeof entry.expiresAtMs === "number" && entry.expiresAtMs <= now; +} + +function toStoredEntry( + entry: ZulipComponentEntry, now: number, ttlMs: number, -): T { - const createdAt = entry.createdAt ?? now; - const expiresAt = entry.expiresAt ?? createdAt + ttlMs; - return { ...entry, createdAt, expiresAt }; + messageId?: number, + callbackExpiresAtMs?: number, +): StoredZulipComponentEntry { + const createdAtMs = normalizeTimestamp(entry.createdAt, now); + const expiresAtMs = + normalizePositiveOptionalNumber(callbackExpiresAtMs) ?? + normalizePositiveOptionalNumber(entry.expiresAt) ?? + createdAtMs + ttlMs; + return { + id: entry.id, + label: entry.label, + style: entry.style, + sessionKey: entry.sessionKey, + agentId: entry.agentId, + accountId: normalizeAccountId(entry.accountId), + callbackData: entry.callbackData, + messageId: normalizePositiveOptionalNumber(messageId ?? entry.messageId), + replyTo: entry.replyTo, + chatType: entry.chatType, + reusable: entry.reusable, + allowedUsers: normalizeAllowedUsers(entry.allowedUsers), + createdAtMs, + expiresAtMs, + state: "active", + }; +} + +function readStoredEntry(raw: unknown, now: number): StoredZulipComponentEntry | null { + if (!raw || typeof raw !== "object" || Array.isArray(raw)) { + return null; + } + const entry = raw as Record; + const id = typeof entry.id === "string" ? entry.id.trim() : ""; + const label = typeof entry.label === "string" ? entry.label : ""; + const style = typeof entry.style === "string" ? entry.style : "primary"; + const sessionKey = typeof entry.sessionKey === "string" ? entry.sessionKey.trim() : ""; + const agentId = typeof entry.agentId === "string" ? entry.agentId.trim() : ""; + const accountId = normalizeAccountId(entry.accountId); + if (!id || !label || !sessionKey || !agentId) { + return null; + } + const createdAtMs = normalizeTimestamp(entry.createdAtMs, now); + const expiresAtMs = normalizePositiveOptionalNumber(entry.expiresAtMs); + return { + id, + label, + style, + sessionKey, + agentId, + accountId, + callbackData: typeof entry.callbackData === "string" ? entry.callbackData : undefined, + messageId: normalizePositiveOptionalNumber(entry.messageId), + replyTo: typeof entry.replyTo === "string" ? entry.replyTo : undefined, + chatType: entry.chatType === "direct" ? "direct" : entry.chatType === "channel" ? "channel" : undefined, + reusable: typeof entry.reusable === "boolean" ? entry.reusable : undefined, + allowedUsers: normalizeAllowedUsers(entry.allowedUsers), + createdAtMs, + expiresAtMs, + state: entry.state === "consumed" ? "consumed" : "active", + consumedAtMs: normalizePositiveOptionalNumber(entry.consumedAtMs), + }; } -export function registerZulipComponentEntries(params: { +function resolveStorePath(accountId: string): string { + const stateDir = resolveStateDir(process.env, os.homedir); + return path.join(stateDir, "zulip", `components-${normalizeAccountId(accountId)}.json`); +} + +class ZulipComponentRegistry { + readonly accountId: string; + private readonly storePath: string; + private readonly entriesById = new Map(); + private readonly persistLock = createAsyncLock(); + private loadPromise: Promise | null = null; + + constructor(accountId: string) { + this.accountId = normalizeAccountId(accountId); + this.storePath = resolveStorePath(this.accountId); + } + + async ensureLoaded(): Promise { + this.loadPromise ??= this.loadFromDisk(); + await this.loadPromise; + } + + private async loadFromDisk(): Promise { + const now = Date.now(); + let changed = false; + try { + if (!fs.existsSync(this.storePath)) { + return; + } + const raw = JSON.parse(fs.readFileSync(this.storePath, "utf8")) as StoredZulipComponentRegistryFile; + if (raw?.version !== STORE_VERSION || !raw.entries || typeof raw.entries !== "object") { + return; + } + for (const [id, value] of Object.entries(raw.entries)) { + const entry = readStoredEntry(value, now); + if (!entry) { + changed = true; + continue; + } + if (entry.accountId !== this.accountId) { + changed = true; + continue; + } + if (isExpired(entry, now)) { + changed = true; + continue; + } + this.entriesById.set(id, entry); + } + } catch { + // Start empty on read failure; do not crash monitor startup. + return; + } + if (changed) { + await this.persist(); + } + } + + private async persist(): Promise { + await this.persistLock(async () => { + const entries: Record = {}; + for (const [id, entry] of this.entriesById) { + entries[id] = entry; + } + const payload: StoredZulipComponentRegistryFile = { + version: STORE_VERSION, + entries, + }; + try { + await writeJsonAtomic(this.storePath, payload, { + mode: 0o600, + trailingNewline: true, + ensureDirMode: 0o700, + }); + } catch { + // Keep in-memory registry alive if persistence fails. + } + }); + } + + async registerEntries(params: { + entries: ZulipComponentEntry[]; + ttlMs?: number; + messageId?: number; + callbackExpiresAtMs?: number; + }): Promise { + await this.ensureLoaded(); + const now = Date.now(); + for (const [id, entry] of this.entriesById) { + if (isExpired(entry, now)) { + this.entriesById.delete(id); + } + } + const ttlMs = params.ttlMs ?? DEFAULT_COMPONENT_TTL_MS; + for (const entry of params.entries) { + const stored = toStoredEntry( + entry, + now, + ttlMs, + params.messageId, + params.callbackExpiresAtMs, + ); + this.entriesById.set(stored.id, stored); + } + await this.persist(); + } + + async claimEntry(params: { id: string; senderId: number }): Promise { + await this.ensureLoaded(); + const entry = this.entriesById.get(params.id); + if (!entry) { + return { kind: "missing" }; + } + const now = Date.now(); + if (isExpired(entry, now)) { + this.entriesById.delete(params.id); + await this.persist(); + return { kind: "expired" }; + } + if (entry.state === "consumed") { + return { kind: "consumed" }; + } + if (entry.allowedUsers?.length && !entry.allowedUsers.includes(params.senderId)) { + return { kind: "unauthorized", entry }; + } + return { kind: "ok", entry }; + } + + async consumeMessageEntries(messageId: number): Promise { + await this.ensureLoaded(); + const now = Date.now(); + let count = 0; + for (const [id, entry] of this.entriesById) { + if (entry.messageId !== messageId || entry.state === "consumed") { + continue; + } + this.entriesById.set(id, { + ...entry, + state: "consumed", + consumedAtMs: now, + }); + count += 1; + } + if (count > 0) { + await this.persist(); + } + return count; + } + + async removeMessageEntries(messageId: number): Promise { + await this.ensureLoaded(); + let count = 0; + for (const [id, entry] of this.entriesById) { + if (entry.messageId !== messageId) { + continue; + } + this.entriesById.delete(id); + count += 1; + } + if (count > 0) { + await this.persist(); + } + return count; + } + + async removeEntry(id: string): Promise { + await this.ensureLoaded(); + const deleted = this.entriesById.delete(id); + if (deleted) { + await this.persist(); + } + return deleted; + } + + async pruneExpired(now = Date.now()): Promise { + await this.ensureLoaded(); + let count = 0; + for (const [id, entry] of this.entriesById) { + if (!isExpired(entry, now)) { + continue; + } + this.entriesById.delete(id); + count += 1; + } + if (count > 0) { + await this.persist(); + } + return count; + } + + async clear(): Promise { + await this.ensureLoaded(); + if (this.entriesById.size === 0) { + return; + } + this.entriesById.clear(); + await this.persist(); + } + + getEntryForTesting(id: string): StoredZulipComponentEntry | undefined { + return this.entriesById.get(id); + } +} + +const registriesByAccountId = new Map(); + +function getRegistry(accountId?: string): ZulipComponentRegistry { + const normalized = normalizeAccountId(accountId); + const existing = registriesByAccountId.get(normalized); + if (existing) { + return existing; + } + const created = new ZulipComponentRegistry(normalized); + registriesByAccountId.set(normalized, created); + return created; +} + +export async function loadZulipComponentRegistry(accountId?: string): Promise { + await getRegistry(accountId).ensureLoaded(); +} + +export async function registerZulipComponentEntries(params: { entries: ZulipComponentEntry[]; ttlMs?: number; messageId?: number; -}): void { - const now = Date.now(); - const ttlMs = params.ttlMs ?? DEFAULT_COMPONENT_TTL_MS; - for (const entry of params.entries) { - const normalized = normalizeEntryTimestamps( - { ...entry, messageId: params.messageId ?? entry.messageId }, - now, - ttlMs, - ); - componentEntries.set(entry.id, normalized); + callbackExpiresAtMs?: number; +}): Promise { + const entry = params.entries[0]; + if (!entry) { + return; } + await getRegistry(entry.accountId).registerEntries(params); } -export function resolveZulipComponentEntry(params: { +export async function claimZulipComponentEntry(params: { + accountId?: string; id: string; - consume?: boolean; -}): ZulipComponentEntry | null { - const entry = componentEntries.get(params.id); - if (!entry) { - return null; - } - const now = Date.now(); - if (isExpired(entry, now)) { - componentEntries.delete(params.id); - return null; + senderId: number; +}): Promise { + return await getRegistry(params.accountId).claimEntry({ id: params.id, senderId: params.senderId }); +} + +export async function consumeZulipComponentMessageEntries(params: { + accountId?: string; + messageId: number; +}): Promise { + return await getRegistry(params.accountId).consumeMessageEntries(params.messageId); +} + +export async function removeZulipComponentMessageEntries(params: { + accountId?: string; + messageId: number; +}): Promise { + return await getRegistry(params.accountId).removeMessageEntries(params.messageId); +} + +export async function removeZulipComponentEntry(id: string, accountId?: string): Promise { + if (accountId) { + return await getRegistry(accountId).removeEntry(id); } - if (params.consume !== false) { - componentEntries.delete(params.id); + let deleted = false; + for (const registry of registriesByAccountId.values()) { + deleted = (await registry.removeEntry(id)) || deleted; } - return entry; + return deleted; } -export function removeZulipComponentEntry(id: string): void { - componentEntries.delete(id); +export async function clearZulipComponentEntries(accountId?: string): Promise { + if (accountId) { + await getRegistry(accountId).clear(); + return; + } + await Promise.all([...registriesByAccountId.values()].map((registry) => registry.clear())); } -export function clearZulipComponentEntries(): void { - componentEntries.clear(); -} +export const __testing = { + getRegistryForAccount(accountId?: string) { + return getRegistry(accountId); + }, + resetRegistries() { + registriesByAccountId.clear(); + }, +}; diff --git a/extensions/zulip/src/zulip/components.ts b/extensions/zulip/src/zulip/components.ts index 28b57830e2d3..3125aa1ecc9c 100644 --- a/extensions/zulip/src/zulip/components.ts +++ b/extensions/zulip/src/zulip/components.ts @@ -179,7 +179,11 @@ export function readZulipComponentSpec(raw: unknown): ZulipComponentSpec { : undefined, style, reusable: typeof b.reusable === "boolean" ? b.reusable : undefined, - allowedUsers: Array.isArray(b.allowedUsers) ? b.allowedUsers : undefined, + allowedUsers: Array.isArray(b.allowedUsers) + ? b.allowedUsers + : Array.isArray(b.allowed_users) + ? b.allowed_users + : undefined, }); } diff --git a/extensions/zulip/src/zulip/exec-approvals.test.ts b/extensions/zulip/src/zulip/exec-approvals.test.ts index 268314164f47..b9df264c7ab4 100644 --- a/extensions/zulip/src/zulip/exec-approvals.test.ts +++ b/extensions/zulip/src/zulip/exec-approvals.test.ts @@ -1,7 +1,7 @@ import fs from "node:fs"; import os from "node:os"; import path from "node:path"; -import { beforeEach, describe, expect, it, vi } from "vitest"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import { clearSessionStoreCacheForTest } from "../../../../src/config/sessions.js"; import type { ZulipExecApprovalConfig } from "../types.js"; import type { ZulipUser } from "./client.js"; @@ -19,6 +19,7 @@ const mockState = vi.hoisted(() => ({ sendMessageZulip: vi.fn(async () => ({ messageId: "43", target: "dm:123" })), fetchZulipUsers: vi.fn<() => Promise>(async () => []), updateZulipMessage: vi.fn(async () => ({ result: "success" })), + removeZulipComponentMessageEntries: vi.fn(async () => 0), buildGatewayConnectionDetails: vi.fn(() => ({ url: "ws://127.0.0.1:18789", urlSource: "local loopback", @@ -56,6 +57,10 @@ vi.mock("./client.js", async () => { }; }); +vi.mock("./components-registry.js", () => ({ + removeZulipComponentMessageEntries: mockState.removeZulipComponentMessageEntries, +})); + vi.mock("../../../../src/gateway/call.js", () => ({ buildGatewayConnectionDetails: mockState.buildGatewayConnectionDetails, })); @@ -86,6 +91,10 @@ function writeStore(store: Record) { clearSessionStoreCacheForTest(); } +function pendingApprovalsPath(accountId = "default") { + return path.join(stateDir, "zulip", `exec-approvals-${accountId}.json`); +} + function createRequest( overrides: Partial<{ command: string; @@ -168,13 +177,19 @@ function clearPendingTimeouts(handler: ZulipExecApprovalHandler) { internals.pending.clear(); } +let stateDir: string; +const originalStateDir = process.env.OPENCLAW_STATE_DIR; + beforeEach(() => { + stateDir = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-zulip-approvals-state-")); + process.env.OPENCLAW_STATE_DIR = stateDir; writeStore({}); vi.clearAllMocks(); mockState.sendZulipComponentMessage.mockResolvedValue({ messageId: "42", target: "dm:123" }); mockState.sendMessageZulip.mockResolvedValue({ messageId: "43", target: "dm:123" }); mockState.fetchZulipUsers.mockResolvedValue([]); mockState.updateZulipMessage.mockResolvedValue({ result: "success" }); + mockState.removeZulipComponentMessageEntries.mockResolvedValue(0); mockState.buildGatewayConnectionDetails.mockReturnValue({ url: "ws://127.0.0.1:18789", urlSource: "local loopback", @@ -187,6 +202,15 @@ beforeEach(() => { mockState.gatewayClientRequests.mockResolvedValue({ ok: true }); }); +afterEach(() => { + if (originalStateDir == null) { + delete process.env.OPENCLAW_STATE_DIR; + } else { + process.env.OPENCLAW_STATE_DIR = originalStateDir; + } + fs.rmSync(stateDir, { recursive: true, force: true }); +}); + describe("zulip exec approval callback data", () => { it("round-trips callback data", () => { const callbackData = buildZulipExecApprovalCallbackData("approval:1/2", "allow-always"); @@ -336,6 +360,82 @@ describe("ZulipExecApprovalHandler", () => { await handler.stop(); }); + it("persists pending approvals and reloads them on restart", async () => { + const request = createRequest(); + const firstHandler = createHandler({ enabled: true, approvers: [123], target: "dm" }); + await getInternals(firstHandler).handleApprovalRequested(request); + clearPendingTimeouts(firstHandler); + + const persisted = JSON.parse(fs.readFileSync(pendingApprovalsPath(), "utf8")) as { + approvals: Record; + }; + expect(Object.keys(persisted.approvals)).toContain(request.id); + + const secondHandler = createHandler({ enabled: true, approvers: [123], target: "dm" }); + await secondHandler.start(); + + expect(getInternals(secondHandler).pending.has(request.id)).toBe(true); + + await getInternals(secondHandler).handleApprovalResolved({ + id: request.id, + decision: "allow-once", + resolvedBy: "restart-test", + ts: Date.now(), + }); + + expect(mockState.updateZulipMessage).toHaveBeenCalledWith( + expect.any(Object), + expect.objectContaining({ messageId: 42, content: expect.stringContaining("Allowed (once)") }), + ); + expect(mockState.removeZulipComponentMessageEntries).toHaveBeenCalledWith({ + accountId: "default", + messageId: 42, + }); + expect(getInternals(secondHandler).pending.has(request.id)).toBe(false); + + await secondHandler.stop(); + clearPendingTimeouts(secondHandler); + }); + + it("expires persisted stale approvals on startup", async () => { + const request = createRequest(); + fs.mkdirSync(path.dirname(pendingApprovalsPath()), { recursive: true }); + fs.writeFileSync( + pendingApprovalsPath(), + `${JSON.stringify( + { + version: 1, + approvals: { + [request.id]: { + id: request.id, + request: { ...request, expiresAtMs: Date.now() - 1_000 }, + messages: [{ messageId: 42, target: "dm:123" }], + expiresAtMs: Date.now() - 1_000, + }, + }, + }, + null, + 2, + )}\n`, + "utf8", + ); + + const handler = createHandler({ enabled: true, approvers: [123], target: "dm" }); + await handler.start(); + + expect(mockState.updateZulipMessage).toHaveBeenCalledWith( + expect.any(Object), + expect.objectContaining({ messageId: 42, content: expect.stringContaining("expired") }), + ); + expect(mockState.removeZulipComponentMessageEntries).toHaveBeenCalledWith({ + accountId: "default", + messageId: 42, + }); + expect(getInternals(handler).pending.has(request.id)).toBe(false); + + await handler.stop(); + }); + it("rejects unauthorized approval callbacks without consuming the button", async () => { const handler = createHandler({ enabled: true, approvers: [123], target: "dm" }); await handler.start(); @@ -370,6 +470,10 @@ describe("ZulipExecApprovalHandler", () => { content: expect.stringContaining("Allowed (always)"), }), ); + expect(mockState.removeZulipComponentMessageEntries).toHaveBeenCalledWith({ + accountId: "default", + messageId: 42, + }); }); it("finalizes approval messages on timeout", async () => { @@ -386,6 +490,10 @@ describe("ZulipExecApprovalHandler", () => { content: expect.stringContaining("expired"), }), ); + expect(mockState.removeZulipComponentMessageEntries).toHaveBeenCalledWith({ + accountId: "default", + messageId: 42, + }); }); it("filters requests to the configured Zulip account when turn-source account is present", () => { diff --git a/extensions/zulip/src/zulip/exec-approvals.ts b/extensions/zulip/src/zulip/exec-approvals.ts index 91629b99e53a..53c95e618a97 100644 --- a/extensions/zulip/src/zulip/exec-approvals.ts +++ b/extensions/zulip/src/zulip/exec-approvals.ts @@ -1,4 +1,8 @@ +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; import type { OpenClawConfig, RuntimeEnv } from "openclaw/plugin-sdk"; +import { resolveStateDir } from "../../../../src/config/paths.js"; import { loadSessionStore, resolveStorePath } from "../../../../src/config/sessions.js"; import { buildGatewayConnectionDetails } from "../../../../src/gateway/call.js"; import { GatewayClient } from "../../../../src/gateway/client.js"; @@ -26,6 +30,8 @@ import { import type { ZulipExecApprovalConfig } from "../types.js"; import type { ZulipClient } from "./client.js"; import { updateZulipMessage } from "./client.js"; +import { removeZulipComponentMessageEntries } from "./components-registry.js"; +import { writeJsonAtomic } from "../../../../src/infra/json-files.js"; import { resolveZulipUserInputs } from "./resolve-users.js"; import { sendZulipComponentMessage } from "./send-components.js"; import { sendMessageZulip } from "./send.js"; @@ -33,6 +39,7 @@ import { sendMessageZulip } from "./send.js"; const DEFAULT_TARGET = "dm" as const; const DEFAULT_APPROVAL_AGENT_ID = "main"; const DEFAULT_STREAM_TOPIC = "exec-approvals"; +const STORE_VERSION = 1; export const ZULIP_EXEC_APPROVAL_CALLBACK_PREFIX = "exec_approval:"; @@ -47,6 +54,18 @@ type PendingApproval = { timeoutId: NodeJS.Timeout; }; +type StoredPendingApproval = { + id: string; + request: ExecApprovalRequest; + messages: PendingApprovalMessage[]; + expiresAtMs: number; +}; + +type StoredPendingApprovalsFile = { + version: 1; + approvals: Record; +}; + export type ZulipExecApprovalHandlerOpts = { client: ZulipClient; accountId: string; @@ -313,15 +332,61 @@ function resolveConfiguredApprovalStreamTarget(config: ZulipExecApprovalConfig): return `stream:${stream}:topic:${topic}`; } +function resolvePendingApprovalsPath(accountId: string): string { + const stateDir = resolveStateDir(process.env, os.homedir); + return path.join(stateDir, "zulip", `exec-approvals-${normalizeAccountId(accountId)}.json`); +} + +function readStoredPendingApproval(raw: unknown): StoredPendingApproval | null { + if (!raw || typeof raw !== "object" || Array.isArray(raw)) { + return null; + } + const approval = raw as Record; + const id = typeof approval.id === "string" ? approval.id.trim() : ""; + const request = approval.request as ExecApprovalRequest | undefined; + const requestId = typeof request?.id === "string" ? request.id.trim() : ""; + const requestCommand = + typeof request?.request?.command === "string" ? request.request.command.trim() : ""; + const expiresAtMs = + typeof approval.expiresAtMs === "number" && Number.isFinite(approval.expiresAtMs) + ? Math.floor(approval.expiresAtMs) + : undefined; + const messages = Array.isArray(approval.messages) + ? approval.messages + .map((entry) => { + if (!entry || typeof entry !== "object" || Array.isArray(entry)) { + return null; + } + const message = entry as Record; + const messageId = + typeof message.messageId === "number" && Number.isFinite(message.messageId) + ? Math.floor(message.messageId) + : Number.parseInt(String(message.messageId ?? ""), 10); + const target = typeof message.target === "string" ? message.target.trim() : ""; + if (!Number.isFinite(messageId) || messageId <= 0 || !target) { + return null; + } + return { messageId, target } satisfies PendingApprovalMessage; + }) + .filter((entry): entry is PendingApprovalMessage => Boolean(entry)) + : []; + if (!id || !request || !requestId || !requestCommand || !expiresAtMs || messages.length === 0) { + return null; + } + return { id, request, messages, expiresAtMs }; +} + export class ZulipExecApprovalHandler { private gatewayClient: GatewayClient | null = null; private pending = new Map(); private opts: ZulipExecApprovalHandlerOpts; private started = false; private approverUserIds: number[] = []; + private readonly storePath: string; constructor(opts: ZulipExecApprovalHandlerOpts) { this.opts = opts; + this.storePath = resolvePendingApprovalsPath(opts.accountId); } shouldHandle(request: ExecApprovalRequest): boolean { @@ -369,6 +434,103 @@ export class ZulipExecApprovalHandler { return true; } + private createTimeout(approvalId: string, expiresAtMs: number): NodeJS.Timeout { + const timeoutId = setTimeout( + () => { + void this.handleApprovalTimeout(approvalId); + }, + Math.max(0, expiresAtMs - Date.now()), + ); + timeoutId.unref?.(); + return timeoutId; + } + + private async persistPendingApprovals(): Promise { + const approvals: Record = {}; + for (const [id, pending] of this.pending) { + approvals[id] = { + id, + request: pending.request, + messages: pending.messages, + expiresAtMs: pending.request.expiresAtMs, + }; + } + const payload: StoredPendingApprovalsFile = { + version: STORE_VERSION, + approvals, + }; + try { + await writeJsonAtomic(this.storePath, payload, { + mode: 0o600, + trailingNewline: true, + ensureDirMode: 0o700, + }); + } catch (err) { + this.opts.runtime?.error?.( + `zulip exec approvals: failed to persist pending approvals: ${String(err)}`, + ); + } + } + + private async removePendingApproval(id: string): Promise { + const pending = this.pending.get(id); + if (pending) { + clearTimeout(pending.timeoutId); + this.pending.delete(id); + } + await this.persistPendingApprovals(); + } + + private async loadPendingApprovals(): Promise { + try { + if (!fs.existsSync(this.storePath)) { + return; + } + const raw = JSON.parse(fs.readFileSync(this.storePath, "utf8")) as StoredPendingApprovalsFile; + if (raw?.version !== STORE_VERSION || !raw.approvals || typeof raw.approvals !== "object") { + return; + } + const now = Date.now(); + let changed = false; + for (const stored of Object.values(raw.approvals)) { + const approval = readStoredPendingApproval(stored); + if (!approval) { + changed = true; + continue; + } + if (approval.expiresAtMs <= now) { + changed = true; + const content = buildExpiredMessage({ + request: approval.request, + cleanupAfterResolve: this.opts.config.cleanupAfterResolve, + }); + await Promise.allSettled( + approval.messages.map(async (message) => { + await this.updateApprovalMessage(message.messageId, content, approval.id); + await removeZulipComponentMessageEntries({ + accountId: this.opts.accountId, + messageId: message.messageId, + }); + }), + ); + continue; + } + this.pending.set(approval.id, { + request: approval.request, + messages: approval.messages, + timeoutId: this.createTimeout(approval.id, approval.expiresAtMs), + }); + } + if (changed) { + await this.persistPendingApprovals(); + } + } catch (err) { + this.opts.runtime?.error?.( + `zulip exec approvals: failed to load pending approvals: ${String(err)}`, + ); + } + } + async start(): Promise { if (this.started) { return; @@ -395,6 +557,8 @@ export class ZulipExecApprovalHandler { return; } + await this.loadPendingApprovals(); + const { url: gatewayUrl, urlSource } = buildGatewayConnectionDetails({ config: this.opts.cfg, url: this.opts.gatewayUrl, @@ -559,19 +723,12 @@ export class ZulipExecApprovalHandler { return; } - const timeoutId = setTimeout( - () => { - void this.handleApprovalTimeout(request.id); - }, - Math.max(0, request.expiresAtMs - Date.now()), - ); - timeoutId.unref?.(); - this.pending.set(request.id, { request, messages, - timeoutId, + timeoutId: this.createTimeout(request.id, request.expiresAtMs), }); + await this.persistPendingApprovals(); } private async sendApprovalPrompt(params: { @@ -615,6 +772,7 @@ export class ZulipExecApprovalHandler { accountId: this.opts.accountId, sessionKey: buildSyntheticSessionKey(params.request), agentId: params.request.request.agentId?.trim() || DEFAULT_APPROVAL_AGENT_ID, + callbackExpiresAtMs: params.request.expiresAtMs, }, ) : await sendMessageZulip( @@ -647,19 +805,21 @@ export class ZulipExecApprovalHandler { if (!pending) { return; } - clearTimeout(pending.timeoutId); - this.pending.delete(resolved.id); - const content = buildResolvedMessage({ request: pending.request, resolved, cleanupAfterResolve: this.opts.config.cleanupAfterResolve, }); await Promise.allSettled( - pending.messages.map((message) => - this.updateApprovalMessage(message.messageId, content, resolved.id), - ), + pending.messages.map(async (message) => { + await this.updateApprovalMessage(message.messageId, content, resolved.id); + await removeZulipComponentMessageEntries({ + accountId: this.opts.accountId, + messageId: message.messageId, + }); + }), ); + await this.removePendingApproval(resolved.id); } private async handleApprovalTimeout(approvalId: string): Promise { @@ -667,18 +827,20 @@ export class ZulipExecApprovalHandler { if (!pending) { return; } - clearTimeout(pending.timeoutId); - this.pending.delete(approvalId); - const content = buildExpiredMessage({ request: pending.request, cleanupAfterResolve: this.opts.config.cleanupAfterResolve, }); await Promise.allSettled( - pending.messages.map((message) => - this.updateApprovalMessage(message.messageId, content, approvalId), - ), + pending.messages.map(async (message) => { + await this.updateApprovalMessage(message.messageId, content, approvalId); + await removeZulipComponentMessageEntries({ + accountId: this.opts.accountId, + messageId: message.messageId, + }); + }), ); + await this.removePendingApproval(approvalId); } private async updateApprovalMessage( diff --git a/extensions/zulip/src/zulip/index.ts b/extensions/zulip/src/zulip/index.ts index 6c9fd8d374dd..6db6a39fb77b 100644 --- a/extensions/zulip/src/zulip/index.ts +++ b/extensions/zulip/src/zulip/index.ts @@ -11,7 +11,10 @@ export { } from "./components.js"; export { registerZulipComponentEntries, - resolveZulipComponentEntry, + loadZulipComponentRegistry, + claimZulipComponentEntry, + consumeZulipComponentMessageEntries, + removeZulipComponentMessageEntries, removeZulipComponentEntry, clearZulipComponentEntries, } from "./components-registry.js"; diff --git a/extensions/zulip/src/zulip/model-picker.test.ts b/extensions/zulip/src/zulip/model-picker.test.ts index 6025c5bd6fad..0f116a1ebb20 100644 --- a/extensions/zulip/src/zulip/model-picker.test.ts +++ b/extensions/zulip/src/zulip/model-picker.test.ts @@ -3,7 +3,10 @@ import os from "node:os"; import path from "node:path"; import { beforeEach, describe, expect, it, vi } from "vitest"; import { clearSessionStoreCacheForTest } from "../../../../src/config/sessions.js"; -import { resolveZulipModelPickerCallbackAction } from "./model-picker.js"; +import { + buildZulipModelPickerReply, + resolveZulipModelPickerCallbackAction, +} from "./model-picker.js"; const STORE_PATH = path.join(os.tmpdir(), "openclaw-zulip-model-picker-test.json"); @@ -42,12 +45,13 @@ describe("resolveZulipModelPickerCallbackAction", () => { }); }); - it("builds a provider page for mdl_prov", async () => { + it("builds a provider page for mdl_prov with invoker ACLs", async () => { const action = await resolveZulipModelPickerCallbackAction({ cfg, callbackData: "mdl_prov", agentId: "archie", sessionKey, + allowedUserIds: [42], }); expect(action).toMatchObject({ kind: "render" }); @@ -58,18 +62,27 @@ describe("resolveZulipModelPickerCallbackAction", () => { expect(action.render.spec.heading).toBe("Model Providers"); expect(action.render.spec.buttons).toEqual( expect.arrayContaining([ - expect.objectContaining({ label: "anthropic (1)", callbackData: "mdl_list_anthropic_1" }), - expect.objectContaining({ label: "openai (2)", callbackData: "mdl_list_openai_1" }), + expect.objectContaining({ + label: "anthropic (1)", + callbackData: "mdl_list_anthropic_1", + allowedUsers: [42], + }), + expect.objectContaining({ + label: "openai (2)", + callbackData: "mdl_list_openai_1", + allowedUsers: [42], + }), ]), ); }); - it("builds a model page with the current override marked", async () => { + it("builds a model page with the current override marked and keeps invoker ACLs", async () => { const action = await resolveZulipModelPickerCallbackAction({ cfg, callbackData: "mdl_list_openai_1", agentId: "archie", sessionKey, + allowedUserIds: [42], }); expect(action).toMatchObject({ kind: "render" }); @@ -79,12 +92,47 @@ describe("resolveZulipModelPickerCallbackAction", () => { expect(action.render.text).toContain("Models (openai"); expect(action.render.spec.buttons).toEqual( expect.arrayContaining([ - expect.objectContaining({ label: expect.stringContaining("gpt-4.1-mini ✓") }), - expect.objectContaining({ label: "<< Back", callbackData: "mdl_back" }), + expect.objectContaining({ + label: expect.stringContaining("gpt-4.1-mini ✓"), + allowedUsers: [42], + }), + expect.objectContaining({ + label: "<< Back", + callbackData: "mdl_back", + allowedUsers: [42], + }), ]), ); }); + it("preserves allowed users in Zulip reply payloads", () => { + const reply = buildZulipModelPickerReply({ + text: "Select a provider:", + spec: { + heading: "Model Providers", + buttons: [ + { + label: "openai (2)", + callbackData: "mdl_list_openai_1", + allowedUsers: [42], + }, + ], + }, + }); + + expect(reply.channelData?.zulip).toEqual({ + heading: "Model Providers", + buttons: [ + { + text: "openai (2)", + callback_data: "mdl_list_openai_1", + style: undefined, + allowed_users: [42], + }, + ], + }); + }); + it("resolves model selection callbacks to a synthetic /model command", async () => { const action = await resolveZulipModelPickerCallbackAction({ cfg, diff --git a/extensions/zulip/src/zulip/model-picker.ts b/extensions/zulip/src/zulip/model-picker.ts index 69e3d7447b5b..61d6e388a047 100644 --- a/extensions/zulip/src/zulip/model-picker.ts +++ b/extensions/zulip/src/zulip/model-picker.ts @@ -99,6 +99,7 @@ function isTextOnlyAction(value: ZulipModelPickerRender | TextOnlyAction): value function buildProvidersRender(params: { modelData: ModelsProviderData; introText?: string; + allowedUserIds?: number[]; }): ZulipModelPickerRender | TextOnlyAction { const providerInfos = buildProviderInfo(params.modelData); if (providerInfos.length === 0) { @@ -110,7 +111,12 @@ function buildProvidersRender(params: { : "Select a provider:", spec: readZulipComponentSpec({ heading: "Model Providers", - buttons: buildProviderKeyboard(providerInfos), + buttons: buildProviderKeyboard(providerInfos).map((row) => + row.map((button) => ({ + ...button, + allowedUsers: params.allowedUserIds, + })), + ), }), }; } @@ -128,6 +134,7 @@ export function buildZulipModelPickerReply(params: { text: button.label, callback_data: button.callbackData, style: button.style, + allowed_users: button.allowedUsers, })), }, }, @@ -138,11 +145,13 @@ export async function buildZulipModelPickerProvidersReply(params: { cfg: OpenClawConfig; agentId?: string; introText?: string; + allowedUserIds?: number[]; }): Promise { const modelData = await buildModelsProviderData(params.cfg, params.agentId); const render = buildProvidersRender({ modelData, introText: params.introText, + allowedUserIds: params.allowedUserIds, }); if (isTextOnlyAction(render)) { return { text: render.text }; @@ -156,6 +165,7 @@ export async function buildZulipModelPickerModelsReply(params: { sessionKey?: string; provider: string; page: number; + allowedUserIds?: number[]; }): Promise { const modelData = await buildModelsProviderData(params.cfg, params.agentId); const render = buildModelsRender({ @@ -165,6 +175,7 @@ export async function buildZulipModelPickerModelsReply(params: { provider: params.provider, page: params.page, modelData, + allowedUserIds: params.allowedUserIds, }); if (isTextOnlyAction(render)) { return { text: render.text }; @@ -179,12 +190,14 @@ function buildModelsRender(params: { provider: string; page: number; modelData: ModelsProviderData; + allowedUserIds?: number[]; }): ZulipModelPickerRender | TextOnlyAction { const modelSet = params.modelData.byProvider.get(params.provider); if (!modelSet || modelSet.size === 0) { return buildProvidersRender({ modelData: params.modelData, introText: `Unknown provider: ${params.provider}`, + allowedUserIds: params.allowedUserIds, }); } @@ -215,7 +228,12 @@ function buildModelsRender(params: { currentPage: safePage, totalPages, pageSize, - }), + }).map((row) => + row.map((button) => ({ + ...button, + allowedUsers: params.allowedUserIds, + })), + ), }), }; } @@ -225,6 +243,7 @@ export async function resolveZulipModelPickerCallbackAction(params: { callbackData?: string | null; agentId?: string; sessionKey?: string; + allowedUserIds?: number[]; }): Promise { const callback = parseModelCallbackData(params.callbackData ?? ""); if (!callback) { @@ -233,7 +252,7 @@ export async function resolveZulipModelPickerCallbackAction(params: { const modelData = await buildModelsProviderData(params.cfg, params.agentId); if (callback.type === "providers" || callback.type === "back") { - const render = buildProvidersRender({ modelData }); + const render = buildProvidersRender({ modelData, allowedUserIds: params.allowedUserIds }); return isTextOnlyAction(render) ? render : { kind: "render", render }; } @@ -245,6 +264,7 @@ export async function resolveZulipModelPickerCallbackAction(params: { provider: callback.provider, page: callback.page, modelData, + allowedUserIds: params.allowedUserIds, }); return isTextOnlyAction(render) ? render : { kind: "render", render }; } @@ -258,6 +278,7 @@ export async function resolveZulipModelPickerCallbackAction(params: { const render = buildProvidersRender({ modelData, introText: `Could not resolve model "${selection.model}".`, + allowedUserIds: params.allowedUserIds, }); return isTextOnlyAction(render) ? render : { kind: "render", render }; } diff --git a/extensions/zulip/src/zulip/monitor.ts b/extensions/zulip/src/zulip/monitor.ts index 6a10ab766a5c..48aff807cff8 100644 --- a/extensions/zulip/src/zulip/monitor.ts +++ b/extensions/zulip/src/zulip/monitor.ts @@ -51,7 +51,12 @@ import { type ZulipMessage, type ZulipSubmessageEvent, } from "./client.js"; -import { resolveZulipComponentEntry, removeZulipComponentEntry } from "./components-registry.js"; +import { + claimZulipComponentEntry, + consumeZulipComponentMessageEntries, + loadZulipComponentRegistry, + removeZulipComponentEntry, +} from "./components-registry.js"; import { formatZulipComponentEventText, readZulipComponentSpec } from "./components.js"; import { createZulipDraftStream, type ZulipDraftTarget } from "./draft-stream.js"; import { ZulipExecApprovalHandler } from "./exec-approvals.js"; @@ -1300,6 +1305,23 @@ export async function monitorZulipProvider(opts: MonitorZulipOpts = {}): Promise } }; + await loadZulipComponentRegistry(account.accountId); + + const sendStaleComponentNotice = async (senderId: number) => { + try { + await sendMessageZulip( + `dm:${senderId}`, + "That Zulip action is no longer active. Please rerun the command or request a fresh prompt.", + { + cfg, + accountId: account.accountId, + }, + ); + } catch (err) { + logVerbose(`zulip: failed to send stale component notice: ${String(err)}`); + } + }; + const resolvedAllowFromEntries = resolvedConfigAllowFrom.flatMap((entry) => [entry.id, entry.email].filter((value): value is string => Boolean(value)), ); @@ -1710,22 +1732,27 @@ export async function monitorZulipProvider(opts: MonitorZulipOpts = {}): Promise return; } - // Resolve the component entry from registry without consuming first so - // unauthorized clicks on shared widgets do not burn the button. - const entry = resolveZulipComponentEntry({ id: buttonId, consume: false }); - if (!entry) { - runtime.log?.(`zulip: ocform callback for unknown/expired button '${buttonId}', ignoring`); + const claimResult = await claimZulipComponentEntry({ + accountId: account.accountId, + id: buttonId, + senderId: event.sender_id, + }); + if (claimResult.kind === "missing" || claimResult.kind === "expired") { + runtime.log?.(`zulip: ocform callback for stale button '${buttonId}', notifying clicker`); + await sendStaleComponentNotice(event.sender_id); return; } - - // Check allowedUsers if configured - if (entry.allowedUsers && entry.allowedUsers.length > 0) { - if (!entry.allowedUsers.includes(event.sender_id)) { - runtime.log?.(`zulip: ocform callback from unauthorized user ${event.sender_id}`); - return; - } + if (claimResult.kind === "consumed") { + runtime.log?.(`zulip: ocform callback for consumed button '${buttonId}', notifying clicker`); + await sendStaleComponentNotice(event.sender_id); + return; + } + if (claimResult.kind === "unauthorized") { + runtime.log?.(`zulip: ocform callback from unauthorized user ${event.sender_id}`); + return; } + const entry = claimResult.entry; const sessionKey = entry.sessionKey; const agentId = entry.agentId; const componentChatType = entry.chatType ?? "channel"; @@ -1733,6 +1760,19 @@ export async function monitorZulipProvider(opts: MonitorZulipOpts = {}): Promise replyTo: entry.replyTo, senderId: event.sender_id, }); + const consumeWidgetMessage = async () => { + if (entry.reusable) { + return; + } + if (typeof entry.messageId === "number" && Number.isFinite(entry.messageId) && entry.messageId > 0) { + await consumeZulipComponentMessageEntries({ + accountId: entry.accountId, + messageId: entry.messageId, + }); + return; + } + await removeZulipComponentEntry(entry.id, entry.accountId); + }; const approvalResult = execApprovalsHandler ? await execApprovalsHandler.handleCallback({ @@ -1741,8 +1781,8 @@ export async function monitorZulipProvider(opts: MonitorZulipOpts = {}): Promise }) : { handled: false, consume: false }; if (approvalResult.handled) { - if (approvalResult.consume && !entry.reusable) { - removeZulipComponentEntry(entry.id); + if (approvalResult.consume) { + await consumeWidgetMessage(); } return; } @@ -1752,13 +1792,9 @@ export async function monitorZulipProvider(opts: MonitorZulipOpts = {}): Promise callbackData: entry.callbackData, agentId, sessionKey, + allowedUserIds: entry.allowedUsers, }); if (modelPickerAction) { - const consumeModelPickerEntry = () => { - if (!entry.reusable) { - removeZulipComponentEntry(entry.id); - } - }; if (modelPickerAction.kind === "render") { await sendZulipComponentMessage( replyTarget, @@ -1771,7 +1807,7 @@ export async function monitorZulipProvider(opts: MonitorZulipOpts = {}): Promise agentId, }, ); - consumeModelPickerEntry(); + await consumeWidgetMessage(); return; } if (modelPickerAction.kind === "text") { @@ -1779,7 +1815,7 @@ export async function monitorZulipProvider(opts: MonitorZulipOpts = {}): Promise cfg, accountId: entry.accountId, }); - consumeModelPickerEntry(); + await consumeWidgetMessage(); return; } runtime.log?.( @@ -1883,16 +1919,14 @@ export async function monitorZulipProvider(opts: MonitorZulipOpts = {}): Promise onModelSelected: prefixContext.onModelSelected, }, }); - consumeModelPickerEntry(); + await consumeWidgetMessage(); } finally { markDispatchIdle(); } return; } - if (!entry.reusable) { - removeZulipComponentEntry(entry.id); - } + const label = typeof data.label === "string" ? data.label : entry.label; const eventText = formatZulipComponentEventText({ @@ -2008,6 +2042,7 @@ export async function monitorZulipProvider(opts: MonitorZulipOpts = {}): Promise onModelSelected: prefixContext.onModelSelected, }, }); + await consumeWidgetMessage(); } finally { markDispatchIdle(); } diff --git a/extensions/zulip/src/zulip/send-components.ts b/extensions/zulip/src/zulip/send-components.ts index 62d60d1379e3..35dc32112bc2 100644 --- a/extensions/zulip/src/zulip/send-components.ts +++ b/extensions/zulip/src/zulip/send-components.ts @@ -19,6 +19,7 @@ export type ZulipComponentSendOpts = { replyToTopic?: string; sessionKey?: string; agentId?: string; + callbackExpiresAtMs?: number; }; function isHttpUrl(value: string): boolean { @@ -158,9 +159,10 @@ export async function sendZulipComponentMessage( widgetContent: JSON.stringify(buildResult.widgetContent), }); - registerZulipComponentEntries({ + await registerZulipComponentEntries({ entries: buildResult.entries, messageId: response.id, + callbackExpiresAtMs: opts.callbackExpiresAtMs, }); core.channel.activity.record({ diff --git a/src/agents/pi-embedded-runner/lanes.ts b/src/agents/pi-embedded-runner/lanes.ts index d89a741a662d..1fadd20a73f6 100644 --- a/src/agents/pi-embedded-runner/lanes.ts +++ b/src/agents/pi-embedded-runner/lanes.ts @@ -15,7 +15,7 @@ export function resolveGlobalLane(lane?: string) { // self-deadlock: the outer cron task awaits an inner task that cannot start // until the outer task releases the lane. Use the existing nested lane for // the embedded portion instead. - if (cleaned === "cron") { + if (cleaned === CommandLane.Cron) { return CommandLane.Nested; } return cleaned; diff --git a/src/agents/tools/cron-tool.test.ts b/src/agents/tools/cron-tool.test.ts index 28ab28626da7..323e32765051 100644 --- a/src/agents/tools/cron-tool.test.ts +++ b/src/agents/tools/cron-tool.test.ts @@ -388,14 +388,21 @@ describe("cron tool", () => { name: "empty-job", schedule: { kind: "cron", expr: "0 9 * * *" }, sessionTarget: "main", + pacing: { providerTarget: "codex", role: "maintenance" }, payload: { kind: "systemEvent", text: "wake up" }, }); const params = expectSingleGatewayCallMethod("cron.add") as - | { name?: string; sessionTarget?: string; payload?: { text?: string } } + | { + name?: string; + sessionTarget?: string; + pacing?: { providerTarget?: string; role?: string }; + payload?: { text?: string }; + } | undefined; expect(params?.name).toBe("empty-job"); expect(params?.sessionTarget).toBe("main"); + expect(params?.pacing).toEqual({ providerTarget: "codex", role: "maintenance" }); expect(params?.payload?.text).toBe("wake up"); }); diff --git a/src/agents/tools/cron-tool.ts b/src/agents/tools/cron-tool.ts index 14df69010245..b87567b90a60 100644 --- a/src/agents/tools/cron-tool.ts +++ b/src/agents/tools/cron-tool.ts @@ -308,6 +308,7 @@ Use jobId as the canonical identifier; id is accepted for compatibility. Use con "sessionTarget", "wakeMode", "payload", + "pacing", "delivery", "enabled", "description", @@ -458,6 +459,7 @@ Use jobId as the canonical identifier; id is accepted for compatibility. Use con "sessionKey", "sessionTarget", "wakeMode", + "pacing", "failureAlert", "allowUnsafeExternalContent", ]); diff --git a/src/auto-reply/reply/commands-models.test.ts b/src/auto-reply/reply/commands-models.test.ts index c7131ef49434..4c1189a10256 100644 --- a/src/auto-reply/reply/commands-models.test.ts +++ b/src/auto-reply/reply/commands-models.test.ts @@ -19,11 +19,12 @@ describe("resolveModelsCommandReply (zulip)", () => { }, } as OpenClawConfig; - it("returns provider buttons for /models on Zulip", async () => { + it("returns provider buttons for /models on Zulip with invoker ACLs", async () => { const reply = await resolveModelsCommandReply({ cfg, commandBodyNormalized: "/models", surface: "zulip", + allowedUserIds: [42], }); expect(reply?.text).toBe("Select a provider:"); @@ -31,24 +32,36 @@ describe("resolveModelsCommandReply (zulip)", () => { heading: "Model Providers", buttons: expect.arrayContaining([ expect.arrayContaining([ - expect.objectContaining({ text: "anthropic (1)", callback_data: "mdl_list_anthropic_1" }), - expect.objectContaining({ text: "openai (2)", callback_data: "mdl_list_openai_1" }), + expect.objectContaining({ + text: "anthropic (1)", + callback_data: "mdl_list_anthropic_1", + allowed_users: [42], + }), + expect.objectContaining({ + text: "openai (2)", + callback_data: "mdl_list_openai_1", + allowed_users: [42], + }), ]), ]), }); }); - it("returns model buttons for /models on Zulip", async () => { + it("returns model buttons for /models on Zulip with invoker ACLs", async () => { const reply = await resolveModelsCommandReply({ cfg, commandBodyNormalized: "/models openai", surface: "zulip", currentModel: "openai/gpt-4.1-mini", + allowedUserIds: [42], }); expect(reply?.text).toContain("Models (openai"); const zulipData = reply?.channelData?.zulip as - | { heading?: string; buttons?: Array> } + | { + heading?: string; + buttons?: Array>; + } | undefined; expect(zulipData).toMatchObject({ heading: "openai models", @@ -57,10 +70,17 @@ describe("resolveModelsCommandReply (zulip)", () => { expect(buttons).toEqual( expect.arrayContaining([ expect.arrayContaining([ - expect.objectContaining({ text: expect.stringContaining("gpt-4.1-mini ✓") }), + expect.objectContaining({ + text: expect.stringContaining("gpt-4.1-mini ✓"), + allowed_users: [42], + }), ]), expect.arrayContaining([ - expect.objectContaining({ text: "<< Back", callback_data: "mdl_back" }), + expect.objectContaining({ + text: "<< Back", + callback_data: "mdl_back", + allowed_users: [42], + }), ]), ]), ); diff --git a/src/auto-reply/reply/commands-models.ts b/src/auto-reply/reply/commands-models.ts index 5f5d45e44711..6d2597395bdf 100644 --- a/src/auto-reply/reply/commands-models.ts +++ b/src/auto-reply/reply/commands-models.ts @@ -15,6 +15,7 @@ import { buildProviderKeyboard, calculateTotalPages, getModelsPageSize, + type ButtonRow, type ProviderInfo, } from "../../telegram/model-buttons.js"; import type { ReplyPayload } from "../types.js"; @@ -187,6 +188,18 @@ function parseModelsArgs(raw: string): { }; } +function mapZulipButtonsWithAllowedUsers( + buttons: ButtonRow[], + allowedUserIds?: number[], +): Array> { + return buttons.map((row) => + row.map((button) => ({ + ...button, + allowed_users: allowedUserIds, + })), + ); +} + function resolveProviderLabel(params: { provider: string; cfg: OpenClawConfig; @@ -229,6 +242,7 @@ export async function resolveModelsCommandReply(params: { agentId?: string; agentDir?: string; sessionEntry?: ModelsSessionEntry; + allowedUserIds?: number[]; }): Promise { const body = params.commandBodyNormalized.trim(); if (!body.startsWith("/models")) { @@ -256,7 +270,12 @@ export async function resolveModelsCommandReply(params: { text, channelData: isTelegram ? { telegram: { buttons } } - : { zulip: { heading: "Model Providers", buttons } }, + : { + zulip: { + heading: "Model Providers", + buttons: mapZulipButtonsWithAllowedUsers(buttons, params.allowedUserIds), + }, + }, }; } @@ -284,7 +303,12 @@ export async function resolveModelsCommandReply(params: { text: `Unknown provider: ${provider}\n\nSelect a provider:`, channelData: isTelegram ? { telegram: { buttons } } - : { zulip: { heading: "Model Providers", buttons } }, + : { + zulip: { + heading: "Model Providers", + buttons: mapZulipButtonsWithAllowedUsers(buttons, params.allowedUserIds), + }, + }, }; } const lines: string[] = [ @@ -343,7 +367,12 @@ export async function resolveModelsCommandReply(params: { text, channelData: isTelegram ? { telegram: { buttons } } - : { zulip: { heading: `${provider} models`, buttons } }, + : { + zulip: { + heading: `${provider} models`, + buttons: mapZulipButtonsWithAllowedUsers(buttons, params.allowedUserIds), + }, + }, }; } @@ -406,6 +435,11 @@ export const handleModelsCommand: CommandHandler = async (params, allowTextComma }); const modelsAgentDir = resolveAgentDir(params.cfg, modelsAgentId); + const allowedUserIds = + params.ctx.Surface === "zulip" && params.command.senderId + ? [Number.parseInt(params.command.senderId, 10)].filter(Number.isFinite) + : undefined; + const reply = await resolveModelsCommandReply({ cfg: params.cfg, commandBodyNormalized, @@ -414,6 +448,7 @@ export const handleModelsCommand: CommandHandler = async (params, allowTextComma agentId: modelsAgentId, agentDir: modelsAgentDir, sessionEntry: params.sessionEntry, + allowedUserIds, }); if (!reply) { return null; diff --git a/src/auto-reply/reply/commands-zulip-shared.test.ts b/src/auto-reply/reply/commands-zulip-shared.test.ts new file mode 100644 index 000000000000..0ce4dbdfb02e --- /dev/null +++ b/src/auto-reply/reply/commands-zulip-shared.test.ts @@ -0,0 +1,170 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; +import type { OpenClawConfig } from "../../config/config.js"; +import type { HandleCommandsParams } from "./commands-types.js"; + +const gatewayMocks = vi.hoisted(() => ({ + callGateway: vi.fn(), +})); + +vi.mock("../../gateway/call.js", () => ({ + callGateway: gatewayMocks.callGateway, +})); + +vi.mock("../../agents/model-catalog.js", () => ({ + loadModelCatalog: vi.fn(async () => [ + { provider: "openai", id: "gpt-4.1", name: "GPT-4.1" }, + { provider: "openai", id: "gpt-4.1-mini", name: "GPT-4.1 Mini" }, + { provider: "anthropic", id: "claude-sonnet-4-5", name: "Claude Sonnet 4.5" }, + ]), +})); + +const { handleModelsCommand } = await import("./commands-models.js"); +const { handleApproveCommand } = await import("./commands-approve.js"); + +function makeParams( + commandBodyNormalized: string, + overrides?: Partial, +): HandleCommandsParams { + return { + cfg: { + agents: { + defaults: { + model: "openai/gpt-4.1", + }, + }, + } as OpenClawConfig, + directives: {}, + elevated: { + enabled: false, + allowed: false, + failures: [], + }, + sessionKey: "agent:archie:zulip:stream:ops:topic:deploy", + workspaceDir: "/tmp/openclaw-workspace", + defaultGroupActivation: () => "mention", + resolvedVerboseLevel: "normal", + resolvedReasoningLevel: "normal", + resolveDefaultThinkingLevel: async () => undefined, + provider: "openai", + model: "gpt-4.1-mini", + contextTokens: 0, + isGroup: false, + sessionEntry: undefined, + previousSessionEntry: undefined, + sessionStore: undefined, + storePath: undefined, + sessionScope: undefined, + rootCtx: undefined, + agentId: "archie", + agentDir: undefined, + resolvedThinkLevel: undefined, + resolvedElevatedLevel: undefined, + skillCommands: undefined, + ...overrides, + command: { + surface: "zulip", + channel: "zulip", + ownerList: [], + senderIsOwner: false, + isAuthorizedSender: true, + senderId: "42", + rawBodyNormalized: commandBodyNormalized, + commandBodyNormalized, + from: "zulip:42", + to: "dm:42", + ...(overrides?.command ?? {}), + }, + ctx: { + Surface: "zulip", + AccountId: "default", + CommandSource: "text", + ...(overrides?.ctx ?? {}), + } as HandleCommandsParams["ctx"], + }; +} + +describe("shared Zulip command handlers", () => { + beforeEach(() => { + gatewayMocks.callGateway.mockReset(); + gatewayMocks.callGateway.mockResolvedValue({ ok: true }); + }); + + it("derives invoker ACLs for shared Zulip /models", async () => { + const result = await handleModelsCommand(makeParams("/models"), true); + + expect(result?.shouldContinue).toBe(false); + expect(result?.reply?.channelData?.zulip).toMatchObject({ + heading: "Model Providers", + buttons: expect.arrayContaining([ + expect.arrayContaining([ + expect.objectContaining({ + text: "anthropic (1)", + callback_data: "mdl_list_anthropic_1", + allowed_users: [42], + }), + expect.objectContaining({ + text: "openai (2)", + callback_data: "mdl_list_openai_1", + allowed_users: [42], + }), + ]), + ]), + }); + }); + + it("derives invoker ACLs for shared Zulip /models ", async () => { + const result = await handleModelsCommand(makeParams("/models openai"), true); + + expect(result?.shouldContinue).toBe(false); + expect(result?.reply?.text).toContain("Models (openai"); + expect(result?.reply?.channelData?.zulip).toMatchObject({ + heading: "openai models", + buttons: expect.arrayContaining([ + expect.arrayContaining([ + expect.objectContaining({ + text: expect.stringContaining("gpt-4.1-mini ✓"), + allowed_users: [42], + }), + ]), + expect.arrayContaining([ + expect.objectContaining({ + text: "<< Back", + callback_data: "mdl_back", + allowed_users: [42], + }), + ]), + ]), + }); + }); + + it("submits shared Zulip /approve through the gateway", async () => { + const result = await handleApproveCommand(makeParams("/approve req-123 allow-once"), true); + + expect(gatewayMocks.callGateway).toHaveBeenCalledWith({ + method: "exec.approval.resolve", + params: { id: "req-123", decision: "allow-once" }, + clientName: "gateway-client", + clientDisplayName: "Chat approval (zulip:42)", + mode: "backend", + }); + expect(result).toEqual({ + shouldContinue: false, + reply: { text: "✅ Exec approval allow-once submitted for req-123." }, + }); + expect(result?.reply?.channelData).toBeUndefined(); + }); + + it("blocks unauthorized shared Zulip /approve before calling the gateway", async () => { + const result = await handleApproveCommand( + makeParams("/approve req-123 allow-once", { + command: { + isAuthorizedSender: false, + } as Partial, + }), + true, + ); + + expect(gatewayMocks.callGateway).not.toHaveBeenCalled(); + expect(result).toEqual({ shouldContinue: false }); + }); +}); diff --git a/src/cron/isolated-agent/run.skill-filter.test.ts b/src/cron/isolated-agent/run.skill-filter.test.ts index b0d34ad2f403..078e6a065832 100644 --- a/src/cron/isolated-agent/run.skill-filter.test.ts +++ b/src/cron/isolated-agent/run.skill-filter.test.ts @@ -251,6 +251,31 @@ describe("runCronIsolatedAgentTurn — skill filter", () => { expect(logWarnMock).not.toHaveBeenCalled(); expect(runWithModelFallbackMock).not.toHaveBeenCalled(); }); + + it("applies inferred gemini routing when no stronger model override exists", async () => { + resolveAllowedModelRefMock.mockReturnValueOnce({ + ref: { provider: "google", model: "gemini-3.1-pro-preview" }, + }); + + const result = await runCronIsolatedAgentTurn( + makeSkillParams({ + job: makeSkillJob({ + name: "leo-nightly-research", + payload: { + kind: "agentTurn", + message: "Do deep research with web_search and summarize market signals.", + }, + }), + agentId: "leo", + }), + ); + + expect(result.status).toBe("ok"); + expect(runWithModelFallbackMock).toHaveBeenCalledOnce(); + const runParams = runWithModelFallbackMock.mock.calls[0][0]; + expect(runParams.provider).toBe("google"); + expect(runParams.model).toBe("gemini-3.1-pro-preview"); + }); }); describe("CLI session handoff (issue #29774)", () => { diff --git a/src/cron/normalize.test.ts b/src/cron/normalize.test.ts index 6f34c85ebedd..f7ca2069c008 100644 --- a/src/cron/normalize.test.ts +++ b/src/cron/normalize.test.ts @@ -233,6 +233,29 @@ describe("normalizeCronJobCreate", () => { expectAnnounceDeliveryTarget(delivery, { channel: "telegram", to: "7200373102" }); }); + it("normalizes top-level pacing metadata", () => { + const normalized = normalizeCronJobCreate({ + name: "paced", + enabled: true, + schedule: { kind: "cron", expr: "* * * * *" }, + sessionTarget: "isolated", + wakeMode: "now", + payload: { + kind: "agentTurn", + message: "hi", + }, + pacing: { + providerTarget: " GeMiNi ", + role: " Maintenance ", + }, + }) as unknown as Record; + + expect(normalized.pacing).toEqual({ + providerTarget: "gemini", + role: "maintenance", + }); + }); + it("normalizes delivery accountId and strips blanks", () => { const normalized = normalizeIsolatedAgentTurnCreateJob({ name: "delivery account", @@ -463,4 +486,37 @@ describe("normalizeCronJobPatch", () => { const schedule = normalized.schedule as Record; expect(schedule.staggerMs).toBe(30_000); }); + + it("strips invalid pacing provider values in patches while preserving valid roles", () => { + const normalized = normalizeCronJobPatch({ + pacing: { + providerTarget: "other", + role: " Review ", + }, + }) as unknown as Record; + + expect(normalized.pacing).toEqual({ + role: "review", + }); + }); + + it("accepts gemini pacing provider values in patches", () => { + const normalized = normalizeCronJobPatch({ + pacing: { + providerTarget: " GeMiNi ", + }, + }) as unknown as Record; + + expect(normalized.pacing).toEqual({ + providerTarget: "gemini", + }); + }); + + it("preserves null pacing patches so callers can clear metadata", () => { + const normalized = normalizeCronJobPatch({ + pacing: null, + }) as unknown as Record; + + expect(normalized.pacing).toBeNull(); + }); }); diff --git a/src/cron/normalize.ts b/src/cron/normalize.ts index 5a6c66ff3567..f804a5a9fd30 100644 --- a/src/cron/normalize.ts +++ b/src/cron/normalize.ts @@ -204,6 +204,23 @@ function coerceDelivery(delivery: UnknownRecord) { return next; } +function coercePacing(pacing: UnknownRecord) { + const next: UnknownRecord = {}; + if (typeof pacing.providerTarget === "string") { + const trimmed = pacing.providerTarget.trim().toLowerCase(); + if (trimmed === "claude" || trimmed === "codex" || trimmed === "gemini") { + next.providerTarget = trimmed; + } + } + if (typeof pacing.role === "string") { + const trimmed = pacing.role.trim().toLowerCase(); + if (trimmed === "maintenance" || trimmed === "report" || trimmed === "review") { + next.role = trimmed; + } + } + return Object.keys(next).length > 0 ? next : null; +} + function unwrapJob(raw: UnknownRecord) { if (isRecord(raw.data)) { return raw.data; @@ -395,6 +412,22 @@ export function normalizeCronJobInput( if (isRecord(base.delivery)) { next.delivery = coerceDelivery(base.delivery); } + if (base.pacing === null) { + if (options.applyDefaults) { + delete next.pacing; + } else { + next.pacing = null; + } + } else if (isRecord(base.pacing)) { + const pacing = coercePacing(base.pacing); + if (pacing) { + next.pacing = pacing; + } else { + delete next.pacing; + } + } else if ("pacing" in base) { + delete next.pacing; + } if ("isolation" in next) { delete next.isolation; diff --git a/src/cron/service.jobs.test.ts b/src/cron/service.jobs.test.ts index 053ea8764de8..4ef7e10d3263 100644 --- a/src/cron/service.jobs.test.ts +++ b/src/cron/service.jobs.test.ts @@ -59,6 +59,22 @@ describe("applyJobPatch", () => { expect(job.delivery).toBeUndefined(); }); + it("replaces and clears pacing metadata from job patches", () => { + const job = createIsolatedAgentTurnJob("job-pacing", { + mode: "announce", + channel: "telegram", + to: "123", + }, { + pacing: { providerTarget: "codex", role: "maintenance" }, + }); + + applyJobPatch(job, { pacing: { providerTarget: "claude", role: "review" } }); + expect(job.pacing).toEqual({ providerTarget: "claude", role: "review" }); + + applyJobPatch(job, { pacing: null }); + expect(job.pacing).toBeUndefined(); + }); + it("keeps webhook delivery when switching to main session", () => { const job = createIsolatedAgentTurnJob("job-webhook", { mode: "webhook", diff --git a/src/cron/service/jobs.ts b/src/cron/service/jobs.ts index 5579e5430f05..b1aa8a0ae2cf 100644 --- a/src/cron/service/jobs.ts +++ b/src/cron/service/jobs.ts @@ -617,6 +617,16 @@ export function applyJobPatch( if (patch.delivery) { job.delivery = mergeCronDelivery(job.delivery, patch.delivery); } + if ("pacing" in patch) { + if (patch.pacing === null) { + job.pacing = undefined; + } else if (patch.pacing) { + job.pacing = { + providerTarget: patch.pacing.providerTarget, + role: patch.pacing.role, + }; + } + } if ("failureAlert" in patch) { job.failureAlert = mergeCronFailureAlert(job.failureAlert, patch.failureAlert); } diff --git a/src/gateway/protocol/cron-validators.test.ts b/src/gateway/protocol/cron-validators.test.ts index 33df9d478e97..57012d050395 100644 --- a/src/gateway/protocol/cron-validators.test.ts +++ b/src/gateway/protocol/cron-validators.test.ts @@ -21,6 +21,22 @@ describe("cron protocol validators", () => { expect(validateCronAddParams(minimalAddParams)).toBe(true); }); + it("accepts pacing metadata on add and update, including null clears", () => { + expect( + validateCronAddParams({ + ...minimalAddParams, + pacing: { providerTarget: "gemini", role: "maintenance" }, + }), + ).toBe(true); + expect( + validateCronUpdateParams({ + id: "job-1", + patch: { pacing: { providerTarget: "claude", role: "review" } }, + }), + ).toBe(true); + expect(validateCronUpdateParams({ id: "job-1", patch: { pacing: null } })).toBe(true); + }); + it("rejects add params when required scheduling fields are missing", () => { const { wakeMode: _wakeMode, ...withoutWakeMode } = minimalAddParams; expect(validateCronAddParams(withoutWakeMode)).toBe(false); diff --git a/src/gateway/protocol/schema/cron.ts b/src/gateway/protocol/schema/cron.ts index 41e7467becef..4f6fcfba67db 100644 --- a/src/gateway/protocol/schema/cron.ts +++ b/src/gateway/protocol/schema/cron.ts @@ -150,6 +150,24 @@ export const CronFailureAlertSchema = Type.Object( { additionalProperties: false }, ); +export const CronPacingMetadataSchema = Type.Object( + { + providerTarget: Type.Optional( + Type.Union([Type.Literal("claude"), Type.Literal("codex"), Type.Literal("gemini")]), + ), + role: Type.Optional( + Type.Union([ + Type.Literal("maintenance"), + Type.Literal("report"), + Type.Literal("review"), + ]), + ), + }, + { additionalProperties: false }, +); + +export const CronPacingPatchSchema = Type.Union([Type.Null(), CronPacingMetadataSchema]); + export const CronFailureDestinationSchema = Type.Object( { channel: Type.Optional(Type.Union([Type.Literal("last"), NonEmptyString])), @@ -245,6 +263,7 @@ export const CronJobSchema = Type.Object( wakeMode: CronWakeModeSchema, payload: CronPayloadSchema, delivery: Type.Optional(CronDeliverySchema), + pacing: Type.Optional(CronPacingMetadataSchema), failureAlert: Type.Optional(Type.Union([Type.Literal(false), CronFailureAlertSchema])), state: CronJobStateSchema, }, @@ -275,6 +294,7 @@ export const CronAddParamsSchema = Type.Object( wakeMode: CronWakeModeSchema, payload: CronPayloadSchema, delivery: Type.Optional(CronDeliverySchema), + pacing: Type.Optional(CronPacingMetadataSchema), failureAlert: Type.Optional(Type.Union([Type.Literal(false), CronFailureAlertSchema])), }, { additionalProperties: false }, @@ -289,6 +309,7 @@ export const CronJobPatchSchema = Type.Object( wakeMode: Type.Optional(CronWakeModeSchema), payload: Type.Optional(CronPayloadPatchSchema), delivery: Type.Optional(CronDeliveryPatchSchema), + pacing: Type.Optional(CronPacingPatchSchema), failureAlert: Type.Optional(Type.Union([Type.Literal(false), CronFailureAlertSchema])), state: Type.Optional(Type.Partial(CronJobStateSchema)), }, From 078f99b42f65c62fa0cff5d2e820e3f9a7bc2fed Mon Sep 17 00:00:00 2001 From: Bryan Fisher Date: Wed, 18 Mar 2026 10:49:53 -0400 Subject: [PATCH 5/7] feat(routing): wire budget-aware model routing with complexity tiers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Connect the existing provider-usage tracking infrastructure to the model fallback pipeline so routing decisions consider real-time subscription quota state. This closes the observation→action gap where usage was tracked comprehensively but never influenced model selection. New files: - complexity-tier.ts: Lightweight prompt complexity classifier (SIMPLE/MEDIUM/COMPLEX/REASONING) that maps to model preferences (local qwen for simple, cheap CLIs for medium, premium for complex) - provider-usage.cache.ts: 90s TTL cache for provider usage summary to avoid API call storms in the hot path Changes: - model-fallback.ts: reorderCandidatesByBudget() runs after resolveFallbackCandidates() and scores candidates by quota state, burn-rate projection, reset-time awareness, and complexity tier match - provider-usage.shared.ts: resolveUsageProviderIdForRouting() maps CLI providers (claude-cli, codex) to their subscription quota providers - provider-usage.ts: barrel re-exports for new functions Feature flags: BUDGET_ROUTING_DISABLED=1, BUDGET_ROUTING_ENFORCEMENT=strict Co-Authored-By: Claude Opus 4.6 --- .workflow/inputs/original-request.md | 19 +- .workflow/plan.md | 247 +++--------------- .workflow/prd.md | 88 ++----- src/agents/complexity-tier.ts | 84 ++++++ src/agents/model-fallback.ts | 176 ++++++++++++- src/config/types.agents.ts | 1 + src/config/zod-schema.agents.ts | 1 + src/infra/provider-usage.cache.ts | 63 +++++ src/infra/provider-usage.shared.ts | 29 ++ src/infra/provider-usage.ts | 6 +- src/lionroot/config/content-routing-schema.ts | 17 ++ src/lionroot/content-intake.test.ts | 75 +++++- src/lionroot/content-intake.ts | 35 ++- .../routing/content-intake-pipe.test.ts | 33 +-- src/lionroot/routing/content-route.test.ts | 48 ++++ src/lionroot/routing/content-route.ts | 24 ++ 16 files changed, 627 insertions(+), 319 deletions(-) create mode 100644 src/agents/complexity-tier.ts create mode 100644 src/infra/provider-usage.cache.ts diff --git a/.workflow/inputs/original-request.md b/.workflow/inputs/original-request.md index 93044b088686..80f819837f15 100644 --- a/.workflow/inputs/original-request.md +++ b/.workflow/inputs/original-request.md @@ -1,23 +1,16 @@ # Original Request -**Date:** 2026-03-11 +**Date:** 2026-03-12 **Source:** Codex session **From:** Bryan ## The Ask -> Zulip no longer needs a bigger fork to get meaningfully closer to Discord. The core primitives are already live in `clawdbot/extensions/zulip`. The next pass should be a targeted plugin hardening pass: durable interactive state, productized exec approvals and model picker UX, topic lifecycle/rebinding, and startup audit/resolution polish. - -## Interpreted Scope - -- Replace the older Phase 3 framing with a reality-based hardening pass. -- Keep the work primarily inside `extensions/zulip`. -- Prioritize reliability and correctness over new capability work. -- Defer new `lionroot-zulip` fork work unless real usage proves button-first UX is no longer enough. +> okay, fix maclern, and the other agents if they didn't follow their self-learning ## Initial Context -- `extensions/zulip` already ships draft streaming, button send/callbacks, widget-aware reply delivery, topic-bound sessions, exec approvals, model picker helpers, typing, reactions, uploads, and target resolution helpers. -- The largest remaining gap is not transport capability; it is plugin-owned state durability and callback correctness. -- The current `components-registry.ts` and `exec-approvals.ts` both keep critical interactive state in memory only. -- `commands-approve.ts` already provides a shared text `/approve` path, so Zulip-local command work should validate reuse before adding duplication. +- Overnight investigation for Wednesday night, March 11, 2026 into Thursday morning, March 12, 2026 found Maclern self-learning jobs produced irrelevant output. +- Evidence showed the latest Maclern digest was recorded under `agent:main:cron:...` and used workspace `/Users/lionheart/clawd`. +- Live runtime config at `~/.openclaw/openclaw.json` does not include `maclern` in `agents.list`, while `~/.openclaw/cron/jobs.json` includes `maclern-nightly-queue-review` and `maclern-overnight-digest`. +- Existing cron agent IDs in config and jobs were compared; `maclern` is the only current cron agent missing from `agents.list`. diff --git a/.workflow/plan.md b/.workflow/plan.md index a40fa59225aa..8a9467721dde 100644 --- a/.workflow/plan.md +++ b/.workflow/plan.md @@ -1,240 +1,69 @@ -# Technical Plan: Zulip Plugin Hardening Pass +# Technical Plan: Cron Agent Routing Repair for Self-Learning Jobs **Status:** Approved -**Date:** 2026-03-11 -**Flow/Context Builder Output:** Repo-grounded planning pass across the current Zulip plugin, shared approval/model command paths, and the Lionroot Zulip strategy docs. The code already ships the core primitives; the next pass should harden state ownership and productize the existing UX rather than add major new transport capability. Bryan approved proceeding with this plan in the Codex session on 2026-03-11 by replying "both" after the draft workflow artifacts were presented. +**Date:** 2026-03-12 +**Flow/Context Builder Output:** Repo-grounded planning identified `clawdbot/src/gateway/server-cron.ts` as the primary fault point. The live runtime config is also missing `maclern`, so a complete fix needs both gateway hardening and a config repair. ## Architecture -The work stays primarily inside `extensions/zulip/src/zulip/` and uses the current seams instead of introducing a new framework: - -- `components-registry.ts` becomes the durable owner of button callback state. -- `send-components.ts` registers persisted widget state and carries callback expiry metadata. -- `monitor.ts` switches from resolve/remove to claim/consume semantics, hosts thin local command handling, and runs startup audits. -- `exec-approvals.ts` keeps owning approval delivery and resolution, but gains persisted pending state. -- `model-picker.ts` keeps owning provider/model rendering and callback decoding, but gains invoker ACL scoping. -- `topic-bindings.ts` stays the lifecycle foundation; only targeted rebind work is added if live event validation proves it safe. +- `src/gateway/server-cron.ts` is the cron gateway wiring layer. It currently resolves requested cron agents through `resolveCronAgent`, which falls back to the default agent when the requested agent is absent from `agents.list`. +- `src/cron/service/timer.ts` passes `job.agentId` and `job.sessionKey` into the gateway callbacks, so the gateway layer is the final authority for cron wake/session routing. +- `src/cron/isolated-agent/run.ts` already supports requested agent IDs for isolated runs, but workspace selection still depends on runtime config when an explicit per-agent workspace is needed. +- The live config in `~/.openclaw/openclaw.json` must include `maclern` so the runtime resolves the intended workspace `/Users/lionheart/clawd/agents/maclern` and agent directory `~/.openclaw/agents/maclern/agent`. ## Files to Modify -- `extensions/zulip/src/zulip/components-registry.ts` — replace in-memory-only registry with durable per-account registry and message-scope consumption helpers. -- `extensions/zulip/src/zulip/send-components.ts` — register durable widget entries and pass optional callback expiry metadata. -- `extensions/zulip/src/zulip/components.ts` — normalize `allowedUsers` and `allowed_users` for Zulip component payloads. -- `extensions/zulip/src/zulip/exec-approvals.ts` — persist pending approval prompts, rehydrate timers, retire widget entries by message. -- `extensions/zulip/src/zulip/model-picker.ts` — preserve button ACL metadata and add invoker-scoped builder params. -- `extensions/zulip/src/zulip/commands.ts` — new thin Zulip-local command parser/handler for transport-owned UX. -- `extensions/zulip/src/zulip/monitor.ts` — adopt registry claim flow, insert local command handling, run stale-click DM behavior, run startup audit, and host topic rebind integration if validated. -- `extensions/zulip/src/zulip/topic-bindings.ts` — add targeted rebind helper only if live topic-edit events support it. -- `extensions/zulip/src/zulip/resolve-users.ts` — add outbound-only fuzzy/suggestion helper while preserving strict auth resolution. -- `extensions/zulip/src/zulip/send.ts` — use clearer outbound resolution errors and optional fuzzy helper. -- `extensions/zulip/src/zulip/client.ts` — only if needed to surface topic-edit event typing or registration. -- `../lionroot-openclaw/docs/zulip-upgrade-plan.md` — refresh the roadmap to match shipped reality and the new execution order. +- `clawdbot/src/gateway/server-cron.ts` — preserve requested isolated cron agent IDs instead of silently falling back to `main` when the agent is missing from config. +- `clawdbot/src/gateway/server-cron.test.ts` — add regression coverage for a cron agent that is missing from `agents.list`. +- `.openclaw/openclaw.json` — add the missing `maclern` agent entry following the existing per-agent workspace/agentDir convention. ## New Files -- `extensions/zulip/src/zulip/commands.ts` — minimal transport-owned Zulip command layer for `/models` and, only if validation proves necessary, `/approve` fallback handling. +- None. ## Tasks -1. [ ] Refresh the Zulip upgrade doc so planning truth matches the shipped code. -2. [ ] Implement a durable per-account component registry with claim semantics and message-scope consumption for non-reusable widgets. -3. [ ] Update send + monitor flows to use the new registry API, including stale-click handling. -4. [ ] Persist pending Zulip exec approvals and rehydrate them on startup. -5. [ ] Thread `allowedUsers` through model-picker rendering and scope picker controls to the invoker. -6. [ ] Add a thin Zulip-local command layer for `/models`; validate and reuse shared `/approve` unless testing proves a local fallback is necessary. -7. [ ] Validate live topic-edit event fidelity; if safe, add targeted topic rebinding, otherwise stop short and document the manual fallback path. -8. [ ] Add outbound resolution polish and a warn-only startup audit for configured streams, approval targets, analysis streams, and approver identities. -9. [ ] Add focused tests for registry durability, approval recovery, model-picker ACL behavior, local command entry, and any topic rebinding logic that lands. -10. [ ] Run targeted Zulip tests and a review pass before asking for approval to implement. +1. [ ] Patch `server-cron.ts` so `resolveCronAgent` preserves an explicit requested `agentId` for isolated cron routing. +2. [ ] Add regression tests that cover the missing-agent-in-config case and verify the requested agent is passed into isolated cron runs. +3. [ ] Patch `~/.openclaw/openclaw.json` to add a `maclern` agent entry with the correct workspace and agent directory. +4. [ ] Verify there are no other current cron agent IDs missing from `agents.list`. +5. [ ] Run targeted tests for the gateway cron path. +6. [ ] Do a review pass before wrapping up. ## Detailed File Plan -### 1) `extensions/zulip/src/zulip/components-registry.ts` - -Replace the current global in-memory helpers with a registry manager that owns: - -- in-memory `entriesById` -- per-account store path -- serialized persistence queue -- expiry pruning -- message-scope consumption helpers - -Add types roughly shaped like: - -- `StoredZulipComponentEntry` -- `ZulipComponentClaimResult` -- `StoredZulipComponentRegistryFile` - -Add or replace APIs with: - -- `registerEntries(...)` -- `claimEntry(...)` -- `consumeMessageEntries(messageId)` -- `removeMessageEntries(messageId)` -- `pruneExpired(now?)` -- per-account loader/manager getter - -Behavioral requirement: - -- For non-reusable widgets, a successful click retires the entire widget message, not just the clicked button. -- Unauthorized clicks must not consume the widget. -- Expired/missing/consumed claims must be distinguishable so `monitor.ts` can DM the clicker with a stale-action notice. - -### 2) `extensions/zulip/src/zulip/send-components.ts` - -Update the high-level send path to: - -- register entries through the durable registry manager -- pass message ID into registry registration -- optionally accept `callbackExpiresAtMs?: number` -- align approval widget TTL with approval expiry when supplied - -The existing text degradation path stays intact. - -### 3) `extensions/zulip/src/zulip/components.ts` - -Extend spec parsing so Zulip payload round trips preserve ACL metadata. - -Required changes: - -- normalize both `allowedUsers` and `allowed_users` -- preserve existing `callbackData` / `callback_data` handling -- keep button schema limited and explicit - -### 4) `extensions/zulip/src/zulip/exec-approvals.ts` - -Keep `ZulipExecApprovalHandler` as the owner, but add a persisted pending approval store. - -Add stored record state for: - -- approval ID -- prompt message IDs + targets -- expiry timestamp -- cleanup mode - -Required behavior: - -- on `start()`, load persisted approvals, reschedule timers, and immediately expire stale ones -- on request, persist prompt metadata after sends succeed -- on resolve/timeout, update prompt messages, retire widget entries by message ID, and remove persisted state -- use `callbackExpiresAtMs` so approval widgets expire with the approval request - -### 5) `extensions/zulip/src/zulip/model-picker.ts` +### 1) `clawdbot/src/gateway/server-cron.ts` -Productize the already-shipped picker instead of redesigning it. +- Update `resolveCronAgent` to normalize and preserve a non-empty requested `agentId` instead of requiring membership in `agents.list`. +- Keep the default-agent fallback only for empty or missing `agentId`. +- Leave main-session constraints enforced by existing cron job validation in `src/cron/service/jobs.ts`. -Required changes: +### 2) `clawdbot/src/gateway/server-cron.test.ts` -- add `allowedUserIds?: number[]` to public builder params -- preserve `allowedUsers` when converting picker render state into Zulip reply payloads -- ensure render-on-click flows keep the same ACL on the next picker page -- keep picker pages disposable rather than reusable +- Mock or observe the isolated cron run path so the test can assert that a job with `agentId: "maclern"` is passed through as `maclern` even when runtime config only lists `main`. +- Add coverage for the summary/wake path if needed to prove the gateway callbacks stay agent-scoped. -Do not add a separate picker store in phase 1. +### 3) `.openclaw/openclaw.json` -### 6) `extensions/zulip/src/zulip/commands.ts` (new) - -Keep this intentionally small. - -Responsibilities: - -- parse a minimal Zulip-local command set -- own transport rendering/affordance for `/models` -- validate whether shared `/approve` is already sufficient -- only add local `/approve` handling if end-to-end validation proves the shared path inadequate in Zulip - -Suggested initial union: - -- `{ kind: "models" }` -- `{ kind: "approve"; approvalId: string; decision: ExecApprovalDecision }` - -### 7) `extensions/zulip/src/zulip/monitor.ts` - -This is the orchestration point and gets the biggest integration change. - -Required updates: - -- load the durable component registry for the active account on startup -- replace resolve+remove callback flow with `claimEntry(...)` -- on `missing|expired|consumed`, DM the clicker with a stale-action notice -- on `unauthorized`, log and stop without consuming -- consume widget state at message scope after successful non-reusable clicks -- insert thin local command handling after xcase handling and before generic agent execution -- run warn-only startup audit for streams/approval targets/approvers -- if topic-edit event validation succeeds, host the rebind flow here - -### 8) `extensions/zulip/src/zulip/topic-bindings.ts` - -Do not rebuild this system. - -Only add targeted lifecycle support if validation proves automatic rename handling is safe: - -- `rebindConversation(...)` - -If the live Zulip event stream is insufficient, document the stop condition and leave automatic rebinding out of scope for this pass. - -### 9) `extensions/zulip/src/zulip/resolve-users.ts` and `send.ts` - -Polish outbound ergonomics without weakening authorization semantics. - -- exact match first -- then single-candidate fuzzy suggestion/match for outbound sends only -- fail clearly on ambiguous results -- keep allowlist, approver, and auth-sensitive identity checks strict - -### 10) `../lionroot-openclaw/docs/zulip-upgrade-plan.md` - -Update the roadmap after the code-grounded pass so it: - -- marks exec approvals and model picker as already shipped but rough -- promotes interaction durability to the top priority -- frames the next Zulip pass as plugin hardening/productization -- defers fork growth until real usage proves it necessary +- Add a `maclern` entry under `agents.list` using the existing convention: + - `workspace`: `/Users/lionheart/clawd/agents/maclern` + - `agentDir`: `/Users/lionheart/.openclaw/agents/maclern/agent` +- Do not disturb the ordering or semantics of existing agent entries beyond what is necessary. ## Testing Strategy -Add focused, colocated tests near the touched Zulip files. - -Minimum test coverage: - -- component registry persistence/load/expiry/claim behavior -- message-scope consumption for multi-button widgets -- stale-click and unauthorized-click handling -- exec approval restart recovery and prompt update behavior -- model picker ACL preservation and invoker scoping -- `/models` local command handling with widgets enabled/disabled -- shared `/approve` Zulip path validation or local fallback tests if implemented -- topic rebind tests only if automatic rebinding actually lands - -Recommended execution checks: - -- targeted Zulip Vitest suites for touched files -- at least one broader typecheck/build pass before review +- Run targeted gateway cron tests covering the new regression. +- Re-check the current cron agent IDs against `agents.list` after the config patch. +- If practical, inspect the resolved Maclern routing via a focused local invocation or config readback. ## Rollback Plan -- Reverting the registry changes falls back to today’s in-memory widget behavior. -- Reverting approval persistence falls back to today’s non-durable approval prompts. -- Reverting picker ACL work removes scoping but preserves the existing picker behavior. -- Reverting the local command layer still leaves shared text commands available. -- Topic rebind logic, if added, should be isolated so it can be reverted without affecting the existing binding manager. -- Docs update can be reverted independently from code. +- Revert the `server-cron.ts` change to restore the previous fallback behavior. +- Remove the `maclern` config entry if it proves incorrect. +- Existing misrouted historical runs remain untouched. ## Risks and Validation Gates -- **Shared `/approve` status is known but must still be validated end to end in Zulip.** The shared handler exists in `src/auto-reply/reply/commands-approve.ts`; do not duplicate it unless a real Zulip gap is proven. -- **Automatic topic rebinding must be gated on live event fidelity.** Do not implement heuristic rename matching. -- **Persistence assumes one active monitor process per Zulip account.** If deployment moves multi-process later, these stores may need a shared backend. -- **Startup audit must remain warn-only.** It should clarify operator issues without blocking the monitor from connecting. - -## Implementation Order - -1. Docs refresh in workflow + roadmap -2. Durable component registry -3. Monitor/send integration for claim + stale-click handling -4. Exec approval durability -5. Model picker ACL/scoping -6. Thin Zulip-local `/models` command handling -7. Topic event validation and conditional rebind work -8. Resolution polish + startup audit -9. Tests + review +- The code hardening should not change behavior for default-agent cron jobs. +- The config patch must not duplicate an existing `maclern` entry or break JSON formatting. +- Main-session jobs must remain restricted to the default agent through current validation. diff --git a/.workflow/prd.md b/.workflow/prd.md index ea60091e07a5..16c0733dcc81 100644 --- a/.workflow/prd.md +++ b/.workflow/prd.md @@ -1,87 +1,35 @@ -# PRD: Zulip Plugin Hardening Pass +# PRD: Cron Agent Routing Repair for Self-Learning Jobs **Status:** Approved -**Date:** 2026-03-11 -**Provenance:** See `inputs/original-request.md` and `inputs/references.md`. Bryan approved proceeding with this PRD in the Codex session on 2026-03-11 by replying "both" after the draft workflow artifacts were presented. +**Date:** 2026-03-12 +**Provenance:** See `.workflow/inputs/original-request.md` and `.workflow/inputs/references.md`. ## Summary -Harden the existing Zulip plugin UX in `extensions/zulip` so the features that are already shipped behave reliably in real use. This pass is not about adding major new Zulip primitives or broadening the fork. It is a plugin-first reliability and productization pass focused on durable callback state, restart-safe exec approvals, invoker-scoped model picker flows, thin Zulip-native command affordances, topic lifecycle validation/rebinding, and warn-only startup audits. +Repair isolated cron routing so Maclern self-learning jobs run as Maclern instead of silently falling back to the default `main` agent, and harden the cron runtime so future non-default cron agents do not misroute their self-learning jobs into the wrong agent/session namespace. ## User Stories -- As a Zulip user, I want approval and picker buttons to keep working after a monitor restart so the transport feels dependable. -- As an approver, I want exec approval prompts to survive restart and retire correctly after resolution or expiry. -- As a user, I want model picker controls scoped to me so other people in the stream cannot hijack my selection flow. -- As an operator, I want `/models` to feel like a first-class Zulip control without duplicating the shared command system unnecessarily. -- As an operator, I want topic-bound sessions to stay stable across normal lifecycle events and fail safely when rename/rebind cannot be trusted. -- As a maintainer, I want the Zulip upgrade docs to match shipped reality so future planning is grounded in what already exists. +- As Bryan, I want Maclern overnight self-learning jobs to run with Maclern’s own agent identity, workspace, and session store so the digest reflects actual training context. +- As an operator, I want cron jobs for non-default agents to fail or route correctly instead of silently downgrading to `main`. +- As a future agent owner, I want isolated cron jobs to remain agent-scoped even when the runtime config is incomplete. ## Acceptance Criteria -### Docs and planning truth - -- [ ] `lionroot-openclaw/docs/zulip-upgrade-plan.md` is updated to reflect that exec approvals and model picker are already shipped but need hardening/productization. -- [ ] The document ranks durable interaction state as the top plugin priority. -- [ ] The document no longer frames the next pass as mainly new capability work. - -### Durable component registry - -- [ ] Zulip component callback state is persisted per account to disk. -- [ ] Restarting the Zulip monitor preserves unexpired widget callbacks. -- [ ] Callback claiming distinguishes `ok`, `unauthorized`, `missing`, `expired`, and `consumed` outcomes. -- [ ] Non-reusable widgets are retired at message scope, not just button scope. -- [ ] Stale/expired/consumed clicks notify the clicker privately rather than spamming the originating stream. - -### Exec approval durability - -- [ ] Pending Zulip approval prompt state is persisted per account. -- [ ] On startup, the handler rehydrates pending approvals and timeout jobs. -- [ ] On resolve or timeout after restart, previously-sent approval prompt messages are still updated. -- [ ] Approval widget entries are retired by message ID on resolve/expiry. - -### Model picker productization - -- [ ] `allowedUsers` survives the Zulip reply payload round trip. -- [ ] Picker buttons are scoped to the invoking user. -- [ ] Old picker pages are disposable and retired after click. -- [ ] Unauthorized picker clicks are ignored/logged without consuming the control. - -### Thin Zulip-local command UX - -- [ ] Zulip gets a thin transport-owned `/models` entry point that renders widget UX when widgets are enabled. -- [ ] When widgets are disabled, `/models` falls back cleanly to the existing shared text path. -- [ ] The shared `/approve` path is validated end to end in Zulip before any local duplication is added. -- [ ] If shared `/approve` is insufficient in practice, a minimal local fallback is added and documented. - -### Topic lifecycle and audit - -- [ ] Existing persisted topic bindings remain intact. -- [ ] Live Zulip topic-edit event fidelity is validated before any automatic rebind logic is implemented. -- [ ] If automatic rename/rebind is viable, a targeted binding migration path is added. -- [ ] If it is not viable, the plan explicitly stops short of heuristic rebinding. -- [ ] Startup audit runs once, remains warn-only, and checks configured streams, analysis streams, approval targets, and approver identities. - -### Resolution polish - -- [ ] Outbound-only fuzzy/suggestion behavior improves ambiguous user/stream sends. -- [ ] Allowlist and approval authorization resolution remain strict. -- [ ] Ambiguous and not-found outbound errors are clearer than they are today. +- [ ] `maclern-nightly-queue-review` and `maclern-overnight-digest` no longer resolve to `main` because `maclern` is absent from `agents.list`. +- [ ] Isolated cron jobs preserve the requested non-default `agentId` through gateway cron routing, session key selection, wake routing, and isolated run execution. +- [ ] Maclern resolves to its intended workspace and agent directory for self-learning runs instead of using the default main workspace. +- [ ] Regression tests cover the missing-agent-in-config case for isolated cron jobs. +- [ ] The live runtime config includes the Maclern agent entry needed for correct workspace mapping. ## Out of Scope -- New Zulip fork primitives such as dropdowns, modal forms, or rich cards. -- A broad shared cross-channel interaction framework. -- A generic shared command-system refactor. -- Rewriting `monitor.ts` wholesale. -- XCase expansion or unrelated Zulip feature work. -- Reworking draft streaming, which is already shipped. +- Rewriting Maclern’s prompts, training artifacts, or Training Ops product surfaces. +- Changing main-session cron semantics for the default agent. +- Backfilling old misrouted Maclern session history. ## Technical Notes -- Keep the work in `extensions/zulip` unless a validated blocker truly requires fork work. -- Reuse the current seams: `components-registry.ts`, `send-components.ts`, `exec-approvals.ts`, `model-picker.ts`, `monitor.ts`, and `topic-bindings.ts`. -- Preserve existing shared command behavior where it already works, especially `/approve`. -- Use the same Zulip state area pattern already used by `topic-bindings.ts` for new persisted state. -- Startup audit should be non-blocking and warn-only on the first pass. -- Do not add new config knobs unless field use clearly proves they are needed. +- Current root cause is twofold: `server-cron.ts` falls back to `main` when a requested cron agent is not present in runtime config, and the live config currently omits `maclern`. +- The code change should be minimal and defensible, with regression tests in gateway cron coverage. +- The runtime config patch should follow the existing per-agent convention already used for agents like `cody`, `storie`, and `grove`. diff --git a/src/agents/complexity-tier.ts b/src/agents/complexity-tier.ts new file mode 100644 index 000000000000..30644c115554 --- /dev/null +++ b/src/agents/complexity-tier.ts @@ -0,0 +1,84 @@ +/** + * Lightweight prompt complexity classifier for budget-aware model routing. + * + * Maps prompt characteristics to a tier that influences which models are + * preferred in the fallback candidate list. This does NOT replace + * ClawRouter's 14-dim classifier — it's a simpler gateway-side heuristic + * used to bias candidates toward cheaper providers for simple tasks. + */ + +export type ComplexityTier = "simple" | "medium" | "complex" | "reasoning"; + +export type ComplexityHints = { + /** Approximate prompt character count. */ + promptChars?: number; + /** Number of tools available in the session. */ + toolCount?: number; + /** Explicit directive from user (e.g. /simple, /complex). */ + explicitTier?: ComplexityTier; + /** Whether the prompt includes code blocks. */ + hasCodeBlocks?: boolean; + /** Whether this is an agentic (tool-bearing) request. */ + isAgentic?: boolean; + /** Work category from budget gates (heartbeat, maintenance, etc.). */ + workCategory?: "mission" | "maintenance" | "self-improve" | "heartbeat"; +}; + +/** + * Classify prompt complexity from available hints. + * Returns "complex" when no hints are provided (safe default that preserves + * existing behavior of preferring premium models). + */ +export function classifyComplexity(hints: ComplexityHints): ComplexityTier { + if (hints.explicitTier) { + return hints.explicitTier; + } + + // Low-priority background work → simple tier + if (hints.workCategory === "heartbeat" || hints.workCategory === "self-improve") { + return "simple"; + } + + // Agentic with many tools → complex + if (hints.isAgentic && (hints.toolCount ?? 0) > 5) { + return "complex"; + } + + // Long prompts or code → complex + if ((hints.promptChars ?? 0) > 8_000 || hints.hasCodeBlocks) { + return "complex"; + } + + // Moderate length or has some tools → medium + if ((hints.promptChars ?? 0) > 2_000 || (hints.toolCount ?? 0) > 0) { + return "medium"; + } + + return "simple"; +} + +type TierModelPreference = { provider: string; model: string }; + +/** + * Model preferences by complexity tier. These are injected at the front + * of the candidate list when budget conditions favor cheaper routing. + * + * Hardcoded (not config) to comply with BOUNDARIES.md Rule #6 — + * agents must not autonomously modify routing config. + */ +export const TIER_MODEL_PREFERENCES: Record = { + simple: [ + { provider: "ollama", model: "qwen3.5:9b" }, + { provider: "exo", model: "llama-local" }, + ], + medium: [ + { provider: "claude-cli", model: "haiku" }, + { provider: "codex", model: "spark" }, + { provider: "ollama", model: "qwen3.5:9b" }, + ], + complex: [ + { provider: "claude-cli", model: "sonnet" }, + { provider: "codex", model: "gpt-5.3" }, + ], + reasoning: [], // Use primary from config — no override +}; diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts index 4f39bee471da..187b770a9f05 100644 --- a/src/agents/model-fallback.ts +++ b/src/agents/model-fallback.ts @@ -3,6 +3,9 @@ import { resolveAgentModelFallbackValues, resolveAgentModelPrimaryValue, } from "../config/model-input.js"; +import { getCachedProviderUsageSummary } from "../infra/provider-usage.cache.js"; +import { resolveUsageProviderIdForRouting } from "../infra/provider-usage.shared.js"; +import type { UsageSummary, UsageWindow } from "../infra/provider-usage.types.js"; import { createSubsystemLogger } from "../logging/subsystem.js"; import { sanitizeForLog } from "../terminal/ansi.js"; import { @@ -13,6 +16,8 @@ import { resolveAuthProfileOrder, } from "./auth-profiles.js"; import { OPENAI_CODEX_DEFAULT_MODEL_REF } from "./codex-defaults.js"; +import { classifyComplexity, TIER_MODEL_PREFERENCES } from "./complexity-tier.js"; +import type { ComplexityHints, ComplexityTier } from "./complexity-tier.js"; import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "./defaults.js"; import { coerceToFailoverError, @@ -584,6 +589,164 @@ function resolveCooldownDecision(params: { }; } +// --------------------------------------------------------------------------- +// Budget-aware candidate reordering +// --------------------------------------------------------------------------- + +function getWorstUsageWindow( + summary: UsageSummary, + providerKey: string, +): { worstPercent: number; resetAtMs: number | null; windowMinutes: number } | null { + const usageId = resolveUsageProviderIdForRouting(providerKey); + if (!usageId) { + return null; // local/external provider — no tracked quota + } + const snapshot = summary.providers.find((p) => p.provider === usageId); + if (!snapshot || snapshot.windows.length === 0) { + return null; + } + let worst: UsageWindow = snapshot.windows[0]; + for (const w of snapshot.windows) { + if (w.usedPercent > worst.usedPercent) { + worst = w; + } + } + // Infer window duration from label heuristic + const isWeekly = /week/i.test(worst.label); + const windowMinutes = isWeekly ? 10_080 : 300; + return { + worstPercent: worst.usedPercent, + resetAtMs: worst.resetAt ?? null, + windowMinutes, + }; +} + +function scoreBudgetCandidate( + candidate: ModelCandidate, + index: number, + summary: UsageSummary | null, + tier: ComplexityTier, +): number { + const BASE_WEIGHT = 100; + let score = index * BASE_WEIGHT; // preserve config intent as baseline + + if (!summary) { + return score; + } + + const usage = getWorstUsageWindow(summary, candidate.provider); + + // Local providers (null usage) get a bonus for simple/medium tiers + if (!usage) { + if (tier === "simple" || tier === "medium") { + score -= 1500; // strongly prefer free local models + } + return score; + } + + const { worstPercent, resetAtMs, windowMinutes } = usage; + const now = Date.now(); + const minutesUntilReset = resetAtMs ? Math.max(0, (resetAtMs - now) / 60_000) : null; + + // Usage penalty — demote providers approaching quota limits + if (worstPercent > 90 && minutesUntilReset !== null && minutesUntilReset < 240) { + score += 5000; // near exhaustion, reset far away + } else if (worstPercent > 85) { + score += 3000; + } else if (worstPercent > 70) { + score += 1000; + } else if (worstPercent < 20) { + score -= 1000; // fresh quota — promote + } + + // Burn-rate projection (simplified from dashboard pacing.ts) + if (minutesUntilReset !== null && windowMinutes > 0) { + const elapsedMinutes = windowMinutes - minutesUntilReset; + if (elapsedMinutes > 6) { + const burnRatePerHour = worstPercent / (elapsedMinutes / 60); + const projectedEnd = worstPercent + burnRatePerHour * (minutesUntilReset / 60); + if (projectedEnd > 100 && minutesUntilReset > 240) { + score += 3000; // will exhaust well before reset + } + if (projectedEnd < 60) { + score -= 300; // comfortable headroom + } + } + // Just-reset bonus + if (worstPercent < 10 && elapsedMinutes < windowMinutes * 0.2) { + score -= 500; + } + } + + // Tier bonus — prefer tier-appropriate models + const tierPrefs = TIER_MODEL_PREFERENCES[tier]; + const isTierPreferred = tierPrefs.some( + (p) => p.provider === candidate.provider && p.model === candidate.model, + ); + if (isTierPreferred) { + score -= 2000; + } + + // Demote premium models for simple tasks + if (tier === "simple") { + const isPremium = + (candidate.provider === "anthropic" && /opus/i.test(candidate.model)) || + (candidate.provider === "openai-codex" && /gpt-5\.4/i.test(candidate.model)); + if (isPremium) { + score += 2000; + } + } + + return score; +} + +async function reorderCandidatesByBudget(params: { + candidates: ModelCandidate[]; + complexityTier: ComplexityTier; + enforcement: "soft" | "strict"; +}): Promise<{ candidates: ModelCandidate[]; usageSummary: UsageSummary | null }> { + if (process.env.BUDGET_ROUTING_DISABLED === "1") { + return { candidates: params.candidates, usageSummary: null }; + } + + const summary = await getCachedProviderUsageSummary(); + + const scored = params.candidates.map((c, i) => ({ + candidate: c, + score: scoreBudgetCandidate(c, i, summary, params.complexityTier), + })); + + scored.sort((a, b) => a.score - b.score); + + let reordered = scored.map((s) => s.candidate); + + // Strict mode: remove severely over-budget candidates (but keep at least one) + if (params.enforcement === "strict" && summary) { + const filtered = reordered.filter((c) => { + const usage = getWorstUsageWindow(summary, c.provider); + return !usage || usage.worstPercent < 95; + }); + if (filtered.length > 0) { + reordered = filtered; + } + } + + const changed = + reordered.length !== params.candidates.length || + reordered.some((c, i) => c !== params.candidates[i]); + if (changed) { + log.info( + `Budget reorder (${params.complexityTier}): ${reordered.map((c) => `${c.provider}/${c.model}`).join(" → ")}`, + ); + } + + return { candidates: reordered, usageSummary: summary }; +} + +// --------------------------------------------------------------------------- +// runWithModelFallback +// --------------------------------------------------------------------------- + export async function runWithModelFallback(params: { cfg: OpenClawConfig | undefined; provider: string; @@ -593,13 +756,24 @@ export async function runWithModelFallback(params: { fallbacksOverride?: string[]; run: ModelFallbackRunFn; onError?: ModelFallbackErrorHandler; + /** Hints for complexity-based routing. Omit for default ("complex") behavior. */ + complexityHints?: ComplexityHints; }): Promise> { - const candidates = resolveFallbackCandidates({ + const rawCandidates = resolveFallbackCandidates({ cfg: params.cfg, provider: params.provider, model: params.model, fallbacksOverride: params.fallbacksOverride, }); + + const complexityTier = classifyComplexity(params.complexityHints ?? {}); + const enforcement = + process.env.BUDGET_ROUTING_ENFORCEMENT === "strict" ? ("strict" as const) : ("soft" as const); + const { candidates } = await reorderCandidatesByBudget({ + candidates: rawCandidates, + complexityTier, + enforcement, + }); const authStore = params.cfg ? ensureAuthProfileStore(params.agentDir, { allowKeychainPrompt: false }) : null; diff --git a/src/config/types.agents.ts b/src/config/types.agents.ts index ddc89809f6ed..35a5aacdc2f6 100644 --- a/src/config/types.agents.ts +++ b/src/config/types.agents.ts @@ -95,6 +95,7 @@ export type AgentConfig = { export type { ContentForwardConfig, ContentRoutingConfig, + FoodImageIntakeConfig, } from "../lionroot/config/content-routing-schema.js"; import type { ContentRoutingConfig } from "../lionroot/config/content-routing-schema.js"; diff --git a/src/config/zod-schema.agents.ts b/src/config/zod-schema.agents.ts index 6bc8a48f1dcb..c626bd94951c 100644 --- a/src/config/zod-schema.agents.ts +++ b/src/config/zod-schema.agents.ts @@ -8,6 +8,7 @@ export { ContentRoutingSchema, ContentForwardSchema, ContentRoutingAgentsSchema, + FoodImageIntakeSchema, } from "../lionroot/config/content-routing-schema.js"; export const AgentsSchema = z diff --git a/src/infra/provider-usage.cache.ts b/src/infra/provider-usage.cache.ts new file mode 100644 index 000000000000..5b3dec34771c --- /dev/null +++ b/src/infra/provider-usage.cache.ts @@ -0,0 +1,63 @@ +import { createSubsystemLogger } from "../logging/subsystem.js"; +import { loadProviderUsageSummary } from "./provider-usage.load.js"; +import type { UsageSummary } from "./provider-usage.types.js"; + +const log = createSubsystemLogger("budget/usage-cache"); + +const DEFAULT_CACHE_TTL_MS = 90_000; // 90 seconds +const DEFAULT_FETCH_TIMEOUT_MS = 3_000; + +type CachedUsageSummary = { + summary: UsageSummary; + fetchedAt: number; +}; + +let cached: CachedUsageSummary | null = null; +let inflight: Promise | null = null; + +/** + * Returns a cached provider usage summary, fetching from live APIs at most + * once per TTL window. Concurrent callers share a single in-flight request. + * Returns `null` on fetch failure — callers must treat `null` as "unknown" + * and fall back to default behavior. + */ +export async function getCachedProviderUsageSummary(opts?: { + ttlMs?: number; + timeoutMs?: number; +}): Promise { + const ttl = opts?.ttlMs ?? DEFAULT_CACHE_TTL_MS; + const now = Date.now(); + + if (cached && now - cached.fetchedAt < ttl) { + return cached.summary; + } + + if (inflight) { + try { + return await inflight; + } catch { + return cached?.summary ?? null; + } + } + + inflight = loadProviderUsageSummary({ + timeoutMs: opts?.timeoutMs ?? DEFAULT_FETCH_TIMEOUT_MS, + }); + + try { + const summary = await inflight; + cached = { summary, fetchedAt: Date.now() }; + return summary; + } catch (err) { + log.warn(`Usage summary fetch failed: ${err instanceof Error ? err.message : String(err)}`); + return cached?.summary ?? null; + } finally { + inflight = null; + } +} + +/** @internal — test helper */ +export function _clearUsageCache(): void { + cached = null; + inflight = null; +} diff --git a/src/infra/provider-usage.shared.ts b/src/infra/provider-usage.shared.ts index 6fa823db630a..bf6a874f4fc4 100644 --- a/src/infra/provider-usage.shared.ts +++ b/src/infra/provider-usage.shared.ts @@ -33,6 +33,35 @@ export function resolveUsageProviderId(provider?: string | null): UsageProviderI : undefined; } +/** + * Maps a model-fallback provider name to its underlying usage-tracked provider. + * CLI providers (claude-cli, codex) map to their API subscription quota. + * Local providers (ollama, exo) and external routers (blockrun) return null. + */ +export function resolveUsageProviderIdForRouting(provider: string): UsageProviderId | null { + const normalized = normalizeProviderId(provider); + switch (normalized) { + case "claude-cli": + case "anthropic": + return "anthropic"; + case "codex": + case "openai-codex": + return "openai-codex"; + case "google": + case "google-gemini-cli": + return "google-gemini-cli"; + case "zai": + return "zai"; + case "ollama": + case "exo": + case "blockrun": + case "lmstudio": + return null; // local or external — no tracked quota + default: + return resolveUsageProviderId(normalized) ?? null; + } +} + export const ignoredErrors = new Set([ "No credentials", "No token", diff --git a/src/infra/provider-usage.ts b/src/infra/provider-usage.ts index af69b18f9d71..28daba297769 100644 --- a/src/infra/provider-usage.ts +++ b/src/infra/provider-usage.ts @@ -3,8 +3,12 @@ export { formatUsageSummaryLine, formatUsageWindowSummary, } from "./provider-usage.format.js"; +export { getCachedProviderUsageSummary, _clearUsageCache } from "./provider-usage.cache.js"; export { loadProviderUsageSummary } from "./provider-usage.load.js"; -export { resolveUsageProviderId } from "./provider-usage.shared.js"; +export { + resolveUsageProviderId, + resolveUsageProviderIdForRouting, +} from "./provider-usage.shared.js"; export type { ProviderUsageSnapshot, UsageProviderId, diff --git a/src/lionroot/config/content-routing-schema.ts b/src/lionroot/config/content-routing-schema.ts index d90cff92066b..675ce213d7e3 100644 --- a/src/lionroot/config/content-routing-schema.ts +++ b/src/lionroot/config/content-routing-schema.ts @@ -21,6 +21,15 @@ export const ContentForwardSchema = z .strict() .optional(); +export const FoodImageIntakeSchema = z + .object({ + endpointUrl: z.string().url(), + bearerToken: z.string().min(1), + timeoutMs: z.number().int().positive().optional(), + }) + .strict() + .optional(); + export const ContentRoutingSchema = z .object({ enabled: z.boolean().optional(), @@ -30,6 +39,7 @@ export const ContentRoutingSchema = z defaultAgentId: z.string().optional(), agents: ContentRoutingAgentsSchema.optional(), forward: ContentForwardSchema, + foodImageIntake: FoodImageIntakeSchema, }) .strict() .optional(); @@ -48,6 +58,12 @@ export type ContentForwardConfig = { topicPrefix?: string; }; +export type FoodImageIntakeConfig = { + endpointUrl: string; + bearerToken: string; + timeoutMs?: number; +}; + export type ContentRoutingConfig = { enabled?: boolean; model?: string; @@ -56,4 +72,5 @@ export type ContentRoutingConfig = { defaultAgentId?: string; agents?: Record; forward?: ContentForwardConfig; + foodImageIntake?: FoodImageIntakeConfig; }; diff --git a/src/lionroot/content-intake.test.ts b/src/lionroot/content-intake.test.ts index 5a359a43517b..3388d042e321 100644 --- a/src/lionroot/content-intake.test.ts +++ b/src/lionroot/content-intake.test.ts @@ -16,6 +16,7 @@ const mockDeliverOutboundPayloads = vi.hoisted(() => vi.fn()); const mockClassifyContentWithLLM = vi.hoisted(() => vi.fn()); const mockResolveTwitterContent = vi.hoisted(() => vi.fn()); const mockMaybeHandleFoodImageCapture = vi.hoisted(() => vi.fn()); +const mockUploadFoodImageToCommandPost = vi.hoisted(() => vi.fn()); vi.mock("../auto-reply/dispatch.js", () => ({ dispatchInboundMessage: mockDispatchInboundMessage, @@ -38,6 +39,10 @@ vi.mock("./food-capture.js", () => ({ maybeHandleFoodImageCapture: mockMaybeHandleFoodImageCapture, })); +vi.mock("./routing/food-image-upload.js", () => ({ + uploadFoodImageToCommandPost: mockUploadFoodImageToCommandPost, +})); + import { handleContentIntake, type ContentIntakeParams } from "./content-intake.js"; function createConfig(): OpenClawConfig { @@ -59,10 +64,16 @@ function createConfig(): OpenClawConfig { streams: { cody: "04💻 coding-loop", liev: "08🌱 life-loop", + "liev:intake": "08🌱 intake-tracker", }, streamPattern: "{agent}", topicPrefix: "x", }, + foodImageIntake: { + endpointUrl: "http://127.0.0.1:3005/api/inbox/intake/food-image", + bearerToken: "food-token", + timeoutMs: 5000, + }, }, }, channels: { @@ -150,6 +161,7 @@ describe("handleContentIntake forwarded agent routing", () => { clearLastForward("+15551234567"); mockDeliverOutboundPayloads.mockReset().mockResolvedValue([]); mockResolveTwitterContent.mockReset().mockResolvedValue(null); + mockUploadFoodImageToCommandPost.mockReset().mockResolvedValue({ ok: true }); mockClassifyContentWithLLM.mockReset().mockResolvedValue({ kind: "recognized", agentId: "cody", @@ -248,7 +260,7 @@ describe("handleContentIntake forwarded agent routing", () => { it("lets abstained classifications fall back to normal dispatch", async () => { const params = createParams(); - params.bodyText = "some ambiguous note"; + params.bodyText = "can you think this through with me"; mockClassifyContentWithLLM.mockResolvedValueOnce({ kind: "abstain", confidence: "low", @@ -290,9 +302,68 @@ describe("handleContentIntake forwarded agent routing", () => { ); }); + it("intercepts meal image reports before LLM classification and uploads them", async () => { + const params = createParams(); + params.bodyText = "This was my breakfast. I had my morning supplements."; + params.mediaPath = "/tmp/meal.heic"; + params.mediaType = "image/heic"; + params.mediaPaths = [params.mediaPath]; + params.mediaTypes = [params.mediaType]; + + const result = await handleContentIntake(params); + + expect(result).toEqual({ handled: true }); + expect(mockClassifyContentWithLLM).not.toHaveBeenCalled(); + expect(mockUploadFoodImageToCommandPost).toHaveBeenCalledWith({ + imagePath: "/tmp/meal.heic", + mealText: "This was my breakfast. I had my morning supplements.", + endpointUrl: "http://127.0.0.1:3005/api/inbox/intake/food-image", + bearerToken: "food-token", + timeoutMs: 5000, + }); + expect(mockDeliverOutboundPayloads).toHaveBeenCalledTimes(2); + expect(mockDeliverOutboundPayloads.mock.calls[0]?.[0]?.to).toContain("08🌱 intake-tracker"); + }); + + it("still forwards meal image reports when upload fails", async () => { + const params = createParams(); + params.bodyText = "Lunch with rice and chicken"; + params.mediaPath = "/tmp/lunch.jpg"; + params.mediaType = "image/jpeg"; + params.mediaPaths = [params.mediaPath]; + params.mediaTypes = [params.mediaType]; + mockUploadFoodImageToCommandPost.mockResolvedValueOnce({ ok: false, error: "timeout" }); + + const result = await handleContentIntake(params); + + expect(result).toEqual({ handled: true }); + expect(mockClassifyContentWithLLM).not.toHaveBeenCalled(); + expect(mockDeliverOutboundPayloads).toHaveBeenCalledTimes(2); + expect(params.decision.route.agentId).toBe("liev"); + }); + + it("skips direct upload when food image intake config is absent", async () => { + const params = createParams(); + params.bodyText = "Dinner with steak and potatoes"; + params.mediaPath = "/tmp/dinner.jpg"; + params.mediaType = "image/jpeg"; + params.mediaPaths = [params.mediaPath]; + params.mediaTypes = [params.mediaType]; + delete (params.cfg.agents as { contentRouting?: { foodImageIntake?: unknown } }).contentRouting + ?.foodImageIntake; + + const result = await handleContentIntake(params); + + expect(result).toEqual({ handled: true }); + expect(mockUploadFoodImageToCommandPost).not.toHaveBeenCalled(); + expect(mockClassifyContentWithLLM).not.toHaveBeenCalled(); + expect(mockDeliverOutboundPayloads).toHaveBeenCalledTimes(2); + expect(mockDeliverOutboundPayloads.mock.calls[0]?.[0]?.to).toContain("08🌱 intake-tracker"); + }); + it("passes readable attachment text into content classification", async () => { const params = createParams(); - params.bodyText = "please route this attachment"; + params.bodyText = "please route this attached text"; const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-content-intake-")); const textPath = path.join(tempDir, "PastedText.txt"); await fs.writeFile( diff --git a/src/lionroot/content-intake.ts b/src/lionroot/content-intake.ts index ffde60d0c021..6d6bb961a6bb 100644 --- a/src/lionroot/content-intake.ts +++ b/src/lionroot/content-intake.ts @@ -52,6 +52,7 @@ import { TWITTER_STATUS_RE, URL_RE, } from "./routing/content-route.js"; +import { uploadFoodImageToCommandPost } from "./routing/food-image-upload.js"; const IMESSAGE_REPLY_TIMEOUT_SECONDS = 90; const FORWARDED_AGENT_FAILURE_SUMMARY = @@ -412,7 +413,7 @@ export async function handleContentIntake( // Follow-up to a recent forward: if the same sender sends a non-URL message // within the TTL window, append it to the same Zulip topic and dispatch agent. const lastFwd = getLastForward(decision.senderNormalized); - const hasNewUrl = URL_RE.test(bodyText); + const hasNewUrl = bodyText.match(URL_RE) !== null; if (lastFwd && !hasNewUrl) { // Forward follow-up text (and media) to existing Zulip topic const followUpPayloads: ReplyPayload[] = [ @@ -546,8 +547,38 @@ export async function handleContentIntake( mediaTypes, }); + const fastPathClassification = resolveContentRouteFastPath({ + text: bodyText, + mediaType, + }); + if ( + fastPathClassification?.kind === "recognized" && + fastPathClassification.agentId === "liev" && + fastPathClassification.category === "intake" && + mediaPath && + mediaType?.startsWith("image/") && + contentRoutingCfg.foodImageIntake + ) { + void uploadFoodImageToCommandPost({ + imagePath: mediaPath, + mealText: bodyText, + endpointUrl: contentRoutingCfg.foodImageIntake.endpointUrl, + bearerToken: contentRoutingCfg.foodImageIntake.bearerToken, + timeoutMs: contentRoutingCfg.foodImageIntake.timeoutMs, + }) + .then((result) => { + logVerbose( + result.ok + ? "food-image-upload: success" + : `food-image-upload: failed — ${result.error}`, + ); + }) + .catch((error) => { + logVerbose(`food-image-upload: unexpected error — ${String(error)}`); + }); + } + // 2. Classify content - const fastPathClassification = resolveContentRouteFastPath({ text: bodyText, mediaType }); const classification = fastPathClassification ?? (await classifyContentWithLLM({ diff --git a/src/lionroot/routing/content-intake-pipe.test.ts b/src/lionroot/routing/content-intake-pipe.test.ts index a4b0ae89f937..d98dccd2de0c 100644 --- a/src/lionroot/routing/content-intake-pipe.test.ts +++ b/src/lionroot/routing/content-intake-pipe.test.ts @@ -93,45 +93,36 @@ describe("intake pipe: scenario walkthrough", () => { }); it("scenario 2: breakfast photo → intake-tracker stream", async () => { - vi.stubGlobal( - "fetch", - vi.fn().mockResolvedValueOnce({ - ok: true, - json: async () => ({ message: { content: "liev:intake" } }), - }), - ); - - const classification = await classifyContentWithLLM({ - text: "", + const classification = resolveContentRouteFastPath({ + text: "This was my breakfast with eggs and coffee", mediaType: "image/jpeg", - model: "test-model", - ollamaUrl: "http://localhost:11434", - agentDescriptions: AGENT_DESCRIPTIONS, }); - expect(classification.kind).toBe("recognized"); - if (classification.kind !== "recognized") { + expect(classification?.kind).toBe("recognized"); + if (!classification || classification.kind !== "recognized") { throw new Error("expected recognized classification"); } expect(classification.agentId).toBe("liev"); expect(classification.category).toBe("intake"); const topicInfo = buildTopicSuffix({ - text: "", + text: "This was my breakfast with eggs and coffee", mediaType: "image/jpeg", }); - expect(topicInfo.prefix).toBe("photo"); + expect(topicInfo.prefix).toBeUndefined(); const target = buildForwardTarget({ config: forwardConfig, - agentId: "liev", - category: "intake", + agentId: classification.agentId, + category: classification.category, topicSuffix: topicInfo.suffix, topicPrefix: topicInfo.prefix, }); - // intake category → intake-tracker stream - expect(target.to).toMatch(/^stream:08🌱 intake-tracker:topic:photo: \d{2}-\d{2}$/); + // intake category → intake-tracker stream using the meal text as topic suffix + expect(target.to).toBe( + "stream:08🌱 intake-tracker:topic:x: This was my breakfast with eggs and coffee", + ); }); it("scenario 3: text question → intake-tracker stream", async () => { diff --git a/src/lionroot/routing/content-route.test.ts b/src/lionroot/routing/content-route.test.ts index 4b3e42d6d4fb..7a015082eb15 100644 --- a/src/lionroot/routing/content-route.test.ts +++ b/src/lionroot/routing/content-route.test.ts @@ -1,6 +1,7 @@ import { afterEach, describe, expect, it, vi } from "vitest"; import type { OpenClawConfig } from "../../config/config.js"; import { + MEAL_INDICATOR_RE, parseAgentCategory, resolveContentRouteFastPath, classifyContentWithLLM, @@ -59,6 +60,45 @@ describe("parseAgentCategory", () => { // ── Fast-path tests ── describe("resolveContentRouteFastPath", () => { + it("routes food image meal reports to liev intake", () => { + const result = resolveContentRouteFastPath({ + text: "This was my breakfast with eggs and coffee", + mediaType: "image/heic", + }); + expect(result).toEqual({ + kind: "recognized", + agentId: "liev", + category: "intake", + confidence: "high", + reason: "fast-path: food image with meal text", + }); + }); + + it("does not trigger for non-image media with meal text", () => { + const result = resolveContentRouteFastPath({ + text: "This was my lunch", + mediaType: "audio/mpeg", + }); + expect(result).toBeNull(); + }); + + it("does not trigger for image without meal text", () => { + const result = resolveContentRouteFastPath({ + text: "check this screenshot", + mediaType: "image/jpeg", + }); + expect(result).toBeNull(); + }); + + it("prioritizes food images over GitHub URL matching", () => { + const result = resolveContentRouteFastPath({ + text: "breakfast recipe https://github.com/openclaw/openclaw/pull/123", + mediaType: "image/png", + }); + expect(result?.agentId).toBe("liev"); + expect(result?.category).toBe("intake"); + }); + it("routes GitHub URLs to Cody", () => { const result = resolveContentRouteFastPath({ text: "Check this out https://github.com/openclaw/openclaw/pull/123", @@ -277,6 +317,14 @@ describe("resolveContentRoutingConfig", () => { // ── LLM classification tests ── +describe("MEAL_INDICATOR_RE", () => { + it("matches common meal-report keywords", () => { + expect(MEAL_INDICATOR_RE.test("breakfast with supplements")).toBe(true); + expect(MEAL_INDICATOR_RE.test("meal prep container")).toBe(true); + expect(MEAL_INDICATOR_RE.test("dashboard screenshot")).toBe(false); + }); +}); + describe("classifyContentWithLLM", () => { it("returns agent from successful LLM response", async () => { const mockFetch = vi.fn().mockResolvedValueOnce({ diff --git a/src/lionroot/routing/content-route.ts b/src/lionroot/routing/content-route.ts index 6fe4eec76d73..42fa4f1ba0aa 100644 --- a/src/lionroot/routing/content-route.ts +++ b/src/lionroot/routing/content-route.ts @@ -34,6 +34,11 @@ export type ContentRoutingConfig = { stickyTimeoutMs?: number; defaultAgentId?: string; agents: Record; + foodImageIntake?: { + endpointUrl: string; + bearerToken: string; + timeoutMs?: number; + }; }; const DEFAULT_MODEL = "qwen3:14b"; @@ -48,6 +53,8 @@ export const TWITTER_STATUS_RE = /(?:twitter\.com|x\.com)\/[^/]+\/status\/(\d+)/ // GitHub URL pattern const GITHUB_RE = /github\.com\//i; +export const MEAL_INDICATOR_RE = + /\b(breakfast|lunch|dinner|snack|brunch|meal|ate|eating|food|calories|macros|protein|intake|nutrition|prep|smoothie|shake|coffee|supplements|recipe|cooking|cooked)\b/i; const HEALTH_TAG_PATTERNS: Array<{ pattern: RegExp; @@ -88,6 +95,13 @@ export function resolveContentRoutingConfig(cfg: OpenClawConfig): ContentRouting stickyTimeoutMs: cr.stickyTimeoutMs, defaultAgentId: typeof cr.defaultAgentId === "string" ? cr.defaultAgentId.trim() : undefined, agents, + foodImageIntake: cr.foodImageIntake + ? { + endpointUrl: cr.foodImageIntake.endpointUrl, + bearerToken: cr.foodImageIntake.bearerToken, + timeoutMs: cr.foodImageIntake.timeoutMs, + } + : undefined, }; } @@ -111,6 +125,16 @@ export function resolveContentRouteFastPath(opts: { } } + if (opts.mediaType?.startsWith("image/") && MEAL_INDICATOR_RE.test(opts.text)) { + return { + kind: "recognized", + agentId: "liev", + category: "intake", + confidence: "high", + reason: "fast-path: food image with meal text", + }; + } + const urls = opts.text.match(URL_RE); if (!urls) { return null; From 03f60f912ec99e6467fe030bcd0aa09bc3f1208c Mon Sep 17 00:00:00 2001 From: Bryan Fisher Date: Wed, 18 Mar 2026 14:41:53 -0400 Subject: [PATCH 6/7] feat(routing): add routing decision ring buffer and budget.routing-status RPC Records recent model routing decisions (tier, scores, reorder, selected model) in a 50-entry ring buffer and exposes them via a new gateway RPC endpoint for Command Post dashboard visibility. Co-Authored-By: Claude Opus 4.6 --- src/agents/model-fallback.ts | 37 +++++++++++-- src/agents/routing-decisions.ts | 80 ++++++++++++++++++++++++++++ src/gateway/server-methods/budget.ts | 45 ++++++++++++++++ 3 files changed, 159 insertions(+), 3 deletions(-) create mode 100644 src/agents/routing-decisions.ts diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts index 187b770a9f05..39e3dd41cdf2 100644 --- a/src/agents/model-fallback.ts +++ b/src/agents/model-fallback.ts @@ -35,6 +35,7 @@ import { } from "./model-selection.js"; import type { FailoverReason } from "./pi-embedded-helpers.js"; import { isLikelyContextOverflowError } from "./pi-embedded-helpers.js"; +import { recordRoutingDecision } from "./routing-decisions.js"; const log = createSubsystemLogger("model-fallback"); @@ -704,7 +705,11 @@ async function reorderCandidatesByBudget(params: { candidates: ModelCandidate[]; complexityTier: ComplexityTier; enforcement: "soft" | "strict"; -}): Promise<{ candidates: ModelCandidate[]; usageSummary: UsageSummary | null }> { +}): Promise<{ + candidates: ModelCandidate[]; + usageSummary: UsageSummary | null; + scored?: Array<{ provider: string; model: string; score: number; usagePercent?: number }>; +}> { if (process.env.BUDGET_ROUTING_DISABLED === "1") { return { candidates: params.candidates, usageSummary: null }; } @@ -740,7 +745,18 @@ async function reorderCandidatesByBudget(params: { ); } - return { candidates: reordered, usageSummary: summary }; + return { + candidates: reordered, + usageSummary: summary, + scored: scored.map((s) => ({ + provider: s.candidate.provider, + model: s.candidate.model, + score: s.score, + usagePercent: summary + ? (getWorstUsageWindow(summary, s.candidate.provider)?.worstPercent ?? undefined) + : undefined, + })), + }; } // --------------------------------------------------------------------------- @@ -769,11 +785,24 @@ export async function runWithModelFallback(params: { const complexityTier = classifyComplexity(params.complexityHints ?? {}); const enforcement = process.env.BUDGET_ROUTING_ENFORCEMENT === "strict" ? ("strict" as const) : ("soft" as const); - const { candidates } = await reorderCandidatesByBudget({ + const { candidates, scored: providerScores } = await reorderCandidatesByBudget({ candidates: rawCandidates, complexityTier, enforcement, }); + const routingDecision = { + timestamp: new Date().toISOString(), + complexityTier, + enforcement, + originalOrder: rawCandidates.map((c) => `${c.provider}/${c.model}`), + finalOrder: candidates.map((c) => `${c.provider}/${c.model}`), + reordered: + candidates.some((c, i) => c !== rawCandidates[i]) || + candidates.length !== rawCandidates.length, + providerScores: providerScores ?? undefined, + selectedModel: undefined as string | undefined, + }; + const authStore = params.cfg ? ensureAuthProfileStore(params.agentDir, { allowKeychainPrompt: false }) : null; @@ -848,6 +877,8 @@ export async function runWithModelFallback(params: { `Model "${sanitizeForLog(notFoundAttempt.provider)}/${sanitizeForLog(notFoundAttempt.model)}" not found. Fell back to "${sanitizeForLog(candidate.provider)}/${sanitizeForLog(candidate.model)}".`, ); } + routingDecision.selectedModel = `${candidate.provider}/${candidate.model}`; + recordRoutingDecision(routingDecision); return attemptRun.success; } const err = attemptRun.error; diff --git a/src/agents/routing-decisions.ts b/src/agents/routing-decisions.ts new file mode 100644 index 000000000000..59c8c03d67bd --- /dev/null +++ b/src/agents/routing-decisions.ts @@ -0,0 +1,80 @@ +/** + * Ring buffer for recent model routing decisions. + * + * Stores the last N routing decisions made by model-fallback so the + * gateway can expose them via `budget.routing-status` for the Command + * Post dashboard. + */ + +export type RoutingDecision = { + /** ISO timestamp */ + timestamp: string; + /** Which agent made the request */ + agentId?: string; + /** Detected complexity tier */ + complexityTier: string; + /** Budget enforcement mode */ + enforcement: "soft" | "strict"; + /** Original candidate order before budget reordering */ + originalOrder: string[]; + /** Final candidate order after budget reordering */ + finalOrder: string[]; + /** Whether the order changed */ + reordered: boolean; + /** Which model was ultimately selected (first success) */ + selectedModel?: string; + /** Provider usage snapshot at decision time */ + providerScores?: Array<{ + provider: string; + model: string; + score: number; + usagePercent?: number; + }>; +}; + +const MAX_DECISIONS = 50; +const ring: RoutingDecision[] = []; + +export function recordRoutingDecision(decision: RoutingDecision): void { + ring.push(decision); + if (ring.length > MAX_DECISIONS) { + ring.shift(); + } +} + +export function getRecentRoutingDecisions(limit = 20): RoutingDecision[] { + return ring.slice(-limit).toReversed(); // most recent first +} + +export function getRoutingDecisionStats(): { + totalDecisions: number; + reorderedCount: number; + tierBreakdown: Record; + topSelectedModels: Array<{ model: string; count: number }>; +} { + const tierBreakdown: Record = {}; + const modelCounts: Record = {}; + let reorderedCount = 0; + + for (const d of ring) { + tierBreakdown[d.complexityTier] = (tierBreakdown[d.complexityTier] ?? 0) + 1; + if (d.reordered) { + reorderedCount++; + } + if (d.selectedModel) { + modelCounts[d.selectedModel] = (modelCounts[d.selectedModel] ?? 0) + 1; + } + } + + const topSelectedModels = Object.entries(modelCounts) + .map(([model, count]) => ({ model, count })) + .toSorted((a, b) => b.count - a.count) + .slice(0, 10); + + return { + totalDecisions: ring.length, + reorderedCount, + tierBreakdown, + topSelectedModels, + }; +} diff --git a/src/gateway/server-methods/budget.ts b/src/gateway/server-methods/budget.ts index a271b2dd6795..1a711c4177d1 100644 --- a/src/gateway/server-methods/budget.ts +++ b/src/gateway/server-methods/budget.ts @@ -5,6 +5,11 @@ */ import { resolveDefaultAgentId } from "../../agents/agent-scope.js"; +import { TIER_MODEL_PREFERENCES } from "../../agents/complexity-tier.js"; +import { + getRecentRoutingDecisions, + getRoutingDecisionStats, +} from "../../agents/routing-decisions.js"; import { loadConfig } from "../../config/config.js"; import { checkBudgetGates, type WorkCategory } from "../../infra/budget-gates.js"; import { @@ -13,6 +18,7 @@ import { formatFleetCostSummary, } from "../../infra/cost-attribution.js"; import { formatErrorMessage } from "../../infra/errors.js"; +import { getCachedProviderUsageSummary } from "../../infra/provider-usage.cache.js"; import { readEmbeddingCostSummary } from "../../memory/embedding-cost-tracker.js"; import type { GatewayRequestHandlers } from "./types.js"; @@ -121,6 +127,45 @@ export const budgetHandlers: GatewayRequestHandlers = { } }, + /** + * Get current routing status: recent decisions, provider scores, tier config. + * Params: { limit?: number } + */ + "budget.routing-status": async ({ respond, params }) => { + const limit = typeof params?.limit === "number" ? params.limit : 20; + + try { + const decisions = getRecentRoutingDecisions(limit); + const stats = getRoutingDecisionStats(); + const usageSummary = await getCachedProviderUsageSummary(); + const enforcement = process.env.BUDGET_ROUTING_ENFORCEMENT === "strict" ? "strict" : "soft"; + const disabled = process.env.BUDGET_ROUTING_DISABLED === "1"; + + respond(true, { + enabled: !disabled, + enforcement, + dailySpendCeiling: Number(process.env.DAILY_SPEND_CEILING || 0), + tierConfig: TIER_MODEL_PREFERENCES, + stats, + recentDecisions: decisions, + providerUsage: usageSummary + ? usageSummary.providers.map((p) => ({ + provider: p.provider, + displayName: p.displayName, + plan: p.plan, + windows: p.windows, + })) + : null, + updatedAt: new Date().toISOString(), + }); + } catch (err) { + respond(false, undefined, { + code: "-1", + message: `Failed to load routing status: ${formatErrorMessage(err)}`, + }); + } + }, + /** * Get embedding cost summary for an agent. * Params: { agentId?: string, days?: number } From c8e5a3b5b21bfcdc53af10d9715bae9ee625cc04 Mon Sep 17 00:00:00 2001 From: Bryan Fisher Date: Sun, 22 Mar 2026 02:36:58 -0400 Subject: [PATCH 7/7] fix(gateway): avoid local fallback after accepted agent runs --- .../gateway-agent-1006-fix-2026-03-22/plan.md | 69 ++++++++++++ .../gateway-agent-1006-fix-2026-03-22/prd.md | 35 ++++++ .workflow/inputs/original-request.md | 12 +- .workflow/plan.md | 106 ++++++++---------- .workflow/prd.md | 38 +++---- src/commands/agent-via-gateway.test.ts | 15 +++ src/commands/agent-via-gateway.ts | 7 ++ src/gateway/call.test.ts | 29 ++++- src/gateway/call.ts | 10 +- src/gateway/client.test.ts | 46 ++++++++ src/gateway/client.ts | 24 +++- 11 files changed, 300 insertions(+), 91 deletions(-) create mode 100644 .workflow/archive/gateway-agent-1006-fix-2026-03-22/plan.md create mode 100644 .workflow/archive/gateway-agent-1006-fix-2026-03-22/prd.md diff --git a/.workflow/archive/gateway-agent-1006-fix-2026-03-22/plan.md b/.workflow/archive/gateway-agent-1006-fix-2026-03-22/plan.md new file mode 100644 index 000000000000..8a9467721dde --- /dev/null +++ b/.workflow/archive/gateway-agent-1006-fix-2026-03-22/plan.md @@ -0,0 +1,69 @@ +# Technical Plan: Cron Agent Routing Repair for Self-Learning Jobs + +**Status:** Approved +**Date:** 2026-03-12 +**Flow/Context Builder Output:** Repo-grounded planning identified `clawdbot/src/gateway/server-cron.ts` as the primary fault point. The live runtime config is also missing `maclern`, so a complete fix needs both gateway hardening and a config repair. + +## Architecture + +- `src/gateway/server-cron.ts` is the cron gateway wiring layer. It currently resolves requested cron agents through `resolveCronAgent`, which falls back to the default agent when the requested agent is absent from `agents.list`. +- `src/cron/service/timer.ts` passes `job.agentId` and `job.sessionKey` into the gateway callbacks, so the gateway layer is the final authority for cron wake/session routing. +- `src/cron/isolated-agent/run.ts` already supports requested agent IDs for isolated runs, but workspace selection still depends on runtime config when an explicit per-agent workspace is needed. +- The live config in `~/.openclaw/openclaw.json` must include `maclern` so the runtime resolves the intended workspace `/Users/lionheart/clawd/agents/maclern` and agent directory `~/.openclaw/agents/maclern/agent`. + +## Files to Modify + +- `clawdbot/src/gateway/server-cron.ts` — preserve requested isolated cron agent IDs instead of silently falling back to `main` when the agent is missing from config. +- `clawdbot/src/gateway/server-cron.test.ts` — add regression coverage for a cron agent that is missing from `agents.list`. +- `.openclaw/openclaw.json` — add the missing `maclern` agent entry following the existing per-agent workspace/agentDir convention. + +## New Files + +- None. + +## Tasks + +1. [ ] Patch `server-cron.ts` so `resolveCronAgent` preserves an explicit requested `agentId` for isolated cron routing. +2. [ ] Add regression tests that cover the missing-agent-in-config case and verify the requested agent is passed into isolated cron runs. +3. [ ] Patch `~/.openclaw/openclaw.json` to add a `maclern` agent entry with the correct workspace and agent directory. +4. [ ] Verify there are no other current cron agent IDs missing from `agents.list`. +5. [ ] Run targeted tests for the gateway cron path. +6. [ ] Do a review pass before wrapping up. + +## Detailed File Plan + +### 1) `clawdbot/src/gateway/server-cron.ts` + +- Update `resolveCronAgent` to normalize and preserve a non-empty requested `agentId` instead of requiring membership in `agents.list`. +- Keep the default-agent fallback only for empty or missing `agentId`. +- Leave main-session constraints enforced by existing cron job validation in `src/cron/service/jobs.ts`. + +### 2) `clawdbot/src/gateway/server-cron.test.ts` + +- Mock or observe the isolated cron run path so the test can assert that a job with `agentId: "maclern"` is passed through as `maclern` even when runtime config only lists `main`. +- Add coverage for the summary/wake path if needed to prove the gateway callbacks stay agent-scoped. + +### 3) `.openclaw/openclaw.json` + +- Add a `maclern` entry under `agents.list` using the existing convention: + - `workspace`: `/Users/lionheart/clawd/agents/maclern` + - `agentDir`: `/Users/lionheart/.openclaw/agents/maclern/agent` +- Do not disturb the ordering or semantics of existing agent entries beyond what is necessary. + +## Testing Strategy + +- Run targeted gateway cron tests covering the new regression. +- Re-check the current cron agent IDs against `agents.list` after the config patch. +- If practical, inspect the resolved Maclern routing via a focused local invocation or config readback. + +## Rollback Plan + +- Revert the `server-cron.ts` change to restore the previous fallback behavior. +- Remove the `maclern` config entry if it proves incorrect. +- Existing misrouted historical runs remain untouched. + +## Risks and Validation Gates + +- The code hardening should not change behavior for default-agent cron jobs. +- The config patch must not duplicate an existing `maclern` entry or break JSON formatting. +- Main-session jobs must remain restricted to the default agent through current validation. diff --git a/.workflow/archive/gateway-agent-1006-fix-2026-03-22/prd.md b/.workflow/archive/gateway-agent-1006-fix-2026-03-22/prd.md new file mode 100644 index 000000000000..16c0733dcc81 --- /dev/null +++ b/.workflow/archive/gateway-agent-1006-fix-2026-03-22/prd.md @@ -0,0 +1,35 @@ +# PRD: Cron Agent Routing Repair for Self-Learning Jobs + +**Status:** Approved +**Date:** 2026-03-12 +**Provenance:** See `.workflow/inputs/original-request.md` and `.workflow/inputs/references.md`. + +## Summary + +Repair isolated cron routing so Maclern self-learning jobs run as Maclern instead of silently falling back to the default `main` agent, and harden the cron runtime so future non-default cron agents do not misroute their self-learning jobs into the wrong agent/session namespace. + +## User Stories + +- As Bryan, I want Maclern overnight self-learning jobs to run with Maclern’s own agent identity, workspace, and session store so the digest reflects actual training context. +- As an operator, I want cron jobs for non-default agents to fail or route correctly instead of silently downgrading to `main`. +- As a future agent owner, I want isolated cron jobs to remain agent-scoped even when the runtime config is incomplete. + +## Acceptance Criteria + +- [ ] `maclern-nightly-queue-review` and `maclern-overnight-digest` no longer resolve to `main` because `maclern` is absent from `agents.list`. +- [ ] Isolated cron jobs preserve the requested non-default `agentId` through gateway cron routing, session key selection, wake routing, and isolated run execution. +- [ ] Maclern resolves to its intended workspace and agent directory for self-learning runs instead of using the default main workspace. +- [ ] Regression tests cover the missing-agent-in-config case for isolated cron jobs. +- [ ] The live runtime config includes the Maclern agent entry needed for correct workspace mapping. + +## Out of Scope + +- Rewriting Maclern’s prompts, training artifacts, or Training Ops product surfaces. +- Changing main-session cron semantics for the default agent. +- Backfilling old misrouted Maclern session history. + +## Technical Notes + +- Current root cause is twofold: `server-cron.ts` falls back to `main` when a requested cron agent is not present in runtime config, and the live config currently omits `maclern`. +- The code change should be minimal and defensible, with regression tests in gateway cron coverage. +- The runtime config patch should follow the existing per-agent convention already used for agents like `cody`, `storie`, and `grove`. diff --git a/.workflow/inputs/original-request.md b/.workflow/inputs/original-request.md index 80f819837f15..4e9b074eab21 100644 --- a/.workflow/inputs/original-request.md +++ b/.workflow/inputs/original-request.md @@ -1,16 +1,16 @@ # Original Request -**Date:** 2026-03-12 +**Date:** 2026-03-22 **Source:** Codex session **From:** Bryan ## The Ask -> okay, fix maclern, and the other agents if they didn't follow their self-learning +> yes, put it in a new worktree and fix it ## Initial Context -- Overnight investigation for Wednesday night, March 11, 2026 into Thursday morning, March 12, 2026 found Maclern self-learning jobs produced irrelevant output. -- Evidence showed the latest Maclern digest was recorded under `agent:main:cron:...` and used workspace `/Users/lionheart/clawd`. -- Live runtime config at `~/.openclaw/openclaw.json` does not include `maclern` in `agents.list`, while `~/.openclaw/cron/jobs.json` includes `maclern-nightly-queue-review` and `maclern-overnight-digest`. -- Existing cron agent IDs in config and jobs were compared; `maclern` is the only current cron agent missing from `agents.list`. +- The resolved-model telemetry bug was fixed separately. +- A distinct bug remains in the gateway transport path for `openclaw agent`. +- CLI gateway calls for `agent` can fail with `gateway closed (1006 abnormal closure (no close frame))` and then fall back to embedded local execution. +- That fallback is unsafe if the gateway had already accepted the run, because it can duplicate execution. diff --git a/.workflow/plan.md b/.workflow/plan.md index 8a9467721dde..c8f99e6e2a68 100644 --- a/.workflow/plan.md +++ b/.workflow/plan.md @@ -1,69 +1,51 @@ -# Technical Plan: Cron Agent Routing Repair for Self-Learning Jobs +# Technical Plan: Gateway Agent 1006 Final-Response Transport Fix -**Status:** Approved -**Date:** 2026-03-12 -**Flow/Context Builder Output:** Repo-grounded planning identified `clawdbot/src/gateway/server-cron.ts` as the primary fault point. The live runtime config is also missing `maclern`, so a complete fix needs both gateway hardening and a config repair. +**Status:** Draft +**Date:** 2026-03-22 +**Source:** RepoPrompt context_builder + plan chat `gateway-1006-agent-57FB48` ## Architecture -- `src/gateway/server-cron.ts` is the cron gateway wiring layer. It currently resolves requested cron agents through `resolveCronAgent`, which falls back to the default agent when the requested agent is absent from `agents.list`. -- `src/cron/service/timer.ts` passes `job.agentId` and `job.sessionKey` into the gateway callbacks, so the gateway layer is the final authority for cron wake/session routing. -- `src/cron/isolated-agent/run.ts` already supports requested agent IDs for isolated runs, but workspace selection still depends on runtime config when an explicit per-agent workspace is needed. -- The live config in `~/.openclaw/openclaw.json` must include `maclern` so the runtime resolves the intended workspace `/Users/lionheart/clawd/agents/maclern` and agent directory `~/.openclaw/agents/maclern/agent`. +The `openclaw agent` CLI uses `agentViaGatewayCommand()` to call the gateway `agent` RPC with `expectFinal:true`. The gateway method sends two `res` frames on the same request ID: an immediate `accepted` ack and a terminal `ok`/`error` response. The client transport currently waits through the ack, but if the websocket closes before the terminal response arrives, the close is surfaced as a generic error and `agentCliCommand()` unconditionally falls back to embedded local execution. That can duplicate work when the gateway already accepted the run. ## Files to Modify -- `clawdbot/src/gateway/server-cron.ts` — preserve requested isolated cron agent IDs instead of silently falling back to `main` when the agent is missing from config. -- `clawdbot/src/gateway/server-cron.test.ts` — add regression coverage for a cron agent that is missing from `agents.list`. -- `.openclaw/openclaw.json` — add the missing `maclern` agent entry following the existing per-agent workspace/agentDir convention. - -## New Files - -- None. - -## Tasks - -1. [ ] Patch `server-cron.ts` so `resolveCronAgent` preserves an explicit requested `agentId` for isolated cron routing. -2. [ ] Add regression tests that cover the missing-agent-in-config case and verify the requested agent is passed into isolated cron runs. -3. [ ] Patch `~/.openclaw/openclaw.json` to add a `maclern` agent entry with the correct workspace and agent directory. -4. [ ] Verify there are no other current cron agent IDs missing from `agents.list`. -5. [ ] Run targeted tests for the gateway cron path. -6. [ ] Do a review pass before wrapping up. - -## Detailed File Plan - -### 1) `clawdbot/src/gateway/server-cron.ts` - -- Update `resolveCronAgent` to normalize and preserve a non-empty requested `agentId` instead of requiring membership in `agents.list`. -- Keep the default-agent fallback only for empty or missing `agentId`. -- Leave main-session constraints enforced by existing cron job validation in `src/cron/service/jobs.ts`. - -### 2) `clawdbot/src/gateway/server-cron.test.ts` - -- Mock or observe the isolated cron run path so the test can assert that a job with `agentId: "maclern"` is passed through as `maclern` even when runtime config only lists `main`. -- Add coverage for the summary/wake path if needed to prove the gateway callbacks stay agent-scoped. - -### 3) `.openclaw/openclaw.json` - -- Add a `maclern` entry under `agents.list` using the existing convention: - - `workspace`: `/Users/lionheart/clawd/agents/maclern` - - `agentDir`: `/Users/lionheart/.openclaw/agents/maclern/agent` -- Do not disturb the ordering or semantics of existing agent entries beyond what is necessary. - -## Testing Strategy - -- Run targeted gateway cron tests covering the new regression. -- Re-check the current cron agent IDs against `agents.list` after the config patch. -- If practical, inspect the resolved Maclern routing via a focused local invocation or config readback. - -## Rollback Plan - -- Revert the `server-cron.ts` change to restore the previous fallback behavior. -- Remove the `maclern` config entry if it proves incorrect. -- Existing misrouted historical runs remain untouched. - -## Risks and Validation Gates - -- The code hardening should not change behavior for default-agent cron jobs. -- The config patch must not duplicate an existing `maclern` entry or break JSON formatting. -- Main-session jobs must remain restricted to the default agent through current validation. +- `src/gateway/client.ts` — add ack-tracking state for `expectFinal:true`, a typed accepted-then-closed error, and disable reconnects for one-shot clients. +- `src/gateway/call.ts` — configure one-shot gateway clients with `reconnect: false` and surface accepted-then-closed as a distinct error. +- `src/commands/agent-via-gateway.ts` — stop local fallback when the gateway had already accepted the run; keep fallback for genuine pre-accept failures. +- `src/gateway/client.test.ts` — cover ack tracking, accepted-then-close, and reconnect-disabled behavior. +- `src/gateway/call.test.ts` — cover propagation of accepted-then-close for one-shot `callGateway()` usage. +- `src/commands/agent-via-gateway.test.ts` — cover no-fallback-after-accept and continued fallback for pre-accept failures. + +## New Types / Errors + +- `GatewayRequestAcceptedError` in `src/gateway/client.ts` + - subclass of `Error` + - used when a connection closes after an `accepted` ack was already observed for a pending `expectFinal:true` request. + +## Implementation Steps + +1. Add `ackReceived` tracking to pending requests in `src/gateway/client.ts` and expose whether the client has seen an accepted ack. +2. Add `GatewayRequestAcceptedError` and emit it when a pending accepted request is interrupted by close. +3. Add `reconnect?: boolean` to `GatewayClientOptions`; set `reconnect: false` for one-shot `callGateway()` clients. +4. Update `src/gateway/call.ts` to use the new error type in `onClose` when the request was already accepted. +5. Update `src/commands/agent-via-gateway.ts` so `agentCliCommand()` does not fall back to embedded execution on `GatewayRequestAcceptedError`. +6. Add focused tests for `client`, `call`, and CLI fallback behavior. +7. Verify with a live `openclaw agent` run that the CLI no longer double-executes accepted requests. + +## Risks + +- `expectFinal:true` behavior is convention-based (`status: "accepted"`), not protocol-versioned. The fix must stay scoped to that existing contract. +- If other callers rely on generic close handling, typed accepted-close errors must remain backward compatible as `Error` subclasses. +- Fallback behavior changes user-visible CLI behavior; the replacement error message must be explicit that the gateway may still be running the request. + +## Validation + +- Targeted tests: + - `bunx vitest run src/gateway/client.test.ts` + - `bunx vitest run src/gateway/call.test.ts` + - `bunx vitest run src/commands/agent-via-gateway.test.ts` +- Live verification: + - reproduce previous `1006` path if possible + - confirm no embedded fallback occurs after accept + - confirm normal gateway RPCs still work diff --git a/.workflow/prd.md b/.workflow/prd.md index 16c0733dcc81..21660c69df1b 100644 --- a/.workflow/prd.md +++ b/.workflow/prd.md @@ -1,35 +1,35 @@ -# PRD: Cron Agent Routing Repair for Self-Learning Jobs +# PRD: Gateway Agent 1006 Final-Response Transport Fix -**Status:** Approved -**Date:** 2026-03-12 -**Provenance:** See `.workflow/inputs/original-request.md` and `.workflow/inputs/references.md`. +**Status:** Draft +**Date:** 2026-03-22 +**Provenance:** See `.workflow/inputs/original-request.md` ## Summary -Repair isolated cron routing so Maclern self-learning jobs run as Maclern instead of silently falling back to the default `main` agent, and harden the cron runtime so future non-default cron agents do not misroute their self-learning jobs into the wrong agent/session namespace. +Fix the `openclaw agent` gateway transport path so CLI agent runs that use `expectFinal:true` do not fail with websocket close code `1006` and incorrectly fall back to embedded local execution after the gateway has already accepted the run. ## User Stories -- As Bryan, I want Maclern overnight self-learning jobs to run with Maclern’s own agent identity, workspace, and session store so the digest reflects actual training context. -- As an operator, I want cron jobs for non-default agents to fail or route correctly instead of silently downgrading to `main`. -- As a future agent owner, I want isolated cron jobs to remain agent-scoped even when the runtime config is incomplete. +- As a CLI user, I want `openclaw agent` to reliably wait for the gateway's final response so that I do not get silent fallback behavior. +- As an operator, I want the CLI to distinguish pre-accept gateway failures from post-accept disconnects so that accepted runs are not executed twice. +- As a maintainer, I want the gateway request transport to preserve normal RPC behavior while making the `agent` ack/final sequence robust. ## Acceptance Criteria -- [ ] `maclern-nightly-queue-review` and `maclern-overnight-digest` no longer resolve to `main` because `maclern` is absent from `agents.list`. -- [ ] Isolated cron jobs preserve the requested non-default `agentId` through gateway cron routing, session key selection, wake routing, and isolated run execution. -- [ ] Maclern resolves to its intended workspace and agent directory for self-learning runs instead of using the default main workspace. -- [ ] Regression tests cover the missing-agent-in-config case for isolated cron jobs. -- [ ] The live runtime config includes the Maclern agent entry needed for correct workspace mapping. +- [ ] `openclaw agent` no longer falls back to embedded local execution when the gateway already accepted the run. +- [ ] Gateway close-after-accept is surfaced as a distinct error path, not treated like pre-connect/unreachable failures. +- [ ] One-shot gateway clients do not perform unnecessary reconnect attempts during `expectFinal:true` agent calls. +- [ ] Existing non-agent gateway RPC calls continue to work normally. +- [ ] Tests cover ack-then-close sequencing, no-fallback-after-accept behavior, and continued fallback for genuine pre-accept failures. ## Out of Scope -- Rewriting Maclern’s prompts, training artifacts, or Training Ops product surfaces. -- Changing main-session cron semantics for the default agent. -- Backfilling old misrouted Maclern session history. +- Protocol redesign for all response frames. +- General websocket keepalive/ping redesign. +- Dashboard/routing telemetry changes already fixed under separate tasks. ## Technical Notes -- Current root cause is twofold: `server-cron.ts` falls back to `main` when a requested cron agent is not present in runtime config, and the live config currently omits `maclern`. -- The code change should be minimal and defensible, with regression tests in gateway cron coverage. -- The runtime config patch should follow the existing per-agent convention already used for agents like `cody`, `storie`, and `grove`. +- Likely files: `src/gateway/client.ts`, `src/gateway/call.ts`, `src/commands/agent-via-gateway.ts`, and related tests. +- The current `agent` server method intentionally sends ack and final responses as separate `res` frames with the same request ID. +- The bug is in the client transport / fallback semantics, not in model routing. diff --git a/src/commands/agent-via-gateway.test.ts b/src/commands/agent-via-gateway.test.ts index 15ba8f0c6772..cfbb30bd3cc4 100644 --- a/src/commands/agent-via-gateway.test.ts +++ b/src/commands/agent-via-gateway.test.ts @@ -14,6 +14,7 @@ vi.mock("./agent.js", () => ({ import type { OpenClawConfig } from "../config/config.js"; import * as configModule from "../config/config.js"; import { callGateway } from "../gateway/call.js"; +import { GatewayRequestAcceptedError } from "../gateway/client.js"; import type { RuntimeEnv } from "../runtime.js"; import { agentCliCommand } from "./agent-via-gateway.js"; import { agentCommand } from "./agent.js"; @@ -120,6 +121,20 @@ describe("agentCliCommand", () => { }); }); + it("does not fall back when the gateway accepted the run before disconnect", async () => { + await withTempStore(async () => { + vi.mocked(callGateway).mockRejectedValue( + new GatewayRequestAcceptedError("gateway closed (1006 abnormal closure): no close reason"), + ); + + await expect(agentCliCommand({ message: "hi", to: "+1555" }, runtime)).rejects.toThrow( + "Gateway accepted the agent run", + ); + + expect(agentCommand).not.toHaveBeenCalled(); + }); + }); + it("skips gateway when --local is set", async () => { await withTempStore(async () => { mockLocalAgentReply(); diff --git a/src/commands/agent-via-gateway.ts b/src/commands/agent-via-gateway.ts index a44caa3f3bf1..03aa64353708 100644 --- a/src/commands/agent-via-gateway.ts +++ b/src/commands/agent-via-gateway.ts @@ -4,6 +4,7 @@ import type { CliDeps } from "../cli/deps.js"; import { withProgress } from "../cli/progress.js"; import { loadConfig } from "../config/config.js"; import { callGateway, randomIdempotencyKey } from "../gateway/call.js"; +import { GatewayRequestAcceptedError } from "../gateway/client.js"; import { normalizeAgentId } from "../routing/session-key.js"; import type { RuntimeEnv } from "../runtime.js"; import { @@ -190,6 +191,12 @@ export async function agentCliCommand(opts: AgentCliOpts, runtime: RuntimeEnv, d try { return await agentViaGatewayCommand(opts, runtime); } catch (err) { + if (err instanceof GatewayRequestAcceptedError) { + throw new Error( + `Gateway accepted the agent run, but the connection dropped before the final response was received. The agent may still be running on the gateway.\n${err.message}`, + { cause: err }, + ); + } runtime.error?.(`Gateway agent failed; falling back to embedded: ${String(err)}`); return await agentCommand(localOpts, runtime, deps); } diff --git a/src/gateway/call.test.ts b/src/gateway/call.test.ts index 10fc52441d1b..2a9dd5711dab 100644 --- a/src/gateway/call.test.ts +++ b/src/gateway/call.test.ts @@ -14,16 +14,20 @@ let lastClientOptions: { password?: string; tlsFingerprint?: string; scopes?: string[]; + reconnect?: boolean; onHelloOk?: (hello: { features?: { methods?: string[] } }) => void | Promise; onClose?: (code: number, reason: string) => void; } | null = null; -type StartMode = "hello" | "close" | "silent"; +type StartMode = "hello" | "close" | "silent" | "accepted-close"; let startMode: StartMode = "hello"; let closeCode = 1006; let closeReason = ""; let helloMethods: string[] | undefined = ["health", "secrets.resolve"]; vi.mock("./client.js", () => ({ + GatewayRequestAcceptedError: class GatewayRequestAcceptedError extends Error { + readonly code = "GATEWAY_REQUEST_ACCEPTED_THEN_CLOSED"; + }, describeGatewayCloseCode: (code: number) => { if (code === 1000) { return "normal closure"; @@ -34,11 +38,13 @@ vi.mock("./client.js", () => ({ return undefined; }, GatewayClient: class { + hadAcceptedRequest = false; constructor(opts: { url?: string; token?: string; password?: string; scopes?: string[]; + reconnect?: boolean; onHelloOk?: (hello: { features?: { methods?: string[] } }) => void | Promise; onClose?: (code: number, reason: string) => void; }) { @@ -54,6 +60,9 @@ vi.mock("./client.js", () => ({ methods: helloMethods, }, }); + } else if (startMode === "accepted-close") { + this.hadAcceptedRequest = true; + lastClientOptions?.onClose?.(1006, ""); } else if (startMode === "close") { lastClientOptions?.onClose?.(closeCode, closeReason); } @@ -197,6 +206,24 @@ describe("callGateway url resolution", () => { expect(lastClientOptions?.token).toBe("explicit-token"); }); + it("disables reconnect for one-shot gateway calls", async () => { + setLocalLoopbackGatewayConfig(); + + await callGateway({ method: "health" }); + + expect(lastClientOptions?.reconnect).toBe(false); + }); + + it("surfaces accepted-then-close as a typed error", async () => { + setLocalLoopbackGatewayConfig(); + startMode = "accepted-close"; + + const err = await callGateway({ method: "agent", expectFinal: true }).catch((caught) => caught); + + expect(err).toBeInstanceOf(Error); + expect(String(err?.message ?? err)).toContain("gateway closed (1006"); + }); + it("uses OPENCLAW_GATEWAY_URL env override in remote mode when remote URL is missing", async () => { loadConfig.mockReturnValue({ gateway: { mode: "remote", bind: "loopback", remote: {} }, diff --git a/src/gateway/call.ts b/src/gateway/call.ts index 31d11ac14b96..4ca63038439f 100644 --- a/src/gateway/call.ts +++ b/src/gateway/call.ts @@ -17,7 +17,7 @@ import { type GatewayClientName, } from "../utils/message-channel.js"; import { VERSION } from "../version.js"; -import { GatewayClient } from "./client.js"; +import { GatewayClient, GatewayRequestAcceptedError } from "./client.js"; import { GatewaySecretRefUnavailableError, resolveGatewayCredentialsFromConfig, @@ -818,6 +818,7 @@ async function executeGatewayRequestWithScopes(params: { mode: opts.mode ?? GATEWAY_CLIENT_MODES.CLI, role: "operator", scopes, + reconnect: false, deviceIdentity: loadOrCreateDeviceIdentity(), minProtocol: opts.minProtocol ?? PROTOCOL_VERSION, maxProtocol: opts.maxProtocol ?? PROTOCOL_VERSION, @@ -846,7 +847,12 @@ async function executeGatewayRequestWithScopes(params: { } ignoreClose = true; client.stop(); - stop(new Error(formatGatewayCloseError(code, reason, params.connectionDetails))); + const closeError = formatGatewayCloseError(code, reason, params.connectionDetails); + stop( + client.hadAcceptedRequest + ? new GatewayRequestAcceptedError(closeError) + : new Error(closeError), + ); }, }); diff --git a/src/gateway/client.test.ts b/src/gateway/client.test.ts index 04ddc5027d40..1e2641dbab68 100644 --- a/src/gateway/client.test.ts +++ b/src/gateway/client.test.ts @@ -19,11 +19,13 @@ type WsEventHandlers = { }; class MockWebSocket { + static readonly OPEN = 1; private openHandlers: WsEventHandlers["open"][] = []; private messageHandlers: WsEventHandlers["message"][] = []; private closeHandlers: WsEventHandlers["close"][] = []; private errorHandlers: WsEventHandlers["error"][] = []; readonly sent: string[] = []; + readyState = MockWebSocket.OPEN; constructor(_url: string, _options?: unknown) { wsInstances.push(this); @@ -108,6 +110,7 @@ vi.mock("../logger.js", async (importOriginal) => { }); const { GatewayClient } = await import("./client.js"); +const { GatewayRequestAcceptedError } = await import("./client.js"); function getLatestWs(): MockWebSocket { const ws = wsInstances.at(-1); @@ -347,6 +350,49 @@ describe("GatewayClient close handling", () => { expect(onClose).toHaveBeenCalledWith(1008, "unauthorized: device token mismatch"); client.stop(); }); + + it("rejects expectFinal requests with GatewayRequestAcceptedError when the socket closes after accepted ack", async () => { + const client = createClientWithIdentity("dev-6", vi.fn()); + + client.start(); + const ws = getLatestWs(); + Reflect.set(client as object, "ws", ws); + + const requestPromise = client.request("agent", { hello: "world" }, { expectFinal: true }); + const agentFrame = ws.sent.find((frame) => frame.includes('"method":"agent"')); + if (!agentFrame) { + throw new Error("missing agent frame"); + } + const agentId = (JSON.parse(agentFrame) as { id: string }).id; + ws.emitMessage( + JSON.stringify({ + type: "res", + id: agentId, + ok: true, + payload: { status: "accepted", runId: "run-1" }, + }), + ); + + expect(client.hadAcceptedRequest).toBe(true); + + ws.emitClose(1006, ""); + + await expect(requestPromise).rejects.toBeInstanceOf(GatewayRequestAcceptedError); + client.stop(); + }); + + it("does not reconnect when reconnect is disabled", () => { + const client = new GatewayClient({ + url: "ws://127.0.0.1:18789", + reconnect: false, + }); + + client.start(); + expect(wsInstances.length).toBe(1); + getLatestWs().emitClose(1006, ""); + expect(wsInstances.length).toBe(1); + client.stop(); + }); }); describe("GatewayClient connect auth payload", () => { diff --git a/src/gateway/client.ts b/src/gateway/client.ts index 4641545ea8e8..a2282b9c6942 100644 --- a/src/gateway/client.ts +++ b/src/gateway/client.ts @@ -39,6 +39,7 @@ type Pending = { resolve: (value: unknown) => void; reject: (err: unknown) => void; expectFinal: boolean; + ackReceived: boolean; }; export type GatewayClientOptions = { @@ -70,6 +71,7 @@ export type GatewayClientOptions = { onConnectError?: (err: Error) => void; onClose?: (code: number, reason: string) => void; onGap?: (info: { expected: number; received: number }) => void; + reconnect?: boolean; }; export const GATEWAY_CLOSE_CODE_HINTS: Readonly> = { @@ -83,6 +85,15 @@ export function describeGatewayCloseCode(code: number): string | undefined { return GATEWAY_CLOSE_CODE_HINTS[code]; } +export class GatewayRequestAcceptedError extends Error { + readonly code = "GATEWAY_REQUEST_ACCEPTED_THEN_CLOSED"; + + constructor(message: string) { + super(message); + this.name = "GatewayRequestAcceptedError"; + } +} + export class GatewayClient { private ws: WebSocket | null = null; private opts: GatewayClientOptions; @@ -93,6 +104,7 @@ export class GatewayClient { private connectNonce: string | null = null; private connectSent = false; private connectTimer: NodeJS.Timeout | null = null; + private hadAcceptedRequestValue = false; // Track last tick to detect silent stalls. private lastTick: number | null = null; private tickIntervalMs = 30_000; @@ -232,6 +244,10 @@ export class GatewayClient { this.flushPendingErrors(new Error("gateway client stopped")); } + get hadAcceptedRequest(): boolean { + return this.hadAcceptedRequestValue; + } + private sendConnect() { if (this.connectSent) { return; @@ -396,6 +412,8 @@ export class GatewayClient { const payload = parsed.payload as { status?: unknown } | undefined; const status = payload?.status; if (pending.expectFinal && status === "accepted") { + pending.ackReceived = true; + this.hadAcceptedRequestValue = true; return; } this.pending.delete(parsed.id); @@ -431,6 +449,9 @@ export class GatewayClient { } private scheduleReconnect() { + if (this.opts.reconnect === false) { + return; + } if (this.closed) { return; } @@ -445,7 +466,7 @@ export class GatewayClient { private flushPendingErrors(err: Error) { for (const [, p] of this.pending) { - p.reject(err); + p.reject(p.ackReceived ? new GatewayRequestAcceptedError(err.message) : err); } this.pending.clear(); } @@ -522,6 +543,7 @@ export class GatewayClient { resolve: (value) => resolve(value as T), reject, expectFinal, + ackReceived: false, }); }); this.ws.send(JSON.stringify(frame));