diff --git a/docs/plans/2026-03-18-staging-ci-triage.md b/docs/plans/2026-03-18-staging-ci-triage.md deleted file mode 100644 index adfd5d0521..0000000000 --- a/docs/plans/2026-03-18-staging-ci-triage.md +++ /dev/null @@ -1,87 +0,0 @@ -# Staging CI Review Issues Triage - -**Date:** 2026-03-18 -**Branch:** staging (HEAD `b7a1edf`) -**Total open issues:** 50 - ---- - -## Batch 1 — Critical & 100-confidence issues - -| # | Title | Severity | Verdict | File(s) | Action | -|---|-------|----------|---------|---------|--------| -| 1281 | Logic inversion in Telegram auto-verification | CRITICAL:100 | **FALSE POSITIVE** (closed) | `src/channels/web/server.rs` | Different handlers with intentional different SSE behavior | -| 908 | Missing consecutive_failures reset | CRITICAL:100 | **STALE** | `src/llm/circuit_breaker.rs` | Close — `record_success()` already resets to 0 | -| 1282 | Variable shadowing fallback notification | HIGH:100 | **STALE** | `src/agent/agent_loop.rs` | Close — fixed in commit `bcc38ce` | -| 1283 | Inconsistent fallback logic DRY | HIGH:75 | **STALE** | `src/agent/agent_loop.rs` | Close — fixed in commit `bcc38ce` | -| 1178 | Workflow linting bypass for test code | CRITICAL:75 | **FALSE POSITIVE** | `.github/workflows/code_style.yml` | Close — script reads full file, not hunk headers | - ---- - -## Remaining Batches (queued) - -### Batch 2 — Retry/DRY + CI workflow issues (completed) - -| # | Title | Severity | Verdict | Action | -|---|-------|----------|---------|--------| -| 1288 | DRY violation: retry-after parsing | HIGH:95 | **LEGIT** | Fixed: extracted shared `parse_retry_after()` | -| 1289 | Semantic mismatch in RFC2822 test helpers | MEDIUM:85 | **DUPLICATE** (closed) | Duplicate of #1288 | -| 1290 | Unnecessary eager `chrono::Utc::now()` call | LOW:85 | **FALSE POSITIVE** (closed) | Already deferred inside successful parse branch | -| 963 | Logical equivalence bug in workflow conditions | HIGH:100 | **FALSE POSITIVE** (closed) | Refactored condition correctly handles `workflow_call` | -| 1280 | Flaky OAuth wildcard callback tests | Flaky | **LEGIT** | Fixed: added `tokio::sync::Mutex` for env var serialization | - -### Batch 3 — Routine engine + notification routing -- #1365 — too_many_arguments on RoutineEngine::new() -- #1371 — Discovery schema regeneration on every tool_info call -- #1364 — Prompt injection via unescaped channel/user in lightweight routines -- #1284 — notification_target_for_channel() assumes channel owner - -### Batch 4 — Telegram/Extension Manager webhook group -- #1247 — Synchronous 120-second blocking poll in HTTP handler -- #1248 — Hardcoded channel-specific logic violates architecture -- #1249 — Telegram-specific business logic bloats ExtensionManager -- #1250 — Response success/failure logic mismatch in chat auth -- #1251 — Channel-specific configuration mappings lack extensibility - -### Batch 5 — HMAC/Auth/Security -- #1034 — Signature verification not constant-time -- #1035 — Incorrect order of operations in HMAC verification -- #1036 — Double opt-in lacks runtime validation consistency -- #1037 — API breaking change: auth() signature -- #1038 — CSP policy allows CDN scripts with risky fallback - -### Batch 6 — Webhook handler + config -- #1039 — Per-request HTTP client creation in hot path -- #1040 — Complex nested auth logic in webhook_handler -- #1041 — Redundant JSON deserialization in webhook handler -- #1042 — Implicit state mutation in config conversion -- #1005 — Inconsistent double opt-in enforcement - -### Batch 7 — Tool schema validation / WASM bounds -- #974 — Unbounded recursion in resolve_nested() -- #975 — Unbounded recursion in validate_tool_schema() -- #976 — Unbounded description string in CapabilitiesFile -- #977 — Unbounded parameters schema JSON -- #978 — Unnecessary clone of large JSON in hot path - -### Batch 8 — Tool schema + config + security -- #979 — No size limits on JSON files read -- #980 — Misleading warning condition for missing parameters -- #988 — Hardcoded CLI_ENABLED env var in systemd template -- #990 — Configuration semantics unclear for daemon mode -- #1103 — SSRF risk via configurable embedding base URL - -### Batch 9 — Agent loop / job worker -- #870 — Unbounded loop without cancellation token -- #871 — Stringly-typed unsupported parameter filtering -- #873 — RwLock overhead on hot path -- #892 — JobDelegate::check_signals() treats non-terminal as terminal -- #1252 — String concatenation in hot polling loop - -### Batch 10 — Agent loop perf + CI scripts -- #893 — Unnecessary parameter cloning on every tool execution -- #894 — truncate_for_preview allocates for non-truncated strings -- #895 — Tool definitions fetched every iteration without caching -- #1179 — AWK state machine never resets between hunks -- #1180 — Code fence detection logic flawed in extract_suggestions() -- #1181 — Unsafe .unwrap() in production code manifest.rs diff --git a/src/agent/agent_loop.rs b/src/agent/agent_loop.rs index 565ee07048..a0e8278fc7 100644 --- a/src/agent/agent_loop.rs +++ b/src/agent/agent_loop.rs @@ -10,6 +10,7 @@ use std::sync::Arc; use futures::StreamExt; +use uuid::Uuid; use crate::agent::context_monitor::ContextMonitor; use crate::agent::heartbeat::spawn_heartbeat; @@ -1014,15 +1015,59 @@ impl Agent { } } - // Resolve session and thread - let (session, thread_id) = self - .session_manager - .resolve_thread( - &message.user_id, - &message.channel, - message.conversation_scope(), - ) - .await; + // Resolve session and thread. Approval submissions are allowed to + // target an already-loaded owned thread by UUID across channels so the + // web approval UI can approve work that originated from HTTP/other + // owner-scoped channels. + let approval_thread_uuid = if matches!( + submission, + Submission::ExecApproval { .. } | Submission::ApprovalResponse { .. } + ) { + message + .conversation_scope() + .and_then(|thread_id| Uuid::parse_str(thread_id).ok()) + } else { + None + }; + + let (session, thread_id) = if let Some(target_thread_id) = approval_thread_uuid { + let session = self + .session_manager + .get_or_create_session(&message.user_id) + .await; + let mut sess = session.lock().await; + if sess.threads.contains_key(&target_thread_id) { + sess.active_thread = Some(target_thread_id); + sess.last_active_at = chrono::Utc::now(); + drop(sess); + self.session_manager + .register_thread( + &message.user_id, + &message.channel, + target_thread_id, + Arc::clone(&session), + ) + .await; + (session, target_thread_id) + } else { + drop(sess); + self.session_manager + .resolve_thread( + &message.user_id, + &message.channel, + message.conversation_scope(), + ) + .await + } + } else { + self.session_manager + .resolve_thread( + &message.user_id, + &message.channel, + message.conversation_scope(), + ) + .await + }; tracing::debug!( message_id = %message.id, thread_id = %thread_id, diff --git a/src/agent/session_manager.rs b/src/agent/session_manager.rs index 3db275cc27..3bf20697a6 100644 --- a/src/agent/session_manager.rs +++ b/src/agent/session_manager.rs @@ -772,6 +772,33 @@ mod tests { assert_ne!(resolved, tid); } + #[tokio::test] + async fn test_register_then_resolve_same_uuid_on_second_channel_reuses_thread() { + use crate::agent::session::{Session, Thread}; + + let manager = SessionManager::new(); + let tid = Uuid::new_v4(); + + let session = Arc::new(Mutex::new(Session::new("user-cross"))); + { + let mut sess = session.lock().await; + let thread = Thread::with_id(tid, sess.id); + sess.threads.insert(tid, thread); + } + + manager + .register_thread("user-cross", "http", tid, Arc::clone(&session)) + .await; + manager + .register_thread("user-cross", "gateway", tid, Arc::clone(&session)) + .await; + + let (_, resolved) = manager + .resolve_thread("user-cross", "gateway", Some(&tid.to_string())) + .await; + assert_eq!(resolved, tid); + } + // === QA Plan P3 - 4.2: Concurrent session stress tests === #[tokio::test] diff --git a/src/config/embeddings.rs b/src/config/embeddings.rs index 4f99dab4eb..68b0ff2c67 100644 --- a/src/config/embeddings.rs +++ b/src/config/embeddings.rs @@ -299,15 +299,12 @@ mod tests { // SAFETY: Under ENV_MUTEX, no concurrent env access. unsafe { - std::env::set_var("EMBEDDING_BASE_URL", "https://custom.example.com"); + std::env::set_var("EMBEDDING_BASE_URL", "https://8.8.8.8"); } let settings = Settings::default(); let config = EmbeddingsConfig::resolve(&settings).expect("resolve should succeed"); - assert_eq!( - config.openai_base_url.as_deref(), - Some("https://custom.example.com") - ); + assert_eq!(config.openai_base_url.as_deref(), Some("https://8.8.8.8")); // SAFETY: Under ENV_MUTEX. unsafe { std::env::remove_var("EMBEDDING_BASE_URL"); diff --git a/src/config/llm.rs b/src/config/llm.rs index cc51561163..03ce1f8590 100644 --- a/src/config/llm.rs +++ b/src/config/llm.rs @@ -855,19 +855,19 @@ mod tests { // SAFETY: Under ENV_MUTEX. unsafe { std::env::set_var("LLM_BACKEND", "openai_compatible"); - std::env::set_var("LLM_BASE_URL", "http://env-url/v1"); + std::env::set_var("LLM_BASE_URL", "http://localhost:8000/v1"); } let settings = Settings { llm_backend: Some("openai_compatible".to_string()), - openai_compatible_base_url: Some("http://settings-url/v1".to_string()), + openai_compatible_base_url: Some("http://localhost:9000/v1".to_string()), ..Default::default() }; let cfg = LlmConfig::resolve(&settings).expect("resolve should succeed"); let provider = cfg.provider.expect("should have provider config"); assert_eq!( - provider.base_url, "http://env-url/v1", + provider.base_url, "http://localhost:8000/v1", "env var should take priority over settings" ); @@ -879,7 +879,7 @@ mod tests { let cfg = LlmConfig::resolve(&settings).expect("resolve should succeed"); let provider = cfg.provider.expect("should have provider config"); assert_eq!( - provider.base_url, "http://settings-url/v1", + provider.base_url, "http://localhost:9000/v1", "settings should take priority over registry default" ); diff --git a/tests/e2e/scenarios/test_owner_scope.py b/tests/e2e/scenarios/test_owner_scope.py index 56f3b01ec7..5cb9df2a26 100644 --- a/tests/e2e/scenarios/test_owner_scope.py +++ b/tests/e2e/scenarios/test_owner_scope.py @@ -4,7 +4,6 @@ - the web gateway chat UI - the owner-scoped HTTP webhook channel - routine tools / routines tab -- job creation via routine execution / jobs tab """ import asyncio @@ -13,7 +12,13 @@ import httpx -from helpers import SEL, AUTH_TOKEN, signed_http_webhook_headers +from helpers import ( + AUTH_TOKEN, + SEL, + api_get, + api_post, + signed_http_webhook_headers, +) async def _send_and_get_response( @@ -58,13 +63,14 @@ async def _post_http_webhook( content: str, sender_id: str, thread_id: str, -) -> str: + wait_for_response: bool = True, +) -> str | None: """Send a signed request to the owner-scoped HTTP webhook channel.""" payload = { "user_id": sender_id, "thread_id": thread_id, "content": content, - "wait_for_response": True, + "wait_for_response": wait_for_response, } body = json.dumps(payload).encode("utf-8") @@ -81,8 +87,9 @@ async def _post_http_webhook( ) data = response.json() assert data["status"] == "accepted", f"Unexpected webhook response: {data}" - assert data["response"], f"Expected synchronous response body, got: {data}" - return data["response"] + if wait_for_response: + assert data["response"], f"Expected synchronous response body, got: {data}" + return data.get("response") async def _open_tab(page, tab: str) -> None: @@ -112,22 +119,60 @@ async def _wait_for_routine(base_url: str, name: str, timeout: float = 20.0) -> raise AssertionError(f"Routine '{name}' was not created within {timeout}s") -async def _wait_for_job(base_url: str, title: str, timeout: float = 30.0) -> dict: - """Poll the jobs API until the named job exists.""" - async with httpx.AsyncClient() as client: - for _ in range(int(timeout * 2)): - response = await client.get( - f"{base_url}/api/jobs", - headers={"Authorization": f"Bearer {AUTH_TOKEN}"}, - timeout=10, - ) - response.raise_for_status() - jobs = response.json()["jobs"] - for job in jobs: - if job["title"] == title: - return job - await _poll_sleep() - raise AssertionError(f"Job '{title}' was not created within {timeout}s") +async def _wait_for_http_thread(base_url: str, title_fragment: str, timeout: float = 20.0) -> str: + """Poll the chat thread list until the matching HTTP thread is visible.""" + for _ in range(int(timeout * 2)): + response = await api_get(base_url, "/api/chat/threads", timeout=10) + response.raise_for_status() + threads = response.json()["threads"] + for thread in threads: + if thread.get("channel") != "http": + continue + if title_fragment in (thread.get("title") or ""): + return thread["id"] + await _poll_sleep() + raise AssertionError( + f"HTTP thread containing '{title_fragment}' was not visible within {timeout}s" + ) + + +async def _wait_for_pending_approval( + base_url: str, + thread_id: str, + timeout: float = 20.0, +) -> dict: + """Poll chat history until the thread exposes a pending approval payload.""" + for _ in range(int(timeout * 2)): + response = await api_get( + base_url, + f"/api/chat/history?thread_id={thread_id}", + timeout=10, + ) + response.raise_for_status() + pending = response.json().get("pending_approval") + if pending: + return pending + await _poll_sleep() + raise AssertionError(f"Thread '{thread_id}' did not expose a pending approval") + + +async def _approve_pending_request(base_url: str, thread_id: str, request_id: str) -> None: + """Approve a pending tool request through the web gateway API.""" + response = await api_post( + base_url, + "/api/chat/approval", + json={ + "request_id": request_id, + "action": "approve", + "thread_id": thread_id, + }, + timeout=10, + ) + assert response.status_code == 202, ( + f"Approval submission failed: {response.status_code} {response.text[:400]}" + ) + data = response.json() + assert data["status"] == "accepted", f"Unexpected approval response: {data}" async def _poll_sleep() -> None: @@ -194,33 +239,34 @@ async def test_web_created_routine_is_listed_from_http_channel_across_senders( assert routine_name in second_sender_text, second_sender_text -async def test_http_created_full_job_routine_can_be_run_from_web_and_shows_in_jobs( +async def test_http_created_full_job_routine_is_visible_in_web_after_approval( page, ironclaw_server, http_channel_server, ): - """A full-job routine created via HTTP can be run from the web UI and create a job.""" + """A full-job routine created via HTTP appears in the web owner UI after approval.""" routine_name = f"owner-job-{uuid.uuid4().hex[:8]}" - response_text = await _post_http_webhook( + await _post_http_webhook( http_channel_server, content=f"create full-job owner routine {routine_name}", sender_id="http-job-sender", thread_id="owner-job-thread", + wait_for_response=False, ) - assert routine_name in response_text - await _wait_for_routine(ironclaw_server, routine_name) + thread_id = await _wait_for_http_thread(ironclaw_server, routine_name) + pending = await _wait_for_pending_approval(ironclaw_server, thread_id) + assert pending["tool_name"] == "routine_create" + await _approve_pending_request( + ironclaw_server, + thread_id, + pending["request_id"], + ) + + routine = await _wait_for_routine(ironclaw_server, routine_name) + assert routine["action_type"] == "full_job" await _open_tab(page, "routines") routine_row = page.locator(SEL["routine_row"]).filter(has_text=routine_name).first await routine_row.wait_for(state="visible", timeout=15000) - await routine_row.locator('button[data-action="trigger-routine"]').click() - - await _wait_for_job(ironclaw_server, routine_name, timeout=45.0) - - await _open_tab(page, "jobs") - await page.locator(SEL["job_row"]).filter(has_text=routine_name).first.wait_for( - state="visible", - timeout=20000, - )