diff --git a/codec_agent_runner.py b/codec_agent_runner.py
index 7d3bf6a..16a5bd1 100644
--- a/codec_agent_runner.py
+++ b/codec_agent_runner.py
@@ -364,8 +364,40 @@ def _execute_checkpoint(plan_dict: Dict[str, Any],
         if action.kind == "checkpoint_done":
             return history
 
-        # Permission gate (raises PermissionViolation if outside manifest)
-        permission_gate(action, agent_grants, global_grants)
+        # Permission gate (raises PermissionViolation if outside manifest).
+        # Phase 3.5 hotfix: if the LLM hallucinates a skill name (e.g.
+        # "fetch_url" instead of the real "web_fetch"), give it ONE retry
+        # with the corrected skill list as context. Most skill-hallucination
+        # errors recover with a single correction pass; only block on the
+        # SECOND consecutive miss. This dramatically reduces user-visible
+        # blocked_on_permission events caused by LLM naming drift.
+        try:
+            permission_gate(action, agent_grants, global_grants)
+        except PermissionViolation as pv:
+            if pv.reason == "skill_not_authorized":
+                # Record the rejected action in history together with the list
+                # of authorized skills so the next _qwen_next_action call can fix it.
+                allowed = sorted(set(agent_grants.get("skills", [])) |
+                                 set(global_grants.get("skills", [])))
+                history.append({
+                    "step": len(history),
+                    "skill": action.skill,
+                    "task": action.task[:200],
+                    "result": (f"skill_not_authorized: '{action.skill}' is not an "
+                               f"available skill. Use one of these exact names: "
+                               f"{', '.join(allowed)}"),
+                    "is_destructive": False,
+                    "_skill_correction_nudge": True,
+                })
+                # Re-call Qwen — if it still picks the wrong skill, fall through
+                # and the SECOND permission_gate call will raise normally.
+                action2 = _qwen_next_action(plan_dict, checkpoint, history)
+                if action2.kind == "checkpoint_done":
+                    return history
+                permission_gate(action2, agent_grants, global_grants)
+                action = action2  # use the corrected action going forward
+            else:
+                raise  # path / domain violations not auto-recoverable
 
         # Destructive gate (raises DestructiveOpRejected on user reject)
         if action.is_destructive:
diff --git a/codec_tasks.html b/codec_tasks.html
index 7b7d06a..af3f90d 100644
--- a/codec_tasks.html
+++ b/codec_tasks.html
@@ -908,9 +908,28 @@

Task Reports

var reports = []; var _reportPollTimer = null; +// Phase 3.5 hotfix: Reports tab was filtering to ONLY 'task_report' so all +// the agent_*/proactive/shift_report/question notifications got dropped +// (bell counted, nothing rendered). Now shows everything CODEC generates. +var REPORT_NOTIF_TYPES = [ + 'task_report', // Crews + 'question', // Phase 1 Step 3 ask_user + 'shift_report', // Phase 2 Step 7 + 'agent_update', 'agent_blocked', 'agent_question', // Phase 3 Step 10 + 'agent_done', 'agent_aborted', // Phase 3 Step 10 + 'proactive_suggestion', // Phase 3.5 +]; + function loadReports() { fetch('/api/notifications').then(function(r){ return r.json(); }).then(function(data) { - reports = (data.notifications || data || []).filter(function(n){ return n.type === 'task_report'; }); + var all = data.notifications || data || []; + reports = all.filter(function(n){ return REPORT_NOTIF_TYPES.indexOf(n.type) >= 0; }); + // Newest first + reports.sort(function(a, b) { + var ta = a.timestamp || a.created || a.ts || ''; + var tb = b.timestamp || b.created || b.ts || ''; + return (tb || '').localeCompare(ta || ''); + }); renderReports(); // Auto-refresh every 5s if any task is still running var hasRunning = reports.some(function(r){ return r.status === 'running'; }); diff --git a/tests/test_agent_runner.py b/tests/test_agent_runner.py index 725e2ee..9c1dd15 100644 --- a/tests/test_agent_runner.py +++ b/tests/test_agent_runner.py @@ -945,3 +945,53 @@ def test_permission_gate_allows_read_path_in_grants(basic_grants, empty_global_g action = Action(skill="weather", task="read", reads_path=True, read_path="~/Documents/research/notes.md") permission_gate(action, basic_grants, empty_global_grants) # no exception + + +# ───────────────────────────────────────────────────────────────────────────── +# Phase 3.5 hotfix — LLM skill-name hallucination retry (1 test) +# ───────────────────────────────────────────────────────────────────────────── + +def 
test_skill_hallucination_retries_with_corrected_skill_list(monkeypatch, temp_codec_dir): + """When LLM picks an unauthorized skill, runner appends a correction nudge + to history and re-calls Qwen. If second pick is allowed, execution proceeds. + No blocked_on_permission for transient LLM naming drift.""" + import codec_agent_runner as car + + grants = {"skills": ["weather"], "read_paths": [], "write_paths": [], + "network_domains": []} + global_grants = {"schema": 1, "version": 0, + "skills": [], "read_paths": [], "write_paths": [], "network_domains": []} + checkpoint = {"id": "cp1", "title": "t", "description": "d", + "expected_output": "o", "step_budget": 5} + + # First call returns hallucinated skill; second call returns valid one; + # third returns checkpoint_done. + actions = [ + car.Action(skill="fetch_url", task="x", kind="skill_call", # hallucination + is_destructive=False, network_call=False, touches_path=False), + car.Action(skill="weather", task="real call", kind="skill_call", # corrected + is_destructive=False, network_call=False, touches_path=False), + car.Action(skill="", task="", kind="checkpoint_done"), + ] + idx = {"n": 0} + def fake_next(*a, **k): + out = actions[idx["n"]] + idx["n"] += 1 + return out + monkeypatch.setattr(car, "_qwen_next_action", fake_next) + fake_run = MagicMock(return_value="r") + monkeypatch.setattr(car, "_run_skill", fake_run) + + history = car._execute_checkpoint( + plan_dict={"goals": ["g"]}, checkpoint=checkpoint, + agent_grants=grants, global_grants=global_grants, + agent_id="agent_test", + ) + + # The corrected weather call ran (via _run_skill); the hallucinated + # fetch_url did NOT run (it never passed permission_gate). + fake_run.assert_called_once_with("weather", "real call", "agent_test") + # History contains the correction nudge entry + nudges = [h for h in history if h.get("_skill_correction_nudge")] + assert len(nudges) == 1 + assert "fetch_url" in nudges[0]["skill"]