AVADSA25 · AVADSA25 · May 3, 2026 · May 3, 2026
diff --git a/codec_agent_runner.py b/codec_agent_runner.py
@@ -364,8 +364,40 @@ def _execute_checkpoint(plan_dict: Dict[str, Any],
         if action.kind == "checkpoint_done":
             return history
 
-        # Permission gate (raises PermissionViolation if outside manifest)
-        permission_gate(action, agent_grants, global_grants)
+        # Permission gate (raises PermissionViolation if outside manifest).
+        # Phase 3.5 hotfix: if the LLM hallucinates a skill name (e.g.
+        # "fetch_url" instead of the real "web_fetch"), give it ONE retry
+        # with the corrected skill list as context. Most skill-hallucination
+        # errors recover with a single correction pass; only block on the
+        # SECOND consecutive miss. This dramatically reduces user-visible
+        # blocked_on_permission events caused by LLM naming drift.
+        try:
+            permission_gate(action, agent_grants, global_grants)
+        except PermissionViolation as pv:
+            if pv.reason == "skill_not_authorized":
+                # Append the failed action to history with a correction nudge
+                # so the next _qwen_next_action call sees the error context.
+                allowed = sorted(set(agent_grants.get("skills", [])) |
+                                 set(global_grants.get("skills", [])))
+                history.append({
+                    "step": len(history),
+                    "skill": action.skill,
+                    "task": action.task[:200],
+                    "result": (f"<skill_error: '{action.skill}' is NOT in this "
+                               f"agent's permission_manifest.skills. Allowed skills: "
+                               f"{', '.join(allowed)}. Pick one of those instead.>"),
+                    "is_destructive": False,
+                    "_skill_correction_nudge": True,
+                })
+                # Re-call Qwen once. The second permission_gate call below is not
+                # retried, so a repeated miss (or any other violation) propagates.
+                action2 = _qwen_next_action(plan_dict, checkpoint, history)
+                if action2.kind == "checkpoint_done":
+                    return history
+                permission_gate(action2, agent_grants, global_grants)
+                action = action2  # use the corrected action going forward
+            else:
+                raise   # path / domain violations not auto-recoverable
 
         # Destructive gate (raises DestructiveOpRejected on user reject)
         if action.is_destructive:

diff --git a/codec_tasks.html b/codec_tasks.html
@@ -908,9 +908,28 @@ <h2 style="font-size:16px; font-weight:600;">Task Reports</h2>
 var reports = [];
 var _reportPollTimer = null;
 
+// Phase 3.5 hotfix: the Reports tab used to keep ONLY 'task_report', so the
+// agent_*/proactive/shift_report/question notifications were dropped (bell
+// counted, nothing rendered). It now keeps every type in REPORT_NOTIF_TYPES.
+var REPORT_NOTIF_TYPES = [
+  'task_report',                                            // Crews
+  'question',                                               // Phase 1 Step 3 ask_user
+  'shift_report',                                           // Phase 2 Step 7
+  'agent_update', 'agent_blocked', 'agent_question',        // Phase 3 Step 10
+  'agent_done', 'agent_aborted',                            // Phase 3 Step 10
+  'proactive_suggestion',                                   // Phase 3.5
+];
+
 function loadReports() {
   fetch('/api/notifications').then(function(r){ return r.json(); }).then(function(data) {
-    reports = (data.notifications || data || []).filter(function(n){ return n.type === 'task_report'; });
+    var all = data.notifications || data || [];
+    reports = all.filter(function(n){ return REPORT_NOTIF_TYPES.indexOf(n.type) >= 0; });
+    // Newest first (string compare; assumes sortable string timestamps, e.g. ISO-8601 — TODO confirm)
+    reports.sort(function(a, b) {
+      var ta = a.timestamp || a.created || a.ts || '';
+      var tb = b.timestamp || b.created || b.ts || '';
+      return (tb || '').localeCompare(ta || '');
+    });
     renderReports();
     // Auto-refresh every 5s if any task is still running
     var hasRunning = reports.some(function(r){ return r.status === 'running'; });

diff --git a/tests/test_agent_runner.py b/tests/test_agent_runner.py
@@ -945,3 +945,53 @@ def test_permission_gate_allows_read_path_in_grants(basic_grants, empty_global_g
     action = Action(skill="weather", task="read",
                     reads_path=True, read_path="~/Documents/research/notes.md")
     permission_gate(action, basic_grants, empty_global_grants)  # no exception
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Phase 3.5 hotfix — LLM skill-name hallucination retry (1 test)
+# ─────────────────────────────────────────────────────────────────────────────
+
+def test_skill_hallucination_retries_with_corrected_skill_list(monkeypatch, temp_codec_dir):
+    """An unauthorized skill pick gets one corrective retry instead of a block.
+
+    The runner must record a nudge entry listing the allowed skills, re-call
+    Qwen, and run only the corrected skill; the hallucinated one never runs."""
+    import codec_agent_runner as car
+
+    grants = {"skills": ["weather"], "read_paths": [], "write_paths": [],
+              "network_domains": []}
+    global_grants = {"schema": 1, "version": 0,
+                     "skills": [], "read_paths": [], "write_paths": [], "network_domains": []}
+    checkpoint = {"id": "cp1", "title": "t", "description": "d",
+                  "expected_output": "o", "step_budget": 5}
+
+    # Scripted LLM replies, in call order: hallucinated skill, corrected
+    # skill, then checkpoint_done to end the checkpoint.
+    actions = [
+        car.Action(skill="fetch_url", task="x", kind="skill_call",   # hallucination
+                   is_destructive=False, network_call=False, touches_path=False),
+        car.Action(skill="weather", task="real call", kind="skill_call",  # corrected
+                   is_destructive=False, network_call=False, touches_path=False),
+        car.Action(skill="", task="", kind="checkpoint_done"),
+    ]
+    # side_effect with a list hands out one scripted action per call.
+    monkeypatch.setattr(car, "_qwen_next_action", MagicMock(side_effect=actions))
+    fake_run = MagicMock(return_value="r")
+    monkeypatch.setattr(car, "_run_skill", fake_run)
+
+    history = car._execute_checkpoint(
+        plan_dict={"goals": ["g"]}, checkpoint=checkpoint,
+        agent_grants=grants, global_grants=global_grants,
+        agent_id="agent_test",
+    )
+
+    # The corrected weather call ran (via _run_skill); the hallucinated
+    # fetch_url did NOT run (it never passed permission_gate).
+    fake_run.assert_called_once_with("weather", "real call", "agent_test")
+    # Exactly one nudge was recorded, for the hallucinated skill by exact name
+    # (a substring match would also accept e.g. "my_fetch_url").
+    nudges = [h for h in history if h.get("_skill_correction_nudge")]
+    assert len(nudges) == 1
+    assert nudges[0]["skill"] == "fetch_url"
+    # The nudge must name the allowed skills so the retry can self-correct.
+    assert "Allowed skills: weather." in nudges[0]["result"]