Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 34 additions & 2 deletions codec_agent_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,8 +364,40 @@ def _execute_checkpoint(plan_dict: Dict[str, Any],
if action.kind == "checkpoint_done":
return history

# Permission gate (raises PermissionViolation if outside manifest)
permission_gate(action, agent_grants, global_grants)
# Permission gate (raises PermissionViolation if outside manifest).
# Phase 3.5 hotfix: if the LLM hallucinates a skill name (e.g.
# "fetch_url" instead of the real "web_fetch"), give it ONE retry
# with the corrected skill list as context. Most skill-hallucination
# errors recover with a single correction pass; only block on the
# SECOND consecutive miss. This dramatically reduces user-visible
# blocked_on_permission events caused by LLM naming drift.
try:
permission_gate(action, agent_grants, global_grants)
except PermissionViolation as pv:
if pv.reason == "skill_not_authorized":
# Append the failed action to history with a correction nudge
# so the next _qwen_next_action call sees the error context.
allowed = sorted(set(agent_grants.get("skills", [])) |
set(global_grants.get("skills", [])))
history.append({
"step": len(history),
"skill": action.skill,
"task": action.task[:200],
"result": (f"<skill_error: '{action.skill}' is NOT in this "
f"agent's permission_manifest.skills. Allowed skills: "
f"{', '.join(allowed)}. Pick one of those instead.>"),
"is_destructive": False,
"_skill_correction_nudge": True,
})
# Re-call Qwen — if it still picks the wrong skill, fall through
# and the SECOND permission_gate call will raise normally.
action2 = _qwen_next_action(plan_dict, checkpoint, history)
if action2.kind == "checkpoint_done":
return history
permission_gate(action2, agent_grants, global_grants)
action = action2 # use the corrected action going forward
else:
raise # path / domain violations not auto-recoverable

# Destructive gate (raises DestructiveOpRejected on user reject)
if action.is_destructive:
Expand Down
21 changes: 20 additions & 1 deletion codec_tasks.html
Original file line number Diff line number Diff line change
Expand Up @@ -908,9 +908,28 @@ <h2 style="font-size:16px; font-weight:600;">Task Reports</h2>
var reports = [];
var _reportPollTimer = null;

// Notification types rendered in the Reports tab (Phase 3.5 hotfix).
// The tab previously kept only 'task_report', so every agent_*, proactive,
// shift_report and question notification was counted by the bell but never
// rendered. Every type CODEC generates is now listed here.
var REPORT_NOTIF_TYPES = [
  // Crews
  'task_report',
  // Phase 1 Step 3 ask_user
  'question',
  // Phase 2 Step 7
  'shift_report',
  // Phase 3 Step 10
  'agent_update',
  'agent_blocked',
  'agent_question',
  'agent_done',
  'agent_aborted',
  // Phase 3.5
  'proactive_suggestion'
];

function loadReports() {
fetch('/api/notifications').then(function(r){ return r.json(); }).then(function(data) {
reports = (data.notifications || data || []).filter(function(n){ return n.type === 'task_report'; });
var all = data.notifications || data || [];
reports = all.filter(function(n){ return REPORT_NOTIF_TYPES.indexOf(n.type) >= 0; });
// Newest first
reports.sort(function(a, b) {
var ta = a.timestamp || a.created || a.ts || '';
var tb = b.timestamp || b.created || b.ts || '';
return (tb || '').localeCompare(ta || '');
});
renderReports();
// Auto-refresh every 5s if any task is still running
var hasRunning = reports.some(function(r){ return r.status === 'running'; });
Expand Down
50 changes: 50 additions & 0 deletions tests/test_agent_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -945,3 +945,53 @@ def test_permission_gate_allows_read_path_in_grants(basic_grants, empty_global_g
action = Action(skill="weather", task="read",
reads_path=True, read_path="~/Documents/research/notes.md")
permission_gate(action, basic_grants, empty_global_grants) # no exception


# ─────────────────────────────────────────────────────────────────────────────
# Phase 3.5 hotfix — LLM skill-name hallucination retry (1 test)
# ─────────────────────────────────────────────────────────────────────────────

def test_skill_hallucination_retries_with_corrected_skill_list(monkeypatch, temp_codec_dir):
    """Verify the one-shot retry after the LLM picks an unauthorized skill.

    When the first proposed action names a skill outside the agent's grants,
    the runner is expected to append a correction nudge to history and ask
    the LLM again. If the second pick is allowed, execution proceeds with it
    and the hallucinated skill is never run.
    """
    import codec_agent_runner as car

    grants = {"skills": ["weather"], "read_paths": [], "write_paths": [],
              "network_domains": []}
    global_grants = {"schema": 1, "version": 0,
                     "skills": [], "read_paths": [], "write_paths": [],
                     "network_domains": []}
    checkpoint = {"id": "cp1", "title": "t", "description": "d",
                  "expected_output": "o", "step_budget": 5}

    # Scripted LLM responses, consumed in order:
    #   1. hallucinated skill name (not in grants)
    #   2. corrected, authorized skill
    #   3. checkpoint_done, which ends the checkpoint
    actions = [
        car.Action(skill="fetch_url", task="x", kind="skill_call",
                   is_destructive=False, network_call=False, touches_path=False),
        car.Action(skill="weather", task="real call", kind="skill_call",
                   is_destructive=False, network_call=False, touches_path=False),
        car.Action(skill="", task="", kind="checkpoint_done"),
    ]
    idx = {"n": 0}

    def fake_next(*a, **k):
        # Hand out the next scripted action regardless of the arguments.
        out = actions[idx["n"]]
        idx["n"] += 1
        return out

    monkeypatch.setattr(car, "_qwen_next_action", fake_next)
    fake_run = MagicMock(return_value="r")
    monkeypatch.setattr(car, "_run_skill", fake_run)

    history = car._execute_checkpoint(
        plan_dict={"goals": ["g"]}, checkpoint=checkpoint,
        agent_grants=grants, global_grants=global_grants,
        agent_id="agent_test",
    )

    # Only the corrected weather call reached _run_skill; the hallucinated
    # fetch_url never ran.
    fake_run.assert_called_once_with("weather", "real call", "agent_test")

    # Exactly one correction nudge was recorded, attributed to the exact
    # hallucinated skill name (equality, not substring containment).
    nudges = [h for h in history if h.get("_skill_correction_nudge")]
    assert len(nudges) == 1
    assert nudges[0]["skill"] == "fetch_url"
    # The nudge must carry the allowed-skill list so the retry has the
    # context it needs to self-correct.
    assert "weather" in nudges[0]["result"]
Loading