Clyra-AI · RyshMan · Mar 6, 2026 · Mar 6, 2026 · Mar 6, 2026 · Mar 6, 2026
diff --git a/.agents/skills/commit-push/SKILL.md b/.agents/skills/commit-push/SKILL.md
@@ -63,7 +63,66 @@ If preconditions fail, stop and report.
 - flaky/infra/transient
 - permission/workflow policy failure
 
-8. Pre-merge unresolved comment triage and fix loop (max 2 loops):
+8. Codex review settle gate (mandatory, latest PR head SHA):
+- After PR creation/update and green CI, wait for Codex review output before merge.
+- Poll PR reviews/comments every `15s` for up to `5 minutes`, scoped to the latest PR head SHA.
+- Default reviewer identity for this gate: `chatgpt-codex-connector` (the API may report the login with a `[bot]` suffix, i.e. `chatgpt-codex-connector[bot]`; the GitHub UI may render it as `chatgpt-codex-connector bot`).
+- Accepted settle signals:
+  - Codex posts actionable review comments/suggestions -> proceed to pre-merge fix loop.
+  - Codex posts explicit approval/all-good signal -> review gate is satisfied.
+- If no Codex review signal appears within timeout, stop and report blocker (`review pending`).
+- Example `gh api` polling implementation:
+```bash
+PR_NUMBER="$(gh pr view --json number --jq .number)"
+REPO="$(gh repo view --json nameWithOwner --jq .nameWithOwner)"
+HEAD_SHA="$(gh pr view --json headRefOid --jq .headRefOid)"   # every signal below is matched against this commit only
+BOT_RE="${BOT_RE:-^chatgpt-codex-connector(\\\\[bot\\\\])?$}"   # override if your org uses a different reviewer bot login; the value is spliced into a jq string literal, so regex backslashes must arrive doubled
+DEADLINE=$(( $(date +%s) + 300 ))   # settle timeout: 300 seconds from now
+SETTLED=0
+SETTLE_KIND=""
+
+while [ "$(date +%s)" -lt "$DEADLINE" ]; do
+  APPROVAL_COUNT="$(gh api "repos/$REPO/pulls/$PR_NUMBER/reviews" \
+    --jq "[.[] | select((.user.login | test(\"$BOT_RE\"; \"i\")) and .commit_id==\"$HEAD_SHA\" and .state==\"APPROVED\")] | length")"
+
+  ACTION_REVIEW_COUNT="$(gh api "repos/$REPO/pulls/$PR_NUMBER/reviews" \
+    --jq "[.[] | select((.user.login | test(\"$BOT_RE\"; \"i\")) and .commit_id==\"$HEAD_SHA\" and (.state==\"CHANGES_REQUESTED\" or .state==\"COMMENTED\"))] | length")"
+
+  ACTION_COMMENT_COUNT="$(gh api "repos/$REPO/pulls/$PR_NUMBER/comments" \
+    --jq "[.[] | select((.user.login | test(\"$BOT_RE\"; \"i\")) and .commit_id==\"$HEAD_SHA\")] | length")"
+
+  if [ "$ACTION_REVIEW_COUNT" -gt 0 ] || [ "$ACTION_COMMENT_COUNT" -gt 0 ]; then   # checked first: actionable feedback outranks an approval on the same SHA
+    SETTLED=1
+    SETTLE_KIND="actionable"
+    break
+  fi
+
+  if [ "$APPROVAL_COUNT" -gt 0 ]; then
+    SETTLED=1
+    SETTLE_KIND="approved"
+    break
+  fi
+
+  sleep 15
+done
+
+if [ "$SETTLED" -ne 1 ]; then
+  echo "BLOCKER: review pending (no Codex signal within 5 minutes for head $HEAD_SHA)"
+  exit 1
+fi
+
+echo "Codex review settle result: $SETTLE_KIND"
+```
+- Example signal inspection commands for reporting:
+```bash
+gh api "repos/$REPO/pulls/$PR_NUMBER/reviews" \
+  --jq ".[] | select((.user.login | test(\"$BOT_RE\"; \"i\")) and .commit_id==\"$HEAD_SHA\") | {state, user: .user.login, submitted_at, body}"
+
+gh api "repos/$REPO/pulls/$PR_NUMBER/comments" \
+  --jq ".[] | select((.user.login | test(\"$BOT_RE\"; \"i\")) and .commit_id==\"$HEAD_SHA\") | {path, user: .user.login, created_at, body}"
+```
+
+9. Pre-merge unresolved comment triage and fix loop (max 2 loops):
 - Fetch unresolved PR review threads/comments (including bot comments) for the latest PR head SHA.
 - Triage each unresolved item: `implement`, `defer`, `reject`.
 - Auto-fix only `implement` items that are:
@@ -76,21 +135,26 @@ If preconditions fail, stop and report.
   - `git commit -m "fix: address actionable PR comments (loop <n>)"` (skip only if no changes)
   - push branch
   - re-watch PR CI to green
+  - re-run Codex review settle gate on the new PR head SHA (poll `15s`, timeout `5 minutes`)
   - re-fetch unresolved threads/comments
 - If unresolved `P0/P1` remain after loop cap, stop and report blocker.
 
-9. Merge PR after green:
+10. Merge PR after CI is green and the review gate is satisfied:
+- Merge only when all of the following are true for the latest PR head SHA:
+  - required PR CI is green
+  - Codex review settle gate is satisfied
+  - no unresolved `P0/P1` review items remain
 - Merge non-interactively (repo-default merge strategy or explicitly chosen one).
 - Record merged PR URL and merge commit SHA.
 
-10. Switch to main and sync:
+11. Switch to main and sync:
 - `git checkout main`
 - `git pull --ff-only origin main`
 
-11. Monitor post-merge CI on `main`:
+12. Monitor post-merge CI on `main`:
 - Watch the latest `main` CI run with timeout `25 minutes`.
 
-12. Hotfix loop on post-merge red (max 2 loops):
+13. Hotfix loop on post-merge red (max 2 loops):
 - Run only for actionable failures.
 - Loop cap: `2`.
 - For each loop:
@@ -106,8 +170,9 @@ If preconditions fail, stop and report.
 - `git checkout main && git pull --ff-only origin main`
 - Monitor post-merge CI again (25 min timeout).
 
-13. Stop conditions:
+14. Stop conditions:
 - CI green on main: success.
+- Codex review signal not received within settle timeout (`5 minutes`): stop and report blocker.
 - Unresolved pre-merge `P0/P1` comments after 2 fix loops: stop and report blocker.
 - Non-actionable failure class: stop and report.
 - Loop count exceeded (`>2`): stop and report blocker.
@@ -140,6 +205,8 @@ Never use inline `--body "..."` for multi-line PR text.
 
 - Required local gate before push: `make prepush-full` (includes CodeQL in this repo).
 - PR CI watch timeout: `25 minutes`.
+- Codex review settle polling interval: `15 seconds`.
+- Codex review settle timeout: `5 minutes` (mandatory pre-merge gate).
 - Pre-merge comment-fix loop cap: `2`.
 - Post-merge main CI watch timeout: `25 minutes`.
 - Retry/hotfix loop cap: `2`.
@@ -150,6 +217,7 @@ Never use inline `--body "..."` for multi-line PR text.
 - Commit SHA(s)
 - PR URL(s)
 - CI status per cycle
+- Codex review settle status per cycle
 - Merge commit SHA(s)
 - Post-merge CI status on `main`
 - If stopped: blocker reason and last failing check
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -93,6 +93,11 @@ jobs:
           go-version: '1.25.7'
           check-latest: false
 
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.13'
+
       - name: Run acceptance lane
         run: |
           make test-integration
@@ -101,6 +106,14 @@ jobs:
           scripts/validate_contracts.sh
           scripts/validate_scenarios.sh
           go test ./internal/scenarios -count=1 -tags=scenario
+          scripts/run_agent_benchmarks.sh --output .tmp/agent-benchmarks-main.json
+
+      - name: Upload agent benchmark report
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: agent-benchmark-main
+          path: .tmp/agent-benchmarks-main.json
 
   docs-smoke:
     name: docs-smoke

diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml
@@ -55,6 +55,9 @@ jobs:
       - name: Run performance budgets
         run: scripts/test_perf_budgets.sh
 
+      - name: Run agent benchmark gate
+        run: scripts/run_agent_benchmarks.sh --output .tmp/agent-benchmarks-nightly.json
+
       - name: Run cross-product interop suite
         run: go test ./internal/integration/interop -count=1
 
@@ -75,3 +78,10 @@ jobs:
           path: |
             .tmp/release/v1-scorecard.json
             .tmp/release/v1-scorecard.md
+
+      - name: Upload agent benchmark report
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: agent-benchmark-nightly
+          path: .tmp/agent-benchmarks-nightly.json
diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml
@@ -81,6 +81,14 @@ jobs:
           make test-fast
           make test-contracts
           scripts/validate_scenarios.sh
+          scripts/run_agent_benchmarks.sh --output .tmp/agent-benchmarks-pr.json
+
+      - name: Upload agent benchmark report
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: agent-benchmark-pr
+          path: .tmp/agent-benchmarks-pr.json
 
       - name: Docs parity and smoke subset
         if: steps.changes.outputs.go == 'true' || steps.changes.outputs.docs == 'true' || steps.changes.outputs.workflow_or_policy == 'true'

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -33,6 +33,11 @@ jobs:
           go-version: '1.25.7'
           check-latest: false
 
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.13'
+
       - name: Setup Node
         uses: actions/setup-node@v4
         with:
@@ -79,8 +84,16 @@ jobs:
         run: |
           scripts/test_hardening_core.sh
           scripts/test_perf_budgets.sh
+          scripts/run_agent_benchmarks.sh --output .tmp/release/agent-benchmarks-release.json
           go test ./internal/integration/interop -count=1
 
+      - name: Upload agent benchmark report
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: agent-benchmark-release
+          path: .tmp/release/agent-benchmarks-release.json
+
       - name: Run pre-publish install-path UAT smoke
         env:
           HOMEBREW_TAP_GITHUB_TOKEN: ${{ secrets.HOMEBREW_TAP_GITHUB_TOKEN }}

diff --git a/Makefile b/Makefile
@@ -6,7 +6,7 @@ GOFILES := $(shell git ls-files '*.go')
 DOCS_SITE_NPM_CACHE ?= $(CURDIR)/.tmp/npm-cache
 
 .PHONY: fmt lint lint-fast test test-fast test-integration test-e2e test-contracts test-scenarios \
-	test-hardening test-chaos test-perf test-risk-lane build hooks prepush prepush-full codeql lint-ci \
+	test-hardening test-chaos test-perf test-agent-benchmarks test-risk-lane build hooks prepush prepush-full codeql lint-ci \
 	test-docs-consistency test-docs-storyline test-adapter-parity test-v1-acceptance test-uat-local test-release-smoke \
 	docs-site-install docs-site-lint docs-site-build docs-site-check docs-site-audit-prod
 
@@ -51,7 +51,10 @@ test-chaos:
 test-perf:
 	@scripts/test_perf_budgets.sh
 
-test-risk-lane: test-contracts test-scenarios test-hardening test-chaos test-perf
+test-agent-benchmarks:
+	@scripts/run_agent_benchmarks.sh --output .tmp/agent-benchmarks.json
+
+test-risk-lane: test-contracts test-scenarios test-hardening test-chaos test-perf test-agent-benchmarks
 
 test-docs-consistency:
 	@scripts/check_docs_cli_parity.sh

diff --git a/core/aggregate/inventory/inventory.go b/core/aggregate/inventory/inventory.go
@@ -740,7 +740,7 @@ func classifyToolCategory(toolType string) string {
 	switch normalized {
 	case "claude", "cursor", "codex", "copilot", "cody", "windsurf":
 		return "assistant"
-	case "a2a", "agent", "agent_framework", "ci_agent", "compiled_action", "langchain", "crewai", "autogen":
+	case "a2a", "agent", "agent_framework", "ci_agent", "compiled_action", "langchain", "crewai", "autogen", "llamaindex", "openai_agents", "mcp_client", "custom_agent":
 		return "agent_framework"
 	case "mcp", "mcpgateway", "webmcp":
 		return "mcp_integration"

diff --git a/core/detect/agentautogen/detector.go b/core/detect/agentautogen/detector.go
@@ -17,10 +17,31 @@ func New() Detector { return Detector{} }
 func (Detector) ID() string { return detectorID }
 
 func (Detector) Detect(ctx context.Context, scope detect.Scope, _ detect.Options) ([]model.Finding, error) {
-	return agentframework.Detect(ctx, scope, agentframework.DetectorConfig{
-		DetectorID: detectorID,
-		Framework:  "autogen",
-		ConfigPath: ".wrkr/agents/autogen.json",
-		Format:     "json",
+	_ = ctx
+	return agentframework.DetectMany(scope, []agentframework.DetectorConfig{
+		{
+			DetectorID: detectorID,
+			Framework:  "autogen",
+			ConfigPath: ".wrkr/agents/autogen.json",
+			Format:     "json",
+		},
+		{
+			DetectorID: detectorID,
+			Framework:  "autogen",
+			ConfigPath: ".wrkr/agents/autogen.yaml",
+			Format:     "yaml",
+		},
+		{
+			DetectorID: detectorID,
+			Framework:  "autogen",
+			ConfigPath: ".wrkr/agents/autogen.yml",
+			Format:     "yaml",
+		},
+		{
+			DetectorID: detectorID,
+			Framework:  "autogen",
+			ConfigPath: ".wrkr/agents/autogen.toml",
+			Format:     "toml",
+		},
 	})
 }
diff --git a/core/detect/agentautogen/detector_test.go b/core/detect/agentautogen/detector_test.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"os"
 	"path/filepath"
+	"reflect"
 	"testing"
 
 	"github.com/Clyra-AI/wrkr/core/detect"
@@ -36,6 +37,47 @@ func TestAutoGenDetector_PrecisionBaseline(t *testing.T) {
 	}
 }
 
+// TestAutoGenDetector_ExpandedFormatsDeterministic declares one agent in YAML and one in TOML, expects
+// exactly two autogen findings, and requires ten further Detect runs to match the first run exactly.
+func TestAutoGenDetector_ExpandedFormatsDeterministic(t *testing.T) {
+	t.Parallel()
+	repoRoot := t.TempDir()
+	writeFile(t, repoRoot, ".wrkr/agents/autogen.yaml", `agents:
+  - name: planner
+    file: agents/planner.py
+`)
+	writeFile(t, repoRoot, ".wrkr/agents/autogen.toml", `[[agents]]
+name = "executor"
+file = "agents/executor.py"
+`)
+	scope := detect.Scope{Org: "acme", Repo: "platform", Root: repoRoot}
+	baseline, err := New().Detect(context.Background(), scope, detect.Options{})
+	if err != nil {
+		t.Fatalf("detect: %v", err)
+	}
+	if got := len(baseline); got != 2 {
+		t.Fatalf("expected two findings from yaml+toml declarations, got %d", got)
+	}
+	for _, f := range baseline {
+		switch {
+		case f.ToolType != "autogen":
+			t.Fatalf("unexpected tool type %q", f.ToolType)
+		case f.FindingType != "agent_framework":
+			t.Fatalf("unexpected finding type %q", f.FindingType)
+		}
+	}
+	// Re-run against the same tree; any difference from the baseline is a determinism failure.
+	for run := 1; run <= 10; run++ {
+		rerun, err := New().Detect(context.Background(), scope, detect.Options{})
+		if err != nil {
+			t.Fatalf("detect run %d: %v", run, err)
+		}
+		if !reflect.DeepEqual(baseline, rerun) {
+			t.Fatalf("non-deterministic output at run %d", run)
+		}
+	}
+}
+
 func writeFile(t *testing.T, root, rel, content string) {
 	t.Helper()
 	path := filepath.Join(root, filepath.FromSlash(rel))