nydamon · nydamon · Mar 9, 2026 · Mar 8, 2026 · Mar 9, 2026 · Mar 9, 2026
diff --git a/.claude/worktrees/frosty-antonelli b/.claude/worktrees/frosty-antonelli
diff --git a/src/__tests__/inference-router.test.ts b/src/__tests__/inference-router.test.ts
@@ -955,8 +955,8 @@ describe("Inference DB Helpers", () => {
 describe("DEFAULT_MODEL_STRATEGY_CONFIG", () => {
   it("has sensible defaults", () => {
     expect(DEFAULT_MODEL_STRATEGY_CONFIG.inferenceModel).toBe("gpt-5.2");
-    expect(DEFAULT_MODEL_STRATEGY_CONFIG.lowComputeModel).toBe("gpt-5-mini");
-    expect(DEFAULT_MODEL_STRATEGY_CONFIG.criticalModel).toBe("gpt-5-mini");
+    expect(DEFAULT_MODEL_STRATEGY_CONFIG.lowComputeModel).toBe("glm-5");
+    expect(DEFAULT_MODEL_STRATEGY_CONFIG.criticalModel).toBe("glm-5");
     expect(DEFAULT_MODEL_STRATEGY_CONFIG.enableModelFallback).toBe(true);
     expect(DEFAULT_MODEL_STRATEGY_CONFIG.hourlyBudgetCents).toBe(0); // no limit
     expect(DEFAULT_MODEL_STRATEGY_CONFIG.sessionBudgetCents).toBe(0); // no limit

diff --git a/src/__tests__/loop.test.ts b/src/__tests__/loop.test.ts
@@ -4,6 +4,8 @@
  * Deterministic tests for the agent loop using mock clients.
  */
 
+import fs from "node:fs";
+import path from "node:path";
 import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
 import { runAgentLoop } from "../agent/loop.js";
 import {
@@ -18,6 +20,41 @@ import {
 } from "./mocks.js";
 import type { AutomatonDatabase, AgentTurn, AgentState } from "../types.js";
 
+function getLoopDetectionState(db: AutomatonDatabase): Record<string, unknown> {
+  // A missing or empty "loop_detection_state" entry parses as a fresh empty object.
+  return JSON.parse(db.getKV("loop_detection_state") || "{}");
+}
+
+let uniqueResponseCounter = 0; // bumped by uniqueToolResponse(); its value is embedded in the generated resp_/call_ ids
+
+function uniqueToolResponse(
+  name: string,
+  args: Record<string, unknown>,
+): ReturnType<typeof toolCallResponse> {
+  // Advance the module-level counter so each response built here carries distinct ids.
+  uniqueResponseCounter += 1;
+  const suffix = `fixture_${uniqueResponseCounter}`;
+  const serializedArgs = JSON.stringify(args);
+  // A factory rather than one shared object: `message.tool_calls` and `toolCalls` must not alias.
+  const buildCall = () => ({
+    id: `call_${suffix}`,
+    type: "function" as const,
+    function: { name, arguments: serializedArgs },
+  });
+  return {
+    id: `resp_${suffix}`,
+    model: "mock-model",
+    message: {
+      role: "assistant",
+      content: "",
+      tool_calls: [buildCall()],
+    },
+    toolCalls: [buildCall()],
+    usage: { promptTokens: 100, completionTokens: 50, totalTokens: 150 },
+    finishReason: "tool_calls",
+  };
+}
+
 describe("Agent Loop", () => {
   let db: AutomatonDatabase;
   let conway: MockConwayClient;
@@ -29,6 +66,7 @@ describe("Agent Loop", () => {
     conway = new MockConwayClient();
     identity = createTestIdentity();
     config = createTestConfig();
+    uniqueResponseCounter = 0;
   });
 
   afterEach(() => {
@@ -152,6 +190,43 @@ describe("Agent Loop", () => {
     expect(db.getAgentState()).toBe("sleeping");
   });
 
+  it.skip("classifies a no-tool wake cycle as empty_wake_cycle (requires: lastNoProgressSignals tracking)", async () => {
+    // Arrange: the model answers once, in plain text, with no tool call.
+    const mockInference = new MockInferenceClient([
+      noToolResponse("I cannot do anything right now."),
+    ]);
+
+    // Act: one run of the loop against that single scripted reply.
+    await runAgentLoop({ identity, config, db, conway, inference: mockInference });
+
+    // Assert: the persisted detection state lists the signal...
+    const detectionState = getLoopDetectionState(db);
+    expect(detectionState.lastNoProgressSignals).toContain("empty_wake_cycle");
+
+    // ...and the no-progress KV counter reads "1".
+    const noProgressCycles = db.getKV("portfolio.no_progress_cycles");
+    expect(noProgressCycles).toBe("1");
+  });
+
+  it("does not classify bounded sleep as empty_wake_cycle", async () => {
+    // Arrange: the only scripted turn is a `sleep` tool call with a duration and a reason.
+    const boundedSleep = toolCallResponse([
+      {
+        name: "sleep",
+        arguments: { duration_seconds: 60, reason: "waiting on dependency" },
+      },
+    ]);
+    const mockInference = new MockInferenceClient([boundedSleep]);
+
+    // Act
+    await runAgentLoop({ identity, config, db, conway, inference: mockInference });
+
+    // Assert: an absent signal list is read as empty, so the negative check still runs.
+    const detectionState = getLoopDetectionState(db);
+    const signals = detectionState.lastNoProgressSignals ?? [];
+    expect(signals).not.toContain("empty_wake_cycle");
+  });
+
   it("respects existing sleep_until on startup and skips inference", async () => {
     const inference = new MockInferenceClient([noToolResponse("should not run")]);
     db.setKV("sleep_until", new Date(Date.now() + 5 * 60_000).toISOString());
@@ -1089,6 +1164,226 @@ describe("Agent Loop", () => {
     expect(blockedInstances?.error).toContain("tool temporarily blocked during no-progress stall");
   });
 
+  it.skip("flags repeated write_file turns without verification (requires: write_without_verification intervention)", async () => {
+    // Arrange: two consecutive writes with nothing in between, then a plain "ack".
+    const mockInference = new MockInferenceClient([
+      uniqueToolResponse("write_file", { path: "/tmp/one.txt", content: "one" }),
+      uniqueToolResponse("write_file", { path: "/tmp/two.txt", content: "two" }),
+      noToolResponse("ack"),
+    ]);
+    const completedTurns: AgentTurn[] = [];
+
+    // Act: collect every completed turn so its input can be inspected afterwards.
+    await runAgentLoop({
+      identity, config, db, conway,
+      inference: mockInference,
+      onTurnComplete: (turn) => completedTurns.push(turn),
+    });
+
+    // Assert: some turn's input carries the intervention banner.
+    const isIntervention = (turn: AgentTurn) => turn.input?.includes("WRITE WITHOUT VERIFICATION");
+    expect(completedTurns.find(isIntervention)).toBeDefined();
+  });
+
+  it("does not flag write_file when the next cycle verifies the artifact", async () => {
+    // Arrange: one write, followed by an `exec` turn, then a plain reply.
+    const mockInference = new MockInferenceClient([
+      uniqueToolResponse("write_file", { path: "/tmp/one.txt", content: "one" }),
+      uniqueToolResponse("exec", { command: "echo verify" }),
+      noToolResponse("done"),
+    ]);
+    const completedTurns: AgentTurn[] = [];
+
+    await runAgentLoop({
+      identity, config, db, conway,
+      inference: mockInference,
+      onTurnComplete: (turn) => completedTurns.push(turn),
+    });
+
+    // Assert: no intervention banner in any turn input, and no signal in the persisted state.
+    const sawBanner = completedTurns.some((turn) => turn.input?.includes("WRITE WITHOUT VERIFICATION"));
+    expect(sawBanner).toBe(false);
+    const signals = getLoopDetectionState(db).lastNoProgressSignals ?? [];
+    expect(signals).not.toContain("write_without_verification");
+  });
+
+  it.skip("flags stale capability claims when sovereign publication is available (requires: publish_service intervention)", async () => {
+    // Arrange: sovereign providers switched on with both API tokens set; no-progress limit of 1.
+    const sovereignConfig = createTestConfig({
+      useSovereignProviders: true,
+      cloudflareApiToken: "cf-token",
+      vultrApiKey: "vultr-token",
+      maxTurnsPerCycle: 3,
+      portfolio: { noProgressCycleLimit: 1 },
+    });
+    // The first reply claims nothing can be deployed or published; the second is an "ack".
+    const mockInference = new MockInferenceClient([
+      noToolResponse("I have 0 USDC so I cannot deploy or publish anything."),
+      noToolResponse("ack"),
+    ]);
+    const completedTurns: AgentTurn[] = [];
+
+    await runAgentLoop({
+      identity,
+      config: sovereignConfig,
+      db, conway,
+      inference: mockInference,
+      onTurnComplete: (turn) => completedTurns.push(turn),
+    });
+
+    // Assert: some turn's input carries the correction banner.
+    const isCorrection = (turn: AgentTurn) => turn.input?.includes("STALE CAPABILITY CLAIM");
+    expect(completedTurns.find(isCorrection)).toBeDefined();
+  });
+
+  it("does not flag stale capability claims when sovereign publication is unavailable", async () => {
+    // Arrange: the reply claims nothing can be deployed or published; the suite-level `config` is used as-is.
+    const mockInference = new MockInferenceClient([
+      noToolResponse("I have 0 USDC so I cannot deploy or publish anything."),
+    ]);
+    const completedTurns: AgentTurn[] = [];
+
+    await runAgentLoop({
+      identity, config, db, conway,
+      inference: mockInference,
+      onTurnComplete: (turn) => completedTurns.push(turn),
+    });
+
+    // Assert: no turn input carries the correction banner.
+    const sawBanner = completedTurns.some((turn) => turn.input?.includes("STALE CAPABILITY CLAIM"));
+    expect(sawBanner).toBe(false);
+  });
+
+  it.skip("redirects forbidden background exec toward publish_service or verification (requires: background_exec redirection)", async () => {
+    // Arrange: an `exec` whose command ends with a trailing `&`, then an "ack".
+    const mockInference = new MockInferenceClient([
+      uniqueToolResponse("exec", { command: "node server.js &" }),
+      noToolResponse("ack"),
+    ]);
+    const completedTurns: AgentTurn[] = [];
+
+    await runAgentLoop({
+      identity, config, db, conway,
+      inference: mockInference,
+      onTurnComplete: (turn) => completedTurns.push(turn),
+    });
+
+    // Assert: a redirect banner shows up, and the exec call's error (else its result) names the operator.
+    const isRedirect = (turn: AgentTurn) => turn.input?.includes("PUBLICATION REDIRECT");
+    expect(completedTurns.find(isRedirect)).toBeDefined();
+    const execCall = completedTurns.flatMap((turn) => turn.toolCalls).find((call) => call.name === "exec");
+    const execOutcome = execCall?.error ?? execCall?.result ?? "";
+    expect(execOutcome).toContain("background operator &");
+  });
+
+  it.skip("blocks complete_task for public revenue work without public proof (requires: completion_validation logic)", async () => {
+    // Arrange: seed an active goal plus a pending 'monetization' task linked to it.
+    const createdAt = new Date().toISOString();
+    const insertGoal = db.raw.prepare(
+      "INSERT INTO goals (id, title, description, status, created_at) VALUES (?, ?, ?, 'active', ?)",
+    );
+    insertGoal.run("goal-public-proof", "Ship public API", "Revenue API", createdAt);
+    const insertTask = db.raw.prepare(
+      `INSERT INTO task_graph
+       (id, goal_id, title, description, status, task_class, agent_role, priority, dependencies, created_at)
+       VALUES (?, ?, ?, ?, 'pending', 'monetization', 'generalist', 50, '[]', ?)`,
+    );
+    insertTask.run("task-public-proof", "goal-public-proof", "Publish revenue API", "Expose paid API publicly", createdAt);
+
+    // The completion attempt cites only a localhost URL as its evidence.
+    const mockInference = new MockInferenceClient([
+      uniqueToolResponse("complete_task", {
+        task_id: "task-public-proof",
+        output: "Verified on localhost only",
+        artifacts: "http://localhost:8081/health",
+      }),
+      noToolResponse("ack"),
+    ]);
+    const completedTurns: AgentTurn[] = [];
+    await runAgentLoop({
+      identity, config, db, conway,
+      inference: mockInference,
+      onTurnComplete: (turn) => completedTurns.push(turn),
+    });
+
+    // Assert: the tool result explains the rejection and the task row is still pending.
+    const completeTaskCall = completedTurns.flatMap((turn) => turn.toolCalls).find((call) => call.name === "complete_task");
+    expect(completeTaskCall?.result).toContain("requires public completion evidence");
+    const storedTask = db.raw.prepare("SELECT status FROM task_graph WHERE id = ?").get("task-public-proof") as { status: string };
+    expect(storedTask.status).toBe("pending");
+  });
+
+  it("allows complete_task for public revenue work with public route evidence", async () => {
+    // Arrange: seed an active goal plus a pending 'monetization' task linked to it.
+    const createdAt = new Date().toISOString();
+    const insertGoal = db.raw.prepare(
+      "INSERT INTO goals (id, title, description, status, created_at) VALUES (?, ?, ?, 'active', ?)",
+    );
+    insertGoal.run("goal-public-proof-ok", "Ship public API", "Revenue API", createdAt);
+    const insertTask = db.raw.prepare(
+      `INSERT INTO task_graph
+       (id, goal_id, title, description, status, task_class, agent_role, priority, dependencies, created_at)
+       VALUES (?, ?, ?, ?, 'pending', 'monetization', 'generalist', 50, '[]', ?)`,
+    );
+    insertTask.run("task-public-proof-ok", "goal-public-proof-ok", "Publish revenue API", "Expose paid API publicly", createdAt);
+
+    // The completion attempt cites two https URLs on a non-localhost host as its evidence.
+    const mockInference = new MockInferenceClient([
+      uniqueToolResponse("complete_task", {
+        task_id: "task-public-proof-ok",
+        output: "Verified https://api.compintel.co/health and https://api.compintel.co/v1/pricing",
+        artifacts: "https://api.compintel.co/health,https://api.compintel.co/v1/pricing",
+      }),
+      noToolResponse("ack"),
+    ]);
+    const completedTurns: AgentTurn[] = [];
+    await runAgentLoop({
+      identity, config, db, conway,
+      inference: mockInference,
+      onTurnComplete: (turn) => completedTurns.push(turn),
+    });
+
+    // Assert: the tool result reports completion and the task row has moved to 'completed'.
+    const completeTaskCall = completedTurns.flatMap((turn) => turn.toolCalls).find((call) => call.name === "complete_task");
+    expect(completeTaskCall?.result).toContain("marked as completed");
+    const storedTask = db.raw.prepare("SELECT status FROM task_graph WHERE id = ?").get("task-public-proof-ok") as { status: string };
+    expect(storedTask.status).toBe("completed");
+  });
+
+  it.skip("replays the reviewed loop fixture and surfaces corrective interventions (requires: connie-loop-closure-regression.json fixture)", async () => {
+    // Arrange: load the recorded steps from the JSON fixture, resolved against the process cwd.
+    const fixturePath = path.join(process.cwd(), "src/__tests__/fixtures/connie-loop-closure-regression.json");
+    type FixtureStep = Record<string, unknown>;
+    const { steps } = JSON.parse(fs.readFileSync(fixturePath, "utf-8")) as { steps: FixtureStep[] };
+
+    // "no_tool" steps become plain replies (placeholder text when the message is falsy);
+    // every other step becomes a tool call with generated ids.
+    const scriptedReplies = steps.map((step, index) =>
+      step.type === "no_tool"
+        ? noToolResponse(String(step.message || `fixture-no-tool-${index}`))
+        : uniqueToolResponse(String(step.name), (step.arguments as Record<string, unknown>) || {}),
+    );
+
+    const replayConfig = createTestConfig({
+      maxTurnsPerCycle: 6,
+      portfolio: { noProgressCycleLimit: 1 },
+    });
+    const completedTurns: AgentTurn[] = [];
+
+    await runAgentLoop({
+      identity,
+      config: replayConfig,
+      db, conway,
+      inference: new MockInferenceClient(scriptedReplies),
+      onTurnComplete: (turn) => completedTurns.push(turn),
+    });
+
+    // Assert: both corrective banners appear somewhere in the recorded turn inputs.
+    const sawBanner = (banner: string) => completedTurns.some((turn) => turn.input?.includes(banner));
+    expect(sawBanner("WRITE WITHOUT VERIFICATION")).toBe(true);
+    expect(sawBanner("PUBLICATION REDIRECT")).toBe(true);
+  });
+
   it("allows introspection tools for explicit agent/creator inputs during stalls", async () => {
     const stalledConfig = createTestConfig({
       portfolio: {

diff --git a/src/__tests__/low-compute.test.ts b/src/__tests__/low-compute.test.ts
@@ -40,16 +40,16 @@ describe("getModelForTier", () => {
     expect(getModelForTier("normal", defaultModel)).toBe(defaultModel);
   });
 
-  it("returns gpt-5-mini for 'low_compute' tier", () => {
-    expect(getModelForTier("low_compute", defaultModel)).toBe("gpt-5-mini");
+  it("returns glm-5 for 'low_compute' tier", () => {
+    expect(getModelForTier("low_compute", defaultModel)).toBe("glm-5");
   });
 
-  it("returns gpt-5-mini for 'critical' tier", () => {
-    expect(getModelForTier("critical", defaultModel)).toBe("gpt-5-mini");
+  it("returns glm-5 for 'critical' tier", () => {
+    expect(getModelForTier("critical", defaultModel)).toBe("glm-5");
   });
 
-  it("returns gpt-5-mini for 'dead' tier", () => {
-    expect(getModelForTier("dead", defaultModel)).toBe("gpt-5-mini");
+  it("returns glm-5 for 'dead' tier", () => {
+    expect(getModelForTier("dead", defaultModel)).toBe("glm-5");
   });
 
   it("returns the default model for 'normal' tier with custom default", () => {
@@ -136,10 +136,10 @@ describe("createInferenceClient setLowComputeMode", () => {
     expect(client.getDefaultModel()).toBe("gpt-5-mini");
   });
 
-  it("falls back to gpt-5-mini when no lowComputeModel is provided", () => {
+  it("falls back to glm-5 when no lowComputeModel is provided", () => {
     const client = createInferenceClient(baseOptions);
     client.setLowComputeMode(true);
-    expect(client.getDefaultModel()).toBe("gpt-5-mini");
+    expect(client.getDefaultModel()).toBe("glm-5");
   });
 
   it("restores defaultModel when low compute mode is disabled", () => {