diff --git a/docs/features/F152-catagent-thin-runtime.md b/docs/features/F152-catagent-thin-runtime.md new file mode 100644 index 000000000..54e237421 --- /dev/null +++ b/docs/features/F152-catagent-thin-runtime.md @@ -0,0 +1,111 @@ +--- +feature_ids: [F152] +related_features: [F143, F050, F043, F108] +topics: [agent-runtime, architecture, llm-api, tool-dispatch, context-management] +doc_kind: spec +created: 2026-04-08 +--- + +# F152: CatAgent — Thin Agent Runtime + +> **Status**: spike | **Owner**: Ragdoll Opus 4.6 | **Priority**: P1 + +## Why + +Cat Cafe 是多智能体协作平台,但不提供自有 agent runtime。所有猫猫的"大脑"(agent loop)依赖外部 CLI(Claude Code / Codex / Gemini CLI),Cat Cafe 只提供"身体"(协作、记忆、消息路由、MCP 工具)。 + +这导致三个痛点: + +1. **无法控制 context 管理** — compact 策略、身份保护全看外部 runtime,我们记录过"compact 后身份漂移"教训(docs/public-lessons.md:505) +2. **接入成本高** — 每个新 agent = ~450 行适配代码(F143 已指出) +3. **协作工具走桥接** — Cat Cafe MCP 工具通过 HTTP callback 注入外部 CLI,增加延迟和故障点 + +Claude Code 源码分析(四猫合议 2026-04-08)确认了 5 个可借鉴机制,验证了"薄 agent"方向可行。 + +## What + +### 定位 + +**Thin CatAgent** — 协作与中等任务闭环执行器。 + +| 做 | 不做 | +|----|------| +| 80% 日常交互(讨论/分析/review/协作) | 全功能编码代理(文件系统/sandbox/IDE) | +| 原生 Cat Cafe 工具集成 | 1:1 克隆 Claude Code | +| F143 下的 first-party provider | 平行再造架构 | +| LLM API 直连(opt-in,非默认) | 替代 CLI 子进程主路径 | + +### 架构约束 + +- **ADR-001**:CatAgent 走 API key 路径,作为 opt-in runtime,不升格为平台默认(CLI 子进程仍为主路径) +- **安全红线**:没有代码级权限状态机,不给写/exec/跨线程副作用工具 +- **F143 关系**:CatAgent 实现 `AgentService.invoke()` 接口,作为 provider 注册到 AgentRegistry + +### 从 Claude Code 借鉴的机制 + +| 机制 | 阶段 | 来源 | +|------|------|------| +| System prompt 每轮重建 | Spike | query.ts + systemPrompt.ts | +| 两层压缩(Micro→Full) | POC | services/compact/ | +| 权限状态机 + decisionReason | MVP | utils/permissions/ | +| StreamingToolExecutor(只读) | Alpha | services/tools/ | +| Deferred tool loading | Alpha | Tool.ts + ToolSearchTool | + +### Phase 0: Spike(1 周) + +验证"LLM 直连 + 原生 Cat Cafe 工具 + 单猫 agent loop"可行性。 + +组件: +1. **CatAgentService** — 实现 `AgentService.invoke()` 接口 +2. 
**Agent Loop** — while(hasToolUse) { callLLM → dispatch tools → collect results } +3. **Kernel Prompt Builder** — 每轮重建 system prompt(猫身份 + 铁律 + 线程上下文) +4. **Tool Registry** — Cat Cafe 原生工具定义 + 权限白名单 +5. **Message Adapter** — 将 Anthropic API 响应转为 `AgentMessage` yield + +验收标准: +- [ ] AC-S1: CatAgent 可注册为 provider 并通过 AgentRegistry 被路由 +- [ ] AC-S2: 完成一个完整任务链(读文件 → 分析 → post_message) +- [ ] AC-S3: System prompt 每轮重建,3 轮后身份无漂移 +- [ ] AC-S4: 工具白名单生效(Cat Cafe 工具放行,其余 deny) + +### Phase 1: POC(+1 周)— Go/No-Go + +在 Spike 基础上增加: +1. **MicroCompact** — 剥离旧 tool output,控制 token 增长 +2. **多轮稳定性** — 10 轮任务不 OOM,上下文连贯 + +Go/No-Go 闸门: +- ✅ Go:10 轮稳定 + 零 P1 安全事故 + 审计链可回放 +- ❌ No-Go:权限无法闭环 或 压缩后身份漂移不可控 + +### Phase 2: MVP(+2-3 周) + +- 两层压缩完整化(micro + full summary) +- 权限决策状态机 + decisionReason 审计 +- 接入 F143 宿主层 +- 压缩摘要模板强制保留"身份/铁律/未完成任务"槽位 + +### Phase 3: Alpha(+4-6 周) + +- 只读工具并行执行 +- 写/exec 工具(需权限状态机完备) +- MCP deferred loading +- Resume/cancel + 失败恢复 + +## 安全风险 + +| 级别 | 风险 | 缓解 | +|------|------|------| +| P1 | 并发工具副作用竞态 | Spike 串行执行,只读工具才可并行 | +| P1 | 权限绕过 | 默认 deny + 白名单 + decisionReason | +| P1 | 压缩后身份漂移 | kernel prompt 每轮重建 + 槽位保留 | +| P2 | API key 成本失控 | Token budget + 用量监控 | +| P2 | 复杂度外溢 | 以 provider 接入,禁止新增北向接口 | + +## 参与者 + +四猫合议(2026-04-08): +- 宪宪/Opus-46:主架构 + 源码分析 +- Sonnet:快速原型 + UX 视角 +- GPT-5.4:架构完整性 + ADR-001 约束 +- 砚砚:实现可行性 + 安全风险 + Go/No-Go 闸门 diff --git a/docs/features/index.json b/docs/features/index.json index edcef5569..c7a4b9b75 100644 --- a/docs/features/index.json +++ b/docs/features/index.json @@ -924,6 +924,12 @@ "status": "done | **Owner**: Ragdoll | **Priority**: P1", "file": "F151-xiaoyi-channel-gateway.md" }, + { + "id": "F152", + "name": "CatAgent — Thin Agent Runtime", + "status": "spike | **Owner**: Ragdoll Opus 4.6 | **Priority**: P1", + "file": "F152-catagent-thin-runtime.md" + }, { "id": "F152", "name": "Expedition Memory — 外部项目记忆冷启动 + 经验回流", @@ -961,5 +967,5 @@ "file": "F157-feishu-receipt-ack.md" } ], - "generated_at": 
"2026-04-10T11:52:59.639Z" + "generated_at": "2026-04-10T12:50:21.139Z" } diff --git a/packages/api/package.json b/packages/api/package.json index 01af390b3..ea81116e3 100644 --- a/packages/api/package.json +++ b/packages/api/package.json @@ -22,6 +22,7 @@ "clean": "rm -rf dist" }, "dependencies": { + "@anthropic-ai/sdk": "^0.85.0", "@cat-cafe/shared": "workspace:*", "@fastify/cors": "^9.0.0", "@fastify/multipart": "8", diff --git a/packages/api/src/domains/cats/services/agents/providers/catagent/CatAgentService.ts b/packages/api/src/domains/cats/services/agents/providers/catagent/CatAgentService.ts new file mode 100644 index 000000000..e7c72a914 --- /dev/null +++ b/packages/api/src/domains/cats/services/agents/providers/catagent/CatAgentService.ts @@ -0,0 +1,69 @@ +/** + * CatAgent Service — F152: Thin Agent Runtime + * + * Implements AgentService by calling the Anthropic Messages API directly + * (not via CLI subprocess). Designed as an opt-in provider under F143, + * not a replacement for the CLI subprocess main path (ADR-001). + * + * API key resolution: env override → account resolver (credentials.json). + * Spike scope: read-only tools, serial execution, kernel prompt rebuild per turn. + */ + +import type { CatId } from '@cat-cafe/shared'; +import { getCatModel } from '../../../../../../config/cat-models.js'; +import { createModuleLogger } from '../../../../../../infrastructure/logger.js'; +import type { AgentMessage, AgentService, AgentServiceOptions } from '../../../types.js'; +import { resolveApiCredentials } from './catagent-credentials.js'; +import { runCatAgentLoop } from './catagent-loop.js'; + +const log = createModuleLogger('catagent-service'); + +/** Default max turns before forced stop */ +const DEFAULT_MAX_TURNS = 20; +/** Default max output tokens per LLM call */ +const DEFAULT_MAX_TOKENS = 8192; +/** Default cumulative token budget (input+output). ~200K = ~10 substantial turns. 
*/ +const DEFAULT_TOKEN_BUDGET = 200_000; + +export class CatAgentService implements AgentService { + private readonly catId: CatId; + + constructor({ catId }: { catId: CatId }) { + this.catId = catId; + } + + async *invoke(prompt: string, options?: AgentServiceOptions): AsyncIterable { + const creds = resolveApiCredentials(); + if (!creds) { + yield { + type: 'error', + catId: this.catId, + error: + 'CatAgent: no Anthropic API key found. Set CATAGENT_ANTHROPIC_API_KEY or configure an anthropic account.', + timestamp: Date.now(), + }; + return; + } + + const model = getCatModel(this.catId); + const workingDirectory = options?.workingDirectory ?? process.cwd(); + + log.info(`CatAgent invoke: cat=${this.catId} model=${model} creds=${creds.source} cwd=${workingDirectory}`); + + yield* runCatAgentLoop( + prompt, + { + catId: this.catId, + model, + apiKey: creds.apiKey, + baseURL: creds.baseURL, + maxTurns: DEFAULT_MAX_TURNS, + maxTokens: DEFAULT_MAX_TOKENS, + tokenBudgetLimit: DEFAULT_TOKEN_BUDGET, + workingDirectory, + signal: options?.signal, + }, + options?.systemPrompt, + ); + } +} diff --git a/packages/api/src/domains/cats/services/agents/providers/catagent/catagent-credentials.ts b/packages/api/src/domains/cats/services/agents/providers/catagent/catagent-credentials.ts new file mode 100644 index 000000000..45cca2dbb --- /dev/null +++ b/packages/api/src/domains/cats/services/agents/providers/catagent/catagent-credentials.ts @@ -0,0 +1,60 @@ +/** + * CatAgent Credentials — F152: Thin Agent Runtime + * + * Resolves Anthropic API key for direct API calls. + * Priority: env override → account resolver → credentials.json scan. 
+ */ + +import { resolveAnthropicRuntimeProfile } from '../../../../../../config/account-resolver.js'; +import { readCredentials } from '../../../../../../config/credentials.js'; +import { createModuleLogger } from '../../../../../../infrastructure/logger.js'; + +const log = createModuleLogger('catagent-creds'); + +const CATAGENT_API_KEY_ENV = 'CATAGENT_ANTHROPIC_API_KEY'; +const CATAGENT_BASE_URL_ENV = 'CATAGENT_ANTHROPIC_BASE_URL'; + +export interface ApiCredentials { + apiKey: string; + baseURL?: string; + source: string; +} + +/** Resolve API credentials: env override → account resolver → credentials scan */ +export function resolveApiCredentials(): ApiCredentials | null { + // Priority 1: explicit env var override + const envKey = process.env[CATAGENT_API_KEY_ENV]; + if (envKey) { + return { apiKey: envKey, baseURL: process.env[CATAGENT_BASE_URL_ENV], source: 'env' }; + } + + // Priority 2: account resolver (catalog-based) + try { + const profile = resolveAnthropicRuntimeProfile(process.cwd()); + if (profile.apiKey) { + log.info(`Resolved API key from account: ${profile.id}`); + return { apiKey: profile.apiKey, baseURL: profile.baseUrl, source: `account:${profile.id}` }; + } + } catch (err) { + log.warn(`Account resolver failed: ${err instanceof Error ? err.message : String(err)}`); + } + + // Priority 3: scan credentials.json for any Anthropic API key (sk-ant-*) + return scanCredentialsForAnthropicKey(); +} + +function scanCredentialsForAnthropicKey(): ApiCredentials | null { + try { + const all = readCredentials(); + for (const [ref, entry] of Object.entries(all)) { + const key = entry.apiKey; + if (typeof key === 'string' && key.startsWith('sk-ant-') && !ref.includes('migrated')) { + log.info(`Found Anthropic API key in credentials: ${ref}`); + return { apiKey: key, source: `creds:${ref}` }; + } + } + } catch (err) { + log.warn(`Credentials scan failed: ${err instanceof Error ? 
err.message : String(err)}`); + } + return null; +} diff --git a/packages/api/src/domains/cats/services/agents/providers/catagent/catagent-kernel-prompt.ts b/packages/api/src/domains/cats/services/agents/providers/catagent/catagent-kernel-prompt.ts new file mode 100644 index 000000000..677810d64 --- /dev/null +++ b/packages/api/src/domains/cats/services/agents/providers/catagent/catagent-kernel-prompt.ts @@ -0,0 +1,54 @@ +/** + * CatAgent Kernel Prompt — F152: Thin Agent Runtime + * + * Builds the system prompt fresh every turn (borrowed from Claude Code's pattern). + * The kernel prompt is the "minimum non-compressible" identity that survives + * context compaction. It contains: + * - Cat identity and role + * - Iron Laws (safety rules) + * - Current context (turn number, working directory) + * - Custom instructions from invocation + */ + +import type { KernelPromptContext } from './catagent-types.js'; + +/** Iron Laws — always present, never compressed */ +const IRON_LAWS = `## Safety Rules (Iron Laws) +1. Data Storage Sanctuary — Never delete/flush databases or persistent storage. +2. Process Self-Preservation — Never kill parent processes or modify startup config. +3. Config Immutability — Never modify cat-config.json, .env, or MCP config at runtime. +4. Network Boundary — Never access localhost ports outside your service.`; + +/** Build the kernel prompt fresh for each turn */ +export function buildKernelPrompt(ctx: KernelPromptContext): string { + const sections: string[] = []; + + // Identity (always first — most important to preserve) + sections.push(`# Identity +You are ${ctx.catName} (CatAgent runtime, model=${ctx.model}). 
+CatId: ${ctx.catId} +Role: AI assistant in the Clowder AI multi-agent collaboration platform.`); + + // Iron Laws + sections.push(IRON_LAWS); + + // Context + sections.push(`## Current Context +- Working directory: ${ctx.workingDirectory} +- Turn: ${ctx.turnNumber} +- Date: ${new Date().toISOString().split('T')[0]}`); + + // Tool usage guidelines + sections.push(`## Tool Usage +- You have access to read-only file tools (read_file, list_files, search_content). +- Use tools to gather information before answering questions. +- Be thorough but efficient — read relevant files, search for patterns, then synthesize. +- Your text output becomes the message in the Cat Cafe thread.`); + + // Custom system prompt (from invocation options) + if (ctx.customSystemPrompt) { + sections.push(`## Additional Instructions\n${ctx.customSystemPrompt}`); + } + + return sections.join('\n\n'); +} diff --git a/packages/api/src/domains/cats/services/agents/providers/catagent/catagent-loop.ts b/packages/api/src/domains/cats/services/agents/providers/catagent/catagent-loop.ts new file mode 100644 index 000000000..dd199a805 --- /dev/null +++ b/packages/api/src/domains/cats/services/agents/providers/catagent/catagent-loop.ts @@ -0,0 +1,221 @@ +/** + * CatAgent Loop — F152: Thin Agent Runtime + * + * Core think→act→observe loop. 
Borrowed patterns from Claude Code: + * - System prompt rebuilt every turn (anti-drift) + * - Serial tool execution (spike; parallel in Phase 3) + * - Clean termination conditions + * + * Loop: callLLM → check tool_use → dispatch tools → collect results → next turn + */ + +import Anthropic from '@anthropic-ai/sdk'; +import type { CatId } from '@cat-cafe/shared'; +import { createModuleLogger } from '../../../../../../infrastructure/logger.js'; +import type { AgentMessage, MessageMetadata } from '../../../types.js'; +import { buildKernelPrompt } from './catagent-kernel-prompt.js'; +import { microcompact } from './catagent-microcompact.js'; +import { createToolRegistry, getToolSchemas } from './catagent-tools.js'; +import type { CatAgentLoopConfig, CatAgentTool, SessionTokenUsage } from './catagent-types.js'; + +const log = createModuleLogger('catagent-loop'); + +type ContentBlock = Anthropic.Messages.ContentBlock; +type ToolUseBlock = Anthropic.Messages.ToolUseBlock; +type ToolResultBlockParam = Anthropic.Messages.ToolResultBlockParam; + +/** Run the CatAgent loop, yielding AgentMessages */ +export async function* runCatAgentLoop( + prompt: string, + config: CatAgentLoopConfig, + customSystemPrompt?: string, +): AsyncGenerator { + // biome-ignore lint: test seam — cast mock client to Anthropic for DI + const client = (config._testClient ?? 
new Anthropic({ apiKey: config.apiKey, baseURL: config.baseURL })) as Anthropic; + const toolRegistry = createToolRegistry(config.workingDirectory); + const toolSchemas = getToolSchemas(toolRegistry); + const messages: Anthropic.Messages.MessageParam[] = [{ role: 'user', content: prompt }]; + const sessionId = `catagent-${config.catId}-${Date.now()}`; + + const usage: SessionTokenUsage = { totalInputTokens: 0, totalOutputTokens: 0, turns: 0 }; + + yield makeMessage(config.catId, 'session_init', { sessionId }); + + for (let turn = 1; turn <= config.maxTurns; turn++) { + if (config.signal?.aborted) break; + + // Budget guard: stop if cumulative tokens exceed limit + if (config.tokenBudgetLimit > 0) { + const total = usage.totalInputTokens + usage.totalOutputTokens; + if (total >= config.tokenBudgetLimit) { + log.warn(`[turn ${turn}] token budget exhausted: ${total}/${config.tokenBudgetLimit}`); + yield makeMessage(config.catId, 'text', { + content: `[Token budget exhausted: ${total} tokens used of ${config.tokenBudgetLimit} limit. Stopping.]`, + }); + break; + } + } + + const systemPrompt = buildKernelPrompt({ + catId: config.catId, + catName: getCatDisplayName(config.catId), + model: config.model, + workingDirectory: config.workingDirectory, + turnNumber: turn, + customSystemPrompt, + }); + + // MicroCompact: strip old tool results before API call (Claude Code pattern) + const compactedMessages = microcompact(messages); + log.debug( + `[turn ${turn}] calling API with ${compactedMessages.length} messages, tokens=${usage.totalInputTokens + usage.totalOutputTokens}`, + ); + + let response: Anthropic.Messages.Message; + try { + response = await client.messages.create({ + model: config.model, + max_tokens: config.maxTokens, + system: systemPrompt, + messages: compactedMessages, + tools: toolSchemas.length > 0 ? toolSchemas : undefined, + }); + } catch (err) { + const msg = err instanceof Error ? 
err.message : String(err); + log.error(`[turn ${turn}] API error: ${msg}`); + yield makeMessage(config.catId, 'error', { error: `API error: ${msg}` }); + yield makeMessage(config.catId, 'done', { + isFinal: true, + metadata: { + provider: 'anthropic-api', + model: config.model, + sessionId, + usage: { inputTokens: usage.totalInputTokens, outputTokens: usage.totalOutputTokens, numTurns: usage.turns }, + }, + }); + return; + } + + // Track cumulative usage + usage.totalInputTokens += response.usage.input_tokens; + usage.totalOutputTokens += response.usage.output_tokens; + usage.turns = turn; + + const textContent = extractText(response.content); + if (textContent) { + yield makeMessage(config.catId, 'text', { + content: textContent, + metadata: buildSessionMetadata(sessionId, response, usage), + }); + } + + const toolUseBlocks = response.content.filter(isToolUse); + if (toolUseBlocks.length === 0) break; + + messages.push({ role: 'assistant', content: response.content }); + const toolResults = yield* executeTools(toolUseBlocks, toolRegistry, config.catId, turn); + messages.push({ role: 'user', content: toolResults }); + } + + yield makeMessage(config.catId, 'done', { + isFinal: true, + metadata: { + provider: 'anthropic-api', + model: config.model, + sessionId, + usage: { + inputTokens: usage.totalInputTokens, + outputTokens: usage.totalOutputTokens, + numTurns: usage.turns, + }, + }, + }); +} + +/** Execute tool calls serially, yielding progress messages */ +async function* executeTools( + toolUseBlocks: ToolUseBlock[], + registry: Map, + catId: CatId, + turn: number, +): AsyncGenerator { + const results: ToolResultBlockParam[] = []; + + for (const toolUse of toolUseBlocks) { + const tool = registry.get(toolUse.name); + if (!tool || tool.permission === 'deny') { + const denied = `Tool "${toolUse.name}" is not permitted.`; + log.warn(`[turn ${turn}] denied: ${toolUse.name}`); + results.push({ type: 'tool_result', tool_use_id: toolUse.id, content: denied, is_error: 
true }); + continue; + } + + yield makeMessage(catId, 'tool_use', { + toolName: toolUse.name, + toolInput: toolUse.input as Record, + }); + + const result = await executeSingleTool(tool, toolUse); + yield makeMessage(catId, 'tool_result', { content: typeof result.content === 'string' ? result.content : '' }); + results.push(result); + } + + return results; +} + +async function executeSingleTool(tool: CatAgentTool, toolUse: ToolUseBlock): Promise { + try { + const raw = await tool.execute(toolUse.input as Record); + const content = raw.length > 30_000 ? `${raw.slice(0, 30_000)}\n... (truncated)` : raw; + return { type: 'tool_result', tool_use_id: toolUse.id, content }; + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + return { type: 'tool_result', tool_use_id: toolUse.id, content: msg, is_error: true }; + } +} + +// --- Helpers --- + +function isToolUse(block: ContentBlock): block is ToolUseBlock { + return block.type === 'tool_use'; +} + +function extractText(content: ContentBlock[]): string { + return content + .filter((b): b is Anthropic.Messages.TextBlock => b.type === 'text') + .map((b) => b.text) + .join(''); +} + +function buildSessionMetadata( + sessionId: string, + response: Anthropic.Messages.Message, + cumulative: SessionTokenUsage, +): MessageMetadata { + return { + provider: 'anthropic-api', + model: response.model, + sessionId, + usage: { + inputTokens: cumulative.totalInputTokens, + outputTokens: cumulative.totalOutputTokens, + lastTurnInputTokens: response.usage.input_tokens, + numTurns: cumulative.turns, + }, + }; +} + +function getCatDisplayName(catId: CatId): string { + const names: Record = { + opus: '布偶猫/宪宪', + sonnet: '布偶猫 Sonnet', + codex: '缅因猫/砚砚', + gpt52: '缅因猫 GPT-5.4', + gemini: '暹罗猫/烁烁', + }; + return names[catId] ?? 
`CatAgent(${catId})`; +} + +function makeMessage(catId: CatId, type: AgentMessage['type'], fields: Partial): AgentMessage { + return { type, catId, timestamp: Date.now(), ...fields }; +} diff --git a/packages/api/src/domains/cats/services/agents/providers/catagent/catagent-microcompact.ts b/packages/api/src/domains/cats/services/agents/providers/catagent/catagent-microcompact.ts new file mode 100644 index 000000000..c69edd1a7 --- /dev/null +++ b/packages/api/src/domains/cats/services/agents/providers/catagent/catagent-microcompact.ts @@ -0,0 +1,80 @@ +/** + * CatAgent MicroCompact — F152: Thin Agent Runtime + * + * Lightweight context compression inspired by Claude Code's microcompact. + * Strips old tool results from message history to control token growth. + * + * Strategy: keep the last N turns of tool results intact, replace older ones + * with a short summary marker. System prompt is rebuilt per turn (not in messages). + */ + +import type Anthropic from '@anthropic-ai/sdk'; + +type MessageParam = Anthropic.Messages.MessageParam; +type ContentBlockParam = Anthropic.Messages.ContentBlockParam; +type ToolResultBlockParam = Anthropic.Messages.ToolResultBlockParam; + +/** How many recent turns of tool results to keep intact */ +const KEEP_RECENT_TURNS = 3; +/** Max characters for a tool result before truncation in kept turns */ +const MAX_RESULT_CHARS = 10_000; + +/** + * Strip old tool results from message history. + * Keeps the last KEEP_RECENT_TURNS user messages with tool results intact. + * Older tool results are replaced with a short placeholder. 
+ */ +export function microcompact(messages: MessageParam[]): MessageParam[] { + // Find indices of user messages that contain tool_result blocks + const toolResultIndices: number[] = []; + for (let i = 0; i < messages.length; i++) { + if (hasToolResults(messages[i])) toolResultIndices.push(i); + } + + // If few enough tool result turns, still truncate oversized results + if (toolResultIndices.length <= KEEP_RECENT_TURNS) { + return messages.map((msg) => (hasToolResults(msg) ? truncateKeptResults(msg) : msg)); + } + + const cutoffIdx = toolResultIndices[toolResultIndices.length - KEEP_RECENT_TURNS]; + return messages.map((msg, i) => { + if (!hasToolResults(msg)) return msg; + // Old turns: replace content with placeholder + if (i < cutoffIdx) return compactToolResults(msg); + // Kept turns: truncate oversized results + return truncateKeptResults(msg); + }); +} + +function hasToolResults(msg: MessageParam): boolean { + if (msg.role !== 'user' || typeof msg.content === 'string') return false; + return (msg.content as ContentBlockParam[]).some((b) => b.type === 'tool_result'); +} + +function compactToolResults(msg: MessageParam): MessageParam { + if (msg.role !== 'user' || typeof msg.content === 'string') return msg; + const blocks = (msg.content as ContentBlockParam[]).map((block) => { + if (block.type !== 'tool_result') return block; + const tb = block as ToolResultBlockParam; + return { + ...tb, + content: typeof tb.content === 'string' ? 
'[compacted — see recent results]' : tb.content, + } satisfies ToolResultBlockParam; + }); + return { ...msg, content: blocks }; +} + +/** Truncate kept-turn tool results that exceed max length */ +function truncateKeptResults(msg: MessageParam): MessageParam { + if (msg.role !== 'user' || typeof msg.content === 'string') return msg; + const blocks = (msg.content as ContentBlockParam[]).map((block) => { + if (block.type !== 'tool_result') return block; + const tb = block as ToolResultBlockParam; + if (typeof tb.content !== 'string' || tb.content.length <= MAX_RESULT_CHARS) return block; + return { + ...tb, + content: `${tb.content.slice(0, MAX_RESULT_CHARS)}\n... (truncated ${tb.content.length - MAX_RESULT_CHARS} chars)`, + } satisfies ToolResultBlockParam; + }); + return { ...msg, content: blocks }; +} diff --git a/packages/api/src/domains/cats/services/agents/providers/catagent/catagent-tools.ts b/packages/api/src/domains/cats/services/agents/providers/catagent/catagent-tools.ts new file mode 100644 index 000000000..bd583f7fc --- /dev/null +++ b/packages/api/src/domains/cats/services/agents/providers/catagent/catagent-tools.ts @@ -0,0 +1,137 @@ +/** + * CatAgent Tool Registry — F152: Thin Agent Runtime + * + * Spike tools: read-only file operations only. + * Permission whitelist: all registered tools are 'allow' (read-only). + * Write/exec tools are deferred to Phase 3 (Alpha), after the Phase 2 permission state machine is in place. + */ + +import { readdir, readFile } from 'node:fs/promises'; +import { resolve } from 'node:path'; +import type Anthropic from '@anthropic-ai/sdk'; +import type { CatAgentTool } from './catagent-types.js'; + +/** Create the spike tool registry (read-only tools) */ +export function createToolRegistry(workingDirectory: string): Map { + const registry = new Map(); + + registry.set('read_file', { + schema: { + name: 'read_file', + description: 'Read the contents of a file. 
Returns the file content as text.', + input_schema: { + type: 'object' as const, + properties: { + path: { type: 'string', description: 'Absolute or relative file path' }, + limit: { type: 'number', description: 'Max lines to read (default: 200)' }, + offset: { type: 'number', description: 'Line offset to start from (default: 0)' }, + }, + required: ['path'], + }, + }, + execute: async (input) => executeReadFile(workingDirectory, input), + permission: 'allow', + }); + + registry.set('list_files', { + schema: { + name: 'list_files', + description: 'List files in a directory. Returns file names, one per line.', + input_schema: { + type: 'object' as const, + properties: { + path: { type: 'string', description: 'Directory path (default: working directory)' }, + pattern: { type: 'string', description: 'Glob-like filter (e.g. "*.ts")' }, + }, + required: [], + }, + }, + execute: async (input) => executeListFiles(workingDirectory, input), + permission: 'allow', + }); + + registry.set('search_content', { + schema: { + name: 'search_content', + description: 'Search for a pattern in files. Returns matching file paths.', + input_schema: { + type: 'object' as const, + properties: { + pattern: { type: 'string', description: 'Search pattern (regex supported)' }, + path: { type: 'string', description: 'Directory to search in (default: working directory)' }, + glob: { type: 'string', description: 'File glob filter (e.g. 
"*.ts")' }, + }, + required: ['pattern'], + }, + }, + execute: async (input) => executeSearchContent(workingDirectory, input), + permission: 'allow', + }); + + return registry; +} + +/** Get Anthropic tool schemas from registry */ +export function getToolSchemas(registry: Map): Anthropic.Messages.Tool[] { + return [...registry.values()].map((t) => t.schema); +} + +// --- Tool implementations --- + +function resolvePath(workingDirectory: string, filePath: string): string { + const resolved = resolve(workingDirectory, filePath); + const root = resolve(workingDirectory); + // Path must equal root or be a child (trailing / prevents sibling prefix bypass) + if (resolved !== root && !resolved.startsWith(`${root}/`)) { + throw new Error(`Path traversal blocked: ${filePath}`); + } + return resolved; +} + +async function executeReadFile(cwd: string, input: Record): Promise { + const filePath = resolvePath(cwd, String(input.path ?? '')); + const limit = Number(input.limit ?? 200); + const offset = Number(input.offset ?? 0); + + const content = await readFile(filePath, 'utf-8'); + const lines = content.split('\n'); + const sliced = lines.slice(offset, offset + limit); + return sliced.map((line, i) => `${offset + i + 1}\t${line}`).join('\n'); +} + +async function executeListFiles(cwd: string, input: Record): Promise { + const dirPath = resolvePath(cwd, String(input.path ?? '.')); + const pattern = input.pattern ? String(input.pattern) : undefined; + + const entries = await readdir(dirPath, { withFileTypes: true }); + let names = entries.map((e) => (e.isDirectory() ? 
`${e.name}/` : e.name)); + + if (pattern) { + const regex = new RegExp(`^${pattern.replace(/[\\^$.+()[\]{}|]/g, '\\$&').replace(/\*/g, '.*').replace(/\?/g, '.')}/?$`); // anchored glob: regex metachars are escaped, "*"/"?" are wildcards, trailing "/" lets directory entries match + names = names.filter((n) => regex.test(n)); + } + return names.sort().join('\n') || '(empty directory)'; +} + +async function executeSearchContent(cwd: string, input: Record): Promise { + const { execFile } = await import('node:child_process'); + const { promisify } = await import('node:util'); + const execFileAsync = promisify(execFile); + + const pattern = String(input.pattern); + const searchPath = resolvePath(cwd, String(input.path ?? '.')); + const glob = input.glob ? String(input.glob) : undefined; + + const args = ['--files-with-matches', '--max-count=1']; // rg recurses by default; "-r" means --replace and would swallow the next argument + if (glob) args.push('--glob', glob); + // Use -- to prevent pattern being parsed as rg flags (option injection) + args.push('--', pattern, searchPath); + + try { + const { stdout } = await execFileAsync('rg', args, { timeout: 10_000, maxBuffer: 512 * 1024 }); + const lines = stdout.trim().split('\n').slice(0, 50); + return lines.join('\n') || '(no matches)'; + } catch { + return '(no matches or rg not available)'; + } +} diff --git a/packages/api/src/domains/cats/services/agents/providers/catagent/catagent-types.ts b/packages/api/src/domains/cats/services/agents/providers/catagent/catagent-types.ts new file mode 100644 index 000000000..0fce9cb37 --- /dev/null +++ b/packages/api/src/domains/cats/services/agents/providers/catagent/catagent-types.ts @@ -0,0 +1,66 @@ +/** + * CatAgent Types — F152: Thin Agent Runtime + * + * Type definitions for the CatAgent provider. + * CatAgent calls the LLM API directly (not via CLI subprocess), + * implementing a lightweight agent loop with native Cat Cafe tool integration. 
+ */ + +import type Anthropic from '@anthropic-ai/sdk'; +import type { CatId } from '@cat-cafe/shared'; + +/** Tool permission decision */ +export type ToolPermission = 'allow' | 'deny'; + +/** Registered tool definition for CatAgent */ +export interface CatAgentTool { + /** Anthropic tool schema (sent to the API) */ + schema: Anthropic.Messages.Tool; + /** Execute the tool and return result string */ + execute: (input: Record) => Promise; + /** Permission level — 'allow' = auto-execute, 'deny' = blocked */ + permission: ToolPermission; +} + +/** Config passed to the agent loop */ +export interface CatAgentLoopConfig { + catId: CatId; + model: string; + apiKey: string; + /** Anthropic API base URL override */ + baseURL?: string; + /** Maximum turns before forced stop */ + maxTurns: number; + /** Maximum output tokens per LLM call */ + maxTokens: number; + /** Cumulative token budget (input+output) before forced stop. 0 = unlimited. */ + tokenBudgetLimit: number; + /** Working directory for file tools */ + workingDirectory: string; + /** AbortSignal for cancellation */ + signal?: AbortSignal; + /** Test-only: inject a mock Anthropic client */ + _testClient?: { messages: { create: (params: unknown) => Promise } }; +} + +/** Cumulative token usage across the entire agent session */ +export interface SessionTokenUsage { + totalInputTokens: number; + totalOutputTokens: number; + turns: number; +} + +/** Kernel prompt context — rebuilt every turn */ +export interface KernelPromptContext { + catId: CatId; + /** Cat identity name (e.g. 
'布偶猫/宪宪') */ + catName: string; + /** Model identifier */ + model: string; + /** Current working directory */ + workingDirectory: string; + /** Current turn number (for context awareness) */ + turnNumber: number; + /** Custom system prompt from invocation options */ + customSystemPrompt?: string; +} diff --git a/packages/api/src/index.ts b/packages/api/src/index.ts index 1ce43ffcc..1fb02ee0a 100644 --- a/packages/api/src/index.ts +++ b/packages/api/src/index.ts @@ -802,6 +802,13 @@ async function main(): Promise { case 'opencode': service = new OpenCodeAgentService({ catId }); break; + case 'catagent': { + const { CatAgentService } = await import( + './domains/cats/services/agents/providers/catagent/CatAgentService.js' + ); + service = new CatAgentService({ catId }); + break; + } case 'a2a': { const { A2AAgentService } = await import('./domains/cats/services/agents/providers/A2AAgentService.js'); const envKey = `CAT_${id.toUpperCase()}_A2A_URL`; diff --git a/packages/api/test/catagent-10turn-stability.test.js b/packages/api/test/catagent-10turn-stability.test.js new file mode 100644 index 000000000..096a59d26 --- /dev/null +++ b/packages/api/test/catagent-10turn-stability.test.js @@ -0,0 +1,192 @@ +/** + * CatAgent 10-Turn Stability Test — F152 Phase 1 Go/No-Go Gate + * + * Validates that the agent loop stays stable over 10 tool-use turns: + * - Kernel prompt is rebuilt each turn (identity preserved) + * - MicroCompact controls token growth (old tool results compacted) + * - Token budget guard stops the loop when exhausted + * - Cumulative usage is tracked correctly + * + * Uses a mock Anthropic client — no real API calls. 
+ */ + +import assert from 'node:assert/strict'; +import { test } from 'node:test'; + +const { buildKernelPrompt } = await import( + '../dist/domains/cats/services/agents/providers/catagent/catagent-kernel-prompt.js' +); +const { microcompact } = await import( + '../dist/domains/cats/services/agents/providers/catagent/catagent-microcompact.js' +); + +// ── 10-Turn Kernel Prompt Identity Stability ── + +test('kernel prompt preserves identity across 10 turns', () => { + for (let turn = 1; turn <= 10; turn++) { + const prompt = buildKernelPrompt({ + catId: 'opus', + catName: '布偶猫/宪宪', + model: 'claude-sonnet-4-20250514', + workingDirectory: '/workspace/project', + turnNumber: turn, + }); + assert.ok(prompt.includes('布偶猫/宪宪'), `turn ${turn}: identity must be present`); + assert.ok( + prompt.includes('Safety Rules') || prompt.includes('Iron Laws'), + `turn ${turn}: iron laws must be present`, + ); + assert.ok(prompt.includes(`Turn: ${turn}`), `turn ${turn}: turn number must match`); + assert.ok(prompt.includes('CatAgent runtime'), `turn ${turn}: runtime role must be present`); + } +}); + +// ── 10-Turn MicroCompact Token Growth Control ── + +test('microcompact keeps message count stable over 10 turns', () => { + const messages = []; + + for (let turn = 0; turn < 10; turn++) { + // Simulate assistant response with tool call + messages.push({ + role: 'assistant', + content: [{ type: 'tool_use', id: `tool-${turn}`, name: 'read_file', input: { path: `file${turn}.ts` } }], + }); + // Simulate tool result (large content) + messages.push({ + role: 'user', + content: [ + { + type: 'tool_result', + tool_use_id: `tool-${turn}`, + content: `x`.repeat(5000), + }, + ], + }); + } + + const compacted = microcompact(messages); + + // All messages should still be present + assert.equal(compacted.length, 20, 'message count preserved'); + + // First 7 tool-result turns (indices 1,3,5,7,9,11,13) should be compacted + for (let i = 0; i < 7; i++) { + const idx = i * 2 + 1; // tool result 
message indices + const content = compacted[idx].content[0].content; + assert.equal(content, '[compacted — see recent results]', `turn ${i} should be compacted`); + } + + // Last 3 tool-result turns (indices 15, 17, 19) should keep content + for (let i = 7; i < 10; i++) { + const idx = i * 2 + 1; + const content = compacted[idx].content[0].content; + assert.ok(content.startsWith('x'), `turn ${i} should keep original content`); + } +}); + +test('microcompact truncates oversized results in kept turns', () => { + const messages = []; + + for (let turn = 0; turn < 4; turn++) { + messages.push({ + role: 'assistant', + content: [{ type: 'tool_use', id: `t-${turn}`, name: 'read_file', input: {} }], + }); + messages.push({ + role: 'user', + content: [ + { + type: 'tool_result', + tool_use_id: `t-${turn}`, + // 15K chars — exceeds 10K limit for kept turns + content: 'A'.repeat(15_000), + }, + ], + }); + } + + const compacted = microcompact(messages); + + // First turn should be compacted (placeholder) + assert.equal(compacted[1].content[0].content, '[compacted — see recent results]'); + + // Last 3 kept turns should be truncated to ~10K + for (let i = 1; i < 4; i++) { + const idx = i * 2 + 1; + const content = compacted[idx].content[0].content; + assert.ok(content.length < 15_000, `turn ${i}: should be truncated`); + assert.ok(content.includes('truncated'), `turn ${i}: should have truncation marker`); + } +}); + +// ── Token Budget Growth Simulation ── + +test('simulated 10-turn token growth stays within budget', () => { + // Simulate token usage pattern: ~8K input + ~2K output per turn + // With microcompact, input shouldn't grow linearly + const inputPerTurn = 8000; + const outputPerTurn = 2000; + const budget = 200_000; + let totalInput = 0; + let totalOutput = 0; + + for (let turn = 1; turn <= 10; turn++) { + // After microcompact, input growth slows (old results stripped) + // Model: first 3 turns grow linearly, then plateau + const inputGrowth = turn <= 3 ? 
inputPerTurn : inputPerTurn * 0.3; + totalInput += inputPerTurn + (turn > 1 ? inputGrowth : 0); + totalOutput += outputPerTurn; + + const total = totalInput + totalOutput; + assert.ok(total < budget, `turn ${turn}: cumulative ${total} must stay under ${budget}`); + } +}); + +// ── MicroCompact Compacted Content Size ── + +test('total content size of compacted 10-turn history is bounded', () => { + const messages = []; + + for (let turn = 0; turn < 10; turn++) { + messages.push({ + role: 'assistant', + content: [ + { type: 'text', text: `Analysis for turn ${turn}` }, + { type: 'tool_use', id: `t-${turn}`, name: 'search_content', input: { pattern: 'test' } }, + ], + }); + messages.push({ + role: 'user', + content: [ + { + type: 'tool_result', + tool_use_id: `t-${turn}`, + // Each result is 20K chars + content: `Result ${turn}: ${'data '.repeat(4000)}`, + }, + ], + }); + } + + const compacted = microcompact(messages); + + // Calculate total content size + let totalChars = 0; + for (const msg of compacted) { + if (typeof msg.content === 'string') { + totalChars += msg.content.length; + } else { + for (const block of msg.content) { + if (block.type === 'text') totalChars += block.text.length; + else if (block.type === 'tool_result' && typeof block.content === 'string') { + totalChars += block.content.length; + } + } + } + } + + // With 7 compacted turns (~30 chars each) + 3 kept turns (truncated to ~10K each) + // Total should be well under 50K, not 200K (10 * 20K) + assert.ok(totalChars < 50_000, `total content ${totalChars} chars should be < 50K after compaction`); +}); diff --git a/packages/api/test/catagent-loop-mock.test.js b/packages/api/test/catagent-loop-mock.test.js new file mode 100644 index 000000000..cb2eccf0d --- /dev/null +++ b/packages/api/test/catagent-loop-mock.test.js @@ -0,0 +1,202 @@ +/** + * CatAgent Loop Mock Test — F152 Phase 1 Go/No-Go Gate (real loop) + * + * Tests runCatAgentLoop with a mock Anthropic client to validate: + * - Budget guard 
stops at exact boundary + * - done event has correct usage keys (inputTokens/outputTokens) + * - Truncation applies even when <= KEEP_RECENT_TURNS tool results + */ + +import assert from 'node:assert/strict'; +import { mkdtempSync, realpathSync, rmSync, writeFileSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { test } from 'node:test'; + +const { runCatAgentLoop } = await import('../dist/domains/cats/services/agents/providers/catagent/catagent-loop.js'); + +/** Create a mock Anthropic client that returns tool_use for N turns then stops */ +function createMockClient(toolTurns, inputTokensPerCall = 5000, outputTokensPerCall = 1000) { + let callCount = 0; + return { + messages: { + async create() { + callCount++; + const turn = callCount; + + if (turn <= toolTurns) { + return { + model: 'mock-model', + usage: { input_tokens: inputTokensPerCall, output_tokens: outputTokensPerCall }, + content: [ + { type: 'text', text: `Turn ${turn} analysis` }, + { type: 'tool_use', id: `tool-${turn}`, name: 'read_file', input: { path: 'package.json' } }, + ], + stop_reason: 'tool_use', + }; + } + + // Final turn: just text, no tools + return { + model: 'mock-model', + usage: { input_tokens: inputTokensPerCall, output_tokens: outputTokensPerCall }, + content: [{ type: 'text', text: 'Done.' }], + stop_reason: 'end_turn', + }; + }, + }, + getCallCount() { + return callCount; + }, + }; +} + +function createTmpDir() { + const dir = realpathSync(mkdtempSync(join(tmpdir(), 'catagent-loop-test-'))); + writeFileSync(join(dir, 'package.json'), '{"name":"test","version":"1.0.0"}'); + return dir; +} + +// ── Budget guard stops at exact boundary ── + +test('budget guard stops loop when cumulative tokens exceed limit', async () => { + const tmpDir = createTmpDir(); + try { + // Each call uses 5000 + 1000 = 6000 tokens. Budget = 15000. + // Turn 1: 6000 (ok), Turn 2: 12000 (ok), Turn 3: check 12000 < 15000 → call → 18000. 
+ // Turn 4: check 18000 >= 15000 → stop. + const mockClient = createMockClient(10, 5000, 1000); + const messages = []; + + for await (const msg of runCatAgentLoop('test prompt', { + catId: 'opus', + model: 'mock-model', + apiKey: 'fake-key', + maxTurns: 20, + maxTokens: 4096, + tokenBudgetLimit: 15_000, + workingDirectory: tmpDir, + _testClient: mockClient, + })) { + messages.push(msg); + } + + // Should have stopped before completing all 10 tool turns + assert.ok(mockClient.getCallCount() < 10, `should stop early, got ${mockClient.getCallCount()} calls`); + + // Should end with a budget-exhausted text + done + const budgetMsg = messages.find((m) => m.type === 'text' && m.content?.includes('budget exhausted')); + assert.ok(budgetMsg, 'should have a budget exhausted message'); + + const doneMsg = messages.find((m) => m.type === 'done'); + assert.ok(doneMsg, 'should have done message'); + } finally { + rmSync(tmpDir, { recursive: true, force: true }); + } +}); + +// ── done event has correct usage keys ── + +test('done event metadata uses inputTokens/outputTokens keys', async () => { + const tmpDir = createTmpDir(); + try { + const mockClient = createMockClient(1, 3000, 500); + const messages = []; + + for await (const msg of runCatAgentLoop('read package.json', { + catId: 'opus', + model: 'mock-model', + apiKey: 'fake-key', + maxTurns: 10, + maxTokens: 4096, + tokenBudgetLimit: 200_000, + workingDirectory: tmpDir, + _testClient: mockClient, + })) { + messages.push(msg); + } + + const doneMsg = messages.find((m) => m.type === 'done'); + assert.ok(doneMsg, 'should have done message'); + assert.ok(doneMsg.metadata?.usage, 'done should have usage metadata'); + + const { usage } = doneMsg.metadata; + // Must use downstream-compatible keys + assert.equal(typeof usage.inputTokens, 'number', 'must have inputTokens'); + assert.equal(typeof usage.outputTokens, 'number', 'must have outputTokens'); + assert.ok(usage.inputTokens > 0, 'inputTokens must be positive'); + 
assert.ok(usage.outputTokens > 0, 'outputTokens must be positive'); + // Must NOT have SessionTokenUsage keys + assert.equal(usage.totalInputTokens, undefined, 'must not have totalInputTokens'); + assert.equal(usage.totalOutputTokens, undefined, 'must not have totalOutputTokens'); + } finally { + rmSync(tmpDir, { recursive: true, force: true }); + } +}); + +// ── Cumulative usage is correct across turns ── + +test('cumulative usage tracks correctly across 5 tool turns', async () => { + const tmpDir = createTmpDir(); + try { + const mockClient = createMockClient(5, 2000, 500); + const textMessages = []; + + for await (const msg of runCatAgentLoop('analyze the project', { + catId: 'opus', + model: 'mock-model', + apiKey: 'fake-key', + maxTurns: 20, + maxTokens: 4096, + tokenBudgetLimit: 200_000, + workingDirectory: tmpDir, + _testClient: mockClient, + })) { + if (msg.type === 'text' && msg.metadata?.usage) textMessages.push(msg); + } + + // Last text message should have cumulative usage + const lastText = textMessages[textMessages.length - 1]; + assert.ok(lastText.metadata.usage.inputTokens > 2000, 'cumulative input should exceed single call'); + assert.ok(lastText.metadata.usage.numTurns > 1, 'should track multiple turns'); + } finally { + rmSync(tmpDir, { recursive: true, force: true }); + } +}); + +// ── 10-turn loop completes with identity preserved ── + +test('10-turn loop completes and yields correct message sequence', async () => { + const tmpDir = createTmpDir(); + try { + const mockClient = createMockClient(10, 1000, 200); + const messages = []; + + for await (const msg of runCatAgentLoop('deep analysis', { + catId: 'opus', + model: 'mock-model', + apiKey: 'fake-key', + maxTurns: 20, + maxTokens: 4096, + tokenBudgetLimit: 200_000, + workingDirectory: tmpDir, + _testClient: mockClient, + })) { + messages.push(msg); + } + + // Should have session_init, multiple text+tool_use+tool_result, then done + assert.equal(messages[0].type, 'session_init'); + 
assert.equal(messages[messages.length - 1].type, 'done'); + + // All messages should have catId = opus + for (const msg of messages) { + assert.equal(msg.catId, 'opus', `${msg.type} message should have catId=opus`); + } + + // 10 tool turns + 1 final = 11 API calls + assert.equal(mockClient.getCallCount(), 11, 'should make 11 API calls (10 tool + 1 final)'); + } finally { + rmSync(tmpDir, { recursive: true, force: true }); + } +}); diff --git a/packages/api/test/catagent-service.test.js b/packages/api/test/catagent-service.test.js new file mode 100644 index 000000000..a74f29945 --- /dev/null +++ b/packages/api/test/catagent-service.test.js @@ -0,0 +1,192 @@ +/** + * CatAgent Service Tests — F152: Thin Agent Runtime + * Unit tests for kernel prompt, microcompact, tool registry, and credential resolution. + * Does NOT call the real Anthropic API — mocks the LLM client. + */ + +import assert from 'node:assert/strict'; +import { mkdirSync, mkdtempSync, realpathSync, rmSync, writeFileSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { test } from 'node:test'; + +const { buildKernelPrompt } = await import( + '../dist/domains/cats/services/agents/providers/catagent/catagent-kernel-prompt.js' +); +const { createToolRegistry, getToolSchemas } = await import( + '../dist/domains/cats/services/agents/providers/catagent/catagent-tools.js' +); +const { microcompact } = await import( + '../dist/domains/cats/services/agents/providers/catagent/catagent-microcompact.js' +); + +// ── Kernel Prompt ── + +test('buildKernelPrompt includes identity and iron laws', () => { + const prompt = buildKernelPrompt({ + catId: 'opus', + catName: '布偶猫/宪宪', + model: 'claude-sonnet-4-20250514', + workingDirectory: '/tmp/test', + turnNumber: 1, + }); + assert.ok(prompt.includes('布偶猫/宪宪'), 'should include cat name'); + assert.ok(prompt.includes('Iron Laws') || prompt.includes('Safety Rules'), 'should include safety rules'); + 
assert.ok(prompt.includes('Turn: 1'), 'should include turn number'); + assert.ok(prompt.includes('/tmp/test'), 'should include working directory'); +}); + +test('buildKernelPrompt rebuilds with different turn numbers', () => { + const p1 = buildKernelPrompt({ + catId: 'opus', + catName: 'test', + model: 'm', + workingDirectory: '/tmp', + turnNumber: 1, + }); + const p5 = buildKernelPrompt({ + catId: 'opus', + catName: 'test', + model: 'm', + workingDirectory: '/tmp', + turnNumber: 5, + }); + assert.ok(p1.includes('Turn: 1')); + assert.ok(p5.includes('Turn: 5')); + assert.notEqual(p1, p5, 'prompts should differ per turn'); +}); + +test('buildKernelPrompt includes custom system prompt', () => { + const prompt = buildKernelPrompt({ + catId: 'opus', + catName: 'test', + model: 'm', + workingDirectory: '/tmp', + turnNumber: 1, + customSystemPrompt: 'You are a code reviewer.', + }); + assert.ok(prompt.includes('You are a code reviewer.')); +}); + +// ── Tool Registry ── + +test('createToolRegistry returns 3 read-only tools', () => { + const registry = createToolRegistry('/tmp'); + assert.equal(registry.size, 3, 'should have 3 tools'); + assert.ok(registry.has('read_file')); + assert.ok(registry.has('list_files')); + assert.ok(registry.has('search_content')); + for (const [, tool] of registry) { + assert.equal(tool.permission, 'allow', 'all spike tools should be allowed'); + } +}); + +test('getToolSchemas returns valid Anthropic tool schemas', () => { + const registry = createToolRegistry('/tmp'); + const schemas = getToolSchemas(registry); + assert.equal(schemas.length, 3); + for (const schema of schemas) { + assert.ok(schema.name, 'each schema should have a name'); + assert.ok(schema.input_schema, 'each schema should have input_schema'); + } +}); + +test('read_file tool reads a file correctly', async () => { + const tmpDir = realpathSync(mkdtempSync(join(tmpdir(), 'catagent-test-'))); + try { + writeFileSync(join(tmpDir, 'hello.txt'), 'line1\nline2\nline3\n'); + const 
registry = createToolRegistry(tmpDir); + const result = await registry.get('read_file').execute({ path: 'hello.txt' }); + assert.ok(result.includes('line1')); + assert.ok(result.includes('line2')); + } finally { + rmSync(tmpDir, { recursive: true, force: true }); + } +}); + +test('read_file tool blocks path traversal', async () => { + const tmpDir = realpathSync(mkdtempSync(join(tmpdir(), 'catagent-test-'))); + try { + const registry = createToolRegistry(tmpDir); + await assert.rejects( + () => registry.get('read_file').execute({ path: '../../../etc/passwd' }), + /Path traversal blocked/, + ); + } finally { + rmSync(tmpDir, { recursive: true, force: true }); + } +}); + +test('read_file tool blocks sibling prefix traversal', async () => { + // e.g. workingDir=/tmp/repo, path=../repo2/secret → /tmp/repo2/secret starts with /tmp/repo + const tmpDir = realpathSync(mkdtempSync(join(tmpdir(), 'catagent-test-'))); + const siblingDir = `${tmpDir}2`; + mkdirSync(siblingDir, { recursive: true }); + writeFileSync(join(siblingDir, 'secret.txt'), 'leaked'); + try { + const registry = createToolRegistry(tmpDir); + await assert.rejects( + () => registry.get('read_file').execute({ path: `../` + `${tmpDir.split('/').pop()}2/secret.txt` }), + /Path traversal blocked/, + ); + } finally { + rmSync(tmpDir, { recursive: true, force: true }); + rmSync(siblingDir, { recursive: true, force: true }); + } +}); + +test('list_files tool lists directory contents', async () => { + const tmpDir = realpathSync(mkdtempSync(join(tmpdir(), 'catagent-test-'))); + try { + writeFileSync(join(tmpDir, 'a.ts'), ''); + writeFileSync(join(tmpDir, 'b.js'), ''); + mkdirSync(join(tmpDir, 'sub')); + const registry = createToolRegistry(tmpDir); + const result = await registry.get('list_files').execute({}); + assert.ok(result.includes('a.ts')); + assert.ok(result.includes('b.js')); + assert.ok(result.includes('sub/')); + } finally { + rmSync(tmpDir, { recursive: true, force: true }); + } +}); + +// ── MicroCompact 
── + +test('microcompact does nothing when few tool results', () => { + const messages = [ + { role: 'user', content: 'hello' }, + { role: 'assistant', content: [{ type: 'text', text: 'hi' }] }, + ]; + const result = microcompact(messages); + assert.deepEqual(result, messages, 'should not modify when no tool results'); +}); + +test('microcompact strips old tool results beyond 3 turns', () => { + const messages = []; + // Create 5 turns of tool use + for (let i = 0; i < 5; i++) { + messages.push({ + role: 'assistant', + content: [{ type: 'tool_use', id: `t${i}`, name: 'read_file', input: {} }], + }); + messages.push({ + role: 'user', + content: [{ type: 'tool_result', tool_use_id: `t${i}`, content: `result-${i}-long-content` }], + }); + } + + const result = microcompact(messages); + assert.equal(result.length, messages.length, 'should preserve message count'); + + // First 2 tool result turns (indices 1, 3) should be compacted + const first = result[1]; + assert.equal(first.content[0].content, '[compacted — see recent results]'); + + const second = result[3]; + assert.equal(second.content[0].content, '[compacted — see recent results]'); + + // Last 3 turns (indices 5, 7, 9) should be intact + const last = result[9]; + assert.equal(last.content[0].content, 'result-4-long-content'); +}); diff --git a/packages/api/test/catagent-smoke.mjs b/packages/api/test/catagent-smoke.mjs new file mode 100644 index 000000000..d8f1dace1 --- /dev/null +++ b/packages/api/test/catagent-smoke.mjs @@ -0,0 +1,79 @@ +#!/usr/bin/env node +/** + * CatAgent Smoke Test — F152 + * + * Standalone script to validate the CatAgent agent loop end-to-end. + * Calls the real Anthropic API — requires credentials. + * + * Usage: + * # Uses account resolver (credentials.json): + * node packages/api/test/catagent-smoke.mjs + * + * # Or with explicit API key: + * CATAGENT_ANTHROPIC_API_KEY=sk-... 
node packages/api/test/catagent-smoke.mjs + */ + +// Must run from project root so account-resolver can find credentials +const { CatAgentService } = await import('../dist/domains/cats/services/agents/providers/catagent/CatAgentService.js'); + +const service = new CatAgentService({ catId: 'opus' }); + +const prompt = `请读取当前目录下的 package.json 文件,告诉我这个项目叫什么名字、版本号是多少。用一句话回答。`; + +console.log('═══ CatAgent Smoke Test ═══'); +console.log(`Prompt: ${prompt}`); +console.log('───────────────────────────'); + +let messageCount = 0; +let toolCallCount = 0; +let finalText = ''; + +for await (const msg of service.invoke(prompt, { + workingDirectory: process.cwd(), +})) { + messageCount++; + switch (msg.type) { + case 'session_init': + console.log(`[session] ${msg.sessionId}`); + break; + case 'text': + console.log(`[text] ${msg.content?.slice(0, 200)}`); + finalText = msg.content ?? ''; + if (msg.metadata?.usage) { + const u = msg.metadata.usage; + console.log(` tokens: in=${u.inputTokens} out=${u.outputTokens}`); + } + break; + case 'tool_use': + toolCallCount++; + console.log(`[tool] ${msg.toolName}(${JSON.stringify(msg.toolInput).slice(0, 100)})`); + break; + case 'tool_result': + console.log(`[result] ${msg.content?.slice(0, 100)}...`); + break; + case 'error': + console.error(`[ERROR] ${msg.error}`); + process.exit(1); + break; + case 'done': + console.log('[done]'); + break; + default: + console.log(`[${msg.type}] ${msg.content?.slice(0, 80) ?? 
''}`); + } +} + +console.log('───────────────────────────'); +console.log(`Messages: ${messageCount} | Tool calls: ${toolCallCount}`); +console.log(`Final answer: ${finalText.slice(0, 200)}`); + +// Basic assertions +if (messageCount < 3) { + console.error('FAIL: expected at least 3 messages (session_init + text + done)'); + process.exit(1); +} +if (toolCallCount < 1) { + console.error('FAIL: expected at least 1 tool call (read_file)'); + process.exit(1); +} +console.log('═══ PASS ═══'); diff --git a/packages/shared/src/types/cat.ts b/packages/shared/src/types/cat.ts index e7c40c73a..569a2ec04 100644 --- a/packages/shared/src/types/cat.ts +++ b/packages/shared/src/types/cat.ts @@ -11,7 +11,7 @@ import { createCatId } from './ids.js'; * CLI client identity used to invoke a cat (e.g. 'anthropic' → claude CLI, 'openai' → codex CLI). * Renamed from CatProvider in F340 P5. */ -export type ClientId = 'anthropic' | 'openai' | 'google' | 'dare' | 'antigravity' | 'opencode' | 'a2a'; +export type ClientId = 'anthropic' | 'openai' | 'google' | 'dare' | 'antigravity' | 'opencode' | 'a2a' | 'catagent'; /** @deprecated F340: Use {@link ClientId} instead. Kept as alias for backward compatibility. 
*/ export type CatProvider = ClientId; diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index d5daebfc6..2d18b4bd0 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -26,6 +26,9 @@ importers: packages/api: dependencies: + '@anthropic-ai/sdk': + specifier: ^0.85.0 + version: 0.85.0(zod@3.25.76) '@cat-cafe/shared': specifier: workspace:* version: link:../shared @@ -356,6 +359,15 @@ packages: resolution: {integrity: sha512-UrcABB+4bUrFABwbluTIBErXwvbsU/V7TZWfmbgJfbkwiBuziS9gxdODUyuiecfdGQ85jglMW6juS3+z5TsKLw==} engines: {node: '>=10'} + '@anthropic-ai/sdk@0.85.0': + resolution: {integrity: sha512-nmwwB1zYSOwDSKtw+HXUzx+SKfBekTknt92R63tGZAZkppwyHw+cMHugjCvWZ9G92I965tz0062VKeUnzVJZlA==} + hasBin: true + peerDependencies: + zod: ^3.25.0 || ^4.0.0 + peerDependenciesMeta: + zod: + optional: true + '@apideck/better-ajv-errors@0.3.6': resolution: {integrity: sha512-P+ZygBLZtkp0qqOAJJVX4oX/sFo5JR3eBWwwuqHHhK0GIgQOKWrAfiAaWX0aArHkRWHMuggFEgAZNxVPwPZYaA==} engines: {node: '>=10'} @@ -3909,6 +3921,10 @@ packages: json-schema-ref-resolver@1.0.1: resolution: {integrity: sha512-EJAj1pgHc1hxF6vo2Z3s69fMjO1INq6eGHXZ8Z6wCQeldCuwxGK9Sxf4/cScGn3FZubCVUehfWtcDM/PLteCQw==} + json-schema-to-ts@3.1.1: + resolution: {integrity: sha512-+DWg8jCJG2TEnpy7kOm/7/AxaYoaRbjVB4LFZLySZlWn8exGs3A4OLJR966cVvU26N7X9TWxl+Jsw7dzAqKT6g==} + engines: {node: '>=16'} + json-schema-traverse@0.4.1: resolution: {integrity: sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==} @@ -5489,6 +5505,9 @@ packages: trough@2.2.0: resolution: {integrity: sha512-tmMpK00BjZiUyVyvrBK7knerNgmgvcV/KLVyuma/SC+TQN167GrMRciANTz09+k3zW8L8t60jWO1GpfkZdjTaw==} + ts-algebra@2.0.0: + resolution: {integrity: sha512-FPAhNPFMrkwz76P7cdjdmiShwMynZYN6SgOujD1urY4oNm80Ou9oMdmbR45LotcKOXoy7wSmHkRFE6Mxbrhefw==} + ts-api-utils@2.4.0: resolution: {integrity: sha512-3TaVTaAv2gTiMB35i3FiGJaRfwb3Pyn/j3m/bfAvGe8FB7CF6u+LMYqYlDh7reQf7UNvoTvdfAqHGmPGOSsPmA==} engines: {node: '>=18.12'} @@ -6027,6 +6046,12 @@ 
snapshots: '@alloc/quick-lru@5.2.0': {} + '@anthropic-ai/sdk@0.85.0(zod@3.25.76)': + dependencies: + json-schema-to-ts: 3.1.1 + optionalDependencies: + zod: 3.25.76 + '@apideck/better-ajv-errors@0.3.6(ajv@8.17.1)': dependencies: ajv: 8.17.1 @@ -9032,8 +9057,8 @@ snapshots: '@typescript-eslint/parser': 8.54.0(eslint@8.57.1)(typescript@5.9.3) eslint: 8.57.1 eslint-import-resolver-node: 0.3.9 - eslint-import-resolver-typescript: 3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.54.0(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1))(eslint@8.57.1) - eslint-plugin-import: 2.32.0(@typescript-eslint/parser@8.54.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.54.0(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1))(eslint@8.57.1))(eslint@8.57.1) + eslint-import-resolver-typescript: 3.10.1(eslint-plugin-import@2.32.0)(eslint@8.57.1) + eslint-plugin-import: 2.32.0(@typescript-eslint/parser@8.54.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1)(eslint@8.57.1) eslint-plugin-jsx-a11y: 6.10.2(eslint@8.57.1) eslint-plugin-react: 7.37.5(eslint@8.57.1) eslint-plugin-react-hooks: 5.0.0-canary-7118f5dd7-20230705(eslint@8.57.1) @@ -9052,7 +9077,7 @@ snapshots: transitivePeerDependencies: - supports-color - eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.54.0(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1))(eslint@8.57.1): + eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0)(eslint@8.57.1): dependencies: '@nolyfill/is-core-module': 1.0.39 debug: 4.4.3 @@ -9063,24 +9088,24 @@ snapshots: tinyglobby: 0.2.15 unrs-resolver: 1.11.1 optionalDependencies: - eslint-plugin-import: 
2.32.0(@typescript-eslint/parser@8.54.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.54.0(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1))(eslint@8.57.1))(eslint@8.57.1) + eslint-plugin-import: 2.32.0(@typescript-eslint/parser@8.54.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1)(eslint@8.57.1) transitivePeerDependencies: - supports-color - eslint-module-utils@2.12.1(@typescript-eslint/parser@8.54.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.54.0(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1))(eslint@8.57.1))(eslint@8.57.1): + eslint-module-utils@2.12.1(@typescript-eslint/parser@8.54.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1)(eslint@8.57.1): dependencies: debug: 3.2.7 optionalDependencies: '@typescript-eslint/parser': 8.54.0(eslint@8.57.1)(typescript@5.9.3) eslint: 8.57.1 eslint-import-resolver-node: 0.3.9 - eslint-import-resolver-typescript: 3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.54.0(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1))(eslint@8.57.1) + eslint-import-resolver-typescript: 3.10.1(eslint-plugin-import@2.32.0)(eslint@8.57.1) transitivePeerDependencies: - supports-color eslint-plugin-cafe@file:packages/web/eslint-plugins: {} - eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.54.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.54.0(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1))(eslint@8.57.1))(eslint@8.57.1): + eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.54.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1)(eslint@8.57.1): dependencies: '@rtsao/scc': 1.1.0 array-includes: 3.1.9 @@ -9091,7 +9116,7 
@@ snapshots: doctrine: 2.1.0 eslint: 8.57.1 eslint-import-resolver-node: 0.3.9 - eslint-module-utils: 2.12.1(@typescript-eslint/parser@8.54.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.54.0(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1))(eslint@8.57.1))(eslint@8.57.1) + eslint-module-utils: 2.12.1(@typescript-eslint/parser@8.54.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1)(eslint@8.57.1) hasown: 2.0.2 is-core-module: 2.16.1 is-glob: 4.0.3 @@ -10059,6 +10084,11 @@ snapshots: dependencies: fast-deep-equal: 3.1.3 + json-schema-to-ts@3.1.1: + dependencies: + '@babel/runtime': 7.28.6 + ts-algebra: 2.0.0 + json-schema-traverse@0.4.1: {} json-schema-traverse@1.0.0: {} @@ -12098,6 +12128,8 @@ snapshots: trough@2.2.0: {} + ts-algebra@2.0.0: {} + ts-api-utils@2.4.0(typescript@5.9.3): dependencies: typescript: 5.9.3