diff --git a/.changeset/fix-rpc-permission-gate-bypass.md b/.changeset/fix-rpc-permission-gate-bypass.md new file mode 100644 index 0000000..5217d04 --- /dev/null +++ b/.changeset/fix-rpc-permission-gate-bypass.md @@ -0,0 +1,5 @@ +--- +"@aliou/pi-guardrails": patch +--- + +Fix permission gate bypass in RPC mode: when `ctx.ui.custom()` returns undefined, fall back to `ctx.ui.select()` and deny by default unless the user explicitly allows the command. diff --git a/AGENTS.md b/AGENTS.md index 63005b9..405b01d 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -8,12 +8,15 @@ Pi is pre-1.0.0, so breaking changes can happen between Pi versions. This extens - TypeScript (strict mode) - pnpm 10.26.1 +- Vitest for testing - Biome for linting/formatting - Changesets for versioning ## Scripts ```bash +pnpm test # Run tests +pnpm test:watch # Run tests in watch mode pnpm typecheck # Type check pnpm lint # Lint (runs on pre-commit) pnpm format # Format @@ -31,10 +34,19 @@ src/ components/ # UI components (pattern editor) lib/ # Vendored subagent executor core (Phase 1) utils/ # Helpers (matching, glob expansion, migration, shell AST) +tests/ + utils/ # Test harness utilities (adapted from pi-harness) + pi-context.ts # Spy-based ExtensionContext / UI context builders + pi-test-harness.ts # Full extension loader with emitEvent() for hook testing + load-extension.ts # Wrapper for Pi internal extension loader + matchers.ts # Custom vitest matchers (toHaveRegisteredTool, etc.)
``` ## Conventions +- Tests live next to the code they test (`src/hooks/foo.test.ts`) +- Hook tests use `setupXxxHook()` directly with a mock `pi` and spy contexts from `tests/utils/pi-context.ts`, rather than loading the full extension (avoids `configLoader` side effects) +- The full `createPiTestHarness()` is available for testing commands and tools that go through the extension factory - New hooks: follow patterns in `src/hooks/` - Built-in dangerous command matching uses AST parsing via `@aliou/sh`; user-configured patterns use substring/regex matching - File protection is policy-based (`features.policies`, `policies.rules`), not legacy `envFiles` diff --git a/README.md b/README.md index fbdf240..b6d825a 100644 --- a/README.md +++ b/README.md @@ -192,6 +192,16 @@ Also note: - `preventBrew`, `preventPython`, `enforcePackageManager`, `packageManager` were removed from guardrails and moved to `@aliou/pi-toolchain`. +## Development + +```bash +pnpm test # Run tests +pnpm test:watch # Run tests in watch mode +pnpm typecheck # Type check +pnpm lint # Lint +pnpm format # Format +``` + ## Events Guardrails emits events for other extensions: diff --git a/src/hooks/permission-gate.test.ts b/src/hooks/permission-gate.test.ts new file mode 100644 index 0000000..359d6ae --- /dev/null +++ b/src/hooks/permission-gate.test.ts @@ -0,0 +1,332 @@ +import type { + BashToolCallEvent, + ExtensionAPI, + ExtensionContext, +} from "@mariozechner/pi-coding-agent"; +import { createEventBus } from "@mariozechner/pi-coding-agent"; +import { beforeEach, describe, expect, it, vi } from "vitest"; +import { createEventContext } from "../../tests/utils/pi-context"; +import type { ResolvedConfig } from "../config"; +import { configLoader } from "../config"; +import { setupPermissionGateHook } from "./permission-gate"; + +// Mock configLoader so allow-session path doesn't throw. 
+vi.mock("../config", async (importOriginal) => { + const original = (await importOriginal()) as Record; + return { + ...original, + configLoader: { + getConfig: vi.fn(() => ({ + permissionGate: { allowedPatterns: [] }, + })), + save: vi.fn(async () => {}), + }, + }; +}); + +// --------------------------------------------------------------------------- +// Constants — must match the production code's SELECT_* constants +// --------------------------------------------------------------------------- + +const SELECT_ALLOW_ONCE = "Allow once"; +const SELECT_ALLOW_SESSION = "Allow for session"; +const SELECT_DENY = "Deny"; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/** + * Minimal config enabling the permission gate with defaults. + * No custom patterns — relies on built-in structural matchers. + */ +function makeConfig( + overrides: Partial = {}, +): ResolvedConfig { + return { + version: "1", + enabled: true, + applyBuiltinDefaults: true, + features: { policies: false, permissionGate: true, pathAccess: false }, + policies: { rules: [] }, + pathAccess: { mode: "ask", allowedPaths: [] }, + permissionGate: { + patterns: [], + useBuiltinMatchers: true, + requireConfirmation: true, + allowedPatterns: [], + autoDenyPatterns: [], + explainCommands: false, + explainModel: null, + explainTimeout: 5000, + ...overrides, + }, + }; +} + +type ToolCallHandler = ( + event: BashToolCallEvent, + ctx: ExtensionContext, +) => Promise<{ block: true; reason: string } | undefined>; + +/** + * Create a mock ExtensionAPI that captures tool_call handler registrations. + * Returns the mock and a function to retrieve the registered handler. 
+ */ +function createMockPi() { + const handlers: ToolCallHandler[] = []; + const eventBus = createEventBus(); + + const pi = { + on(event: string, handler: ToolCallHandler) { + if (event === "tool_call") { + handlers.push(handler); + } + }, + events: eventBus, + // Stubs for any other ExtensionAPI methods that might be called. + registerCommand: vi.fn(), + registerTool: vi.fn(), + emit: vi.fn(), + } as unknown as ExtensionAPI; + + return { + pi, + getHandler(): ToolCallHandler { + if (handlers.length === 0) { + throw new Error("No tool_call handler registered"); + } + return handlers[0]; + }, + }; +} + +function bashEvent(command: string): BashToolCallEvent { + return { + type: "tool_call", + toolCallId: "tc_test", + toolName: "bash", + input: { command }, + }; +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe("permission gate", () => { + let handle: ReturnType; + let handler: ToolCallHandler; + + beforeEach(() => { + handle = createMockPi(); + setupPermissionGateHook(handle.pi, makeConfig()); + handler = handle.getHandler(); + }); + + it("allows safe commands", async () => { + const ctx = createEventContext({ hasUI: true }); + const result = await handler(bashEvent("echo hello"), ctx); + expect(result).toBeUndefined(); + }); + + it("blocks dangerous commands when user denies", async () => { + const ctx = createEventContext({ + hasUI: true, + ui: { + custom: vi.fn(async () => "deny") as ExtensionContext["ui"]["custom"], + }, + }); + const result = await handler(bashEvent("sudo rm -rf /"), ctx); + expect(result).toEqual({ + block: true, + reason: "User denied dangerous command", + }); + }); + + it("allows dangerous commands when user explicitly allows", async () => { + const ctx = createEventContext({ + hasUI: true, + ui: { + custom: vi.fn(async () => "allow") as ExtensionContext["ui"]["custom"], + }, + }); + const result = await 
handler(bashEvent("sudo rm -rf /"), ctx); + expect(result).toBeUndefined(); + }); + + it("blocks when hasUI is false (print/RPC mode)", async () => { + const ctx = createEventContext({ hasUI: false }); + const result = await handler(bashEvent("sudo rm -rf /"), ctx); + expect(result).toEqual(expect.objectContaining({ block: true })); + }); + + it("blocks when ctx.ui.custom() returns undefined (RPC stub)", async () => { + // Regression test for issue #19: in RPC mode, ctx.ui.custom() returns + // undefined. The gate used to check only for "deny", so undefined fell + // through and the command was silently allowed; it must now be blocked. + const ctx = createEventContext({ + hasUI: true, + ui: { + custom: vi.fn( + async () => undefined, + ) as ExtensionContext["ui"]["custom"], + select: vi.fn( + async () => undefined, + ) as ExtensionContext["ui"]["select"], + }, + }); + const result = await handler(bashEvent("sudo rm -rf /"), ctx); + expect(result).toEqual(expect.objectContaining({ block: true })); + expect(ctx.ui.select).toHaveBeenCalled(); + }); + + it("blocks auto-deny patterns without prompting", async () => { + const { pi, getHandler } = createMockPi(); + setupPermissionGateHook( + pi, + makeConfig({ + autoDenyPatterns: [{ pattern: "DROP TABLE" }], + }), + ); + const h = getHandler(); + const ctx = createEventContext({ hasUI: true }); + const result = await h(bashEvent("psql -c 'DROP TABLE users'"), ctx); + expect(result).toEqual(expect.objectContaining({ block: true })); + // Should not have prompted the user.
+ expect(ctx.ui.custom).not.toHaveBeenCalled(); + }); + + it("skips allowed patterns", async () => { + const { pi, getHandler } = createMockPi(); + setupPermissionGateHook( + pi, + makeConfig({ + allowedPatterns: [{ pattern: "sudo echo" }], + }), + ); + const h = getHandler(); + const ctx = createEventContext({ hasUI: true }); + const result = await h(bashEvent("sudo echo hello"), ctx); + expect(result).toBeUndefined(); + }); + + // --------------------------------------------------------------------------- + // RPC mode: ctx.ui.select() fallback when ctx.ui.custom() returns undefined + // --------------------------------------------------------------------------- + + it("falls back to select() when custom() returns undefined and allows on 'Allow once'", async () => { + const ctx = createEventContext({ + hasUI: true, + ui: { + custom: vi.fn( + async () => undefined, + ) as ExtensionContext["ui"]["custom"], + select: vi.fn( + async () => SELECT_ALLOW_ONCE, + ) as ExtensionContext["ui"]["select"], + }, + }); + const result = await handler(bashEvent("sudo rm -rf /"), ctx); + expect(result).toBeUndefined(); // not blocked → allowed + expect(ctx.ui.select).toHaveBeenCalled(); + }); + + it("falls back to select() when custom() returns undefined and allows-session on 'Allow for session'", async () => { + const ctx = createEventContext({ + hasUI: true, + ui: { + custom: vi.fn( + async () => undefined, + ) as ExtensionContext["ui"]["custom"], + select: vi.fn( + async () => SELECT_ALLOW_SESSION, + ) as ExtensionContext["ui"]["select"], + }, + }); + const result = await handler(bashEvent("sudo rm -rf /"), ctx); + expect(result).toBeUndefined(); // not blocked → allowed with session grant + expect(ctx.ui.select).toHaveBeenCalled(); + }); + + it("falls back to select() when custom() returns undefined and blocks on 'Deny'", async () => { + const ctx = createEventContext({ + hasUI: true, + ui: { + custom: vi.fn( + async () => undefined, + ) as ExtensionContext["ui"]["custom"], + 
select: vi.fn( + async () => SELECT_DENY, + ) as ExtensionContext["ui"]["select"], + }, + }); + const result = await handler(bashEvent("sudo rm -rf /"), ctx); + expect(result).toEqual({ + block: true, + reason: "User denied dangerous command", + }); + }); + + it("blocks when both custom() and select() return undefined", async () => { + const ctx = createEventContext({ + hasUI: true, + ui: { + custom: vi.fn( + async () => undefined, + ) as ExtensionContext["ui"]["custom"], + select: vi.fn( + async () => undefined, + ) as ExtensionContext["ui"]["select"], + }, + }); + const result = await handler(bashEvent("sudo rm -rf /"), ctx); + expect(result).toEqual(expect.objectContaining({ block: true })); + expect(ctx.ui.select).toHaveBeenCalled(); + }); + + it("does not call select() when custom() returns a valid result", async () => { + const ctx = createEventContext({ + hasUI: true, + ui: { + custom: vi.fn(async () => "deny") as ExtensionContext["ui"]["custom"], + }, + }); + await handler(bashEvent("sudo rm -rf /"), ctx); + expect(ctx.ui.select).not.toHaveBeenCalled(); + }); + + it("blocks when select() returns an unrecognized string", async () => { + const ctx = createEventContext({ + hasUI: true, + ui: { + custom: vi.fn( + async () => undefined, + ) as ExtensionContext["ui"]["custom"], + select: vi.fn(async () => "maybe") as ExtensionContext["ui"]["select"], + }, + }); + const result = await handler(bashEvent("sudo rm -rf /"), ctx); + expect(result).toEqual(expect.objectContaining({ block: true })); + }); + + it("saves session grant via configLoader when select() returns 'Allow for session'", async () => { + const ctx = createEventContext({ + hasUI: true, + ui: { + custom: vi.fn( + async () => undefined, + ) as ExtensionContext["ui"]["custom"], + select: vi.fn( + async () => SELECT_ALLOW_SESSION, + ) as ExtensionContext["ui"]["select"], + }, + }); + await handler(bashEvent("sudo rm -rf /"), ctx); + expect(configLoader.save).toHaveBeenCalledWith("memory", { + 
permissionGate: { + allowedPatterns: [{ pattern: "sudo rm -rf /" }], + }, + }); + }); +}); diff --git a/src/hooks/permission-gate.ts b/src/hooks/permission-gate.ts index 03ad1ad..8308e2e 100644 --- a/src/hooks/permission-gate.ts +++ b/src/hooks/permission-gate.ts @@ -580,10 +580,34 @@ export function setupPermissionGateHook( type ConfirmResult = "allow" | "allow-session" | "deny"; - const result = await ctx.ui.custom( + // Fallback select options for RPC mode (ctx.ui.custom is unimplemented). + const SELECT_ALLOW_ONCE = "Allow once"; + const SELECT_ALLOW_SESSION = "Allow for session"; + const SELECT_DENY = "Deny"; + const SELECT_OPTIONS = [ + SELECT_ALLOW_ONCE, + SELECT_ALLOW_SESSION, + SELECT_DENY, + ] as const; + + let result = await ctx.ui.custom( createPermissionGateConfirmComponent(command, description, explanation), ); + // Fallback: ctx.ui.custom() returns undefined in RPC/headless mode + // (Pi's RPC runtime stubs it as `async custom() { return undefined; }`). + // Fall back to ctx.ui.select() which works over the RPC protocol. + // If select() also returns undefined/malformed, deny by default. + if (result === undefined) { + const selection = await ctx.ui.select( + `Dangerous command: ${description}`, + [...SELECT_OPTIONS], + ); + if (selection === SELECT_ALLOW_ONCE) result = "allow"; + else if (selection === SELECT_ALLOW_SESSION) result = "allow-session"; + else result = "deny"; + } + if (result === "allow-session") { // Save command as allowed in memory scope (session-only). // Spread the resolved allowed patterns and append the new one. diff --git a/tests/utils/load-extension.ts b/tests/utils/load-extension.ts new file mode 100644 index 0000000..5256f9f --- /dev/null +++ b/tests/utils/load-extension.ts @@ -0,0 +1,11 @@ +/** + * Wrapper around pi-coding-agent's internal `loadExtensionFromFactory`. + * + * This function is not part of the package's public API (the `exports` field + * only exposes "." and "./hooks"). 
We import the compiled JS directly by + * absolute path and re-export it from this single module so that only one + * place needs updating if the internal path changes upstream. + * + * Vitest resolves this via the `resolve.alias` entry in vitest.config.ts. + */ +export { loadExtensionFromFactory } from "#pi-internal/extensions-loader"; diff --git a/tests/utils/matchers.ts b/tests/utils/matchers.ts new file mode 100644 index 0000000..a3d4960 --- /dev/null +++ b/tests/utils/matchers.ts @@ -0,0 +1,54 @@ +/** + * Custom vitest matchers for Pi extension test harness. + * + * These matchers inspect the real `Extension` object produced by the + * harness, not proxy-based mock state. + */ + +import { expect } from "vitest"; +import type { PiTestHarness } from "./pi-test-harness"; + +expect.extend({ + toHaveRegisteredTool(received: unknown, name: string) { + const harness = received as PiTestHarness; + const registered = harness.listRegisteredTools(); + const pass = registered.includes(name); + + return { + pass, + message: () => + pass + ? `expected harness not to have registered tool "${name}"` + : `expected harness to have registered tool "${name}", registered: [${registered.join(", ")}]`, + actual: registered, + expected: name, + }; + }, + toHaveRegisteredCommand(received: unknown, name: string) { + const harness = received as PiTestHarness; + const registered = harness.listRegisteredCommands(); + const pass = registered.includes(name); + + return { + pass, + message: () => + pass + ? 
`expected harness not to have registered command "${name}"` + : `expected harness to have registered command "${name}", registered: [${registered.join(", ")}]`, + actual: registered, + expected: name, + }; + }, +}); + +declare module "vitest" { + interface Assertion { + toHaveRegisteredTool(name: string): T; + toHaveRegisteredCommand(name: string): T; + } + + interface AsymmetricMatchersContaining { + toHaveRegisteredTool(name: string): void; + toHaveRegisteredCommand(name: string): void; + } +} diff --git a/tests/utils/pi-context.ts b/tests/utils/pi-context.ts new file mode 100644 index 0000000..2d25538 --- /dev/null +++ b/tests/utils/pi-context.ts @@ -0,0 +1,221 @@ +/** + * Explicit spy-based context builders for Pi extension tests. + * + * Every function property is a `vi.fn()` with a sensible default. This makes + * tests readable (you see exactly which properties exist) and keeps call + * tracking / override ergonomics that deep proxy mocks provide, without the + * hidden "any property access succeeds" footgun. + */ + +import type { + ExtensionAPI, + ExtensionCommandContext, + ExtensionUIContext, + SessionManager, +} from "@mariozechner/pi-coding-agent"; +import { vi } from "vitest"; + +/** + * ReadonlySessionManager is not exported from pi-coding-agent's public API. + * We reconstruct the type here as a Pick of SessionManager. 
+ */ +type ReadonlySessionManager = Pick< + SessionManager, + | "getCwd" + | "getSessionDir" + | "getSessionId" + | "getSessionFile" + | "getLeafId" + | "getLeafEntry" + | "getEntry" + | "getLabel" + | "getBranch" + | "getHeader" + | "getEntries" + | "getTree" + | "getSessionName" +>; + +// --------------------------------------------------------------------------- +// UI context +// --------------------------------------------------------------------------- + +export type UIOverrides = Partial; + +function createUIContext(overrides: UIOverrides = {}): ExtensionUIContext { + return { + select: vi.fn(async () => undefined), + confirm: vi.fn(async () => false), + input: vi.fn(async () => undefined), + notify: vi.fn(), + custom: vi.fn(async () => undefined), + onTerminalInput: vi.fn(() => () => {}), + setStatus: vi.fn(), + setWorkingMessage: vi.fn(), + setWidget: vi.fn(), + setFooter: vi.fn(), + setHeader: vi.fn(), + setTitle: vi.fn(), + pasteToEditor: vi.fn(), + setEditorText: vi.fn(), + getEditorText: vi.fn(() => ""), + editor: vi.fn(async () => undefined), + setEditorComponent: vi.fn(), + setToolsExpanded: vi.fn(), + ...overrides, + } as ExtensionUIContext; +} + +// --------------------------------------------------------------------------- +// Command context +// --------------------------------------------------------------------------- + +export interface CommandContextOverrides { + cwd?: string; + hasUI?: boolean; + ui?: UIOverrides; + sessionManager?: ReadonlySessionManager; + modelRegistry?: ExtensionCommandContext["modelRegistry"]; + model?: ExtensionCommandContext["model"]; + isIdle?: () => boolean; + abort?: () => void; + hasPendingMessages?: () => boolean; + shutdown?: () => void; + getContextUsage?: () => undefined; + compact?: () => void; + getSystemPrompt?: () => string; + waitForIdle?: () => Promise; + newSession?: ExtensionCommandContext["newSession"]; + fork?: ExtensionCommandContext["fork"]; + navigateTree?: ExtensionCommandContext["navigateTree"]; 
+ switchSession?: ExtensionCommandContext["switchSession"]; + reload?: () => Promise; +} + +/** + * Build an `ExtensionCommandContext` with every method as a spy. + * Pass overrides for the properties your test cares about. + */ +export function createCommandContext( + overrides: CommandContextOverrides = {}, +): ExtensionCommandContext { + const ui = createUIContext(overrides.ui); + + return { + cwd: overrides.cwd ?? process.cwd(), + hasUI: overrides.hasUI ?? true, + ui, + signal: undefined, + sessionManager: overrides.sessionManager ?? stubSessionManager(), + modelRegistry: + overrides.modelRegistry ?? + ({} as ExtensionCommandContext["modelRegistry"]), + model: overrides.model ?? undefined, + isIdle: vi.fn(overrides.isIdle ?? (() => true)), + abort: vi.fn(overrides.abort ?? (() => {})), + hasPendingMessages: vi.fn(overrides.hasPendingMessages ?? (() => false)), + shutdown: vi.fn(overrides.shutdown ?? (() => {})), + getContextUsage: vi.fn(overrides.getContextUsage ?? (() => undefined)), + compact: vi.fn(overrides.compact ?? (() => {})), + getSystemPrompt: vi.fn(overrides.getSystemPrompt ?? (() => "")), + waitForIdle: vi.fn(overrides.waitForIdle ?? (async () => {})), + newSession: vi.fn( + overrides.newSession ?? (async () => ({ cancelled: false })), + ), + fork: vi.fn(overrides.fork ?? (async () => ({ cancelled: false }))), + navigateTree: vi.fn( + overrides.navigateTree ?? (async () => ({ cancelled: false })), + ), + switchSession: vi.fn( + overrides.switchSession ?? (async () => ({ cancelled: false })), + ), + reload: vi.fn(overrides.reload ?? (async () => {})), + } as ExtensionCommandContext; +} + +// --------------------------------------------------------------------------- +// Tool context +// --------------------------------------------------------------------------- + +export interface ToolContextOverrides { + cwd?: string; +} + +type ToolContext = NonNullable< + Parameters[0]["execute"]>[4] +>; + +/** + * Build a minimal tool execution context. 
Tools typically only need `cwd`. + */ +export function createToolContext( + overrides: ToolContextOverrides = {}, +): ToolContext { + return { + cwd: overrides.cwd ?? process.cwd(), + signal: undefined, + } as unknown as ToolContext; +} + +// --------------------------------------------------------------------------- +// Event context (for tool_call / session_start handlers) +// --------------------------------------------------------------------------- + +export interface EventContextOverrides { + cwd?: string; + hasUI?: boolean; + ui?: UIOverrides; + sessionManager?: ReadonlySessionManager; +} + +/** + * Build an `ExtensionContext` for event handlers (tool_call, session_start). + * Lighter than command context — no session control methods. + */ +export function createEventContext(overrides: EventContextOverrides = {}) { + const ui = createUIContext(overrides.ui); + + return { + cwd: overrides.cwd ?? process.cwd(), + hasUI: overrides.hasUI ?? true, + ui, + signal: undefined, + sessionManager: overrides.sessionManager ?? stubSessionManager(), + modelRegistry: {} as ExtensionCommandContext["modelRegistry"], + model: undefined, + isIdle: vi.fn(() => true), + abort: vi.fn(), + hasPendingMessages: vi.fn(() => false), + shutdown: vi.fn(), + getContextUsage: vi.fn(() => undefined), + compact: vi.fn(), + getSystemPrompt: vi.fn(() => ""), + }; +} + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/** + * Minimal stub for ReadonlySessionManager when the test does not interact + * with session state at all. Every method is a vi.fn() returning a safe + * default. 
+ */ +function stubSessionManager(): ReadonlySessionManager { + return { + getCwd: vi.fn(() => process.cwd()), + getSessionDir: vi.fn(() => ""), + getSessionId: vi.fn(() => "stub-session-id"), + getSessionFile: vi.fn(() => undefined), + getLeafId: vi.fn(() => null), + getLeafEntry: vi.fn(() => undefined), + getEntry: vi.fn(() => undefined), + getLabel: vi.fn(() => undefined), + getBranch: vi.fn(() => []), + getHeader: vi.fn(() => undefined), + getEntries: vi.fn(() => []), + getTree: vi.fn(() => []), + getSessionName: vi.fn(() => undefined), + } as unknown as ReadonlySessionManager; +} diff --git a/tests/utils/pi-internal.d.ts b/tests/utils/pi-internal.d.ts new file mode 100644 index 0000000..31a9f1b --- /dev/null +++ b/tests/utils/pi-internal.d.ts @@ -0,0 +1,21 @@ +/** + * Type declarations for the internal pi-coding-agent module aliased via + * vitest.config.ts. This mirrors the exports of + * `@mariozechner/pi-coding-agent/dist/core/extensions/loader.js`. + */ +declare module "#pi-internal/extensions-loader" { + import type { + EventBus, + Extension, + ExtensionFactory, + ExtensionRuntime, + } from "@mariozechner/pi-coding-agent"; + + export function loadExtensionFromFactory( + factory: ExtensionFactory, + cwd: string, + eventBus: EventBus, + runtime: ExtensionRuntime, + extensionPath?: string, + ): Promise; +} diff --git a/tests/utils/pi-test-harness.ts b/tests/utils/pi-test-harness.ts new file mode 100644 index 0000000..f4d9517 --- /dev/null +++ b/tests/utils/pi-test-harness.ts @@ -0,0 +1,230 @@ +/** + * Test harness that loads extension factories using real Pi internals. 
+ * + * Instead of deep proxy mocks, this uses: + * - Real `createEventBus()` and `createExtensionRuntime()` + * - Real `loadExtensionFromFactory()` so extensions register through the + * actual `ExtensionAPI` code path + * - Explicit vi.fn() spies for context objects (see pi-context.ts) + * + * The harness exposes the loaded `Extension` object so matchers and tests + * can inspect registered commands, tools, and event handlers directly. + * + * Context overrides (sessionManager, UI spies, etc.) are set at harness + * creation time and apply to every command execution. Per-call overrides + * passed to `execute()` merge on top when needed. + * + * A built-in `newSession` spy creates a real `SessionManager.inMemory()` + * for each child session and exposes it via `getChildSessionManager()`. + */ + +import { mkdtempSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import type { + Extension, + ExtensionCommandContext, + ExtensionFactory, + ExtensionRuntime, + RegisteredCommand, + SessionManager, + ToolDefinition, +} from "@mariozechner/pi-coding-agent"; +import { + createEventBus, + createExtensionRuntime, + SessionManager as SessionManagerClass, +} from "@mariozechner/pi-coding-agent"; +import { vi } from "vitest"; +import { loadExtensionFromFactory } from "./load-extension"; +import { + type CommandContextOverrides, + createCommandContext, + createEventContext, + createToolContext, + type EventContextOverrides, +} from "./pi-context"; + +export interface PiTestHarness { + /** Working directory used by the harness. */ + cwd: string; + /** The real Extension object produced by the factory. */ + extension: Extension; + /** + * The shared ExtensionRuntime. Action methods (setModel, setThinkingLevel, + * etc.) are throwing stubs until replaced. Tests can assign vi.fn() spies + * directly to patch them before emitting events. + */ + runtime: ExtensionRuntime; + /** + * Built-in `newSession` spy. 
When a command calls `ctx.newSession()`, + * this spy creates a real `SessionManager.inMemory()`, runs the + * `setup()` callback on it, and returns `{ cancelled: false }`. + */ + newSession: ExtensionCommandContext["newSession"]; + /** + * Returns the `SessionManager` that was created for the most recent + * child session (from the `newSession` spy), or `undefined` if no + * child session has been created yet. + */ + getChildSessionManager(): SessionManager | undefined; + /** Look up a registered command by name and get an executor. */ + command(name: string): CommandHandle; + /** Look up a registered tool by name and get an executor. */ + tool(name: string): ToolHandle; + /** + * Emit an event directly to the extension's registered handlers. + * Returns the first non-undefined result (e.g., { block: true }). + */ + emitEvent( + eventName: string, + event: unknown, + ctxOverrides?: EventContextOverrides, + ): Promise; + /** All registered command names. */ + listRegisteredCommands(): string[]; + /** All registered tool names. */ + listRegisteredTools(): string[]; +} + +export interface CommandHandle { + registered: RegisteredCommand; + execute( + args?: string, + overrides?: CommandContextOverrides, + ): Promise; +} + +export interface ToolHandle { + /** The ToolDefinition that was registered (has execute, renderCall, etc). */ + registered: ToolDefinition; + execute(params: Record): Promise; +} + +export interface PiTestHarnessOptions { + cwd?: string; + extensionPath?: string; + /** + * Default context overrides applied to every command execution. + * Per-call overrides passed to `execute()` merge on top, with UI + * overrides deep-merged so harness-level and per-call spies coexist. + */ + context?: CommandContextOverrides; +} + +/** + * Create a test harness that loads an extension factory through real Pi + * internals. The returned harness lets you execute registered commands and + * tools with spy-based contexts. 
+ * + * Context overrides set here become defaults for all command executions. + * The harness includes a built-in `newSession` spy that creates real + * in-memory session managers for child sessions. Access the most recent + * child via `getChildSessionManager()`. + */ +export async function createPiTestHarness( + factory: ExtensionFactory, + options: PiTestHarnessOptions = {}, +): Promise { + const cwd = options.cwd ?? mkdtempSync(join(tmpdir(), "pi-test-cwd-")); + const harnessContext = options.context ?? {}; + const eventBus = createEventBus(); + const runtime = createExtensionRuntime(); + + const extension = await loadExtensionFromFactory( + factory, + cwd, + eventBus, + runtime, + options.extensionPath ?? "", + ); + + // Built-in newSession spy: creates a real child SessionManager and runs + // the setup callback, so tests can inspect entries written to the child. + let childSm: SessionManager | undefined; + const newSession = vi.fn( + async (opts?: Parameters[0]) => { + childSm = SessionManagerClass.inMemory(); + if (opts?.setup) { + await opts.setup(childSm); + } + return { cancelled: false }; + }, + ) as unknown as ExtensionCommandContext["newSession"]; + + let toolCallCounter = 0; + + function command(name: string): CommandHandle { + const registered = extension.commands.get(name); + if (!registered) { + const available = [...extension.commands.keys()].join(", "); + throw new Error( + `Command "${name}" is not registered. Registered: [${available}]`, + ); + } + return { + registered, + async execute( + args = "", + overrides: CommandContextOverrides = {}, + ): Promise { + const ctx = createCommandContext({ + cwd, + newSession, + ...harnessContext, + ...overrides, + // Deep-merge UI so harness-level and per-call spies coexist. 
+ ui: { ...harnessContext.ui, ...overrides.ui }, + }); + await registered.handler(args, ctx); + return ctx; + }, + }; + } + + function tool(name: string): ToolHandle { + const entry = extension.tools.get(name); + if (!entry) { + const available = [...extension.tools.keys()].join(", "); + throw new Error( + `Tool "${name}" is not registered. Registered: [${available}]`, + ); + } + const definition = entry.definition; + return { + registered: definition, + execute(params: Record) { + const id = `tc_${++toolCallCounter}`; + const ctx = createToolContext({ cwd }); + return definition.execute(id, params, undefined, undefined, ctx); + }, + }; + } + + async function emitEvent( + eventName: string, + event: unknown, + ctxOverrides: EventContextOverrides = {}, + ): Promise { + const handlers = extension.handlers.get(eventName) ?? []; + const ctx = createEventContext({ cwd, ...ctxOverrides }); + for (const handler of handlers) { + const result = await handler(event, ctx); + if (result !== undefined) return result; + } + return undefined; + } + + return { + cwd, + extension, + runtime, + newSession, + getChildSessionManager: () => childSm, + command, + tool, + emitEvent, + listRegisteredCommands: () => [...extension.commands.keys()], + listRegisteredTools: () => [...extension.tools.keys()], + }; +} diff --git a/tests/utils/theme.ts b/tests/utils/theme.ts new file mode 100644 index 0000000..5b0850a --- /dev/null +++ b/tests/utils/theme.ts @@ -0,0 +1,19 @@ +/** + * No-op theme for testing render functions. Every styling function returns + * the text unchanged, which is enough to exercise renderCall / renderResult + * without pulling in a real terminal theme. 
+ */ + +import type { Theme } from "@mariozechner/pi-coding-agent"; + +const identity = (_color: string, text: string) => text; + +export const NOOP_THEME: Theme = { + fg: identity, + bg: identity, + bold: (t: string) => t, + italic: (t: string) => t, + underline: (t: string) => t, + strikethrough: (t: string) => t, + inverse: (t: string) => t, +} as Theme; diff --git a/tests/utils/tmpdir.ts b/tests/utils/tmpdir.ts new file mode 100644 index 0000000..fecf10a --- /dev/null +++ b/tests/utils/tmpdir.ts @@ -0,0 +1,13 @@ +import { mkdtemp, rm } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { test as baseTest } from "vitest"; + +export const tmpdirTest = baseTest.extend<{ tmpdir: string }>({ + // biome-ignore lint/correctness/noEmptyPattern: Vitest fixture API requires destructuring first arg + tmpdir: async ({}, use) => { + const directory = await mkdtemp(join(tmpdir(), "vitest-")); + await use(directory); + await rm(directory, { recursive: true, force: true }); + }, +}); diff --git a/tests/vitest.setup.ts b/tests/vitest.setup.ts new file mode 100644 index 0000000..9f79ac0 --- /dev/null +++ b/tests/vitest.setup.ts @@ -0,0 +1 @@ +import "./utils/matchers"; diff --git a/tsconfig.json b/tsconfig.json index 69a38f1..9831d3a 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -10,6 +10,6 @@ "resolveJsonModule": true, "noEmit": true }, - "include": ["src/**/*"], + "include": ["src/**/*", "tests/**/*", "vitest.config.ts"], "exclude": ["node_modules"] } diff --git a/vitest.config.ts b/vitest.config.ts index a7104d7..0a7fa63 100644 --- a/vitest.config.ts +++ b/vitest.config.ts @@ -1,9 +1,21 @@ +import { resolve } from "node:path"; import { defineConfig } from "vitest/config"; export default defineConfig({ + resolve: { + alias: { + // Internal pi-coding-agent module not exposed via package "exports". + // Mapped here so tests can import it; the single wrapper in + // tests/utils/load-extension.ts is the only consumer. 
+ "#pi-internal/extensions-loader": resolve( + "node_modules/@mariozechner/pi-coding-agent/dist/core/extensions/loader.js", + ), + }, + }, test: { environment: "node", - include: ["src/**/*.test.ts"], + include: ["src/**/*.test.ts", "tests/**/*.test.ts"], + setupFiles: ["./tests/vitest.setup.ts"], mockReset: true, }, });